Diffstat (limited to 'src/backend/utils/adt')
-rw-r--r--  src/backend/utils/adt/.gitignore  2
-rw-r--r--  src/backend/utils/adt/Makefile  140
-rw-r--r--  src/backend/utils/adt/acl.c  5340
-rw-r--r--  src/backend/utils/adt/amutils.c  470
-rw-r--r--  src/backend/utils/adt/array_expanded.c  453
-rw-r--r--  src/backend/utils/adt/array_selfuncs.c  1193
-rw-r--r--  src/backend/utils/adt/array_typanalyze.c  791
-rw-r--r--  src/backend/utils/adt/array_userfuncs.c  912
-rw-r--r--  src/backend/utils/adt/arrayfuncs.c  6770
-rw-r--r--  src/backend/utils/adt/arraysubs.c  577
-rw-r--r--  src/backend/utils/adt/arrayutils.c  259
-rw-r--r--  src/backend/utils/adt/ascii.c  198
-rw-r--r--  src/backend/utils/adt/bool.c  404
-rw-r--r--  src/backend/utils/adt/cash.c  1175
-rw-r--r--  src/backend/utils/adt/char.c  253
-rw-r--r--  src/backend/utils/adt/cryptohashfuncs.c  168
-rw-r--r--  src/backend/utils/adt/date.c  3135
-rw-r--r--  src/backend/utils/adt/datetime.c  5102
-rw-r--r--  src/backend/utils/adt/datum.c  554
-rw-r--r--  src/backend/utils/adt/dbsize.c  996
-rw-r--r--  src/backend/utils/adt/domains.c  390
-rw-r--r--  src/backend/utils/adt/encode.c  602
-rw-r--r--  src/backend/utils/adt/enum.c  610
-rw-r--r--  src/backend/utils/adt/expandeddatum.c  145
-rw-r--r--  src/backend/utils/adt/expandedrecord.c  1633
-rw-r--r--  src/backend/utils/adt/float.c  4074
-rw-r--r--  src/backend/utils/adt/format_type.c  480
-rw-r--r--  src/backend/utils/adt/formatting.c  6729
-rw-r--r--  src/backend/utils/adt/genfile.c  709
-rw-r--r--  src/backend/utils/adt/geo_ops.c  5519
-rw-r--r--  src/backend/utils/adt/geo_selfuncs.c  96
-rw-r--r--  src/backend/utils/adt/geo_spgist.c  885
-rw-r--r--  src/backend/utils/adt/hbafuncs.c  564
-rw-r--r--  src/backend/utils/adt/inet_cidr_ntop.c  294
-rw-r--r--  src/backend/utils/adt/inet_net_pton.c  564
-rw-r--r--  src/backend/utils/adt/int.c  1648
-rw-r--r--  src/backend/utils/adt/int8.c  1524
-rw-r--r--  src/backend/utils/adt/json.c  1375
-rw-r--r--  src/backend/utils/adt/jsonb.c  2086
-rw-r--r--  src/backend/utils/adt/jsonb_gin.c  1411
-rw-r--r--  src/backend/utils/adt/jsonb_op.c  338
-rw-r--r--  src/backend/utils/adt/jsonb_util.c  1968
-rw-r--r--  src/backend/utils/adt/jsonbsubs.c  416
-rw-r--r--  src/backend/utils/adt/jsonfuncs.c  5546
-rw-r--r--  src/backend/utils/adt/jsonpath.c  1080
-rw-r--r--  src/backend/utils/adt/jsonpath_exec.c  2812
-rw-r--r--  src/backend/utils/adt/jsonpath_gram.c  2416
-rw-r--r--  src/backend/utils/adt/jsonpath_gram.y  606
-rw-r--r--  src/backend/utils/adt/jsonpath_scan.c  4873
-rw-r--r--  src/backend/utils/adt/jsonpath_scan.l  619
-rw-r--r--  src/backend/utils/adt/levenshtein.c  401
-rw-r--r--  src/backend/utils/adt/like.c  455
-rw-r--r--  src/backend/utils/adt/like_match.c  360
-rw-r--r--  src/backend/utils/adt/like_support.c  1800
-rw-r--r--  src/backend/utils/adt/lockfuncs.c  1069
-rw-r--r--  src/backend/utils/adt/mac.c  531
-rw-r--r--  src/backend/utils/adt/mac8.c  577
-rw-r--r--  src/backend/utils/adt/mcxtfuncs.c  195
-rw-r--r--  src/backend/utils/adt/misc.c  950
-rw-r--r--  src/backend/utils/adt/multirangetypes.c  2920
-rw-r--r--  src/backend/utils/adt/multirangetypes_selfuncs.c  1336
-rw-r--r--  src/backend/utils/adt/name.c  359
-rw-r--r--  src/backend/utils/adt/network.c  2114
-rw-r--r--  src/backend/utils/adt/network_gist.c  809
-rw-r--r--  src/backend/utils/adt/network_selfuncs.c  972
-rw-r--r--  src/backend/utils/adt/network_spgist.c  711
-rw-r--r--  src/backend/utils/adt/numeric.c  11484
-rw-r--r--  src/backend/utils/adt/numutils.c  604
-rw-r--r--  src/backend/utils/adt/oid.c  468
-rw-r--r--  src/backend/utils/adt/oracle_compat.c  1156
-rw-r--r--  src/backend/utils/adt/orderedsetaggs.c  1431
-rw-r--r--  src/backend/utils/adt/partitionfuncs.c  249
-rw-r--r--  src/backend/utils/adt/pg_locale.c  2187
-rw-r--r--  src/backend/utils/adt/pg_lsn.c  313
-rw-r--r--  src/backend/utils/adt/pg_upgrade_support.c  265
-rw-r--r--  src/backend/utils/adt/pgstatfuncs.c  2422
-rw-r--r--  src/backend/utils/adt/pseudotypes.c  391
-rw-r--r--  src/backend/utils/adt/quote.c  131
-rw-r--r--  src/backend/utils/adt/rangetypes.c  2622
-rw-r--r--  src/backend/utils/adt/rangetypes_gist.c  1798
-rw-r--r--  src/backend/utils/adt/rangetypes_selfuncs.c  1223
-rw-r--r--  src/backend/utils/adt/rangetypes_spgist.c  1000
-rw-r--r--  src/backend/utils/adt/rangetypes_typanalyze.c  427
-rw-r--r--  src/backend/utils/adt/regexp.c  1996
-rw-r--r--  src/backend/utils/adt/regproc.c  2062
-rw-r--r--  src/backend/utils/adt/ri_triggers.c  3020
-rw-r--r--  src/backend/utils/adt/rowtypes.c  2017
-rw-r--r--  src/backend/utils/adt/ruleutils.c  12406
-rw-r--r--  src/backend/utils/adt/selfuncs.c  7961
-rw-r--r--  src/backend/utils/adt/tid.c  429
-rw-r--r--  src/backend/utils/adt/timestamp.c  5921
-rw-r--r--  src/backend/utils/adt/trigfuncs.c  85
-rw-r--r--  src/backend/utils/adt/tsginidx.c  356
-rw-r--r--  src/backend/utils/adt/tsgistidx.c  816
-rw-r--r--  src/backend/utils/adt/tsquery.c  1349
-rw-r--r--  src/backend/utils/adt/tsquery_cleanup.c  444
-rw-r--r--  src/backend/utils/adt/tsquery_gist.c  277
-rw-r--r--  src/backend/utils/adt/tsquery_op.c  358
-rw-r--r--  src/backend/utils/adt/tsquery_rewrite.c  462
-rw-r--r--  src/backend/utils/adt/tsquery_util.c  447
-rw-r--r--  src/backend/utils/adt/tsrank.c  1012
-rw-r--r--  src/backend/utils/adt/tsvector.c  551
-rw-r--r--  src/backend/utils/adt/tsvector_op.c  2726
-rw-r--r--  src/backend/utils/adt/tsvector_parser.c  367
-rw-r--r--  src/backend/utils/adt/uuid.c  423
-rw-r--r--  src/backend/utils/adt/varbit.c  1894
-rw-r--r--  src/backend/utils/adt/varchar.c  1236
-rw-r--r--  src/backend/utils/adt/varlena.c  6556
-rw-r--r--  src/backend/utils/adt/version.c  24
-rw-r--r--  src/backend/utils/adt/windowfuncs.c  537
-rw-r--r--  src/backend/utils/adt/xid.c  373
-rw-r--r--  src/backend/utils/adt/xid8funcs.c  708
-rw-r--r--  src/backend/utils/adt/xml.c  4769
113 files changed, 186216 insertions, 0 deletions
diff --git a/src/backend/utils/adt/.gitignore b/src/backend/utils/adt/.gitignore
new file mode 100644
index 0000000..48cf941
--- /dev/null
+++ b/src/backend/utils/adt/.gitignore
@@ -0,0 +1,2 @@
+/jsonpath_gram.c
+/jsonpath_scan.c
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile
new file mode 100644
index 0000000..7c722ea
--- /dev/null
+++ b/src/backend/utils/adt/Makefile
@@ -0,0 +1,140 @@
+#
+# Makefile for utils/adt
+#
+# src/backend/utils/adt/Makefile
+#
+
+subdir = src/backend/utils/adt
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+
+override CPPFLAGS := -I. -I$(srcdir) $(CPPFLAGS)
+
+# keep this list arranged alphabetically or it gets to be a mess
+OBJS = \
+ acl.o \
+ amutils.o \
+ array_expanded.o \
+ array_selfuncs.o \
+ array_typanalyze.o \
+ array_userfuncs.o \
+ arrayfuncs.o \
+ arraysubs.o \
+ arrayutils.o \
+ ascii.o \
+ bool.o \
+ cash.o \
+ char.o \
+ cryptohashfuncs.o \
+ date.o \
+ datetime.o \
+ datum.o \
+ dbsize.o \
+ domains.o \
+ encode.o \
+ enum.o \
+ expandeddatum.o \
+ expandedrecord.o \
+ float.o \
+ format_type.o \
+ formatting.o \
+ genfile.o \
+ geo_ops.o \
+ geo_selfuncs.o \
+ geo_spgist.o \
+ hbafuncs.o \
+ inet_cidr_ntop.o \
+ inet_net_pton.o \
+ int.o \
+ int8.o \
+ json.o \
+ jsonb.o \
+ jsonb_gin.o \
+ jsonb_op.o \
+ jsonb_util.o \
+ jsonfuncs.o \
+ jsonbsubs.o \
+ jsonpath.o \
+ jsonpath_exec.o \
+ jsonpath_gram.o \
+ like.o \
+ like_support.o \
+ lockfuncs.o \
+ mac.o \
+ mac8.o \
+ mcxtfuncs.o \
+ misc.o \
+ multirangetypes.o \
+ multirangetypes_selfuncs.o \
+ name.o \
+ network.o \
+ network_gist.o \
+ network_selfuncs.o \
+ network_spgist.o \
+ numeric.o \
+ numutils.o \
+ oid.o \
+ oracle_compat.o \
+ orderedsetaggs.o \
+ partitionfuncs.o \
+ pg_locale.o \
+ pg_lsn.o \
+ pg_upgrade_support.o \
+ pgstatfuncs.o \
+ pseudotypes.o \
+ quote.o \
+ rangetypes.o \
+ rangetypes_gist.o \
+ rangetypes_selfuncs.o \
+ rangetypes_spgist.o \
+ rangetypes_typanalyze.o \
+ regexp.o \
+ regproc.o \
+ ri_triggers.o \
+ rowtypes.o \
+ ruleutils.o \
+ selfuncs.o \
+ tid.o \
+ timestamp.o \
+ trigfuncs.o \
+ tsginidx.o \
+ tsgistidx.o \
+ tsquery.o \
+ tsquery_cleanup.o \
+ tsquery_gist.o \
+ tsquery_op.o \
+ tsquery_rewrite.o \
+ tsquery_util.o \
+ tsrank.o \
+ tsvector.o \
+ tsvector_op.o \
+ tsvector_parser.o \
+ uuid.o \
+ varbit.o \
+ varchar.o \
+ varlena.o \
+ version.o \
+ windowfuncs.o \
+ xid.o \
+ xid8funcs.o \
+ xml.o
+
+jsonpath_scan.c: FLEXFLAGS = -CF -p -p
+jsonpath_scan.c: FLEX_NO_BACKUP=yes
+
+# jsonpath_scan is compiled as part of jsonpath_gram
+jsonpath_gram.o: jsonpath_scan.c
+
+# jsonpath_gram.c and jsonpath_scan.c are in the distribution tarball,
+# so they are not cleaned here.
+clean distclean maintainer-clean:
+ rm -f lex.backup
+
+like.o: like.c like_match.c
+
+# Some code in numeric.c benefits from auto-vectorization
+numeric.o: CFLAGS += ${CFLAGS_VECTORIZE}
+
+varlena.o: varlena.c levenshtein.c
+
+include $(top_srcdir)/src/backend/common.mk
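The like.o and varlena.o dependencies above exist because like_match.c and levenshtein.c are not compiled as separate objects: they are #include'd as templates by like.c and varlena.c, so the including object has to be rebuilt whenever the template changes. A minimal sketch of that pattern, using purely hypothetical file and symbol names:

    /* str_match_impl.c -- hypothetical template, only ever compiled via #include */
    static bool
    MATCHFUNC(const MATCHCHAR *a, const MATCHCHAR *b)
    {
        /* placeholder body: exact string equality */
        while (*a && *a == *b)
            a++, b++;
        return *a == *b;
    }

    /* str_match.c -- hypothetical consumer; instantiates the template twice */
    #include <stdbool.h>

    #define MATCHCHAR char
    #define MATCHFUNC match_single_byte
    #include "str_match_impl.c"
    #undef MATCHCHAR
    #undef MATCHFUNC

    #define MATCHCHAR unsigned char
    #define MATCHFUNC match_multi_byte
    #include "str_match_impl.c"

With that layout, a rule of the form str_match.o: str_match.c str_match_impl.c is what keeps the object current, which is exactly the role the like.o and varlena.o lines play here.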
diff --git a/src/backend/utils/adt/acl.c b/src/backend/utils/adt/acl.c
new file mode 100644
index 0000000..5b7236a
--- /dev/null
+++ b/src/backend/utils/adt/acl.c
@@ -0,0 +1,5340 @@
+/*-------------------------------------------------------------------------
+ *
+ * acl.c
+ * Basic access control list data structures manipulation routines.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/acl.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+
+#include "access/htup_details.h"
+#include "catalog/catalog.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_auth_members.h"
+#include "catalog/pg_authid.h"
+#include "catalog/pg_class.h"
+#include "catalog/pg_database.h"
+#include "catalog/pg_parameter_acl.h"
+#include "catalog/pg_type.h"
+#include "commands/dbcommands.h"
+#include "commands/proclang.h"
+#include "commands/tablespace.h"
+#include "common/hashfn.h"
+#include "foreign/foreign.h"
+#include "funcapi.h"
+#include "lib/qunique.h"
+#include "miscadmin.h"
+#include "utils/acl.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/catcache.h"
+#include "utils/guc.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/syscache.h"
+#include "utils/varlena.h"
+
+typedef struct
+{
+ const char *name;
+ AclMode value;
+} priv_map;
+
+/*
+ * We frequently need to test whether a given role is a member of some other
+ * role. In most of these tests the "given role" is the same, namely the
+ * active current user. So we can optimize it by keeping cached lists of all
+ * the roles the "given role" is a member of, directly or indirectly.
+ *
+ * Possibly this mechanism should be generalized to allow caching membership
+ * info for multiple roles?
+ *
+ * Each element of cached_roles is an OID list of constituent roles for the
+ * corresponding element of cached_role (always including the cached_role
+ * itself). One cache has ROLERECURSE_PRIVS semantics, and the other has
+ * ROLERECURSE_MEMBERS semantics.
+ */
+enum RoleRecurseType
+{
+ ROLERECURSE_PRIVS = 0, /* recurse if rolinherit */
+ ROLERECURSE_MEMBERS = 1 /* recurse unconditionally */
+};
+static Oid cached_role[] = {InvalidOid, InvalidOid};
+static List *cached_roles[] = {NIL, NIL};
+static uint32 cached_db_hash;
+
+
+static const char *getid(const char *s, char *n);
+static void putid(char *p, const char *s);
+static Acl *allocacl(int n);
+static void check_acl(const Acl *acl);
+static const char *aclparse(const char *s, AclItem *aip);
+static bool aclitem_match(const AclItem *a1, const AclItem *a2);
+static int aclitemComparator(const void *arg1, const void *arg2);
+static void check_circularity(const Acl *old_acl, const AclItem *mod_aip,
+ Oid ownerId);
+static Acl *recursive_revoke(Acl *acl, Oid grantee, AclMode revoke_privs,
+ Oid ownerId, DropBehavior behavior);
+
+static AclMode convert_priv_string(text *priv_type_text);
+static AclMode convert_any_priv_string(text *priv_type_text,
+ const priv_map *privileges);
+
+static Oid convert_table_name(text *tablename);
+static AclMode convert_table_priv_string(text *priv_type_text);
+static AclMode convert_sequence_priv_string(text *priv_type_text);
+static AttrNumber convert_column_name(Oid tableoid, text *column);
+static AclMode convert_column_priv_string(text *priv_type_text);
+static Oid convert_database_name(text *databasename);
+static AclMode convert_database_priv_string(text *priv_type_text);
+static Oid convert_foreign_data_wrapper_name(text *fdwname);
+static AclMode convert_foreign_data_wrapper_priv_string(text *priv_type_text);
+static Oid convert_function_name(text *functionname);
+static AclMode convert_function_priv_string(text *priv_type_text);
+static Oid convert_language_name(text *languagename);
+static AclMode convert_language_priv_string(text *priv_type_text);
+static Oid convert_schema_name(text *schemaname);
+static AclMode convert_schema_priv_string(text *priv_type_text);
+static Oid convert_server_name(text *servername);
+static AclMode convert_server_priv_string(text *priv_type_text);
+static Oid convert_tablespace_name(text *tablespacename);
+static AclMode convert_tablespace_priv_string(text *priv_type_text);
+static Oid convert_type_name(text *typename);
+static AclMode convert_type_priv_string(text *priv_type_text);
+static AclMode convert_parameter_priv_string(text *priv_text);
+static AclMode convert_role_priv_string(text *priv_type_text);
+static AclResult pg_role_aclcheck(Oid role_oid, Oid roleid, AclMode mode);
+
+static void RoleMembershipCacheCallback(Datum arg, int cacheid, uint32 hashvalue);
+
+
+/*
+ * getid
+ * Consumes the first alphanumeric string (identifier) found in string
+ * 's', ignoring any leading white space. If it finds a double quote
+ * it returns the word inside the quotes.
+ *
+ * RETURNS:
+ * the string position in 's' that points to the next non-space character
+ * in 's', after any quotes. Also:
+ * - loads the identifier into 'n'. (If no identifier is found, 'n'
+ * contains an empty string.) 'n' must be NAMEDATALEN bytes.
+ */
+static const char *
+getid(const char *s, char *n)
+{
+ int len = 0;
+ bool in_quotes = false;
+
+ Assert(s && n);
+
+ while (isspace((unsigned char) *s))
+ s++;
+ /* This code had better match what putid() does, below */
+ for (;
+ *s != '\0' &&
+ (isalnum((unsigned char) *s) ||
+ *s == '_' ||
+ *s == '"' ||
+ in_quotes);
+ s++)
+ {
+ if (*s == '"')
+ {
+ /* safe to look at next char (could be '\0' though) */
+ if (*(s + 1) != '"')
+ {
+ in_quotes = !in_quotes;
+ continue;
+ }
+ /* it's an escaped double quote; skip the escaping char */
+ s++;
+ }
+
+ /* Add the character to the string */
+ if (len >= NAMEDATALEN - 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_NAME_TOO_LONG),
+ errmsg("identifier too long"),
+ errdetail("Identifier must be less than %d characters.",
+ NAMEDATALEN)));
+
+ n[len++] = *s;
+ }
+ n[len] = '\0';
+ while (isspace((unsigned char) *s))
+ s++;
+ return s;
+}
+
+/*
+ * Write a role name at *p, adding double quotes if needed.
+ * There must be at least (2*NAMEDATALEN)+2 bytes available at *p.
+ * This needs to be kept in sync with copyAclUserName in pg_dump/dumputils.c
+ */
+static void
+putid(char *p, const char *s)
+{
+ const char *src;
+ bool safe = true;
+
+ for (src = s; *src; src++)
+ {
+ /* This test had better match what getid() does, above */
+ if (!isalnum((unsigned char) *src) && *src != '_')
+ {
+ safe = false;
+ break;
+ }
+ }
+ if (!safe)
+ *p++ = '"';
+ for (src = s; *src; src++)
+ {
+ /* A double quote character in a username is encoded as "" */
+ if (*src == '"')
+ *p++ = '"';
+ *p++ = *src;
+ }
+ if (!safe)
+ *p++ = '"';
+ *p = '\0';
+}
+
+/*
+ * aclparse
+ * Consumes and parses an ACL specification of the form:
+ * [group|user] [A-Za-z0-9]*=[rwaR]*
+ * from string 's', ignoring any leading white space or white space
+ * between the optional id type keyword (group|user) and the actual
+ * ACL specification.
+ *
+ * The group|user decoration is unnecessary in the roles world,
+ * but we still accept it for backward compatibility.
+ *
+ * This routine is called by the parser as well as aclitemin(), hence
+ * the added generality.
+ *
+ * RETURNS:
+ * the string position in 's' immediately following the ACL
+ * specification. Also:
+ * - loads the structure pointed to by 'aip' with the appropriate
+ * UID/GID, id type identifier and mode type values.
+ */
+static const char *
+aclparse(const char *s, AclItem *aip)
+{
+ AclMode privs,
+ goption,
+ read;
+ char name[NAMEDATALEN];
+ char name2[NAMEDATALEN];
+
+ Assert(s && aip);
+
+ s = getid(s, name);
+ if (*s != '=')
+ {
+ /* we just read a keyword, not a name */
+ if (strcmp(name, "group") != 0 && strcmp(name, "user") != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("unrecognized key word: \"%s\"", name),
+ errhint("ACL key word must be \"group\" or \"user\".")));
+ s = getid(s, name); /* move s to the name beyond the keyword */
+ if (name[0] == '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("missing name"),
+ errhint("A name must follow the \"group\" or \"user\" key word.")));
+ }
+
+ if (*s != '=')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("missing \"=\" sign")));
+
+ privs = goption = ACL_NO_RIGHTS;
+
+ for (++s, read = 0; isalpha((unsigned char) *s) || *s == '*'; s++)
+ {
+ switch (*s)
+ {
+ case '*':
+ goption |= read;
+ break;
+ case ACL_INSERT_CHR:
+ read = ACL_INSERT;
+ break;
+ case ACL_SELECT_CHR:
+ read = ACL_SELECT;
+ break;
+ case ACL_UPDATE_CHR:
+ read = ACL_UPDATE;
+ break;
+ case ACL_DELETE_CHR:
+ read = ACL_DELETE;
+ break;
+ case ACL_TRUNCATE_CHR:
+ read = ACL_TRUNCATE;
+ break;
+ case ACL_REFERENCES_CHR:
+ read = ACL_REFERENCES;
+ break;
+ case ACL_TRIGGER_CHR:
+ read = ACL_TRIGGER;
+ break;
+ case ACL_EXECUTE_CHR:
+ read = ACL_EXECUTE;
+ break;
+ case ACL_USAGE_CHR:
+ read = ACL_USAGE;
+ break;
+ case ACL_CREATE_CHR:
+ read = ACL_CREATE;
+ break;
+ case ACL_CREATE_TEMP_CHR:
+ read = ACL_CREATE_TEMP;
+ break;
+ case ACL_CONNECT_CHR:
+ read = ACL_CONNECT;
+ break;
+ case ACL_SET_CHR:
+ read = ACL_SET;
+ break;
+ case ACL_ALTER_SYSTEM_CHR:
+ read = ACL_ALTER_SYSTEM;
+ break;
+ case 'R': /* ignore old RULE privileges */
+ read = 0;
+ break;
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid mode character: must be one of \"%s\"",
+ ACL_ALL_RIGHTS_STR)));
+ }
+
+ privs |= read;
+ }
+
+ if (name[0] == '\0')
+ aip->ai_grantee = ACL_ID_PUBLIC;
+ else
+ aip->ai_grantee = get_role_oid(name, false);
+
+ /*
+ * XXX Allow a degree of backward compatibility by defaulting the grantor
+ * to the superuser.
+ */
+ if (*s == '/')
+ {
+ s = getid(s + 1, name2);
+ if (name2[0] == '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("a name must follow the \"/\" sign")));
+ aip->ai_grantor = get_role_oid(name2, false);
+ }
+ else
+ {
+ aip->ai_grantor = BOOTSTRAP_SUPERUSERID;
+ ereport(WARNING,
+ (errcode(ERRCODE_INVALID_GRANTOR),
+ errmsg("defaulting grantor to user ID %u",
+ BOOTSTRAP_SUPERUSERID)));
+ }
+
+ ACLITEM_SET_PRIVS_GOPTIONS(*aip, privs, goption);
+
+ return s;
+}
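
As a rough illustration of the text form aclparse() accepts, here is a backend-side sketch (not part of this file) that parses one aclitem literal through aclitemin(), which appears a little further down; it assumes roles named joe and admin exist and omits error handling:

    #include "postgres.h"
    #include "fmgr.h"
    #include "utils/acl.h"
    #include "utils/fmgrprotos.h"

    static void
    aclitem_parse_example(void)
    {
        /* "joe=r*w/admin": joe holds SELECT WITH GRANT OPTION plus UPDATE, granted by admin */
        Datum    d = DirectFunctionCall1(aclitemin,
                                         CStringGetDatum("joe=r*w/admin"));
        AclItem *aip = DatumGetAclItemP(d);

        Assert(ACLITEM_GET_PRIVS(*aip) == (ACL_SELECT | ACL_UPDATE));
        Assert(ACLITEM_GET_GOPTIONS(*aip) == ACL_SELECT);
    }

The leading "group" or "user" keyword and the "/grantor" suffix are both optional, as described above; omitting the grantor draws the WARNING about defaulting to the bootstrap superuser.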
+
+/*
+ * allocacl
+ * Allocates storage for a new Acl with 'n' entries.
+ *
+ * RETURNS:
+ * the new Acl
+ */
+static Acl *
+allocacl(int n)
+{
+ Acl *new_acl;
+ Size size;
+
+ if (n < 0)
+ elog(ERROR, "invalid size: %d", n);
+ size = ACL_N_SIZE(n);
+ new_acl = (Acl *) palloc0(size);
+ SET_VARSIZE(new_acl, size);
+ new_acl->ndim = 1;
+ new_acl->dataoffset = 0; /* we never put in any nulls */
+ new_acl->elemtype = ACLITEMOID;
+ ARR_LBOUND(new_acl)[0] = 1;
+ ARR_DIMS(new_acl)[0] = n;
+ return new_acl;
+}
+
+/*
+ * Create a zero-entry ACL
+ */
+Acl *
+make_empty_acl(void)
+{
+ return allocacl(0);
+}
+
+/*
+ * Copy an ACL
+ */
+Acl *
+aclcopy(const Acl *orig_acl)
+{
+ Acl *result_acl;
+
+ result_acl = allocacl(ACL_NUM(orig_acl));
+
+ memcpy(ACL_DAT(result_acl),
+ ACL_DAT(orig_acl),
+ ACL_NUM(orig_acl) * sizeof(AclItem));
+
+ return result_acl;
+}
+
+/*
+ * Concatenate two ACLs
+ *
+ * This is a bit cheesy, since we may produce an ACL with redundant entries.
+ * Be careful what the result is used for!
+ */
+Acl *
+aclconcat(const Acl *left_acl, const Acl *right_acl)
+{
+ Acl *result_acl;
+
+ result_acl = allocacl(ACL_NUM(left_acl) + ACL_NUM(right_acl));
+
+ memcpy(ACL_DAT(result_acl),
+ ACL_DAT(left_acl),
+ ACL_NUM(left_acl) * sizeof(AclItem));
+
+ memcpy(ACL_DAT(result_acl) + ACL_NUM(left_acl),
+ ACL_DAT(right_acl),
+ ACL_NUM(right_acl) * sizeof(AclItem));
+
+ return result_acl;
+}
+
+/*
+ * Merge two ACLs
+ *
+ * This produces a properly merged ACL with no redundant entries.
+ * Returns NULL on NULL input.
+ */
+Acl *
+aclmerge(const Acl *left_acl, const Acl *right_acl, Oid ownerId)
+{
+ Acl *result_acl;
+ AclItem *aip;
+ int i,
+ num;
+
+ /* Check for cases where one or both are empty/null */
+ if (left_acl == NULL || ACL_NUM(left_acl) == 0)
+ {
+ if (right_acl == NULL || ACL_NUM(right_acl) == 0)
+ return NULL;
+ else
+ return aclcopy(right_acl);
+ }
+ else
+ {
+ if (right_acl == NULL || ACL_NUM(right_acl) == 0)
+ return aclcopy(left_acl);
+ }
+
+ /* Merge them the hard way, one item at a time */
+ result_acl = aclcopy(left_acl);
+
+ aip = ACL_DAT(right_acl);
+ num = ACL_NUM(right_acl);
+
+ for (i = 0; i < num; i++, aip++)
+ {
+ Acl *tmp_acl;
+
+ tmp_acl = aclupdate(result_acl, aip, ACL_MODECHG_ADD,
+ ownerId, DROP_RESTRICT);
+ pfree(result_acl);
+ result_acl = tmp_acl;
+ }
+
+ return result_acl;
+}
+
+/*
+ * Sort the items in an ACL (into an arbitrary but consistent order)
+ */
+void
+aclitemsort(Acl *acl)
+{
+ if (acl != NULL && ACL_NUM(acl) > 1)
+ qsort(ACL_DAT(acl), ACL_NUM(acl), sizeof(AclItem), aclitemComparator);
+}
+
+/*
+ * Check if two ACLs are exactly equal
+ *
+ * This will not detect equality if the two arrays contain the same items
+ * in different orders. To handle that case, sort both inputs first,
+ * using aclitemsort().
+ */
+bool
+aclequal(const Acl *left_acl, const Acl *right_acl)
+{
+ /* Check for cases where one or both are empty/null */
+ if (left_acl == NULL || ACL_NUM(left_acl) == 0)
+ {
+ if (right_acl == NULL || ACL_NUM(right_acl) == 0)
+ return true;
+ else
+ return false;
+ }
+ else
+ {
+ if (right_acl == NULL || ACL_NUM(right_acl) == 0)
+ return false;
+ }
+
+ if (ACL_NUM(left_acl) != ACL_NUM(right_acl))
+ return false;
+
+ if (memcmp(ACL_DAT(left_acl),
+ ACL_DAT(right_acl),
+ ACL_NUM(left_acl) * sizeof(AclItem)) == 0)
+ return true;
+
+ return false;
+}
+
+/*
+ * Verify that an ACL array is acceptable (one-dimensional and has no nulls)
+ */
+static void
+check_acl(const Acl *acl)
+{
+ if (ARR_ELEMTYPE(acl) != ACLITEMOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("ACL array contains wrong data type")));
+ if (ARR_NDIM(acl) != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("ACL arrays must be one-dimensional")));
+ if (ARR_HASNULL(acl))
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("ACL arrays must not contain null values")));
+}
+
+/*
+ * aclitemin
+ * Allocates storage for, and fills in, a new AclItem given a string
+ * 's' that contains an ACL specification. See aclparse for details.
+ *
+ * RETURNS:
+ * the new AclItem
+ */
+Datum
+aclitemin(PG_FUNCTION_ARGS)
+{
+ const char *s = PG_GETARG_CSTRING(0);
+ AclItem *aip;
+
+ aip = (AclItem *) palloc(sizeof(AclItem));
+ s = aclparse(s, aip);
+ while (isspace((unsigned char) *s))
+ ++s;
+ if (*s)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("extra garbage at the end of the ACL specification")));
+
+ PG_RETURN_ACLITEM_P(aip);
+}
+
+/*
+ * aclitemout
+ * Allocates storage for, and fills in, a new null-delimited string
+ * containing a formatted ACL specification. See aclparse for details.
+ *
+ * RETURNS:
+ * the new string
+ */
+Datum
+aclitemout(PG_FUNCTION_ARGS)
+{
+ AclItem *aip = PG_GETARG_ACLITEM_P(0);
+ char *p;
+ char *out;
+ HeapTuple htup;
+ unsigned i;
+
+ out = palloc(strlen("=/") +
+ 2 * N_ACL_RIGHTS +
+ 2 * (2 * NAMEDATALEN + 2) +
+ 1);
+
+ p = out;
+ *p = '\0';
+
+ if (aip->ai_grantee != ACL_ID_PUBLIC)
+ {
+ htup = SearchSysCache1(AUTHOID, ObjectIdGetDatum(aip->ai_grantee));
+ if (HeapTupleIsValid(htup))
+ {
+ putid(p, NameStr(((Form_pg_authid) GETSTRUCT(htup))->rolname));
+ ReleaseSysCache(htup);
+ }
+ else
+ {
+ /* Generate numeric OID if we don't find an entry */
+ sprintf(p, "%u", aip->ai_grantee);
+ }
+ }
+ while (*p)
+ ++p;
+
+ *p++ = '=';
+
+ for (i = 0; i < N_ACL_RIGHTS; ++i)
+ {
+ if (ACLITEM_GET_PRIVS(*aip) & (1 << i))
+ *p++ = ACL_ALL_RIGHTS_STR[i];
+ if (ACLITEM_GET_GOPTIONS(*aip) & (1 << i))
+ *p++ = '*';
+ }
+
+ *p++ = '/';
+ *p = '\0';
+
+ htup = SearchSysCache1(AUTHOID, ObjectIdGetDatum(aip->ai_grantor));
+ if (HeapTupleIsValid(htup))
+ {
+ putid(p, NameStr(((Form_pg_authid) GETSTRUCT(htup))->rolname));
+ ReleaseSysCache(htup);
+ }
+ else
+ {
+ /* Generate numeric OID if we don't find an entry */
+ sprintf(p, "%u", aip->ai_grantor);
+ }
+
+ PG_RETURN_CSTRING(out);
+}
+
+/*
+ * aclitem_match
+ * Two AclItems are considered to match iff they have the same
+ * grantee and grantor; the privileges are ignored.
+ */
+static bool
+aclitem_match(const AclItem *a1, const AclItem *a2)
+{
+ return a1->ai_grantee == a2->ai_grantee &&
+ a1->ai_grantor == a2->ai_grantor;
+}
+
+/*
+ * aclitemComparator
+ * qsort comparison function for AclItems
+ */
+static int
+aclitemComparator(const void *arg1, const void *arg2)
+{
+ const AclItem *a1 = (const AclItem *) arg1;
+ const AclItem *a2 = (const AclItem *) arg2;
+
+ if (a1->ai_grantee > a2->ai_grantee)
+ return 1;
+ if (a1->ai_grantee < a2->ai_grantee)
+ return -1;
+ if (a1->ai_grantor > a2->ai_grantor)
+ return 1;
+ if (a1->ai_grantor < a2->ai_grantor)
+ return -1;
+ if (a1->ai_privs > a2->ai_privs)
+ return 1;
+ if (a1->ai_privs < a2->ai_privs)
+ return -1;
+ return 0;
+}
+
+/*
+ * aclitem equality operator
+ */
+Datum
+aclitem_eq(PG_FUNCTION_ARGS)
+{
+ AclItem *a1 = PG_GETARG_ACLITEM_P(0);
+ AclItem *a2 = PG_GETARG_ACLITEM_P(1);
+ bool result;
+
+ result = a1->ai_privs == a2->ai_privs &&
+ a1->ai_grantee == a2->ai_grantee &&
+ a1->ai_grantor == a2->ai_grantor;
+ PG_RETURN_BOOL(result);
+}
+
+/*
+ * aclitem hash function
+ *
+ * We make aclitems hashable not so much because anyone is likely to hash
+ * them, as because we want array equality to work on aclitem arrays, and
+ * with the typcache mechanism we must have a hash or btree opclass.
+ */
+Datum
+hash_aclitem(PG_FUNCTION_ARGS)
+{
+ AclItem *a = PG_GETARG_ACLITEM_P(0);
+
+ /* not very bright, but avoids any issue of padding in struct */
+ PG_RETURN_UINT32((uint32) (a->ai_privs + a->ai_grantee + a->ai_grantor));
+}
+
+/*
+ * 64-bit hash function for aclitem.
+ *
+ * Similar to hash_aclitem, but accepts a seed and returns a uint64 value.
+ */
+Datum
+hash_aclitem_extended(PG_FUNCTION_ARGS)
+{
+ AclItem *a = PG_GETARG_ACLITEM_P(0);
+ uint64 seed = PG_GETARG_INT64(1);
+ uint32 sum = (uint32) (a->ai_privs + a->ai_grantee + a->ai_grantor);
+
+ return (seed == 0) ? UInt64GetDatum(sum) : hash_uint32_extended(sum, seed);
+}
+
+/*
+ * acldefault() --- create an ACL describing default access permissions
+ *
+ * Change this routine if you want to alter the default access policy for
+ * newly-created objects (or any object with a NULL acl entry). When
+ * you make a change here, don't forget to update the GRANT man page,
+ * which explains all the default permissions.
+ *
+ * Note that these are the hard-wired "defaults" that are used in the
+ * absence of any pg_default_acl entry.
+ */
+Acl *
+acldefault(ObjectType objtype, Oid ownerId)
+{
+ AclMode world_default;
+ AclMode owner_default;
+ int nacl;
+ Acl *acl;
+ AclItem *aip;
+
+ switch (objtype)
+ {
+ case OBJECT_COLUMN:
+ /* by default, columns have no extra privileges */
+ world_default = ACL_NO_RIGHTS;
+ owner_default = ACL_NO_RIGHTS;
+ break;
+ case OBJECT_TABLE:
+ world_default = ACL_NO_RIGHTS;
+ owner_default = ACL_ALL_RIGHTS_RELATION;
+ break;
+ case OBJECT_SEQUENCE:
+ world_default = ACL_NO_RIGHTS;
+ owner_default = ACL_ALL_RIGHTS_SEQUENCE;
+ break;
+ case OBJECT_DATABASE:
+ /* for backwards compatibility, grant some rights by default */
+ world_default = ACL_CREATE_TEMP | ACL_CONNECT;
+ owner_default = ACL_ALL_RIGHTS_DATABASE;
+ break;
+ case OBJECT_FUNCTION:
+ /* Grant EXECUTE by default, for now */
+ world_default = ACL_EXECUTE;
+ owner_default = ACL_ALL_RIGHTS_FUNCTION;
+ break;
+ case OBJECT_LANGUAGE:
+ /* Grant USAGE by default, for now */
+ world_default = ACL_USAGE;
+ owner_default = ACL_ALL_RIGHTS_LANGUAGE;
+ break;
+ case OBJECT_LARGEOBJECT:
+ world_default = ACL_NO_RIGHTS;
+ owner_default = ACL_ALL_RIGHTS_LARGEOBJECT;
+ break;
+ case OBJECT_SCHEMA:
+ world_default = ACL_NO_RIGHTS;
+ owner_default = ACL_ALL_RIGHTS_SCHEMA;
+ break;
+ case OBJECT_TABLESPACE:
+ world_default = ACL_NO_RIGHTS;
+ owner_default = ACL_ALL_RIGHTS_TABLESPACE;
+ break;
+ case OBJECT_FDW:
+ world_default = ACL_NO_RIGHTS;
+ owner_default = ACL_ALL_RIGHTS_FDW;
+ break;
+ case OBJECT_FOREIGN_SERVER:
+ world_default = ACL_NO_RIGHTS;
+ owner_default = ACL_ALL_RIGHTS_FOREIGN_SERVER;
+ break;
+ case OBJECT_DOMAIN:
+ case OBJECT_TYPE:
+ world_default = ACL_USAGE;
+ owner_default = ACL_ALL_RIGHTS_TYPE;
+ break;
+ case OBJECT_PARAMETER_ACL:
+ world_default = ACL_NO_RIGHTS;
+ owner_default = ACL_ALL_RIGHTS_PARAMETER_ACL;
+ break;
+ default:
+ elog(ERROR, "unrecognized objtype: %d", (int) objtype);
+ world_default = ACL_NO_RIGHTS; /* keep compiler quiet */
+ owner_default = ACL_NO_RIGHTS;
+ break;
+ }
+
+ nacl = 0;
+ if (world_default != ACL_NO_RIGHTS)
+ nacl++;
+ if (owner_default != ACL_NO_RIGHTS)
+ nacl++;
+
+ acl = allocacl(nacl);
+ aip = ACL_DAT(acl);
+
+ if (world_default != ACL_NO_RIGHTS)
+ {
+ aip->ai_grantee = ACL_ID_PUBLIC;
+ aip->ai_grantor = ownerId;
+ ACLITEM_SET_PRIVS_GOPTIONS(*aip, world_default, ACL_NO_RIGHTS);
+ aip++;
+ }
+
+ /*
+ * Note that the owner's entry shows all ordinary privileges but no grant
+ * options. This is because his grant options come "from the system" and
+ * not from his own efforts. (The SQL spec says that the owner's rights
+ * come from a "_SYSTEM" authid.) However, we do consider that the
+ * owner's ordinary privileges are self-granted; this lets him revoke
+ * them. We implement the owner's grant options without any explicit
+ * "_SYSTEM"-like ACL entry, by internally special-casing the owner
+ * wherever we are testing grant options.
+ */
+ if (owner_default != ACL_NO_RIGHTS)
+ {
+ aip->ai_grantee = ownerId;
+ aip->ai_grantor = ownerId;
+ ACLITEM_SET_PRIVS_GOPTIONS(*aip, owner_default, ACL_NO_RIGHTS);
+ }
+
+ return acl;
+}
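
For instance, acldefault(OBJECT_FUNCTION, ownerId) produces a two-entry ACL: EXECUTE for PUBLIC plus the owner's self-granted rights, both recorded with ownerId as grantor (textually, {=X/owner,owner=X/owner}). A small sketch of inspecting that result, assuming a backend context:

    #include "postgres.h"
    #include "utils/acl.h"

    static void
    default_function_acl_example(Oid ownerId)
    {
        Acl     *acl = acldefault(OBJECT_FUNCTION, ownerId);
        AclItem *aip = ACL_DAT(acl);

        Assert(ACL_NUM(acl) == 2);

        /* first entry: PUBLIC gets EXECUTE, granted by the owner */
        Assert(aip[0].ai_grantee == ACL_ID_PUBLIC);
        Assert(ACLITEM_GET_PRIVS(aip[0]) == ACL_EXECUTE);

        /* second entry: the owner's own (self-granted) ordinary rights */
        Assert(aip[1].ai_grantee == ownerId);
        Assert(aip[1].ai_grantor == ownerId);
    }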
+
+
+/*
+ * SQL-accessible version of acldefault(). Hackish mapping from "char" type to
+ * OBJECT_* values.
+ */
+Datum
+acldefault_sql(PG_FUNCTION_ARGS)
+{
+ char objtypec = PG_GETARG_CHAR(0);
+ Oid owner = PG_GETARG_OID(1);
+ ObjectType objtype = 0;
+
+ switch (objtypec)
+ {
+ case 'c':
+ objtype = OBJECT_COLUMN;
+ break;
+ case 'r':
+ objtype = OBJECT_TABLE;
+ break;
+ case 's':
+ objtype = OBJECT_SEQUENCE;
+ break;
+ case 'd':
+ objtype = OBJECT_DATABASE;
+ break;
+ case 'f':
+ objtype = OBJECT_FUNCTION;
+ break;
+ case 'l':
+ objtype = OBJECT_LANGUAGE;
+ break;
+ case 'L':
+ objtype = OBJECT_LARGEOBJECT;
+ break;
+ case 'n':
+ objtype = OBJECT_SCHEMA;
+ break;
+ case 'p':
+ objtype = OBJECT_PARAMETER_ACL;
+ break;
+ case 't':
+ objtype = OBJECT_TABLESPACE;
+ break;
+ case 'F':
+ objtype = OBJECT_FDW;
+ break;
+ case 'S':
+ objtype = OBJECT_FOREIGN_SERVER;
+ break;
+ case 'T':
+ objtype = OBJECT_TYPE;
+ break;
+ default:
+ elog(ERROR, "unrecognized objtype abbreviation: %c", objtypec);
+ }
+
+ PG_RETURN_ACL_P(acldefault(objtype, owner));
+}
+
+
+/*
+ * Update an ACL array to add or remove specified privileges.
+ *
+ * old_acl: the input ACL array
+ * mod_aip: defines the privileges to be added, removed, or substituted
+ * modechg: ACL_MODECHG_ADD, ACL_MODECHG_DEL, or ACL_MODECHG_EQL
+ * ownerId: Oid of object owner
+ * behavior: RESTRICT or CASCADE behavior for recursive removal
+ *
+ * ownerid and behavior are only relevant when the update operation specifies
+ * deletion of grant options.
+ *
+ * The result is a modified copy; the input object is not changed.
+ *
+ * NB: caller is responsible for having detoasted the input ACL, if needed.
+ */
+Acl *
+aclupdate(const Acl *old_acl, const AclItem *mod_aip,
+ int modechg, Oid ownerId, DropBehavior behavior)
+{
+ Acl *new_acl = NULL;
+ AclItem *old_aip,
+ *new_aip = NULL;
+ AclMode old_rights,
+ old_goptions,
+ new_rights,
+ new_goptions;
+ int dst,
+ num;
+
+ /* Caller probably already checked old_acl, but be safe */
+ check_acl(old_acl);
+
+ /* If granting grant options, check for circularity */
+ if (modechg != ACL_MODECHG_DEL &&
+ ACLITEM_GET_GOPTIONS(*mod_aip) != ACL_NO_RIGHTS)
+ check_circularity(old_acl, mod_aip, ownerId);
+
+ num = ACL_NUM(old_acl);
+ old_aip = ACL_DAT(old_acl);
+
+ /*
+ * Search the ACL for an existing entry for this grantee and grantor. If
+ * one exists, just modify the entry in-place (well, in the same position,
+ * since we actually return a copy); otherwise, insert the new entry at
+ * the end.
+ */
+
+ for (dst = 0; dst < num; ++dst)
+ {
+ if (aclitem_match(mod_aip, old_aip + dst))
+ {
+ /* found a match, so modify existing item */
+ new_acl = allocacl(num);
+ new_aip = ACL_DAT(new_acl);
+ memcpy(new_acl, old_acl, ACL_SIZE(old_acl));
+ break;
+ }
+ }
+
+ if (dst == num)
+ {
+ /* need to append a new item */
+ new_acl = allocacl(num + 1);
+ new_aip = ACL_DAT(new_acl);
+ memcpy(new_aip, old_aip, num * sizeof(AclItem));
+
+ /* initialize the new entry with no permissions */
+ new_aip[dst].ai_grantee = mod_aip->ai_grantee;
+ new_aip[dst].ai_grantor = mod_aip->ai_grantor;
+ ACLITEM_SET_PRIVS_GOPTIONS(new_aip[dst],
+ ACL_NO_RIGHTS, ACL_NO_RIGHTS);
+ num++; /* set num to the size of new_acl */
+ }
+
+ old_rights = ACLITEM_GET_RIGHTS(new_aip[dst]);
+ old_goptions = ACLITEM_GET_GOPTIONS(new_aip[dst]);
+
+ /* apply the specified permissions change */
+ switch (modechg)
+ {
+ case ACL_MODECHG_ADD:
+ ACLITEM_SET_RIGHTS(new_aip[dst],
+ old_rights | ACLITEM_GET_RIGHTS(*mod_aip));
+ break;
+ case ACL_MODECHG_DEL:
+ ACLITEM_SET_RIGHTS(new_aip[dst],
+ old_rights & ~ACLITEM_GET_RIGHTS(*mod_aip));
+ break;
+ case ACL_MODECHG_EQL:
+ ACLITEM_SET_RIGHTS(new_aip[dst],
+ ACLITEM_GET_RIGHTS(*mod_aip));
+ break;
+ }
+
+ new_rights = ACLITEM_GET_RIGHTS(new_aip[dst]);
+ new_goptions = ACLITEM_GET_GOPTIONS(new_aip[dst]);
+
+ /*
+ * If the adjusted entry has no permissions, delete it from the list.
+ */
+ if (new_rights == ACL_NO_RIGHTS)
+ {
+ memmove(new_aip + dst,
+ new_aip + dst + 1,
+ (num - dst - 1) * sizeof(AclItem));
+ /* Adjust array size to be 'num - 1' items */
+ ARR_DIMS(new_acl)[0] = num - 1;
+ SET_VARSIZE(new_acl, ACL_N_SIZE(num - 1));
+ }
+
+ /*
+ * Remove abandoned privileges (cascading revoke). Currently we can only
+ * handle this when the grantee is not PUBLIC.
+ */
+ if ((old_goptions & ~new_goptions) != 0)
+ {
+ Assert(mod_aip->ai_grantee != ACL_ID_PUBLIC);
+ new_acl = recursive_revoke(new_acl, mod_aip->ai_grantee,
+ (old_goptions & ~new_goptions),
+ ownerId, behavior);
+ }
+
+ return new_acl;
+}
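
A sketch of the typical calling pattern (the same one aclmerge() above uses item by item): build a mod AclItem describing the change and let aclupdate() return the new array. Role OIDs are assumed valid and old_acl is assumed already detoasted:

    #include "postgres.h"
    #include "utils/acl.h"

    /* Grant SELECT WITH GRANT OPTION to 'grantee', granted by 'grantor'. */
    static Acl *
    grant_select_example(const Acl *old_acl, Oid grantee, Oid grantor, Oid ownerId)
    {
        AclItem mod;

        mod.ai_grantee = grantee;
        mod.ai_grantor = grantor;
        ACLITEM_SET_PRIVS_GOPTIONS(mod, ACL_SELECT, ACL_SELECT);

        return aclupdate(old_acl, &mod, ACL_MODECHG_ADD, ownerId, DROP_RESTRICT);
    }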
+
+/*
+ * Update an ACL array to reflect a change of owner to the parent object
+ *
+ * old_acl: the input ACL array (must not be NULL)
+ * oldOwnerId: Oid of the old object owner
+ * newOwnerId: Oid of the new object owner
+ *
+ * The result is a modified copy; the input object is not changed.
+ *
+ * NB: caller is responsible for having detoasted the input ACL, if needed.
+ */
+Acl *
+aclnewowner(const Acl *old_acl, Oid oldOwnerId, Oid newOwnerId)
+{
+ Acl *new_acl;
+ AclItem *new_aip;
+ AclItem *old_aip;
+ AclItem *dst_aip;
+ AclItem *src_aip;
+ AclItem *targ_aip;
+ bool newpresent = false;
+ int dst,
+ src,
+ targ,
+ num;
+
+ check_acl(old_acl);
+
+ /*
+ * Make a copy of the given ACL, substituting new owner ID for old
+ * wherever it appears as either grantor or grantee. Also note if the new
+ * owner ID is already present.
+ */
+ num = ACL_NUM(old_acl);
+ old_aip = ACL_DAT(old_acl);
+ new_acl = allocacl(num);
+ new_aip = ACL_DAT(new_acl);
+ memcpy(new_aip, old_aip, num * sizeof(AclItem));
+ for (dst = 0, dst_aip = new_aip; dst < num; dst++, dst_aip++)
+ {
+ if (dst_aip->ai_grantor == oldOwnerId)
+ dst_aip->ai_grantor = newOwnerId;
+ else if (dst_aip->ai_grantor == newOwnerId)
+ newpresent = true;
+ if (dst_aip->ai_grantee == oldOwnerId)
+ dst_aip->ai_grantee = newOwnerId;
+ else if (dst_aip->ai_grantee == newOwnerId)
+ newpresent = true;
+ }
+
+ /*
+ * If the old ACL contained any references to the new owner, then we may
+ * now have generated an ACL containing duplicate entries. Find them and
+ * merge them so that there are not duplicates. (This is relatively
+ * expensive since we use a stupid O(N^2) algorithm, but it's unlikely to
+ * be the normal case.)
+ *
+ * To simplify deletion of duplicate entries, we temporarily leave them in
+ * the array but set their privilege masks to zero; when we reach such an
+ * entry it's just skipped. (Thus, a side effect of this code will be to
+ * remove privilege-free entries, should there be any in the input.) dst
+ * is the next output slot, targ is the currently considered input slot
+ * (always >= dst), and src scans entries to the right of targ looking for
+ * duplicates. Once an entry has been emitted to dst it is known
+ * duplicate-free and need not be considered anymore.
+ */
+ if (newpresent)
+ {
+ dst = 0;
+ for (targ = 0, targ_aip = new_aip; targ < num; targ++, targ_aip++)
+ {
+ /* ignore if deleted in an earlier pass */
+ if (ACLITEM_GET_RIGHTS(*targ_aip) == ACL_NO_RIGHTS)
+ continue;
+ /* find and merge any duplicates */
+ for (src = targ + 1, src_aip = targ_aip + 1; src < num;
+ src++, src_aip++)
+ {
+ if (ACLITEM_GET_RIGHTS(*src_aip) == ACL_NO_RIGHTS)
+ continue;
+ if (aclitem_match(targ_aip, src_aip))
+ {
+ ACLITEM_SET_RIGHTS(*targ_aip,
+ ACLITEM_GET_RIGHTS(*targ_aip) |
+ ACLITEM_GET_RIGHTS(*src_aip));
+ /* mark the duplicate deleted */
+ ACLITEM_SET_RIGHTS(*src_aip, ACL_NO_RIGHTS);
+ }
+ }
+ /* and emit to output */
+ new_aip[dst] = *targ_aip;
+ dst++;
+ }
+ /* Adjust array size to be 'dst' items */
+ ARR_DIMS(new_acl)[0] = dst;
+ SET_VARSIZE(new_acl, ACL_N_SIZE(dst));
+ }
+
+ return new_acl;
+}
+
+
+/*
+ * When granting grant options, we must disallow attempts to set up circular
+ * chains of grant options. Suppose A (the object owner) grants B some
+ * privileges with grant option, and B re-grants them to C. If C could
+ * grant the privileges to B as well, then A would be unable to effectively
+ * revoke the privileges from B, since recursive_revoke would consider that
+ * B still has 'em from C.
+ *
+ * We check for this by recursively deleting all grant options belonging to
+ * the target grantee, and then seeing if the would-be grantor still has the
+ * grant option or not.
+ */
+static void
+check_circularity(const Acl *old_acl, const AclItem *mod_aip,
+ Oid ownerId)
+{
+ Acl *acl;
+ AclItem *aip;
+ int i,
+ num;
+ AclMode own_privs;
+
+ check_acl(old_acl);
+
+ /*
+ * For now, grant options can only be granted to roles, not PUBLIC.
+ * Otherwise we'd have to work a bit harder here.
+ */
+ Assert(mod_aip->ai_grantee != ACL_ID_PUBLIC);
+
+ /* The owner always has grant options, no need to check */
+ if (mod_aip->ai_grantor == ownerId)
+ return;
+
+ /* Make a working copy */
+ acl = allocacl(ACL_NUM(old_acl));
+ memcpy(acl, old_acl, ACL_SIZE(old_acl));
+
+ /* Zap all grant options of target grantee, plus what depends on 'em */
+cc_restart:
+ num = ACL_NUM(acl);
+ aip = ACL_DAT(acl);
+ for (i = 0; i < num; i++)
+ {
+ if (aip[i].ai_grantee == mod_aip->ai_grantee &&
+ ACLITEM_GET_GOPTIONS(aip[i]) != ACL_NO_RIGHTS)
+ {
+ Acl *new_acl;
+
+ /* We'll actually zap ordinary privs too, but no matter */
+ new_acl = aclupdate(acl, &aip[i], ACL_MODECHG_DEL,
+ ownerId, DROP_CASCADE);
+
+ pfree(acl);
+ acl = new_acl;
+
+ goto cc_restart;
+ }
+ }
+
+ /* Now we can compute grantor's independently-derived privileges */
+ own_privs = aclmask(acl,
+ mod_aip->ai_grantor,
+ ownerId,
+ ACL_GRANT_OPTION_FOR(ACLITEM_GET_GOPTIONS(*mod_aip)),
+ ACLMASK_ALL);
+ own_privs = ACL_OPTION_TO_PRIVS(own_privs);
+
+ if ((ACLITEM_GET_GOPTIONS(*mod_aip) & ~own_privs) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_GRANT_OPERATION),
+ errmsg("grant options cannot be granted back to your own grantor")));
+
+ pfree(acl);
+}
+
+
+/*
+ * Ensure that no privilege is "abandoned". A privilege is abandoned
+ * if the user that granted the privilege loses the grant option. (So
+ * the chain through which it was granted is broken.) Either the
+ * abandoned privileges are revoked as well, or an error message is
+ * printed, depending on the drop behavior option.
+ *
+ * acl: the input ACL list
+ * grantee: the user from whom some grant options have been revoked
+ * revoke_privs: the grant options being revoked
+ * ownerId: Oid of object owner
+ * behavior: RESTRICT or CASCADE behavior for recursive removal
+ *
+ * The input Acl object is pfree'd if replaced.
+ */
+static Acl *
+recursive_revoke(Acl *acl,
+ Oid grantee,
+ AclMode revoke_privs,
+ Oid ownerId,
+ DropBehavior behavior)
+{
+ AclMode still_has;
+ AclItem *aip;
+ int i,
+ num;
+
+ check_acl(acl);
+
+ /* The owner can never truly lose grant options, so short-circuit */
+ if (grantee == ownerId)
+ return acl;
+
+ /* The grantee might still have some grant options via another grantor */
+ still_has = aclmask(acl, grantee, ownerId,
+ ACL_GRANT_OPTION_FOR(revoke_privs),
+ ACLMASK_ALL);
+ revoke_privs &= ~ACL_OPTION_TO_PRIVS(still_has);
+ if (revoke_privs == ACL_NO_RIGHTS)
+ return acl;
+
+restart:
+ num = ACL_NUM(acl);
+ aip = ACL_DAT(acl);
+ for (i = 0; i < num; i++)
+ {
+ if (aip[i].ai_grantor == grantee
+ && (ACLITEM_GET_PRIVS(aip[i]) & revoke_privs) != 0)
+ {
+ AclItem mod_acl;
+ Acl *new_acl;
+
+ if (behavior == DROP_RESTRICT)
+ ereport(ERROR,
+ (errcode(ERRCODE_DEPENDENT_OBJECTS_STILL_EXIST),
+ errmsg("dependent privileges exist"),
+ errhint("Use CASCADE to revoke them too.")));
+
+ mod_acl.ai_grantor = grantee;
+ mod_acl.ai_grantee = aip[i].ai_grantee;
+ ACLITEM_SET_PRIVS_GOPTIONS(mod_acl,
+ revoke_privs,
+ revoke_privs);
+
+ new_acl = aclupdate(acl, &mod_acl, ACL_MODECHG_DEL,
+ ownerId, behavior);
+
+ pfree(acl);
+ acl = new_acl;
+
+ goto restart;
+ }
+ }
+
+ return acl;
+}
+
+
+/*
+ * aclmask --- compute bitmask of all privileges held by roleid.
+ *
+ * When 'how' = ACLMASK_ALL, this simply returns the privilege bits
+ * held by the given roleid according to the given ACL list, ANDed
+ * with 'mask'. (The point of passing 'mask' is to let the routine
+ * exit early if all privileges of interest have been found.)
+ *
+ * When 'how' = ACLMASK_ANY, returns as soon as any bit in the mask
+ * is known true. (This lets us exit soonest in cases where the
+ * caller is only going to test for zero or nonzero result.)
+ *
+ * Usage patterns:
+ *
+ * To see if any of a set of privileges are held:
+ * if (aclmask(acl, roleid, ownerId, privs, ACLMASK_ANY) != 0)
+ *
+ * To see if all of a set of privileges are held:
+ * if (aclmask(acl, roleid, ownerId, privs, ACLMASK_ALL) == privs)
+ *
+ * To determine exactly which of a set of privileges are held:
+ * heldprivs = aclmask(acl, roleid, ownerId, privs, ACLMASK_ALL);
+ */
+AclMode
+aclmask(const Acl *acl, Oid roleid, Oid ownerId,
+ AclMode mask, AclMaskHow how)
+{
+ AclMode result;
+ AclMode remaining;
+ AclItem *aidat;
+ int i,
+ num;
+
+ /*
+ * Null ACL should not happen, since caller should have inserted
+ * appropriate default
+ */
+ if (acl == NULL)
+ elog(ERROR, "null ACL");
+
+ check_acl(acl);
+
+ /* Quick exit for mask == 0 */
+ if (mask == 0)
+ return 0;
+
+ result = 0;
+
+ /* Owner always implicitly has all grant options */
+ if ((mask & ACLITEM_ALL_GOPTION_BITS) &&
+ has_privs_of_role(roleid, ownerId))
+ {
+ result = mask & ACLITEM_ALL_GOPTION_BITS;
+ if ((how == ACLMASK_ALL) ? (result == mask) : (result != 0))
+ return result;
+ }
+
+ num = ACL_NUM(acl);
+ aidat = ACL_DAT(acl);
+
+ /*
+ * Check privileges granted directly to roleid or to public
+ */
+ for (i = 0; i < num; i++)
+ {
+ AclItem *aidata = &aidat[i];
+
+ if (aidata->ai_grantee == ACL_ID_PUBLIC ||
+ aidata->ai_grantee == roleid)
+ {
+ result |= aidata->ai_privs & mask;
+ if ((how == ACLMASK_ALL) ? (result == mask) : (result != 0))
+ return result;
+ }
+ }
+
+ /*
+ * Check privileges granted indirectly via role memberships. We do this in
+ * a separate pass to minimize expensive indirect membership tests. In
+ * particular, it's worth testing whether a given ACL entry grants any
+ * privileges still of interest before we perform the has_privs_of_role
+ * test.
+ */
+ remaining = mask & ~result;
+ for (i = 0; i < num; i++)
+ {
+ AclItem *aidata = &aidat[i];
+
+ if (aidata->ai_grantee == ACL_ID_PUBLIC ||
+ aidata->ai_grantee == roleid)
+ continue; /* already checked it */
+
+ if ((aidata->ai_privs & remaining) &&
+ has_privs_of_role(roleid, aidata->ai_grantee))
+ {
+ result |= aidata->ai_privs & mask;
+ if ((how == ACLMASK_ALL) ? (result == mask) : (result != 0))
+ return result;
+ remaining = mask & ~result;
+ }
+ }
+
+ return result;
+}
+
+
+/*
+ * aclmask_direct --- compute bitmask of all privileges held by roleid.
+ *
+ * This is exactly like aclmask() except that we consider only privileges
+ * held *directly* by roleid, not those inherited via role membership.
+ */
+static AclMode
+aclmask_direct(const Acl *acl, Oid roleid, Oid ownerId,
+ AclMode mask, AclMaskHow how)
+{
+ AclMode result;
+ AclItem *aidat;
+ int i,
+ num;
+
+ /*
+ * Null ACL should not happen, since caller should have inserted
+ * appropriate default
+ */
+ if (acl == NULL)
+ elog(ERROR, "null ACL");
+
+ check_acl(acl);
+
+ /* Quick exit for mask == 0 */
+ if (mask == 0)
+ return 0;
+
+ result = 0;
+
+ /* Owner always implicitly has all grant options */
+ if ((mask & ACLITEM_ALL_GOPTION_BITS) &&
+ roleid == ownerId)
+ {
+ result = mask & ACLITEM_ALL_GOPTION_BITS;
+ if ((how == ACLMASK_ALL) ? (result == mask) : (result != 0))
+ return result;
+ }
+
+ num = ACL_NUM(acl);
+ aidat = ACL_DAT(acl);
+
+ /*
+ * Check privileges granted directly to roleid (and not to public)
+ */
+ for (i = 0; i < num; i++)
+ {
+ AclItem *aidata = &aidat[i];
+
+ if (aidata->ai_grantee == roleid)
+ {
+ result |= aidata->ai_privs & mask;
+ if ((how == ACLMASK_ALL) ? (result == mask) : (result != 0))
+ return result;
+ }
+ }
+
+ return result;
+}
+
+
+/*
+ * aclmembers
+ * Find out all the roleids mentioned in an Acl.
+ * Note that we do not distinguish grantors from grantees.
+ *
+ * *roleids is set to point to a palloc'd array containing distinct OIDs
+ * in sorted order. The length of the array is the function result.
+ */
+int
+aclmembers(const Acl *acl, Oid **roleids)
+{
+ Oid *list;
+ const AclItem *acldat;
+ int i,
+ j;
+
+ if (acl == NULL || ACL_NUM(acl) == 0)
+ {
+ *roleids = NULL;
+ return 0;
+ }
+
+ check_acl(acl);
+
+ /* Allocate the worst-case space requirement */
+ list = palloc(ACL_NUM(acl) * 2 * sizeof(Oid));
+ acldat = ACL_DAT(acl);
+
+ /*
+ * Walk the ACL collecting mentioned RoleIds.
+ */
+ j = 0;
+ for (i = 0; i < ACL_NUM(acl); i++)
+ {
+ const AclItem *ai = &acldat[i];
+
+ if (ai->ai_grantee != ACL_ID_PUBLIC)
+ list[j++] = ai->ai_grantee;
+ /* grantor is currently never PUBLIC, but let's check anyway */
+ if (ai->ai_grantor != ACL_ID_PUBLIC)
+ list[j++] = ai->ai_grantor;
+ }
+
+ /* Sort the array */
+ qsort(list, j, sizeof(Oid), oid_cmp);
+
+ /*
+ * We could repalloc the array down to minimum size, but it's hardly worth
+ * it since it's only transient memory.
+ */
+ *roleids = list;
+
+ /* Remove duplicates from the array */
+ return qunique(list, j, sizeof(Oid), oid_cmp);
+}
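
A brief usage sketch: the caller receives the count as the return value and a palloc'd, sorted, de-duplicated OID array through the output parameter:

    #include "postgres.h"
    #include "utils/acl.h"

    static void
    list_acl_roles_example(const Acl *acl)
    {
        Oid *roleids;
        int  nmembers = aclmembers(acl, &roleids);

        for (int i = 0; i < nmembers; i++)
            elog(DEBUG1, "ACL mentions role %u", roleids[i]);

        if (roleids)
            pfree(roleids);
    }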
+
+
+/*
+ * aclinsert (exported function)
+ */
+Datum
+aclinsert(PG_FUNCTION_ARGS)
+{
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("aclinsert is no longer supported")));
+
+ PG_RETURN_NULL(); /* keep compiler quiet */
+}
+
+Datum
+aclremove(PG_FUNCTION_ARGS)
+{
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("aclremove is no longer supported")));
+
+ PG_RETURN_NULL(); /* keep compiler quiet */
+}
+
+Datum
+aclcontains(PG_FUNCTION_ARGS)
+{
+ Acl *acl = PG_GETARG_ACL_P(0);
+ AclItem *aip = PG_GETARG_ACLITEM_P(1);
+ AclItem *aidat;
+ int i,
+ num;
+
+ check_acl(acl);
+ num = ACL_NUM(acl);
+ aidat = ACL_DAT(acl);
+ for (i = 0; i < num; ++i)
+ {
+ if (aip->ai_grantee == aidat[i].ai_grantee &&
+ aip->ai_grantor == aidat[i].ai_grantor &&
+ (ACLITEM_GET_RIGHTS(*aip) & ACLITEM_GET_RIGHTS(aidat[i])) == ACLITEM_GET_RIGHTS(*aip))
+ PG_RETURN_BOOL(true);
+ }
+ PG_RETURN_BOOL(false);
+}
+
+Datum
+makeaclitem(PG_FUNCTION_ARGS)
+{
+ Oid grantee = PG_GETARG_OID(0);
+ Oid grantor = PG_GETARG_OID(1);
+ text *privtext = PG_GETARG_TEXT_PP(2);
+ bool goption = PG_GETARG_BOOL(3);
+ AclItem *result;
+ AclMode priv;
+
+ priv = convert_priv_string(privtext);
+
+ result = (AclItem *) palloc(sizeof(AclItem));
+
+ result->ai_grantee = grantee;
+ result->ai_grantor = grantor;
+
+ ACLITEM_SET_PRIVS_GOPTIONS(*result, priv,
+ (goption ? priv : ACL_NO_RIGHTS));
+
+ PG_RETURN_ACLITEM_P(result);
+}
+
+static AclMode
+convert_priv_string(text *priv_type_text)
+{
+ char *priv_type = text_to_cstring(priv_type_text);
+
+ if (pg_strcasecmp(priv_type, "SELECT") == 0)
+ return ACL_SELECT;
+ if (pg_strcasecmp(priv_type, "INSERT") == 0)
+ return ACL_INSERT;
+ if (pg_strcasecmp(priv_type, "UPDATE") == 0)
+ return ACL_UPDATE;
+ if (pg_strcasecmp(priv_type, "DELETE") == 0)
+ return ACL_DELETE;
+ if (pg_strcasecmp(priv_type, "TRUNCATE") == 0)
+ return ACL_TRUNCATE;
+ if (pg_strcasecmp(priv_type, "REFERENCES") == 0)
+ return ACL_REFERENCES;
+ if (pg_strcasecmp(priv_type, "TRIGGER") == 0)
+ return ACL_TRIGGER;
+ if (pg_strcasecmp(priv_type, "EXECUTE") == 0)
+ return ACL_EXECUTE;
+ if (pg_strcasecmp(priv_type, "USAGE") == 0)
+ return ACL_USAGE;
+ if (pg_strcasecmp(priv_type, "CREATE") == 0)
+ return ACL_CREATE;
+ if (pg_strcasecmp(priv_type, "TEMP") == 0)
+ return ACL_CREATE_TEMP;
+ if (pg_strcasecmp(priv_type, "TEMPORARY") == 0)
+ return ACL_CREATE_TEMP;
+ if (pg_strcasecmp(priv_type, "CONNECT") == 0)
+ return ACL_CONNECT;
+ if (pg_strcasecmp(priv_type, "SET") == 0)
+ return ACL_SET;
+ if (pg_strcasecmp(priv_type, "ALTER SYSTEM") == 0)
+ return ACL_ALTER_SYSTEM;
+ if (pg_strcasecmp(priv_type, "RULE") == 0)
+ return 0; /* ignore old RULE privileges */
+
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unrecognized privilege type: \"%s\"", priv_type)));
+ return ACL_NO_RIGHTS; /* keep compiler quiet */
+}
+
+
+/*
+ * convert_any_priv_string: recognize privilege strings for has_foo_privilege
+ *
+ * We accept a comma-separated list of case-insensitive privilege names,
+ * producing a bitmask of the OR'd privilege bits. We are liberal about
+ * whitespace between items, not so much about whitespace within items.
+ * The allowed privilege names are given as an array of priv_map structs,
+ * terminated by one with a NULL name pointer.
+ */
+static AclMode
+convert_any_priv_string(text *priv_type_text,
+ const priv_map *privileges)
+{
+ AclMode result = 0;
+ char *priv_type = text_to_cstring(priv_type_text);
+ char *chunk;
+ char *next_chunk;
+
+ /* We rely on priv_type being a private, modifiable string */
+ for (chunk = priv_type; chunk; chunk = next_chunk)
+ {
+ int chunk_len;
+ const priv_map *this_priv;
+
+ /* Split string at commas */
+ next_chunk = strchr(chunk, ',');
+ if (next_chunk)
+ *next_chunk++ = '\0';
+
+ /* Drop leading/trailing whitespace in this chunk */
+ while (*chunk && isspace((unsigned char) *chunk))
+ chunk++;
+ chunk_len = strlen(chunk);
+ while (chunk_len > 0 && isspace((unsigned char) chunk[chunk_len - 1]))
+ chunk_len--;
+ chunk[chunk_len] = '\0';
+
+ /* Match to the privileges list */
+ for (this_priv = privileges; this_priv->name; this_priv++)
+ {
+ if (pg_strcasecmp(this_priv->name, chunk) == 0)
+ {
+ result |= this_priv->value;
+ break;
+ }
+ }
+ if (!this_priv->name)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unrecognized privilege type: \"%s\"", chunk)));
+ }
+
+ pfree(priv_type);
+ return result;
+}
+
+
+static const char *
+convert_aclright_to_string(int aclright)
+{
+ switch (aclright)
+ {
+ case ACL_INSERT:
+ return "INSERT";
+ case ACL_SELECT:
+ return "SELECT";
+ case ACL_UPDATE:
+ return "UPDATE";
+ case ACL_DELETE:
+ return "DELETE";
+ case ACL_TRUNCATE:
+ return "TRUNCATE";
+ case ACL_REFERENCES:
+ return "REFERENCES";
+ case ACL_TRIGGER:
+ return "TRIGGER";
+ case ACL_EXECUTE:
+ return "EXECUTE";
+ case ACL_USAGE:
+ return "USAGE";
+ case ACL_CREATE:
+ return "CREATE";
+ case ACL_CREATE_TEMP:
+ return "TEMPORARY";
+ case ACL_CONNECT:
+ return "CONNECT";
+ case ACL_SET:
+ return "SET";
+ case ACL_ALTER_SYSTEM:
+ return "ALTER SYSTEM";
+ default:
+ elog(ERROR, "unrecognized aclright: %d", aclright);
+ return NULL;
+ }
+}
+
+
+/*----------
+ * Convert an aclitem[] to a table.
+ *
+ * Example:
+ *
+ * aclexplode('{=r/joe,foo=a*w/joe}'::aclitem[])
+ *
+ * returns the table
+ *
+ * {{ OID(joe), 0::OID, 'SELECT', false },
+ * { OID(joe), OID(foo), 'INSERT', true },
+ * { OID(joe), OID(foo), 'UPDATE', false }}
+ *----------
+ */
+Datum
+aclexplode(PG_FUNCTION_ARGS)
+{
+ Acl *acl = PG_GETARG_ACL_P(0);
+ FuncCallContext *funcctx;
+ int *idx;
+ AclItem *aidat;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ TupleDesc tupdesc;
+ MemoryContext oldcontext;
+
+ check_acl(acl);
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ /*
+ * build tupdesc for result tuples (matches out parameters in pg_proc
+ * entry)
+ */
+ tupdesc = CreateTemplateTupleDesc(4);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "grantor",
+ OIDOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 2, "grantee",
+ OIDOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 3, "privilege_type",
+ TEXTOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 4, "is_grantable",
+ BOOLOID, -1, 0);
+
+ funcctx->tuple_desc = BlessTupleDesc(tupdesc);
+
+ /* allocate memory for user context */
+ idx = (int *) palloc(sizeof(int[2]));
+ idx[0] = 0; /* ACL array item index */
+ idx[1] = -1; /* privilege type counter */
+ funcctx->user_fctx = (void *) idx;
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ idx = (int *) funcctx->user_fctx;
+ aidat = ACL_DAT(acl);
+
+ /* need test here in case acl has no items */
+ while (idx[0] < ACL_NUM(acl))
+ {
+ AclItem *aidata;
+ AclMode priv_bit;
+
+ idx[1]++;
+ if (idx[1] == N_ACL_RIGHTS)
+ {
+ idx[1] = 0;
+ idx[0]++;
+ if (idx[0] >= ACL_NUM(acl)) /* done */
+ break;
+ }
+ aidata = &aidat[idx[0]];
+ priv_bit = 1 << idx[1];
+
+ if (ACLITEM_GET_PRIVS(*aidata) & priv_bit)
+ {
+ Datum result;
+ Datum values[4];
+ bool nulls[4];
+ HeapTuple tuple;
+
+ values[0] = ObjectIdGetDatum(aidata->ai_grantor);
+ values[1] = ObjectIdGetDatum(aidata->ai_grantee);
+ values[2] = CStringGetTextDatum(convert_aclright_to_string(priv_bit));
+ values[3] = BoolGetDatum((ACLITEM_GET_GOPTIONS(*aidata) & priv_bit) != 0);
+
+ MemSet(nulls, 0, sizeof(nulls));
+
+ tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+ result = HeapTupleGetDatum(tuple);
+
+ SRF_RETURN_NEXT(funcctx, result);
+ }
+ }
+
+ SRF_RETURN_DONE(funcctx);
+}
+
+
+/*
+ * has_table_privilege variants
+ * These are all named "has_table_privilege" at the SQL level.
+ * They take various combinations of relation name, relation OID,
+ * user name, user OID, or implicit user = current_user.
+ *
+ * The result is a boolean value: true if user has the indicated
+ * privilege, false if not. The variants that take a relation OID
+ * return NULL if the OID doesn't exist (rather than failing, as
+ * they did before Postgres 8.4).
+ */
+
+/*
+ * has_table_privilege_name_name
+ * Check user privileges on a table given
+ * name username, text tablename, and text priv name.
+ */
+Datum
+has_table_privilege_name_name(PG_FUNCTION_ARGS)
+{
+ Name rolename = PG_GETARG_NAME(0);
+ text *tablename = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ Oid tableoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid_or_public(NameStr(*rolename));
+ tableoid = convert_table_name(tablename);
+ mode = convert_table_priv_string(priv_type_text);
+
+ aclresult = pg_class_aclcheck(tableoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_table_privilege_name
+ * Check user privileges on a table given
+ * text tablename and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_table_privilege_name(PG_FUNCTION_ARGS)
+{
+ text *tablename = PG_GETARG_TEXT_PP(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ Oid tableoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ tableoid = convert_table_name(tablename);
+ mode = convert_table_priv_string(priv_type_text);
+
+ aclresult = pg_class_aclcheck(tableoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_table_privilege_name_id
+ * Check user privileges on a table given
+ * name username, table oid, and text priv name.
+ */
+Datum
+has_table_privilege_name_id(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ Oid tableoid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid_or_public(NameStr(*username));
+ mode = convert_table_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(tableoid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_class_aclcheck(tableoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_table_privilege_id
+ * Check user privileges on a table given
+ * table oid, and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_table_privilege_id(PG_FUNCTION_ARGS)
+{
+ Oid tableoid = PG_GETARG_OID(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ mode = convert_table_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(tableoid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_class_aclcheck(tableoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_table_privilege_id_name
+ * Check user privileges on a table given
+ * roleid, text tablename, and text priv name.
+ */
+Datum
+has_table_privilege_id_name(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ text *tablename = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid tableoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ tableoid = convert_table_name(tablename);
+ mode = convert_table_priv_string(priv_type_text);
+
+ aclresult = pg_class_aclcheck(tableoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_table_privilege_id_id
+ * Check user privileges on a table given
+ * roleid, table oid, and text priv name.
+ */
+Datum
+has_table_privilege_id_id(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ Oid tableoid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ AclMode mode;
+ AclResult aclresult;
+
+ mode = convert_table_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(tableoid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_class_aclcheck(tableoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * Support routines for has_table_privilege family.
+ */
+
+/*
+ * Given a table name expressed as a string, look it up and return Oid
+ */
+static Oid
+convert_table_name(text *tablename)
+{
+ RangeVar *relrv;
+
+ relrv = makeRangeVarFromNameList(textToQualifiedNameList(tablename));
+
+ /* We might not even have permissions on this relation; don't lock it. */
+ return RangeVarGetRelid(relrv, NoLock, false);
+}
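+
+/*
+ * Since the lookup goes through textToQualifiedNameList, the name may be
+ * schema-qualified, e.g. (hypothetical names):
+ *
+ *   SELECT has_table_privilege('myschema.mytable', 'SELECT');
+ */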
+
+/*
+ * convert_table_priv_string
+ * Convert text string to AclMode value.
+ */
+static AclMode
+convert_table_priv_string(text *priv_type_text)
+{
+ static const priv_map table_priv_map[] = {
+ {"SELECT", ACL_SELECT},
+ {"SELECT WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_SELECT)},
+ {"INSERT", ACL_INSERT},
+ {"INSERT WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_INSERT)},
+ {"UPDATE", ACL_UPDATE},
+ {"UPDATE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_UPDATE)},
+ {"DELETE", ACL_DELETE},
+ {"DELETE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_DELETE)},
+ {"TRUNCATE", ACL_TRUNCATE},
+ {"TRUNCATE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_TRUNCATE)},
+ {"REFERENCES", ACL_REFERENCES},
+ {"REFERENCES WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_REFERENCES)},
+ {"TRIGGER", ACL_TRIGGER},
+ {"TRIGGER WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_TRIGGER)},
+ {"RULE", 0}, /* ignore old RULE privileges */
+ {"RULE WITH GRANT OPTION", 0},
+ {NULL, 0}
+ };
+
+ return convert_any_priv_string(priv_type_text, table_priv_map);
+}
+
+/*
+ * has_sequence_privilege variants
+ * These are all named "has_sequence_privilege" at the SQL level.
+ * They take various combinations of relation name, relation OID,
+ * user name, user OID, or implicit user = current_user.
+ *
+ * The result is a boolean value: true if user has the indicated
+ * privilege, false if not. The variants that take a relation OID
+ * return NULL if the OID doesn't exist.
+ */
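+
+/*
+ * Typical SQL-level calls (a sketch; the role and sequence names are
+ * hypothetical):
+ *
+ *   SELECT has_sequence_privilege('joe', 'myseq', 'USAGE');
+ *   SELECT has_sequence_privilege('myseq', 'SELECT');  -- current_user assumed
+ */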
+
+/*
+ * has_sequence_privilege_name_name
+ * Check user privileges on a sequence given
+ * name username, text sequencename, and text priv name.
+ */
+Datum
+has_sequence_privilege_name_name(PG_FUNCTION_ARGS)
+{
+ Name rolename = PG_GETARG_NAME(0);
+ text *sequencename = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ Oid sequenceoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid_or_public(NameStr(*rolename));
+ mode = convert_sequence_priv_string(priv_type_text);
+ sequenceoid = convert_table_name(sequencename);
+ if (get_rel_relkind(sequenceoid) != RELKIND_SEQUENCE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is not a sequence",
+ text_to_cstring(sequencename))));
+
+ aclresult = pg_class_aclcheck(sequenceoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_sequence_privilege_name
+ * Check user privileges on a sequence given
+ * text sequencename and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_sequence_privilege_name(PG_FUNCTION_ARGS)
+{
+ text *sequencename = PG_GETARG_TEXT_PP(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ Oid sequenceoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ mode = convert_sequence_priv_string(priv_type_text);
+ sequenceoid = convert_table_name(sequencename);
+ if (get_rel_relkind(sequenceoid) != RELKIND_SEQUENCE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is not a sequence",
+ text_to_cstring(sequencename))));
+
+ aclresult = pg_class_aclcheck(sequenceoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_sequence_privilege_name_id
+ * Check user privileges on a sequence given
+ * name username, sequence oid, and text priv name.
+ */
+Datum
+has_sequence_privilege_name_id(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ Oid sequenceoid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+ char relkind;
+
+ roleid = get_role_oid_or_public(NameStr(*username));
+ mode = convert_sequence_priv_string(priv_type_text);
+ relkind = get_rel_relkind(sequenceoid);
+ if (relkind == '\0')
+ PG_RETURN_NULL();
+ else if (relkind != RELKIND_SEQUENCE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is not a sequence",
+ get_rel_name(sequenceoid))));
+
+ aclresult = pg_class_aclcheck(sequenceoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_sequence_privilege_id
+ * Check user privileges on a sequence given
+ * sequence oid, and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_sequence_privilege_id(PG_FUNCTION_ARGS)
+{
+ Oid sequenceoid = PG_GETARG_OID(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+ char relkind;
+
+ roleid = GetUserId();
+ mode = convert_sequence_priv_string(priv_type_text);
+ relkind = get_rel_relkind(sequenceoid);
+ if (relkind == '\0')
+ PG_RETURN_NULL();
+ else if (relkind != RELKIND_SEQUENCE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is not a sequence",
+ get_rel_name(sequenceoid))));
+
+ aclresult = pg_class_aclcheck(sequenceoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_sequence_privilege_id_name
+ * Check user privileges on a sequence given
+ * roleid, text sequencename, and text priv name.
+ */
+Datum
+has_sequence_privilege_id_name(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ text *sequencename = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid sequenceoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ mode = convert_sequence_priv_string(priv_type_text);
+ sequenceoid = convert_table_name(sequencename);
+ if (get_rel_relkind(sequenceoid) != RELKIND_SEQUENCE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is not a sequence",
+ text_to_cstring(sequencename))));
+
+ aclresult = pg_class_aclcheck(sequenceoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_sequence_privilege_id_id
+ * Check user privileges on a sequence given
+ * roleid, sequence oid, and text priv name.
+ */
+Datum
+has_sequence_privilege_id_id(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ Oid sequenceoid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ AclMode mode;
+ AclResult aclresult;
+ char relkind;
+
+ mode = convert_sequence_priv_string(priv_type_text);
+ relkind = get_rel_relkind(sequenceoid);
+ if (relkind == '\0')
+ PG_RETURN_NULL();
+ else if (relkind != RELKIND_SEQUENCE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is not a sequence",
+ get_rel_name(sequenceoid))));
+
+ aclresult = pg_class_aclcheck(sequenceoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * convert_sequence_priv_string
+ * Convert text string to AclMode value.
+ */
+static AclMode
+convert_sequence_priv_string(text *priv_type_text)
+{
+ static const priv_map sequence_priv_map[] = {
+ {"USAGE", ACL_USAGE},
+ {"USAGE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_USAGE)},
+ {"SELECT", ACL_SELECT},
+ {"SELECT WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_SELECT)},
+ {"UPDATE", ACL_UPDATE},
+ {"UPDATE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_UPDATE)},
+ {NULL, 0}
+ };
+
+ return convert_any_priv_string(priv_type_text, sequence_priv_map);
+}
+
+
+/*
+ * has_any_column_privilege variants
+ * These are all named "has_any_column_privilege" at the SQL level.
+ * They take various combinations of relation name, relation OID,
+ * user name, user OID, or implicit user = current_user.
+ *
+ * The result is a boolean value: true if user has the indicated
+ * privilege for any column of the table, false if not. The variants
+ * that take a relation OID return NULL if the OID doesn't exist.
+ */
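+
+/*
+ * Typical SQL-level call (a sketch; the role and table names are
+ * hypothetical):
+ *
+ *   SELECT has_any_column_privilege('joe', 'mytable', 'UPDATE');
+ */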
+
+/*
+ * has_any_column_privilege_name_name
+ * Check user privileges on any column of a table given
+ * name username, text tablename, and text priv name.
+ */
+Datum
+has_any_column_privilege_name_name(PG_FUNCTION_ARGS)
+{
+ Name rolename = PG_GETARG_NAME(0);
+ text *tablename = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ Oid tableoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid_or_public(NameStr(*rolename));
+ tableoid = convert_table_name(tablename);
+ mode = convert_column_priv_string(priv_type_text);
+
+ /* First check at table level, then examine each column if needed */
+ aclresult = pg_class_aclcheck(tableoid, roleid, mode);
+ if (aclresult != ACLCHECK_OK)
+ aclresult = pg_attribute_aclcheck_all(tableoid, roleid, mode,
+ ACLMASK_ANY);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_any_column_privilege_name
+ * Check user privileges on any column of a table given
+ * text tablename and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_any_column_privilege_name(PG_FUNCTION_ARGS)
+{
+ text *tablename = PG_GETARG_TEXT_PP(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ Oid tableoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ tableoid = convert_table_name(tablename);
+ mode = convert_column_priv_string(priv_type_text);
+
+ /* First check at table level, then examine each column if needed */
+ aclresult = pg_class_aclcheck(tableoid, roleid, mode);
+ if (aclresult != ACLCHECK_OK)
+ aclresult = pg_attribute_aclcheck_all(tableoid, roleid, mode,
+ ACLMASK_ANY);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_any_column_privilege_name_id
+ * Check user privileges on any column of a table given
+ * name username, table oid, and text priv name.
+ */
+Datum
+has_any_column_privilege_name_id(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ Oid tableoid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid_or_public(NameStr(*username));
+ mode = convert_column_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(tableoid)))
+ PG_RETURN_NULL();
+
+ /* First check at table level, then examine each column if needed */
+ aclresult = pg_class_aclcheck(tableoid, roleid, mode);
+ if (aclresult != ACLCHECK_OK)
+ aclresult = pg_attribute_aclcheck_all(tableoid, roleid, mode,
+ ACLMASK_ANY);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_any_column_privilege_id
+ * Check user privileges on any column of a table given
+ * table oid, and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_any_column_privilege_id(PG_FUNCTION_ARGS)
+{
+ Oid tableoid = PG_GETARG_OID(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ mode = convert_column_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(tableoid)))
+ PG_RETURN_NULL();
+
+ /* First check at table level, then examine each column if needed */
+ aclresult = pg_class_aclcheck(tableoid, roleid, mode);
+ if (aclresult != ACLCHECK_OK)
+ aclresult = pg_attribute_aclcheck_all(tableoid, roleid, mode,
+ ACLMASK_ANY);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_any_column_privilege_id_name
+ * Check user privileges on any column of a table given
+ * roleid, text tablename, and text priv name.
+ */
+Datum
+has_any_column_privilege_id_name(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ text *tablename = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid tableoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ tableoid = convert_table_name(tablename);
+ mode = convert_column_priv_string(priv_type_text);
+
+ /* First check at table level, then examine each column if needed */
+ aclresult = pg_class_aclcheck(tableoid, roleid, mode);
+ if (aclresult != ACLCHECK_OK)
+ aclresult = pg_attribute_aclcheck_all(tableoid, roleid, mode,
+ ACLMASK_ANY);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_any_column_privilege_id_id
+ * Check user privileges on any column of a table given
+ * roleid, table oid, and text priv name.
+ */
+Datum
+has_any_column_privilege_id_id(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ Oid tableoid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ AclMode mode;
+ AclResult aclresult;
+
+ mode = convert_column_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(tableoid)))
+ PG_RETURN_NULL();
+
+ /* First check at table level, then examine each column if needed */
+ aclresult = pg_class_aclcheck(tableoid, roleid, mode);
+ if (aclresult != ACLCHECK_OK)
+ aclresult = pg_attribute_aclcheck_all(tableoid, roleid, mode,
+ ACLMASK_ANY);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+
+/*
+ * has_column_privilege variants
+ * These are all named "has_column_privilege" at the SQL level.
+ * They take various combinations of relation name, relation OID,
+ * column name, column attnum, user name, user OID, or
+ * implicit user = current_user.
+ *
+ * The result is a boolean value: true if user has the indicated
+ * privilege, false if not. The variants that take a relation OID
+ * return NULL (rather than throwing an error) if that relation OID
+ * doesn't exist. Likewise, the variants that take an integer attnum
+ * return NULL (rather than throwing an error) if there is no such
+ * pg_attribute entry. All variants return NULL if an attisdropped
+ * column is selected. These rules are meant to avoid unnecessary
+ * failures in queries that scan pg_attribute.
+ */
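+
+/*
+ * Typical SQL-level calls (a sketch; the role, table, and column names are
+ * hypothetical):
+ *
+ *   SELECT has_column_privilege('joe', 'mytable', 'mycol', 'SELECT');
+ *   SELECT has_column_privilege('mytable', 'mycol', 'UPDATE');  -- current_user assumed
+ */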
+
+/*
+ * column_privilege_check: check column privileges, but don't throw an error
+ * for dropped column or table
+ *
+ * Returns 1 if the role has the privilege, 0 if not, -1 if the column or
+ * table has been dropped.
+ */
+static int
+column_privilege_check(Oid tableoid, AttrNumber attnum,
+ Oid roleid, AclMode mode)
+{
+ AclResult aclresult;
+ bool is_missing = false;
+
+ /*
+ * If convert_column_name failed, we can just return -1 immediately.
+ */
+ if (attnum == InvalidAttrNumber)
+ return -1;
+
+ /*
+ * Check for column-level privileges first. This serves in part as a check
+ * on whether the column even exists, so we need to do it before checking
+ * table-level privilege.
+ */
+ aclresult = pg_attribute_aclcheck_ext(tableoid, attnum, roleid,
+ mode, &is_missing);
+ if (aclresult == ACLCHECK_OK)
+ return 1;
+ else if (is_missing)
+ return -1;
+
+ /* Next check if we have the privilege at the table level */
+ aclresult = pg_class_aclcheck_ext(tableoid, roleid, mode, &is_missing);
+ if (aclresult == ACLCHECK_OK)
+ return 1;
+ else if (is_missing)
+ return -1;
+ else
+ return 0;
+}
+
+/*
+ * has_column_privilege_name_name_name
+ * Check user privileges on a column given
+ * name username, text tablename, text colname, and text priv name.
+ */
+Datum
+has_column_privilege_name_name_name(PG_FUNCTION_ARGS)
+{
+ Name rolename = PG_GETARG_NAME(0);
+ text *tablename = PG_GETARG_TEXT_PP(1);
+ text *column = PG_GETARG_TEXT_PP(2);
+ text *priv_type_text = PG_GETARG_TEXT_PP(3);
+ Oid roleid;
+ Oid tableoid;
+ AttrNumber colattnum;
+ AclMode mode;
+ int privresult;
+
+ roleid = get_role_oid_or_public(NameStr(*rolename));
+ tableoid = convert_table_name(tablename);
+ colattnum = convert_column_name(tableoid, column);
+ mode = convert_column_priv_string(priv_type_text);
+
+ privresult = column_privilege_check(tableoid, colattnum, roleid, mode);
+ if (privresult < 0)
+ PG_RETURN_NULL();
+ PG_RETURN_BOOL(privresult);
+}
+
+/*
+ * has_column_privilege_name_name_attnum
+ * Check user privileges on a column given
+ * name username, text tablename, int attnum, and text priv name.
+ */
+Datum
+has_column_privilege_name_name_attnum(PG_FUNCTION_ARGS)
+{
+ Name rolename = PG_GETARG_NAME(0);
+ text *tablename = PG_GETARG_TEXT_PP(1);
+ AttrNumber colattnum = PG_GETARG_INT16(2);
+ text *priv_type_text = PG_GETARG_TEXT_PP(3);
+ Oid roleid;
+ Oid tableoid;
+ AclMode mode;
+ int privresult;
+
+ roleid = get_role_oid_or_public(NameStr(*rolename));
+ tableoid = convert_table_name(tablename);
+ mode = convert_column_priv_string(priv_type_text);
+
+ privresult = column_privilege_check(tableoid, colattnum, roleid, mode);
+ if (privresult < 0)
+ PG_RETURN_NULL();
+ PG_RETURN_BOOL(privresult);
+}
+
+/*
+ * has_column_privilege_name_id_name
+ * Check user privileges on a column given
+ * name username, table oid, text colname, and text priv name.
+ */
+Datum
+has_column_privilege_name_id_name(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ Oid tableoid = PG_GETARG_OID(1);
+ text *column = PG_GETARG_TEXT_PP(2);
+ text *priv_type_text = PG_GETARG_TEXT_PP(3);
+ Oid roleid;
+ AttrNumber colattnum;
+ AclMode mode;
+ int privresult;
+
+ roleid = get_role_oid_or_public(NameStr(*username));
+ colattnum = convert_column_name(tableoid, column);
+ mode = convert_column_priv_string(priv_type_text);
+
+ privresult = column_privilege_check(tableoid, colattnum, roleid, mode);
+ if (privresult < 0)
+ PG_RETURN_NULL();
+ PG_RETURN_BOOL(privresult);
+}
+
+/*
+ * has_column_privilege_name_id_attnum
+ * Check user privileges on a column given
+ * name username, table oid, int attnum, and text priv name.
+ */
+Datum
+has_column_privilege_name_id_attnum(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ Oid tableoid = PG_GETARG_OID(1);
+ AttrNumber colattnum = PG_GETARG_INT16(2);
+ text *priv_type_text = PG_GETARG_TEXT_PP(3);
+ Oid roleid;
+ AclMode mode;
+ int privresult;
+
+ roleid = get_role_oid_or_public(NameStr(*username));
+ mode = convert_column_priv_string(priv_type_text);
+
+ privresult = column_privilege_check(tableoid, colattnum, roleid, mode);
+ if (privresult < 0)
+ PG_RETURN_NULL();
+ PG_RETURN_BOOL(privresult);
+}
+
+/*
+ * has_column_privilege_id_name_name
+ * Check user privileges on a column given
+ * oid roleid, text tablename, text colname, and text priv name.
+ */
+Datum
+has_column_privilege_id_name_name(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ text *tablename = PG_GETARG_TEXT_PP(1);
+ text *column = PG_GETARG_TEXT_PP(2);
+ text *priv_type_text = PG_GETARG_TEXT_PP(3);
+ Oid tableoid;
+ AttrNumber colattnum;
+ AclMode mode;
+ int privresult;
+
+ tableoid = convert_table_name(tablename);
+ colattnum = convert_column_name(tableoid, column);
+ mode = convert_column_priv_string(priv_type_text);
+
+ privresult = column_privilege_check(tableoid, colattnum, roleid, mode);
+ if (privresult < 0)
+ PG_RETURN_NULL();
+ PG_RETURN_BOOL(privresult);
+}
+
+/*
+ * has_column_privilege_id_name_attnum
+ * Check user privileges on a column given
+ * oid roleid, text tablename, int attnum, and text priv name.
+ */
+Datum
+has_column_privilege_id_name_attnum(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ text *tablename = PG_GETARG_TEXT_PP(1);
+ AttrNumber colattnum = PG_GETARG_INT16(2);
+ text *priv_type_text = PG_GETARG_TEXT_PP(3);
+ Oid tableoid;
+ AclMode mode;
+ int privresult;
+
+ tableoid = convert_table_name(tablename);
+ mode = convert_column_priv_string(priv_type_text);
+
+ privresult = column_privilege_check(tableoid, colattnum, roleid, mode);
+ if (privresult < 0)
+ PG_RETURN_NULL();
+ PG_RETURN_BOOL(privresult);
+}
+
+/*
+ * has_column_privilege_id_id_name
+ * Check user privileges on a column given
+ * oid roleid, table oid, text colname, and text priv name.
+ */
+Datum
+has_column_privilege_id_id_name(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ Oid tableoid = PG_GETARG_OID(1);
+ text *column = PG_GETARG_TEXT_PP(2);
+ text *priv_type_text = PG_GETARG_TEXT_PP(3);
+ AttrNumber colattnum;
+ AclMode mode;
+ int privresult;
+
+ colattnum = convert_column_name(tableoid, column);
+ mode = convert_column_priv_string(priv_type_text);
+
+ privresult = column_privilege_check(tableoid, colattnum, roleid, mode);
+ if (privresult < 0)
+ PG_RETURN_NULL();
+ PG_RETURN_BOOL(privresult);
+}
+
+/*
+ * has_column_privilege_id_id_attnum
+ * Check user privileges on a column given
+ * oid roleid, table oid, int attnum, and text priv name.
+ */
+Datum
+has_column_privilege_id_id_attnum(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ Oid tableoid = PG_GETARG_OID(1);
+ AttrNumber colattnum = PG_GETARG_INT16(2);
+ text *priv_type_text = PG_GETARG_TEXT_PP(3);
+ AclMode mode;
+ int privresult;
+
+ mode = convert_column_priv_string(priv_type_text);
+
+ privresult = column_privilege_check(tableoid, colattnum, roleid, mode);
+ if (privresult < 0)
+ PG_RETURN_NULL();
+ PG_RETURN_BOOL(privresult);
+}
+
+/*
+ * has_column_privilege_name_name
+ * Check user privileges on a column given
+ * text tablename, text colname, and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_column_privilege_name_name(PG_FUNCTION_ARGS)
+{
+ text *tablename = PG_GETARG_TEXT_PP(0);
+ text *column = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ Oid tableoid;
+ AttrNumber colattnum;
+ AclMode mode;
+ int privresult;
+
+ roleid = GetUserId();
+ tableoid = convert_table_name(tablename);
+ colattnum = convert_column_name(tableoid, column);
+ mode = convert_column_priv_string(priv_type_text);
+
+ privresult = column_privilege_check(tableoid, colattnum, roleid, mode);
+ if (privresult < 0)
+ PG_RETURN_NULL();
+ PG_RETURN_BOOL(privresult);
+}
+
+/*
+ * has_column_privilege_name_attnum
+ * Check user privileges on a column given
+ * text tablename, int attnum, and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_column_privilege_name_attnum(PG_FUNCTION_ARGS)
+{
+ text *tablename = PG_GETARG_TEXT_PP(0);
+ AttrNumber colattnum = PG_GETARG_INT16(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ Oid tableoid;
+ AclMode mode;
+ int privresult;
+
+ roleid = GetUserId();
+ tableoid = convert_table_name(tablename);
+ mode = convert_column_priv_string(priv_type_text);
+
+ privresult = column_privilege_check(tableoid, colattnum, roleid, mode);
+ if (privresult < 0)
+ PG_RETURN_NULL();
+ PG_RETURN_BOOL(privresult);
+}
+
+/*
+ * has_column_privilege_id_name
+ * Check user privileges on a column given
+ * table oid, text colname, and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_column_privilege_id_name(PG_FUNCTION_ARGS)
+{
+ Oid tableoid = PG_GETARG_OID(0);
+ text *column = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ AttrNumber colattnum;
+ AclMode mode;
+ int privresult;
+
+ roleid = GetUserId();
+ colattnum = convert_column_name(tableoid, column);
+ mode = convert_column_priv_string(priv_type_text);
+
+ privresult = column_privilege_check(tableoid, colattnum, roleid, mode);
+ if (privresult < 0)
+ PG_RETURN_NULL();
+ PG_RETURN_BOOL(privresult);
+}
+
+/*
+ * has_column_privilege_id_attnum
+ * Check user privileges on a column given
+ * table oid, int attnum, and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_column_privilege_id_attnum(PG_FUNCTION_ARGS)
+{
+ Oid tableoid = PG_GETARG_OID(0);
+ AttrNumber colattnum = PG_GETARG_INT16(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ AclMode mode;
+ int privresult;
+
+ roleid = GetUserId();
+ mode = convert_column_priv_string(priv_type_text);
+
+ privresult = column_privilege_check(tableoid, colattnum, roleid, mode);
+ if (privresult < 0)
+ PG_RETURN_NULL();
+ PG_RETURN_BOOL(privresult);
+}
+
+/*
+ * Support routines for has_column_privilege family.
+ */
+
+/*
+ * Given a table OID and a column name expressed as a string, look it up
+ * and return the column number. Returns InvalidAttrNumber in cases
+ * where caller should return NULL instead of failing.
+ */
+static AttrNumber
+convert_column_name(Oid tableoid, text *column)
+{
+ char *colname;
+ HeapTuple attTuple;
+ AttrNumber attnum;
+
+ colname = text_to_cstring(column);
+
+ /*
+ * We don't use get_attnum() here because it will report that dropped
+ * columns don't exist. We need to treat dropped columns differently from
+ * nonexistent columns.
+ */
+ attTuple = SearchSysCache2(ATTNAME,
+ ObjectIdGetDatum(tableoid),
+ CStringGetDatum(colname));
+ if (HeapTupleIsValid(attTuple))
+ {
+ Form_pg_attribute attributeForm;
+
+ attributeForm = (Form_pg_attribute) GETSTRUCT(attTuple);
+ /* We want to return NULL for dropped columns */
+ if (attributeForm->attisdropped)
+ attnum = InvalidAttrNumber;
+ else
+ attnum = attributeForm->attnum;
+ ReleaseSysCache(attTuple);
+ }
+ else
+ {
+ char *tablename = get_rel_name(tableoid);
+
+ /*
+ * If the table OID is bogus, or it's just been dropped, we'll get
+ * NULL back. In such cases we want has_column_privilege to return
+ * NULL too, so just return InvalidAttrNumber.
+ */
+ if (tablename != NULL)
+ {
+ /* tableoid exists, colname does not, so throw error */
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("column \"%s\" of relation \"%s\" does not exist",
+ colname, tablename)));
+ }
+ /* tableoid doesn't exist, so act like attisdropped case */
+ attnum = InvalidAttrNumber;
+ }
+
+ pfree(colname);
+ return attnum;
+}
+
+/*
+ * convert_column_priv_string
+ * Convert text string to AclMode value.
+ */
+static AclMode
+convert_column_priv_string(text *priv_type_text)
+{
+ static const priv_map column_priv_map[] = {
+ {"SELECT", ACL_SELECT},
+ {"SELECT WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_SELECT)},
+ {"INSERT", ACL_INSERT},
+ {"INSERT WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_INSERT)},
+ {"UPDATE", ACL_UPDATE},
+ {"UPDATE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_UPDATE)},
+ {"REFERENCES", ACL_REFERENCES},
+ {"REFERENCES WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_REFERENCES)},
+ {NULL, 0}
+ };
+
+ return convert_any_priv_string(priv_type_text, column_priv_map);
+}
+
+
+/*
+ * has_database_privilege variants
+ * These are all named "has_database_privilege" at the SQL level.
+ * They take various combinations of database name, database OID,
+ * user name, user OID, or implicit user = current_user.
+ *
+ * The result is a boolean value: true if user has the indicated
+ * privilege, false if not, or NULL if object doesn't exist.
+ */
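+
+/*
+ * Typical SQL-level calls (a sketch; the role and database names are
+ * hypothetical):
+ *
+ *   SELECT has_database_privilege('joe', 'mydb', 'CONNECT');
+ *   SELECT has_database_privilege('mydb', 'TEMP');  -- current_user assumed
+ */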
+
+/*
+ * has_database_privilege_name_name
+ * Check user privileges on a database given
+ * name username, text databasename, and text priv name.
+ */
+Datum
+has_database_privilege_name_name(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ text *databasename = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ Oid databaseoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid_or_public(NameStr(*username));
+ databaseoid = convert_database_name(databasename);
+ mode = convert_database_priv_string(priv_type_text);
+
+ aclresult = pg_database_aclcheck(databaseoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_database_privilege_name
+ * Check user privileges on a database given
+ * text databasename and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_database_privilege_name(PG_FUNCTION_ARGS)
+{
+ text *databasename = PG_GETARG_TEXT_PP(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ Oid databaseoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ databaseoid = convert_database_name(databasename);
+ mode = convert_database_priv_string(priv_type_text);
+
+ aclresult = pg_database_aclcheck(databaseoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_database_privilege_name_id
+ * Check user privileges on a database given
+ * name username, database oid, and text priv name.
+ */
+Datum
+has_database_privilege_name_id(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ Oid databaseoid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid_or_public(NameStr(*username));
+ mode = convert_database_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(DATABASEOID, ObjectIdGetDatum(databaseoid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_database_aclcheck(databaseoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_database_privilege_id
+ * Check user privileges on a database given
+ * database oid, and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_database_privilege_id(PG_FUNCTION_ARGS)
+{
+ Oid databaseoid = PG_GETARG_OID(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ mode = convert_database_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(DATABASEOID, ObjectIdGetDatum(databaseoid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_database_aclcheck(databaseoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_database_privilege_id_name
+ * Check user privileges on a database given
+ * roleid, text databasename, and text priv name.
+ */
+Datum
+has_database_privilege_id_name(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ text *databasename = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid databaseoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ databaseoid = convert_database_name(databasename);
+ mode = convert_database_priv_string(priv_type_text);
+
+ aclresult = pg_database_aclcheck(databaseoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_database_privilege_id_id
+ * Check user privileges on a database given
+ * roleid, database oid, and text priv name.
+ */
+Datum
+has_database_privilege_id_id(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ Oid databaseoid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ AclMode mode;
+ AclResult aclresult;
+
+ mode = convert_database_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(DATABASEOID, ObjectIdGetDatum(databaseoid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_database_aclcheck(databaseoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * Support routines for has_database_privilege family.
+ */
+
+/*
+ * Given a database name expressed as a string, look it up and return Oid
+ */
+static Oid
+convert_database_name(text *databasename)
+{
+ char *dbname = text_to_cstring(databasename);
+
+ return get_database_oid(dbname, false);
+}
+
+/*
+ * convert_database_priv_string
+ * Convert text string to AclMode value.
+ */
+static AclMode
+convert_database_priv_string(text *priv_type_text)
+{
+ static const priv_map database_priv_map[] = {
+ {"CREATE", ACL_CREATE},
+ {"CREATE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_CREATE)},
+ {"TEMPORARY", ACL_CREATE_TEMP},
+ {"TEMPORARY WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_CREATE_TEMP)},
+ {"TEMP", ACL_CREATE_TEMP},
+ {"TEMP WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_CREATE_TEMP)},
+ {"CONNECT", ACL_CONNECT},
+ {"CONNECT WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_CONNECT)},
+ {NULL, 0}
+ };
+
+ return convert_any_priv_string(priv_type_text, database_priv_map);
+}
+
+
+/*
+ * has_foreign_data_wrapper_privilege variants
+ * These are all named "has_foreign_data_wrapper_privilege" at the SQL level.
+ * They take various combinations of foreign-data wrapper name,
+ * fdw OID, user name, user OID, or implicit user = current_user.
+ *
+ * The result is a boolean value: true if user has the indicated
+ * privilege, false if not. The variants that take a foreign-data
+ * wrapper OID return NULL if the OID doesn't exist.
+ */
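+
+/*
+ * Typical SQL-level call (a sketch; the role and wrapper names are
+ * hypothetical):
+ *
+ *   SELECT has_foreign_data_wrapper_privilege('joe', 'my_fdw', 'USAGE');
+ */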
+
+/*
+ * has_foreign_data_wrapper_privilege_name_name
+ * Check user privileges on a foreign-data wrapper given
+ * name username, text fdwname, and text priv name.
+ */
+Datum
+has_foreign_data_wrapper_privilege_name_name(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ text *fdwname = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ Oid fdwid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid_or_public(NameStr(*username));
+ fdwid = convert_foreign_data_wrapper_name(fdwname);
+ mode = convert_foreign_data_wrapper_priv_string(priv_type_text);
+
+ aclresult = pg_foreign_data_wrapper_aclcheck(fdwid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_foreign_data_wrapper_privilege_name
+ * Check user privileges on a foreign-data wrapper given
+ * text fdwname and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_foreign_data_wrapper_privilege_name(PG_FUNCTION_ARGS)
+{
+ text *fdwname = PG_GETARG_TEXT_PP(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ Oid fdwid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ fdwid = convert_foreign_data_wrapper_name(fdwname);
+ mode = convert_foreign_data_wrapper_priv_string(priv_type_text);
+
+ aclresult = pg_foreign_data_wrapper_aclcheck(fdwid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_foreign_data_wrapper_privilege_name_id
+ * Check user privileges on a foreign-data wrapper given
+ * name username, foreign-data wrapper oid, and text priv name.
+ */
+Datum
+has_foreign_data_wrapper_privilege_name_id(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ Oid fdwid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid_or_public(NameStr(*username));
+ mode = convert_foreign_data_wrapper_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(FOREIGNDATAWRAPPEROID, ObjectIdGetDatum(fdwid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_foreign_data_wrapper_aclcheck(fdwid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_foreign_data_wrapper_privilege_id
+ * Check user privileges on a foreign-data wrapper given
+ * foreign-data wrapper oid, and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_foreign_data_wrapper_privilege_id(PG_FUNCTION_ARGS)
+{
+ Oid fdwid = PG_GETARG_OID(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ mode = convert_foreign_data_wrapper_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(FOREIGNDATAWRAPPEROID, ObjectIdGetDatum(fdwid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_foreign_data_wrapper_aclcheck(fdwid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_foreign_data_wrapper_privilege_id_name
+ * Check user privileges on a foreign-data wrapper given
+ * roleid, text fdwname, and text priv name.
+ */
+Datum
+has_foreign_data_wrapper_privilege_id_name(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ text *fdwname = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid fdwid;
+ AclMode mode;
+ AclResult aclresult;
+
+ fdwid = convert_foreign_data_wrapper_name(fdwname);
+ mode = convert_foreign_data_wrapper_priv_string(priv_type_text);
+
+ aclresult = pg_foreign_data_wrapper_aclcheck(fdwid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_foreign_data_wrapper_privilege_id_id
+ * Check user privileges on a foreign-data wrapper given
+ * roleid, fdw oid, and text priv name.
+ */
+Datum
+has_foreign_data_wrapper_privilege_id_id(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ Oid fdwid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ AclMode mode;
+ AclResult aclresult;
+
+ mode = convert_foreign_data_wrapper_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(FOREIGNDATAWRAPPEROID, ObjectIdGetDatum(fdwid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_foreign_data_wrapper_aclcheck(fdwid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * Support routines for has_foreign_data_wrapper_privilege family.
+ */
+
+/*
+ * Given a FDW name expressed as a string, look it up and return Oid
+ */
+static Oid
+convert_foreign_data_wrapper_name(text *fdwname)
+{
+ char *fdwstr = text_to_cstring(fdwname);
+
+ return get_foreign_data_wrapper_oid(fdwstr, false);
+}
+
+/*
+ * convert_foreign_data_wrapper_priv_string
+ * Convert text string to AclMode value.
+ */
+static AclMode
+convert_foreign_data_wrapper_priv_string(text *priv_type_text)
+{
+ static const priv_map foreign_data_wrapper_priv_map[] = {
+ {"USAGE", ACL_USAGE},
+ {"USAGE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_USAGE)},
+ {NULL, 0}
+ };
+
+ return convert_any_priv_string(priv_type_text, foreign_data_wrapper_priv_map);
+}
+
+
+/*
+ * has_function_privilege variants
+ * These are all named "has_function_privilege" at the SQL level.
+ * They take various combinations of function name, function OID,
+ * user name, user OID, or implicit user = current_user.
+ *
+ * The result is a boolean value: true if user has the indicated
+ * privilege, false if not, or NULL if object doesn't exist.
+ */
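+
+/*
+ * Typical SQL-level call (a sketch; the role name is hypothetical).  When
+ * the function is identified by name, the text must use regprocedure
+ * syntax, i.e. include the argument types:
+ *
+ *   SELECT has_function_privilege('joe', 'pg_catalog.lower(text)', 'EXECUTE');
+ */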
+
+/*
+ * has_function_privilege_name_name
+ * Check user privileges on a function given
+ * name username, text functionname, and text priv name.
+ */
+Datum
+has_function_privilege_name_name(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ text *functionname = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ Oid functionoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid_or_public(NameStr(*username));
+ functionoid = convert_function_name(functionname);
+ mode = convert_function_priv_string(priv_type_text);
+
+ aclresult = pg_proc_aclcheck(functionoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_function_privilege_name
+ * Check user privileges on a function given
+ * text functionname and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_function_privilege_name(PG_FUNCTION_ARGS)
+{
+ text *functionname = PG_GETARG_TEXT_PP(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ Oid functionoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ functionoid = convert_function_name(functionname);
+ mode = convert_function_priv_string(priv_type_text);
+
+ aclresult = pg_proc_aclcheck(functionoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_function_privilege_name_id
+ * Check user privileges on a function given
+ * name username, function oid, and text priv name.
+ */
+Datum
+has_function_privilege_name_id(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ Oid functionoid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid_or_public(NameStr(*username));
+ mode = convert_function_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(PROCOID, ObjectIdGetDatum(functionoid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_proc_aclcheck(functionoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_function_privilege_id
+ * Check user privileges on a function given
+ * function oid, and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_function_privilege_id(PG_FUNCTION_ARGS)
+{
+ Oid functionoid = PG_GETARG_OID(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ mode = convert_function_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(PROCOID, ObjectIdGetDatum(functionoid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_proc_aclcheck(functionoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_function_privilege_id_name
+ * Check user privileges on a function given
+ * roleid, text functionname, and text priv name.
+ */
+Datum
+has_function_privilege_id_name(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ text *functionname = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid functionoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ functionoid = convert_function_name(functionname);
+ mode = convert_function_priv_string(priv_type_text);
+
+ aclresult = pg_proc_aclcheck(functionoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_function_privilege_id_id
+ * Check user privileges on a function given
+ * roleid, function oid, and text priv name.
+ */
+Datum
+has_function_privilege_id_id(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ Oid functionoid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ AclMode mode;
+ AclResult aclresult;
+
+ mode = convert_function_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(PROCOID, ObjectIdGetDatum(functionoid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_proc_aclcheck(functionoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * Support routines for has_function_privilege family.
+ */
+
+/*
+ * Given a function name expressed as a string, look it up and return Oid
+ */
+static Oid
+convert_function_name(text *functionname)
+{
+ char *funcname = text_to_cstring(functionname);
+ Oid oid;
+
+ oid = DatumGetObjectId(DirectFunctionCall1(regprocedurein,
+ CStringGetDatum(funcname)));
+
+ if (!OidIsValid(oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("function \"%s\" does not exist", funcname)));
+
+ return oid;
+}
+
+/*
+ * convert_function_priv_string
+ * Convert text string to AclMode value.
+ */
+static AclMode
+convert_function_priv_string(text *priv_type_text)
+{
+ static const priv_map function_priv_map[] = {
+ {"EXECUTE", ACL_EXECUTE},
+ {"EXECUTE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_EXECUTE)},
+ {NULL, 0}
+ };
+
+ return convert_any_priv_string(priv_type_text, function_priv_map);
+}
+
+
+/*
+ * has_language_privilege variants
+ * These are all named "has_language_privilege" at the SQL level.
+ * They take various combinations of language name, language OID,
+ * user name, user OID, or implicit user = current_user.
+ *
+ * The result is a boolean value: true if user has the indicated
+ * privilege, false if not, or NULL if object doesn't exist.
+ */
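+
+/*
+ * Typical SQL-level call (a sketch; the role name is hypothetical):
+ *
+ *   SELECT has_language_privilege('joe', 'plpgsql', 'USAGE');
+ */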
+
+/*
+ * has_language_privilege_name_name
+ * Check user privileges on a language given
+ * name username, text languagename, and text priv name.
+ */
+Datum
+has_language_privilege_name_name(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ text *languagename = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ Oid languageoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid_or_public(NameStr(*username));
+ languageoid = convert_language_name(languagename);
+ mode = convert_language_priv_string(priv_type_text);
+
+ aclresult = pg_language_aclcheck(languageoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_language_privilege_name
+ * Check user privileges on a language given
+ * text languagename and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_language_privilege_name(PG_FUNCTION_ARGS)
+{
+ text *languagename = PG_GETARG_TEXT_PP(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ Oid languageoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ languageoid = convert_language_name(languagename);
+ mode = convert_language_priv_string(priv_type_text);
+
+ aclresult = pg_language_aclcheck(languageoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_language_privilege_name_id
+ * Check user privileges on a language given
+ * name username, language oid, and text priv name.
+ */
+Datum
+has_language_privilege_name_id(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ Oid languageoid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid_or_public(NameStr(*username));
+ mode = convert_language_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(LANGOID, ObjectIdGetDatum(languageoid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_language_aclcheck(languageoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_language_privilege_id
+ * Check user privileges on a language given
+ * language oid, and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_language_privilege_id(PG_FUNCTION_ARGS)
+{
+ Oid languageoid = PG_GETARG_OID(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ mode = convert_language_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(LANGOID, ObjectIdGetDatum(languageoid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_language_aclcheck(languageoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_language_privilege_id_name
+ * Check user privileges on a language given
+ * roleid, text languagename, and text priv name.
+ */
+Datum
+has_language_privilege_id_name(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ text *languagename = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid languageoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ languageoid = convert_language_name(languagename);
+ mode = convert_language_priv_string(priv_type_text);
+
+ aclresult = pg_language_aclcheck(languageoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_language_privilege_id_id
+ * Check user privileges on a language given
+ * roleid, language oid, and text priv name.
+ */
+Datum
+has_language_privilege_id_id(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ Oid languageoid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ AclMode mode;
+ AclResult aclresult;
+
+ mode = convert_language_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(LANGOID, ObjectIdGetDatum(languageoid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_language_aclcheck(languageoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * Support routines for has_language_privilege family.
+ */
+
+/*
+ * Given a language name expressed as a string, look it up and return Oid
+ */
+static Oid
+convert_language_name(text *languagename)
+{
+ char *langname = text_to_cstring(languagename);
+
+ return get_language_oid(langname, false);
+}
+
+/*
+ * convert_language_priv_string
+ * Convert text string to AclMode value.
+ */
+static AclMode
+convert_language_priv_string(text *priv_type_text)
+{
+ static const priv_map language_priv_map[] = {
+ {"USAGE", ACL_USAGE},
+ {"USAGE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_USAGE)},
+ {NULL, 0}
+ };
+
+ return convert_any_priv_string(priv_type_text, language_priv_map);
+}
+
+
+/*
+ * has_schema_privilege variants
+ * These are all named "has_schema_privilege" at the SQL level.
+ * They take various combinations of schema name, schema OID,
+ * user name, user OID, or implicit user = current_user.
+ *
+ * The result is a boolean value: true if user has the indicated
+ * privilege, false if not, or NULL if object doesn't exist.
+ */
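+
+/*
+ * Typical SQL-level calls (a sketch; the role and schema names are
+ * hypothetical):
+ *
+ *   SELECT has_schema_privilege('joe', 'myschema', 'USAGE');
+ *   SELECT has_schema_privilege('myschema', 'CREATE');  -- current_user assumed
+ */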
+
+/*
+ * has_schema_privilege_name_name
+ * Check user privileges on a schema given
+ * name username, text schemaname, and text priv name.
+ */
+Datum
+has_schema_privilege_name_name(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ text *schemaname = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ Oid schemaoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid_or_public(NameStr(*username));
+ schemaoid = convert_schema_name(schemaname);
+ mode = convert_schema_priv_string(priv_type_text);
+
+ aclresult = pg_namespace_aclcheck(schemaoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_schema_privilege_name
+ * Check user privileges on a schema given
+ * text schemaname and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_schema_privilege_name(PG_FUNCTION_ARGS)
+{
+ text *schemaname = PG_GETARG_TEXT_PP(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ Oid schemaoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ schemaoid = convert_schema_name(schemaname);
+ mode = convert_schema_priv_string(priv_type_text);
+
+ aclresult = pg_namespace_aclcheck(schemaoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_schema_privilege_name_id
+ * Check user privileges on a schema given
+ * name username, schema oid, and text priv name.
+ */
+Datum
+has_schema_privilege_name_id(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ Oid schemaoid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid_or_public(NameStr(*username));
+ mode = convert_schema_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(NAMESPACEOID, ObjectIdGetDatum(schemaoid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_namespace_aclcheck(schemaoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_schema_privilege_id
+ * Check user privileges on a schema given
+ * schema oid, and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_schema_privilege_id(PG_FUNCTION_ARGS)
+{
+ Oid schemaoid = PG_GETARG_OID(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ mode = convert_schema_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(NAMESPACEOID, ObjectIdGetDatum(schemaoid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_namespace_aclcheck(schemaoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_schema_privilege_id_name
+ * Check user privileges on a schema given
+ * roleid, text schemaname, and text priv name.
+ */
+Datum
+has_schema_privilege_id_name(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ text *schemaname = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid schemaoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ schemaoid = convert_schema_name(schemaname);
+ mode = convert_schema_priv_string(priv_type_text);
+
+ aclresult = pg_namespace_aclcheck(schemaoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_schema_privilege_id_id
+ * Check user privileges on a schema given
+ * roleid, schema oid, and text priv name.
+ */
+Datum
+has_schema_privilege_id_id(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ Oid schemaoid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ AclMode mode;
+ AclResult aclresult;
+
+ mode = convert_schema_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(NAMESPACEOID, ObjectIdGetDatum(schemaoid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_namespace_aclcheck(schemaoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * Support routines for has_schema_privilege family.
+ */
+
+/*
+ * Given a schema name expressed as a string, look it up and return Oid
+ */
+static Oid
+convert_schema_name(text *schemaname)
+{
+ char *nspname = text_to_cstring(schemaname);
+
+ return get_namespace_oid(nspname, false);
+}
+
+/*
+ * convert_schema_priv_string
+ * Convert text string to AclMode value.
+ */
+static AclMode
+convert_schema_priv_string(text *priv_type_text)
+{
+ static const priv_map schema_priv_map[] = {
+ {"CREATE", ACL_CREATE},
+ {"CREATE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_CREATE)},
+ {"USAGE", ACL_USAGE},
+ {"USAGE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_USAGE)},
+ {NULL, 0}
+ };
+
+ return convert_any_priv_string(priv_type_text, schema_priv_map);
+}
+
+
+/*
+ * has_server_privilege variants
+ * These are all named "has_server_privilege" at the SQL level.
+ * They take various combinations of foreign server name,
+ * server OID, user name, user OID, or implicit user = current_user.
+ *
+ * The result is a boolean value: true if user has the indicated
+ * privilege, false if not. The variants that take a server OID
+ * return NULL if the OID doesn't exist.
+ */
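+
+/*
+ * Typical SQL-level call (a sketch; the role and server names are
+ * hypothetical):
+ *
+ *   SELECT has_server_privilege('joe', 'my_server', 'USAGE');
+ */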
+
+/*
+ * has_server_privilege_name_name
+ * Check user privileges on a foreign server given
+ * name username, text servername, and text priv name.
+ */
+Datum
+has_server_privilege_name_name(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ text *servername = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ Oid serverid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid_or_public(NameStr(*username));
+ serverid = convert_server_name(servername);
+ mode = convert_server_priv_string(priv_type_text);
+
+ aclresult = pg_foreign_server_aclcheck(serverid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_server_privilege_name
+ * Check user privileges on a foreign server given
+ * text servername and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_server_privilege_name(PG_FUNCTION_ARGS)
+{
+ text *servername = PG_GETARG_TEXT_PP(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ Oid serverid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ serverid = convert_server_name(servername);
+ mode = convert_server_priv_string(priv_type_text);
+
+ aclresult = pg_foreign_server_aclcheck(serverid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_server_privilege_name_id
+ * Check user privileges on a foreign server given
+ * name username, foreign server oid, and text priv name.
+ */
+Datum
+has_server_privilege_name_id(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ Oid serverid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid_or_public(NameStr(*username));
+ mode = convert_server_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(FOREIGNSERVEROID, ObjectIdGetDatum(serverid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_foreign_server_aclcheck(serverid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_server_privilege_id
+ * Check user privileges on a foreign server given
+ * server oid, and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_server_privilege_id(PG_FUNCTION_ARGS)
+{
+ Oid serverid = PG_GETARG_OID(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ mode = convert_server_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(FOREIGNSERVEROID, ObjectIdGetDatum(serverid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_foreign_server_aclcheck(serverid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_server_privilege_id_name
+ * Check user privileges on a foreign server given
+ * roleid, text servername, and text priv name.
+ */
+Datum
+has_server_privilege_id_name(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ text *servername = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid serverid;
+ AclMode mode;
+ AclResult aclresult;
+
+ serverid = convert_server_name(servername);
+ mode = convert_server_priv_string(priv_type_text);
+
+ aclresult = pg_foreign_server_aclcheck(serverid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_server_privilege_id_id
+ * Check user privileges on a foreign server given
+ * roleid, server oid, and text priv name.
+ */
+Datum
+has_server_privilege_id_id(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ Oid serverid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ AclMode mode;
+ AclResult aclresult;
+
+ mode = convert_server_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(FOREIGNSERVEROID, ObjectIdGetDatum(serverid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_foreign_server_aclcheck(serverid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * Support routines for has_server_privilege family.
+ */
+
+/*
+ * Given a server name expressed as a string, look it up and return Oid
+ */
+static Oid
+convert_server_name(text *servername)
+{
+ char *serverstr = text_to_cstring(servername);
+
+ return get_foreign_server_oid(serverstr, false);
+}
+
+/*
+ * convert_server_priv_string
+ * Convert text string to AclMode value.
+ */
+static AclMode
+convert_server_priv_string(text *priv_type_text)
+{
+ static const priv_map server_priv_map[] = {
+ {"USAGE", ACL_USAGE},
+ {"USAGE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_USAGE)},
+ {NULL, 0}
+ };
+
+ return convert_any_priv_string(priv_type_text, server_priv_map);
+}
+
+
+/*
+ * has_tablespace_privilege variants
+ * These are all named "has_tablespace_privilege" at the SQL level.
+ * They take various combinations of tablespace name, tablespace OID,
+ * user name, user OID, or implicit user = current_user.
+ *
+ * The result is a boolean value: true if user has the indicated
+ * privilege, false if not, or NULL if the tablespace is identified by
+ * OID and doesn't exist.
+ */
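+
+/*
+ * For illustration only: typical SQL-level calls look like
+ *
+ *     SELECT has_tablespace_privilege('joe', 'pg_default', 'CREATE');
+ *     SELECT has_tablespace_privilege('pg_default', 'CREATE');
+ *
+ * where "joe" is a hypothetical role; pg_default is the built-in default
+ * tablespace, and CREATE is the only grantable tablespace privilege.
+ */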
+
+/*
+ * has_tablespace_privilege_name_name
+ * Check user privileges on a tablespace given
+ * name username, text tablespacename, and text priv name.
+ */
+Datum
+has_tablespace_privilege_name_name(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ text *tablespacename = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ Oid tablespaceoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid_or_public(NameStr(*username));
+ tablespaceoid = convert_tablespace_name(tablespacename);
+ mode = convert_tablespace_priv_string(priv_type_text);
+
+ aclresult = pg_tablespace_aclcheck(tablespaceoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_tablespace_privilege_name
+ * Check user privileges on a tablespace given
+ * text tablespacename and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_tablespace_privilege_name(PG_FUNCTION_ARGS)
+{
+ text *tablespacename = PG_GETARG_TEXT_PP(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ Oid tablespaceoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ tablespaceoid = convert_tablespace_name(tablespacename);
+ mode = convert_tablespace_priv_string(priv_type_text);
+
+ aclresult = pg_tablespace_aclcheck(tablespaceoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_tablespace_privilege_name_id
+ * Check user privileges on a tablespace given
+ * name username, tablespace oid, and text priv name.
+ */
+Datum
+has_tablespace_privilege_name_id(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ Oid tablespaceoid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid_or_public(NameStr(*username));
+ mode = convert_tablespace_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(TABLESPACEOID, ObjectIdGetDatum(tablespaceoid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_tablespace_aclcheck(tablespaceoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_tablespace_privilege_id
+ * Check user privileges on a tablespace given
+ * tablespace oid, and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_tablespace_privilege_id(PG_FUNCTION_ARGS)
+{
+ Oid tablespaceoid = PG_GETARG_OID(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ mode = convert_tablespace_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(TABLESPACEOID, ObjectIdGetDatum(tablespaceoid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_tablespace_aclcheck(tablespaceoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_tablespace_privilege_id_name
+ * Check user privileges on a tablespace given
+ * roleid, text tablespacename, and text priv name.
+ */
+Datum
+has_tablespace_privilege_id_name(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ text *tablespacename = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid tablespaceoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ tablespaceoid = convert_tablespace_name(tablespacename);
+ mode = convert_tablespace_priv_string(priv_type_text);
+
+ aclresult = pg_tablespace_aclcheck(tablespaceoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_tablespace_privilege_id_id
+ * Check user privileges on a tablespace given
+ * roleid, tablespace oid, and text priv name.
+ */
+Datum
+has_tablespace_privilege_id_id(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ Oid tablespaceoid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ AclMode mode;
+ AclResult aclresult;
+
+ mode = convert_tablespace_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(TABLESPACEOID, ObjectIdGetDatum(tablespaceoid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_tablespace_aclcheck(tablespaceoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * Support routines for has_tablespace_privilege family.
+ */
+
+/*
+ * Given a tablespace name expressed as a string, look it up and return Oid
+ */
+static Oid
+convert_tablespace_name(text *tablespacename)
+{
+ char *spcname = text_to_cstring(tablespacename);
+
+ return get_tablespace_oid(spcname, false);
+}
+
+/*
+ * convert_tablespace_priv_string
+ * Convert text string to AclMode value.
+ */
+static AclMode
+convert_tablespace_priv_string(text *priv_type_text)
+{
+ static const priv_map tablespace_priv_map[] = {
+ {"CREATE", ACL_CREATE},
+ {"CREATE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_CREATE)},
+ {NULL, 0}
+ };
+
+ return convert_any_priv_string(priv_type_text, tablespace_priv_map);
+}
+
+/*
+ * has_type_privilege variants
+ * These are all named "has_type_privilege" at the SQL level.
+ * They take various combinations of type name, type OID,
+ * user name, user OID, or implicit user = current_user.
+ *
+ * The result is a boolean value: true if user has the indicated
+ * privilege, false if not, or NULL if object doesn't exist.
+ */
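+
+/*
+ * For illustration only: typical SQL-level calls look like
+ *
+ *     SELECT has_type_privilege('joe', 'numeric', 'USAGE');
+ *     SELECT has_type_privilege('numeric', 'USAGE');
+ *
+ * where "joe" is a hypothetical role.  The type name is parsed by
+ * regtypein, so schema-qualified names such as 'pg_catalog.numeric'
+ * also work; USAGE is the only grantable type privilege.
+ */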
+
+/*
+ * has_type_privilege_name_name
+ * Check user privileges on a type given
+ * name username, text typename, and text priv name.
+ */
+Datum
+has_type_privilege_name_name(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ text *typename = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ Oid typeoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid_or_public(NameStr(*username));
+ typeoid = convert_type_name(typename);
+ mode = convert_type_priv_string(priv_type_text);
+
+ aclresult = pg_type_aclcheck(typeoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_type_privilege_name
+ * Check user privileges on a type given
+ * text typename and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_type_privilege_name(PG_FUNCTION_ARGS)
+{
+ text *typename = PG_GETARG_TEXT_PP(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ Oid typeoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ typeoid = convert_type_name(typename);
+ mode = convert_type_priv_string(priv_type_text);
+
+ aclresult = pg_type_aclcheck(typeoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_type_privilege_name_id
+ * Check user privileges on a type given
+ * name username, type oid, and text priv name.
+ */
+Datum
+has_type_privilege_name_id(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ Oid typeoid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid_or_public(NameStr(*username));
+ mode = convert_type_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(TYPEOID, ObjectIdGetDatum(typeoid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_type_aclcheck(typeoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_type_privilege_id
+ * Check user privileges on a type given
+ * type oid, and text priv name.
+ * current_user is assumed
+ */
+Datum
+has_type_privilege_id(PG_FUNCTION_ARGS)
+{
+ Oid typeoid = PG_GETARG_OID(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ mode = convert_type_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(TYPEOID, ObjectIdGetDatum(typeoid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_type_aclcheck(typeoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_type_privilege_id_name
+ * Check user privileges on a type given
+ * roleid, text typename, and text priv name.
+ */
+Datum
+has_type_privilege_id_name(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ text *typename = PG_GETARG_TEXT_PP(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid typeoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ typeoid = convert_type_name(typename);
+ mode = convert_type_priv_string(priv_type_text);
+
+ aclresult = pg_type_aclcheck(typeoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * has_type_privilege_id_id
+ * Check user privileges on a type given
+ * roleid, type oid, and text priv name.
+ */
+Datum
+has_type_privilege_id_id(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ Oid typeoid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ AclMode mode;
+ AclResult aclresult;
+
+ mode = convert_type_priv_string(priv_type_text);
+
+ if (!SearchSysCacheExists1(TYPEOID, ObjectIdGetDatum(typeoid)))
+ PG_RETURN_NULL();
+
+ aclresult = pg_type_aclcheck(typeoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * Support routines for has_type_privilege family.
+ */
+
+/*
+ * Given a type name expressed as a string, look it up and return Oid
+ */
+static Oid
+convert_type_name(text *typename)
+{
+ char *typname = text_to_cstring(typename);
+ Oid oid;
+
+ oid = DatumGetObjectId(DirectFunctionCall1(regtypein,
+ CStringGetDatum(typname)));
+
+ if (!OidIsValid(oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("type \"%s\" does not exist", typname)));
+
+ return oid;
+}
+
+/*
+ * convert_type_priv_string
+ * Convert text string to AclMode value.
+ */
+static AclMode
+convert_type_priv_string(text *priv_type_text)
+{
+ static const priv_map type_priv_map[] = {
+ {"USAGE", ACL_USAGE},
+ {"USAGE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_USAGE)},
+ {NULL, 0}
+ };
+
+ return convert_any_priv_string(priv_type_text, type_priv_map);
+}
+
+/*
+ * has_parameter_privilege variants
+ * These are all named "has_parameter_privilege" at the SQL level.
+ * They take various combinations of parameter name with
+ * user name, user OID, or implicit user = current_user.
+ *
+ * The result is a boolean value: true if user has been granted
+ * the indicated privilege or false if not.
+ */
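+
+/*
+ * For illustration only: typical SQL-level calls look like
+ *
+ *     SELECT has_parameter_privilege('joe', 'work_mem', 'SET');
+ *     SELECT has_parameter_privilege('maintenance_work_mem', 'ALTER SYSTEM');
+ *
+ * where "joe" is a hypothetical role.  Unlike the other privilege
+ * functions, the parameter is identified by its name (as a string)
+ * rather than by an object OID.
+ */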
+
+/*
+ * has_param_priv_byname
+ *
+ * Helper function to check user privileges on a parameter given the
+ * role by Oid, parameter by text name, and privileges as AclMode.
+ */
+static bool
+has_param_priv_byname(Oid roleid, const text *parameter, AclMode priv)
+{
+ char *paramstr = text_to_cstring(parameter);
+
+ return pg_parameter_aclcheck(paramstr, roleid, priv) == ACLCHECK_OK;
+}
+
+/*
+ * has_parameter_privilege_name_name
+ * Check user privileges on a parameter given name username, text
+ * parameter, and text priv name.
+ */
+Datum
+has_parameter_privilege_name_name(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ text *parameter = PG_GETARG_TEXT_PP(1);
+ AclMode priv = convert_parameter_priv_string(PG_GETARG_TEXT_PP(2));
+ Oid roleid = get_role_oid_or_public(NameStr(*username));
+
+ PG_RETURN_BOOL(has_param_priv_byname(roleid, parameter, priv));
+}
+
+/*
+ * has_parameter_privilege_name
+ * Check user privileges on a parameter given text parameter and text priv
+ * name. current_user is assumed
+ */
+Datum
+has_parameter_privilege_name(PG_FUNCTION_ARGS)
+{
+ text *parameter = PG_GETARG_TEXT_PP(0);
+ AclMode priv = convert_parameter_priv_string(PG_GETARG_TEXT_PP(1));
+
+ PG_RETURN_BOOL(has_param_priv_byname(GetUserId(), parameter, priv));
+}
+
+/*
+ * has_parameter_privilege_id_name
+ * Check user privileges on a parameter given roleid, text parameter, and
+ * text priv name.
+ */
+Datum
+has_parameter_privilege_id_name(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ text *parameter = PG_GETARG_TEXT_PP(1);
+ AclMode priv = convert_parameter_priv_string(PG_GETARG_TEXT_PP(2));
+
+ PG_RETURN_BOOL(has_param_priv_byname(roleid, parameter, priv));
+}
+
+/*
+ * Support routines for has_parameter_privilege family.
+ */
+
+/*
+ * convert_parameter_priv_string
+ * Convert text string to AclMode value.
+ */
+static AclMode
+convert_parameter_priv_string(text *priv_text)
+{
+ static const priv_map parameter_priv_map[] = {
+ {"SET", ACL_SET},
+ {"SET WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_SET)},
+ {"ALTER SYSTEM", ACL_ALTER_SYSTEM},
+ {"ALTER SYSTEM WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_ALTER_SYSTEM)},
+ {NULL, 0}
+ };
+
+ return convert_any_priv_string(priv_text, parameter_priv_map);
+}
+
+/*
+ * pg_has_role variants
+ * These are all named "pg_has_role" at the SQL level.
+ * They take various combinations of role name, role OID,
+ * user name, user OID, or implicit user = current_user.
+ *
+ * The result is a boolean value: true if user has the indicated
+ * privilege, false if not.
+ */
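+
+/*
+ * For illustration only: typical SQL-level calls look like
+ *
+ *     SELECT pg_has_role('joe', 'pg_read_all_stats', 'MEMBER');
+ *     SELECT pg_has_role('pg_monitor', 'USAGE');
+ *
+ * where "joe" is a hypothetical role.  MEMBER tests membership (even
+ * through non-inheriting roles), while USAGE tests whether the role's
+ * privileges are actually available to the user.
+ */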
+
+/*
+ * pg_has_role_name_name
+ * Check user privileges on a role given
+ * name username, name rolename, and text priv name.
+ */
+Datum
+pg_has_role_name_name(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ Name rolename = PG_GETARG_NAME(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ Oid roleoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid(NameStr(*username), false);
+ roleoid = get_role_oid(NameStr(*rolename), false);
+ mode = convert_role_priv_string(priv_type_text);
+
+ aclresult = pg_role_aclcheck(roleoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * pg_has_role_name
+ * Check user privileges on a role given
+ * name rolename and text priv name.
+ * current_user is assumed
+ */
+Datum
+pg_has_role_name(PG_FUNCTION_ARGS)
+{
+ Name rolename = PG_GETARG_NAME(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ Oid roleoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ roleoid = get_role_oid(NameStr(*rolename), false);
+ mode = convert_role_priv_string(priv_type_text);
+
+ aclresult = pg_role_aclcheck(roleoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * pg_has_role_name_id
+ * Check user privileges on a role given
+ * name username, role oid, and text priv name.
+ */
+Datum
+pg_has_role_name_id(PG_FUNCTION_ARGS)
+{
+ Name username = PG_GETARG_NAME(0);
+ Oid roleoid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = get_role_oid(NameStr(*username), false);
+ mode = convert_role_priv_string(priv_type_text);
+
+ aclresult = pg_role_aclcheck(roleoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * pg_has_role_id
+ * Check user privileges on a role given
+ * role oid, and text priv name.
+ * current_user is assumed
+ */
+Datum
+pg_has_role_id(PG_FUNCTION_ARGS)
+{
+ Oid roleoid = PG_GETARG_OID(0);
+ text *priv_type_text = PG_GETARG_TEXT_PP(1);
+ Oid roleid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleid = GetUserId();
+ mode = convert_role_priv_string(priv_type_text);
+
+ aclresult = pg_role_aclcheck(roleoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * pg_has_role_id_name
+ * Check user privileges on a role given
+ * roleid, name rolename, and text priv name.
+ */
+Datum
+pg_has_role_id_name(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ Name rolename = PG_GETARG_NAME(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ Oid roleoid;
+ AclMode mode;
+ AclResult aclresult;
+
+ roleoid = get_role_oid(NameStr(*rolename), false);
+ mode = convert_role_priv_string(priv_type_text);
+
+ aclresult = pg_role_aclcheck(roleoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * pg_has_role_id_id
+ * Check user privileges on a role given
+ * roleid, role oid, and text priv name.
+ */
+Datum
+pg_has_role_id_id(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ Oid roleoid = PG_GETARG_OID(1);
+ text *priv_type_text = PG_GETARG_TEXT_PP(2);
+ AclMode mode;
+ AclResult aclresult;
+
+ mode = convert_role_priv_string(priv_type_text);
+
+ aclresult = pg_role_aclcheck(roleoid, roleid, mode);
+
+ PG_RETURN_BOOL(aclresult == ACLCHECK_OK);
+}
+
+/*
+ * Support routines for pg_has_role family.
+ */
+
+/*
+ * convert_role_priv_string
+ * Convert text string to AclMode value.
+ *
+ * We use USAGE to denote whether the privileges of the role are accessible
+ * (has_privs), MEMBER to denote is_member, and any "WITH GRANT OPTION" or
+ * "WITH ADMIN OPTION" spelling (of either USAGE or MEMBER) to denote
+ * is_admin. There is no ACL bit corresponding to MEMBER so we cheat and
+ * use ACL_CREATE for that. This convention is shared only with
+ * pg_role_aclcheck, below.
+ */
+static AclMode
+convert_role_priv_string(text *priv_type_text)
+{
+ static const priv_map role_priv_map[] = {
+ {"USAGE", ACL_USAGE},
+ {"MEMBER", ACL_CREATE},
+ {"USAGE WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_CREATE)},
+ {"USAGE WITH ADMIN OPTION", ACL_GRANT_OPTION_FOR(ACL_CREATE)},
+ {"MEMBER WITH GRANT OPTION", ACL_GRANT_OPTION_FOR(ACL_CREATE)},
+ {"MEMBER WITH ADMIN OPTION", ACL_GRANT_OPTION_FOR(ACL_CREATE)},
+ {NULL, 0}
+ };
+
+ return convert_any_priv_string(priv_type_text, role_priv_map);
+}
+
+/*
+ * pg_role_aclcheck
+ * Quick-and-dirty support for pg_has_role
+ */
+static AclResult
+pg_role_aclcheck(Oid role_oid, Oid roleid, AclMode mode)
+{
+ if (mode & ACL_GRANT_OPTION_FOR(ACL_CREATE))
+ {
+ if (is_admin_of_role(roleid, role_oid))
+ return ACLCHECK_OK;
+ }
+ if (mode & ACL_CREATE)
+ {
+ if (is_member_of_role(roleid, role_oid))
+ return ACLCHECK_OK;
+ }
+ if (mode & ACL_USAGE)
+ {
+ if (has_privs_of_role(roleid, role_oid))
+ return ACLCHECK_OK;
+ }
+ return ACLCHECK_NO_PRIV;
+}
+
+
+/*
+ * initialization function (called by InitPostgres)
+ */
+void
+initialize_acl(void)
+{
+ if (!IsBootstrapProcessingMode())
+ {
+ cached_db_hash =
+ GetSysCacheHashValue1(DATABASEOID,
+ ObjectIdGetDatum(MyDatabaseId));
+
+ /*
+ * In normal mode, set a callback on any syscache invalidation of rows
+ * of pg_auth_members (for roles_is_member_of()), pg_authid (for
+ * has_rolinherit()), or pg_database (for roles_is_member_of())
+ */
+ CacheRegisterSyscacheCallback(AUTHMEMROLEMEM,
+ RoleMembershipCacheCallback,
+ (Datum) 0);
+ CacheRegisterSyscacheCallback(AUTHOID,
+ RoleMembershipCacheCallback,
+ (Datum) 0);
+ CacheRegisterSyscacheCallback(DATABASEOID,
+ RoleMembershipCacheCallback,
+ (Datum) 0);
+ }
+}
+
+/*
+ * RoleMembershipCacheCallback
+ * Syscache inval callback function
+ */
+static void
+RoleMembershipCacheCallback(Datum arg, int cacheid, uint32 hashvalue)
+{
+ if (cacheid == DATABASEOID &&
+ hashvalue != cached_db_hash &&
+ hashvalue != 0)
+ {
+ return; /* ignore pg_database changes for other DBs */
+ }
+
+ /* Force membership caches to be recomputed on next use */
+ cached_role[ROLERECURSE_PRIVS] = InvalidOid;
+ cached_role[ROLERECURSE_MEMBERS] = InvalidOid;
+}
+
+
+/* Check if specified role has rolinherit set */
+static bool
+has_rolinherit(Oid roleid)
+{
+ bool result = false;
+ HeapTuple utup;
+
+ utup = SearchSysCache1(AUTHOID, ObjectIdGetDatum(roleid));
+ if (HeapTupleIsValid(utup))
+ {
+ result = ((Form_pg_authid) GETSTRUCT(utup))->rolinherit;
+ ReleaseSysCache(utup);
+ }
+ return result;
+}
+
+
+/*
+ * Get a list of roles that the specified roleid is a member of
+ *
+ * Type ROLERECURSE_PRIVS recurses only through roles that have rolinherit
+ * set, while ROLERECURSE_MEMBERS recurses through all roles. This sets
+ * *is_admin==true if and only if role "roleid" has an ADMIN OPTION membership
+ * in role "admin_of".
+ *
+ * Since indirect membership testing is relatively expensive, we cache
+ * a list of memberships. Hence, the result is only guaranteed good until
+ * the next call of roles_is_member_of()!
+ *
+ * For the benefit of select_best_grantor, the result is defined to be
+ * in breadth-first order, ie, closer relationships earlier.
+ */
+static List *
+roles_is_member_of(Oid roleid, enum RoleRecurseType type,
+ Oid admin_of, bool *is_admin)
+{
+ Oid dba;
+ List *roles_list;
+ ListCell *l;
+ List *new_cached_roles;
+ MemoryContext oldctx;
+
+ Assert(OidIsValid(admin_of) == PointerIsValid(is_admin));
+
+ /* If cache is valid and ADMIN OPTION not sought, just return the list */
+ if (cached_role[type] == roleid && !OidIsValid(admin_of) &&
+ OidIsValid(cached_role[type]))
+ return cached_roles[type];
+
+ /*
+ * Role expansion happens in a non-database backend when guc.c checks
+ * ROLE_PG_READ_ALL_SETTINGS for a physical walsender SHOW command. In
+ * that case, no role gets pg_database_owner.
+ */
+ if (!OidIsValid(MyDatabaseId))
+ dba = InvalidOid;
+ else
+ {
+ HeapTuple dbtup;
+
+ dbtup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId));
+ if (!HeapTupleIsValid(dbtup))
+ elog(ERROR, "cache lookup failed for database %u", MyDatabaseId);
+ dba = ((Form_pg_database) GETSTRUCT(dbtup))->datdba;
+ ReleaseSysCache(dbtup);
+ }
+
+ /*
+ * Find all the roles that roleid is a member of, including multi-level
+ * recursion. The role itself will always be the first element of the
+ * resulting list.
+ *
+ * Each element of the list is scanned to see if it adds any indirect
+ * memberships. We can use a single list as both the record of
+ * already-found memberships and the agenda of roles yet to be scanned.
+ * This is a bit tricky but works because the foreach() macro doesn't
+ * fetch the next list element until the bottom of the loop.
+ */
+ roles_list = list_make1_oid(roleid);
+
+ foreach(l, roles_list)
+ {
+ Oid memberid = lfirst_oid(l);
+ CatCList *memlist;
+ int i;
+
+ if (type == ROLERECURSE_PRIVS && !has_rolinherit(memberid))
+ continue; /* ignore non-inheriting roles */
+
+ /* Find roles that memberid is directly a member of */
+ memlist = SearchSysCacheList1(AUTHMEMMEMROLE,
+ ObjectIdGetDatum(memberid));
+ for (i = 0; i < memlist->n_members; i++)
+ {
+ HeapTuple tup = &memlist->members[i]->tuple;
+ Oid otherid = ((Form_pg_auth_members) GETSTRUCT(tup))->roleid;
+
+ /*
+ * While otherid==InvalidOid shouldn't appear in the catalog, the
+ * OidIsValid() check keeps us from dereferencing a NULL is_admin
+ * pointer if it somehow does.
+ */
+ if (otherid == admin_of &&
+ ((Form_pg_auth_members) GETSTRUCT(tup))->admin_option &&
+ OidIsValid(admin_of))
+ *is_admin = true;
+
+ /*
+ * Even though there shouldn't be any loops in the membership
+ * graph, we must test for having already seen this role. It is
+ * legal for instance to have both A->B and A->C->B.
+ */
+ roles_list = list_append_unique_oid(roles_list, otherid);
+ }
+ ReleaseSysCacheList(memlist);
+
+ /* implement pg_database_owner implicit membership */
+ if (memberid == dba && OidIsValid(dba))
+ roles_list = list_append_unique_oid(roles_list,
+ ROLE_PG_DATABASE_OWNER);
+ }
+
+ /*
+ * Copy the completed list into TopMemoryContext so it will persist.
+ */
+ oldctx = MemoryContextSwitchTo(TopMemoryContext);
+ new_cached_roles = list_copy(roles_list);
+ MemoryContextSwitchTo(oldctx);
+ list_free(roles_list);
+
+ /*
+ * Now safe to assign to state variable
+ */
+ cached_role[type] = InvalidOid; /* just paranoia */
+ list_free(cached_roles[type]);
+ cached_roles[type] = new_cached_roles;
+ cached_role[type] = roleid;
+
+ /* And now we can return the answer */
+ return cached_roles[type];
+}
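+
+/*
+ * For illustration only (hypothetical roles): given the memberships
+ * alice -> devs, alice -> admins and admins -> devs, a call
+ * roles_is_member_of(alice, ROLERECURSE_MEMBERS, InvalidOid, NULL)
+ * returns a list beginning with alice itself, followed by its direct
+ * memberships (devs, admins) before any indirect ones; devs appears only
+ * once even though it is reachable both directly and via admins.
+ */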
+
+
+/*
+ * Does member have the privileges of role (directly or indirectly)?
+ *
+ * This is defined not to recurse through roles that don't have rolinherit
+ * set; for such roles, membership implies the ability to do SET ROLE, but
+ * the privileges are not available until you've done so.
+ */
+bool
+has_privs_of_role(Oid member, Oid role)
+{
+ /* Fast path for simple case */
+ if (member == role)
+ return true;
+
+ /* Superusers have every privilege, so are part of every role */
+ if (superuser_arg(member))
+ return true;
+
+ /*
+ * Find all the roles that member has the privileges of, including
+ * multi-level recursion, then see if target role is any one of them.
+ */
+ return list_member_oid(roles_is_member_of(member, ROLERECURSE_PRIVS,
+ InvalidOid, NULL),
+ role);
+}
+
+
+/*
+ * Is member a member of role (directly or indirectly)?
+ *
+ * This is defined to recurse through roles regardless of rolinherit.
+ *
+ * Do not use this for privilege checking, instead use has_privs_of_role()
+ */
+bool
+is_member_of_role(Oid member, Oid role)
+{
+ /* Fast path for simple case */
+ if (member == role)
+ return true;
+
+ /* Superusers have every privilege, so are part of every role */
+ if (superuser_arg(member))
+ return true;
+
+ /*
+ * Find all the roles that member is a member of, including multi-level
+ * recursion, then see if target role is any one of them.
+ */
+ return list_member_oid(roles_is_member_of(member, ROLERECURSE_MEMBERS,
+ InvalidOid, NULL),
+ role);
+}
+
+/*
+ * check_is_member_of_role
+ * is_member_of_role with a standard permission-violation error if not
+ */
+void
+check_is_member_of_role(Oid member, Oid role)
+{
+ if (!is_member_of_role(member, role))
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be member of role \"%s\"",
+ GetUserNameFromId(role, false))));
+}
+
+/*
+ * Is member a member of role, not considering superuserness?
+ *
+ * This is identical to is_member_of_role except we ignore superuser
+ * status.
+ *
+ * Do not use this for privilege checking, instead use has_privs_of_role()
+ */
+bool
+is_member_of_role_nosuper(Oid member, Oid role)
+{
+ /* Fast path for simple case */
+ if (member == role)
+ return true;
+
+ /*
+ * Find all the roles that member is a member of, including multi-level
+ * recursion, then see if target role is any one of them.
+ */
+ return list_member_oid(roles_is_member_of(member, ROLERECURSE_MEMBERS,
+ InvalidOid, NULL),
+ role);
+}
+
+
+/*
+ * Is member an admin of role? That is, is member a member of role
+ * (directly or indirectly) WITH ADMIN OPTION, or a superuser? Note that,
+ * by policy, a role never holds ADMIN OPTION on itself (see below).
+ */
+bool
+is_admin_of_role(Oid member, Oid role)
+{
+ bool result = false;
+
+ if (superuser_arg(member))
+ return true;
+
+ /* By policy, a role cannot have WITH ADMIN OPTION on itself. */
+ if (member == role)
+ return false;
+
+ (void) roles_is_member_of(member, ROLERECURSE_MEMBERS, role, &result);
+ return result;
+}
+
+
+/* does what it says ... */
+static int
+count_one_bits(AclMode mask)
+{
+ int nbits = 0;
+
+ /* this code relies on AclMode being an unsigned type */
+ while (mask)
+ {
+ if (mask & 1)
+ nbits++;
+ mask >>= 1;
+ }
+ return nbits;
+}
+
+
+/*
+ * Select the effective grantor ID for a GRANT or REVOKE operation.
+ *
+ * The grantor must always be either the object owner or some role that has
+ * been explicitly granted grant options. This ensures that all granted
+ * privileges appear to flow from the object owner, and there are never
+ * multiple "original sources" of a privilege. Therefore, if the would-be
+ * grantor is a member of a role that has the needed grant options, we have
+ * to do the grant as that role instead.
+ *
+ * It is possible that the would-be grantor is a member of several roles
+ * that have different subsets of the desired grant options, but no one
+ * role has 'em all. In this case we pick a role with the largest number
+ * of desired options. Ties are broken in favor of closer ancestors.
+ *
+ * roleId: the role attempting to do the GRANT/REVOKE
+ * privileges: the privileges to be granted/revoked
+ * acl: the ACL of the object in question
+ * ownerId: the role owning the object in question
+ * *grantorId: receives the OID of the role to do the grant as
+ * *grantOptions: receives the grant options actually held by grantorId
+ *
+ * If no grant options exist, we set grantorId to roleId, grantOptions to 0.
+ */
+void
+select_best_grantor(Oid roleId, AclMode privileges,
+ const Acl *acl, Oid ownerId,
+ Oid *grantorId, AclMode *grantOptions)
+{
+ AclMode needed_goptions = ACL_GRANT_OPTION_FOR(privileges);
+ List *roles_list;
+ int nrights;
+ ListCell *l;
+
+ /*
+ * The object owner is always treated as having all grant options, so if
+ * roleId is the owner it's easy. Also, if roleId is a superuser it's
+ * easy: superusers are implicitly members of every role, so they act as
+ * the object owner.
+ */
+ if (roleId == ownerId || superuser_arg(roleId))
+ {
+ *grantorId = ownerId;
+ *grantOptions = needed_goptions;
+ return;
+ }
+
+ /*
+ * Otherwise we have to do a careful search to see if roleId has the
+ * privileges of any suitable role. Note: we can hang onto the result of
+ * roles_is_member_of() throughout this loop, because aclmask_direct()
+ * doesn't query any role memberships.
+ */
+ roles_list = roles_is_member_of(roleId, ROLERECURSE_PRIVS,
+ InvalidOid, NULL);
+
+ /* initialize candidate result as default */
+ *grantorId = roleId;
+ *grantOptions = ACL_NO_RIGHTS;
+ nrights = 0;
+
+ foreach(l, roles_list)
+ {
+ Oid otherrole = lfirst_oid(l);
+ AclMode otherprivs;
+
+ otherprivs = aclmask_direct(acl, otherrole, ownerId,
+ needed_goptions, ACLMASK_ALL);
+ if (otherprivs == needed_goptions)
+ {
+ /* Found a suitable grantor */
+ *grantorId = otherrole;
+ *grantOptions = otherprivs;
+ return;
+ }
+
+ /*
+ * If it has just some of the needed privileges, remember best
+ * candidate.
+ */
+ if (otherprivs != ACL_NO_RIGHTS)
+ {
+ int nnewrights = count_one_bits(otherprivs);
+
+ if (nnewrights > nrights)
+ {
+ *grantorId = otherrole;
+ *grantOptions = otherprivs;
+ nrights = nnewrights;
+ }
+ }
+ }
+}
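+
+/*
+ * For illustration only (hypothetical roles): suppose roleId must grant
+ * SELECT and INSERT, and is a member of roles a and b, where the object's
+ * ACL gives a grant options only for SELECT and b grant options for both
+ * SELECT and INSERT.  The loop above picks b, since it covers all needed
+ * options; if b covered only INSERT, a and b would each cover one option
+ * and the one appearing earlier in roles_list (the closer relationship)
+ * would win.
+ */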
+
+/*
+ * get_role_oid - Given a role name, look up the role's OID.
+ *
+ * If missing_ok is false, throw an error if role name not found. If
+ * true, just return InvalidOid.
+ */
+Oid
+get_role_oid(const char *rolname, bool missing_ok)
+{
+ Oid oid;
+
+ oid = GetSysCacheOid1(AUTHNAME, Anum_pg_authid_oid,
+ CStringGetDatum(rolname));
+ if (!OidIsValid(oid) && !missing_ok)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("role \"%s\" does not exist", rolname)));
+ return oid;
+}
+
+/*
+ * get_role_oid_or_public - As above, but return ACL_ID_PUBLIC if the
+ * role name is "public".
+ */
+Oid
+get_role_oid_or_public(const char *rolname)
+{
+ if (strcmp(rolname, "public") == 0)
+ return ACL_ID_PUBLIC;
+
+ return get_role_oid(rolname, false);
+}
+
+/*
+ * Given a RoleSpec node, return the OID it corresponds to. If missing_ok is
+ * true, return InvalidOid if the role does not exist.
+ *
+ * PUBLIC is always disallowed here. Routines wanting to handle the PUBLIC
+ * case must check the case separately.
+ */
+Oid
+get_rolespec_oid(const RoleSpec *role, bool missing_ok)
+{
+ Oid oid;
+
+ switch (role->roletype)
+ {
+ case ROLESPEC_CSTRING:
+ Assert(role->rolename);
+ oid = get_role_oid(role->rolename, missing_ok);
+ break;
+
+ case ROLESPEC_CURRENT_ROLE:
+ case ROLESPEC_CURRENT_USER:
+ oid = GetUserId();
+ break;
+
+ case ROLESPEC_SESSION_USER:
+ oid = GetSessionUserId();
+ break;
+
+ case ROLESPEC_PUBLIC:
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("role \"%s\" does not exist", "public")));
+ oid = InvalidOid; /* make compiler happy */
+ break;
+
+ default:
+ elog(ERROR, "unexpected role type %d", role->roletype);
+ }
+
+ return oid;
+}
+
+/*
+ * Given a RoleSpec node, return the pg_authid HeapTuple it corresponds to.
+ * Caller must ReleaseSysCache when done with the result tuple.
+ */
+HeapTuple
+get_rolespec_tuple(const RoleSpec *role)
+{
+ HeapTuple tuple;
+
+ switch (role->roletype)
+ {
+ case ROLESPEC_CSTRING:
+ Assert(role->rolename);
+ tuple = SearchSysCache1(AUTHNAME, CStringGetDatum(role->rolename));
+ if (!HeapTupleIsValid(tuple))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("role \"%s\" does not exist", role->rolename)));
+ break;
+
+ case ROLESPEC_CURRENT_ROLE:
+ case ROLESPEC_CURRENT_USER:
+ tuple = SearchSysCache1(AUTHOID, GetUserId());
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for role %u", GetUserId());
+ break;
+
+ case ROLESPEC_SESSION_USER:
+ tuple = SearchSysCache1(AUTHOID, GetSessionUserId());
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for role %u", GetSessionUserId());
+ break;
+
+ case ROLESPEC_PUBLIC:
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("role \"%s\" does not exist", "public")));
+ tuple = NULL; /* make compiler happy */
+ break;
+
+ default:
+ elog(ERROR, "unexpected role type %d", role->roletype);
+ }
+
+ return tuple;
+}
+
+/*
+ * Given a RoleSpec, returns a palloc'ed copy of the corresponding role's name.
+ */
+char *
+get_rolespec_name(const RoleSpec *role)
+{
+ HeapTuple tp;
+ Form_pg_authid authForm;
+ char *rolename;
+
+ tp = get_rolespec_tuple(role);
+ authForm = (Form_pg_authid) GETSTRUCT(tp);
+ rolename = pstrdup(NameStr(authForm->rolname));
+ ReleaseSysCache(tp);
+
+ return rolename;
+}
+
+/*
+ * Given a RoleSpec, throw an error if the name is reserved, using detail_msg,
+ * if provided (which must be already translated).
+ *
+ * If node is NULL, no error is thrown. If detail_msg is NULL then no detail
+ * message is provided.
+ */
+void
+check_rolespec_name(const RoleSpec *role, const char *detail_msg)
+{
+ if (!role)
+ return;
+
+ if (role->roletype != ROLESPEC_CSTRING)
+ return;
+
+ if (IsReservedName(role->rolename))
+ {
+ if (detail_msg)
+ ereport(ERROR,
+ (errcode(ERRCODE_RESERVED_NAME),
+ errmsg("role name \"%s\" is reserved",
+ role->rolename),
+ errdetail_internal("%s", detail_msg)));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_RESERVED_NAME),
+ errmsg("role name \"%s\" is reserved",
+ role->rolename)));
+ }
+}
diff --git a/src/backend/utils/adt/amutils.c b/src/backend/utils/adt/amutils.c
new file mode 100644
index 0000000..9d78a90
--- /dev/null
+++ b/src/backend/utils/adt/amutils.c
@@ -0,0 +1,470 @@
+/*-------------------------------------------------------------------------
+ *
+ * amutils.c
+ * SQL-level APIs related to index access methods.
+ *
+ * Copyright (c) 2016-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/amutils.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/amapi.h"
+#include "access/htup_details.h"
+#include "catalog/pg_class.h"
+#include "catalog/pg_index.h"
+#include "utils/builtins.h"
+#include "utils/syscache.h"
+
+
+/* Convert string property name to enum, for efficiency */
+struct am_propname
+{
+ const char *name;
+ IndexAMProperty prop;
+};
+
+static const struct am_propname am_propnames[] =
+{
+ {
+ "asc", AMPROP_ASC
+ },
+ {
+ "desc", AMPROP_DESC
+ },
+ {
+ "nulls_first", AMPROP_NULLS_FIRST
+ },
+ {
+ "nulls_last", AMPROP_NULLS_LAST
+ },
+ {
+ "orderable", AMPROP_ORDERABLE
+ },
+ {
+ "distance_orderable", AMPROP_DISTANCE_ORDERABLE
+ },
+ {
+ "returnable", AMPROP_RETURNABLE
+ },
+ {
+ "search_array", AMPROP_SEARCH_ARRAY
+ },
+ {
+ "search_nulls", AMPROP_SEARCH_NULLS
+ },
+ {
+ "clusterable", AMPROP_CLUSTERABLE
+ },
+ {
+ "index_scan", AMPROP_INDEX_SCAN
+ },
+ {
+ "bitmap_scan", AMPROP_BITMAP_SCAN
+ },
+ {
+ "backward_scan", AMPROP_BACKWARD_SCAN
+ },
+ {
+ "can_order", AMPROP_CAN_ORDER
+ },
+ {
+ "can_unique", AMPROP_CAN_UNIQUE
+ },
+ {
+ "can_multi_col", AMPROP_CAN_MULTI_COL
+ },
+ {
+ "can_exclude", AMPROP_CAN_EXCLUDE
+ },
+ {
+ "can_include", AMPROP_CAN_INCLUDE
+ },
+};
+
+static IndexAMProperty
+lookup_prop_name(const char *name)
+{
+ int i;
+
+ for (i = 0; i < lengthof(am_propnames); i++)
+ {
+ if (pg_strcasecmp(am_propnames[i].name, name) == 0)
+ return am_propnames[i].prop;
+ }
+
+ /* We do not throw an error, so that AMs can define their own properties */
+ return AMPROP_UNKNOWN;
+}
+
+/*
+ * Common code for properties that are just bit tests of indoptions.
+ *
+ * tuple: the pg_index heaptuple
+ * attno: identifies the index column whose indoptions are to be tested.
+ * guard: if false, a boolean false result is forced (saves code in caller).
+ * iopt_mask: mask for interesting indoption bit.
+ * iopt_expect: value for a "true" result (should be 0 or iopt_mask).
+ *
+ * Returns false to indicate a NULL result (for "unknown/inapplicable"),
+ * otherwise sets *res to the boolean value to return.
+ */
+static bool
+test_indoption(HeapTuple tuple, int attno, bool guard,
+ int16 iopt_mask, int16 iopt_expect,
+ bool *res)
+{
+ Datum datum;
+ bool isnull;
+ int2vector *indoption;
+ int16 indoption_val;
+
+ if (!guard)
+ {
+ *res = false;
+ return true;
+ }
+
+ datum = SysCacheGetAttr(INDEXRELID, tuple,
+ Anum_pg_index_indoption, &isnull);
+ Assert(!isnull);
+
+ indoption = ((int2vector *) DatumGetPointer(datum));
+ indoption_val = indoption->values[attno - 1];
+
+ *res = (indoption_val & iopt_mask) == iopt_expect;
+
+ return true;
+}
+
+
+/*
+ * Test property of an index AM, index, or index column.
+ *
+ * This is common code for different SQL-level funcs, so the amoid and
+ * index_oid parameters are mutually exclusive; we look up the amoid from the
+ * index_oid if needed, or if no index oid is given, we're looking at AM-wide
+ * properties.
+ */
+static Datum
+indexam_property(FunctionCallInfo fcinfo,
+ const char *propname,
+ Oid amoid, Oid index_oid, int attno)
+{
+ bool res = false;
+ bool isnull = false;
+ int natts = 0;
+ IndexAMProperty prop;
+ IndexAmRoutine *routine;
+
+ /* Try to convert property name to enum (no error if not known) */
+ prop = lookup_prop_name(propname);
+
+ /* If we have an index OID, look up the AM, and get # of columns too */
+ if (OidIsValid(index_oid))
+ {
+ HeapTuple tuple;
+ Form_pg_class rd_rel;
+
+ Assert(!OidIsValid(amoid));
+ tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(index_oid));
+ if (!HeapTupleIsValid(tuple))
+ PG_RETURN_NULL();
+ rd_rel = (Form_pg_class) GETSTRUCT(tuple);
+ if (rd_rel->relkind != RELKIND_INDEX &&
+ rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
+ {
+ ReleaseSysCache(tuple);
+ PG_RETURN_NULL();
+ }
+ amoid = rd_rel->relam;
+ natts = rd_rel->relnatts;
+ ReleaseSysCache(tuple);
+ }
+
+ /*
+ * At this point, either index_oid == InvalidOid or it's a valid index
+ * OID. Also, after this test and the one below, either attno == 0 for
+ * index-wide or AM-wide tests, or it's a valid column number in a valid
+ * index.
+ */
+ if (attno < 0 || attno > natts)
+ PG_RETURN_NULL();
+
+ /*
+ * Get AM information. If we don't have a valid AM OID, return NULL.
+ */
+ routine = GetIndexAmRoutineByAmId(amoid, true);
+ if (routine == NULL)
+ PG_RETURN_NULL();
+
+ /*
+ * If there's an AM property routine, give it a chance to override the
+ * generic logic. Proceed if it returns false.
+ */
+ if (routine->amproperty &&
+ routine->amproperty(index_oid, attno, prop, propname,
+ &res, &isnull))
+ {
+ if (isnull)
+ PG_RETURN_NULL();
+ PG_RETURN_BOOL(res);
+ }
+
+ if (attno > 0)
+ {
+ HeapTuple tuple;
+ Form_pg_index rd_index;
+ bool iskey = true;
+
+ /*
+ * Handle column-level properties. Many of these need the pg_index row
+ * (which we also need to use to check for nonkey atts) so we fetch
+ * that first.
+ */
+ tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(index_oid));
+ if (!HeapTupleIsValid(tuple))
+ PG_RETURN_NULL();
+ rd_index = (Form_pg_index) GETSTRUCT(tuple);
+
+ Assert(index_oid == rd_index->indexrelid);
+ Assert(attno > 0 && attno <= rd_index->indnatts);
+
+ isnull = true;
+
+ /*
+ * If amcaninclude, we might be looking at an attno for a nonkey
+ * column, for which we (generically) assume that most properties are
+ * null.
+ */
+ if (routine->amcaninclude
+ && attno > rd_index->indnkeyatts)
+ iskey = false;
+
+ switch (prop)
+ {
+ case AMPROP_ASC:
+ if (iskey &&
+ test_indoption(tuple, attno, routine->amcanorder,
+ INDOPTION_DESC, 0, &res))
+ isnull = false;
+ break;
+
+ case AMPROP_DESC:
+ if (iskey &&
+ test_indoption(tuple, attno, routine->amcanorder,
+ INDOPTION_DESC, INDOPTION_DESC, &res))
+ isnull = false;
+ break;
+
+ case AMPROP_NULLS_FIRST:
+ if (iskey &&
+ test_indoption(tuple, attno, routine->amcanorder,
+ INDOPTION_NULLS_FIRST, INDOPTION_NULLS_FIRST, &res))
+ isnull = false;
+ break;
+
+ case AMPROP_NULLS_LAST:
+ if (iskey &&
+ test_indoption(tuple, attno, routine->amcanorder,
+ INDOPTION_NULLS_FIRST, 0, &res))
+ isnull = false;
+ break;
+
+ case AMPROP_ORDERABLE:
+
+ /*
+ * generic assumption is that nonkey columns are not orderable
+ */
+ res = iskey ? routine->amcanorder : false;
+ isnull = false;
+ break;
+
+ case AMPROP_DISTANCE_ORDERABLE:
+
+ /*
+ * The conditions for whether a column is distance-orderable
+ * are really up to the AM (at time of writing, only GiST
+ * supports it at all). The planner has its own idea based on
+ * whether it finds an operator with amoppurpose 'o', but
+ * getting there from just the index column type seems like a
+ * lot of work. So instead we expect the AM to handle this in
+ * its amproperty routine. The generic result is to return
+ * false if the AM says it never supports this, or if this is
+ * a nonkey column, and null otherwise (meaning we don't
+ * know).
+ */
+ if (!iskey || !routine->amcanorderbyop)
+ {
+ res = false;
+ isnull = false;
+ }
+ break;
+
+ case AMPROP_RETURNABLE:
+
+ /* note that we ignore iskey for this property */
+
+ isnull = false;
+ res = false;
+
+ if (routine->amcanreturn)
+ {
+ /*
+ * If possible, the AM should handle this test in its
+ * amproperty function without opening the rel. But this
+ * is the generic fallback if it does not.
+ */
+ Relation indexrel = index_open(index_oid, AccessShareLock);
+
+ res = index_can_return(indexrel, attno);
+ index_close(indexrel, AccessShareLock);
+ }
+ break;
+
+ case AMPROP_SEARCH_ARRAY:
+ if (iskey)
+ {
+ res = routine->amsearcharray;
+ isnull = false;
+ }
+ break;
+
+ case AMPROP_SEARCH_NULLS:
+ if (iskey)
+ {
+ res = routine->amsearchnulls;
+ isnull = false;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ ReleaseSysCache(tuple);
+
+ if (!isnull)
+ PG_RETURN_BOOL(res);
+ PG_RETURN_NULL();
+ }
+
+ if (OidIsValid(index_oid))
+ {
+ /*
+ * Handle index-level properties. Currently, these only depend on the
+ * AM, but that might not be true forever, so we make users name an
+ * index not just an AM.
+ */
+ switch (prop)
+ {
+ case AMPROP_CLUSTERABLE:
+ PG_RETURN_BOOL(routine->amclusterable);
+
+ case AMPROP_INDEX_SCAN:
+ PG_RETURN_BOOL(routine->amgettuple ? true : false);
+
+ case AMPROP_BITMAP_SCAN:
+ PG_RETURN_BOOL(routine->amgetbitmap ? true : false);
+
+ case AMPROP_BACKWARD_SCAN:
+ PG_RETURN_BOOL(routine->amcanbackward);
+
+ default:
+ PG_RETURN_NULL();
+ }
+ }
+
+ /*
+ * Handle AM-level properties (those that control what you can say in
+ * CREATE INDEX).
+ */
+ switch (prop)
+ {
+ case AMPROP_CAN_ORDER:
+ PG_RETURN_BOOL(routine->amcanorder);
+
+ case AMPROP_CAN_UNIQUE:
+ PG_RETURN_BOOL(routine->amcanunique);
+
+ case AMPROP_CAN_MULTI_COL:
+ PG_RETURN_BOOL(routine->amcanmulticol);
+
+ case AMPROP_CAN_EXCLUDE:
+ PG_RETURN_BOOL(routine->amgettuple ? true : false);
+
+ case AMPROP_CAN_INCLUDE:
+ PG_RETURN_BOOL(routine->amcaninclude);
+
+ default:
+ PG_RETURN_NULL();
+ }
+}
+
+/*
+ * Test property of an AM specified by AM OID
+ */
+Datum
+pg_indexam_has_property(PG_FUNCTION_ARGS)
+{
+ Oid amoid = PG_GETARG_OID(0);
+ char *propname = text_to_cstring(PG_GETARG_TEXT_PP(1));
+
+ return indexam_property(fcinfo, propname, amoid, InvalidOid, 0);
+}
+
+/*
+ * Test property of an index specified by index OID
+ */
+Datum
+pg_index_has_property(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ char *propname = text_to_cstring(PG_GETARG_TEXT_PP(1));
+
+ return indexam_property(fcinfo, propname, InvalidOid, relid, 0);
+}
+
+/*
+ * Test property of an index column specified by index OID and column number
+ */
+Datum
+pg_index_column_has_property(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int32 attno = PG_GETARG_INT32(1);
+ char *propname = text_to_cstring(PG_GETARG_TEXT_PP(2));
+
+ /* Reject attno 0 immediately, so that attno > 0 identifies this case */
+ if (attno <= 0)
+ PG_RETURN_NULL();
+
+ return indexam_property(fcinfo, propname, InvalidOid, relid, attno);
+}
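+
+/*
+ * For illustration only: typical SQL-level calls look like
+ *
+ *     SELECT pg_indexam_has_property(a.oid, 'can_order')
+ *       FROM pg_am a WHERE a.amname = 'btree';
+ *     SELECT pg_index_has_property('some_index'::regclass, 'clusterable');
+ *     SELECT pg_index_column_has_property('some_index'::regclass, 1, 'asc');
+ *
+ * where "some_index" is a hypothetical index name.  Unknown property
+ * names simply yield NULL rather than an error.
+ */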
+
+/*
+ * Return the name of the given phase, as used for progress reporting by the
+ * given AM.
+ */
+Datum
+pg_indexam_progress_phasename(PG_FUNCTION_ARGS)
+{
+ Oid amoid = PG_GETARG_OID(0);
+ int32 phasenum = PG_GETARG_INT32(1);
+ IndexAmRoutine *routine;
+ char *name;
+
+ routine = GetIndexAmRoutineByAmId(amoid, true);
+ if (routine == NULL || !routine->ambuildphasename)
+ PG_RETURN_NULL();
+
+ name = routine->ambuildphasename(phasenum);
+ if (!name)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(CStringGetTextDatum(name));
+}
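+
+/*
+ * For illustration only: a phase name reported by the btree AM can be
+ * fetched with, e.g.,
+ *
+ *     SELECT pg_indexam_progress_phasename(a.oid, 2)
+ *       FROM pg_am a WHERE a.amname = 'btree';
+ *
+ * which returns NULL if the AM has no ambuildphasename support or does
+ * not recognize the phase number.
+ */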
diff --git a/src/backend/utils/adt/array_expanded.c b/src/backend/utils/adt/array_expanded.c
new file mode 100644
index 0000000..20c584e
--- /dev/null
+++ b/src/backend/utils/adt/array_expanded.c
@@ -0,0 +1,453 @@
+/*-------------------------------------------------------------------------
+ *
+ * array_expanded.c
+ * Basic functions for manipulating expanded arrays.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/array_expanded.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/tupmacs.h"
+#include "utils/array.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+
+
+/* "Methods" required for an expanded object */
+static Size EA_get_flat_size(ExpandedObjectHeader *eohptr);
+static void EA_flatten_into(ExpandedObjectHeader *eohptr,
+ void *result, Size allocated_size);
+
+static const ExpandedObjectMethods EA_methods =
+{
+ EA_get_flat_size,
+ EA_flatten_into
+};
+
+/* Other local functions */
+static void copy_byval_expanded_array(ExpandedArrayHeader *eah,
+ ExpandedArrayHeader *oldeah);
+
+
+/*
+ * expand_array: convert an array Datum into an expanded array
+ *
+ * The expanded object will be a child of parentcontext.
+ *
+ * Some callers can provide cache space to avoid repeated lookups of element
+ * type data across calls; if so, pass a metacache pointer, making sure that
+ * metacache->element_type is initialized to InvalidOid before first call.
+ * If no cross-call caching is required, pass NULL for metacache.
+ */
+Datum
+expand_array(Datum arraydatum, MemoryContext parentcontext,
+ ArrayMetaState *metacache)
+{
+ ArrayType *array;
+ ExpandedArrayHeader *eah;
+ MemoryContext objcxt;
+ MemoryContext oldcxt;
+ ArrayMetaState fakecache;
+
+ /*
+ * Allocate private context for expanded object. We start by assuming
+ * that the array won't be very large; but if it does grow a lot, don't
+ * constrain aset.c's large-context behavior.
+ */
+ objcxt = AllocSetContextCreate(parentcontext,
+ "expanded array",
+ ALLOCSET_START_SMALL_SIZES);
+
+ /* Set up expanded array header */
+ eah = (ExpandedArrayHeader *)
+ MemoryContextAlloc(objcxt, sizeof(ExpandedArrayHeader));
+
+ EOH_init_header(&eah->hdr, &EA_methods, objcxt);
+ eah->ea_magic = EA_MAGIC;
+
+ /* If the source is an expanded array, we may be able to optimize */
+ if (VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(arraydatum)))
+ {
+ ExpandedArrayHeader *oldeah = (ExpandedArrayHeader *) DatumGetEOHP(arraydatum);
+
+ Assert(oldeah->ea_magic == EA_MAGIC);
+
+ /*
+ * Update caller's cache if provided; we don't need it this time, but
+ * next call might be for a non-expanded source array. Furthermore,
+ * if the caller didn't provide a cache area, use some local storage
+ * to cache anyway, thereby avoiding a catalog lookup in the case
+ * where we fall through to the flat-copy code path.
+ */
+ if (metacache == NULL)
+ metacache = &fakecache;
+ metacache->element_type = oldeah->element_type;
+ metacache->typlen = oldeah->typlen;
+ metacache->typbyval = oldeah->typbyval;
+ metacache->typalign = oldeah->typalign;
+
+ /*
+ * If element type is pass-by-value and we have a Datum-array
+ * representation, just copy the source's metadata and Datum/isnull
+ * arrays. The original flat array, if present at all, adds no
+ * additional information so we need not copy it.
+ */
+ if (oldeah->typbyval && oldeah->dvalues != NULL)
+ {
+ copy_byval_expanded_array(eah, oldeah);
+ /* return a R/W pointer to the expanded array */
+ return EOHPGetRWDatum(&eah->hdr);
+ }
+
+ /*
+ * Otherwise, either we have only a flat representation or the
+ * elements are pass-by-reference. In either case, the best thing
+ * seems to be to copy the source as a flat representation and then
+ * deconstruct that later if necessary. For the pass-by-ref case, we
+ * could perhaps save some cycles with custom code that generates the
+ * deconstructed representation in parallel with copying the values,
+ * but it would be a lot of extra code for fairly marginal gain. So,
+ * fall through into the flat-source code path.
+ */
+ }
+
+ /*
+ * Detoast and copy source array into private context, as a flat array.
+ *
+ * Note that this coding risks leaking some memory in the private context
+ * if we have to fetch data from a TOAST table; however, experimentation
+ * says that the leak is minimal. Doing it this way saves a copy step,
+ * which seems worthwhile, especially if the array is large enough to need
+ * external storage.
+ */
+ oldcxt = MemoryContextSwitchTo(objcxt);
+ array = DatumGetArrayTypePCopy(arraydatum);
+ MemoryContextSwitchTo(oldcxt);
+
+ eah->ndims = ARR_NDIM(array);
+ /* note these pointers point into the fvalue header! */
+ eah->dims = ARR_DIMS(array);
+ eah->lbound = ARR_LBOUND(array);
+
+ /* Save array's element-type data for possible use later */
+ eah->element_type = ARR_ELEMTYPE(array);
+ if (metacache && metacache->element_type == eah->element_type)
+ {
+ /* We have a valid cache of representational data */
+ eah->typlen = metacache->typlen;
+ eah->typbyval = metacache->typbyval;
+ eah->typalign = metacache->typalign;
+ }
+ else
+ {
+ /* No, so look it up */
+ get_typlenbyvalalign(eah->element_type,
+ &eah->typlen,
+ &eah->typbyval,
+ &eah->typalign);
+ /* Update cache if provided */
+ if (metacache)
+ {
+ metacache->element_type = eah->element_type;
+ metacache->typlen = eah->typlen;
+ metacache->typbyval = eah->typbyval;
+ metacache->typalign = eah->typalign;
+ }
+ }
+
+ /* we don't make a deconstructed representation now */
+ eah->dvalues = NULL;
+ eah->dnulls = NULL;
+ eah->dvalueslen = 0;
+ eah->nelems = 0;
+ eah->flat_size = 0;
+
+ /* remember we have a flat representation */
+ eah->fvalue = array;
+ eah->fstartptr = ARR_DATA_PTR(array);
+ eah->fendptr = ((char *) array) + ARR_SIZE(array);
+
+ /* return a R/W pointer to the expanded array */
+ return EOHPGetRWDatum(&eah->hdr);
+}
+
+/*
+ * helper for expand_array(): copy pass-by-value Datum-array representation
+ */
+static void
+copy_byval_expanded_array(ExpandedArrayHeader *eah,
+ ExpandedArrayHeader *oldeah)
+{
+ MemoryContext objcxt = eah->hdr.eoh_context;
+ int ndims = oldeah->ndims;
+ int dvalueslen = oldeah->dvalueslen;
+
+ /* Copy array dimensionality information */
+ eah->ndims = ndims;
+ /* We can alloc both dimensionality arrays with one palloc */
+ eah->dims = (int *) MemoryContextAlloc(objcxt, ndims * 2 * sizeof(int));
+ eah->lbound = eah->dims + ndims;
+ /* .. but don't assume the source's arrays are contiguous */
+ memcpy(eah->dims, oldeah->dims, ndims * sizeof(int));
+ memcpy(eah->lbound, oldeah->lbound, ndims * sizeof(int));
+
+ /* Copy element-type data */
+ eah->element_type = oldeah->element_type;
+ eah->typlen = oldeah->typlen;
+ eah->typbyval = oldeah->typbyval;
+ eah->typalign = oldeah->typalign;
+
+ /* Copy the deconstructed representation */
+ eah->dvalues = (Datum *) MemoryContextAlloc(objcxt,
+ dvalueslen * sizeof(Datum));
+ memcpy(eah->dvalues, oldeah->dvalues, dvalueslen * sizeof(Datum));
+ if (oldeah->dnulls)
+ {
+ eah->dnulls = (bool *) MemoryContextAlloc(objcxt,
+ dvalueslen * sizeof(bool));
+ memcpy(eah->dnulls, oldeah->dnulls, dvalueslen * sizeof(bool));
+ }
+ else
+ eah->dnulls = NULL;
+ eah->dvalueslen = dvalueslen;
+ eah->nelems = oldeah->nelems;
+ eah->flat_size = oldeah->flat_size;
+
+ /* we don't make a flat representation */
+ eah->fvalue = NULL;
+ eah->fstartptr = NULL;
+ eah->fendptr = NULL;
+}
+
+/*
+ * get_flat_size method for expanded arrays
+ */
+static Size
+EA_get_flat_size(ExpandedObjectHeader *eohptr)
+{
+ ExpandedArrayHeader *eah = (ExpandedArrayHeader *) eohptr;
+ int nelems;
+ int ndims;
+ Datum *dvalues;
+ bool *dnulls;
+ Size nbytes;
+ int i;
+
+ Assert(eah->ea_magic == EA_MAGIC);
+
+ /* Easy if we have a valid flattened value */
+ if (eah->fvalue)
+ return ARR_SIZE(eah->fvalue);
+
+ /* If we have a cached size value, believe that */
+ if (eah->flat_size)
+ return eah->flat_size;
+
+ /*
+ * Compute space needed by examining dvalues/dnulls. Note that the result
+ * array will have a nulls bitmap if dnulls isn't NULL, even if the array
+ * doesn't actually contain any nulls now.
+ */
+ nelems = eah->nelems;
+ ndims = eah->ndims;
+ Assert(nelems == ArrayGetNItems(ndims, eah->dims));
+ dvalues = eah->dvalues;
+ dnulls = eah->dnulls;
+ nbytes = 0;
+ for (i = 0; i < nelems; i++)
+ {
+ if (dnulls && dnulls[i])
+ continue;
+ nbytes = att_addlength_datum(nbytes, eah->typlen, dvalues[i]);
+ nbytes = att_align_nominal(nbytes, eah->typalign);
+ /* check for overflow of total request */
+ if (!AllocSizeIsValid(nbytes))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("array size exceeds the maximum allowed (%d)",
+ (int) MaxAllocSize)));
+ }
+
+ if (dnulls)
+ nbytes += ARR_OVERHEAD_WITHNULLS(ndims, nelems);
+ else
+ nbytes += ARR_OVERHEAD_NONULLS(ndims);
+
+ /* cache for next time */
+ eah->flat_size = nbytes;
+
+ return nbytes;
+}
+
+/*
+ * flatten_into method for expanded arrays
+ */
+static void
+EA_flatten_into(ExpandedObjectHeader *eohptr,
+ void *result, Size allocated_size)
+{
+ ExpandedArrayHeader *eah = (ExpandedArrayHeader *) eohptr;
+ ArrayType *aresult = (ArrayType *) result;
+ int nelems;
+ int ndims;
+ int32 dataoffset;
+
+ Assert(eah->ea_magic == EA_MAGIC);
+
+ /* Easy if we have a valid flattened value */
+ if (eah->fvalue)
+ {
+ Assert(allocated_size == ARR_SIZE(eah->fvalue));
+ memcpy(result, eah->fvalue, allocated_size);
+ return;
+ }
+
+ /* Else allocation should match previous get_flat_size result */
+ Assert(allocated_size == eah->flat_size);
+
+ /* Fill result array from dvalues/dnulls */
+ nelems = eah->nelems;
+ ndims = eah->ndims;
+
+ if (eah->dnulls)
+ dataoffset = ARR_OVERHEAD_WITHNULLS(ndims, nelems);
+ else
+ dataoffset = 0; /* marker for no null bitmap */
+
+ /* We must ensure that any pad space is zero-filled */
+ memset(aresult, 0, allocated_size);
+
+ SET_VARSIZE(aresult, allocated_size);
+ aresult->ndim = ndims;
+ aresult->dataoffset = dataoffset;
+ aresult->elemtype = eah->element_type;
+ memcpy(ARR_DIMS(aresult), eah->dims, ndims * sizeof(int));
+ memcpy(ARR_LBOUND(aresult), eah->lbound, ndims * sizeof(int));
+
+ CopyArrayEls(aresult,
+ eah->dvalues, eah->dnulls, nelems,
+ eah->typlen, eah->typbyval, eah->typalign,
+ false);
+}
+
+/*
+ * Argument fetching support code
+ */
+
+/*
+ * DatumGetExpandedArray: get a writable expanded array from an input argument
+ *
+ * Caution: if the input is a read/write pointer, this returns the input
+ * argument; so callers must be sure that their changes are "safe", that is
+ * they cannot leave the array in a corrupt state.
+ */
+ExpandedArrayHeader *
+DatumGetExpandedArray(Datum d)
+{
+ /* If it's a writable expanded array already, just return it */
+ if (VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(d)))
+ {
+ ExpandedArrayHeader *eah = (ExpandedArrayHeader *) DatumGetEOHP(d);
+
+ Assert(eah->ea_magic == EA_MAGIC);
+ return eah;
+ }
+
+ /* Else expand the hard way */
+ d = expand_array(d, CurrentMemoryContext, NULL);
+ return (ExpandedArrayHeader *) DatumGetEOHP(d);
+}
+
+/*
+ * As above, when caller has the ability to cache element type info
+ */
+ExpandedArrayHeader *
+DatumGetExpandedArrayX(Datum d, ArrayMetaState *metacache)
+{
+ /* If it's a writable expanded array already, just return it */
+ if (VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(d)))
+ {
+ ExpandedArrayHeader *eah = (ExpandedArrayHeader *) DatumGetEOHP(d);
+
+ Assert(eah->ea_magic == EA_MAGIC);
+ /* Update cache if provided */
+ if (metacache)
+ {
+ metacache->element_type = eah->element_type;
+ metacache->typlen = eah->typlen;
+ metacache->typbyval = eah->typbyval;
+ metacache->typalign = eah->typalign;
+ }
+ return eah;
+ }
+
+ /* Else expand using caller's cache if any */
+ d = expand_array(d, CurrentMemoryContext, metacache);
+ return (ExpandedArrayHeader *) DatumGetEOHP(d);
+}
+
+/*
+ * DatumGetAnyArrayP: return either an expanded array or a detoasted varlena
+ * array. The result must not be modified in-place.
+ */
+AnyArrayType *
+DatumGetAnyArrayP(Datum d)
+{
+ ExpandedArrayHeader *eah;
+
+ /*
+ * If it's an expanded array (RW or RO), return the header pointer.
+ */
+ if (VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(d)))
+ {
+ eah = (ExpandedArrayHeader *) DatumGetEOHP(d);
+ Assert(eah->ea_magic == EA_MAGIC);
+ return (AnyArrayType *) eah;
+ }
+
+ /* Else do regular detoasting as needed */
+ return (AnyArrayType *) PG_DETOAST_DATUM(d);
+}
+
+/*
+ * Create the Datum/isnull representation of an expanded array object
+ * if we didn't do so previously
+ */
+void
+deconstruct_expanded_array(ExpandedArrayHeader *eah)
+{
+ if (eah->dvalues == NULL)
+ {
+ MemoryContext oldcxt = MemoryContextSwitchTo(eah->hdr.eoh_context);
+ Datum *dvalues;
+ bool *dnulls;
+ int nelems;
+
+ dnulls = NULL;
+ deconstruct_array(eah->fvalue,
+ eah->element_type,
+ eah->typlen, eah->typbyval, eah->typalign,
+ &dvalues,
+ ARR_HASNULL(eah->fvalue) ? &dnulls : NULL,
+ &nelems);
+
+ /*
+ * Update header only after successful completion of this step. If
+ * deconstruct_array fails partway through, worst consequence is some
+ * leaked memory in the object's context. If the caller fails at a
+ * later point, that's fine, since the deconstructed representation is
+ * valid anyhow.
+ */
+ eah->dvalues = dvalues;
+ eah->dnulls = dnulls;
+ eah->dvalueslen = eah->nelems = nelems;
+ MemoryContextSwitchTo(oldcxt);
+ }
+}
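
The EA_get_flat_size/EA_flatten_into pair above is reached only through the generic expanded-object entry points. Below is a minimal sketch of the usual caller pattern, assuming the EOH_get_flat_size/EOH_flatten_into wrappers declared in utils/expandeddatum.h; flatten_expanded_array is a hypothetical helper for illustration, not part of this file.

/* Hypothetical helper: serialize an expanded array back to a flat varlena. */
static ArrayType *
flatten_expanded_array(ExpandedArrayHeader *eah)
{
	/* ask the method above how much space the flat form needs */
	Size		sz = EOH_get_flat_size(&eah->hdr);
	ArrayType  *result = (ArrayType *) palloc(sz);

	/* fill the buffer; this dispatches to EA_flatten_into */
	EOH_flatten_into(&eah->hdr, result, sz);
	return result;
}
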
diff --git a/src/backend/utils/adt/array_selfuncs.c b/src/backend/utils/adt/array_selfuncs.c
new file mode 100644
index 0000000..8cbee14
--- /dev/null
+++ b/src/backend/utils/adt/array_selfuncs.c
@@ -0,0 +1,1193 @@
+/*-------------------------------------------------------------------------
+ *
+ * array_selfuncs.c
+ * Functions for selectivity estimation of array operators
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/array_selfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+
+#include "access/htup_details.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_statistic.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
+#include "utils/typcache.h"
+
+
+/* Default selectivity constant for "@>" and "<@" operators */
+#define DEFAULT_CONTAIN_SEL 0.005
+
+/* Default selectivity constant for "&&" operator */
+#define DEFAULT_OVERLAP_SEL 0.01
+
+/* Default selectivity for given operator */
+#define DEFAULT_SEL(operator) \
+ ((operator) == OID_ARRAY_OVERLAP_OP ? \
+ DEFAULT_OVERLAP_SEL : DEFAULT_CONTAIN_SEL)
+
+static Selectivity calc_arraycontsel(VariableStatData *vardata, Datum constval,
+ Oid elemtype, Oid operator);
+static Selectivity mcelem_array_selec(ArrayType *array,
+ TypeCacheEntry *typentry,
+ Datum *mcelem, int nmcelem,
+ float4 *numbers, int nnumbers,
+ float4 *hist, int nhist,
+ Oid operator);
+static Selectivity mcelem_array_contain_overlap_selec(Datum *mcelem, int nmcelem,
+ float4 *numbers, int nnumbers,
+ Datum *array_data, int nitems,
+ Oid operator, TypeCacheEntry *typentry);
+static Selectivity mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
+ float4 *numbers, int nnumbers,
+ Datum *array_data, int nitems,
+ float4 *hist, int nhist,
+ Oid operator, TypeCacheEntry *typentry);
+static float *calc_hist(const float4 *hist, int nhist, int n);
+static float *calc_distr(const float *p, int n, int m, float rest);
+static int floor_log2(uint32 n);
+static bool find_next_mcelem(Datum *mcelem, int nmcelem, Datum value,
+ int *index, TypeCacheEntry *typentry);
+static int element_compare(const void *key1, const void *key2, void *arg);
+static int float_compare_desc(const void *key1, const void *key2);
+
+
+/*
+ * scalararraysel_containment
+ * Estimate selectivity of ScalarArrayOpExpr via array containment.
+ *
+ * If we have const =/<> ANY/ALL (array_var) then we can estimate the
+ * selectivity as though this were an array containment operator,
+ * array_var op ARRAY[const].
+ *
+ * scalararraysel() has already verified that the ScalarArrayOpExpr's operator
+ * is the array element type's default equality or inequality operator, and
+ * has aggressively simplified both inputs to constants.
+ *
+ * Returns selectivity (0..1), or -1 if we fail to estimate selectivity.
+ */
+Selectivity
+scalararraysel_containment(PlannerInfo *root,
+ Node *leftop, Node *rightop,
+ Oid elemtype, bool isEquality, bool useOr,
+ int varRelid)
+{
+ Selectivity selec;
+ VariableStatData vardata;
+ Datum constval;
+ TypeCacheEntry *typentry;
+ FmgrInfo *cmpfunc;
+
+ /*
+ * rightop must be a variable, else punt.
+ */
+ examine_variable(root, rightop, varRelid, &vardata);
+ if (!vardata.rel)
+ {
+ ReleaseVariableStats(vardata);
+ return -1.0;
+ }
+
+ /*
+ * leftop must be a constant, else punt.
+ */
+ if (!IsA(leftop, Const))
+ {
+ ReleaseVariableStats(vardata);
+ return -1.0;
+ }
+ if (((Const *) leftop)->constisnull)
+ {
+ /* qual can't succeed if null on left */
+ ReleaseVariableStats(vardata);
+ return (Selectivity) 0.0;
+ }
+ constval = ((Const *) leftop)->constvalue;
+
+ /* Get element type's default comparison function */
+ typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
+ if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
+ {
+ ReleaseVariableStats(vardata);
+ return -1.0;
+ }
+ cmpfunc = &typentry->cmp_proc_finfo;
+
+ /*
+ * If the operator is <>, swap ANY/ALL, then invert the result later.
+ */
+ if (!isEquality)
+ useOr = !useOr;
+
+ /* Get array element stats for var, if available */
+ if (HeapTupleIsValid(vardata.statsTuple) &&
+ statistic_proc_security_check(&vardata, cmpfunc->fn_oid))
+ {
+ Form_pg_statistic stats;
+ AttStatsSlot sslot;
+ AttStatsSlot hslot;
+
+ stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
+
+ /* MCELEM will be an array of same type as element */
+ if (get_attstatsslot(&sslot, vardata.statsTuple,
+ STATISTIC_KIND_MCELEM, InvalidOid,
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
+ {
+ /* For ALL case, also get histogram of distinct-element counts */
+ if (useOr ||
+ !get_attstatsslot(&hslot, vardata.statsTuple,
+ STATISTIC_KIND_DECHIST, InvalidOid,
+ ATTSTATSSLOT_NUMBERS))
+ memset(&hslot, 0, sizeof(hslot));
+
+ /*
+ * For = ANY, estimate as var @> ARRAY[const].
+ *
+ * For = ALL, estimate as var <@ ARRAY[const].
+ */
+ if (useOr)
+ selec = mcelem_array_contain_overlap_selec(sslot.values,
+ sslot.nvalues,
+ sslot.numbers,
+ sslot.nnumbers,
+ &constval, 1,
+ OID_ARRAY_CONTAINS_OP,
+ typentry);
+ else
+ selec = mcelem_array_contained_selec(sslot.values,
+ sslot.nvalues,
+ sslot.numbers,
+ sslot.nnumbers,
+ &constval, 1,
+ hslot.numbers,
+ hslot.nnumbers,
+ OID_ARRAY_CONTAINED_OP,
+ typentry);
+
+ free_attstatsslot(&hslot);
+ free_attstatsslot(&sslot);
+ }
+ else
+ {
+ /* No most-common-elements info, so do without */
+ if (useOr)
+ selec = mcelem_array_contain_overlap_selec(NULL, 0,
+ NULL, 0,
+ &constval, 1,
+ OID_ARRAY_CONTAINS_OP,
+ typentry);
+ else
+ selec = mcelem_array_contained_selec(NULL, 0,
+ NULL, 0,
+ &constval, 1,
+ NULL, 0,
+ OID_ARRAY_CONTAINED_OP,
+ typentry);
+ }
+
+ /*
+ * MCE stats count only non-null rows, so adjust for null rows.
+ */
+ selec *= (1.0 - stats->stanullfrac);
+ }
+ else
+ {
+ /* No stats at all, so do without */
+ if (useOr)
+ selec = mcelem_array_contain_overlap_selec(NULL, 0,
+ NULL, 0,
+ &constval, 1,
+ OID_ARRAY_CONTAINS_OP,
+ typentry);
+ else
+ selec = mcelem_array_contained_selec(NULL, 0,
+ NULL, 0,
+ &constval, 1,
+ NULL, 0,
+ OID_ARRAY_CONTAINED_OP,
+ typentry);
+ /* we assume no nulls here, so no stanullfrac correction */
+ }
+
+ ReleaseVariableStats(vardata);
+
+ /*
+ * If the operator is <>, invert the results.
+ */
+ if (!isEquality)
+ selec = 1.0 - selec;
+
+ CLAMP_PROBABILITY(selec);
+
+ return selec;
+}
+
+/*
+ * arraycontsel -- restriction selectivity for array @>, &&, <@ operators
+ */
+Datum
+arraycontsel(PG_FUNCTION_ARGS)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Oid operator = PG_GETARG_OID(1);
+ List *args = (List *) PG_GETARG_POINTER(2);
+ int varRelid = PG_GETARG_INT32(3);
+ VariableStatData vardata;
+ Node *other;
+ bool varonleft;
+ Selectivity selec;
+ Oid element_typeid;
+
+ /*
+ * If expression is not (variable op something) or (something op
+ * variable), then punt and return a default estimate.
+ */
+ if (!get_restriction_variable(root, args, varRelid,
+ &vardata, &other, &varonleft))
+ PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
+
+ /*
+ * Can't do anything useful if the something is not a constant, either.
+ */
+ if (!IsA(other, Const))
+ {
+ ReleaseVariableStats(vardata);
+ PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
+ }
+
+ /*
+ * The "&&", "@>" and "<@" operators are strict, so we can cope with a
+ * NULL constant right away.
+ */
+ if (((Const *) other)->constisnull)
+ {
+ ReleaseVariableStats(vardata);
+ PG_RETURN_FLOAT8(0.0);
+ }
+
+ /*
+ * If var is on the right, commute the operator, so that we can assume the
+ * var is on the left in what follows.
+ */
+ if (!varonleft)
+ {
+ if (operator == OID_ARRAY_CONTAINS_OP)
+ operator = OID_ARRAY_CONTAINED_OP;
+ else if (operator == OID_ARRAY_CONTAINED_OP)
+ operator = OID_ARRAY_CONTAINS_OP;
+ }
+
+ /*
+ * OK, there's a Var and a Const we're dealing with here. We need the
+ * Const to be an array with same element type as column, else we can't do
+ * anything useful. (Such cases will likely fail at runtime, but here
+ * we'd rather just return a default estimate.)
+ */
+ element_typeid = get_base_element_type(((Const *) other)->consttype);
+ if (element_typeid != InvalidOid &&
+ element_typeid == get_base_element_type(vardata.vartype))
+ {
+ selec = calc_arraycontsel(&vardata, ((Const *) other)->constvalue,
+ element_typeid, operator);
+ }
+ else
+ {
+ selec = DEFAULT_SEL(operator);
+ }
+
+ ReleaseVariableStats(vardata);
+
+ CLAMP_PROBABILITY(selec);
+
+ PG_RETURN_FLOAT8((float8) selec);
+}
+
+/*
+ * arraycontjoinsel -- join selectivity for array @>, &&, <@ operators
+ */
+Datum
+arraycontjoinsel(PG_FUNCTION_ARGS)
+{
+ /* For the moment this is just a stub */
+ Oid operator = PG_GETARG_OID(1);
+
+ PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
+}
+
+/*
+ * Calculate selectivity for "arraycolumn @> const", "arraycolumn && const"
+ * or "arraycolumn <@ const" based on the statistics
+ *
+ * This function is mainly responsible for extracting the pg_statistic data
+ * to be used; we then pass the problem on to mcelem_array_selec().
+ */
+static Selectivity
+calc_arraycontsel(VariableStatData *vardata, Datum constval,
+ Oid elemtype, Oid operator)
+{
+ Selectivity selec;
+ TypeCacheEntry *typentry;
+ FmgrInfo *cmpfunc;
+ ArrayType *array;
+
+ /* Get element type's default comparison function */
+ typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
+ if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
+ return DEFAULT_SEL(operator);
+ cmpfunc = &typentry->cmp_proc_finfo;
+
+ /*
+ * The caller made sure the const is an array with same element type, so
+ * get it now
+ */
+ array = DatumGetArrayTypeP(constval);
+
+ if (HeapTupleIsValid(vardata->statsTuple) &&
+ statistic_proc_security_check(vardata, cmpfunc->fn_oid))
+ {
+ Form_pg_statistic stats;
+ AttStatsSlot sslot;
+ AttStatsSlot hslot;
+
+ stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
+
+ /* MCELEM will be an array of same type as column */
+ if (get_attstatsslot(&sslot, vardata->statsTuple,
+ STATISTIC_KIND_MCELEM, InvalidOid,
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
+ {
+ /*
+ * For "array <@ const" case we also need histogram of distinct
+ * element counts.
+ */
+ if (operator != OID_ARRAY_CONTAINED_OP ||
+ !get_attstatsslot(&hslot, vardata->statsTuple,
+ STATISTIC_KIND_DECHIST, InvalidOid,
+ ATTSTATSSLOT_NUMBERS))
+ memset(&hslot, 0, sizeof(hslot));
+
+ /* Use the most-common-elements slot for the array Var. */
+ selec = mcelem_array_selec(array, typentry,
+ sslot.values, sslot.nvalues,
+ sslot.numbers, sslot.nnumbers,
+ hslot.numbers, hslot.nnumbers,
+ operator);
+
+ free_attstatsslot(&hslot);
+ free_attstatsslot(&sslot);
+ }
+ else
+ {
+ /* No most-common-elements info, so do without */
+ selec = mcelem_array_selec(array, typentry,
+ NULL, 0, NULL, 0, NULL, 0,
+ operator);
+ }
+
+ /*
+ * MCE stats count only non-null rows, so adjust for null rows.
+ */
+ selec *= (1.0 - stats->stanullfrac);
+ }
+ else
+ {
+ /* No stats at all, so do without */
+ selec = mcelem_array_selec(array, typentry,
+ NULL, 0, NULL, 0, NULL, 0,
+ operator);
+ /* we assume no nulls here, so no stanullfrac correction */
+ }
+
+ /* If constant was toasted, release the copy we made */
+ if (PointerGetDatum(array) != constval)
+ pfree(array);
+
+ return selec;
+}
+
+/*
+ * Array selectivity estimation based on most common elements statistics
+ *
+ * This function just deconstructs and sorts the array constant's contents,
+ * and then passes the problem on to mcelem_array_contain_overlap_selec or
+ * mcelem_array_contained_selec depending on the operator.
+ */
+static Selectivity
+mcelem_array_selec(ArrayType *array, TypeCacheEntry *typentry,
+ Datum *mcelem, int nmcelem,
+ float4 *numbers, int nnumbers,
+ float4 *hist, int nhist,
+ Oid operator)
+{
+ Selectivity selec;
+ int num_elems;
+ Datum *elem_values;
+ bool *elem_nulls;
+ bool null_present;
+ int nonnull_nitems;
+ int i;
+
+ /*
+ * Prepare constant array data for sorting. Sorting lets us find unique
+ * elements and efficiently merge with the MCELEM array.
+ */
+ deconstruct_array(array,
+ typentry->type_id,
+ typentry->typlen,
+ typentry->typbyval,
+ typentry->typalign,
+ &elem_values, &elem_nulls, &num_elems);
+
+ /* Collapse out any null elements */
+ nonnull_nitems = 0;
+ null_present = false;
+ for (i = 0; i < num_elems; i++)
+ {
+ if (elem_nulls[i])
+ null_present = true;
+ else
+ elem_values[nonnull_nitems++] = elem_values[i];
+ }
+
+ /*
+ * Query "column @> '{anything, null}'" matches nothing. For the other
+ * two operators, presence of a null in the constant can be ignored.
+ */
+ if (null_present && operator == OID_ARRAY_CONTAINS_OP)
+ {
+ pfree(elem_values);
+ pfree(elem_nulls);
+ return (Selectivity) 0.0;
+ }
+
+ /* Sort extracted elements using their default comparison function. */
+ qsort_arg(elem_values, nonnull_nitems, sizeof(Datum),
+ element_compare, typentry);
+
+ /* Separate cases according to operator */
+ if (operator == OID_ARRAY_CONTAINS_OP || operator == OID_ARRAY_OVERLAP_OP)
+ selec = mcelem_array_contain_overlap_selec(mcelem, nmcelem,
+ numbers, nnumbers,
+ elem_values, nonnull_nitems,
+ operator, typentry);
+ else if (operator == OID_ARRAY_CONTAINED_OP)
+ selec = mcelem_array_contained_selec(mcelem, nmcelem,
+ numbers, nnumbers,
+ elem_values, nonnull_nitems,
+ hist, nhist,
+ operator, typentry);
+ else
+ {
+ elog(ERROR, "arraycontsel called for unrecognized operator %u",
+ operator);
+ selec = 0.0; /* keep compiler quiet */
+ }
+
+ pfree(elem_values);
+ pfree(elem_nulls);
+ return selec;
+}
+
+/*
+ * Estimate selectivity of "column @> const" and "column && const" based on
+ * most common element statistics. This estimation assumes element
+ * occurrences are independent.
+ *
+ * mcelem (of length nmcelem) and numbers (of length nnumbers) are from
+ * the array column's MCELEM statistics slot, or are NULL/0 if stats are
+ * not available. array_data (of length nitems) is the constant's elements.
+ *
+ * Both the mcelem and array_data arrays are assumed presorted according
+ * to the element type's cmpfunc. Null elements are not present.
+ *
+ * TODO: this estimate probably could be improved by using the distinct
+ * elements count histogram. For example, excepting the special case of
+ * "column @> '{}'", we can multiply the calculated selectivity by the
+ * fraction of nonempty arrays in the column.
+ */
+static Selectivity
+mcelem_array_contain_overlap_selec(Datum *mcelem, int nmcelem,
+ float4 *numbers, int nnumbers,
+ Datum *array_data, int nitems,
+ Oid operator, TypeCacheEntry *typentry)
+{
+ Selectivity selec,
+ elem_selec;
+ int mcelem_index,
+ i;
+ bool use_bsearch;
+ float4 minfreq;
+
+ /*
+ * There should be three more Numbers than Values, because the last three
+ * cells should hold minimal and maximal frequency among the non-null
+ * elements, and then the frequency of null elements. Ignore the Numbers
+ * if not right.
+ */
+ if (nnumbers != nmcelem + 3)
+ {
+ numbers = NULL;
+ nnumbers = 0;
+ }
+
+ if (numbers)
+ {
+ /* Grab the lowest observed frequency */
+ minfreq = numbers[nmcelem];
+ }
+ else
+ {
+ /* Without statistics make some default assumptions */
+ minfreq = 2 * (float4) DEFAULT_CONTAIN_SEL;
+ }
+
+ /* Decide whether it is faster to use binary search or not. */
+ if (nitems * floor_log2((uint32) nmcelem) < nmcelem + nitems)
+ use_bsearch = true;
+ else
+ use_bsearch = false;
+
+ if (operator == OID_ARRAY_CONTAINS_OP)
+ {
+ /*
+ * Initial selectivity for "column @> const" query is 1.0, and it will
+ * be decreased with each element of constant array.
+ */
+ selec = 1.0;
+ }
+ else
+ {
+ /*
+ * Initial selectivity for "column && const" query is 0.0, and it will
+ * be increased with each element of constant array.
+ */
+ selec = 0.0;
+ }
+
+ /* Scan mcelem and array in parallel. */
+ mcelem_index = 0;
+ for (i = 0; i < nitems; i++)
+ {
+ bool match = false;
+
+ /* Ignore any duplicates in the array data. */
+ if (i > 0 &&
+ element_compare(&array_data[i - 1], &array_data[i], typentry) == 0)
+ continue;
+
+ /* Find the smallest MCELEM >= this array item. */
+ if (use_bsearch)
+ {
+ match = find_next_mcelem(mcelem, nmcelem, array_data[i],
+ &mcelem_index, typentry);
+ }
+ else
+ {
+ while (mcelem_index < nmcelem)
+ {
+ int cmp = element_compare(&mcelem[mcelem_index],
+ &array_data[i],
+ typentry);
+
+ if (cmp < 0)
+ mcelem_index++;
+ else
+ {
+ if (cmp == 0)
+ match = true; /* mcelem is found */
+ break;
+ }
+ }
+ }
+
+ if (match && numbers)
+ {
+ /* MCELEM matches the array item; use its frequency. */
+ elem_selec = numbers[mcelem_index];
+ mcelem_index++;
+ }
+ else
+ {
+ /*
+ * The element is not in MCELEM. Punt, but assume that the
+ * selectivity cannot be more than minfreq / 2.
+ */
+ elem_selec = Min(DEFAULT_CONTAIN_SEL, minfreq / 2);
+ }
+
+ /*
+ * Update overall selectivity using the current element's selectivity
+ * and an assumption of element occurrence independence.
+ */
+ if (operator == OID_ARRAY_CONTAINS_OP)
+ selec *= elem_selec;
+ else
+ selec = selec + elem_selec - selec * elem_selec;
+
+ /* Clamp intermediate results to stay sane despite roundoff error */
+ CLAMP_PROBABILITY(selec);
+ }
+
+ return selec;
+}
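
Under the independence assumption used above, the per-element frequencies combine by plain multiplication for "@>" and by inclusion-exclusion for "&&". A standalone numeric sketch of just that arithmetic (plain C with made-up frequencies; not part of this file):

#include <stdio.h>

int
main(void)
{
	/* made-up per-element match frequencies for a three-element constant */
	double		freq[] = {0.20, 0.05, 0.01};
	double		contains = 1.0;	/* "column @> const": every element must appear */
	double		overlap = 0.0;	/* "column && const": at least one must appear */
	int			i;

	for (i = 0; i < 3; i++)
	{
		contains *= freq[i];
		overlap = overlap + freq[i] - overlap * freq[i];
	}
	printf("@> estimate: %g, && estimate: %g\n", contains, overlap);
	return 0;
}
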
+
+/*
+ * Estimate selectivity of "column <@ const" based on most common element
+ * statistics.
+ *
+ * mcelem (of length nmcelem) and numbers (of length nnumbers) are from
+ * the array column's MCELEM statistics slot, or are NULL/0 if stats are
+ * not available. array_data (of length nitems) is the constant's elements.
+ * hist (of length nhist) is from the array column's DECHIST statistics slot,
+ * or is NULL/0 if those stats are not available.
+ *
+ * Both the mcelem and array_data arrays are assumed presorted according
+ * to the element type's cmpfunc. Null elements are not present.
+ *
+ * Independent element occurrence would imply a particular distribution of
+ * distinct element counts among matching rows. Real data usually falsifies
+ * that assumption. For example, in a set of 11-element integer arrays having
+ * elements in the range [0..10], element occurrences are typically not
+ * independent. If they were, a sufficiently-large set would include all
+ * distinct element counts 0 through 11. We correct for this using the
+ * histogram of distinct element counts.
+ *
+ * In the "column @> const" and "column && const" cases, we usually have a
+ * "const" with low number of elements (otherwise we have selectivity close
+ * to 0 or 1 respectively). That's why the effect of dependence related
+ * to distinct element count distribution is negligible there. In the
+ * "column <@ const" case, number of elements is usually high (otherwise we
+ * have selectivity close to 0). That's why we should do a correction with
+ * the array distinct element count distribution here.
+ *
+ * Using the histogram of distinct element counts produces a different
+ * distribution law than independent occurrences of elements. This
+ * distribution law can be described as follows:
+ *
+ * P(o1, o2, ..., on) = f1^o1 * (1 - f1)^(1 - o1) * f2^o2 *
+ * (1 - f2)^(1 - o2) * ... * fn^on * (1 - fn)^(1 - on) * hist[m] / ind[m]
+ *
+ * where:
+ * o1, o2, ..., on - occurrences of elements 1, 2, ..., n
+ * (1 - occurrence, 0 - no occurrence) in row
+ * f1, f2, ..., fn - frequencies of elements 1, 2, ..., n
+ * (scalar values in [0..1]) according to collected statistics
+ * m = o1 + o2 + ... + on = total number of distinct elements in row
+ * hist[m] - histogram data for occurrence of m elements.
+ * ind[m] - probability of m occurrences from n events assuming their
+ * probabilities to be equal to frequencies of array elements.
+ *
+ * ind[m] = sum(f1^o1 * (1 - f1)^(1 - o1) * f2^o2 * (1 - f2)^(1 - o2) *
+ * ... * fn^on * (1 - fn)^(1 - on), o1, o2, ..., on) | o1 + o2 + .. on = m
+ */
+static Selectivity
+mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
+ float4 *numbers, int nnumbers,
+ Datum *array_data, int nitems,
+ float4 *hist, int nhist,
+ Oid operator, TypeCacheEntry *typentry)
+{
+ int mcelem_index,
+ i,
+ unique_nitems = 0;
+ float selec,
+ minfreq,
+ nullelem_freq;
+ float *dist,
+ *mcelem_dist,
+ *hist_part;
+ float avg_count,
+ mult,
+ rest;
+ float *elem_selec;
+
+ /*
+ * There should be three more Numbers than Values in the MCELEM slot,
+ * because the last three cells should hold minimal and maximal frequency
+ * among the non-null elements, and then the frequency of null elements.
+ * Punt if not right, because we can't do much without the element freqs.
+ */
+ if (numbers == NULL || nnumbers != nmcelem + 3)
+ return DEFAULT_CONTAIN_SEL;
+
+ /* Can't do much without a count histogram, either */
+ if (hist == NULL || nhist < 3)
+ return DEFAULT_CONTAIN_SEL;
+
+ /*
+ * Grab some of the summary statistics that compute_array_stats() stores:
+ * lowest frequency, frequency of null elements, and average distinct
+ * element count.
+ */
+ minfreq = numbers[nmcelem];
+ nullelem_freq = numbers[nmcelem + 2];
+ avg_count = hist[nhist - 1];
+
+ /*
+ * "rest" will be the sum of the frequencies of all elements not
+ * represented in MCELEM. The average distinct element count is the sum
+ * of the frequencies of *all* elements. Begin with that; we will proceed
+ * to subtract the MCELEM frequencies.
+ */
+ rest = avg_count;
+
+ /*
+ * mult is a multiplier representing the estimated probability that each
+ * mcelem not present in the constant fails to occur.
+ */
+ mult = 1.0f;
+
+ /*
+ * elem_selec is an array of estimated frequencies for the elements in the
+ * constant.
+ */
+ elem_selec = (float *) palloc(sizeof(float) * nitems);
+
+ /* Scan mcelem and array in parallel. */
+ mcelem_index = 0;
+ for (i = 0; i < nitems; i++)
+ {
+ bool match = false;
+
+ /* Ignore any duplicates in the array data. */
+ if (i > 0 &&
+ element_compare(&array_data[i - 1], &array_data[i], typentry) == 0)
+ continue;
+
+ /*
+ * Iterate over MCELEM until we find an entry greater than or equal to
+ * this element of the constant. Update "rest" and "mult" for mcelem
+ * entries skipped over.
+ */
+ while (mcelem_index < nmcelem)
+ {
+ int cmp = element_compare(&mcelem[mcelem_index],
+ &array_data[i],
+ typentry);
+
+ if (cmp < 0)
+ {
+ mult *= (1.0f - numbers[mcelem_index]);
+ rest -= numbers[mcelem_index];
+ mcelem_index++;
+ }
+ else
+ {
+ if (cmp == 0)
+ match = true; /* mcelem is found */
+ break;
+ }
+ }
+
+ if (match)
+ {
+ /* MCELEM matches the array item. */
+ elem_selec[unique_nitems] = numbers[mcelem_index];
+ /* "rest" is decremented for all mcelems, matched or not */
+ rest -= numbers[mcelem_index];
+ mcelem_index++;
+ }
+ else
+ {
+ /*
+ * The element is not in MCELEM. Punt, but assume that the
+ * selectivity cannot be more than minfreq / 2.
+ */
+ elem_selec[unique_nitems] = Min(DEFAULT_CONTAIN_SEL,
+ minfreq / 2);
+ }
+
+ unique_nitems++;
+ }
+
+ /*
+ * If we handled all constant elements without exhausting the MCELEM
+ * array, finish walking it to complete calculation of "rest" and "mult".
+ */
+ while (mcelem_index < nmcelem)
+ {
+ mult *= (1.0f - numbers[mcelem_index]);
+ rest -= numbers[mcelem_index];
+ mcelem_index++;
+ }
+
+ /*
+ * The presence of many distinct rare elements materially decreases
+ * selectivity. Use the Poisson distribution to estimate the probability
+ * of a column value having zero occurrences of such elements. See above
+ * for the definition of "rest".
+ */
+ mult *= exp(-rest);
+
+ /*----------
+ * Using the distinct element count histogram requires
+ * O(unique_nitems * (nmcelem + unique_nitems))
+ * operations. Beyond a certain computational cost threshold, it's
+ * reasonable to sacrifice accuracy for decreased planning time. We limit
+ * the number of operations to EFFORT * nmcelem; since nmcelem is limited
+ * by the column's statistics target, the work done is user-controllable.
+ *
+ * If the number of operations would be too large, we can reduce it
+ * without losing all accuracy by reducing unique_nitems and considering
+ * only the most-common elements of the constant array. To make the
+ * results exactly match what we would have gotten with only those
+ * elements to start with, we'd have to remove any discarded elements'
+ * frequencies from "mult", but since this is only an approximation
+ * anyway, we don't bother with that. Therefore it's sufficient to qsort
+ * elem_selec[] and take the largest elements. (They will no longer match
+ * up with the elements of array_data[], but we don't care.)
+ *----------
+ */
+#define EFFORT 100
+
+ if ((nmcelem + unique_nitems) > 0 &&
+ unique_nitems > EFFORT * nmcelem / (nmcelem + unique_nitems))
+ {
+ /*
+ * Use the quadratic formula to solve for largest allowable N. We
+ * have A = 1, B = nmcelem, C = - EFFORT * nmcelem.
+ */
+ double b = (double) nmcelem;
+ int n;
+
+ n = (int) ((sqrt(b * b + 4 * EFFORT * b) - b) / 2);
+
+ /* Sort, then take just the first n elements */
+ qsort(elem_selec, unique_nitems, sizeof(float),
+ float_compare_desc);
+ unique_nitems = n;
+ }
+
+ /*
+ * Calculate probabilities of each distinct element count for both mcelems
+ * and constant elements. At this point, assume independent element
+ * occurrence.
+ */
+ dist = calc_distr(elem_selec, unique_nitems, unique_nitems, 0.0f);
+ mcelem_dist = calc_distr(numbers, nmcelem, unique_nitems, rest);
+
+ /* ignore hist[nhist-1], which is the average not a histogram member */
+ hist_part = calc_hist(hist, nhist - 1, unique_nitems);
+
+ selec = 0.0f;
+ for (i = 0; i <= unique_nitems; i++)
+ {
+ /*
+ * mult * dist[i] / mcelem_dist[i] gives us probability of qual
+ * matching from assumption of independent element occurrence with the
+ * condition that distinct element count = i.
+ */
+ if (mcelem_dist[i] > 0)
+ selec += hist_part[i] * mult * dist[i] / mcelem_dist[i];
+ }
+
+ pfree(dist);
+ pfree(mcelem_dist);
+ pfree(hist_part);
+ pfree(elem_selec);
+
+ /* Take into account occurrence of NULL element. */
+ selec *= (1.0f - nullelem_freq);
+
+ CLAMP_PROBABILITY(selec);
+
+ return selec;
+}
+
+/*
+ * Calculate the first n distinct element count probabilities from a
+ * histogram of distinct element counts.
+ *
+ * Returns a palloc'd array of n+1 entries, with array[k] being the
+ * probability of element count k, k in [0..n].
+ *
+ * We assume that a histogram box with bounds a and b gives 1 / ((b - a + 1) *
+ * (nhist - 1)) probability to each value in (a,b) and an additional half of
+ * that to a and b themselves.
+ */
+static float *
+calc_hist(const float4 *hist, int nhist, int n)
+{
+ float *hist_part;
+ int k,
+ i = 0;
+ float prev_interval = 0,
+ next_interval;
+ float frac;
+
+ hist_part = (float *) palloc((n + 1) * sizeof(float));
+
+ /*
+ * frac is a probability contribution for each interval between histogram
+ * values. We have nhist - 1 intervals, so contribution of each one will
+ * be 1 / (nhist - 1).
+ */
+ frac = 1.0f / ((float) (nhist - 1));
+
+ for (k = 0; k <= n; k++)
+ {
+ int count = 0;
+
+ /*
+ * Count the histogram boundaries equal to k. (Although the histogram
+ * should theoretically contain only exact integers, entries are
+ * floats so there could be roundoff error in large values. Treat any
+ * fractional value as equal to the next larger k.)
+ */
+ while (i < nhist && hist[i] <= k)
+ {
+ count++;
+ i++;
+ }
+
+ if (count > 0)
+ {
+ /* k is an exact bound for at least one histogram box. */
+ float val;
+
+ /* Find length between current histogram value and the next one */
+ if (i < nhist)
+ next_interval = hist[i] - hist[i - 1];
+ else
+ next_interval = 0;
+
+ /*
+ * count - 1 histogram boxes contain k exclusively. They
+ * contribute a total of (count - 1) * frac probability. Also
+ * factor in the partial histogram boxes on either side.
+ */
+ val = (float) (count - 1);
+ if (next_interval > 0)
+ val += 0.5f / next_interval;
+ if (prev_interval > 0)
+ val += 0.5f / prev_interval;
+ hist_part[k] = frac * val;
+
+ prev_interval = next_interval;
+ }
+ else
+ {
+ /* k does not appear as an exact histogram bound. */
+ if (prev_interval > 0)
+ hist_part[k] = frac / prev_interval;
+ else
+ hist_part[k] = 0.0f;
+ }
+ }
+
+ return hist_part;
+}
+
+/*
+ * Consider n independent events with probabilities p[]. This function
+ * calculates the probability that exactly k of the events occur, for each
+ * k in [0..m].
+ * Returns a palloc'd array of size m+1.
+ *
+ * "rest" is the sum of the probabilities of all low-probability events not
+ * included in p.
+ *
+ * Imagine matrix M of size (n + 1) x (m + 1). Element M[i,j] denotes the
+ * probability that exactly j of first i events occur. Obviously M[0,0] = 1.
+ * For any constant j, each increment of i increases the probability iff the
+ * event occurs. So, by the law of total probability:
+ * M[i,j] = M[i - 1, j] * (1 - p[i]) + M[i - 1, j - 1] * p[i]
+ * for i > 0, j > 0.
+ * M[i,0] = M[i - 1, 0] * (1 - p[i]) for i > 0.
+ */
+static float *
+calc_distr(const float *p, int n, int m, float rest)
+{
+ float *row,
+ *prev_row,
+ *tmp;
+ int i,
+ j;
+
+ /*
+ * Since we return only the last row of the matrix and need only the
+ * current and previous row for calculations, allocate two rows.
+ */
+ row = (float *) palloc((m + 1) * sizeof(float));
+ prev_row = (float *) palloc((m + 1) * sizeof(float));
+
+ /* M[0,0] = 1 */
+ row[0] = 1.0f;
+ for (i = 1; i <= n; i++)
+ {
+ float t = p[i - 1];
+
+ /* Swap rows */
+ tmp = row;
+ row = prev_row;
+ prev_row = tmp;
+
+ /* Calculate next row */
+ for (j = 0; j <= i && j <= m; j++)
+ {
+ float val = 0.0f;
+
+ if (j < i)
+ val += prev_row[j] * (1.0f - t);
+ if (j > 0)
+ val += prev_row[j - 1] * t;
+ row[j] = val;
+ }
+ }
+
+ /*
+ * The presence of many distinct rare (not in "p") elements materially
+ * decreases selectivity. Model their collective occurrence with the
+ * Poisson distribution.
+ */
+ if (rest > DEFAULT_CONTAIN_SEL)
+ {
+ float t;
+
+ /* Swap rows */
+ tmp = row;
+ row = prev_row;
+ prev_row = tmp;
+
+ for (i = 0; i <= m; i++)
+ row[i] = 0.0f;
+
+ /* Value of Poisson distribution for 0 occurrences */
+ t = exp(-rest);
+
+ /*
+ * Calculate convolution of previously computed distribution and the
+ * Poisson distribution.
+ */
+ for (i = 0; i <= m; i++)
+ {
+ for (j = 0; j <= m - i; j++)
+ row[j + i] += prev_row[j] * t;
+
+ /* Get Poisson distribution value for (i + 1) occurrences */
+ t *= rest / (float) (i + 1);
+ }
+ }
+
+ pfree(prev_row);
+ return row;
+}
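
The recurrence in the header comment is a small dynamic program over an (n + 1) x (m + 1) matrix. A standalone sketch for two events, omitting the Poisson correction applied for "rest" (plain C with made-up probabilities; not part of this file):

#include <stdio.h>

int
main(void)
{
	double		p[] = {0.3, 0.5};	/* made-up event probabilities */
	double		M[3][3] = {{1.0}};	/* M[i][j] = P(exactly j of first i events) */
	int			i, j;

	for (i = 1; i <= 2; i++)
		for (j = 0; j <= i; j++)
		{
			/* M[i][j] = M[i-1][j] * (1 - p[i]) + M[i-1][j-1] * p[i] */
			M[i][j] = M[i - 1][j] * (1.0 - p[i - 1]);
			if (j > 0)
				M[i][j] += M[i - 1][j - 1] * p[i - 1];
		}

	for (j = 0; j <= 2; j++)
		printf("P(distinct count = %d) = %g\n", j, M[2][j]);
	return 0;
}
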
+
+/* Fast computation of the floor of the base-2 logarithm of n. */
+static int
+floor_log2(uint32 n)
+{
+ int logval = 0;
+
+ if (n == 0)
+ return -1;
+ if (n >= (1 << 16))
+ {
+ n >>= 16;
+ logval += 16;
+ }
+ if (n >= (1 << 8))
+ {
+ n >>= 8;
+ logval += 8;
+ }
+ if (n >= (1 << 4))
+ {
+ n >>= 4;
+ logval += 4;
+ }
+ if (n >= (1 << 2))
+ {
+ n >>= 2;
+ logval += 2;
+ }
+ if (n >= (1 << 1))
+ {
+ logval += 1;
+ }
+ return logval;
+}
+
+/*
+ * find_next_mcelem binary-searches a most common elements array, starting
+ * from *index, for the first member >= value. It saves the position of the
+ * match into *index and returns true if it's an exact match. (Note: we
+ * assume the mcelem elements are distinct so there can't be more than one
+ * exact match.)
+ */
+static bool
+find_next_mcelem(Datum *mcelem, int nmcelem, Datum value, int *index,
+ TypeCacheEntry *typentry)
+{
+ int l = *index,
+ r = nmcelem - 1,
+ i,
+ res;
+
+ while (l <= r)
+ {
+ i = (l + r) / 2;
+ res = element_compare(&mcelem[i], &value, typentry);
+ if (res == 0)
+ {
+ *index = i;
+ return true;
+ }
+ else if (res < 0)
+ l = i + 1;
+ else
+ r = i - 1;
+ }
+ *index = l;
+ return false;
+}
+
+/*
+ * Comparison function for elements.
+ *
+ * We use the element type's default btree opclass, and its default collation
+ * if the type is collation-sensitive.
+ *
+ * XXX consider using SortSupport infrastructure
+ */
+static int
+element_compare(const void *key1, const void *key2, void *arg)
+{
+ Datum d1 = *((const Datum *) key1);
+ Datum d2 = *((const Datum *) key2);
+ TypeCacheEntry *typentry = (TypeCacheEntry *) arg;
+ FmgrInfo *cmpfunc = &typentry->cmp_proc_finfo;
+ Datum c;
+
+ c = FunctionCall2Coll(cmpfunc, typentry->typcollation, d1, d2);
+ return DatumGetInt32(c);
+}
+
+/*
+ * Comparison function for sorting floats into descending order.
+ */
+static int
+float_compare_desc(const void *key1, const void *key2)
+{
+ float d1 = *((const float *) key1);
+ float d2 = *((const float *) key2);
+
+ if (d1 > d2)
+ return -1;
+ else if (d1 < d2)
+ return 1;
+ else
+ return 0;
+}
diff --git a/src/backend/utils/adt/array_typanalyze.c b/src/backend/utils/adt/array_typanalyze.c
new file mode 100644
index 0000000..2360c68
--- /dev/null
+++ b/src/backend/utils/adt/array_typanalyze.c
@@ -0,0 +1,791 @@
+/*-------------------------------------------------------------------------
+ *
+ * array_typanalyze.c
+ * Functions for gathering statistics from array columns
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/array_typanalyze.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/detoast.h"
+#include "commands/vacuum.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/lsyscache.h"
+#include "utils/typcache.h"
+
+
+/*
+ * To avoid consuming too much memory, IO and CPU load during analysis, and/or
+ * too much space in the resulting pg_statistic rows, we ignore arrays that
+ * are wider than ARRAY_WIDTH_THRESHOLD (after detoasting!). Note that this
+ * number is considerably more than the similar WIDTH_THRESHOLD limit used
+ * in analyze.c's standard typanalyze code.
+ */
+#define ARRAY_WIDTH_THRESHOLD 0x10000
+
+/* Extra data for compute_array_stats function */
+typedef struct
+{
+ /* Information about array element type */
+ Oid type_id; /* element type's OID */
+ Oid eq_opr; /* default equality operator's OID */
+ Oid coll_id; /* collation to use */
+ bool typbyval; /* physical properties of element type */
+ int16 typlen;
+ char typalign;
+
+ /*
+ * Lookup data for element type's comparison and hash functions (these are
+ * in the type's typcache entry, which we expect to remain valid over the
+ * lifespan of the ANALYZE run)
+ */
+ FmgrInfo *cmp;
+ FmgrInfo *hash;
+
+ /* Saved state from std_typanalyze() */
+ AnalyzeAttrComputeStatsFunc std_compute_stats;
+ void *std_extra_data;
+} ArrayAnalyzeExtraData;
+
+/*
+ * While compute_array_stats is running, we keep a pointer to the extra data
+ * here for use by assorted subroutines. compute_array_stats doesn't
+ * currently need to be re-entrant, so avoiding this is not worth the extra
+ * notational cruft that would be needed.
+ */
+static ArrayAnalyzeExtraData *array_extra_data;
+
+/* A hash table entry for the Lossy Counting algorithm */
+typedef struct
+{
+ Datum key; /* This is 'e' from the LC algorithm. */
+ int frequency; /* This is 'f'. */
+ int delta; /* And this is 'delta'. */
+ int last_container; /* For de-duplication of array elements. */
+} TrackItem;
+
+/* A hash table entry for distinct-elements counts */
+typedef struct
+{
+ int count; /* Count of distinct elements in an array */
+ int frequency; /* Number of arrays seen with this count */
+} DECountItem;
+
+static void compute_array_stats(VacAttrStats *stats,
+ AnalyzeAttrFetchFunc fetchfunc, int samplerows, double totalrows);
+static void prune_element_hashtable(HTAB *elements_tab, int b_current);
+static uint32 element_hash(const void *key, Size keysize);
+static int element_match(const void *key1, const void *key2, Size keysize);
+static int element_compare(const void *key1, const void *key2);
+static int trackitem_compare_frequencies_desc(const void *e1, const void *e2, void *arg);
+static int trackitem_compare_element(const void *e1, const void *e2, void *arg);
+static int countitem_compare_count(const void *e1, const void *e2, void *arg);
+
+
+/*
+ * array_typanalyze -- typanalyze function for array columns
+ */
+Datum
+array_typanalyze(PG_FUNCTION_ARGS)
+{
+ VacAttrStats *stats = (VacAttrStats *) PG_GETARG_POINTER(0);
+ Oid element_typeid;
+ TypeCacheEntry *typentry;
+ ArrayAnalyzeExtraData *extra_data;
+
+ /*
+ * Call the standard typanalyze function. It may fail to find needed
+ * operators, in which case we also can't do anything, so just fail.
+ */
+ if (!std_typanalyze(stats))
+ PG_RETURN_BOOL(false);
+
+ /*
+ * Check attribute data type is a varlena array (or a domain over one).
+ */
+ element_typeid = get_base_element_type(stats->attrtypid);
+ if (!OidIsValid(element_typeid))
+ elog(ERROR, "array_typanalyze was invoked for non-array type %u",
+ stats->attrtypid);
+
+ /*
+ * Gather information about the element type. If we fail to find
+ * something, return leaving the state from std_typanalyze() in place.
+ */
+ typentry = lookup_type_cache(element_typeid,
+ TYPECACHE_EQ_OPR |
+ TYPECACHE_CMP_PROC_FINFO |
+ TYPECACHE_HASH_PROC_FINFO);
+
+ if (!OidIsValid(typentry->eq_opr) ||
+ !OidIsValid(typentry->cmp_proc_finfo.fn_oid) ||
+ !OidIsValid(typentry->hash_proc_finfo.fn_oid))
+ PG_RETURN_BOOL(true);
+
+ /* Store our findings for use by compute_array_stats() */
+ extra_data = (ArrayAnalyzeExtraData *) palloc(sizeof(ArrayAnalyzeExtraData));
+ extra_data->type_id = typentry->type_id;
+ extra_data->eq_opr = typentry->eq_opr;
+ extra_data->coll_id = stats->attrcollid; /* collation we should use */
+ extra_data->typbyval = typentry->typbyval;
+ extra_data->typlen = typentry->typlen;
+ extra_data->typalign = typentry->typalign;
+ extra_data->cmp = &typentry->cmp_proc_finfo;
+ extra_data->hash = &typentry->hash_proc_finfo;
+
+ /* Save old compute_stats and extra_data for scalar statistics ... */
+ extra_data->std_compute_stats = stats->compute_stats;
+ extra_data->std_extra_data = stats->extra_data;
+
+ /* ... and replace with our info */
+ stats->compute_stats = compute_array_stats;
+ stats->extra_data = extra_data;
+
+ /*
+ * Note we leave stats->minrows set as std_typanalyze set it. Should it
+ * be increased for array analysis purposes?
+ */
+
+ PG_RETURN_BOOL(true);
+}
+
+/*
+ * compute_array_stats() -- compute statistics for an array column
+ *
+ * This function computes statistics useful for determining selectivity of
+ * the array operators <@, &&, and @>. It is invoked by ANALYZE via the
+ * compute_stats hook after sample rows have been collected.
+ *
+ * We also invoke the standard compute_stats function, which will compute
+ * "scalar" statistics relevant to the btree-style array comparison operators.
+ * However, exact duplicates of an entire array may be rare despite many
+ * arrays sharing individual elements. This especially afflicts long arrays,
+ * which are also liable to lack all scalar statistics due to the low
+ * WIDTH_THRESHOLD used in analyze.c. So, in addition to the standard stats,
+ * we find the most common array elements and compute a histogram of distinct
+ * element counts.
+ *
+ * The algorithm used is Lossy Counting, as proposed in the paper "Approximate
+ * frequency counts over data streams" by G. S. Manku and R. Motwani, in
+ * Proceedings of the 28th International Conference on Very Large Data Bases,
+ * Hong Kong, China, August 2002, section 4.2. The paper is available at
+ * http://www.vldb.org/conf/2002/S10P03.pdf
+ *
+ * The Lossy Counting (aka LC) algorithm goes like this:
+ * Let s be the threshold frequency for an item (the minimum frequency we
+ * are interested in) and epsilon the error margin for the frequency. Let D
+ * be a set of triples (e, f, delta), where e is an element value, f is that
+ * element's frequency (actually, its current occurrence count) and delta is
+ * the maximum error in f. We start with D empty and process the elements in
+ * batches of size w. (The batch size is also known as "bucket size" and is
+ * equal to 1/epsilon.) Let the current batch number be b_current, starting
+ * with 1. For each element e we either increment its f count, if it's
+ * already in D, or insert a new triple into D with values (e, 1, b_current
+ * - 1). After processing each batch we prune D, by removing from it all
+ * elements with f + delta <= b_current. After the algorithm finishes we
+ * suppress all elements from D that do not satisfy f >= (s - epsilon) * N,
+ * where N is the total number of elements in the input. We emit the
+ * remaining elements with estimated frequency f/N. The LC paper proves
+ * that this algorithm finds all elements with true frequency at least s,
+ * and that no frequency is overestimated or is underestimated by more than
+ * epsilon. Furthermore, given reasonable assumptions about the input
+ * distribution, the required table size is no more than about 7 times w.
+ *
+ * In the absence of a principled basis for other particular values, we
+ * follow ts_typanalyze() and use parameters s = 0.07/K, epsilon = s/10.
+ * But we leave out the correction for stopwords, which do not apply to
+ * arrays. These parameters give bucket width w = K/0.007 and maximum
+ * expected hashtable size of about 1000 * K.
+ *
+ * Elements may repeat within an array. Since duplicates do not change the
+ * behavior of <@, && or @>, we want to count each element only once per
+ * array. Therefore, we store in the finished pg_statistic entry each
+ * element's frequency as the fraction of all non-null rows that contain it.
+ * We divide the raw counts by nonnull_cnt to get those figures.
+ */
+static void
+compute_array_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
+ int samplerows, double totalrows)
+{
+ ArrayAnalyzeExtraData *extra_data;
+ int num_mcelem;
+ int null_elem_cnt = 0;
+ int analyzed_rows = 0;
+
+ /* This is D from the LC algorithm. */
+ HTAB *elements_tab;
+ HASHCTL elem_hash_ctl;
+ HASH_SEQ_STATUS scan_status;
+
+ /* This is the current bucket number from the LC algorithm */
+ int b_current;
+
+ /* This is 'w' from the LC algorithm */
+ int bucket_width;
+ int array_no;
+ int64 element_no;
+ TrackItem *item;
+ int slot_idx;
+ HTAB *count_tab;
+ HASHCTL count_hash_ctl;
+ DECountItem *count_item;
+
+ extra_data = (ArrayAnalyzeExtraData *) stats->extra_data;
+
+ /*
+ * Invoke analyze.c's standard analysis function to create scalar-style
+ * stats for the column. It will expect its own extra_data pointer, so
+ * temporarily install that.
+ */
+ stats->extra_data = extra_data->std_extra_data;
+ extra_data->std_compute_stats(stats, fetchfunc, samplerows, totalrows);
+ stats->extra_data = extra_data;
+
+ /*
+ * Set up static pointer for use by subroutines. We wait till here in
+ * case std_compute_stats somehow recursively invokes us (probably not
+ * possible, but ...)
+ */
+ array_extra_data = extra_data;
+
+ /*
+ * We want statistics_target * 10 elements in the MCELEM array. This
+ * multiplier is pretty arbitrary, but is meant to reflect the fact that
+ * the number of individual elements tracked in pg_statistic ought to be
+ * more than the number of values for a simple scalar column.
+ */
+ num_mcelem = stats->attr->attstattarget * 10;
+
+ /*
+ * We set bucket width equal to num_mcelem / 0.007 as per the comment
+ * above.
+ */
+ bucket_width = num_mcelem * 1000 / 7;
+
+ /*
+ * Create the hashtable. It will be in local memory, so we don't need to
+ * worry about overflowing the initial size. Also we don't need to pay any
+ * attention to locking and memory management.
+ */
+ elem_hash_ctl.keysize = sizeof(Datum);
+ elem_hash_ctl.entrysize = sizeof(TrackItem);
+ elem_hash_ctl.hash = element_hash;
+ elem_hash_ctl.match = element_match;
+ elem_hash_ctl.hcxt = CurrentMemoryContext;
+ elements_tab = hash_create("Analyzed elements table",
+ num_mcelem,
+ &elem_hash_ctl,
+ HASH_ELEM | HASH_FUNCTION | HASH_COMPARE | HASH_CONTEXT);
+
+ /* hashtable for array distinct elements counts */
+ count_hash_ctl.keysize = sizeof(int);
+ count_hash_ctl.entrysize = sizeof(DECountItem);
+ count_hash_ctl.hcxt = CurrentMemoryContext;
+ count_tab = hash_create("Array distinct element count table",
+ 64,
+ &count_hash_ctl,
+ HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
+
+ /* Initialize counters. */
+ b_current = 1;
+ element_no = 0;
+
+ /* Loop over the arrays. */
+ for (array_no = 0; array_no < samplerows; array_no++)
+ {
+ Datum value;
+ bool isnull;
+ ArrayType *array;
+ int num_elems;
+ Datum *elem_values;
+ bool *elem_nulls;
+ bool null_present;
+ int j;
+ int64 prev_element_no = element_no;
+ int distinct_count;
+ bool count_item_found;
+
+ vacuum_delay_point();
+
+ value = fetchfunc(stats, array_no, &isnull);
+ if (isnull)
+ {
+ /* ignore arrays that are null overall */
+ continue;
+ }
+
+ /* Skip too-large values. */
+ if (toast_raw_datum_size(value) > ARRAY_WIDTH_THRESHOLD)
+ continue;
+ else
+ analyzed_rows++;
+
+ /*
+ * Now detoast the array if needed, and deconstruct into datums.
+ */
+ array = DatumGetArrayTypeP(value);
+
+ Assert(ARR_ELEMTYPE(array) == extra_data->type_id);
+ deconstruct_array(array,
+ extra_data->type_id,
+ extra_data->typlen,
+ extra_data->typbyval,
+ extra_data->typalign,
+ &elem_values, &elem_nulls, &num_elems);
+
+ /*
+ * We loop through the elements in the array and add them to our
+ * tracking hashtable.
+ */
+ null_present = false;
+ for (j = 0; j < num_elems; j++)
+ {
+ Datum elem_value;
+ bool found;
+
+ /* No null element processing other than flag setting here */
+ if (elem_nulls[j])
+ {
+ null_present = true;
+ continue;
+ }
+
+ /* Lookup current element in hashtable, adding it if new */
+ elem_value = elem_values[j];
+ item = (TrackItem *) hash_search(elements_tab,
+ (const void *) &elem_value,
+ HASH_ENTER, &found);
+
+ if (found)
+ {
+ /* The element value is already on the tracking list */
+
+ /*
+ * The operators we assist ignore duplicate array elements, so
+ * count a given distinct element only once per array.
+ */
+ if (item->last_container == array_no)
+ continue;
+
+ item->frequency++;
+ item->last_container = array_no;
+ }
+ else
+ {
+ /* Initialize new tracking list element */
+
+ /*
+ * If element type is pass-by-reference, we must copy it into
+ * palloc'd space, so that we can release the array below. (We
+ * do this so that the space needed for element values is
+ * limited by the size of the hashtable; if we kept all the
+ * array values around, it could be much more.)
+ */
+ item->key = datumCopy(elem_value,
+ extra_data->typbyval,
+ extra_data->typlen);
+
+ item->frequency = 1;
+ item->delta = b_current - 1;
+ item->last_container = array_no;
+ }
+
+ /* element_no is the number of elements processed (ie N) */
+ element_no++;
+
+ /* We prune the D structure after processing each bucket */
+ if (element_no % bucket_width == 0)
+ {
+ prune_element_hashtable(elements_tab, b_current);
+ b_current++;
+ }
+ }
+
+ /* Count null element presence once per array. */
+ if (null_present)
+ null_elem_cnt++;
+
+ /* Update frequency of the particular array distinct element count. */
+ distinct_count = (int) (element_no - prev_element_no);
+ count_item = (DECountItem *) hash_search(count_tab, &distinct_count,
+ HASH_ENTER,
+ &count_item_found);
+
+ if (count_item_found)
+ count_item->frequency++;
+ else
+ count_item->frequency = 1;
+
+ /* Free memory allocated while detoasting. */
+ if (PointerGetDatum(array) != value)
+ pfree(array);
+ pfree(elem_values);
+ pfree(elem_nulls);
+ }
+
+ /* Skip pg_statistic slots occupied by standard statistics */
+ slot_idx = 0;
+ while (slot_idx < STATISTIC_NUM_SLOTS && stats->stakind[slot_idx] != 0)
+ slot_idx++;
+ if (slot_idx > STATISTIC_NUM_SLOTS - 2)
+ elog(ERROR, "insufficient pg_statistic slots for array stats");
+
+ /* We can only compute real stats if we found some non-null values. */
+ if (analyzed_rows > 0)
+ {
+ int nonnull_cnt = analyzed_rows;
+ int count_items_count;
+ int i;
+ TrackItem **sort_table;
+ int track_len;
+ int64 cutoff_freq;
+ int64 minfreq,
+ maxfreq;
+
+ /*
+ * We assume the standard stats code already took care of setting
+ * stats_valid, stanullfrac, stawidth, stadistinct. We'd have to
+ * re-compute those values if we wanted to not store the standard
+ * stats.
+ */
+
+ /*
+ * Construct an array of the interesting hashtable items, that is,
+ * those meeting the cutoff frequency (s - epsilon)*N. Also identify
+ * the minimum and maximum frequencies among these items.
+ *
+ * Since epsilon = s/10 and bucket_width = 1/epsilon, the cutoff
+ * frequency is 9*N / bucket_width.
+ */
+ cutoff_freq = 9 * element_no / bucket_width;
+
+ i = hash_get_num_entries(elements_tab); /* surely enough space */
+ sort_table = (TrackItem **) palloc(sizeof(TrackItem *) * i);
+
+ hash_seq_init(&scan_status, elements_tab);
+ track_len = 0;
+ minfreq = element_no;
+ maxfreq = 0;
+ while ((item = (TrackItem *) hash_seq_search(&scan_status)) != NULL)
+ {
+ if (item->frequency > cutoff_freq)
+ {
+ sort_table[track_len++] = item;
+ minfreq = Min(minfreq, item->frequency);
+ maxfreq = Max(maxfreq, item->frequency);
+ }
+ }
+ Assert(track_len <= i);
+
+ /* emit some statistics for debug purposes */
+ elog(DEBUG3, "compute_array_stats: target # mces = %d, "
+ "bucket width = %d, "
+ "# elements = " INT64_FORMAT ", hashtable size = %d, "
+ "usable entries = %d",
+ num_mcelem, bucket_width, element_no, i, track_len);
+
+ /*
+ * If we obtained more elements than we really want, get rid of those
+ * with least frequencies. The easiest way is to qsort the array into
+ * descending frequency order and truncate the array.
+ */
+ if (num_mcelem < track_len)
+ {
+ qsort_interruptible(sort_table, track_len, sizeof(TrackItem *),
+ trackitem_compare_frequencies_desc, NULL);
+ /* reset minfreq to the smallest frequency we're keeping */
+ minfreq = sort_table[num_mcelem - 1]->frequency;
+ }
+ else
+ num_mcelem = track_len;
+
+ /* Generate MCELEM slot entry */
+ if (num_mcelem > 0)
+ {
+ MemoryContext old_context;
+ Datum *mcelem_values;
+ float4 *mcelem_freqs;
+
+ /*
+ * We want to store statistics sorted on the element value using
+ * the element type's default comparison function. This permits
+ * fast binary searches in selectivity estimation functions.
+ */
+ qsort_interruptible(sort_table, num_mcelem, sizeof(TrackItem *),
+ trackitem_compare_element, NULL);
+
+ /* Must copy the target values into anl_context */
+ old_context = MemoryContextSwitchTo(stats->anl_context);
+
+ /*
+ * We sorted statistics on the element value, but we want to be
+ * able to find the minimal and maximal frequencies without going
+ * through all the values. We also want the frequency of null
+ * elements. Store these three values at the end of mcelem_freqs.
+ */
+ mcelem_values = (Datum *) palloc(num_mcelem * sizeof(Datum));
+ mcelem_freqs = (float4 *) palloc((num_mcelem + 3) * sizeof(float4));
+
+ /*
+ * See comments above about use of nonnull_cnt as the divisor for
+ * the final frequency estimates.
+ */
+ for (i = 0; i < num_mcelem; i++)
+ {
+ TrackItem *item = sort_table[i];
+
+ mcelem_values[i] = datumCopy(item->key,
+ extra_data->typbyval,
+ extra_data->typlen);
+ mcelem_freqs[i] = (double) item->frequency /
+ (double) nonnull_cnt;
+ }
+ mcelem_freqs[i++] = (double) minfreq / (double) nonnull_cnt;
+ mcelem_freqs[i++] = (double) maxfreq / (double) nonnull_cnt;
+ mcelem_freqs[i++] = (double) null_elem_cnt / (double) nonnull_cnt;
+
+ MemoryContextSwitchTo(old_context);
+
+ stats->stakind[slot_idx] = STATISTIC_KIND_MCELEM;
+ stats->staop[slot_idx] = extra_data->eq_opr;
+ stats->stacoll[slot_idx] = extra_data->coll_id;
+ stats->stanumbers[slot_idx] = mcelem_freqs;
+ /* See above comment about extra stanumber entries */
+ stats->numnumbers[slot_idx] = num_mcelem + 3;
+ stats->stavalues[slot_idx] = mcelem_values;
+ stats->numvalues[slot_idx] = num_mcelem;
+ /* We are storing values of element type */
+ stats->statypid[slot_idx] = extra_data->type_id;
+ stats->statyplen[slot_idx] = extra_data->typlen;
+ stats->statypbyval[slot_idx] = extra_data->typbyval;
+ stats->statypalign[slot_idx] = extra_data->typalign;
+ slot_idx++;
+ }
+
+ /* Generate DECHIST slot entry */
+ count_items_count = hash_get_num_entries(count_tab);
+ if (count_items_count > 0)
+ {
+ int num_hist = stats->attr->attstattarget;
+ DECountItem **sorted_count_items;
+ int j;
+ int delta;
+ int64 frac;
+ float4 *hist;
+
+ /* num_hist must be at least 2 for the loop below to work */
+ num_hist = Max(num_hist, 2);
+
+ /*
+ * Create an array of DECountItem pointers, and sort them into
+ * increasing count order.
+ */
+ sorted_count_items = (DECountItem **)
+ palloc(sizeof(DECountItem *) * count_items_count);
+ hash_seq_init(&scan_status, count_tab);
+ j = 0;
+ while ((count_item = (DECountItem *) hash_seq_search(&scan_status)) != NULL)
+ {
+ sorted_count_items[j++] = count_item;
+ }
+ qsort_interruptible(sorted_count_items, count_items_count,
+ sizeof(DECountItem *),
+ countitem_compare_count, NULL);
+
+ /*
+ * Prepare to fill stanumbers with the histogram, followed by the
+ * average count. This array must be stored in anl_context.
+ */
+ hist = (float4 *)
+ MemoryContextAlloc(stats->anl_context,
+ sizeof(float4) * (num_hist + 1));
+ hist[num_hist] = (double) element_no / (double) nonnull_cnt;
+
+ /*----------
+ * Construct the histogram of distinct-element counts (DECs).
+ *
+ * The object of this loop is to copy the min and max DECs to
+ * hist[0] and hist[num_hist - 1], along with evenly-spaced DECs
+ * in between (where "evenly-spaced" is with reference to the
+ * whole input population of arrays). If we had a complete sorted
+ * array of DECs, one per analyzed row, the i'th hist value would
+ * come from DECs[i * (analyzed_rows - 1) / (num_hist - 1)]
+ * (compare the histogram-making loop in compute_scalar_stats()).
+ * But instead of that we have the sorted_count_items[] array,
+ * which holds unique DEC values with their frequencies (that is,
+ * a run-length-compressed version of the full array). So we
+ * control advancing through sorted_count_items[] with the
+ * variable "frac", which is defined as (x - y) * (num_hist - 1),
+ * where x is the index in the notional DECs array corresponding
+ * to the start of the next sorted_count_items[] element's run,
+ * and y is the index in DECs from which we should take the next
+ * histogram value. We have to advance whenever x <= y, that is
+ * frac <= 0. The x component is the sum of the frequencies seen
+ * so far (up through the current sorted_count_items[] element),
+ * and of course y * (num_hist - 1) = i * (analyzed_rows - 1),
+ * per the subscript calculation above. (The subscript calculation
+ * implies dropping any fractional part of y; in this formulation
+ * that's handled by not advancing until frac reaches 1.)
+ *
+ * Even though frac has a bounded range, it could overflow int32
+ * when working with very large statistics targets, so we do that
+ * math in int64.
+ *----------
+ */
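+ /*
+ * As a concrete illustration: with analyzed_rows = 5 and per-row DECs of
+ * 1,1,2,3,3, sorted_count_items holds (count=1, freq=2), (count=2, freq=1),
+ * (count=3, freq=2). For num_hist = 3 we get delta = 4 and an initial
+ * frac of 2 * 2 = 4, and the loop below emits hist = {1, 2, 3} -- exactly
+ * DECs[0], DECs[2] and DECs[4] of the notional sorted array {1,1,2,3,3}.
+ */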
+ delta = analyzed_rows - 1;
+ j = 0; /* current index in sorted_count_items */
+ /* Initialize frac for sorted_count_items[0]; y is initially 0 */
+ frac = (int64) sorted_count_items[0]->frequency * (num_hist - 1);
+ for (i = 0; i < num_hist; i++)
+ {
+ while (frac <= 0)
+ {
+ /* Advance, and update x component of frac */
+ j++;
+ frac += (int64) sorted_count_items[j]->frequency * (num_hist - 1);
+ }
+ hist[i] = sorted_count_items[j]->count;
+ frac -= delta; /* update y for upcoming i increment */
+ }
+ Assert(j == count_items_count - 1);
+
+ stats->stakind[slot_idx] = STATISTIC_KIND_DECHIST;
+ stats->staop[slot_idx] = extra_data->eq_opr;
+ stats->stacoll[slot_idx] = extra_data->coll_id;
+ stats->stanumbers[slot_idx] = hist;
+ stats->numnumbers[slot_idx] = num_hist + 1;
+ slot_idx++;
+ }
+ }
+
+ /*
+ * We don't need to bother cleaning up any of our temporary palloc's. The
+ * hashtable should also go away, as it used a child memory context.
+ */
+}
+
+/*
+ * A function to prune the D structure from the Lossy Counting algorithm.
+ * Consult compute_tsvector_stats() for wider explanation.
+ */
+static void
+prune_element_hashtable(HTAB *elements_tab, int b_current)
+{
+ HASH_SEQ_STATUS scan_status;
+ TrackItem *item;
+
+ hash_seq_init(&scan_status, elements_tab);
+ while ((item = (TrackItem *) hash_seq_search(&scan_status)) != NULL)
+ {
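+ /*
+ * Lossy Counting invariant: the item's true occurrence count so far is
+ * at most frequency + delta. If even that upper bound does not exceed
+ * the current bucket number b_current, the item is below the tracking
+ * threshold and discarding it cannot violate the algorithm's error
+ * bound.
+ */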
+ if (item->frequency + item->delta <= b_current)
+ {
+ Datum value = item->key;
+
+ if (hash_search(elements_tab, (const void *) &item->key,
+ HASH_REMOVE, NULL) == NULL)
+ elog(ERROR, "hash table corrupted");
+ /* We should free memory if element is not passed by value */
+ if (!array_extra_data->typbyval)
+ pfree(DatumGetPointer(value));
+ }
+ }
+}
+
+/*
+ * Hash function for elements.
+ *
+ * We use the element type's default hash opclass, and the column collation
+ * if the type is collation-sensitive.
+ */
+static uint32
+element_hash(const void *key, Size keysize)
+{
+ Datum d = *((const Datum *) key);
+ Datum h;
+
+ h = FunctionCall1Coll(array_extra_data->hash,
+ array_extra_data->coll_id,
+ d);
+ return DatumGetUInt32(h);
+}
+
+/*
+ * Matching function for elements, to be used in hashtable lookups.
+ */
+static int
+element_match(const void *key1, const void *key2, Size keysize)
+{
+ /* The keysize parameter is superfluous here */
+ return element_compare(key1, key2);
+}
+
+/*
+ * Comparison function for elements.
+ *
+ * We use the element type's default btree opclass, and the column collation
+ * if the type is collation-sensitive.
+ *
+ * XXX consider using SortSupport infrastructure
+ */
+static int
+element_compare(const void *key1, const void *key2)
+{
+ Datum d1 = *((const Datum *) key1);
+ Datum d2 = *((const Datum *) key2);
+ Datum c;
+
+ c = FunctionCall2Coll(array_extra_data->cmp,
+ array_extra_data->coll_id,
+ d1, d2);
+ return DatumGetInt32(c);
+}
+
+/*
+ * Comparator for sorting TrackItems by frequencies (descending sort)
+ */
+static int
+trackitem_compare_frequencies_desc(const void *e1, const void *e2, void *arg)
+{
+ const TrackItem *const *t1 = (const TrackItem *const *) e1;
+ const TrackItem *const *t2 = (const TrackItem *const *) e2;
+
+ return (*t2)->frequency - (*t1)->frequency;
+}
+
+/*
+ * Comparator for sorting TrackItems by element values
+ */
+static int
+trackitem_compare_element(const void *e1, const void *e2, void *arg)
+{
+ const TrackItem *const *t1 = (const TrackItem *const *) e1;
+ const TrackItem *const *t2 = (const TrackItem *const *) e2;
+
+ return element_compare(&(*t1)->key, &(*t2)->key);
+}
+
+/*
+ * Comparator for sorting DECountItems by count
+ */
+static int
+countitem_compare_count(const void *e1, const void *e2, void *arg)
+{
+ const DECountItem *const *t1 = (const DECountItem *const *) e1;
+ const DECountItem *const *t2 = (const DECountItem *const *) e2;
+
+ if ((*t1)->count < (*t2)->count)
+ return -1;
+ else if ((*t1)->count == (*t2)->count)
+ return 0;
+ else
+ return 1;
+}
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
new file mode 100644
index 0000000..e75c774
--- /dev/null
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -0,0 +1,912 @@
+/*-------------------------------------------------------------------------
+ *
+ * array_userfuncs.c
+ * Misc user-visible array support functions
+ *
+ * Copyright (c) 2003-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/array_userfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/pg_type.h"
+#include "common/int.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/typcache.h"
+
+
+static Datum array_position_common(FunctionCallInfo fcinfo);
+
+
+/*
+ * fetch_array_arg_replace_nulls
+ *
+ * Fetch an array-valued argument in expanded form; if it's null, construct an
+ * empty array value of the proper data type. Also cache basic element type
+ * information in fn_extra.
+ *
+ * Caution: if the input is a read/write pointer, this returns the input
+ * argument; so callers must be sure that their changes are "safe", that is
+ * they cannot leave the array in a corrupt state.
+ *
+ * If we're being called as an aggregate function, make sure any newly-made
+ * expanded array is allocated in the aggregate state context, so as to save
+ * copying operations.
+ */
+static ExpandedArrayHeader *
+fetch_array_arg_replace_nulls(FunctionCallInfo fcinfo, int argno)
+{
+ ExpandedArrayHeader *eah;
+ Oid element_type;
+ ArrayMetaState *my_extra;
+ MemoryContext resultcxt;
+
+ /* If first time through, create datatype cache struct */
+ my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
+ if (my_extra == NULL)
+ {
+ my_extra = (ArrayMetaState *)
+ MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(ArrayMetaState));
+ my_extra->element_type = InvalidOid;
+ fcinfo->flinfo->fn_extra = my_extra;
+ }
+
+ /* Figure out which context we want the result in */
+ if (!AggCheckCallContext(fcinfo, &resultcxt))
+ resultcxt = CurrentMemoryContext;
+
+ /* Now collect the array value */
+ if (!PG_ARGISNULL(argno))
+ {
+ MemoryContext oldcxt = MemoryContextSwitchTo(resultcxt);
+
+ eah = PG_GETARG_EXPANDED_ARRAYX(argno, my_extra);
+ MemoryContextSwitchTo(oldcxt);
+ }
+ else
+ {
+ /* We have to look up the array type and element type */
+ Oid arr_typeid = get_fn_expr_argtype(fcinfo->flinfo, argno);
+
+ if (!OidIsValid(arr_typeid))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("could not determine input data type")));
+ element_type = get_element_type(arr_typeid);
+ if (!OidIsValid(element_type))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("input data type is not an array")));
+
+ eah = construct_empty_expanded_array(element_type,
+ resultcxt,
+ my_extra);
+ }
+
+ return eah;
+}
+
+/*-----------------------------------------------------------------------------
+ * array_append :
+ * push an element onto the end of a one-dimensional array
+ *----------------------------------------------------------------------------
+ */
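+ /*
+ * For example, array_append('{1,2}'::int[], 3) yields '{1,2,3}', and
+ * appending to an empty array yields a one-element array with lower
+ * bound 1.
+ */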
+Datum
+array_append(PG_FUNCTION_ARGS)
+{
+ ExpandedArrayHeader *eah;
+ Datum newelem;
+ bool isNull;
+ Datum result;
+ int *dimv,
+ *lb;
+ int indx;
+ ArrayMetaState *my_extra;
+
+ eah = fetch_array_arg_replace_nulls(fcinfo, 0);
+ isNull = PG_ARGISNULL(1);
+ if (isNull)
+ newelem = (Datum) 0;
+ else
+ newelem = PG_GETARG_DATUM(1);
+
+ if (eah->ndims == 1)
+ {
+ /* append newelem */
+ lb = eah->lbound;
+ dimv = eah->dims;
+
+ /* index of added elem is at lb[0] + (dimv[0] - 1) + 1 */
+ if (pg_add_s32_overflow(lb[0], dimv[0], &indx))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+ }
+ else if (eah->ndims == 0)
+ indx = 1;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_EXCEPTION),
+ errmsg("argument must be empty or one-dimensional array")));
+
+ /* Perform element insertion */
+ my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
+
+ result = array_set_element(EOHPGetRWDatum(&eah->hdr),
+ 1, &indx, newelem, isNull,
+ -1, my_extra->typlen, my_extra->typbyval, my_extra->typalign);
+
+ PG_RETURN_DATUM(result);
+}
+
+/*-----------------------------------------------------------------------------
+ * array_prepend :
+ * push an element onto the front of a one-dimensional array
+ *----------------------------------------------------------------------------
+ */
+Datum
+array_prepend(PG_FUNCTION_ARGS)
+{
+ ExpandedArrayHeader *eah;
+ Datum newelem;
+ bool isNull;
+ Datum result;
+ int *lb;
+ int indx;
+ int lb0;
+ ArrayMetaState *my_extra;
+
+ isNull = PG_ARGISNULL(0);
+ if (isNull)
+ newelem = (Datum) 0;
+ else
+ newelem = PG_GETARG_DATUM(0);
+ eah = fetch_array_arg_replace_nulls(fcinfo, 1);
+
+ if (eah->ndims == 1)
+ {
+ /* prepend newelem */
+ lb = eah->lbound;
+ lb0 = lb[0];
+
+ if (pg_sub_s32_overflow(lb0, 1, &indx))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+ }
+ else if (eah->ndims == 0)
+ {
+ indx = 1;
+ lb0 = 1;
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_EXCEPTION),
+ errmsg("argument must be empty or one-dimensional array")));
+
+ /* Perform element insertion */
+ my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
+
+ result = array_set_element(EOHPGetRWDatum(&eah->hdr),
+ 1, &indx, newelem, isNull,
+ -1, my_extra->typlen, my_extra->typbyval, my_extra->typalign);
+
+ /* Readjust result's LB to match the input's, as expected for prepend */
+ Assert(result == EOHPGetRWDatum(&eah->hdr));
+ if (eah->ndims == 1)
+ {
+ /* This is ok whether we've deconstructed or not */
+ eah->lbound[0] = lb0;
+ }
+
+ PG_RETURN_DATUM(result);
+}
+
+/*-----------------------------------------------------------------------------
+ * array_cat :
+ * concatenate two nD arrays to form an nD array, or
+ * push an (n-1)D array onto the end of an nD array
+ *----------------------------------------------------------------------------
+ */
+Datum
+array_cat(PG_FUNCTION_ARGS)
+{
+ ArrayType *v1,
+ *v2;
+ ArrayType *result;
+ int *dims,
+ *lbs,
+ ndims,
+ nitems,
+ ndatabytes,
+ nbytes;
+ int *dims1,
+ *lbs1,
+ ndims1,
+ nitems1,
+ ndatabytes1;
+ int *dims2,
+ *lbs2,
+ ndims2,
+ nitems2,
+ ndatabytes2;
+ int i;
+ char *dat1,
+ *dat2;
+ bits8 *bitmap1,
+ *bitmap2;
+ Oid element_type;
+ Oid element_type1;
+ Oid element_type2;
+ int32 dataoffset;
+
+ /* Concatenating a null array is a no-op, just return the other input */
+ if (PG_ARGISNULL(0))
+ {
+ if (PG_ARGISNULL(1))
+ PG_RETURN_NULL();
+ result = PG_GETARG_ARRAYTYPE_P(1);
+ PG_RETURN_ARRAYTYPE_P(result);
+ }
+ if (PG_ARGISNULL(1))
+ {
+ result = PG_GETARG_ARRAYTYPE_P(0);
+ PG_RETURN_ARRAYTYPE_P(result);
+ }
+
+ v1 = PG_GETARG_ARRAYTYPE_P(0);
+ v2 = PG_GETARG_ARRAYTYPE_P(1);
+
+ element_type1 = ARR_ELEMTYPE(v1);
+ element_type2 = ARR_ELEMTYPE(v2);
+
+ /* Check we have matching element types */
+ if (element_type1 != element_type2)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("cannot concatenate incompatible arrays"),
+ errdetail("Arrays with element types %s and %s are not "
+ "compatible for concatenation.",
+ format_type_be(element_type1),
+ format_type_be(element_type2))));
+
+ /* OK, use it */
+ element_type = element_type1;
+
+ /*----------
+ * We must have one of the following combinations of inputs:
+ * 1) one empty array, and one non-empty array
+ * 2) both arrays empty
+ * 3) two arrays with ndims1 == ndims2
+ * 4) ndims1 == ndims2 - 1
+ * 5) ndims1 == ndims2 + 1
+ *----------
+ */
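+ /*
+ * For example, ARRAY[1,2] || ARRAY[3,4] falls under case 3 and yields
+ * {1,2,3,4}, while ARRAY[1,2] || ARRAY[[3,4],[5,6]] (case 4) and
+ * ARRAY[[1,2],[3,4]] || ARRAY[5,6] (case 5) both yield
+ * {{1,2},{3,4},{5,6}}.
+ */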
+ ndims1 = ARR_NDIM(v1);
+ ndims2 = ARR_NDIM(v2);
+
+ /*
+ * short circuit - if one input array is empty, and the other is not, we
+ * return the non-empty one as the result
+ *
+ * if both are empty, return the first one
+ */
+ if (ndims1 == 0 && ndims2 > 0)
+ PG_RETURN_ARRAYTYPE_P(v2);
+
+ if (ndims2 == 0)
+ PG_RETURN_ARRAYTYPE_P(v1);
+
+ /* the rest fall under rule 3, 4, or 5 */
+ if (ndims1 != ndims2 &&
+ ndims1 != ndims2 - 1 &&
+ ndims1 != ndims2 + 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("cannot concatenate incompatible arrays"),
+ errdetail("Arrays of %d and %d dimensions are not "
+ "compatible for concatenation.",
+ ndims1, ndims2)));
+
+ /* get argument array details */
+ lbs1 = ARR_LBOUND(v1);
+ lbs2 = ARR_LBOUND(v2);
+ dims1 = ARR_DIMS(v1);
+ dims2 = ARR_DIMS(v2);
+ dat1 = ARR_DATA_PTR(v1);
+ dat2 = ARR_DATA_PTR(v2);
+ bitmap1 = ARR_NULLBITMAP(v1);
+ bitmap2 = ARR_NULLBITMAP(v2);
+ nitems1 = ArrayGetNItems(ndims1, dims1);
+ nitems2 = ArrayGetNItems(ndims2, dims2);
+ ndatabytes1 = ARR_SIZE(v1) - ARR_DATA_OFFSET(v1);
+ ndatabytes2 = ARR_SIZE(v2) - ARR_DATA_OFFSET(v2);
+
+ if (ndims1 == ndims2)
+ {
+ /*
+ * resulting array is made up of the elements (possibly arrays
+ * themselves) of the input argument arrays
+ */
+ ndims = ndims1;
+ dims = (int *) palloc(ndims * sizeof(int));
+ lbs = (int *) palloc(ndims * sizeof(int));
+
+ dims[0] = dims1[0] + dims2[0];
+ lbs[0] = lbs1[0];
+
+ for (i = 1; i < ndims; i++)
+ {
+ if (dims1[i] != dims2[i] || lbs1[i] != lbs2[i])
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("cannot concatenate incompatible arrays"),
+ errdetail("Arrays with differing element dimensions are "
+ "not compatible for concatenation.")));
+
+ dims[i] = dims1[i];
+ lbs[i] = lbs1[i];
+ }
+ }
+ else if (ndims1 == ndims2 - 1)
+ {
+ /*
+ * resulting array has the second argument as the outer array, with
+ * the first argument inserted at the front of the outer dimension
+ */
+ ndims = ndims2;
+ dims = (int *) palloc(ndims * sizeof(int));
+ lbs = (int *) palloc(ndims * sizeof(int));
+ memcpy(dims, dims2, ndims * sizeof(int));
+ memcpy(lbs, lbs2, ndims * sizeof(int));
+
+ /* increment number of elements in outer array */
+ dims[0] += 1;
+
+ /* make sure the added element matches our existing elements */
+ for (i = 0; i < ndims1; i++)
+ {
+ if (dims1[i] != dims[i + 1] || lbs1[i] != lbs[i + 1])
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("cannot concatenate incompatible arrays"),
+ errdetail("Arrays with differing dimensions are not "
+ "compatible for concatenation.")));
+ }
+ }
+ else
+ {
+ /*
+ * (ndims1 == ndims2 + 1)
+ *
+ * resulting array has the first argument as the outer array, with the
+ * second argument appended to the end of the outer dimension
+ */
+ ndims = ndims1;
+ dims = (int *) palloc(ndims * sizeof(int));
+ lbs = (int *) palloc(ndims * sizeof(int));
+ memcpy(dims, dims1, ndims * sizeof(int));
+ memcpy(lbs, lbs1, ndims * sizeof(int));
+
+ /* increment number of elements in outer array */
+ dims[0] += 1;
+
+ /* make sure the added element matches our existing elements */
+ for (i = 0; i < ndims2; i++)
+ {
+ if (dims2[i] != dims[i + 1] || lbs2[i] != lbs[i + 1])
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("cannot concatenate incompatible arrays"),
+ errdetail("Arrays with differing dimensions are not "
+ "compatible for concatenation.")));
+ }
+ }
+
+ /* Do this mainly for overflow checking */
+ nitems = ArrayGetNItems(ndims, dims);
+ ArrayCheckBounds(ndims, dims, lbs);
+
+ /* build the result array */
+ ndatabytes = ndatabytes1 + ndatabytes2;
+ if (ARR_HASNULL(v1) || ARR_HASNULL(v2))
+ {
+ dataoffset = ARR_OVERHEAD_WITHNULLS(ndims, nitems);
+ nbytes = ndatabytes + dataoffset;
+ }
+ else
+ {
+ dataoffset = 0; /* marker for no null bitmap */
+ nbytes = ndatabytes + ARR_OVERHEAD_NONULLS(ndims);
+ }
+ result = (ArrayType *) palloc0(nbytes);
+ SET_VARSIZE(result, nbytes);
+ result->ndim = ndims;
+ result->dataoffset = dataoffset;
+ result->elemtype = element_type;
+ memcpy(ARR_DIMS(result), dims, ndims * sizeof(int));
+ memcpy(ARR_LBOUND(result), lbs, ndims * sizeof(int));
+ /* data area is arg1 then arg2 */
+ memcpy(ARR_DATA_PTR(result), dat1, ndatabytes1);
+ memcpy(ARR_DATA_PTR(result) + ndatabytes1, dat2, ndatabytes2);
+ /* handle the null bitmap if needed */
+ if (ARR_HASNULL(result))
+ {
+ array_bitmap_copy(ARR_NULLBITMAP(result), 0,
+ bitmap1, 0,
+ nitems1);
+ array_bitmap_copy(ARR_NULLBITMAP(result), nitems1,
+ bitmap2, 0,
+ nitems2);
+ }
+
+ PG_RETURN_ARRAYTYPE_P(result);
+}
+
+
+/*
+ * ARRAY_AGG(anynonarray) aggregate function
+ */
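+ /*
+ * array_agg_transfn and array_agg_finalfn together implement the SQL-level
+ * aggregate; for instance, SELECT array_agg(x ORDER BY x) FROM
+ * generate_series(1,3) x produces {1,2,3}.
+ */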
+Datum
+array_agg_transfn(PG_FUNCTION_ARGS)
+{
+ Oid arg1_typeid = get_fn_expr_argtype(fcinfo->flinfo, 1);
+ MemoryContext aggcontext;
+ ArrayBuildState *state;
+ Datum elem;
+
+ if (arg1_typeid == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("could not determine input data type")));
+
+ /*
+ * Note: we do not need a run-time check about whether arg1_typeid is a
+ * valid array element type, because the parser would have verified that
+ * while resolving the input/result types of this polymorphic aggregate.
+ */
+
+ if (!AggCheckCallContext(fcinfo, &aggcontext))
+ {
+ /* cannot be called directly because of internal-type argument */
+ elog(ERROR, "array_agg_transfn called in non-aggregate context");
+ }
+
+ if (PG_ARGISNULL(0))
+ state = initArrayResult(arg1_typeid, aggcontext, false);
+ else
+ state = (ArrayBuildState *) PG_GETARG_POINTER(0);
+
+ elem = PG_ARGISNULL(1) ? (Datum) 0 : PG_GETARG_DATUM(1);
+
+ state = accumArrayResult(state,
+ elem,
+ PG_ARGISNULL(1),
+ arg1_typeid,
+ aggcontext);
+
+ /*
+ * The transition type for array_agg() is declared to be "internal", which
+ * is a pass-by-value type the same size as a pointer. So we can safely
+ * pass the ArrayBuildState pointer through nodeAgg.c's machinations.
+ */
+ PG_RETURN_POINTER(state);
+}
+
+Datum
+array_agg_finalfn(PG_FUNCTION_ARGS)
+{
+ Datum result;
+ ArrayBuildState *state;
+ int dims[1];
+ int lbs[1];
+
+ /* cannot be called directly because of internal-type argument */
+ Assert(AggCheckCallContext(fcinfo, NULL));
+
+ state = PG_ARGISNULL(0) ? NULL : (ArrayBuildState *) PG_GETARG_POINTER(0);
+
+ if (state == NULL)
+ PG_RETURN_NULL(); /* returns null iff no input values */
+
+ dims[0] = state->nelems;
+ lbs[0] = 1;
+
+ /*
+ * Make the result. We cannot release the ArrayBuildState because
+ * sometimes aggregate final functions are re-executed. Rather, it is
+ * nodeAgg.c's responsibility to reset the aggcontext when it's safe to do
+ * so.
+ */
+ result = makeMdArrayResult(state, 1, dims, lbs,
+ CurrentMemoryContext,
+ false);
+
+ PG_RETURN_DATUM(result);
+}
+
+/*
+ * ARRAY_AGG(anyarray) aggregate function
+ */
+Datum
+array_agg_array_transfn(PG_FUNCTION_ARGS)
+{
+ Oid arg1_typeid = get_fn_expr_argtype(fcinfo->flinfo, 1);
+ MemoryContext aggcontext;
+ ArrayBuildStateArr *state;
+
+ if (arg1_typeid == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("could not determine input data type")));
+
+ /*
+ * Note: we do not need a run-time check about whether arg1_typeid is a
+ * valid array type, because the parser would have verified that while
+ * resolving the input/result types of this polymorphic aggregate.
+ */
+
+ if (!AggCheckCallContext(fcinfo, &aggcontext))
+ {
+ /* cannot be called directly because of internal-type argument */
+ elog(ERROR, "array_agg_array_transfn called in non-aggregate context");
+ }
+
+ if (PG_ARGISNULL(0))
+ state = initArrayResultArr(arg1_typeid, InvalidOid, aggcontext, false);
+ else
+ state = (ArrayBuildStateArr *) PG_GETARG_POINTER(0);
+
+ state = accumArrayResultArr(state,
+ PG_GETARG_DATUM(1),
+ PG_ARGISNULL(1),
+ arg1_typeid,
+ aggcontext);
+
+ /*
+ * The transition type for array_agg() is declared to be "internal", which
+ * is a pass-by-value type the same size as a pointer. So we can safely
+ * pass the ArrayBuildStateArr pointer through nodeAgg.c's machinations.
+ */
+ PG_RETURN_POINTER(state);
+}
+
+Datum
+array_agg_array_finalfn(PG_FUNCTION_ARGS)
+{
+ Datum result;
+ ArrayBuildStateArr *state;
+
+ /* cannot be called directly because of internal-type argument */
+ Assert(AggCheckCallContext(fcinfo, NULL));
+
+ state = PG_ARGISNULL(0) ? NULL : (ArrayBuildStateArr *) PG_GETARG_POINTER(0);
+
+ if (state == NULL)
+ PG_RETURN_NULL(); /* returns null iff no input values */
+
+ /*
+ * Make the result. We cannot release the ArrayBuildStateArr because
+ * sometimes aggregate final functions are re-executed. Rather, it is
+ * nodeAgg.c's responsibility to reset the aggcontext when it's safe to do
+ * so.
+ */
+ result = makeArrayResultArr(state, CurrentMemoryContext, false);
+
+ PG_RETURN_DATUM(result);
+}
+
+/*-----------------------------------------------------------------------------
+ * array_position, array_position_start :
+ * return the offset of a value in an array.
+ *
+ * IS NOT DISTINCT FROM semantics are used for comparisons. Return NULL when
+ * the value is not found.
+ *-----------------------------------------------------------------------------
+ */
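+ /*
+ * For example, array_position(ARRAY['a','b','c'], 'b') returns 2, and
+ * array_position(ARRAY[1,2,3,2], 2, 3) starts the search at position 3
+ * and therefore returns 4.
+ */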
+Datum
+array_position(PG_FUNCTION_ARGS)
+{
+ return array_position_common(fcinfo);
+}
+
+Datum
+array_position_start(PG_FUNCTION_ARGS)
+{
+ return array_position_common(fcinfo);
+}
+
+/*
+ * array_position_common
+ * Common code for array_position and array_position_start
+ *
+ * These are separate wrappers for the sake of opr_sanity regression test.
+ * They are not strict so we have to test for null inputs explicitly.
+ */
+static Datum
+array_position_common(FunctionCallInfo fcinfo)
+{
+ ArrayType *array;
+ Oid collation = PG_GET_COLLATION();
+ Oid element_type;
+ Datum searched_element,
+ value;
+ bool isnull;
+ int position,
+ position_min;
+ bool found = false;
+ TypeCacheEntry *typentry;
+ ArrayMetaState *my_extra;
+ bool null_search;
+ ArrayIterator array_iterator;
+
+ if (PG_ARGISNULL(0))
+ PG_RETURN_NULL();
+
+ array = PG_GETARG_ARRAYTYPE_P(0);
+
+ /*
+ * We refuse to search for elements in multi-dimensional arrays, since we
+ * have no good way to report the element's location in the array.
+ */
+ if (ARR_NDIM(array) > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("searching for elements in multidimensional arrays is not supported")));
+
+ /* Searching in an empty array is well-defined, though: it always fails */
+ if (ARR_NDIM(array) < 1)
+ PG_RETURN_NULL();
+
+ if (PG_ARGISNULL(1))
+ {
+ /* fast return when the array doesn't have nulls */
+ if (!array_contains_nulls(array))
+ PG_RETURN_NULL();
+ searched_element = (Datum) 0;
+ null_search = true;
+ }
+ else
+ {
+ searched_element = PG_GETARG_DATUM(1);
+ null_search = false;
+ }
+
+ element_type = ARR_ELEMTYPE(array);
+ position = (ARR_LBOUND(array))[0] - 1;
+
+ /* figure out where to start */
+ if (PG_NARGS() == 3)
+ {
+ if (PG_ARGISNULL(2))
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("initial position must not be null")));
+
+ position_min = PG_GETARG_INT32(2);
+ }
+ else
+ position_min = (ARR_LBOUND(array))[0];
+
+ /*
+ * We arrange to look up type info for array_create_iterator only once per
+ * series of calls, assuming the element type doesn't change underneath
+ * us.
+ */
+ my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
+ if (my_extra == NULL)
+ {
+ fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(ArrayMetaState));
+ my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
+ my_extra->element_type = ~element_type;
+ }
+
+ if (my_extra->element_type != element_type)
+ {
+ get_typlenbyvalalign(element_type,
+ &my_extra->typlen,
+ &my_extra->typbyval,
+ &my_extra->typalign);
+
+ typentry = lookup_type_cache(element_type, TYPECACHE_EQ_OPR_FINFO);
+
+ if (!OidIsValid(typentry->eq_opr_finfo.fn_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify an equality operator for type %s",
+ format_type_be(element_type))));
+
+ my_extra->element_type = element_type;
+ fmgr_info_cxt(typentry->eq_opr_finfo.fn_oid, &my_extra->proc,
+ fcinfo->flinfo->fn_mcxt);
+ }
+
+ /* Examine each array element until we find a match. */
+ array_iterator = array_create_iterator(array, 0, my_extra);
+ while (array_iterate(array_iterator, &value, &isnull))
+ {
+ position++;
+
+ /* skip initial elements if caller requested so */
+ if (position < position_min)
+ continue;
+
+ /*
+ * Can't look at the array element's value if it's null; but if we
+ * search for null, we have a hit and are done.
+ */
+ if (isnull || null_search)
+ {
+ if (isnull && null_search)
+ {
+ found = true;
+ break;
+ }
+ else
+ continue;
+ }
+
+ /* not nulls, so run the operator */
+ if (DatumGetBool(FunctionCall2Coll(&my_extra->proc, collation,
+ searched_element, value)))
+ {
+ found = true;
+ break;
+ }
+ }
+
+ array_free_iterator(array_iterator);
+
+ /* Avoid leaking memory when handed toasted input */
+ PG_FREE_IF_COPY(array, 0);
+
+ if (!found)
+ PG_RETURN_NULL();
+
+ PG_RETURN_INT32(position);
+}
+
+/*-----------------------------------------------------------------------------
+ * array_positions :
+ * return an array of positions of a value in an array.
+ *
+ * IS NOT DISTINCT FROM semantics are used for comparisons. Returns NULL when
+ * the input array is NULL. When the value is not found in the array, returns
+ * an empty array.
+ *
+ * This is not strict so we have to test for null inputs explicitly.
+ *-----------------------------------------------------------------------------
+ */
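+ /*
+ * For example, array_positions(ARRAY['A','A','B','A'], 'A') returns
+ * {1,2,4}, while searching for a value that does not occur returns '{}'.
+ */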
+Datum
+array_positions(PG_FUNCTION_ARGS)
+{
+ ArrayType *array;
+ Oid collation = PG_GET_COLLATION();
+ Oid element_type;
+ Datum searched_element,
+ value;
+ bool isnull;
+ int position;
+ TypeCacheEntry *typentry;
+ ArrayMetaState *my_extra;
+ bool null_search;
+ ArrayIterator array_iterator;
+ ArrayBuildState *astate = NULL;
+
+ if (PG_ARGISNULL(0))
+ PG_RETURN_NULL();
+
+ array = PG_GETARG_ARRAYTYPE_P(0);
+
+ /*
+ * We refuse to search for elements in multi-dimensional arrays, since we
+ * have no good way to report the element's location in the array.
+ */
+ if (ARR_NDIM(array) > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("searching for elements in multidimensional arrays is not supported")));
+
+ astate = initArrayResult(INT4OID, CurrentMemoryContext, false);
+
+ /* Searching in an empty array is well-defined, though: it always fails */
+ if (ARR_NDIM(array) < 1)
+ PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
+
+ if (PG_ARGISNULL(1))
+ {
+ /* fast return when the array doesn't have nulls */
+ if (!array_contains_nulls(array))
+ PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
+ searched_element = (Datum) 0;
+ null_search = true;
+ }
+ else
+ {
+ searched_element = PG_GETARG_DATUM(1);
+ null_search = false;
+ }
+
+ element_type = ARR_ELEMTYPE(array);
+ position = (ARR_LBOUND(array))[0] - 1;
+
+ /*
+ * We arrange to look up type info for array_create_iterator only once per
+ * series of calls, assuming the element type doesn't change underneath
+ * us.
+ */
+ my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
+ if (my_extra == NULL)
+ {
+ fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(ArrayMetaState));
+ my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
+ my_extra->element_type = ~element_type;
+ }
+
+ if (my_extra->element_type != element_type)
+ {
+ get_typlenbyvalalign(element_type,
+ &my_extra->typlen,
+ &my_extra->typbyval,
+ &my_extra->typalign);
+
+ typentry = lookup_type_cache(element_type, TYPECACHE_EQ_OPR_FINFO);
+
+ if (!OidIsValid(typentry->eq_opr_finfo.fn_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify an equality operator for type %s",
+ format_type_be(element_type))));
+
+ my_extra->element_type = element_type;
+ fmgr_info_cxt(typentry->eq_opr_finfo.fn_oid, &my_extra->proc,
+ fcinfo->flinfo->fn_mcxt);
+ }
+
+ /*
+ * Accumulate each array position iff the element matches the given
+ * element.
+ */
+ array_iterator = array_create_iterator(array, 0, my_extra);
+ while (array_iterate(array_iterator, &value, &isnull))
+ {
+ position += 1;
+
+ /*
+ * Can't look at the array element's value if it's null; but if we
+ * search for null, we have a hit.
+ */
+ if (isnull || null_search)
+ {
+ if (isnull && null_search)
+ astate =
+ accumArrayResult(astate, Int32GetDatum(position), false,
+ INT4OID, CurrentMemoryContext);
+
+ continue;
+ }
+
+ /* not nulls, so run the operator */
+ if (DatumGetBool(FunctionCall2Coll(&my_extra->proc, collation,
+ searched_element, value)))
+ astate =
+ accumArrayResult(astate, Int32GetDatum(position), false,
+ INT4OID, CurrentMemoryContext);
+ }
+
+ array_free_iterator(array_iterator);
+
+ /* Avoid leaking memory when handed toasted input */
+ PG_FREE_IF_COPY(array, 0);
+
+ PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
+}
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
new file mode 100644
index 0000000..d887619
--- /dev/null
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -0,0 +1,6770 @@
+/*-------------------------------------------------------------------------
+ *
+ * arrayfuncs.c
+ * Support functions for arrays.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/arrayfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+#include <math.h>
+
+#include "access/htup_details.h"
+#include "catalog/pg_type.h"
+#include "common/int.h"
+#include "funcapi.h"
+#include "libpq/pqformat.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "optimizer/optimizer.h"
+#include "port/pg_bitutils.h"
+#include "utils/array.h"
+#include "utils/arrayaccess.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/fmgroids.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/selfuncs.h"
+#include "utils/typcache.h"
+
+
+/*
+ * GUC parameter
+ */
+bool Array_nulls = true;
+
+/*
+ * Local definitions
+ */
+#define ASSGN "="
+
+#define AARR_FREE_IF_COPY(array,n) \
+ do { \
+ if (!VARATT_IS_EXPANDED_HEADER(array)) \
+ PG_FREE_IF_COPY(array, n); \
+ } while (0)
+
+typedef enum
+{
+ ARRAY_NO_LEVEL,
+ ARRAY_LEVEL_STARTED,
+ ARRAY_ELEM_STARTED,
+ ARRAY_ELEM_COMPLETED,
+ ARRAY_QUOTED_ELEM_STARTED,
+ ARRAY_QUOTED_ELEM_COMPLETED,
+ ARRAY_ELEM_DELIMITED,
+ ARRAY_LEVEL_COMPLETED,
+ ARRAY_LEVEL_DELIMITED
+} ArrayParseState;
+
+/* Working state for array_iterate() */
+typedef struct ArrayIteratorData
+{
+ /* basic info about the array, set up during array_create_iterator() */
+ ArrayType *arr; /* array we're iterating through */
+ bits8 *nullbitmap; /* its null bitmap, if any */
+ int nitems; /* total number of elements in array */
+ int16 typlen; /* element type's length */
+ bool typbyval; /* element type's byval property */
+ char typalign; /* element type's align property */
+
+ /* information about the requested slice size */
+ int slice_ndim; /* slice dimension, or 0 if not slicing */
+ int slice_len; /* number of elements per slice */
+ int *slice_dims; /* slice dims array */
+ int *slice_lbound; /* slice lbound array */
+ Datum *slice_values; /* workspace of length slice_len */
+ bool *slice_nulls; /* workspace of length slice_len */
+
+ /* current position information, updated on each iteration */
+ char *data_ptr; /* our current position in the array */
+ int current_item; /* the item # we're at in the array */
+} ArrayIteratorData;
+
+static bool array_isspace(char ch);
+static int ArrayCount(const char *str, int *dim, char typdelim);
+static void ReadArrayStr(char *arrayStr, const char *origStr,
+ int nitems, int ndim, int *dim,
+ FmgrInfo *inputproc, Oid typioparam, int32 typmod,
+ char typdelim,
+ int typlen, bool typbyval, char typalign,
+ Datum *values, bool *nulls,
+ bool *hasnulls, int32 *nbytes);
+static void ReadArrayBinary(StringInfo buf, int nitems,
+ FmgrInfo *receiveproc, Oid typioparam, int32 typmod,
+ int typlen, bool typbyval, char typalign,
+ Datum *values, bool *nulls,
+ bool *hasnulls, int32 *nbytes);
+static Datum array_get_element_expanded(Datum arraydatum,
+ int nSubscripts, int *indx,
+ int arraytyplen,
+ int elmlen, bool elmbyval, char elmalign,
+ bool *isNull);
+static Datum array_set_element_expanded(Datum arraydatum,
+ int nSubscripts, int *indx,
+ Datum dataValue, bool isNull,
+ int arraytyplen,
+ int elmlen, bool elmbyval, char elmalign);
+static bool array_get_isnull(const bits8 *nullbitmap, int offset);
+static void array_set_isnull(bits8 *nullbitmap, int offset, bool isNull);
+static Datum ArrayCast(char *value, bool byval, int len);
+static int ArrayCastAndSet(Datum src,
+ int typlen, bool typbyval, char typalign,
+ char *dest);
+static char *array_seek(char *ptr, int offset, bits8 *nullbitmap, int nitems,
+ int typlen, bool typbyval, char typalign);
+static int array_nelems_size(char *ptr, int offset, bits8 *nullbitmap,
+ int nitems, int typlen, bool typbyval, char typalign);
+static int array_copy(char *destptr, int nitems,
+ char *srcptr, int offset, bits8 *nullbitmap,
+ int typlen, bool typbyval, char typalign);
+static int array_slice_size(char *arraydataptr, bits8 *arraynullsptr,
+ int ndim, int *dim, int *lb,
+ int *st, int *endp,
+ int typlen, bool typbyval, char typalign);
+static void array_extract_slice(ArrayType *newarray,
+ int ndim, int *dim, int *lb,
+ char *arraydataptr, bits8 *arraynullsptr,
+ int *st, int *endp,
+ int typlen, bool typbyval, char typalign);
+static void array_insert_slice(ArrayType *destArray, ArrayType *origArray,
+ ArrayType *srcArray,
+ int ndim, int *dim, int *lb,
+ int *st, int *endp,
+ int typlen, bool typbyval, char typalign);
+static int array_cmp(FunctionCallInfo fcinfo);
+static ArrayType *create_array_envelope(int ndims, int *dimv, int *lbsv, int nbytes,
+ Oid elmtype, int dataoffset);
+static ArrayType *array_fill_internal(ArrayType *dims, ArrayType *lbs,
+ Datum value, bool isnull, Oid elmtype,
+ FunctionCallInfo fcinfo);
+static ArrayType *array_replace_internal(ArrayType *array,
+ Datum search, bool search_isnull,
+ Datum replace, bool replace_isnull,
+ bool remove, Oid collation,
+ FunctionCallInfo fcinfo);
+static int width_bucket_array_float8(Datum operand, ArrayType *thresholds);
+static int width_bucket_array_fixed(Datum operand,
+ ArrayType *thresholds,
+ Oid collation,
+ TypeCacheEntry *typentry);
+static int width_bucket_array_variable(Datum operand,
+ ArrayType *thresholds,
+ Oid collation,
+ TypeCacheEntry *typentry);
+
+
+/*
+ * array_in :
+ * converts an array from the external format in "string" to
+ * its internal format.
+ *
+ * return value :
+ * the internal representation of the input array
+ */
+Datum
+array_in(PG_FUNCTION_ARGS)
+{
+ char *string = PG_GETARG_CSTRING(0); /* external form */
+ Oid element_type = PG_GETARG_OID(1); /* type of an array
+ * element */
+ int32 typmod = PG_GETARG_INT32(2); /* typmod for array elements */
+ int typlen;
+ bool typbyval;
+ char typalign;
+ char typdelim;
+ Oid typioparam;
+ char *string_save,
+ *p;
+ int i,
+ nitems;
+ Datum *dataPtr;
+ bool *nullsPtr;
+ bool hasnulls;
+ int32 nbytes;
+ int32 dataoffset;
+ ArrayType *retval;
+ int ndim,
+ dim[MAXDIM],
+ lBound[MAXDIM];
+ ArrayMetaState *my_extra;
+
+ /*
+ * We arrange to look up info about element type, including its input
+ * conversion proc, only once per series of calls, assuming the element
+ * type doesn't change underneath us.
+ */
+ my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
+ if (my_extra == NULL)
+ {
+ fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(ArrayMetaState));
+ my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
+ my_extra->element_type = ~element_type;
+ }
+
+ if (my_extra->element_type != element_type)
+ {
+ /*
+ * Get info about element type, including its input conversion proc
+ */
+ get_type_io_data(element_type, IOFunc_input,
+ &my_extra->typlen, &my_extra->typbyval,
+ &my_extra->typalign, &my_extra->typdelim,
+ &my_extra->typioparam, &my_extra->typiofunc);
+ fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
+ fcinfo->flinfo->fn_mcxt);
+ my_extra->element_type = element_type;
+ }
+ typlen = my_extra->typlen;
+ typbyval = my_extra->typbyval;
+ typalign = my_extra->typalign;
+ typdelim = my_extra->typdelim;
+ typioparam = my_extra->typioparam;
+
+ /* Make a modifiable copy of the input */
+ string_save = pstrdup(string);
+
+ /*
+ * If the input string starts with dimension info, read and use that.
+ * Otherwise, we require the input to be in curly-brace style, and we
+ * prescan the input to determine dimensions.
+ *
+ * Dimension info takes the form of one or more [n] or [m:n] items. The
+ * outer loop iterates once per dimension item.
+ */
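+ /*
+ * For example, both '{{1,2,3},{4,5,6}}' and '[1:2][1:3]={{1,2,3},{4,5,6}}'
+ * denote the same 2-D array; the dimensioned form also permits non-default
+ * lower bounds, as in '[0:1]={7,8}'.
+ */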
+ p = string_save;
+ ndim = 0;
+ for (;;)
+ {
+ char *q;
+ int ub;
+
+ /*
+ * Note: we currently allow whitespace between, but not within,
+ * dimension items.
+ */
+ while (array_isspace(*p))
+ p++;
+ if (*p != '[')
+ break; /* no more dimension items */
+ p++;
+ if (ndim >= MAXDIM)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+ ndim + 1, MAXDIM)));
+
+ for (q = p; isdigit((unsigned char) *q) || (*q == '-') || (*q == '+'); q++)
+ /* skip */ ;
+ if (q == p) /* no digits? */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", string),
+ errdetail("\"[\" must introduce explicitly-specified array dimensions.")));
+
+ if (*q == ':')
+ {
+ /* [m:n] format */
+ *q = '\0';
+ lBound[ndim] = atoi(p);
+ p = q + 1;
+ for (q = p; isdigit((unsigned char) *q) || (*q == '-') || (*q == '+'); q++)
+ /* skip */ ;
+ if (q == p) /* no digits? */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", string),
+ errdetail("Missing array dimension value.")));
+ }
+ else
+ {
+ /* [n] format */
+ lBound[ndim] = 1;
+ }
+ if (*q != ']')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", string),
+ errdetail("Missing \"%s\" after array dimensions.",
+ "]")));
+
+ *q = '\0';
+ ub = atoi(p);
+ p = q + 1;
+ if (ub < lBound[ndim])
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("upper bound cannot be less than lower bound")));
+
+ dim[ndim] = ub - lBound[ndim] + 1;
+ ndim++;
+ }
+
+ if (ndim == 0)
+ {
+ /* No array dimensions, so intuit dimensions from brace structure */
+ if (*p != '{')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", string),
+ errdetail("Array value must start with \"{\" or dimension information.")));
+ ndim = ArrayCount(p, dim, typdelim);
+ for (i = 0; i < ndim; i++)
+ lBound[i] = 1;
+ }
+ else
+ {
+ int ndim_braces,
+ dim_braces[MAXDIM];
+
+ /* If array dimensions are given, expect '=' operator */
+ if (strncmp(p, ASSGN, strlen(ASSGN)) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", string),
+ errdetail("Missing \"%s\" after array dimensions.",
+ ASSGN)));
+ p += strlen(ASSGN);
+ while (array_isspace(*p))
+ p++;
+
+ /*
+ * intuit dimensions from brace structure -- it better match what we
+ * were given
+ */
+ if (*p != '{')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", string),
+ errdetail("Array contents must start with \"{\".")));
+ ndim_braces = ArrayCount(p, dim_braces, typdelim);
+ if (ndim_braces != ndim)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", string),
+ errdetail("Specified array dimensions do not match array contents.")));
+ for (i = 0; i < ndim; ++i)
+ {
+ if (dim[i] != dim_braces[i])
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", string),
+ errdetail("Specified array dimensions do not match array contents.")));
+ }
+ }
+
+#ifdef ARRAYDEBUG
+ printf("array_in- ndim %d (", ndim);
+ for (i = 0; i < ndim; i++)
+ {
+ printf(" %d", dim[i]);
+ };
+ printf(") for %s\n", string);
+#endif
+
+ /* This checks for overflow of the array dimensions */
+ nitems = ArrayGetNItems(ndim, dim);
+ ArrayCheckBounds(ndim, dim, lBound);
+
+ /* Empty array? */
+ if (nitems == 0)
+ PG_RETURN_ARRAYTYPE_P(construct_empty_array(element_type));
+
+ dataPtr = (Datum *) palloc(nitems * sizeof(Datum));
+ nullsPtr = (bool *) palloc(nitems * sizeof(bool));
+ ReadArrayStr(p, string,
+ nitems, ndim, dim,
+ &my_extra->proc, typioparam, typmod,
+ typdelim,
+ typlen, typbyval, typalign,
+ dataPtr, nullsPtr,
+ &hasnulls, &nbytes);
+ if (hasnulls)
+ {
+ dataoffset = ARR_OVERHEAD_WITHNULLS(ndim, nitems);
+ nbytes += dataoffset;
+ }
+ else
+ {
+ dataoffset = 0; /* marker for no null bitmap */
+ nbytes += ARR_OVERHEAD_NONULLS(ndim);
+ }
+ retval = (ArrayType *) palloc0(nbytes);
+ SET_VARSIZE(retval, nbytes);
+ retval->ndim = ndim;
+ retval->dataoffset = dataoffset;
+
+ /*
+ * This comes from the array's pg_type.typelem (which points to the base
+ * data type's pg_type.oid) and stores system oids in user tables. This
+ * oid must be preserved by binary upgrades.
+ */
+ retval->elemtype = element_type;
+ memcpy(ARR_DIMS(retval), dim, ndim * sizeof(int));
+ memcpy(ARR_LBOUND(retval), lBound, ndim * sizeof(int));
+
+ CopyArrayEls(retval,
+ dataPtr, nullsPtr, nitems,
+ typlen, typbyval, typalign,
+ true);
+
+ pfree(dataPtr);
+ pfree(nullsPtr);
+ pfree(string_save);
+
+ PG_RETURN_ARRAYTYPE_P(retval);
+}
+
+/*
+ * array_isspace() --- a non-locale-dependent isspace()
+ *
+ * We used to use isspace() for parsing array values, but that has
+ * undesirable results: an array value might be silently interpreted
+ * differently depending on the locale setting. Now we just hard-wire
+ * the traditional ASCII definition of isspace().
+ */
+static bool
+array_isspace(char ch)
+{
+ if (ch == ' ' ||
+ ch == '\t' ||
+ ch == '\n' ||
+ ch == '\r' ||
+ ch == '\v' ||
+ ch == '\f')
+ return true;
+ return false;
+}
+
+/*
+ * ArrayCount
+ * Determines the dimensions for an array string.
+ *
+ * Returns number of dimensions as function result. The axis lengths are
+ * returned in dim[], which must be of size MAXDIM.
+ */
+static int
+ArrayCount(const char *str, int *dim, char typdelim)
+{
+ int nest_level = 0,
+ i;
+ int ndim = 1,
+ temp[MAXDIM],
+ nelems[MAXDIM],
+ nelems_last[MAXDIM];
+ bool in_quotes = false;
+ bool eoArray = false;
+ bool empty_array = true;
+ const char *ptr;
+ ArrayParseState parse_state = ARRAY_NO_LEVEL;
+
+ for (i = 0; i < MAXDIM; ++i)
+ {
+ temp[i] = dim[i] = nelems_last[i] = 0;
+ nelems[i] = 1;
+ }
+
+ ptr = str;
+ while (!eoArray)
+ {
+ bool itemdone = false;
+
+ while (!itemdone)
+ {
+ if (parse_state == ARRAY_ELEM_STARTED ||
+ parse_state == ARRAY_QUOTED_ELEM_STARTED)
+ empty_array = false;
+
+ switch (*ptr)
+ {
+ case '\0':
+ /* Signal a premature end of the string */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str),
+ errdetail("Unexpected end of input.")));
+ break;
+ case '\\':
+
+ /*
+ * An escape must be after a level start, after an element
+ * start, or after an element delimiter. In any case we
+ * now must be past an element start.
+ */
+ if (parse_state != ARRAY_LEVEL_STARTED &&
+ parse_state != ARRAY_ELEM_STARTED &&
+ parse_state != ARRAY_QUOTED_ELEM_STARTED &&
+ parse_state != ARRAY_ELEM_DELIMITED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str),
+ errdetail("Unexpected \"%c\" character.",
+ '\\')));
+ if (parse_state != ARRAY_QUOTED_ELEM_STARTED)
+ parse_state = ARRAY_ELEM_STARTED;
+ /* skip the escaped character */
+ if (*(ptr + 1))
+ ptr++;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str),
+ errdetail("Unexpected end of input.")));
+ break;
+ case '"':
+
+ /*
+ * A quote must be after a level start, after a quoted
+ * element start, or after an element delimiter. In any
+ * case we now must be past an element start.
+ */
+ if (parse_state != ARRAY_LEVEL_STARTED &&
+ parse_state != ARRAY_QUOTED_ELEM_STARTED &&
+ parse_state != ARRAY_ELEM_DELIMITED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str),
+ errdetail("Unexpected array element.")));
+ in_quotes = !in_quotes;
+ if (in_quotes)
+ parse_state = ARRAY_QUOTED_ELEM_STARTED;
+ else
+ parse_state = ARRAY_QUOTED_ELEM_COMPLETED;
+ break;
+ case '{':
+ if (!in_quotes)
+ {
+ /*
+ * A left brace can occur if no nesting has occurred
+ * yet, after a level start, or after a level
+ * delimiter.
+ */
+ if (parse_state != ARRAY_NO_LEVEL &&
+ parse_state != ARRAY_LEVEL_STARTED &&
+ parse_state != ARRAY_LEVEL_DELIMITED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str),
+ errdetail("Unexpected \"%c\" character.",
+ '{')));
+ parse_state = ARRAY_LEVEL_STARTED;
+ if (nest_level >= MAXDIM)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+ nest_level + 1, MAXDIM)));
+ temp[nest_level] = 0;
+ nest_level++;
+ if (ndim < nest_level)
+ ndim = nest_level;
+ }
+ break;
+ case '}':
+ if (!in_quotes)
+ {
+ /*
+ * A right brace can occur after an element start, an
+ * element completion, a quoted element completion, or
+ * a level completion.
+ */
+ if (parse_state != ARRAY_ELEM_STARTED &&
+ parse_state != ARRAY_ELEM_COMPLETED &&
+ parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
+ parse_state != ARRAY_LEVEL_COMPLETED &&
+ !(nest_level == 1 && parse_state == ARRAY_LEVEL_STARTED))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str),
+ errdetail("Unexpected \"%c\" character.",
+ '}')));
+ parse_state = ARRAY_LEVEL_COMPLETED;
+ if (nest_level == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str),
+ errdetail("Unmatched \"%c\" character.", '}')));
+ nest_level--;
+
+ if (nelems_last[nest_level] != 0 &&
+ nelems[nest_level] != nelems_last[nest_level])
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str),
+ errdetail("Multidimensional arrays must have "
+ "sub-arrays with matching "
+ "dimensions.")));
+ nelems_last[nest_level] = nelems[nest_level];
+ nelems[nest_level] = 1;
+ if (nest_level == 0)
+ eoArray = itemdone = true;
+ else
+ {
+ /*
+ * We don't set itemdone here; see comments in
+ * ReadArrayStr
+ */
+ temp[nest_level - 1]++;
+ }
+ }
+ break;
+ default:
+ if (!in_quotes)
+ {
+ if (*ptr == typdelim)
+ {
+ /*
+ * Delimiters can occur after an element start, an
+ * element completion, a quoted element
+ * completion, or a level completion.
+ */
+ if (parse_state != ARRAY_ELEM_STARTED &&
+ parse_state != ARRAY_ELEM_COMPLETED &&
+ parse_state != ARRAY_QUOTED_ELEM_COMPLETED &&
+ parse_state != ARRAY_LEVEL_COMPLETED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str),
+ errdetail("Unexpected \"%c\" character.",
+ typdelim)));
+ if (parse_state == ARRAY_LEVEL_COMPLETED)
+ parse_state = ARRAY_LEVEL_DELIMITED;
+ else
+ parse_state = ARRAY_ELEM_DELIMITED;
+ itemdone = true;
+ nelems[nest_level - 1]++;
+ }
+ else if (!array_isspace(*ptr))
+ {
+ /*
+ * Other non-space characters must be after a
+ * level start, after an element start, or after
+ * an element delimiter. In any case we now must
+ * be past an element start.
+ */
+ if (parse_state != ARRAY_LEVEL_STARTED &&
+ parse_state != ARRAY_ELEM_STARTED &&
+ parse_state != ARRAY_ELEM_DELIMITED)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str),
+ errdetail("Unexpected array element.")));
+ parse_state = ARRAY_ELEM_STARTED;
+ }
+ }
+ break;
+ }
+ if (!itemdone)
+ ptr++;
+ }
+ temp[ndim - 1]++;
+ ptr++;
+ }
+
+ /* only whitespace is allowed after the closing brace */
+ while (*ptr)
+ {
+ if (!array_isspace(*ptr++))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"", str),
+ errdetail("Junk after closing right brace.")));
+ }
+
+ /* special case for an empty array */
+ if (empty_array)
+ return 0;
+
+ for (i = 0; i < ndim; ++i)
+ dim[i] = temp[i];
+
+ return ndim;
+}
+
+/*
+ * ReadArrayStr :
+ * parses the array string pointed to by "arrayStr" and converts the values
+ * to internal format. Unspecified elements are initialized to nulls.
+ * The array dimensions must already have been determined.
+ *
+ * Inputs:
+ * arrayStr: the string to parse.
+ * CAUTION: the contents of "arrayStr" will be modified!
+ * origStr: the unmodified input string, used only in error messages.
+ * nitems: total number of array elements, as already determined.
+ * ndim: number of array dimensions
+ * dim[]: array axis lengths
+ * inputproc: type-specific input procedure for element datatype.
+ * typioparam, typmod: auxiliary values to pass to inputproc.
+ * typdelim: the value delimiter (type-specific).
+ * typlen, typbyval, typalign: storage parameters of element datatype.
+ *
+ * Outputs:
+ * values[]: filled with converted data values.
+ * nulls[]: filled with is-null markers.
+ * *hasnulls: set true iff there are any null elements.
+ * *nbytes: set to total size of data area needed (including alignment
+ * padding but not including array header overhead).
+ *
+ * Note that values[] and nulls[] are allocated by the caller, and must have
+ * nitems elements.
+ */
+static void
+ReadArrayStr(char *arrayStr,
+ const char *origStr,
+ int nitems,
+ int ndim,
+ int *dim,
+ FmgrInfo *inputproc,
+ Oid typioparam,
+ int32 typmod,
+ char typdelim,
+ int typlen,
+ bool typbyval,
+ char typalign,
+ Datum *values,
+ bool *nulls,
+ bool *hasnulls,
+ int32 *nbytes)
+{
+ int i,
+ nest_level = 0;
+ char *srcptr;
+ bool in_quotes = false;
+ bool eoArray = false;
+ bool hasnull;
+ int32 totbytes;
+ int indx[MAXDIM],
+ prod[MAXDIM];
+
+ mda_get_prod(ndim, dim, prod);
+ MemSet(indx, 0, sizeof(indx));
+
+ /* Initialize is-null markers to true */
+ memset(nulls, true, nitems * sizeof(bool));
+
+ /*
+ * We have to remove " and \ characters to create a clean item value to
+ * pass to the datatype input routine. We overwrite each item value
+ * in-place within arrayStr to do this. srcptr is the current scan point,
+ * and dstptr is where we are copying to.
+ *
+ * We also want to suppress leading and trailing unquoted whitespace. We
+ * use the leadingspace flag to suppress leading space. Trailing space is
+ * tracked by using dstendptr to point to the last significant output
+ * character.
+ *
+ * The error checking in this routine is mostly pro-forma, since we expect
+ * that ArrayCount() already validated the string. So we don't bother
+ * with errdetail messages.
+ */
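+ /*
+ * For instance, an item written as   "it\"em"   with surrounding unquoted
+ * whitespace is rewritten in place to the five characters it"em before
+ * being passed to the element input routine.
+ */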
+ srcptr = arrayStr;
+ while (!eoArray)
+ {
+ bool itemdone = false;
+ bool leadingspace = true;
+ bool hasquoting = false;
+ char *itemstart;
+ char *dstptr;
+ char *dstendptr;
+
+ i = -1;
+ itemstart = dstptr = dstendptr = srcptr;
+
+ while (!itemdone)
+ {
+ switch (*srcptr)
+ {
+ case '\0':
+ /* Signal a premature end of the string */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"",
+ origStr)));
+ break;
+ case '\\':
+ /* Skip backslash, copy next character as-is. */
+ srcptr++;
+ if (*srcptr == '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"",
+ origStr)));
+ *dstptr++ = *srcptr++;
+ /* Treat the escaped character as non-whitespace */
+ leadingspace = false;
+ dstendptr = dstptr;
+ hasquoting = true; /* can't be a NULL marker */
+ break;
+ case '"':
+ in_quotes = !in_quotes;
+ if (in_quotes)
+ leadingspace = false;
+ else
+ {
+ /*
+ * Advance dstendptr when we exit in_quotes; this
+ * saves having to do it in all the other in_quotes
+ * cases.
+ */
+ dstendptr = dstptr;
+ }
+ hasquoting = true; /* can't be a NULL marker */
+ srcptr++;
+ break;
+ case '{':
+ if (!in_quotes)
+ {
+ if (nest_level >= ndim)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"",
+ origStr)));
+ nest_level++;
+ indx[nest_level - 1] = 0;
+ srcptr++;
+ }
+ else
+ *dstptr++ = *srcptr++;
+ break;
+ case '}':
+ if (!in_quotes)
+ {
+ if (nest_level == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"",
+ origStr)));
+ if (i == -1)
+ i = ArrayGetOffset0(ndim, indx, prod);
+ indx[nest_level - 1] = 0;
+ nest_level--;
+ if (nest_level == 0)
+ eoArray = itemdone = true;
+ else
+ indx[nest_level - 1]++;
+ srcptr++;
+ }
+ else
+ *dstptr++ = *srcptr++;
+ break;
+ default:
+ if (in_quotes)
+ *dstptr++ = *srcptr++;
+ else if (*srcptr == typdelim)
+ {
+ if (i == -1)
+ i = ArrayGetOffset0(ndim, indx, prod);
+ itemdone = true;
+ indx[ndim - 1]++;
+ srcptr++;
+ }
+ else if (array_isspace(*srcptr))
+ {
+ /*
+ * If leading space, drop it immediately. Else, copy
+ * but don't advance dstendptr.
+ */
+ if (leadingspace)
+ srcptr++;
+ else
+ *dstptr++ = *srcptr++;
+ }
+ else
+ {
+ *dstptr++ = *srcptr++;
+ leadingspace = false;
+ dstendptr = dstptr;
+ }
+ break;
+ }
+ }
+
+ Assert(dstptr < srcptr);
+ *dstendptr = '\0';
+
+ if (i < 0 || i >= nitems)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed array literal: \"%s\"",
+ origStr)));
+
+ if (Array_nulls && !hasquoting &&
+ pg_strcasecmp(itemstart, "NULL") == 0)
+ {
+ /* it's a NULL item */
+ values[i] = InputFunctionCall(inputproc, NULL,
+ typioparam, typmod);
+ nulls[i] = true;
+ }
+ else
+ {
+ values[i] = InputFunctionCall(inputproc, itemstart,
+ typioparam, typmod);
+ nulls[i] = false;
+ }
+ }
+
+ /*
+ * Check for nulls, compute total data space needed
+ */
+ hasnull = false;
+ totbytes = 0;
+ for (i = 0; i < nitems; i++)
+ {
+ if (nulls[i])
+ hasnull = true;
+ else
+ {
+ /* let's just make sure data is not toasted */
+ if (typlen == -1)
+ values[i] = PointerGetDatum(PG_DETOAST_DATUM(values[i]));
+ totbytes = att_addlength_datum(totbytes, typlen, values[i]);
+ totbytes = att_align_nominal(totbytes, typalign);
+ /* check for overflow of total request */
+ if (!AllocSizeIsValid(totbytes))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("array size exceeds the maximum allowed (%d)",
+ (int) MaxAllocSize)));
+ }
+ }
+ *hasnulls = hasnull;
+ *nbytes = totbytes;
+}
+
+
+/*
+ * Copy data into an array object from a temporary array of Datums.
+ *
+ * array: array object (with header fields already filled in)
+ * values: array of Datums to be copied
+ * nulls: array of is-null flags (can be NULL if no nulls)
+ * nitems: number of Datums to be copied
+ * typbyval, typlen, typalign: info about element datatype
+ * freedata: if true and element type is pass-by-ref, pfree data values
+ * referenced by Datums after copying them.
+ *
+ * If the input data is of varlena type, the caller must have ensured that
+ * the values are not toasted. (Doing it here doesn't work since the
+ * caller has already allocated space for the array...)
+ */
+void
+CopyArrayEls(ArrayType *array,
+ Datum *values,
+ bool *nulls,
+ int nitems,
+ int typlen,
+ bool typbyval,
+ char typalign,
+ bool freedata)
+{
+ char *p = ARR_DATA_PTR(array);
+ bits8 *bitmap = ARR_NULLBITMAP(array);
+ int bitval = 0;
+ int bitmask = 1;
+ int i;
+
+ if (typbyval)
+ freedata = false;
+
+ for (i = 0; i < nitems; i++)
+ {
+ if (nulls && nulls[i])
+ {
+ if (!bitmap) /* shouldn't happen */
+ elog(ERROR, "null array element where not supported");
+ /* bitmap bit stays 0 */
+ }
+ else
+ {
+ bitval |= bitmask;
+ p += ArrayCastAndSet(values[i], typlen, typbyval, typalign, p);
+ if (freedata)
+ pfree(DatumGetPointer(values[i]));
+ }
+ if (bitmap)
+ {
+ bitmask <<= 1;
+ if (bitmask == 0x100)
+ {
+ *bitmap++ = bitval;
+ bitval = 0;
+ bitmask = 1;
+ }
+ }
+ }
+
+ if (bitmap && bitmask != 1)
+ *bitmap = bitval;
+}
+
+/*
+ * array_out :
+ * takes the internal representation of an array and returns a string
+ * containing the array in its external format.
+ */
+Datum
+array_out(PG_FUNCTION_ARGS)
+{
+ AnyArrayType *v = PG_GETARG_ANY_ARRAY_P(0);
+ Oid element_type = AARR_ELEMTYPE(v);
+ int typlen;
+ bool typbyval;
+ char typalign;
+ char typdelim;
+ char *p,
+ *tmp,
+ *retval,
+ **values,
+ dims_str[(MAXDIM * 33) + 2];
+
+ /*
+ * 33 per dim since we assume 15 digits per number + ':' +'[]'
+ *
+ * +2 allows for assignment operator + trailing null
+ */
+ bool *needquotes,
+ needdims = false;
+ size_t overall_length;
+ int nitems,
+ i,
+ j,
+ k,
+ indx[MAXDIM];
+ int ndim,
+ *dims,
+ *lb;
+ array_iter iter;
+ ArrayMetaState *my_extra;
+
+ /*
+ * We arrange to look up info about element type, including its output
+ * conversion proc, only once per series of calls, assuming the element
+ * type doesn't change underneath us.
+ */
+ my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
+ if (my_extra == NULL)
+ {
+ fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(ArrayMetaState));
+ my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
+ my_extra->element_type = ~element_type;
+ }
+
+ if (my_extra->element_type != element_type)
+ {
+ /*
+ * Get info about element type, including its output conversion proc
+ */
+ get_type_io_data(element_type, IOFunc_output,
+ &my_extra->typlen, &my_extra->typbyval,
+ &my_extra->typalign, &my_extra->typdelim,
+ &my_extra->typioparam, &my_extra->typiofunc);
+ fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
+ fcinfo->flinfo->fn_mcxt);
+ my_extra->element_type = element_type;
+ }
+ typlen = my_extra->typlen;
+ typbyval = my_extra->typbyval;
+ typalign = my_extra->typalign;
+ typdelim = my_extra->typdelim;
+
+ ndim = AARR_NDIM(v);
+ dims = AARR_DIMS(v);
+ lb = AARR_LBOUND(v);
+ nitems = ArrayGetNItems(ndim, dims);
+
+ if (nitems == 0)
+ {
+ retval = pstrdup("{}");
+ PG_RETURN_CSTRING(retval);
+ }
+
+ /*
+ * we will need to add explicit dimensions if any dimension has a lower
+ * bound other than one
+ */
+ for (i = 0; i < ndim; i++)
+ {
+ if (lb[i] != 1)
+ {
+ needdims = true;
+ break;
+ }
+ }
+
+ /*
+ * Convert all values to string form, count total space needed (including
+ * any overhead such as escaping backslashes), and detect whether each
+ * item needs double quotes.
+ */
+ values = (char **) palloc(nitems * sizeof(char *));
+ needquotes = (bool *) palloc(nitems * sizeof(bool));
+ overall_length = 0;
+
+ array_iter_setup(&iter, v);
+
+ for (i = 0; i < nitems; i++)
+ {
+ Datum itemvalue;
+ bool isnull;
+ bool needquote;
+
+ /* Get source element, checking for NULL */
+ itemvalue = array_iter_next(&iter, &isnull, i,
+ typlen, typbyval, typalign);
+
+ if (isnull)
+ {
+ values[i] = pstrdup("NULL");
+ overall_length += 4;
+ needquote = false;
+ }
+ else
+ {
+ values[i] = OutputFunctionCall(&my_extra->proc, itemvalue);
+
+ /* count data plus backslashes; detect chars needing quotes */
+ if (values[i][0] == '\0')
+ needquote = true; /* force quotes for empty string */
+ else if (pg_strcasecmp(values[i], "NULL") == 0)
+ needquote = true; /* force quotes for literal NULL */
+ else
+ needquote = false;
+
+ for (tmp = values[i]; *tmp != '\0'; tmp++)
+ {
+ char ch = *tmp;
+
+ overall_length += 1;
+ if (ch == '"' || ch == '\\')
+ {
+ needquote = true;
+ overall_length += 1;
+ }
+ else if (ch == '{' || ch == '}' || ch == typdelim ||
+ array_isspace(ch))
+ needquote = true;
+ }
+ }
+
+ needquotes[i] = needquote;
+
+ /* Count the pair of double quotes, if needed */
+ if (needquote)
+ overall_length += 2;
+ /* and the comma (or other typdelim delimiter) */
+ overall_length += 1;
+ }
+
+ /*
+ * The very last array element doesn't have a typdelim delimiter after it,
+ * but that's OK; that space is needed for the trailing '\0'.
+ *
+ * Now count total number of curly brace pairs in output string.
+ */
+ for (i = j = 0, k = 1; i < ndim; i++)
+ {
+ j += k, k *= dims[i];
+ }
+ overall_length += 2 * j;
+
+ /* Format explicit dimensions if required */
+ dims_str[0] = '\0';
+ if (needdims)
+ {
+ char *ptr = dims_str;
+
+ for (i = 0; i < ndim; i++)
+ {
+ sprintf(ptr, "[%d:%d]", lb[i], lb[i] + dims[i] - 1);
+ ptr += strlen(ptr);
+ }
+ *ptr++ = *ASSGN;
+ *ptr = '\0';
+ overall_length += ptr - dims_str;
+ }
+
+ /* Now construct the output string */
+ retval = (char *) palloc(overall_length);
+ p = retval;
+
+#define APPENDSTR(str) (strcpy(p, (str)), p += strlen(p))
+#define APPENDCHAR(ch) (*p++ = (ch), *p = '\0')
+
+ if (needdims)
+ APPENDSTR(dims_str);
+ APPENDCHAR('{');
+ for (i = 0; i < ndim; i++)
+ indx[i] = 0;
+ j = 0;
+ k = 0;
+ do
+ {
+ for (i = j; i < ndim - 1; i++)
+ APPENDCHAR('{');
+
+ if (needquotes[k])
+ {
+ APPENDCHAR('"');
+ for (tmp = values[k]; *tmp; tmp++)
+ {
+ char ch = *tmp;
+
+ if (ch == '"' || ch == '\\')
+ *p++ = '\\';
+ *p++ = ch;
+ }
+ *p = '\0';
+ APPENDCHAR('"');
+ }
+ else
+ APPENDSTR(values[k]);
+ pfree(values[k++]);
+
+ for (i = ndim - 1; i >= 0; i--)
+ {
+ if (++(indx[i]) < dims[i])
+ {
+ APPENDCHAR(typdelim);
+ break;
+ }
+ else
+ {
+ indx[i] = 0;
+ APPENDCHAR('}');
+ }
+ }
+ j = i;
+ } while (j != -1);
+
+#undef APPENDSTR
+#undef APPENDCHAR
+
+ /* Assert that we calculated the string length accurately */
+ Assert(overall_length == (p - retval + 1));
+
+ pfree(values);
+ pfree(needquotes);
+
+ PG_RETURN_CSTRING(retval);
+}
+
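+/*
+ * Illustrative usage sketch, not part of the original file: converting an
+ * array Datum to its text form from C code.  array_out() relies on
+ * fn_extra caching, so ordinary callers go through the array type's
+ * registered output function instead of calling it directly.  The helper
+ * name and the #ifdef guard are invented for the example; it assumes only
+ * the headers this file already includes.
+ */
+#ifdef ARRAY_OUT_USAGE_EXAMPLE
+static char *
+example_array_to_cstring(Datum arraydatum, Oid arraytypid)
+{
+ Oid typoutput;
+ bool typisvarlena;
+
+ /* look up the output function of the array type (e.g. INT4ARRAYOID) */
+ getTypeOutputInfo(arraytypid, &typoutput, &typisvarlena);
+
+ /* yields e.g. "{1,2,3}", or "[0:2]={1,2,3}" for non-default bounds */
+ return OidOutputFunctionCall(typoutput, arraydatum);
+}
+#endif
+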
+/*
+ * array_recv :
+ * converts an array from the external binary format to
+ * its internal format.
+ *
+ * return value :
+ * the internal representation of the input array
+ */
+Datum
+array_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ Oid spec_element_type = PG_GETARG_OID(1); /* type of an array
+ * element */
+ int32 typmod = PG_GETARG_INT32(2); /* typmod for array elements */
+ Oid element_type;
+ int typlen;
+ bool typbyval;
+ char typalign;
+ Oid typioparam;
+ int i,
+ nitems;
+ Datum *dataPtr;
+ bool *nullsPtr;
+ bool hasnulls;
+ int32 nbytes;
+ int32 dataoffset;
+ ArrayType *retval;
+ int ndim,
+ flags,
+ dim[MAXDIM],
+ lBound[MAXDIM];
+ ArrayMetaState *my_extra;
+
+ /* Get the array header information */
+ ndim = pq_getmsgint(buf, 4);
+ if (ndim < 0) /* we do allow zero-dimension arrays */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("invalid number of dimensions: %d", ndim)));
+ if (ndim > MAXDIM)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+ ndim, MAXDIM)));
+
+ flags = pq_getmsgint(buf, 4);
+ if (flags != 0 && flags != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("invalid array flags")));
+
+ /* Check element type recorded in the data */
+ element_type = pq_getmsgint(buf, sizeof(Oid));
+
+ /*
+ * From a security standpoint, it doesn't matter whether the input's
+ * element type matches what we expect: the element type's receive
+ * function has to be robust enough to cope with invalid data. However,
+ * from a user-friendliness standpoint, it's nicer to complain about type
+ * mismatches than to throw "improper binary format" errors. But there's
+ * a problem: only built-in types have OIDs that are stable enough to
+ * believe that a mismatch is a real issue. So complain only if both OIDs
+ * are in the built-in range. Otherwise, carry on with the element type
+ * we "should" be getting.
+ */
+ if (element_type != spec_element_type)
+ {
+ if (element_type < FirstGenbkiObjectId &&
+ spec_element_type < FirstGenbkiObjectId)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("binary data has array element type %u (%s) instead of expected %u (%s)",
+ element_type,
+ format_type_extended(element_type, -1,
+ FORMAT_TYPE_ALLOW_INVALID),
+ spec_element_type,
+ format_type_extended(spec_element_type, -1,
+ FORMAT_TYPE_ALLOW_INVALID))));
+ element_type = spec_element_type;
+ }
+
+ for (i = 0; i < ndim; i++)
+ {
+ dim[i] = pq_getmsgint(buf, 4);
+ lBound[i] = pq_getmsgint(buf, 4);
+ }
+
+ /* This checks for overflow of array dimensions */
+ nitems = ArrayGetNItems(ndim, dim);
+ ArrayCheckBounds(ndim, dim, lBound);
+
+ /*
+ * We arrange to look up info about element type, including its receive
+ * conversion proc, only once per series of calls, assuming the element
+ * type doesn't change underneath us.
+ */
+ my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
+ if (my_extra == NULL)
+ {
+ fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(ArrayMetaState));
+ my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
+ my_extra->element_type = ~element_type;
+ }
+
+ if (my_extra->element_type != element_type)
+ {
+ /* Get info about element type, including its receive proc */
+ get_type_io_data(element_type, IOFunc_receive,
+ &my_extra->typlen, &my_extra->typbyval,
+ &my_extra->typalign, &my_extra->typdelim,
+ &my_extra->typioparam, &my_extra->typiofunc);
+ if (!OidIsValid(my_extra->typiofunc))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("no binary input function available for type %s",
+ format_type_be(element_type))));
+ fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
+ fcinfo->flinfo->fn_mcxt);
+ my_extra->element_type = element_type;
+ }
+
+ if (nitems == 0)
+ {
+ /* Return empty array ... but not till we've validated element_type */
+ PG_RETURN_ARRAYTYPE_P(construct_empty_array(element_type));
+ }
+
+ typlen = my_extra->typlen;
+ typbyval = my_extra->typbyval;
+ typalign = my_extra->typalign;
+ typioparam = my_extra->typioparam;
+
+ dataPtr = (Datum *) palloc(nitems * sizeof(Datum));
+ nullsPtr = (bool *) palloc(nitems * sizeof(bool));
+ ReadArrayBinary(buf, nitems,
+ &my_extra->proc, typioparam, typmod,
+ typlen, typbyval, typalign,
+ dataPtr, nullsPtr,
+ &hasnulls, &nbytes);
+ if (hasnulls)
+ {
+ dataoffset = ARR_OVERHEAD_WITHNULLS(ndim, nitems);
+ nbytes += dataoffset;
+ }
+ else
+ {
+ dataoffset = 0; /* marker for no null bitmap */
+ nbytes += ARR_OVERHEAD_NONULLS(ndim);
+ }
+ retval = (ArrayType *) palloc0(nbytes);
+ SET_VARSIZE(retval, nbytes);
+ retval->ndim = ndim;
+ retval->dataoffset = dataoffset;
+ retval->elemtype = element_type;
+ memcpy(ARR_DIMS(retval), dim, ndim * sizeof(int));
+ memcpy(ARR_LBOUND(retval), lBound, ndim * sizeof(int));
+
+ CopyArrayEls(retval,
+ dataPtr, nullsPtr, nitems,
+ typlen, typbyval, typalign,
+ true);
+
+ pfree(dataPtr);
+ pfree(nullsPtr);
+
+ PG_RETURN_ARRAYTYPE_P(retval);
+}
+
+/*
+ * ReadArrayBinary:
+ * collect the data elements of an array being read in binary style.
+ *
+ * Inputs:
+ * buf: the data buffer to read from.
+ * nitems: total number of array elements (already read).
+ * receiveproc: type-specific receive procedure for element datatype.
+ * typioparam, typmod: auxiliary values to pass to receiveproc.
+ * typlen, typbyval, typalign: storage parameters of element datatype.
+ *
+ * Outputs:
+ * values[]: filled with converted data values.
+ * nulls[]: filled with is-null markers.
+ * *hasnulls: set true iff there are any null elements.
+ * *nbytes: set to total size of data area needed (including alignment
+ * padding but not including array header overhead).
+ *
+ * Note that values[] and nulls[] are allocated by the caller, and must have
+ * nitems elements.
+ */
+static void
+ReadArrayBinary(StringInfo buf,
+ int nitems,
+ FmgrInfo *receiveproc,
+ Oid typioparam,
+ int32 typmod,
+ int typlen,
+ bool typbyval,
+ char typalign,
+ Datum *values,
+ bool *nulls,
+ bool *hasnulls,
+ int32 *nbytes)
+{
+ int i;
+ bool hasnull;
+ int32 totbytes;
+
+ for (i = 0; i < nitems; i++)
+ {
+ int itemlen;
+ StringInfoData elem_buf;
+ char csave;
+
+ /* Get and check the item length */
+ itemlen = pq_getmsgint(buf, 4);
+ if (itemlen < -1 || itemlen > (buf->len - buf->cursor))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("insufficient data left in message")));
+
+ if (itemlen == -1)
+ {
+ /* -1 length means NULL */
+ values[i] = ReceiveFunctionCall(receiveproc, NULL,
+ typioparam, typmod);
+ nulls[i] = true;
+ continue;
+ }
+
+ /*
+ * Rather than copying data around, we just set up a phony StringInfo
+ * pointing to the correct portion of the input buffer. We assume we
+ * can scribble on the input buffer so as to maintain the convention
+ * that StringInfos have a trailing null.
+ */
+ elem_buf.data = &buf->data[buf->cursor];
+ elem_buf.maxlen = itemlen + 1;
+ elem_buf.len = itemlen;
+ elem_buf.cursor = 0;
+
+ buf->cursor += itemlen;
+
+ csave = buf->data[buf->cursor];
+ buf->data[buf->cursor] = '\0';
+
+ /* Now call the element's receiveproc */
+ values[i] = ReceiveFunctionCall(receiveproc, &elem_buf,
+ typioparam, typmod);
+ nulls[i] = false;
+
+ /* Trouble if it didn't eat the whole buffer */
+ if (elem_buf.cursor != itemlen)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("improper binary format in array element %d",
+ i + 1)));
+
+ buf->data[buf->cursor] = csave;
+ }
+
+ /*
+ * Check for nulls, compute total data space needed
+ */
+ hasnull = false;
+ totbytes = 0;
+ for (i = 0; i < nitems; i++)
+ {
+ if (nulls[i])
+ hasnull = true;
+ else
+ {
+ /* let's just make sure data is not toasted */
+ if (typlen == -1)
+ values[i] = PointerGetDatum(PG_DETOAST_DATUM(values[i]));
+ totbytes = att_addlength_datum(totbytes, typlen, values[i]);
+ totbytes = att_align_nominal(totbytes, typalign);
+ /* check for overflow of total request */
+ if (!AllocSizeIsValid(totbytes))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("array size exceeds the maximum allowed (%d)",
+ (int) MaxAllocSize)));
+ }
+ }
+ *hasnulls = hasnull;
+ *nbytes = totbytes;
+}
+
+
+/*
+ * array_send :
+ * takes the internal representation of an array and returns a bytea
+ * containing the array in its external binary format.
+ */
+Datum
+array_send(PG_FUNCTION_ARGS)
+{
+ AnyArrayType *v = PG_GETARG_ANY_ARRAY_P(0);
+ Oid element_type = AARR_ELEMTYPE(v);
+ int typlen;
+ bool typbyval;
+ char typalign;
+ int nitems,
+ i;
+ int ndim,
+ *dim,
+ *lb;
+ StringInfoData buf;
+ array_iter iter;
+ ArrayMetaState *my_extra;
+
+ /*
+ * We arrange to look up info about element type, including its send
+ * conversion proc, only once per series of calls, assuming the element
+ * type doesn't change underneath us.
+ */
+ my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
+ if (my_extra == NULL)
+ {
+ fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(ArrayMetaState));
+ my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
+ my_extra->element_type = ~element_type;
+ }
+
+ if (my_extra->element_type != element_type)
+ {
+ /* Get info about element type, including its send proc */
+ get_type_io_data(element_type, IOFunc_send,
+ &my_extra->typlen, &my_extra->typbyval,
+ &my_extra->typalign, &my_extra->typdelim,
+ &my_extra->typioparam, &my_extra->typiofunc);
+ if (!OidIsValid(my_extra->typiofunc))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("no binary output function available for type %s",
+ format_type_be(element_type))));
+ fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
+ fcinfo->flinfo->fn_mcxt);
+ my_extra->element_type = element_type;
+ }
+ typlen = my_extra->typlen;
+ typbyval = my_extra->typbyval;
+ typalign = my_extra->typalign;
+
+ ndim = AARR_NDIM(v);
+ dim = AARR_DIMS(v);
+ lb = AARR_LBOUND(v);
+ nitems = ArrayGetNItems(ndim, dim);
+
+ pq_begintypsend(&buf);
+
+ /* Send the array header information */
+ pq_sendint32(&buf, ndim);
+ pq_sendint32(&buf, AARR_HASNULL(v) ? 1 : 0);
+ pq_sendint32(&buf, element_type);
+ for (i = 0; i < ndim; i++)
+ {
+ pq_sendint32(&buf, dim[i]);
+ pq_sendint32(&buf, lb[i]);
+ }
+
+ /* Send the array elements using the element's own sendproc */
+ array_iter_setup(&iter, v);
+
+ for (i = 0; i < nitems; i++)
+ {
+ Datum itemvalue;
+ bool isnull;
+
+ /* Get source element, checking for NULL */
+ itemvalue = array_iter_next(&iter, &isnull, i,
+ typlen, typbyval, typalign);
+
+ if (isnull)
+ {
+ /* -1 length means a NULL */
+ pq_sendint32(&buf, -1);
+ }
+ else
+ {
+ bytea *outputbytes;
+
+ outputbytes = SendFunctionCall(&my_extra->proc, itemvalue);
+ pq_sendint32(&buf, VARSIZE(outputbytes) - VARHDRSZ);
+ pq_sendbytes(&buf, VARDATA(outputbytes),
+ VARSIZE(outputbytes) - VARHDRSZ);
+ pfree(outputbytes);
+ }
+ }
+
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
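+/*
+ * Illustrative wire layout (not part of the original file): the message
+ * body produced above for '{1,NULL,3}'::int4[] is, with all integers in
+ * network byte order,
+ *
+ *   int32 ndim     = 1
+ *   int32 hasnull  = 1
+ *   int32 elemtype = 23 (INT4OID)
+ *   int32 dim[0]   = 3,  int32 lb[0] = 1
+ *   int32 len = 4,   bytes 00 00 00 01
+ *   int32 len = -1   (NULL element, no data bytes)
+ *   int32 len = 4,   bytes 00 00 00 03
+ *
+ * array_recv() above accepts exactly this layout on input.
+ */
+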
+/*
+ * array_ndims :
+ * returns the number of dimensions of the array pointed to by "v"
+ */
+Datum
+array_ndims(PG_FUNCTION_ARGS)
+{
+ AnyArrayType *v = PG_GETARG_ANY_ARRAY_P(0);
+
+ /* Sanity check: does it look like an array at all? */
+ if (AARR_NDIM(v) <= 0 || AARR_NDIM(v) > MAXDIM)
+ PG_RETURN_NULL();
+
+ PG_RETURN_INT32(AARR_NDIM(v));
+}
+
+/*
+ * array_dims :
+ * returns the dimensions of the array pointed to by "v", as a "text"
+ */
+Datum
+array_dims(PG_FUNCTION_ARGS)
+{
+ AnyArrayType *v = PG_GETARG_ANY_ARRAY_P(0);
+ char *p;
+ int i;
+ int *dimv,
+ *lb;
+
+ /*
+ * 33 since we assume 15 digits per number + ':' +'[]'
+ *
+ * +1 for trailing null
+ */
+ char buf[MAXDIM * 33 + 1];
+
+ /* Sanity check: does it look like an array at all? */
+ if (AARR_NDIM(v) <= 0 || AARR_NDIM(v) > MAXDIM)
+ PG_RETURN_NULL();
+
+ dimv = AARR_DIMS(v);
+ lb = AARR_LBOUND(v);
+
+ p = buf;
+ for (i = 0; i < AARR_NDIM(v); i++)
+ {
+ sprintf(p, "[%d:%d]", lb[i], dimv[i] + lb[i] - 1);
+ p += strlen(p);
+ }
+
+ PG_RETURN_TEXT_P(cstring_to_text(buf));
+}
+
+/*
+ * array_lower :
+ * returns the lower bound of the requested dimension of
+ * the array pointed to by "v", as an int4
+ */
+Datum
+array_lower(PG_FUNCTION_ARGS)
+{
+ AnyArrayType *v = PG_GETARG_ANY_ARRAY_P(0);
+ int reqdim = PG_GETARG_INT32(1);
+ int *lb;
+ int result;
+
+ /* Sanity check: does it look like an array at all? */
+ if (AARR_NDIM(v) <= 0 || AARR_NDIM(v) > MAXDIM)
+ PG_RETURN_NULL();
+
+ /* Sanity check: was the requested dim valid */
+ if (reqdim <= 0 || reqdim > AARR_NDIM(v))
+ PG_RETURN_NULL();
+
+ lb = AARR_LBOUND(v);
+ result = lb[reqdim - 1];
+
+ PG_RETURN_INT32(result);
+}
+
+/*
+ * array_upper :
+ * returns the upper bound of the requested dimension of
+ * the array pointed to by "v", as an int4
+ */
+Datum
+array_upper(PG_FUNCTION_ARGS)
+{
+ AnyArrayType *v = PG_GETARG_ANY_ARRAY_P(0);
+ int reqdim = PG_GETARG_INT32(1);
+ int *dimv,
+ *lb;
+ int result;
+
+ /* Sanity check: does it look like an array at all? */
+ if (AARR_NDIM(v) <= 0 || AARR_NDIM(v) > MAXDIM)
+ PG_RETURN_NULL();
+
+ /* Sanity check: was the requested dim valid */
+ if (reqdim <= 0 || reqdim > AARR_NDIM(v))
+ PG_RETURN_NULL();
+
+ lb = AARR_LBOUND(v);
+ dimv = AARR_DIMS(v);
+
+ result = dimv[reqdim - 1] + lb[reqdim - 1] - 1;
+
+ PG_RETURN_INT32(result);
+}
+
+/*
+ * array_length :
+ * returns the length of the requested dimension of
+ * the array pointed to by "v", as an int4
+ */
+Datum
+array_length(PG_FUNCTION_ARGS)
+{
+ AnyArrayType *v = PG_GETARG_ANY_ARRAY_P(0);
+ int reqdim = PG_GETARG_INT32(1);
+ int *dimv;
+ int result;
+
+ /* Sanity check: does it look like an array at all? */
+ if (AARR_NDIM(v) <= 0 || AARR_NDIM(v) > MAXDIM)
+ PG_RETURN_NULL();
+
+ /* Sanity check: was the requested dim valid */
+ if (reqdim <= 0 || reqdim > AARR_NDIM(v))
+ PG_RETURN_NULL();
+
+ dimv = AARR_DIMS(v);
+
+ result = dimv[reqdim - 1];
+
+ PG_RETURN_INT32(result);
+}
+
+/*
+ * array_cardinality:
+ * returns the total number of elements in an array
+ */
+Datum
+array_cardinality(PG_FUNCTION_ARGS)
+{
+ AnyArrayType *v = PG_GETARG_ANY_ARRAY_P(0);
+
+ PG_RETURN_INT32(ArrayGetNItems(AARR_NDIM(v), AARR_DIMS(v)));
+}
+
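+/*
+ * Illustrative behavior of the inspection functions above (not part of
+ * the original file), for v = '[0:1][1:2]={{1,2},{3,4}}'::int[]:
+ *
+ *   array_ndims(v)     -> 2
+ *   array_dims(v)      -> '[0:1][1:2]'
+ *   array_lower(v, 1)  -> 0         array_upper(v, 1) -> 1
+ *   array_length(v, 2) -> 2         cardinality(v)    -> 4
+ */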
+
+/*
+ * array_get_element :
+ * This routine takes an array datum and a subscript array and returns
+ * the referenced item as a Datum. Note that for a pass-by-reference
+ * datatype, the returned Datum is a pointer into the array object.
+ *
+ * This handles both ordinary varlena arrays and fixed-length arrays.
+ *
+ * Inputs:
+ * arraydatum: the array object (mustn't be NULL)
+ * nSubscripts: number of subscripts supplied
+ * indx[]: the subscript values
+ * arraytyplen: pg_type.typlen for the array type
+ * elmlen: pg_type.typlen for the array's element type
+ * elmbyval: pg_type.typbyval for the array's element type
+ * elmalign: pg_type.typalign for the array's element type
+ *
+ * Outputs:
+ * The return value is the element Datum.
+ * *isNull is set to indicate whether the element is NULL.
+ */
+Datum
+array_get_element(Datum arraydatum,
+ int nSubscripts,
+ int *indx,
+ int arraytyplen,
+ int elmlen,
+ bool elmbyval,
+ char elmalign,
+ bool *isNull)
+{
+ int i,
+ ndim,
+ *dim,
+ *lb,
+ offset,
+ fixedDim[1],
+ fixedLb[1];
+ char *arraydataptr,
+ *retptr;
+ bits8 *arraynullsptr;
+
+ if (arraytyplen > 0)
+ {
+ /*
+ * fixed-length arrays -- these are assumed to be 1-d, 0-based
+ */
+ ndim = 1;
+ fixedDim[0] = arraytyplen / elmlen;
+ fixedLb[0] = 0;
+ dim = fixedDim;
+ lb = fixedLb;
+ arraydataptr = (char *) DatumGetPointer(arraydatum);
+ arraynullsptr = NULL;
+ }
+ else if (VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(arraydatum)))
+ {
+ /* expanded array: let's do this in a separate function */
+ return array_get_element_expanded(arraydatum,
+ nSubscripts,
+ indx,
+ arraytyplen,
+ elmlen,
+ elmbyval,
+ elmalign,
+ isNull);
+ }
+ else
+ {
+ /* detoast array if necessary, producing normal varlena input */
+ ArrayType *array = DatumGetArrayTypeP(arraydatum);
+
+ ndim = ARR_NDIM(array);
+ dim = ARR_DIMS(array);
+ lb = ARR_LBOUND(array);
+ arraydataptr = ARR_DATA_PTR(array);
+ arraynullsptr = ARR_NULLBITMAP(array);
+ }
+
+ /*
+ * Return NULL for invalid subscript
+ */
+ if (ndim != nSubscripts || ndim <= 0 || ndim > MAXDIM)
+ {
+ *isNull = true;
+ return (Datum) 0;
+ }
+ for (i = 0; i < ndim; i++)
+ {
+ if (indx[i] < lb[i] || indx[i] >= (dim[i] + lb[i]))
+ {
+ *isNull = true;
+ return (Datum) 0;
+ }
+ }
+
+ /*
+ * Calculate the element number
+ */
+ offset = ArrayGetOffset(nSubscripts, dim, lb, indx);
+
+ /*
+ * Check for NULL array element
+ */
+ if (array_get_isnull(arraynullsptr, offset))
+ {
+ *isNull = true;
+ return (Datum) 0;
+ }
+
+ /*
+ * OK, get the element
+ */
+ *isNull = false;
+ retptr = array_seek(arraydataptr, 0, arraynullsptr, offset,
+ elmlen, elmbyval, elmalign);
+ return ArrayCast(retptr, elmbyval, elmlen);
+}
+
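+/*
+ * Illustrative usage sketch, not part of the original file: fetching one
+ * element of a one-dimensional int4[] Datum.  The element-type attributes
+ * are hard-coded for int4 here; real callers usually obtain them with
+ * get_typlenbyvalalign().  The helper name and the #ifdef guard are
+ * invented for the example.
+ */
+#ifdef ARRAY_GET_ELEMENT_USAGE_EXAMPLE
+static Datum
+example_fetch_int4_element(Datum arraydatum, int subscript, bool *isNull)
+{
+ int indx[1];
+
+ indx[0] = subscript;
+ /* int4[]: varlena array (typlen -1); element len 4, by-value, 'i' align */
+ return array_get_element(arraydatum, 1, indx,
+ -1, sizeof(int32), true, TYPALIGN_INT,
+ isNull);
+}
+#endif
+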
+/*
+ * Implementation of array_get_element() for an expanded array
+ */
+static Datum
+array_get_element_expanded(Datum arraydatum,
+ int nSubscripts, int *indx,
+ int arraytyplen,
+ int elmlen, bool elmbyval, char elmalign,
+ bool *isNull)
+{
+ ExpandedArrayHeader *eah;
+ int i,
+ ndim,
+ *dim,
+ *lb,
+ offset;
+ Datum *dvalues;
+ bool *dnulls;
+
+ eah = (ExpandedArrayHeader *) DatumGetEOHP(arraydatum);
+ Assert(eah->ea_magic == EA_MAGIC);
+
+ /* sanity-check caller's info against object */
+ Assert(arraytyplen == -1);
+ Assert(elmlen == eah->typlen);
+ Assert(elmbyval == eah->typbyval);
+ Assert(elmalign == eah->typalign);
+
+ ndim = eah->ndims;
+ dim = eah->dims;
+ lb = eah->lbound;
+
+ /*
+ * Return NULL for invalid subscript
+ */
+ if (ndim != nSubscripts || ndim <= 0 || ndim > MAXDIM)
+ {
+ *isNull = true;
+ return (Datum) 0;
+ }
+ for (i = 0; i < ndim; i++)
+ {
+ if (indx[i] < lb[i] || indx[i] >= (dim[i] + lb[i]))
+ {
+ *isNull = true;
+ return (Datum) 0;
+ }
+ }
+
+ /*
+ * Calculate the element number
+ */
+ offset = ArrayGetOffset(nSubscripts, dim, lb, indx);
+
+ /*
+ * Deconstruct array if we didn't already. Note that we apply this even
+ * if the input is nominally read-only: it should be safe enough.
+ */
+ deconstruct_expanded_array(eah);
+
+ dvalues = eah->dvalues;
+ dnulls = eah->dnulls;
+
+ /*
+ * Check for NULL array element
+ */
+ if (dnulls && dnulls[offset])
+ {
+ *isNull = true;
+ return (Datum) 0;
+ }
+
+ /*
+ * OK, get the element. It's OK to return a pass-by-ref value as a
+ * pointer into the expanded array, for the same reason that regular
+ * array_get_element can return a pointer into flat arrays: the value is
+ * assumed not to change for as long as the Datum reference can exist.
+ */
+ *isNull = false;
+ return dvalues[offset];
+}
+
+/*
+ * array_get_slice :
+ * This routine takes an array and a range of indices (upperIndx and
+ * lowerIndx), creates a new array structure for the referred elements
+ * and returns a pointer to it.
+ *
+ * This handles both ordinary varlena arrays and fixed-length arrays.
+ *
+ * Inputs:
+ * arraydatum: the array object (mustn't be NULL)
+ * nSubscripts: number of subscripts supplied (must be same for upper/lower)
+ * upperIndx[]: the upper subscript values
+ * lowerIndx[]: the lower subscript values
+ * upperProvided[]: true for provided upper subscript values
+ * lowerProvided[]: true for provided lower subscript values
+ * arraytyplen: pg_type.typlen for the array type
+ * elmlen: pg_type.typlen for the array's element type
+ * elmbyval: pg_type.typbyval for the array's element type
+ * elmalign: pg_type.typalign for the array's element type
+ *
+ * Outputs:
+ * The return value is the new array Datum (it's never NULL)
+ *
+ * Omitted upper and lower subscript values are replaced by the corresponding
+ * array bound.
+ *
+ * NOTE: we assume it is OK to scribble on the provided subscript arrays
+ * lowerIndx[] and upperIndx[]; also, these arrays must be of size MAXDIM
+ * even when nSubscripts is less. These are generally just temporaries.
+ */
+Datum
+array_get_slice(Datum arraydatum,
+ int nSubscripts,
+ int *upperIndx,
+ int *lowerIndx,
+ bool *upperProvided,
+ bool *lowerProvided,
+ int arraytyplen,
+ int elmlen,
+ bool elmbyval,
+ char elmalign)
+{
+ ArrayType *array;
+ ArrayType *newarray;
+ int i,
+ ndim,
+ *dim,
+ *lb,
+ *newlb;
+ int fixedDim[1],
+ fixedLb[1];
+ Oid elemtype;
+ char *arraydataptr;
+ bits8 *arraynullsptr;
+ int32 dataoffset;
+ int bytes,
+ span[MAXDIM];
+
+ if (arraytyplen > 0)
+ {
+ /*
+ * fixed-length arrays -- currently, cannot slice these because the parser
+ * labels output as being of the fixed-length array type! Code below
+ * shows how we could support it if the parser were changed to label
+ * output as a suitable varlena array type.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("slices of fixed-length arrays not implemented")));
+
+ /*
+ * fixed-length arrays -- these are assumed to be 1-d, 0-based
+ *
+ * XXX where would we get the correct ELEMTYPE from?
+ */
+ ndim = 1;
+ fixedDim[0] = arraytyplen / elmlen;
+ fixedLb[0] = 0;
+ dim = fixedDim;
+ lb = fixedLb;
+ elemtype = InvalidOid; /* XXX */
+ arraydataptr = (char *) DatumGetPointer(arraydatum);
+ arraynullsptr = NULL;
+ }
+ else
+ {
+ /* detoast input array if necessary */
+ array = DatumGetArrayTypeP(arraydatum);
+
+ ndim = ARR_NDIM(array);
+ dim = ARR_DIMS(array);
+ lb = ARR_LBOUND(array);
+ elemtype = ARR_ELEMTYPE(array);
+ arraydataptr = ARR_DATA_PTR(array);
+ arraynullsptr = ARR_NULLBITMAP(array);
+ }
+
+ /*
+ * Check provided subscripts. A slice exceeding the current array limits
+ * is silently truncated to the array limits. If we end up with an empty
+ * slice, return an empty array.
+ */
+ if (ndim < nSubscripts || ndim <= 0 || ndim > MAXDIM)
+ return PointerGetDatum(construct_empty_array(elemtype));
+
+ for (i = 0; i < nSubscripts; i++)
+ {
+ if (!lowerProvided[i] || lowerIndx[i] < lb[i])
+ lowerIndx[i] = lb[i];
+ if (!upperProvided[i] || upperIndx[i] >= (dim[i] + lb[i]))
+ upperIndx[i] = dim[i] + lb[i] - 1;
+ if (lowerIndx[i] > upperIndx[i])
+ return PointerGetDatum(construct_empty_array(elemtype));
+ }
+ /* fill any missing subscript positions with full array range */
+ for (; i < ndim; i++)
+ {
+ lowerIndx[i] = lb[i];
+ upperIndx[i] = dim[i] + lb[i] - 1;
+ if (lowerIndx[i] > upperIndx[i])
+ return PointerGetDatum(construct_empty_array(elemtype));
+ }
+
+ mda_get_range(ndim, span, lowerIndx, upperIndx);
+
+ bytes = array_slice_size(arraydataptr, arraynullsptr,
+ ndim, dim, lb,
+ lowerIndx, upperIndx,
+ elmlen, elmbyval, elmalign);
+
+ /*
+ * Currently, we put a null bitmap in the result if the source has one;
+ * could be smarter ...
+ */
+ if (arraynullsptr)
+ {
+ dataoffset = ARR_OVERHEAD_WITHNULLS(ndim, ArrayGetNItems(ndim, span));
+ bytes += dataoffset;
+ }
+ else
+ {
+ dataoffset = 0; /* marker for no null bitmap */
+ bytes += ARR_OVERHEAD_NONULLS(ndim);
+ }
+
+ newarray = (ArrayType *) palloc0(bytes);
+ SET_VARSIZE(newarray, bytes);
+ newarray->ndim = ndim;
+ newarray->dataoffset = dataoffset;
+ newarray->elemtype = elemtype;
+ memcpy(ARR_DIMS(newarray), span, ndim * sizeof(int));
+
+ /*
+ * Lower bounds of the new array are set to 1. Formerly (before 7.3) we
+ * copied the given lowerIndx values ... but that seems confusing.
+ */
+ newlb = ARR_LBOUND(newarray);
+ for (i = 0; i < ndim; i++)
+ newlb[i] = 1;
+
+ array_extract_slice(newarray,
+ ndim, dim, lb,
+ arraydataptr, arraynullsptr,
+ lowerIndx, upperIndx,
+ elmlen, elmbyval, elmalign);
+
+ return PointerGetDatum(newarray);
+}
+
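+/*
+ * Illustrative slice behavior (not part of the original file): for
+ * v = '[0:2]={10,20,30}'::int[], the expression v[0:1] yields the first
+ * two elements, and per the note above the result's lower bound is reset
+ * to 1, so it reads back as '{10,20}' rather than '[0:1]={10,20}'.
+ * A slice exceeding the array bounds is truncated: v[1:99] yields '{20,30}'.
+ */
+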
+/*
+ * array_set_element :
+ * This routine sets the value of one array element (specified by
+ * a subscript array) to a new value specified by "dataValue".
+ *
+ * This handles both ordinary varlena arrays and fixed-length arrays.
+ *
+ * Inputs:
+ * arraydatum: the initial array object (mustn't be NULL)
+ * nSubscripts: number of subscripts supplied
+ * indx[]: the subscript values
+ * dataValue: the datum to be inserted at the given position
+ * isNull: whether dataValue is NULL
+ * arraytyplen: pg_type.typlen for the array type
+ * elmlen: pg_type.typlen for the array's element type
+ * elmbyval: pg_type.typbyval for the array's element type
+ * elmalign: pg_type.typalign for the array's element type
+ *
+ * Result:
+ * A new array is returned, just like the old except for the one
+ * modified entry. The original array object is not changed,
+ * unless what is passed is a read-write reference to an expanded
+ * array object; in that case the expanded array is updated in-place.
+ *
+ * For one-dimensional arrays only, we allow the array to be extended
+ * by assigning to a position outside the existing subscript range; any
+ * positions between the existing elements and the new one are set to NULLs.
+ * (XXX TODO: allow a corresponding behavior for multidimensional arrays)
+ *
+ * NOTE: For assignments, we throw an error for invalid subscripts etc,
+ * rather than returning a NULL as the fetch operations do.
+ */
+Datum
+array_set_element(Datum arraydatum,
+ int nSubscripts,
+ int *indx,
+ Datum dataValue,
+ bool isNull,
+ int arraytyplen,
+ int elmlen,
+ bool elmbyval,
+ char elmalign)
+{
+ ArrayType *array;
+ ArrayType *newarray;
+ int i,
+ ndim,
+ dim[MAXDIM],
+ lb[MAXDIM],
+ offset;
+ char *elt_ptr;
+ bool newhasnulls;
+ bits8 *oldnullbitmap;
+ int oldnitems,
+ newnitems,
+ olddatasize,
+ newsize,
+ olditemlen,
+ newitemlen,
+ overheadlen,
+ oldoverheadlen,
+ addedbefore,
+ addedafter,
+ lenbefore,
+ lenafter;
+
+ if (arraytyplen > 0)
+ {
+ /*
+ * fixed-length arrays -- these are assumed to be 1-d, 0-based. We
+ * cannot extend them, either.
+ */
+ char *resultarray;
+
+ if (nSubscripts != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("wrong number of array subscripts")));
+
+ if (indx[0] < 0 || indx[0] >= arraytyplen / elmlen)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("array subscript out of range")));
+
+ if (isNull)
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("cannot assign null value to an element of a fixed-length array")));
+
+ resultarray = (char *) palloc(arraytyplen);
+ memcpy(resultarray, DatumGetPointer(arraydatum), arraytyplen);
+ elt_ptr = (char *) resultarray + indx[0] * elmlen;
+ ArrayCastAndSet(dataValue, elmlen, elmbyval, elmalign, elt_ptr);
+ return PointerGetDatum(resultarray);
+ }
+
+ if (nSubscripts <= 0 || nSubscripts > MAXDIM)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("wrong number of array subscripts")));
+
+ /* make sure item to be inserted is not toasted */
+ if (elmlen == -1 && !isNull)
+ dataValue = PointerGetDatum(PG_DETOAST_DATUM(dataValue));
+
+ if (VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(arraydatum)))
+ {
+ /* expanded array: let's do this in a separate function */
+ return array_set_element_expanded(arraydatum,
+ nSubscripts,
+ indx,
+ dataValue,
+ isNull,
+ arraytyplen,
+ elmlen,
+ elmbyval,
+ elmalign);
+ }
+
+ /* detoast input array if necessary */
+ array = DatumGetArrayTypeP(arraydatum);
+
+ ndim = ARR_NDIM(array);
+
+ /*
+ * if number of dims is zero, i.e. an empty array, create an array with
+ * nSubscripts dimensions, and set the lower bounds to the supplied
+ * subscripts
+ */
+ if (ndim == 0)
+ {
+ Oid elmtype = ARR_ELEMTYPE(array);
+
+ for (i = 0; i < nSubscripts; i++)
+ {
+ dim[i] = 1;
+ lb[i] = indx[i];
+ }
+
+ return PointerGetDatum(construct_md_array(&dataValue, &isNull,
+ nSubscripts, dim, lb,
+ elmtype,
+ elmlen, elmbyval, elmalign));
+ }
+
+ if (ndim != nSubscripts)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("wrong number of array subscripts")));
+
+ /* copy dim/lb since we may modify them */
+ memcpy(dim, ARR_DIMS(array), ndim * sizeof(int));
+ memcpy(lb, ARR_LBOUND(array), ndim * sizeof(int));
+
+ newhasnulls = (ARR_HASNULL(array) || isNull);
+ addedbefore = addedafter = 0;
+
+ /*
+ * Check subscripts. We assume the existing subscripts passed
+ * ArrayCheckBounds, so that dim[i] + lb[i] can be computed without
+ * overflow. But we must beware of other overflows in our calculations of
+ * new dim[] values.
+ */
+ if (ndim == 1)
+ {
+ if (indx[0] < lb[0])
+ {
+ /* addedbefore = lb[0] - indx[0]; */
+ /* dim[0] += addedbefore; */
+ if (pg_sub_s32_overflow(lb[0], indx[0], &addedbefore) ||
+ pg_add_s32_overflow(dim[0], addedbefore, &dim[0]))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("array size exceeds the maximum allowed (%d)",
+ (int) MaxArraySize)));
+ lb[0] = indx[0];
+ if (addedbefore > 1)
+ newhasnulls = true; /* will insert nulls */
+ }
+ if (indx[0] >= (dim[0] + lb[0]))
+ {
+ /* addedafter = indx[0] - (dim[0] + lb[0]) + 1; */
+ /* dim[0] += addedafter; */
+ if (pg_sub_s32_overflow(indx[0], dim[0] + lb[0], &addedafter) ||
+ pg_add_s32_overflow(addedafter, 1, &addedafter) ||
+ pg_add_s32_overflow(dim[0], addedafter, &dim[0]))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("array size exceeds the maximum allowed (%d)",
+ (int) MaxArraySize)));
+ if (addedafter > 1)
+ newhasnulls = true; /* will insert nulls */
+ }
+ }
+ else
+ {
+ /*
+ * XXX currently we do not support extending multi-dimensional arrays
+ * during assignment
+ */
+ for (i = 0; i < ndim; i++)
+ {
+ if (indx[i] < lb[i] ||
+ indx[i] >= (dim[i] + lb[i]))
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("array subscript out of range")));
+ }
+ }
+
+ /* This checks for overflow of the array dimensions */
+ newnitems = ArrayGetNItems(ndim, dim);
+ ArrayCheckBounds(ndim, dim, lb);
+
+ /*
+ * Compute sizes of items and areas to copy
+ */
+ if (newhasnulls)
+ overheadlen = ARR_OVERHEAD_WITHNULLS(ndim, newnitems);
+ else
+ overheadlen = ARR_OVERHEAD_NONULLS(ndim);
+ oldnitems = ArrayGetNItems(ndim, ARR_DIMS(array));
+ oldnullbitmap = ARR_NULLBITMAP(array);
+ oldoverheadlen = ARR_DATA_OFFSET(array);
+ olddatasize = ARR_SIZE(array) - oldoverheadlen;
+ if (addedbefore)
+ {
+ offset = 0;
+ lenbefore = 0;
+ olditemlen = 0;
+ lenafter = olddatasize;
+ }
+ else if (addedafter)
+ {
+ offset = oldnitems;
+ lenbefore = olddatasize;
+ olditemlen = 0;
+ lenafter = 0;
+ }
+ else
+ {
+ offset = ArrayGetOffset(nSubscripts, dim, lb, indx);
+ elt_ptr = array_seek(ARR_DATA_PTR(array), 0, oldnullbitmap, offset,
+ elmlen, elmbyval, elmalign);
+ lenbefore = (int) (elt_ptr - ARR_DATA_PTR(array));
+ if (array_get_isnull(oldnullbitmap, offset))
+ olditemlen = 0;
+ else
+ {
+ olditemlen = att_addlength_pointer(0, elmlen, elt_ptr);
+ olditemlen = att_align_nominal(olditemlen, elmalign);
+ }
+ lenafter = (int) (olddatasize - lenbefore - olditemlen);
+ }
+
+ if (isNull)
+ newitemlen = 0;
+ else
+ {
+ newitemlen = att_addlength_datum(0, elmlen, dataValue);
+ newitemlen = att_align_nominal(newitemlen, elmalign);
+ }
+
+ newsize = overheadlen + lenbefore + newitemlen + lenafter;
+
+ /*
+ * OK, create the new array and fill in header/dimensions
+ */
+ newarray = (ArrayType *) palloc0(newsize);
+ SET_VARSIZE(newarray, newsize);
+ newarray->ndim = ndim;
+ newarray->dataoffset = newhasnulls ? overheadlen : 0;
+ newarray->elemtype = ARR_ELEMTYPE(array);
+ memcpy(ARR_DIMS(newarray), dim, ndim * sizeof(int));
+ memcpy(ARR_LBOUND(newarray), lb, ndim * sizeof(int));
+
+ /*
+ * Fill in data
+ */
+ memcpy((char *) newarray + overheadlen,
+ (char *) array + oldoverheadlen,
+ lenbefore);
+ if (!isNull)
+ ArrayCastAndSet(dataValue, elmlen, elmbyval, elmalign,
+ (char *) newarray + overheadlen + lenbefore);
+ memcpy((char *) newarray + overheadlen + lenbefore + newitemlen,
+ (char *) array + oldoverheadlen + lenbefore + olditemlen,
+ lenafter);
+
+ /*
+ * Fill in nulls bitmap if needed
+ *
+ * Note: it's possible we just replaced the last NULL with a non-NULL, and
+ * could get rid of the bitmap. Seems not worth testing for though.
+ */
+ if (newhasnulls)
+ {
+ bits8 *newnullbitmap = ARR_NULLBITMAP(newarray);
+
+ /* palloc0 above already marked any inserted positions as nulls */
+ /* Fix the inserted value */
+ if (addedafter)
+ array_set_isnull(newnullbitmap, newnitems - 1, isNull);
+ else
+ array_set_isnull(newnullbitmap, offset, isNull);
+ /* Fix the copied range(s) */
+ if (addedbefore)
+ array_bitmap_copy(newnullbitmap, addedbefore,
+ oldnullbitmap, 0,
+ oldnitems);
+ else
+ {
+ array_bitmap_copy(newnullbitmap, 0,
+ oldnullbitmap, 0,
+ offset);
+ if (addedafter == 0)
+ array_bitmap_copy(newnullbitmap, offset + 1,
+ oldnullbitmap, offset + 1,
+ oldnitems - offset - 1);
+ }
+ }
+
+ return PointerGetDatum(newarray);
+}
+
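+/*
+ * Illustrative usage sketch, not part of the original file: storing an
+ * int4 value into a one-dimensional int4[] Datum, extending the array if
+ * the subscript lies outside the current bounds (gaps become NULL, as
+ * described above; e.g. starting from '{1,2}', storing 50 at subscript 5
+ * yields '{1,2,NULL,NULL,50}').  The helper name and the #ifdef guard are
+ * invented for the example.
+ */
+#ifdef ARRAY_SET_ELEMENT_USAGE_EXAMPLE
+static Datum
+example_store_int4_element(Datum arraydatum, int subscript, int32 newval)
+{
+ int indx[1];
+
+ indx[0] = subscript;
+ /* int4[]: varlena array (typlen -1); element len 4, by-value, 'i' align */
+ return array_set_element(arraydatum, 1, indx,
+ Int32GetDatum(newval), false,
+ -1, sizeof(int32), true, TYPALIGN_INT);
+}
+#endif
+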
+/*
+ * Implementation of array_set_element() for an expanded array
+ *
+ * Note: as with any operation on a read/write expanded object, we must
+ * take pains not to leave the object in a corrupt state if we fail partway
+ * through.
+ */
+static Datum
+array_set_element_expanded(Datum arraydatum,
+ int nSubscripts, int *indx,
+ Datum dataValue, bool isNull,
+ int arraytyplen,
+ int elmlen, bool elmbyval, char elmalign)
+{
+ ExpandedArrayHeader *eah;
+ Datum *dvalues;
+ bool *dnulls;
+ int i,
+ ndim,
+ dim[MAXDIM],
+ lb[MAXDIM],
+ offset;
+ bool dimschanged,
+ newhasnulls;
+ int addedbefore,
+ addedafter;
+ char *oldValue;
+
+ /* Convert to R/W object if not so already */
+ eah = DatumGetExpandedArray(arraydatum);
+
+ /* Sanity-check caller's info against object; we don't use it otherwise */
+ Assert(arraytyplen == -1);
+ Assert(elmlen == eah->typlen);
+ Assert(elmbyval == eah->typbyval);
+ Assert(elmalign == eah->typalign);
+
+ /*
+ * Copy dimension info into local storage. This allows us to modify the
+ * dimensions if needed, while not messing up the expanded value if we
+ * fail partway through.
+ */
+ ndim = eah->ndims;
+ Assert(ndim >= 0 && ndim <= MAXDIM);
+ memcpy(dim, eah->dims, ndim * sizeof(int));
+ memcpy(lb, eah->lbound, ndim * sizeof(int));
+ dimschanged = false;
+
+ /*
+ * if number of dims is zero, i.e. an empty array, create an array with
+ * nSubscripts dimensions, and set the lower bounds to the supplied
+ * subscripts.
+ */
+ if (ndim == 0)
+ {
+ /*
+ * Allocate adequate space for new dimension info. This is harmless
+ * if we fail later.
+ */
+ Assert(nSubscripts > 0 && nSubscripts <= MAXDIM);
+ eah->dims = (int *) MemoryContextAllocZero(eah->hdr.eoh_context,
+ nSubscripts * sizeof(int));
+ eah->lbound = (int *) MemoryContextAllocZero(eah->hdr.eoh_context,
+ nSubscripts * sizeof(int));
+
+ /* Update local copies of dimension info */
+ ndim = nSubscripts;
+ for (i = 0; i < nSubscripts; i++)
+ {
+ dim[i] = 0;
+ lb[i] = indx[i];
+ }
+ dimschanged = true;
+ }
+ else if (ndim != nSubscripts)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("wrong number of array subscripts")));
+
+ /*
+ * Deconstruct array if we didn't already. (Someday maybe add a special
+ * case path for fixed-length, no-nulls cases, where we can overwrite an
+ * element in place without ever deconstructing. But today is not that
+ * day.)
+ */
+ deconstruct_expanded_array(eah);
+
+ /*
+ * Copy new element into array's context, if needed (we assume it's
+ * already detoasted, so no junk should be created). Doing this before
+ * we've made any significant changes ensures that our behavior is sane
+ * even when the source is a reference to some element of this same array.
+ * If we fail further down, this memory is leaked, but that's reasonably
+ * harmless.
+ */
+ if (!eah->typbyval && !isNull)
+ {
+ MemoryContext oldcxt = MemoryContextSwitchTo(eah->hdr.eoh_context);
+
+ dataValue = datumCopy(dataValue, false, eah->typlen);
+ MemoryContextSwitchTo(oldcxt);
+ }
+
+ dvalues = eah->dvalues;
+ dnulls = eah->dnulls;
+
+ newhasnulls = ((dnulls != NULL) || isNull);
+ addedbefore = addedafter = 0;
+
+ /*
+ * Check subscripts (this logic must match array_set_element). We assume
+ * the existing subscripts passed ArrayCheckBounds, so that dim[i] + lb[i]
+ * can be computed without overflow. But we must beware of other
+ * overflows in our calculations of new dim[] values.
+ */
+ if (ndim == 1)
+ {
+ if (indx[0] < lb[0])
+ {
+ /* addedbefore = lb[0] - indx[0]; */
+ /* dim[0] += addedbefore; */
+ if (pg_sub_s32_overflow(lb[0], indx[0], &addedbefore) ||
+ pg_add_s32_overflow(dim[0], addedbefore, &dim[0]))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("array size exceeds the maximum allowed (%d)",
+ (int) MaxArraySize)));
+ lb[0] = indx[0];
+ dimschanged = true;
+ if (addedbefore > 1)
+ newhasnulls = true; /* will insert nulls */
+ }
+ if (indx[0] >= (dim[0] + lb[0]))
+ {
+ /* addedafter = indx[0] - (dim[0] + lb[0]) + 1; */
+ /* dim[0] += addedafter; */
+ if (pg_sub_s32_overflow(indx[0], dim[0] + lb[0], &addedafter) ||
+ pg_add_s32_overflow(addedafter, 1, &addedafter) ||
+ pg_add_s32_overflow(dim[0], addedafter, &dim[0]))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("array size exceeds the maximum allowed (%d)",
+ (int) MaxArraySize)));
+ dimschanged = true;
+ if (addedafter > 1)
+ newhasnulls = true; /* will insert nulls */
+ }
+ }
+ else
+ {
+ /*
+ * XXX currently we do not support extending multi-dimensional arrays
+ * during assignment
+ */
+ for (i = 0; i < ndim; i++)
+ {
+ if (indx[i] < lb[i] ||
+ indx[i] >= (dim[i] + lb[i]))
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("array subscript out of range")));
+ }
+ }
+
+ /* Check for overflow of the array dimensions */
+ if (dimschanged)
+ {
+ (void) ArrayGetNItems(ndim, dim);
+ ArrayCheckBounds(ndim, dim, lb);
+ }
+
+ /* Now we can calculate linear offset of target item in array */
+ offset = ArrayGetOffset(nSubscripts, dim, lb, indx);
+
+ /* Physically enlarge existing dvalues/dnulls arrays if needed */
+ if (dim[0] > eah->dvalueslen)
+ {
+ /* We want some extra space if we're enlarging */
+ int newlen = dim[0] + dim[0] / 8;
+
+ newlen = Max(newlen, dim[0]); /* integer overflow guard */
+ eah->dvalues = dvalues = (Datum *)
+ repalloc(dvalues, newlen * sizeof(Datum));
+ if (dnulls)
+ eah->dnulls = dnulls = (bool *)
+ repalloc(dnulls, newlen * sizeof(bool));
+ eah->dvalueslen = newlen;
+ }
+
+ /*
+ * If we need a nulls bitmap and don't already have one, create it, being
+ * sure to mark all existing entries as not null.
+ */
+ if (newhasnulls && dnulls == NULL)
+ eah->dnulls = dnulls = (bool *)
+ MemoryContextAllocZero(eah->hdr.eoh_context,
+ eah->dvalueslen * sizeof(bool));
+
+ /*
+ * We now have all the needed space allocated, so we're ready to make
+ * irreversible changes. Be very wary of allowing failure below here.
+ */
+
+ /* Flattened value will no longer represent array accurately */
+ eah->fvalue = NULL;
+ /* And we don't know the flattened size either */
+ eah->flat_size = 0;
+
+ /* Update dimensionality info if needed */
+ if (dimschanged)
+ {
+ eah->ndims = ndim;
+ memcpy(eah->dims, dim, ndim * sizeof(int));
+ memcpy(eah->lbound, lb, ndim * sizeof(int));
+ }
+
+ /* Reposition items if needed, and fill addedbefore items with nulls */
+ if (addedbefore > 0)
+ {
+ memmove(dvalues + addedbefore, dvalues, eah->nelems * sizeof(Datum));
+ for (i = 0; i < addedbefore; i++)
+ dvalues[i] = (Datum) 0;
+ if (dnulls)
+ {
+ memmove(dnulls + addedbefore, dnulls, eah->nelems * sizeof(bool));
+ for (i = 0; i < addedbefore; i++)
+ dnulls[i] = true;
+ }
+ eah->nelems += addedbefore;
+ }
+
+ /* fill addedafter items with nulls */
+ if (addedafter > 0)
+ {
+ for (i = 0; i < addedafter; i++)
+ dvalues[eah->nelems + i] = (Datum) 0;
+ if (dnulls)
+ {
+ for (i = 0; i < addedafter; i++)
+ dnulls[eah->nelems + i] = true;
+ }
+ eah->nelems += addedafter;
+ }
+
+ /* Grab old element value for pfree'ing, if needed. */
+ if (!eah->typbyval && (dnulls == NULL || !dnulls[offset]))
+ oldValue = (char *) DatumGetPointer(dvalues[offset]);
+ else
+ oldValue = NULL;
+
+ /* And finally we can insert the new element. */
+ dvalues[offset] = dataValue;
+ if (dnulls)
+ dnulls[offset] = isNull;
+
+ /*
+ * Free old element if needed; this keeps repeated element replacements
+ * from bloating the array's storage. If the pfree somehow fails, it
+ * won't corrupt the array.
+ */
+ if (oldValue)
+ {
+ /* Don't try to pfree a part of the original flat array */
+ if (oldValue < eah->fstartptr || oldValue >= eah->fendptr)
+ pfree(oldValue);
+ }
+
+ /* Done, return standard TOAST pointer for object */
+ return EOHPGetRWDatum(&eah->hdr);
+}
+
+/*
+ * array_set_slice :
+ * This routine sets the value of a range of array locations (specified
+ * by upper and lower subscript values) to new values passed as
+ * another array.
+ *
+ * This handles both ordinary varlena arrays and fixed-length arrays.
+ *
+ * Inputs:
+ * arraydatum: the initial array object (mustn't be NULL)
+ * nSubscripts: number of subscripts supplied (must be same for upper/lower)
+ * upperIndx[]: the upper subscript values
+ * lowerIndx[]: the lower subscript values
+ * upperProvided[]: true for provided upper subscript values
+ * lowerProvided[]: true for provided lower subscript values
+ * srcArrayDatum: the source for the inserted values
+ * isNull: indicates whether srcArrayDatum is NULL
+ * arraytyplen: pg_type.typlen for the array type
+ * elmlen: pg_type.typlen for the array's element type
+ * elmbyval: pg_type.typbyval for the array's element type
+ * elmalign: pg_type.typalign for the array's element type
+ *
+ * Result:
+ * A new array is returned, just like the old except for the
+ * modified range. The original array object is not changed.
+ *
+ * Omitted upper and lower subscript values are replaced by the corresponding
+ * array bound.
+ *
+ * For one-dimensional arrays only, we allow the array to be extended
+ * by assigning to positions outside the existing subscript range; any
+ * positions between the existing elements and the new ones are set to NULLs.
+ * (XXX TODO: allow a corresponding behavior for multidimensional arrays)
+ *
+ * NOTE: we assume it is OK to scribble on the provided index arrays
+ * lowerIndx[] and upperIndx[]; also, these arrays must be of size MAXDIM
+ * even when nSubscripts is less. These are generally just temporaries.
+ *
+ * NOTE: For assignments, we throw an error for silly subscripts etc,
+ * rather than returning a NULL or empty array as the fetch operations do.
+ */
+Datum
+array_set_slice(Datum arraydatum,
+ int nSubscripts,
+ int *upperIndx,
+ int *lowerIndx,
+ bool *upperProvided,
+ bool *lowerProvided,
+ Datum srcArrayDatum,
+ bool isNull,
+ int arraytyplen,
+ int elmlen,
+ bool elmbyval,
+ char elmalign)
+{
+ ArrayType *array;
+ ArrayType *srcArray;
+ ArrayType *newarray;
+ int i,
+ ndim,
+ dim[MAXDIM],
+ lb[MAXDIM],
+ span[MAXDIM];
+ bool newhasnulls;
+ int nitems,
+ nsrcitems,
+ olddatasize,
+ newsize,
+ olditemsize,
+ newitemsize,
+ overheadlen,
+ oldoverheadlen,
+ addedbefore,
+ addedafter,
+ lenbefore,
+ lenafter,
+ itemsbefore,
+ itemsafter,
+ nolditems;
+
+ /* Currently, assignment from a NULL source array is a no-op */
+ if (isNull)
+ return arraydatum;
+
+ if (arraytyplen > 0)
+ {
+ /*
+ * fixed-length arrays -- not got round to doing this...
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("updates on slices of fixed-length arrays not implemented")));
+ }
+
+ /* detoast arrays if necessary */
+ array = DatumGetArrayTypeP(arraydatum);
+ srcArray = DatumGetArrayTypeP(srcArrayDatum);
+
+ /* note: we assume srcArray contains no toasted elements */
+
+ ndim = ARR_NDIM(array);
+
+ /*
+ * if number of dims is zero, i.e. an empty array, create an array with
+ * nSubscripts dimensions, and set the upper and lower bounds to the
+ * supplied subscripts
+ */
+ if (ndim == 0)
+ {
+ Datum *dvalues;
+ bool *dnulls;
+ int nelems;
+ Oid elmtype = ARR_ELEMTYPE(array);
+
+ deconstruct_array(srcArray, elmtype, elmlen, elmbyval, elmalign,
+ &dvalues, &dnulls, &nelems);
+
+ for (i = 0; i < nSubscripts; i++)
+ {
+ if (!upperProvided[i] || !lowerProvided[i])
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("array slice subscript must provide both boundaries"),
+ errdetail("When assigning to a slice of an empty array value,"
+ " slice boundaries must be fully specified.")));
+
+ dim[i] = 1 + upperIndx[i] - lowerIndx[i];
+ lb[i] = lowerIndx[i];
+ }
+
+ /* complain if too few source items; we ignore extras, however */
+ if (nelems < ArrayGetNItems(nSubscripts, dim))
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("source array too small")));
+
+ return PointerGetDatum(construct_md_array(dvalues, dnulls, nSubscripts,
+ dim, lb, elmtype,
+ elmlen, elmbyval, elmalign));
+ }
+
+ if (ndim < nSubscripts || ndim <= 0 || ndim > MAXDIM)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("wrong number of array subscripts")));
+
+ /* copy dim/lb since we may modify them */
+ memcpy(dim, ARR_DIMS(array), ndim * sizeof(int));
+ memcpy(lb, ARR_LBOUND(array), ndim * sizeof(int));
+
+ newhasnulls = (ARR_HASNULL(array) || ARR_HASNULL(srcArray));
+ addedbefore = addedafter = 0;
+
+ /*
+ * Check subscripts. We assume the existing subscripts passed
+ * ArrayCheckBounds, so that dim[i] + lb[i] can be computed without
+ * overflow. But we must beware of other overflows in our calculations of
+ * new dim[] values.
+ */
+ if (ndim == 1)
+ {
+ Assert(nSubscripts == 1);
+ if (!lowerProvided[0])
+ lowerIndx[0] = lb[0];
+ if (!upperProvided[0])
+ upperIndx[0] = dim[0] + lb[0] - 1;
+ if (lowerIndx[0] > upperIndx[0])
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("upper bound cannot be less than lower bound")));
+ if (lowerIndx[0] < lb[0])
+ {
+ /* addedbefore = lb[0] - lowerIndx[0]; */
+ /* dim[0] += addedbefore; */
+ if (pg_sub_s32_overflow(lb[0], lowerIndx[0], &addedbefore) ||
+ pg_add_s32_overflow(dim[0], addedbefore, &dim[0]))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("array size exceeds the maximum allowed (%d)",
+ (int) MaxArraySize)));
+ lb[0] = lowerIndx[0];
+ if (addedbefore > 1)
+ newhasnulls = true; /* will insert nulls */
+ }
+ if (upperIndx[0] >= (dim[0] + lb[0]))
+ {
+ /* addedafter = upperIndx[0] - (dim[0] + lb[0]) + 1; */
+ /* dim[0] += addedafter; */
+ if (pg_sub_s32_overflow(upperIndx[0], dim[0] + lb[0], &addedafter) ||
+ pg_add_s32_overflow(addedafter, 1, &addedafter) ||
+ pg_add_s32_overflow(dim[0], addedafter, &dim[0]))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("array size exceeds the maximum allowed (%d)",
+ (int) MaxArraySize)));
+ if (addedafter > 1)
+ newhasnulls = true; /* will insert nulls */
+ }
+ }
+ else
+ {
+ /*
+ * XXX currently we do not support extending multi-dimensional arrays
+ * during assignment
+ */
+ for (i = 0; i < nSubscripts; i++)
+ {
+ if (!lowerProvided[i])
+ lowerIndx[i] = lb[i];
+ if (!upperProvided[i])
+ upperIndx[i] = dim[i] + lb[i] - 1;
+ if (lowerIndx[i] > upperIndx[i])
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("upper bound cannot be less than lower bound")));
+ if (lowerIndx[i] < lb[i] ||
+ upperIndx[i] >= (dim[i] + lb[i]))
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("array subscript out of range")));
+ }
+ /* fill any missing subscript positions with full array range */
+ for (; i < ndim; i++)
+ {
+ lowerIndx[i] = lb[i];
+ upperIndx[i] = dim[i] + lb[i] - 1;
+ if (lowerIndx[i] > upperIndx[i])
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("upper bound cannot be less than lower bound")));
+ }
+ }
+
+ /* Do this mainly to check for overflow */
+ nitems = ArrayGetNItems(ndim, dim);
+ ArrayCheckBounds(ndim, dim, lb);
+
+ /*
+ * Make sure source array has enough entries. Note we ignore the shape of
+ * the source array and just read entries serially.
+ */
+ mda_get_range(ndim, span, lowerIndx, upperIndx);
+ nsrcitems = ArrayGetNItems(ndim, span);
+ if (nsrcitems > ArrayGetNItems(ARR_NDIM(srcArray), ARR_DIMS(srcArray)))
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("source array too small")));
+
+ /*
+ * Compute space occupied by new entries, space occupied by replaced
+ * entries, and required space for new array.
+ */
+ if (newhasnulls)
+ overheadlen = ARR_OVERHEAD_WITHNULLS(ndim, nitems);
+ else
+ overheadlen = ARR_OVERHEAD_NONULLS(ndim);
+ newitemsize = array_nelems_size(ARR_DATA_PTR(srcArray), 0,
+ ARR_NULLBITMAP(srcArray), nsrcitems,
+ elmlen, elmbyval, elmalign);
+ oldoverheadlen = ARR_DATA_OFFSET(array);
+ olddatasize = ARR_SIZE(array) - oldoverheadlen;
+ if (ndim > 1)
+ {
+ /*
+ * here we do not need to cope with extension of the array; it would
+ * be a lot more complicated if we had to do so...
+ */
+ olditemsize = array_slice_size(ARR_DATA_PTR(array),
+ ARR_NULLBITMAP(array),
+ ndim, dim, lb,
+ lowerIndx, upperIndx,
+ elmlen, elmbyval, elmalign);
+ lenbefore = lenafter = 0; /* keep compiler quiet */
+ itemsbefore = itemsafter = nolditems = 0;
+ }
+ else
+ {
+ /*
+ * here we must allow for possibility of slice larger than orig array
+ * and/or not adjacent to orig array subscripts
+ */
+ int oldlb = ARR_LBOUND(array)[0];
+ int oldub = oldlb + ARR_DIMS(array)[0] - 1;
+ int slicelb = Max(oldlb, lowerIndx[0]);
+ int sliceub = Min(oldub, upperIndx[0]);
+ char *oldarraydata = ARR_DATA_PTR(array);
+ bits8 *oldarraybitmap = ARR_NULLBITMAP(array);
+
+ /* count/size of old array entries that will go before the slice */
+ itemsbefore = Min(slicelb, oldub + 1) - oldlb;
+ lenbefore = array_nelems_size(oldarraydata, 0, oldarraybitmap,
+ itemsbefore,
+ elmlen, elmbyval, elmalign);
+ /* count/size of old array entries that will be replaced by slice */
+ if (slicelb > sliceub)
+ {
+ nolditems = 0;
+ olditemsize = 0;
+ }
+ else
+ {
+ nolditems = sliceub - slicelb + 1;
+ olditemsize = array_nelems_size(oldarraydata + lenbefore,
+ itemsbefore, oldarraybitmap,
+ nolditems,
+ elmlen, elmbyval, elmalign);
+ }
+ /* count/size of old array entries that will go after the slice */
+ itemsafter = oldub + 1 - Max(sliceub + 1, oldlb);
+ lenafter = olddatasize - lenbefore - olditemsize;
+ }
+
+ newsize = overheadlen + olddatasize - olditemsize + newitemsize;
+
+ newarray = (ArrayType *) palloc0(newsize);
+ SET_VARSIZE(newarray, newsize);
+ newarray->ndim = ndim;
+ newarray->dataoffset = newhasnulls ? overheadlen : 0;
+ newarray->elemtype = ARR_ELEMTYPE(array);
+ memcpy(ARR_DIMS(newarray), dim, ndim * sizeof(int));
+ memcpy(ARR_LBOUND(newarray), lb, ndim * sizeof(int));
+
+ if (ndim > 1)
+ {
+ /*
+ * here we do not need to cope with extension of the array; it would
+ * be a lot more complicated if we had to do so...
+ */
+ array_insert_slice(newarray, array, srcArray,
+ ndim, dim, lb,
+ lowerIndx, upperIndx,
+ elmlen, elmbyval, elmalign);
+ }
+ else
+ {
+ /* fill in data */
+ memcpy((char *) newarray + overheadlen,
+ (char *) array + oldoverheadlen,
+ lenbefore);
+ memcpy((char *) newarray + overheadlen + lenbefore,
+ ARR_DATA_PTR(srcArray),
+ newitemsize);
+ memcpy((char *) newarray + overheadlen + lenbefore + newitemsize,
+ (char *) array + oldoverheadlen + lenbefore + olditemsize,
+ lenafter);
+ /* fill in nulls bitmap if needed */
+ if (newhasnulls)
+ {
+ bits8 *newnullbitmap = ARR_NULLBITMAP(newarray);
+ bits8 *oldnullbitmap = ARR_NULLBITMAP(array);
+
+ /* palloc0 above already marked any inserted positions as nulls */
+ array_bitmap_copy(newnullbitmap, addedbefore,
+ oldnullbitmap, 0,
+ itemsbefore);
+ array_bitmap_copy(newnullbitmap, lowerIndx[0] - lb[0],
+ ARR_NULLBITMAP(srcArray), 0,
+ nsrcitems);
+ array_bitmap_copy(newnullbitmap, addedbefore + itemsbefore + nolditems,
+ oldnullbitmap, itemsbefore + nolditems,
+ itemsafter);
+ }
+ }
+
+ return PointerGetDatum(newarray);
+}
+
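+/*
+ * Illustrative slice-assignment behavior (not part of the original file):
+ * for a one-dimensional value a = '{1,2,3,4}'::int[], the assignment
+ * a[2:3] := '{20,30}' replaces the middle range, yielding '{1,20,30,4}',
+ * while a[5:6] := '{50,60}' extends the array, yielding '{1,2,3,4,50,60}'.
+ */
+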
+/*
+ * array_ref : backwards compatibility wrapper for array_get_element
+ *
+ * This only works for detoasted/flattened varlena arrays, since the array
+ * argument is declared as "ArrayType *". However there's enough code like
+ * that to justify preserving this API.
+ */
+Datum
+array_ref(ArrayType *array, int nSubscripts, int *indx,
+ int arraytyplen, int elmlen, bool elmbyval, char elmalign,
+ bool *isNull)
+{
+ return array_get_element(PointerGetDatum(array), nSubscripts, indx,
+ arraytyplen, elmlen, elmbyval, elmalign,
+ isNull);
+}
+
+/*
+ * array_set : backwards compatibility wrapper for array_set_element
+ *
+ * This only works for detoasted/flattened varlena arrays, since the array
+ * argument and result are declared as "ArrayType *". However there's enough
+ * code like that to justify preserving this API.
+ */
+ArrayType *
+array_set(ArrayType *array, int nSubscripts, int *indx,
+ Datum dataValue, bool isNull,
+ int arraytyplen, int elmlen, bool elmbyval, char elmalign)
+{
+ return DatumGetArrayTypeP(array_set_element(PointerGetDatum(array),
+ nSubscripts, indx,
+ dataValue, isNull,
+ arraytyplen,
+ elmlen, elmbyval, elmalign));
+}
+
+/*
+ * array_map()
+ *
+ * Map an array through an arbitrary expression. Return a new array with
+ * the same dimensions and each source element transformed by the given,
+ * already-compiled expression. Each source element is placed in the
+ * innermost_caseval/innermost_casenull fields of the ExprState.
+ *
+ * Parameters are:
+ * * arrayd: Datum representing array argument.
+ * * exprstate: ExprState representing the per-element transformation.
+ * * econtext: context for expression evaluation.
+ * * retType: OID of element type of output array. This must be the same as,
+ * or binary-compatible with, the result type of the expression. It might
+ * be different from the input array's element type.
+ * * amstate: workspace for array_map. Must be zeroed by caller before
+ * first call, and not touched after that.
+ *
+ * It is legitimate to pass a freshly-zeroed ArrayMapState on each call,
+ * but better performance can be had if the state can be preserved across
+ * a series of calls.
+ *
+ * NB: caller must assure that input array is not NULL. NULL elements in
+ * the array are OK however.
+ * NB: caller should be running in econtext's per-tuple memory context.
+ */
+Datum
+array_map(Datum arrayd,
+ ExprState *exprstate, ExprContext *econtext,
+ Oid retType, ArrayMapState *amstate)
+{
+ AnyArrayType *v = DatumGetAnyArrayP(arrayd);
+ ArrayType *result;
+ Datum *values;
+ bool *nulls;
+ int *dim;
+ int ndim;
+ int nitems;
+ int i;
+ int32 nbytes = 0;
+ int32 dataoffset;
+ bool hasnulls;
+ Oid inpType;
+ int inp_typlen;
+ bool inp_typbyval;
+ char inp_typalign;
+ int typlen;
+ bool typbyval;
+ char typalign;
+ array_iter iter;
+ ArrayMetaState *inp_extra;
+ ArrayMetaState *ret_extra;
+ Datum *transform_source = exprstate->innermost_caseval;
+ bool *transform_source_isnull = exprstate->innermost_casenull;
+
+ inpType = AARR_ELEMTYPE(v);
+ ndim = AARR_NDIM(v);
+ dim = AARR_DIMS(v);
+ nitems = ArrayGetNItems(ndim, dim);
+
+ /* Check for empty array */
+ if (nitems <= 0)
+ {
+ /* Return empty array */
+ return PointerGetDatum(construct_empty_array(retType));
+ }
+
+ /*
+ * We arrange to look up info about input and return element types only
+ * once per series of calls, assuming the element type doesn't change
+ * underneath us.
+ */
+ inp_extra = &amstate->inp_extra;
+ ret_extra = &amstate->ret_extra;
+
+ if (inp_extra->element_type != inpType)
+ {
+ get_typlenbyvalalign(inpType,
+ &inp_extra->typlen,
+ &inp_extra->typbyval,
+ &inp_extra->typalign);
+ inp_extra->element_type = inpType;
+ }
+ inp_typlen = inp_extra->typlen;
+ inp_typbyval = inp_extra->typbyval;
+ inp_typalign = inp_extra->typalign;
+
+ if (ret_extra->element_type != retType)
+ {
+ get_typlenbyvalalign(retType,
+ &ret_extra->typlen,
+ &ret_extra->typbyval,
+ &ret_extra->typalign);
+ ret_extra->element_type = retType;
+ }
+ typlen = ret_extra->typlen;
+ typbyval = ret_extra->typbyval;
+ typalign = ret_extra->typalign;
+
+ /* Allocate temporary arrays for new values */
+ values = (Datum *) palloc(nitems * sizeof(Datum));
+ nulls = (bool *) palloc(nitems * sizeof(bool));
+
+ /* Loop over source data */
+ array_iter_setup(&iter, v);
+ hasnulls = false;
+
+ for (i = 0; i < nitems; i++)
+ {
+ /* Get source element, checking for NULL */
+ *transform_source =
+ array_iter_next(&iter, transform_source_isnull, i,
+ inp_typlen, inp_typbyval, inp_typalign);
+
+ /* Apply the given expression to source element */
+ values[i] = ExecEvalExpr(exprstate, econtext, &nulls[i]);
+
+ if (nulls[i])
+ hasnulls = true;
+ else
+ {
+ /* Ensure data is not toasted */
+ if (typlen == -1)
+ values[i] = PointerGetDatum(PG_DETOAST_DATUM(values[i]));
+ /* Update total result size */
+ nbytes = att_addlength_datum(nbytes, typlen, values[i]);
+ nbytes = att_align_nominal(nbytes, typalign);
+ /* check for overflow of total request */
+ if (!AllocSizeIsValid(nbytes))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("array size exceeds the maximum allowed (%d)",
+ (int) MaxAllocSize)));
+ }
+ }
+
+ /* Allocate and fill the result array */
+ if (hasnulls)
+ {
+ dataoffset = ARR_OVERHEAD_WITHNULLS(ndim, nitems);
+ nbytes += dataoffset;
+ }
+ else
+ {
+ dataoffset = 0; /* marker for no null bitmap */
+ nbytes += ARR_OVERHEAD_NONULLS(ndim);
+ }
+ result = (ArrayType *) palloc0(nbytes);
+ SET_VARSIZE(result, nbytes);
+ result->ndim = ndim;
+ result->dataoffset = dataoffset;
+ result->elemtype = retType;
+ memcpy(ARR_DIMS(result), AARR_DIMS(v), ndim * sizeof(int));
+ memcpy(ARR_LBOUND(result), AARR_LBOUND(v), ndim * sizeof(int));
+
+ CopyArrayEls(result,
+ values, nulls, nitems,
+ typlen, typbyval, typalign,
+ false);
+
+ /*
+ * Note: do not risk trying to pfree the results of the called expression
+ */
+ pfree(values);
+ pfree(nulls);
+
+ return PointerGetDatum(result);
+}
+
+/*
+ * construct_array --- simple method for constructing an array object
+ *
+ * elems: array of Datum items to become the array contents
+ * (NULL element values are not supported).
+ * nelems: number of items
+ * elmtype, elmlen, elmbyval, elmalign: info for the datatype of the items
+ *
+ * A palloc'd 1-D array object is constructed and returned. Note that
+ * elem values will be copied into the object even if pass-by-ref type.
+ * Also note the result will be 0-D not 1-D if nelems = 0.
+ *
+ * NOTE: it would be cleaner to look up the elmlen/elmbyval/elmalign info
+ * from the system catalogs, given the elmtype. However, the caller is
+ * in a better position to cache this info across multiple uses, or even
+ * to hard-wire values if the element type is hard-wired.
+ */
+ArrayType *
+construct_array(Datum *elems, int nelems,
+ Oid elmtype,
+ int elmlen, bool elmbyval, char elmalign)
+{
+ int dims[1];
+ int lbs[1];
+
+ dims[0] = nelems;
+ lbs[0] = 1;
+
+ return construct_md_array(elems, NULL, 1, dims, lbs,
+ elmtype, elmlen, elmbyval, elmalign);
+}
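+
+/*
+ * Usage sketch for construct_array (illustrative values and variable names,
+ * not drawn from any particular caller): build a one-dimensional int4
+ * array.  int4 is pass-by-value, 4 bytes long, with 'i' alignment.
+ *
+ *		Datum		elems[3];
+ *		ArrayType  *a;
+ *
+ *		elems[0] = Int32GetDatum(7);
+ *		elems[1] = Int32GetDatum(8);
+ *		elems[2] = Int32GetDatum(9);
+ *		a = construct_array(elems, 3, INT4OID, sizeof(int32), true, 'i');
+ */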
+
+/*
+ * construct_md_array --- simple method for constructing an array object
+ * with arbitrary dimensions and possible NULLs
+ *
+ * elems: array of Datum items to become the array contents
+ * nulls: array of is-null flags (can be NULL if no nulls)
+ * ndims: number of dimensions
+ * dims: integer array with size of each dimension
+ * lbs: integer array with lower bound of each dimension
+ * elmtype, elmlen, elmbyval, elmalign: info for the datatype of the items
+ *
+ * A palloc'd ndims-D array object is constructed and returned. Note that
+ * elem values will be copied into the object even if pass-by-ref type.
+ * Also note the result will be 0-D not ndims-D if any dims[i] = 0.
+ *
+ * NOTE: it would be cleaner to look up the elmlen/elmbyval/elmalign info
+ * from the system catalogs, given the elmtype. However, the caller is
+ * in a better position to cache this info across multiple uses, or even
+ * to hard-wire values if the element type is hard-wired.
+ */
+ArrayType *
+construct_md_array(Datum *elems,
+ bool *nulls,
+ int ndims,
+ int *dims,
+ int *lbs,
+ Oid elmtype, int elmlen, bool elmbyval, char elmalign)
+{
+ ArrayType *result;
+ bool hasnulls;
+ int32 nbytes;
+ int32 dataoffset;
+ int i;
+ int nelems;
+
+ if (ndims < 0) /* we do allow zero-dimension arrays */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid number of dimensions: %d", ndims)));
+ if (ndims > MAXDIM)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+ ndims, MAXDIM)));
+
+ /* This checks for overflow of the array dimensions */
+ nelems = ArrayGetNItems(ndims, dims);
+ ArrayCheckBounds(ndims, dims, lbs);
+
+ /* if ndims <= 0 or any dims[i] == 0, return empty array */
+ if (nelems <= 0)
+ return construct_empty_array(elmtype);
+
+ /* compute required space */
+ nbytes = 0;
+ hasnulls = false;
+ for (i = 0; i < nelems; i++)
+ {
+ if (nulls && nulls[i])
+ {
+ hasnulls = true;
+ continue;
+ }
+ /* make sure data is not toasted */
+ if (elmlen == -1)
+ elems[i] = PointerGetDatum(PG_DETOAST_DATUM(elems[i]));
+ nbytes = att_addlength_datum(nbytes, elmlen, elems[i]);
+ nbytes = att_align_nominal(nbytes, elmalign);
+ /* check for overflow of total request */
+ if (!AllocSizeIsValid(nbytes))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("array size exceeds the maximum allowed (%d)",
+ (int) MaxAllocSize)));
+ }
+
+ /* Allocate and initialize result array */
+ if (hasnulls)
+ {
+ dataoffset = ARR_OVERHEAD_WITHNULLS(ndims, nelems);
+ nbytes += dataoffset;
+ }
+ else
+ {
+ dataoffset = 0; /* marker for no null bitmap */
+ nbytes += ARR_OVERHEAD_NONULLS(ndims);
+ }
+ result = (ArrayType *) palloc0(nbytes);
+ SET_VARSIZE(result, nbytes);
+ result->ndim = ndims;
+ result->dataoffset = dataoffset;
+ result->elemtype = elmtype;
+ memcpy(ARR_DIMS(result), dims, ndims * sizeof(int));
+ memcpy(ARR_LBOUND(result), lbs, ndims * sizeof(int));
+
+ CopyArrayEls(result,
+ elems, nulls, nelems,
+ elmlen, elmbyval, elmalign,
+ false);
+
+ return result;
+}
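+
+/*
+ * Usage sketch for construct_md_array (illustrative, assuming elems[] already
+ * holds six int4 Datums in row-major order, i.e. last subscript varying
+ * fastest): build a 2 x 3 matrix with the usual lower bounds of 1.
+ *
+ *		int			dims[2] = {2, 3};
+ *		int			lbs[2] = {1, 1};
+ *		ArrayType  *m;
+ *
+ *		m = construct_md_array(elems, NULL, 2, dims, lbs,
+ *							   INT4OID, sizeof(int32), true, 'i');
+ */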
+
+/*
+ * construct_empty_array --- make a zero-dimensional array of given type
+ */
+ArrayType *
+construct_empty_array(Oid elmtype)
+{
+ ArrayType *result;
+
+ result = (ArrayType *) palloc0(sizeof(ArrayType));
+ SET_VARSIZE(result, sizeof(ArrayType));
+ result->ndim = 0;
+ result->dataoffset = 0;
+ result->elemtype = elmtype;
+ return result;
+}
+
+/*
+ * construct_empty_expanded_array: make an empty expanded array
+ * given only type information. (metacache can be NULL if not needed.)
+ */
+ExpandedArrayHeader *
+construct_empty_expanded_array(Oid element_type,
+ MemoryContext parentcontext,
+ ArrayMetaState *metacache)
+{
+ ArrayType *array = construct_empty_array(element_type);
+ Datum d;
+
+ d = expand_array(PointerGetDatum(array), parentcontext, metacache);
+ pfree(array);
+ return (ExpandedArrayHeader *) DatumGetEOHP(d);
+}
+
+/*
+ * deconstruct_array --- simple method for extracting data from an array
+ *
+ * array: array object to examine (must not be NULL)
+ * elmtype, elmlen, elmbyval, elmalign: info for the datatype of the items
+ * elemsp: return value, set to point to palloc'd array of Datum values
+ * nullsp: return value, set to point to palloc'd array of isnull markers
+ * nelemsp: return value, set to number of extracted values
+ *
+ * The caller may pass nullsp == NULL if it does not support NULLs in the
+ * array. Note that this produces a very uninformative error message,
+ * so do it only in cases where a NULL is really not expected.
+ *
+ * If array elements are pass-by-ref data type, the returned Datums will
+ * be pointers into the array object.
+ *
+ * NOTE: it would be cleaner to look up the elmlen/elmbyval/elmalign info
+ * from the system catalogs, given the elmtype. However, in most current
+ * uses the type is hard-wired into the caller and so we can save a lookup
+ * cycle by hard-wiring the type info as well.
+ */
+void
+deconstruct_array(ArrayType *array,
+ Oid elmtype,
+ int elmlen, bool elmbyval, char elmalign,
+ Datum **elemsp, bool **nullsp, int *nelemsp)
+{
+ Datum *elems;
+ bool *nulls;
+ int nelems;
+ char *p;
+ bits8 *bitmap;
+ int bitmask;
+ int i;
+
+ Assert(ARR_ELEMTYPE(array) == elmtype);
+
+ nelems = ArrayGetNItems(ARR_NDIM(array), ARR_DIMS(array));
+ *elemsp = elems = (Datum *) palloc(nelems * sizeof(Datum));
+ if (nullsp)
+ *nullsp = nulls = (bool *) palloc0(nelems * sizeof(bool));
+ else
+ nulls = NULL;
+ *nelemsp = nelems;
+
+ p = ARR_DATA_PTR(array);
+ bitmap = ARR_NULLBITMAP(array);
+ bitmask = 1;
+
+ for (i = 0; i < nelems; i++)
+ {
+ /* Get source element, checking for NULL */
+ if (bitmap && (*bitmap & bitmask) == 0)
+ {
+ elems[i] = (Datum) 0;
+ if (nulls)
+ nulls[i] = true;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("null array element not allowed in this context")));
+ }
+ else
+ {
+ elems[i] = fetch_att(p, elmbyval, elmlen);
+ p = att_addlength_pointer(p, elmlen, p);
+ p = (char *) att_align_nominal(p, elmalign);
+ }
+
+ /* advance bitmap pointer if any */
+ if (bitmap)
+ {
+ bitmask <<= 1;
+ if (bitmask == 0x100)
+ {
+ bitmap++;
+ bitmask = 1;
+ }
+ }
+ }
+}
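+
+/*
+ * Usage sketch for deconstruct_array (illustrative, assuming "a" is a
+ * detoasted int4 array): extract the elements and sum the non-null ones.
+ *
+ *		Datum	   *values;
+ *		bool	   *nulls;
+ *		int			nelems;
+ *		int64		sum = 0;
+ *
+ *		deconstruct_array(a, INT4OID, sizeof(int32), true, 'i',
+ *						  &values, &nulls, &nelems);
+ *		for (int i = 0; i < nelems; i++)
+ *			if (!nulls[i])
+ *				sum += DatumGetInt32(values[i]);
+ */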
+
+/*
+ * array_contains_nulls --- detect whether an array has any null elements
+ *
+ * This gives an accurate answer, whereas testing ARR_HASNULL only tells
+ * if the array *might* contain a null.
+ */
+bool
+array_contains_nulls(ArrayType *array)
+{
+ int nelems;
+ bits8 *bitmap;
+ int bitmask;
+
+ /* Easy answer if there's no null bitmap */
+ if (!ARR_HASNULL(array))
+ return false;
+
+ nelems = ArrayGetNItems(ARR_NDIM(array), ARR_DIMS(array));
+
+ bitmap = ARR_NULLBITMAP(array);
+
+ /* check whole bytes of the bitmap byte-at-a-time */
+ while (nelems >= 8)
+ {
+ if (*bitmap != 0xFF)
+ return true;
+ bitmap++;
+ nelems -= 8;
+ }
+
+ /* check last partial byte */
+ bitmask = 1;
+ while (nelems > 0)
+ {
+ if ((*bitmap & bitmask) == 0)
+ return true;
+ bitmask <<= 1;
+ nelems--;
+ }
+
+ return false;
+}
+
+
+/*
+ * array_eq :
+ * compares two arrays for equality
+ * result :
+ * returns true if the arrays are equal, false otherwise.
+ *
+ * Note: we do not use array_cmp here, since equality may be meaningful in
+ * datatypes that don't have a total ordering (and hence no btree support).
+ */
+Datum
+array_eq(PG_FUNCTION_ARGS)
+{
+ LOCAL_FCINFO(locfcinfo, 2);
+ AnyArrayType *array1 = PG_GETARG_ANY_ARRAY_P(0);
+ AnyArrayType *array2 = PG_GETARG_ANY_ARRAY_P(1);
+ Oid collation = PG_GET_COLLATION();
+ int ndims1 = AARR_NDIM(array1);
+ int ndims2 = AARR_NDIM(array2);
+ int *dims1 = AARR_DIMS(array1);
+ int *dims2 = AARR_DIMS(array2);
+ int *lbs1 = AARR_LBOUND(array1);
+ int *lbs2 = AARR_LBOUND(array2);
+ Oid element_type = AARR_ELEMTYPE(array1);
+ bool result = true;
+ int nitems;
+ TypeCacheEntry *typentry;
+ int typlen;
+ bool typbyval;
+ char typalign;
+ array_iter it1;
+ array_iter it2;
+ int i;
+
+ if (element_type != AARR_ELEMTYPE(array2))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("cannot compare arrays of different element types")));
+
+ /* fast path if the arrays do not have the same dimensionality */
+ if (ndims1 != ndims2 ||
+ memcmp(dims1, dims2, ndims1 * sizeof(int)) != 0 ||
+ memcmp(lbs1, lbs2, ndims1 * sizeof(int)) != 0)
+ result = false;
+ else
+ {
+ /*
+ * We arrange to look up the equality function only once per series of
+ * calls, assuming the element type doesn't change underneath us. The
+ * typcache is used so that we have no memory leakage when being used
+ * as an index support function.
+ */
+ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+ if (typentry == NULL ||
+ typentry->type_id != element_type)
+ {
+ typentry = lookup_type_cache(element_type,
+ TYPECACHE_EQ_OPR_FINFO);
+ if (!OidIsValid(typentry->eq_opr_finfo.fn_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify an equality operator for type %s",
+ format_type_be(element_type))));
+ fcinfo->flinfo->fn_extra = (void *) typentry;
+ }
+ typlen = typentry->typlen;
+ typbyval = typentry->typbyval;
+ typalign = typentry->typalign;
+
+ /*
+ * apply the operator to each pair of array elements.
+ */
+ InitFunctionCallInfoData(*locfcinfo, &typentry->eq_opr_finfo, 2,
+ collation, NULL, NULL);
+
+ /* Loop over source data */
+ nitems = ArrayGetNItems(ndims1, dims1);
+ array_iter_setup(&it1, array1);
+ array_iter_setup(&it2, array2);
+
+ for (i = 0; i < nitems; i++)
+ {
+ Datum elt1;
+ Datum elt2;
+ bool isnull1;
+ bool isnull2;
+ bool oprresult;
+
+ /* Get elements, checking for NULL */
+ elt1 = array_iter_next(&it1, &isnull1, i,
+ typlen, typbyval, typalign);
+ elt2 = array_iter_next(&it2, &isnull2, i,
+ typlen, typbyval, typalign);
+
+ /*
+ * We consider two NULLs equal; NULL and not-NULL are unequal.
+ */
+ if (isnull1 && isnull2)
+ continue;
+ if (isnull1 || isnull2)
+ {
+ result = false;
+ break;
+ }
+
+ /*
+ * Apply the operator to the element pair; treat NULL as false
+ */
+ locfcinfo->args[0].value = elt1;
+ locfcinfo->args[0].isnull = false;
+ locfcinfo->args[1].value = elt2;
+ locfcinfo->args[1].isnull = false;
+ locfcinfo->isnull = false;
+ oprresult = DatumGetBool(FunctionCallInvoke(locfcinfo));
+ if (locfcinfo->isnull || !oprresult)
+ {
+ result = false;
+ break;
+ }
+ }
+ }
+
+ /* Avoid leaking memory when handed toasted input. */
+ AARR_FREE_IF_COPY(array1, 0);
+ AARR_FREE_IF_COPY(array2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+
+/*-----------------------------------------------------------------------------
+ * array-array bool operators:
+ * Given two arrays, iterate comparison operators
+ * over the array. Uses logic similar to text comparison
+ * functions, except element-by-element instead of
+ * character-by-character.
+ *----------------------------------------------------------------------------
+ */
+
+Datum
+array_ne(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(!DatumGetBool(array_eq(fcinfo)));
+}
+
+Datum
+array_lt(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(array_cmp(fcinfo) < 0);
+}
+
+Datum
+array_gt(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(array_cmp(fcinfo) > 0);
+}
+
+Datum
+array_le(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(array_cmp(fcinfo) <= 0);
+}
+
+Datum
+array_ge(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(array_cmp(fcinfo) >= 0);
+}
+
+Datum
+btarraycmp(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_INT32(array_cmp(fcinfo));
+}
+
+/*
+ * array_cmp()
+ * Internal comparison function for arrays.
+ *
+ * Returns -1, 0 or 1
+ */
+static int
+array_cmp(FunctionCallInfo fcinfo)
+{
+ LOCAL_FCINFO(locfcinfo, 2);
+ AnyArrayType *array1 = PG_GETARG_ANY_ARRAY_P(0);
+ AnyArrayType *array2 = PG_GETARG_ANY_ARRAY_P(1);
+ Oid collation = PG_GET_COLLATION();
+ int ndims1 = AARR_NDIM(array1);
+ int ndims2 = AARR_NDIM(array2);
+ int *dims1 = AARR_DIMS(array1);
+ int *dims2 = AARR_DIMS(array2);
+ int nitems1 = ArrayGetNItems(ndims1, dims1);
+ int nitems2 = ArrayGetNItems(ndims2, dims2);
+ Oid element_type = AARR_ELEMTYPE(array1);
+ int result = 0;
+ TypeCacheEntry *typentry;
+ int typlen;
+ bool typbyval;
+ char typalign;
+ int min_nitems;
+ array_iter it1;
+ array_iter it2;
+ int i;
+
+ if (element_type != AARR_ELEMTYPE(array2))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("cannot compare arrays of different element types")));
+
+ /*
+ * We arrange to look up the comparison function only once per series of
+ * calls, assuming the element type doesn't change underneath us. The
+ * typcache is used so that we have no memory leakage when being used as
+ * an index support function.
+ */
+ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+ if (typentry == NULL ||
+ typentry->type_id != element_type)
+ {
+ typentry = lookup_type_cache(element_type,
+ TYPECACHE_CMP_PROC_FINFO);
+ if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify a comparison function for type %s",
+ format_type_be(element_type))));
+ fcinfo->flinfo->fn_extra = (void *) typentry;
+ }
+ typlen = typentry->typlen;
+ typbyval = typentry->typbyval;
+ typalign = typentry->typalign;
+
+ /*
+ * apply the operator to each pair of array elements.
+ */
+ InitFunctionCallInfoData(*locfcinfo, &typentry->cmp_proc_finfo, 2,
+ collation, NULL, NULL);
+
+ /* Loop over source data */
+ min_nitems = Min(nitems1, nitems2);
+ array_iter_setup(&it1, array1);
+ array_iter_setup(&it2, array2);
+
+ for (i = 0; i < min_nitems; i++)
+ {
+ Datum elt1;
+ Datum elt2;
+ bool isnull1;
+ bool isnull2;
+ int32 cmpresult;
+
+ /* Get elements, checking for NULL */
+ elt1 = array_iter_next(&it1, &isnull1, i, typlen, typbyval, typalign);
+ elt2 = array_iter_next(&it2, &isnull2, i, typlen, typbyval, typalign);
+
+ /*
+ * We consider two NULLs equal; NULL > not-NULL.
+ */
+ if (isnull1 && isnull2)
+ continue;
+ if (isnull1)
+ {
+ /* arg1 is greater than arg2 */
+ result = 1;
+ break;
+ }
+ if (isnull2)
+ {
+ /* arg1 is less than arg2 */
+ result = -1;
+ break;
+ }
+
+ /* Compare the pair of elements */
+ locfcinfo->args[0].value = elt1;
+ locfcinfo->args[0].isnull = false;
+ locfcinfo->args[1].value = elt2;
+ locfcinfo->args[1].isnull = false;
+ cmpresult = DatumGetInt32(FunctionCallInvoke(locfcinfo));
+
+ /* We don't expect comparison support functions to return null */
+ Assert(!locfcinfo->isnull);
+
+ if (cmpresult == 0)
+ continue; /* equal */
+
+ if (cmpresult < 0)
+ {
+ /* arg1 is less than arg2 */
+ result = -1;
+ break;
+ }
+ else
+ {
+ /* arg1 is greater than arg2 */
+ result = 1;
+ break;
+ }
+ }
+
+ /*
+ * If arrays contain same data (up to end of shorter one), apply
+ * additional rules to sort by dimensionality. The relative significance
+ * of the different bits of information is historical; mainly we just care
+ * that we don't say "equal" for arrays of different dimensionality.
+ */
+ if (result == 0)
+ {
+ if (nitems1 != nitems2)
+ result = (nitems1 < nitems2) ? -1 : 1;
+ else if (ndims1 != ndims2)
+ result = (ndims1 < ndims2) ? -1 : 1;
+ else
+ {
+ for (i = 0; i < ndims1; i++)
+ {
+ if (dims1[i] != dims2[i])
+ {
+ result = (dims1[i] < dims2[i]) ? -1 : 1;
+ break;
+ }
+ }
+ if (result == 0)
+ {
+ int *lbound1 = AARR_LBOUND(array1);
+ int *lbound2 = AARR_LBOUND(array2);
+
+ for (i = 0; i < ndims1; i++)
+ {
+ if (lbound1[i] != lbound2[i])
+ {
+ result = (lbound1[i] < lbound2[i]) ? -1 : 1;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /* Avoid leaking memory when handed toasted input. */
+ AARR_FREE_IF_COPY(array1, 0);
+ AARR_FREE_IF_COPY(array2, 1);
+
+ return result;
+}
+
+
+/*-----------------------------------------------------------------------------
+ * array hashing
+ * Hash the elements and combine the results.
+ *----------------------------------------------------------------------------
+ */
+
+Datum
+hash_array(PG_FUNCTION_ARGS)
+{
+ LOCAL_FCINFO(locfcinfo, 1);
+ AnyArrayType *array = PG_GETARG_ANY_ARRAY_P(0);
+ int ndims = AARR_NDIM(array);
+ int *dims = AARR_DIMS(array);
+ Oid element_type = AARR_ELEMTYPE(array);
+ uint32 result = 1;
+ int nitems;
+ TypeCacheEntry *typentry;
+ int typlen;
+ bool typbyval;
+ char typalign;
+ int i;
+ array_iter iter;
+
+ /*
+ * We arrange to look up the hash function only once per series of calls,
+ * assuming the element type doesn't change underneath us. The typcache
+ * is used so that we have no memory leakage when being used as an index
+ * support function.
+ */
+ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+ if (typentry == NULL ||
+ typentry->type_id != element_type)
+ {
+ typentry = lookup_type_cache(element_type,
+ TYPECACHE_HASH_PROC_FINFO);
+ if (!OidIsValid(typentry->hash_proc_finfo.fn_oid) && element_type != RECORDOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify a hash function for type %s",
+ format_type_be(element_type))));
+
+ /*
+ * The type cache doesn't believe that record is hashable (see
+ * cache_record_field_properties()), but since we're here, we're
+ * committed to hashing, so we can assume it does. Worst case, if any
+ * components of the record don't support hashing, we will fail at
+ * execution.
+ */
+ if (element_type == RECORDOID)
+ {
+ MemoryContext oldcontext;
+ TypeCacheEntry *record_typentry;
+
+ oldcontext = MemoryContextSwitchTo(fcinfo->flinfo->fn_mcxt);
+
+ /*
+ * Make fake type cache entry structure. Note that we can't just
+ * modify typentry, since that points directly into the type
+ * cache.
+ */
+ record_typentry = palloc0(sizeof(*record_typentry));
+ record_typentry->type_id = element_type;
+
+ /* fill in what we need below */
+ record_typentry->typlen = typentry->typlen;
+ record_typentry->typbyval = typentry->typbyval;
+ record_typentry->typalign = typentry->typalign;
+ fmgr_info(F_HASH_RECORD, &record_typentry->hash_proc_finfo);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ typentry = record_typentry;
+ }
+
+ fcinfo->flinfo->fn_extra = (void *) typentry;
+ }
+
+ typlen = typentry->typlen;
+ typbyval = typentry->typbyval;
+ typalign = typentry->typalign;
+
+ /*
+ * apply the hash function to each array element.
+ */
+ InitFunctionCallInfoData(*locfcinfo, &typentry->hash_proc_finfo, 1,
+ PG_GET_COLLATION(), NULL, NULL);
+
+ /* Loop over source data */
+ nitems = ArrayGetNItems(ndims, dims);
+ array_iter_setup(&iter, array);
+
+ for (i = 0; i < nitems; i++)
+ {
+ Datum elt;
+ bool isnull;
+ uint32 elthash;
+
+ /* Get element, checking for NULL */
+ elt = array_iter_next(&iter, &isnull, i, typlen, typbyval, typalign);
+
+ if (isnull)
+ {
+ /* Treat nulls as having hashvalue 0 */
+ elthash = 0;
+ }
+ else
+ {
+ /* Apply the hash function */
+ locfcinfo->args[0].value = elt;
+ locfcinfo->args[0].isnull = false;
+ elthash = DatumGetUInt32(FunctionCallInvoke(locfcinfo));
+ /* We don't expect hash functions to return null */
+ Assert(!locfcinfo->isnull);
+ }
+
+ /*
+ * Combine hash values of successive elements by multiplying the
+ * current value by 31 and adding on the new element's hash value.
+ *
+ * The result is a sum in which each element's hash value is
+ * multiplied by a different power of 31. This is modulo 2^32
+ * arithmetic, and the powers of 31 modulo 2^32 form a cyclic group of
+ * order 2^27. So for arrays of up to 2^27 elements, each element's
+ * hash value is multiplied by a different (odd) number, resulting in
+ * a good mixing of all the elements' hash values.
+ */
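+			/* Equivalently: result = result * 31 + elthash, in uint32 arithmetic */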
+ result = (result << 5) - result + elthash;
+ }
+
+ /* Avoid leaking memory when handed toasted input. */
+ AARR_FREE_IF_COPY(array, 0);
+
+ PG_RETURN_UINT32(result);
+}
+
+/*
+ * Returns a 64-bit hash of the array, incorporating the given 64-bit seed.
+ * Otherwise, similar to hash_array.
+ */
+Datum
+hash_array_extended(PG_FUNCTION_ARGS)
+{
+ LOCAL_FCINFO(locfcinfo, 2);
+ AnyArrayType *array = PG_GETARG_ANY_ARRAY_P(0);
+ uint64 seed = PG_GETARG_INT64(1);
+ int ndims = AARR_NDIM(array);
+ int *dims = AARR_DIMS(array);
+ Oid element_type = AARR_ELEMTYPE(array);
+ uint64 result = 1;
+ int nitems;
+ TypeCacheEntry *typentry;
+ int typlen;
+ bool typbyval;
+ char typalign;
+ int i;
+ array_iter iter;
+
+ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+ if (typentry == NULL ||
+ typentry->type_id != element_type)
+ {
+ typentry = lookup_type_cache(element_type,
+ TYPECACHE_HASH_EXTENDED_PROC_FINFO);
+ if (!OidIsValid(typentry->hash_extended_proc_finfo.fn_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify an extended hash function for type %s",
+ format_type_be(element_type))));
+ fcinfo->flinfo->fn_extra = (void *) typentry;
+ }
+ typlen = typentry->typlen;
+ typbyval = typentry->typbyval;
+ typalign = typentry->typalign;
+
+ InitFunctionCallInfoData(*locfcinfo, &typentry->hash_extended_proc_finfo, 2,
+ PG_GET_COLLATION(), NULL, NULL);
+
+ /* Loop over source data */
+ nitems = ArrayGetNItems(ndims, dims);
+ array_iter_setup(&iter, array);
+
+ for (i = 0; i < nitems; i++)
+ {
+ Datum elt;
+ bool isnull;
+ uint64 elthash;
+
+ /* Get element, checking for NULL */
+ elt = array_iter_next(&iter, &isnull, i, typlen, typbyval, typalign);
+
+ if (isnull)
+ {
+ elthash = 0;
+ }
+ else
+ {
+ /* Apply the hash function */
+ locfcinfo->args[0].value = elt;
+ locfcinfo->args[0].isnull = false;
+ locfcinfo->args[1].value = Int64GetDatum(seed);
+ locfcinfo->args[1].isnull = false;
+ elthash = DatumGetUInt64(FunctionCallInvoke(locfcinfo));
+ /* We don't expect hash functions to return null */
+ Assert(!locfcinfo->isnull);
+ }
+
+ result = (result << 5) - result + elthash;
+ }
+
+ AARR_FREE_IF_COPY(array, 0);
+
+ PG_RETURN_UINT64(result);
+}
+
+
+/*-----------------------------------------------------------------------------
+ * array overlap/containment comparisons
+ * These use the same methods of comparing array elements as array_eq.
+ * We consider only the elements of the arrays, ignoring dimensionality.
+ *----------------------------------------------------------------------------
+ */
+
+/*
+ * array_contain_compare :
+ * compares two arrays for overlap/containment
+ *
+ * When matchall is true, return true if all members of array1 are in array2.
+ * When matchall is false, return true if any members of array1 are in array2.
+ */
+static bool
+array_contain_compare(AnyArrayType *array1, AnyArrayType *array2, Oid collation,
+ bool matchall, void **fn_extra)
+{
+ LOCAL_FCINFO(locfcinfo, 2);
+ bool result = matchall;
+ Oid element_type = AARR_ELEMTYPE(array1);
+ TypeCacheEntry *typentry;
+ int nelems1;
+ Datum *values2;
+ bool *nulls2;
+ int nelems2;
+ int typlen;
+ bool typbyval;
+ char typalign;
+ int i;
+ int j;
+ array_iter it1;
+
+ if (element_type != AARR_ELEMTYPE(array2))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("cannot compare arrays of different element types")));
+
+ /*
+ * We arrange to look up the equality function only once per series of
+ * calls, assuming the element type doesn't change underneath us. The
+ * typcache is used so that we have no memory leakage when being used as
+ * an index support function.
+ */
+ typentry = (TypeCacheEntry *) *fn_extra;
+ if (typentry == NULL ||
+ typentry->type_id != element_type)
+ {
+ typentry = lookup_type_cache(element_type,
+ TYPECACHE_EQ_OPR_FINFO);
+ if (!OidIsValid(typentry->eq_opr_finfo.fn_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify an equality operator for type %s",
+ format_type_be(element_type))));
+ *fn_extra = (void *) typentry;
+ }
+ typlen = typentry->typlen;
+ typbyval = typentry->typbyval;
+ typalign = typentry->typalign;
+
+ /*
+ * Since we probably will need to scan array2 multiple times, it's
+ * worthwhile to use deconstruct_array on it. We scan array1 the hard way
+ * however, since we very likely won't need to look at all of it.
+ */
+ if (VARATT_IS_EXPANDED_HEADER(array2))
+ {
+ /* This should be safe even if input is read-only */
+ deconstruct_expanded_array(&(array2->xpn));
+ values2 = array2->xpn.dvalues;
+ nulls2 = array2->xpn.dnulls;
+ nelems2 = array2->xpn.nelems;
+ }
+ else
+ deconstruct_array((ArrayType *) array2,
+ element_type, typlen, typbyval, typalign,
+ &values2, &nulls2, &nelems2);
+
+ /*
+ * Apply the comparison operator to each pair of array elements.
+ */
+ InitFunctionCallInfoData(*locfcinfo, &typentry->eq_opr_finfo, 2,
+ collation, NULL, NULL);
+
+ /* Loop over source data */
+ nelems1 = ArrayGetNItems(AARR_NDIM(array1), AARR_DIMS(array1));
+ array_iter_setup(&it1, array1);
+
+ for (i = 0; i < nelems1; i++)
+ {
+ Datum elt1;
+ bool isnull1;
+
+ /* Get element, checking for NULL */
+ elt1 = array_iter_next(&it1, &isnull1, i, typlen, typbyval, typalign);
+
+ /*
+ * We assume that the comparison operator is strict, so a NULL can't
+ * match anything. XXX this diverges from the "NULL=NULL" behavior of
+ * array_eq, should we act like that?
+ */
+ if (isnull1)
+ {
+ if (matchall)
+ {
+ result = false;
+ break;
+ }
+ continue;
+ }
+
+ for (j = 0; j < nelems2; j++)
+ {
+ Datum elt2 = values2[j];
+ bool isnull2 = nulls2 ? nulls2[j] : false;
+ bool oprresult;
+
+ if (isnull2)
+ continue; /* can't match */
+
+ /*
+ * Apply the operator to the element pair; treat NULL as false
+ */
+ locfcinfo->args[0].value = elt1;
+ locfcinfo->args[0].isnull = false;
+ locfcinfo->args[1].value = elt2;
+ locfcinfo->args[1].isnull = false;
+ locfcinfo->isnull = false;
+ oprresult = DatumGetBool(FunctionCallInvoke(locfcinfo));
+ if (!locfcinfo->isnull && oprresult)
+ break;
+ }
+
+ if (j < nelems2)
+ {
+ /* found a match for elt1 */
+ if (!matchall)
+ {
+ result = true;
+ break;
+ }
+ }
+ else
+ {
+ /* no match for elt1 */
+ if (matchall)
+ {
+ result = false;
+ break;
+ }
+ }
+ }
+
+ return result;
+}
+
+Datum
+arrayoverlap(PG_FUNCTION_ARGS)
+{
+ AnyArrayType *array1 = PG_GETARG_ANY_ARRAY_P(0);
+ AnyArrayType *array2 = PG_GETARG_ANY_ARRAY_P(1);
+ Oid collation = PG_GET_COLLATION();
+ bool result;
+
+ result = array_contain_compare(array1, array2, collation, false,
+ &fcinfo->flinfo->fn_extra);
+
+ /* Avoid leaking memory when handed toasted input. */
+ AARR_FREE_IF_COPY(array1, 0);
+ AARR_FREE_IF_COPY(array2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+arraycontains(PG_FUNCTION_ARGS)
+{
+ AnyArrayType *array1 = PG_GETARG_ANY_ARRAY_P(0);
+ AnyArrayType *array2 = PG_GETARG_ANY_ARRAY_P(1);
+ Oid collation = PG_GET_COLLATION();
+ bool result;
+
+ result = array_contain_compare(array2, array1, collation, true,
+ &fcinfo->flinfo->fn_extra);
+
+ /* Avoid leaking memory when handed toasted input. */
+ AARR_FREE_IF_COPY(array1, 0);
+ AARR_FREE_IF_COPY(array2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+arraycontained(PG_FUNCTION_ARGS)
+{
+ AnyArrayType *array1 = PG_GETARG_ANY_ARRAY_P(0);
+ AnyArrayType *array2 = PG_GETARG_ANY_ARRAY_P(1);
+ Oid collation = PG_GET_COLLATION();
+ bool result;
+
+ result = array_contain_compare(array1, array2, collation, true,
+ &fcinfo->flinfo->fn_extra);
+
+ /* Avoid leaking memory when handed toasted input. */
+ AARR_FREE_IF_COPY(array1, 0);
+ AARR_FREE_IF_COPY(array2, 1);
+
+ PG_RETURN_BOOL(result);
+}
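+
+/*
+ * At the SQL level these three functions implement the && (overlaps),
+ * @> (contains) and <@ (is contained by) array operators.  For example,
+ * ARRAY[1,4,3] && ARRAY[2,1] and ARRAY[1,4,3] @> ARRAY[3,1,3] both yield
+ * true, since only element membership is considered, not multiplicity,
+ * ordering, or dimensionality.
+ */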
+
+
+/*-----------------------------------------------------------------------------
+ * Array iteration functions
+ * These functions are used to iterate efficiently through arrays
+ *-----------------------------------------------------------------------------
+ */
+
+/*
+ * array_create_iterator --- set up to iterate through an array
+ *
+ * If slice_ndim is zero, we will iterate element-by-element; the returned
+ * datums are of the array's element type.
+ *
+ * If slice_ndim is 1..ARR_NDIM(arr), we will iterate by slices: the
+ * returned datums are of the same array type as 'arr', but of size
+ * equal to the rightmost N dimensions of 'arr'.
+ *
+ * The passed-in array must remain valid for the lifetime of the iterator.
+ */
+ArrayIterator
+array_create_iterator(ArrayType *arr, int slice_ndim, ArrayMetaState *mstate)
+{
+ ArrayIterator iterator = palloc0(sizeof(ArrayIteratorData));
+
+ /*
+ * Sanity-check inputs --- caller should have got this right already
+ */
+ Assert(PointerIsValid(arr));
+ if (slice_ndim < 0 || slice_ndim > ARR_NDIM(arr))
+ elog(ERROR, "invalid arguments to array_create_iterator");
+
+ /*
+ * Remember basic info about the array and its element type
+ */
+ iterator->arr = arr;
+ iterator->nullbitmap = ARR_NULLBITMAP(arr);
+ iterator->nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr));
+
+ if (mstate != NULL)
+ {
+ Assert(mstate->element_type == ARR_ELEMTYPE(arr));
+
+ iterator->typlen = mstate->typlen;
+ iterator->typbyval = mstate->typbyval;
+ iterator->typalign = mstate->typalign;
+ }
+ else
+ get_typlenbyvalalign(ARR_ELEMTYPE(arr),
+ &iterator->typlen,
+ &iterator->typbyval,
+ &iterator->typalign);
+
+ /*
+ * Remember the slicing parameters.
+ */
+ iterator->slice_ndim = slice_ndim;
+
+ if (slice_ndim > 0)
+ {
+ /*
+ * Get pointers into the array's dims and lbound arrays to represent
+ * the dims/lbound arrays of a slice. These are the same as the
+ * rightmost N dimensions of the array.
+ */
+ iterator->slice_dims = ARR_DIMS(arr) + ARR_NDIM(arr) - slice_ndim;
+ iterator->slice_lbound = ARR_LBOUND(arr) + ARR_NDIM(arr) - slice_ndim;
+
+ /*
+ * Compute number of elements in a slice.
+ */
+ iterator->slice_len = ArrayGetNItems(slice_ndim,
+ iterator->slice_dims);
+
+ /*
+ * Create workspace for building sub-arrays.
+ */
+ iterator->slice_values = (Datum *)
+ palloc(iterator->slice_len * sizeof(Datum));
+ iterator->slice_nulls = (bool *)
+ palloc(iterator->slice_len * sizeof(bool));
+ }
+
+ /*
+ * Initialize our data pointer and linear element number. These will
+ * advance through the array during array_iterate().
+ */
+ iterator->data_ptr = ARR_DATA_PTR(arr);
+ iterator->current_item = 0;
+
+ return iterator;
+}
+
+/*
+ * Iterate through the array referenced by 'iterator'.
+ *
+ * As long as there is another element (or slice), return it into
+ * *value / *isnull, and return true. Return false when no more data.
+ */
+bool
+array_iterate(ArrayIterator iterator, Datum *value, bool *isnull)
+{
+ /* Done if we have reached the end of the array */
+ if (iterator->current_item >= iterator->nitems)
+ return false;
+
+ if (iterator->slice_ndim == 0)
+ {
+ /*
+ * Scalar case: return one element.
+ */
+ if (array_get_isnull(iterator->nullbitmap, iterator->current_item++))
+ {
+ *isnull = true;
+ *value = (Datum) 0;
+ }
+ else
+ {
+ /* non-NULL, so fetch the individual Datum to return */
+ char *p = iterator->data_ptr;
+
+ *isnull = false;
+ *value = fetch_att(p, iterator->typbyval, iterator->typlen);
+
+ /* Move our data pointer forward to the next element */
+ p = att_addlength_pointer(p, iterator->typlen, p);
+ p = (char *) att_align_nominal(p, iterator->typalign);
+ iterator->data_ptr = p;
+ }
+ }
+ else
+ {
+ /*
+ * Slice case: build and return an array of the requested size.
+ */
+ ArrayType *result;
+ Datum *values = iterator->slice_values;
+ bool *nulls = iterator->slice_nulls;
+ char *p = iterator->data_ptr;
+ int i;
+
+ for (i = 0; i < iterator->slice_len; i++)
+ {
+ if (array_get_isnull(iterator->nullbitmap,
+ iterator->current_item++))
+ {
+ nulls[i] = true;
+ values[i] = (Datum) 0;
+ }
+ else
+ {
+ nulls[i] = false;
+ values[i] = fetch_att(p, iterator->typbyval, iterator->typlen);
+
+ /* Move our data pointer forward to the next element */
+ p = att_addlength_pointer(p, iterator->typlen, p);
+ p = (char *) att_align_nominal(p, iterator->typalign);
+ }
+ }
+
+ iterator->data_ptr = p;
+
+ result = construct_md_array(values,
+ nulls,
+ iterator->slice_ndim,
+ iterator->slice_dims,
+ iterator->slice_lbound,
+ ARR_ELEMTYPE(iterator->arr),
+ iterator->typlen,
+ iterator->typbyval,
+ iterator->typalign);
+
+ *isnull = false;
+ *value = PointerGetDatum(result);
+ }
+
+ return true;
+}
+
+/*
+ * Release an ArrayIterator data structure
+ */
+void
+array_free_iterator(ArrayIterator iterator)
+{
+ if (iterator->slice_ndim > 0)
+ {
+ pfree(iterator->slice_values);
+ pfree(iterator->slice_nulls);
+ }
+ pfree(iterator);
+}
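+
+/*
+ * Usage sketch for the iterator API (illustrative, assuming "arr" is a
+ * detoasted array and no cached element-type metadata is at hand, so
+ * mstate is passed as NULL): visit each element in storage order.
+ *
+ *		ArrayIterator it = array_create_iterator(arr, 0, NULL);
+ *		Datum		value;
+ *		bool		isnull;
+ *
+ *		while (array_iterate(it, &value, &isnull))
+ *		{
+ *			if (isnull)
+ *				continue;
+ *			... use value here ...
+ *		}
+ *		array_free_iterator(it);
+ */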
+
+
+/***************************************************************************/
+/******************| Support Routines |*****************/
+/***************************************************************************/
+
+/*
+ * Check whether a specific array element is NULL
+ *
+ * nullbitmap: pointer to array's null bitmap (NULL if none)
+ * offset: 0-based linear element number of array element
+ */
+static bool
+array_get_isnull(const bits8 *nullbitmap, int offset)
+{
+ if (nullbitmap == NULL)
+ return false; /* assume not null */
+ if (nullbitmap[offset / 8] & (1 << (offset % 8)))
+ return false; /* not null */
+ return true;
+}
+
+/*
+ * Set a specific array element's null-bitmap entry
+ *
+ * nullbitmap: pointer to array's null bitmap (mustn't be NULL)
+ * offset: 0-based linear element number of array element
+ * isNull: null status to set
+ */
+static void
+array_set_isnull(bits8 *nullbitmap, int offset, bool isNull)
+{
+ int bitmask;
+
+ nullbitmap += offset / 8;
+ bitmask = 1 << (offset % 8);
+ if (isNull)
+ *nullbitmap &= ~bitmask;
+ else
+ *nullbitmap |= bitmask;
+}
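+
+/*
+ * Worked example of the bitmap addressing used above: for offset = 10, the
+ * relevant byte is nullbitmap[1] (10 / 8) and the mask is 1 << 2 = 0x04
+ * (10 % 8).  A set bit means the element is not null; a clear bit means it
+ * is null.
+ */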
+
+/*
+ * Fetch array element at pointer, converted correctly to a Datum
+ *
+ * Caller must have handled case of NULL element
+ */
+static Datum
+ArrayCast(char *value, bool byval, int len)
+{
+ return fetch_att(value, byval, len);
+}
+
+/*
+ * Copy datum to *dest and return total space used (including align padding)
+ *
+ * Caller must have handled case of NULL element
+ */
+static int
+ArrayCastAndSet(Datum src,
+ int typlen,
+ bool typbyval,
+ char typalign,
+ char *dest)
+{
+ int inc;
+
+ if (typlen > 0)
+ {
+ if (typbyval)
+ store_att_byval(dest, src, typlen);
+ else
+ memmove(dest, DatumGetPointer(src), typlen);
+ inc = att_align_nominal(typlen, typalign);
+ }
+ else
+ {
+ Assert(!typbyval);
+ inc = att_addlength_datum(0, typlen, src);
+ memmove(dest, DatumGetPointer(src), inc);
+ inc = att_align_nominal(inc, typalign);
+ }
+
+ return inc;
+}
+
+/*
+ * Advance ptr over nitems array elements
+ *
+ * ptr: starting location in array
+ * offset: 0-based linear element number of first element (the one at *ptr)
+ * nullbitmap: start of array's null bitmap, or NULL if none
+ * nitems: number of array elements to advance over (>= 0)
+ * typlen, typbyval, typalign: storage parameters of array element datatype
+ *
+ * It is caller's responsibility to ensure that nitems is within range
+ */
+static char *
+array_seek(char *ptr, int offset, bits8 *nullbitmap, int nitems,
+ int typlen, bool typbyval, char typalign)
+{
+ int bitmask;
+ int i;
+
+ /* easy if fixed-size elements and no NULLs */
+ if (typlen > 0 && !nullbitmap)
+ return ptr + nitems * ((Size) att_align_nominal(typlen, typalign));
+
+ /* seems worth having separate loops for NULL and no-NULLs cases */
+ if (nullbitmap)
+ {
+ nullbitmap += offset / 8;
+ bitmask = 1 << (offset % 8);
+
+ for (i = 0; i < nitems; i++)
+ {
+ if (*nullbitmap & bitmask)
+ {
+ ptr = att_addlength_pointer(ptr, typlen, ptr);
+ ptr = (char *) att_align_nominal(ptr, typalign);
+ }
+ bitmask <<= 1;
+ if (bitmask == 0x100)
+ {
+ nullbitmap++;
+ bitmask = 1;
+ }
+ }
+ }
+ else
+ {
+ for (i = 0; i < nitems; i++)
+ {
+ ptr = att_addlength_pointer(ptr, typlen, ptr);
+ ptr = (char *) att_align_nominal(ptr, typalign);
+ }
+ }
+ return ptr;
+}
+
+/*
+ * Compute total size of the nitems array elements starting at *ptr
+ *
+ * Parameters same as for array_seek
+ */
+static int
+array_nelems_size(char *ptr, int offset, bits8 *nullbitmap, int nitems,
+ int typlen, bool typbyval, char typalign)
+{
+ return array_seek(ptr, offset, nullbitmap, nitems,
+ typlen, typbyval, typalign) - ptr;
+}
+
+/*
+ * Copy nitems array elements from srcptr to destptr
+ *
+ * destptr: starting destination location (must be enough room!)
+ * nitems: number of array elements to copy (>= 0)
+ * srcptr: starting location in source array
+ * offset: 0-based linear element number of first element (the one at *srcptr)
+ * nullbitmap: start of source array's null bitmap, or NULL if none
+ * typlen, typbyval, typalign: storage parameters of array element datatype
+ *
+ * Returns number of bytes copied
+ *
+ * NB: this does not take care of setting up the destination's null bitmap!
+ */
+static int
+array_copy(char *destptr, int nitems,
+ char *srcptr, int offset, bits8 *nullbitmap,
+ int typlen, bool typbyval, char typalign)
+{
+ int numbytes;
+
+ numbytes = array_nelems_size(srcptr, offset, nullbitmap, nitems,
+ typlen, typbyval, typalign);
+ memcpy(destptr, srcptr, numbytes);
+ return numbytes;
+}
+
+/*
+ * Copy nitems null-bitmap bits from source to destination
+ *
+ * destbitmap: start of destination array's null bitmap (mustn't be NULL)
+ * destoffset: 0-based linear element number of first dest element
+ * srcbitmap: start of source array's null bitmap, or NULL if none
+ * srcoffset: 0-based linear element number of first source element
+ * nitems: number of bits to copy (>= 0)
+ *
+ * If srcbitmap is NULL then we assume the source is all-non-NULL and
+ * fill 1's into the destination bitmap. Note that only the specified
+ * bits in the destination map are changed, not any before or after.
+ *
+ * Note: this could certainly be optimized using standard bitblt methods.
+ * However, it's not clear that the typical Postgres array has enough elements
+ * to make it worth worrying too much. For the moment, KISS.
+ */
+void
+array_bitmap_copy(bits8 *destbitmap, int destoffset,
+ const bits8 *srcbitmap, int srcoffset,
+ int nitems)
+{
+ int destbitmask,
+ destbitval,
+ srcbitmask,
+ srcbitval;
+
+ Assert(destbitmap);
+ if (nitems <= 0)
+ return; /* don't risk fetch off end of memory */
+ destbitmap += destoffset / 8;
+ destbitmask = 1 << (destoffset % 8);
+ destbitval = *destbitmap;
+ if (srcbitmap)
+ {
+ srcbitmap += srcoffset / 8;
+ srcbitmask = 1 << (srcoffset % 8);
+ srcbitval = *srcbitmap;
+ while (nitems-- > 0)
+ {
+ if (srcbitval & srcbitmask)
+ destbitval |= destbitmask;
+ else
+ destbitval &= ~destbitmask;
+ destbitmask <<= 1;
+ if (destbitmask == 0x100)
+ {
+ *destbitmap++ = destbitval;
+ destbitmask = 1;
+ if (nitems > 0)
+ destbitval = *destbitmap;
+ }
+ srcbitmask <<= 1;
+ if (srcbitmask == 0x100)
+ {
+ srcbitmap++;
+ srcbitmask = 1;
+ if (nitems > 0)
+ srcbitval = *srcbitmap;
+ }
+ }
+ if (destbitmask != 1)
+ *destbitmap = destbitval;
+ }
+ else
+ {
+ while (nitems-- > 0)
+ {
+ destbitval |= destbitmask;
+ destbitmask <<= 1;
+ if (destbitmask == 0x100)
+ {
+ *destbitmap++ = destbitval;
+ destbitmask = 1;
+ if (nitems > 0)
+ destbitval = *destbitmap;
+ }
+ }
+ if (destbitmask != 1)
+ *destbitmap = destbitval;
+ }
+}
+
+/*
+ * Compute space needed for a slice of an array
+ *
+ * We assume the caller has verified that the slice coordinates are valid.
+ */
+static int
+array_slice_size(char *arraydataptr, bits8 *arraynullsptr,
+ int ndim, int *dim, int *lb,
+ int *st, int *endp,
+ int typlen, bool typbyval, char typalign)
+{
+ int src_offset,
+ span[MAXDIM],
+ prod[MAXDIM],
+ dist[MAXDIM],
+ indx[MAXDIM];
+ char *ptr;
+ int i,
+ j,
+ inc;
+ int count = 0;
+
+ mda_get_range(ndim, span, st, endp);
+
+ /* Pretty easy for fixed element length without nulls ... */
+ if (typlen > 0 && !arraynullsptr)
+ return ArrayGetNItems(ndim, span) * att_align_nominal(typlen, typalign);
+
+ /* Else gotta do it the hard way */
+ src_offset = ArrayGetOffset(ndim, dim, lb, st);
+ ptr = array_seek(arraydataptr, 0, arraynullsptr, src_offset,
+ typlen, typbyval, typalign);
+ mda_get_prod(ndim, dim, prod);
+ mda_get_offset_values(ndim, dist, prod, span);
+ for (i = 0; i < ndim; i++)
+ indx[i] = 0;
+ j = ndim - 1;
+ do
+ {
+ if (dist[j])
+ {
+ ptr = array_seek(ptr, src_offset, arraynullsptr, dist[j],
+ typlen, typbyval, typalign);
+ src_offset += dist[j];
+ }
+ if (!array_get_isnull(arraynullsptr, src_offset))
+ {
+ inc = att_addlength_pointer(0, typlen, ptr);
+ inc = att_align_nominal(inc, typalign);
+ ptr += inc;
+ count += inc;
+ }
+ src_offset++;
+ } while ((j = mda_next_tuple(ndim, indx, span)) != -1);
+ return count;
+}
+
+/*
+ * Extract a slice of an array into consecutive elements in the destination
+ * array.
+ *
+ * We assume the caller has verified that the slice coordinates are valid,
+ * allocated enough storage for the result, and initialized the header
+ * of the new array.
+ */
+static void
+array_extract_slice(ArrayType *newarray,
+ int ndim,
+ int *dim,
+ int *lb,
+ char *arraydataptr,
+ bits8 *arraynullsptr,
+ int *st,
+ int *endp,
+ int typlen,
+ bool typbyval,
+ char typalign)
+{
+ char *destdataptr = ARR_DATA_PTR(newarray);
+ bits8 *destnullsptr = ARR_NULLBITMAP(newarray);
+ char *srcdataptr;
+ int src_offset,
+ dest_offset,
+ prod[MAXDIM],
+ span[MAXDIM],
+ dist[MAXDIM],
+ indx[MAXDIM];
+ int i,
+ j,
+ inc;
+
+ src_offset = ArrayGetOffset(ndim, dim, lb, st);
+ srcdataptr = array_seek(arraydataptr, 0, arraynullsptr, src_offset,
+ typlen, typbyval, typalign);
+ mda_get_prod(ndim, dim, prod);
+ mda_get_range(ndim, span, st, endp);
+ mda_get_offset_values(ndim, dist, prod, span);
+ for (i = 0; i < ndim; i++)
+ indx[i] = 0;
+ dest_offset = 0;
+ j = ndim - 1;
+ do
+ {
+ if (dist[j])
+ {
+ /* skip unwanted elements */
+ srcdataptr = array_seek(srcdataptr, src_offset, arraynullsptr,
+ dist[j],
+ typlen, typbyval, typalign);
+ src_offset += dist[j];
+ }
+ inc = array_copy(destdataptr, 1,
+ srcdataptr, src_offset, arraynullsptr,
+ typlen, typbyval, typalign);
+ if (destnullsptr)
+ array_bitmap_copy(destnullsptr, dest_offset,
+ arraynullsptr, src_offset,
+ 1);
+ destdataptr += inc;
+ srcdataptr += inc;
+ src_offset++;
+ dest_offset++;
+ } while ((j = mda_next_tuple(ndim, indx, span)) != -1);
+}
+
+/*
+ * Insert a slice into an array.
+ *
+ * ndim/dim[]/lb[] are dimensions of the original array. A new array with
+ * those same dimensions is to be constructed. destArray must already
+ * have been allocated and its header initialized.
+ *
+ * st[]/endp[] identify the slice to be replaced. Elements within the slice
+ * volume are taken from consecutive elements of the srcArray; elements
+ * outside it are copied from origArray.
+ *
+ * We assume the caller has verified that the slice coordinates are valid.
+ */
+static void
+array_insert_slice(ArrayType *destArray,
+ ArrayType *origArray,
+ ArrayType *srcArray,
+ int ndim,
+ int *dim,
+ int *lb,
+ int *st,
+ int *endp,
+ int typlen,
+ bool typbyval,
+ char typalign)
+{
+ char *destPtr = ARR_DATA_PTR(destArray);
+ char *origPtr = ARR_DATA_PTR(origArray);
+ char *srcPtr = ARR_DATA_PTR(srcArray);
+ bits8 *destBitmap = ARR_NULLBITMAP(destArray);
+ bits8 *origBitmap = ARR_NULLBITMAP(origArray);
+ bits8 *srcBitmap = ARR_NULLBITMAP(srcArray);
+ int orignitems = ArrayGetNItems(ARR_NDIM(origArray),
+ ARR_DIMS(origArray));
+ int dest_offset,
+ orig_offset,
+ src_offset,
+ prod[MAXDIM],
+ span[MAXDIM],
+ dist[MAXDIM],
+ indx[MAXDIM];
+ int i,
+ j,
+ inc;
+
+ dest_offset = ArrayGetOffset(ndim, dim, lb, st);
+ /* copy items before the slice start */
+ inc = array_copy(destPtr, dest_offset,
+ origPtr, 0, origBitmap,
+ typlen, typbyval, typalign);
+ destPtr += inc;
+ origPtr += inc;
+ if (destBitmap)
+ array_bitmap_copy(destBitmap, 0, origBitmap, 0, dest_offset);
+ orig_offset = dest_offset;
+ mda_get_prod(ndim, dim, prod);
+ mda_get_range(ndim, span, st, endp);
+ mda_get_offset_values(ndim, dist, prod, span);
+ for (i = 0; i < ndim; i++)
+ indx[i] = 0;
+ src_offset = 0;
+ j = ndim - 1;
+ do
+ {
+ /* Copy/advance over elements between here and next part of slice */
+ if (dist[j])
+ {
+ inc = array_copy(destPtr, dist[j],
+ origPtr, orig_offset, origBitmap,
+ typlen, typbyval, typalign);
+ destPtr += inc;
+ origPtr += inc;
+ if (destBitmap)
+ array_bitmap_copy(destBitmap, dest_offset,
+ origBitmap, orig_offset,
+ dist[j]);
+ dest_offset += dist[j];
+ orig_offset += dist[j];
+ }
+ /* Copy new element at this slice position */
+ inc = array_copy(destPtr, 1,
+ srcPtr, src_offset, srcBitmap,
+ typlen, typbyval, typalign);
+ if (destBitmap)
+ array_bitmap_copy(destBitmap, dest_offset,
+ srcBitmap, src_offset,
+ 1);
+ destPtr += inc;
+ srcPtr += inc;
+ dest_offset++;
+ src_offset++;
+ /* Advance over old element at this slice position */
+ origPtr = array_seek(origPtr, orig_offset, origBitmap, 1,
+ typlen, typbyval, typalign);
+ orig_offset++;
+ } while ((j = mda_next_tuple(ndim, indx, span)) != -1);
+
+ /* don't miss any data at the end */
+ array_copy(destPtr, orignitems - orig_offset,
+ origPtr, orig_offset, origBitmap,
+ typlen, typbyval, typalign);
+ if (destBitmap)
+ array_bitmap_copy(destBitmap, dest_offset,
+ origBitmap, orig_offset,
+ orignitems - orig_offset);
+}
+
+/*
+ * initArrayResult - initialize an empty ArrayBuildState
+ *
+ * element_type is the array element type (must be a valid array element type)
+ * rcontext is where to keep working state
+ * subcontext is a flag determining whether to use a separate memory context
+ *
+ * Note: there are two common schemes for using accumArrayResult().
+ * In the older scheme, you start with a NULL ArrayBuildState pointer, and
+ * call accumArrayResult once per element. In this scheme you end up with
+ * a NULL pointer if there were no elements, which you need to special-case.
+ * In the newer scheme, call initArrayResult and then call accumArrayResult
+ * once per element. In this scheme you always end with a non-NULL pointer
+ * that you can pass to makeArrayResult; you get an empty array if there
+ * were no elements. This is preferred if an empty array is what you want.
+ *
+ * It's possible to choose whether to create a separate memory context for the
+ * array build state, or whether to allocate it directly within rcontext.
+ *
+ * When there are many concurrent small states (e.g. array_agg() using hash
+ * aggregation of many small groups), using a separate memory context for each
+ * one may result in severe memory bloat. In such cases, use the same memory
+ * context to initialize all such array build states, and pass
+ * subcontext=false.
+ *
+ * In cases when the array build states have different lifetimes, using a
+ * single memory context is impractical. Instead, pass subcontext=true so that
+ * the array build states can be freed individually.
+ */
+ArrayBuildState *
+initArrayResult(Oid element_type, MemoryContext rcontext, bool subcontext)
+{
+ ArrayBuildState *astate;
+ MemoryContext arr_context = rcontext;
+
+ /* Make a temporary context to hold all the junk */
+ if (subcontext)
+ arr_context = AllocSetContextCreate(rcontext,
+ "accumArrayResult",
+ ALLOCSET_DEFAULT_SIZES);
+
+ astate = (ArrayBuildState *)
+ MemoryContextAlloc(arr_context, sizeof(ArrayBuildState));
+ astate->mcontext = arr_context;
+ astate->private_cxt = subcontext;
+ astate->alen = (subcontext ? 64 : 8); /* arbitrary starting array size */
+ astate->dvalues = (Datum *)
+ MemoryContextAlloc(arr_context, astate->alen * sizeof(Datum));
+ astate->dnulls = (bool *)
+ MemoryContextAlloc(arr_context, astate->alen * sizeof(bool));
+ astate->nelems = 0;
+ astate->element_type = element_type;
+ get_typlenbyvalalign(element_type,
+ &astate->typlen,
+ &astate->typbyval,
+ &astate->typalign);
+
+ return astate;
+}
+
+/*
+ * accumArrayResult - accumulate one (more) Datum for an array result
+ *
+ * astate is working state (can be NULL on first call)
+ * dvalue/disnull represent the new Datum to append to the array
+ * element_type is the Datum's type (must be a valid array element type)
+ * rcontext is where to keep working state
+ */
+ArrayBuildState *
+accumArrayResult(ArrayBuildState *astate,
+ Datum dvalue, bool disnull,
+ Oid element_type,
+ MemoryContext rcontext)
+{
+ MemoryContext oldcontext;
+
+ if (astate == NULL)
+ {
+ /* First time through --- initialize */
+ astate = initArrayResult(element_type, rcontext, true);
+ }
+ else
+ {
+ Assert(astate->element_type == element_type);
+ }
+
+ oldcontext = MemoryContextSwitchTo(astate->mcontext);
+
+ /* enlarge dvalues[]/dnulls[] if needed */
+ if (astate->nelems >= astate->alen)
+ {
+ astate->alen *= 2;
+ astate->dvalues = (Datum *)
+ repalloc(astate->dvalues, astate->alen * sizeof(Datum));
+ astate->dnulls = (bool *)
+ repalloc(astate->dnulls, astate->alen * sizeof(bool));
+ }
+
+ /*
+ * Ensure pass-by-ref stuff is copied into mcontext; and detoast it too if
+ * it's varlena. (You might think that detoasting is not needed here
+ * because construct_md_array can detoast the array elements later.
+ * However, we must not let construct_md_array modify the ArrayBuildState
+ * because that would mean array_agg_finalfn damages its input, which is
+ * verboten. Also, this way frequently saves one copying step.)
+ */
+ if (!disnull && !astate->typbyval)
+ {
+ if (astate->typlen == -1)
+ dvalue = PointerGetDatum(PG_DETOAST_DATUM_COPY(dvalue));
+ else
+ dvalue = datumCopy(dvalue, astate->typbyval, astate->typlen);
+ }
+
+ astate->dvalues[astate->nelems] = dvalue;
+ astate->dnulls[astate->nelems] = disnull;
+ astate->nelems++;
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return astate;
+}
+
+/*
+ * makeArrayResult - produce 1-D final result of accumArrayResult
+ *
+ * Note: only releases astate if it was initialized within a separate memory
+ * context (i.e. using subcontext=true when calling initArrayResult).
+ *
+ * astate is working state (must not be NULL)
+ * rcontext is where to construct result
+ */
+Datum
+makeArrayResult(ArrayBuildState *astate,
+ MemoryContext rcontext)
+{
+ int ndims;
+ int dims[1];
+ int lbs[1];
+
+ /* If no elements were presented, we want to create an empty array */
+ ndims = (astate->nelems > 0) ? 1 : 0;
+ dims[0] = astate->nelems;
+ lbs[0] = 1;
+
+ return makeMdArrayResult(astate, ndims, dims, lbs, rcontext,
+ astate->private_cxt);
+}
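+
+/*
+ * Usage sketch of the initArrayResult/accumArrayResult/makeArrayResult cycle
+ * (illustrative; the accumulated value and the use of CurrentMemoryContext
+ * are only examples): build a 1-D int4 array, getting an empty array if
+ * nothing was accumulated.
+ *
+ *		ArrayBuildState *astate;
+ *
+ *		astate = initArrayResult(INT4OID, CurrentMemoryContext, true);
+ *		astate = accumArrayResult(astate, Int32GetDatum(42), false,
+ *								  INT4OID, CurrentMemoryContext);
+ *		return makeArrayResult(astate, CurrentMemoryContext);
+ */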
+
+/*
+ * makeMdArrayResult - produce multi-D final result of accumArrayResult
+ *
+ * beware: no check that specified dimensions match the number of values
+ * accumulated.
+ *
+ * Note: if the astate was not initialized within a separate memory context
+ * (that is, initArrayResult was called with subcontext=false), then using
+ * release=true is illegal. Instead, release astate along with the rest of its
+ * context when appropriate.
+ *
+ * astate is working state (must not be NULL)
+ * rcontext is where to construct result
+ * release is true if okay to release working state
+ */
+Datum
+makeMdArrayResult(ArrayBuildState *astate,
+ int ndims,
+ int *dims,
+ int *lbs,
+ MemoryContext rcontext,
+ bool release)
+{
+ ArrayType *result;
+ MemoryContext oldcontext;
+
+ /* Build the final array result in rcontext */
+ oldcontext = MemoryContextSwitchTo(rcontext);
+
+ result = construct_md_array(astate->dvalues,
+ astate->dnulls,
+ ndims,
+ dims,
+ lbs,
+ astate->element_type,
+ astate->typlen,
+ astate->typbyval,
+ astate->typalign);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ /* Clean up all the junk */
+ if (release)
+ {
+ Assert(astate->private_cxt);
+ MemoryContextDelete(astate->mcontext);
+ }
+
+ return PointerGetDatum(result);
+}
+
+/*
+ * The following three functions provide essentially the same API as
+ * initArrayResult/accumArrayResult/makeArrayResult, but instead of accepting
+ * inputs that are array elements, they accept inputs that are arrays and
+ * produce an output array having N+1 dimensions. The inputs must all have
+ * identical dimensionality as well as element type.
+ */
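+
+/*
+ * For instance, aggregating array inputs with array_agg() flows through these
+ * functions and yields a result of one higher dimensionality (illustrative):
+ *
+ *    SELECT array_agg(a) FROM (VALUES (ARRAY[1,2]), (ARRAY[3,4])) v(a);
+ *      => {{1,2},{3,4}}
+ */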
+
+/*
+ * initArrayResultArr - initialize an empty ArrayBuildStateArr
+ *
+ * array_type is the array type (must be a valid varlena array type)
+ * element_type is the type of the array's elements (lookup if InvalidOid)
+ * rcontext is where to keep working state
+ * subcontext is a flag determining whether to use a separate memory context
+ */
+ArrayBuildStateArr *
+initArrayResultArr(Oid array_type, Oid element_type, MemoryContext rcontext,
+ bool subcontext)
+{
+ ArrayBuildStateArr *astate;
+ MemoryContext arr_context = rcontext; /* by default use the parent ctx */
+
+ /* Lookup element type, unless element_type already provided */
+ if (!OidIsValid(element_type))
+ {
+ element_type = get_element_type(array_type);
+
+ if (!OidIsValid(element_type))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("data type %s is not an array type",
+ format_type_be(array_type))));
+ }
+
+ /* Make a temporary context to hold all the junk */
+ if (subcontext)
+ arr_context = AllocSetContextCreate(rcontext,
+ "accumArrayResultArr",
+ ALLOCSET_DEFAULT_SIZES);
+
+ /* Note we initialize all fields to zero */
+ astate = (ArrayBuildStateArr *)
+ MemoryContextAllocZero(arr_context, sizeof(ArrayBuildStateArr));
+ astate->mcontext = arr_context;
+ astate->private_cxt = subcontext;
+
+ /* Save relevant datatype information */
+ astate->array_type = array_type;
+ astate->element_type = element_type;
+
+ return astate;
+}
+
+/*
+ * accumArrayResultArr - accumulate one (more) sub-array for an array result
+ *
+ * astate is working state (can be NULL on first call)
+ * dvalue/disnull represent the new sub-array to append to the array
+ * array_type is the array type (must be a valid varlena array type)
+ * rcontext is where to keep working state
+ */
+ArrayBuildStateArr *
+accumArrayResultArr(ArrayBuildStateArr *astate,
+ Datum dvalue, bool disnull,
+ Oid array_type,
+ MemoryContext rcontext)
+{
+ ArrayType *arg;
+ MemoryContext oldcontext;
+ int *dims,
+ *lbs,
+ ndims,
+ nitems,
+ ndatabytes;
+ char *data;
+ int i;
+
+ /*
+ * We disallow accumulating null subarrays. Another plausible definition
+ * is to ignore them, but callers that want that can just skip calling
+ * this function.
+ */
+ if (disnull)
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("cannot accumulate null arrays")));
+
+ /* Detoast input array in caller's context */
+ arg = DatumGetArrayTypeP(dvalue);
+
+ if (astate == NULL)
+ astate = initArrayResultArr(array_type, InvalidOid, rcontext, true);
+ else
+ Assert(astate->array_type == array_type);
+
+ oldcontext = MemoryContextSwitchTo(astate->mcontext);
+
+ /* Collect this input's dimensions */
+ ndims = ARR_NDIM(arg);
+ dims = ARR_DIMS(arg);
+ lbs = ARR_LBOUND(arg);
+ data = ARR_DATA_PTR(arg);
+ nitems = ArrayGetNItems(ndims, dims);
+ ndatabytes = ARR_SIZE(arg) - ARR_DATA_OFFSET(arg);
+
+ if (astate->ndims == 0)
+ {
+ /* First input; check/save the dimensionality info */
+
+ /* Should we allow empty inputs and just produce an empty output? */
+ if (ndims == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("cannot accumulate empty arrays")));
+ if (ndims + 1 > MAXDIM)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+ ndims + 1, MAXDIM)));
+
+ /*
+ * The output array will have n+1 dimensions, with the ones after the
+ * first matching the input's dimensions.
+ */
+ astate->ndims = ndims + 1;
+ astate->dims[0] = 0;
+ memcpy(&astate->dims[1], dims, ndims * sizeof(int));
+ astate->lbs[0] = 1;
+ memcpy(&astate->lbs[1], lbs, ndims * sizeof(int));
+
+ /* Allocate at least enough data space for this item */
+ astate->abytes = pg_nextpower2_32(Max(1024, ndatabytes + 1));
+ astate->data = (char *) palloc(astate->abytes);
+ }
+ else
+ {
+ /* Second or later input: must match first input's dimensionality */
+ if (astate->ndims != ndims + 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("cannot accumulate arrays of different dimensionality")));
+ for (i = 0; i < ndims; i++)
+ {
+ if (astate->dims[i + 1] != dims[i] || astate->lbs[i + 1] != lbs[i])
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("cannot accumulate arrays of different dimensionality")));
+ }
+
+ /* Enlarge data space if needed */
+ if (astate->nbytes + ndatabytes > astate->abytes)
+ {
+ astate->abytes = Max(astate->abytes * 2,
+ astate->nbytes + ndatabytes);
+ astate->data = (char *) repalloc(astate->data, astate->abytes);
+ }
+ }
+
+ /*
+ * Copy the data portion of the sub-array. Note we assume that the
+ * advertised data length of the sub-array is properly aligned. We do not
+ * have to worry about detoasting elements since whatever's in the
+ * sub-array should be OK already.
+ */
+ memcpy(astate->data + astate->nbytes, data, ndatabytes);
+ astate->nbytes += ndatabytes;
+
+ /* Deal with null bitmap if needed */
+ if (astate->nullbitmap || ARR_HASNULL(arg))
+ {
+ int newnitems = astate->nitems + nitems;
+
+ if (astate->nullbitmap == NULL)
+ {
+ /*
+ * First input with nulls; we must retrospectively handle any
+ * previous inputs by marking all their items non-null.
+ */
+ astate->aitems = pg_nextpower2_32(Max(256, newnitems + 1));
+ astate->nullbitmap = (bits8 *) palloc((astate->aitems + 7) / 8);
+ array_bitmap_copy(astate->nullbitmap, 0,
+ NULL, 0,
+ astate->nitems);
+ }
+ else if (newnitems > astate->aitems)
+ {
+ astate->aitems = Max(astate->aitems * 2, newnitems);
+ astate->nullbitmap = (bits8 *)
+ repalloc(astate->nullbitmap, (astate->aitems + 7) / 8);
+ }
+ array_bitmap_copy(astate->nullbitmap, astate->nitems,
+ ARR_NULLBITMAP(arg), 0,
+ nitems);
+ }
+
+ astate->nitems += nitems;
+ astate->dims[0] += 1;
+
+ MemoryContextSwitchTo(oldcontext);
+
+ /* Release detoasted copy if any */
+ if ((Pointer) arg != DatumGetPointer(dvalue))
+ pfree(arg);
+
+ return astate;
+}
+
+/*
+ * makeArrayResultArr - produce N+1-D final result of accumArrayResultArr
+ *
+ * astate is working state (must not be NULL)
+ * rcontext is where to construct result
+ * release is true if okay to release working state
+ */
+Datum
+makeArrayResultArr(ArrayBuildStateArr *astate,
+ MemoryContext rcontext,
+ bool release)
+{
+ ArrayType *result;
+ MemoryContext oldcontext;
+
+ /* Build the final array result in rcontext */
+ oldcontext = MemoryContextSwitchTo(rcontext);
+
+ if (astate->ndims == 0)
+ {
+ /* No inputs, return empty array */
+ result = construct_empty_array(astate->element_type);
+ }
+ else
+ {
+ int dataoffset,
+ nbytes;
+
+ /* Check for overflow of the array dimensions */
+ (void) ArrayGetNItems(astate->ndims, astate->dims);
+ ArrayCheckBounds(astate->ndims, astate->dims, astate->lbs);
+
+ /* Compute required space */
+ nbytes = astate->nbytes;
+ if (astate->nullbitmap != NULL)
+ {
+ dataoffset = ARR_OVERHEAD_WITHNULLS(astate->ndims, astate->nitems);
+ nbytes += dataoffset;
+ }
+ else
+ {
+ dataoffset = 0;
+ nbytes += ARR_OVERHEAD_NONULLS(astate->ndims);
+ }
+
+ result = (ArrayType *) palloc0(nbytes);
+ SET_VARSIZE(result, nbytes);
+ result->ndim = astate->ndims;
+ result->dataoffset = dataoffset;
+ result->elemtype = astate->element_type;
+
+ memcpy(ARR_DIMS(result), astate->dims, astate->ndims * sizeof(int));
+ memcpy(ARR_LBOUND(result), astate->lbs, astate->ndims * sizeof(int));
+ memcpy(ARR_DATA_PTR(result), astate->data, astate->nbytes);
+
+ if (astate->nullbitmap != NULL)
+ array_bitmap_copy(ARR_NULLBITMAP(result), 0,
+ astate->nullbitmap, 0,
+ astate->nitems);
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+
+ /* Clean up all the junk */
+ if (release)
+ {
+ Assert(astate->private_cxt);
+ MemoryContextDelete(astate->mcontext);
+ }
+
+ return PointerGetDatum(result);
+}
+
+/*
+ * The following three functions provide essentially the same API as
+ * initArrayResult/accumArrayResult/makeArrayResult, but can accept either
+ * scalar or array inputs, invoking the appropriate set of functions above.
+ */
+
+/*
+ * initArrayResultAny - initialize an empty ArrayBuildStateAny
+ *
+ * input_type is the input datatype (either element or array type)
+ * rcontext is where to keep working state
+ * subcontext is a flag determining whether to use a separate memory context
+ */
+ArrayBuildStateAny *
+initArrayResultAny(Oid input_type, MemoryContext rcontext, bool subcontext)
+{
+ ArrayBuildStateAny *astate;
+ Oid element_type = get_element_type(input_type);
+
+ if (OidIsValid(element_type))
+ {
+ /* Array case */
+ ArrayBuildStateArr *arraystate;
+
+ arraystate = initArrayResultArr(input_type, InvalidOid, rcontext, subcontext);
+ astate = (ArrayBuildStateAny *)
+ MemoryContextAlloc(arraystate->mcontext,
+ sizeof(ArrayBuildStateAny));
+ astate->scalarstate = NULL;
+ astate->arraystate = arraystate;
+ }
+ else
+ {
+ /* Scalar case */
+ ArrayBuildState *scalarstate;
+
+ /* Let's just check that we have a type that can be put into arrays */
+ Assert(OidIsValid(get_array_type(input_type)));
+
+ scalarstate = initArrayResult(input_type, rcontext, subcontext);
+ astate = (ArrayBuildStateAny *)
+ MemoryContextAlloc(scalarstate->mcontext,
+ sizeof(ArrayBuildStateAny));
+ astate->scalarstate = scalarstate;
+ astate->arraystate = NULL;
+ }
+
+ return astate;
+}
+
+/*
+ * accumArrayResultAny - accumulate one (more) input for an array result
+ *
+ * astate is working state (can be NULL on first call)
+ * dvalue/disnull represent the new input to append to the array
+ * input_type is the input datatype (either element or array type)
+ * rcontext is where to keep working state
+ */
+ArrayBuildStateAny *
+accumArrayResultAny(ArrayBuildStateAny *astate,
+ Datum dvalue, bool disnull,
+ Oid input_type,
+ MemoryContext rcontext)
+{
+ if (astate == NULL)
+ astate = initArrayResultAny(input_type, rcontext, true);
+
+ if (astate->scalarstate)
+ (void) accumArrayResult(astate->scalarstate,
+ dvalue, disnull,
+ input_type, rcontext);
+ else
+ (void) accumArrayResultArr(astate->arraystate,
+ dvalue, disnull,
+ input_type, rcontext);
+
+ return astate;
+}
+
+/*
+ * makeArrayResultAny - produce final result of accumArrayResultAny
+ *
+ * astate is working state (must not be NULL)
+ * rcontext is where to construct result
+ * release is true if okay to release working state
+ */
+Datum
+makeArrayResultAny(ArrayBuildStateAny *astate,
+ MemoryContext rcontext, bool release)
+{
+ Datum result;
+
+ if (astate->scalarstate)
+ {
+ /* Must use makeMdArrayResult to support "release" parameter */
+ int ndims;
+ int dims[1];
+ int lbs[1];
+
+ /* If no elements were presented, we want to create an empty array */
+ ndims = (astate->scalarstate->nelems > 0) ? 1 : 0;
+ dims[0] = astate->scalarstate->nelems;
+ lbs[0] = 1;
+
+ result = makeMdArrayResult(astate->scalarstate, ndims, dims, lbs,
+ rcontext, release);
+ }
+ else
+ {
+ result = makeArrayResultArr(astate->arraystate,
+ rcontext, release);
+ }
+ return result;
+}
+
+
+Datum
+array_larger(PG_FUNCTION_ARGS)
+{
+ if (array_cmp(fcinfo) > 0)
+ PG_RETURN_DATUM(PG_GETARG_DATUM(0));
+ else
+ PG_RETURN_DATUM(PG_GETARG_DATUM(1));
+}
+
+Datum
+array_smaller(PG_FUNCTION_ARGS)
+{
+ if (array_cmp(fcinfo) < 0)
+ PG_RETURN_DATUM(PG_GETARG_DATUM(0));
+ else
+ PG_RETURN_DATUM(PG_GETARG_DATUM(1));
+}
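+
+/*
+ * array_larger/array_smaller back max() and min() over array values, for
+ * example (illustrative):
+ *
+ *    SELECT max(a) FROM (VALUES (ARRAY[1,2]), (ARRAY[1,3])) v(a);
+ *      => {1,3}
+ */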
+
+
+typedef struct generate_subscripts_fctx
+{
+ int32 lower;
+ int32 upper;
+ bool reverse;
+} generate_subscripts_fctx;
+
+/*
+ * generate_subscripts(array anyarray, dim int [, reverse bool])
+ * Returns all subscripts of the array for any dimension
+ */
+Datum
+generate_subscripts(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ MemoryContext oldcontext;
+ generate_subscripts_fctx *fctx;
+
+ /* stuff done only on the first call of the function */
+ if (SRF_IS_FIRSTCALL())
+ {
+ AnyArrayType *v = PG_GETARG_ANY_ARRAY_P(0);
+ int reqdim = PG_GETARG_INT32(1);
+ int *lb,
+ *dimv;
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ /* Sanity check: does it look like an array at all? */
+ if (AARR_NDIM(v) <= 0 || AARR_NDIM(v) > MAXDIM)
+ SRF_RETURN_DONE(funcctx);
+
+ /* Sanity check: is the requested dim valid? */
+ if (reqdim <= 0 || reqdim > AARR_NDIM(v))
+ SRF_RETURN_DONE(funcctx);
+
+ /*
+ * switch to memory context appropriate for multiple function calls
+ */
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+ fctx = (generate_subscripts_fctx *) palloc(sizeof(generate_subscripts_fctx));
+
+ lb = AARR_LBOUND(v);
+ dimv = AARR_DIMS(v);
+
+ fctx->lower = lb[reqdim - 1];
+ fctx->upper = dimv[reqdim - 1] + lb[reqdim - 1] - 1;
+ fctx->reverse = (PG_NARGS() < 3) ? false : PG_GETARG_BOOL(2);
+
+ funcctx->user_fctx = fctx;
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+
+ fctx = funcctx->user_fctx;
+
+ if (fctx->lower <= fctx->upper)
+ {
+ if (!fctx->reverse)
+ SRF_RETURN_NEXT(funcctx, Int32GetDatum(fctx->lower++));
+ else
+ SRF_RETURN_NEXT(funcctx, Int32GetDatum(fctx->upper--));
+ }
+ else
+ /* done when there are no more elements left */
+ SRF_RETURN_DONE(funcctx);
+}
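+
+/*
+ * Examples (illustrative):
+ *
+ *    SELECT generate_subscripts(ARRAY[10,20,30], 1);          => 1, 2, 3
+ *    SELECT generate_subscripts(ARRAY[10,20,30], 1, true);    => 3, 2, 1
+ */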
+
+/*
+ * generate_subscripts_nodir
+ * Implements the 2-argument version of generate_subscripts
+ */
+Datum
+generate_subscripts_nodir(PG_FUNCTION_ARGS)
+{
+ /* just call the other one -- it can handle both cases */
+ return generate_subscripts(fcinfo);
+}
+
+/*
+ * array_fill_with_lower_bounds
+ * Create and fill array with defined lower bounds.
+ */
+Datum
+array_fill_with_lower_bounds(PG_FUNCTION_ARGS)
+{
+ ArrayType *dims;
+ ArrayType *lbs;
+ ArrayType *result;
+ Oid elmtype;
+ Datum value;
+ bool isnull;
+
+ if (PG_ARGISNULL(1) || PG_ARGISNULL(2))
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("dimension array or low bound array cannot be null")));
+
+ dims = PG_GETARG_ARRAYTYPE_P(1);
+ lbs = PG_GETARG_ARRAYTYPE_P(2);
+
+ if (!PG_ARGISNULL(0))
+ {
+ value = PG_GETARG_DATUM(0);
+ isnull = false;
+ }
+ else
+ {
+ value = 0;
+ isnull = true;
+ }
+
+ elmtype = get_fn_expr_argtype(fcinfo->flinfo, 0);
+ if (!OidIsValid(elmtype))
+ elog(ERROR, "could not determine data type of input");
+
+ result = array_fill_internal(dims, lbs, value, isnull, elmtype, fcinfo);
+ PG_RETURN_ARRAYTYPE_P(result);
+}
+
+/*
+ * array_fill
+ * Create and fill array with default lower bounds.
+ */
+Datum
+array_fill(PG_FUNCTION_ARGS)
+{
+ ArrayType *dims;
+ ArrayType *result;
+ Oid elmtype;
+ Datum value;
+ bool isnull;
+
+ if (PG_ARGISNULL(1))
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("dimension array or low bound array cannot be null")));
+
+ dims = PG_GETARG_ARRAYTYPE_P(1);
+
+ if (!PG_ARGISNULL(0))
+ {
+ value = PG_GETARG_DATUM(0);
+ isnull = false;
+ }
+ else
+ {
+ value = 0;
+ isnull = true;
+ }
+
+ elmtype = get_fn_expr_argtype(fcinfo->flinfo, 0);
+ if (!OidIsValid(elmtype))
+ elog(ERROR, "could not determine data type of input");
+
+ result = array_fill_internal(dims, NULL, value, isnull, elmtype, fcinfo);
+ PG_RETURN_ARRAYTYPE_P(result);
+}
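+
+/*
+ * Examples (illustrative):
+ *
+ *    SELECT array_fill(7, ARRAY[3]);              => {7,7,7}
+ *    SELECT array_fill(7, ARRAY[2], ARRAY[3]);    => [3:4]={7,7}
+ */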
+
+static ArrayType *
+create_array_envelope(int ndims, int *dimv, int *lbsv, int nbytes,
+ Oid elmtype, int dataoffset)
+{
+ ArrayType *result;
+
+ result = (ArrayType *) palloc0(nbytes);
+ SET_VARSIZE(result, nbytes);
+ result->ndim = ndims;
+ result->dataoffset = dataoffset;
+ result->elemtype = elmtype;
+ memcpy(ARR_DIMS(result), dimv, ndims * sizeof(int));
+ memcpy(ARR_LBOUND(result), lbsv, ndims * sizeof(int));
+
+ return result;
+}
+
+static ArrayType *
+array_fill_internal(ArrayType *dims, ArrayType *lbs,
+ Datum value, bool isnull, Oid elmtype,
+ FunctionCallInfo fcinfo)
+{
+ ArrayType *result;
+ int *dimv;
+ int *lbsv;
+ int ndims;
+ int nitems;
+ int deflbs[MAXDIM];
+ int16 elmlen;
+ bool elmbyval;
+ char elmalign;
+ ArrayMetaState *my_extra;
+
+ /*
+ * Parameter checks
+ */
+ if (ARR_NDIM(dims) > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("wrong number of array subscripts"),
+ errdetail("Dimension array must be one dimensional.")));
+
+ if (array_contains_nulls(dims))
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("dimension values cannot be null")));
+
+ dimv = (int *) ARR_DATA_PTR(dims);
+ ndims = (ARR_NDIM(dims) > 0) ? ARR_DIMS(dims)[0] : 0;
+
+ if (ndims < 0) /* we do allow zero-dimension arrays */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid number of dimensions: %d", ndims)));
+ if (ndims > MAXDIM)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+ ndims, MAXDIM)));
+
+ if (lbs != NULL)
+ {
+ if (ARR_NDIM(lbs) > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("wrong number of array subscripts"),
+ errdetail("Dimension array must be one dimensional.")));
+
+ if (array_contains_nulls(lbs))
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("dimension values cannot be null")));
+
+ if (ndims != ((ARR_NDIM(lbs) > 0) ? ARR_DIMS(lbs)[0] : 0))
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("wrong number of array subscripts"),
+ errdetail("Low bound array has different size than dimensions array.")));
+
+ lbsv = (int *) ARR_DATA_PTR(lbs);
+ }
+ else
+ {
+ int i;
+
+ for (i = 0; i < MAXDIM; i++)
+ deflbs[i] = 1;
+
+ lbsv = deflbs;
+ }
+
+ /* This checks for overflow of the array dimensions */
+ nitems = ArrayGetNItems(ndims, dimv);
+ ArrayCheckBounds(ndims, dimv, lbsv);
+
+ /* fast track for empty array */
+ if (nitems <= 0)
+ return construct_empty_array(elmtype);
+
+ /*
+ * We arrange to look up info about element type only once per series of
+ * calls, assuming the element type doesn't change underneath us.
+ */
+ my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
+ if (my_extra == NULL)
+ {
+ fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(ArrayMetaState));
+ my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
+ my_extra->element_type = InvalidOid;
+ }
+
+ if (my_extra->element_type != elmtype)
+ {
+ /* Get info about element type */
+ get_typlenbyvalalign(elmtype,
+ &my_extra->typlen,
+ &my_extra->typbyval,
+ &my_extra->typalign);
+ my_extra->element_type = elmtype;
+ }
+
+ elmlen = my_extra->typlen;
+ elmbyval = my_extra->typbyval;
+ elmalign = my_extra->typalign;
+
+ /* compute required space */
+ if (!isnull)
+ {
+ int i;
+ char *p;
+ int nbytes;
+ int totbytes;
+
+ /* make sure data is not toasted */
+ if (elmlen == -1)
+ value = PointerGetDatum(PG_DETOAST_DATUM(value));
+
+ nbytes = att_addlength_datum(0, elmlen, value);
+ nbytes = att_align_nominal(nbytes, elmalign);
+ Assert(nbytes > 0);
+
+ totbytes = nbytes * nitems;
+
+ /* check for overflow of multiplication or total request */
+ if (totbytes / nbytes != nitems ||
+ !AllocSizeIsValid(totbytes))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("array size exceeds the maximum allowed (%d)",
+ (int) MaxAllocSize)));
+
+ /*
+ * This addition can't overflow, but it might cause us to go past
+ * MaxAllocSize. We leave it to palloc to complain in that case.
+ */
+ totbytes += ARR_OVERHEAD_NONULLS(ndims);
+
+ result = create_array_envelope(ndims, dimv, lbsv, totbytes,
+ elmtype, 0);
+
+ p = ARR_DATA_PTR(result);
+ for (i = 0; i < nitems; i++)
+ p += ArrayCastAndSet(value, elmlen, elmbyval, elmalign, p);
+ }
+ else
+ {
+ int nbytes;
+ int dataoffset;
+
+ dataoffset = ARR_OVERHEAD_WITHNULLS(ndims, nitems);
+ nbytes = dataoffset;
+
+ result = create_array_envelope(ndims, dimv, lbsv, nbytes,
+ elmtype, dataoffset);
+
+ /* create_array_envelope already zeroed the bitmap, so we're done */
+ }
+
+ return result;
+}
+
+
+/*
+ * UNNEST
+ */
+Datum
+array_unnest(PG_FUNCTION_ARGS)
+{
+ typedef struct
+ {
+ array_iter iter;
+ int nextelem;
+ int numelems;
+ int16 elmlen;
+ bool elmbyval;
+ char elmalign;
+ } array_unnest_fctx;
+
+ FuncCallContext *funcctx;
+ array_unnest_fctx *fctx;
+ MemoryContext oldcontext;
+
+ /* stuff done only on the first call of the function */
+ if (SRF_IS_FIRSTCALL())
+ {
+ AnyArrayType *arr;
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ /*
+ * switch to memory context appropriate for multiple function calls
+ */
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ /*
+ * Get the array value and detoast if needed. We can't do this
+ * earlier because if we have to detoast, we want the detoasted copy
+ * to be in multi_call_memory_ctx, so it will go away when we're done
+ * and not before. (If no detoast happens, we assume the originally
+ * passed array will stick around till then.)
+ */
+ arr = PG_GETARG_ANY_ARRAY_P(0);
+
+ /* allocate memory for user context */
+ fctx = (array_unnest_fctx *) palloc(sizeof(array_unnest_fctx));
+
+ /* initialize state */
+ array_iter_setup(&fctx->iter, arr);
+ fctx->nextelem = 0;
+ fctx->numelems = ArrayGetNItems(AARR_NDIM(arr), AARR_DIMS(arr));
+
+ if (VARATT_IS_EXPANDED_HEADER(arr))
+ {
+ /* we can just grab the type data from expanded array */
+ fctx->elmlen = arr->xpn.typlen;
+ fctx->elmbyval = arr->xpn.typbyval;
+ fctx->elmalign = arr->xpn.typalign;
+ }
+ else
+ get_typlenbyvalalign(AARR_ELEMTYPE(arr),
+ &fctx->elmlen,
+ &fctx->elmbyval,
+ &fctx->elmalign);
+
+ funcctx->user_fctx = fctx;
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ /* stuff done on every call of the function */
+ funcctx = SRF_PERCALL_SETUP();
+ fctx = funcctx->user_fctx;
+
+ if (fctx->nextelem < fctx->numelems)
+ {
+ int offset = fctx->nextelem++;
+ Datum elem;
+
+ elem = array_iter_next(&fctx->iter, &fcinfo->isnull, offset,
+ fctx->elmlen, fctx->elmbyval, fctx->elmalign);
+
+ SRF_RETURN_NEXT(funcctx, elem);
+ }
+ else
+ {
+ /* done when there are no more elements left */
+ SRF_RETURN_DONE(funcctx);
+ }
+}
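+
+/*
+ * Example (illustrative): elements come back in storage order, so a
+ * multidimensional input is flattened:
+ *
+ *    SELECT unnest(ARRAY[[1,2],[3,4]]);    => 1, 2, 3, 4
+ */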
+
+/*
+ * Planner support function for array_unnest(anyarray)
+ */
+Datum
+array_unnest_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+ Node *ret = NULL;
+
+ if (IsA(rawreq, SupportRequestRows))
+ {
+ /* Try to estimate the number of rows returned */
+ SupportRequestRows *req = (SupportRequestRows *) rawreq;
+
+ if (is_funcclause(req->node)) /* be paranoid */
+ {
+ List *args = ((FuncExpr *) req->node)->args;
+ Node *arg1;
+
+ /* We can use estimated argument values here */
+ arg1 = estimate_expression_value(req->root, linitial(args));
+
+ req->rows = estimate_array_length(arg1);
+ ret = (Node *) req;
+ }
+ }
+
+ PG_RETURN_POINTER(ret);
+}
+
+
+/*
+ * array_replace/array_remove support
+ *
+ * Find all array entries matching (not distinct from) search/search_isnull,
+ * and delete them if remove is true, else replace them with
+ * replace/replace_isnull. Comparisons are done using the specified
+ * collation. fcinfo is passed only for caching purposes.
+ */
+static ArrayType *
+array_replace_internal(ArrayType *array,
+ Datum search, bool search_isnull,
+ Datum replace, bool replace_isnull,
+ bool remove, Oid collation,
+ FunctionCallInfo fcinfo)
+{
+ LOCAL_FCINFO(locfcinfo, 2);
+ ArrayType *result;
+ Oid element_type;
+ Datum *values;
+ bool *nulls;
+ int *dim;
+ int ndim;
+ int nitems,
+ nresult;
+ int i;
+ int32 nbytes = 0;
+ int32 dataoffset;
+ bool hasnulls;
+ int typlen;
+ bool typbyval;
+ char typalign;
+ char *arraydataptr;
+ bits8 *bitmap;
+ int bitmask;
+ bool changed = false;
+ TypeCacheEntry *typentry;
+
+ element_type = ARR_ELEMTYPE(array);
+ ndim = ARR_NDIM(array);
+ dim = ARR_DIMS(array);
+ nitems = ArrayGetNItems(ndim, dim);
+
+ /* Return input array unmodified if it is empty */
+ if (nitems <= 0)
+ return array;
+
+ /*
+ * We can't remove elements from multi-dimensional arrays, since the
+ * result might not be rectangular.
+ */
+ if (remove && ndim > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("removing elements from multidimensional arrays is not supported")));
+
+ /*
+ * We arrange to look up the equality function only once per series of
+ * calls, assuming the element type doesn't change underneath us.
+ */
+ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+ if (typentry == NULL ||
+ typentry->type_id != element_type)
+ {
+ typentry = lookup_type_cache(element_type,
+ TYPECACHE_EQ_OPR_FINFO);
+ if (!OidIsValid(typentry->eq_opr_finfo.fn_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify an equality operator for type %s",
+ format_type_be(element_type))));
+ fcinfo->flinfo->fn_extra = (void *) typentry;
+ }
+ typlen = typentry->typlen;
+ typbyval = typentry->typbyval;
+ typalign = typentry->typalign;
+
+ /*
+ * Detoast values if they are toasted. The replacement value must be
+ * detoasted for insertion into the result array, while detoasting the
+ * search value only once saves cycles.
+ */
+ if (typlen == -1)
+ {
+ if (!search_isnull)
+ search = PointerGetDatum(PG_DETOAST_DATUM(search));
+ if (!replace_isnull)
+ replace = PointerGetDatum(PG_DETOAST_DATUM(replace));
+ }
+
+ /* Prepare to apply the comparison operator */
+ InitFunctionCallInfoData(*locfcinfo, &typentry->eq_opr_finfo, 2,
+ collation, NULL, NULL);
+
+ /* Allocate temporary arrays for new values */
+ values = (Datum *) palloc(nitems * sizeof(Datum));
+ nulls = (bool *) palloc(nitems * sizeof(bool));
+
+ /* Loop over source data */
+ arraydataptr = ARR_DATA_PTR(array);
+ bitmap = ARR_NULLBITMAP(array);
+ bitmask = 1;
+ hasnulls = false;
+ nresult = 0;
+
+ for (i = 0; i < nitems; i++)
+ {
+ Datum elt;
+ bool isNull;
+ bool oprresult;
+ bool skip = false;
+
+ /* Get source element, checking for NULL */
+ if (bitmap && (*bitmap & bitmask) == 0)
+ {
+ isNull = true;
+ /* If searching for NULL, we have a match */
+ if (search_isnull)
+ {
+ if (remove)
+ {
+ skip = true;
+ changed = true;
+ }
+ else if (!replace_isnull)
+ {
+ values[nresult] = replace;
+ isNull = false;
+ changed = true;
+ }
+ }
+ }
+ else
+ {
+ isNull = false;
+ elt = fetch_att(arraydataptr, typbyval, typlen);
+ arraydataptr = att_addlength_datum(arraydataptr, typlen, elt);
+ arraydataptr = (char *) att_align_nominal(arraydataptr, typalign);
+
+ if (search_isnull)
+ {
+ /* no match possible, keep element */
+ values[nresult] = elt;
+ }
+ else
+ {
+ /*
+ * Apply the operator to the element pair; treat NULL as false
+ */
+ locfcinfo->args[0].value = elt;
+ locfcinfo->args[0].isnull = false;
+ locfcinfo->args[1].value = search;
+ locfcinfo->args[1].isnull = false;
+ locfcinfo->isnull = false;
+ oprresult = DatumGetBool(FunctionCallInvoke(locfcinfo));
+ if (locfcinfo->isnull || !oprresult)
+ {
+ /* no match, keep element */
+ values[nresult] = elt;
+ }
+ else
+ {
+ /* match, so replace or delete */
+ changed = true;
+ if (remove)
+ skip = true;
+ else
+ {
+ values[nresult] = replace;
+ isNull = replace_isnull;
+ }
+ }
+ }
+ }
+
+ if (!skip)
+ {
+ nulls[nresult] = isNull;
+ if (isNull)
+ hasnulls = true;
+ else
+ {
+ /* Update total result size */
+ nbytes = att_addlength_datum(nbytes, typlen, values[nresult]);
+ nbytes = att_align_nominal(nbytes, typalign);
+ /* check for overflow of total request */
+ if (!AllocSizeIsValid(nbytes))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("array size exceeds the maximum allowed (%d)",
+ (int) MaxAllocSize)));
+ }
+ nresult++;
+ }
+
+ /* advance bitmap pointer if any */
+ if (bitmap)
+ {
+ bitmask <<= 1;
+ if (bitmask == 0x100)
+ {
+ bitmap++;
+ bitmask = 1;
+ }
+ }
+ }
+
+ /*
+ * If not changed just return the original array
+ */
+ if (!changed)
+ {
+ pfree(values);
+ pfree(nulls);
+ return array;
+ }
+
+ /* If all elements were removed return an empty array */
+ if (nresult == 0)
+ {
+ pfree(values);
+ pfree(nulls);
+ return construct_empty_array(element_type);
+ }
+
+ /* Allocate and initialize the result array */
+ if (hasnulls)
+ {
+ dataoffset = ARR_OVERHEAD_WITHNULLS(ndim, nresult);
+ nbytes += dataoffset;
+ }
+ else
+ {
+ dataoffset = 0; /* marker for no null bitmap */
+ nbytes += ARR_OVERHEAD_NONULLS(ndim);
+ }
+ result = (ArrayType *) palloc0(nbytes);
+ SET_VARSIZE(result, nbytes);
+ result->ndim = ndim;
+ result->dataoffset = dataoffset;
+ result->elemtype = element_type;
+ memcpy(ARR_DIMS(result), ARR_DIMS(array), ndim * sizeof(int));
+ memcpy(ARR_LBOUND(result), ARR_LBOUND(array), ndim * sizeof(int));
+
+ if (remove)
+ {
+ /* Adjust the result length */
+ ARR_DIMS(result)[0] = nresult;
+ }
+
+ /* Insert data into result array */
+ CopyArrayEls(result,
+ values, nulls, nresult,
+ typlen, typbyval, typalign,
+ false);
+
+ pfree(values);
+ pfree(nulls);
+
+ return result;
+}
+
+/*
+ * Remove any occurrences of an element from an array
+ *
+ * If used on a multi-dimensional array this will raise an error.
+ */
+Datum
+array_remove(PG_FUNCTION_ARGS)
+{
+ ArrayType *array;
+ Datum search = PG_GETARG_DATUM(1);
+ bool search_isnull = PG_ARGISNULL(1);
+
+ if (PG_ARGISNULL(0))
+ PG_RETURN_NULL();
+ array = PG_GETARG_ARRAYTYPE_P(0);
+
+ array = array_replace_internal(array,
+ search, search_isnull,
+ (Datum) 0, true,
+ true, PG_GET_COLLATION(),
+ fcinfo);
+ PG_RETURN_ARRAYTYPE_P(array);
+}
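+
+/*
+ * Example (illustrative):
+ *
+ *    SELECT array_remove(ARRAY[1,2,3,2], 2);    => {1,3}
+ */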
+
+/*
+ * Replace any occurrences of an element in an array
+ */
+Datum
+array_replace(PG_FUNCTION_ARGS)
+{
+ ArrayType *array;
+ Datum search = PG_GETARG_DATUM(1);
+ bool search_isnull = PG_ARGISNULL(1);
+ Datum replace = PG_GETARG_DATUM(2);
+ bool replace_isnull = PG_ARGISNULL(2);
+
+ if (PG_ARGISNULL(0))
+ PG_RETURN_NULL();
+ array = PG_GETARG_ARRAYTYPE_P(0);
+
+ array = array_replace_internal(array,
+ search, search_isnull,
+ replace, replace_isnull,
+ false, PG_GET_COLLATION(),
+ fcinfo);
+ PG_RETURN_ARRAYTYPE_P(array);
+}
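+
+/*
+ * Examples (illustrative); matching uses IS NOT DISTINCT FROM semantics, so
+ * NULL elements can be replaced as well:
+ *
+ *    SELECT array_replace(ARRAY[1,2,5,4], 5, 3);         => {1,2,3,4}
+ *    SELECT array_replace(ARRAY[1,NULL,3], NULL, 0);     => {1,0,3}
+ */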
+
+/*
+ * Implements width_bucket(anyelement, anyarray).
+ *
+ * 'thresholds' is an array containing lower bound values for each bucket;
+ * these must be sorted from smallest to largest, or bogus results will be
+ * produced. If N thresholds are supplied, the output is from 0 to N:
+ * 0 is for inputs < first threshold, N is for inputs >= last threshold.
+ */
+Datum
+width_bucket_array(PG_FUNCTION_ARGS)
+{
+ Datum operand = PG_GETARG_DATUM(0);
+ ArrayType *thresholds = PG_GETARG_ARRAYTYPE_P(1);
+ Oid collation = PG_GET_COLLATION();
+ Oid element_type = ARR_ELEMTYPE(thresholds);
+ int result;
+
+ /* Check input */
+ if (ARR_NDIM(thresholds) > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("thresholds must be one-dimensional array")));
+
+ if (array_contains_nulls(thresholds))
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("thresholds array must not contain NULLs")));
+
+ /* We have a dedicated implementation for float8 data */
+ if (element_type == FLOAT8OID)
+ result = width_bucket_array_float8(operand, thresholds);
+ else
+ {
+ TypeCacheEntry *typentry;
+
+ /* Cache information about the input type */
+ typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+ if (typentry == NULL ||
+ typentry->type_id != element_type)
+ {
+ typentry = lookup_type_cache(element_type,
+ TYPECACHE_CMP_PROC_FINFO);
+ if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify a comparison function for type %s",
+ format_type_be(element_type))));
+ fcinfo->flinfo->fn_extra = (void *) typentry;
+ }
+
+ /*
+ * We have separate implementation paths for fixed- and variable-width
+ * types, since indexing the array is a lot cheaper in the first case.
+ */
+ if (typentry->typlen > 0)
+ result = width_bucket_array_fixed(operand, thresholds,
+ collation, typentry);
+ else
+ result = width_bucket_array_variable(operand, thresholds,
+ collation, typentry);
+ }
+
+ /* Avoid leaking memory when handed toasted input. */
+ PG_FREE_IF_COPY(thresholds, 1);
+
+ PG_RETURN_INT32(result);
+}
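+
+/*
+ * Examples (illustrative), with thresholds {1,3,4,6}:
+ *
+ *    SELECT width_bucket(5, ARRAY[1,3,4,6]);    => 3
+ *    SELECT width_bucket(0, ARRAY[1,3,4,6]);    => 0
+ *    SELECT width_bucket(8, ARRAY[1,3,4,6]);    => 4
+ */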
+
+/*
+ * width_bucket_array for float8 data.
+ */
+static int
+width_bucket_array_float8(Datum operand, ArrayType *thresholds)
+{
+ float8 op = DatumGetFloat8(operand);
+ float8 *thresholds_data;
+ int left;
+ int right;
+
+ /*
+ * Since we know the array contains no NULLs, we can just index it
+ * directly.
+ */
+ thresholds_data = (float8 *) ARR_DATA_PTR(thresholds);
+
+ left = 0;
+ right = ArrayGetNItems(ARR_NDIM(thresholds), ARR_DIMS(thresholds));
+
+ /*
+ * If the probe value is a NaN, it's greater than or equal to all possible
+ * threshold values (including other NaNs), so we need not search. Note
+ * that this would give the same result as searching even if the array
+ * contains multiple NaNs (as long as they're correctly sorted), since the
+ * loop logic will find the rightmost of multiple equal threshold values.
+ */
+ if (isnan(op))
+ return right;
+
+ /* Find the bucket */
+ while (left < right)
+ {
+ int mid = (left + right) / 2;
+
+ if (isnan(thresholds_data[mid]) || op < thresholds_data[mid])
+ right = mid;
+ else
+ left = mid + 1;
+ }
+
+ return left;
+}
+
+/*
+ * width_bucket_array for generic fixed-width data types.
+ */
+static int
+width_bucket_array_fixed(Datum operand,
+ ArrayType *thresholds,
+ Oid collation,
+ TypeCacheEntry *typentry)
+{
+ LOCAL_FCINFO(locfcinfo, 2);
+ char *thresholds_data;
+ int typlen = typentry->typlen;
+ bool typbyval = typentry->typbyval;
+ int left;
+ int right;
+
+ /*
+ * Since we know the array contains no NULLs, we can just index it
+ * directly.
+ */
+ thresholds_data = (char *) ARR_DATA_PTR(thresholds);
+
+ InitFunctionCallInfoData(*locfcinfo, &typentry->cmp_proc_finfo, 2,
+ collation, NULL, NULL);
+
+ /* Find the bucket */
+ left = 0;
+ right = ArrayGetNItems(ARR_NDIM(thresholds), ARR_DIMS(thresholds));
+ while (left < right)
+ {
+ int mid = (left + right) / 2;
+ char *ptr;
+ int32 cmpresult;
+
+ ptr = thresholds_data + mid * typlen;
+
+ locfcinfo->args[0].value = operand;
+ locfcinfo->args[0].isnull = false;
+ locfcinfo->args[1].value = fetch_att(ptr, typbyval, typlen);
+ locfcinfo->args[1].isnull = false;
+
+ cmpresult = DatumGetInt32(FunctionCallInvoke(locfcinfo));
+
+ /* We don't expect comparison support functions to return null */
+ Assert(!locfcinfo->isnull);
+
+ if (cmpresult < 0)
+ right = mid;
+ else
+ left = mid + 1;
+ }
+
+ return left;
+}
+
+/*
+ * width_bucket_array for generic variable-width data types.
+ */
+static int
+width_bucket_array_variable(Datum operand,
+ ArrayType *thresholds,
+ Oid collation,
+ TypeCacheEntry *typentry)
+{
+ LOCAL_FCINFO(locfcinfo, 2);
+ char *thresholds_data;
+ int typlen = typentry->typlen;
+ bool typbyval = typentry->typbyval;
+ char typalign = typentry->typalign;
+ int left;
+ int right;
+
+ thresholds_data = (char *) ARR_DATA_PTR(thresholds);
+
+ InitFunctionCallInfoData(*locfcinfo, &typentry->cmp_proc_finfo, 2,
+ collation, NULL, NULL);
+
+ /* Find the bucket */
+ left = 0;
+ right = ArrayGetNItems(ARR_NDIM(thresholds), ARR_DIMS(thresholds));
+ while (left < right)
+ {
+ int mid = (left + right) / 2;
+ char *ptr;
+ int i;
+ int32 cmpresult;
+
+ /* Locate mid'th array element by advancing from left element */
+ ptr = thresholds_data;
+ for (i = left; i < mid; i++)
+ {
+ ptr = att_addlength_pointer(ptr, typlen, ptr);
+ ptr = (char *) att_align_nominal(ptr, typalign);
+ }
+
+ locfcinfo->args[0].value = operand;
+ locfcinfo->args[0].isnull = false;
+ locfcinfo->args[1].value = fetch_att(ptr, typbyval, typlen);
+ locfcinfo->args[1].isnull = false;
+
+ cmpresult = DatumGetInt32(FunctionCallInvoke(locfcinfo));
+
+ /* We don't expect comparison support functions to return null */
+ Assert(!locfcinfo->isnull);
+
+ if (cmpresult < 0)
+ right = mid;
+ else
+ {
+ left = mid + 1;
+
+ /*
+ * Move the thresholds pointer to match new "left" index, so we
+ * don't have to seek over those elements again. This trick
+ * ensures we do only O(N) array indexing work, not O(N^2).
+ */
+ ptr = att_addlength_pointer(ptr, typlen, ptr);
+ thresholds_data = (char *) att_align_nominal(ptr, typalign);
+ }
+ }
+
+ return left;
+}
+
+/*
+ * Trim the last N elements from an array by building an appropriate slice.
+ * Only the first dimension is trimmed.
+ */
+Datum
+trim_array(PG_FUNCTION_ARGS)
+{
+ ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
+ int n = PG_GETARG_INT32(1);
+ int array_length = (ARR_NDIM(v) > 0) ? ARR_DIMS(v)[0] : 0;
+ int16 elmlen;
+ bool elmbyval;
+ char elmalign;
+ int lower[MAXDIM];
+ int upper[MAXDIM];
+ bool lowerProvided[MAXDIM];
+ bool upperProvided[MAXDIM];
+ Datum result;
+
+ /* Per spec, throw an error if out of bounds */
+ if (n < 0 || n > array_length)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_ELEMENT_ERROR),
+ errmsg("number of elements to trim must be between 0 and %d",
+ array_length)));
+
+ /* Set all the bounds as unprovided except the first upper bound */
+ memset(lowerProvided, false, sizeof(lowerProvided));
+ memset(upperProvided, false, sizeof(upperProvided));
+ if (ARR_NDIM(v) > 0)
+ {
+ upper[0] = ARR_LBOUND(v)[0] + array_length - n - 1;
+ upperProvided[0] = true;
+ }
+
+ /* Fetch the needed information about the element type */
+ get_typlenbyvalalign(ARR_ELEMTYPE(v), &elmlen, &elmbyval, &elmalign);
+
+ /* Get the slice */
+ result = array_get_slice(PointerGetDatum(v), 1,
+ upper, lower, upperProvided, lowerProvided,
+ -1, elmlen, elmbyval, elmalign);
+
+ PG_RETURN_DATUM(result);
+}
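+
+/*
+ * Example (illustrative):
+ *
+ *    SELECT trim_array(ARRAY[1,2,3,4], 2);    => {1,2}
+ */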
diff --git a/src/backend/utils/adt/arraysubs.c b/src/backend/utils/adt/arraysubs.c
new file mode 100644
index 0000000..11db1af
--- /dev/null
+++ b/src/backend/utils/adt/arraysubs.c
@@ -0,0 +1,577 @@
+/*-------------------------------------------------------------------------
+ *
+ * arraysubs.c
+ * Subscripting support functions for arrays.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/arraysubs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "executor/execExpr.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/subscripting.h"
+#include "parser/parse_coerce.h"
+#include "parser/parse_expr.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+
+
+/* SubscriptingRefState.workspace for array subscripting execution */
+typedef struct ArraySubWorkspace
+{
+ /* Values determined during expression compilation */
+ Oid refelemtype; /* OID of the array element type */
+ int16 refattrlength; /* typlen of array type */
+ int16 refelemlength; /* typlen of the array element type */
+ bool refelembyval; /* is the element type pass-by-value? */
+ char refelemalign; /* typalign of the element type */
+
+ /*
+ * Subscript values converted to integers. Note that these arrays must be
+ * of length MAXDIM even when dealing with fewer subscripts, because
+ * array_get/set_slice may scribble on the extra entries.
+ */
+ int upperindex[MAXDIM];
+ int lowerindex[MAXDIM];
+} ArraySubWorkspace;
+
+
+/*
+ * Finish parse analysis of a SubscriptingRef expression for an array.
+ *
+ * Transform the subscript expressions, coerce them to integers,
+ * and determine the result type of the SubscriptingRef node.
+ */
+static void
+array_subscript_transform(SubscriptingRef *sbsref,
+ List *indirection,
+ ParseState *pstate,
+ bool isSlice,
+ bool isAssignment)
+{
+ List *upperIndexpr = NIL;
+ List *lowerIndexpr = NIL;
+ ListCell *idx;
+
+ /*
+ * Transform the subscript expressions, and separate upper and lower
+ * bounds into two lists.
+ *
+ * If we have a container slice expression, we convert any non-slice
+ * indirection items to slices by treating the single subscript as the
+ * upper bound and supplying an assumed lower bound of 1.
+ */
+ foreach(idx, indirection)
+ {
+ A_Indices *ai = lfirst_node(A_Indices, idx);
+ Node *subexpr;
+
+ if (isSlice)
+ {
+ if (ai->lidx)
+ {
+ subexpr = transformExpr(pstate, ai->lidx, pstate->p_expr_kind);
+ /* If it's not int4 already, try to coerce */
+ subexpr = coerce_to_target_type(pstate,
+ subexpr, exprType(subexpr),
+ INT4OID, -1,
+ COERCION_ASSIGNMENT,
+ COERCE_IMPLICIT_CAST,
+ -1);
+ if (subexpr == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("array subscript must have type integer"),
+ parser_errposition(pstate, exprLocation(ai->lidx))));
+ }
+ else if (!ai->is_slice)
+ {
+ /* Make a constant 1 */
+ subexpr = (Node *) makeConst(INT4OID,
+ -1,
+ InvalidOid,
+ sizeof(int32),
+ Int32GetDatum(1),
+ false,
+ true); /* pass by value */
+ }
+ else
+ {
+ /* Slice with omitted lower bound, put NULL into the list */
+ subexpr = NULL;
+ }
+ lowerIndexpr = lappend(lowerIndexpr, subexpr);
+ }
+ else
+ Assert(ai->lidx == NULL && !ai->is_slice);
+
+ if (ai->uidx)
+ {
+ subexpr = transformExpr(pstate, ai->uidx, pstate->p_expr_kind);
+ /* If it's not int4 already, try to coerce */
+ subexpr = coerce_to_target_type(pstate,
+ subexpr, exprType(subexpr),
+ INT4OID, -1,
+ COERCION_ASSIGNMENT,
+ COERCE_IMPLICIT_CAST,
+ -1);
+ if (subexpr == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("array subscript must have type integer"),
+ parser_errposition(pstate, exprLocation(ai->uidx))));
+ }
+ else
+ {
+ /* Slice with omitted upper bound, put NULL into the list */
+ Assert(isSlice && ai->is_slice);
+ subexpr = NULL;
+ }
+ upperIndexpr = lappend(upperIndexpr, subexpr);
+ }
+
+ /* ... and store the transformed lists into the SubscriptRef node */
+ sbsref->refupperindexpr = upperIndexpr;
+ sbsref->reflowerindexpr = lowerIndexpr;
+
+ /* Verify subscript list lengths are within implementation limit */
+ if (list_length(upperIndexpr) > MAXDIM)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+ list_length(upperIndexpr), MAXDIM)));
+ /* We need not check lowerIndexpr separately */
+
+ /*
+ * Determine the result type of the subscripting operation. It's the same
+ * as the array type if we're slicing, else it's the element type. In
+ * either case, the typmod is the same as the array's, so we need not
+ * change reftypmod.
+ */
+ if (isSlice)
+ sbsref->refrestype = sbsref->refcontainertype;
+ else
+ sbsref->refrestype = sbsref->refelemtype;
+}
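+
+/*
+ * Illustrative examples of the subscripting forms handled above:
+ *
+ *    (ARRAY[1,2,3,4,5])[2]      => 2          -- plain element fetch
+ *    (ARRAY[1,2,3,4,5])[2:4]    => {2,3,4}    -- slice
+ *    (ARRAY[1,2,3,4,5])[:3]     => {1,2,3}    -- slice with omitted lower bound
+ */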
+
+/*
+ * During execution, process the subscripts in a SubscriptingRef expression.
+ *
+ * The subscript expressions are already evaluated in Datum form in the
+ * SubscriptingRefState's arrays. Check and convert them as necessary.
+ *
+ * If any subscript is NULL, we throw error in assignment cases, or in fetch
+ * cases set result to NULL and return false (instructing caller to skip the
+ * rest of the SubscriptingRef sequence).
+ *
+ * We convert all the subscripts to plain integers and save them in the
+ * sbsrefstate->workspace arrays.
+ */
+static bool
+array_subscript_check_subscripts(ExprState *state,
+ ExprEvalStep *op,
+ ExprContext *econtext)
+{
+ SubscriptingRefState *sbsrefstate = op->d.sbsref_subscript.state;
+ ArraySubWorkspace *workspace = (ArraySubWorkspace *) sbsrefstate->workspace;
+
+ /* Process upper subscripts */
+ for (int i = 0; i < sbsrefstate->numupper; i++)
+ {
+ if (sbsrefstate->upperprovided[i])
+ {
+ /* If any index expr yields NULL, result is NULL or error */
+ if (sbsrefstate->upperindexnull[i])
+ {
+ if (sbsrefstate->isassignment)
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("array subscript in assignment must not be null")));
+ *op->resnull = true;
+ return false;
+ }
+ workspace->upperindex[i] = DatumGetInt32(sbsrefstate->upperindex[i]);
+ }
+ }
+
+ /* Likewise for lower subscripts */
+ for (int i = 0; i < sbsrefstate->numlower; i++)
+ {
+ if (sbsrefstate->lowerprovided[i])
+ {
+ /* If any index expr yields NULL, result is NULL or error */
+ if (sbsrefstate->lowerindexnull[i])
+ {
+ if (sbsrefstate->isassignment)
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("array subscript in assignment must not be null")));
+ *op->resnull = true;
+ return false;
+ }
+ workspace->lowerindex[i] = DatumGetInt32(sbsrefstate->lowerindex[i]);
+ }
+ }
+
+ return true;
+}
+
+/*
+ * Evaluate SubscriptingRef fetch for an array element.
+ *
+ * Source container is in step's result variable (it's known not NULL, since
+ * we set fetch_strict to true), and indexes have already been evaluated into
+ * workspace array.
+ */
+static void
+array_subscript_fetch(ExprState *state,
+ ExprEvalStep *op,
+ ExprContext *econtext)
+{
+ SubscriptingRefState *sbsrefstate = op->d.sbsref.state;
+ ArraySubWorkspace *workspace = (ArraySubWorkspace *) sbsrefstate->workspace;
+
+ /* Should not get here if source array (or any subscript) is null */
+ Assert(!(*op->resnull));
+
+ *op->resvalue = array_get_element(*op->resvalue,
+ sbsrefstate->numupper,
+ workspace->upperindex,
+ workspace->refattrlength,
+ workspace->refelemlength,
+ workspace->refelembyval,
+ workspace->refelemalign,
+ op->resnull);
+}
+
+/*
+ * Evaluate SubscriptingRef fetch for an array slice.
+ *
+ * Source container is in step's result variable (it's known not NULL, since
+ * we set fetch_strict to true), and indexes have already been evaluated into
+ * workspace array.
+ */
+static void
+array_subscript_fetch_slice(ExprState *state,
+ ExprEvalStep *op,
+ ExprContext *econtext)
+{
+ SubscriptingRefState *sbsrefstate = op->d.sbsref.state;
+ ArraySubWorkspace *workspace = (ArraySubWorkspace *) sbsrefstate->workspace;
+
+ /* Should not get here if source array (or any subscript) is null */
+ Assert(!(*op->resnull));
+
+ *op->resvalue = array_get_slice(*op->resvalue,
+ sbsrefstate->numupper,
+ workspace->upperindex,
+ workspace->lowerindex,
+ sbsrefstate->upperprovided,
+ sbsrefstate->lowerprovided,
+ workspace->refattrlength,
+ workspace->refelemlength,
+ workspace->refelembyval,
+ workspace->refelemalign);
+ /* The slice is never NULL, so no need to change *op->resnull */
+}
+
+/*
+ * Evaluate SubscriptingRef assignment for an array element assignment.
+ *
+ * Input container (possibly null) is in result area, replacement value is in
+ * SubscriptingRefState's replacevalue/replacenull.
+ */
+static void
+array_subscript_assign(ExprState *state,
+ ExprEvalStep *op,
+ ExprContext *econtext)
+{
+ SubscriptingRefState *sbsrefstate = op->d.sbsref.state;
+ ArraySubWorkspace *workspace = (ArraySubWorkspace *) sbsrefstate->workspace;
+ Datum arraySource = *op->resvalue;
+
+ /*
+ * For an assignment to a fixed-length array type, both the original array
+ * and the value to be assigned into it must be non-NULL, else we punt and
+ * return the original array.
+ */
+ if (workspace->refattrlength > 0)
+ {
+ if (*op->resnull || sbsrefstate->replacenull)
+ return;
+ }
+
+ /*
+ * For assignment to varlena arrays, we handle a NULL original array by
+ * substituting an empty (zero-dimensional) array; insertion of the new
+ * element will result in a singleton array value. It does not matter
+ * whether the new element is NULL.
+ */
+ if (*op->resnull)
+ {
+ arraySource = PointerGetDatum(construct_empty_array(workspace->refelemtype));
+ *op->resnull = false;
+ }
+
+ *op->resvalue = array_set_element(arraySource,
+ sbsrefstate->numupper,
+ workspace->upperindex,
+ sbsrefstate->replacevalue,
+ sbsrefstate->replacenull,
+ workspace->refattrlength,
+ workspace->refelemlength,
+ workspace->refelembyval,
+ workspace->refelemalign);
+ /* The result is never NULL, so no need to change *op->resnull */
+}
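+
+/*
+ * For example (illustrative, PL/pgSQL): assigning through a subscript into a
+ * NULL array variable yields a singleton array with matching bounds:
+ *
+ *    DECLARE a int[];    -- initially NULL
+ *    BEGIN
+ *        a[3] := 7;      -- a is now [3:3]={7}
+ *    END;
+ */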
+
+/*
+ * Evaluate SubscriptingRef assignment for an array slice assignment.
+ *
+ * Input container (possibly null) is in result area, replacement value is in
+ * SubscriptingRefState's replacevalue/replacenull.
+ */
+static void
+array_subscript_assign_slice(ExprState *state,
+ ExprEvalStep *op,
+ ExprContext *econtext)
+{
+ SubscriptingRefState *sbsrefstate = op->d.sbsref.state;
+ ArraySubWorkspace *workspace = (ArraySubWorkspace *) sbsrefstate->workspace;
+ Datum arraySource = *op->resvalue;
+
+ /*
+ * For an assignment to a fixed-length array type, both the original array
+ * and the value to be assigned into it must be non-NULL, else we punt and
+ * return the original array.
+ */
+ if (workspace->refattrlength > 0)
+ {
+ if (*op->resnull || sbsrefstate->replacenull)
+ return;
+ }
+
+ /*
+ * For assignment to varlena arrays, we handle a NULL original array by
+ * substituting an empty (zero-dimensional) array; insertion of the new
+ * element will result in a singleton array value. It does not matter
+ * whether the new element is NULL.
+ */
+ if (*op->resnull)
+ {
+ arraySource = PointerGetDatum(construct_empty_array(workspace->refelemtype));
+ *op->resnull = false;
+ }
+
+ *op->resvalue = array_set_slice(arraySource,
+ sbsrefstate->numupper,
+ workspace->upperindex,
+ workspace->lowerindex,
+ sbsrefstate->upperprovided,
+ sbsrefstate->lowerprovided,
+ sbsrefstate->replacevalue,
+ sbsrefstate->replacenull,
+ workspace->refattrlength,
+ workspace->refelemlength,
+ workspace->refelembyval,
+ workspace->refelemalign);
+ /* The result is never NULL, so no need to change *op->resnull */
+}
+
+/*
+ * Compute old array element value for a SubscriptingRef assignment
+ * expression. Will only be called if the new-value subexpression
+ * contains SubscriptingRef or FieldStore. This is the same as the
+ * regular fetch case, except that we have to handle a null array,
+ * and the value should be stored into the SubscriptingRefState's
+ * prevvalue/prevnull fields.
+ */
+static void
+array_subscript_fetch_old(ExprState *state,
+ ExprEvalStep *op,
+ ExprContext *econtext)
+{
+ SubscriptingRefState *sbsrefstate = op->d.sbsref.state;
+ ArraySubWorkspace *workspace = (ArraySubWorkspace *) sbsrefstate->workspace;
+
+ if (*op->resnull)
+ {
+ /* whole array is null, so any element is too */
+ sbsrefstate->prevvalue = (Datum) 0;
+ sbsrefstate->prevnull = true;
+ }
+ else
+ sbsrefstate->prevvalue = array_get_element(*op->resvalue,
+ sbsrefstate->numupper,
+ workspace->upperindex,
+ workspace->refattrlength,
+ workspace->refelemlength,
+ workspace->refelembyval,
+ workspace->refelemalign,
+ &sbsrefstate->prevnull);
+}
+
+/*
+ * Compute old array slice value for a SubscriptingRef assignment
+ * expression. Will only be called if the new-value subexpression
+ * contains SubscriptingRef or FieldStore. This is the same as the
+ * regular fetch case, except that we have to handle a null array,
+ * and the value should be stored into the SubscriptingRefState's
+ * prevvalue/prevnull fields.
+ *
+ * Note: this is presently dead code, because the new value for a
+ * slice would have to be an array, so it couldn't directly contain a
+ * FieldStore; nor could it contain a SubscriptingRef assignment, since
+ * we consider adjacent subscripts to index one multidimensional array
+ * not nested array types. Future generalizations might make this
+ * reachable, however.
+ */
+static void
+array_subscript_fetch_old_slice(ExprState *state,
+ ExprEvalStep *op,
+ ExprContext *econtext)
+{
+ SubscriptingRefState *sbsrefstate = op->d.sbsref.state;
+ ArraySubWorkspace *workspace = (ArraySubWorkspace *) sbsrefstate->workspace;
+
+ if (*op->resnull)
+ {
+ /* whole array is null, so any slice is too */
+ sbsrefstate->prevvalue = (Datum) 0;
+ sbsrefstate->prevnull = true;
+ }
+ else
+ {
+ sbsrefstate->prevvalue = array_get_slice(*op->resvalue,
+ sbsrefstate->numupper,
+ workspace->upperindex,
+ workspace->lowerindex,
+ sbsrefstate->upperprovided,
+ sbsrefstate->lowerprovided,
+ workspace->refattrlength,
+ workspace->refelemlength,
+ workspace->refelembyval,
+ workspace->refelemalign);
+ /* slices of non-null arrays are never null */
+ sbsrefstate->prevnull = false;
+ }
+}
+
+/*
+ * Set up execution state for an array subscript operation.
+ */
+static void
+array_exec_setup(const SubscriptingRef *sbsref,
+ SubscriptingRefState *sbsrefstate,
+ SubscriptExecSteps *methods)
+{
+ bool is_slice = (sbsrefstate->numlower != 0);
+ ArraySubWorkspace *workspace;
+
+ /*
+ * Enforce the implementation limit on number of array subscripts. This
+ * check isn't entirely redundant with checking at parse time; conceivably
+ * the expression was stored by a backend with a different MAXDIM value.
+ */
+ if (sbsrefstate->numupper > MAXDIM)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("number of array dimensions (%d) exceeds the maximum allowed (%d)",
+ sbsrefstate->numupper, MAXDIM)));
+
+ /* Should be impossible if parser is sane, but check anyway: */
+ if (sbsrefstate->numlower != 0 &&
+ sbsrefstate->numupper != sbsrefstate->numlower)
+ elog(ERROR, "upper and lower index lists are not same length");
+
+ /*
+ * Allocate type-specific workspace.
+ */
+ workspace = (ArraySubWorkspace *) palloc(sizeof(ArraySubWorkspace));
+ sbsrefstate->workspace = workspace;
+
+ /*
+ * Collect datatype details we'll need at execution.
+ */
+ workspace->refelemtype = sbsref->refelemtype;
+ workspace->refattrlength = get_typlen(sbsref->refcontainertype);
+ get_typlenbyvalalign(sbsref->refelemtype,
+ &workspace->refelemlength,
+ &workspace->refelembyval,
+ &workspace->refelemalign);
+
+ /*
+ * Pass back pointers to appropriate step execution functions.
+ */
+ methods->sbs_check_subscripts = array_subscript_check_subscripts;
+ if (is_slice)
+ {
+ methods->sbs_fetch = array_subscript_fetch_slice;
+ methods->sbs_assign = array_subscript_assign_slice;
+ methods->sbs_fetch_old = array_subscript_fetch_old_slice;
+ }
+ else
+ {
+ methods->sbs_fetch = array_subscript_fetch;
+ methods->sbs_assign = array_subscript_assign;
+ methods->sbs_fetch_old = array_subscript_fetch_old;
+ }
+}
+
+/*
+ * array_subscript_handler
+ * Subscripting handler for standard varlena arrays.
+ *
+ * This should be used only for "true" array types, which have array headers
+ * as understood by the varlena array routines, and are referenced by the
+ * element type's pg_type.typarray field.
+ */
+Datum
+array_subscript_handler(PG_FUNCTION_ARGS)
+{
+ static const SubscriptRoutines sbsroutines = {
+ .transform = array_subscript_transform,
+ .exec_setup = array_exec_setup,
+ .fetch_strict = true, /* fetch returns NULL for NULL inputs */
+ .fetch_leakproof = true, /* fetch returns NULL for bad subscript */
+ .store_leakproof = false /* ... but assignment throws error */
+ };
+
+ PG_RETURN_POINTER(&sbsroutines);
+}
+
+/*
+ * raw_array_subscript_handler
+ * Subscripting handler for "raw" arrays.
+ *
+ * A "raw" array just contains N independent instances of the element type.
+ * Currently we require both the element type and the array type to be fixed
+ * length, but it wouldn't be too hard to relax that for the array type.
+ *
+ * As of now, all the support code is shared with standard varlena arrays.
+ * We may split those into separate code paths, but probably that would yield
+ * only marginal speedups. The main point of having a separate handler is
+ * so that pg_type.typsubscript clearly indicates the type's semantics.
+ */
+Datum
+raw_array_subscript_handler(PG_FUNCTION_ARGS)
+{
+ static const SubscriptRoutines sbsroutines = {
+ .transform = array_subscript_transform,
+ .exec_setup = array_exec_setup,
+ .fetch_strict = true, /* fetch returns NULL for NULL inputs */
+ .fetch_leakproof = true, /* fetch returns NULL for bad subscript */
+ .store_leakproof = false /* ... but assignment throws error */
+ };
+
+ PG_RETURN_POINTER(&sbsroutines);
+}
diff --git a/src/backend/utils/adt/arrayutils.c b/src/backend/utils/adt/arrayutils.c
new file mode 100644
index 0000000..fcdfde9
--- /dev/null
+++ b/src/backend/utils/adt/arrayutils.c
@@ -0,0 +1,259 @@
+/*-------------------------------------------------------------------------
+ *
+ * arrayutils.c
+ * This file contains some support routines required for array functions.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/arrayutils.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "catalog/pg_type.h"
+#include "common/int.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+
+
+/*
+ * Convert subscript list into linear element number (from 0)
+ *
+ * We assume caller has already range-checked the dimensions and subscripts,
+ * so no overflow is possible.
+ */
+int
+ArrayGetOffset(int n, const int *dim, const int *lb, const int *indx)
+{
+ int i,
+ scale = 1,
+ offset = 0;
+
+ for (i = n - 1; i >= 0; i--)
+ {
+ offset += (indx[i] - lb[i]) * scale;
+ scale *= dim[i];
+ }
+ return offset;
+}
+
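A minimal standalone sketch of the same row-major rule (function and variable names here are illustrative, not from this file): in a 3x4 array with lower bounds {1,1}, the subscripts {2,3} land on linear element (3-1)*1 + (2-1)*4 = 6.

    #include <stdio.h>

    static int
    offset_sketch(int n, const int *dim, const int *lb, const int *indx)
    {
        int     scale = 1,
                offset = 0;

        for (int i = n - 1; i >= 0; i--)
        {
            offset += (indx[i] - lb[i]) * scale;
            scale *= dim[i];
        }
        return offset;
    }

    int
    main(void)
    {
        int     dim[] = {3, 4},
                lb[] = {1, 1},
                indx[] = {2, 3};

        printf("%d\n", offset_sketch(2, dim, lb, indx));    /* prints 6 */
        return 0;
    }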
+/*
+ * Same, but subscripts are assumed 0-based, and use a scale array
+ * instead of raw dimension data (see mda_get_prod to create scale array)
+ */
+int
+ArrayGetOffset0(int n, const int *tup, const int *scale)
+{
+ int i,
+ lin = 0;
+
+ for (i = 0; i < n; i++)
+ lin += tup[i] * scale[i];
+ return lin;
+}
+
+/*
+ * Convert array dimensions into number of elements
+ *
+ * This must do overflow checking, since it is used to validate that a user
+ * dimensionality request doesn't overflow what we can handle.
+ *
+ * The multiplication overflow check only works on machines that have int64
+ * arithmetic, but that is nearly all platforms these days, and doing check
+ * divides for those that don't seems way too expensive.
+ */
+int
+ArrayGetNItems(int ndim, const int *dims)
+{
+ int32 ret;
+ int i;
+
+ if (ndim <= 0)
+ return 0;
+ ret = 1;
+ for (i = 0; i < ndim; i++)
+ {
+ int64 prod;
+
+ /* A negative dimension implies that UB-LB overflowed ... */
+ if (dims[i] < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("array size exceeds the maximum allowed (%d)",
+ (int) MaxArraySize)));
+
+ prod = (int64) ret * (int64) dims[i];
+
+ ret = (int32) prod;
+ if ((int64) ret != prod)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("array size exceeds the maximum allowed (%d)",
+ (int) MaxArraySize)));
+ }
+ Assert(ret >= 0);
+ if ((Size) ret > MaxArraySize)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("array size exceeds the maximum allowed (%d)",
+ (int) MaxArraySize)));
+ return (int) ret;
+}
+
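As a worked example of the check above: a request for dimensions {100000, 100000} gives an int64 product of 10,000,000,000, which does not survive the round-trip through the int32 accumulator, so the ERRCODE_PROGRAM_LIMIT_EXCEEDED error is raised; the same error applies when the product fits in int32 but still exceeds MaxArraySize.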
+/*
+ * Verify sanity of proposed lower-bound values for an array
+ *
+ * The lower-bound values must not be so large as to cause overflow when
+ * calculating subscripts, e.g. lower bound 2147483640 with length 10
+ * must be disallowed. We actually insist that dims[i] + lb[i] be
+ * computable without overflow, meaning that an array with last subscript
+ * equal to INT_MAX will be disallowed.
+ *
+ * It is assumed that the caller already called ArrayGetNItems, so that
+ * overflowed (negative) dims[] values have been eliminated.
+ */
+void
+ArrayCheckBounds(int ndim, const int *dims, const int *lb)
+{
+ int i;
+
+ for (i = 0; i < ndim; i++)
+ {
+ /* PG_USED_FOR_ASSERTS_ONLY prevents variable-isn't-read warnings */
+ int32 sum PG_USED_FOR_ASSERTS_ONLY;
+
+ if (pg_add_s32_overflow(dims[i], lb[i], &sum))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("array lower bound is too large: %d",
+ lb[i])));
+ }
+}
+
+/*
+ * Compute ranges (sub-array dimensions) for an array slice
+ *
+ * We assume caller has validated slice endpoints, so overflow is impossible
+ */
+void
+mda_get_range(int n, int *span, const int *st, const int *endp)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ span[i] = endp[i] - st[i] + 1;
+}
+
+/*
+ * Compute products of array dimensions, ie, scale factors for subscripts
+ *
+ * We assume caller has validated dimensions, so overflow is impossible
+ */
+void
+mda_get_prod(int n, const int *range, int *prod)
+{
+ int i;
+
+ prod[n - 1] = 1;
+ for (i = n - 2; i >= 0; i--)
+ prod[i] = prod[i + 1] * range[i + 1];
+}
+
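Tying this to the earlier example: for a range of {3,4}, mda_get_prod produces prod = {4,1}, and ArrayGetOffset0 applied to the 0-based subscripts {1,2} yields 1*4 + 2*1 = 6, the same element that ArrayGetOffset locates from the 1-based subscripts {2,3}.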
+/*
+ * From products of whole-array dimensions and spans of a sub-array,
+ * compute offset distances needed to step through subarray within array
+ *
+ * We assume caller has validated dimensions, so overflow is impossible
+ */
+void
+mda_get_offset_values(int n, int *dist, const int *prod, const int *span)
+{
+ int i,
+ j;
+
+ dist[n - 1] = 0;
+ for (j = n - 2; j >= 0; j--)
+ {
+ dist[j] = prod[j] - 1;
+ for (i = j + 1; i < n; i++)
+ dist[j] -= (span[i] - 1) * prod[i];
+ }
+}
+
+/*
+ * Generates the tuple that is lexicographically one greater than the current
+ * n-tuple in "curr", with the restriction that the i-th element of "curr" is
+ * less than the i-th element of "span".
+ *
+ * Returns -1 if no next tuple exists, else the subscript position (0..n-1)
+ * corresponding to the dimension to advance along.
+ *
+ * We assume caller has validated dimensions, so overflow is impossible
+ */
+int
+mda_next_tuple(int n, int *curr, const int *span)
+{
+ int i;
+
+ if (n <= 0)
+ return -1;
+
+ curr[n - 1] = (curr[n - 1] + 1) % span[n - 1];
+ for (i = n - 1; i && curr[i] == 0; i--)
+ curr[i - 1] = (curr[i - 1] + 1) % span[i - 1];
+
+ if (i)
+ return i;
+ if (curr[0])
+ return 0;
+
+ return -1;
+}
+
+/*
+ * ArrayGetIntegerTypmods: verify that argument is a 1-D cstring array,
+ * and get the contents converted to integers. Returns a palloc'd array
+ * and places the length at *n.
+ */
+int32 *
+ArrayGetIntegerTypmods(ArrayType *arr, int *n)
+{
+ int32 *result;
+ Datum *elem_values;
+ int i;
+
+ if (ARR_ELEMTYPE(arr) != CSTRINGOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_ELEMENT_ERROR),
+ errmsg("typmod array must be type cstring[]")));
+
+ if (ARR_NDIM(arr) != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("typmod array must be one-dimensional")));
+
+ if (array_contains_nulls(arr))
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("typmod array must not contain nulls")));
+
+ /* hardwired knowledge about cstring's representation details here */
+ deconstruct_array(arr, CSTRINGOID,
+ -2, false, TYPALIGN_CHAR,
+ &elem_values, NULL, n);
+
+ result = (int32 *) palloc(*n * sizeof(int32));
+
+ for (i = 0; i < *n; i++)
+ result[i] = pg_strtoint32(DatumGetCString(elem_values[i]));
+
+ pfree(elem_values);
+
+ return result;
+}
diff --git a/src/backend/utils/adt/ascii.c b/src/backend/utils/adt/ascii.c
new file mode 100644
index 0000000..54c36a7
--- /dev/null
+++ b/src/backend/utils/adt/ascii.c
@@ -0,0 +1,198 @@
+/*-----------------------------------------------------------------------
+ * ascii.c
+ * The PostgreSQL routine for string to ascii conversion.
+ *
+ * Portions Copyright (c) 1999-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/ascii.c
+ *
+ *-----------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "mb/pg_wchar.h"
+#include "utils/ascii.h"
+#include "utils/builtins.h"
+
+static void pg_to_ascii(unsigned char *src, unsigned char *src_end,
+ unsigned char *dest, int enc);
+static text *encode_to_ascii(text *data, int enc);
+
+
+/* ----------
+ * to_ascii
+ * ----------
+ */
+static void
+pg_to_ascii(unsigned char *src, unsigned char *src_end, unsigned char *dest, int enc)
+{
+ unsigned char *x;
+ const unsigned char *ascii;
+ int range;
+
+ /*
+ * relevant start for an encoding
+ */
+#define RANGE_128 128
+#define RANGE_160 160
+
+ if (enc == PG_LATIN1)
+ {
+ /*
+ * ISO-8859-1 <range: 160 -- 255>
+ */
+ ascii = (const unsigned char *) " cL Y \"Ca -R 'u ., ?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
+ range = RANGE_160;
+ }
+ else if (enc == PG_LATIN2)
+ {
+ /*
+ * ISO-8859-2 <range: 160 -- 255>
+ */
+ ascii = (const unsigned char *) " A L LS \"SSTZ-ZZ a,l'ls ,sstz\"zzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt.";
+ range = RANGE_160;
+ }
+ else if (enc == PG_LATIN9)
+ {
+ /*
+ * ISO-8859-15 <range: 160 -- 255>
+ */
+ ascii = (const unsigned char *) " cL YS sCa -R Zu .z EeY?AAAAAAACEEEEIIII NOOOOOxOUUUUYTBaaaaaaaceeeeiiii nooooo/ouuuuyty";
+ range = RANGE_160;
+ }
+ else if (enc == PG_WIN1250)
+ {
+ /*
+ * Windows CP1250 <range: 128 -- 255>
+ */
+ ascii = (const unsigned char *) " ' \" %S<STZZ `'\"\".-- s>stzz L A \"CS -RZ ,l'u .,as L\"lzRAAAALCCCEEEEIIDDNNOOOOxRUUUUYTBraaaalccceeeeiiddnnoooo/ruuuuyt ";
+ range = RANGE_128;
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("encoding conversion from %s to ASCII not supported",
+ pg_encoding_to_char(enc))));
+ return; /* keep compiler quiet */
+ }
+
+ /*
+ * Encode
+ */
+ for (x = src; x < src_end; x++)
+ {
+ if (*x < 128)
+ *dest++ = *x;
+ else if (*x < range)
+ *dest++ = ' '; /* bogus 128 to 'range' */
+ else
+ *dest++ = ascii[*x - range];
+ }
+}
+
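A toy standalone sketch of the same table-driven transliteration (the 8-entry table and the input bytes below are invented for illustration; they are not one of the real encoding tables above):

    #include <stdio.h>

    int
    main(void)
    {
        /* invented table covering bytes 160..167 only; 163 maps to 'L' */
        const unsigned char map[] = " icL Y S";
        const unsigned char src[] = {'a', 194, 'b', 163, 0};
        unsigned char dest[sizeof(src)];
        unsigned char *d = dest;

        for (const unsigned char *x = src; *x; x++)
        {
            if (*x < 128)
                *d++ = *x;              /* plain ASCII passes through */
            else if (*x < 160 || *x > 167)
                *d++ = ' ';             /* outside this toy table's range */
            else
                *d++ = map[*x - 160];   /* table lookup: 163 -> 'L' */
        }
        *d = '\0';
        printf("%s\n", (const char *) dest);    /* prints "a bL" */
        return 0;
    }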
+/* ----------
+ * encode text
+ *
+ * The text datum is overwritten in-place, therefore this coding method
+ * cannot support conversions that change the string length!
+ * ----------
+ */
+static text *
+encode_to_ascii(text *data, int enc)
+{
+ pg_to_ascii((unsigned char *) VARDATA(data), /* src */
+ (unsigned char *) (data) + VARSIZE(data), /* src end */
+ (unsigned char *) VARDATA(data), /* dest */
+ enc); /* encoding */
+
+ return data;
+}
+
+/* ----------
+ * convert to ASCII - enc is set as 'name' arg.
+ * ----------
+ */
+Datum
+to_ascii_encname(PG_FUNCTION_ARGS)
+{
+ text *data = PG_GETARG_TEXT_P_COPY(0);
+ char *encname = NameStr(*PG_GETARG_NAME(1));
+ int enc = pg_char_to_encoding(encname);
+
+ if (enc < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("%s is not a valid encoding name", encname)));
+
+ PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
+}
+
+/* ----------
+ * convert to ASCII - enc is set as int4
+ * ----------
+ */
+Datum
+to_ascii_enc(PG_FUNCTION_ARGS)
+{
+ text *data = PG_GETARG_TEXT_P_COPY(0);
+ int enc = PG_GETARG_INT32(1);
+
+ if (!PG_VALID_ENCODING(enc))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("%d is not a valid encoding code", enc)));
+
+ PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
+}
+
+/* ----------
+ * convert to ASCII - current enc is DatabaseEncoding
+ * ----------
+ */
+Datum
+to_ascii_default(PG_FUNCTION_ARGS)
+{
+ text *data = PG_GETARG_TEXT_P_COPY(0);
+ int enc = GetDatabaseEncoding();
+
+ PG_RETURN_TEXT_P(encode_to_ascii(data, enc));
+}
+
+/* ----------
+ * Copy a string in an arbitrary backend-safe encoding, converting it to a
+ * valid ASCII string by replacing non-ASCII bytes with '?'. Otherwise the
+ * behavior is identical to strlcpy(), except that we don't bother with a
+ * return value.
+ *
+ * This must not trigger ereport(ERROR), as it is called in postmaster.
+ * ----------
+ */
+void
+ascii_safe_strlcpy(char *dest, const char *src, size_t destsiz)
+{
+ if (destsiz == 0) /* corner case: no room for trailing nul */
+ return;
+
+ while (--destsiz > 0)
+ {
+ /* use unsigned char here to avoid compiler warning */
+ unsigned char ch = *src++;
+
+ if (ch == '\0')
+ break;
+ /* Keep printable ASCII characters */
+ if (32 <= ch && ch <= 126)
+ *dest = ch;
+ /* White-space is also OK */
+ else if (ch == '\n' || ch == '\r' || ch == '\t')
+ *dest = ch;
+ /* Everything else is replaced with '?' */
+ else
+ *dest = '?';
+ dest++;
+ }
+
+ *dest = '\0';
+}
diff --git a/src/backend/utils/adt/bool.c b/src/backend/utils/adt/bool.c
new file mode 100644
index 0000000..cd73352
--- /dev/null
+++ b/src/backend/utils/adt/bool.c
@@ -0,0 +1,404 @@
+/*-------------------------------------------------------------------------
+ *
+ * bool.c
+ * Functions for the built-in type "bool".
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/bool.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <ctype.h>
+
+#include "libpq/pqformat.h"
+#include "utils/builtins.h"
+
+/*
+ * Try to interpret value as boolean value. Valid values are: true,
+ * false, yes, no, on, off, 1, 0; as well as unique prefixes thereof.
+ * If the string parses okay, return true, else false.
+ * If okay and result is not NULL, return the value in *result.
+ */
+bool
+parse_bool(const char *value, bool *result)
+{
+ return parse_bool_with_len(value, strlen(value), result);
+}
+
+bool
+parse_bool_with_len(const char *value, size_t len, bool *result)
+{
+ switch (*value)
+ {
+ case 't':
+ case 'T':
+ if (pg_strncasecmp(value, "true", len) == 0)
+ {
+ if (result)
+ *result = true;
+ return true;
+ }
+ break;
+ case 'f':
+ case 'F':
+ if (pg_strncasecmp(value, "false", len) == 0)
+ {
+ if (result)
+ *result = false;
+ return true;
+ }
+ break;
+ case 'y':
+ case 'Y':
+ if (pg_strncasecmp(value, "yes", len) == 0)
+ {
+ if (result)
+ *result = true;
+ return true;
+ }
+ break;
+ case 'n':
+ case 'N':
+ if (pg_strncasecmp(value, "no", len) == 0)
+ {
+ if (result)
+ *result = false;
+ return true;
+ }
+ break;
+ case 'o':
+ case 'O':
+ /* 'o' is not unique enough */
+ if (pg_strncasecmp(value, "on", (len > 2 ? len : 2)) == 0)
+ {
+ if (result)
+ *result = true;
+ return true;
+ }
+ else if (pg_strncasecmp(value, "off", (len > 2 ? len : 2)) == 0)
+ {
+ if (result)
+ *result = false;
+ return true;
+ }
+ break;
+ case '1':
+ if (len == 1)
+ {
+ if (result)
+ *result = true;
+ return true;
+ }
+ break;
+ case '0':
+ if (len == 1)
+ {
+ if (result)
+ *result = false;
+ return true;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (result)
+ *result = false; /* suppress compiler warning */
+ return false;
+}
+
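A rough standalone sketch of the unique-prefix rule implemented above, covering only a subset of the keywords and substituting POSIX strncasecmp for pg_strncasecmp (names are illustrative): "tr" is accepted as a prefix of "true", while a lone "o" is rejected because it could begin either "on" or "off".

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>
    #include <strings.h>

    static bool
    parse_bool_sketch(const char *v, bool *out)
    {
        size_t  len = strlen(v);

        if (len > 0 && strncasecmp(v, "true", len) == 0)
        {
            *out = true;
            return true;
        }
        if (len > 0 && strncasecmp(v, "no", len) == 0)
        {
            *out = false;
            return true;
        }
        /* "on"/"off" share the prefix "o", so demand at least two bytes */
        if (strncasecmp(v, "on", len > 2 ? len : 2) == 0)
        {
            *out = true;
            return true;
        }
        if (strncasecmp(v, "off", len > 2 ? len : 2) == 0)
        {
            *out = false;
            return true;
        }
        return false;
    }

    int
    main(void)
    {
        bool    b;

        printf("%d\n", parse_bool_sketch("tr", &b));    /* 1: unique prefix of "true" */
        printf("%d\n", parse_bool_sketch("o", &b));     /* 0: ambiguous between on/off */
        return 0;
    }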
+/*****************************************************************************
+ * USER I/O ROUTINES *
+ *****************************************************************************/
+
+/*
+ * boolin - converts "t" or "f" to 1 or 0
+ *
+ * Check explicitly for "true/false" and TRUE/FALSE, 1/0, YES/NO, ON/OFF.
+ * Reject other values.
+ *
+ * In the switch statement, check the most-used possibilities first.
+ */
+Datum
+boolin(PG_FUNCTION_ARGS)
+{
+ const char *in_str = PG_GETARG_CSTRING(0);
+ const char *str;
+ size_t len;
+ bool result;
+
+ /*
+ * Skip leading and trailing whitespace
+ */
+ str = in_str;
+ while (isspace((unsigned char) *str))
+ str++;
+
+ len = strlen(str);
+ while (len > 0 && isspace((unsigned char) str[len - 1]))
+ len--;
+
+ if (parse_bool_with_len(str, len, &result))
+ PG_RETURN_BOOL(result);
+
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "boolean", in_str)));
+
+ /* not reached */
+ PG_RETURN_BOOL(false);
+}
+
+/*
+ * boolout - converts 1 or 0 to "t" or "f"
+ */
+Datum
+boolout(PG_FUNCTION_ARGS)
+{
+ bool b = PG_GETARG_BOOL(0);
+ char *result = (char *) palloc(2);
+
+ result[0] = (b) ? 't' : 'f';
+ result[1] = '\0';
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * boolrecv - converts external binary format to bool
+ *
+ * The external representation is one byte. Any nonzero value is taken
+ * as "true".
+ */
+Datum
+boolrecv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ int ext;
+
+ ext = pq_getmsgbyte(buf);
+ PG_RETURN_BOOL(ext != 0);
+}
+
+/*
+ * boolsend - converts bool to binary format
+ */
+Datum
+boolsend(PG_FUNCTION_ARGS)
+{
+ bool arg1 = PG_GETARG_BOOL(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendbyte(&buf, arg1 ? 1 : 0);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/*
+ * booltext - cast function for bool => text
+ *
+ * We need this because it's different from the behavior of boolout();
+ * this function follows the SQL-spec result (except for producing lower case)
+ */
+Datum
+booltext(PG_FUNCTION_ARGS)
+{
+ bool arg1 = PG_GETARG_BOOL(0);
+ const char *str;
+
+ if (arg1)
+ str = "true";
+ else
+ str = "false";
+
+ PG_RETURN_TEXT_P(cstring_to_text(str));
+}
+
+
+/*****************************************************************************
+ * PUBLIC ROUTINES *
+ *****************************************************************************/
+
+Datum
+booleq(PG_FUNCTION_ARGS)
+{
+ bool arg1 = PG_GETARG_BOOL(0);
+ bool arg2 = PG_GETARG_BOOL(1);
+
+ PG_RETURN_BOOL(arg1 == arg2);
+}
+
+Datum
+boolne(PG_FUNCTION_ARGS)
+{
+ bool arg1 = PG_GETARG_BOOL(0);
+ bool arg2 = PG_GETARG_BOOL(1);
+
+ PG_RETURN_BOOL(arg1 != arg2);
+}
+
+Datum
+boollt(PG_FUNCTION_ARGS)
+{
+ bool arg1 = PG_GETARG_BOOL(0);
+ bool arg2 = PG_GETARG_BOOL(1);
+
+ PG_RETURN_BOOL(arg1 < arg2);
+}
+
+Datum
+boolgt(PG_FUNCTION_ARGS)
+{
+ bool arg1 = PG_GETARG_BOOL(0);
+ bool arg2 = PG_GETARG_BOOL(1);
+
+ PG_RETURN_BOOL(arg1 > arg2);
+}
+
+Datum
+boolle(PG_FUNCTION_ARGS)
+{
+ bool arg1 = PG_GETARG_BOOL(0);
+ bool arg2 = PG_GETARG_BOOL(1);
+
+ PG_RETURN_BOOL(arg1 <= arg2);
+}
+
+Datum
+boolge(PG_FUNCTION_ARGS)
+{
+ bool arg1 = PG_GETARG_BOOL(0);
+ bool arg2 = PG_GETARG_BOOL(1);
+
+ PG_RETURN_BOOL(arg1 >= arg2);
+}
+
+/*
+ * boolean-and and boolean-or aggregates.
+ */
+
+/*
+ * Function for standard EVERY aggregate conforming to SQL 2003.
+ * The aggregate is also named bool_and for consistency.
+ *
+ * Note: this is only used in plain aggregate mode, not moving-aggregate mode.
+ */
+Datum
+booland_statefunc(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(PG_GETARG_BOOL(0) && PG_GETARG_BOOL(1));
+}
+
+/*
+ * Function for standard ANY/SOME aggregate conforming to SQL 2003.
+ * The aggregate is named bool_or, because ANY/SOME have parsing conflicts.
+ *
+ * Note: this is only used in plain aggregate mode, not moving-aggregate mode.
+ */
+Datum
+boolor_statefunc(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(PG_GETARG_BOOL(0) || PG_GETARG_BOOL(1));
+}
+
+typedef struct BoolAggState
+{
+ int64 aggcount; /* number of non-null values aggregated */
+ int64 aggtrue; /* number of values aggregated that are true */
+} BoolAggState;
+
+static BoolAggState *
+makeBoolAggState(FunctionCallInfo fcinfo)
+{
+ BoolAggState *state;
+ MemoryContext agg_context;
+
+ if (!AggCheckCallContext(fcinfo, &agg_context))
+ elog(ERROR, "aggregate function called in non-aggregate context");
+
+ state = (BoolAggState *) MemoryContextAlloc(agg_context,
+ sizeof(BoolAggState));
+ state->aggcount = 0;
+ state->aggtrue = 0;
+
+ return state;
+}
+
+Datum
+bool_accum(PG_FUNCTION_ARGS)
+{
+ BoolAggState *state;
+
+ state = PG_ARGISNULL(0) ? NULL : (BoolAggState *) PG_GETARG_POINTER(0);
+
+ /* Create the state data on first call */
+ if (state == NULL)
+ state = makeBoolAggState(fcinfo);
+
+ if (!PG_ARGISNULL(1))
+ {
+ state->aggcount++;
+ if (PG_GETARG_BOOL(1))
+ state->aggtrue++;
+ }
+
+ PG_RETURN_POINTER(state);
+}
+
+Datum
+bool_accum_inv(PG_FUNCTION_ARGS)
+{
+ BoolAggState *state;
+
+ state = PG_ARGISNULL(0) ? NULL : (BoolAggState *) PG_GETARG_POINTER(0);
+
+ /* bool_accum should have created the state data */
+ if (state == NULL)
+ elog(ERROR, "bool_accum_inv called with NULL state");
+
+ if (!PG_ARGISNULL(1))
+ {
+ state->aggcount--;
+ if (PG_GETARG_BOOL(1))
+ state->aggtrue--;
+ }
+
+ PG_RETURN_POINTER(state);
+}
+
+Datum
+bool_alltrue(PG_FUNCTION_ARGS)
+{
+ BoolAggState *state;
+
+ state = PG_ARGISNULL(0) ? NULL : (BoolAggState *) PG_GETARG_POINTER(0);
+
+ /* if there were no non-null values, return NULL */
+ if (state == NULL || state->aggcount == 0)
+ PG_RETURN_NULL();
+
+ /* true if all non-null values are true */
+ PG_RETURN_BOOL(state->aggtrue == state->aggcount);
+}
+
+Datum
+bool_anytrue(PG_FUNCTION_ARGS)
+{
+ BoolAggState *state;
+
+ state = PG_ARGISNULL(0) ? NULL : (BoolAggState *) PG_GETARG_POINTER(0);
+
+ /* if there were no non-null values, return NULL */
+ if (state == NULL || state->aggcount == 0)
+ PG_RETURN_NULL();
+
+ /* true if any non-null value is true */
+ PG_RETURN_BOOL(state->aggtrue > 0);
+}
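The aggregates above reduce EVERY/ANY to two counters, which is what makes the inverse transition function usable in moving-aggregate mode; a standalone sketch of the same bookkeeping (illustrative names, plain C in place of the fmgr calling convention):

    #include <stdbool.h>
    #include <stdio.h>

    struct agg_sketch
    {
        long    aggcount;   /* non-null inputs seen */
        long    aggtrue;    /* of those, how many were true */
    };

    int
    main(void)
    {
        /* -1 stands in for SQL NULL in this toy input */
        int     input[] = {1, 0, -1, 1};
        struct agg_sketch st = {0, 0};

        for (int i = 0; i < 4; i++)
        {
            if (input[i] < 0)
                continue;           /* NULLs are ignored, as in bool_accum */
            st.aggcount++;
            if (input[i])
                st.aggtrue++;
        }

        /* bool_and (EVERY): all non-null inputs true; bool_or (ANY): at least one */
        printf("every=%d any=%d\n",
               st.aggtrue == st.aggcount,
               st.aggtrue > 0);     /* prints every=0 any=1 */
        return 0;
    }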
diff --git a/src/backend/utils/adt/cash.c b/src/backend/utils/adt/cash.c
new file mode 100644
index 0000000..f7e78fa
--- /dev/null
+++ b/src/backend/utils/adt/cash.c
@@ -0,0 +1,1175 @@
+/*
+ * cash.c
+ * Written by D'Arcy J.M. Cain
+ * darcy@druid.net
+ * http://www.druid.net/darcy/
+ *
+ * Functions to allow input and output of money normally but store
+ * and handle it as 64 bit ints
+ *
+ * A slightly modified version of this file and a discussion of the
+ * workings can be found in the book "Software Solutions in C" by
+ * Dale Schumacher, Academic Press, ISBN: 0-12-632360-7 except that
+ * this version handles 64 bit numbers and so can hold values up to
+ * $92,233,720,368,547,758.07.
+ *
+ * src/backend/utils/adt/cash.c
+ */
+
+#include "postgres.h"
+
+#include <limits.h>
+#include <ctype.h>
+#include <math.h>
+
+#include "common/int.h"
+#include "libpq/pqformat.h"
+#include "utils/builtins.h"
+#include "utils/cash.h"
+#include "utils/numeric.h"
+#include "utils/pg_locale.h"
+
+
+/*************************************************************************
+ * Private routines
+ ************************************************************************/
+
+static const char *
+num_word(Cash value)
+{
+ static char buf[128];
+ static const char *const small[] = {
+ "zero", "one", "two", "three", "four", "five", "six", "seven",
+ "eight", "nine", "ten", "eleven", "twelve", "thirteen", "fourteen",
+ "fifteen", "sixteen", "seventeen", "eighteen", "nineteen", "twenty",
+ "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"
+ };
+ const char *const *big = small + 18;
+ int tu = value % 100;
+
+ /* deal with the simple cases first */
+ if (value <= 20)
+ return small[value];
+
+ /* is it an even multiple of 100? */
+ if (!tu)
+ {
+ sprintf(buf, "%s hundred", small[value / 100]);
+ return buf;
+ }
+
+ /* more than 99? */
+ if (value > 99)
+ {
+ /* is it an even multiple of 10 other than 10? */
+ if (value % 10 == 0 && tu > 10)
+ sprintf(buf, "%s hundred %s",
+ small[value / 100], big[tu / 10]);
+ else if (tu < 20)
+ sprintf(buf, "%s hundred and %s",
+ small[value / 100], small[tu]);
+ else
+ sprintf(buf, "%s hundred %s %s",
+ small[value / 100], big[tu / 10], small[tu % 10]);
+ }
+ else
+ {
+ /* is it an even multiple of 10 other than 10? */
+ if (value % 10 == 0 && tu > 10)
+ sprintf(buf, "%s", big[tu / 10]);
+ else if (tu < 20)
+ sprintf(buf, "%s", small[tu]);
+ else
+ sprintf(buf, "%s %s", big[tu / 10], small[tu % 10]);
+ }
+
+ return buf;
+} /* num_word() */
+
+/* cash_in()
+ * Convert a string to a cash data type.
+ * Format is [$]###[,]###[.##]
+ * Examples: 123.45 $123.45 $123,456.78
+ *
+ */
+Datum
+cash_in(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+ Cash result;
+ Cash value = 0;
+ Cash dec = 0;
+ Cash sgn = 1;
+ bool seen_dot = false;
+ const char *s = str;
+ int fpoint;
+ char dsymbol;
+ const char *ssymbol,
+ *psymbol,
+ *nsymbol,
+ *csymbol;
+ struct lconv *lconvert = PGLC_localeconv();
+
+ /*
+ * frac_digits will be CHAR_MAX in some locales, notably C. However, just
+ * testing for == CHAR_MAX is risky, because of compilers like gcc that
+ * "helpfully" let you alter the platform-standard definition of whether
+ * char is signed or not. If we are so unfortunate as to get compiled
+ * with a nonstandard -fsigned-char or -funsigned-char switch, then our
+ * idea of CHAR_MAX will not agree with libc's. The safest course is not
+ * to test for CHAR_MAX at all, but to impose a range check for plausible
+ * frac_digits values.
+ */
+ fpoint = lconvert->frac_digits;
+ if (fpoint < 0 || fpoint > 10)
+ fpoint = 2; /* best guess in this case, I think */
+
+ /* we restrict dsymbol to be a single byte, but not the other symbols */
+ if (*lconvert->mon_decimal_point != '\0' &&
+ lconvert->mon_decimal_point[1] == '\0')
+ dsymbol = *lconvert->mon_decimal_point;
+ else
+ dsymbol = '.';
+ if (*lconvert->mon_thousands_sep != '\0')
+ ssymbol = lconvert->mon_thousands_sep;
+ else /* ssymbol should not equal dsymbol */
+ ssymbol = (dsymbol != ',') ? "," : ".";
+ csymbol = (*lconvert->currency_symbol != '\0') ? lconvert->currency_symbol : "$";
+ psymbol = (*lconvert->positive_sign != '\0') ? lconvert->positive_sign : "+";
+ nsymbol = (*lconvert->negative_sign != '\0') ? lconvert->negative_sign : "-";
+
+#ifdef CASHDEBUG
+ printf("cashin- precision '%d'; decimal '%c'; thousands '%s'; currency '%s'; positive '%s'; negative '%s'\n",
+ fpoint, dsymbol, ssymbol, csymbol, psymbol, nsymbol);
+#endif
+
+ /* we need to add all sorts of checking here. For now just */
+ /* strip all leading whitespace and any leading currency symbol */
+ while (isspace((unsigned char) *s))
+ s++;
+ if (strncmp(s, csymbol, strlen(csymbol)) == 0)
+ s += strlen(csymbol);
+ while (isspace((unsigned char) *s))
+ s++;
+
+#ifdef CASHDEBUG
+ printf("cashin- string is '%s'\n", s);
+#endif
+
+ /* a leading minus or paren signifies a negative number */
+ /* again, better heuristics needed */
+ /* XXX - doesn't properly check for balanced parens - djmc */
+ if (strncmp(s, nsymbol, strlen(nsymbol)) == 0)
+ {
+ sgn = -1;
+ s += strlen(nsymbol);
+ }
+ else if (*s == '(')
+ {
+ sgn = -1;
+ s++;
+ }
+ else if (strncmp(s, psymbol, strlen(psymbol)) == 0)
+ s += strlen(psymbol);
+
+#ifdef CASHDEBUG
+ printf("cashin- string is '%s'\n", s);
+#endif
+
+ /* allow whitespace and currency symbol after the sign, too */
+ while (isspace((unsigned char) *s))
+ s++;
+ if (strncmp(s, csymbol, strlen(csymbol)) == 0)
+ s += strlen(csymbol);
+ while (isspace((unsigned char) *s))
+ s++;
+
+#ifdef CASHDEBUG
+ printf("cashin- string is '%s'\n", s);
+#endif
+
+ /*
+ * We accumulate the absolute amount in "value" and then apply the sign at
+ * the end. (The sign can appear before or after the digits, so it would
+ * be more complicated to do otherwise.) Because of the larger range of
+ * negative signed integers, we build "value" in the negative and then
+ * flip the sign at the end, catching most-negative-number overflow if
+ * necessary.
+ */
+
+ for (; *s; s++)
+ {
+ /*
+ * We look for digits as long as we have found less than the required
+ * number of decimal places.
+ */
+ if (isdigit((unsigned char) *s) && (!seen_dot || dec < fpoint))
+ {
+ int8 digit = *s - '0';
+
+ if (pg_mul_s64_overflow(value, 10, &value) ||
+ pg_sub_s64_overflow(value, digit, &value))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value \"%s\" is out of range for type %s",
+ str, "money")));
+
+ if (seen_dot)
+ dec++;
+ }
+ /* decimal point? then start counting fractions... */
+ else if (*s == dsymbol && !seen_dot)
+ {
+ seen_dot = true;
+ }
+ /* ignore if "thousands" separator, else we're done */
+ else if (strncmp(s, ssymbol, strlen(ssymbol)) == 0)
+ s += strlen(ssymbol) - 1;
+ else
+ break;
+ }
+
+ /* round off if there's another digit */
+ if (isdigit((unsigned char) *s) && *s >= '5')
+ {
+ /* remember we build the value in the negative */
+ if (pg_sub_s64_overflow(value, 1, &value))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value \"%s\" is out of range for type %s",
+ str, "money")));
+ }
+
+ /* adjust for less than required decimal places */
+ for (; dec < fpoint; dec++)
+ {
+ if (pg_mul_s64_overflow(value, 10, &value))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value \"%s\" is out of range for type %s",
+ str, "money")));
+ }
+
+ /*
+ * should only be trailing digits followed by whitespace, right paren,
+ * trailing sign, and/or trailing currency symbol
+ */
+ while (isdigit((unsigned char) *s))
+ s++;
+
+ while (*s)
+ {
+ if (isspace((unsigned char) *s) || *s == ')')
+ s++;
+ else if (strncmp(s, nsymbol, strlen(nsymbol)) == 0)
+ {
+ sgn = -1;
+ s += strlen(nsymbol);
+ }
+ else if (strncmp(s, psymbol, strlen(psymbol)) == 0)
+ s += strlen(psymbol);
+ else if (strncmp(s, csymbol, strlen(csymbol)) == 0)
+ s += strlen(csymbol);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "money", str)));
+ }
+
+ /*
+ * If the value is supposed to be positive, flip the sign, but check for
+ * the most negative number.
+ */
+ if (sgn > 0)
+ {
+ if (value == PG_INT64_MIN)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value \"%s\" is out of range for type %s",
+ str, "money")));
+ result = -value;
+ }
+ else
+ result = value;
+
+#ifdef CASHDEBUG
+ printf("cashin- result is " INT64_FORMAT "\n", result);
+#endif
+
+ PG_RETURN_CASH(result);
+}
+
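The accumulate-in-the-negative trick exists because |PG_INT64_MIN| is one greater than PG_INT64_MAX; a standalone sketch (without the overflow-checked helpers the real code uses) shows that the most negative money value parses cleanly this way:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        const char *digits = "9223372036854775808";    /* |INT64_MIN| in cents */
        int64_t     value = 0;

        /* accumulate negatively: value = value * 10 - digit */
        for (const char *p = digits; *p; p++)
            value = value * 10 - (*p - '0');

        printf("%" PRId64 "\n", value);     /* prints -9223372036854775808 */

        /*
         * The same magnitude cannot be built as a positive int64, which is
         * why cash_in() negates only when the sign is positive, after
         * checking for PG_INT64_MIN.
         */
        return 0;
    }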
+
+/* cash_out()
+ * Function to convert cash to a dollars and cents representation, using
+ * the lc_monetary locale's formatting.
+ */
+Datum
+cash_out(PG_FUNCTION_ARGS)
+{
+ Cash value = PG_GETARG_CASH(0);
+ char *result;
+ char buf[128];
+ char *bufptr;
+ int digit_pos;
+ int points,
+ mon_group;
+ char dsymbol;
+ const char *ssymbol,
+ *csymbol,
+ *signsymbol;
+ char sign_posn,
+ cs_precedes,
+ sep_by_space;
+ struct lconv *lconvert = PGLC_localeconv();
+
+ /* see comments about frac_digits in cash_in() */
+ points = lconvert->frac_digits;
+ if (points < 0 || points > 10)
+ points = 2; /* best guess in this case, I think */
+
+ /*
+ * As with frac_digits, must apply a range check to mon_grouping to avoid
+ * being fooled by variant CHAR_MAX values.
+ */
+ mon_group = *lconvert->mon_grouping;
+ if (mon_group <= 0 || mon_group > 6)
+ mon_group = 3;
+
+ /* we restrict dsymbol to be a single byte, but not the other symbols */
+ if (*lconvert->mon_decimal_point != '\0' &&
+ lconvert->mon_decimal_point[1] == '\0')
+ dsymbol = *lconvert->mon_decimal_point;
+ else
+ dsymbol = '.';
+ if (*lconvert->mon_thousands_sep != '\0')
+ ssymbol = lconvert->mon_thousands_sep;
+ else /* ssymbol should not equal dsymbol */
+ ssymbol = (dsymbol != ',') ? "," : ".";
+ csymbol = (*lconvert->currency_symbol != '\0') ? lconvert->currency_symbol : "$";
+
+ if (value < 0)
+ {
+ /* make the amount positive for digit-reconstruction loop */
+ value = -value;
+ /* set up formatting data */
+ signsymbol = (*lconvert->negative_sign != '\0') ? lconvert->negative_sign : "-";
+ sign_posn = lconvert->n_sign_posn;
+ cs_precedes = lconvert->n_cs_precedes;
+ sep_by_space = lconvert->n_sep_by_space;
+ }
+ else
+ {
+ signsymbol = lconvert->positive_sign;
+ sign_posn = lconvert->p_sign_posn;
+ cs_precedes = lconvert->p_cs_precedes;
+ sep_by_space = lconvert->p_sep_by_space;
+ }
+
+ /* we build the digits+decimal-point+sep string right-to-left in buf[] */
+ bufptr = buf + sizeof(buf) - 1;
+ *bufptr = '\0';
+
+ /*
+ * Generate digits till there are no non-zero digits left and we emitted
+ * at least one to the left of the decimal point. digit_pos is the
+ * current digit position, with zero as the digit just left of the decimal
+ * point, increasing to the right.
+ */
+ digit_pos = points;
+ do
+ {
+ if (points && digit_pos == 0)
+ {
+ /* insert decimal point, but not if value cannot be fractional */
+ *(--bufptr) = dsymbol;
+ }
+ else if (digit_pos < 0 && (digit_pos % mon_group) == 0)
+ {
+ /* insert thousands sep, but only to left of radix point */
+ bufptr -= strlen(ssymbol);
+ memcpy(bufptr, ssymbol, strlen(ssymbol));
+ }
+
+ *(--bufptr) = ((uint64) value % 10) + '0';
+ value = ((uint64) value) / 10;
+ digit_pos--;
+ } while (value || digit_pos >= 0);
+
+ /*----------
+ * Now, attach currency symbol and sign symbol in the correct order.
+ *
+ * The POSIX spec defines these values controlling this code:
+ *
+ * p/n_sign_posn:
+ * 0 Parentheses enclose the quantity and the currency_symbol.
+ * 1 The sign string precedes the quantity and the currency_symbol.
+ * 2 The sign string succeeds the quantity and the currency_symbol.
+ * 3 The sign string precedes the currency_symbol.
+ * 4 The sign string succeeds the currency_symbol.
+ *
+ * p/n_cs_precedes: 0 means currency symbol after value, else before it.
+ *
+ * p/n_sep_by_space:
+ * 0 No <space> separates the currency symbol and value.
+ * 1 If the currency symbol and sign string are adjacent, a <space>
+ * separates them from the value; otherwise, a <space> separates
+ * the currency symbol from the value.
+ * 2 If the currency symbol and sign string are adjacent, a <space>
+ * separates them; otherwise, a <space> separates the sign string
+ * from the value.
+ *----------
+ */
+ switch (sign_posn)
+ {
+ case 0:
+ if (cs_precedes)
+ result = psprintf("(%s%s%s)",
+ csymbol,
+ (sep_by_space == 1) ? " " : "",
+ bufptr);
+ else
+ result = psprintf("(%s%s%s)",
+ bufptr,
+ (sep_by_space == 1) ? " " : "",
+ csymbol);
+ break;
+ case 1:
+ default:
+ if (cs_precedes)
+ result = psprintf("%s%s%s%s%s",
+ signsymbol,
+ (sep_by_space == 2) ? " " : "",
+ csymbol,
+ (sep_by_space == 1) ? " " : "",
+ bufptr);
+ else
+ result = psprintf("%s%s%s%s%s",
+ signsymbol,
+ (sep_by_space == 2) ? " " : "",
+ bufptr,
+ (sep_by_space == 1) ? " " : "",
+ csymbol);
+ break;
+ case 2:
+ if (cs_precedes)
+ result = psprintf("%s%s%s%s%s",
+ csymbol,
+ (sep_by_space == 1) ? " " : "",
+ bufptr,
+ (sep_by_space == 2) ? " " : "",
+ signsymbol);
+ else
+ result = psprintf("%s%s%s%s%s",
+ bufptr,
+ (sep_by_space == 1) ? " " : "",
+ csymbol,
+ (sep_by_space == 2) ? " " : "",
+ signsymbol);
+ break;
+ case 3:
+ if (cs_precedes)
+ result = psprintf("%s%s%s%s%s",
+ signsymbol,
+ (sep_by_space == 2) ? " " : "",
+ csymbol,
+ (sep_by_space == 1) ? " " : "",
+ bufptr);
+ else
+ result = psprintf("%s%s%s%s%s",
+ bufptr,
+ (sep_by_space == 1) ? " " : "",
+ signsymbol,
+ (sep_by_space == 2) ? " " : "",
+ csymbol);
+ break;
+ case 4:
+ if (cs_precedes)
+ result = psprintf("%s%s%s%s%s",
+ csymbol,
+ (sep_by_space == 2) ? " " : "",
+ signsymbol,
+ (sep_by_space == 1) ? " " : "",
+ bufptr);
+ else
+ result = psprintf("%s%s%s%s%s",
+ bufptr,
+ (sep_by_space == 1) ? " " : "",
+ csymbol,
+ (sep_by_space == 2) ? " " : "",
+ signsymbol);
+ break;
+ }
+
+ PG_RETURN_CSTRING(result);
+}
+
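For example, with cs_precedes = 1 and sep_by_space = 0, a negative amount renders as "-$123.45" under sign_posn = 1 and as "($123.45)" under sign_posn = 0.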
+/*
+ * cash_recv - converts external binary format to cash
+ */
+Datum
+cash_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+
+ PG_RETURN_CASH((Cash) pq_getmsgint64(buf));
+}
+
+/*
+ * cash_send - converts cash to binary format
+ */
+Datum
+cash_send(PG_FUNCTION_ARGS)
+{
+ Cash arg1 = PG_GETARG_CASH(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendint64(&buf, arg1);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/*
+ * Comparison functions
+ */
+
+Datum
+cash_eq(PG_FUNCTION_ARGS)
+{
+ Cash c1 = PG_GETARG_CASH(0);
+ Cash c2 = PG_GETARG_CASH(1);
+
+ PG_RETURN_BOOL(c1 == c2);
+}
+
+Datum
+cash_ne(PG_FUNCTION_ARGS)
+{
+ Cash c1 = PG_GETARG_CASH(0);
+ Cash c2 = PG_GETARG_CASH(1);
+
+ PG_RETURN_BOOL(c1 != c2);
+}
+
+Datum
+cash_lt(PG_FUNCTION_ARGS)
+{
+ Cash c1 = PG_GETARG_CASH(0);
+ Cash c2 = PG_GETARG_CASH(1);
+
+ PG_RETURN_BOOL(c1 < c2);
+}
+
+Datum
+cash_le(PG_FUNCTION_ARGS)
+{
+ Cash c1 = PG_GETARG_CASH(0);
+ Cash c2 = PG_GETARG_CASH(1);
+
+ PG_RETURN_BOOL(c1 <= c2);
+}
+
+Datum
+cash_gt(PG_FUNCTION_ARGS)
+{
+ Cash c1 = PG_GETARG_CASH(0);
+ Cash c2 = PG_GETARG_CASH(1);
+
+ PG_RETURN_BOOL(c1 > c2);
+}
+
+Datum
+cash_ge(PG_FUNCTION_ARGS)
+{
+ Cash c1 = PG_GETARG_CASH(0);
+ Cash c2 = PG_GETARG_CASH(1);
+
+ PG_RETURN_BOOL(c1 >= c2);
+}
+
+Datum
+cash_cmp(PG_FUNCTION_ARGS)
+{
+ Cash c1 = PG_GETARG_CASH(0);
+ Cash c2 = PG_GETARG_CASH(1);
+
+ if (c1 > c2)
+ PG_RETURN_INT32(1);
+ else if (c1 == c2)
+ PG_RETURN_INT32(0);
+ else
+ PG_RETURN_INT32(-1);
+}
+
+
+/* cash_pl()
+ * Add two cash values.
+ */
+Datum
+cash_pl(PG_FUNCTION_ARGS)
+{
+ Cash c1 = PG_GETARG_CASH(0);
+ Cash c2 = PG_GETARG_CASH(1);
+ Cash result;
+
+ result = c1 + c2;
+
+ PG_RETURN_CASH(result);
+}
+
+
+/* cash_mi()
+ * Subtract two cash values.
+ */
+Datum
+cash_mi(PG_FUNCTION_ARGS)
+{
+ Cash c1 = PG_GETARG_CASH(0);
+ Cash c2 = PG_GETARG_CASH(1);
+ Cash result;
+
+ result = c1 - c2;
+
+ PG_RETURN_CASH(result);
+}
+
+
+/* cash_div_cash()
+ * Divide cash by cash, returning float8.
+ */
+Datum
+cash_div_cash(PG_FUNCTION_ARGS)
+{
+ Cash dividend = PG_GETARG_CASH(0);
+ Cash divisor = PG_GETARG_CASH(1);
+ float8 quotient;
+
+ if (divisor == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+
+ quotient = (float8) dividend / (float8) divisor;
+ PG_RETURN_FLOAT8(quotient);
+}
+
+
+/* cash_mul_flt8()
+ * Multiply cash by float8.
+ */
+Datum
+cash_mul_flt8(PG_FUNCTION_ARGS)
+{
+ Cash c = PG_GETARG_CASH(0);
+ float8 f = PG_GETARG_FLOAT8(1);
+ Cash result;
+
+ result = rint(c * f);
+ PG_RETURN_CASH(result);
+}
+
+
+/* flt8_mul_cash()
+ * Multiply float8 by cash.
+ */
+Datum
+flt8_mul_cash(PG_FUNCTION_ARGS)
+{
+ float8 f = PG_GETARG_FLOAT8(0);
+ Cash c = PG_GETARG_CASH(1);
+ Cash result;
+
+ result = rint(f * c);
+ PG_RETURN_CASH(result);
+}
+
+
+/* cash_div_flt8()
+ * Divide cash by float8.
+ */
+Datum
+cash_div_flt8(PG_FUNCTION_ARGS)
+{
+ Cash c = PG_GETARG_CASH(0);
+ float8 f = PG_GETARG_FLOAT8(1);
+ Cash result;
+
+ if (f == 0.0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+
+ result = rint(c / f);
+ PG_RETURN_CASH(result);
+}
+
+
+/* cash_mul_flt4()
+ * Multiply cash by float4.
+ */
+Datum
+cash_mul_flt4(PG_FUNCTION_ARGS)
+{
+ Cash c = PG_GETARG_CASH(0);
+ float4 f = PG_GETARG_FLOAT4(1);
+ Cash result;
+
+ result = rint(c * (float8) f);
+ PG_RETURN_CASH(result);
+}
+
+
+/* flt4_mul_cash()
+ * Multiply float4 by cash.
+ */
+Datum
+flt4_mul_cash(PG_FUNCTION_ARGS)
+{
+ float4 f = PG_GETARG_FLOAT4(0);
+ Cash c = PG_GETARG_CASH(1);
+ Cash result;
+
+ result = rint((float8) f * c);
+ PG_RETURN_CASH(result);
+}
+
+
+/* cash_div_flt4()
+ * Divide cash by float4.
+ *
+ */
+Datum
+cash_div_flt4(PG_FUNCTION_ARGS)
+{
+ Cash c = PG_GETARG_CASH(0);
+ float4 f = PG_GETARG_FLOAT4(1);
+ Cash result;
+
+ if (f == 0.0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+
+ result = rint(c / (float8) f);
+ PG_RETURN_CASH(result);
+}
+
+
+/* cash_mul_int8()
+ * Multiply cash by int8.
+ */
+Datum
+cash_mul_int8(PG_FUNCTION_ARGS)
+{
+ Cash c = PG_GETARG_CASH(0);
+ int64 i = PG_GETARG_INT64(1);
+ Cash result;
+
+ result = c * i;
+ PG_RETURN_CASH(result);
+}
+
+
+/* int8_mul_cash()
+ * Multiply int8 by cash.
+ */
+Datum
+int8_mul_cash(PG_FUNCTION_ARGS)
+{
+ int64 i = PG_GETARG_INT64(0);
+ Cash c = PG_GETARG_CASH(1);
+ Cash result;
+
+ result = i * c;
+ PG_RETURN_CASH(result);
+}
+
+/* cash_div_int8()
+ * Divide cash by 8-byte integer.
+ */
+Datum
+cash_div_int8(PG_FUNCTION_ARGS)
+{
+ Cash c = PG_GETARG_CASH(0);
+ int64 i = PG_GETARG_INT64(1);
+ Cash result;
+
+ if (i == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+
+ result = c / i;
+
+ PG_RETURN_CASH(result);
+}
+
+
+/* cash_mul_int4()
+ * Multiply cash by int4.
+ */
+Datum
+cash_mul_int4(PG_FUNCTION_ARGS)
+{
+ Cash c = PG_GETARG_CASH(0);
+ int32 i = PG_GETARG_INT32(1);
+ Cash result;
+
+ result = c * i;
+ PG_RETURN_CASH(result);
+}
+
+
+/* int4_mul_cash()
+ * Multiply int4 by cash.
+ */
+Datum
+int4_mul_cash(PG_FUNCTION_ARGS)
+{
+ int32 i = PG_GETARG_INT32(0);
+ Cash c = PG_GETARG_CASH(1);
+ Cash result;
+
+ result = i * c;
+ PG_RETURN_CASH(result);
+}
+
+
+/* cash_div_int4()
+ * Divide cash by 4-byte integer.
+ *
+ */
+Datum
+cash_div_int4(PG_FUNCTION_ARGS)
+{
+ Cash c = PG_GETARG_CASH(0);
+ int32 i = PG_GETARG_INT32(1);
+ Cash result;
+
+ if (i == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+
+ result = c / i;
+
+ PG_RETURN_CASH(result);
+}
+
+
+/* cash_mul_int2()
+ * Multiply cash by int2.
+ */
+Datum
+cash_mul_int2(PG_FUNCTION_ARGS)
+{
+ Cash c = PG_GETARG_CASH(0);
+ int16 s = PG_GETARG_INT16(1);
+ Cash result;
+
+ result = c * s;
+ PG_RETURN_CASH(result);
+}
+
+/* int2_mul_cash()
+ * Multiply int2 by cash.
+ */
+Datum
+int2_mul_cash(PG_FUNCTION_ARGS)
+{
+ int16 s = PG_GETARG_INT16(0);
+ Cash c = PG_GETARG_CASH(1);
+ Cash result;
+
+ result = s * c;
+ PG_RETURN_CASH(result);
+}
+
+/* cash_div_int2()
+ * Divide cash by int2.
+ *
+ */
+Datum
+cash_div_int2(PG_FUNCTION_ARGS)
+{
+ Cash c = PG_GETARG_CASH(0);
+ int16 s = PG_GETARG_INT16(1);
+ Cash result;
+
+ if (s == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+
+ result = c / s;
+ PG_RETURN_CASH(result);
+}
+
+/* cashlarger()
+ * Return larger of two cash values.
+ */
+Datum
+cashlarger(PG_FUNCTION_ARGS)
+{
+ Cash c1 = PG_GETARG_CASH(0);
+ Cash c2 = PG_GETARG_CASH(1);
+ Cash result;
+
+ result = (c1 > c2) ? c1 : c2;
+
+ PG_RETURN_CASH(result);
+}
+
+/* cashsmaller()
+ * Return smaller of two cash values.
+ */
+Datum
+cashsmaller(PG_FUNCTION_ARGS)
+{
+ Cash c1 = PG_GETARG_CASH(0);
+ Cash c2 = PG_GETARG_CASH(1);
+ Cash result;
+
+ result = (c1 < c2) ? c1 : c2;
+
+ PG_RETURN_CASH(result);
+}
+
+/* cash_words()
+ * This converts an int4 as well but to a representation using words
+ * Obviously way North American centric - sorry
+ */
+Datum
+cash_words(PG_FUNCTION_ARGS)
+{
+ Cash value = PG_GETARG_CASH(0);
+ uint64 val;
+ char buf[256];
+ char *p = buf;
+ Cash m0;
+ Cash m1;
+ Cash m2;
+ Cash m3;
+ Cash m4;
+ Cash m5;
+ Cash m6;
+
+ /* work with positive numbers */
+ if (value < 0)
+ {
+ value = -value;
+ strcpy(buf, "minus ");
+ p += 6;
+ }
+ else
+ buf[0] = '\0';
+
+ /* Now treat as unsigned, to avoid trouble at INT_MIN */
+ val = (uint64) value;
+
+ m0 = val % INT64CONST(100); /* cents */
+ m1 = (val / INT64CONST(100)) % 1000; /* hundreds */
+ m2 = (val / INT64CONST(100000)) % 1000; /* thousands */
+ m3 = (val / INT64CONST(100000000)) % 1000; /* millions */
+ m4 = (val / INT64CONST(100000000000)) % 1000; /* billions */
+ m5 = (val / INT64CONST(100000000000000)) % 1000; /* trillions */
+ m6 = (val / INT64CONST(100000000000000000)) % 1000; /* quadrillions */
+
+ if (m6)
+ {
+ strcat(buf, num_word(m6));
+ strcat(buf, " quadrillion ");
+ }
+
+ if (m5)
+ {
+ strcat(buf, num_word(m5));
+ strcat(buf, " trillion ");
+ }
+
+ if (m4)
+ {
+ strcat(buf, num_word(m4));
+ strcat(buf, " billion ");
+ }
+
+ if (m3)
+ {
+ strcat(buf, num_word(m3));
+ strcat(buf, " million ");
+ }
+
+ if (m2)
+ {
+ strcat(buf, num_word(m2));
+ strcat(buf, " thousand ");
+ }
+
+ if (m1)
+ strcat(buf, num_word(m1));
+
+ if (!*p)
+ strcat(buf, "zero");
+
+ strcat(buf, (val / 100) == 1 ? " dollar and " : " dollars and ");
+ strcat(buf, num_word(m0));
+ strcat(buf, m0 == 1 ? " cent" : " cents");
+
+ /* capitalize output */
+ buf[0] = pg_toupper((unsigned char) buf[0]);
+
+ /* return as text datum */
+ PG_RETURN_TEXT_P(cstring_to_text(buf));
+}
+
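As a worked example: the value 123456789 (in cents, i.e. $1,234,567.89) splits into m0 = 89, m1 = 567, m2 = 234 and m3 = 1, producing "One million two hundred thirty four thousand five hundred sixty seven dollars and eighty nine cents".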
+
+/* cash_numeric()
+ * Convert cash to numeric.
+ */
+Datum
+cash_numeric(PG_FUNCTION_ARGS)
+{
+ Cash money = PG_GETARG_CASH(0);
+ Datum result;
+ int fpoint;
+ struct lconv *lconvert = PGLC_localeconv();
+
+ /* see comments about frac_digits in cash_in() */
+ fpoint = lconvert->frac_digits;
+ if (fpoint < 0 || fpoint > 10)
+ fpoint = 2;
+
+ /* convert the integral money value to numeric */
+ result = NumericGetDatum(int64_to_numeric(money));
+
+ /* scale appropriately, if needed */
+ if (fpoint > 0)
+ {
+ int64 scale;
+ int i;
+ Datum numeric_scale;
+ Datum quotient;
+
+ /* compute required scale factor */
+ scale = 1;
+ for (i = 0; i < fpoint; i++)
+ scale *= 10;
+ numeric_scale = NumericGetDatum(int64_to_numeric(scale));
+
+ /*
+ * Given integral inputs approaching INT64_MAX, select_div_scale()
+ * might choose a result scale of zero, causing loss of fractional
+ * digits in the quotient. We can ensure an exact result by setting
+ * the dscale of either input to be at least as large as the desired
+ * result scale. numeric_round() will do that for us.
+ */
+ numeric_scale = DirectFunctionCall2(numeric_round,
+ numeric_scale,
+ Int32GetDatum(fpoint));
+
+ /* Now we can safely divide ... */
+ quotient = DirectFunctionCall2(numeric_div, result, numeric_scale);
+
+ /* ... and forcibly round to exactly the intended number of digits */
+ result = DirectFunctionCall2(numeric_round,
+ quotient,
+ Int32GetDatum(fpoint));
+ }
+
+ PG_RETURN_DATUM(result);
+}
+
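For example, with money = 12345 and a locale reporting frac_digits = 2, scale becomes 100 and the result is numeric 12345 / 100 rounded to two digits, i.e. 123.45.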
+/* numeric_cash()
+ * Convert numeric to cash.
+ */
+Datum
+numeric_cash(PG_FUNCTION_ARGS)
+{
+ Datum amount = PG_GETARG_DATUM(0);
+ Cash result;
+ int fpoint;
+ int64 scale;
+ int i;
+ Datum numeric_scale;
+ struct lconv *lconvert = PGLC_localeconv();
+
+ /* see comments about frac_digits in cash_in() */
+ fpoint = lconvert->frac_digits;
+ if (fpoint < 0 || fpoint > 10)
+ fpoint = 2;
+
+ /* compute required scale factor */
+ scale = 1;
+ for (i = 0; i < fpoint; i++)
+ scale *= 10;
+
+ /* multiply the input amount by scale factor */
+ numeric_scale = NumericGetDatum(int64_to_numeric(scale));
+ amount = DirectFunctionCall2(numeric_mul, amount, numeric_scale);
+
+ /* note that numeric_int8 will round to nearest integer for us */
+ result = DatumGetInt64(DirectFunctionCall1(numeric_int8, amount));
+
+ PG_RETURN_CASH(result);
+}
+
+/* int4_cash()
+ * Convert int4 (int) to cash
+ */
+Datum
+int4_cash(PG_FUNCTION_ARGS)
+{
+ int32 amount = PG_GETARG_INT32(0);
+ Cash result;
+ int fpoint;
+ int64 scale;
+ int i;
+ struct lconv *lconvert = PGLC_localeconv();
+
+ /* see comments about frac_digits in cash_in() */
+ fpoint = lconvert->frac_digits;
+ if (fpoint < 0 || fpoint > 10)
+ fpoint = 2;
+
+ /* compute required scale factor */
+ scale = 1;
+ for (i = 0; i < fpoint; i++)
+ scale *= 10;
+
+ /* compute amount * scale, checking for overflow */
+ result = DatumGetInt64(DirectFunctionCall2(int8mul, Int64GetDatum(amount),
+ Int64GetDatum(scale)));
+
+ PG_RETURN_CASH(result);
+}
+
+/* int8_cash()
+ * Convert int8 (bigint) to cash
+ */
+Datum
+int8_cash(PG_FUNCTION_ARGS)
+{
+ int64 amount = PG_GETARG_INT64(0);
+ Cash result;
+ int fpoint;
+ int64 scale;
+ int i;
+ struct lconv *lconvert = PGLC_localeconv();
+
+ /* see comments about frac_digits in cash_in() */
+ fpoint = lconvert->frac_digits;
+ if (fpoint < 0 || fpoint > 10)
+ fpoint = 2;
+
+ /* compute required scale factor */
+ scale = 1;
+ for (i = 0; i < fpoint; i++)
+ scale *= 10;
+
+ /* compute amount * scale, checking for overflow */
+ result = DatumGetInt64(DirectFunctionCall2(int8mul, Int64GetDatum(amount),
+ Int64GetDatum(scale)));
+
+ PG_RETURN_CASH(result);
+}
diff --git a/src/backend/utils/adt/char.c b/src/backend/utils/adt/char.c
new file mode 100644
index 0000000..e50293b
--- /dev/null
+++ b/src/backend/utils/adt/char.c
@@ -0,0 +1,253 @@
+/*-------------------------------------------------------------------------
+ *
+ * char.c
+ * Functions for the built-in type "char" (not to be confused with
+ * bpchar, which is the SQL CHAR(n) type).
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/char.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <limits.h>
+
+#include "libpq/pqformat.h"
+#include "utils/builtins.h"
+
+#define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
+#define TOOCTAL(c) ((c) + '0')
+#define FROMOCTAL(c) ((unsigned char) (c) - '0')
+
+
+/*****************************************************************************
+ * USER I/O ROUTINES *
+ *****************************************************************************/
+
+/*
+ * charin - converts "x" to 'x'
+ *
+ * This accepts the formats charout produces. If we have multibyte input
+ * that is not in the form '\ooo', then we take its first byte as the value
+ * and silently discard the rest; this is a backwards-compatibility provision.
+ */
+Datum
+charin(PG_FUNCTION_ARGS)
+{
+ char *ch = PG_GETARG_CSTRING(0);
+
+ if (strlen(ch) == 4 && ch[0] == '\\' &&
+ ISOCTAL(ch[1]) && ISOCTAL(ch[2]) && ISOCTAL(ch[3]))
+ PG_RETURN_CHAR((FROMOCTAL(ch[1]) << 6) +
+ (FROMOCTAL(ch[2]) << 3) +
+ FROMOCTAL(ch[3]));
+ /* This will do the right thing for a zero-length input string */
+ PG_RETURN_CHAR(ch[0]);
+}
+
+/*
+ * charout - converts 'x' to "x"
+ *
+ * The possible output formats are:
+ * 1. 0x00 is represented as an empty string.
+ * 2. 0x01..0x7F are represented as a single ASCII byte.
+ * 3. 0x80..0xFF are represented as \ooo (backslash and 3 octal digits).
+ * Case 3 is meant to match the traditional "escape" format of bytea.
+ */
+Datum
+charout(PG_FUNCTION_ARGS)
+{
+ char ch = PG_GETARG_CHAR(0);
+ char *result = (char *) palloc(5);
+
+ if (IS_HIGHBIT_SET(ch))
+ {
+ result[0] = '\\';
+ result[1] = TOOCTAL(((unsigned char) ch) >> 6);
+ result[2] = TOOCTAL((((unsigned char) ch) >> 3) & 07);
+ result[3] = TOOCTAL(((unsigned char) ch) & 07);
+ result[4] = '\0';
+ }
+ else
+ {
+ /* This produces acceptable results for 0x00 as well */
+ result[0] = ch;
+ result[1] = '\0';
+ }
+ PG_RETURN_CSTRING(result);
+}
+
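For example, the byte 0xA9 (169 decimal, 251 octal) is emitted by charout() as the four-character string "\251"; charin() reverses this as (2 << 6) + (5 << 3) + 1 = 169.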
+/*
+ * charrecv - converts external binary format to char
+ *
+ * The external representation is one byte, with no character set
+ * conversion. This is somewhat dubious, perhaps, but in many
+ * cases people use char for a 1-byte binary type.
+ */
+Datum
+charrecv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+
+ PG_RETURN_CHAR(pq_getmsgbyte(buf));
+}
+
+/*
+ * charsend - converts char to binary format
+ */
+Datum
+charsend(PG_FUNCTION_ARGS)
+{
+ char arg1 = PG_GETARG_CHAR(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendbyte(&buf, arg1);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/*****************************************************************************
+ * PUBLIC ROUTINES *
+ *****************************************************************************/
+
+/*
+ * NOTE: comparisons are done as though char is unsigned (uint8).
+ * Conversions to and from integer are done as though char is signed (int8).
+ *
+ * You wanted consistency?
+ */
+
+Datum
+chareq(PG_FUNCTION_ARGS)
+{
+ char arg1 = PG_GETARG_CHAR(0);
+ char arg2 = PG_GETARG_CHAR(1);
+
+ PG_RETURN_BOOL(arg1 == arg2);
+}
+
+Datum
+charne(PG_FUNCTION_ARGS)
+{
+ char arg1 = PG_GETARG_CHAR(0);
+ char arg2 = PG_GETARG_CHAR(1);
+
+ PG_RETURN_BOOL(arg1 != arg2);
+}
+
+Datum
+charlt(PG_FUNCTION_ARGS)
+{
+ char arg1 = PG_GETARG_CHAR(0);
+ char arg2 = PG_GETARG_CHAR(1);
+
+ PG_RETURN_BOOL((uint8) arg1 < (uint8) arg2);
+}
+
+Datum
+charle(PG_FUNCTION_ARGS)
+{
+ char arg1 = PG_GETARG_CHAR(0);
+ char arg2 = PG_GETARG_CHAR(1);
+
+ PG_RETURN_BOOL((uint8) arg1 <= (uint8) arg2);
+}
+
+Datum
+chargt(PG_FUNCTION_ARGS)
+{
+ char arg1 = PG_GETARG_CHAR(0);
+ char arg2 = PG_GETARG_CHAR(1);
+
+ PG_RETURN_BOOL((uint8) arg1 > (uint8) arg2);
+}
+
+Datum
+charge(PG_FUNCTION_ARGS)
+{
+ char arg1 = PG_GETARG_CHAR(0);
+ char arg2 = PG_GETARG_CHAR(1);
+
+ PG_RETURN_BOOL((uint8) arg1 >= (uint8) arg2);
+}
+
+
+Datum
+chartoi4(PG_FUNCTION_ARGS)
+{
+ char arg1 = PG_GETARG_CHAR(0);
+
+ PG_RETURN_INT32((int32) ((int8) arg1));
+}
+
+Datum
+i4tochar(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+
+ if (arg1 < SCHAR_MIN || arg1 > SCHAR_MAX)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("\"char\" out of range")));
+
+ PG_RETURN_CHAR((int8) arg1);
+}
+
+
+Datum
+text_char(PG_FUNCTION_ARGS)
+{
+ text *arg1 = PG_GETARG_TEXT_PP(0);
+ char *ch = VARDATA_ANY(arg1);
+ char result;
+
+ /*
+ * Conversion rules are the same as in charin(), but here we need to
+ * handle the empty-string case honestly.
+ */
+ if (VARSIZE_ANY_EXHDR(arg1) == 4 && ch[0] == '\\' &&
+ ISOCTAL(ch[1]) && ISOCTAL(ch[2]) && ISOCTAL(ch[3]))
+ result = (FROMOCTAL(ch[1]) << 6) +
+ (FROMOCTAL(ch[2]) << 3) +
+ FROMOCTAL(ch[3]);
+ else if (VARSIZE_ANY_EXHDR(arg1) > 0)
+ result = ch[0];
+ else
+ result = '\0';
+
+ PG_RETURN_CHAR(result);
+}
+
+Datum
+char_text(PG_FUNCTION_ARGS)
+{
+ char arg1 = PG_GETARG_CHAR(0);
+ text *result = palloc(VARHDRSZ + 4);
+
+ /*
+ * Conversion rules are the same as in charout(), but here we need to be
+ * honest about converting 0x00 to an empty string.
+ */
+ if (IS_HIGHBIT_SET(arg1))
+ {
+ SET_VARSIZE(result, VARHDRSZ + 4);
+ (VARDATA(result))[0] = '\\';
+ (VARDATA(result))[1] = TOOCTAL(((unsigned char) arg1) >> 6);
+ (VARDATA(result))[2] = TOOCTAL((((unsigned char) arg1) >> 3) & 07);
+ (VARDATA(result))[3] = TOOCTAL(((unsigned char) arg1) & 07);
+ }
+ else if (arg1 != '\0')
+ {
+ SET_VARSIZE(result, VARHDRSZ + 1);
+ *(VARDATA(result)) = arg1;
+ }
+ else
+ SET_VARSIZE(result, VARHDRSZ);
+
+ PG_RETURN_TEXT_P(result);
+}
diff --git a/src/backend/utils/adt/cryptohashfuncs.c b/src/backend/utils/adt/cryptohashfuncs.c
new file mode 100644
index 0000000..03d84ea
--- /dev/null
+++ b/src/backend/utils/adt/cryptohashfuncs.c
@@ -0,0 +1,168 @@
+/*-------------------------------------------------------------------------
+ *
+ * cryptohashfuncs.c
+ * Cryptographic hash functions
+ *
+ * Portions Copyright (c) 2018-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/cryptohashfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "common/cryptohash.h"
+#include "common/md5.h"
+#include "common/sha2.h"
+#include "utils/builtins.h"
+
+
+/*
+ * MD5
+ */
+
+/* MD5 produces a 16 byte (128 bit) hash; double it for hex */
+#define MD5_HASH_LEN 32
+
+/*
+ * Create an MD5 hash of a text value and return it as hex string.
+ */
+Datum
+md5_text(PG_FUNCTION_ARGS)
+{
+ text *in_text = PG_GETARG_TEXT_PP(0);
+ size_t len;
+ char hexsum[MD5_HASH_LEN + 1];
+ const char *errstr = NULL;
+
+ /* Calculate the length of the buffer using varlena metadata */
+ len = VARSIZE_ANY_EXHDR(in_text);
+
+ /* get the hash result */
+ if (pg_md5_hash(VARDATA_ANY(in_text), len, hexsum, &errstr) == false)
+ ereport(ERROR,
+ (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("could not compute %s hash: %s", "MD5",
+ errstr)));
+
+ /* convert to text and return it */
+ PG_RETURN_TEXT_P(cstring_to_text(hexsum));
+}
+
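For example, applying md5_text to the three-byte value "abc" yields the RFC 1321 test vector 900150983cd24fb0d6963f7d28e17f72.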
+/*
+ * Create an MD5 hash of a bytea value and return it as a hex string.
+ */
+Datum
+md5_bytea(PG_FUNCTION_ARGS)
+{
+ bytea *in = PG_GETARG_BYTEA_PP(0);
+ size_t len;
+ char hexsum[MD5_HASH_LEN + 1];
+ const char *errstr = NULL;
+
+ len = VARSIZE_ANY_EXHDR(in);
+ if (pg_md5_hash(VARDATA_ANY(in), len, hexsum, &errstr) == false)
+ ereport(ERROR,
+ (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("could not compute %s hash: %s", "MD5",
+ errstr)));
+
+ PG_RETURN_TEXT_P(cstring_to_text(hexsum));
+}
+
+/*
+ * Internal routine to compute a cryptohash with the given bytea input.
+ */
+static inline bytea *
+cryptohash_internal(pg_cryptohash_type type, bytea *input)
+{
+ const uint8 *data;
+ const char *typestr = NULL;
+ int digest_len = 0;
+ size_t len;
+ pg_cryptohash_ctx *ctx;
+ bytea *result;
+
+ switch (type)
+ {
+ case PG_SHA224:
+ typestr = "SHA224";
+ digest_len = PG_SHA224_DIGEST_LENGTH;
+ break;
+ case PG_SHA256:
+ typestr = "SHA256";
+ digest_len = PG_SHA256_DIGEST_LENGTH;
+ break;
+ case PG_SHA384:
+ typestr = "SHA384";
+ digest_len = PG_SHA384_DIGEST_LENGTH;
+ break;
+ case PG_SHA512:
+ typestr = "SHA512";
+ digest_len = PG_SHA512_DIGEST_LENGTH;
+ break;
+ case PG_MD5:
+ case PG_SHA1:
+ elog(ERROR, "unsupported cryptohash type %d", type);
+ break;
+ }
+
+ result = palloc0(digest_len + VARHDRSZ);
+ len = VARSIZE_ANY_EXHDR(input);
+ data = (unsigned char *) VARDATA_ANY(input);
+
+ ctx = pg_cryptohash_create(type);
+ if (pg_cryptohash_init(ctx) < 0)
+ elog(ERROR, "could not initialize %s context: %s", typestr,
+ pg_cryptohash_error(ctx));
+ if (pg_cryptohash_update(ctx, data, len) < 0)
+ elog(ERROR, "could not update %s context: %s", typestr,
+ pg_cryptohash_error(ctx));
+ if (pg_cryptohash_final(ctx, (unsigned char *) VARDATA(result),
+ digest_len) < 0)
+ elog(ERROR, "could not finalize %s context: %s", typestr,
+ pg_cryptohash_error(ctx));
+ pg_cryptohash_free(ctx);
+
+ SET_VARSIZE(result, digest_len + VARHDRSZ);
+
+ return result;
+}
+
+/*
+ * SHA-2 variants
+ */
+
+Datum
+sha224_bytea(PG_FUNCTION_ARGS)
+{
+ bytea *result = cryptohash_internal(PG_SHA224, PG_GETARG_BYTEA_PP(0));
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+Datum
+sha256_bytea(PG_FUNCTION_ARGS)
+{
+ bytea *result = cryptohash_internal(PG_SHA256, PG_GETARG_BYTEA_PP(0));
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+Datum
+sha384_bytea(PG_FUNCTION_ARGS)
+{
+ bytea *result = cryptohash_internal(PG_SHA384, PG_GETARG_BYTEA_PP(0));
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+Datum
+sha512_bytea(PG_FUNCTION_ARGS)
+{
+ bytea *result = cryptohash_internal(PG_SHA512, PG_GETARG_BYTEA_PP(0));
+
+ PG_RETURN_BYTEA_P(result);
+}
diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c
new file mode 100644
index 0000000..44b75fe
--- /dev/null
+++ b/src/backend/utils/adt/date.c
@@ -0,0 +1,3135 @@
+/*-------------------------------------------------------------------------
+ *
+ * date.c
+ * implements DATE and TIME data types specified in SQL standard
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994-5, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/date.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <ctype.h>
+#include <limits.h>
+#include <float.h>
+#include <math.h>
+#include <time.h>
+
+#include "access/xact.h"
+#include "catalog/pg_type.h"
+#include "common/hashfn.h"
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "nodes/supportnodes.h"
+#include "parser/scansup.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/date.h"
+#include "utils/datetime.h"
+#include "utils/numeric.h"
+#include "utils/sortsupport.h"
+
+/*
+ * gcc's -ffast-math switch breaks routines that expect exact results from
+ * expressions like timeval / SECS_PER_HOUR, where timeval is double.
+ */
+#ifdef __FAST_MATH__
+#error -ffast-math is known to break this code
+#endif
+
+
+/* common code for timetypmodin and timetztypmodin */
+static int32
+anytime_typmodin(bool istz, ArrayType *ta)
+{
+ int32 *tl;
+ int n;
+
+ tl = ArrayGetIntegerTypmods(ta, &n);
+
+ /*
+ * we're not too fussy about a good error message here because the grammar
+ * shouldn't allow a wrong number of modifiers for TIME
+ */
+ if (n != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid type modifier")));
+
+ return anytime_typmod_check(istz, tl[0]);
+}
+
+/* exported so parse_expr.c can use it */
+int32
+anytime_typmod_check(bool istz, int32 typmod)
+{
+ if (typmod < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("TIME(%d)%s precision must not be negative",
+ typmod, (istz ? " WITH TIME ZONE" : ""))));
+ if (typmod > MAX_TIME_PRECISION)
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("TIME(%d)%s precision reduced to maximum allowed, %d",
+ typmod, (istz ? " WITH TIME ZONE" : ""),
+ MAX_TIME_PRECISION)));
+ typmod = MAX_TIME_PRECISION;
+ }
+
+ return typmod;
+}
+
+/* common code for timetypmodout and timetztypmodout */
+static char *
+anytime_typmodout(bool istz, int32 typmod)
+{
+ const char *tz = istz ? " with time zone" : " without time zone";
+
+ if (typmod >= 0)
+ return psprintf("(%d)%s", (int) typmod, tz);
+ else
+ return psprintf("%s", tz);
+}
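+
+/*
+ * Worked example: with MAX_TIME_PRECISION = 6, anytime_typmod_check(true, 7)
+ * emits a WARNING and returns 6, while anytime_typmodout(true, 3) yields the
+ * string "(3) with time zone" and a negative typmod yields just
+ * " with time zone".
+ */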
+
+
+/*****************************************************************************
+ * Date ADT
+ *****************************************************************************/
+
+
+/* date_in()
+ * Given date text string, convert to internal date format.
+ */
+Datum
+date_in(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+ DateADT date;
+ fsec_t fsec;
+ struct pg_tm tt,
+ *tm = &tt;
+ int tzp;
+ int dtype;
+ int nf;
+ int dterr;
+ char *field[MAXDATEFIELDS];
+ int ftype[MAXDATEFIELDS];
+ char workbuf[MAXDATELEN + 1];
+
+ dterr = ParseDateTime(str, workbuf, sizeof(workbuf),
+ field, ftype, MAXDATEFIELDS, &nf);
+ if (dterr == 0)
+ dterr = DecodeDateTime(field, ftype, nf, &dtype, tm, &fsec, &tzp);
+ if (dterr != 0)
+ DateTimeParseError(dterr, str, "date");
+
+ switch (dtype)
+ {
+ case DTK_DATE:
+ break;
+
+ case DTK_EPOCH:
+ GetEpochTime(tm);
+ break;
+
+ case DTK_LATE:
+ DATE_NOEND(date);
+ PG_RETURN_DATEADT(date);
+
+ case DTK_EARLY:
+ DATE_NOBEGIN(date);
+ PG_RETURN_DATEADT(date);
+
+ default:
+ DateTimeParseError(DTERR_BAD_FORMAT, str, "date");
+ break;
+ }
+
+ /* Prevent overflow in Julian-day routines */
+ if (!IS_VALID_JULIAN(tm->tm_year, tm->tm_mon, tm->tm_mday))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("date out of range: \"%s\"", str)));
+
+ date = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE;
+
+ /* Now check for just-out-of-range dates */
+ if (!IS_VALID_DATE(date))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("date out of range: \"%s\"", str)));
+
+ PG_RETURN_DATEADT(date);
+}
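+
+/*
+ * Worked example of the representation used above: DateADT counts days from
+ * 2000-01-01 (the Postgres epoch), so '2000-01-01' is stored as 0,
+ * '2000-01-02' as 1, and '1999-12-31' as -1.
+ */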
+
+/* date_out()
+ * Given internal format date, convert to text string.
+ */
+Datum
+date_out(PG_FUNCTION_ARGS)
+{
+ DateADT date = PG_GETARG_DATEADT(0);
+ char *result;
+ struct pg_tm tt,
+ *tm = &tt;
+ char buf[MAXDATELEN + 1];
+
+ if (DATE_NOT_FINITE(date))
+ EncodeSpecialDate(date, buf);
+ else
+ {
+ j2date(date + POSTGRES_EPOCH_JDATE,
+ &(tm->tm_year), &(tm->tm_mon), &(tm->tm_mday));
+ EncodeDateOnly(tm, DateStyle, buf);
+ }
+
+ result = pstrdup(buf);
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * date_recv - converts external binary format to date
+ */
+Datum
+date_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ DateADT result;
+
+ result = (DateADT) pq_getmsgint(buf, sizeof(DateADT));
+
+ /* Limit to the same range that date_in() accepts. */
+ if (DATE_NOT_FINITE(result))
+ /* ok */ ;
+ else if (!IS_VALID_DATE(result))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("date out of range")));
+
+ PG_RETURN_DATEADT(result);
+}
+
+/*
+ * date_send - converts date to binary format
+ */
+Datum
+date_send(PG_FUNCTION_ARGS)
+{
+ DateADT date = PG_GETARG_DATEADT(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendint32(&buf, date);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/*
+ * make_date - date constructor
+ */
+Datum
+make_date(PG_FUNCTION_ARGS)
+{
+ struct pg_tm tm;
+ DateADT date;
+ int dterr;
+ bool bc = false;
+
+ tm.tm_year = PG_GETARG_INT32(0);
+ tm.tm_mon = PG_GETARG_INT32(1);
+ tm.tm_mday = PG_GETARG_INT32(2);
+
+ /* Handle negative years as BC */
+ if (tm.tm_year < 0)
+ {
+ bc = true;
+ tm.tm_year = -tm.tm_year;
+ }
+
+ dterr = ValidateDate(DTK_DATE_M, false, false, bc, &tm);
+
+ if (dterr != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_FIELD_OVERFLOW),
+ errmsg("date field value out of range: %d-%02d-%02d",
+ tm.tm_year, tm.tm_mon, tm.tm_mday)));
+
+ /* Prevent overflow in Julian-day routines */
+ if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("date out of range: %d-%02d-%02d",
+ tm.tm_year, tm.tm_mon, tm.tm_mday)));
+
+ date = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE;
+
+ /* Now check for just-out-of-range dates */
+ if (!IS_VALID_DATE(date))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("date out of range: %d-%02d-%02d",
+ tm.tm_year, tm.tm_mon, tm.tm_mday)));
+
+ PG_RETURN_DATEADT(date);
+}
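+
+/*
+ * Example: make_date(2013, 7, 15) produces '2013-07-15'; a negative year is
+ * interpreted as BC, so make_date(-44, 3, 15) gives March 15, 44 BC.
+ */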
+
+/*
+ * Convert reserved date values to string.
+ */
+void
+EncodeSpecialDate(DateADT dt, char *str)
+{
+ if (DATE_IS_NOBEGIN(dt))
+ strcpy(str, EARLY);
+ else if (DATE_IS_NOEND(dt))
+ strcpy(str, LATE);
+ else /* shouldn't happen */
+ elog(ERROR, "invalid argument for EncodeSpecialDate");
+}
+
+
+/*
+ * GetSQLCurrentDate -- implements CURRENT_DATE
+ */
+DateADT
+GetSQLCurrentDate(void)
+{
+ struct pg_tm tm;
+
+ static int cache_year = 0;
+ static int cache_mon = 0;
+ static int cache_mday = 0;
+ static DateADT cache_date;
+
+ GetCurrentDateTime(&tm);
+
+ /*
+ * date2j involves several integer divisions; moreover, unless our session
+ * lives across local midnight, we don't really have to do it more than
+ * once. So it seems worth having a separate cache here.
+ */
+ if (tm.tm_year != cache_year ||
+ tm.tm_mon != cache_mon ||
+ tm.tm_mday != cache_mday)
+ {
+ cache_date = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE;
+ cache_year = tm.tm_year;
+ cache_mon = tm.tm_mon;
+ cache_mday = tm.tm_mday;
+ }
+
+ return cache_date;
+}
+
+/*
+ * GetSQLCurrentTime -- implements CURRENT_TIME, CURRENT_TIME(n)
+ */
+TimeTzADT *
+GetSQLCurrentTime(int32 typmod)
+{
+ TimeTzADT *result;
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+ int tz;
+
+ GetCurrentTimeUsec(tm, &fsec, &tz);
+
+ result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
+ tm2timetz(tm, fsec, tz, result);
+ AdjustTimeForTypmod(&(result->time), typmod);
+ return result;
+}
+
+/*
+ * GetSQLLocalTime -- implements LOCALTIME, LOCALTIME(n)
+ */
+TimeADT
+GetSQLLocalTime(int32 typmod)
+{
+ TimeADT result;
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+ int tz;
+
+ GetCurrentTimeUsec(tm, &fsec, &tz);
+
+ tm2time(tm, fsec, &result);
+ AdjustTimeForTypmod(&result, typmod);
+ return result;
+}
+
+
+/*
+ * Comparison functions for dates
+ */
+
+Datum
+date_eq(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal1 = PG_GETARG_DATEADT(0);
+ DateADT dateVal2 = PG_GETARG_DATEADT(1);
+
+ PG_RETURN_BOOL(dateVal1 == dateVal2);
+}
+
+Datum
+date_ne(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal1 = PG_GETARG_DATEADT(0);
+ DateADT dateVal2 = PG_GETARG_DATEADT(1);
+
+ PG_RETURN_BOOL(dateVal1 != dateVal2);
+}
+
+Datum
+date_lt(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal1 = PG_GETARG_DATEADT(0);
+ DateADT dateVal2 = PG_GETARG_DATEADT(1);
+
+ PG_RETURN_BOOL(dateVal1 < dateVal2);
+}
+
+Datum
+date_le(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal1 = PG_GETARG_DATEADT(0);
+ DateADT dateVal2 = PG_GETARG_DATEADT(1);
+
+ PG_RETURN_BOOL(dateVal1 <= dateVal2);
+}
+
+Datum
+date_gt(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal1 = PG_GETARG_DATEADT(0);
+ DateADT dateVal2 = PG_GETARG_DATEADT(1);
+
+ PG_RETURN_BOOL(dateVal1 > dateVal2);
+}
+
+Datum
+date_ge(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal1 = PG_GETARG_DATEADT(0);
+ DateADT dateVal2 = PG_GETARG_DATEADT(1);
+
+ PG_RETURN_BOOL(dateVal1 >= dateVal2);
+}
+
+Datum
+date_cmp(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal1 = PG_GETARG_DATEADT(0);
+ DateADT dateVal2 = PG_GETARG_DATEADT(1);
+
+ if (dateVal1 < dateVal2)
+ PG_RETURN_INT32(-1);
+ else if (dateVal1 > dateVal2)
+ PG_RETURN_INT32(1);
+ PG_RETURN_INT32(0);
+}
+
+Datum
+date_sortsupport(PG_FUNCTION_ARGS)
+{
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+
+ ssup->comparator = ssup_datum_int32_cmp;
+ PG_RETURN_VOID();
+}
+
+Datum
+date_finite(PG_FUNCTION_ARGS)
+{
+ DateADT date = PG_GETARG_DATEADT(0);
+
+ PG_RETURN_BOOL(!DATE_NOT_FINITE(date));
+}
+
+Datum
+date_larger(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal1 = PG_GETARG_DATEADT(0);
+ DateADT dateVal2 = PG_GETARG_DATEADT(1);
+
+ PG_RETURN_DATEADT((dateVal1 > dateVal2) ? dateVal1 : dateVal2);
+}
+
+Datum
+date_smaller(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal1 = PG_GETARG_DATEADT(0);
+ DateADT dateVal2 = PG_GETARG_DATEADT(1);
+
+ PG_RETURN_DATEADT((dateVal1 < dateVal2) ? dateVal1 : dateVal2);
+}
+
+/* Compute difference between two dates in days.
+ */
+Datum
+date_mi(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal1 = PG_GETARG_DATEADT(0);
+ DateADT dateVal2 = PG_GETARG_DATEADT(1);
+
+ if (DATE_NOT_FINITE(dateVal1) || DATE_NOT_FINITE(dateVal2))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot subtract infinite dates")));
+
+ PG_RETURN_INT32((int32) (dateVal1 - dateVal2));
+}
+
+/* Add a number of days to a date, giving a new date.
+ * Must handle both positive and negative numbers of days.
+ */
+Datum
+date_pli(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal = PG_GETARG_DATEADT(0);
+ int32 days = PG_GETARG_INT32(1);
+ DateADT result;
+
+ if (DATE_NOT_FINITE(dateVal))
+ PG_RETURN_DATEADT(dateVal); /* can't change infinity */
+
+ result = dateVal + days;
+
+ /* Check for integer overflow and out-of-allowed-range */
+ if ((days >= 0 ? (result < dateVal) : (result > dateVal)) ||
+ !IS_VALID_DATE(result))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("date out of range")));
+
+ PG_RETURN_DATEADT(result);
+}
+
+/* Subtract a number of days from a date, giving a new date.
+ */
+Datum
+date_mii(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal = PG_GETARG_DATEADT(0);
+ int32 days = PG_GETARG_INT32(1);
+ DateADT result;
+
+ if (DATE_NOT_FINITE(dateVal))
+ PG_RETURN_DATEADT(dateVal); /* can't change infinity */
+
+ result = dateVal - days;
+
+ /* Check for integer overflow and out-of-allowed-range */
+ if ((days >= 0 ? (result > dateVal) : (result < dateVal)) ||
+ !IS_VALID_DATE(result))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("date out of range")));
+
+ PG_RETURN_DATEADT(result);
+}
+
+
+/*
+ * Promote date to timestamp.
+ *
+ * On successful conversion, *overflow is set to zero if it's not NULL.
+ *
+ * If the date is finite but out of the valid range for timestamp, then:
+ * if overflow is NULL, we throw an out-of-range error.
+ * if overflow is not NULL, we store +1 or -1 there to indicate the sign
+ * of the overflow, and return the appropriate timestamp infinity.
+ *
+ * Note: *overflow = -1 is actually not possible currently, since both
+ * datatypes have the same lower bound, Julian day zero.
+ */
+Timestamp
+date2timestamp_opt_overflow(DateADT dateVal, int *overflow)
+{
+ Timestamp result;
+
+ if (overflow)
+ *overflow = 0;
+
+ if (DATE_IS_NOBEGIN(dateVal))
+ TIMESTAMP_NOBEGIN(result);
+ else if (DATE_IS_NOEND(dateVal))
+ TIMESTAMP_NOEND(result);
+ else
+ {
+ /*
+ * Since dates have the same minimum values as timestamps, only upper
+ * boundary need be checked for overflow.
+ */
+ if (dateVal >= (TIMESTAMP_END_JULIAN - POSTGRES_EPOCH_JDATE))
+ {
+ if (overflow)
+ {
+ *overflow = 1;
+ TIMESTAMP_NOEND(result);
+ return result;
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("date out of range for timestamp")));
+ }
+ }
+
+ /* date is days since 2000, timestamp is microseconds since same... */
+ result = dateVal * USECS_PER_DAY;
+ }
+
+ return result;
+}
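+
+/*
+ * Worked example of the conversion above: both types share the 2000-01-01
+ * epoch, so dateVal = 1 ('2000-01-02') becomes 1 * USECS_PER_DAY =
+ * 86400000000 usec, i.e. '2000-01-02 00:00:00'.
+ */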
+
+/*
+ * Promote date to timestamp, throwing error for overflow.
+ */
+static Timestamp
+date2timestamp(DateADT dateVal)
+{
+ return date2timestamp_opt_overflow(dateVal, NULL);
+}
+
+/*
+ * Promote date to timestamp with time zone.
+ *
+ * On successful conversion, *overflow is set to zero if it's not NULL.
+ *
+ * If the date is finite but out of the valid range for timestamptz, then:
+ * if overflow is NULL, we throw an out-of-range error.
+ * if overflow is not NULL, we store +1 or -1 there to indicate the sign
+ * of the overflow, and return the appropriate timestamptz infinity.
+ */
+TimestampTz
+date2timestamptz_opt_overflow(DateADT dateVal, int *overflow)
+{
+ TimestampTz result;
+ struct pg_tm tt,
+ *tm = &tt;
+ int tz;
+
+ if (overflow)
+ *overflow = 0;
+
+ if (DATE_IS_NOBEGIN(dateVal))
+ TIMESTAMP_NOBEGIN(result);
+ else if (DATE_IS_NOEND(dateVal))
+ TIMESTAMP_NOEND(result);
+ else
+ {
+ /*
+ * Since dates have the same minimum values as timestamps, only upper
+ * boundary need be checked for overflow.
+ */
+ if (dateVal >= (TIMESTAMP_END_JULIAN - POSTGRES_EPOCH_JDATE))
+ {
+ if (overflow)
+ {
+ *overflow = 1;
+ TIMESTAMP_NOEND(result);
+ return result;
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("date out of range for timestamp")));
+ }
+ }
+
+ j2date(dateVal + POSTGRES_EPOCH_JDATE,
+ &(tm->tm_year), &(tm->tm_mon), &(tm->tm_mday));
+ tm->tm_hour = 0;
+ tm->tm_min = 0;
+ tm->tm_sec = 0;
+ tz = DetermineTimeZoneOffset(tm, session_timezone);
+
+ result = dateVal * USECS_PER_DAY + tz * USECS_PER_SEC;
+
+ /*
+ * Since it is possible to go beyond allowed timestamptz range because
+ * of time zone, check for allowed timestamp range after adding tz.
+ */
+ if (!IS_VALID_TIMESTAMP(result))
+ {
+ if (overflow)
+ {
+ if (result < MIN_TIMESTAMP)
+ {
+ *overflow = -1;
+ TIMESTAMP_NOBEGIN(result);
+ }
+ else
+ {
+ *overflow = 1;
+ TIMESTAMP_NOEND(result);
+ }
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("date out of range for timestamp")));
+ }
+ }
+ }
+
+ return result;
+}
+
+/*
+ * Promote date to timestamptz, throwing error for overflow.
+ */
+static TimestampTz
+date2timestamptz(DateADT dateVal)
+{
+ return date2timestamptz_opt_overflow(dateVal, NULL);
+}
+
+/*
+ * date2timestamp_no_overflow
+ *
+ * This is chartered to produce a double value that is numerically
+ * equivalent to the corresponding Timestamp value, if the date is in the
+ * valid range of Timestamps, but in any case not throw an overflow error.
+ * We can do this since the numerical range of double is greater than
+ * that of non-erroneous timestamps. The results are currently only
+ * used for statistical estimation purposes.
+ */
+double
+date2timestamp_no_overflow(DateADT dateVal)
+{
+ double result;
+
+ if (DATE_IS_NOBEGIN(dateVal))
+ result = -DBL_MAX;
+ else if (DATE_IS_NOEND(dateVal))
+ result = DBL_MAX;
+ else
+ {
+ /* date is days since 2000, timestamp is microseconds since same... */
+ result = dateVal * (double) USECS_PER_DAY;
+ }
+
+ return result;
+}
+
+
+/*
+ * Crosstype comparison functions for dates
+ */
+
+int32
+date_cmp_timestamp_internal(DateADT dateVal, Timestamp dt2)
+{
+ Timestamp dt1;
+ int overflow;
+
+ dt1 = date2timestamp_opt_overflow(dateVal, &overflow);
+ if (overflow > 0)
+ {
+ /* dt1 is larger than any finite timestamp, but less than infinity */
+ return TIMESTAMP_IS_NOEND(dt2) ? -1 : +1;
+ }
+ Assert(overflow == 0); /* -1 case cannot occur */
+
+ return timestamp_cmp_internal(dt1, dt2);
+}
+
+Datum
+date_eq_timestamp(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal = PG_GETARG_DATEADT(0);
+ Timestamp dt2 = PG_GETARG_TIMESTAMP(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt2) == 0);
+}
+
+Datum
+date_ne_timestamp(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal = PG_GETARG_DATEADT(0);
+ Timestamp dt2 = PG_GETARG_TIMESTAMP(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt2) != 0);
+}
+
+Datum
+date_lt_timestamp(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal = PG_GETARG_DATEADT(0);
+ Timestamp dt2 = PG_GETARG_TIMESTAMP(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt2) < 0);
+}
+
+Datum
+date_gt_timestamp(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal = PG_GETARG_DATEADT(0);
+ Timestamp dt2 = PG_GETARG_TIMESTAMP(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt2) > 0);
+}
+
+Datum
+date_le_timestamp(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal = PG_GETARG_DATEADT(0);
+ Timestamp dt2 = PG_GETARG_TIMESTAMP(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt2) <= 0);
+}
+
+Datum
+date_ge_timestamp(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal = PG_GETARG_DATEADT(0);
+ Timestamp dt2 = PG_GETARG_TIMESTAMP(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt2) >= 0);
+}
+
+Datum
+date_cmp_timestamp(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal = PG_GETARG_DATEADT(0);
+ Timestamp dt2 = PG_GETARG_TIMESTAMP(1);
+
+ PG_RETURN_INT32(date_cmp_timestamp_internal(dateVal, dt2));
+}
+
+int32
+date_cmp_timestamptz_internal(DateADT dateVal, TimestampTz dt2)
+{
+ TimestampTz dt1;
+ int overflow;
+
+ dt1 = date2timestamptz_opt_overflow(dateVal, &overflow);
+ if (overflow > 0)
+ {
+ /* dt1 is larger than any finite timestamp, but less than infinity */
+ return TIMESTAMP_IS_NOEND(dt2) ? -1 : +1;
+ }
+ if (overflow < 0)
+ {
+ /* dt1 is less than any finite timestamp, but more than -infinity */
+ return TIMESTAMP_IS_NOBEGIN(dt2) ? +1 : -1;
+ }
+
+ return timestamptz_cmp_internal(dt1, dt2);
+}
+
+Datum
+date_eq_timestamptz(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal = PG_GETARG_DATEADT(0);
+ TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt2) == 0);
+}
+
+Datum
+date_ne_timestamptz(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal = PG_GETARG_DATEADT(0);
+ TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt2) != 0);
+}
+
+Datum
+date_lt_timestamptz(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal = PG_GETARG_DATEADT(0);
+ TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt2) < 0);
+}
+
+Datum
+date_gt_timestamptz(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal = PG_GETARG_DATEADT(0);
+ TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt2) > 0);
+}
+
+Datum
+date_le_timestamptz(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal = PG_GETARG_DATEADT(0);
+ TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt2) <= 0);
+}
+
+Datum
+date_ge_timestamptz(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal = PG_GETARG_DATEADT(0);
+ TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt2) >= 0);
+}
+
+Datum
+date_cmp_timestamptz(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal = PG_GETARG_DATEADT(0);
+ TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1);
+
+ PG_RETURN_INT32(date_cmp_timestamptz_internal(dateVal, dt2));
+}
+
+Datum
+timestamp_eq_date(PG_FUNCTION_ARGS)
+{
+ Timestamp dt1 = PG_GETARG_TIMESTAMP(0);
+ DateADT dateVal = PG_GETARG_DATEADT(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt1) == 0);
+}
+
+Datum
+timestamp_ne_date(PG_FUNCTION_ARGS)
+{
+ Timestamp dt1 = PG_GETARG_TIMESTAMP(0);
+ DateADT dateVal = PG_GETARG_DATEADT(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt1) != 0);
+}
+
+Datum
+timestamp_lt_date(PG_FUNCTION_ARGS)
+{
+ Timestamp dt1 = PG_GETARG_TIMESTAMP(0);
+ DateADT dateVal = PG_GETARG_DATEADT(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt1) > 0);
+}
+
+Datum
+timestamp_gt_date(PG_FUNCTION_ARGS)
+{
+ Timestamp dt1 = PG_GETARG_TIMESTAMP(0);
+ DateADT dateVal = PG_GETARG_DATEADT(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt1) < 0);
+}
+
+Datum
+timestamp_le_date(PG_FUNCTION_ARGS)
+{
+ Timestamp dt1 = PG_GETARG_TIMESTAMP(0);
+ DateADT dateVal = PG_GETARG_DATEADT(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt1) >= 0);
+}
+
+Datum
+timestamp_ge_date(PG_FUNCTION_ARGS)
+{
+ Timestamp dt1 = PG_GETARG_TIMESTAMP(0);
+ DateADT dateVal = PG_GETARG_DATEADT(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamp_internal(dateVal, dt1) <= 0);
+}
+
+Datum
+timestamp_cmp_date(PG_FUNCTION_ARGS)
+{
+ Timestamp dt1 = PG_GETARG_TIMESTAMP(0);
+ DateADT dateVal = PG_GETARG_DATEADT(1);
+
+ PG_RETURN_INT32(-date_cmp_timestamp_internal(dateVal, dt1));
+}
+
+Datum
+timestamptz_eq_date(PG_FUNCTION_ARGS)
+{
+ TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0);
+ DateADT dateVal = PG_GETARG_DATEADT(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt1) == 0);
+}
+
+Datum
+timestamptz_ne_date(PG_FUNCTION_ARGS)
+{
+ TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0);
+ DateADT dateVal = PG_GETARG_DATEADT(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt1) != 0);
+}
+
+Datum
+timestamptz_lt_date(PG_FUNCTION_ARGS)
+{
+ TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0);
+ DateADT dateVal = PG_GETARG_DATEADT(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt1) > 0);
+}
+
+Datum
+timestamptz_gt_date(PG_FUNCTION_ARGS)
+{
+ TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0);
+ DateADT dateVal = PG_GETARG_DATEADT(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt1) < 0);
+}
+
+Datum
+timestamptz_le_date(PG_FUNCTION_ARGS)
+{
+ TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0);
+ DateADT dateVal = PG_GETARG_DATEADT(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt1) >= 0);
+}
+
+Datum
+timestamptz_ge_date(PG_FUNCTION_ARGS)
+{
+ TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0);
+ DateADT dateVal = PG_GETARG_DATEADT(1);
+
+ PG_RETURN_BOOL(date_cmp_timestamptz_internal(dateVal, dt1) <= 0);
+}
+
+Datum
+timestamptz_cmp_date(PG_FUNCTION_ARGS)
+{
+ TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0);
+ DateADT dateVal = PG_GETARG_DATEADT(1);
+
+ PG_RETURN_INT32(-date_cmp_timestamptz_internal(dateVal, dt1));
+}
+
+/*
+ * in_range support function for date.
+ *
+ * We implement this by promoting the dates to timestamp (without time zone)
+ * and then using the timestamp-and-interval in_range function.
+ */
+Datum
+in_range_date_interval(PG_FUNCTION_ARGS)
+{
+ DateADT val = PG_GETARG_DATEADT(0);
+ DateADT base = PG_GETARG_DATEADT(1);
+ Interval *offset = PG_GETARG_INTERVAL_P(2);
+ bool sub = PG_GETARG_BOOL(3);
+ bool less = PG_GETARG_BOOL(4);
+ Timestamp valStamp;
+ Timestamp baseStamp;
+
+ /* XXX we could support out-of-range cases here, perhaps */
+ valStamp = date2timestamp(val);
+ baseStamp = date2timestamp(base);
+
+ return DirectFunctionCall5(in_range_timestamp_interval,
+ TimestampGetDatum(valStamp),
+ TimestampGetDatum(baseStamp),
+ IntervalPGetDatum(offset),
+ BoolGetDatum(sub),
+ BoolGetDatum(less));
+}
+
+
+/* extract_date()
+ * Extract specified field from date type.
+ */
+Datum
+extract_date(PG_FUNCTION_ARGS)
+{
+ text *units = PG_GETARG_TEXT_PP(0);
+ DateADT date = PG_GETARG_DATEADT(1);
+ int64 intresult;
+ int type,
+ val;
+ char *lowunits;
+ int year,
+ mon,
+ mday;
+
+ lowunits = downcase_truncate_identifier(VARDATA_ANY(units),
+ VARSIZE_ANY_EXHDR(units),
+ false);
+
+ type = DecodeUnits(0, lowunits, &val);
+ if (type == UNKNOWN_FIELD)
+ type = DecodeSpecial(0, lowunits, &val);
+
+ if (DATE_NOT_FINITE(date) && (type == UNITS || type == RESERV))
+ {
+ switch (val)
+ {
+ /* Oscillating units */
+ case DTK_DAY:
+ case DTK_MONTH:
+ case DTK_QUARTER:
+ case DTK_WEEK:
+ case DTK_DOW:
+ case DTK_ISODOW:
+ case DTK_DOY:
+ PG_RETURN_NULL();
+ break;
+
+ /* Monotonically-increasing units */
+ case DTK_YEAR:
+ case DTK_DECADE:
+ case DTK_CENTURY:
+ case DTK_MILLENNIUM:
+ case DTK_JULIAN:
+ case DTK_ISOYEAR:
+ case DTK_EPOCH:
+ if (DATE_IS_NOBEGIN(date))
+ PG_RETURN_NUMERIC(DatumGetNumeric(DirectFunctionCall3(numeric_in,
+ CStringGetDatum("-Infinity"),
+ ObjectIdGetDatum(InvalidOid),
+ Int32GetDatum(-1))));
+ else
+ PG_RETURN_NUMERIC(DatumGetNumeric(DirectFunctionCall3(numeric_in,
+ CStringGetDatum("Infinity"),
+ ObjectIdGetDatum(InvalidOid),
+ Int32GetDatum(-1))));
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("unit \"%s\" not supported for type %s",
+ lowunits, format_type_be(DATEOID))));
+ }
+ }
+ else if (type == UNITS)
+ {
+ j2date(date + POSTGRES_EPOCH_JDATE, &year, &mon, &mday);
+
+ switch (val)
+ {
+ case DTK_DAY:
+ intresult = mday;
+ break;
+
+ case DTK_MONTH:
+ intresult = mon;
+ break;
+
+ case DTK_QUARTER:
+ intresult = (mon - 1) / 3 + 1;
+ break;
+
+ case DTK_WEEK:
+ intresult = date2isoweek(year, mon, mday);
+ break;
+
+ case DTK_YEAR:
+ if (year > 0)
+ intresult = year;
+ else
+ /* there is no year 0, just 1 BC and 1 AD */
+ intresult = year - 1;
+ break;
+
+ case DTK_DECADE:
+ /* see comments in timestamp_part */
+ if (year >= 0)
+ intresult = year / 10;
+ else
+ intresult = -((8 - (year - 1)) / 10);
+ break;
+
+ case DTK_CENTURY:
+ /* see comments in timestamp_part */
+ if (year > 0)
+ intresult = (year + 99) / 100;
+ else
+ intresult = -((99 - (year - 1)) / 100);
+ break;
+
+ case DTK_MILLENNIUM:
+ /* see comments in timestamp_part */
+ if (year > 0)
+ intresult = (year + 999) / 1000;
+ else
+ intresult = -((999 - (year - 1)) / 1000);
+ break;
+
+ case DTK_JULIAN:
+ intresult = date + POSTGRES_EPOCH_JDATE;
+ break;
+
+ case DTK_ISOYEAR:
+ intresult = date2isoyear(year, mon, mday);
+ /* Adjust BC years */
+ if (intresult <= 0)
+ intresult -= 1;
+ break;
+
+ case DTK_DOW:
+ case DTK_ISODOW:
+ intresult = j2day(date + POSTGRES_EPOCH_JDATE);
+ if (val == DTK_ISODOW && intresult == 0)
+ intresult = 7;
+ break;
+
+ case DTK_DOY:
+ intresult = date2j(year, mon, mday) - date2j(year, 1, 1) + 1;
+ break;
+
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("unit \"%s\" not supported for type %s",
+ lowunits, format_type_be(DATEOID))));
+ intresult = 0;
+ }
+ }
+ else if (type == RESERV)
+ {
+ switch (val)
+ {
+ case DTK_EPOCH:
+ intresult = ((int64) date + POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY;
+ break;
+
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("unit \"%s\" not supported for type %s",
+ lowunits, format_type_be(DATEOID))));
+ intresult = 0;
+ }
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unit \"%s\" not recognized for type %s",
+ lowunits, format_type_be(DATEOID))));
+ intresult = 0;
+ }
+
+ PG_RETURN_NUMERIC(int64_to_numeric(intresult));
+}
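+
+/*
+ * Worked example for the century arithmetic above: (year + 99) / 100 maps
+ * year 2001 to century 21 and year 2000 to century 20, matching the
+ * convention that the 21st century began on 2001-01-01.
+ */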
+
+
+/* Add an interval to a date, giving a new date.
+ * Must handle both positive and negative intervals.
+ *
+ * We implement this by promoting the date to timestamp (without time zone)
+ * and then using the timestamp plus interval function.
+ */
+Datum
+date_pl_interval(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal = PG_GETARG_DATEADT(0);
+ Interval *span = PG_GETARG_INTERVAL_P(1);
+ Timestamp dateStamp;
+
+ dateStamp = date2timestamp(dateVal);
+
+ return DirectFunctionCall2(timestamp_pl_interval,
+ TimestampGetDatum(dateStamp),
+ PointerGetDatum(span));
+}
+
+/* Subtract an interval from a date, giving a new date.
+ * Must handle both positive and negative intervals.
+ *
+ * We implement this by promoting the date to timestamp (without time zone)
+ * and then using the timestamp minus interval function.
+ */
+Datum
+date_mi_interval(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal = PG_GETARG_DATEADT(0);
+ Interval *span = PG_GETARG_INTERVAL_P(1);
+ Timestamp dateStamp;
+
+ dateStamp = date2timestamp(dateVal);
+
+ return DirectFunctionCall2(timestamp_mi_interval,
+ TimestampGetDatum(dateStamp),
+ PointerGetDatum(span));
+}
+
+/* date_timestamp()
+ * Convert date to timestamp data type.
+ */
+Datum
+date_timestamp(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal = PG_GETARG_DATEADT(0);
+ Timestamp result;
+
+ result = date2timestamp(dateVal);
+
+ PG_RETURN_TIMESTAMP(result);
+}
+
+/* timestamp_date()
+ * Convert timestamp to date data type.
+ */
+Datum
+timestamp_date(PG_FUNCTION_ARGS)
+{
+ Timestamp timestamp = PG_GETARG_TIMESTAMP(0);
+ DateADT result;
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+
+ if (TIMESTAMP_IS_NOBEGIN(timestamp))
+ DATE_NOBEGIN(result);
+ else if (TIMESTAMP_IS_NOEND(timestamp))
+ DATE_NOEND(result);
+ else
+ {
+ if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ result = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE;
+ }
+
+ PG_RETURN_DATEADT(result);
+}
+
+
+/* date_timestamptz()
+ * Convert date to timestamp with time zone data type.
+ */
+Datum
+date_timestamptz(PG_FUNCTION_ARGS)
+{
+ DateADT dateVal = PG_GETARG_DATEADT(0);
+ TimestampTz result;
+
+ result = date2timestamptz(dateVal);
+
+ PG_RETURN_TIMESTAMP(result);
+}
+
+
+/* timestamptz_date()
+ * Convert timestamp with time zone to date data type.
+ */
+Datum
+timestamptz_date(PG_FUNCTION_ARGS)
+{
+ TimestampTz timestamp = PG_GETARG_TIMESTAMP(0);
+ DateADT result;
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+ int tz;
+
+ if (TIMESTAMP_IS_NOBEGIN(timestamp))
+ DATE_NOBEGIN(result);
+ else if (TIMESTAMP_IS_NOEND(timestamp))
+ DATE_NOEND(result);
+ else
+ {
+ if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ result = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE;
+ }
+
+ PG_RETURN_DATEADT(result);
+}
+
+
+/*****************************************************************************
+ * Time ADT
+ *****************************************************************************/
+
+Datum
+time_in(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 typmod = PG_GETARG_INT32(2);
+ TimeADT result;
+ fsec_t fsec;
+ struct pg_tm tt,
+ *tm = &tt;
+ int tz;
+ int nf;
+ int dterr;
+ char workbuf[MAXDATELEN + 1];
+ char *field[MAXDATEFIELDS];
+ int dtype;
+ int ftype[MAXDATEFIELDS];
+
+ dterr = ParseDateTime(str, workbuf, sizeof(workbuf),
+ field, ftype, MAXDATEFIELDS, &nf);
+ if (dterr == 0)
+ dterr = DecodeTimeOnly(field, ftype, nf, &dtype, tm, &fsec, &tz);
+ if (dterr != 0)
+ DateTimeParseError(dterr, str, "time");
+
+ tm2time(tm, fsec, &result);
+ AdjustTimeForTypmod(&result, typmod);
+
+ PG_RETURN_TIMEADT(result);
+}
+
+/* tm2time()
+ * Convert a tm structure to a time data type.
+ */
+int
+tm2time(struct pg_tm *tm, fsec_t fsec, TimeADT *result)
+{
+ *result = ((((tm->tm_hour * MINS_PER_HOUR + tm->tm_min) * SECS_PER_MINUTE) + tm->tm_sec)
+ * USECS_PER_SEC) + fsec;
+ return 0;
+}
+
+/* time_overflows()
+ * Check to see if a broken-down time-of-day is out of range.
+ */
+bool
+time_overflows(int hour, int min, int sec, fsec_t fsec)
+{
+ /* Range-check the fields individually. */
+ if (hour < 0 || hour > HOURS_PER_DAY ||
+ min < 0 || min >= MINS_PER_HOUR ||
+ sec < 0 || sec > SECS_PER_MINUTE ||
+ fsec < 0 || fsec > USECS_PER_SEC)
+ return true;
+
+ /*
+ * Because we allow, eg, hour = 24 or sec = 60, we must check separately
+ * that the total time value doesn't exceed 24:00:00.
+ */
+ if ((((((hour * MINS_PER_HOUR + min) * SECS_PER_MINUTE)
+ + sec) * USECS_PER_SEC) + fsec) > USECS_PER_DAY)
+ return true;
+
+ return false;
+}
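+
+/*
+ * Worked example: hour = 24, min = 0, sec = 0, fsec = 0 sums to exactly
+ * USECS_PER_DAY and is accepted (24:00:00 is a legal time), while anything
+ * beyond that, e.g. the same fields with fsec = 1, exceeds USECS_PER_DAY and
+ * is rejected.
+ */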
+
+/* float_time_overflows()
+ * Same, when we have seconds + fractional seconds as one "double" value.
+ */
+bool
+float_time_overflows(int hour, int min, double sec)
+{
+ /* Range-check the fields individually. */
+ if (hour < 0 || hour > HOURS_PER_DAY ||
+ min < 0 || min >= MINS_PER_HOUR)
+ return true;
+
+ /*
+ * "sec", being double, requires extra care. Cope with NaN, and round off
+ * before applying the range check to avoid unexpected errors due to
+ * imprecise input. (We assume rint() behaves sanely with infinities.)
+ */
+ if (isnan(sec))
+ return true;
+ sec = rint(sec * USECS_PER_SEC);
+ if (sec < 0 || sec > SECS_PER_MINUTE * USECS_PER_SEC)
+ return true;
+
+ /*
+ * Because we allow, eg, hour = 24 or sec = 60, we must check separately
+ * that the total time value doesn't exceed 24:00:00. This must match the
+ * way that callers will convert the fields to a time.
+ */
+ if (((((hour * MINS_PER_HOUR + min) * SECS_PER_MINUTE)
+ * USECS_PER_SEC) + (int64) sec) > USECS_PER_DAY)
+ return true;
+
+ return false;
+}
+
+
+/* time2tm()
+ * Convert time data type to POSIX time structure.
+ *
+ * Note that only the hour/min/sec/fractional-sec fields are filled in.
+ */
+int
+time2tm(TimeADT time, struct pg_tm *tm, fsec_t *fsec)
+{
+ tm->tm_hour = time / USECS_PER_HOUR;
+ time -= tm->tm_hour * USECS_PER_HOUR;
+ tm->tm_min = time / USECS_PER_MINUTE;
+ time -= tm->tm_min * USECS_PER_MINUTE;
+ tm->tm_sec = time / USECS_PER_SEC;
+ time -= tm->tm_sec * USECS_PER_SEC;
+ *fsec = time;
+ return 0;
+}
+
+Datum
+time_out(PG_FUNCTION_ARGS)
+{
+ TimeADT time = PG_GETARG_TIMEADT(0);
+ char *result;
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+ char buf[MAXDATELEN + 1];
+
+ time2tm(time, tm, &fsec);
+ EncodeTimeOnly(tm, fsec, false, 0, DateStyle, buf);
+
+ result = pstrdup(buf);
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * time_recv - converts external binary format to time
+ */
+Datum
+time_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 typmod = PG_GETARG_INT32(2);
+ TimeADT result;
+
+ result = pq_getmsgint64(buf);
+
+ if (result < INT64CONST(0) || result > USECS_PER_DAY)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("time out of range")));
+
+ AdjustTimeForTypmod(&result, typmod);
+
+ PG_RETURN_TIMEADT(result);
+}
+
+/*
+ * time_send - converts time to binary format
+ */
+Datum
+time_send(PG_FUNCTION_ARGS)
+{
+ TimeADT time = PG_GETARG_TIMEADT(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendint64(&buf, time);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+Datum
+timetypmodin(PG_FUNCTION_ARGS)
+{
+ ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
+
+ PG_RETURN_INT32(anytime_typmodin(false, ta));
+}
+
+Datum
+timetypmodout(PG_FUNCTION_ARGS)
+{
+ int32 typmod = PG_GETARG_INT32(0);
+
+ PG_RETURN_CSTRING(anytime_typmodout(false, typmod));
+}
+
+/*
+ * make_time - time constructor
+ */
+Datum
+make_time(PG_FUNCTION_ARGS)
+{
+ int tm_hour = PG_GETARG_INT32(0);
+ int tm_min = PG_GETARG_INT32(1);
+ double sec = PG_GETARG_FLOAT8(2);
+ TimeADT time;
+
+ /* Check for time overflow */
+ if (float_time_overflows(tm_hour, tm_min, sec))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_FIELD_OVERFLOW),
+ errmsg("time field value out of range: %d:%02d:%02g",
+ tm_hour, tm_min, sec)));
+
+ /* This should match tm2time */
+ time = (((tm_hour * MINS_PER_HOUR + tm_min) * SECS_PER_MINUTE)
+ * USECS_PER_SEC) + (int64) rint(sec * USECS_PER_SEC);
+
+ PG_RETURN_TIMEADT(time);
+}
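+
+/*
+ * Example: make_time(8, 15, 23.5) produces '08:15:23.5'; the seconds
+ * argument is rounded to whole microseconds via rint() before being added.
+ */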
+
+
+/* time_support()
+ *
+ * Planner support function for the time_scale() and timetz_scale()
+ * length coercion functions (we need not distinguish them here).
+ */
+Datum
+time_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+ Node *ret = NULL;
+
+ if (IsA(rawreq, SupportRequestSimplify))
+ {
+ SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq;
+
+ ret = TemporalSimplify(MAX_TIME_PRECISION, (Node *) req->fcall);
+ }
+
+ PG_RETURN_POINTER(ret);
+}
+
+/* time_scale()
+ * Adjust time type for specified scale factor.
+ * Used by PostgreSQL type system to stuff columns.
+ */
+Datum
+time_scale(PG_FUNCTION_ARGS)
+{
+ TimeADT time = PG_GETARG_TIMEADT(0);
+ int32 typmod = PG_GETARG_INT32(1);
+ TimeADT result;
+
+ result = time;
+ AdjustTimeForTypmod(&result, typmod);
+
+ PG_RETURN_TIMEADT(result);
+}
+
+/* AdjustTimeForTypmod()
+ * Force the precision of the time value to a specified value.
+ * Uses *exactly* the same code as in AdjustTimestampForTypmod()
+ * but we make a separate copy because those types do not
+ * have a fundamental tie together but rather a coincidence of
+ * implementation. - thomas
+ */
+void
+AdjustTimeForTypmod(TimeADT *time, int32 typmod)
+{
+ static const int64 TimeScales[MAX_TIME_PRECISION + 1] = {
+ INT64CONST(1000000),
+ INT64CONST(100000),
+ INT64CONST(10000),
+ INT64CONST(1000),
+ INT64CONST(100),
+ INT64CONST(10),
+ INT64CONST(1)
+ };
+
+ static const int64 TimeOffsets[MAX_TIME_PRECISION + 1] = {
+ INT64CONST(500000),
+ INT64CONST(50000),
+ INT64CONST(5000),
+ INT64CONST(500),
+ INT64CONST(50),
+ INT64CONST(5),
+ INT64CONST(0)
+ };
+
+ if (typmod >= 0 && typmod <= MAX_TIME_PRECISION)
+ {
+ if (*time >= INT64CONST(0))
+ *time = ((*time + TimeOffsets[typmod]) / TimeScales[typmod]) *
+ TimeScales[typmod];
+ else
+ *time = -((((-*time) + TimeOffsets[typmod]) / TimeScales[typmod]) *
+ TimeScales[typmod]);
+ }
+}
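+
+/*
+ * Worked example: a time of 45296123456 usec (12:34:56.123456) adjusted to
+ * typmod 3 becomes (45296123456 + 500) / 1000 * 1000 = 45296123000, i.e.
+ * 12:34:56.123; a fractional part of .123500 would round up to .124.
+ */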
+
+
+Datum
+time_eq(PG_FUNCTION_ARGS)
+{
+ TimeADT time1 = PG_GETARG_TIMEADT(0);
+ TimeADT time2 = PG_GETARG_TIMEADT(1);
+
+ PG_RETURN_BOOL(time1 == time2);
+}
+
+Datum
+time_ne(PG_FUNCTION_ARGS)
+{
+ TimeADT time1 = PG_GETARG_TIMEADT(0);
+ TimeADT time2 = PG_GETARG_TIMEADT(1);
+
+ PG_RETURN_BOOL(time1 != time2);
+}
+
+Datum
+time_lt(PG_FUNCTION_ARGS)
+{
+ TimeADT time1 = PG_GETARG_TIMEADT(0);
+ TimeADT time2 = PG_GETARG_TIMEADT(1);
+
+ PG_RETURN_BOOL(time1 < time2);
+}
+
+Datum
+time_le(PG_FUNCTION_ARGS)
+{
+ TimeADT time1 = PG_GETARG_TIMEADT(0);
+ TimeADT time2 = PG_GETARG_TIMEADT(1);
+
+ PG_RETURN_BOOL(time1 <= time2);
+}
+
+Datum
+time_gt(PG_FUNCTION_ARGS)
+{
+ TimeADT time1 = PG_GETARG_TIMEADT(0);
+ TimeADT time2 = PG_GETARG_TIMEADT(1);
+
+ PG_RETURN_BOOL(time1 > time2);
+}
+
+Datum
+time_ge(PG_FUNCTION_ARGS)
+{
+ TimeADT time1 = PG_GETARG_TIMEADT(0);
+ TimeADT time2 = PG_GETARG_TIMEADT(1);
+
+ PG_RETURN_BOOL(time1 >= time2);
+}
+
+Datum
+time_cmp(PG_FUNCTION_ARGS)
+{
+ TimeADT time1 = PG_GETARG_TIMEADT(0);
+ TimeADT time2 = PG_GETARG_TIMEADT(1);
+
+ if (time1 < time2)
+ PG_RETURN_INT32(-1);
+ if (time1 > time2)
+ PG_RETURN_INT32(1);
+ PG_RETURN_INT32(0);
+}
+
+Datum
+time_hash(PG_FUNCTION_ARGS)
+{
+ return hashint8(fcinfo);
+}
+
+Datum
+time_hash_extended(PG_FUNCTION_ARGS)
+{
+ return hashint8extended(fcinfo);
+}
+
+Datum
+time_larger(PG_FUNCTION_ARGS)
+{
+ TimeADT time1 = PG_GETARG_TIMEADT(0);
+ TimeADT time2 = PG_GETARG_TIMEADT(1);
+
+ PG_RETURN_TIMEADT((time1 > time2) ? time1 : time2);
+}
+
+Datum
+time_smaller(PG_FUNCTION_ARGS)
+{
+ TimeADT time1 = PG_GETARG_TIMEADT(0);
+ TimeADT time2 = PG_GETARG_TIMEADT(1);
+
+ PG_RETURN_TIMEADT((time1 < time2) ? time1 : time2);
+}
+
+/* overlaps_time() --- implements the SQL OVERLAPS operator.
+ *
+ * Algorithm is per SQL spec. This is much harder than you'd think
+ * because the spec requires us to deliver a non-null answer in some cases
+ * where some of the inputs are null.
+ */
+Datum
+overlaps_time(PG_FUNCTION_ARGS)
+{
+ /*
+ * The arguments are TimeADT, but we leave them as generic Datums to avoid
+ * dereferencing nulls (TimeADT is pass-by-reference!)
+ */
+ Datum ts1 = PG_GETARG_DATUM(0);
+ Datum te1 = PG_GETARG_DATUM(1);
+ Datum ts2 = PG_GETARG_DATUM(2);
+ Datum te2 = PG_GETARG_DATUM(3);
+ bool ts1IsNull = PG_ARGISNULL(0);
+ bool te1IsNull = PG_ARGISNULL(1);
+ bool ts2IsNull = PG_ARGISNULL(2);
+ bool te2IsNull = PG_ARGISNULL(3);
+
+#define TIMEADT_GT(t1,t2) \
+ (DatumGetTimeADT(t1) > DatumGetTimeADT(t2))
+#define TIMEADT_LT(t1,t2) \
+ (DatumGetTimeADT(t1) < DatumGetTimeADT(t2))
+
+ /*
+ * If both endpoints of interval 1 are null, the result is null (unknown).
+ * If just one endpoint is null, take ts1 as the non-null one. Otherwise,
+ * take ts1 as the lesser endpoint.
+ */
+ if (ts1IsNull)
+ {
+ if (te1IsNull)
+ PG_RETURN_NULL();
+ /* swap null for non-null */
+ ts1 = te1;
+ te1IsNull = true;
+ }
+ else if (!te1IsNull)
+ {
+ if (TIMEADT_GT(ts1, te1))
+ {
+ Datum tt = ts1;
+
+ ts1 = te1;
+ te1 = tt;
+ }
+ }
+
+ /* Likewise for interval 2. */
+ if (ts2IsNull)
+ {
+ if (te2IsNull)
+ PG_RETURN_NULL();
+ /* swap null for non-null */
+ ts2 = te2;
+ te2IsNull = true;
+ }
+ else if (!te2IsNull)
+ {
+ if (TIMEADT_GT(ts2, te2))
+ {
+ Datum tt = ts2;
+
+ ts2 = te2;
+ te2 = tt;
+ }
+ }
+
+ /*
+ * At this point neither ts1 nor ts2 is null, so we can consider three
+ * cases: ts1 > ts2, ts1 < ts2, ts1 = ts2
+ */
+ if (TIMEADT_GT(ts1, ts2))
+ {
+ /*
+ * This case is ts1 < te2 OR te1 < te2, which may look redundant but
+ * in the presence of nulls it's not quite completely so.
+ */
+ if (te2IsNull)
+ PG_RETURN_NULL();
+ if (TIMEADT_LT(ts1, te2))
+ PG_RETURN_BOOL(true);
+ if (te1IsNull)
+ PG_RETURN_NULL();
+
+ /*
+ * If te1 is not null then we had ts1 <= te1 above, and we just found
+ * ts1 >= te2, hence te1 >= te2.
+ */
+ PG_RETURN_BOOL(false);
+ }
+ else if (TIMEADT_LT(ts1, ts2))
+ {
+ /* This case is ts2 < te1 OR te2 < te1 */
+ if (te1IsNull)
+ PG_RETURN_NULL();
+ if (TIMEADT_LT(ts2, te1))
+ PG_RETURN_BOOL(true);
+ if (te2IsNull)
+ PG_RETURN_NULL();
+
+ /*
+ * If te2 is not null then we had ts2 <= te2 above, and we just found
+ * ts2 >= te1, hence te2 >= te1.
+ */
+ PG_RETURN_BOOL(false);
+ }
+ else
+ {
+ /*
+ * For ts1 = ts2 the spec says te1 <> te2 OR te1 = te2, which is a
+ * rather silly way of saying "true if both are nonnull, else null".
+ */
+ if (te1IsNull || te2IsNull)
+ PG_RETURN_NULL();
+ PG_RETURN_BOOL(true);
+ }
+
+#undef TIMEADT_GT
+#undef TIMEADT_LT
+}
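+
+/*
+ * Example of the semantics implemented above: (10:00, 11:00) OVERLAPS
+ * (10:30, 12:00) is true, while the merely adjacent pair (10:00, 11:00)
+ * OVERLAPS (11:00, 12:00) is false, because the tests use strict less-than
+ * on the endpoints after they have been swapped into ascending order.
+ */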
+
+/* timestamp_time()
+ * Convert timestamp to time data type.
+ */
+Datum
+timestamp_time(PG_FUNCTION_ARGS)
+{
+ Timestamp timestamp = PG_GETARG_TIMESTAMP(0);
+ TimeADT result;
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ PG_RETURN_NULL();
+
+ if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ /*
+ * Could also do this with time = (timestamp / USECS_PER_DAY *
+ * USECS_PER_DAY) - timestamp;
+ */
+ result = ((((tm->tm_hour * MINS_PER_HOUR + tm->tm_min) * SECS_PER_MINUTE) + tm->tm_sec) *
+ USECS_PER_SEC) + fsec;
+
+ PG_RETURN_TIMEADT(result);
+}
+
+/* timestamptz_time()
+ * Convert timestamptz to time data type.
+ */
+Datum
+timestamptz_time(PG_FUNCTION_ARGS)
+{
+ TimestampTz timestamp = PG_GETARG_TIMESTAMP(0);
+ TimeADT result;
+ struct pg_tm tt,
+ *tm = &tt;
+ int tz;
+ fsec_t fsec;
+
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ PG_RETURN_NULL();
+
+ if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ /*
+ * Could also do this with time = (timestamp / USECS_PER_DAY *
+ * USECS_PER_DAY) - timestamp;
+ */
+ result = ((((tm->tm_hour * MINS_PER_HOUR + tm->tm_min) * SECS_PER_MINUTE) + tm->tm_sec) *
+ USECS_PER_SEC) + fsec;
+
+ PG_RETURN_TIMEADT(result);
+}
+
+/* datetime_timestamp()
+ * Convert date and time to timestamp data type.
+ */
+Datum
+datetime_timestamp(PG_FUNCTION_ARGS)
+{
+ DateADT date = PG_GETARG_DATEADT(0);
+ TimeADT time = PG_GETARG_TIMEADT(1);
+ Timestamp result;
+
+ result = date2timestamp(date);
+ if (!TIMESTAMP_NOT_FINITE(result))
+ {
+ result += time;
+ if (!IS_VALID_TIMESTAMP(result))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ }
+
+ PG_RETURN_TIMESTAMP(result);
+}
+
+/* time_interval()
+ * Convert time to interval data type.
+ */
+Datum
+time_interval(PG_FUNCTION_ARGS)
+{
+ TimeADT time = PG_GETARG_TIMEADT(0);
+ Interval *result;
+
+ result = (Interval *) palloc(sizeof(Interval));
+
+ result->time = time;
+ result->day = 0;
+ result->month = 0;
+
+ PG_RETURN_INTERVAL_P(result);
+}
+
+/* interval_time()
+ * Convert interval to time data type.
+ *
+ * This is defined as producing the fractional-day portion of the interval.
+ * Therefore, we can just ignore the months field. It is not entirely clear
+ * what to do with negative intervals, but we choose to subtract the floor,
+ * so that, say, '-2 hours' becomes '22:00:00'.
+ */
+Datum
+interval_time(PG_FUNCTION_ARGS)
+{
+ Interval *span = PG_GETARG_INTERVAL_P(0);
+ TimeADT result;
+ int64 days;
+
+ result = span->time;
+ if (result >= USECS_PER_DAY)
+ {
+ days = result / USECS_PER_DAY;
+ result -= days * USECS_PER_DAY;
+ }
+ else if (result < 0)
+ {
+ days = (-result + USECS_PER_DAY - 1) / USECS_PER_DAY;
+ result += days * USECS_PER_DAY;
+ }
+
+ PG_RETURN_TIMEADT(result);
+}
+
+/* time_mi_time()
+ * Subtract two times to produce an interval.
+ */
+Datum
+time_mi_time(PG_FUNCTION_ARGS)
+{
+ TimeADT time1 = PG_GETARG_TIMEADT(0);
+ TimeADT time2 = PG_GETARG_TIMEADT(1);
+ Interval *result;
+
+ result = (Interval *) palloc(sizeof(Interval));
+
+ result->month = 0;
+ result->day = 0;
+ result->time = time1 - time2;
+
+ PG_RETURN_INTERVAL_P(result);
+}
+
+/* time_pl_interval()
+ * Add interval to time.
+ */
+Datum
+time_pl_interval(PG_FUNCTION_ARGS)
+{
+ TimeADT time = PG_GETARG_TIMEADT(0);
+ Interval *span = PG_GETARG_INTERVAL_P(1);
+ TimeADT result;
+
+ result = time + span->time;
+ result -= result / USECS_PER_DAY * USECS_PER_DAY;
+ if (result < INT64CONST(0))
+ result += USECS_PER_DAY;
+
+ PG_RETURN_TIMEADT(result);
+}
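+
+/*
+ * Worked example of the wraparound above: '23:00' + interval '2 hours' is
+ * 25:00, which is reduced modulo USECS_PER_DAY to '01:00:00'; subtraction
+ * past midnight wraps the same way in time_mi_interval below.
+ */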
+
+/* time_mi_interval()
+ * Subtract interval from time.
+ */
+Datum
+time_mi_interval(PG_FUNCTION_ARGS)
+{
+ TimeADT time = PG_GETARG_TIMEADT(0);
+ Interval *span = PG_GETARG_INTERVAL_P(1);
+ TimeADT result;
+
+ result = time - span->time;
+ result -= result / USECS_PER_DAY * USECS_PER_DAY;
+ if (result < INT64CONST(0))
+ result += USECS_PER_DAY;
+
+ PG_RETURN_TIMEADT(result);
+}
+
+/*
+ * in_range support function for time.
+ */
+Datum
+in_range_time_interval(PG_FUNCTION_ARGS)
+{
+ TimeADT val = PG_GETARG_TIMEADT(0);
+ TimeADT base = PG_GETARG_TIMEADT(1);
+ Interval *offset = PG_GETARG_INTERVAL_P(2);
+ bool sub = PG_GETARG_BOOL(3);
+ bool less = PG_GETARG_BOOL(4);
+ TimeADT sum;
+
+ /*
+ * Like time_pl_interval/time_mi_interval, we disregard the month and day
+ * fields of the offset. So our test for negative should too.
+ */
+ if (offset->time < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE),
+ errmsg("invalid preceding or following size in window function")));
+
+ /*
+ * We can't use time_pl_interval/time_mi_interval here, because their
+ * wraparound behavior would give wrong (or at least undesirable) answers.
+ * Fortunately the equivalent non-wrapping behavior is trivial, especially
+ * since we don't worry about integer overflow.
+ */
+ if (sub)
+ sum = base - offset->time;
+ else
+ sum = base + offset->time;
+
+ if (less)
+ PG_RETURN_BOOL(val <= sum);
+ else
+ PG_RETURN_BOOL(val >= sum);
+}
+
+
+/* time_part() and extract_time()
+ * Extract specified field from time type.
+ */
+static Datum
+time_part_common(PG_FUNCTION_ARGS, bool retnumeric)
+{
+ text *units = PG_GETARG_TEXT_PP(0);
+ TimeADT time = PG_GETARG_TIMEADT(1);
+ int64 intresult;
+ int type,
+ val;
+ char *lowunits;
+
+ lowunits = downcase_truncate_identifier(VARDATA_ANY(units),
+ VARSIZE_ANY_EXHDR(units),
+ false);
+
+ type = DecodeUnits(0, lowunits, &val);
+ if (type == UNKNOWN_FIELD)
+ type = DecodeSpecial(0, lowunits, &val);
+
+ if (type == UNITS)
+ {
+ fsec_t fsec;
+ struct pg_tm tt,
+ *tm = &tt;
+
+ time2tm(time, tm, &fsec);
+
+ switch (val)
+ {
+ case DTK_MICROSEC:
+ intresult = tm->tm_sec * INT64CONST(1000000) + fsec;
+ break;
+
+ case DTK_MILLISEC:
+ if (retnumeric)
+ /*---
+ * tm->tm_sec * 1000 + fsec / 1000
+ * = (tm->tm_sec * 1'000'000 + fsec) / 1000
+ */
+ PG_RETURN_NUMERIC(int64_div_fast_to_numeric(tm->tm_sec * INT64CONST(1000000) + fsec, 3));
+ else
+ PG_RETURN_FLOAT8(tm->tm_sec * 1000.0 + fsec / 1000.0);
+ break;
+
+ case DTK_SECOND:
+ if (retnumeric)
+ /*---
+ * tm->tm_sec + fsec / 1'000'000
+ * = (tm->tm_sec * 1'000'000 + fsec) / 1'000'000
+ */
+ PG_RETURN_NUMERIC(int64_div_fast_to_numeric(tm->tm_sec * INT64CONST(1000000) + fsec, 6));
+ else
+ PG_RETURN_FLOAT8(tm->tm_sec + fsec / 1000000.0);
+ break;
+
+ case DTK_MINUTE:
+ intresult = tm->tm_min;
+ break;
+
+ case DTK_HOUR:
+ intresult = tm->tm_hour;
+ break;
+
+ case DTK_TZ:
+ case DTK_TZ_MINUTE:
+ case DTK_TZ_HOUR:
+ case DTK_DAY:
+ case DTK_MONTH:
+ case DTK_QUARTER:
+ case DTK_YEAR:
+ case DTK_DECADE:
+ case DTK_CENTURY:
+ case DTK_MILLENNIUM:
+ case DTK_ISOYEAR:
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("unit \"%s\" not supported for type %s",
+ lowunits, format_type_be(TIMEOID))));
+ intresult = 0;
+ }
+ }
+ else if (type == RESERV && val == DTK_EPOCH)
+ {
+ if (retnumeric)
+ PG_RETURN_NUMERIC(int64_div_fast_to_numeric(time, 6));
+ else
+ PG_RETURN_FLOAT8(time / 1000000.0);
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unit \"%s\" not recognized for type %s",
+ lowunits, format_type_be(TIMEOID))));
+ intresult = 0;
+ }
+
+ if (retnumeric)
+ PG_RETURN_NUMERIC(int64_to_numeric(intresult));
+ else
+ PG_RETURN_FLOAT8(intresult);
+}
+
+Datum
+time_part(PG_FUNCTION_ARGS)
+{
+ return time_part_common(fcinfo, false);
+}
+
+Datum
+extract_time(PG_FUNCTION_ARGS)
+{
+ return time_part_common(fcinfo, true);
+}
+
+
+/*****************************************************************************
+ * Time With Time Zone ADT
+ *****************************************************************************/
+
+/* tm2timetz()
+ * Convert a tm structure to a time data type.
+ */
+int
+tm2timetz(struct pg_tm *tm, fsec_t fsec, int tz, TimeTzADT *result)
+{
+ result->time = ((((tm->tm_hour * MINS_PER_HOUR + tm->tm_min) * SECS_PER_MINUTE) + tm->tm_sec) *
+ USECS_PER_SEC) + fsec;
+ result->zone = tz;
+
+ return 0;
+}
+
+Datum
+timetz_in(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 typmod = PG_GETARG_INT32(2);
+ TimeTzADT *result;
+ fsec_t fsec;
+ struct pg_tm tt,
+ *tm = &tt;
+ int tz;
+ int nf;
+ int dterr;
+ char workbuf[MAXDATELEN + 1];
+ char *field[MAXDATEFIELDS];
+ int dtype;
+ int ftype[MAXDATEFIELDS];
+
+ dterr = ParseDateTime(str, workbuf, sizeof(workbuf),
+ field, ftype, MAXDATEFIELDS, &nf);
+ if (dterr == 0)
+ dterr = DecodeTimeOnly(field, ftype, nf, &dtype, tm, &fsec, &tz);
+ if (dterr != 0)
+ DateTimeParseError(dterr, str, "time with time zone");
+
+ result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
+ tm2timetz(tm, fsec, tz, result);
+ AdjustTimeForTypmod(&(result->time), typmod);
+
+ PG_RETURN_TIMETZADT_P(result);
+}
+
+Datum
+timetz_out(PG_FUNCTION_ARGS)
+{
+ TimeTzADT *time = PG_GETARG_TIMETZADT_P(0);
+ char *result;
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+ int tz;
+ char buf[MAXDATELEN + 1];
+
+ timetz2tm(time, tm, &fsec, &tz);
+ EncodeTimeOnly(tm, fsec, true, tz, DateStyle, buf);
+
+ result = pstrdup(buf);
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * timetz_recv - converts external binary format to timetz
+ */
+Datum
+timetz_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 typmod = PG_GETARG_INT32(2);
+ TimeTzADT *result;
+
+ result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
+
+ result->time = pq_getmsgint64(buf);
+
+ if (result->time < INT64CONST(0) || result->time > USECS_PER_DAY)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("time out of range")));
+
+ result->zone = pq_getmsgint(buf, sizeof(result->zone));
+
+ /* Check for sane GMT displacement; see notes in datatype/timestamp.h */
+ if (result->zone <= -TZDISP_LIMIT || result->zone >= TZDISP_LIMIT)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TIME_ZONE_DISPLACEMENT_VALUE),
+ errmsg("time zone displacement out of range")));
+
+ AdjustTimeForTypmod(&(result->time), typmod);
+
+ PG_RETURN_TIMETZADT_P(result);
+}
+
+/*
+ * timetz_send - converts timetz to binary format
+ */
+Datum
+timetz_send(PG_FUNCTION_ARGS)
+{
+ TimeTzADT *time = PG_GETARG_TIMETZADT_P(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendint64(&buf, time->time);
+ pq_sendint32(&buf, time->zone);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+Datum
+timetztypmodin(PG_FUNCTION_ARGS)
+{
+ ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
+
+ PG_RETURN_INT32(anytime_typmodin(true, ta));
+}
+
+Datum
+timetztypmodout(PG_FUNCTION_ARGS)
+{
+ int32 typmod = PG_GETARG_INT32(0);
+
+ PG_RETURN_CSTRING(anytime_typmodout(true, typmod));
+}
+
+
+/* timetz2tm()
+ * Convert TIME WITH TIME ZONE data type to POSIX time structure.
+ */
+int
+timetz2tm(TimeTzADT *time, struct pg_tm *tm, fsec_t *fsec, int *tzp)
+{
+ TimeOffset trem = time->time;
+
+ tm->tm_hour = trem / USECS_PER_HOUR;
+ trem -= tm->tm_hour * USECS_PER_HOUR;
+ tm->tm_min = trem / USECS_PER_MINUTE;
+ trem -= tm->tm_min * USECS_PER_MINUTE;
+ tm->tm_sec = trem / USECS_PER_SEC;
+ *fsec = trem - tm->tm_sec * USECS_PER_SEC;
+
+ if (tzp != NULL)
+ *tzp = time->zone;
+
+ return 0;
+}
+
+/* timetz_scale()
+ * Adjust time type for specified scale factor.
+ * Used by PostgreSQL type system to stuff columns.
+ */
+Datum
+timetz_scale(PG_FUNCTION_ARGS)
+{
+ TimeTzADT *time = PG_GETARG_TIMETZADT_P(0);
+ int32 typmod = PG_GETARG_INT32(1);
+ TimeTzADT *result;
+
+ result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
+
+ result->time = time->time;
+ result->zone = time->zone;
+
+ AdjustTimeForTypmod(&(result->time), typmod);
+
+ PG_RETURN_TIMETZADT_P(result);
+}
+
+
+static int
+timetz_cmp_internal(TimeTzADT *time1, TimeTzADT *time2)
+{
+ TimeOffset t1,
+ t2;
+
+ /* Primary sort is by true (GMT-equivalent) time */
+ t1 = time1->time + (time1->zone * USECS_PER_SEC);
+ t2 = time2->time + (time2->zone * USECS_PER_SEC);
+
+ if (t1 > t2)
+ return 1;
+ if (t1 < t2)
+ return -1;
+
+ /*
+ * If same GMT time, sort by timezone; we only want to say that two
+ * timetz's are equal if both the time and zone parts are equal.
+ */
+ if (time1->zone > time2->zone)
+ return 1;
+ if (time1->zone < time2->zone)
+ return -1;
+
+ return 0;
+}
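+
+/*
+ * Example: '11:00:00+01' and '12:00:00+02' denote the same instant
+ * (10:00 UTC), so the GMT-equivalent primary keys tie; the zone tiebreak
+ * then distinguishes them, and timetz_eq reports the pair as not equal.
+ */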
+
+Datum
+timetz_eq(PG_FUNCTION_ARGS)
+{
+ TimeTzADT *time1 = PG_GETARG_TIMETZADT_P(0);
+ TimeTzADT *time2 = PG_GETARG_TIMETZADT_P(1);
+
+ PG_RETURN_BOOL(timetz_cmp_internal(time1, time2) == 0);
+}
+
+Datum
+timetz_ne(PG_FUNCTION_ARGS)
+{
+ TimeTzADT *time1 = PG_GETARG_TIMETZADT_P(0);
+ TimeTzADT *time2 = PG_GETARG_TIMETZADT_P(1);
+
+ PG_RETURN_BOOL(timetz_cmp_internal(time1, time2) != 0);
+}
+
+Datum
+timetz_lt(PG_FUNCTION_ARGS)
+{
+ TimeTzADT *time1 = PG_GETARG_TIMETZADT_P(0);
+ TimeTzADT *time2 = PG_GETARG_TIMETZADT_P(1);
+
+ PG_RETURN_BOOL(timetz_cmp_internal(time1, time2) < 0);
+}
+
+Datum
+timetz_le(PG_FUNCTION_ARGS)
+{
+ TimeTzADT *time1 = PG_GETARG_TIMETZADT_P(0);
+ TimeTzADT *time2 = PG_GETARG_TIMETZADT_P(1);
+
+ PG_RETURN_BOOL(timetz_cmp_internal(time1, time2) <= 0);
+}
+
+Datum
+timetz_gt(PG_FUNCTION_ARGS)
+{
+ TimeTzADT *time1 = PG_GETARG_TIMETZADT_P(0);
+ TimeTzADT *time2 = PG_GETARG_TIMETZADT_P(1);
+
+ PG_RETURN_BOOL(timetz_cmp_internal(time1, time2) > 0);
+}
+
+Datum
+timetz_ge(PG_FUNCTION_ARGS)
+{
+ TimeTzADT *time1 = PG_GETARG_TIMETZADT_P(0);
+ TimeTzADT *time2 = PG_GETARG_TIMETZADT_P(1);
+
+ PG_RETURN_BOOL(timetz_cmp_internal(time1, time2) >= 0);
+}
+
+Datum
+timetz_cmp(PG_FUNCTION_ARGS)
+{
+ TimeTzADT *time1 = PG_GETARG_TIMETZADT_P(0);
+ TimeTzADT *time2 = PG_GETARG_TIMETZADT_P(1);
+
+ PG_RETURN_INT32(timetz_cmp_internal(time1, time2));
+}
+
+Datum
+timetz_hash(PG_FUNCTION_ARGS)
+{
+ TimeTzADT *key = PG_GETARG_TIMETZADT_P(0);
+ uint32 thash;
+
+ /*
+ * To avoid any problems with padding bytes in the struct, we figure the
+ * field hashes separately and XOR them.
+ */
+ thash = DatumGetUInt32(DirectFunctionCall1(hashint8,
+ Int64GetDatumFast(key->time)));
+ thash ^= DatumGetUInt32(hash_uint32(key->zone));
+ PG_RETURN_UINT32(thash);
+}
+
+Datum
+timetz_hash_extended(PG_FUNCTION_ARGS)
+{
+ TimeTzADT *key = PG_GETARG_TIMETZADT_P(0);
+ Datum seed = PG_GETARG_DATUM(1);
+ uint64 thash;
+
+ /* Same approach as timetz_hash */
+ thash = DatumGetUInt64(DirectFunctionCall2(hashint8extended,
+ Int64GetDatumFast(key->time),
+ seed));
+ thash ^= DatumGetUInt64(hash_uint32_extended(key->zone,
+ DatumGetInt64(seed)));
+ PG_RETURN_UINT64(thash);
+}
+
+Datum
+timetz_larger(PG_FUNCTION_ARGS)
+{
+ TimeTzADT *time1 = PG_GETARG_TIMETZADT_P(0);
+ TimeTzADT *time2 = PG_GETARG_TIMETZADT_P(1);
+ TimeTzADT *result;
+
+ if (timetz_cmp_internal(time1, time2) > 0)
+ result = time1;
+ else
+ result = time2;
+ PG_RETURN_TIMETZADT_P(result);
+}
+
+Datum
+timetz_smaller(PG_FUNCTION_ARGS)
+{
+ TimeTzADT *time1 = PG_GETARG_TIMETZADT_P(0);
+ TimeTzADT *time2 = PG_GETARG_TIMETZADT_P(1);
+ TimeTzADT *result;
+
+ if (timetz_cmp_internal(time1, time2) < 0)
+ result = time1;
+ else
+ result = time2;
+ PG_RETURN_TIMETZADT_P(result);
+}
+
+/* timetz_pl_interval()
+ * Add interval to timetz.
+ */
+Datum
+timetz_pl_interval(PG_FUNCTION_ARGS)
+{
+ TimeTzADT *time = PG_GETARG_TIMETZADT_P(0);
+ Interval *span = PG_GETARG_INTERVAL_P(1);
+ TimeTzADT *result;
+
+ result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
+
+ result->time = time->time + span->time;
+ result->time -= result->time / USECS_PER_DAY * USECS_PER_DAY;
+ if (result->time < INT64CONST(0))
+ result->time += USECS_PER_DAY;
+
+ result->zone = time->zone;
+
+ PG_RETURN_TIMETZADT_P(result);
+}
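+
+/*
+ * Worked example of the wraparound arithmetic above (illustrative values):
+ * adding interval '2 hours' to 23:00 gives a raw time of 25 hours, i.e.
+ * 90000000000 us; 90000000000 / USECS_PER_DAY * USECS_PER_DAY removes one
+ * whole day, leaving 01:00, and the zone is carried over unchanged.
+ */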
+
+/* timetz_mi_interval()
+ * Subtract interval from timetz.
+ */
+Datum
+timetz_mi_interval(PG_FUNCTION_ARGS)
+{
+ TimeTzADT *time = PG_GETARG_TIMETZADT_P(0);
+ Interval *span = PG_GETARG_INTERVAL_P(1);
+ TimeTzADT *result;
+
+ result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
+
+ result->time = time->time - span->time;
+ result->time -= result->time / USECS_PER_DAY * USECS_PER_DAY;
+ if (result->time < INT64CONST(0))
+ result->time += USECS_PER_DAY;
+
+ result->zone = time->zone;
+
+ PG_RETURN_TIMETZADT_P(result);
+}
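+
+/*
+ * Worked example for the negative case (illustrative values): subtracting
+ * interval '2 hours' from 01:00 gives -3600000000 us; integer division by
+ * USECS_PER_DAY truncates toward zero, so the modulo step leaves the value
+ * unchanged, and the following check adds one day to yield 23:00.
+ */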
+
+/*
+ * in_range support function for timetz.
+ */
+Datum
+in_range_timetz_interval(PG_FUNCTION_ARGS)
+{
+ TimeTzADT *val = PG_GETARG_TIMETZADT_P(0);
+ TimeTzADT *base = PG_GETARG_TIMETZADT_P(1);
+ Interval *offset = PG_GETARG_INTERVAL_P(2);
+ bool sub = PG_GETARG_BOOL(3);
+ bool less = PG_GETARG_BOOL(4);
+ TimeTzADT sum;
+
+ /*
+ * Like timetz_pl_interval/timetz_mi_interval, we disregard the month and
+ * day fields of the offset. So our test for negative should too.
+ */
+ if (offset->time < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE),
+ errmsg("invalid preceding or following size in window function")));
+
+ /*
+ * We can't use timetz_pl_interval/timetz_mi_interval here, because their
+ * wraparound behavior would give wrong (or at least undesirable) answers.
+ * Fortunately the equivalent non-wrapping behavior is trivial, especially
+ * since we don't worry about integer overflow.
+ */
+ if (sub)
+ sum.time = base->time - offset->time;
+ else
+ sum.time = base->time + offset->time;
+ sum.zone = base->zone;
+
+ if (less)
+ PG_RETURN_BOOL(timetz_cmp_internal(val, &sum) <= 0);
+ else
+ PG_RETURN_BOOL(timetz_cmp_internal(val, &sum) >= 0);
+}
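+
+/*
+ * For illustration of why wrapping would be wrong here (illustrative values,
+ * equal zones assumed): with base = 23:00 and offset = interval '2 hours',
+ * sum.time becomes 25:00, beyond any valid time of day, so every value from
+ * 23:00 upward satisfies "val <= sum".  Wrapping the sum to 01:00, as
+ * timetz_pl_interval would, would instead exclude almost all of those values.
+ */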
+
+/* overlaps_timetz() --- implements the SQL OVERLAPS operator.
+ *
+ * Algorithm is per SQL spec. This is much harder than you'd think
+ * because the spec requires us to deliver a non-null answer in some cases
+ * where some of the inputs are null.
+ */
+Datum
+overlaps_timetz(PG_FUNCTION_ARGS)
+{
+ /*
+ * The arguments are TimeTzADT *, but we leave them as generic Datums for
+ * convenience of notation --- and to avoid dereferencing nulls.
+ */
+ Datum ts1 = PG_GETARG_DATUM(0);
+ Datum te1 = PG_GETARG_DATUM(1);
+ Datum ts2 = PG_GETARG_DATUM(2);
+ Datum te2 = PG_GETARG_DATUM(3);
+ bool ts1IsNull = PG_ARGISNULL(0);
+ bool te1IsNull = PG_ARGISNULL(1);
+ bool ts2IsNull = PG_ARGISNULL(2);
+ bool te2IsNull = PG_ARGISNULL(3);
+
+#define TIMETZ_GT(t1,t2) \
+ DatumGetBool(DirectFunctionCall2(timetz_gt,t1,t2))
+#define TIMETZ_LT(t1,t2) \
+ DatumGetBool(DirectFunctionCall2(timetz_lt,t1,t2))
+
+ /*
+ * If both endpoints of interval 1 are null, the result is null (unknown).
+ * If just one endpoint is null, take ts1 as the non-null one. Otherwise,
+ * take ts1 as the lesser endpoint.
+ */
+ if (ts1IsNull)
+ {
+ if (te1IsNull)
+ PG_RETURN_NULL();
+ /* swap null for non-null */
+ ts1 = te1;
+ te1IsNull = true;
+ }
+ else if (!te1IsNull)
+ {
+ if (TIMETZ_GT(ts1, te1))
+ {
+ Datum tt = ts1;
+
+ ts1 = te1;
+ te1 = tt;
+ }
+ }
+
+ /* Likewise for interval 2. */
+ if (ts2IsNull)
+ {
+ if (te2IsNull)
+ PG_RETURN_NULL();
+ /* swap null for non-null */
+ ts2 = te2;
+ te2IsNull = true;
+ }
+ else if (!te2IsNull)
+ {
+ if (TIMETZ_GT(ts2, te2))
+ {
+ Datum tt = ts2;
+
+ ts2 = te2;
+ te2 = tt;
+ }
+ }
+
+ /*
+ * At this point neither ts1 nor ts2 is null, so we can consider three
+ * cases: ts1 > ts2, ts1 < ts2, ts1 = ts2
+ */
+ if (TIMETZ_GT(ts1, ts2))
+ {
+ /*
+ * This case is ts1 < te2 OR te1 < te2, which may look redundant but
+ * in the presence of nulls it's not quite completely so.
+ */
+ if (te2IsNull)
+ PG_RETURN_NULL();
+ if (TIMETZ_LT(ts1, te2))
+ PG_RETURN_BOOL(true);
+ if (te1IsNull)
+ PG_RETURN_NULL();
+
+ /*
+ * If te1 is not null then we had ts1 <= te1 above, and we just found
+ * ts1 >= te2, hence te1 >= te2.
+ */
+ PG_RETURN_BOOL(false);
+ }
+ else if (TIMETZ_LT(ts1, ts2))
+ {
+ /* This case is ts2 < te1 OR te2 < te1 */
+ if (te1IsNull)
+ PG_RETURN_NULL();
+ if (TIMETZ_LT(ts2, te1))
+ PG_RETURN_BOOL(true);
+ if (te2IsNull)
+ PG_RETURN_NULL();
+
+ /*
+ * If te2 is not null then we had ts2 <= te2 above, and we just found
+ * ts2 >= te1, hence te2 >= te1.
+ */
+ PG_RETURN_BOOL(false);
+ }
+ else
+ {
+ /*
+ * For ts1 = ts2 the spec says te1 <> te2 OR te1 = te2, which is a
+ * rather silly way of saying "true if both are nonnull, else null".
+ */
+ if (te1IsNull || te2IsNull)
+ PG_RETURN_NULL();
+ PG_RETURN_BOOL(true);
+ }
+
+#undef TIMETZ_GT
+#undef TIMETZ_LT
+}
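+
+/*
+ * Illustrative example of the null handling (values chosen here): with
+ * interval 1 = (09:00, NULL) and interval 2 = (08:00, 10:00), ts1 = 09:00 is
+ * the sole known endpoint of interval 1; since ts1 > ts2 and ts1 < te2, the
+ * result is true even though te1 is null.  By contrast (11:00, NULL) against
+ * the same interval 2 falls through to the te1IsNull check and returns null.
+ */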
+
+
+Datum
+timetz_time(PG_FUNCTION_ARGS)
+{
+ TimeTzADT *timetz = PG_GETARG_TIMETZADT_P(0);
+ TimeADT result;
+
+ /* swallow the time zone and just return the time */
+ result = timetz->time;
+
+ PG_RETURN_TIMEADT(result);
+}
+
+
+Datum
+time_timetz(PG_FUNCTION_ARGS)
+{
+ TimeADT time = PG_GETARG_TIMEADT(0);
+ TimeTzADT *result;
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+ int tz;
+
+ GetCurrentDateTime(tm);
+ time2tm(time, tm, &fsec);
+ tz = DetermineTimeZoneOffset(tm, session_timezone);
+
+ result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
+
+ result->time = time;
+ result->zone = tz;
+
+ PG_RETURN_TIMETZADT_P(result);
+}
+
+
+/* timestamptz_timetz()
+ * Convert timestamp to timetz data type.
+ */
+Datum
+timestamptz_timetz(PG_FUNCTION_ARGS)
+{
+ TimestampTz timestamp = PG_GETARG_TIMESTAMP(0);
+ TimeTzADT *result;
+ struct pg_tm tt,
+ *tm = &tt;
+ int tz;
+ fsec_t fsec;
+
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ PG_RETURN_NULL();
+
+ if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
+
+ tm2timetz(tm, fsec, tz, result);
+
+ PG_RETURN_TIMETZADT_P(result);
+}
+
+
+/* datetimetz_timestamptz()
+ * Convert date and timetz to timestamp with time zone data type.
+ * Timestamp is stored in GMT, so add the time zone
+ * stored with the timetz to the result.
+ * - thomas 2000-03-10
+ */
+Datum
+datetimetz_timestamptz(PG_FUNCTION_ARGS)
+{
+ DateADT date = PG_GETARG_DATEADT(0);
+ TimeTzADT *time = PG_GETARG_TIMETZADT_P(1);
+ TimestampTz result;
+
+ if (DATE_IS_NOBEGIN(date))
+ TIMESTAMP_NOBEGIN(result);
+ else if (DATE_IS_NOEND(date))
+ TIMESTAMP_NOEND(result);
+ else
+ {
+ /*
+ * Date's range is wider than timestamp's, so check for boundaries.
+		 * Since dates have the same minimum values as timestamps, only the
+		 * upper boundary needs to be checked for overflow.
+ */
+ if (date >= (TIMESTAMP_END_JULIAN - POSTGRES_EPOCH_JDATE))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("date out of range for timestamp")));
+ result = date * USECS_PER_DAY + time->time + time->zone * USECS_PER_SEC;
+
+ /*
+		 * Since the time zone adjustment can push the result beyond the
+		 * allowed timestamptz range, check the range again after adding tz.
+ */
+ if (!IS_VALID_TIMESTAMP(result))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("date out of range for timestamp")));
+ }
+
+ PG_RETURN_TIMESTAMP(result);
+}
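+
+/*
+ * Worked example of the conversion formula (illustrative values, assuming
+ * the internal convention that zone holds seconds west of GMT): date
+ * '2000-01-02' is day 1 of the Postgres epoch and time '10:00:00+02' is
+ * stored with zone = -7200, so result = 1 * USECS_PER_DAY + 36000000000
+ * - 7200000000 = 115200000000 us, i.e. 2000-01-02 08:00:00 UTC, which is
+ * indeed 10:00 at UTC+2.
+ */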
+
+
+/* timetz_part() and extract_timetz()
+ * Extract specified field from time type.
+ */
+static Datum
+timetz_part_common(PG_FUNCTION_ARGS, bool retnumeric)
+{
+ text *units = PG_GETARG_TEXT_PP(0);
+ TimeTzADT *time = PG_GETARG_TIMETZADT_P(1);
+ int64 intresult;
+ int type,
+ val;
+ char *lowunits;
+
+ lowunits = downcase_truncate_identifier(VARDATA_ANY(units),
+ VARSIZE_ANY_EXHDR(units),
+ false);
+
+ type = DecodeUnits(0, lowunits, &val);
+ if (type == UNKNOWN_FIELD)
+ type = DecodeSpecial(0, lowunits, &val);
+
+ if (type == UNITS)
+ {
+ int tz;
+ fsec_t fsec;
+ struct pg_tm tt,
+ *tm = &tt;
+
+ timetz2tm(time, tm, &fsec, &tz);
+
+ switch (val)
+ {
+ case DTK_TZ:
+ intresult = -tz;
+ break;
+
+ case DTK_TZ_MINUTE:
+ intresult = (-tz / SECS_PER_MINUTE) % MINS_PER_HOUR;
+ break;
+
+ case DTK_TZ_HOUR:
+ intresult = -tz / SECS_PER_HOUR;
+ break;
+
+ case DTK_MICROSEC:
+ intresult = tm->tm_sec * INT64CONST(1000000) + fsec;
+ break;
+
+ case DTK_MILLISEC:
+ if (retnumeric)
+ /*---
+ * tm->tm_sec * 1000 + fsec / 1000
+ * = (tm->tm_sec * 1'000'000 + fsec) / 1000
+ */
+ PG_RETURN_NUMERIC(int64_div_fast_to_numeric(tm->tm_sec * INT64CONST(1000000) + fsec, 3));
+ else
+ PG_RETURN_FLOAT8(tm->tm_sec * 1000.0 + fsec / 1000.0);
+ break;
+
+ case DTK_SECOND:
+ if (retnumeric)
+ /*---
+ * tm->tm_sec + fsec / 1'000'000
+ * = (tm->tm_sec * 1'000'000 + fsec) / 1'000'000
+ */
+ PG_RETURN_NUMERIC(int64_div_fast_to_numeric(tm->tm_sec * INT64CONST(1000000) + fsec, 6));
+ else
+ PG_RETURN_FLOAT8(tm->tm_sec + fsec / 1000000.0);
+ break;
+
+ case DTK_MINUTE:
+ intresult = tm->tm_min;
+ break;
+
+ case DTK_HOUR:
+ intresult = tm->tm_hour;
+ break;
+
+ case DTK_DAY:
+ case DTK_MONTH:
+ case DTK_QUARTER:
+ case DTK_YEAR:
+ case DTK_DECADE:
+ case DTK_CENTURY:
+ case DTK_MILLENNIUM:
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("unit \"%s\" not supported for type %s",
+ lowunits, format_type_be(TIMETZOID))));
+ intresult = 0;
+ }
+ }
+ else if (type == RESERV && val == DTK_EPOCH)
+ {
+ if (retnumeric)
+ /*---
+ * time->time / 1'000'000 + time->zone
+ * = (time->time + time->zone * 1'000'000) / 1'000'000
+ */
+ PG_RETURN_NUMERIC(int64_div_fast_to_numeric(time->time + time->zone * INT64CONST(1000000), 6));
+ else
+ PG_RETURN_FLOAT8(time->time / 1000000.0 + time->zone);
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unit \"%s\" not recognized for type %s",
+ lowunits, format_type_be(TIMETZOID))));
+ intresult = 0;
+ }
+
+ if (retnumeric)
+ PG_RETURN_NUMERIC(int64_to_numeric(intresult));
+ else
+ PG_RETURN_FLOAT8(intresult);
+}
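+
+/*
+ * Worked example of the epoch computation above (illustrative values): for
+ * '10:00:00+02', time->time = 36000000000 and time->zone = -7200, so the
+ * epoch result is 36000000000 / 1000000 + (-7200) = 28800, the number of
+ * seconds past midnight UTC that the value represents.
+ */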
+
+
+Datum
+timetz_part(PG_FUNCTION_ARGS)
+{
+ return timetz_part_common(fcinfo, false);
+}
+
+Datum
+extract_timetz(PG_FUNCTION_ARGS)
+{
+ return timetz_part_common(fcinfo, true);
+}
+
+/* timetz_zone()
+ * Encode time with time zone type with specified time zone.
+ * Applies DST rules as of the transaction start time.
+ */
+Datum
+timetz_zone(PG_FUNCTION_ARGS)
+{
+ text *zone = PG_GETARG_TEXT_PP(0);
+ TimeTzADT *t = PG_GETARG_TIMETZADT_P(1);
+ TimeTzADT *result;
+ int tz;
+ char tzname[TZ_STRLEN_MAX + 1];
+ char *lowzone;
+ int type,
+ val;
+ pg_tz *tzp;
+
+ /*
+ * Look up the requested timezone. First we look in the timezone
+ * abbreviation table (to handle cases like "EST"), and if that fails, we
+ * look in the timezone database (to handle cases like
+ * "America/New_York"). (This matches the order in which timestamp input
+ * checks the cases; it's important because the timezone database unwisely
+ * uses a few zone names that are identical to offset abbreviations.)
+ */
+ text_to_cstring_buffer(zone, tzname, sizeof(tzname));
+
+ /* DecodeTimezoneAbbrev requires lowercase input */
+ lowzone = downcase_truncate_identifier(tzname,
+ strlen(tzname),
+ false);
+
+ type = DecodeTimezoneAbbrev(0, lowzone, &val, &tzp);
+
+ if (type == TZ || type == DTZ)
+ {
+ /* fixed-offset abbreviation */
+ tz = -val;
+ }
+ else if (type == DYNTZ)
+ {
+ /* dynamic-offset abbreviation, resolve using transaction start time */
+ TimestampTz now = GetCurrentTransactionStartTimestamp();
+ int isdst;
+
+ tz = DetermineTimeZoneAbbrevOffsetTS(now, tzname, tzp, &isdst);
+ }
+ else
+ {
+ /* try it as a full zone name */
+ tzp = pg_tzset(tzname);
+ if (tzp)
+ {
+ /* Get the offset-from-GMT that is valid now for the zone */
+ TimestampTz now = GetCurrentTransactionStartTimestamp();
+ struct pg_tm tm;
+ fsec_t fsec;
+
+ if (timestamp2tm(now, &tz, &tm, &fsec, NULL, tzp) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("time zone \"%s\" not recognized", tzname)));
+ tz = 0; /* keep compiler quiet */
+ }
+ }
+
+ result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
+
+ result->time = t->time + (t->zone - tz) * USECS_PER_SEC;
+ /* C99 modulo has the wrong sign convention for negative input */
+ while (result->time < INT64CONST(0))
+ result->time += USECS_PER_DAY;
+ if (result->time >= USECS_PER_DAY)
+ result->time %= USECS_PER_DAY;
+
+ result->zone = tz;
+
+ PG_RETURN_TIMETZADT_P(result);
+}
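+
+/*
+ * Worked example of the zone rotation above (illustrative values):
+ * converting '10:00:00+02' (time = 36000000000, zone = -7200) to a zone with
+ * tz = 0 gives result->time = 36000000000 + (-7200 - 0) * USECS_PER_SEC
+ * = 28800000000, i.e. 08:00:00, with result->zone = 0.
+ */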
+
+/* timetz_izone()
+ * Encode time with time zone type with specified time interval as time zone.
+ */
+Datum
+timetz_izone(PG_FUNCTION_ARGS)
+{
+ Interval *zone = PG_GETARG_INTERVAL_P(0);
+ TimeTzADT *time = PG_GETARG_TIMETZADT_P(1);
+ TimeTzADT *result;
+ int tz;
+
+ if (zone->month != 0 || zone->day != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("interval time zone \"%s\" must not include months or days",
+ DatumGetCString(DirectFunctionCall1(interval_out,
+ PointerGetDatum(zone))))));
+
+ tz = -(zone->time / USECS_PER_SEC);
+
+ result = (TimeTzADT *) palloc(sizeof(TimeTzADT));
+
+ result->time = time->time + (time->zone - tz) * USECS_PER_SEC;
+ /* C99 modulo has the wrong sign convention for negative input */
+ while (result->time < INT64CONST(0))
+ result->time += USECS_PER_DAY;
+ if (result->time >= USECS_PER_DAY)
+ result->time %= USECS_PER_DAY;
+
+ result->zone = tz;
+
+ PG_RETURN_TIMETZADT_P(result);
+}
diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
new file mode 100644
index 0000000..8891133
--- /dev/null
+++ b/src/backend/utils/adt/datetime.c
@@ -0,0 +1,5102 @@
+/*-------------------------------------------------------------------------
+ *
+ * datetime.c
+ * Support functions for date/time types.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/datetime.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+#include <limits.h>
+#include <math.h>
+
+#include "access/htup_details.h"
+#include "access/xact.h"
+#include "catalog/pg_type.h"
+#include "common/int.h"
+#include "common/string.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
+#include "utils/builtins.h"
+#include "utils/date.h"
+#include "utils/datetime.h"
+#include "utils/memutils.h"
+#include "utils/tzparser.h"
+
+static int DecodeNumber(int flen, char *field, bool haveTextMonth,
+ int fmask, int *tmask,
+ struct pg_tm *tm, fsec_t *fsec, bool *is2digits);
+static int DecodeNumberField(int len, char *str,
+ int fmask, int *tmask,
+ struct pg_tm *tm, fsec_t *fsec, bool *is2digits);
+static int DecodeTimeCommon(char *str, int fmask, int range,
+ int *tmask, struct pg_itm *itm);
+static int DecodeTime(char *str, int fmask, int range,
+ int *tmask, struct pg_tm *tm, fsec_t *fsec);
+static int DecodeTimeForInterval(char *str, int fmask, int range,
+ int *tmask, struct pg_itm_in *itm_in);
+static const datetkn *datebsearch(const char *key, const datetkn *base, int nel);
+static int DecodeDate(char *str, int fmask, int *tmask, bool *is2digits,
+ struct pg_tm *tm);
+static char *AppendSeconds(char *cp, int sec, fsec_t fsec,
+ int precision, bool fillzeros);
+static bool int64_multiply_add(int64 val, int64 multiplier, int64 *sum);
+static bool AdjustFractMicroseconds(double frac, int64 scale,
+ struct pg_itm_in *itm_in);
+static bool AdjustFractDays(double frac, int scale,
+ struct pg_itm_in *itm_in);
+static bool AdjustFractYears(double frac, int scale,
+ struct pg_itm_in *itm_in);
+static bool AdjustMicroseconds(int64 val, double fval, int64 scale,
+ struct pg_itm_in *itm_in);
+static bool AdjustDays(int64 val, int scale,
+ struct pg_itm_in *itm_in);
+static bool AdjustMonths(int64 val, struct pg_itm_in *itm_in);
+static bool AdjustYears(int64 val, int scale,
+ struct pg_itm_in *itm_in);
+static int DetermineTimeZoneOffsetInternal(struct pg_tm *tm, pg_tz *tzp,
+ pg_time_t *tp);
+static bool DetermineTimeZoneAbbrevOffsetInternal(pg_time_t t,
+ const char *abbr, pg_tz *tzp,
+ int *offset, int *isdst);
+static pg_tz *FetchDynamicTimeZone(TimeZoneAbbrevTable *tbl, const datetkn *tp);
+
+
+const int day_tab[2][13] =
+{
+ {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 0},
+ {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 0}
+};
+
+const char *const months[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun",
+"Jul", "Aug", "Sep", "Oct", "Nov", "Dec", NULL};
+
+const char *const days[] = {"Sunday", "Monday", "Tuesday", "Wednesday",
+"Thursday", "Friday", "Saturday", NULL};
+
+
+/*****************************************************************************
+ * PRIVATE ROUTINES *
+ *****************************************************************************/
+
+/*
+ * datetktbl holds date/time keywords.
+ *
+ * Note that this table must be strictly alphabetically ordered to allow an
+ * O(ln(N)) search algorithm to be used.
+ *
+ * The token field must be NUL-terminated; we truncate entries to TOKMAXLEN
+ * characters to fit.
+ *
+ * The static table contains no TZ, DTZ, or DYNTZ entries; rather those
+ * are loaded from configuration files and stored in zoneabbrevtbl, whose
+ * abbrevs[] field has the same format as the static datetktbl.
+ */
+static const datetkn datetktbl[] = {
+ /* token, type, value */
+ {EARLY, RESERV, DTK_EARLY}, /* "-infinity" reserved for "early time" */
+ {DA_D, ADBC, AD}, /* "ad" for years > 0 */
+ {"allballs", RESERV, DTK_ZULU}, /* 00:00:00 */
+ {"am", AMPM, AM},
+ {"apr", MONTH, 4},
+ {"april", MONTH, 4},
+ {"at", IGNORE_DTF, 0}, /* "at" (throwaway) */
+ {"aug", MONTH, 8},
+ {"august", MONTH, 8},
+ {DB_C, ADBC, BC}, /* "bc" for years <= 0 */
+ {"d", UNITS, DTK_DAY}, /* "day of month" for ISO input */
+ {"dec", MONTH, 12},
+ {"december", MONTH, 12},
+ {"dow", UNITS, DTK_DOW}, /* day of week */
+ {"doy", UNITS, DTK_DOY}, /* day of year */
+ {"dst", DTZMOD, SECS_PER_HOUR},
+ {EPOCH, RESERV, DTK_EPOCH}, /* "epoch" reserved for system epoch time */
+ {"feb", MONTH, 2},
+ {"february", MONTH, 2},
+ {"fri", DOW, 5},
+ {"friday", DOW, 5},
+ {"h", UNITS, DTK_HOUR}, /* "hour" */
+ {LATE, RESERV, DTK_LATE}, /* "infinity" reserved for "late time" */
+ {"isodow", UNITS, DTK_ISODOW}, /* ISO day of week, Sunday == 7 */
+ {"isoyear", UNITS, DTK_ISOYEAR}, /* year in terms of the ISO week date */
+ {"j", UNITS, DTK_JULIAN},
+ {"jan", MONTH, 1},
+ {"january", MONTH, 1},
+ {"jd", UNITS, DTK_JULIAN},
+ {"jul", MONTH, 7},
+ {"julian", UNITS, DTK_JULIAN},
+ {"july", MONTH, 7},
+ {"jun", MONTH, 6},
+ {"june", MONTH, 6},
+ {"m", UNITS, DTK_MONTH}, /* "month" for ISO input */
+ {"mar", MONTH, 3},
+ {"march", MONTH, 3},
+ {"may", MONTH, 5},
+ {"mm", UNITS, DTK_MINUTE}, /* "minute" for ISO input */
+ {"mon", DOW, 1},
+ {"monday", DOW, 1},
+ {"nov", MONTH, 11},
+ {"november", MONTH, 11},
+ {NOW, RESERV, DTK_NOW}, /* current transaction time */
+ {"oct", MONTH, 10},
+ {"october", MONTH, 10},
+ {"on", IGNORE_DTF, 0}, /* "on" (throwaway) */
+ {"pm", AMPM, PM},
+ {"s", UNITS, DTK_SECOND}, /* "seconds" for ISO input */
+ {"sat", DOW, 6},
+ {"saturday", DOW, 6},
+ {"sep", MONTH, 9},
+ {"sept", MONTH, 9},
+ {"september", MONTH, 9},
+ {"sun", DOW, 0},
+ {"sunday", DOW, 0},
+ {"t", ISOTIME, DTK_TIME}, /* Filler for ISO time fields */
+ {"thu", DOW, 4},
+ {"thur", DOW, 4},
+ {"thurs", DOW, 4},
+ {"thursday", DOW, 4},
+ {TODAY, RESERV, DTK_TODAY}, /* midnight */
+ {TOMORROW, RESERV, DTK_TOMORROW}, /* tomorrow midnight */
+ {"tue", DOW, 2},
+ {"tues", DOW, 2},
+ {"tuesday", DOW, 2},
+ {"wed", DOW, 3},
+ {"wednesday", DOW, 3},
+ {"weds", DOW, 3},
+ {"y", UNITS, DTK_YEAR}, /* "year" for ISO input */
+ {YESTERDAY, RESERV, DTK_YESTERDAY} /* yesterday midnight */
+};
+
+static const int szdatetktbl = sizeof datetktbl / sizeof datetktbl[0];
+
+/*
+ * deltatktbl: same format as datetktbl, but holds keywords used to represent
+ * time units (eg, for intervals, and for EXTRACT).
+ */
+static const datetkn deltatktbl[] = {
+ /* token, type, value */
+ {"@", IGNORE_DTF, 0}, /* postgres relative prefix */
+ {DAGO, AGO, 0}, /* "ago" indicates negative time offset */
+ {"c", UNITS, DTK_CENTURY}, /* "century" relative */
+ {"cent", UNITS, DTK_CENTURY}, /* "century" relative */
+ {"centuries", UNITS, DTK_CENTURY}, /* "centuries" relative */
+ {DCENTURY, UNITS, DTK_CENTURY}, /* "century" relative */
+ {"d", UNITS, DTK_DAY}, /* "day" relative */
+ {DDAY, UNITS, DTK_DAY}, /* "day" relative */
+ {"days", UNITS, DTK_DAY}, /* "days" relative */
+ {"dec", UNITS, DTK_DECADE}, /* "decade" relative */
+ {DDECADE, UNITS, DTK_DECADE}, /* "decade" relative */
+ {"decades", UNITS, DTK_DECADE}, /* "decades" relative */
+ {"decs", UNITS, DTK_DECADE}, /* "decades" relative */
+ {"h", UNITS, DTK_HOUR}, /* "hour" relative */
+ {DHOUR, UNITS, DTK_HOUR}, /* "hour" relative */
+ {"hours", UNITS, DTK_HOUR}, /* "hours" relative */
+ {"hr", UNITS, DTK_HOUR}, /* "hour" relative */
+ {"hrs", UNITS, DTK_HOUR}, /* "hours" relative */
+ {"m", UNITS, DTK_MINUTE}, /* "minute" relative */
+ {"microsecon", UNITS, DTK_MICROSEC}, /* "microsecond" relative */
+ {"mil", UNITS, DTK_MILLENNIUM}, /* "millennium" relative */
+ {"millennia", UNITS, DTK_MILLENNIUM}, /* "millennia" relative */
+ {DMILLENNIUM, UNITS, DTK_MILLENNIUM}, /* "millennium" relative */
+ {"millisecon", UNITS, DTK_MILLISEC}, /* relative */
+ {"mils", UNITS, DTK_MILLENNIUM}, /* "millennia" relative */
+ {"min", UNITS, DTK_MINUTE}, /* "minute" relative */
+ {"mins", UNITS, DTK_MINUTE}, /* "minutes" relative */
+ {DMINUTE, UNITS, DTK_MINUTE}, /* "minute" relative */
+ {"minutes", UNITS, DTK_MINUTE}, /* "minutes" relative */
+ {"mon", UNITS, DTK_MONTH}, /* "months" relative */
+ {"mons", UNITS, DTK_MONTH}, /* "months" relative */
+ {DMONTH, UNITS, DTK_MONTH}, /* "month" relative */
+ {"months", UNITS, DTK_MONTH},
+ {"ms", UNITS, DTK_MILLISEC},
+ {"msec", UNITS, DTK_MILLISEC},
+ {DMILLISEC, UNITS, DTK_MILLISEC},
+ {"mseconds", UNITS, DTK_MILLISEC},
+ {"msecs", UNITS, DTK_MILLISEC},
+ {"qtr", UNITS, DTK_QUARTER}, /* "quarter" relative */
+ {DQUARTER, UNITS, DTK_QUARTER}, /* "quarter" relative */
+ {"s", UNITS, DTK_SECOND},
+ {"sec", UNITS, DTK_SECOND},
+ {DSECOND, UNITS, DTK_SECOND},
+ {"seconds", UNITS, DTK_SECOND},
+ {"secs", UNITS, DTK_SECOND},
+ {DTIMEZONE, UNITS, DTK_TZ}, /* "timezone" time offset */
+ {"timezone_h", UNITS, DTK_TZ_HOUR}, /* timezone hour units */
+ {"timezone_m", UNITS, DTK_TZ_MINUTE}, /* timezone minutes units */
+ {"us", UNITS, DTK_MICROSEC}, /* "microsecond" relative */
+ {"usec", UNITS, DTK_MICROSEC}, /* "microsecond" relative */
+ {DMICROSEC, UNITS, DTK_MICROSEC}, /* "microsecond" relative */
+ {"useconds", UNITS, DTK_MICROSEC}, /* "microseconds" relative */
+ {"usecs", UNITS, DTK_MICROSEC}, /* "microseconds" relative */
+ {"w", UNITS, DTK_WEEK}, /* "week" relative */
+ {DWEEK, UNITS, DTK_WEEK}, /* "week" relative */
+ {"weeks", UNITS, DTK_WEEK}, /* "weeks" relative */
+ {"y", UNITS, DTK_YEAR}, /* "year" relative */
+ {DYEAR, UNITS, DTK_YEAR}, /* "year" relative */
+ {"years", UNITS, DTK_YEAR}, /* "years" relative */
+ {"yr", UNITS, DTK_YEAR}, /* "year" relative */
+ {"yrs", UNITS, DTK_YEAR} /* "years" relative */
+};
+
+static const int szdeltatktbl = sizeof deltatktbl / sizeof deltatktbl[0];
+
+static TimeZoneAbbrevTable *zoneabbrevtbl = NULL;
+
+/* Caches of recent lookup results in the above tables */
+
+static const datetkn *datecache[MAXDATEFIELDS] = {NULL};
+
+static const datetkn *deltacache[MAXDATEFIELDS] = {NULL};
+
+static const datetkn *abbrevcache[MAXDATEFIELDS] = {NULL};
+
+
+/*
+ * Calendar time to Julian date conversions.
+ * Julian date is commonly used in astronomical applications,
+ * since it is numerically accurate and computationally simple.
+ * The algorithms here will accurately convert between Julian day
+ * and calendar date for all non-negative Julian days
+ * (i.e. from Nov 24, -4713 on).
+ *
+ * Rewritten to eliminate overflow problems. This now allows the
+ * routines to work correctly for all Julian day counts from
+ * 0 to 2147483647 (Nov 24, -4713 to Jun 3, 5874898) assuming
+ * a 32-bit integer. Longer types should also work to the limits
+ * of their precision.
+ *
+ * Actually, date2j() will work sanely, in the sense of producing
+ * valid negative Julian dates, significantly before Nov 24, -4713.
+ * We rely on it to do so back to Nov 1, -4713; see IS_VALID_JULIAN()
+ * and associated commentary in timestamp.h.
+ */
+
+int
+date2j(int y, int m, int d)
+{
+ int julian;
+ int century;
+
+ if (m > 2)
+ {
+ m += 1;
+ y += 4800;
+ }
+ else
+ {
+ m += 13;
+ y += 4799;
+ }
+
+ century = y / 100;
+ julian = y * 365 - 32167;
+ julian += y / 4 - century + century / 4;
+ julian += 7834 * m / 256 + d;
+
+ return julian;
+} /* date2j() */
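+
+/*
+ * Worked example (illustrative): date2j(2000, 1, 1) takes the m <= 2 branch,
+ * giving m = 14, y = 6799, century = 67; then 6799*365 - 32167 = 2449468,
+ * plus 1699 - 67 + 16 = 1648, plus 7834*14/256 + 1 = 429, for a total of
+ * 2451545, which is POSTGRES_EPOCH_JDATE.
+ */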
+
+void
+j2date(int jd, int *year, int *month, int *day)
+{
+ unsigned int julian;
+ unsigned int quad;
+ unsigned int extra;
+ int y;
+
+ julian = jd;
+ julian += 32044;
+ quad = julian / 146097;
+ extra = (julian - quad * 146097) * 4 + 3;
+ julian += 60 + quad * 3 + extra / 146097;
+ quad = julian / 1461;
+ julian -= quad * 1461;
+ y = julian * 4 / 1461;
+ julian = ((y != 0) ? ((julian + 305) % 365) : ((julian + 306) % 366))
+ + 123;
+ y += quad * 4;
+ *year = y - 4800;
+ quad = julian * 2141 / 65536;
+ *day = julian - 7834 * quad / 256;
+ *month = (quad + 10) % MONTHS_PER_YEAR + 1;
+} /* j2date() */
+
+
+/*
+ * j2day - convert Julian date to day-of-week (0..6 == Sun..Sat)
+ *
+ * Note: various places use the locution j2day(date - 1) to produce a
+ * result according to the convention 0..6 = Mon..Sun. This is a bit of
+ * a crock, but will work as long as the computation here is just a modulo.
+ */
+int
+j2day(int date)
+{
+ date += 1;
+ date %= 7;
+ /* Cope if division truncates towards zero, as it probably does */
+ if (date < 0)
+ date += 7;
+
+ return date;
+} /* j2day() */
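+
+/*
+ * Worked example (illustrative): j2day(2451545), the Julian day of
+ * 2000-01-01, computes (2451545 + 1) % 7 = 6, i.e. Saturday, which matches
+ * the actual day of the week for that date.
+ */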
+
+
+/*
+ * GetCurrentDateTime()
+ *
+ * Get the transaction start time ("now()") broken down as a struct pg_tm,
+ * converted according to the session timezone setting.
+ *
+ * This is just a convenience wrapper for GetCurrentTimeUsec, to cover the
+ * case where caller doesn't need either fractional seconds or tz offset.
+ */
+void
+GetCurrentDateTime(struct pg_tm *tm)
+{
+ fsec_t fsec;
+
+ GetCurrentTimeUsec(tm, &fsec, NULL);
+}
+
+/*
+ * GetCurrentTimeUsec()
+ *
+ * Get the transaction start time ("now()") broken down as a struct pg_tm,
+ * including fractional seconds and timezone offset. The time is converted
+ * according to the session timezone setting.
+ *
+ * Callers may pass tzp = NULL if they don't need the offset, but this does
+ * not affect the conversion behavior (unlike timestamp2tm()).
+ *
+ * Internally, we cache the result, since this could be called many times
+ * in a transaction, within which now() doesn't change.
+ */
+void
+GetCurrentTimeUsec(struct pg_tm *tm, fsec_t *fsec, int *tzp)
+{
+ TimestampTz cur_ts = GetCurrentTransactionStartTimestamp();
+
+ /*
+ * The cache key must include both current time and current timezone. By
+ * representing the timezone by just a pointer, we're assuming that
+ * distinct timezone settings could never have the same pointer value.
+ * This is true by virtue of the hashtable used inside pg_tzset();
+ * however, it might need another look if we ever allow entries in that
+ * hash to be recycled.
+ */
+ static TimestampTz cache_ts = 0;
+ static pg_tz *cache_timezone = NULL;
+ static struct pg_tm cache_tm;
+ static fsec_t cache_fsec;
+ static int cache_tz;
+
+ if (cur_ts != cache_ts || session_timezone != cache_timezone)
+ {
+ /*
+ * Make sure cache is marked invalid in case of error after partial
+ * update within timestamp2tm.
+ */
+ cache_timezone = NULL;
+
+ /*
+ * Perform the computation, storing results into cache. We do not
+ * really expect any error here, since current time surely ought to be
+ * within range, but check just for sanity's sake.
+ */
+ if (timestamp2tm(cur_ts, &cache_tz, &cache_tm, &cache_fsec,
+ NULL, session_timezone) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ /* OK, so mark the cache valid. */
+ cache_ts = cur_ts;
+ cache_timezone = session_timezone;
+ }
+
+ *tm = cache_tm;
+ *fsec = cache_fsec;
+ if (tzp != NULL)
+ *tzp = cache_tz;
+}
+
+
+/*
+ * Append seconds and fractional seconds (if any) at *cp.
+ *
+ * precision is the max number of fraction digits, fillzeros says to
+ * pad to two integral-seconds digits.
+ *
+ * Returns a pointer to the new end of string. No NUL terminator is put
+ * there; callers are responsible for NUL terminating str themselves.
+ *
+ * Note that any sign is stripped from the input sec and fsec values.
+ */
+static char *
+AppendSeconds(char *cp, int sec, fsec_t fsec, int precision, bool fillzeros)
+{
+ Assert(precision >= 0);
+
+ if (fillzeros)
+ cp = pg_ultostr_zeropad(cp, Abs(sec), 2);
+ else
+ cp = pg_ultostr(cp, Abs(sec));
+
+ /* fsec_t is just an int32 */
+ if (fsec != 0)
+ {
+ int32 value = Abs(fsec);
+ char *end = &cp[precision + 1];
+ bool gotnonzero = false;
+
+ *cp++ = '.';
+
+ /*
+ * Append the fractional seconds part. Note that we don't want any
+ * trailing zeros here, so since we're building the number in reverse
+ * we'll skip appending zeros until we've output a non-zero digit.
+ */
+ while (precision--)
+ {
+ int32 oldval = value;
+ int32 remainder;
+
+ value /= 10;
+ remainder = oldval - value * 10;
+
+ /* check if we got a non-zero */
+ if (remainder)
+ gotnonzero = true;
+
+ if (gotnonzero)
+ cp[precision] = '0' + remainder;
+ else
+ end = &cp[precision];
+ }
+
+ /*
+		 * If we still have a non-zero value then precision must not have been
+		 * enough to print the number.  We punt the problem to pg_ultostr(),
+ * which will generate a correct answer in the minimum valid width.
+ */
+ if (value)
+ return pg_ultostr(cp, Abs(fsec));
+
+ return end;
+ }
+ else
+ return cp;
+}
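+
+/*
+ * Worked example (illustrative values): AppendSeconds(cp, 5, 250000, 6, true)
+ * first emits "05", then walks 250000 from the least significant digit,
+ * skipping the four trailing zeros before writing "25", and returns a
+ * pointer just past "05.25".
+ */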
+
+
+/*
+ * Variant of above that's specialized to timestamp case.
+ *
+ * Returns a pointer to the new end of string. No NUL terminator is put
+ * there; callers are responsible for NUL terminating str themselves.
+ */
+static char *
+AppendTimestampSeconds(char *cp, struct pg_tm *tm, fsec_t fsec)
+{
+ return AppendSeconds(cp, tm->tm_sec, fsec, MAX_TIMESTAMP_PRECISION, true);
+}
+
+
+/*
+ * Add val * multiplier to *sum.
+ * Returns true if successful, false on overflow.
+ */
+static bool
+int64_multiply_add(int64 val, int64 multiplier, int64 *sum)
+{
+ int64 product;
+
+ if (pg_mul_s64_overflow(val, multiplier, &product) ||
+ pg_add_s64_overflow(*sum, product, sum))
+ return false;
+ return true;
+}
+
+/*
+ * Multiply frac by scale (to produce microseconds) and add to itm_in->tm_usec.
+ * Returns true if successful, false if itm_in overflows.
+ */
+static bool
+AdjustFractMicroseconds(double frac, int64 scale,
+ struct pg_itm_in *itm_in)
+{
+ int64 usec;
+
+ /* Fast path for common case */
+ if (frac == 0)
+ return true;
+
+ /*
+ * We assume the input frac has abs value less than 1, so overflow of frac
+ * or usec is not an issue for interesting values of scale.
+ */
+ frac *= scale;
+ usec = (int64) frac;
+
+ /* Round off any fractional microsecond */
+ frac -= usec;
+ if (frac > 0.5)
+ usec++;
+ else if (frac < -0.5)
+ usec--;
+
+ return !pg_add_s64_overflow(itm_in->tm_usec, usec, &itm_in->tm_usec);
+}
+
+/*
+ * Multiply frac by scale (to produce days). Add the integral part of the
+ * result to itm_in->tm_mday, the fractional part to itm_in->tm_usec.
+ * Returns true if successful, false if itm_in overflows.
+ */
+static bool
+AdjustFractDays(double frac, int scale,
+ struct pg_itm_in *itm_in)
+{
+ int extra_days;
+
+ /* Fast path for common case */
+ if (frac == 0)
+ return true;
+
+ /*
+ * We assume the input frac has abs value less than 1, so overflow of frac
+ * or extra_days is not an issue.
+ */
+ frac *= scale;
+ extra_days = (int) frac;
+
+ /* ... but this could overflow, if tm_mday is already nonzero */
+ if (pg_add_s32_overflow(itm_in->tm_mday, extra_days, &itm_in->tm_mday))
+ return false;
+
+ /* Handle any fractional day */
+ frac -= extra_days;
+ return AdjustFractMicroseconds(frac, USECS_PER_DAY, itm_in);
+}
+
+/*
+ * Multiply frac by scale (to produce years), then further scale up to months.
+ * Add the integral part of the result to itm_in->tm_mon, discarding any
+ * fractional part.
+ * Returns true if successful, false if itm_in overflows.
+ */
+static bool
+AdjustFractYears(double frac, int scale,
+ struct pg_itm_in *itm_in)
+{
+ /*
+ * As above, we assume abs(frac) < 1, so this can't overflow for any
+ * interesting value of scale.
+ */
+ int extra_months = (int) rint(frac * scale * MONTHS_PER_YEAR);
+
+ return !pg_add_s32_overflow(itm_in->tm_mon, extra_months, &itm_in->tm_mon);
+}
+
+/*
+ * Add (val + fval) * scale to itm_in->tm_usec.
+ * Returns true if successful, false if itm_in overflows.
+ */
+static bool
+AdjustMicroseconds(int64 val, double fval, int64 scale,
+ struct pg_itm_in *itm_in)
+{
+ /* Handle the integer part */
+ if (!int64_multiply_add(val, scale, &itm_in->tm_usec))
+ return false;
+ /* Handle the float part */
+ return AdjustFractMicroseconds(fval, scale, itm_in);
+}
+
+/*
+ * Multiply val by scale (to produce days) and add to itm_in->tm_mday.
+ * Returns true if successful, false if itm_in overflows.
+ */
+static bool
+AdjustDays(int64 val, int scale, struct pg_itm_in *itm_in)
+{
+ int days;
+
+ if (val < INT_MIN || val > INT_MAX)
+ return false;
+ return !pg_mul_s32_overflow((int32) val, scale, &days) &&
+ !pg_add_s32_overflow(itm_in->tm_mday, days, &itm_in->tm_mday);
+}
+
+/*
+ * Add val to itm_in->tm_mon (no need for scale here, as val is always
+ * in months already).
+ * Returns true if successful, false if itm_in overflows.
+ */
+static bool
+AdjustMonths(int64 val, struct pg_itm_in *itm_in)
+{
+ if (val < INT_MIN || val > INT_MAX)
+ return false;
+ return !pg_add_s32_overflow(itm_in->tm_mon, (int32) val, &itm_in->tm_mon);
+}
+
+/*
+ * Multiply val by scale (to produce years) and add to itm_in->tm_year.
+ * Returns true if successful, false if itm_in overflows.
+ */
+static bool
+AdjustYears(int64 val, int scale,
+ struct pg_itm_in *itm_in)
+{
+ int years;
+
+ if (val < INT_MIN || val > INT_MAX)
+ return false;
+ return !pg_mul_s32_overflow((int32) val, scale, &years) &&
+ !pg_add_s32_overflow(itm_in->tm_year, years, &itm_in->tm_year);
+}
+
+
+/*
+ * Parse the fractional part of a number (decimal point and optional digits,
+ * followed by end of string). Returns the fractional value into *frac.
+ *
+ * Returns 0 if successful, DTERR code if bogus input detected.
+ */
+static int
+ParseFraction(char *cp, double *frac)
+{
+ /* Caller should always pass the start of the fraction part */
+ Assert(*cp == '.');
+
+ /*
+ * We want to allow just "." with no digits, but some versions of strtod
+ * will report EINVAL for that, so special-case it.
+ */
+ if (cp[1] == '\0')
+ {
+ *frac = 0;
+ }
+ else
+ {
+ errno = 0;
+ *frac = strtod(cp, &cp);
+ /* check for parse failure */
+ if (*cp != '\0' || errno != 0)
+ return DTERR_BAD_FORMAT;
+ }
+ return 0;
+}
+
+/*
+ * Fetch a fractional-second value with suitable error checking.
+ * Same as ParseFraction except we convert the result to integer microseconds.
+ */
+static int
+ParseFractionalSecond(char *cp, fsec_t *fsec)
+{
+ double frac;
+ int dterr;
+
+ dterr = ParseFraction(cp, &frac);
+ if (dterr)
+ return dterr;
+ *fsec = rint(frac * 1000000);
+ return 0;
+}
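+
+/*
+ * For illustration: given the field remainder ".25", ParseFraction() returns
+ * 0.25 and ParseFractionalSecond() stores rint(0.25 * 1000000) = 250000
+ * microseconds into *fsec; a bare "." yields zero.
+ */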
+
+
+/* ParseDateTime()
+ * Break string into tokens based on a date/time context.
+ * Returns 0 if successful, DTERR code if bogus input detected.
+ *
+ * timestr - the input string
+ * workbuf - workspace for field string storage. This must be
+ * larger than the largest legal input for this datetime type --
+ * some additional space will be needed to NUL terminate fields.
+ * buflen - the size of workbuf
+ * field[] - pointers to field strings are returned in this array
+ * ftype[] - field type indicators are returned in this array
+ * maxfields - dimensions of the above two arrays
+ * *numfields - set to the actual number of fields detected
+ *
+ * The fields extracted from the input are stored as separate,
+ * null-terminated strings in the workspace at workbuf. Any text is
+ * converted to lower case.
+ *
+ * Several field types are assigned:
+ * DTK_NUMBER - digits and (possibly) a decimal point
+ * DTK_DATE - digits and two delimiters, or digits and text
+ * DTK_TIME - digits, colon delimiters, and possibly a decimal point
+ * DTK_STRING - text (no digits or punctuation)
+ * DTK_SPECIAL - leading "+" or "-" followed by text
+ * DTK_TZ - leading "+" or "-" followed by digits (also eats ':', '.', '-')
+ *
+ * Note that some field types can hold unexpected items:
+ * DTK_NUMBER can hold date fields (yy.ddd)
+ * DTK_STRING can hold months (January) and time zones (PST)
+ * DTK_DATE can hold time zone names (America/New_York, GMT-8)
+ */
+int
+ParseDateTime(const char *timestr, char *workbuf, size_t buflen,
+ char **field, int *ftype, int maxfields, int *numfields)
+{
+ int nf = 0;
+ const char *cp = timestr;
+ char *bufp = workbuf;
+ const char *bufend = workbuf + buflen;
+
+ /*
+ * Set the character pointed-to by "bufptr" to "newchar", and increment
+ * "bufptr". "end" gives the end of the buffer -- we return an error if
+ * there is no space left to append a character to the buffer. Note that
+ * "bufptr" is evaluated twice.
+ */
+#define APPEND_CHAR(bufptr, end, newchar) \
+ do \
+ { \
+ if (((bufptr) + 1) >= (end)) \
+ return DTERR_BAD_FORMAT; \
+ *(bufptr)++ = newchar; \
+ } while (0)
+
+ /* outer loop through fields */
+ while (*cp != '\0')
+ {
+ /* Ignore spaces between fields */
+ if (isspace((unsigned char) *cp))
+ {
+ cp++;
+ continue;
+ }
+
+ /* Record start of current field */
+ if (nf >= maxfields)
+ return DTERR_BAD_FORMAT;
+ field[nf] = bufp;
+
+ /* leading digit? then date or time */
+ if (isdigit((unsigned char) *cp))
+ {
+ APPEND_CHAR(bufp, bufend, *cp++);
+ while (isdigit((unsigned char) *cp))
+ APPEND_CHAR(bufp, bufend, *cp++);
+
+ /* time field? */
+ if (*cp == ':')
+ {
+ ftype[nf] = DTK_TIME;
+ APPEND_CHAR(bufp, bufend, *cp++);
+ while (isdigit((unsigned char) *cp) ||
+ (*cp == ':') || (*cp == '.'))
+ APPEND_CHAR(bufp, bufend, *cp++);
+ }
+ /* date field? allow embedded text month */
+ else if (*cp == '-' || *cp == '/' || *cp == '.')
+ {
+ /* save delimiting character to use later */
+ char delim = *cp;
+
+ APPEND_CHAR(bufp, bufend, *cp++);
+ /* second field is all digits? then no embedded text month */
+ if (isdigit((unsigned char) *cp))
+ {
+ ftype[nf] = ((delim == '.') ? DTK_NUMBER : DTK_DATE);
+ while (isdigit((unsigned char) *cp))
+ APPEND_CHAR(bufp, bufend, *cp++);
+
+ /*
+ * insist that the delimiters match to get a three-field
+ * date.
+ */
+ if (*cp == delim)
+ {
+ ftype[nf] = DTK_DATE;
+ APPEND_CHAR(bufp, bufend, *cp++);
+ while (isdigit((unsigned char) *cp) || *cp == delim)
+ APPEND_CHAR(bufp, bufend, *cp++);
+ }
+ }
+ else
+ {
+ ftype[nf] = DTK_DATE;
+ while (isalnum((unsigned char) *cp) || *cp == delim)
+ APPEND_CHAR(bufp, bufend, pg_tolower((unsigned char) *cp++));
+ }
+ }
+
+ /*
+ * otherwise, number only and will determine year, month, day, or
+ * concatenated fields later...
+ */
+ else
+ ftype[nf] = DTK_NUMBER;
+ }
+ /* Leading decimal point? Then fractional seconds... */
+ else if (*cp == '.')
+ {
+ APPEND_CHAR(bufp, bufend, *cp++);
+ while (isdigit((unsigned char) *cp))
+ APPEND_CHAR(bufp, bufend, *cp++);
+
+ ftype[nf] = DTK_NUMBER;
+ }
+
+ /*
+ * text? then date string, month, day of week, special, or timezone
+ */
+ else if (isalpha((unsigned char) *cp))
+ {
+ bool is_date;
+
+ ftype[nf] = DTK_STRING;
+ APPEND_CHAR(bufp, bufend, pg_tolower((unsigned char) *cp++));
+ while (isalpha((unsigned char) *cp))
+ APPEND_CHAR(bufp, bufend, pg_tolower((unsigned char) *cp++));
+
+ /*
+ * Dates can have embedded '-', '/', or '.' separators. It could
+ * also be a timezone name containing embedded '/', '+', '-', '_',
+ * or ':' (but '_' or ':' can't be the first punctuation). If the
+ * next character is a digit or '+', we need to check whether what
+ * we have so far is a recognized non-timezone keyword --- if so,
+ * don't believe that this is the start of a timezone.
+ */
+ is_date = false;
+ if (*cp == '-' || *cp == '/' || *cp == '.')
+ is_date = true;
+ else if (*cp == '+' || isdigit((unsigned char) *cp))
+ {
+ *bufp = '\0'; /* null-terminate current field value */
+ /* we need search only the core token table, not TZ names */
+ if (datebsearch(field[nf], datetktbl, szdatetktbl) == NULL)
+ is_date = true;
+ }
+ if (is_date)
+ {
+ ftype[nf] = DTK_DATE;
+ do
+ {
+ APPEND_CHAR(bufp, bufend, pg_tolower((unsigned char) *cp++));
+ } while (*cp == '+' || *cp == '-' ||
+ *cp == '/' || *cp == '_' ||
+ *cp == '.' || *cp == ':' ||
+ isalnum((unsigned char) *cp));
+ }
+ }
+ /* sign? then special or numeric timezone */
+ else if (*cp == '+' || *cp == '-')
+ {
+ APPEND_CHAR(bufp, bufend, *cp++);
+ /* soak up leading whitespace */
+ while (isspace((unsigned char) *cp))
+ cp++;
+ /* numeric timezone? */
+ /* note that "DTK_TZ" could also be a signed float or yyyy-mm */
+ if (isdigit((unsigned char) *cp))
+ {
+ ftype[nf] = DTK_TZ;
+ APPEND_CHAR(bufp, bufend, *cp++);
+ while (isdigit((unsigned char) *cp) ||
+ *cp == ':' || *cp == '.' || *cp == '-')
+ APPEND_CHAR(bufp, bufend, *cp++);
+ }
+ /* special? */
+ else if (isalpha((unsigned char) *cp))
+ {
+ ftype[nf] = DTK_SPECIAL;
+ APPEND_CHAR(bufp, bufend, pg_tolower((unsigned char) *cp++));
+ while (isalpha((unsigned char) *cp))
+ APPEND_CHAR(bufp, bufend, pg_tolower((unsigned char) *cp++));
+ }
+ /* otherwise something wrong... */
+ else
+ return DTERR_BAD_FORMAT;
+ }
+ /* ignore other punctuation but use as delimiter */
+ else if (ispunct((unsigned char) *cp))
+ {
+ cp++;
+ continue;
+ }
+ /* otherwise, something is not right... */
+ else
+ return DTERR_BAD_FORMAT;
+
+ /* force in a delimiter after each field */
+ *bufp++ = '\0';
+ nf++;
+ }
+
+ *numfields = nf;
+
+ return 0;
+}
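+
+/*
+ * For illustration, the input "2001-12-25 04:05:06.789-07" is split into
+ * three fields by the loop above: "2001-12-25" tagged DTK_DATE (digits plus
+ * matching '-' delimiters), "04:05:06.789" tagged DTK_TIME (digits, colons,
+ * and a decimal point), and "-07" tagged DTK_TZ (sign followed by digits).
+ */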
+
+
+/* DecodeDateTime()
+ * Interpret previously parsed fields for general date and time.
+ * Return 0 if full date, 1 if only time, and negative DTERR code if problems.
+ * (Currently, all callers treat 1 as an error return too.)
+ *
+ * External format(s):
+ * "<weekday> <month>-<day>-<year> <hour>:<minute>:<second>"
+ * "Fri Feb-7-1997 15:23:27"
+ * "Feb-7-1997 15:23:27"
+ * "2-7-1997 15:23:27"
+ * "1997-2-7 15:23:27"
+ * "1997.038 15:23:27" (day of year 1-366)
+ * Also supports input in compact time:
+ * "970207 152327"
+ * "97038 152327"
+ * "20011225T040506.789-07"
+ *
+ * Use the system-provided functions to get the current time zone
+ * if not specified in the input string.
+ *
+ * If the date is outside the range of pg_time_t (in practice that could only
+ * happen if pg_time_t is just 32 bits), then assume UTC time zone - thomas
+ * 1997-05-27
+ */
+int
+DecodeDateTime(char **field, int *ftype, int nf,
+ int *dtype, struct pg_tm *tm, fsec_t *fsec, int *tzp)
+{
+ int fmask = 0,
+ tmask,
+ type;
+ int ptype = 0; /* "prefix type" for ISO y2001m02d04 format */
+ int i;
+ int val;
+ int dterr;
+ int mer = HR24;
+ bool haveTextMonth = false;
+ bool isjulian = false;
+ bool is2digits = false;
+ bool bc = false;
+ pg_tz *namedTz = NULL;
+ pg_tz *abbrevTz = NULL;
+ pg_tz *valtz;
+ char *abbrev = NULL;
+ struct pg_tm cur_tm;
+
+ /*
+ * We'll insist on at least all of the date fields, but initialize the
+ * remaining fields in case they are not set later...
+ */
+ *dtype = DTK_DATE;
+ tm->tm_hour = 0;
+ tm->tm_min = 0;
+ tm->tm_sec = 0;
+ *fsec = 0;
+	/* don't know daylight savings time status a priori */
+ tm->tm_isdst = -1;
+ if (tzp != NULL)
+ *tzp = 0;
+
+ for (i = 0; i < nf; i++)
+ {
+ switch (ftype[i])
+ {
+ case DTK_DATE:
+
+ /*
+ * Integral julian day with attached time zone? All other
+ * forms with JD will be separated into distinct fields, so we
+ * handle just this case here.
+ */
+ if (ptype == DTK_JULIAN)
+ {
+ char *cp;
+ int val;
+
+ if (tzp == NULL)
+ return DTERR_BAD_FORMAT;
+
+ errno = 0;
+ val = strtoint(field[i], &cp, 10);
+ if (errno == ERANGE || val < 0)
+ return DTERR_FIELD_OVERFLOW;
+
+ j2date(val, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
+ isjulian = true;
+
+ /* Get the time zone from the end of the string */
+ dterr = DecodeTimezone(cp, tzp);
+ if (dterr)
+ return dterr;
+
+ tmask = DTK_DATE_M | DTK_TIME_M | DTK_M(TZ);
+ ptype = 0;
+ break;
+ }
+
+ /*
+ * Already have a date? Then this might be a time zone name
+ * with embedded punctuation (e.g. "America/New_York") or a
+ * run-together time with trailing time zone (e.g. hhmmss-zz).
+ * - thomas 2001-12-25
+ *
+ * We consider it a time zone if we already have month & day.
+ * This is to allow the form "mmm dd hhmmss tz year", which
+ * we've historically accepted.
+ */
+ else if (ptype != 0 ||
+ ((fmask & (DTK_M(MONTH) | DTK_M(DAY))) ==
+ (DTK_M(MONTH) | DTK_M(DAY))))
+ {
+ /* No time zone accepted? Then quit... */
+ if (tzp == NULL)
+ return DTERR_BAD_FORMAT;
+
+ if (isdigit((unsigned char) *field[i]) || ptype != 0)
+ {
+ char *cp;
+
+ if (ptype != 0)
+ {
+ /* Sanity check; should not fail this test */
+ if (ptype != DTK_TIME)
+ return DTERR_BAD_FORMAT;
+ ptype = 0;
+ }
+
+ /*
+ * Starts with a digit but we already have a time
+ * field? Then we are in trouble with a date and time
+ * already...
+ */
+ if ((fmask & DTK_TIME_M) == DTK_TIME_M)
+ return DTERR_BAD_FORMAT;
+
+ if ((cp = strchr(field[i], '-')) == NULL)
+ return DTERR_BAD_FORMAT;
+
+ /* Get the time zone from the end of the string */
+ dterr = DecodeTimezone(cp, tzp);
+ if (dterr)
+ return dterr;
+ *cp = '\0';
+
+ /*
+ * Then read the rest of the field as a concatenated
+ * time
+ */
+ dterr = DecodeNumberField(strlen(field[i]), field[i],
+ fmask,
+ &tmask, tm,
+ fsec, &is2digits);
+ if (dterr < 0)
+ return dterr;
+
+ /*
+ * modify tmask after returning from
+ * DecodeNumberField()
+ */
+ tmask |= DTK_M(TZ);
+ }
+ else
+ {
+ namedTz = pg_tzset(field[i]);
+ if (!namedTz)
+ {
+ /*
+ * We should return an error code instead of
+ * ereport'ing directly, but then there is no way
+ * to report the bad time zone name.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("time zone \"%s\" not recognized",
+ field[i])));
+ }
+ /* we'll apply the zone setting below */
+ tmask = DTK_M(TZ);
+ }
+ }
+ else
+ {
+ dterr = DecodeDate(field[i], fmask,
+ &tmask, &is2digits, tm);
+ if (dterr)
+ return dterr;
+ }
+ break;
+
+ case DTK_TIME:
+
+ /*
+ * This might be an ISO time following a "t" field.
+ */
+ if (ptype != 0)
+ {
+ /* Sanity check; should not fail this test */
+ if (ptype != DTK_TIME)
+ return DTERR_BAD_FORMAT;
+ ptype = 0;
+ }
+ dterr = DecodeTime(field[i], fmask, INTERVAL_FULL_RANGE,
+ &tmask, tm, fsec);
+ if (dterr)
+ return dterr;
+
+ /* check for time overflow */
+ if (time_overflows(tm->tm_hour, tm->tm_min, tm->tm_sec,
+ *fsec))
+ return DTERR_FIELD_OVERFLOW;
+ break;
+
+ case DTK_TZ:
+ {
+ int tz;
+
+ if (tzp == NULL)
+ return DTERR_BAD_FORMAT;
+
+ dterr = DecodeTimezone(field[i], &tz);
+ if (dterr)
+ return dterr;
+ *tzp = tz;
+ tmask = DTK_M(TZ);
+ }
+ break;
+
+ case DTK_NUMBER:
+
+ /*
+ * Was this an "ISO date" with embedded field labels? An
+ * example is "y2001m02d04" - thomas 2001-02-04
+ */
+ if (ptype != 0)
+ {
+ char *cp;
+ int val;
+
+ errno = 0;
+ val = strtoint(field[i], &cp, 10);
+ if (errno == ERANGE)
+ return DTERR_FIELD_OVERFLOW;
+
+ /*
+ * only a few kinds are allowed to have an embedded
+ * decimal
+ */
+ if (*cp == '.')
+ switch (ptype)
+ {
+ case DTK_JULIAN:
+ case DTK_TIME:
+ case DTK_SECOND:
+ break;
+ default:
+ return DTERR_BAD_FORMAT;
+ break;
+ }
+ else if (*cp != '\0')
+ return DTERR_BAD_FORMAT;
+
+ switch (ptype)
+ {
+ case DTK_YEAR:
+ tm->tm_year = val;
+ tmask = DTK_M(YEAR);
+ break;
+
+ case DTK_MONTH:
+
+ /*
+ * already have a month and hour? then assume
+ * minutes
+ */
+ if ((fmask & DTK_M(MONTH)) != 0 &&
+ (fmask & DTK_M(HOUR)) != 0)
+ {
+ tm->tm_min = val;
+ tmask = DTK_M(MINUTE);
+ }
+ else
+ {
+ tm->tm_mon = val;
+ tmask = DTK_M(MONTH);
+ }
+ break;
+
+ case DTK_DAY:
+ tm->tm_mday = val;
+ tmask = DTK_M(DAY);
+ break;
+
+ case DTK_HOUR:
+ tm->tm_hour = val;
+ tmask = DTK_M(HOUR);
+ break;
+
+ case DTK_MINUTE:
+ tm->tm_min = val;
+ tmask = DTK_M(MINUTE);
+ break;
+
+ case DTK_SECOND:
+ tm->tm_sec = val;
+ tmask = DTK_M(SECOND);
+ if (*cp == '.')
+ {
+ dterr = ParseFractionalSecond(cp, fsec);
+ if (dterr)
+ return dterr;
+ tmask = DTK_ALL_SECS_M;
+ }
+ break;
+
+ case DTK_TZ:
+ tmask = DTK_M(TZ);
+ dterr = DecodeTimezone(field[i], tzp);
+ if (dterr)
+ return dterr;
+ break;
+
+ case DTK_JULIAN:
+ /* previous field was a label for "julian date" */
+ if (val < 0)
+ return DTERR_FIELD_OVERFLOW;
+ tmask = DTK_DATE_M;
+ j2date(val, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
+ isjulian = true;
+
+ /* fractional Julian Day? */
+ if (*cp == '.')
+ {
+ double time;
+
+ dterr = ParseFraction(cp, &time);
+ if (dterr)
+ return dterr;
+ time *= USECS_PER_DAY;
+ dt2time(time,
+ &tm->tm_hour, &tm->tm_min,
+ &tm->tm_sec, fsec);
+ tmask |= DTK_TIME_M;
+ }
+ break;
+
+ case DTK_TIME:
+ /* previous field was "t" for ISO time */
+ dterr = DecodeNumberField(strlen(field[i]), field[i],
+ (fmask | DTK_DATE_M),
+ &tmask, tm,
+ fsec, &is2digits);
+ if (dterr < 0)
+ return dterr;
+ if (tmask != DTK_TIME_M)
+ return DTERR_BAD_FORMAT;
+ break;
+
+ default:
+ return DTERR_BAD_FORMAT;
+ break;
+ }
+
+ ptype = 0;
+ *dtype = DTK_DATE;
+ }
+ else
+ {
+ char *cp;
+ int flen;
+
+ flen = strlen(field[i]);
+ cp = strchr(field[i], '.');
+
+ /* Embedded decimal and no date yet? */
+ if (cp != NULL && !(fmask & DTK_DATE_M))
+ {
+ dterr = DecodeDate(field[i], fmask,
+ &tmask, &is2digits, tm);
+ if (dterr)
+ return dterr;
+ }
+ /* embedded decimal and several digits before? */
+ else if (cp != NULL && flen - strlen(cp) > 2)
+ {
+ /*
+					 * Interpret as a concatenated date or time.  Set the
+ * type field to allow decoding other fields later.
+ * Example: 20011223 or 040506
+ */
+ dterr = DecodeNumberField(flen, field[i], fmask,
+ &tmask, tm,
+ fsec, &is2digits);
+ if (dterr < 0)
+ return dterr;
+ }
+
+ /*
+ * Is this a YMD or HMS specification, or a year number?
+ * YMD and HMS are required to be six digits or more, so
+ * if it is 5 digits, it is a year. If it is six or more
+ * digits, we assume it is YMD or HMS unless no date and
+ * no time values have been specified. This forces 6+
+ * digit years to be at the end of the string, or to use
+ * the ISO date specification.
+ */
+ else if (flen >= 6 && (!(fmask & DTK_DATE_M) ||
+ !(fmask & DTK_TIME_M)))
+ {
+ dterr = DecodeNumberField(flen, field[i], fmask,
+ &tmask, tm,
+ fsec, &is2digits);
+ if (dterr < 0)
+ return dterr;
+ }
+ /* otherwise it is a single date/time field... */
+ else
+ {
+ dterr = DecodeNumber(flen, field[i],
+ haveTextMonth, fmask,
+ &tmask, tm,
+ fsec, &is2digits);
+ if (dterr)
+ return dterr;
+ }
+ }
+ break;
+
+ case DTK_STRING:
+ case DTK_SPECIAL:
+ /* timezone abbrevs take precedence over built-in tokens */
+ type = DecodeTimezoneAbbrev(i, field[i], &val, &valtz);
+ if (type == UNKNOWN_FIELD)
+ type = DecodeSpecial(i, field[i], &val);
+ if (type == IGNORE_DTF)
+ continue;
+
+ tmask = DTK_M(type);
+ switch (type)
+ {
+ case RESERV:
+ switch (val)
+ {
+ case DTK_NOW:
+ tmask = (DTK_DATE_M | DTK_TIME_M | DTK_M(TZ));
+ *dtype = DTK_DATE;
+ GetCurrentTimeUsec(tm, fsec, tzp);
+ break;
+
+ case DTK_YESTERDAY:
+ tmask = DTK_DATE_M;
+ *dtype = DTK_DATE;
+ GetCurrentDateTime(&cur_tm);
+ j2date(date2j(cur_tm.tm_year, cur_tm.tm_mon, cur_tm.tm_mday) - 1,
+ &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
+ break;
+
+ case DTK_TODAY:
+ tmask = DTK_DATE_M;
+ *dtype = DTK_DATE;
+ GetCurrentDateTime(&cur_tm);
+ tm->tm_year = cur_tm.tm_year;
+ tm->tm_mon = cur_tm.tm_mon;
+ tm->tm_mday = cur_tm.tm_mday;
+ break;
+
+ case DTK_TOMORROW:
+ tmask = DTK_DATE_M;
+ *dtype = DTK_DATE;
+ GetCurrentDateTime(&cur_tm);
+ j2date(date2j(cur_tm.tm_year, cur_tm.tm_mon, cur_tm.tm_mday) + 1,
+ &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
+ break;
+
+ case DTK_ZULU:
+ tmask = (DTK_TIME_M | DTK_M(TZ));
+ *dtype = DTK_DATE;
+ tm->tm_hour = 0;
+ tm->tm_min = 0;
+ tm->tm_sec = 0;
+ if (tzp != NULL)
+ *tzp = 0;
+ break;
+
+ default:
+ *dtype = val;
+ }
+
+ break;
+
+ case MONTH:
+
+ /*
+ * already have a (numeric) month? then see if we can
+ * substitute...
+ */
+ if ((fmask & DTK_M(MONTH)) && !haveTextMonth &&
+ !(fmask & DTK_M(DAY)) && tm->tm_mon >= 1 &&
+ tm->tm_mon <= 31)
+ {
+ tm->tm_mday = tm->tm_mon;
+ tmask = DTK_M(DAY);
+ }
+ haveTextMonth = true;
+ tm->tm_mon = val;
+ break;
+
+ case DTZMOD:
+
+ /*
+ * daylight savings time modifier (solves "MET DST"
+ * syntax)
+ */
+ tmask |= DTK_M(DTZ);
+ tm->tm_isdst = 1;
+ if (tzp == NULL)
+ return DTERR_BAD_FORMAT;
+ *tzp -= val;
+ break;
+
+ case DTZ:
+
+ /*
+ * set mask for TZ here _or_ check for DTZ later when
+ * getting default timezone
+ */
+ tmask |= DTK_M(TZ);
+ tm->tm_isdst = 1;
+ if (tzp == NULL)
+ return DTERR_BAD_FORMAT;
+ *tzp = -val;
+ break;
+
+ case TZ:
+ tm->tm_isdst = 0;
+ if (tzp == NULL)
+ return DTERR_BAD_FORMAT;
+ *tzp = -val;
+ break;
+
+ case DYNTZ:
+ tmask |= DTK_M(TZ);
+ if (tzp == NULL)
+ return DTERR_BAD_FORMAT;
+ /* we'll determine the actual offset later */
+ abbrevTz = valtz;
+ abbrev = field[i];
+ break;
+
+ case AMPM:
+ mer = val;
+ break;
+
+ case ADBC:
+ bc = (val == BC);
+ break;
+
+ case DOW:
+ tm->tm_wday = val;
+ break;
+
+ case UNITS:
+ tmask = 0;
+ ptype = val;
+ break;
+
+ case ISOTIME:
+
+ /*
+ * This is a filler field "t" indicating that the next
+ * field is time. Try to verify that this is sensible.
+ */
+ tmask = 0;
+
+ /* No preceding date? Then quit... */
+ if ((fmask & DTK_DATE_M) != DTK_DATE_M)
+ return DTERR_BAD_FORMAT;
+
+ /***
+ * We will need one of the following fields:
+ * DTK_NUMBER should be hhmmss.fff
+ * DTK_TIME should be hh:mm:ss.fff
+ * DTK_DATE should be hhmmss-zz
+ ***/
+ if (i >= nf - 1 ||
+ (ftype[i + 1] != DTK_NUMBER &&
+ ftype[i + 1] != DTK_TIME &&
+ ftype[i + 1] != DTK_DATE))
+ return DTERR_BAD_FORMAT;
+
+ ptype = val;
+ break;
+
+ case UNKNOWN_FIELD:
+
+ /*
+ * Before giving up and declaring error, check to see
+ * if it is an all-alpha timezone name.
+ */
+ namedTz = pg_tzset(field[i]);
+ if (!namedTz)
+ return DTERR_BAD_FORMAT;
+ /* we'll apply the zone setting below */
+ tmask = DTK_M(TZ);
+ break;
+
+ default:
+ return DTERR_BAD_FORMAT;
+ }
+ break;
+
+ default:
+ return DTERR_BAD_FORMAT;
+ }
+
+ if (tmask & fmask)
+ return DTERR_BAD_FORMAT;
+ fmask |= tmask;
+ } /* end loop over fields */
+
+ /* do final checking/adjustment of Y/M/D fields */
+ dterr = ValidateDate(fmask, isjulian, is2digits, bc, tm);
+ if (dterr)
+ return dterr;
+
+ /* handle AM/PM */
+ if (mer != HR24 && tm->tm_hour > HOURS_PER_DAY / 2)
+ return DTERR_FIELD_OVERFLOW;
+ if (mer == AM && tm->tm_hour == HOURS_PER_DAY / 2)
+ tm->tm_hour = 0;
+ else if (mer == PM && tm->tm_hour != HOURS_PER_DAY / 2)
+ tm->tm_hour += HOURS_PER_DAY / 2;
+
+ /* do additional checking for full date specs... */
+ if (*dtype == DTK_DATE)
+ {
+ if ((fmask & DTK_DATE_M) != DTK_DATE_M)
+ {
+ if ((fmask & DTK_TIME_M) == DTK_TIME_M)
+ return 1;
+ return DTERR_BAD_FORMAT;
+ }
+
+ /*
+ * If we had a full timezone spec, compute the offset (we could not do
+ * it before, because we need the date to resolve DST status).
+ */
+ if (namedTz != NULL)
+ {
+ /* daylight savings time modifier disallowed with full TZ */
+ if (fmask & DTK_M(DTZMOD))
+ return DTERR_BAD_FORMAT;
+
+ *tzp = DetermineTimeZoneOffset(tm, namedTz);
+ }
+
+ /*
+ * Likewise, if we had a dynamic timezone abbreviation, resolve it
+ * now.
+ */
+ if (abbrevTz != NULL)
+ {
+ /* daylight savings time modifier disallowed with dynamic TZ */
+ if (fmask & DTK_M(DTZMOD))
+ return DTERR_BAD_FORMAT;
+
+ *tzp = DetermineTimeZoneAbbrevOffset(tm, abbrev, abbrevTz);
+ }
+
+ /* timezone not specified? then use session timezone */
+ if (tzp != NULL && !(fmask & DTK_M(TZ)))
+ {
+ /*
+ * daylight savings time modifier but no standard timezone? then
+ * error
+ */
+ if (fmask & DTK_M(DTZMOD))
+ return DTERR_BAD_FORMAT;
+
+ *tzp = DetermineTimeZoneOffset(tm, session_timezone);
+ }
+ }
+
+ return 0;
+}
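+
+/*
+ * For illustration of the AM/PM adjustment above: "12:30:00 AM" decodes with
+ * tm_hour = 12 and mer = AM, so the hour is reset to 0 (00:30); "3:00 PM"
+ * decodes with tm_hour = 3 and mer = PM, so 12 is added to give 15:00.  An
+ * hour greater than 12 together with AM or PM is rejected with
+ * DTERR_FIELD_OVERFLOW.
+ */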
+
+
+/* DetermineTimeZoneOffset()
+ *
+ * Given a struct pg_tm in which tm_year, tm_mon, tm_mday, tm_hour, tm_min,
+ * and tm_sec fields are set, and a zic-style time zone definition, determine
+ * the applicable GMT offset and daylight-savings status at that time.
+ * Set the struct pg_tm's tm_isdst field accordingly, and return the GMT
+ * offset as the function result.
+ *
+ * Note: if the date is out of the range we can deal with, we return zero
+ * as the GMT offset and set tm_isdst = 0. We don't throw an error here,
+ * though probably some higher-level code will.
+ */
+int
+DetermineTimeZoneOffset(struct pg_tm *tm, pg_tz *tzp)
+{
+ pg_time_t t;
+
+ return DetermineTimeZoneOffsetInternal(tm, tzp, &t);
+}
+
+
+/* DetermineTimeZoneOffsetInternal()
+ *
+ * As above, but also return the actual UTC time imputed to the date/time
+ * into *tp.
+ *
+ * In event of an out-of-range date, we punt by returning zero into *tp.
+ * This is okay for the immediate callers but is a good reason for not
+ * exposing this worker function globally.
+ *
+ * Note: it might seem that we should use mktime() for this, but bitter
+ * experience teaches otherwise. This code is much faster than most versions
+ * of mktime(), anyway.
+ */
+static int
+DetermineTimeZoneOffsetInternal(struct pg_tm *tm, pg_tz *tzp, pg_time_t *tp)
+{
+ int date,
+ sec;
+ pg_time_t day,
+ mytime,
+ prevtime,
+ boundary,
+ beforetime,
+ aftertime;
+ long int before_gmtoff,
+ after_gmtoff;
+ int before_isdst,
+ after_isdst;
+ int res;
+
+ /*
+ * First, generate the pg_time_t value corresponding to the given
+ * y/m/d/h/m/s taken as GMT time. If this overflows, punt and decide the
+ * timezone is GMT. (For a valid Julian date, integer overflow should be
+ * impossible with 64-bit pg_time_t, but let's check for safety.)
+ */
+ if (!IS_VALID_JULIAN(tm->tm_year, tm->tm_mon, tm->tm_mday))
+ goto overflow;
+ date = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - UNIX_EPOCH_JDATE;
+
+ day = ((pg_time_t) date) * SECS_PER_DAY;
+ if (day / SECS_PER_DAY != date)
+ goto overflow;
+ sec = tm->tm_sec + (tm->tm_min + tm->tm_hour * MINS_PER_HOUR) * SECS_PER_MINUTE;
+ mytime = day + sec;
+ /* since sec >= 0, overflow could only be from +day to -mytime */
+ if (mytime < 0 && day > 0)
+ goto overflow;
+
+ /*
+ * Find the DST time boundary just before or following the target time. We
+ * assume that all zones have GMT offsets less than 24 hours, and that DST
+ * boundaries can't be closer together than 48 hours, so backing up 24
+ * hours and finding the "next" boundary will work.
+ */
+ prevtime = mytime - SECS_PER_DAY;
+ if (mytime < 0 && prevtime > 0)
+ goto overflow;
+
+ res = pg_next_dst_boundary(&prevtime,
+ &before_gmtoff, &before_isdst,
+ &boundary,
+ &after_gmtoff, &after_isdst,
+ tzp);
+ if (res < 0)
+ goto overflow; /* failure? */
+
+ if (res == 0)
+ {
+ /* Non-DST zone, life is simple */
+ tm->tm_isdst = before_isdst;
+ *tp = mytime - before_gmtoff;
+ return -(int) before_gmtoff;
+ }
+
+ /*
+ * Form the candidate pg_time_t values with local-time adjustment
+ */
+ beforetime = mytime - before_gmtoff;
+ if ((before_gmtoff > 0 &&
+ mytime < 0 && beforetime > 0) ||
+ (before_gmtoff <= 0 &&
+ mytime > 0 && beforetime < 0))
+ goto overflow;
+ aftertime = mytime - after_gmtoff;
+ if ((after_gmtoff > 0 &&
+ mytime < 0 && aftertime > 0) ||
+ (after_gmtoff <= 0 &&
+ mytime > 0 && aftertime < 0))
+ goto overflow;
+
+ /*
+ * If both before or both after the boundary time, we know what to do. The
+ * boundary time itself is considered to be after the transition, which
+ * means we can accept aftertime == boundary in the second case.
+ */
+ if (beforetime < boundary && aftertime < boundary)
+ {
+ tm->tm_isdst = before_isdst;
+ *tp = beforetime;
+ return -(int) before_gmtoff;
+ }
+ if (beforetime > boundary && aftertime >= boundary)
+ {
+ tm->tm_isdst = after_isdst;
+ *tp = aftertime;
+ return -(int) after_gmtoff;
+ }
+
+ /*
+ * It's an invalid or ambiguous time due to timezone transition. In a
+ * spring-forward transition, prefer the "before" interpretation; in a
+ * fall-back transition, prefer "after". (We used to define and implement
+ * this test as "prefer the standard-time interpretation", but that rule
+ * does not help to resolve the behavior when both times are reported as
+ * standard time; which does happen, eg Europe/Moscow in Oct 2014. Also,
+ * in some zones such as Europe/Dublin, there is widespread confusion
+ * about which time offset is "standard" time, so it's fortunate that our
+ * behavior doesn't depend on that.)
+ */
+ if (beforetime > aftertime)
+ {
+ tm->tm_isdst = before_isdst;
+ *tp = beforetime;
+ return -(int) before_gmtoff;
+ }
+ tm->tm_isdst = after_isdst;
+ *tp = aftertime;
+ return -(int) after_gmtoff;
+
+overflow:
+ /* Given date is out of range, so assume UTC */
+ tm->tm_isdst = 0;
+ *tp = 0;
+ return 0;
+}
+
+
+/* DetermineTimeZoneAbbrevOffset()
+ *
+ * Determine the GMT offset and DST flag to be attributed to a dynamic
+ * time zone abbreviation, that is one whose meaning has changed over time.
+ * *tm contains the local time at which the meaning should be determined,
+ * and tm->tm_isdst receives the DST flag.
+ *
+ * This differs from the behavior of DetermineTimeZoneOffset() in that a
+ * standard-time or daylight-time abbreviation forces use of the corresponding
+ * GMT offset even when the zone was then in daylight-saving or standard
+ * time, respectively.
+ * (However, that happens only if we can match the given abbreviation to some
+ * abbreviation that appears in the IANA timezone data. Otherwise, we fall
+ * back to doing DetermineTimeZoneOffset().)
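+ * For example, in zone America/New_York the abbreviation "EST" yields the
+ * standard-time offset (UTC-5) even for a July date, where
+ * DetermineTimeZoneOffset() would report the daylight offset (UTC-4).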
+ */
+int
+DetermineTimeZoneAbbrevOffset(struct pg_tm *tm, const char *abbr, pg_tz *tzp)
+{
+ pg_time_t t;
+ int zone_offset;
+ int abbr_offset;
+ int abbr_isdst;
+
+ /*
+ * Compute the UTC time we want to probe at. (In event of overflow, we'll
+ * probe at the epoch, which is a bit random but probably doesn't matter.)
+ */
+ zone_offset = DetermineTimeZoneOffsetInternal(tm, tzp, &t);
+
+ /*
+ * Try to match the abbreviation to something in the zone definition.
+ */
+ if (DetermineTimeZoneAbbrevOffsetInternal(t, abbr, tzp,
+ &abbr_offset, &abbr_isdst))
+ {
+ /* Success, so use the abbrev-specific answers. */
+ tm->tm_isdst = abbr_isdst;
+ return abbr_offset;
+ }
+
+ /*
+ * No match, so use the answers we already got from
+ * DetermineTimeZoneOffsetInternal.
+ */
+ return zone_offset;
+}
+
+
+/* DetermineTimeZoneAbbrevOffsetTS()
+ *
+ * As above but the probe time is specified as a TimestampTz (hence, UTC time),
+ * and DST status is returned into *isdst rather than into tm->tm_isdst.
+ */
+int
+DetermineTimeZoneAbbrevOffsetTS(TimestampTz ts, const char *abbr,
+ pg_tz *tzp, int *isdst)
+{
+ pg_time_t t = timestamptz_to_time_t(ts);
+ int zone_offset;
+ int abbr_offset;
+ int tz;
+ struct pg_tm tm;
+ fsec_t fsec;
+
+ /*
+ * If the abbrev matches anything in the zone data, this is pretty easy.
+ */
+ if (DetermineTimeZoneAbbrevOffsetInternal(t, abbr, tzp,
+ &abbr_offset, isdst))
+ return abbr_offset;
+
+ /*
+ * Else, break down the timestamp so we can use DetermineTimeZoneOffset.
+ */
+ if (timestamp2tm(ts, &tz, &tm, &fsec, NULL, tzp) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ zone_offset = DetermineTimeZoneOffset(&tm, tzp);
+ *isdst = tm.tm_isdst;
+ return zone_offset;
+}
+
+
+/* DetermineTimeZoneAbbrevOffsetInternal()
+ *
+ * Workhorse for above two functions: work from a pg_time_t probe instant.
+ * On success, return GMT offset and DST status into *offset and *isdst.
+ */
+static bool
+DetermineTimeZoneAbbrevOffsetInternal(pg_time_t t, const char *abbr, pg_tz *tzp,
+ int *offset, int *isdst)
+{
+ char upabbr[TZ_STRLEN_MAX + 1];
+ unsigned char *p;
+ long int gmtoff;
+
+ /* We need to force the abbrev to upper case */
+ strlcpy(upabbr, abbr, sizeof(upabbr));
+ for (p = (unsigned char *) upabbr; *p; p++)
+ *p = pg_toupper(*p);
+
+ /* Look up the abbrev's meaning at this time in this zone */
+ if (pg_interpret_timezone_abbrev(upabbr,
+ &t,
+ &gmtoff,
+ isdst,
+ tzp))
+ {
+ /* Change sign to agree with DetermineTimeZoneOffset() */
+ *offset = (int) -gmtoff;
+ return true;
+ }
+ return false;
+}
+
+
+/* DecodeTimeOnly()
+ * Interpret parsed string as time fields only.
+ * Returns 0 if successful, DTERR code if bogus input detected.
+ *
+ * Note that support for time zone is here for
+ * SQL TIME WITH TIME ZONE, but it reveals
+ * bogosity with SQL date/time standards, since
+ * we must infer a time zone from current time.
+ * - thomas 2000-03-10
+ * Allow specifying date to get a better time zone,
+ * if time zones are allowed. - thomas 2001-12-26
+ */
+int
+DecodeTimeOnly(char **field, int *ftype, int nf,
+ int *dtype, struct pg_tm *tm, fsec_t *fsec, int *tzp)
+{
+ int fmask = 0,
+ tmask,
+ type;
+ int ptype = 0; /* "prefix type" for ISO h04mm05s06 format */
+ int i;
+ int val;
+ int dterr;
+ bool isjulian = false;
+ bool is2digits = false;
+ bool bc = false;
+ int mer = HR24;
+ pg_tz *namedTz = NULL;
+ pg_tz *abbrevTz = NULL;
+ char *abbrev = NULL;
+ pg_tz *valtz;
+
+ *dtype = DTK_TIME;
+ tm->tm_hour = 0;
+ tm->tm_min = 0;
+ tm->tm_sec = 0;
+ *fsec = 0;
+ /* don't know daylight savings time status apriori */
+ tm->tm_isdst = -1;
+
+ if (tzp != NULL)
+ *tzp = 0;
+
+ for (i = 0; i < nf; i++)
+ {
+ switch (ftype[i])
+ {
+ case DTK_DATE:
+
+ /*
+ * Time zone not allowed? Then should not accept dates or time
+ * zones no matter what else!
+ */
+ if (tzp == NULL)
+ return DTERR_BAD_FORMAT;
+
+ /* Under limited circumstances, we will accept a date... */
+ if (i == 0 && nf >= 2 &&
+ (ftype[nf - 1] == DTK_DATE || ftype[1] == DTK_TIME))
+ {
+ dterr = DecodeDate(field[i], fmask,
+ &tmask, &is2digits, tm);
+ if (dterr)
+ return dterr;
+ }
+ /* otherwise, this is a time and/or time zone */
+ else
+ {
+ if (isdigit((unsigned char) *field[i]))
+ {
+ char *cp;
+
+ /*
+ * Starts with a digit but we already have a time
+ * field? Then we are in trouble with time already...
+ */
+ if ((fmask & DTK_TIME_M) == DTK_TIME_M)
+ return DTERR_BAD_FORMAT;
+
+ /*
+ * Should not get here and fail. Sanity check only...
+ */
+ if ((cp = strchr(field[i], '-')) == NULL)
+ return DTERR_BAD_FORMAT;
+
+ /* Get the time zone from the end of the string */
+ dterr = DecodeTimezone(cp, tzp);
+ if (dterr)
+ return dterr;
+ *cp = '\0';
+
+ /*
+ * Then read the rest of the field as a concatenated
+ * time
+ */
+ dterr = DecodeNumberField(strlen(field[i]), field[i],
+ (fmask | DTK_DATE_M),
+ &tmask, tm,
+ fsec, &is2digits);
+ if (dterr < 0)
+ return dterr;
+ ftype[i] = dterr;
+
+ tmask |= DTK_M(TZ);
+ }
+ else
+ {
+ namedTz = pg_tzset(field[i]);
+ if (!namedTz)
+ {
+ /*
+ * We should return an error code instead of
+ * ereport'ing directly, but then there is no way
+ * to report the bad time zone name.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("time zone \"%s\" not recognized",
+ field[i])));
+ }
+ /* we'll apply the zone setting below */
+ ftype[i] = DTK_TZ;
+ tmask = DTK_M(TZ);
+ }
+ }
+ break;
+
+ case DTK_TIME:
+ dterr = DecodeTime(field[i], (fmask | DTK_DATE_M),
+ INTERVAL_FULL_RANGE,
+ &tmask, tm, fsec);
+ if (dterr)
+ return dterr;
+ break;
+
+ case DTK_TZ:
+ {
+ int tz;
+
+ if (tzp == NULL)
+ return DTERR_BAD_FORMAT;
+
+ dterr = DecodeTimezone(field[i], &tz);
+ if (dterr)
+ return dterr;
+ *tzp = tz;
+ tmask = DTK_M(TZ);
+ }
+ break;
+
+ case DTK_NUMBER:
+
+ /*
+ * Was this an "ISO time" with embedded field labels? An
+ * example is "h04mm05s06" - thomas 2001-02-04
+ */
+ if (ptype != 0)
+ {
+ char *cp;
+ int val;
+
+ /* Only accept a date under limited circumstances */
+ switch (ptype)
+ {
+ case DTK_JULIAN:
+ case DTK_YEAR:
+ case DTK_MONTH:
+ case DTK_DAY:
+ if (tzp == NULL)
+ return DTERR_BAD_FORMAT;
+ default:
+ break;
+ }
+
+ errno = 0;
+ val = strtoint(field[i], &cp, 10);
+ if (errno == ERANGE)
+ return DTERR_FIELD_OVERFLOW;
+
+ /*
+ * only a few kinds are allowed to have an embedded
+ * decimal
+ */
+ if (*cp == '.')
+ switch (ptype)
+ {
+ case DTK_JULIAN:
+ case DTK_TIME:
+ case DTK_SECOND:
+ break;
+ default:
+ return DTERR_BAD_FORMAT;
+ break;
+ }
+ else if (*cp != '\0')
+ return DTERR_BAD_FORMAT;
+
+ switch (ptype)
+ {
+ case DTK_YEAR:
+ tm->tm_year = val;
+ tmask = DTK_M(YEAR);
+ break;
+
+ case DTK_MONTH:
+
+ /*
+ * already have a month and hour? then assume
+ * minutes
+ */
+ if ((fmask & DTK_M(MONTH)) != 0 &&
+ (fmask & DTK_M(HOUR)) != 0)
+ {
+ tm->tm_min = val;
+ tmask = DTK_M(MINUTE);
+ }
+ else
+ {
+ tm->tm_mon = val;
+ tmask = DTK_M(MONTH);
+ }
+ break;
+
+ case DTK_DAY:
+ tm->tm_mday = val;
+ tmask = DTK_M(DAY);
+ break;
+
+ case DTK_HOUR:
+ tm->tm_hour = val;
+ tmask = DTK_M(HOUR);
+ break;
+
+ case DTK_MINUTE:
+ tm->tm_min = val;
+ tmask = DTK_M(MINUTE);
+ break;
+
+ case DTK_SECOND:
+ tm->tm_sec = val;
+ tmask = DTK_M(SECOND);
+ if (*cp == '.')
+ {
+ dterr = ParseFractionalSecond(cp, fsec);
+ if (dterr)
+ return dterr;
+ tmask = DTK_ALL_SECS_M;
+ }
+ break;
+
+ case DTK_TZ:
+ tmask = DTK_M(TZ);
+ dterr = DecodeTimezone(field[i], tzp);
+ if (dterr)
+ return dterr;
+ break;
+
+ case DTK_JULIAN:
+ /* previous field was a label for "julian date" */
+ if (val < 0)
+ return DTERR_FIELD_OVERFLOW;
+ tmask = DTK_DATE_M;
+ j2date(val, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
+ isjulian = true;
+
+ if (*cp == '.')
+ {
+ double time;
+
+ dterr = ParseFraction(cp, &time);
+ if (dterr)
+ return dterr;
+ time *= USECS_PER_DAY;
+ dt2time(time,
+ &tm->tm_hour, &tm->tm_min,
+ &tm->tm_sec, fsec);
+ tmask |= DTK_TIME_M;
+ }
+ break;
+
+ case DTK_TIME:
+ /* previous field was "t" for ISO time */
+ dterr = DecodeNumberField(strlen(field[i]), field[i],
+ (fmask | DTK_DATE_M),
+ &tmask, tm,
+ fsec, &is2digits);
+ if (dterr < 0)
+ return dterr;
+ ftype[i] = dterr;
+
+ if (tmask != DTK_TIME_M)
+ return DTERR_BAD_FORMAT;
+ break;
+
+ default:
+ return DTERR_BAD_FORMAT;
+ break;
+ }
+
+ ptype = 0;
+ *dtype = DTK_DATE;
+ }
+ else
+ {
+ char *cp;
+ int flen;
+
+ flen = strlen(field[i]);
+ cp = strchr(field[i], '.');
+
+ /* Embedded decimal? */
+ if (cp != NULL)
+ {
+ /*
+ * Under limited circumstances, we will accept a
+ * date...
+ */
+ if (i == 0 && nf >= 2 && ftype[nf - 1] == DTK_DATE)
+ {
+ dterr = DecodeDate(field[i], fmask,
+ &tmask, &is2digits, tm);
+ if (dterr)
+ return dterr;
+ }
+ /* embedded decimal and several digits before? */
+ else if (flen - strlen(cp) > 2)
+ {
+ /*
+ * Interpret as a concatenated date or time. Set
+ * the type field to allow decoding other fields
+ * later. Example: 20011223 or 040506
+ */
+ dterr = DecodeNumberField(flen, field[i],
+ (fmask | DTK_DATE_M),
+ &tmask, tm,
+ fsec, &is2digits);
+ if (dterr < 0)
+ return dterr;
+ ftype[i] = dterr;
+ }
+ else
+ return DTERR_BAD_FORMAT;
+ }
+ else if (flen > 4)
+ {
+ dterr = DecodeNumberField(flen, field[i],
+ (fmask | DTK_DATE_M),
+ &tmask, tm,
+ fsec, &is2digits);
+ if (dterr < 0)
+ return dterr;
+ ftype[i] = dterr;
+ }
+ /* otherwise it is a single date/time field... */
+ else
+ {
+ dterr = DecodeNumber(flen, field[i],
+ false,
+ (fmask | DTK_DATE_M),
+ &tmask, tm,
+ fsec, &is2digits);
+ if (dterr)
+ return dterr;
+ }
+ }
+ break;
+
+ case DTK_STRING:
+ case DTK_SPECIAL:
+ /* timezone abbrevs take precedence over built-in tokens */
+ type = DecodeTimezoneAbbrev(i, field[i], &val, &valtz);
+ if (type == UNKNOWN_FIELD)
+ type = DecodeSpecial(i, field[i], &val);
+ if (type == IGNORE_DTF)
+ continue;
+
+ tmask = DTK_M(type);
+ switch (type)
+ {
+ case RESERV:
+ switch (val)
+ {
+ case DTK_NOW:
+ tmask = DTK_TIME_M;
+ *dtype = DTK_TIME;
+ GetCurrentTimeUsec(tm, fsec, NULL);
+ break;
+
+ case DTK_ZULU:
+ tmask = (DTK_TIME_M | DTK_M(TZ));
+ *dtype = DTK_TIME;
+ tm->tm_hour = 0;
+ tm->tm_min = 0;
+ tm->tm_sec = 0;
+ tm->tm_isdst = 0;
+ break;
+
+ default:
+ return DTERR_BAD_FORMAT;
+ }
+
+ break;
+
+ case DTZMOD:
+
+ /*
+ * daylight savings time modifier (solves "MET DST"
+ * syntax)
+ */
+ tmask |= DTK_M(DTZ);
+ tm->tm_isdst = 1;
+ if (tzp == NULL)
+ return DTERR_BAD_FORMAT;
+ *tzp -= val;
+ break;
+
+ case DTZ:
+
+ /*
+ * set mask for TZ here _or_ check for DTZ later when
+ * getting default timezone
+ */
+ tmask |= DTK_M(TZ);
+ tm->tm_isdst = 1;
+ if (tzp == NULL)
+ return DTERR_BAD_FORMAT;
+ *tzp = -val;
+ ftype[i] = DTK_TZ;
+ break;
+
+ case TZ:
+ tm->tm_isdst = 0;
+ if (tzp == NULL)
+ return DTERR_BAD_FORMAT;
+ *tzp = -val;
+ ftype[i] = DTK_TZ;
+ break;
+
+ case DYNTZ:
+ tmask |= DTK_M(TZ);
+ if (tzp == NULL)
+ return DTERR_BAD_FORMAT;
+ /* we'll determine the actual offset later */
+ abbrevTz = valtz;
+ abbrev = field[i];
+ ftype[i] = DTK_TZ;
+ break;
+
+ case AMPM:
+ mer = val;
+ break;
+
+ case ADBC:
+ bc = (val == BC);
+ break;
+
+ case UNITS:
+ tmask = 0;
+ ptype = val;
+ break;
+
+ case ISOTIME:
+ tmask = 0;
+
+ /***
+ * We will need one of the following fields:
+ * DTK_NUMBER should be hhmmss.fff
+ * DTK_TIME should be hh:mm:ss.fff
+ * DTK_DATE should be hhmmss-zz
+ ***/
+ if (i >= nf - 1 ||
+ (ftype[i + 1] != DTK_NUMBER &&
+ ftype[i + 1] != DTK_TIME &&
+ ftype[i + 1] != DTK_DATE))
+ return DTERR_BAD_FORMAT;
+
+ ptype = val;
+ break;
+
+ case UNKNOWN_FIELD:
+
+ /*
+ * Before giving up and declaring error, check to see
+ * if it is an all-alpha timezone name.
+ */
+ namedTz = pg_tzset(field[i]);
+ if (!namedTz)
+ return DTERR_BAD_FORMAT;
+ /* we'll apply the zone setting below */
+ tmask = DTK_M(TZ);
+ break;
+
+ default:
+ return DTERR_BAD_FORMAT;
+ }
+ break;
+
+ default:
+ return DTERR_BAD_FORMAT;
+ }
+
+ if (tmask & fmask)
+ return DTERR_BAD_FORMAT;
+ fmask |= tmask;
+ } /* end loop over fields */
+
+ /* do final checking/adjustment of Y/M/D fields */
+ dterr = ValidateDate(fmask, isjulian, is2digits, bc, tm);
+ if (dterr)
+ return dterr;
+
+ /* handle AM/PM */
+ if (mer != HR24 && tm->tm_hour > HOURS_PER_DAY / 2)
+ return DTERR_FIELD_OVERFLOW;
+ if (mer == AM && tm->tm_hour == HOURS_PER_DAY / 2)
+ tm->tm_hour = 0;
+ else if (mer == PM && tm->tm_hour != HOURS_PER_DAY / 2)
+ tm->tm_hour += HOURS_PER_DAY / 2;
+
+ /* check for time overflow */
+ if (time_overflows(tm->tm_hour, tm->tm_min, tm->tm_sec, *fsec))
+ return DTERR_FIELD_OVERFLOW;
+
+ if ((fmask & DTK_TIME_M) != DTK_TIME_M)
+ return DTERR_BAD_FORMAT;
+
+ /*
+ * If we had a full timezone spec, compute the offset (we could not do it
+ * before, because we may need the date to resolve DST status).
+ */
+ if (namedTz != NULL)
+ {
+ long int gmtoff;
+
+ /* daylight savings time modifier disallowed with full TZ */
+ if (fmask & DTK_M(DTZMOD))
+ return DTERR_BAD_FORMAT;
+
+ /* if non-DST zone, we do not need to know the date */
+ if (pg_get_timezone_offset(namedTz, &gmtoff))
+ {
+ *tzp = -(int) gmtoff;
+ }
+ else
+ {
+ /* a date has to be specified */
+ if ((fmask & DTK_DATE_M) != DTK_DATE_M)
+ return DTERR_BAD_FORMAT;
+ *tzp = DetermineTimeZoneOffset(tm, namedTz);
+ }
+ }
+
+ /*
+ * Likewise, if we had a dynamic timezone abbreviation, resolve it now.
+ */
+ if (abbrevTz != NULL)
+ {
+ struct pg_tm tt,
+ *tmp = &tt;
+
+ /*
+ * daylight savings time modifier but no standard timezone? then error
+ */
+ if (fmask & DTK_M(DTZMOD))
+ return DTERR_BAD_FORMAT;
+
+ if ((fmask & DTK_DATE_M) == 0)
+ GetCurrentDateTime(tmp);
+ else
+ {
+ /* a date has to be specified */
+ if ((fmask & DTK_DATE_M) != DTK_DATE_M)
+ return DTERR_BAD_FORMAT;
+ tmp->tm_year = tm->tm_year;
+ tmp->tm_mon = tm->tm_mon;
+ tmp->tm_mday = tm->tm_mday;
+ }
+ tmp->tm_hour = tm->tm_hour;
+ tmp->tm_min = tm->tm_min;
+ tmp->tm_sec = tm->tm_sec;
+ *tzp = DetermineTimeZoneAbbrevOffset(tmp, abbrev, abbrevTz);
+ tm->tm_isdst = tmp->tm_isdst;
+ }
+
+ /* timezone not specified? then use session timezone */
+ if (tzp != NULL && !(fmask & DTK_M(TZ)))
+ {
+ struct pg_tm tt,
+ *tmp = &tt;
+
+ /*
+ * daylight savings time modifier but no standard timezone? then error
+ */
+ if (fmask & DTK_M(DTZMOD))
+ return DTERR_BAD_FORMAT;
+
+ if ((fmask & DTK_DATE_M) == 0)
+ GetCurrentDateTime(tmp);
+ else
+ {
+ /* a date has to be specified */
+ if ((fmask & DTK_DATE_M) != DTK_DATE_M)
+ return DTERR_BAD_FORMAT;
+ tmp->tm_year = tm->tm_year;
+ tmp->tm_mon = tm->tm_mon;
+ tmp->tm_mday = tm->tm_mday;
+ }
+ tmp->tm_hour = tm->tm_hour;
+ tmp->tm_min = tm->tm_min;
+ tmp->tm_sec = tm->tm_sec;
+ *tzp = DetermineTimeZoneOffset(tmp, session_timezone);
+ tm->tm_isdst = tmp->tm_isdst;
+ }
+
+ return 0;
+}
+
+/* DecodeDate()
+ * Decode date string which includes delimiters.
+ * Return 0 if okay, a DTERR code if not.
+ *
+ * str: field to be parsed
+ * fmask: bitmask for field types already seen
+ * *tmask: receives bitmask for fields found here
+ * *is2digits: set to true if we find 2-digit year
+ * *tm: field values are stored into appropriate members of this struct
+ */
+static int
+DecodeDate(char *str, int fmask, int *tmask, bool *is2digits,
+ struct pg_tm *tm)
+{
+ fsec_t fsec;
+ int nf = 0;
+ int i,
+ len;
+ int dterr;
+ bool haveTextMonth = false;
+ int type,
+ val,
+ dmask = 0;
+ char *field[MAXDATEFIELDS];
+
+ *tmask = 0;
+
+ /* parse this string... */
+ while (*str != '\0' && nf < MAXDATEFIELDS)
+ {
+ /* skip field separators */
+ while (*str != '\0' && !isalnum((unsigned char) *str))
+ str++;
+
+ if (*str == '\0')
+ return DTERR_BAD_FORMAT; /* end of string after separator */
+
+ field[nf] = str;
+ if (isdigit((unsigned char) *str))
+ {
+ while (isdigit((unsigned char) *str))
+ str++;
+ }
+ else if (isalpha((unsigned char) *str))
+ {
+ while (isalpha((unsigned char) *str))
+ str++;
+ }
+
+ /* Just get rid of any non-digit, non-alpha characters... */
+ if (*str != '\0')
+ *str++ = '\0';
+ nf++;
+ }
+
+ /* look first for text fields, since that will be unambiguous month */
+ for (i = 0; i < nf; i++)
+ {
+ if (isalpha((unsigned char) *field[i]))
+ {
+ type = DecodeSpecial(i, field[i], &val);
+ if (type == IGNORE_DTF)
+ continue;
+
+ dmask = DTK_M(type);
+ switch (type)
+ {
+ case MONTH:
+ tm->tm_mon = val;
+ haveTextMonth = true;
+ break;
+
+ default:
+ return DTERR_BAD_FORMAT;
+ }
+ if (fmask & dmask)
+ return DTERR_BAD_FORMAT;
+
+ fmask |= dmask;
+ *tmask |= dmask;
+
+ /* mark this field as being completed */
+ field[i] = NULL;
+ }
+ }
+
+ /* now pick up remaining numeric fields */
+ for (i = 0; i < nf; i++)
+ {
+ if (field[i] == NULL)
+ continue;
+
+ if ((len = strlen(field[i])) <= 0)
+ return DTERR_BAD_FORMAT;
+
+ dterr = DecodeNumber(len, field[i], haveTextMonth, fmask,
+ &dmask, tm,
+ &fsec, is2digits);
+ if (dterr)
+ return dterr;
+
+ if (fmask & dmask)
+ return DTERR_BAD_FORMAT;
+
+ fmask |= dmask;
+ *tmask |= dmask;
+ }
+
+ if ((fmask & ~(DTK_M(DOY) | DTK_M(TZ))) != DTK_DATE_M)
+ return DTERR_BAD_FORMAT;
+
+ /* validation of the field values must wait until ValidateDate() */
+
+ return 0;
+}
+
+/* ValidateDate()
+ * Check valid year/month/day values, handle BC and DOY cases
+ * Return 0 if okay, a DTERR code if not.
+ */
+int
+ValidateDate(int fmask, bool isjulian, bool is2digits, bool bc,
+ struct pg_tm *tm)
+{
+ if (fmask & DTK_M(YEAR))
+ {
+ if (isjulian)
+ {
+ /* tm_year is correct and should not be touched */
+ }
+ else if (bc)
+ {
+ /* there is no year zero in AD/BC notation */
+ if (tm->tm_year <= 0)
+ return DTERR_FIELD_OVERFLOW;
+ /* internally, we represent 1 BC as year zero, 2 BC as -1, etc */
+ tm->tm_year = -(tm->tm_year - 1);
+ }
+ else if (is2digits)
+ {
+ /* process 1- or 2-digit input as 1970-2069 AD; allow '0' and '00' */
+ if (tm->tm_year < 0) /* just paranoia */
+ return DTERR_FIELD_OVERFLOW;
+ if (tm->tm_year < 70)
+ tm->tm_year += 2000;
+ else if (tm->tm_year < 100)
+ tm->tm_year += 1900;
+ }
+ else
+ {
+ /* there is no year zero in AD/BC notation */
+ if (tm->tm_year <= 0)
+ return DTERR_FIELD_OVERFLOW;
+ }
+ }
+
+ /* now that we have correct year, decode DOY */
+ if (fmask & DTK_M(DOY))
+ {
+ j2date(date2j(tm->tm_year, 1, 1) + tm->tm_yday - 1,
+ &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
+ }
+
+ /* check for valid month */
+ if (fmask & DTK_M(MONTH))
+ {
+ if (tm->tm_mon < 1 || tm->tm_mon > MONTHS_PER_YEAR)
+ return DTERR_MD_FIELD_OVERFLOW;
+ }
+
+ /* minimal check for valid day */
+ if (fmask & DTK_M(DAY))
+ {
+ if (tm->tm_mday < 1 || tm->tm_mday > 31)
+ return DTERR_MD_FIELD_OVERFLOW;
+ }
+
+ if ((fmask & DTK_DATE_M) == DTK_DATE_M)
+ {
+ /*
+ * Check for valid day of month, now that we know for sure the month
+ * and year. Note we don't use MD_FIELD_OVERFLOW here, since it seems
+ * unlikely that "Feb 29" is a YMD-order error.
+ */
+ if (tm->tm_mday > day_tab[isleap(tm->tm_year)][tm->tm_mon - 1])
+ return DTERR_FIELD_OVERFLOW;
+ }
+
+ return 0;
+}
+
+
+/* DecodeTimeCommon()
+ * Decode time string which includes delimiters.
+ * Return 0 if okay, a DTERR code if not.
+ * tmask and itm are output parameters.
+ *
+ * This code is shared between the timestamp and interval cases.
+ * We return a struct pg_itm (of which only the tm_usec, tm_sec, tm_min,
+ * and tm_hour fields are used) and let the wrapper functions below
+ * convert and range-check as necessary.
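+ * For a MINUTE TO SECOND range, a two-field input such as "12:34" is read
+ * as 12 minutes and 34 seconds rather than as hours and minutes.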
+ */
+static int
+DecodeTimeCommon(char *str, int fmask, int range,
+ int *tmask, struct pg_itm *itm)
+{
+ char *cp;
+ int dterr;
+ fsec_t fsec = 0;
+
+ *tmask = DTK_TIME_M;
+
+ errno = 0;
+ itm->tm_hour = strtoi64(str, &cp, 10);
+ if (errno == ERANGE)
+ return DTERR_FIELD_OVERFLOW;
+ if (*cp != ':')
+ return DTERR_BAD_FORMAT;
+ errno = 0;
+ itm->tm_min = strtoint(cp + 1, &cp, 10);
+ if (errno == ERANGE)
+ return DTERR_FIELD_OVERFLOW;
+ if (*cp == '\0')
+ {
+ itm->tm_sec = 0;
+ /* If it's a MINUTE TO SECOND interval, take 2 fields as being mm:ss */
+ if (range == (INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND)))
+ {
+ if (itm->tm_hour > INT_MAX || itm->tm_hour < INT_MIN)
+ return DTERR_FIELD_OVERFLOW;
+ itm->tm_sec = itm->tm_min;
+ itm->tm_min = (int) itm->tm_hour;
+ itm->tm_hour = 0;
+ }
+ }
+ else if (*cp == '.')
+ {
+ /* always assume mm:ss.sss is MINUTE TO SECOND */
+ dterr = ParseFractionalSecond(cp, &fsec);
+ if (dterr)
+ return dterr;
+ if (itm->tm_hour > INT_MAX || itm->tm_hour < INT_MIN)
+ return DTERR_FIELD_OVERFLOW;
+ itm->tm_sec = itm->tm_min;
+ itm->tm_min = (int) itm->tm_hour;
+ itm->tm_hour = 0;
+ }
+ else if (*cp == ':')
+ {
+ errno = 0;
+ itm->tm_sec = strtoint(cp + 1, &cp, 10);
+ if (errno == ERANGE)
+ return DTERR_FIELD_OVERFLOW;
+ if (*cp == '.')
+ {
+ dterr = ParseFractionalSecond(cp, &fsec);
+ if (dterr)
+ return dterr;
+ }
+ else if (*cp != '\0')
+ return DTERR_BAD_FORMAT;
+ }
+ else
+ return DTERR_BAD_FORMAT;
+
+ /* do a sanity check; but caller must check the range of tm_hour */
+ if (itm->tm_hour < 0 ||
+ itm->tm_min < 0 || itm->tm_min > MINS_PER_HOUR - 1 ||
+ itm->tm_sec < 0 || itm->tm_sec > SECS_PER_MINUTE ||
+ fsec < 0 || fsec > USECS_PER_SEC)
+ return DTERR_FIELD_OVERFLOW;
+
+ itm->tm_usec = (int) fsec;
+
+ return 0;
+}
+
+/* DecodeTime()
+ * Decode time string which includes delimiters.
+ * Return 0 if okay, a DTERR code if not.
+ *
+ * This version is used for timestamps. The results are returned into
+ * the tm_hour/tm_min/tm_sec fields of *tm, and microseconds into *fsec.
+ */
+static int
+DecodeTime(char *str, int fmask, int range,
+ int *tmask, struct pg_tm *tm, fsec_t *fsec)
+{
+ struct pg_itm itm;
+ int dterr;
+
+ dterr = DecodeTimeCommon(str, fmask, range,
+ tmask, &itm);
+ if (dterr)
+ return dterr;
+
+ if (itm.tm_hour > INT_MAX)
+ return DTERR_FIELD_OVERFLOW;
+ tm->tm_hour = (int) itm.tm_hour;
+ tm->tm_min = itm.tm_min;
+ tm->tm_sec = itm.tm_sec;
+ *fsec = itm.tm_usec;
+
+ return 0;
+}
+
+/* DecodeTimeForInterval()
+ * Decode time string which includes delimiters.
+ * Return 0 if okay, a DTERR code if not.
+ *
+ * This version is used for intervals. The results are returned into
+ * itm_in->tm_usec.
+ */
+static int
+DecodeTimeForInterval(char *str, int fmask, int range,
+ int *tmask, struct pg_itm_in *itm_in)
+{
+ struct pg_itm itm;
+ int dterr;
+
+ dterr = DecodeTimeCommon(str, fmask, range,
+ tmask, &itm);
+ if (dterr)
+ return dterr;
+
+ itm_in->tm_usec = itm.tm_usec;
+ if (!int64_multiply_add(itm.tm_hour, USECS_PER_HOUR, &itm_in->tm_usec) ||
+ !int64_multiply_add(itm.tm_min, USECS_PER_MINUTE, &itm_in->tm_usec) ||
+ !int64_multiply_add(itm.tm_sec, USECS_PER_SEC, &itm_in->tm_usec))
+ return DTERR_FIELD_OVERFLOW;
+
+ return 0;
+}
+
+
+/* DecodeNumber()
+ * Interpret plain numeric field as a date value in context.
+ * Return 0 if okay, a DTERR code if not.
+ */
+static int
+DecodeNumber(int flen, char *str, bool haveTextMonth, int fmask,
+ int *tmask, struct pg_tm *tm, fsec_t *fsec, bool *is2digits)
+{
+ int val;
+ char *cp;
+ int dterr;
+
+ *tmask = 0;
+
+ errno = 0;
+ val = strtoint(str, &cp, 10);
+ if (errno == ERANGE)
+ return DTERR_FIELD_OVERFLOW;
+ if (cp == str)
+ return DTERR_BAD_FORMAT;
+
+ if (*cp == '.')
+ {
+ /*
+ * More than two digits before decimal point? Then could be a date or
+ * a run-together time: 2001.360 20011225 040506.789
+ */
+ if (cp - str > 2)
+ {
+ dterr = DecodeNumberField(flen, str,
+ (fmask | DTK_DATE_M),
+ tmask, tm,
+ fsec, is2digits);
+ if (dterr < 0)
+ return dterr;
+ return 0;
+ }
+
+ dterr = ParseFractionalSecond(cp, fsec);
+ if (dterr)
+ return dterr;
+ }
+ else if (*cp != '\0')
+ return DTERR_BAD_FORMAT;
+
+ /* Special case for day of year */
+ if (flen == 3 && (fmask & DTK_DATE_M) == DTK_M(YEAR) && val >= 1 &&
+ val <= 366)
+ {
+ *tmask = (DTK_M(DOY) | DTK_M(MONTH) | DTK_M(DAY));
+ tm->tm_yday = val;
+ /* tm_mon and tm_mday can't actually be set yet ... */
+ return 0;
+ }
+
+ /* Switch based on what we have so far */
+ switch (fmask & DTK_DATE_M)
+ {
+ case 0:
+
+ /*
+ * Nothing so far; make a decision about what we think the input
+ * is. There used to be lots of heuristics here, but the
+ * consensus now is to be paranoid. It *must* be either
+ * YYYY-MM-DD (with a more-than-two-digit year field), or the
+ * field order defined by DateOrder.
+ */
+ if (flen >= 3 || DateOrder == DATEORDER_YMD)
+ {
+ *tmask = DTK_M(YEAR);
+ tm->tm_year = val;
+ }
+ else if (DateOrder == DATEORDER_DMY)
+ {
+ *tmask = DTK_M(DAY);
+ tm->tm_mday = val;
+ }
+ else
+ {
+ *tmask = DTK_M(MONTH);
+ tm->tm_mon = val;
+ }
+ break;
+
+ case (DTK_M(YEAR)):
+ /* Must be at second field of YY-MM-DD */
+ *tmask = DTK_M(MONTH);
+ tm->tm_mon = val;
+ break;
+
+ case (DTK_M(MONTH)):
+ if (haveTextMonth)
+ {
+ /*
+ * We are at the first numeric field of a date that included a
+ * textual month name. We want to support the variants
+ * MON-DD-YYYY, DD-MON-YYYY, and YYYY-MON-DD as unambiguous
+ * inputs. We will also accept MON-DD-YY or DD-MON-YY in
+ * either DMY or MDY modes, as well as YY-MON-DD in YMD mode.
+ */
+ if (flen >= 3 || DateOrder == DATEORDER_YMD)
+ {
+ *tmask = DTK_M(YEAR);
+ tm->tm_year = val;
+ }
+ else
+ {
+ *tmask = DTK_M(DAY);
+ tm->tm_mday = val;
+ }
+ }
+ else
+ {
+ /* Must be at second field of MM-DD-YY */
+ *tmask = DTK_M(DAY);
+ tm->tm_mday = val;
+ }
+ break;
+
+ case (DTK_M(YEAR) | DTK_M(MONTH)):
+ if (haveTextMonth)
+ {
+ /* Need to accept DD-MON-YYYY even in YMD mode */
+ if (flen >= 3 && *is2digits)
+ {
+ /* The guess that the first numeric field was a day was wrong */
+ *tmask = DTK_M(DAY); /* YEAR is already set */
+ tm->tm_mday = tm->tm_year;
+ tm->tm_year = val;
+ *is2digits = false;
+ }
+ else
+ {
+ *tmask = DTK_M(DAY);
+ tm->tm_mday = val;
+ }
+ }
+ else
+ {
+ /* Must be at third field of YY-MM-DD */
+ *tmask = DTK_M(DAY);
+ tm->tm_mday = val;
+ }
+ break;
+
+ case (DTK_M(DAY)):
+ /* Must be at second field of DD-MM-YY */
+ *tmask = DTK_M(MONTH);
+ tm->tm_mon = val;
+ break;
+
+ case (DTK_M(MONTH) | DTK_M(DAY)):
+ /* Must be at third field of DD-MM-YY or MM-DD-YY */
+ *tmask = DTK_M(YEAR);
+ tm->tm_year = val;
+ break;
+
+ case (DTK_M(YEAR) | DTK_M(MONTH) | DTK_M(DAY)):
+ /* we have all the date, so it must be a time field */
+ dterr = DecodeNumberField(flen, str, fmask,
+ tmask, tm,
+ fsec, is2digits);
+ if (dterr < 0)
+ return dterr;
+ return 0;
+
+ default:
+ /* Anything else is bogus input */
+ return DTERR_BAD_FORMAT;
+ }
+
+ /*
+ * When processing a year field, mark it for adjustment if it's only one
+ * or two digits.
+ */
+ if (*tmask == DTK_M(YEAR))
+ *is2digits = (flen <= 2);
+
+ return 0;
+}
+
+
+/* DecodeNumberField()
+ * Interpret numeric string as a concatenated date or time field.
+ * Return a DTK token (>= 0) if successful, a DTERR code (< 0) if not.
+ *
+ * Use the context of previously decoded fields to help with
+ * the interpretation.
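+ * For example, when no date fields have been seen yet, "20011225" is taken
+ * as the date 2001-12-25; once the date mask is already complete, "040506"
+ * is taken as the time 04:05:06.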
+ */
+static int
+DecodeNumberField(int len, char *str, int fmask,
+ int *tmask, struct pg_tm *tm, fsec_t *fsec, bool *is2digits)
+{
+ char *cp;
+
+ /*
+ * Have a decimal point? Then this is a date or something with a seconds
+ * field...
+ */
+ if ((cp = strchr(str, '.')) != NULL)
+ {
+ /*
+ * Can we use ParseFractionalSecond here? Not clear whether trailing
+ * junk should be rejected ...
+ */
+ if (cp[1] == '\0')
+ {
+ /* avoid assuming that strtod will accept "." */
+ *fsec = 0;
+ }
+ else
+ {
+ double frac;
+
+ errno = 0;
+ frac = strtod(cp, NULL);
+ if (errno != 0)
+ return DTERR_BAD_FORMAT;
+ *fsec = rint(frac * 1000000);
+ }
+ /* Now truncate off the fraction for further processing */
+ *cp = '\0';
+ len = strlen(str);
+ }
+ /* No decimal point and no complete date yet? */
+ else if ((fmask & DTK_DATE_M) != DTK_DATE_M)
+ {
+ if (len >= 6)
+ {
+ *tmask = DTK_DATE_M;
+
+ /*
+ * Start from end and consider first 2 as Day, next 2 as Month,
+ * and the rest as Year.
+ */
+ tm->tm_mday = atoi(str + (len - 2));
+ *(str + (len - 2)) = '\0';
+ tm->tm_mon = atoi(str + (len - 4));
+ *(str + (len - 4)) = '\0';
+ tm->tm_year = atoi(str);
+ if ((len - 4) == 2)
+ *is2digits = true;
+
+ return DTK_DATE;
+ }
+ }
+
+ /* not all time fields are specified? */
+ if ((fmask & DTK_TIME_M) != DTK_TIME_M)
+ {
+ /* hhmmss */
+ if (len == 6)
+ {
+ *tmask = DTK_TIME_M;
+ tm->tm_sec = atoi(str + 4);
+ *(str + 4) = '\0';
+ tm->tm_min = atoi(str + 2);
+ *(str + 2) = '\0';
+ tm->tm_hour = atoi(str);
+
+ return DTK_TIME;
+ }
+ /* hhmm? */
+ else if (len == 4)
+ {
+ *tmask = DTK_TIME_M;
+ tm->tm_sec = 0;
+ tm->tm_min = atoi(str + 2);
+ *(str + 2) = '\0';
+ tm->tm_hour = atoi(str);
+
+ return DTK_TIME;
+ }
+ }
+
+ return DTERR_BAD_FORMAT;
+}
+
+
+/* DecodeTimezone()
+ * Interpret string as a numeric timezone.
+ *
+ * Return 0 if okay (and set *tzp), a DTERR code if not okay.
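+ * Note the sign flip applied when storing the result: for example "+05:30"
+ * sets *tzp to -19800 seconds, matching DetermineTimeZoneOffset()'s
+ * convention.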
+ */
+int
+DecodeTimezone(char *str, int *tzp)
+{
+ int tz;
+ int hr,
+ min,
+ sec = 0;
+ char *cp;
+
+ /* leading character must be "+" or "-" */
+ if (*str != '+' && *str != '-')
+ return DTERR_BAD_FORMAT;
+
+ errno = 0;
+ hr = strtoint(str + 1, &cp, 10);
+ if (errno == ERANGE)
+ return DTERR_TZDISP_OVERFLOW;
+
+ /* explicit delimiter? */
+ if (*cp == ':')
+ {
+ errno = 0;
+ min = strtoint(cp + 1, &cp, 10);
+ if (errno == ERANGE)
+ return DTERR_TZDISP_OVERFLOW;
+ if (*cp == ':')
+ {
+ errno = 0;
+ sec = strtoint(cp + 1, &cp, 10);
+ if (errno == ERANGE)
+ return DTERR_TZDISP_OVERFLOW;
+ }
+ }
+ /* otherwise, might have run things together... */
+ else if (*cp == '\0' && strlen(str) > 3)
+ {
+ min = hr % 100;
+ hr = hr / 100;
+ /* we could, but don't, support a run-together hhmmss format */
+ }
+ else
+ min = 0;
+
+ /* Range-check the values; see notes in datatype/timestamp.h */
+ if (hr < 0 || hr > MAX_TZDISP_HOUR)
+ return DTERR_TZDISP_OVERFLOW;
+ if (min < 0 || min >= MINS_PER_HOUR)
+ return DTERR_TZDISP_OVERFLOW;
+ if (sec < 0 || sec >= SECS_PER_MINUTE)
+ return DTERR_TZDISP_OVERFLOW;
+
+ tz = (hr * MINS_PER_HOUR + min) * SECS_PER_MINUTE + sec;
+ if (*str == '-')
+ tz = -tz;
+
+ *tzp = -tz;
+
+ if (*cp != '\0')
+ return DTERR_BAD_FORMAT;
+
+ return 0;
+}
+
+
+/* DecodeTimezoneAbbrev()
+ * Interpret string as a timezone abbreviation, if possible.
+ *
+ * Returns an abbreviation type (TZ, DTZ, or DYNTZ), or UNKNOWN_FIELD if
+ * string is not any known abbreviation. On success, set *offset and *tz to
+ * represent the UTC offset (for TZ or DTZ) or underlying zone (for DYNTZ).
+ * Note that full timezone names (such as America/New_York) are not handled
+ * here, mostly for historical reasons.
+ *
+ * Given string must be lowercased already.
+ *
+ * Implement a cache lookup since it is likely that dates
+ * will be related in format.
+ */
+int
+DecodeTimezoneAbbrev(int field, char *lowtoken,
+ int *offset, pg_tz **tz)
+{
+ int type;
+ const datetkn *tp;
+
+ tp = abbrevcache[field];
+ /* use strncmp so that we match truncated tokens */
+ if (tp == NULL || strncmp(lowtoken, tp->token, TOKMAXLEN) != 0)
+ {
+ if (zoneabbrevtbl)
+ tp = datebsearch(lowtoken, zoneabbrevtbl->abbrevs,
+ zoneabbrevtbl->numabbrevs);
+ else
+ tp = NULL;
+ }
+ if (tp == NULL)
+ {
+ type = UNKNOWN_FIELD;
+ *offset = 0;
+ *tz = NULL;
+ }
+ else
+ {
+ abbrevcache[field] = tp;
+ type = tp->type;
+ if (type == DYNTZ)
+ {
+ *offset = 0;
+ *tz = FetchDynamicTimeZone(zoneabbrevtbl, tp);
+ }
+ else
+ {
+ *offset = tp->value;
+ *tz = NULL;
+ }
+ }
+
+ return type;
+}
+
+
+/* DecodeSpecial()
+ * Decode text string using lookup table.
+ *
+ * Recognizes the keywords listed in datetktbl.
+ * Note: at one time this would also recognize timezone abbreviations,
+ * but no more; use DecodeTimezoneAbbrev for that.
+ *
+ * Given string must be lowercased already.
+ *
+ * Implement a cache lookup since it is likely that dates
+ * will be related in format.
+ */
+int
+DecodeSpecial(int field, char *lowtoken, int *val)
+{
+ int type;
+ const datetkn *tp;
+
+ tp = datecache[field];
+ /* use strncmp so that we match truncated tokens */
+ if (tp == NULL || strncmp(lowtoken, tp->token, TOKMAXLEN) != 0)
+ {
+ tp = datebsearch(lowtoken, datetktbl, szdatetktbl);
+ }
+ if (tp == NULL)
+ {
+ type = UNKNOWN_FIELD;
+ *val = 0;
+ }
+ else
+ {
+ datecache[field] = tp;
+ type = tp->type;
+ *val = tp->value;
+ }
+
+ return type;
+}
+
+
+/* ClearPgItmIn
+ *
+ * Zero out a pg_itm_in
+ */
+static inline void
+ClearPgItmIn(struct pg_itm_in *itm_in)
+{
+ itm_in->tm_usec = 0;
+ itm_in->tm_mday = 0;
+ itm_in->tm_mon = 0;
+ itm_in->tm_year = 0;
+}
+
+
+/* DecodeInterval()
+ * Interpret previously parsed fields for general time interval.
+ * Returns 0 if successful, DTERR code if bogus input detected.
+ * dtype and itm_in are output parameters.
+ *
+ * Allow "date" field DTK_DATE since this could be just
+ * an unsigned floating point number. - thomas 1997-11-16
+ *
+ * Allow ISO-style time span, with implicit units on number of days
+ * preceding an hh:mm:ss field. - thomas 1998-04-30
+ */
+int
+DecodeInterval(char **field, int *ftype, int nf, int range,
+ int *dtype, struct pg_itm_in *itm_in)
+{
+ bool force_negative = false;
+ bool is_before = false;
+ char *cp;
+ int fmask = 0,
+ tmask,
+ type,
+ uval;
+ int i;
+ int dterr;
+ int64 val;
+ double fval;
+
+ *dtype = DTK_DELTA;
+ type = IGNORE_DTF;
+ ClearPgItmIn(itm_in);
+
+ /*----------
+ * The SQL standard defines the interval literal
+ * '-1 1:00:00'
+ * to mean "negative 1 days and negative 1 hours", while Postgres
+ * traditionally treats this as meaning "negative 1 days and positive
+ * 1 hours". In SQL_STANDARD intervalstyle, we apply the leading sign
+ * to all fields if there are no other explicit signs.
+ *
+ * We leave the signs alone if there are additional explicit signs.
+ * This protects us against misinterpreting postgres-style dump output,
+ * since the postgres-style output code has always put an explicit sign on
+ * all fields following a negative field. But note that SQL-spec output
+ * is ambiguous and can be misinterpreted on load! (So it's best practice
+ * to dump in postgres style, not SQL style.)
+ *----------
+ */
+ if (IntervalStyle == INTSTYLE_SQL_STANDARD && nf > 0 && *field[0] == '-')
+ {
+ force_negative = true;
+ /* Check for additional explicit signs */
+ for (i = 1; i < nf; i++)
+ {
+ if (*field[i] == '-' || *field[i] == '+')
+ {
+ force_negative = false;
+ break;
+ }
+ }
+ }
+
+ /* read through list backwards to pick up units before values */
+ for (i = nf - 1; i >= 0; i--)
+ {
+ switch (ftype[i])
+ {
+ case DTK_TIME:
+ dterr = DecodeTimeForInterval(field[i], fmask, range,
+ &tmask, itm_in);
+ if (dterr)
+ return dterr;
+ if (force_negative &&
+ itm_in->tm_usec > 0)
+ itm_in->tm_usec = -itm_in->tm_usec;
+ type = DTK_DAY;
+ break;
+
+ case DTK_TZ:
+
+ /*
+ * Timezone means a token with a leading sign character and at
+ * least one digit; there could be ':', '.', '-' embedded in
+ * it as well.
+ */
+ Assert(*field[i] == '-' || *field[i] == '+');
+
+ /*
+ * Check for signed hh:mm or hh:mm:ss. If so, process exactly
+ * like DTK_TIME case above, plus handling the sign.
+ */
+ if (strchr(field[i] + 1, ':') != NULL &&
+ DecodeTimeForInterval(field[i] + 1, fmask, range,
+ &tmask, itm_in) == 0)
+ {
+ if (*field[i] == '-')
+ {
+ /* flip the sign on time field */
+ if (itm_in->tm_usec == PG_INT64_MIN)
+ return DTERR_FIELD_OVERFLOW;
+ itm_in->tm_usec = -itm_in->tm_usec;
+ }
+
+ if (force_negative &&
+ itm_in->tm_usec > 0)
+ itm_in->tm_usec = -itm_in->tm_usec;
+
+ /*
+ * Set the next type to be a day, if units are not
+ * specified. This handles the case of '1 +02:03' since we
+ * are reading right to left.
+ */
+ type = DTK_DAY;
+ break;
+ }
+
+ /*
+ * Otherwise, fall through to DTK_NUMBER case, which can
+ * handle signed float numbers and signed year-month values.
+ */
+
+ /* FALLTHROUGH */
+
+ case DTK_DATE:
+ case DTK_NUMBER:
+ if (type == IGNORE_DTF)
+ {
+ /* use typmod to decide what rightmost field is */
+ switch (range)
+ {
+ case INTERVAL_MASK(YEAR):
+ type = DTK_YEAR;
+ break;
+ case INTERVAL_MASK(MONTH):
+ case INTERVAL_MASK(YEAR) | INTERVAL_MASK(MONTH):
+ type = DTK_MONTH;
+ break;
+ case INTERVAL_MASK(DAY):
+ type = DTK_DAY;
+ break;
+ case INTERVAL_MASK(HOUR):
+ case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR):
+ type = DTK_HOUR;
+ break;
+ case INTERVAL_MASK(MINUTE):
+ case INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE):
+ case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE):
+ type = DTK_MINUTE;
+ break;
+ case INTERVAL_MASK(SECOND):
+ case INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND):
+ case INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND):
+ case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND):
+ type = DTK_SECOND;
+ break;
+ default:
+ type = DTK_SECOND;
+ break;
+ }
+ }
+
+ errno = 0;
+ val = strtoi64(field[i], &cp, 10);
+ if (errno == ERANGE)
+ return DTERR_FIELD_OVERFLOW;
+
+ if (*cp == '-')
+ {
+ /* SQL "years-months" syntax */
+ int val2;
+
+ val2 = strtoint(cp + 1, &cp, 10);
+ if (errno == ERANGE || val2 < 0 || val2 >= MONTHS_PER_YEAR)
+ return DTERR_FIELD_OVERFLOW;
+ if (*cp != '\0')
+ return DTERR_BAD_FORMAT;
+ type = DTK_MONTH;
+ if (*field[i] == '-')
+ val2 = -val2;
+ if (pg_mul_s64_overflow(val, MONTHS_PER_YEAR, &val))
+ return DTERR_FIELD_OVERFLOW;
+ if (pg_add_s64_overflow(val, val2, &val))
+ return DTERR_FIELD_OVERFLOW;
+ fval = 0;
+ }
+ else if (*cp == '.')
+ {
+ dterr = ParseFraction(cp, &fval);
+ if (dterr)
+ return dterr;
+ if (*field[i] == '-')
+ fval = -fval;
+ }
+ else if (*cp == '\0')
+ fval = 0;
+ else
+ return DTERR_BAD_FORMAT;
+
+ tmask = 0; /* DTK_M(type); */
+
+ if (force_negative)
+ {
+ /* val and fval should be of same sign, but test anyway */
+ if (val > 0)
+ val = -val;
+ if (fval > 0)
+ fval = -fval;
+ }
+
+ switch (type)
+ {
+ case DTK_MICROSEC:
+ if (!AdjustMicroseconds(val, fval, 1, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ tmask = DTK_M(MICROSECOND);
+ break;
+
+ case DTK_MILLISEC:
+ if (!AdjustMicroseconds(val, fval, 1000, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ tmask = DTK_M(MILLISECOND);
+ break;
+
+ case DTK_SECOND:
+ if (!AdjustMicroseconds(val, fval, USECS_PER_SEC, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+
+ /*
+ * If any subseconds were specified, consider this
+ * microsecond and millisecond input as well.
+ */
+ if (fval == 0)
+ tmask = DTK_M(SECOND);
+ else
+ tmask = DTK_ALL_SECS_M;
+ break;
+
+ case DTK_MINUTE:
+ if (!AdjustMicroseconds(val, fval, USECS_PER_MINUTE, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ tmask = DTK_M(MINUTE);
+ break;
+
+ case DTK_HOUR:
+ if (!AdjustMicroseconds(val, fval, USECS_PER_HOUR, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ tmask = DTK_M(HOUR);
+ type = DTK_DAY; /* set for next field */
+ break;
+
+ case DTK_DAY:
+ if (!AdjustDays(val, 1, itm_in) ||
+ !AdjustFractMicroseconds(fval, USECS_PER_DAY, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ tmask = DTK_M(DAY);
+ break;
+
+ case DTK_WEEK:
+ if (!AdjustDays(val, 7, itm_in) ||
+ !AdjustFractDays(fval, 7, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ tmask = DTK_M(WEEK);
+ break;
+
+ case DTK_MONTH:
+ if (!AdjustMonths(val, itm_in) ||
+ !AdjustFractDays(fval, DAYS_PER_MONTH, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ tmask = DTK_M(MONTH);
+ break;
+
+ case DTK_YEAR:
+ if (!AdjustYears(val, 1, itm_in) ||
+ !AdjustFractYears(fval, 1, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ tmask = DTK_M(YEAR);
+ break;
+
+ case DTK_DECADE:
+ if (!AdjustYears(val, 10, itm_in) ||
+ !AdjustFractYears(fval, 10, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ tmask = DTK_M(DECADE);
+ break;
+
+ case DTK_CENTURY:
+ if (!AdjustYears(val, 100, itm_in) ||
+ !AdjustFractYears(fval, 100, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ tmask = DTK_M(CENTURY);
+ break;
+
+ case DTK_MILLENNIUM:
+ if (!AdjustYears(val, 1000, itm_in) ||
+ !AdjustFractYears(fval, 1000, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ tmask = DTK_M(MILLENNIUM);
+ break;
+
+ default:
+ return DTERR_BAD_FORMAT;
+ }
+ break;
+
+ case DTK_STRING:
+ case DTK_SPECIAL:
+ type = DecodeUnits(i, field[i], &uval);
+ if (type == IGNORE_DTF)
+ continue;
+
+ tmask = 0; /* DTK_M(type); */
+ switch (type)
+ {
+ case UNITS:
+ type = uval;
+ break;
+
+ case AGO:
+ is_before = true;
+ type = uval;
+ break;
+
+ case RESERV:
+ tmask = (DTK_DATE_M | DTK_TIME_M);
+ *dtype = uval;
+ break;
+
+ default:
+ return DTERR_BAD_FORMAT;
+ }
+ break;
+
+ default:
+ return DTERR_BAD_FORMAT;
+ }
+
+ if (tmask & fmask)
+ return DTERR_BAD_FORMAT;
+ fmask |= tmask;
+ }
+
+ /* ensure that at least one time field has been found */
+ if (fmask == 0)
+ return DTERR_BAD_FORMAT;
+
+ /* finally, AGO negates everything */
+ if (is_before)
+ {
+ if (itm_in->tm_usec == PG_INT64_MIN ||
+ itm_in->tm_mday == INT_MIN ||
+ itm_in->tm_mon == INT_MIN ||
+ itm_in->tm_year == INT_MIN)
+ return DTERR_FIELD_OVERFLOW;
+
+ itm_in->tm_usec = -itm_in->tm_usec;
+ itm_in->tm_mday = -itm_in->tm_mday;
+ itm_in->tm_mon = -itm_in->tm_mon;
+ itm_in->tm_year = -itm_in->tm_year;
+ }
+
+ return 0;
+}
+
+
+/*
+ * Helper functions to avoid duplicated code in DecodeISO8601Interval.
+ *
+ * Parse a decimal value and break it into integer and fractional parts.
+ * Set *endptr to end+1 of the parsed substring.
+ * Returns 0 or DTERR code.
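+ * For example, "2.5" yields *ipart = 2 and *fpart = 0.5, while "-2.5"
+ * yields -2 and -0.5 (truncation is toward zero).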
+ */
+static int
+ParseISO8601Number(char *str, char **endptr, int64 *ipart, double *fpart)
+{
+ double val;
+
+ /*
+ * Historically this has accepted anything that strtod() would take,
+ * notably including "e" notation, so continue doing that. This is
+ * slightly annoying because the precision of double is less than that of
+ * int64, so we would lose accuracy for inputs larger than 2^53 or so.
+ * However, historically we rejected inputs outside the int32 range,
+ * making that concern moot. What we do now is reject abs(val) above
+ * 1.0e15 (a round number a bit less than 2^50), so that any accepted
+ * value will have an exact integer part, and thereby a fraction part with
+ * abs(*fpart) less than 1. In the absence of field complaints it doesn't
+ * seem worth working harder.
+ */
+ if (!(isdigit((unsigned char) *str) || *str == '-' || *str == '.'))
+ return DTERR_BAD_FORMAT;
+ errno = 0;
+ val = strtod(str, endptr);
+ /* did we not see anything that looks like a double? */
+ if (*endptr == str || errno != 0)
+ return DTERR_BAD_FORMAT;
+ /* watch out for overflow, including infinities; reject NaN too */
+ if (isnan(val) || val < -1.0e15 || val > 1.0e15)
+ return DTERR_FIELD_OVERFLOW;
+ /* be very sure we truncate towards zero (cf dtrunc()) */
+ if (val >= 0)
+ *ipart = (int64) floor(val);
+ else
+ *ipart = (int64) -floor(-val);
+ *fpart = val - *ipart;
+ /* Callers expect this to hold */
+ Assert(*fpart > -1.0 && *fpart < 1.0);
+ return 0;
+}
+
+/*
+ * Determine number of integral digits in a valid ISO 8601 number field
+ * (we should ignore sign and any fraction part)
+ */
+static int
+ISO8601IntegerWidth(char *fieldstart)
+{
+ /* We might have had a leading '-' */
+ if (*fieldstart == '-')
+ fieldstart++;
+ return strspn(fieldstart, "0123456789");
+}
+
+
+/* DecodeISO8601Interval()
+ * Decode an ISO 8601 time interval of the "format with designators"
+ * (section 4.4.3.2) or "alternative format" (section 4.4.3.3)
+ * Examples: P1D for 1 day
+ * PT1H for 1 hour
+ * P2Y6M7DT1H30M for 2 years, 6 months, 7 days 1 hour 30 min
+ * P0002-06-07T01:30:00 the same value in alternative format
+ *
+ * Returns 0 if successful, DTERR code if bogus input detected.
+ * Note: error code should be DTERR_BAD_FORMAT if input doesn't look like
+ * ISO8601, otherwise this could cause unexpected error messages.
+ * dtype and itm_in are output parameters.
+ *
+ * A couple exceptions from the spec:
+ * - a week field ('W') may coexist with other units
+ * - allows decimals in fields other than the least significant unit.
+ */
+int
+DecodeISO8601Interval(char *str,
+ int *dtype, struct pg_itm_in *itm_in)
+{
+ bool datepart = true;
+ bool havefield = false;
+
+ *dtype = DTK_DELTA;
+ ClearPgItmIn(itm_in);
+
+ if (strlen(str) < 2 || str[0] != 'P')
+ return DTERR_BAD_FORMAT;
+
+ str++;
+ while (*str)
+ {
+ char *fieldstart;
+ int64 val;
+ double fval;
+ char unit;
+ int dterr;
+
+ if (*str == 'T') /* T indicates the beginning of the time part */
+ {
+ datepart = false;
+ havefield = false;
+ str++;
+ continue;
+ }
+
+ fieldstart = str;
+ dterr = ParseISO8601Number(str, &str, &val, &fval);
+ if (dterr)
+ return dterr;
+
+ /*
+ * Note: we could step off the end of the string here. Code below
+ * *must* exit the loop if unit == '\0'.
+ */
+ unit = *str++;
+
+ if (datepart)
+ {
+ switch (unit) /* before T: Y M W D */
+ {
+ case 'Y':
+ if (!AdjustYears(val, 1, itm_in) ||
+ !AdjustFractYears(fval, 1, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ break;
+ case 'M':
+ if (!AdjustMonths(val, itm_in) ||
+ !AdjustFractDays(fval, DAYS_PER_MONTH, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ break;
+ case 'W':
+ if (!AdjustDays(val, 7, itm_in) ||
+ !AdjustFractDays(fval, 7, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ break;
+ case 'D':
+ if (!AdjustDays(val, 1, itm_in) ||
+ !AdjustFractMicroseconds(fval, USECS_PER_DAY, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ break;
+ case 'T': /* ISO 8601 4.4.3.3 Alternative Format / Basic */
+ case '\0':
+ if (ISO8601IntegerWidth(fieldstart) == 8 && !havefield)
+ {
+ if (!AdjustYears(val / 10000, 1, itm_in) ||
+ !AdjustMonths((val / 100) % 100, itm_in) ||
+ !AdjustDays(val % 100, 1, itm_in) ||
+ !AdjustFractMicroseconds(fval, USECS_PER_DAY, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ if (unit == '\0')
+ return 0;
+ datepart = false;
+ havefield = false;
+ continue;
+ }
+ /* Else fall through to extended alternative format */
+ /* FALLTHROUGH */
+ case '-': /* ISO 8601 4.4.3.3 Alternative Format,
+ * Extended */
+ if (havefield)
+ return DTERR_BAD_FORMAT;
+
+ if (!AdjustYears(val, 1, itm_in) ||
+ !AdjustFractYears(fval, 1, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ if (unit == '\0')
+ return 0;
+ if (unit == 'T')
+ {
+ datepart = false;
+ havefield = false;
+ continue;
+ }
+
+ dterr = ParseISO8601Number(str, &str, &val, &fval);
+ if (dterr)
+ return dterr;
+ if (!AdjustMonths(val, itm_in) ||
+ !AdjustFractDays(fval, DAYS_PER_MONTH, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ if (*str == '\0')
+ return 0;
+ if (*str == 'T')
+ {
+ datepart = false;
+ havefield = false;
+ continue;
+ }
+ if (*str != '-')
+ return DTERR_BAD_FORMAT;
+ str++;
+
+ dterr = ParseISO8601Number(str, &str, &val, &fval);
+ if (dterr)
+ return dterr;
+ if (!AdjustDays(val, 1, itm_in) ||
+ !AdjustFractMicroseconds(fval, USECS_PER_DAY, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ if (*str == '\0')
+ return 0;
+ if (*str == 'T')
+ {
+ datepart = false;
+ havefield = false;
+ continue;
+ }
+ return DTERR_BAD_FORMAT;
+ default:
+ /* not a valid date unit suffix */
+ return DTERR_BAD_FORMAT;
+ }
+ }
+ else
+ {
+ switch (unit) /* after T: H M S */
+ {
+ case 'H':
+ if (!AdjustMicroseconds(val, fval, USECS_PER_HOUR, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ break;
+ case 'M':
+ if (!AdjustMicroseconds(val, fval, USECS_PER_MINUTE, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ break;
+ case 'S':
+ if (!AdjustMicroseconds(val, fval, USECS_PER_SEC, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ break;
+ case '\0': /* ISO 8601 4.4.3.3 Alternative Format */
+ if (ISO8601IntegerWidth(fieldstart) == 6 && !havefield)
+ {
+ if (!AdjustMicroseconds(val / 10000, 0, USECS_PER_HOUR, itm_in) ||
+ !AdjustMicroseconds((val / 100) % 100, 0, USECS_PER_MINUTE, itm_in) ||
+ !AdjustMicroseconds(val % 100, 0, USECS_PER_SEC, itm_in) ||
+ !AdjustFractMicroseconds(fval, 1, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ return 0;
+ }
+ /* Else fall through to extended alternative format */
+ /* FALLTHROUGH */
+ case ':': /* ISO 8601 4.4.3.3 Alternative Format,
+ * Extended */
+ if (havefield)
+ return DTERR_BAD_FORMAT;
+
+ if (!AdjustMicroseconds(val, fval, USECS_PER_HOUR, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ if (unit == '\0')
+ return 0;
+
+ dterr = ParseISO8601Number(str, &str, &val, &fval);
+ if (dterr)
+ return dterr;
+ if (!AdjustMicroseconds(val, fval, USECS_PER_MINUTE, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ if (*str == '\0')
+ return 0;
+ if (*str != ':')
+ return DTERR_BAD_FORMAT;
+ str++;
+
+ dterr = ParseISO8601Number(str, &str, &val, &fval);
+ if (dterr)
+ return dterr;
+ if (!AdjustMicroseconds(val, fval, USECS_PER_SEC, itm_in))
+ return DTERR_FIELD_OVERFLOW;
+ if (*str == '\0')
+ return 0;
+ return DTERR_BAD_FORMAT;
+
+ default:
+ /* not a valid time unit suffix */
+ return DTERR_BAD_FORMAT;
+ }
+ }
+
+ havefield = true;
+ }
+
+ return 0;
+}
+
+
+/* DecodeUnits()
+ * Decode text string using lookup table.
+ *
+ * This routine recognizes keywords associated with time interval units.
+ *
+ * Given string must be lowercased already.
+ *
+ * Implement a cache lookup since it is likely that dates
+ * will be related in format.
+ */
+int
+DecodeUnits(int field, char *lowtoken, int *val)
+{
+ int type;
+ const datetkn *tp;
+
+ tp = deltacache[field];
+ /* use strncmp so that we match truncated tokens */
+ if (tp == NULL || strncmp(lowtoken, tp->token, TOKMAXLEN) != 0)
+ {
+ tp = datebsearch(lowtoken, deltatktbl, szdeltatktbl);
+ }
+ if (tp == NULL)
+ {
+ type = UNKNOWN_FIELD;
+ *val = 0;
+ }
+ else
+ {
+ deltacache[field] = tp;
+ type = tp->type;
+ *val = tp->value;
+ }
+
+ return type;
+} /* DecodeUnits() */
+
+/*
+ * Report an error detected by one of the datetime input processing routines.
+ *
+ * dterr is the error code, str is the original input string, datatype is
+ * the name of the datatype we were trying to accept.
+ *
+ * Note: it might seem useless to distinguish DTERR_INTERVAL_OVERFLOW and
+ * DTERR_TZDISP_OVERFLOW from DTERR_FIELD_OVERFLOW, but SQL99 mandates three
+ * separate SQLSTATE codes, so ...
+ */
+void
+DateTimeParseError(int dterr, const char *str, const char *datatype)
+{
+ switch (dterr)
+ {
+ case DTERR_FIELD_OVERFLOW:
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_FIELD_OVERFLOW),
+ errmsg("date/time field value out of range: \"%s\"",
+ str)));
+ break;
+ case DTERR_MD_FIELD_OVERFLOW:
+ /* <nanny>same as above, but add hint about DateStyle</nanny> */
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_FIELD_OVERFLOW),
+ errmsg("date/time field value out of range: \"%s\"",
+ str),
+ errhint("Perhaps you need a different \"datestyle\" setting.")));
+ break;
+ case DTERR_INTERVAL_OVERFLOW:
+ ereport(ERROR,
+ (errcode(ERRCODE_INTERVAL_FIELD_OVERFLOW),
+ errmsg("interval field value out of range: \"%s\"",
+ str)));
+ break;
+ case DTERR_TZDISP_OVERFLOW:
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TIME_ZONE_DISPLACEMENT_VALUE),
+ errmsg("time zone displacement out of range: \"%s\"",
+ str)));
+ break;
+ case DTERR_BAD_FORMAT:
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ datatype, str)));
+ break;
+ }
+}
+
+/* datebsearch()
+ * Binary search -- from Knuth (6.2.1) Algorithm B. Special case like this
+ * is WAY faster than the generic bsearch().
+ */
+static const datetkn *
+datebsearch(const char *key, const datetkn *base, int nel)
+{
+ if (nel > 0)
+ {
+ const datetkn *last = base + nel - 1,
+ *position;
+ int result;
+
+ while (last >= base)
+ {
+ position = base + ((last - base) >> 1);
+ /* precheck the first character for a bit of extra speed */
+ result = (int) key[0] - (int) position->token[0];
+ if (result == 0)
+ {
+ /* use strncmp so that we match truncated tokens */
+ result = strncmp(key, position->token, TOKMAXLEN);
+ if (result == 0)
+ return position;
+ }
+ if (result < 0)
+ last = position - 1;
+ else
+ base = position + 1;
+ }
+ }
+ return NULL;
+}
+
+/* EncodeTimezone()
+ * Copies representation of a numeric timezone offset to str.
+ *
+ * Returns a pointer to the new end of string. No NUL terminator is put
+ * there; callers are responsible for NUL terminating str themselves.
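+ * For example, tz = -19800 is emitted as "+05:30"; the stored sign is the
+ * reverse of the sign that is displayed.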
+ */
+static char *
+EncodeTimezone(char *str, int tz, int style)
+{
+ int hour,
+ min,
+ sec;
+
+ sec = abs(tz);
+ min = sec / SECS_PER_MINUTE;
+ sec -= min * SECS_PER_MINUTE;
+ hour = min / MINS_PER_HOUR;
+ min -= hour * MINS_PER_HOUR;
+
+ /* TZ is negated compared to sign we wish to display ... */
+ *str++ = (tz <= 0 ? '+' : '-');
+
+ if (sec != 0)
+ {
+ str = pg_ultostr_zeropad(str, hour, 2);
+ *str++ = ':';
+ str = pg_ultostr_zeropad(str, min, 2);
+ *str++ = ':';
+ str = pg_ultostr_zeropad(str, sec, 2);
+ }
+ else if (min != 0 || style == USE_XSD_DATES)
+ {
+ str = pg_ultostr_zeropad(str, hour, 2);
+ *str++ = ':';
+ str = pg_ultostr_zeropad(str, min, 2);
+ }
+ else
+ str = pg_ultostr_zeropad(str, hour, 2);
+ return str;
+}
+
+/* EncodeDateOnly()
+ * Encode date as local time.
+ */
+void
+EncodeDateOnly(struct pg_tm *tm, int style, char *str)
+{
+ Assert(tm->tm_mon >= 1 && tm->tm_mon <= MONTHS_PER_YEAR);
+
+ switch (style)
+ {
+ case USE_ISO_DATES:
+ case USE_XSD_DATES:
+ /* compatible with ISO date formats */
+ str = pg_ultostr_zeropad(str,
+ (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4);
+ *str++ = '-';
+ str = pg_ultostr_zeropad(str, tm->tm_mon, 2);
+ *str++ = '-';
+ str = pg_ultostr_zeropad(str, tm->tm_mday, 2);
+ break;
+
+ case USE_SQL_DATES:
+ /* compatible with Oracle/Ingres date formats */
+ if (DateOrder == DATEORDER_DMY)
+ {
+ str = pg_ultostr_zeropad(str, tm->tm_mday, 2);
+ *str++ = '/';
+ str = pg_ultostr_zeropad(str, tm->tm_mon, 2);
+ }
+ else
+ {
+ str = pg_ultostr_zeropad(str, tm->tm_mon, 2);
+ *str++ = '/';
+ str = pg_ultostr_zeropad(str, tm->tm_mday, 2);
+ }
+ *str++ = '/';
+ str = pg_ultostr_zeropad(str,
+ (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4);
+ break;
+
+ case USE_GERMAN_DATES:
+ /* German-style date format */
+ str = pg_ultostr_zeropad(str, tm->tm_mday, 2);
+ *str++ = '.';
+ str = pg_ultostr_zeropad(str, tm->tm_mon, 2);
+ *str++ = '.';
+ str = pg_ultostr_zeropad(str,
+ (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4);
+ break;
+
+ case USE_POSTGRES_DATES:
+ default:
+ /* traditional date-only style for Postgres */
+ if (DateOrder == DATEORDER_DMY)
+ {
+ str = pg_ultostr_zeropad(str, tm->tm_mday, 2);
+ *str++ = '-';
+ str = pg_ultostr_zeropad(str, tm->tm_mon, 2);
+ }
+ else
+ {
+ str = pg_ultostr_zeropad(str, tm->tm_mon, 2);
+ *str++ = '-';
+ str = pg_ultostr_zeropad(str, tm->tm_mday, 2);
+ }
+ *str++ = '-';
+ str = pg_ultostr_zeropad(str,
+ (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4);
+ break;
+ }
+
+ if (tm->tm_year <= 0)
+ {
+ memcpy(str, " BC", 3); /* Don't copy NUL */
+ str += 3;
+ }
+ *str = '\0';
+}
+
+
+/* EncodeTimeOnly()
+ * Encode time fields only.
+ *
+ * tm and fsec are the value to encode, print_tz determines whether to include
+ * a time zone (the difference between time and timetz types), tz is the
+ * numeric time zone offset, style is the date style, str is where to write the
+ * output.
+ */
+void
+EncodeTimeOnly(struct pg_tm *tm, fsec_t fsec, bool print_tz, int tz, int style, char *str)
+{
+ str = pg_ultostr_zeropad(str, tm->tm_hour, 2);
+ *str++ = ':';
+ str = pg_ultostr_zeropad(str, tm->tm_min, 2);
+ *str++ = ':';
+ str = AppendSeconds(str, tm->tm_sec, fsec, MAX_TIME_PRECISION, true);
+ if (print_tz)
+ str = EncodeTimezone(str, tz, style);
+ *str = '\0';
+}
+
+
+/* EncodeDateTime()
+ * Encode date and time interpreted as local time.
+ *
+ * tm and fsec are the value to encode, print_tz determines whether to include
+ * a time zone (the difference between timestamp and timestamptz types), tz is
+ * the numeric time zone offset, tzn is the textual time zone, which if
+ * specified will be used instead of tz by some styles, style is the date
+ * style, str is where to write the output.
+ *
+ * Supported date styles:
+ * Postgres - day mon hh:mm:ss yyyy tz
+ * SQL - mm/dd/yyyy hh:mm:ss.ss tz
+ * ISO - yyyy-mm-dd hh:mm:ss+/-tz
+ * German - dd.mm.yyyy hh:mm:ss tz
+ * XSD - yyyy-mm-ddThh:mm:ss.ss+/-tz
+ */
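+/*
+ * Rough examples of the styles above for 2003-04-12 13:30:15 UTC (these are
+ * illustrative, not part of the original comment, and assume DateOrder = MDY
+ * and tzn = "UTC"):
+ * Postgres - Sat Apr 12 13:30:15 2003 UTC
+ * SQL - 04/12/2003 13:30:15 UTC
+ * ISO - 2003-04-12 13:30:15+00
+ * German - 12.04.2003 13:30:15 UTC
+ * XSD - 2003-04-12T13:30:15+00:00
+ */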
+void
+EncodeDateTime(struct pg_tm *tm, fsec_t fsec, bool print_tz, int tz, const char *tzn, int style, char *str)
+{
+ int day;
+
+ Assert(tm->tm_mon >= 1 && tm->tm_mon <= MONTHS_PER_YEAR);
+
+ /*
+ * Negative tm_isdst means we have no valid time zone translation.
+ */
+ if (tm->tm_isdst < 0)
+ print_tz = false;
+
+ switch (style)
+ {
+ case USE_ISO_DATES:
+ case USE_XSD_DATES:
+ /* Compatible with ISO-8601 date formats */
+ str = pg_ultostr_zeropad(str,
+ (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4);
+ *str++ = '-';
+ str = pg_ultostr_zeropad(str, tm->tm_mon, 2);
+ *str++ = '-';
+ str = pg_ultostr_zeropad(str, tm->tm_mday, 2);
+ *str++ = (style == USE_ISO_DATES) ? ' ' : 'T';
+ str = pg_ultostr_zeropad(str, tm->tm_hour, 2);
+ *str++ = ':';
+ str = pg_ultostr_zeropad(str, tm->tm_min, 2);
+ *str++ = ':';
+ str = AppendTimestampSeconds(str, tm, fsec);
+ if (print_tz)
+ str = EncodeTimezone(str, tz, style);
+ break;
+
+ case USE_SQL_DATES:
+ /* Compatible with Oracle/Ingres date formats */
+ if (DateOrder == DATEORDER_DMY)
+ {
+ str = pg_ultostr_zeropad(str, tm->tm_mday, 2);
+ *str++ = '/';
+ str = pg_ultostr_zeropad(str, tm->tm_mon, 2);
+ }
+ else
+ {
+ str = pg_ultostr_zeropad(str, tm->tm_mon, 2);
+ *str++ = '/';
+ str = pg_ultostr_zeropad(str, tm->tm_mday, 2);
+ }
+ *str++ = '/';
+ str = pg_ultostr_zeropad(str,
+ (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4);
+ *str++ = ' ';
+ str = pg_ultostr_zeropad(str, tm->tm_hour, 2);
+ *str++ = ':';
+ str = pg_ultostr_zeropad(str, tm->tm_min, 2);
+ *str++ = ':';
+ str = AppendTimestampSeconds(str, tm, fsec);
+
+ /*
+ * Note: the uses of %.*s in this function would be risky if the
+ * timezone names ever contain non-ASCII characters, since we are
+ * not being careful to do encoding-aware clipping. However, all
+ * TZ abbreviations in the IANA database are plain ASCII.
+ */
+ if (print_tz)
+ {
+ if (tzn)
+ {
+ sprintf(str, " %.*s", MAXTZLEN, tzn);
+ str += strlen(str);
+ }
+ else
+ str = EncodeTimezone(str, tz, style);
+ }
+ break;
+
+ case USE_GERMAN_DATES:
+ /* German variant on European style */
+ str = pg_ultostr_zeropad(str, tm->tm_mday, 2);
+ *str++ = '.';
+ str = pg_ultostr_zeropad(str, tm->tm_mon, 2);
+ *str++ = '.';
+ str = pg_ultostr_zeropad(str,
+ (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4);
+ *str++ = ' ';
+ str = pg_ultostr_zeropad(str, tm->tm_hour, 2);
+ *str++ = ':';
+ str = pg_ultostr_zeropad(str, tm->tm_min, 2);
+ *str++ = ':';
+ str = AppendTimestampSeconds(str, tm, fsec);
+
+ if (print_tz)
+ {
+ if (tzn)
+ {
+ sprintf(str, " %.*s", MAXTZLEN, tzn);
+ str += strlen(str);
+ }
+ else
+ str = EncodeTimezone(str, tz, style);
+ }
+ break;
+
+ case USE_POSTGRES_DATES:
+ default:
+ /* Backward-compatible with traditional Postgres abstime dates */
+ day = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday);
+ tm->tm_wday = j2day(day);
+ memcpy(str, days[tm->tm_wday], 3);
+ str += 3;
+ *str++ = ' ';
+ if (DateOrder == DATEORDER_DMY)
+ {
+ str = pg_ultostr_zeropad(str, tm->tm_mday, 2);
+ *str++ = ' ';
+ memcpy(str, months[tm->tm_mon - 1], 3);
+ str += 3;
+ }
+ else
+ {
+ memcpy(str, months[tm->tm_mon - 1], 3);
+ str += 3;
+ *str++ = ' ';
+ str = pg_ultostr_zeropad(str, tm->tm_mday, 2);
+ }
+ *str++ = ' ';
+ str = pg_ultostr_zeropad(str, tm->tm_hour, 2);
+ *str++ = ':';
+ str = pg_ultostr_zeropad(str, tm->tm_min, 2);
+ *str++ = ':';
+ str = AppendTimestampSeconds(str, tm, fsec);
+ *str++ = ' ';
+ str = pg_ultostr_zeropad(str,
+ (tm->tm_year > 0) ? tm->tm_year : -(tm->tm_year - 1), 4);
+
+ if (print_tz)
+ {
+ if (tzn)
+ {
+ sprintf(str, " %.*s", MAXTZLEN, tzn);
+ str += strlen(str);
+ }
+ else
+ {
+ /*
+ * We have a time zone, but no string version. Use the
+ * numeric form, but be sure to include a leading space to
+ * avoid formatting something which would be rejected by
+ * the date/time parser later. - thomas 2001-10-19
+ */
+ *str++ = ' ';
+ str = EncodeTimezone(str, tz, style);
+ }
+ }
+ break;
+ }
+
+ if (tm->tm_year <= 0)
+ {
+ memcpy(str, " BC", 3); /* Don't copy NUL */
+ str += 3;
+ }
+ *str = '\0';
+}
+
+
+/*
+ * Helper functions to avoid duplicated code in EncodeInterval.
+ */
+
+/* Append an ISO-8601-style interval field, but only if value isn't zero */
+static char *
+AddISO8601IntPart(char *cp, int64 value, char units)
+{
+ if (value == 0)
+ return cp;
+ sprintf(cp, "%lld%c", (long long) value, units);
+ return cp + strlen(cp);
+}
+
+/* Append a postgres-style interval field, but only if value isn't zero */
+static char *
+AddPostgresIntPart(char *cp, int64 value, const char *units,
+ bool *is_zero, bool *is_before)
+{
+ if (value == 0)
+ return cp;
+ sprintf(cp, "%s%s%lld %s%s",
+ (!*is_zero) ? " " : "",
+ (*is_before && value > 0) ? "+" : "",
+ (long long) value,
+ units,
+ (value != 1) ? "s" : "");
+
+ /*
+ * Each nonzero field sets is_before for (only) the next one. This is a
+ * tad bizarre but it's how it worked before...
+ */
+ *is_before = (value < 0);
+ *is_zero = false;
+ return cp + strlen(cp);
+}
+
+/* Append a verbose-style interval field, but only if value isn't zero */
+static char *
+AddVerboseIntPart(char *cp, int64 value, const char *units,
+ bool *is_zero, bool *is_before)
+{
+ if (value == 0)
+ return cp;
+ /* first nonzero value sets is_before */
+ if (*is_zero)
+ {
+ *is_before = (value < 0);
+ value = Abs(value);
+ }
+ else if (*is_before)
+ value = -value;
+ sprintf(cp, " %lld %s%s", (long long) value, units, (value == 1) ? "" : "s");
+ *is_zero = false;
+ return cp + strlen(cp);
+}
+
+
+/* EncodeInterval()
+ * Interpret time structure as a delta time and convert to string.
+ *
+ * Support "traditional Postgres" and ISO-8601 styles.
+ * Actually, afaik ISO does not address time interval formatting,
+ * but this looks similar to the spec for absolute date/time.
+ * - thomas 1998-04-30
+ *
+ * Actually, afaik, ISO 8601 does specify formats for "time
+ * intervals...[of the]...format with time-unit designators", which
+ * are pretty ugly. The format looks something like
+ * P1Y1M1DT1H1M1.12345S
+ * but is more useful for exchanging data with computers than with humans.
+ * - ron 2003-07-14
+ *
+ * And ISO's SQL:2008 standard specifies formats for
+ * "year-month literal"s (that look like '2-3') and
+ * "day-time literal"s (that look like '4 5:6:7').
+ */
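+/*
+ * Illustrative outputs (not from the original comment) for an interval of
+ * 1 year 2 mons 3 days 04:05:06 under each IntervalStyle:
+ * postgres - 1 year 2 mons 3 days 04:05:06
+ * sql_standard - +1-2 +3 +4:05:06
+ * iso_8601 - P1Y2M3DT4H5M6S
+ * postgres_verbose - @ 1 year 2 mons 3 days 4 hours 5 mins 6 secs
+ */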
+void
+EncodeInterval(struct pg_itm *itm, int style, char *str)
+{
+ char *cp = str;
+ int year = itm->tm_year;
+ int mon = itm->tm_mon;
+ int64 mday = itm->tm_mday; /* tm_mday could be INT_MIN */
+ int64 hour = itm->tm_hour;
+ int min = itm->tm_min;
+ int sec = itm->tm_sec;
+ int fsec = itm->tm_usec;
+ bool is_before = false;
+ bool is_zero = true;
+
+ /*
+ * The signs of year and month are guaranteed to match, since they are
+ * stored internally as "month". But we'll need to check for is_before and
+ * is_zero when determining the signs of day and hour/minute/seconds
+ * fields.
+ */
+ switch (style)
+ {
+ /* SQL Standard interval format */
+ case INTSTYLE_SQL_STANDARD:
+ {
+ bool has_negative = year < 0 || mon < 0 ||
+ mday < 0 || hour < 0 ||
+ min < 0 || sec < 0 || fsec < 0;
+ bool has_positive = year > 0 || mon > 0 ||
+ mday > 0 || hour > 0 ||
+ min > 0 || sec > 0 || fsec > 0;
+ bool has_year_month = year != 0 || mon != 0;
+ bool has_day_time = mday != 0 || hour != 0 ||
+ min != 0 || sec != 0 || fsec != 0;
+ bool has_day = mday != 0;
+ bool sql_standard_value = !(has_negative && has_positive) &&
+ !(has_year_month && has_day_time);
+
+ /*
+ * SQL Standard wants only 1 "<sign>" preceding the whole
+ * interval ... but can't do that if mixed signs.
+ */
+ if (has_negative && sql_standard_value)
+ {
+ *cp++ = '-';
+ year = -year;
+ mon = -mon;
+ mday = -mday;
+ hour = -hour;
+ min = -min;
+ sec = -sec;
+ fsec = -fsec;
+ }
+
+ if (!has_negative && !has_positive)
+ {
+ sprintf(cp, "0");
+ }
+ else if (!sql_standard_value)
+ {
+ /*
+ * For non sql-standard interval values, force outputting
+ * the signs to avoid ambiguities with intervals with
+ * mixed sign components.
+ */
+ char year_sign = (year < 0 || mon < 0) ? '-' : '+';
+ char day_sign = (mday < 0) ? '-' : '+';
+ char sec_sign = (hour < 0 || min < 0 ||
+ sec < 0 || fsec < 0) ? '-' : '+';
+
+ sprintf(cp, "%c%d-%d %c%lld %c%lld:%02d:",
+ year_sign, abs(year), abs(mon),
+ day_sign, (long long) Abs(mday),
+ sec_sign, (long long) Abs(hour), abs(min));
+ cp += strlen(cp);
+ cp = AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, true);
+ *cp = '\0';
+ }
+ else if (has_year_month)
+ {
+ sprintf(cp, "%d-%d", year, mon);
+ }
+ else if (has_day)
+ {
+ sprintf(cp, "%lld %lld:%02d:",
+ (long long) mday, (long long) hour, min);
+ cp += strlen(cp);
+ cp = AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, true);
+ *cp = '\0';
+ }
+ else
+ {
+ sprintf(cp, "%lld:%02d:", (long long) hour, min);
+ cp += strlen(cp);
+ cp = AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, true);
+ *cp = '\0';
+ }
+ }
+ break;
+
+ /* ISO 8601 "time-intervals by duration only" */
+ case INTSTYLE_ISO_8601:
+ /* special-case zero to avoid printing nothing */
+ if (year == 0 && mon == 0 && mday == 0 &&
+ hour == 0 && min == 0 && sec == 0 && fsec == 0)
+ {
+ sprintf(cp, "PT0S");
+ break;
+ }
+ *cp++ = 'P';
+ cp = AddISO8601IntPart(cp, year, 'Y');
+ cp = AddISO8601IntPart(cp, mon, 'M');
+ cp = AddISO8601IntPart(cp, mday, 'D');
+ if (hour != 0 || min != 0 || sec != 0 || fsec != 0)
+ *cp++ = 'T';
+ cp = AddISO8601IntPart(cp, hour, 'H');
+ cp = AddISO8601IntPart(cp, min, 'M');
+ if (sec != 0 || fsec != 0)
+ {
+ if (sec < 0 || fsec < 0)
+ *cp++ = '-';
+ cp = AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, false);
+ *cp++ = 'S';
+ *cp++ = '\0';
+ }
+ break;
+
+ /* Compatible with postgresql < 8.4 when DateStyle = 'iso' */
+ case INTSTYLE_POSTGRES:
+ cp = AddPostgresIntPart(cp, year, "year", &is_zero, &is_before);
+
+ /*
+ * Ideally we should spell out "month" like we do for "year" and
+ * "day". However, for backward compatibility, we can't easily
+ * fix this. bjm 2011-05-24
+ */
+ cp = AddPostgresIntPart(cp, mon, "mon", &is_zero, &is_before);
+ cp = AddPostgresIntPart(cp, mday, "day", &is_zero, &is_before);
+ if (is_zero || hour != 0 || min != 0 || sec != 0 || fsec != 0)
+ {
+ bool minus = (hour < 0 || min < 0 || sec < 0 || fsec < 0);
+
+ sprintf(cp, "%s%s%02lld:%02d:",
+ is_zero ? "" : " ",
+ (minus ? "-" : (is_before ? "+" : "")),
+ (long long) Abs(hour), abs(min));
+ cp += strlen(cp);
+ cp = AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, true);
+ *cp = '\0';
+ }
+ break;
+
+ /* Compatible with postgresql < 8.4 when DateStyle != 'iso' */
+ case INTSTYLE_POSTGRES_VERBOSE:
+ default:
+ strcpy(cp, "@");
+ cp++;
+ cp = AddVerboseIntPart(cp, year, "year", &is_zero, &is_before);
+ cp = AddVerboseIntPart(cp, mon, "mon", &is_zero, &is_before);
+ cp = AddVerboseIntPart(cp, mday, "day", &is_zero, &is_before);
+ cp = AddVerboseIntPart(cp, hour, "hour", &is_zero, &is_before);
+ cp = AddVerboseIntPart(cp, min, "min", &is_zero, &is_before);
+ if (sec != 0 || fsec != 0)
+ {
+ *cp++ = ' ';
+ if (sec < 0 || (sec == 0 && fsec < 0))
+ {
+ if (is_zero)
+ is_before = true;
+ else if (!is_before)
+ *cp++ = '-';
+ }
+ else if (is_before)
+ *cp++ = '-';
+ cp = AppendSeconds(cp, sec, fsec, MAX_INTERVAL_PRECISION, false);
+ /* We output "ago", not negatives, so use abs(). */
+ sprintf(cp, " sec%s",
+ (abs(sec) != 1 || fsec != 0) ? "s" : "");
+ is_zero = false;
+ }
+ /* identically zero? then put in a unitless zero... */
+ if (is_zero)
+ strcat(cp, " 0");
+ if (is_before)
+ strcat(cp, " ago");
+ break;
+ }
+}
+
+
+/*
+ * We've been burnt by stupid errors in the ordering of the datetkn tables
+ * once too often. Arrange to check them during postmaster start.
+ */
+static bool
+CheckDateTokenTable(const char *tablename, const datetkn *base, int nel)
+{
+ bool ok = true;
+ int i;
+
+ for (i = 0; i < nel; i++)
+ {
+ /* check for token strings that don't fit */
+ if (strlen(base[i].token) > TOKMAXLEN)
+ {
+ /* %.*s is safe since all our tokens are ASCII */
+ elog(LOG, "token too long in %s table: \"%.*s\"",
+ tablename,
+ TOKMAXLEN + 1, base[i].token);
+ ok = false;
+ break; /* don't risk applying strcmp */
+ }
+ /* check for out of order */
+ if (i > 0 &&
+ strcmp(base[i - 1].token, base[i].token) >= 0)
+ {
+ elog(LOG, "ordering error in %s table: \"%s\" >= \"%s\"",
+ tablename,
+ base[i - 1].token,
+ base[i].token);
+ ok = false;
+ }
+ }
+ return ok;
+}
+
+bool
+CheckDateTokenTables(void)
+{
+ bool ok = true;
+
+ Assert(UNIX_EPOCH_JDATE == date2j(1970, 1, 1));
+ Assert(POSTGRES_EPOCH_JDATE == date2j(2000, 1, 1));
+
+ ok &= CheckDateTokenTable("datetktbl", datetktbl, szdatetktbl);
+ ok &= CheckDateTokenTable("deltatktbl", deltatktbl, szdeltatktbl);
+ return ok;
+}
+
+/*
+ * Common code for temporal prosupport functions: simplify, if possible,
+ * a call to a temporal type's length-coercion function.
+ *
+ * Types time, timetz, timestamp and timestamptz each have a range of allowed
+ * precisions. An unspecified precision is rigorously equivalent to the
+ * highest specifiable precision. We can replace the function call with a
+ * no-op RelabelType if it is coercing to the same or higher precision as the
+ * input is known to have.
+ *
+ * The input Node is always a FuncExpr, but to reduce the #include footprint
+ * of datetime.h, we declare it as Node *.
+ *
+ * Note: timestamp_scale throws an error when the typmod is out of range, but
+ * we can't get there from a cast: our typmodin will have caught it already.
+ */
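+/*
+ * Hypothetical example (not in the original comment): casting a value of
+ * declared type timestamp(2) to timestamp(4) cannot change the value, so
+ * the length-coercion call is replaced by a no-op RelabelType; casting it
+ * to timestamp(1) might round fractional seconds, so that call is left
+ * alone and ret stays NULL.
+ */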
+Node *
+TemporalSimplify(int32 max_precis, Node *node)
+{
+ FuncExpr *expr = castNode(FuncExpr, node);
+ Node *ret = NULL;
+ Node *typmod;
+
+ Assert(list_length(expr->args) >= 2);
+
+ typmod = (Node *) lsecond(expr->args);
+
+ if (IsA(typmod, Const) && !((Const *) typmod)->constisnull)
+ {
+ Node *source = (Node *) linitial(expr->args);
+ int32 old_precis = exprTypmod(source);
+ int32 new_precis = DatumGetInt32(((Const *) typmod)->constvalue);
+
+ if (new_precis < 0 || new_precis == max_precis ||
+ (old_precis >= 0 && new_precis >= old_precis))
+ ret = relabel_to_typmod(source, new_precis);
+ }
+
+ return ret;
+}
+
+/*
+ * This function gets called during timezone config file load or reload
+ * to create the final array of timezone tokens. The argument array
+ * is already sorted in name order.
+ *
+ * The result is a TimeZoneAbbrevTable (which must be a single malloc'd chunk)
+ * or NULL on malloc failure. No other error conditions are defined.
+ */
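+/*
+ * Sketch of the resulting layout (informal, inferred from the code below):
+ * the single malloc'd chunk holds the fixed TimeZoneAbbrevTable fields,
+ * then the datetkn array with one entry per abbreviation, then one
+ * MAXALIGN'd DynamicZoneAbbrev (including its zone name string) for each
+ * abbreviation whose meaning depends on a full time zone. Those entries'
+ * datetkn.value fields store the byte offset of their DynamicZoneAbbrev
+ * from the start of the chunk.
+ */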
+TimeZoneAbbrevTable *
+ConvertTimeZoneAbbrevs(struct tzEntry *abbrevs, int n)
+{
+ TimeZoneAbbrevTable *tbl;
+ Size tbl_size;
+ int i;
+
+ /* Space for fixed fields and datetkn array */
+ tbl_size = offsetof(TimeZoneAbbrevTable, abbrevs) +
+ n * sizeof(datetkn);
+ tbl_size = MAXALIGN(tbl_size);
+ /* Count up space for dynamic abbreviations */
+ for (i = 0; i < n; i++)
+ {
+ struct tzEntry *abbr = abbrevs + i;
+
+ if (abbr->zone != NULL)
+ {
+ Size dsize;
+
+ dsize = offsetof(DynamicZoneAbbrev, zone) +
+ strlen(abbr->zone) + 1;
+ tbl_size += MAXALIGN(dsize);
+ }
+ }
+
+ /* Alloc the result ... */
+ tbl = malloc(tbl_size);
+ if (!tbl)
+ return NULL;
+
+ /* ... and fill it in */
+ tbl->tblsize = tbl_size;
+ tbl->numabbrevs = n;
+ /* in this loop, tbl_size reprises the space calculation above */
+ tbl_size = offsetof(TimeZoneAbbrevTable, abbrevs) +
+ n * sizeof(datetkn);
+ tbl_size = MAXALIGN(tbl_size);
+ for (i = 0; i < n; i++)
+ {
+ struct tzEntry *abbr = abbrevs + i;
+ datetkn *dtoken = tbl->abbrevs + i;
+
+ /* use strlcpy to truncate name if necessary */
+ strlcpy(dtoken->token, abbr->abbrev, TOKMAXLEN + 1);
+ if (abbr->zone != NULL)
+ {
+ /* Allocate a DynamicZoneAbbrev for this abbreviation */
+ DynamicZoneAbbrev *dtza;
+ Size dsize;
+
+ dtza = (DynamicZoneAbbrev *) ((char *) tbl + tbl_size);
+ dtza->tz = NULL;
+ strcpy(dtza->zone, abbr->zone);
+
+ dtoken->type = DYNTZ;
+ /* value is offset from table start to DynamicZoneAbbrev */
+ dtoken->value = (int32) tbl_size;
+
+ dsize = offsetof(DynamicZoneAbbrev, zone) +
+ strlen(abbr->zone) + 1;
+ tbl_size += MAXALIGN(dsize);
+ }
+ else
+ {
+ dtoken->type = abbr->is_dst ? DTZ : TZ;
+ dtoken->value = abbr->offset;
+ }
+ }
+
+ /* Assert the two loops above agreed on size calculations */
+ Assert(tbl->tblsize == tbl_size);
+
+ /* Check the ordering, if testing */
+ Assert(CheckDateTokenTable("timezone abbreviations", tbl->abbrevs, n));
+
+ return tbl;
+}
+
+/*
+ * Install a TimeZoneAbbrevTable as the active table.
+ *
+ * Caller is responsible that the passed table doesn't go away while in use.
+ */
+void
+InstallTimeZoneAbbrevs(TimeZoneAbbrevTable *tbl)
+{
+ zoneabbrevtbl = tbl;
+ /* reset abbrevcache, which may contain pointers into old table */
+ memset(abbrevcache, 0, sizeof(abbrevcache));
+}
+
+/*
+ * Helper subroutine to locate pg_tz timezone for a dynamic abbreviation.
+ */
+static pg_tz *
+FetchDynamicTimeZone(TimeZoneAbbrevTable *tbl, const datetkn *tp)
+{
+ DynamicZoneAbbrev *dtza;
+
+ /* Just some sanity checks to prevent indexing off into nowhere */
+ Assert(tp->type == DYNTZ);
+ Assert(tp->value > 0 && tp->value < tbl->tblsize);
+
+ dtza = (DynamicZoneAbbrev *) ((char *) tbl + tp->value);
+
+ /* Look up the underlying zone if we haven't already */
+ if (dtza->tz == NULL)
+ {
+ dtza->tz = pg_tzset(dtza->zone);
+
+ /*
+ * Ideally we'd let the caller ereport instead of doing it here, but
+ * then there is no way to report the bad time zone name.
+ */
+ if (dtza->tz == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_CONFIG_FILE_ERROR),
+ errmsg("time zone \"%s\" not recognized",
+ dtza->zone),
+ errdetail("This time zone name appears in the configuration file for time zone abbreviation \"%s\".",
+ tp->token)));
+ }
+ return dtza->tz;
+}
+
+
+/*
+ * This set-returning function reads all the available time zone abbreviations
+ * and returns a set of (abbrev, utc_offset, is_dst).
+ */
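+/*
+ * Usage note (added here for context): this function underlies the
+ * pg_timezone_abbrevs system view, so it is normally reached via
+ * "SELECT * FROM pg_timezone_abbrevs".
+ */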
+Datum
+pg_timezone_abbrevs(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ int *pindex;
+ Datum result;
+ HeapTuple tuple;
+ Datum values[3];
+ bool nulls[3];
+ const datetkn *tp;
+ char buffer[TOKMAXLEN + 1];
+ int gmtoffset;
+ bool is_dst;
+ unsigned char *p;
+ struct pg_itm_in itm_in;
+ Interval *resInterval;
+
+ /* stuff done only on the first call of the function */
+ if (SRF_IS_FIRSTCALL())
+ {
+ TupleDesc tupdesc;
+ MemoryContext oldcontext;
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ /*
+ * switch to memory context appropriate for multiple function calls
+ */
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ /* allocate memory for user context */
+ pindex = (int *) palloc(sizeof(int));
+ *pindex = 0;
+ funcctx->user_fctx = (void *) pindex;
+
+ /*
+ * build tupdesc for result tuples. This must match this function's
+ * pg_proc entry!
+ */
+ tupdesc = CreateTemplateTupleDesc(3);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "abbrev",
+ TEXTOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 2, "utc_offset",
+ INTERVALOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 3, "is_dst",
+ BOOLOID, -1, 0);
+
+ funcctx->tuple_desc = BlessTupleDesc(tupdesc);
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ /* stuff done on every call of the function */
+ funcctx = SRF_PERCALL_SETUP();
+ pindex = (int *) funcctx->user_fctx;
+
+ if (zoneabbrevtbl == NULL ||
+ *pindex >= zoneabbrevtbl->numabbrevs)
+ SRF_RETURN_DONE(funcctx);
+
+ tp = zoneabbrevtbl->abbrevs + *pindex;
+
+ switch (tp->type)
+ {
+ case TZ:
+ gmtoffset = tp->value;
+ is_dst = false;
+ break;
+ case DTZ:
+ gmtoffset = tp->value;
+ is_dst = true;
+ break;
+ case DYNTZ:
+ {
+ /* Determine the current meaning of the abbrev */
+ pg_tz *tzp;
+ TimestampTz now;
+ int isdst;
+
+ tzp = FetchDynamicTimeZone(zoneabbrevtbl, tp);
+ now = GetCurrentTransactionStartTimestamp();
+ gmtoffset = -DetermineTimeZoneAbbrevOffsetTS(now,
+ tp->token,
+ tzp,
+ &isdst);
+ is_dst = (bool) isdst;
+ break;
+ }
+ default:
+ elog(ERROR, "unrecognized timezone type %d", (int) tp->type);
+ gmtoffset = 0; /* keep compiler quiet */
+ is_dst = false;
+ break;
+ }
+
+ MemSet(nulls, 0, sizeof(nulls));
+
+ /*
+ * Convert name to text, using upcasing conversion that is the inverse of
+ * what ParseDateTime() uses.
+ */
+ strlcpy(buffer, tp->token, sizeof(buffer));
+ for (p = (unsigned char *) buffer; *p; p++)
+ *p = pg_toupper(*p);
+
+ values[0] = CStringGetTextDatum(buffer);
+
+ /* Convert offset (in seconds) to an interval; can't overflow */
+ MemSet(&itm_in, 0, sizeof(struct pg_itm_in));
+ itm_in.tm_usec = (int64) gmtoffset * USECS_PER_SEC;
+ resInterval = (Interval *) palloc(sizeof(Interval));
+ (void) itmin2interval(&itm_in, resInterval);
+ values[1] = IntervalPGetDatum(resInterval);
+
+ values[2] = BoolGetDatum(is_dst);
+
+ (*pindex)++;
+
+ tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+ result = HeapTupleGetDatum(tuple);
+
+ SRF_RETURN_NEXT(funcctx, result);
+}
+
+/*
+ * This set-returning function reads all the available full time zones
+ * and returns a set of (name, abbrev, utc_offset, is_dst).
+ */
+Datum
+pg_timezone_names(PG_FUNCTION_ARGS)
+{
+ ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+ pg_tzenum *tzenum;
+ pg_tz *tz;
+ Datum values[4];
+ bool nulls[4];
+ int tzoff;
+ struct pg_tm tm;
+ fsec_t fsec;
+ const char *tzn;
+ Interval *resInterval;
+ struct pg_itm_in itm_in;
+
+ InitMaterializedSRF(fcinfo, 0);
+
+ /* initialize timezone scanning code */
+ tzenum = pg_tzenumerate_start();
+
+ /* search for another zone to display */
+ for (;;)
+ {
+ tz = pg_tzenumerate_next(tzenum);
+ if (!tz)
+ break;
+
+ /* Convert now() to local time in this zone */
+ if (timestamp2tm(GetCurrentTransactionStartTimestamp(),
+ &tzoff, &tm, &fsec, &tzn, tz) != 0)
+ continue; /* ignore if conversion fails */
+
+ /*
+ * IANA's rather silly "Factory" time zone used to emit ridiculously
+ * long "abbreviations" such as "Local time zone must be set--see zic
+ * manual page" or "Local time zone must be set--use tzsetup". While
+ * modern versions of tzdb emit the much saner "-00", it seems some
+ * benighted packagers are hacking the IANA data so that it continues
+ * to produce these strings. To prevent producing a weirdly wide
+ * abbrev column, reject ridiculously long abbreviations.
+ */
+ if (tzn && strlen(tzn) > 31)
+ continue;
+
+ MemSet(nulls, 0, sizeof(nulls));
+
+ values[0] = CStringGetTextDatum(pg_get_timezone_name(tz));
+ values[1] = CStringGetTextDatum(tzn ? tzn : "");
+
+ /* Convert tzoff to an interval; can't overflow */
+ MemSet(&itm_in, 0, sizeof(struct pg_itm_in));
+ itm_in.tm_usec = (int64) -tzoff * USECS_PER_SEC;
+ resInterval = (Interval *) palloc(sizeof(Interval));
+ (void) itmin2interval(&itm_in, resInterval);
+ values[2] = IntervalPGetDatum(resInterval);
+
+ values[3] = BoolGetDatum(tm.tm_isdst > 0);
+
+ tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
+ }
+
+ pg_tzenumerate_end(tzenum);
+ return (Datum) 0;
+}
diff --git a/src/backend/utils/adt/datum.c b/src/backend/utils/adt/datum.c
new file mode 100644
index 0000000..f421024
--- /dev/null
+++ b/src/backend/utils/adt/datum.c
@@ -0,0 +1,554 @@
+/*-------------------------------------------------------------------------
+ *
+ * datum.c
+ * POSTGRES Datum (abstract data type) manipulation routines.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/datum.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * In the implementation of these routines we assume the following:
+ *
+ * A) if a type is "byVal" then all the information is stored in the
+ * Datum itself (i.e. no pointers involved!). In this case the
+ * length of the type is always greater than zero and not more than
+ * "sizeof(Datum)"
+ *
+ * B) if a type is not "byVal" and it has a fixed length (typlen > 0),
+ * then the "Datum" always contains a pointer to a stream of bytes.
+ * The number of significant bytes are always equal to the typlen.
+ *
+ * C) if a type is not "byVal" and has typlen == -1,
+ * then the "Datum" always points to a "struct varlena".
+ * This varlena structure has information about the actual length of this
+ * particular instance of the type and about its value.
+ *
+ * D) if a type is not "byVal" and has typlen == -2,
+ * then the "Datum" always points to a null-terminated C string.
+ *
+ * Note that we do not treat "toasted" datums specially; therefore what
+ * will be copied or compared is the compressed data or toast reference.
+ * An exception is made for datumCopy() of an expanded object, however,
+ * because most callers expect to get a simple contiguous (and pfree'able)
+ * result from datumCopy(). See also datumTransfer().
+ */
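+/*
+ * Concrete examples of the cases above (added for illustration): int4 is
+ * case A (byVal, typlen 4); name is case B (pass-by-reference with fixed
+ * typlen NAMEDATALEN); text and most other variable-length types are case
+ * C (typlen -1, varlena); cstring is case D (typlen -2).
+ */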
+
+#include "postgres.h"
+
+#include "access/detoast.h"
+#include "catalog/pg_type_d.h"
+#include "common/hashfn.h"
+#include "fmgr.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/expandeddatum.h"
+
+
+/*-------------------------------------------------------------------------
+ * datumGetSize
+ *
+ * Find the "real" size of a datum, given the datum value,
+ * whether it is a "by value" type, and the declared type length.
+ * (For TOAST pointer datums, this is the size of the pointer datum.)
+ *
+ * This is essentially an out-of-line version of the att_addlength_datum()
+ * macro in access/tupmacs.h. We do a tad more error checking though.
+ *-------------------------------------------------------------------------
+ */
+Size
+datumGetSize(Datum value, bool typByVal, int typLen)
+{
+ Size size;
+
+ if (typByVal)
+ {
+ /* Pass-by-value types are always fixed-length */
+ Assert(typLen > 0 && typLen <= sizeof(Datum));
+ size = (Size) typLen;
+ }
+ else
+ {
+ if (typLen > 0)
+ {
+ /* Fixed-length pass-by-ref type */
+ size = (Size) typLen;
+ }
+ else if (typLen == -1)
+ {
+ /* It is a varlena datatype */
+ struct varlena *s = (struct varlena *) DatumGetPointer(value);
+
+ if (!PointerIsValid(s))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_EXCEPTION),
+ errmsg("invalid Datum pointer")));
+
+ size = (Size) VARSIZE_ANY(s);
+ }
+ else if (typLen == -2)
+ {
+ /* It is a cstring datatype */
+ char *s = (char *) DatumGetPointer(value);
+
+ if (!PointerIsValid(s))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_EXCEPTION),
+ errmsg("invalid Datum pointer")));
+
+ size = (Size) (strlen(s) + 1);
+ }
+ else
+ {
+ elog(ERROR, "invalid typLen: %d", typLen);
+ size = 0; /* keep compiler quiet */
+ }
+ }
+
+ return size;
+}
+
+/*-------------------------------------------------------------------------
+ * datumCopy
+ *
+ * Make a copy of a non-NULL datum.
+ *
+ * If the datatype is pass-by-reference, memory is obtained with palloc().
+ *
+ * If the value is a reference to an expanded object, we flatten into memory
+ * obtained with palloc(). We need to copy because one of the main uses of
+ * this function is to copy a datum out of a transient memory context that's
+ * about to be destroyed, and the expanded object is probably in a child
+ * context that will also go away. Moreover, many callers assume that the
+ * result is a single pfree-able chunk.
+ *-------------------------------------------------------------------------
+ */
+Datum
+datumCopy(Datum value, bool typByVal, int typLen)
+{
+ Datum res;
+
+ if (typByVal)
+ res = value;
+ else if (typLen == -1)
+ {
+ /* It is a varlena datatype */
+ struct varlena *vl = (struct varlena *) DatumGetPointer(value);
+
+ if (VARATT_IS_EXTERNAL_EXPANDED(vl))
+ {
+ /* Flatten into the caller's memory context */
+ ExpandedObjectHeader *eoh = DatumGetEOHP(value);
+ Size resultsize;
+ char *resultptr;
+
+ resultsize = EOH_get_flat_size(eoh);
+ resultptr = (char *) palloc(resultsize);
+ EOH_flatten_into(eoh, (void *) resultptr, resultsize);
+ res = PointerGetDatum(resultptr);
+ }
+ else
+ {
+ /* Otherwise, just copy the varlena datum verbatim */
+ Size realSize;
+ char *resultptr;
+
+ realSize = (Size) VARSIZE_ANY(vl);
+ resultptr = (char *) palloc(realSize);
+ memcpy(resultptr, vl, realSize);
+ res = PointerGetDatum(resultptr);
+ }
+ }
+ else
+ {
+ /* Pass by reference, but not varlena, so not toasted */
+ Size realSize;
+ char *resultptr;
+
+ realSize = datumGetSize(value, typByVal, typLen);
+
+ resultptr = (char *) palloc(realSize);
+ memcpy(resultptr, DatumGetPointer(value), realSize);
+ res = PointerGetDatum(resultptr);
+ }
+ return res;
+}
+
+/*-------------------------------------------------------------------------
+ * datumTransfer
+ *
+ * Transfer a non-NULL datum into the current memory context.
+ *
+ * This is equivalent to datumCopy() except when the datum is a read-write
+ * pointer to an expanded object. In that case we merely reparent the object
+ * into the current context, and return its standard R/W pointer (in case the
+ * given one is a transient pointer of shorter lifespan).
+ *-------------------------------------------------------------------------
+ */
+Datum
+datumTransfer(Datum value, bool typByVal, int typLen)
+{
+ if (!typByVal && typLen == -1 &&
+ VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(value)))
+ value = TransferExpandedObject(value, CurrentMemoryContext);
+ else
+ value = datumCopy(value, typByVal, typLen);
+ return value;
+}
+
+/*-------------------------------------------------------------------------
+ * datumIsEqual
+ *
+ * Return true if two datums are equal, false otherwise
+ *
+ * NOTE: XXX!
+ * We just compare the bytes of the two values, one by one.
+ * This routine will return false if there are two different
+ * representations of the same value (for example, the two
+ * representations of zero in one's complement arithmetic).
+ * Also, it will probably not give the answer you want if either
+ * datum has been "toasted".
+ *
+ * Do not try to make this any smarter than it currently is with respect
+ * to "toasted" datums, because some of the callers could be working in the
+ * context of an aborted transaction.
+ *-------------------------------------------------------------------------
+ */
+bool
+datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen)
+{
+ bool res;
+
+ if (typByVal)
+ {
+ /*
+ * just compare the two datums. NOTE: just comparing "len" bytes will
+ * not do the job, because we do not know how these bytes are aligned
+ * inside the "Datum". We assume instead that any given datatype is
+ * consistent about how it fills extraneous bits in the Datum.
+ */
+ res = (value1 == value2);
+ }
+ else
+ {
+ Size size1,
+ size2;
+ char *s1,
+ *s2;
+
+ /*
+ * Compare the bytes pointed by the pointers stored in the datums.
+ */
+ size1 = datumGetSize(value1, typByVal, typLen);
+ size2 = datumGetSize(value2, typByVal, typLen);
+ if (size1 != size2)
+ return false;
+ s1 = (char *) DatumGetPointer(value1);
+ s2 = (char *) DatumGetPointer(value2);
+ res = (memcmp(s1, s2, size1) == 0);
+ }
+ return res;
+}
+
+/*-------------------------------------------------------------------------
+ * datum_image_eq
+ *
+ * Compares two datums for identical contents, based on byte images. Return
+ * true if the two datums are equal, false otherwise.
+ *-------------------------------------------------------------------------
+ */
+bool
+datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen)
+{
+ Size len1,
+ len2;
+ bool result = true;
+
+ if (typByVal)
+ {
+ result = (value1 == value2);
+ }
+ else if (typLen > 0)
+ {
+ result = (memcmp(DatumGetPointer(value1),
+ DatumGetPointer(value2),
+ typLen) == 0);
+ }
+ else if (typLen == -1)
+ {
+ len1 = toast_raw_datum_size(value1);
+ len2 = toast_raw_datum_size(value2);
+ /* No need to de-toast if lengths don't match. */
+ if (len1 != len2)
+ result = false;
+ else
+ {
+ struct varlena *arg1val;
+ struct varlena *arg2val;
+
+ arg1val = PG_DETOAST_DATUM_PACKED(value1);
+ arg2val = PG_DETOAST_DATUM_PACKED(value2);
+
+ result = (memcmp(VARDATA_ANY(arg1val),
+ VARDATA_ANY(arg2val),
+ len1 - VARHDRSZ) == 0);
+
+ /* Only free memory if it's a copy made here. */
+ if ((Pointer) arg1val != (Pointer) value1)
+ pfree(arg1val);
+ if ((Pointer) arg2val != (Pointer) value2)
+ pfree(arg2val);
+ }
+ }
+ else if (typLen == -2)
+ {
+ char *s1,
+ *s2;
+
+ /* Compare cstring datums */
+ s1 = DatumGetCString(value1);
+ s2 = DatumGetCString(value2);
+ len1 = strlen(s1) + 1;
+ len2 = strlen(s2) + 1;
+ if (len1 != len2)
+ return false;
+ result = (memcmp(s1, s2, len1) == 0);
+ }
+ else
+ elog(ERROR, "unexpected typLen: %d", typLen);
+
+ return result;
+}
+
+/*-------------------------------------------------------------------------
+ * datum_image_hash
+ *
+ * Generate a hash value based on the binary representation of 'value'. Most
+ * use cases will want to use the hash function specific to the Datum's type;
+ * however, some corner cases require generating a hash value based on the
+ * actual bits rather than the logical value.
+ *-------------------------------------------------------------------------
+ */
+uint32
+datum_image_hash(Datum value, bool typByVal, int typLen)
+{
+ Size len;
+ uint32 result;
+
+ if (typByVal)
+ result = hash_bytes((unsigned char *) &value, sizeof(Datum));
+ else if (typLen > 0)
+ result = hash_bytes((unsigned char *) DatumGetPointer(value), typLen);
+ else if (typLen == -1)
+ {
+ struct varlena *val;
+
+ len = toast_raw_datum_size(value);
+
+ val = PG_DETOAST_DATUM_PACKED(value);
+
+ result = hash_bytes((unsigned char *) VARDATA_ANY(val), len - VARHDRSZ);
+
+ /* Only free memory if it's a copy made here. */
+ if ((Pointer) val != (Pointer) value)
+ pfree(val);
+ }
+ else if (typLen == -2)
+ {
+ char *s;
+
+ s = DatumGetCString(value);
+ len = strlen(s) + 1;
+
+ result = hash_bytes((unsigned char *) s, len);
+ }
+ else
+ {
+ elog(ERROR, "unexpected typLen: %d", typLen);
+ result = 0; /* keep compiler quiet */
+ }
+
+ return result;
+}
+
+/*-------------------------------------------------------------------------
+ * btequalimage
+ *
+ * Generic "equalimage" support function.
+ *
+ * B-Tree operator classes whose equality function could safely be replaced by
+ * datum_image_eq() in all cases can use this as their "equalimage" support
+ * function.
+ *
+ * Earlier minor releases erroneously associated this function with
+ * interval_ops. Detect that case to rescind deduplication support, without
+ * requiring initdb.
+ *-------------------------------------------------------------------------
+ */
+Datum
+btequalimage(PG_FUNCTION_ARGS)
+{
+ Oid opcintype = PG_GETARG_OID(0);
+
+ PG_RETURN_BOOL(opcintype != INTERVALOID);
+}
+
+/*-------------------------------------------------------------------------
+ * datumEstimateSpace
+ *
+ * Compute the amount of space that datumSerialize will require for a
+ * particular Datum.
+ *-------------------------------------------------------------------------
+ */
+Size
+datumEstimateSpace(Datum value, bool isnull, bool typByVal, int typLen)
+{
+ Size sz = sizeof(int);
+
+ if (!isnull)
+ {
+ /* no need to use add_size, can't overflow */
+ if (typByVal)
+ sz += sizeof(Datum);
+ else if (typLen == -1 &&
+ VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value)))
+ {
+ /* Expanded objects need to be flattened, see comment below */
+ sz += EOH_get_flat_size(DatumGetEOHP(value));
+ }
+ else
+ sz += datumGetSize(value, typByVal, typLen);
+ }
+
+ return sz;
+}
+
+/*-------------------------------------------------------------------------
+ * datumSerialize
+ *
+ * Serialize a possibly-NULL datum into caller-provided storage.
+ *
+ * Note: "expanded" objects are flattened so as to produce a self-contained
+ * representation, but other sorts of toast pointers are transferred as-is.
+ * This is because the intended use of this function is to pass the value
+ * to another process within the same database server. The other process
+ * could not access an "expanded" object within this process's memory, but
+ * we assume it can dereference the same TOAST pointers this one can.
+ *
+ * The format is as follows: first, we write a 4-byte header word, which
+ * is either the length of a pass-by-reference datum, -1 for a
+ * pass-by-value datum, or -2 for a NULL. If the value is NULL, nothing
+ * further is written. If it is pass-by-value, sizeof(Datum) bytes
+ * follow. Otherwise, the number of bytes indicated by the header word
+ * follow. The caller is responsible for ensuring that there is enough
+ * storage to store the number of bytes that will be written; use
+ * datumEstimateSpace() to find out how many will be needed.
+ * *start_address is updated to point to the byte immediately following
+ * those written.
+ *-------------------------------------------------------------------------
+ */
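+/*
+ * Worked example (illustrative only): serializing a NULL writes just the
+ * header -2; serializing the pass-by-value int4 42 writes the header -1
+ * followed by sizeof(Datum) bytes holding 42; serializing the 5-byte
+ * cstring "abcd" writes the header 5 followed by the bytes 'a' 'b' 'c'
+ * 'd' '\0'.
+ */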
+void
+datumSerialize(Datum value, bool isnull, bool typByVal, int typLen,
+ char **start_address)
+{
+ ExpandedObjectHeader *eoh = NULL;
+ int header;
+
+ /* Write header word. */
+ if (isnull)
+ header = -2;
+ else if (typByVal)
+ header = -1;
+ else if (typLen == -1 &&
+ VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value)))
+ {
+ eoh = DatumGetEOHP(value);
+ header = EOH_get_flat_size(eoh);
+ }
+ else
+ header = datumGetSize(value, typByVal, typLen);
+ memcpy(*start_address, &header, sizeof(int));
+ *start_address += sizeof(int);
+
+ /* If not null, write payload bytes. */
+ if (!isnull)
+ {
+ if (typByVal)
+ {
+ memcpy(*start_address, &value, sizeof(Datum));
+ *start_address += sizeof(Datum);
+ }
+ else if (eoh)
+ {
+ char *tmp;
+
+ /*
+ * EOH_flatten_into expects the target address to be maxaligned,
+ * so we can't store directly to *start_address.
+ */
+ tmp = (char *) palloc(header);
+ EOH_flatten_into(eoh, (void *) tmp, header);
+ memcpy(*start_address, tmp, header);
+ *start_address += header;
+
+ /* be tidy. */
+ pfree(tmp);
+ }
+ else
+ {
+ memcpy(*start_address, DatumGetPointer(value), header);
+ *start_address += header;
+ }
+ }
+}
+
+/*-------------------------------------------------------------------------
+ * datumRestore
+ *
+ * Restore a possibly-NULL datum previously serialized by datumSerialize.
+ * *start_address is updated according to the number of bytes consumed.
+ *-------------------------------------------------------------------------
+ */
+Datum
+datumRestore(char **start_address, bool *isnull)
+{
+ int header;
+ void *d;
+
+ /* Read header word. */
+ memcpy(&header, *start_address, sizeof(int));
+ *start_address += sizeof(int);
+
+ /* If this datum is NULL, we can stop here. */
+ if (header == -2)
+ {
+ *isnull = true;
+ return (Datum) 0;
+ }
+
+ /* OK, datum is not null. */
+ *isnull = false;
+
+ /* If this datum is pass-by-value, sizeof(Datum) bytes follow. */
+ if (header == -1)
+ {
+ Datum val;
+
+ memcpy(&val, *start_address, sizeof(Datum));
+ *start_address += sizeof(Datum);
+ return val;
+ }
+
+ /* Pass-by-reference case; copy indicated number of bytes. */
+ Assert(header > 0);
+ d = palloc(header);
+ memcpy(d, *start_address, header);
+ *start_address += header;
+ return PointerGetDatum(d);
+}
diff --git a/src/backend/utils/adt/dbsize.c b/src/backend/utils/adt/dbsize.c
new file mode 100644
index 0000000..b4a2c8d
--- /dev/null
+++ b/src/backend/utils/adt/dbsize.c
@@ -0,0 +1,996 @@
+/*
+ * dbsize.c
+ * Database object size functions, and related inquiries
+ *
+ * Copyright (c) 2002-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/dbsize.c
+ *
+ */
+
+#include "postgres.h"
+
+#include <sys/stat.h>
+
+#include "access/htup_details.h"
+#include "access/relation.h"
+#include "catalog/catalog.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_authid.h"
+#include "catalog/pg_tablespace.h"
+#include "commands/dbcommands.h"
+#include "commands/tablespace.h"
+#include "miscadmin.h"
+#include "storage/fd.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/numeric.h"
+#include "utils/rel.h"
+#include "utils/relfilenodemap.h"
+#include "utils/relmapper.h"
+#include "utils/syscache.h"
+
+/* Divide by two and round away from zero */
+#define half_rounded(x) (((x) + ((x) < 0 ? -1 : 1)) / 2)
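+/*
+ * For example (illustrative): half_rounded(3) = 2, half_rounded(-3) = -2,
+ * and half_rounded(1023) = 512, whereas plain truncating division by two
+ * would give 1, -1 and 511 respectively.
+ */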
+
+/* Units used in pg_size_pretty functions. All units must be powers of 2 */
+struct size_pretty_unit
+{
+ const char *name; /* bytes, kB, MB, GB etc */
+ uint32 limit; /* upper limit, prior to half rounding after
+ * converting to this unit. */
+ bool round; /* do half rounding for this unit */
+ uint8 unitbits; /* (1 << unitbits) bytes to make 1 of this
+ * unit */
+};
+
+/* When adding units here also update the error message in pg_size_bytes */
+static const struct size_pretty_unit size_pretty_units[] = {
+ {"bytes", 10 * 1024, false, 0},
+ {"kB", 20 * 1024 - 1, true, 10},
+ {"MB", 20 * 1024 - 1, true, 20},
+ {"GB", 20 * 1024 - 1, true, 30},
+ {"TB", 20 * 1024 - 1, true, 40},
+ {"PB", 20 * 1024 - 1, true, 50},
+ {NULL, 0, false, 0}
+};
+
+/* Return physical size of directory contents, or 0 if dir doesn't exist */
+static int64
+db_dir_size(const char *path)
+{
+ int64 dirsize = 0;
+ struct dirent *direntry;
+ DIR *dirdesc;
+ char filename[MAXPGPATH * 2];
+
+ dirdesc = AllocateDir(path);
+
+ if (!dirdesc)
+ return 0;
+
+ while ((direntry = ReadDir(dirdesc, path)) != NULL)
+ {
+ struct stat fst;
+
+ CHECK_FOR_INTERRUPTS();
+
+ if (strcmp(direntry->d_name, ".") == 0 ||
+ strcmp(direntry->d_name, "..") == 0)
+ continue;
+
+ snprintf(filename, sizeof(filename), "%s/%s", path, direntry->d_name);
+
+ if (stat(filename, &fst) < 0)
+ {
+ if (errno == ENOENT)
+ continue;
+ else
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m", filename)));
+ }
+ dirsize += fst.st_size;
+ }
+
+ FreeDir(dirdesc);
+ return dirsize;
+}
+
+/*
+ * calculate size of database in all tablespaces
+ */
+static int64
+calculate_database_size(Oid dbOid)
+{
+ int64 totalsize;
+ DIR *dirdesc;
+ struct dirent *direntry;
+ char dirpath[MAXPGPATH];
+ char pathname[MAXPGPATH + 21 + sizeof(TABLESPACE_VERSION_DIRECTORY)];
+ AclResult aclresult;
+
+ /*
+ * User must have connect privilege for target database or have privileges
+ * of pg_read_all_stats
+ */
+ aclresult = pg_database_aclcheck(dbOid, GetUserId(), ACL_CONNECT);
+ if (aclresult != ACLCHECK_OK &&
+ !has_privs_of_role(GetUserId(), ROLE_PG_READ_ALL_STATS))
+ {
+ aclcheck_error(aclresult, OBJECT_DATABASE,
+ get_database_name(dbOid));
+ }
+
+ /* Shared storage in pg_global is not counted */
+
+ /* Include pg_default storage */
+ snprintf(pathname, sizeof(pathname), "base/%u", dbOid);
+ totalsize = db_dir_size(pathname);
+
+ /* Scan the non-default tablespaces */
+ snprintf(dirpath, MAXPGPATH, "pg_tblspc");
+ dirdesc = AllocateDir(dirpath);
+
+ while ((direntry = ReadDir(dirdesc, dirpath)) != NULL)
+ {
+ CHECK_FOR_INTERRUPTS();
+
+ if (strcmp(direntry->d_name, ".") == 0 ||
+ strcmp(direntry->d_name, "..") == 0)
+ continue;
+
+ snprintf(pathname, sizeof(pathname), "pg_tblspc/%s/%s/%u",
+ direntry->d_name, TABLESPACE_VERSION_DIRECTORY, dbOid);
+ totalsize += db_dir_size(pathname);
+ }
+
+ FreeDir(dirdesc);
+
+ return totalsize;
+}
+
+Datum
+pg_database_size_oid(PG_FUNCTION_ARGS)
+{
+ Oid dbOid = PG_GETARG_OID(0);
+ int64 size;
+
+ size = calculate_database_size(dbOid);
+
+ if (size == 0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_INT64(size);
+}
+
+Datum
+pg_database_size_name(PG_FUNCTION_ARGS)
+{
+ Name dbName = PG_GETARG_NAME(0);
+ Oid dbOid = get_database_oid(NameStr(*dbName), false);
+ int64 size;
+
+ size = calculate_database_size(dbOid);
+
+ if (size == 0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_INT64(size);
+}
+
+
+/*
+ * Calculate total size of tablespace. Returns -1 if the tablespace directory
+ * cannot be found.
+ */
+static int64
+calculate_tablespace_size(Oid tblspcOid)
+{
+ char tblspcPath[MAXPGPATH];
+ char pathname[MAXPGPATH * 2];
+ int64 totalsize = 0;
+ DIR *dirdesc;
+ struct dirent *direntry;
+ AclResult aclresult;
+
+ /*
+ * User must have privileges of pg_read_all_stats or have CREATE privilege
+ * for target tablespace, either explicitly granted or implicitly because
+ * it is the default for the current database.
+ */
+ if (tblspcOid != MyDatabaseTableSpace &&
+ !has_privs_of_role(GetUserId(), ROLE_PG_READ_ALL_STATS))
+ {
+ aclresult = pg_tablespace_aclcheck(tblspcOid, GetUserId(), ACL_CREATE);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, OBJECT_TABLESPACE,
+ get_tablespace_name(tblspcOid));
+ }
+
+ if (tblspcOid == DEFAULTTABLESPACE_OID)
+ snprintf(tblspcPath, MAXPGPATH, "base");
+ else if (tblspcOid == GLOBALTABLESPACE_OID)
+ snprintf(tblspcPath, MAXPGPATH, "global");
+ else
+ snprintf(tblspcPath, MAXPGPATH, "pg_tblspc/%u/%s", tblspcOid,
+ TABLESPACE_VERSION_DIRECTORY);
+
+ dirdesc = AllocateDir(tblspcPath);
+
+ if (!dirdesc)
+ return -1;
+
+ while ((direntry = ReadDir(dirdesc, tblspcPath)) != NULL)
+ {
+ struct stat fst;
+
+ CHECK_FOR_INTERRUPTS();
+
+ if (strcmp(direntry->d_name, ".") == 0 ||
+ strcmp(direntry->d_name, "..") == 0)
+ continue;
+
+ snprintf(pathname, sizeof(pathname), "%s/%s", tblspcPath, direntry->d_name);
+
+ if (stat(pathname, &fst) < 0)
+ {
+ if (errno == ENOENT)
+ continue;
+ else
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m", pathname)));
+ }
+
+ if (S_ISDIR(fst.st_mode))
+ totalsize += db_dir_size(pathname);
+
+ totalsize += fst.st_size;
+ }
+
+ FreeDir(dirdesc);
+
+ return totalsize;
+}
+
+Datum
+pg_tablespace_size_oid(PG_FUNCTION_ARGS)
+{
+ Oid tblspcOid = PG_GETARG_OID(0);
+ int64 size;
+
+ size = calculate_tablespace_size(tblspcOid);
+
+ if (size < 0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_INT64(size);
+}
+
+Datum
+pg_tablespace_size_name(PG_FUNCTION_ARGS)
+{
+ Name tblspcName = PG_GETARG_NAME(0);
+ Oid tblspcOid = get_tablespace_oid(NameStr(*tblspcName), false);
+ int64 size;
+
+ size = calculate_tablespace_size(tblspcOid);
+
+ if (size < 0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_INT64(size);
+}
+
+
+/*
+ * calculate size of (one fork of) a relation
+ *
+ * Note: we can safely apply this to temp tables of other sessions, so there
+ * is no check here or at the call sites for that.
+ */
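+/*
+ * Note (added for illustration, not part of the original comment): a fork
+ * is stored as a base file plus numbered segments (1 GB each by default),
+ * e.g. base/16384/16385, base/16384/16385.1, base/16384/16385.2, ... (the
+ * OIDs here are hypothetical). The loop below stats each segment in turn
+ * until one is missing and sums their sizes.
+ */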
+static int64
+calculate_relation_size(RelFileNode *rfn, BackendId backend, ForkNumber forknum)
+{
+ int64 totalsize = 0;
+ char *relationpath;
+ char pathname[MAXPGPATH];
+ unsigned int segcount = 0;
+
+ relationpath = relpathbackend(*rfn, backend, forknum);
+
+ for (segcount = 0;; segcount++)
+ {
+ struct stat fst;
+
+ CHECK_FOR_INTERRUPTS();
+
+ if (segcount == 0)
+ snprintf(pathname, MAXPGPATH, "%s",
+ relationpath);
+ else
+ snprintf(pathname, MAXPGPATH, "%s.%u",
+ relationpath, segcount);
+
+ if (stat(pathname, &fst) < 0)
+ {
+ if (errno == ENOENT)
+ break;
+ else
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m", pathname)));
+ }
+ totalsize += fst.st_size;
+ }
+
+ return totalsize;
+}
+
+Datum
+pg_relation_size(PG_FUNCTION_ARGS)
+{
+ Oid relOid = PG_GETARG_OID(0);
+ text *forkName = PG_GETARG_TEXT_PP(1);
+ Relation rel;
+ int64 size;
+
+ rel = try_relation_open(relOid, AccessShareLock);
+
+ /*
+ * Before 9.2, we used to throw an error if the relation didn't exist, but
+ * that makes queries like "SELECT pg_relation_size(oid) FROM pg_class"
+ * less robust, because while we scan pg_class with an MVCC snapshot,
+ * someone else might drop the table. It's better to return NULL for
+ * already-dropped tables than throw an error and abort the whole query.
+ */
+ if (rel == NULL)
+ PG_RETURN_NULL();
+
+ size = calculate_relation_size(&(rel->rd_node), rel->rd_backend,
+ forkname_to_number(text_to_cstring(forkName)));
+
+ relation_close(rel, AccessShareLock);
+
+ PG_RETURN_INT64(size);
+}
+
+/*
+ * Calculate total on-disk size of a TOAST relation, including its indexes.
+ * Must not be applied to non-TOAST relations.
+ */
+static int64
+calculate_toast_table_size(Oid toastrelid)
+{
+ int64 size = 0;
+ Relation toastRel;
+ ForkNumber forkNum;
+ ListCell *lc;
+ List *indexlist;
+
+ toastRel = relation_open(toastrelid, AccessShareLock);
+
+ /* toast heap size, including FSM and VM size */
+ for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++)
+ size += calculate_relation_size(&(toastRel->rd_node),
+ toastRel->rd_backend, forkNum);
+
+ /* toast index size, including FSM and VM size */
+ indexlist = RelationGetIndexList(toastRel);
+
+ /* Size is calculated using all the indexes available */
+ foreach(lc, indexlist)
+ {
+ Relation toastIdxRel;
+
+ toastIdxRel = relation_open(lfirst_oid(lc),
+ AccessShareLock);
+ for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++)
+ size += calculate_relation_size(&(toastIdxRel->rd_node),
+ toastIdxRel->rd_backend, forkNum);
+
+ relation_close(toastIdxRel, AccessShareLock);
+ }
+ list_free(indexlist);
+ relation_close(toastRel, AccessShareLock);
+
+ return size;
+}
+
+/*
+ * Calculate total on-disk size of a given table,
+ * including FSM and VM, plus TOAST table if any.
+ * Indexes other than the TOAST table's index are not included.
+ *
+ * Note that this also behaves sanely if applied to an index or toast table;
+ * those won't have attached toast tables, but they can have multiple forks.
+ */
+static int64
+calculate_table_size(Relation rel)
+{
+ int64 size = 0;
+ ForkNumber forkNum;
+
+ /*
+ * heap size, including FSM and VM
+ */
+ for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++)
+ size += calculate_relation_size(&(rel->rd_node), rel->rd_backend,
+ forkNum);
+
+ /*
+ * Size of toast relation
+ */
+ if (OidIsValid(rel->rd_rel->reltoastrelid))
+ size += calculate_toast_table_size(rel->rd_rel->reltoastrelid);
+
+ return size;
+}
+
+/*
+ * Calculate total on-disk size of all indexes attached to the given table.
+ *
+ * Can be applied safely to an index, but you'll just get zero.
+ */
+static int64
+calculate_indexes_size(Relation rel)
+{
+ int64 size = 0;
+
+ /*
+ * Aggregate all indexes on the given relation
+ */
+ if (rel->rd_rel->relhasindex)
+ {
+ List *index_oids = RelationGetIndexList(rel);
+ ListCell *cell;
+
+ foreach(cell, index_oids)
+ {
+ Oid idxOid = lfirst_oid(cell);
+ Relation idxRel;
+ ForkNumber forkNum;
+
+ idxRel = relation_open(idxOid, AccessShareLock);
+
+ for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++)
+ size += calculate_relation_size(&(idxRel->rd_node),
+ idxRel->rd_backend,
+ forkNum);
+
+ relation_close(idxRel, AccessShareLock);
+ }
+
+ list_free(index_oids);
+ }
+
+ return size;
+}
+
+Datum
+pg_table_size(PG_FUNCTION_ARGS)
+{
+ Oid relOid = PG_GETARG_OID(0);
+ Relation rel;
+ int64 size;
+
+ rel = try_relation_open(relOid, AccessShareLock);
+
+ if (rel == NULL)
+ PG_RETURN_NULL();
+
+ size = calculate_table_size(rel);
+
+ relation_close(rel, AccessShareLock);
+
+ PG_RETURN_INT64(size);
+}
+
+Datum
+pg_indexes_size(PG_FUNCTION_ARGS)
+{
+ Oid relOid = PG_GETARG_OID(0);
+ Relation rel;
+ int64 size;
+
+ rel = try_relation_open(relOid, AccessShareLock);
+
+ if (rel == NULL)
+ PG_RETURN_NULL();
+
+ size = calculate_indexes_size(rel);
+
+ relation_close(rel, AccessShareLock);
+
+ PG_RETURN_INT64(size);
+}
+
+/*
+ * Compute the on-disk size of all files for the relation,
+ * including heap data, index data, toast data, FSM, VM.
+ */
+static int64
+calculate_total_relation_size(Relation rel)
+{
+ int64 size;
+
+ /*
+ * Aggregate the table size; this includes the size of the heap, the toast
+ * table and toast index, plus the free space and visibility maps
+ */
+ size = calculate_table_size(rel);
+
+ /*
+ * Add size of all attached indexes as well
+ */
+ size += calculate_indexes_size(rel);
+
+ return size;
+}
+
+Datum
+pg_total_relation_size(PG_FUNCTION_ARGS)
+{
+ Oid relOid = PG_GETARG_OID(0);
+ Relation rel;
+ int64 size;
+
+ rel = try_relation_open(relOid, AccessShareLock);
+
+ if (rel == NULL)
+ PG_RETURN_NULL();
+
+ size = calculate_total_relation_size(rel);
+
+ relation_close(rel, AccessShareLock);
+
+ PG_RETURN_INT64(size);
+}
+
+/*
+ * formatting with size units
+ */
+Datum
+pg_size_pretty(PG_FUNCTION_ARGS)
+{
+ int64 size = PG_GETARG_INT64(0);
+ char buf[64];
+ const struct size_pretty_unit *unit;
+
+ for (unit = size_pretty_units; unit->name != NULL; unit++)
+ {
+ uint8 bits;
+
+ /* use this unit if there are no more units or we're below the limit */
+ if (unit[1].name == NULL || Abs(size) < unit->limit)
+ {
+ if (unit->round)
+ size = half_rounded(size);
+
+ snprintf(buf, sizeof(buf), INT64_FORMAT " %s", size, unit->name);
+ break;
+ }
+
+ /*
+ * Determine the number of bits to use to build the divisor. We may
+ * need to use 1 bit less than the difference between this and the
+ * next unit if the next unit uses half rounding. Or we may need to
+ * shift an extra bit if this unit uses half rounding and the next one
+ * does not. We use division rather than shifting right by this
+ * number of bits to ensure positive and negative values are rounded
+ * in the same way.
+ */
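+ /*
+ * Worked example (added for illustration): moving from "bytes"
+ * (unitbits 0, no rounding) to "kB" (unitbits 10, rounding), bits is
+ * 10 - 0 - 1 + 0 = 9, so 10240 bytes becomes 10240 / 512 = 20
+ * half-kilobyte units, which the next iteration half-rounds to 10 and
+ * prints as "10 kB".
+ */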
+ bits = (unit[1].unitbits - unit->unitbits - (unit[1].round == true)
+ + (unit->round == true));
+ size /= ((int64) 1) << bits;
+ }
+
+ PG_RETURN_TEXT_P(cstring_to_text(buf));
+}
+
+static char *
+numeric_to_cstring(Numeric n)
+{
+ Datum d = NumericGetDatum(n);
+
+ return DatumGetCString(DirectFunctionCall1(numeric_out, d));
+}
+
+static bool
+numeric_is_less(Numeric a, Numeric b)
+{
+ Datum da = NumericGetDatum(a);
+ Datum db = NumericGetDatum(b);
+
+ return DatumGetBool(DirectFunctionCall2(numeric_lt, da, db));
+}
+
+static Numeric
+numeric_absolute(Numeric n)
+{
+ Datum d = NumericGetDatum(n);
+ Datum result;
+
+ result = DirectFunctionCall1(numeric_abs, d);
+ return DatumGetNumeric(result);
+}
+
+static Numeric
+numeric_half_rounded(Numeric n)
+{
+ Datum d = NumericGetDatum(n);
+ Datum zero;
+ Datum one;
+ Datum two;
+ Datum result;
+
+ zero = NumericGetDatum(int64_to_numeric(0));
+ one = NumericGetDatum(int64_to_numeric(1));
+ two = NumericGetDatum(int64_to_numeric(2));
+
+ if (DatumGetBool(DirectFunctionCall2(numeric_ge, d, zero)))
+ d = DirectFunctionCall2(numeric_add, d, one);
+ else
+ d = DirectFunctionCall2(numeric_sub, d, one);
+
+ result = DirectFunctionCall2(numeric_div_trunc, d, two);
+ return DatumGetNumeric(result);
+}
+
+static Numeric
+numeric_truncated_divide(Numeric n, int64 divisor)
+{
+ Datum d = NumericGetDatum(n);
+ Datum divisor_numeric;
+ Datum result;
+
+ divisor_numeric = NumericGetDatum(int64_to_numeric(divisor));
+ result = DirectFunctionCall2(numeric_div_trunc, d, divisor_numeric);
+ return DatumGetNumeric(result);
+}
+
+Datum
+pg_size_pretty_numeric(PG_FUNCTION_ARGS)
+{
+ Numeric size = PG_GETARG_NUMERIC(0);
+ char *result = NULL;
+ const struct size_pretty_unit *unit;
+
+ for (unit = size_pretty_units; unit->name != NULL; unit++)
+ {
+ unsigned int shiftby;
+
+ /* use this unit if there are no more units or we're below the limit */
+ if (unit[1].name == NULL ||
+ numeric_is_less(numeric_absolute(size),
+ int64_to_numeric(unit->limit)))
+ {
+ if (unit->round)
+ size = numeric_half_rounded(size);
+
+ result = psprintf("%s %s", numeric_to_cstring(size), unit->name);
+ break;
+ }
+
+ /*
+ * Determine the number of bits to use to build the divisor. We may
+ * need to use 1 bit less than the difference between this and the
+ * next unit if the next unit uses half rounding. Or we may need to
+ * shift an extra bit if this unit uses half rounding and the next one
+ * does not.
+ */
+ shiftby = (unit[1].unitbits - unit->unitbits - (unit[1].round == true)
+ + (unit->round == true));
+ size = numeric_truncated_divide(size, ((int64) 1) << shiftby);
+ }
+
+ PG_RETURN_TEXT_P(cstring_to_text(result));
+}
+
+/*
+ * Convert a human-readable size to a size in bytes
+ */
+Datum
+pg_size_bytes(PG_FUNCTION_ARGS)
+{
+ text *arg = PG_GETARG_TEXT_PP(0);
+ char *str,
+ *strptr,
+ *endptr;
+ char saved_char;
+ Numeric num;
+ int64 result;
+ bool have_digits = false;
+
+ str = text_to_cstring(arg);
+
+ /* Skip leading whitespace */
+ strptr = str;
+ while (isspace((unsigned char) *strptr))
+ strptr++;
+
+ /* Check that we have a valid number and determine where it ends */
+ endptr = strptr;
+
+ /* Part (1): sign */
+ if (*endptr == '-' || *endptr == '+')
+ endptr++;
+
+ /* Part (2): main digit string */
+ if (isdigit((unsigned char) *endptr))
+ {
+ have_digits = true;
+ do
+ endptr++;
+ while (isdigit((unsigned char) *endptr));
+ }
+
+ /* Part (3): optional decimal point and fractional digits */
+ if (*endptr == '.')
+ {
+ endptr++;
+ if (isdigit((unsigned char) *endptr))
+ {
+ have_digits = true;
+ do
+ endptr++;
+ while (isdigit((unsigned char) *endptr));
+ }
+ }
+
+ /* Complain if we don't have a valid number at this point */
+ if (!have_digits)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid size: \"%s\"", str)));
+
+ /* Part (4): optional exponent */
+ if (*endptr == 'e' || *endptr == 'E')
+ {
+ long exponent;
+ char *cp;
+
+ /*
+ * Note we might one day support EB units, so if what follows 'E'
+ * isn't a number, just treat it all as a unit to be parsed.
+ */
+ exponent = strtol(endptr + 1, &cp, 10);
+ (void) exponent; /* Silence -Wunused-result warnings */
+ if (cp > endptr + 1)
+ endptr = cp;
+ }
+
+ /*
+ * Parse the number, saving the next character, which may be the first
+ * character of the unit string.
+ */
+ saved_char = *endptr;
+ *endptr = '\0';
+
+ num = DatumGetNumeric(DirectFunctionCall3(numeric_in,
+ CStringGetDatum(strptr),
+ ObjectIdGetDatum(InvalidOid),
+ Int32GetDatum(-1)));
+
+ *endptr = saved_char;
+
+ /* Skip whitespace between number and unit */
+ strptr = endptr;
+ while (isspace((unsigned char) *strptr))
+ strptr++;
+
+ /* Handle possible unit */
+ if (*strptr != '\0')
+ {
+ const struct size_pretty_unit *unit;
+ int64 multiplier = 0;
+
+ /* Trim any trailing whitespace */
+ endptr = str + VARSIZE_ANY_EXHDR(arg) - 1;
+
+ while (isspace((unsigned char) *endptr))
+ endptr--;
+
+ endptr++;
+ *endptr = '\0';
+
+ for (unit = size_pretty_units; unit->name != NULL; unit++)
+ {
+ /* Parse the unit case-insensitively */
+ if (pg_strcasecmp(strptr, unit->name) == 0)
+ {
+ multiplier = ((int64) 1) << unit->unitbits;
+ break;
+ }
+ }
+
+ /* Verify we found a valid unit in the loop above */
+ if (unit->name == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid size: \"%s\"", text_to_cstring(arg)),
+ errdetail("Invalid size unit: \"%s\".", strptr),
+ errhint("Valid units are \"bytes\", \"kB\", \"MB\", \"GB\", \"TB\", and \"PB\".")));
+
+ if (multiplier > 1)
+ {
+ Numeric mul_num;
+
+ mul_num = int64_to_numeric(multiplier);
+
+ num = DatumGetNumeric(DirectFunctionCall2(numeric_mul,
+ NumericGetDatum(mul_num),
+ NumericGetDatum(num)));
+ }
+ }
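+	/*
+	 * At this point an input such as '1.5 GB' has become the numeric value
+	 * 1.5 * 2^30 (assuming GB is defined with unitbits = 30 in
+	 * size_pretty_units), so the conversion below yields 1610612736.
+	 */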
+
+ result = DatumGetInt64(DirectFunctionCall1(numeric_int8,
+ NumericGetDatum(num)));
+
+ PG_RETURN_INT64(result);
+}
+
+/*
+ * Get the filenode of a relation
+ *
+ * This is expected to be used in queries like
+ * SELECT pg_relation_filenode(oid) FROM pg_class;
+ * That leads to a couple of choices. We work from the pg_class row alone
+ * rather than actually opening each relation, for efficiency. We don't
+ * fail if we can't find the relation --- some rows might be visible in
+ * the query's MVCC snapshot even though the relations have been dropped.
+ * (Note: we could avoid using the catcache, but there's little point
+ * because the relation mapper also works "in the now".) We also don't
+ * fail if the relation doesn't have storage. In all these cases it
+ * seems better to quietly return NULL.
+ */
+Datum
+pg_relation_filenode(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ Oid result;
+ HeapTuple tuple;
+ Form_pg_class relform;
+
+ tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+ if (!HeapTupleIsValid(tuple))
+ PG_RETURN_NULL();
+ relform = (Form_pg_class) GETSTRUCT(tuple);
+
+ if (RELKIND_HAS_STORAGE(relform->relkind))
+ {
+ if (relform->relfilenode)
+ result = relform->relfilenode;
+ else /* Consult the relation mapper */
+ result = RelationMapOidToFilenode(relid,
+ relform->relisshared);
+ }
+ else
+ {
+ /* no storage, return NULL */
+ result = InvalidOid;
+ }
+
+ ReleaseSysCache(tuple);
+
+ if (!OidIsValid(result))
+ PG_RETURN_NULL();
+
+ PG_RETURN_OID(result);
+}
+
+/*
+ * Get the relation via (reltablespace, relfilenode)
+ *
+ * This is expected to be used when somebody wants to match an individual file
+ * on the filesystem back to its table. That's not trivially possible via
+ * pg_class, because that doesn't contain the relfilenodes of shared and nailed
+ * tables.
+ *
+ * We don't fail but return NULL if we cannot find a mapping.
+ *
+ * InvalidOid can be passed instead of the current database's default
+ * tablespace.
+ */
+Datum
+pg_filenode_relation(PG_FUNCTION_ARGS)
+{
+ Oid reltablespace = PG_GETARG_OID(0);
+ Oid relfilenode = PG_GETARG_OID(1);
+ Oid heaprel;
+
+ /* test needed so RelidByRelfilenode doesn't misbehave */
+ if (!OidIsValid(relfilenode))
+ PG_RETURN_NULL();
+
+ heaprel = RelidByRelfilenode(reltablespace, relfilenode);
+
+ if (!OidIsValid(heaprel))
+ PG_RETURN_NULL();
+ else
+ PG_RETURN_OID(heaprel);
+}
+
+/*
+ * Get the pathname (relative to $PGDATA) of a relation
+ *
+ * See comments for pg_relation_filenode.
+ */
+Datum
+pg_relation_filepath(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ HeapTuple tuple;
+ Form_pg_class relform;
+ RelFileNode rnode;
+ BackendId backend;
+ char *path;
+
+ tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+ if (!HeapTupleIsValid(tuple))
+ PG_RETURN_NULL();
+ relform = (Form_pg_class) GETSTRUCT(tuple);
+
+ if (RELKIND_HAS_STORAGE(relform->relkind))
+ {
+ /* This logic should match RelationInitPhysicalAddr */
+ if (relform->reltablespace)
+ rnode.spcNode = relform->reltablespace;
+ else
+ rnode.spcNode = MyDatabaseTableSpace;
+ if (rnode.spcNode == GLOBALTABLESPACE_OID)
+ rnode.dbNode = InvalidOid;
+ else
+ rnode.dbNode = MyDatabaseId;
+ if (relform->relfilenode)
+ rnode.relNode = relform->relfilenode;
+ else /* Consult the relation mapper */
+ rnode.relNode = RelationMapOidToFilenode(relid,
+ relform->relisshared);
+ }
+ else
+ {
+ /* no storage, return NULL */
+ rnode.relNode = InvalidOid;
+ /* some compilers generate warnings without these next two lines */
+ rnode.dbNode = InvalidOid;
+ rnode.spcNode = InvalidOid;
+ }
+
+ if (!OidIsValid(rnode.relNode))
+ {
+ ReleaseSysCache(tuple);
+ PG_RETURN_NULL();
+ }
+
+ /* Determine owning backend. */
+ switch (relform->relpersistence)
+ {
+ case RELPERSISTENCE_UNLOGGED:
+ case RELPERSISTENCE_PERMANENT:
+ backend = InvalidBackendId;
+ break;
+ case RELPERSISTENCE_TEMP:
+ if (isTempOrTempToastNamespace(relform->relnamespace))
+ backend = BackendIdForTempRelations();
+ else
+ {
+ /* Do it the hard way. */
+ backend = GetTempNamespaceBackendId(relform->relnamespace);
+ Assert(backend != InvalidBackendId);
+ }
+ break;
+ default:
+ elog(ERROR, "invalid relpersistence: %c", relform->relpersistence);
+ backend = InvalidBackendId; /* placate compiler */
+ break;
+ }
+
+ ReleaseSysCache(tuple);
+
+ path = relpathbackend(rnode, backend, MAIN_FORKNUM);
+
+ PG_RETURN_TEXT_P(cstring_to_text(path));
+}
diff --git a/src/backend/utils/adt/domains.c b/src/backend/utils/adt/domains.c
new file mode 100644
index 0000000..3de0cb0
--- /dev/null
+++ b/src/backend/utils/adt/domains.c
@@ -0,0 +1,390 @@
+/*-------------------------------------------------------------------------
+ *
+ * domains.c
+ * I/O functions for domain types.
+ *
+ * The output functions for a domain type are just the same ones provided
+ * by its underlying base type. The input functions, however, must be
+ * prepared to apply any constraints defined by the type. So, we create
+ * special input functions that invoke the base type's input function
+ * and then check the constraints.
+ *
+ * The overhead required for constraint checking can be high, since examining
+ * the catalogs to discover the constraints for a given domain is not cheap.
+ * We have three mechanisms for minimizing this cost:
+ * 1. We rely on the typcache to keep up-to-date copies of the constraints.
+ * 2. In a nest of domains, we flatten the checking of all the levels
+ * into just one operation (the typcache does this for us).
+ * 3. If there are CHECK constraints, we cache a standalone ExprContext
+ * to evaluate them in.
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/domains.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "catalog/pg_type.h"
+#include "executor/executor.h"
+#include "lib/stringinfo.h"
+#include "utils/builtins.h"
+#include "utils/expandeddatum.h"
+#include "utils/lsyscache.h"
+#include "utils/syscache.h"
+#include "utils/typcache.h"
+
+
+/*
+ * structure to cache state across multiple calls
+ */
+typedef struct DomainIOData
+{
+ Oid domain_type;
+ /* Data needed to call base type's input function */
+ Oid typiofunc;
+ Oid typioparam;
+ int32 typtypmod;
+ FmgrInfo proc;
+ /* Reference to cached list of constraint items to check */
+ DomainConstraintRef constraint_ref;
+ /* Context for evaluating CHECK constraints in */
+ ExprContext *econtext;
+ /* Memory context this cache is in */
+ MemoryContext mcxt;
+} DomainIOData;
+
+
+/*
+ * domain_state_setup - initialize the cache for a new domain type.
+ *
+ * Note: we can't re-use the same cache struct for a new domain type,
+ * since there's no provision for releasing the DomainConstraintRef.
+ * If a call site needs to deal with a new domain type, we just leak
+ * the old struct for the duration of the query.
+ */
+static DomainIOData *
+domain_state_setup(Oid domainType, bool binary, MemoryContext mcxt)
+{
+ DomainIOData *my_extra;
+ TypeCacheEntry *typentry;
+ Oid baseType;
+
+ my_extra = (DomainIOData *) MemoryContextAlloc(mcxt, sizeof(DomainIOData));
+
+ /*
+ * Verify that domainType represents a valid domain type. We need to be
+ * careful here because domain_in and domain_recv can be called from SQL,
+ * possibly with incorrect arguments. We use lookup_type_cache mainly
+ * because it will throw a clean user-facing error for a bad OID; but also
+ * it can cache the underlying base type info.
+ */
+ typentry = lookup_type_cache(domainType, TYPECACHE_DOMAIN_BASE_INFO);
+ if (typentry->typtype != TYPTYPE_DOMAIN)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("type %s is not a domain",
+ format_type_be(domainType))));
+
+ /* Find out the base type */
+ baseType = typentry->domainBaseType;
+ my_extra->typtypmod = typentry->domainBaseTypmod;
+
+ /* Look up underlying I/O function */
+ if (binary)
+ getTypeBinaryInputInfo(baseType,
+ &my_extra->typiofunc,
+ &my_extra->typioparam);
+ else
+ getTypeInputInfo(baseType,
+ &my_extra->typiofunc,
+ &my_extra->typioparam);
+ fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc, mcxt);
+
+ /* Look up constraints for domain */
+ InitDomainConstraintRef(domainType, &my_extra->constraint_ref, mcxt, true);
+
+ /* We don't make an ExprContext until needed */
+ my_extra->econtext = NULL;
+ my_extra->mcxt = mcxt;
+
+ /* Mark cache valid */
+ my_extra->domain_type = domainType;
+
+ return my_extra;
+}
+
+/*
+ * domain_check_input - apply the cached checks.
+ *
+ * This is roughly similar to the handling of CoerceToDomain nodes in
+ * execExpr*.c, but we execute each constraint separately, rather than
+ * compiling them in-line within a larger expression.
+ */
+static void
+domain_check_input(Datum value, bool isnull, DomainIOData *my_extra)
+{
+ ExprContext *econtext = my_extra->econtext;
+ ListCell *l;
+
+ /* Make sure we have up-to-date constraints */
+ UpdateDomainConstraintRef(&my_extra->constraint_ref);
+
+ foreach(l, my_extra->constraint_ref.constraints)
+ {
+ DomainConstraintState *con = (DomainConstraintState *) lfirst(l);
+
+ switch (con->constrainttype)
+ {
+ case DOM_CONSTRAINT_NOTNULL:
+ if (isnull)
+ ereport(ERROR,
+ (errcode(ERRCODE_NOT_NULL_VIOLATION),
+ errmsg("domain %s does not allow null values",
+ format_type_be(my_extra->domain_type)),
+ errdatatype(my_extra->domain_type)));
+ break;
+ case DOM_CONSTRAINT_CHECK:
+ {
+ /* Make the econtext if we didn't already */
+ if (econtext == NULL)
+ {
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(my_extra->mcxt);
+ econtext = CreateStandaloneExprContext();
+ MemoryContextSwitchTo(oldcontext);
+ my_extra->econtext = econtext;
+ }
+
+ /*
+ * Set up value to be returned by CoerceToDomainValue
+ * nodes. Unlike in the generic expression case, this
+ * econtext couldn't be shared with anything else, so no
+ * need to save and restore fields. But we do need to
+ * protect the passed-in value against being changed by
+ * called functions. (It couldn't be a R/W expanded
+ * object for most uses, but that seems possible for
+ * domain_check().)
+ */
+ econtext->domainValue_datum =
+ MakeExpandedObjectReadOnly(value, isnull,
+ my_extra->constraint_ref.tcache->typlen);
+ econtext->domainValue_isNull = isnull;
+
+ if (!ExecCheck(con->check_exprstate, econtext))
+ ereport(ERROR,
+ (errcode(ERRCODE_CHECK_VIOLATION),
+ errmsg("value for domain %s violates check constraint \"%s\"",
+ format_type_be(my_extra->domain_type),
+ con->name),
+ errdomainconstraint(my_extra->domain_type,
+ con->name)));
+ break;
+ }
+ default:
+ elog(ERROR, "unrecognized constraint type: %d",
+ (int) con->constrainttype);
+ break;
+ }
+ }
+
+ /*
+ * Before exiting, call any shutdown callbacks and reset econtext's
+ * per-tuple memory. This avoids leaking non-memory resources, if
+ * anything in the expression(s) has any.
+ */
+ if (econtext)
+ ReScanExprContext(econtext);
+}
+
+
+/*
+ * domain_in - input routine for any domain type.
+ */
+Datum
+domain_in(PG_FUNCTION_ARGS)
+{
+ char *string;
+ Oid domainType;
+ DomainIOData *my_extra;
+ Datum value;
+
+ /*
+ * Since domain_in is not strict, we have to check for null inputs. The
+ * typioparam argument should never be null in normal system usage, but it
+ * could be null in a manual invocation --- if so, just return null.
+ */
+ if (PG_ARGISNULL(0))
+ string = NULL;
+ else
+ string = PG_GETARG_CSTRING(0);
+ if (PG_ARGISNULL(1))
+ PG_RETURN_NULL();
+ domainType = PG_GETARG_OID(1);
+
+ /*
+ * We arrange to look up the needed info just once per series of calls,
+ * assuming the domain type doesn't change underneath us (which really
+ * shouldn't happen, but cope if it does).
+ */
+ my_extra = (DomainIOData *) fcinfo->flinfo->fn_extra;
+ if (my_extra == NULL || my_extra->domain_type != domainType)
+ {
+ my_extra = domain_state_setup(domainType, false,
+ fcinfo->flinfo->fn_mcxt);
+ fcinfo->flinfo->fn_extra = (void *) my_extra;
+ }
+
+ /*
+ * Invoke the base type's typinput procedure to convert the data.
+ */
+ value = InputFunctionCall(&my_extra->proc,
+ string,
+ my_extra->typioparam,
+ my_extra->typtypmod);
+
+ /*
+ * Do the necessary checks to ensure it's a valid domain value.
+ */
+ domain_check_input(value, (string == NULL), my_extra);
+
+ if (string == NULL)
+ PG_RETURN_NULL();
+ else
+ PG_RETURN_DATUM(value);
+}
+
+/*
+ * domain_recv - binary input routine for any domain type.
+ */
+Datum
+domain_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf;
+ Oid domainType;
+ DomainIOData *my_extra;
+ Datum value;
+
+ /*
+ * Since domain_recv is not strict, we have to check for null inputs. The
+ * typioparam argument should never be null in normal system usage, but it
+ * could be null in a manual invocation --- if so, just return null.
+ */
+ if (PG_ARGISNULL(0))
+ buf = NULL;
+ else
+ buf = (StringInfo) PG_GETARG_POINTER(0);
+ if (PG_ARGISNULL(1))
+ PG_RETURN_NULL();
+ domainType = PG_GETARG_OID(1);
+
+ /*
+ * We arrange to look up the needed info just once per series of calls,
+ * assuming the domain type doesn't change underneath us (which really
+ * shouldn't happen, but cope if it does).
+ */
+ my_extra = (DomainIOData *) fcinfo->flinfo->fn_extra;
+ if (my_extra == NULL || my_extra->domain_type != domainType)
+ {
+ my_extra = domain_state_setup(domainType, true,
+ fcinfo->flinfo->fn_mcxt);
+ fcinfo->flinfo->fn_extra = (void *) my_extra;
+ }
+
+ /*
+ * Invoke the base type's typreceive procedure to convert the data.
+ */
+ value = ReceiveFunctionCall(&my_extra->proc,
+ buf,
+ my_extra->typioparam,
+ my_extra->typtypmod);
+
+ /*
+ * Do the necessary checks to ensure it's a valid domain value.
+ */
+ domain_check_input(value, (buf == NULL), my_extra);
+
+ if (buf == NULL)
+ PG_RETURN_NULL();
+ else
+ PG_RETURN_DATUM(value);
+}
+
+/*
+ * domain_check - check that a datum satisfies the constraints of a
+ * domain. extra and mcxt can be passed if they are available from,
+ * say, a FmgrInfo structure, or they can be NULL, in which case the
+ * setup is repeated for each call.
+ */
+void
+domain_check(Datum value, bool isnull, Oid domainType,
+ void **extra, MemoryContext mcxt)
+{
+ DomainIOData *my_extra = NULL;
+
+ if (mcxt == NULL)
+ mcxt = CurrentMemoryContext;
+
+ /*
+ * We arrange to look up the needed info just once per series of calls,
+ * assuming the domain type doesn't change underneath us (which really
+ * shouldn't happen, but cope if it does).
+ */
+ if (extra)
+ my_extra = (DomainIOData *) *extra;
+ if (my_extra == NULL || my_extra->domain_type != domainType)
+ {
+ my_extra = domain_state_setup(domainType, true, mcxt);
+ if (extra)
+ *extra = (void *) my_extra;
+ }
+
+ /*
+ * Do the necessary checks to ensure it's a valid domain value.
+ */
+ domain_check_input(value, isnull, my_extra);
+}
+
+/*
+ * errdatatype --- stores schema_name and datatype_name of a datatype
+ * within the current errordata.
+ */
+int
+errdatatype(Oid datatypeOid)
+{
+ HeapTuple tup;
+ Form_pg_type typtup;
+
+ tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(datatypeOid));
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for type %u", datatypeOid);
+ typtup = (Form_pg_type) GETSTRUCT(tup);
+
+ err_generic_string(PG_DIAG_SCHEMA_NAME,
+ get_namespace_name(typtup->typnamespace));
+ err_generic_string(PG_DIAG_DATATYPE_NAME, NameStr(typtup->typname));
+
+ ReleaseSysCache(tup);
+
+ return 0; /* return value does not matter */
+}
+
+/*
+ * errdomainconstraint --- stores schema_name, datatype_name and
+ * constraint_name of a domain-related constraint within the current errordata.
+ */
+int
+errdomainconstraint(Oid datatypeOid, const char *conname)
+{
+ errdatatype(datatypeOid);
+ err_generic_string(PG_DIAG_CONSTRAINT_NAME, conname);
+
+ return 0; /* return value does not matter */
+}
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
new file mode 100644
index 0000000..feb3e83
--- /dev/null
+++ b/src/backend/utils/adt/encode.c
@@ -0,0 +1,602 @@
+/*-------------------------------------------------------------------------
+ *
+ * encode.c
+ * Various data encoding/decoding things.
+ *
+ * Copyright (c) 2001-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/encode.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+
+#include "mb/pg_wchar.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+
+
+/*
+ * Encoding conversion API.
+ * encode_len() and decode_len() compute the amount of space needed, while
+ * encode() and decode() perform the actual conversions. It is okay for
+ * the _len functions to return an overestimate, but not an underestimate.
+ * (Having said that, large overestimates could cause unnecessary errors,
+ * so it's better to get it right.) The conversion routines write to the
+ * buffer at *res and return the true length of their output.
+ */
+struct pg_encoding
+{
+ uint64 (*encode_len) (const char *data, size_t dlen);
+ uint64 (*decode_len) (const char *data, size_t dlen);
+ uint64 (*encode) (const char *data, size_t dlen, char *res);
+ uint64 (*decode) (const char *data, size_t dlen, char *res);
+};
+
+static const struct pg_encoding *pg_find_encoding(const char *name);
+
+/*
+ * SQL functions.
+ */
+
+Datum
+binary_encode(PG_FUNCTION_ARGS)
+{
+ bytea *data = PG_GETARG_BYTEA_PP(0);
+ Datum name = PG_GETARG_DATUM(1);
+ text *result;
+ char *namebuf;
+ char *dataptr;
+ size_t datalen;
+ uint64 resultlen;
+ uint64 res;
+ const struct pg_encoding *enc;
+
+ namebuf = TextDatumGetCString(name);
+
+ enc = pg_find_encoding(namebuf);
+ if (enc == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unrecognized encoding: \"%s\"", namebuf)));
+
+ dataptr = VARDATA_ANY(data);
+ datalen = VARSIZE_ANY_EXHDR(data);
+
+ resultlen = enc->encode_len(dataptr, datalen);
+
+ /*
+ * resultlen possibly overflows uint32, therefore on 32-bit machines it's
+ * unsafe to rely on palloc's internal check.
+ */
+ if (resultlen > MaxAllocSize - VARHDRSZ)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("result of encoding conversion is too large")));
+
+ result = palloc(VARHDRSZ + resultlen);
+
+ res = enc->encode(dataptr, datalen, VARDATA(result));
+
+ /* Make this FATAL 'cause we've trodden on memory ... */
+ if (res > resultlen)
+ elog(FATAL, "overflow - encode estimate too small");
+
+ SET_VARSIZE(result, VARHDRSZ + res);
+
+ PG_RETURN_TEXT_P(result);
+}
+
+Datum
+binary_decode(PG_FUNCTION_ARGS)
+{
+ text *data = PG_GETARG_TEXT_PP(0);
+ Datum name = PG_GETARG_DATUM(1);
+ bytea *result;
+ char *namebuf;
+ char *dataptr;
+ size_t datalen;
+ uint64 resultlen;
+ uint64 res;
+ const struct pg_encoding *enc;
+
+ namebuf = TextDatumGetCString(name);
+
+ enc = pg_find_encoding(namebuf);
+ if (enc == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unrecognized encoding: \"%s\"", namebuf)));
+
+ dataptr = VARDATA_ANY(data);
+ datalen = VARSIZE_ANY_EXHDR(data);
+
+ resultlen = enc->decode_len(dataptr, datalen);
+
+ /*
+ * resultlen possibly overflows uint32, therefore on 32-bit machines it's
+ * unsafe to rely on palloc's internal check.
+ */
+ if (resultlen > MaxAllocSize - VARHDRSZ)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("result of decoding conversion is too large")));
+
+ result = palloc(VARHDRSZ + resultlen);
+
+ res = enc->decode(dataptr, datalen, VARDATA(result));
+
+ /* Make this FATAL 'cause we've trodden on memory ... */
+ if (res > resultlen)
+ elog(FATAL, "overflow - decode estimate too small");
+
+ SET_VARSIZE(result, VARHDRSZ + res);
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+
+/*
+ * HEX
+ */
+
+static const char hextbl[] = "0123456789abcdef";
+
+static const int8 hexlookup[128] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
+uint64
+hex_encode(const char *src, size_t len, char *dst)
+{
+ const char *end = src + len;
+
+ while (src < end)
+ {
+ *dst++ = hextbl[(*src >> 4) & 0xF];
+ *dst++ = hextbl[*src & 0xF];
+ src++;
+ }
+ return (uint64) len * 2;
+}
+
+static inline char
+get_hex(const char *cp)
+{
+ unsigned char c = (unsigned char) *cp;
+ int res = -1;
+
+ if (c < 127)
+ res = hexlookup[c];
+
+ if (res < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid hexadecimal digit: \"%.*s\"",
+ pg_mblen(cp), cp)));
+
+ return (char) res;
+}
+
+uint64
+hex_decode(const char *src, size_t len, char *dst)
+{
+ const char *s,
+ *srcend;
+ char v1,
+ v2,
+ *p;
+
+ srcend = src + len;
+ s = src;
+ p = dst;
+ while (s < srcend)
+ {
+ if (*s == ' ' || *s == '\n' || *s == '\t' || *s == '\r')
+ {
+ s++;
+ continue;
+ }
+ v1 = get_hex(s) << 4;
+ s++;
+ if (s >= srcend)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid hexadecimal data: odd number of digits")));
+
+ v2 = get_hex(s);
+ s++;
+ *p++ = v1 | v2;
+ }
+
+ return p - dst;
+}
+
+static uint64
+hex_enc_len(const char *src, size_t srclen)
+{
+ return (uint64) srclen << 1;
+}
+
+static uint64
+hex_dec_len(const char *src, size_t srclen)
+{
+ return (uint64) srclen >> 1;
+}
+
+/*
+ * BASE64
+ */
+
+static const char _base64[] =
+"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+static const int8 b64lookup[128] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
+ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
+ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
+ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
+};
+
+static uint64
+pg_base64_encode(const char *src, size_t len, char *dst)
+{
+ char *p,
+ *lend = dst + 76;
+ const char *s,
+ *end = src + len;
+ int pos = 2;
+ uint32 buf = 0;
+
+ s = src;
+ p = dst;
+
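+	/*
+	 * Accumulate up to three source bytes in the low 24 bits of "buf" (pos
+	 * counts down from 2 to 0), emit them as four base64 characters, and
+	 * break the output with a newline after every 76 characters.
+	 */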
+ while (s < end)
+ {
+ buf |= (unsigned char) *s << (pos << 3);
+ pos--;
+ s++;
+
+ /* write it out */
+ if (pos < 0)
+ {
+ *p++ = _base64[(buf >> 18) & 0x3f];
+ *p++ = _base64[(buf >> 12) & 0x3f];
+ *p++ = _base64[(buf >> 6) & 0x3f];
+ *p++ = _base64[buf & 0x3f];
+
+ pos = 2;
+ buf = 0;
+ }
+ if (p >= lend)
+ {
+ *p++ = '\n';
+ lend = p + 76;
+ }
+ }
+ if (pos != 2)
+ {
+ *p++ = _base64[(buf >> 18) & 0x3f];
+ *p++ = _base64[(buf >> 12) & 0x3f];
+ *p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '=';
+ *p++ = '=';
+ }
+
+ return p - dst;
+}
+
+static uint64
+pg_base64_decode(const char *src, size_t len, char *dst)
+{
+ const char *srcend = src + len,
+ *s = src;
+ char *p = dst;
+ char c;
+ int b = 0;
+ uint32 buf = 0;
+ int pos = 0,
+ end = 0;
+
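+	/*
+	 * "end" tracks padding: it becomes 1 when '=' appears after two data
+	 * characters in the final group (which then decodes to one byte), and 2
+	 * when it appears after three (two bytes).
+	 */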
+ while (s < srcend)
+ {
+ c = *s++;
+
+ if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
+ continue;
+
+ if (c == '=')
+ {
+ /* end sequence */
+ if (!end)
+ {
+ if (pos == 2)
+ end = 1;
+ else if (pos == 3)
+ end = 2;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unexpected \"=\" while decoding base64 sequence")));
+ }
+ b = 0;
+ }
+ else
+ {
+ b = -1;
+ if (c > 0 && c < 127)
+ b = b64lookup[(unsigned char) c];
+ if (b < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid symbol \"%.*s\" found while decoding base64 sequence",
+ pg_mblen(s - 1), s - 1)));
+ }
+ /* add it to buffer */
+ buf = (buf << 6) + b;
+ pos++;
+ if (pos == 4)
+ {
+ *p++ = (buf >> 16) & 255;
+ if (end == 0 || end > 1)
+ *p++ = (buf >> 8) & 255;
+ if (end == 0 || end > 2)
+ *p++ = buf & 255;
+ buf = 0;
+ pos = 0;
+ }
+ }
+
+ if (pos != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid base64 end sequence"),
+ errhint("Input data is missing padding, is truncated, or is otherwise corrupted.")));
+
+ return p - dst;
+}
+
+
+static uint64
+pg_base64_enc_len(const char *src, size_t srclen)
+{
+	/*
+	 * Every 3 source bytes become 4 output characters, plus a linefeed after
+	 * each 76 output characters, i.e. after every 57 (76 * 3 / 4) source bytes.
+	 */
+ return ((uint64) srclen + 2) * 4 / 3 + (uint64) srclen / (76 * 3 / 4);
+}
+
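+/*
+ * Worst-case estimate: every 4 input characters decode to at most 3 bytes;
+ * whitespace and '=' padding only make the true result smaller, which the
+ * API contract above permits.
+ */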
+static uint64
+pg_base64_dec_len(const char *src, size_t srclen)
+{
+ return ((uint64) srclen * 3) >> 2;
+}
+
+/*
+ * Escape
+ * Minimally escape bytea to text.
+ * De-escape text to bytea.
+ *
+ * We must escape zero bytes and high-bit-set bytes to avoid generating
+ * text that might be invalid in the current encoding, or that might
+ * change to something else if passed through an encoding conversion
+ * (leading to failing to de-escape to the original bytea value).
+ * Also of course backslash itself has to be escaped.
+ *
+ * De-escaping processes \\ and any \### octal
+ */
+
+#define VAL(CH) ((CH) - '0')
+#define DIG(VAL) ((VAL) + '0')
+
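+/*
+ * For example, esc_encode() emits a zero byte as the four characters \000,
+ * the byte 0xC3 as \303, and a backslash as two backslashes; all other
+ * bytes pass through unchanged.
+ */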
+static uint64
+esc_encode(const char *src, size_t srclen, char *dst)
+{
+ const char *end = src + srclen;
+ char *rp = dst;
+ uint64 len = 0;
+
+ while (src < end)
+ {
+ unsigned char c = (unsigned char) *src;
+
+ if (c == '\0' || IS_HIGHBIT_SET(c))
+ {
+ rp[0] = '\\';
+ rp[1] = DIG(c >> 6);
+ rp[2] = DIG((c >> 3) & 7);
+ rp[3] = DIG(c & 7);
+ rp += 4;
+ len += 4;
+ }
+ else if (c == '\\')
+ {
+ rp[0] = '\\';
+ rp[1] = '\\';
+ rp += 2;
+ len += 2;
+ }
+ else
+ {
+ *rp++ = c;
+ len++;
+ }
+
+ src++;
+ }
+
+ return len;
+}
+
+static uint64
+esc_decode(const char *src, size_t srclen, char *dst)
+{
+ const char *end = src + srclen;
+ char *rp = dst;
+ uint64 len = 0;
+
+ while (src < end)
+ {
+ if (src[0] != '\\')
+ *rp++ = *src++;
+ else if (src + 3 < end &&
+ (src[1] >= '0' && src[1] <= '3') &&
+ (src[2] >= '0' && src[2] <= '7') &&
+ (src[3] >= '0' && src[3] <= '7'))
+ {
+ int val;
+
+ val = VAL(src[1]);
+ val <<= 3;
+ val += VAL(src[2]);
+ val <<= 3;
+ *rp++ = val + VAL(src[3]);
+ src += 4;
+ }
+ else if (src + 1 < end &&
+ (src[1] == '\\'))
+ {
+ *rp++ = '\\';
+ src += 2;
+ }
+ else
+ {
+ /*
+ * One backslash, not followed by ### valid octal. Should never
+ * get here, since esc_dec_len does same check.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s", "bytea")));
+ }
+
+ len++;
+ }
+
+ return len;
+}
+
+static uint64
+esc_enc_len(const char *src, size_t srclen)
+{
+ const char *end = src + srclen;
+ uint64 len = 0;
+
+ while (src < end)
+ {
+ if (*src == '\0' || IS_HIGHBIT_SET(*src))
+ len += 4;
+ else if (*src == '\\')
+ len += 2;
+ else
+ len++;
+
+ src++;
+ }
+
+ return len;
+}
+
+static uint64
+esc_dec_len(const char *src, size_t srclen)
+{
+ const char *end = src + srclen;
+ uint64 len = 0;
+
+ while (src < end)
+ {
+ if (src[0] != '\\')
+ src++;
+ else if (src + 3 < end &&
+ (src[1] >= '0' && src[1] <= '3') &&
+ (src[2] >= '0' && src[2] <= '7') &&
+ (src[3] >= '0' && src[3] <= '7'))
+ {
+ /*
+ * backslash + valid octal
+ */
+ src += 4;
+ }
+ else if (src + 1 < end &&
+ (src[1] == '\\'))
+ {
+ /*
+ * two backslashes = backslash
+ */
+ src += 2;
+ }
+ else
+ {
+ /*
+ * one backslash, not followed by ### valid octal
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s", "bytea")));
+ }
+
+ len++;
+ }
+ return len;
+}
+
+/*
+ * Common
+ */
+
+static const struct
+{
+ const char *name;
+ struct pg_encoding enc;
+} enclist[] =
+
+{
+ {
+ "hex",
+ {
+ hex_enc_len, hex_dec_len, hex_encode, hex_decode
+ }
+ },
+ {
+ "base64",
+ {
+ pg_base64_enc_len, pg_base64_dec_len, pg_base64_encode, pg_base64_decode
+ }
+ },
+ {
+ "escape",
+ {
+ esc_enc_len, esc_dec_len, esc_encode, esc_decode
+ }
+ },
+ {
+ NULL,
+ {
+ NULL, NULL, NULL, NULL
+ }
+ }
+};
+
+static const struct pg_encoding *
+pg_find_encoding(const char *name)
+{
+ int i;
+
+ for (i = 0; enclist[i].name; i++)
+ if (pg_strcasecmp(enclist[i].name, name) == 0)
+ return &enclist[i].enc;
+
+ return NULL;
+}
diff --git a/src/backend/utils/adt/enum.c b/src/backend/utils/adt/enum.c
new file mode 100644
index 0000000..0cc7a6d
--- /dev/null
+++ b/src/backend/utils/adt/enum.c
@@ -0,0 +1,610 @@
+/*-------------------------------------------------------------------------
+ *
+ * enum.c
+ * I/O functions, operators, aggregates etc for enum types
+ *
+ * Copyright (c) 2006-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/enum.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/genam.h"
+#include "access/htup_details.h"
+#include "access/table.h"
+#include "catalog/pg_enum.h"
+#include "libpq/pqformat.h"
+#include "storage/procarray.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+#include "utils/typcache.h"
+
+
+static Oid enum_endpoint(Oid enumtypoid, ScanDirection direction);
+static ArrayType *enum_range_internal(Oid enumtypoid, Oid lower, Oid upper);
+
+
+/*
+ * Disallow use of an uncommitted pg_enum tuple.
+ *
+ * We need to make sure that uncommitted enum values don't get into indexes.
+ * If they did, and if we then rolled back the pg_enum addition, we'd have
+ * broken the index because value comparisons will not work reliably without
+ * an underlying pg_enum entry. (Note that removal of the heap entry
+ * containing an enum value is not sufficient to ensure that it doesn't appear
+ * in upper levels of indexes.) To do this we prevent an uncommitted row from
+ * being used for any SQL-level purpose. This is stronger than necessary,
+ * since the value might not be getting inserted into a table or there might
+ * be no index on its column, but it's easy to enforce centrally.
+ *
+ * However, it's okay to allow use of uncommitted values belonging to enum
+ * types that were themselves created in the same transaction, because then
+ * any such index would also be new and would go away altogether on rollback.
+ * We don't implement that fully right now, but we do allow free use of enum
+ * values created during CREATE TYPE AS ENUM, which are surely of the same
+ * lifespan as the enum type. (This case is required by "pg_restore -1".)
+ * Values added by ALTER TYPE ADD VALUE are currently restricted, but could
+ * be allowed if the enum type could be proven to have been created earlier
+ * in the same transaction. (Note that comparing tuple xmins would not work
+ * for that, because the type tuple might have been updated in the current
+ * transaction. Subtransactions also create hazards to be accounted for.)
+ *
+ * This function needs to be called (directly or indirectly) in any of the
+ * functions below that could return an enum value to SQL operations.
+ */
+static void
+check_safe_enum_use(HeapTuple enumval_tup)
+{
+ TransactionId xmin;
+ Form_pg_enum en = (Form_pg_enum) GETSTRUCT(enumval_tup);
+
+ /*
+ * If the row is hinted as committed, it's surely safe. This provides a
+ * fast path for all normal use-cases.
+ */
+ if (HeapTupleHeaderXminCommitted(enumval_tup->t_data))
+ return;
+
+ /*
+ * Usually, a row would get hinted as committed when it's read or loaded
+ * into syscache; but just in case not, let's check the xmin directly.
+ */
+ xmin = HeapTupleHeaderGetXmin(enumval_tup->t_data);
+ if (!TransactionIdIsInProgress(xmin) &&
+ TransactionIdDidCommit(xmin))
+ return;
+
+ /*
+ * Check if the enum value is uncommitted. If not, it's safe, because it
+ * was made during CREATE TYPE AS ENUM and can't be shorter-lived than its
+ * owning type. (This'd also be false for values made by other
+ * transactions; but the previous tests should have handled all of those.)
+ */
+ if (!EnumUncommitted(en->oid))
+ return;
+
+ /*
+ * There might well be other tests we could do here to narrow down the
+ * unsafe conditions, but for now just raise an exception.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_UNSAFE_NEW_ENUM_VALUE_USAGE),
+ errmsg("unsafe use of new value \"%s\" of enum type %s",
+ NameStr(en->enumlabel),
+ format_type_be(en->enumtypid)),
+ errhint("New enum values must be committed before they can be used.")));
+}
+
+
+/* Basic I/O support */
+
+Datum
+enum_in(PG_FUNCTION_ARGS)
+{
+ char *name = PG_GETARG_CSTRING(0);
+ Oid enumtypoid = PG_GETARG_OID(1);
+ Oid enumoid;
+ HeapTuple tup;
+
+ /* must check length to prevent Assert failure within SearchSysCache */
+ if (strlen(name) >= NAMEDATALEN)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input value for enum %s: \"%s\"",
+ format_type_be(enumtypoid),
+ name)));
+
+ tup = SearchSysCache2(ENUMTYPOIDNAME,
+ ObjectIdGetDatum(enumtypoid),
+ CStringGetDatum(name));
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input value for enum %s: \"%s\"",
+ format_type_be(enumtypoid),
+ name)));
+
+ /* check it's safe to use in SQL */
+ check_safe_enum_use(tup);
+
+	/*
+	 * The value returned is the label's pg_enum.oid. These OIDs end up stored
+	 * in user tables, so they must be preserved by binary upgrades.
+	 */
+ enumoid = ((Form_pg_enum) GETSTRUCT(tup))->oid;
+
+ ReleaseSysCache(tup);
+
+ PG_RETURN_OID(enumoid);
+}
+
+Datum
+enum_out(PG_FUNCTION_ARGS)
+{
+ Oid enumval = PG_GETARG_OID(0);
+ char *result;
+ HeapTuple tup;
+ Form_pg_enum en;
+
+ tup = SearchSysCache1(ENUMOID, ObjectIdGetDatum(enumval));
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("invalid internal value for enum: %u",
+ enumval)));
+ en = (Form_pg_enum) GETSTRUCT(tup);
+
+ result = pstrdup(NameStr(en->enumlabel));
+
+ ReleaseSysCache(tup);
+
+ PG_RETURN_CSTRING(result);
+}
+
+/* Binary I/O support */
+Datum
+enum_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ Oid enumtypoid = PG_GETARG_OID(1);
+ Oid enumoid;
+ HeapTuple tup;
+ char *name;
+ int nbytes;
+
+ name = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
+
+ /* must check length to prevent Assert failure within SearchSysCache */
+ if (strlen(name) >= NAMEDATALEN)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input value for enum %s: \"%s\"",
+ format_type_be(enumtypoid),
+ name)));
+
+ tup = SearchSysCache2(ENUMTYPOIDNAME,
+ ObjectIdGetDatum(enumtypoid),
+ CStringGetDatum(name));
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input value for enum %s: \"%s\"",
+ format_type_be(enumtypoid),
+ name)));
+
+ /* check it's safe to use in SQL */
+ check_safe_enum_use(tup);
+
+ enumoid = ((Form_pg_enum) GETSTRUCT(tup))->oid;
+
+ ReleaseSysCache(tup);
+
+ pfree(name);
+
+ PG_RETURN_OID(enumoid);
+}
+
+Datum
+enum_send(PG_FUNCTION_ARGS)
+{
+ Oid enumval = PG_GETARG_OID(0);
+ StringInfoData buf;
+ HeapTuple tup;
+ Form_pg_enum en;
+
+ tup = SearchSysCache1(ENUMOID, ObjectIdGetDatum(enumval));
+ if (!HeapTupleIsValid(tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("invalid internal value for enum: %u",
+ enumval)));
+ en = (Form_pg_enum) GETSTRUCT(tup);
+
+ pq_begintypsend(&buf);
+ pq_sendtext(&buf, NameStr(en->enumlabel), strlen(NameStr(en->enumlabel)));
+
+ ReleaseSysCache(tup);
+
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/* Comparison functions and related */
+
+/*
+ * enum_cmp_internal is the common engine for all the visible comparison
+ * functions, except for enum_eq and enum_ne which can just check for OID
+ * equality directly.
+ */
+static int
+enum_cmp_internal(Oid arg1, Oid arg2, FunctionCallInfo fcinfo)
+{
+ TypeCacheEntry *tcache;
+
+ /*
+ * We don't need the typcache except in the hopefully-uncommon case that
+ * one or both Oids are odd. This means that cursory testing of code that
+ * fails to pass flinfo to an enum comparison function might not disclose
+ * the oversight. To make such errors more obvious, Assert that we have a
+ * place to cache even when we take a fast-path exit.
+ */
+ Assert(fcinfo->flinfo != NULL);
+
+ /* Equal OIDs are equal no matter what */
+ if (arg1 == arg2)
+ return 0;
+
+	/*
+	 * Fast path: even-numbered Oids are assigned only when OID order agrees
+	 * with the enum's sort order (see catalog/pg_enum.c), so they can be
+	 * compared directly.
+	 */
+ if ((arg1 & 1) == 0 && (arg2 & 1) == 0)
+ {
+ if (arg1 < arg2)
+ return -1;
+ else
+ return 1;
+ }
+
+ /* Locate the typcache entry for the enum type */
+ tcache = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+ if (tcache == NULL)
+ {
+ HeapTuple enum_tup;
+ Form_pg_enum en;
+ Oid typeoid;
+
+ /* Get the OID of the enum type containing arg1 */
+ enum_tup = SearchSysCache1(ENUMOID, ObjectIdGetDatum(arg1));
+ if (!HeapTupleIsValid(enum_tup))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("invalid internal value for enum: %u",
+ arg1)));
+ en = (Form_pg_enum) GETSTRUCT(enum_tup);
+ typeoid = en->enumtypid;
+ ReleaseSysCache(enum_tup);
+ /* Now locate and remember the typcache entry */
+ tcache = lookup_type_cache(typeoid, 0);
+ fcinfo->flinfo->fn_extra = (void *) tcache;
+ }
+
+ /* The remaining comparison logic is in typcache.c */
+ return compare_values_of_enum(tcache, arg1, arg2);
+}
+
+Datum
+enum_lt(PG_FUNCTION_ARGS)
+{
+ Oid a = PG_GETARG_OID(0);
+ Oid b = PG_GETARG_OID(1);
+
+ PG_RETURN_BOOL(enum_cmp_internal(a, b, fcinfo) < 0);
+}
+
+Datum
+enum_le(PG_FUNCTION_ARGS)
+{
+ Oid a = PG_GETARG_OID(0);
+ Oid b = PG_GETARG_OID(1);
+
+ PG_RETURN_BOOL(enum_cmp_internal(a, b, fcinfo) <= 0);
+}
+
+Datum
+enum_eq(PG_FUNCTION_ARGS)
+{
+ Oid a = PG_GETARG_OID(0);
+ Oid b = PG_GETARG_OID(1);
+
+ PG_RETURN_BOOL(a == b);
+}
+
+Datum
+enum_ne(PG_FUNCTION_ARGS)
+{
+ Oid a = PG_GETARG_OID(0);
+ Oid b = PG_GETARG_OID(1);
+
+ PG_RETURN_BOOL(a != b);
+}
+
+Datum
+enum_ge(PG_FUNCTION_ARGS)
+{
+ Oid a = PG_GETARG_OID(0);
+ Oid b = PG_GETARG_OID(1);
+
+ PG_RETURN_BOOL(enum_cmp_internal(a, b, fcinfo) >= 0);
+}
+
+Datum
+enum_gt(PG_FUNCTION_ARGS)
+{
+ Oid a = PG_GETARG_OID(0);
+ Oid b = PG_GETARG_OID(1);
+
+ PG_RETURN_BOOL(enum_cmp_internal(a, b, fcinfo) > 0);
+}
+
+Datum
+enum_smaller(PG_FUNCTION_ARGS)
+{
+ Oid a = PG_GETARG_OID(0);
+ Oid b = PG_GETARG_OID(1);
+
+ PG_RETURN_OID(enum_cmp_internal(a, b, fcinfo) < 0 ? a : b);
+}
+
+Datum
+enum_larger(PG_FUNCTION_ARGS)
+{
+ Oid a = PG_GETARG_OID(0);
+ Oid b = PG_GETARG_OID(1);
+
+ PG_RETURN_OID(enum_cmp_internal(a, b, fcinfo) > 0 ? a : b);
+}
+
+Datum
+enum_cmp(PG_FUNCTION_ARGS)
+{
+ Oid a = PG_GETARG_OID(0);
+ Oid b = PG_GETARG_OID(1);
+
+ PG_RETURN_INT32(enum_cmp_internal(a, b, fcinfo));
+}
+
+/* Enum programming support functions */
+
+/*
+ * enum_endpoint: common code for enum_first/enum_last
+ */
+static Oid
+enum_endpoint(Oid enumtypoid, ScanDirection direction)
+{
+ Relation enum_rel;
+ Relation enum_idx;
+ SysScanDesc enum_scan;
+ HeapTuple enum_tuple;
+ ScanKeyData skey;
+ Oid minmax;
+
+ /*
+ * Find the first/last enum member using pg_enum_typid_sortorder_index.
+ * Note we must not use the syscache. See comments for RenumberEnumType
+ * in catalog/pg_enum.c for more info.
+ */
+ ScanKeyInit(&skey,
+ Anum_pg_enum_enumtypid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(enumtypoid));
+
+ enum_rel = table_open(EnumRelationId, AccessShareLock);
+ enum_idx = index_open(EnumTypIdSortOrderIndexId, AccessShareLock);
+ enum_scan = systable_beginscan_ordered(enum_rel, enum_idx, NULL,
+ 1, &skey);
+
+ enum_tuple = systable_getnext_ordered(enum_scan, direction);
+ if (HeapTupleIsValid(enum_tuple))
+ {
+ /* check it's safe to use in SQL */
+ check_safe_enum_use(enum_tuple);
+ minmax = ((Form_pg_enum) GETSTRUCT(enum_tuple))->oid;
+ }
+ else
+ {
+ /* should only happen with an empty enum */
+ minmax = InvalidOid;
+ }
+
+ systable_endscan_ordered(enum_scan);
+ index_close(enum_idx, AccessShareLock);
+ table_close(enum_rel, AccessShareLock);
+
+ return minmax;
+}
+
+Datum
+enum_first(PG_FUNCTION_ARGS)
+{
+ Oid enumtypoid;
+ Oid min;
+
+ /*
+ * We rely on being able to get the specific enum type from the calling
+ * expression tree. Notice that the actual value of the argument isn't
+ * examined at all; in particular it might be NULL.
+ */
+ enumtypoid = get_fn_expr_argtype(fcinfo->flinfo, 0);
+ if (enumtypoid == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not determine actual enum type")));
+
+ /* Get the OID using the index */
+ min = enum_endpoint(enumtypoid, ForwardScanDirection);
+
+ if (!OidIsValid(min))
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("enum %s contains no values",
+ format_type_be(enumtypoid))));
+
+ PG_RETURN_OID(min);
+}
+
+Datum
+enum_last(PG_FUNCTION_ARGS)
+{
+ Oid enumtypoid;
+ Oid max;
+
+ /*
+ * We rely on being able to get the specific enum type from the calling
+ * expression tree. Notice that the actual value of the argument isn't
+ * examined at all; in particular it might be NULL.
+ */
+ enumtypoid = get_fn_expr_argtype(fcinfo->flinfo, 0);
+ if (enumtypoid == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not determine actual enum type")));
+
+ /* Get the OID using the index */
+ max = enum_endpoint(enumtypoid, BackwardScanDirection);
+
+ if (!OidIsValid(max))
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("enum %s contains no values",
+ format_type_be(enumtypoid))));
+
+ PG_RETURN_OID(max);
+}
+
+/* 2-argument variant of enum_range */
+Datum
+enum_range_bounds(PG_FUNCTION_ARGS)
+{
+ Oid lower;
+ Oid upper;
+ Oid enumtypoid;
+
+ if (PG_ARGISNULL(0))
+ lower = InvalidOid;
+ else
+ lower = PG_GETARG_OID(0);
+ if (PG_ARGISNULL(1))
+ upper = InvalidOid;
+ else
+ upper = PG_GETARG_OID(1);
+
+ /*
+ * We rely on being able to get the specific enum type from the calling
+ * expression tree. The generic type mechanism should have ensured that
+ * both are of the same type.
+ */
+ enumtypoid = get_fn_expr_argtype(fcinfo->flinfo, 0);
+ if (enumtypoid == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not determine actual enum type")));
+
+ PG_RETURN_ARRAYTYPE_P(enum_range_internal(enumtypoid, lower, upper));
+}
+
+/* 1-argument variant of enum_range */
+Datum
+enum_range_all(PG_FUNCTION_ARGS)
+{
+ Oid enumtypoid;
+
+ /*
+ * We rely on being able to get the specific enum type from the calling
+ * expression tree. Notice that the actual value of the argument isn't
+ * examined at all; in particular it might be NULL.
+ */
+ enumtypoid = get_fn_expr_argtype(fcinfo->flinfo, 0);
+ if (enumtypoid == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not determine actual enum type")));
+
+ PG_RETURN_ARRAYTYPE_P(enum_range_internal(enumtypoid,
+ InvalidOid, InvalidOid));
+}
+
+static ArrayType *
+enum_range_internal(Oid enumtypoid, Oid lower, Oid upper)
+{
+ ArrayType *result;
+ Relation enum_rel;
+ Relation enum_idx;
+ SysScanDesc enum_scan;
+ HeapTuple enum_tuple;
+ ScanKeyData skey;
+ Datum *elems;
+ int max,
+ cnt;
+ bool left_found;
+
+ /*
+ * Scan the enum members in order using pg_enum_typid_sortorder_index.
+ * Note we must not use the syscache. See comments for RenumberEnumType
+ * in catalog/pg_enum.c for more info.
+ */
+ ScanKeyInit(&skey,
+ Anum_pg_enum_enumtypid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(enumtypoid));
+
+ enum_rel = table_open(EnumRelationId, AccessShareLock);
+ enum_idx = index_open(EnumTypIdSortOrderIndexId, AccessShareLock);
+ enum_scan = systable_beginscan_ordered(enum_rel, enum_idx, NULL, 1, &skey);
+
+ max = 64;
+ elems = (Datum *) palloc(max * sizeof(Datum));
+ cnt = 0;
+ left_found = !OidIsValid(lower);
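+	/* An invalid lower bound means "collect from the first member onward" */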
+
+ while (HeapTupleIsValid(enum_tuple = systable_getnext_ordered(enum_scan, ForwardScanDirection)))
+ {
+ Oid enum_oid = ((Form_pg_enum) GETSTRUCT(enum_tuple))->oid;
+
+ if (!left_found && lower == enum_oid)
+ left_found = true;
+
+ if (left_found)
+ {
+ /* check it's safe to use in SQL */
+ check_safe_enum_use(enum_tuple);
+
+ if (cnt >= max)
+ {
+ max *= 2;
+ elems = (Datum *) repalloc(elems, max * sizeof(Datum));
+ }
+
+ elems[cnt++] = ObjectIdGetDatum(enum_oid);
+ }
+
+ if (OidIsValid(upper) && upper == enum_oid)
+ break;
+ }
+
+ systable_endscan_ordered(enum_scan);
+ index_close(enum_idx, AccessShareLock);
+ table_close(enum_rel, AccessShareLock);
+
+ /* and build the result array */
+ /* note this hardwires some details about the representation of Oid */
+ result = construct_array(elems, cnt, enumtypoid,
+ sizeof(Oid), true, TYPALIGN_INT);
+
+ pfree(elems);
+
+ return result;
+}
diff --git a/src/backend/utils/adt/expandeddatum.c b/src/backend/utils/adt/expandeddatum.c
new file mode 100644
index 0000000..e66da9b
--- /dev/null
+++ b/src/backend/utils/adt/expandeddatum.c
@@ -0,0 +1,145 @@
+/*-------------------------------------------------------------------------
+ *
+ * expandeddatum.c
+ * Support functions for "expanded" value representations.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/expandeddatum.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "utils/expandeddatum.h"
+#include "utils/memutils.h"
+
+/*
+ * DatumGetEOHP
+ *
+ * Given a Datum that is an expanded-object reference, extract the pointer.
+ *
+ * This is a bit tedious since the pointer may not be properly aligned;
+ * compare VARATT_EXTERNAL_GET_POINTER().
+ */
+ExpandedObjectHeader *
+DatumGetEOHP(Datum d)
+{
+ varattrib_1b_e *datum = (varattrib_1b_e *) DatumGetPointer(d);
+ varatt_expanded ptr;
+
+ Assert(VARATT_IS_EXTERNAL_EXPANDED(datum));
+ memcpy(&ptr, VARDATA_EXTERNAL(datum), sizeof(ptr));
+ Assert(VARATT_IS_EXPANDED_HEADER(ptr.eohptr));
+ return ptr.eohptr;
+}
+
+/*
+ * EOH_init_header
+ *
+ * Initialize the common header of an expanded object.
+ *
+ * The main thing this encapsulates is initializing the TOAST pointers.
+ */
+void
+EOH_init_header(ExpandedObjectHeader *eohptr,
+ const ExpandedObjectMethods *methods,
+ MemoryContext obj_context)
+{
+ varatt_expanded ptr;
+
+ eohptr->vl_len_ = EOH_HEADER_MAGIC;
+ eohptr->eoh_methods = methods;
+ eohptr->eoh_context = obj_context;
+
+ ptr.eohptr = eohptr;
+
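+	/*
+	 * Pre-build both flattened TOAST pointers; these are what EOHPGetRWDatum()
+	 * and EOHPGetRODatum() hand back later.
+	 */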
+ SET_VARTAG_EXTERNAL(eohptr->eoh_rw_ptr, VARTAG_EXPANDED_RW);
+ memcpy(VARDATA_EXTERNAL(eohptr->eoh_rw_ptr), &ptr, sizeof(ptr));
+
+ SET_VARTAG_EXTERNAL(eohptr->eoh_ro_ptr, VARTAG_EXPANDED_RO);
+ memcpy(VARDATA_EXTERNAL(eohptr->eoh_ro_ptr), &ptr, sizeof(ptr));
+}
+
+/*
+ * EOH_get_flat_size
+ * EOH_flatten_into
+ *
+ * Convenience functions for invoking the "methods" of an expanded object.
+ */
+
+Size
+EOH_get_flat_size(ExpandedObjectHeader *eohptr)
+{
+ return eohptr->eoh_methods->get_flat_size(eohptr);
+}
+
+void
+EOH_flatten_into(ExpandedObjectHeader *eohptr,
+ void *result, Size allocated_size)
+{
+ eohptr->eoh_methods->flatten_into(eohptr, result, allocated_size);
+}
+
+/*
+ * If the Datum represents a R/W expanded object, change it to R/O.
+ * Otherwise return the original Datum.
+ *
+ * Caller must ensure that the datum is a non-null varlena value. Typically
+ * this is invoked via MakeExpandedObjectReadOnly(), which checks that.
+ */
+Datum
+MakeExpandedObjectReadOnlyInternal(Datum d)
+{
+ ExpandedObjectHeader *eohptr;
+
+ /* Nothing to do if not a read-write expanded-object pointer */
+ if (!VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(d)))
+ return d;
+
+ /* Now safe to extract the object pointer */
+ eohptr = DatumGetEOHP(d);
+
+ /* Return the built-in read-only pointer instead of given pointer */
+ return EOHPGetRODatum(eohptr);
+}
+
+/*
+ * Transfer ownership of an expanded object to a new parent memory context.
+ * The object must be referenced by a R/W pointer, and what we return is
+ * always its "standard" R/W pointer, which is certain to have the same
+ * lifespan as the object itself. (The passed-in pointer might not, and
+ * in any case wouldn't provide a unique identifier if it's not that one.)
+ */
+Datum
+TransferExpandedObject(Datum d, MemoryContext new_parent)
+{
+ ExpandedObjectHeader *eohptr = DatumGetEOHP(d);
+
+ /* Assert caller gave a R/W pointer */
+ Assert(VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(d)));
+
+ /* Transfer ownership */
+ MemoryContextSetParent(eohptr->eoh_context, new_parent);
+
+ /* Return the object's standard read-write pointer */
+ return EOHPGetRWDatum(eohptr);
+}
+
+/*
+ * Delete an expanded object (must be referenced by a R/W pointer).
+ */
+void
+DeleteExpandedObject(Datum d)
+{
+ ExpandedObjectHeader *eohptr = DatumGetEOHP(d);
+
+ /* Assert caller gave a R/W pointer */
+ Assert(VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(d)));
+
+ /* Kill it */
+ MemoryContextDelete(eohptr->eoh_context);
+}
diff --git a/src/backend/utils/adt/expandedrecord.c b/src/backend/utils/adt/expandedrecord.c
new file mode 100644
index 0000000..3b3e0a9
--- /dev/null
+++ b/src/backend/utils/adt/expandedrecord.c
@@ -0,0 +1,1633 @@
+/*-------------------------------------------------------------------------
+ *
+ * expandedrecord.c
+ * Functions for manipulating composite expanded objects.
+ *
+ * This module supports "expanded objects" (cf. expandeddatum.h) that can
+ * store values of named composite types, domains over named composite types,
+ * and record types (registered or anonymous).
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/expandedrecord.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/detoast.h"
+#include "access/heaptoast.h"
+#include "access/htup_details.h"
+#include "catalog/heap.h"
+#include "catalog/pg_type.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/expandedrecord.h"
+#include "utils/memutils.h"
+#include "utils/typcache.h"
+
+
+/* "Methods" required for an expanded object */
+static Size ER_get_flat_size(ExpandedObjectHeader *eohptr);
+static void ER_flatten_into(ExpandedObjectHeader *eohptr,
+ void *result, Size allocated_size);
+
+static const ExpandedObjectMethods ER_methods =
+{
+ ER_get_flat_size,
+ ER_flatten_into
+};
+
+/* Other local functions */
+static void ER_mc_callback(void *arg);
+static MemoryContext get_short_term_cxt(ExpandedRecordHeader *erh);
+static void build_dummy_expanded_header(ExpandedRecordHeader *main_erh);
+static pg_noinline void check_domain_for_new_field(ExpandedRecordHeader *erh,
+ int fnumber,
+ Datum newValue, bool isnull);
+static pg_noinline void check_domain_for_new_tuple(ExpandedRecordHeader *erh,
+ HeapTuple tuple);
+
+
+/*
+ * Build an expanded record of the specified composite type
+ *
+ * type_id can be RECORDOID, but only if a positive typmod is given.
+ *
+ * The expanded record is initially "empty", having a state logically
+ * equivalent to a NULL composite value (not ROW(NULL, NULL, ...)).
+ * Note that this might not be a valid state for a domain type;
+ * if the caller needs to check that, call
+ * expanded_record_set_tuple(erh, NULL, false, false).
+ *
+ * The expanded object will be a child of parentcontext.
+ */
+ExpandedRecordHeader *
+make_expanded_record_from_typeid(Oid type_id, int32 typmod,
+ MemoryContext parentcontext)
+{
+ ExpandedRecordHeader *erh;
+ int flags = 0;
+ TupleDesc tupdesc;
+ uint64 tupdesc_id;
+ MemoryContext objcxt;
+ char *chunk;
+
+ if (type_id != RECORDOID)
+ {
+ /*
+ * Consult the typcache to see if it's a domain over composite, and in
+ * any case to get the tupdesc and tupdesc identifier.
+ */
+ TypeCacheEntry *typentry;
+
+ typentry = lookup_type_cache(type_id,
+ TYPECACHE_TUPDESC |
+ TYPECACHE_DOMAIN_BASE_INFO);
+ if (typentry->typtype == TYPTYPE_DOMAIN)
+ {
+ flags |= ER_FLAG_IS_DOMAIN;
+ typentry = lookup_type_cache(typentry->domainBaseType,
+ TYPECACHE_TUPDESC);
+ }
+ if (typentry->tupDesc == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("type %s is not composite",
+ format_type_be(type_id))));
+ tupdesc = typentry->tupDesc;
+ tupdesc_id = typentry->tupDesc_identifier;
+ }
+ else
+ {
+ /*
+ * For RECORD types, get the tupdesc and identifier from typcache.
+ */
+ tupdesc = lookup_rowtype_tupdesc(type_id, typmod);
+ tupdesc_id = assign_record_type_identifier(type_id, typmod);
+ }
+
+ /*
+ * Allocate private context for expanded object. We use a regular-size
+ * context, not a small one, to improve the odds that we can fit a tupdesc
+ * into it without needing an extra malloc block. (This code path doesn't
+ * ever need to copy a tupdesc into the expanded record, but let's be
+ * consistent with the other ways of making an expanded record.)
+ */
+ objcxt = AllocSetContextCreate(parentcontext,
+ "expanded record",
+ ALLOCSET_DEFAULT_SIZES);
+
+ /*
+ * Since we already know the number of fields in the tupdesc, we can
+ * allocate the dvalues/dnulls arrays along with the record header. This
+ * is useless if we never need those arrays, but it costs almost nothing,
+ * and it will save a palloc cycle if we do need them.
+ */
+ erh = (ExpandedRecordHeader *)
+ MemoryContextAlloc(objcxt, MAXALIGN(sizeof(ExpandedRecordHeader))
+ + tupdesc->natts * (sizeof(Datum) + sizeof(bool)));
+
+ /* Ensure all header fields are initialized to 0/null */
+ memset(erh, 0, sizeof(ExpandedRecordHeader));
+
+ EOH_init_header(&erh->hdr, &ER_methods, objcxt);
+ erh->er_magic = ER_MAGIC;
+
+ /* Set up dvalues/dnulls, with no valid contents as yet */
+ chunk = (char *) erh + MAXALIGN(sizeof(ExpandedRecordHeader));
+ erh->dvalues = (Datum *) chunk;
+ erh->dnulls = (bool *) (chunk + tupdesc->natts * sizeof(Datum));
+ erh->nfields = tupdesc->natts;
+
+ /* Fill in composite-type identification info */
+ erh->er_decltypeid = type_id;
+ erh->er_typeid = tupdesc->tdtypeid;
+ erh->er_typmod = tupdesc->tdtypmod;
+ erh->er_tupdesc_id = tupdesc_id;
+
+ erh->flags = flags;
+
+ /*
+ * If what we got from the typcache is a refcounted tupdesc, we need to
+ * acquire our own refcount on it. We manage the refcount with a memory
+ * context callback rather than assuming that the CurrentResourceOwner is
+ * longer-lived than this expanded object.
+ */
+ if (tupdesc->tdrefcount >= 0)
+ {
+ /* Register callback to release the refcount */
+ erh->er_mcb.func = ER_mc_callback;
+ erh->er_mcb.arg = (void *) erh;
+ MemoryContextRegisterResetCallback(erh->hdr.eoh_context,
+ &erh->er_mcb);
+
+ /* And save the pointer */
+ erh->er_tupdesc = tupdesc;
+ tupdesc->tdrefcount++;
+
+ /* If we called lookup_rowtype_tupdesc, release the pin it took */
+ if (type_id == RECORDOID)
+ ReleaseTupleDesc(tupdesc);
+ }
+ else
+ {
+ /*
+ * If it's not refcounted, just assume it will outlive the expanded
+ * object. (This can happen for shared record types, for instance.)
+ */
+ erh->er_tupdesc = tupdesc;
+ }
+
+ /*
+ * We don't set ER_FLAG_DVALUES_VALID or ER_FLAG_FVALUE_VALID, so the
+ * record remains logically empty.
+ */
+
+ return erh;
+}
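
A sketch (not part of the patch) of the typical construction pattern: make an empty expanded record for a named composite type, assign a field through expanded_record_set_field() (the inline wrapper declared in utils/expandedrecord.h), and return a read-write datum. The type OID is hypothetical and its first attribute is assumed to be int4:

#include "postgres.h"
#include "utils/expandedrecord.h"

static Datum
build_record_for_type(Oid mytype_oid)	/* hypothetical composite type OID */
{
	ExpandedRecordHeader *erh;

	/* a typmod of -1 is fine for a named composite type */
	erh = make_expanded_record_from_typeid(mytype_oid, -1,
										   CurrentMemoryContext);

	/* set field 1 (assumed int4); isnull = false, expand_external = false */
	expanded_record_set_field(erh, 1, Int32GetDatum(42), false, false);

	/* hand back a read-write pointer to the expanded object */
	return EOHPGetRWDatum(&erh->hdr);
}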
+
+/*
+ * Build an expanded record of the rowtype defined by the tupdesc
+ *
+ * The tupdesc is copied if necessary (i.e., if we can't just bump its
+ * reference count instead).
+ *
+ * The expanded record is initially "empty", having a state logically
+ * equivalent to a NULL composite value (not ROW(NULL, NULL, ...)).
+ *
+ * The expanded object will be a child of parentcontext.
+ */
+ExpandedRecordHeader *
+make_expanded_record_from_tupdesc(TupleDesc tupdesc,
+ MemoryContext parentcontext)
+{
+ ExpandedRecordHeader *erh;
+ uint64 tupdesc_id;
+ MemoryContext objcxt;
+ MemoryContext oldcxt;
+ char *chunk;
+
+ if (tupdesc->tdtypeid != RECORDOID)
+ {
+ /*
+ * If it's a named composite type (not RECORD), we prefer to reference
+ * the typcache's copy of the tupdesc, which is guaranteed to be
+ * refcounted (the given tupdesc might not be). In any case, we need
+ * to consult the typcache to get the correct tupdesc identifier.
+ *
+ * Note that tdtypeid couldn't be a domain type, so we need not
+ * consider that case here.
+ */
+ TypeCacheEntry *typentry;
+
+ typentry = lookup_type_cache(tupdesc->tdtypeid, TYPECACHE_TUPDESC);
+ if (typentry->tupDesc == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("type %s is not composite",
+ format_type_be(tupdesc->tdtypeid))));
+ tupdesc = typentry->tupDesc;
+ tupdesc_id = typentry->tupDesc_identifier;
+ }
+ else
+ {
+ /*
+ * For RECORD types, get the appropriate unique identifier (possibly
+ * freshly assigned).
+ */
+ tupdesc_id = assign_record_type_identifier(tupdesc->tdtypeid,
+ tupdesc->tdtypmod);
+ }
+
+ /*
+ * Allocate private context for expanded object. We use a regular-size
+ * context, not a small one, to improve the odds that we can fit a tupdesc
+ * into it without needing an extra malloc block.
+ */
+ objcxt = AllocSetContextCreate(parentcontext,
+ "expanded record",
+ ALLOCSET_DEFAULT_SIZES);
+
+ /*
+ * Since we already know the number of fields in the tupdesc, we can
+ * allocate the dvalues/dnulls arrays along with the record header. This
+ * is useless if we never need those arrays, but it costs almost nothing,
+ * and it will save a palloc cycle if we do need them.
+ */
+ erh = (ExpandedRecordHeader *)
+ MemoryContextAlloc(objcxt, MAXALIGN(sizeof(ExpandedRecordHeader))
+ + tupdesc->natts * (sizeof(Datum) + sizeof(bool)));
+
+ /* Ensure all header fields are initialized to 0/null */
+ memset(erh, 0, sizeof(ExpandedRecordHeader));
+
+ EOH_init_header(&erh->hdr, &ER_methods, objcxt);
+ erh->er_magic = ER_MAGIC;
+
+ /* Set up dvalues/dnulls, with no valid contents as yet */
+ chunk = (char *) erh + MAXALIGN(sizeof(ExpandedRecordHeader));
+ erh->dvalues = (Datum *) chunk;
+ erh->dnulls = (bool *) (chunk + tupdesc->natts * sizeof(Datum));
+ erh->nfields = tupdesc->natts;
+
+ /* Fill in composite-type identification info */
+ erh->er_decltypeid = erh->er_typeid = tupdesc->tdtypeid;
+ erh->er_typmod = tupdesc->tdtypmod;
+ erh->er_tupdesc_id = tupdesc_id;
+
+ /*
+ * Copy tupdesc if needed, but we prefer to bump its refcount if possible.
+ * We manage the refcount with a memory context callback rather than
+ * assuming that the CurrentResourceOwner is longer-lived than this
+ * expanded object.
+ */
+ if (tupdesc->tdrefcount >= 0)
+ {
+ /* Register callback to release the refcount */
+ erh->er_mcb.func = ER_mc_callback;
+ erh->er_mcb.arg = (void *) erh;
+ MemoryContextRegisterResetCallback(erh->hdr.eoh_context,
+ &erh->er_mcb);
+
+ /* And save the pointer */
+ erh->er_tupdesc = tupdesc;
+ tupdesc->tdrefcount++;
+ }
+ else
+ {
+ /* Just copy it */
+ oldcxt = MemoryContextSwitchTo(objcxt);
+ erh->er_tupdesc = CreateTupleDescCopy(tupdesc);
+ erh->flags |= ER_FLAG_TUPDESC_ALLOCED;
+ MemoryContextSwitchTo(oldcxt);
+ }
+
+ /*
+ * We don't set ER_FLAG_DVALUES_VALID or ER_FLAG_FVALUE_VALID, so the
+ * record remains logically empty.
+ */
+
+ return erh;
+}
+
+/*
+ * Build an expanded record of the same rowtype as the given expanded record
+ *
+ * This is faster than either of the above routines because we can bypass
+ * typcache lookup(s).
+ *
+ * The expanded record is initially "empty" --- we do not copy whatever
+ * tuple might be in the source expanded record.
+ *
+ * The expanded object will be a child of parentcontext.
+ */
+ExpandedRecordHeader *
+make_expanded_record_from_exprecord(ExpandedRecordHeader *olderh,
+ MemoryContext parentcontext)
+{
+ ExpandedRecordHeader *erh;
+ TupleDesc tupdesc = expanded_record_get_tupdesc(olderh);
+ MemoryContext objcxt;
+ MemoryContext oldcxt;
+ char *chunk;
+
+ /*
+ * Allocate private context for expanded object. We use a regular-size
+ * context, not a small one, to improve the odds that we can fit a tupdesc
+ * into it without needing an extra malloc block.
+ */
+ objcxt = AllocSetContextCreate(parentcontext,
+ "expanded record",
+ ALLOCSET_DEFAULT_SIZES);
+
+ /*
+ * Since we already know the number of fields in the tupdesc, we can
+ * allocate the dvalues/dnulls arrays along with the record header. This
+ * is useless if we never need those arrays, but it costs almost nothing,
+ * and it will save a palloc cycle if we do need them.
+ */
+ erh = (ExpandedRecordHeader *)
+ MemoryContextAlloc(objcxt, MAXALIGN(sizeof(ExpandedRecordHeader))
+ + tupdesc->natts * (sizeof(Datum) + sizeof(bool)));
+
+ /* Ensure all header fields are initialized to 0/null */
+ memset(erh, 0, sizeof(ExpandedRecordHeader));
+
+ EOH_init_header(&erh->hdr, &ER_methods, objcxt);
+ erh->er_magic = ER_MAGIC;
+
+ /* Set up dvalues/dnulls, with no valid contents as yet */
+ chunk = (char *) erh + MAXALIGN(sizeof(ExpandedRecordHeader));
+ erh->dvalues = (Datum *) chunk;
+ erh->dnulls = (bool *) (chunk + tupdesc->natts * sizeof(Datum));
+ erh->nfields = tupdesc->natts;
+
+ /* Fill in composite-type identification info */
+ erh->er_decltypeid = olderh->er_decltypeid;
+ erh->er_typeid = olderh->er_typeid;
+ erh->er_typmod = olderh->er_typmod;
+ erh->er_tupdesc_id = olderh->er_tupdesc_id;
+
+ /* The only flag bit that transfers over is IS_DOMAIN */
+ erh->flags = olderh->flags & ER_FLAG_IS_DOMAIN;
+
+ /*
+ * Copy tupdesc if needed, but we prefer to bump its refcount if possible.
+ * We manage the refcount with a memory context callback rather than
+ * assuming that the CurrentResourceOwner is longer-lived than this
+ * expanded object.
+ */
+ if (tupdesc->tdrefcount >= 0)
+ {
+ /* Register callback to release the refcount */
+ erh->er_mcb.func = ER_mc_callback;
+ erh->er_mcb.arg = (void *) erh;
+ MemoryContextRegisterResetCallback(erh->hdr.eoh_context,
+ &erh->er_mcb);
+
+ /* And save the pointer */
+ erh->er_tupdesc = tupdesc;
+ tupdesc->tdrefcount++;
+ }
+ else if (olderh->flags & ER_FLAG_TUPDESC_ALLOCED)
+ {
+ /* We need to make our own copy of the tupdesc */
+ oldcxt = MemoryContextSwitchTo(objcxt);
+ erh->er_tupdesc = CreateTupleDescCopy(tupdesc);
+ erh->flags |= ER_FLAG_TUPDESC_ALLOCED;
+ MemoryContextSwitchTo(oldcxt);
+ }
+ else
+ {
+ /*
+ * Assume the tupdesc will outlive this expanded object, just like
+ * we're assuming it will outlive the source object.
+ */
+ erh->er_tupdesc = tupdesc;
+ }
+
+ /*
+ * We don't set ER_FLAG_DVALUES_VALID or ER_FLAG_FVALUE_VALID, so the
+ * record remains logically empty.
+ */
+
+ return erh;
+}
+
+/*
+ * Insert given tuple as the value of the expanded record
+ *
+ * It is caller's responsibility that the tuple matches the record's
+ * previously-assigned rowtype. (However domain constraints, if any,
+ * will be checked here.)
+ *
+ * The tuple is physically copied into the expanded record's local storage
+ * if "copy" is true, otherwise it's caller's responsibility that the tuple
+ * will live as long as the expanded record does.
+ *
+ * Out-of-line field values in the tuple are automatically inlined if
+ * "expand_external" is true, otherwise not. (The combination copy = false,
+ * expand_external = true is not sensible and not supported.)
+ *
+ * Alternatively, tuple can be NULL, in which case we just set the expanded
+ * record to be empty.
+ */
+void
+expanded_record_set_tuple(ExpandedRecordHeader *erh,
+ HeapTuple tuple,
+ bool copy,
+ bool expand_external)
+{
+ int oldflags;
+ HeapTuple oldtuple;
+ char *oldfstartptr;
+ char *oldfendptr;
+ int newflags;
+ HeapTuple newtuple;
+ MemoryContext oldcxt;
+
+ /* Shouldn't ever be trying to assign new data to a dummy header */
+ Assert(!(erh->flags & ER_FLAG_IS_DUMMY));
+
+ /*
+ * Before performing the assignment, see if result will satisfy domain.
+ */
+ if (erh->flags & ER_FLAG_IS_DOMAIN)
+ check_domain_for_new_tuple(erh, tuple);
+
+ /*
+ * If we need to get rid of out-of-line field values, do so, using the
+ * short-term context to avoid leaking whatever cruft the toast fetch
+ * might generate.
+ */
+ if (expand_external && tuple)
+ {
+ /* Assert caller didn't ask for unsupported case */
+ Assert(copy);
+ if (HeapTupleHasExternal(tuple))
+ {
+ oldcxt = MemoryContextSwitchTo(get_short_term_cxt(erh));
+ tuple = toast_flatten_tuple(tuple, erh->er_tupdesc);
+ MemoryContextSwitchTo(oldcxt);
+ }
+ else
+ expand_external = false; /* need not clean up below */
+ }
+
+ /*
+ * Initialize new flags, keeping only non-data status bits.
+ */
+ oldflags = erh->flags;
+ newflags = oldflags & ER_FLAGS_NON_DATA;
+
+ /*
+ * Copy tuple into local storage if needed. We must be sure this succeeds
+ * before we start to modify the expanded record's state.
+ */
+ if (copy && tuple)
+ {
+ oldcxt = MemoryContextSwitchTo(erh->hdr.eoh_context);
+ newtuple = heap_copytuple(tuple);
+ newflags |= ER_FLAG_FVALUE_ALLOCED;
+ MemoryContextSwitchTo(oldcxt);
+
+ /* We can now flush anything that detoasting might have leaked. */
+ if (expand_external)
+ MemoryContextReset(erh->er_short_term_cxt);
+ }
+ else
+ newtuple = tuple;
+
+ /* Make copies of fields we're about to overwrite */
+ oldtuple = erh->fvalue;
+ oldfstartptr = erh->fstartptr;
+ oldfendptr = erh->fendptr;
+
+ /*
+ * It's now safe to update the expanded record's state.
+ */
+ if (newtuple)
+ {
+ /* Save flat representation */
+ erh->fvalue = newtuple;
+ erh->fstartptr = (char *) newtuple->t_data;
+ erh->fendptr = ((char *) newtuple->t_data) + newtuple->t_len;
+ newflags |= ER_FLAG_FVALUE_VALID;
+
+ /* Remember if we have any out-of-line field values */
+ if (HeapTupleHasExternal(newtuple))
+ newflags |= ER_FLAG_HAVE_EXTERNAL;
+ }
+ else
+ {
+ erh->fvalue = NULL;
+ erh->fstartptr = erh->fendptr = NULL;
+ }
+
+ erh->flags = newflags;
+
+ /* Reset flat-size info; we don't bother to make it valid now */
+ erh->flat_size = 0;
+
+ /*
+ * Now, release any storage belonging to old field values. It's safe to
+ * do this because ER_FLAG_DVALUES_VALID is no longer set in erh->flags;
+ * even if we fail partway through, the record is valid, and at worst
+ * we've failed to reclaim some space.
+ */
+ if (oldflags & ER_FLAG_DVALUES_ALLOCED)
+ {
+ TupleDesc tupdesc = erh->er_tupdesc;
+ int i;
+
+ for (i = 0; i < erh->nfields; i++)
+ {
+ if (!erh->dnulls[i] &&
+ !(TupleDescAttr(tupdesc, i)->attbyval))
+ {
+ char *oldValue = (char *) DatumGetPointer(erh->dvalues[i]);
+
+ if (oldValue < oldfstartptr || oldValue >= oldfendptr)
+ pfree(oldValue);
+ }
+ }
+ }
+
+ /* Likewise free the old tuple, if it was locally allocated */
+ if (oldflags & ER_FLAG_FVALUE_ALLOCED)
+ heap_freetuple(oldtuple);
+
+ /* We won't make a new deconstructed representation until/unless needed */
+}
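
To illustrate the copy/expand_external contract just described, here is a sketch (not part of the patch) that installs a caller-built tuple with both flags set and then empties the record again. erh, values, and nulls are hypothetical and assumed to match the record's rowtype:

#include "postgres.h"
#include "access/htup_details.h"
#include "utils/expandedrecord.h"

static void
replace_record_contents(ExpandedRecordHeader *erh,
						Datum *values, bool *nulls)
{
	TupleDesc	tupdesc = expanded_record_get_tupdesc(erh);
	HeapTuple	tuple = heap_form_tuple(tupdesc, values, nulls);

	/*
	 * copy = true: the record keeps its own copy, so we may free ours;
	 * expand_external = true: any toast pointers are inlined as well.
	 */
	expanded_record_set_tuple(erh, tuple, true, true);
	heap_freetuple(tuple);

	/* passing NULL makes the record empty again (domain checks permitting) */
	expanded_record_set_tuple(erh, NULL, false, false);
}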
+
+/*
+ * make_expanded_record_from_datum: build expanded record from composite Datum
+ *
+ * This combines the functions of make_expanded_record_from_typeid and
+ * expanded_record_set_tuple. However, we do not force a lookup of the
+ * tupdesc immediately, reasoning that it might never be needed.
+ *
+ * The expanded object will be a child of parentcontext.
+ *
+ * Note: a composite datum cannot self-identify as being of a domain type,
+ * so we need not consider domain cases here.
+ */
+Datum
+make_expanded_record_from_datum(Datum recorddatum, MemoryContext parentcontext)
+{
+ ExpandedRecordHeader *erh;
+ HeapTupleHeader tuphdr;
+ HeapTupleData tmptup;
+ HeapTuple newtuple;
+ MemoryContext objcxt;
+ MemoryContext oldcxt;
+
+ /*
+ * Allocate private context for expanded object. We use a regular-size
+ * context, not a small one, to improve the odds that we can fit a tupdesc
+ * into it without needing an extra malloc block.
+ */
+ objcxt = AllocSetContextCreate(parentcontext,
+ "expanded record",
+ ALLOCSET_DEFAULT_SIZES);
+
+ /* Set up expanded record header, initializing fields to 0/null */
+ erh = (ExpandedRecordHeader *)
+ MemoryContextAllocZero(objcxt, sizeof(ExpandedRecordHeader));
+
+ EOH_init_header(&erh->hdr, &ER_methods, objcxt);
+ erh->er_magic = ER_MAGIC;
+
+ /*
+ * Detoast and copy source record into private context, as a HeapTuple.
+ * (If we actually have to detoast the source, we'll leak some memory in
+ * the caller's context, but it doesn't seem worth worrying about.)
+ */
+ tuphdr = DatumGetHeapTupleHeader(recorddatum);
+
+ tmptup.t_len = HeapTupleHeaderGetDatumLength(tuphdr);
+ ItemPointerSetInvalid(&(tmptup.t_self));
+ tmptup.t_tableOid = InvalidOid;
+ tmptup.t_data = tuphdr;
+
+ oldcxt = MemoryContextSwitchTo(objcxt);
+ newtuple = heap_copytuple(&tmptup);
+ erh->flags |= ER_FLAG_FVALUE_ALLOCED;
+ MemoryContextSwitchTo(oldcxt);
+
+ /* Fill in composite-type identification info */
+ erh->er_decltypeid = erh->er_typeid = HeapTupleHeaderGetTypeId(tuphdr);
+ erh->er_typmod = HeapTupleHeaderGetTypMod(tuphdr);
+
+ /* remember we have a flat representation */
+ erh->fvalue = newtuple;
+ erh->fstartptr = (char *) newtuple->t_data;
+ erh->fendptr = ((char *) newtuple->t_data) + newtuple->t_len;
+ erh->flags |= ER_FLAG_FVALUE_VALID;
+
+ /* Shouldn't need to set ER_FLAG_HAVE_EXTERNAL */
+ Assert(!HeapTupleHeaderHasExternal(tuphdr));
+
+ /*
+ * We won't look up the tupdesc till we have to, nor make a deconstructed
+ * representation. We don't have enough info to fill flat_size and
+ * friends, either.
+ */
+
+ /* return a R/W pointer to the expanded record */
+ return EOHPGetRWDatum(&erh->hdr);
+}
+
+/*
+ * get_flat_size method for expanded records
+ *
+ * Note: call this in a reasonably short-lived memory context, in case of
+ * memory leaks from activities such as detoasting.
+ */
+static Size
+ER_get_flat_size(ExpandedObjectHeader *eohptr)
+{
+ ExpandedRecordHeader *erh = (ExpandedRecordHeader *) eohptr;
+ TupleDesc tupdesc;
+ Size len;
+ Size data_len;
+ int hoff;
+ bool hasnull;
+ int i;
+
+ Assert(erh->er_magic == ER_MAGIC);
+
+ /*
+ * The flat representation has to be a valid composite datum. Make sure
+ * that we have a registered, not anonymous, RECORD type.
+ */
+ if (erh->er_typeid == RECORDOID &&
+ erh->er_typmod < 0)
+ {
+ tupdesc = expanded_record_get_tupdesc(erh);
+ assign_record_type_typmod(tupdesc);
+ erh->er_typmod = tupdesc->tdtypmod;
+ }
+
+ /*
+ * If we have a valid flattened value without out-of-line fields, we can
+ * just use it as-is.
+ */
+ if (erh->flags & ER_FLAG_FVALUE_VALID &&
+ !(erh->flags & ER_FLAG_HAVE_EXTERNAL))
+ return erh->fvalue->t_len;
+
+ /* If we have a cached size value, believe that */
+ if (erh->flat_size)
+ return erh->flat_size;
+
+ /* If we haven't yet deconstructed the tuple, do that */
+ if (!(erh->flags & ER_FLAG_DVALUES_VALID))
+ deconstruct_expanded_record(erh);
+
+ /* Tuple descriptor must be valid by now */
+ tupdesc = erh->er_tupdesc;
+
+ /*
+ * Composite datums mustn't contain any out-of-line values.
+ */
+ if (erh->flags & ER_FLAG_HAVE_EXTERNAL)
+ {
+ for (i = 0; i < erh->nfields; i++)
+ {
+ Form_pg_attribute attr = TupleDescAttr(tupdesc, i);
+
+ if (!erh->dnulls[i] &&
+ !attr->attbyval && attr->attlen == -1 &&
+ VARATT_IS_EXTERNAL(DatumGetPointer(erh->dvalues[i])))
+ {
+ /*
+ * expanded_record_set_field_internal can do the actual work
+ * of detoasting. It needn't recheck domain constraints.
+ */
+ expanded_record_set_field_internal(erh, i + 1,
+ erh->dvalues[i], false,
+ true,
+ false);
+ }
+ }
+
+ /*
+ * We have now removed all external field values, so we can clear the
+ * flag about them. This won't cause ER_flatten_into() to mistakenly
+ * take the fast path, since expanded_record_set_field() will have
+ * cleared ER_FLAG_FVALUE_VALID.
+ */
+ erh->flags &= ~ER_FLAG_HAVE_EXTERNAL;
+ }
+
+ /* Test if we currently have any null values */
+ hasnull = false;
+ for (i = 0; i < erh->nfields; i++)
+ {
+ if (erh->dnulls[i])
+ {
+ hasnull = true;
+ break;
+ }
+ }
+
+ /* Determine total space needed */
+ len = offsetof(HeapTupleHeaderData, t_bits);
+
+ if (hasnull)
+ len += BITMAPLEN(tupdesc->natts);
+
+ hoff = len = MAXALIGN(len); /* align user data safely */
+
+ data_len = heap_compute_data_size(tupdesc, erh->dvalues, erh->dnulls);
+
+ len += data_len;
+
+ /* Cache for next time */
+ erh->flat_size = len;
+ erh->data_len = data_len;
+ erh->hoff = hoff;
+ erh->hasnull = hasnull;
+
+ return len;
+}
+
+/*
+ * flatten_into method for expanded records
+ */
+static void
+ER_flatten_into(ExpandedObjectHeader *eohptr,
+ void *result, Size allocated_size)
+{
+ ExpandedRecordHeader *erh = (ExpandedRecordHeader *) eohptr;
+ HeapTupleHeader tuphdr = (HeapTupleHeader) result;
+ TupleDesc tupdesc;
+
+ Assert(erh->er_magic == ER_MAGIC);
+
+ /* Easy if we have a valid flattened value without out-of-line fields */
+ if (erh->flags & ER_FLAG_FVALUE_VALID &&
+ !(erh->flags & ER_FLAG_HAVE_EXTERNAL))
+ {
+ Assert(allocated_size == erh->fvalue->t_len);
+ memcpy(tuphdr, erh->fvalue->t_data, allocated_size);
+ /* The original flattened value might not have datum header fields */
+ HeapTupleHeaderSetDatumLength(tuphdr, allocated_size);
+ HeapTupleHeaderSetTypeId(tuphdr, erh->er_typeid);
+ HeapTupleHeaderSetTypMod(tuphdr, erh->er_typmod);
+ return;
+ }
+
+ /* Else allocation should match previous get_flat_size result */
+ Assert(allocated_size == erh->flat_size);
+
+ /* We'll need the tuple descriptor */
+ tupdesc = expanded_record_get_tupdesc(erh);
+
+ /* We must ensure that any pad space is zero-filled */
+ memset(tuphdr, 0, allocated_size);
+
+ /* Set up header fields of composite Datum */
+ HeapTupleHeaderSetDatumLength(tuphdr, allocated_size);
+ HeapTupleHeaderSetTypeId(tuphdr, erh->er_typeid);
+ HeapTupleHeaderSetTypMod(tuphdr, erh->er_typmod);
+ /* We also make sure that t_ctid is invalid unless explicitly set */
+ ItemPointerSetInvalid(&(tuphdr->t_ctid));
+
+ HeapTupleHeaderSetNatts(tuphdr, tupdesc->natts);
+ tuphdr->t_hoff = erh->hoff;
+
+ /* And fill the data area from dvalues/dnulls */
+ heap_fill_tuple(tupdesc,
+ erh->dvalues,
+ erh->dnulls,
+ (char *) tuphdr + erh->hoff,
+ erh->data_len,
+ &tuphdr->t_infomask,
+ (erh->hasnull ? tuphdr->t_bits : NULL));
+}
+
+/*
+ * Look up the tupdesc for the expanded record's actual type
+ *
+ * Note: code internal to this module is allowed to just fetch
+ * erh->er_tupdesc if ER_FLAG_DVALUES_VALID is set; otherwise it should call
+ * expanded_record_get_tupdesc. This function is the out-of-line portion
+ * of expanded_record_get_tupdesc.
+ */
+TupleDesc
+expanded_record_fetch_tupdesc(ExpandedRecordHeader *erh)
+{
+ TupleDesc tupdesc;
+
+ /* Easy if we already have it (but caller should have checked already) */
+ if (erh->er_tupdesc)
+ return erh->er_tupdesc;
+
+ /* Lookup the composite type's tupdesc using the typcache */
+ tupdesc = lookup_rowtype_tupdesc(erh->er_typeid, erh->er_typmod);
+
+ /*
+ * If it's a refcounted tupdesc rather than a statically allocated one, we
+ * want to manage the refcount with a memory context callback rather than
+ * assuming that the CurrentResourceOwner is longer-lived than this
+ * expanded object.
+ */
+ if (tupdesc->tdrefcount >= 0)
+ {
+ /* Register callback if we didn't already */
+ if (erh->er_mcb.arg == NULL)
+ {
+ erh->er_mcb.func = ER_mc_callback;
+ erh->er_mcb.arg = (void *) erh;
+ MemoryContextRegisterResetCallback(erh->hdr.eoh_context,
+ &erh->er_mcb);
+ }
+
+ /* Remember our own pointer */
+ erh->er_tupdesc = tupdesc;
+ tupdesc->tdrefcount++;
+
+ /* Release the pin lookup_rowtype_tupdesc acquired */
+ ReleaseTupleDesc(tupdesc);
+ }
+ else
+ {
+ /* Just remember the pointer */
+ erh->er_tupdesc = tupdesc;
+ }
+
+ /* In either case, fetch the process-global ID for this tupdesc */
+ erh->er_tupdesc_id = assign_record_type_identifier(tupdesc->tdtypeid,
+ tupdesc->tdtypmod);
+
+ return tupdesc;
+}
+
+/*
+ * Get a HeapTuple representing the current value of the expanded record
+ *
+ * If valid, the originally stored tuple is returned, so caller must not
+ * scribble on it. Otherwise, we return a HeapTuple created in the current
+ * memory context. In either case, no attempt has been made to inline
+ * out-of-line toasted values, so the tuple isn't usable as a composite
+ * datum.
+ *
+ * Returns NULL if expanded record is empty.
+ */
+HeapTuple
+expanded_record_get_tuple(ExpandedRecordHeader *erh)
+{
+ /* Easy case if we still have original tuple */
+ if (erh->flags & ER_FLAG_FVALUE_VALID)
+ return erh->fvalue;
+
+ /* Else just build a tuple from datums */
+ if (erh->flags & ER_FLAG_DVALUES_VALID)
+ return heap_form_tuple(erh->er_tupdesc, erh->dvalues, erh->dnulls);
+
+ /* Expanded record is empty */
+ return NULL;
+}
+
+/*
+ * Memory context reset callback for cleaning up external resources
+ */
+static void
+ER_mc_callback(void *arg)
+{
+ ExpandedRecordHeader *erh = (ExpandedRecordHeader *) arg;
+ TupleDesc tupdesc = erh->er_tupdesc;
+
+ /* Release our privately-managed tupdesc refcount, if any */
+ if (tupdesc)
+ {
+ erh->er_tupdesc = NULL; /* just for luck */
+ if (tupdesc->tdrefcount > 0)
+ {
+ if (--tupdesc->tdrefcount == 0)
+ FreeTupleDesc(tupdesc);
+ }
+ }
+}
+
+/*
+ * DatumGetExpandedRecord: get a writable expanded record from an input argument
+ *
+ * Caution: if the input is a read/write pointer, this returns the input
+ * argument; so callers must be sure that their changes are "safe", that is
+ * they cannot leave the record in a corrupt state.
+ */
+ExpandedRecordHeader *
+DatumGetExpandedRecord(Datum d)
+{
+ /* If it's a writable expanded record already, just return it */
+ if (VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(d)))
+ {
+ ExpandedRecordHeader *erh = (ExpandedRecordHeader *) DatumGetEOHP(d);
+
+ Assert(erh->er_magic == ER_MAGIC);
+ return erh;
+ }
+
+ /* Else expand the hard way */
+ d = make_expanded_record_from_datum(d, CurrentMemoryContext);
+ return (ExpandedRecordHeader *) DatumGetEOHP(d);
+}
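
A sketch (not part of the patch) of the expand-and-mutate pattern this function enables: take a composite datum, update one field in place, and return a read-write pointer. Attribute 1 is assumed to be int4, and per the caution above the caller must tolerate in-place modification:

#include "postgres.h"
#include "utils/expandedrecord.h"

static Datum
bump_first_field(Datum recdatum)
{
	ExpandedRecordHeader *erh = DatumGetExpandedRecord(recdatum);
	bool		isnull;
	Datum		val;

	val = expanded_record_get_field(erh, 1, &isnull);
	if (!isnull)				/* attribute 1 assumed to be int4 */
		expanded_record_set_field(erh, 1,
								  Int32GetDatum(DatumGetInt32(val) + 1),
								  false, false);

	return EOHPGetRWDatum(&erh->hdr);
}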
+
+/*
+ * Create the Datum/isnull representation of an expanded record object
+ * if we didn't do so already. After calling this, it's OK to read the
+ * dvalues/dnulls arrays directly, rather than going through get_field.
+ *
+ * Note that if the object is currently empty ("null"), this will change
+ * it to represent a row of nulls.
+ */
+void
+deconstruct_expanded_record(ExpandedRecordHeader *erh)
+{
+ TupleDesc tupdesc;
+ Datum *dvalues;
+ bool *dnulls;
+ int nfields;
+
+ if (erh->flags & ER_FLAG_DVALUES_VALID)
+ return; /* already valid, nothing to do */
+
+ /* We'll need the tuple descriptor */
+ tupdesc = expanded_record_get_tupdesc(erh);
+
+ /*
+ * Allocate arrays in private context, if we don't have them already. We
+ * don't expect to see a change in nfields here, so while we cope if it
+ * happens, we don't bother avoiding a leak of the old arrays (which might
+ * not be separately palloc'd, anyway).
+ */
+ nfields = tupdesc->natts;
+ if (erh->dvalues == NULL || erh->nfields != nfields)
+ {
+ char *chunk;
+
+ /*
+ * To save a palloc cycle, we allocate both the Datum and isnull
+ * arrays in one palloc chunk.
+ */
+ chunk = MemoryContextAlloc(erh->hdr.eoh_context,
+ nfields * (sizeof(Datum) + sizeof(bool)));
+ dvalues = (Datum *) chunk;
+ dnulls = (bool *) (chunk + nfields * sizeof(Datum));
+ erh->dvalues = dvalues;
+ erh->dnulls = dnulls;
+ erh->nfields = nfields;
+ }
+ else
+ {
+ dvalues = erh->dvalues;
+ dnulls = erh->dnulls;
+ }
+
+ if (erh->flags & ER_FLAG_FVALUE_VALID)
+ {
+ /* Deconstruct tuple */
+ heap_deform_tuple(erh->fvalue, tupdesc, dvalues, dnulls);
+ }
+ else
+ {
+ /* If record was empty, instantiate it as a row of nulls */
+ memset(dvalues, 0, nfields * sizeof(Datum));
+ memset(dnulls, true, nfields * sizeof(bool));
+ }
+
+ /* Mark the dvalues as valid */
+ erh->flags |= ER_FLAG_DVALUES_VALID;
+}
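
Once deconstruct_expanded_record() has run, the dvalues/dnulls arrays can be read directly, as the comment above notes. A small sketch (not part of the patch):

#include "postgres.h"
#include "utils/expandedrecord.h"

static int
count_null_fields(ExpandedRecordHeader *erh)
{
	int			nnulls = 0;
	int			i;

	deconstruct_expanded_record(erh);	/* ensures dvalues/dnulls are valid */
	for (i = 0; i < erh->nfields; i++)
	{
		if (erh->dnulls[i])
			nnulls++;
	}
	return nnulls;
}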
+
+/*
+ * Look up a record field by name
+ *
+ * If there is a field named "fieldname", fill in the contents of finfo
+ * and return "true". Else return "false" without changing *finfo.
+ */
+bool
+expanded_record_lookup_field(ExpandedRecordHeader *erh, const char *fieldname,
+ ExpandedRecordFieldInfo *finfo)
+{
+ TupleDesc tupdesc;
+ int fno;
+ Form_pg_attribute attr;
+ const FormData_pg_attribute *sysattr;
+
+ tupdesc = expanded_record_get_tupdesc(erh);
+
+ /* First, check user-defined attributes */
+ for (fno = 0; fno < tupdesc->natts; fno++)
+ {
+ attr = TupleDescAttr(tupdesc, fno);
+ if (namestrcmp(&attr->attname, fieldname) == 0 &&
+ !attr->attisdropped)
+ {
+ finfo->fnumber = attr->attnum;
+ finfo->ftypeid = attr->atttypid;
+ finfo->ftypmod = attr->atttypmod;
+ finfo->fcollation = attr->attcollation;
+ return true;
+ }
+ }
+
+ /* How about system attributes? */
+ sysattr = SystemAttributeByName(fieldname);
+ if (sysattr != NULL)
+ {
+ finfo->fnumber = sysattr->attnum;
+ finfo->ftypeid = sysattr->atttypid;
+ finfo->ftypmod = sysattr->atttypmod;
+ finfo->fcollation = sysattr->attcollation;
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Fetch value of record field
+ *
+ * expanded_record_get_field is the frontend for this; it handles the
+ * easy inline-able cases.
+ */
+Datum
+expanded_record_fetch_field(ExpandedRecordHeader *erh, int fnumber,
+ bool *isnull)
+{
+ if (fnumber > 0)
+ {
+ /* Empty record has null fields */
+ if (ExpandedRecordIsEmpty(erh))
+ {
+ *isnull = true;
+ return (Datum) 0;
+ }
+ /* Make sure we have deconstructed form */
+ deconstruct_expanded_record(erh);
+ /* Out-of-range field number reads as null */
+ if (unlikely(fnumber > erh->nfields))
+ {
+ *isnull = true;
+ return (Datum) 0;
+ }
+ *isnull = erh->dnulls[fnumber - 1];
+ return erh->dvalues[fnumber - 1];
+ }
+ else
+ {
+ /* System columns read as null if we haven't got flat tuple */
+ if (erh->fvalue == NULL)
+ {
+ *isnull = true;
+ return (Datum) 0;
+ }
+ /* heap_getsysattr doesn't actually use tupdesc, so just pass null */
+ return heap_getsysattr(erh->fvalue, fnumber, NULL, isnull);
+ }
+}
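
Combining the two functions above, a by-name accessor might look like this sketch (not part of the patch); erh and colname are hypothetical, and expanded_record_get_field() is the inline frontend mentioned in the comment:

#include "postgres.h"
#include "utils/expandedrecord.h"

static Datum
get_field_by_name(ExpandedRecordHeader *erh, const char *colname,
				  bool *isnull)
{
	ExpandedRecordFieldInfo finfo;

	if (!expanded_record_lookup_field(erh, colname, &finfo))
		elog(ERROR, "record has no field \"%s\"", colname);

	/* system columns yield a negative fnumber; both cases are handled */
	return expanded_record_get_field(erh, finfo.fnumber, isnull);
}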
+
+/*
+ * Set value of record field
+ *
+ * If the expanded record is of domain type, the assignment will be rejected
+ * (without changing the record's state) if the domain's constraints would
+ * be violated.
+ *
+ * If expand_external is true and newValue is an out-of-line value, we'll
+ * forcibly detoast it so that the record does not depend on external storage.
+ *
+ * Internal callers can pass check_constraints = false to skip application
+ * of domain constraints. External callers should never do that.
+ */
+void
+expanded_record_set_field_internal(ExpandedRecordHeader *erh, int fnumber,
+ Datum newValue, bool isnull,
+ bool expand_external,
+ bool check_constraints)
+{
+ TupleDesc tupdesc;
+ Form_pg_attribute attr;
+ Datum *dvalues;
+ bool *dnulls;
+ char *oldValue;
+
+ /*
+ * Shouldn't ever be trying to assign new data to a dummy header, except
+ * in the case of an internal call for field inlining.
+ */
+ Assert(!(erh->flags & ER_FLAG_IS_DUMMY) || !check_constraints);
+
+ /* Before performing the assignment, see if result will satisfy domain */
+ if ((erh->flags & ER_FLAG_IS_DOMAIN) && check_constraints)
+ check_domain_for_new_field(erh, fnumber, newValue, isnull);
+
+ /* If we haven't yet deconstructed the tuple, do that */
+ if (!(erh->flags & ER_FLAG_DVALUES_VALID))
+ deconstruct_expanded_record(erh);
+
+ /* Tuple descriptor must be valid by now */
+ tupdesc = erh->er_tupdesc;
+ Assert(erh->nfields == tupdesc->natts);
+
+ /* Caller error if fnumber is system column or nonexistent column */
+ if (unlikely(fnumber <= 0 || fnumber > erh->nfields))
+ elog(ERROR, "cannot assign to field %d of expanded record", fnumber);
+
+ /*
+ * Copy new field value into record's context, and deal with detoasting,
+ * if needed.
+ */
+ attr = TupleDescAttr(tupdesc, fnumber - 1);
+ if (!isnull && !attr->attbyval)
+ {
+ MemoryContext oldcxt;
+
+ /* If requested, detoast any external value */
+ if (expand_external)
+ {
+ if (attr->attlen == -1 &&
+ VARATT_IS_EXTERNAL(DatumGetPointer(newValue)))
+ {
+ /* Detoasting should be done in short-lived context. */
+ oldcxt = MemoryContextSwitchTo(get_short_term_cxt(erh));
+ newValue = PointerGetDatum(detoast_external_attr((struct varlena *) DatumGetPointer(newValue)));
+ MemoryContextSwitchTo(oldcxt);
+ }
+ else
+ expand_external = false; /* need not clean up below */
+ }
+
+ /* Copy value into record's context */
+ oldcxt = MemoryContextSwitchTo(erh->hdr.eoh_context);
+ newValue = datumCopy(newValue, false, attr->attlen);
+ MemoryContextSwitchTo(oldcxt);
+
+ /* We can now flush anything that detoasting might have leaked */
+ if (expand_external)
+ MemoryContextReset(erh->er_short_term_cxt);
+
+ /* Remember that we have field(s) that may need to be pfree'd */
+ erh->flags |= ER_FLAG_DVALUES_ALLOCED;
+
+ /*
+ * While we're here, note whether it's an external toasted value,
+ * because that could mean we need to inline it later. (Think not to
+ * merge this into the previous expand_external logic: datumCopy could
+ * by itself have made the value non-external.)
+ */
+ if (attr->attlen == -1 &&
+ VARATT_IS_EXTERNAL(DatumGetPointer(newValue)))
+ erh->flags |= ER_FLAG_HAVE_EXTERNAL;
+ }
+
+ /*
+ * We're ready to make irreversible changes.
+ */
+ dvalues = erh->dvalues;
+ dnulls = erh->dnulls;
+
+ /* Flattened value will no longer represent record accurately */
+ erh->flags &= ~ER_FLAG_FVALUE_VALID;
+ /* And we don't know the flattened size either */
+ erh->flat_size = 0;
+
+ /* Grab old field value for pfree'ing, if needed. */
+ if (!attr->attbyval && !dnulls[fnumber - 1])
+ oldValue = (char *) DatumGetPointer(dvalues[fnumber - 1]);
+ else
+ oldValue = NULL;
+
+ /* And finally we can insert the new field. */
+ dvalues[fnumber - 1] = newValue;
+ dnulls[fnumber - 1] = isnull;
+
+ /*
+ * Free old field if needed; this keeps repeated field replacements from
+ * bloating the record's storage. If the pfree somehow fails, it won't
+ * corrupt the record.
+ *
+ * If we're updating a dummy header, we can't risk pfree'ing the old
+ * value, because most likely the expanded record's main header still has
+ * a pointer to it. This won't result in any sustained memory leak, since
+ * whatever we just allocated here is in the short-lived domain check
+ * context.
+ */
+ if (oldValue && !(erh->flags & ER_FLAG_IS_DUMMY))
+ {
+ /* Don't try to pfree a part of the original flat record */
+ if (oldValue < erh->fstartptr || oldValue >= erh->fendptr)
+ pfree(oldValue);
+ }
+}
+
+/*
+ * Set all record field(s)
+ *
+ * Caller must ensure that the provided datums are of the right types
+ * to match the record's previously assigned rowtype.
+ *
+ * If expand_external is true, we'll forcibly detoast out-of-line field values
+ * so that the record does not depend on external storage.
+ *
+ * Unlike repeated application of expanded_record_set_field(), this does not
+ * guarantee to leave the expanded record in a non-corrupt state in event
+ * of an error. Typically it would only be used for initializing a new
+ * expanded record. Also, because we expect this to be applied at most once
+ * in the lifespan of an expanded record, we do not worry about any cruft
+ * that detoasting might leak.
+ */
+void
+expanded_record_set_fields(ExpandedRecordHeader *erh,
+ const Datum *newValues, const bool *isnulls,
+ bool expand_external)
+{
+ TupleDesc tupdesc;
+ Datum *dvalues;
+ bool *dnulls;
+ int fnumber;
+ MemoryContext oldcxt;
+
+ /* Shouldn't ever be trying to assign new data to a dummy header */
+ Assert(!(erh->flags & ER_FLAG_IS_DUMMY));
+
+ /* If we haven't yet deconstructed the tuple, do that */
+ if (!(erh->flags & ER_FLAG_DVALUES_VALID))
+ deconstruct_expanded_record(erh);
+
+ /* Tuple descriptor must be valid by now */
+ tupdesc = erh->er_tupdesc;
+ Assert(erh->nfields == tupdesc->natts);
+
+ /* Flattened value will no longer represent record accurately */
+ erh->flags &= ~ER_FLAG_FVALUE_VALID;
+ /* And we don't know the flattened size either */
+ erh->flat_size = 0;
+
+ oldcxt = MemoryContextSwitchTo(erh->hdr.eoh_context);
+
+ dvalues = erh->dvalues;
+ dnulls = erh->dnulls;
+
+ for (fnumber = 0; fnumber < erh->nfields; fnumber++)
+ {
+ Form_pg_attribute attr = TupleDescAttr(tupdesc, fnumber);
+ Datum newValue;
+ bool isnull;
+
+ /* Ignore dropped columns */
+ if (attr->attisdropped)
+ continue;
+
+ newValue = newValues[fnumber];
+ isnull = isnulls[fnumber];
+
+ if (!attr->attbyval)
+ {
+ /*
+ * Copy new field value into record's context, and deal with
+ * detoasting, if needed.
+ */
+ if (!isnull)
+ {
+ /* Is it an external toasted value? */
+ if (attr->attlen == -1 &&
+ VARATT_IS_EXTERNAL(DatumGetPointer(newValue)))
+ {
+ if (expand_external)
+ {
+ /* Detoast as requested while copying the value */
+ newValue = PointerGetDatum(detoast_external_attr((struct varlena *) DatumGetPointer(newValue)));
+ }
+ else
+ {
+ /* Just copy the value */
+ newValue = datumCopy(newValue, false, -1);
+ /* If it's still external, remember that */
+ if (VARATT_IS_EXTERNAL(DatumGetPointer(newValue)))
+ erh->flags |= ER_FLAG_HAVE_EXTERNAL;
+ }
+ }
+ else
+ {
+ /* Not an external value, just copy it */
+ newValue = datumCopy(newValue, false, attr->attlen);
+ }
+
+ /* Remember that we have field(s) that need to be pfree'd */
+ erh->flags |= ER_FLAG_DVALUES_ALLOCED;
+ }
+
+ /*
+ * Free old field value, if any (not likely, since really we ought
+ * to be inserting into an empty record).
+ */
+ if (unlikely(!dnulls[fnumber]))
+ {
+ char *oldValue;
+
+ oldValue = (char *) DatumGetPointer(dvalues[fnumber]);
+ /* Don't try to pfree a part of the original flat record */
+ if (oldValue < erh->fstartptr || oldValue >= erh->fendptr)
+ pfree(oldValue);
+ }
+ }
+
+ /* And finally we can insert the new field. */
+ dvalues[fnumber] = newValue;
+ dnulls[fnumber] = isnull;
+ }
+
+ /*
+ * Because we don't guarantee atomicity of set_fields(), we can just leave
+ * checking of domain constraints to occur as the final step; if it throws
+ * an error, too bad.
+ */
+ if (erh->flags & ER_FLAG_IS_DOMAIN)
+ {
+ /* We run domain_check in a short-lived context to limit cruft */
+ MemoryContextSwitchTo(get_short_term_cxt(erh));
+
+ domain_check(ExpandedRecordGetRODatum(erh), false,
+ erh->er_decltypeid,
+ &erh->er_domaininfo,
+ erh->hdr.eoh_context);
+ }
+
+ MemoryContextSwitchTo(oldcxt);
+}
+
+/*
+ * Construct (or reset) working memory context for short-term operations.
+ *
+ * This context is used for domain check evaluation and for detoasting.
+ *
+ * If we don't have a short-lived memory context, make one; if we have one,
+ * reset it to get rid of any leftover cruft. (It is a tad annoying to need a
+ * whole context for this, since it will often go unused --- but it's hard to
+ * avoid memory leaks otherwise. We can make the context small, at least.)
+ */
+static MemoryContext
+get_short_term_cxt(ExpandedRecordHeader *erh)
+{
+ if (erh->er_short_term_cxt == NULL)
+ erh->er_short_term_cxt =
+ AllocSetContextCreate(erh->hdr.eoh_context,
+ "expanded record short-term context",
+ ALLOCSET_SMALL_SIZES);
+ else
+ MemoryContextReset(erh->er_short_term_cxt);
+ return erh->er_short_term_cxt;
+}
+
+/*
+ * Construct "dummy header" for checking domain constraints.
+ *
+ * Since we don't want to modify the state of the expanded record until
+ * we've validated the constraints, our approach is to set up a dummy
+ * record header containing the new field value(s) and then pass that to
+ * domain_check. We retain the dummy header as part of the expanded
+ * record's state to save palloc cycles, but reinitialize (most of)
+ * its contents on each use.
+ */
+static void
+build_dummy_expanded_header(ExpandedRecordHeader *main_erh)
+{
+ ExpandedRecordHeader *erh;
+ TupleDesc tupdesc = expanded_record_get_tupdesc(main_erh);
+
+ /* Ensure we have a short-lived context */
+ (void) get_short_term_cxt(main_erh);
+
+ /*
+ * Allocate dummy header on first time through, or in the unlikely event
+ * that the number of fields changes (in which case we just leak the old
+ * one). Include space for its field values in the request.
+ */
+ erh = main_erh->er_dummy_header;
+ if (erh == NULL || erh->nfields != tupdesc->natts)
+ {
+ char *chunk;
+
+ erh = (ExpandedRecordHeader *)
+ MemoryContextAlloc(main_erh->hdr.eoh_context,
+ MAXALIGN(sizeof(ExpandedRecordHeader))
+ + tupdesc->natts * (sizeof(Datum) + sizeof(bool)));
+
+ /* Ensure all header fields are initialized to 0/null */
+ memset(erh, 0, sizeof(ExpandedRecordHeader));
+
+ /*
+ * We set up the dummy header with an indication that its memory
+ * context is the short-lived context. This is so that, if any
+ * detoasting of out-of-line values happens due to an attempt to
+ * extract a composite datum from the dummy header, the detoasted
+ * stuff will end up in the short-lived context and not cause a leak.
+ * This is cheating a bit on the expanded-object protocol; but since
+ * we never pass a R/W pointer to the dummy object to any other code,
+ * nothing else is authorized to delete or transfer ownership of the
+ * object's context, so it should be safe enough.
+ */
+ EOH_init_header(&erh->hdr, &ER_methods, main_erh->er_short_term_cxt);
+ erh->er_magic = ER_MAGIC;
+
+ /* Set up dvalues/dnulls, with no valid contents as yet */
+ chunk = (char *) erh + MAXALIGN(sizeof(ExpandedRecordHeader));
+ erh->dvalues = (Datum *) chunk;
+ erh->dnulls = (bool *) (chunk + tupdesc->natts * sizeof(Datum));
+ erh->nfields = tupdesc->natts;
+
+ /*
+ * The fields we just set are assumed to remain constant through
+ * multiple uses of the dummy header to check domain constraints. All
+ * other dummy header fields should be explicitly reset below, to
+ * ensure there's not accidental effects of one check on the next one.
+ */
+
+ main_erh->er_dummy_header = erh;
+ }
+
+ /*
+ * If anything inquires about the dummy header's declared type, it should
+ * report the composite base type, not the domain type (since the VALUE in
+ * a domain check constraint is of the base type not the domain). Hence
+ * we do not transfer over the IS_DOMAIN flag, nor indeed any of the main
+ * header's flags, since the dummy header is empty of data at this point.
+ * But don't forget to mark header as dummy.
+ */
+ erh->flags = ER_FLAG_IS_DUMMY;
+
+ /* Copy composite-type identification info */
+ erh->er_decltypeid = erh->er_typeid = main_erh->er_typeid;
+ erh->er_typmod = main_erh->er_typmod;
+
+ /* Dummy header does not need its own tupdesc refcount */
+ erh->er_tupdesc = tupdesc;
+ erh->er_tupdesc_id = main_erh->er_tupdesc_id;
+
+ /*
+ * It's tempting to copy over whatever we know about the flat size, but
+ * there's no point since we're surely about to modify the dummy record's
+ * field(s). Instead just clear anything left over from a previous usage
+ * cycle.
+ */
+ erh->flat_size = 0;
+
+ /* Copy over fvalue if we have it, so that system columns are available */
+ erh->fvalue = main_erh->fvalue;
+ erh->fstartptr = main_erh->fstartptr;
+ erh->fendptr = main_erh->fendptr;
+}
+
+/*
+ * Precheck domain constraints for a set_field operation
+ */
+static pg_noinline void
+check_domain_for_new_field(ExpandedRecordHeader *erh, int fnumber,
+ Datum newValue, bool isnull)
+{
+ ExpandedRecordHeader *dummy_erh;
+ MemoryContext oldcxt;
+
+ /* Construct dummy header to contain proposed new field set */
+ build_dummy_expanded_header(erh);
+ dummy_erh = erh->er_dummy_header;
+
+ /*
+ * If record isn't empty, just deconstruct it (if needed) and copy over
+ * the existing field values. If it is empty, just fill fields with nulls
+ * manually --- don't call deconstruct_expanded_record prematurely.
+ */
+ if (!ExpandedRecordIsEmpty(erh))
+ {
+ deconstruct_expanded_record(erh);
+ memcpy(dummy_erh->dvalues, erh->dvalues,
+ dummy_erh->nfields * sizeof(Datum));
+ memcpy(dummy_erh->dnulls, erh->dnulls,
+ dummy_erh->nfields * sizeof(bool));
+ /* There might be some external values in there... */
+ dummy_erh->flags |= erh->flags & ER_FLAG_HAVE_EXTERNAL;
+ }
+ else
+ {
+ memset(dummy_erh->dvalues, 0, dummy_erh->nfields * sizeof(Datum));
+ memset(dummy_erh->dnulls, true, dummy_erh->nfields * sizeof(bool));
+ }
+
+ /* Either way, we now have valid dvalues */
+ dummy_erh->flags |= ER_FLAG_DVALUES_VALID;
+
+ /* Caller error if fnumber is system column or nonexistent column */
+ if (unlikely(fnumber <= 0 || fnumber > dummy_erh->nfields))
+ elog(ERROR, "cannot assign to field %d of expanded record", fnumber);
+
+ /* Insert proposed new value into dummy field array */
+ dummy_erh->dvalues[fnumber - 1] = newValue;
+ dummy_erh->dnulls[fnumber - 1] = isnull;
+
+ /*
+ * The proposed new value might be external, in which case we'd better set
+ * the flag for that in dummy_erh. (This matters in case something in the
+ * domain check expressions tries to extract a flat value from the dummy
+ * header.)
+ */
+ if (!isnull)
+ {
+ Form_pg_attribute attr = TupleDescAttr(erh->er_tupdesc, fnumber - 1);
+
+ if (!attr->attbyval && attr->attlen == -1 &&
+ VARATT_IS_EXTERNAL(DatumGetPointer(newValue)))
+ dummy_erh->flags |= ER_FLAG_HAVE_EXTERNAL;
+ }
+
+ /*
+ * We call domain_check in the short-lived context, so that any cruft
+ * leaked by expression evaluation can be reclaimed.
+ */
+ oldcxt = MemoryContextSwitchTo(erh->er_short_term_cxt);
+
+ /*
+ * And now we can apply the check. Note we use main header's domain cache
+ * space, so that caching carries across repeated uses.
+ */
+ domain_check(ExpandedRecordGetRODatum(dummy_erh), false,
+ erh->er_decltypeid,
+ &erh->er_domaininfo,
+ erh->hdr.eoh_context);
+
+ MemoryContextSwitchTo(oldcxt);
+
+ /* We might as well clean up cruft immediately. */
+ MemoryContextReset(erh->er_short_term_cxt);
+}
+
+/*
+ * Precheck domain constraints for a set_tuple operation
+ */
+static pg_noinline void
+check_domain_for_new_tuple(ExpandedRecordHeader *erh, HeapTuple tuple)
+{
+ ExpandedRecordHeader *dummy_erh;
+ MemoryContext oldcxt;
+
+ /* If we're being told to set record to empty, just see if NULL is OK */
+ if (tuple == NULL)
+ {
+ /* We run domain_check in a short-lived context to limit cruft */
+ oldcxt = MemoryContextSwitchTo(get_short_term_cxt(erh));
+
+ domain_check((Datum) 0, true,
+ erh->er_decltypeid,
+ &erh->er_domaininfo,
+ erh->hdr.eoh_context);
+
+ MemoryContextSwitchTo(oldcxt);
+
+ /* We might as well clean up cruft immediately. */
+ MemoryContextReset(erh->er_short_term_cxt);
+
+ return;
+ }
+
+ /* Construct dummy header to contain replacement tuple */
+ build_dummy_expanded_header(erh);
+ dummy_erh = erh->er_dummy_header;
+
+ /* Insert tuple, but don't bother to deconstruct its fields for now */
+ dummy_erh->fvalue = tuple;
+ dummy_erh->fstartptr = (char *) tuple->t_data;
+ dummy_erh->fendptr = ((char *) tuple->t_data) + tuple->t_len;
+ dummy_erh->flags |= ER_FLAG_FVALUE_VALID;
+
+ /* Remember if we have any out-of-line field values */
+ if (HeapTupleHasExternal(tuple))
+ dummy_erh->flags |= ER_FLAG_HAVE_EXTERNAL;
+
+ /*
+ * We call domain_check in the short-lived context, so that any cruft
+ * leaked by expression evaluation can be reclaimed.
+ */
+ oldcxt = MemoryContextSwitchTo(erh->er_short_term_cxt);
+
+ /*
+ * And now we can apply the check. Note we use main header's domain cache
+ * space, so that caching carries across repeated uses.
+ */
+ domain_check(ExpandedRecordGetRODatum(dummy_erh), false,
+ erh->er_decltypeid,
+ &erh->er_domaininfo,
+ erh->hdr.eoh_context);
+
+ MemoryContextSwitchTo(oldcxt);
+
+ /* We might as well clean up cruft immediately. */
+ MemoryContextReset(erh->er_short_term_cxt);
+}
diff --git a/src/backend/utils/adt/float.c b/src/backend/utils/adt/float.c
new file mode 100644
index 0000000..63bb0f2
--- /dev/null
+++ b/src/backend/utils/adt/float.c
@@ -0,0 +1,4074 @@
+/*-------------------------------------------------------------------------
+ *
+ * float.c
+ * Functions for the built-in floating-point types.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/float.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+#include <float.h>
+#include <math.h>
+#include <limits.h>
+
+#include "catalog/pg_type.h"
+#include "common/int.h"
+#include "common/pg_prng.h"
+#include "common/shortest_dec.h"
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "utils/array.h"
+#include "utils/float.h"
+#include "utils/fmgrprotos.h"
+#include "utils/sortsupport.h"
+#include "utils/timestamp.h"
+
+
+/*
+ * Configurable GUC parameter
+ *
+ * If >0, use shortest-decimal format for output; this is both the default and
+ * allows for compatibility with clients that explicitly set a value here to
+ * get round-trip-accurate results. If 0 or less, then use the old, slow,
+ * decimal rounding method.
+ */
+int extra_float_digits = 1;
+
+/* Cached constants for degree-based trig functions */
+static bool degree_consts_set = false;
+static float8 sin_30 = 0;
+static float8 one_minus_cos_60 = 0;
+static float8 asin_0_5 = 0;
+static float8 acos_0_5 = 0;
+static float8 atan_1_0 = 0;
+static float8 tan_45 = 0;
+static float8 cot_45 = 0;
+
+/*
+ * These are intentionally not static; don't "fix" them. They will never
+ * be referenced by other files, much less changed; but we don't want the
+ * compiler to know that, else it might try to precompute expressions
+ * involving them. See comments for init_degree_constants().
+ */
+float8 degree_c_thirty = 30.0;
+float8 degree_c_forty_five = 45.0;
+float8 degree_c_sixty = 60.0;
+float8 degree_c_one_half = 0.5;
+float8 degree_c_one = 1.0;
+
+/* State for drandom() and setseed() */
+static bool drandom_seed_set = false;
+static pg_prng_state drandom_seed;
+
+/* Local function prototypes */
+static double sind_q1(double x);
+static double cosd_q1(double x);
+static void init_degree_constants(void);
+
+
+/*
+ * We use these out-of-line ereport() calls to report float overflow,
+ * underflow, and zero-divide, because following our usual practice of
+ * repeating them at each call site would lead to a lot of code bloat.
+ *
+ * This does mean that you don't get a useful error location indicator.
+ */
+pg_noinline void
+float_overflow_error(void)
+{
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value out of range: overflow")));
+}
+
+pg_noinline void
+float_underflow_error(void)
+{
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value out of range: underflow")));
+}
+
+pg_noinline void
+float_zero_divide_error(void)
+{
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+}
+
+
+/*
+ * Returns -1 if 'val' represents negative infinity, 1 if 'val'
+ * represents (positive) infinity, and 0 otherwise. On some platforms,
+ * this is equivalent to the isinf() macro, but not everywhere: C99
+ * does not specify that isinf() needs to distinguish between positive
+ * and negative infinity.
+ */
+int
+is_infinite(double val)
+{
+ int inf = isinf(val);
+
+ if (inf == 0)
+ return 0;
+ else if (val > 0)
+ return 1;
+ else
+ return -1;
+}
+
+
+/* ========== USER I/O ROUTINES ========== */
+
+
+/*
+ * float4in - converts "num" to float4
+ *
+ * Note that this code now uses strtof(), where it used to use strtod().
+ *
+ * The motivation for using strtof() is to avoid a double-rounding problem:
+ * for certain decimal inputs, if you round the input correctly to a double,
+ * and then round the double to a float, the result is incorrect in that it
+ * does not match the result of rounding the decimal value to float directly.
+ *
+ * One of the best examples is 7.038531e-26:
+ *
+ * 0xAE43FDp-107 = 7.03853069185120912085...e-26
+ * midpoint 7.03853100000000022281...e-26
+ * 0xAE43FEp-107 = 7.03853130814879132477...e-26
+ *
+ * making 0xAE43FDp-107 the correct float result, but if you do the conversion
+ * via a double, you get
+ *
+ * 0xAE43FD.7FFFFFF8p-107 = 7.03853099999999907487...e-26
+ * midpoint 7.03853099999999964884...e-26
+ * 0xAE43FD.80000000p-107 = 7.03853100000000022281...e-26
+ * 0xAE43FD.80000008p-107 = 7.03853100000000137076...e-26
+ *
+ * so the value rounds to the double exactly on the midpoint between the two
+ * nearest floats, and then rounding again to a float gives the incorrect
+ * result of 0xAE43FEp-107.
+ *
+ */
+Datum
+float4in(PG_FUNCTION_ARGS)
+{
+ char *num = PG_GETARG_CSTRING(0);
+ char *orig_num;
+ float val;
+ char *endptr;
+
+ /*
+ * endptr points to the first character _after_ the sequence we recognized
+ * as a valid floating point number. orig_num points to the original input
+ * string.
+ */
+ orig_num = num;
+
+ /* skip leading whitespace */
+ while (*num != '\0' && isspace((unsigned char) *num))
+ num++;
+
+ /*
+ * Check for an empty-string input to begin with, to avoid the vagaries of
+ * strtod() on different platforms.
+ */
+ if (*num == '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "real", orig_num)));
+
+ errno = 0;
+ val = strtof(num, &endptr);
+
+ /* did we not see anything that looks like a double? */
+ if (endptr == num || errno != 0)
+ {
+ int save_errno = errno;
+
+ /*
+ * C99 requires that strtof() accept NaN, [+-]Infinity, and [+-]Inf,
+ * but not all platforms support all of these (and some accept them
+ * but set ERANGE anyway...) Therefore, we check for these inputs
+ * ourselves if strtof() fails.
+ *
+ * Note: C99 also requires hexadecimal input as well as some extended
+ * forms of NaN, but we consider these forms unportable and don't try
+ * to support them. You can use 'em if your strtof() takes 'em.
+ */
+ if (pg_strncasecmp(num, "NaN", 3) == 0)
+ {
+ val = get_float4_nan();
+ endptr = num + 3;
+ }
+ else if (pg_strncasecmp(num, "Infinity", 8) == 0)
+ {
+ val = get_float4_infinity();
+ endptr = num + 8;
+ }
+ else if (pg_strncasecmp(num, "+Infinity", 9) == 0)
+ {
+ val = get_float4_infinity();
+ endptr = num + 9;
+ }
+ else if (pg_strncasecmp(num, "-Infinity", 9) == 0)
+ {
+ val = -get_float4_infinity();
+ endptr = num + 9;
+ }
+ else if (pg_strncasecmp(num, "inf", 3) == 0)
+ {
+ val = get_float4_infinity();
+ endptr = num + 3;
+ }
+ else if (pg_strncasecmp(num, "+inf", 4) == 0)
+ {
+ val = get_float4_infinity();
+ endptr = num + 4;
+ }
+ else if (pg_strncasecmp(num, "-inf", 4) == 0)
+ {
+ val = -get_float4_infinity();
+ endptr = num + 4;
+ }
+ else if (save_errno == ERANGE)
+ {
+ /*
+ * Some platforms return ERANGE for denormalized numbers (those
+ * that are not zero, but are too close to zero to have full
+ * precision). We'd prefer not to throw error for that, so try to
+ * detect whether it's a "real" out-of-range condition by checking
+ * to see if the result is zero or huge.
+ *
+ * Use isinf() rather than HUGE_VALF on VS2013 because it
+ * generates a spurious overflow warning for -HUGE_VALF. Also use
+ * isinf() if HUGE_VALF is missing.
+ */
+ if (val == 0.0 ||
+#if !defined(HUGE_VALF) || (defined(_MSC_VER) && (_MSC_VER < 1900))
+ isinf(val)
+#else
+ (val >= HUGE_VALF || val <= -HUGE_VALF)
+#endif
+ )
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("\"%s\" is out of range for type real",
+ orig_num)));
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "real", orig_num)));
+ }
+
+ /* skip trailing whitespace */
+ while (*endptr != '\0' && isspace((unsigned char) *endptr))
+ endptr++;
+
+ /* if there is any junk left at the end of the string, bail out */
+ if (*endptr != '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "real", orig_num)));
+
+ PG_RETURN_FLOAT4(val);
+}
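
The double-rounding hazard described in the header comment can be reproduced outside the backend. A standalone sketch (not part of the patch), assuming IEEE 754 arithmetic and correctly rounding strtof()/strtod(); on such a platform the two results differ in the last bit of the float:

#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
	const char *s = "7.038531e-26";
	float		direct = strtof(s, NULL);	/* rounded once, straight to float */
	float		via_double = (float) strtod(s, NULL);	/* rounded twice */

	printf("direct:     %.9g (%a)\n", direct, direct);
	printf("via double: %.9g (%a)\n", via_double, via_double);
	return 0;
}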
+
+/*
+ * float4out - converts a float4 number to a string
+ * using a standard output format
+ */
+Datum
+float4out(PG_FUNCTION_ARGS)
+{
+ float4 num = PG_GETARG_FLOAT4(0);
+ char *ascii = (char *) palloc(32);
+ int ndig = FLT_DIG + extra_float_digits;
+
+ if (extra_float_digits > 0)
+ {
+ float_to_shortest_decimal_buf(num, ascii);
+ PG_RETURN_CSTRING(ascii);
+ }
+
+ (void) pg_strfromd(ascii, 32, ndig, num);
+ PG_RETURN_CSTRING(ascii);
+}
+
+/*
+ * float4recv - converts external binary format to float4
+ */
+Datum
+float4recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+
+ PG_RETURN_FLOAT4(pq_getmsgfloat4(buf));
+}
+
+/*
+ * float4send - converts float4 to binary format
+ */
+Datum
+float4send(PG_FUNCTION_ARGS)
+{
+ float4 num = PG_GETARG_FLOAT4(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendfloat4(&buf, num);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/*
+ * float8in - converts "num" to float8
+ */
+Datum
+float8in(PG_FUNCTION_ARGS)
+{
+ char *num = PG_GETARG_CSTRING(0);
+
+ PG_RETURN_FLOAT8(float8in_internal(num, NULL, "double precision", num));
+}
+
+/* Convenience macro: set *have_error flag (if provided) or throw error */
+#define RETURN_ERROR(throw_error, have_error) \
+do { \
+ if (have_error) { \
+ *have_error = true; \
+ return 0.0; \
+ } else { \
+ throw_error; \
+ } \
+} while (0)
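
A sketch (not part of the patch) of how a caller might use the have_error flag of float8in_internal_opt_error(), defined below, to attempt a conversion without raising an error; the helper name is hypothetical and the declaration is assumed to come from utils/float.h:

#include "postgres.h"
#include "utils/float.h"

static bool
try_parse_float8(char *str, double *result)
{
	bool		have_error = false;

	*result = float8in_internal_opt_error(str, NULL, "double precision",
										   str, &have_error);
	return !have_error;
}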
+
+/*
+ * float8in_internal_opt_error - guts of float8in()
+ *
+ * This is exposed for use by functions that want a reasonably
+ * platform-independent way of inputting doubles. The behavior is
+ * essentially like strtod + ereport on error, but note the following
+ * differences:
+ * 1. Both leading and trailing whitespace are skipped.
+ * 2. If endptr_p is NULL, we throw error if there's trailing junk.
+ * Otherwise, it's up to the caller to complain about trailing junk.
+ * 3. In event of a syntax error, the report mentions the given type_name
+ * and prints orig_string as the input; this is meant to support use of
+ * this function with types such as "box" and "point", where what we are
+ * parsing here is just a substring of orig_string.
+ *
+ * "num" could validly be declared "const char *", but that results in an
+ * unreasonable amount of extra casting both here and in callers, so we don't.
+ *
+ * When the "*have_error" flag is provided, it's set instead of throwing an
+ * error.  This is helpful when the caller needs to handle errors itself.
+ */
+double
+float8in_internal_opt_error(char *num, char **endptr_p,
+ const char *type_name, const char *orig_string,
+ bool *have_error)
+{
+ double val;
+ char *endptr;
+
+ if (have_error)
+ *have_error = false;
+
+ /* skip leading whitespace */
+ while (*num != '\0' && isspace((unsigned char) *num))
+ num++;
+
+ /*
+ * Check for an empty-string input to begin with, to avoid the vagaries of
+ * strtod() on different platforms.
+ */
+ if (*num == '\0')
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ type_name, orig_string))),
+ have_error);
+
+ errno = 0;
+ val = strtod(num, &endptr);
+
+ /* did we not see anything that looks like a double? */
+ if (endptr == num || errno != 0)
+ {
+ int save_errno = errno;
+
+ /*
+ * C99 requires that strtod() accept NaN, [+-]Infinity, and [+-]Inf,
+ * but not all platforms support all of these (and some accept them
+ * but set ERANGE anyway...) Therefore, we check for these inputs
+ * ourselves if strtod() fails.
+ *
+ * Note: C99 also requires hexadecimal input as well as some extended
+ * forms of NaN, but we consider these forms unportable and don't try
+ * to support them. You can use 'em if your strtod() takes 'em.
+ */
+ if (pg_strncasecmp(num, "NaN", 3) == 0)
+ {
+ val = get_float8_nan();
+ endptr = num + 3;
+ }
+ else if (pg_strncasecmp(num, "Infinity", 8) == 0)
+ {
+ val = get_float8_infinity();
+ endptr = num + 8;
+ }
+ else if (pg_strncasecmp(num, "+Infinity", 9) == 0)
+ {
+ val = get_float8_infinity();
+ endptr = num + 9;
+ }
+ else if (pg_strncasecmp(num, "-Infinity", 9) == 0)
+ {
+ val = -get_float8_infinity();
+ endptr = num + 9;
+ }
+ else if (pg_strncasecmp(num, "inf", 3) == 0)
+ {
+ val = get_float8_infinity();
+ endptr = num + 3;
+ }
+ else if (pg_strncasecmp(num, "+inf", 4) == 0)
+ {
+ val = get_float8_infinity();
+ endptr = num + 4;
+ }
+ else if (pg_strncasecmp(num, "-inf", 4) == 0)
+ {
+ val = -get_float8_infinity();
+ endptr = num + 4;
+ }
+ else if (save_errno == ERANGE)
+ {
+ /*
+ * Some platforms return ERANGE for denormalized numbers (those
+ * that are not zero, but are too close to zero to have full
+ * precision). We'd prefer not to throw error for that, so try to
+ * detect whether it's a "real" out-of-range condition by checking
+ * to see if the result is zero or huge.
+ *
+ * On error, we intentionally complain about double precision not
+ * the given type name, and we print only the part of the string
+ * that is the current number.
+ */
+ if (val == 0.0 || val >= HUGE_VAL || val <= -HUGE_VAL)
+ {
+ char *errnumber = pstrdup(num);
+
+ errnumber[endptr - num] = '\0';
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("\"%s\" is out of range for type double precision",
+ errnumber))),
+ have_error);
+ }
+ }
+ else
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type "
+ "%s: \"%s\"",
+ type_name, orig_string))),
+ have_error);
+ }
+
+ /* skip trailing whitespace */
+ while (*endptr != '\0' && isspace((unsigned char) *endptr))
+ endptr++;
+
+ /* report stopping point if wanted, else complain if not end of string */
+ if (endptr_p)
+ *endptr_p = endptr;
+ else if (*endptr != '\0')
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type "
+ "%s: \"%s\"",
+ type_name, orig_string))),
+ have_error);
+
+ return val;
+}
+
+/*
+ * Interface to float8in_internal_opt_error() without "have_error" argument.
+ */
+double
+float8in_internal(char *num, char **endptr_p,
+ const char *type_name, const char *orig_string)
+{
+ return float8in_internal_opt_error(num, endptr_p, type_name,
+ orig_string, NULL);
+}
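+
+/*
+ * For example, with endptr_p == NULL the parsing code above accepts inputs
+ * such as "  1.5e-3 ", "NaN", "-Infinity" and "+inf" (case-insensitively),
+ * rejects "1.5xyz" because of the trailing junk, and reports "1e400" as out
+ * of range for type double precision.
+ */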
+
+
+/*
+ * float8out - converts float8 number to a string
+ * using a standard output format
+ */
+Datum
+float8out(PG_FUNCTION_ARGS)
+{
+ float8 num = PG_GETARG_FLOAT8(0);
+
+ PG_RETURN_CSTRING(float8out_internal(num));
+}
+
+/*
+ * float8out_internal - guts of float8out()
+ *
+ * This is exposed for use by functions that want a reasonably
+ * platform-independent way of outputting doubles.
+ * The result is always palloc'd.
+ */
+char *
+float8out_internal(double num)
+{
+ char *ascii = (char *) palloc(32);
+ int ndig = DBL_DIG + extra_float_digits;
+
+ if (extra_float_digits > 0)
+ {
+ double_to_shortest_decimal_buf(num, ascii);
+ return ascii;
+ }
+
+ (void) pg_strfromd(ascii, 32, ndig, num);
+ return ascii;
+}
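+
+/*
+ * For example, with the default setting extra_float_digits = 1 the value 0.1
+ * is output as "0.1", the shortest string that reads back to exactly the
+ * same double, whereas extra_float_digits = -1 rounds the output to
+ * DBL_DIG - 1 significant digits via pg_strfromd().
+ */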
+
+/*
+ * float8recv - converts external binary format to float8
+ */
+Datum
+float8recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+
+ PG_RETURN_FLOAT8(pq_getmsgfloat8(buf));
+}
+
+/*
+ * float8send - converts float8 to binary format
+ */
+Datum
+float8send(PG_FUNCTION_ARGS)
+{
+ float8 num = PG_GETARG_FLOAT8(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendfloat8(&buf, num);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+
+/* ========== PUBLIC ROUTINES ========== */
+
+
+/*
+ * ======================
+ * FLOAT4 BASE OPERATIONS
+ * ======================
+ */
+
+/*
+ * float4abs - returns |arg1| (absolute value)
+ */
+Datum
+float4abs(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+
+ PG_RETURN_FLOAT4((float4) fabs(arg1));
+}
+
+/*
+ * float4um - returns -arg1 (unary minus)
+ */
+Datum
+float4um(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float4 result;
+
+ result = -arg1;
+ PG_RETURN_FLOAT4(result);
+}
+
+Datum
+float4up(PG_FUNCTION_ARGS)
+{
+ float4 arg = PG_GETARG_FLOAT4(0);
+
+ PG_RETURN_FLOAT4(arg);
+}
+
+Datum
+float4larger(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+ float4 result;
+
+ if (float4_gt(arg1, arg2))
+ result = arg1;
+ else
+ result = arg2;
+ PG_RETURN_FLOAT4(result);
+}
+
+Datum
+float4smaller(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+ float4 result;
+
+ if (float4_lt(arg1, arg2))
+ result = arg1;
+ else
+ result = arg2;
+ PG_RETURN_FLOAT4(result);
+}
+
+/*
+ * ======================
+ * FLOAT8 BASE OPERATIONS
+ * ======================
+ */
+
+/*
+ * float8abs - returns |arg1| (absolute value)
+ */
+Datum
+float8abs(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+
+ PG_RETURN_FLOAT8(fabs(arg1));
+}
+
+
+/*
+ * float8um - returns -arg1 (unary minus)
+ */
+Datum
+float8um(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ result = -arg1;
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+float8up(PG_FUNCTION_ARGS)
+{
+ float8 arg = PG_GETARG_FLOAT8(0);
+
+ PG_RETURN_FLOAT8(arg);
+}
+
+Datum
+float8larger(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+ float8 result;
+
+ if (float8_gt(arg1, arg2))
+ result = arg1;
+ else
+ result = arg2;
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+float8smaller(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+ float8 result;
+
+ if (float8_lt(arg1, arg2))
+ result = arg1;
+ else
+ result = arg2;
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * ====================
+ * ARITHMETIC OPERATORS
+ * ====================
+ */
+
+/*
+ * float4pl - returns arg1 + arg2
+ * float4mi - returns arg1 - arg2
+ * float4mul - returns arg1 * arg2
+ * float4div - returns arg1 / arg2
+ */
+Datum
+float4pl(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+
+ PG_RETURN_FLOAT4(float4_pl(arg1, arg2));
+}
+
+Datum
+float4mi(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+
+ PG_RETURN_FLOAT4(float4_mi(arg1, arg2));
+}
+
+Datum
+float4mul(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+
+ PG_RETURN_FLOAT4(float4_mul(arg1, arg2));
+}
+
+Datum
+float4div(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+
+ PG_RETURN_FLOAT4(float4_div(arg1, arg2));
+}
+
+/*
+ * float8pl - returns arg1 + arg2
+ * float8mi - returns arg1 - arg2
+ * float8mul - returns arg1 * arg2
+ * float8div - returns arg1 / arg2
+ */
+Datum
+float8pl(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+
+ PG_RETURN_FLOAT8(float8_pl(arg1, arg2));
+}
+
+Datum
+float8mi(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+
+ PG_RETURN_FLOAT8(float8_mi(arg1, arg2));
+}
+
+Datum
+float8mul(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+
+ PG_RETURN_FLOAT8(float8_mul(arg1, arg2));
+}
+
+Datum
+float8div(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+
+ PG_RETURN_FLOAT8(float8_div(arg1, arg2));
+}
+
+
+/*
+ * ====================
+ * COMPARISON OPERATORS
+ * ====================
+ */
+
+/*
+ * float4{eq,ne,lt,le,gt,ge} - float4/float4 comparison operations
+ */
+int
+float4_cmp_internal(float4 a, float4 b)
+{
+ if (float4_gt(a, b))
+ return 1;
+ if (float4_lt(a, b))
+ return -1;
+ return 0;
+}
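+
+/*
+ * Note that float4_lt() and float4_gt() (from utils/float.h) are NaN-aware,
+ * so this comparator treats all NaNs as equal to each other and greater than
+ * any non-NaN value; for example, 'NaN'::float4 sorts after
+ * 'Infinity'::float4 in a btree index.
+ */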
+
+Datum
+float4eq(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+
+ PG_RETURN_BOOL(float4_eq(arg1, arg2));
+}
+
+Datum
+float4ne(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+
+ PG_RETURN_BOOL(float4_ne(arg1, arg2));
+}
+
+Datum
+float4lt(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+
+ PG_RETURN_BOOL(float4_lt(arg1, arg2));
+}
+
+Datum
+float4le(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+
+ PG_RETURN_BOOL(float4_le(arg1, arg2));
+}
+
+Datum
+float4gt(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+
+ PG_RETURN_BOOL(float4_gt(arg1, arg2));
+}
+
+Datum
+float4ge(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+
+ PG_RETURN_BOOL(float4_ge(arg1, arg2));
+}
+
+Datum
+btfloat4cmp(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+
+ PG_RETURN_INT32(float4_cmp_internal(arg1, arg2));
+}
+
+static int
+btfloat4fastcmp(Datum x, Datum y, SortSupport ssup)
+{
+ float4 arg1 = DatumGetFloat4(x);
+ float4 arg2 = DatumGetFloat4(y);
+
+ return float4_cmp_internal(arg1, arg2);
+}
+
+Datum
+btfloat4sortsupport(PG_FUNCTION_ARGS)
+{
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+
+ ssup->comparator = btfloat4fastcmp;
+ PG_RETURN_VOID();
+}
+
+/*
+ * float8{eq,ne,lt,le,gt,ge} - float8/float8 comparison operations
+ */
+int
+float8_cmp_internal(float8 a, float8 b)
+{
+ if (float8_gt(a, b))
+ return 1;
+ if (float8_lt(a, b))
+ return -1;
+ return 0;
+}
+
+Datum
+float8eq(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+
+ PG_RETURN_BOOL(float8_eq(arg1, arg2));
+}
+
+Datum
+float8ne(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+
+ PG_RETURN_BOOL(float8_ne(arg1, arg2));
+}
+
+Datum
+float8lt(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+
+ PG_RETURN_BOOL(float8_lt(arg1, arg2));
+}
+
+Datum
+float8le(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+
+ PG_RETURN_BOOL(float8_le(arg1, arg2));
+}
+
+Datum
+float8gt(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+
+ PG_RETURN_BOOL(float8_gt(arg1, arg2));
+}
+
+Datum
+float8ge(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+
+ PG_RETURN_BOOL(float8_ge(arg1, arg2));
+}
+
+Datum
+btfloat8cmp(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+
+ PG_RETURN_INT32(float8_cmp_internal(arg1, arg2));
+}
+
+static int
+btfloat8fastcmp(Datum x, Datum y, SortSupport ssup)
+{
+ float8 arg1 = DatumGetFloat8(x);
+ float8 arg2 = DatumGetFloat8(y);
+
+ return float8_cmp_internal(arg1, arg2);
+}
+
+Datum
+btfloat8sortsupport(PG_FUNCTION_ARGS)
+{
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+
+ ssup->comparator = btfloat8fastcmp;
+ PG_RETURN_VOID();
+}
+
+Datum
+btfloat48cmp(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+
+ /* widen float4 to float8 and then compare */
+ PG_RETURN_INT32(float8_cmp_internal(arg1, arg2));
+}
+
+Datum
+btfloat84cmp(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+
+ /* widen float4 to float8 and then compare */
+ PG_RETURN_INT32(float8_cmp_internal(arg1, arg2));
+}
+
+/*
+ * in_range support function for float8.
+ *
+ * Note: we needn't supply a float8_float4 variant, as implicit coercion
+ * of the offset value takes care of that scenario just as well.
+ */
+Datum
+in_range_float8_float8(PG_FUNCTION_ARGS)
+{
+ float8 val = PG_GETARG_FLOAT8(0);
+ float8 base = PG_GETARG_FLOAT8(1);
+ float8 offset = PG_GETARG_FLOAT8(2);
+ bool sub = PG_GETARG_BOOL(3);
+ bool less = PG_GETARG_BOOL(4);
+ float8 sum;
+
+ /*
+ * Reject negative or NaN offset. Negative is per spec, and NaN is
+ * because appropriate semantics for that seem non-obvious.
+ */
+ if (isnan(offset) || offset < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE),
+ errmsg("invalid preceding or following size in window function")));
+
+ /*
+ * Deal with cases where val and/or base is NaN, following the rule that
+ * NaN sorts after non-NaN (cf float8_cmp_internal). The offset cannot
+ * affect the conclusion.
+ */
+ if (isnan(val))
+ {
+ if (isnan(base))
+ PG_RETURN_BOOL(true); /* NAN = NAN */
+ else
+ PG_RETURN_BOOL(!less); /* NAN > non-NAN */
+ }
+ else if (isnan(base))
+ {
+ PG_RETURN_BOOL(less); /* non-NAN < NAN */
+ }
+
+ /*
+ * Deal with cases where both base and offset are infinite, and computing
+ * base +/- offset would produce NaN. This corresponds to a window frame
+ * whose boundary infinitely precedes +inf or infinitely follows -inf,
+ * which is not well-defined. For consistency with other cases involving
+ * infinities, such as the fact that +inf infinitely follows +inf, we
+ * choose to assume that +inf infinitely precedes +inf and -inf infinitely
+ * follows -inf, and therefore that all finite and infinite values are in
+ * such a window frame.
+ *
+ * offset is known positive, so we need only check the sign of base in
+ * this test.
+ */
+ if (isinf(offset) && isinf(base) &&
+ (sub ? base > 0 : base < 0))
+ PG_RETURN_BOOL(true);
+
+ /*
+ * Otherwise it should be safe to compute base +/- offset. We trust the
+ * FPU to cope if an input is +/-inf or the true sum would overflow, and
+ * produce a suitably signed infinity, which will compare properly against
+ * val whether or not that's infinity.
+ */
+ if (sub)
+ sum = base - offset;
+ else
+ sum = base + offset;
+
+ if (less)
+ PG_RETURN_BOOL(val <= sum);
+ else
+ PG_RETURN_BOOL(val >= sum);
+}
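+
+/*
+ * A worked example of the normal path above: for val = 5, base = 10,
+ * offset = 3, sub = true and less = true, we compute sum = base - offset = 7
+ * and return val <= sum, i.e. true.
+ */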
+
+/*
+ * in_range support function for float4.
+ *
+ * We would need a float4_float8 variant in any case, so we supply that and
+ * let implicit coercion take care of the float4_float4 case.
+ */
+Datum
+in_range_float4_float8(PG_FUNCTION_ARGS)
+{
+ float4 val = PG_GETARG_FLOAT4(0);
+ float4 base = PG_GETARG_FLOAT4(1);
+ float8 offset = PG_GETARG_FLOAT8(2);
+ bool sub = PG_GETARG_BOOL(3);
+ bool less = PG_GETARG_BOOL(4);
+ float8 sum;
+
+ /*
+ * Reject negative or NaN offset. Negative is per spec, and NaN is
+ * because appropriate semantics for that seem non-obvious.
+ */
+ if (isnan(offset) || offset < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE),
+ errmsg("invalid preceding or following size in window function")));
+
+ /*
+ * Deal with cases where val and/or base is NaN, following the rule that
+ * NaN sorts after non-NaN (cf float8_cmp_internal). The offset cannot
+ * affect the conclusion.
+ */
+ if (isnan(val))
+ {
+ if (isnan(base))
+ PG_RETURN_BOOL(true); /* NAN = NAN */
+ else
+ PG_RETURN_BOOL(!less); /* NAN > non-NAN */
+ }
+ else if (isnan(base))
+ {
+ PG_RETURN_BOOL(less); /* non-NAN < NAN */
+ }
+
+ /*
+ * Deal with cases where both base and offset are infinite, and computing
+ * base +/- offset would produce NaN. This corresponds to a window frame
+ * whose boundary infinitely precedes +inf or infinitely follows -inf,
+ * which is not well-defined. For consistency with other cases involving
+ * infinities, such as the fact that +inf infinitely follows +inf, we
+ * choose to assume that +inf infinitely precedes +inf and -inf infinitely
+ * follows -inf, and therefore that all finite and infinite values are in
+ * such a window frame.
+ *
+ * offset is known positive, so we need only check the sign of base in
+ * this test.
+ */
+ if (isinf(offset) && isinf(base) &&
+ (sub ? base > 0 : base < 0))
+ PG_RETURN_BOOL(true);
+
+ /*
+ * Otherwise it should be safe to compute base +/- offset. We trust the
+ * FPU to cope if an input is +/-inf or the true sum would overflow, and
+ * produce a suitably signed infinity, which will compare properly against
+ * val whether or not that's infinity.
+ */
+ if (sub)
+ sum = base - offset;
+ else
+ sum = base + offset;
+
+ if (less)
+ PG_RETURN_BOOL(val <= sum);
+ else
+ PG_RETURN_BOOL(val >= sum);
+}
+
+
+/*
+ * ===================
+ * CONVERSION ROUTINES
+ * ===================
+ */
+
+/*
+ * ftod - converts a float4 number to a float8 number
+ */
+Datum
+ftod(PG_FUNCTION_ARGS)
+{
+ float4 num = PG_GETARG_FLOAT4(0);
+
+ PG_RETURN_FLOAT8((float8) num);
+}
+
+
+/*
+ * dtof - converts a float8 number to a float4 number
+ */
+Datum
+dtof(PG_FUNCTION_ARGS)
+{
+ float8 num = PG_GETARG_FLOAT8(0);
+ float4 result;
+
+ result = (float4) num;
+ if (unlikely(isinf(result)) && !isinf(num))
+ float_overflow_error();
+ if (unlikely(result == 0.0f) && num != 0.0)
+ float_underflow_error();
+
+ PG_RETURN_FLOAT4(result);
+}
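+
+/*
+ * For example, casting 1e300::float8 to real raises an overflow error, and
+ * 1e-300::float8 raises an underflow error, since neither value maps to a
+ * nonzero finite float4.
+ */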
+
+
+/*
+ * dtoi4 - converts a float8 number to an int4 number
+ */
+Datum
+dtoi4(PG_FUNCTION_ARGS)
+{
+ float8 num = PG_GETARG_FLOAT8(0);
+
+ /*
+ * Get rid of any fractional part in the input. This is so we don't fail
+ * on just-out-of-range values that would round into range. Note
+ * assumption that rint() will pass through a NaN or Inf unchanged.
+ */
+ num = rint(num);
+
+ /* Range check */
+ if (unlikely(isnan(num) || !FLOAT8_FITS_IN_INT32(num)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+
+ PG_RETURN_INT32((int32) num);
+}
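+
+/*
+ * Because of the rint() call above, this rounds rather than truncates: under
+ * the usual round-to-nearest-even mode, 2.5::float8::int4 is 2 and
+ * 3.5::float8::int4 is 4, and a just-out-of-range value such as
+ * -2147483648.4 rounds to -2147483648 and is accepted.
+ */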
+
+
+/*
+ * dtoi2 - converts a float8 number to an int2 number
+ */
+Datum
+dtoi2(PG_FUNCTION_ARGS)
+{
+ float8 num = PG_GETARG_FLOAT8(0);
+
+ /*
+ * Get rid of any fractional part in the input. This is so we don't fail
+ * on just-out-of-range values that would round into range. Note
+ * assumption that rint() will pass through a NaN or Inf unchanged.
+ */
+ num = rint(num);
+
+ /* Range check */
+ if (unlikely(isnan(num) || !FLOAT8_FITS_IN_INT16(num)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("smallint out of range")));
+
+ PG_RETURN_INT16((int16) num);
+}
+
+
+/*
+ * i4tod - converts an int4 number to a float8 number
+ */
+Datum
+i4tod(PG_FUNCTION_ARGS)
+{
+ int32 num = PG_GETARG_INT32(0);
+
+ PG_RETURN_FLOAT8((float8) num);
+}
+
+
+/*
+ * i2tod - converts an int2 number to a float8 number
+ */
+Datum
+i2tod(PG_FUNCTION_ARGS)
+{
+ int16 num = PG_GETARG_INT16(0);
+
+ PG_RETURN_FLOAT8((float8) num);
+}
+
+
+/*
+ * ftoi4 - converts a float4 number to an int4 number
+ */
+Datum
+ftoi4(PG_FUNCTION_ARGS)
+{
+ float4 num = PG_GETARG_FLOAT4(0);
+
+ /*
+ * Get rid of any fractional part in the input. This is so we don't fail
+ * on just-out-of-range values that would round into range. Note
+ * assumption that rint() will pass through a NaN or Inf unchanged.
+ */
+ num = rint(num);
+
+ /* Range check */
+ if (unlikely(isnan(num) || !FLOAT4_FITS_IN_INT32(num)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+
+ PG_RETURN_INT32((int32) num);
+}
+
+
+/*
+ * ftoi2 - converts a float4 number to an int2 number
+ */
+Datum
+ftoi2(PG_FUNCTION_ARGS)
+{
+ float4 num = PG_GETARG_FLOAT4(0);
+
+ /*
+ * Get rid of any fractional part in the input. This is so we don't fail
+ * on just-out-of-range values that would round into range. Note
+ * assumption that rint() will pass through a NaN or Inf unchanged.
+ */
+ num = rint(num);
+
+ /* Range check */
+ if (unlikely(isnan(num) || !FLOAT4_FITS_IN_INT16(num)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("smallint out of range")));
+
+ PG_RETURN_INT16((int16) num);
+}
+
+
+/*
+ * i4tof - converts an int4 number to a float4 number
+ */
+Datum
+i4tof(PG_FUNCTION_ARGS)
+{
+ int32 num = PG_GETARG_INT32(0);
+
+ PG_RETURN_FLOAT4((float4) num);
+}
+
+
+/*
+ * i2tof - converts an int2 number to a float4 number
+ */
+Datum
+i2tof(PG_FUNCTION_ARGS)
+{
+ int16 num = PG_GETARG_INT16(0);
+
+ PG_RETURN_FLOAT4((float4) num);
+}
+
+
+/*
+ * =======================
+ * RANDOM FLOAT8 OPERATORS
+ * =======================
+ */
+
+/*
+ * dround - returns ROUND(arg1)
+ */
+Datum
+dround(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+
+ PG_RETURN_FLOAT8(rint(arg1));
+}
+
+/*
+ * dceil - returns the smallest integer greater than or
+ * equal to the specified float
+ */
+Datum
+dceil(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+
+ PG_RETURN_FLOAT8(ceil(arg1));
+}
+
+/*
+ * dfloor - returns the largest integer lesser than or
+ * equal to the specified float
+ */
+Datum
+dfloor(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+
+ PG_RETURN_FLOAT8(floor(arg1));
+}
+
+/*
+ * dsign - returns -1 if the argument is less than 0, 0
+ * if the argument is equal to 0, and 1 if the
+ * argument is greater than zero.
+ */
+Datum
+dsign(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ if (arg1 > 0)
+ result = 1.0;
+ else if (arg1 < 0)
+ result = -1.0;
+ else
+ result = 0.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+/*
+ * dtrunc - returns truncation-towards-zero of arg1,
+ * arg1 >= 0 ... the greatest integer less
+ * than or equal to arg1
+ * arg1 < 0 ... the least integer greater
+ * than or equal to arg1
+ */
+Datum
+dtrunc(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ if (arg1 >= 0)
+ result = floor(arg1);
+ else
+ result = -floor(-arg1);
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * dsqrt - returns square root of arg1
+ */
+Datum
+dsqrt(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ if (arg1 < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_POWER_FUNCTION),
+ errmsg("cannot take square root of a negative number")));
+
+ result = sqrt(arg1);
+ if (unlikely(isinf(result)) && !isinf(arg1))
+ float_overflow_error();
+ if (unlikely(result == 0.0) && arg1 != 0.0)
+ float_underflow_error();
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * dcbrt - returns cube root of arg1
+ */
+Datum
+dcbrt(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ result = cbrt(arg1);
+ if (unlikely(isinf(result)) && !isinf(arg1))
+ float_overflow_error();
+ if (unlikely(result == 0.0) && arg1 != 0.0)
+ float_underflow_error();
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * dpow - returns pow(arg1,arg2)
+ */
+Datum
+dpow(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+ float8 result;
+
+ /*
+ * The POSIX spec says that NaN ^ 0 = 1, and 1 ^ NaN = 1, while all other
+ * cases with NaN inputs yield NaN (with no error). Many older platforms
+ * get one or more of these cases wrong, so deal with them via explicit
+ * logic rather than trusting pow(3).
+ */
+ if (isnan(arg1))
+ {
+ if (isnan(arg2) || arg2 != 0.0)
+ PG_RETURN_FLOAT8(get_float8_nan());
+ PG_RETURN_FLOAT8(1.0);
+ }
+ if (isnan(arg2))
+ {
+ if (arg1 != 1.0)
+ PG_RETURN_FLOAT8(get_float8_nan());
+ PG_RETURN_FLOAT8(1.0);
+ }
+
+ /*
+ * The SQL spec requires that we emit a particular SQLSTATE error code for
+ * certain error conditions. Specifically, we don't return a
+ * divide-by-zero error code for 0 ^ -1.
+ */
+ if (arg1 == 0 && arg2 < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_POWER_FUNCTION),
+ errmsg("zero raised to a negative power is undefined")));
+ if (arg1 < 0 && floor(arg2) != arg2)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_POWER_FUNCTION),
+ errmsg("a negative number raised to a non-integer power yields a complex result")));
+
+ /*
+ * We don't trust the platform's pow() to handle infinity cases per POSIX
+ * spec either, so deal with those explicitly too. It's easier to handle
+ * infinite y first, so that it doesn't matter if x is also infinite.
+ */
+ if (isinf(arg2))
+ {
+ float8 absx = fabs(arg1);
+
+ if (absx == 1.0)
+ result = 1.0;
+ else if (arg2 > 0.0) /* y = +Inf */
+ {
+ if (absx > 1.0)
+ result = arg2;
+ else
+ result = 0.0;
+ }
+ else /* y = -Inf */
+ {
+ if (absx > 1.0)
+ result = 0.0;
+ else
+ result = -arg2;
+ }
+ }
+ else if (isinf(arg1))
+ {
+ if (arg2 == 0.0)
+ result = 1.0;
+ else if (arg1 > 0.0) /* x = +Inf */
+ {
+ if (arg2 > 0.0)
+ result = arg1;
+ else
+ result = 0.0;
+ }
+ else /* x = -Inf */
+ {
+ /*
+ * Per POSIX, the sign of the result depends on whether y is an
+ * odd integer. Since x < 0, we already know from the previous
+ * domain check that y is an integer. It is odd if y/2 is not
+ * also an integer.
+ */
+ float8 halfy = arg2 / 2; /* should be computed exactly */
+ bool yisoddinteger = (floor(halfy) != halfy);
+
+ if (arg2 > 0.0)
+ result = yisoddinteger ? arg1 : -arg1;
+ else
+ result = yisoddinteger ? -0.0 : 0.0;
+ }
+ }
+ else
+ {
+ /*
+ * pow() sets errno on only some platforms, depending on whether it
+ * follows _IEEE_, _POSIX_, _XOPEN_, or _SVID_, so we must check both
+ * errno and invalid output values. (We can't rely on just the
+ * latter, either; some old platforms return a large-but-finite
+ * HUGE_VAL when reporting overflow.)
+ */
+ errno = 0;
+ result = pow(arg1, arg2);
+ if (errno == EDOM || isnan(result))
+ {
+ /*
+ * We handled all possible domain errors above, so this should be
+ * impossible. However, old glibc versions on x86 have a bug that
+ * causes them to fail this way for abs(y) greater than 2^63:
+ *
+ * https://sourceware.org/bugzilla/show_bug.cgi?id=3866
+ *
+ * Hence, if we get here, assume y is finite but large (large
+ * enough to be certainly even). The result should be 0 if x == 0,
+ * 1.0 if abs(x) == 1.0, otherwise an overflow or underflow error.
+ */
+ if (arg1 == 0.0)
+ result = 0.0; /* we already verified y is positive */
+ else
+ {
+ float8 absx = fabs(arg1);
+
+ if (absx == 1.0)
+ result = 1.0;
+ else if (arg2 >= 0.0 ? (absx > 1.0) : (absx < 1.0))
+ float_overflow_error();
+ else
+ float_underflow_error();
+ }
+ }
+ else if (errno == ERANGE)
+ {
+ if (result != 0.0)
+ float_overflow_error();
+ else
+ float_underflow_error();
+ }
+ else
+ {
+ if (unlikely(isinf(result)))
+ float_overflow_error();
+ if (unlikely(result == 0.0) && arg1 != 0.0)
+ float_underflow_error();
+ }
+ }
+
+ PG_RETURN_FLOAT8(result);
+}
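+
+/*
+ * A few examples of the special cases handled above: power(0, -1) raises the
+ * "zero raised to a negative power" error, power('NaN', 0) and
+ * power(1, 'NaN') both return 1, power('-inf', 3) returns -Infinity (odd
+ * integer exponent), and power('-inf', 4) returns Infinity.
+ */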
+
+
+/*
+ * dexp - returns the exponential function of arg1
+ */
+Datum
+dexp(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ /*
+ * Handle NaN and Inf cases explicitly. This avoids needing to assume
+ * that the platform's exp() conforms to POSIX for these cases, and it
+ * removes some edge cases for the overflow checks below.
+ */
+ if (isnan(arg1))
+ result = arg1;
+ else if (isinf(arg1))
+ {
+ /* Per POSIX, exp(-Inf) is 0 */
+ result = (arg1 > 0.0) ? arg1 : 0;
+ }
+ else
+ {
+ /*
+ * On some platforms, exp() will not set errno but just return Inf or
+ * zero to report overflow/underflow; therefore, test both cases.
+ */
+ errno = 0;
+ result = exp(arg1);
+ if (unlikely(errno == ERANGE))
+ {
+ if (result != 0.0)
+ float_overflow_error();
+ else
+ float_underflow_error();
+ }
+ else if (unlikely(isinf(result)))
+ float_overflow_error();
+ else if (unlikely(result == 0.0))
+ float_underflow_error();
+ }
+
+ PG_RETURN_FLOAT8(result);
+}
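+
+/*
+ * For example, exp(1000) raises an overflow error, exp(-1000) raises an
+ * underflow error (the true result is nonzero but too small to represent as
+ * a nonzero float8), and exp('-Infinity') returns exactly 0 via the explicit
+ * Inf handling above.
+ */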
+
+
+/*
+ * dlog1 - returns the natural logarithm of arg1
+ */
+Datum
+dlog1(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ /*
+ * Emit particular SQLSTATE error codes for ln(). This is required by the
+ * SQL standard.
+ */
+ if (arg1 == 0.0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_LOG),
+ errmsg("cannot take logarithm of zero")));
+ if (arg1 < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_LOG),
+ errmsg("cannot take logarithm of a negative number")));
+
+ result = log(arg1);
+ if (unlikely(isinf(result)) && !isinf(arg1))
+ float_overflow_error();
+ if (unlikely(result == 0.0) && arg1 != 1.0)
+ float_underflow_error();
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * dlog10 - returns the base 10 logarithm of arg1
+ */
+Datum
+dlog10(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ /*
+ * Emit particular SQLSTATE error codes for log(). The SQL spec doesn't
+ * define log(), but it does define ln(), so it makes sense to emit the
+ * same error code for an analogous error condition.
+ */
+ if (arg1 == 0.0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_LOG),
+ errmsg("cannot take logarithm of zero")));
+ if (arg1 < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_LOG),
+ errmsg("cannot take logarithm of a negative number")));
+
+ result = log10(arg1);
+ if (unlikely(isinf(result)) && !isinf(arg1))
+ float_overflow_error();
+ if (unlikely(result == 0.0) && arg1 != 1.0)
+ float_underflow_error();
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * dacos - returns the arccos of arg1 (radians)
+ */
+Datum
+dacos(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ /* Per the POSIX spec, return NaN if the input is NaN */
+ if (isnan(arg1))
+ PG_RETURN_FLOAT8(get_float8_nan());
+
+ /*
+ * The principal branch of the inverse cosine function maps values in the
+ * range [-1, 1] to values in the range [0, Pi], so we should reject any
+ * inputs outside that range and the result will always be finite.
+ */
+ if (arg1 < -1.0 || arg1 > 1.0)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("input is out of range")));
+
+ result = acos(arg1);
+ if (unlikely(isinf(result)))
+ float_overflow_error();
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * dasin - returns the arcsin of arg1 (radians)
+ */
+Datum
+dasin(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ /* Per the POSIX spec, return NaN if the input is NaN */
+ if (isnan(arg1))
+ PG_RETURN_FLOAT8(get_float8_nan());
+
+ /*
+ * The principal branch of the inverse sine function maps values in the
+ * range [-1, 1] to values in the range [-Pi/2, Pi/2], so we should reject
+ * any inputs outside that range and the result will always be finite.
+ */
+ if (arg1 < -1.0 || arg1 > 1.0)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("input is out of range")));
+
+ result = asin(arg1);
+ if (unlikely(isinf(result)))
+ float_overflow_error();
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * datan - returns the arctan of arg1 (radians)
+ */
+Datum
+datan(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ /* Per the POSIX spec, return NaN if the input is NaN */
+ if (isnan(arg1))
+ PG_RETURN_FLOAT8(get_float8_nan());
+
+ /*
+ * The principal branch of the inverse tangent function maps all inputs to
+ * values in the range [-Pi/2, Pi/2], so the result should always be
+ * finite, even if the input is infinite.
+ */
+ result = atan(arg1);
+ if (unlikely(isinf(result)))
+ float_overflow_error();
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * atan2 - returns the arctan of arg1/arg2 (radians)
+ */
+Datum
+datan2(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+ float8 result;
+
+ /* Per the POSIX spec, return NaN if either input is NaN */
+ if (isnan(arg1) || isnan(arg2))
+ PG_RETURN_FLOAT8(get_float8_nan());
+
+ /*
+ * atan2 maps all inputs to values in the range [-Pi, Pi], so the result
+ * should always be finite, even if the inputs are infinite.
+ */
+ result = atan2(arg1, arg2);
+ if (unlikely(isinf(result)))
+ float_overflow_error();
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * dcos - returns the cosine of arg1 (radians)
+ */
+Datum
+dcos(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ /* Per the POSIX spec, return NaN if the input is NaN */
+ if (isnan(arg1))
+ PG_RETURN_FLOAT8(get_float8_nan());
+
+ /*
+ * cos() is periodic and so theoretically can work for all finite inputs,
+ * but some implementations may choose to throw error if the input is so
+ * large that there are no significant digits in the result. So we should
+ * check for errors. POSIX allows an error to be reported either via
+ * errno or via fetestexcept(), but currently we only support checking
+ * errno. (fetestexcept() is rumored to report underflow unreasonably
+ * early on some platforms, so it's not clear that believing it would be a
+ * net improvement anyway.)
+ *
+ * For infinite inputs, POSIX specifies that the trigonometric functions
+ * should return a domain error; but we won't notice that unless the
+ * platform reports via errno, so also explicitly test for infinite
+ * inputs.
+ */
+ errno = 0;
+ result = cos(arg1);
+ if (errno != 0 || isinf(arg1))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("input is out of range")));
+ if (unlikely(isinf(result)))
+ float_overflow_error();
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * dcot - returns the cotangent of arg1 (radians)
+ */
+Datum
+dcot(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ /* Per the POSIX spec, return NaN if the input is NaN */
+ if (isnan(arg1))
+ PG_RETURN_FLOAT8(get_float8_nan());
+
+ /* Be sure to throw an error if the input is infinite --- see dcos() */
+ errno = 0;
+ result = tan(arg1);
+ if (errno != 0 || isinf(arg1))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("input is out of range")));
+
+ result = 1.0 / result;
+ /* Not checking for overflow because cot(0) == Inf */
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * dsin - returns the sine of arg1 (radians)
+ */
+Datum
+dsin(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ /* Per the POSIX spec, return NaN if the input is NaN */
+ if (isnan(arg1))
+ PG_RETURN_FLOAT8(get_float8_nan());
+
+ /* Be sure to throw an error if the input is infinite --- see dcos() */
+ errno = 0;
+ result = sin(arg1);
+ if (errno != 0 || isinf(arg1))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("input is out of range")));
+ if (unlikely(isinf(result)))
+ float_overflow_error();
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * dtan - returns the tangent of arg1 (radians)
+ */
+Datum
+dtan(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ /* Per the POSIX spec, return NaN if the input is NaN */
+ if (isnan(arg1))
+ PG_RETURN_FLOAT8(get_float8_nan());
+
+ /* Be sure to throw an error if the input is infinite --- see dcos() */
+ errno = 0;
+ result = tan(arg1);
+ if (errno != 0 || isinf(arg1))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("input is out of range")));
+ /* Not checking for overflow because tan(pi/2) == Inf */
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/* ========== DEGREE-BASED TRIGONOMETRIC FUNCTIONS ========== */
+
+
+/*
+ * Initialize the cached constants declared at the head of this file
+ * (sin_30 etc). The fact that we need those at all, let alone need this
+ * Rube-Goldberg-worthy method of initializing them, is because there are
+ * compilers out there that will precompute expressions such as sin(constant)
+ * using a sin() function different from what will be used at runtime. If we
+ * want exact results, we must ensure that none of the scaling constants used
+ * in the degree-based trig functions are computed that way. To do so, we
+ * compute them from the variables degree_c_thirty etc, which are also really
+ * constants, but the compiler cannot assume that.
+ *
+ * Other hazards we are trying to forestall with this kluge include the
+ * possibility that compilers will rearrange the expressions, or compute
+ * some intermediate results in registers wider than a standard double.
+ *
+ * In the places where we use these constants, the typical pattern is like
+ * volatile float8 sin_x = sin(x * RADIANS_PER_DEGREE);
+ * return (sin_x / sin_30);
+ * where we hope to get a value of exactly 1.0 from the division when x = 30.
+ * The volatile temporary variable is needed on machines with wide float
+ * registers, to ensure that the result of sin(x) is rounded to double width
+ * the same as the value of sin_30 has been. Experimentation with gcc shows
+ * that marking the temp variable volatile is necessary to make the store and
+ * reload actually happen; hopefully the same trick works for other compilers.
+ * (gcc's documentation suggests using the -ffloat-store compiler switch to
+ * ensure this, but that is compiler-specific and it also pessimizes code in
+ * many places where we don't care about this.)
+ */
+static void
+init_degree_constants(void)
+{
+ sin_30 = sin(degree_c_thirty * RADIANS_PER_DEGREE);
+ one_minus_cos_60 = 1.0 - cos(degree_c_sixty * RADIANS_PER_DEGREE);
+ asin_0_5 = asin(degree_c_one_half);
+ acos_0_5 = acos(degree_c_one_half);
+ atan_1_0 = atan(degree_c_one);
+ tan_45 = sind_q1(degree_c_forty_five) / cosd_q1(degree_c_forty_five);
+ cot_45 = cosd_q1(degree_c_forty_five) / sind_q1(degree_c_forty_five);
+ degree_consts_set = true;
+}
+
+#define INIT_DEGREE_CONSTANTS() \
+do { \
+ if (!degree_consts_set) \
+ init_degree_constants(); \
+} while(0)
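+
+/*
+ * The practical effect is that the degree-based functions below can deliver
+ * exact answers at the "nice" angles; for example, sind(30) and cosd(60)
+ * both return exactly 0.5 rather than a value that is merely very close.
+ */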
+
+
+/*
+ * asind_q1 - returns the inverse sine of x in degrees, for x in
+ * the range [0, 1]. The result is an angle in the
+ * first quadrant --- [0, 90] degrees.
+ *
+ * For the 3 special case inputs (0, 0.5 and 1), this
+ * function will return exact values (0, 30 and 90
+ * degrees respectively).
+ */
+static double
+asind_q1(double x)
+{
+ /*
+ * Stitch together inverse sine and cosine functions for the ranges [0,
+ * 0.5] and (0.5, 1]. Each expression below is guaranteed to return
+ * exactly 30 for x=0.5, so the result is a continuous monotonic function
+ * over the full range.
+ */
+ if (x <= 0.5)
+ {
+ volatile float8 asin_x = asin(x);
+
+ return (asin_x / asin_0_5) * 30.0;
+ }
+ else
+ {
+ volatile float8 acos_x = acos(x);
+
+ return 90.0 - (acos_x / acos_0_5) * 60.0;
+ }
+}
+
+
+/*
+ * acosd_q1 - returns the inverse cosine of x in degrees, for x in
+ * the range [0, 1]. The result is an angle in the
+ * first quadrant --- [0, 90] degrees.
+ *
+ * For the 3 special case inputs (0, 0.5 and 1), this
+ * function will return exact values (0, 60 and 90
+ * degrees respectively).
+ */
+static double
+acosd_q1(double x)
+{
+ /*
+ * Stitch together inverse sine and cosine functions for the ranges [0,
+ * 0.5] and (0.5, 1]. Each expression below is guaranteed to return
+ * exactly 60 for x=0.5, so the result is a continuous monotonic function
+ * over the full range.
+ */
+ if (x <= 0.5)
+ {
+ volatile float8 asin_x = asin(x);
+
+ return 90.0 - (asin_x / asin_0_5) * 30.0;
+ }
+ else
+ {
+ volatile float8 acos_x = acos(x);
+
+ return (acos_x / acos_0_5) * 60.0;
+ }
+}
+
+
+/*
+ * dacosd - returns the arccos of arg1 (degrees)
+ */
+Datum
+dacosd(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ /* Per the POSIX spec, return NaN if the input is NaN */
+ if (isnan(arg1))
+ PG_RETURN_FLOAT8(get_float8_nan());
+
+ INIT_DEGREE_CONSTANTS();
+
+ /*
+ * The principal branch of the inverse cosine function maps values in the
+ * range [-1, 1] to values in the range [0, 180], so we should reject any
+ * inputs outside that range and the result will always be finite.
+ */
+ if (arg1 < -1.0 || arg1 > 1.0)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("input is out of range")));
+
+ if (arg1 >= 0.0)
+ result = acosd_q1(arg1);
+ else
+ result = 90.0 + asind_q1(-arg1);
+
+ if (unlikely(isinf(result)))
+ float_overflow_error();
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * dasind - returns the arcsin of arg1 (degrees)
+ */
+Datum
+dasind(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ /* Per the POSIX spec, return NaN if the input is NaN */
+ if (isnan(arg1))
+ PG_RETURN_FLOAT8(get_float8_nan());
+
+ INIT_DEGREE_CONSTANTS();
+
+ /*
+ * The principal branch of the inverse sine function maps values in the
+ * range [-1, 1] to values in the range [-90, 90], so we should reject any
+ * inputs outside that range and the result will always be finite.
+ */
+ if (arg1 < -1.0 || arg1 > 1.0)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("input is out of range")));
+
+ if (arg1 >= 0.0)
+ result = asind_q1(arg1);
+ else
+ result = -asind_q1(-arg1);
+
+ if (unlikely(isinf(result)))
+ float_overflow_error();
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * datand - returns the arctan of arg1 (degrees)
+ */
+Datum
+datand(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+ volatile float8 atan_arg1;
+
+ /* Per the POSIX spec, return NaN if the input is NaN */
+ if (isnan(arg1))
+ PG_RETURN_FLOAT8(get_float8_nan());
+
+ INIT_DEGREE_CONSTANTS();
+
+ /*
+ * The principal branch of the inverse tangent function maps all inputs to
+ * values in the range [-90, 90], so the result should always be finite,
+ * even if the input is infinite. Additionally, we take care to ensure
+ * that when arg1 is 1, the result is exactly 45.
+ */
+ atan_arg1 = atan(arg1);
+ result = (atan_arg1 / atan_1_0) * 45.0;
+
+ if (unlikely(isinf(result)))
+ float_overflow_error();
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * atan2d - returns the arctan of arg1/arg2 (degrees)
+ */
+Datum
+datan2d(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+ float8 result;
+ volatile float8 atan2_arg1_arg2;
+
+ /* Per the POSIX spec, return NaN if either input is NaN */
+ if (isnan(arg1) || isnan(arg2))
+ PG_RETURN_FLOAT8(get_float8_nan());
+
+ INIT_DEGREE_CONSTANTS();
+
+ /*
+ * atan2d maps all inputs to values in the range [-180, 180], so the
+ * result should always be finite, even if the inputs are infinite.
+ *
+ * Note: this coding assumes that atan(1.0) is a suitable scaling constant
+ * to get an exact result from atan2(). This might well fail on us at
+ * some point, requiring us to decide exactly what inputs we think we're
+ * going to guarantee an exact result for.
+ */
+ atan2_arg1_arg2 = atan2(arg1, arg2);
+ result = (atan2_arg1_arg2 / atan_1_0) * 45.0;
+
+ if (unlikely(isinf(result)))
+ float_overflow_error();
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * sind_0_to_30 - returns the sine of an angle that lies between 0 and
+ * 30 degrees. This will return exactly 0 when x is 0,
+ * and exactly 0.5 when x is 30 degrees.
+ */
+static double
+sind_0_to_30(double x)
+{
+ volatile float8 sin_x = sin(x * RADIANS_PER_DEGREE);
+
+ return (sin_x / sin_30) / 2.0;
+}
+
+
+/*
+ * cosd_0_to_60 - returns the cosine of an angle that lies between 0
+ * and 60 degrees. This will return exactly 1 when x
+ * is 0, and exactly 0.5 when x is 60 degrees.
+ */
+static double
+cosd_0_to_60(double x)
+{
+ volatile float8 one_minus_cos_x = 1.0 - cos(x * RADIANS_PER_DEGREE);
+
+ return 1.0 - (one_minus_cos_x / one_minus_cos_60) / 2.0;
+}
+
+
+/*
+ * sind_q1 - returns the sine of an angle in the first quadrant
+ * (0 to 90 degrees).
+ */
+static double
+sind_q1(double x)
+{
+ /*
+ * Stitch together the sine and cosine functions for the ranges [0, 30]
+ * and (30, 90]. These guarantee to return exact answers at their
+ * endpoints, so the overall result is a continuous monotonic function
+ * that gives exact results when x = 0, 30 and 90 degrees.
+ */
+ if (x <= 30.0)
+ return sind_0_to_30(x);
+ else
+ return cosd_0_to_60(90.0 - x);
+}
+
+
+/*
+ * cosd_q1 - returns the cosine of an angle in the first quadrant
+ * (0 to 90 degrees).
+ */
+static double
+cosd_q1(double x)
+{
+ /*
+ * Stitch together the sine and cosine functions for the ranges [0, 60]
+ * and (60, 90]. These guarantee to return exact answers at their
+ * endpoints, so the overall result is a continuous monotonic function
+ * that gives exact results when x = 0, 60 and 90 degrees.
+ */
+ if (x <= 60.0)
+ return cosd_0_to_60(x);
+ else
+ return sind_0_to_30(90.0 - x);
+}
+
+
+/*
+ * dcosd - returns the cosine of arg1 (degrees)
+ */
+Datum
+dcosd(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+ int sign = 1;
+
+ /*
+ * Per the POSIX spec, return NaN if the input is NaN and throw an error
+ * if the input is infinite.
+ */
+ if (isnan(arg1))
+ PG_RETURN_FLOAT8(get_float8_nan());
+
+ if (isinf(arg1))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("input is out of range")));
+
+ INIT_DEGREE_CONSTANTS();
+
+ /* Reduce the range of the input to [0,90] degrees */
+ arg1 = fmod(arg1, 360.0);
+
+ if (arg1 < 0.0)
+ {
+ /* cosd(-x) = cosd(x) */
+ arg1 = -arg1;
+ }
+
+ if (arg1 > 180.0)
+ {
+ /* cosd(360-x) = cosd(x) */
+ arg1 = 360.0 - arg1;
+ }
+
+ if (arg1 > 90.0)
+ {
+ /* cosd(180-x) = -cosd(x) */
+ arg1 = 180.0 - arg1;
+ sign = -sign;
+ }
+
+ result = sign * cosd_q1(arg1);
+
+ if (unlikely(isinf(result)))
+ float_overflow_error();
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * dcotd - returns the cotangent of arg1 (degrees)
+ */
+Datum
+dcotd(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+ volatile float8 cot_arg1;
+ int sign = 1;
+
+ /*
+ * Per the POSIX spec, return NaN if the input is NaN and throw an error
+ * if the input is infinite.
+ */
+ if (isnan(arg1))
+ PG_RETURN_FLOAT8(get_float8_nan());
+
+ if (isinf(arg1))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("input is out of range")));
+
+ INIT_DEGREE_CONSTANTS();
+
+ /* Reduce the range of the input to [0,90] degrees */
+ arg1 = fmod(arg1, 360.0);
+
+ if (arg1 < 0.0)
+ {
+ /* cotd(-x) = -cotd(x) */
+ arg1 = -arg1;
+ sign = -sign;
+ }
+
+ if (arg1 > 180.0)
+ {
+ /* cotd(360-x) = -cotd(x) */
+ arg1 = 360.0 - arg1;
+ sign = -sign;
+ }
+
+ if (arg1 > 90.0)
+ {
+ /* cotd(180-x) = -cotd(x) */
+ arg1 = 180.0 - arg1;
+ sign = -sign;
+ }
+
+ cot_arg1 = cosd_q1(arg1) / sind_q1(arg1);
+ result = sign * (cot_arg1 / cot_45);
+
+ /*
+ * On some machines we get cotd(270) = minus zero, but this isn't always
+ * true. For portability, and because the user constituency for this
+ * function probably doesn't want minus zero, force it to plain zero.
+ */
+ if (result == 0.0)
+ result = 0.0;
+
+ /* Not checking for overflow because cotd(0) == Inf */
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * dsind - returns the sine of arg1 (degrees)
+ */
+Datum
+dsind(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+ int sign = 1;
+
+ /*
+ * Per the POSIX spec, return NaN if the input is NaN and throw an error
+ * if the input is infinite.
+ */
+ if (isnan(arg1))
+ PG_RETURN_FLOAT8(get_float8_nan());
+
+ if (isinf(arg1))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("input is out of range")));
+
+ INIT_DEGREE_CONSTANTS();
+
+ /* Reduce the range of the input to [0,90] degrees */
+ arg1 = fmod(arg1, 360.0);
+
+ if (arg1 < 0.0)
+ {
+ /* sind(-x) = -sind(x) */
+ arg1 = -arg1;
+ sign = -sign;
+ }
+
+ if (arg1 > 180.0)
+ {
+ /* sind(360-x) = -sind(x) */
+ arg1 = 360.0 - arg1;
+ sign = -sign;
+ }
+
+ if (arg1 > 90.0)
+ {
+ /* sind(180-x) = sind(x) */
+ arg1 = 180.0 - arg1;
+ }
+
+ result = sign * sind_q1(arg1);
+
+ if (unlikely(isinf(result)))
+ float_overflow_error();
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * dtand - returns the tangent of arg1 (degrees)
+ */
+Datum
+dtand(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+ volatile float8 tan_arg1;
+ int sign = 1;
+
+ /*
+ * Per the POSIX spec, return NaN if the input is NaN and throw an error
+ * if the input is infinite.
+ */
+ if (isnan(arg1))
+ PG_RETURN_FLOAT8(get_float8_nan());
+
+ if (isinf(arg1))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("input is out of range")));
+
+ INIT_DEGREE_CONSTANTS();
+
+ /* Reduce the range of the input to [0,90] degrees */
+ arg1 = fmod(arg1, 360.0);
+
+ if (arg1 < 0.0)
+ {
+ /* tand(-x) = -tand(x) */
+ arg1 = -arg1;
+ sign = -sign;
+ }
+
+ if (arg1 > 180.0)
+ {
+ /* tand(360-x) = -tand(x) */
+ arg1 = 360.0 - arg1;
+ sign = -sign;
+ }
+
+ if (arg1 > 90.0)
+ {
+ /* tand(180-x) = -tand(x) */
+ arg1 = 180.0 - arg1;
+ sign = -sign;
+ }
+
+ tan_arg1 = sind_q1(arg1) / cosd_q1(arg1);
+ result = sign * (tan_arg1 / tan_45);
+
+ /*
+ * On some machines we get tand(180) = minus zero, but this isn't always
+ * true. For portability, and because the user constituency for this
+ * function probably doesn't want minus zero, force it to plain zero.
+ */
+ if (result == 0.0)
+ result = 0.0;
+
+ /* Not checking for overflow because tand(90) == Inf */
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * degrees - returns degrees converted from radians
+ */
+Datum
+degrees(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+
+ PG_RETURN_FLOAT8(float8_div(arg1, RADIANS_PER_DEGREE));
+}
+
+
+/*
+ * dpi - returns the constant PI
+ */
+Datum
+dpi(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(M_PI);
+}
+
+
+/*
+ * radians - returns radians converted from degrees
+ */
+Datum
+radians(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+
+ PG_RETURN_FLOAT8(float8_mul(arg1, RADIANS_PER_DEGREE));
+}
+
+
+/* ========== HYPERBOLIC FUNCTIONS ========== */
+
+
+/*
+ * dsinh - returns the hyperbolic sine of arg1
+ */
+Datum
+dsinh(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ errno = 0;
+ result = sinh(arg1);
+
+ /*
+ * If an ERANGE error occurs, it means there is an overflow.  For sinh,
+ * the result should be either -infinity or infinity, depending on the
+ * sign of arg1.
+ */
+ if (errno == ERANGE)
+ {
+ if (arg1 < 0)
+ result = -get_float8_infinity();
+ else
+ result = get_float8_infinity();
+ }
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * dcosh - returns the hyperbolic cosine of arg1
+ */
+Datum
+dcosh(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ errno = 0;
+ result = cosh(arg1);
+
+ /*
+ * If an ERANGE error occurs, it means there is an overflow.  As cosh is
+ * always positive, it always means the result is positive infinity.
+ */
+ if (errno == ERANGE)
+ result = get_float8_infinity();
+
+ if (unlikely(result == 0.0))
+ float_underflow_error();
+
+ PG_RETURN_FLOAT8(result);
+}
+
+/*
+ * dtanh - returns the hyperbolic tangent of arg1
+ */
+Datum
+dtanh(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ /*
+ * For tanh, we don't need an errno check because it never overflows.
+ */
+ result = tanh(arg1);
+
+ if (unlikely(isinf(result)))
+ float_overflow_error();
+
+ PG_RETURN_FLOAT8(result);
+}
+
+/*
+ * dasinh - returns the inverse hyperbolic sine of arg1
+ */
+Datum
+dasinh(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ /*
+ * For asinh, we don't need an errno check because it never overflows.
+ */
+ result = asinh(arg1);
+
+ PG_RETURN_FLOAT8(result);
+}
+
+/*
+ * dacosh - returns the inverse hyperbolic cosine of arg1
+ */
+Datum
+dacosh(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ /*
+ * acosh is only defined for inputs >= 1.0. By checking this ourselves,
+ * we need not worry about checking for an EDOM error, which is a good
+ * thing because some implementations will report that for NaN. Otherwise,
+ * no error is possible.
+ */
+ if (arg1 < 1.0)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("input is out of range")));
+
+ result = acosh(arg1);
+
+ PG_RETURN_FLOAT8(result);
+}
+
+/*
+ * datanh - returns the inverse hyperbolic tangent of arg1
+ */
+Datum
+datanh(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float8 result;
+
+ /*
+ * atanh is only defined for inputs between -1 and 1. By checking this
+ * ourselves, we need not worry about checking for an EDOM error, which is
+ * a good thing because some implementations will report that for NaN.
+ */
+ if (arg1 < -1.0 || arg1 > 1.0)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("input is out of range")));
+
+ /*
+ * Also handle the infinity cases ourselves; this is helpful because old
+ * glibc versions may produce the wrong errno for this. All other inputs
+ * cannot produce an error.
+ */
+ if (arg1 == -1.0)
+ result = -get_float8_infinity();
+ else if (arg1 == 1.0)
+ result = get_float8_infinity();
+ else
+ result = atanh(arg1);
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * drandom - returns a random number
+ */
+Datum
+drandom(PG_FUNCTION_ARGS)
+{
+ float8 result;
+
+ /* Initialize random seed, if not done yet in this process */
+ if (unlikely(!drandom_seed_set))
+ {
+ /*
+ * If possible, initialize the seed using high-quality random bits.
+ * Should that fail for some reason, we fall back on a lower-quality
+ * seed based on current time and PID.
+ */
+ if (unlikely(!pg_prng_strong_seed(&drandom_seed)))
+ {
+ TimestampTz now = GetCurrentTimestamp();
+ uint64 iseed;
+
+ /* Mix the PID with the most predictable bits of the timestamp */
+ iseed = (uint64) now ^ ((uint64) MyProcPid << 32);
+ pg_prng_seed(&drandom_seed, iseed);
+ }
+ drandom_seed_set = true;
+ }
+
+ /* pg_prng_double produces desired result range [0.0, 1.0) */
+ result = pg_prng_double(&drandom_seed);
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+/*
+ * setseed - set seed for the random number generator
+ */
+Datum
+setseed(PG_FUNCTION_ARGS)
+{
+ float8 seed = PG_GETARG_FLOAT8(0);
+
+ if (seed < -1 || seed > 1 || isnan(seed))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("setseed parameter %g is out of allowed range [-1,1]",
+ seed)));
+
+ pg_prng_fseed(&drandom_seed, seed);
+ drandom_seed_set = true;
+
+ PG_RETURN_VOID();
+}
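+
+/*
+ * Example usage: "SELECT setseed(0.5); SELECT random();" yields a repeatable
+ * sequence within a session, since setseed() and random() share
+ * drandom_seed.
+ */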
+
+
+
+/*
+ * =========================
+ * FLOAT AGGREGATE OPERATORS
+ * =========================
+ *
+ * float8_accum - accumulate for AVG(), variance aggregates, etc.
+ * float4_accum - same, but input data is float4
+ * float8_avg - produce final result for float AVG()
+ * float8_var_samp - produce final result for float VAR_SAMP()
+ * float8_var_pop - produce final result for float VAR_POP()
+ * float8_stddev_samp - produce final result for float STDDEV_SAMP()
+ * float8_stddev_pop - produce final result for float STDDEV_POP()
+ *
+ * The naive schoolbook implementation of these aggregates works by
+ * accumulating sum(X) and sum(X^2). However, this approach suffers from
+ * large rounding errors in the final computation of quantities like the
+ * population variance (N*sum(X^2) - sum(X)^2) / N^2, since each of the
+ * intermediate terms is potentially very large, while the difference is often
+ * quite small.
+ *
+ * Instead we use the Youngs-Cramer algorithm [1] which works by accumulating
+ * Sx=sum(X) and Sxx=sum((X-Sx/N)^2), using a numerically stable algorithm to
+ * incrementally update those quantities. The final computations of each of
+ * the aggregate values is then trivial and gives more accurate results (for
+ * example, the population variance is just Sxx/N). This algorithm is also
+ * fairly easy to generalize to allow parallel execution without loss of
+ * precision (see, for example, [2]). For more details, and a comparison of
+ * this with other algorithms, see [3].
+ *
+ * The transition datatype for all these aggregates is a 3-element array
+ * of float8, holding the values N, Sx, Sxx in that order.
+ *
+ * Note that we represent N as a float to avoid having to build a special
+ * datatype. Given a reasonable floating-point implementation, there should
+ * be no accuracy loss unless N exceeds 2 ^ 52 or so (by which time the
+ * user will have doubtless lost interest anyway...)
+ *
+ * [1] Some Results Relevant to Choice of Sum and Sum-of-Product Algorithms,
+ * E. A. Youngs and E. M. Cramer, Technometrics Vol 13, No 3, August 1971.
+ *
+ * [2] Updating Formulae and a Pairwise Algorithm for Computing Sample
+ * Variances, T. F. Chan, G. H. Golub & R. J. LeVeque, COMPSTAT 1982.
+ *
+ * [3] Numerically Stable Parallel Computation of (Co-)Variance, Erich
+ * Schubert and Michael Gertz, Proceedings of the 30th International
+ * Conference on Scientific and Statistical Database Management, 2018.
+ */
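
For reference, the per-row update that float8_accum below implements can be written compactly as follows (an editorial restatement derived from the code, not part of the patch; k is the number of rows seen so far, including the new value x_k):

    N_k   = k
    Sx_k  = Sx_(k-1) + x_k
    Sxx_k = Sxx_(k-1) + (k * x_k - Sx_k)^2 / (k * (k - 1))    for k >= 2, with Sxx_1 = 0

The finalizers then reduce to mean = Sx/N, population variance = Sxx/N and sample variance = Sxx/(N - 1).
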
+
+static float8 *
+check_float8_array(ArrayType *transarray, const char *caller, int n)
+{
+ /*
+ * We expect the input to be an N-element float array; verify that. We
+ * don't need to use deconstruct_array() since the array data is just
+ * going to look like a C array of N float8 values.
+ */
+ if (ARR_NDIM(transarray) != 1 ||
+ ARR_DIMS(transarray)[0] != n ||
+ ARR_HASNULL(transarray) ||
+ ARR_ELEMTYPE(transarray) != FLOAT8OID)
+ elog(ERROR, "%s: expected %d-element float8 array", caller, n);
+ return (float8 *) ARR_DATA_PTR(transarray);
+}
+
+/*
+ * float8_combine
+ *
+ * An aggregate combine function used to combine two 3-element
+ * aggregate transition states into a single transition state.
+ * This function is used only in two-stage aggregation and
+ * shouldn't be called outside an aggregate context.
+ */
+Datum
+float8_combine(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray1 = PG_GETARG_ARRAYTYPE_P(0);
+ ArrayType *transarray2 = PG_GETARG_ARRAYTYPE_P(1);
+ float8 *transvalues1;
+ float8 *transvalues2;
+ float8 N1,
+ Sx1,
+ Sxx1,
+ N2,
+ Sx2,
+ Sxx2,
+ tmp,
+ N,
+ Sx,
+ Sxx;
+
+ transvalues1 = check_float8_array(transarray1, "float8_combine", 3);
+ transvalues2 = check_float8_array(transarray2, "float8_combine", 3);
+
+ N1 = transvalues1[0];
+ Sx1 = transvalues1[1];
+ Sxx1 = transvalues1[2];
+
+ N2 = transvalues2[0];
+ Sx2 = transvalues2[1];
+ Sxx2 = transvalues2[2];
+
+ /*--------------------
+ * The transition values combine using a generalization of the
+ * Youngs-Cramer algorithm as follows:
+ *
+ * N = N1 + N2
+ * Sx = Sx1 + Sx2
+ * Sxx = Sxx1 + Sxx2 + N1 * N2 * (Sx1/N1 - Sx2/N2)^2 / N;
+ *
+ * It's worth handling the special cases N1 = 0 and N2 = 0 separately
+ * since those cases are trivial, and we then don't need to worry about
+ * division-by-zero errors in the general case.
+ *--------------------
+ */
+ if (N1 == 0.0)
+ {
+ N = N2;
+ Sx = Sx2;
+ Sxx = Sxx2;
+ }
+ else if (N2 == 0.0)
+ {
+ N = N1;
+ Sx = Sx1;
+ Sxx = Sxx1;
+ }
+ else
+ {
+ N = N1 + N2;
+ Sx = float8_pl(Sx1, Sx2);
+ tmp = Sx1 / N1 - Sx2 / N2;
+ Sxx = Sxx1 + Sxx2 + N1 * N2 * tmp * tmp / N;
+ if (unlikely(isinf(Sxx)) && !isinf(Sxx1) && !isinf(Sxx2))
+ float_overflow_error();
+ }
+
+ /*
+ * If we're invoked as an aggregate, we can cheat and modify our first
+ * parameter in-place to reduce palloc overhead. Otherwise we construct a
+ * new array with the updated transition data and return it.
+ */
+ if (AggCheckCallContext(fcinfo, NULL))
+ {
+ transvalues1[0] = N;
+ transvalues1[1] = Sx;
+ transvalues1[2] = Sxx;
+
+ PG_RETURN_ARRAYTYPE_P(transarray1);
+ }
+ else
+ {
+ Datum transdatums[3];
+ ArrayType *result;
+
+ transdatums[0] = Float8GetDatumFast(N);
+ transdatums[1] = Float8GetDatumFast(Sx);
+ transdatums[2] = Float8GetDatumFast(Sxx);
+
+ result = construct_array(transdatums, 3,
+ FLOAT8OID,
+ sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE);
+
+ PG_RETURN_ARRAYTYPE_P(result);
+ }
+}
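
As a quick sanity check of the combine formula above (editorial, not part of the patch): combining the partial states for the groups {1, 2} and {3, 4} gives

    N   = 2 + 2 = 4
    Sx  = 3 + 7 = 10
    Sxx = 0.5 + 0.5 + 2 * 2 * (3/2 - 7/2)^2 / 4 = 1 + 4 = 5

which matches Sxx computed directly over {1, 2, 3, 4} (mean 2.5; squared deviations 2.25 + 0.25 + 0.25 + 2.25 = 5).
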
+
+Datum
+float8_accum(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ float8 newval = PG_GETARG_FLOAT8(1);
+ float8 *transvalues;
+ float8 N,
+ Sx,
+ Sxx,
+ tmp;
+
+ transvalues = check_float8_array(transarray, "float8_accum", 3);
+ N = transvalues[0];
+ Sx = transvalues[1];
+ Sxx = transvalues[2];
+
+ /*
+ * Use the Youngs-Cramer algorithm to incorporate the new value into the
+ * transition values.
+ */
+ N += 1.0;
+ Sx += newval;
+ if (transvalues[0] > 0.0)
+ {
+ tmp = newval * N - Sx;
+ Sxx += tmp * tmp / (N * transvalues[0]);
+
+ /*
+ * Overflow check. We only report an overflow error when finite
+ * inputs lead to infinite results. Note also that Sxx should be NaN
+ * if any of the inputs are infinite, so we intentionally prevent Sxx
+ * from becoming infinite.
+ */
+ if (isinf(Sx) || isinf(Sxx))
+ {
+ if (!isinf(transvalues[1]) && !isinf(newval))
+ float_overflow_error();
+
+ Sxx = get_float8_nan();
+ }
+ }
+ else
+ {
+ /*
+ * At the first input, we normally can leave Sxx as 0. However, if
+ * the first input is Inf or NaN, we'd better force Sxx to NaN;
+ * otherwise we will falsely report variance zero when there are no
+ * more inputs.
+ */
+ if (isnan(newval) || isinf(newval))
+ Sxx = get_float8_nan();
+ }
+
+ /*
+ * If we're invoked as an aggregate, we can cheat and modify our first
+ * parameter in-place to reduce palloc overhead. Otherwise we construct a
+ * new array with the updated transition data and return it.
+ */
+ if (AggCheckCallContext(fcinfo, NULL))
+ {
+ transvalues[0] = N;
+ transvalues[1] = Sx;
+ transvalues[2] = Sxx;
+
+ PG_RETURN_ARRAYTYPE_P(transarray);
+ }
+ else
+ {
+ Datum transdatums[3];
+ ArrayType *result;
+
+ transdatums[0] = Float8GetDatumFast(N);
+ transdatums[1] = Float8GetDatumFast(Sx);
+ transdatums[2] = Float8GetDatumFast(Sxx);
+
+ result = construct_array(transdatums, 3,
+ FLOAT8OID,
+ sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE);
+
+ PG_RETURN_ARRAYTYPE_P(result);
+ }
+}
+
+Datum
+float4_accum(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+
+ /* do computations as float8 */
+ float8 newval = PG_GETARG_FLOAT4(1);
+ float8 *transvalues;
+ float8 N,
+ Sx,
+ Sxx,
+ tmp;
+
+ transvalues = check_float8_array(transarray, "float4_accum", 3);
+ N = transvalues[0];
+ Sx = transvalues[1];
+ Sxx = transvalues[2];
+
+ /*
+ * Use the Youngs-Cramer algorithm to incorporate the new value into the
+ * transition values.
+ */
+ N += 1.0;
+ Sx += newval;
+ if (transvalues[0] > 0.0)
+ {
+ tmp = newval * N - Sx;
+ Sxx += tmp * tmp / (N * transvalues[0]);
+
+ /*
+ * Overflow check. We only report an overflow error when finite
+ * inputs lead to infinite results. Note also that Sxx should be NaN
+ * if any of the inputs are infinite, so we intentionally prevent Sxx
+ * from becoming infinite.
+ */
+ if (isinf(Sx) || isinf(Sxx))
+ {
+ if (!isinf(transvalues[1]) && !isinf(newval))
+ float_overflow_error();
+
+ Sxx = get_float8_nan();
+ }
+ }
+ else
+ {
+ /*
+ * At the first input, we normally can leave Sxx as 0. However, if
+ * the first input is Inf or NaN, we'd better force Sxx to NaN;
+ * otherwise we will falsely report variance zero when there are no
+ * more inputs.
+ */
+ if (isnan(newval) || isinf(newval))
+ Sxx = get_float8_nan();
+ }
+
+ /*
+ * If we're invoked as an aggregate, we can cheat and modify our first
+ * parameter in-place to reduce palloc overhead. Otherwise we construct a
+ * new array with the updated transition data and return it.
+ */
+ if (AggCheckCallContext(fcinfo, NULL))
+ {
+ transvalues[0] = N;
+ transvalues[1] = Sx;
+ transvalues[2] = Sxx;
+
+ PG_RETURN_ARRAYTYPE_P(transarray);
+ }
+ else
+ {
+ Datum transdatums[3];
+ ArrayType *result;
+
+ transdatums[0] = Float8GetDatumFast(N);
+ transdatums[1] = Float8GetDatumFast(Sx);
+ transdatums[2] = Float8GetDatumFast(Sxx);
+
+ result = construct_array(transdatums, 3,
+ FLOAT8OID,
+ sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE);
+
+ PG_RETURN_ARRAYTYPE_P(result);
+ }
+}
+
+Datum
+float8_avg(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ float8 *transvalues;
+ float8 N,
+ Sx;
+
+ transvalues = check_float8_array(transarray, "float8_avg", 3);
+ N = transvalues[0];
+ Sx = transvalues[1];
+ /* ignore Sxx */
+
+ /* SQL defines AVG of no values to be NULL */
+ if (N == 0.0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_FLOAT8(Sx / N);
+}
+
+Datum
+float8_var_pop(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ float8 *transvalues;
+ float8 N,
+ Sxx;
+
+ transvalues = check_float8_array(transarray, "float8_var_pop", 3);
+ N = transvalues[0];
+ /* ignore Sx */
+ Sxx = transvalues[2];
+
+ /* Population variance is undefined when N is 0, so return NULL */
+ if (N == 0.0)
+ PG_RETURN_NULL();
+
+ /* Note that Sxx is guaranteed to be non-negative */
+
+ PG_RETURN_FLOAT8(Sxx / N);
+}
+
+Datum
+float8_var_samp(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ float8 *transvalues;
+ float8 N,
+ Sxx;
+
+ transvalues = check_float8_array(transarray, "float8_var_samp", 3);
+ N = transvalues[0];
+ /* ignore Sx */
+ Sxx = transvalues[2];
+
+ /* Sample variance is undefined when N is 0 or 1, so return NULL */
+ if (N <= 1.0)
+ PG_RETURN_NULL();
+
+ /* Note that Sxx is guaranteed to be non-negative */
+
+ PG_RETURN_FLOAT8(Sxx / (N - 1.0));
+}
+
+Datum
+float8_stddev_pop(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ float8 *transvalues;
+ float8 N,
+ Sxx;
+
+ transvalues = check_float8_array(transarray, "float8_stddev_pop", 3);
+ N = transvalues[0];
+ /* ignore Sx */
+ Sxx = transvalues[2];
+
+ /* Population stddev is undefined when N is 0, so return NULL */
+ if (N == 0.0)
+ PG_RETURN_NULL();
+
+ /* Note that Sxx is guaranteed to be non-negative */
+
+ PG_RETURN_FLOAT8(sqrt(Sxx / N));
+}
+
+Datum
+float8_stddev_samp(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ float8 *transvalues;
+ float8 N,
+ Sxx;
+
+ transvalues = check_float8_array(transarray, "float8_stddev_samp", 3);
+ N = transvalues[0];
+ /* ignore Sx */
+ Sxx = transvalues[2];
+
+ /* Sample stddev is undefined when N is 0 or 1, so return NULL */
+ if (N <= 1.0)
+ PG_RETURN_NULL();
+
+ /* Note that Sxx is guaranteed to be non-negative */
+
+ PG_RETURN_FLOAT8(sqrt(Sxx / (N - 1.0)));
+}
+
+/*
+ * =========================
+ * SQL2003 BINARY AGGREGATES
+ * =========================
+ *
+ * As with the preceding aggregates, we use the Youngs-Cramer algorithm to
+ * reduce rounding errors in the aggregate final functions.
+ *
+ * The transition datatype for all these aggregates is a 6-element array of
+ * float8, holding the values N, Sx=sum(X), Sxx=sum((X-Sx/N)^2), Sy=sum(Y),
+ * Syy=sum((Y-Sy/N)^2), Sxy=sum((X-Sx/N)*(Y-Sy/N)) in that order.
+ *
+ * Note that Y is the first argument to all these aggregates!
+ *
+ * It might seem attractive to optimize this by having multiple accumulator
+ * functions that only calculate the sums actually needed. But on most
+ * modern machines, a couple of extra floating-point multiplies will be
+ * insignificant compared to the other per-tuple overhead, so I've chosen
+ * to minimize code space instead.
+ */
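
A small worked example of how the finalizers below read this state (editorial, not part of the patch): for regr_slope(Y, X) over the (Y, X) pairs (2, 1), (4, 2), (6, 3), the transition state ends up as N = 3, Sx = 6, Sy = 12, Sxx = 2, Syy = 8, Sxy = 4, giving slope = Sxy/Sxx = 2, intercept = (Sy - Sx*Sxy/Sxx)/N = 0, and corr = Sxy/sqrt(Sxx*Syy) = 1, as expected for points lying exactly on y = 2x.
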
+
+Datum
+float8_regr_accum(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ float8 newvalY = PG_GETARG_FLOAT8(1);
+ float8 newvalX = PG_GETARG_FLOAT8(2);
+ float8 *transvalues;
+ float8 N,
+ Sx,
+ Sxx,
+ Sy,
+ Syy,
+ Sxy,
+ tmpX,
+ tmpY,
+ scale;
+
+ transvalues = check_float8_array(transarray, "float8_regr_accum", 6);
+ N = transvalues[0];
+ Sx = transvalues[1];
+ Sxx = transvalues[2];
+ Sy = transvalues[3];
+ Syy = transvalues[4];
+ Sxy = transvalues[5];
+
+ /*
+ * Use the Youngs-Cramer algorithm to incorporate the new values into the
+ * transition values.
+ */
+ N += 1.0;
+ Sx += newvalX;
+ Sy += newvalY;
+ if (transvalues[0] > 0.0)
+ {
+ tmpX = newvalX * N - Sx;
+ tmpY = newvalY * N - Sy;
+ scale = 1.0 / (N * transvalues[0]);
+ Sxx += tmpX * tmpX * scale;
+ Syy += tmpY * tmpY * scale;
+ Sxy += tmpX * tmpY * scale;
+
+ /*
+ * Overflow check. We only report an overflow error when finite
+ * inputs lead to infinite results. Note also that Sxx, Syy and Sxy
+ * should be NaN if any of the relevant inputs are infinite, so we
+ * intentionally prevent them from becoming infinite.
+ */
+ if (isinf(Sx) || isinf(Sxx) || isinf(Sy) || isinf(Syy) || isinf(Sxy))
+ {
+ if (((isinf(Sx) || isinf(Sxx)) &&
+ !isinf(transvalues[1]) && !isinf(newvalX)) ||
+ ((isinf(Sy) || isinf(Syy)) &&
+ !isinf(transvalues[3]) && !isinf(newvalY)) ||
+ (isinf(Sxy) &&
+ !isinf(transvalues[1]) && !isinf(newvalX) &&
+ !isinf(transvalues[3]) && !isinf(newvalY)))
+ float_overflow_error();
+
+ if (isinf(Sxx))
+ Sxx = get_float8_nan();
+ if (isinf(Syy))
+ Syy = get_float8_nan();
+ if (isinf(Sxy))
+ Sxy = get_float8_nan();
+ }
+ }
+ else
+ {
+ /*
+ * At the first input, we normally can leave Sxx et al as 0. However,
+ * if the first input is Inf or NaN, we'd better force the dependent
+ * sums to NaN; otherwise we will falsely report variance zero when
+ * there are no more inputs.
+ */
+ if (isnan(newvalX) || isinf(newvalX))
+ Sxx = Sxy = get_float8_nan();
+ if (isnan(newvalY) || isinf(newvalY))
+ Syy = Sxy = get_float8_nan();
+ }
+
+ /*
+ * If we're invoked as an aggregate, we can cheat and modify our first
+ * parameter in-place to reduce palloc overhead. Otherwise we construct a
+ * new array with the updated transition data and return it.
+ */
+ if (AggCheckCallContext(fcinfo, NULL))
+ {
+ transvalues[0] = N;
+ transvalues[1] = Sx;
+ transvalues[2] = Sxx;
+ transvalues[3] = Sy;
+ transvalues[4] = Syy;
+ transvalues[5] = Sxy;
+
+ PG_RETURN_ARRAYTYPE_P(transarray);
+ }
+ else
+ {
+ Datum transdatums[6];
+ ArrayType *result;
+
+ transdatums[0] = Float8GetDatumFast(N);
+ transdatums[1] = Float8GetDatumFast(Sx);
+ transdatums[2] = Float8GetDatumFast(Sxx);
+ transdatums[3] = Float8GetDatumFast(Sy);
+ transdatums[4] = Float8GetDatumFast(Syy);
+ transdatums[5] = Float8GetDatumFast(Sxy);
+
+ result = construct_array(transdatums, 6,
+ FLOAT8OID,
+ sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE);
+
+ PG_RETURN_ARRAYTYPE_P(result);
+ }
+}
+
+/*
+ * float8_regr_combine
+ *
+ * An aggregate combine function used to combine two 6-element
+ * aggregate transition states into a single transition state.
+ * This function is used only in two-stage aggregation and
+ * shouldn't be called outside an aggregate context.
+ */
+Datum
+float8_regr_combine(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray1 = PG_GETARG_ARRAYTYPE_P(0);
+ ArrayType *transarray2 = PG_GETARG_ARRAYTYPE_P(1);
+ float8 *transvalues1;
+ float8 *transvalues2;
+ float8 N1,
+ Sx1,
+ Sxx1,
+ Sy1,
+ Syy1,
+ Sxy1,
+ N2,
+ Sx2,
+ Sxx2,
+ Sy2,
+ Syy2,
+ Sxy2,
+ tmp1,
+ tmp2,
+ N,
+ Sx,
+ Sxx,
+ Sy,
+ Syy,
+ Sxy;
+
+ transvalues1 = check_float8_array(transarray1, "float8_regr_combine", 6);
+ transvalues2 = check_float8_array(transarray2, "float8_regr_combine", 6);
+
+ N1 = transvalues1[0];
+ Sx1 = transvalues1[1];
+ Sxx1 = transvalues1[2];
+ Sy1 = transvalues1[3];
+ Syy1 = transvalues1[4];
+ Sxy1 = transvalues1[5];
+
+ N2 = transvalues2[0];
+ Sx2 = transvalues2[1];
+ Sxx2 = transvalues2[2];
+ Sy2 = transvalues2[3];
+ Syy2 = transvalues2[4];
+ Sxy2 = transvalues2[5];
+
+ /*--------------------
+ * The transition values combine using a generalization of the
+ * Youngs-Cramer algorithm as follows:
+ *
+ * N = N1 + N2
+ * Sx = Sx1 + Sx2
+ * Sxx = Sxx1 + Sxx2 + N1 * N2 * (Sx1/N1 - Sx2/N2)^2 / N
+ * Sy = Sy1 + Sy2
+ * Syy = Syy1 + Syy2 + N1 * N2 * (Sy1/N1 - Sy2/N2)^2 / N
+ * Sxy = Sxy1 + Sxy2 + N1 * N2 * (Sx1/N1 - Sx2/N2) * (Sy1/N1 - Sy2/N2) / N
+ *
+ * It's worth handling the special cases N1 = 0 and N2 = 0 separately
+ * since those cases are trivial, and we then don't need to worry about
+ * division-by-zero errors in the general case.
+ *--------------------
+ */
+ if (N1 == 0.0)
+ {
+ N = N2;
+ Sx = Sx2;
+ Sxx = Sxx2;
+ Sy = Sy2;
+ Syy = Syy2;
+ Sxy = Sxy2;
+ }
+ else if (N2 == 0.0)
+ {
+ N = N1;
+ Sx = Sx1;
+ Sxx = Sxx1;
+ Sy = Sy1;
+ Syy = Syy1;
+ Sxy = Sxy1;
+ }
+ else
+ {
+ N = N1 + N2;
+ Sx = float8_pl(Sx1, Sx2);
+ tmp1 = Sx1 / N1 - Sx2 / N2;
+ Sxx = Sxx1 + Sxx2 + N1 * N2 * tmp1 * tmp1 / N;
+ if (unlikely(isinf(Sxx)) && !isinf(Sxx1) && !isinf(Sxx2))
+ float_overflow_error();
+ Sy = float8_pl(Sy1, Sy2);
+ tmp2 = Sy1 / N1 - Sy2 / N2;
+ Syy = Syy1 + Syy2 + N1 * N2 * tmp2 * tmp2 / N;
+ if (unlikely(isinf(Syy)) && !isinf(Syy1) && !isinf(Syy2))
+ float_overflow_error();
+ Sxy = Sxy1 + Sxy2 + N1 * N2 * tmp1 * tmp2 / N;
+ if (unlikely(isinf(Sxy)) && !isinf(Sxy1) && !isinf(Sxy2))
+ float_overflow_error();
+ }
+
+ /*
+ * If we're invoked as an aggregate, we can cheat and modify our first
+ * parameter in-place to reduce palloc overhead. Otherwise we construct a
+ * new array with the updated transition data and return it.
+ */
+ if (AggCheckCallContext(fcinfo, NULL))
+ {
+ transvalues1[0] = N;
+ transvalues1[1] = Sx;
+ transvalues1[2] = Sxx;
+ transvalues1[3] = Sy;
+ transvalues1[4] = Syy;
+ transvalues1[5] = Sxy;
+
+ PG_RETURN_ARRAYTYPE_P(transarray1);
+ }
+ else
+ {
+ Datum transdatums[6];
+ ArrayType *result;
+
+ transdatums[0] = Float8GetDatumFast(N);
+ transdatums[1] = Float8GetDatumFast(Sx);
+ transdatums[2] = Float8GetDatumFast(Sxx);
+ transdatums[3] = Float8GetDatumFast(Sy);
+ transdatums[4] = Float8GetDatumFast(Syy);
+ transdatums[5] = Float8GetDatumFast(Sxy);
+
+ result = construct_array(transdatums, 6,
+ FLOAT8OID,
+ sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE);
+
+ PG_RETURN_ARRAYTYPE_P(result);
+ }
+}
+
+
+Datum
+float8_regr_sxx(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ float8 *transvalues;
+ float8 N,
+ Sxx;
+
+ transvalues = check_float8_array(transarray, "float8_regr_sxx", 6);
+ N = transvalues[0];
+ Sxx = transvalues[2];
+
+ /* if N is 0 we should return NULL */
+ if (N < 1.0)
+ PG_RETURN_NULL();
+
+ /* Note that Sxx is guaranteed to be non-negative */
+
+ PG_RETURN_FLOAT8(Sxx);
+}
+
+Datum
+float8_regr_syy(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ float8 *transvalues;
+ float8 N,
+ Syy;
+
+ transvalues = check_float8_array(transarray, "float8_regr_syy", 6);
+ N = transvalues[0];
+ Syy = transvalues[4];
+
+ /* if N is 0 we should return NULL */
+ if (N < 1.0)
+ PG_RETURN_NULL();
+
+ /* Note that Syy is guaranteed to be non-negative */
+
+ PG_RETURN_FLOAT8(Syy);
+}
+
+Datum
+float8_regr_sxy(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ float8 *transvalues;
+ float8 N,
+ Sxy;
+
+ transvalues = check_float8_array(transarray, "float8_regr_sxy", 6);
+ N = transvalues[0];
+ Sxy = transvalues[5];
+
+ /* if N is 0 we should return NULL */
+ if (N < 1.0)
+ PG_RETURN_NULL();
+
+ /* A negative result is valid here */
+
+ PG_RETURN_FLOAT8(Sxy);
+}
+
+Datum
+float8_regr_avgx(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ float8 *transvalues;
+ float8 N,
+ Sx;
+
+ transvalues = check_float8_array(transarray, "float8_regr_avgx", 6);
+ N = transvalues[0];
+ Sx = transvalues[1];
+
+ /* if N is 0 we should return NULL */
+ if (N < 1.0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_FLOAT8(Sx / N);
+}
+
+Datum
+float8_regr_avgy(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ float8 *transvalues;
+ float8 N,
+ Sy;
+
+ transvalues = check_float8_array(transarray, "float8_regr_avgy", 6);
+ N = transvalues[0];
+ Sy = transvalues[3];
+
+ /* if N is 0 we should return NULL */
+ if (N < 1.0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_FLOAT8(Sy / N);
+}
+
+Datum
+float8_covar_pop(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ float8 *transvalues;
+ float8 N,
+ Sxy;
+
+ transvalues = check_float8_array(transarray, "float8_covar_pop", 6);
+ N = transvalues[0];
+ Sxy = transvalues[5];
+
+ /* if N is 0 we should return NULL */
+ if (N < 1.0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_FLOAT8(Sxy / N);
+}
+
+Datum
+float8_covar_samp(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ float8 *transvalues;
+ float8 N,
+ Sxy;
+
+ transvalues = check_float8_array(transarray, "float8_covar_samp", 6);
+ N = transvalues[0];
+ Sxy = transvalues[5];
+
+ /* if N is <= 1 we should return NULL */
+ if (N < 2.0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_FLOAT8(Sxy / (N - 1.0));
+}
+
+Datum
+float8_corr(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ float8 *transvalues;
+ float8 N,
+ Sxx,
+ Syy,
+ Sxy;
+
+ transvalues = check_float8_array(transarray, "float8_corr", 6);
+ N = transvalues[0];
+ Sxx = transvalues[2];
+ Syy = transvalues[4];
+ Sxy = transvalues[5];
+
+ /* if N is 0 we should return NULL */
+ if (N < 1.0)
+ PG_RETURN_NULL();
+
+ /* Note that Sxx and Syy are guaranteed to be non-negative */
+
+ /* per spec, return NULL for horizontal and vertical lines */
+ if (Sxx == 0 || Syy == 0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_FLOAT8(Sxy / sqrt(Sxx * Syy));
+}
+
+Datum
+float8_regr_r2(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ float8 *transvalues;
+ float8 N,
+ Sxx,
+ Syy,
+ Sxy;
+
+ transvalues = check_float8_array(transarray, "float8_regr_r2", 6);
+ N = transvalues[0];
+ Sxx = transvalues[2];
+ Syy = transvalues[4];
+ Sxy = transvalues[5];
+
+ /* if N is 0 we should return NULL */
+ if (N < 1.0)
+ PG_RETURN_NULL();
+
+ /* Note that Sxx and Syy are guaranteed to be non-negative */
+
+ /* per spec, return NULL for a vertical line */
+ if (Sxx == 0)
+ PG_RETURN_NULL();
+
+ /* per spec, return 1.0 for a horizontal line */
+ if (Syy == 0)
+ PG_RETURN_FLOAT8(1.0);
+
+ PG_RETURN_FLOAT8((Sxy * Sxy) / (Sxx * Syy));
+}
+
+Datum
+float8_regr_slope(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ float8 *transvalues;
+ float8 N,
+ Sxx,
+ Sxy;
+
+ transvalues = check_float8_array(transarray, "float8_regr_slope", 6);
+ N = transvalues[0];
+ Sxx = transvalues[2];
+ Sxy = transvalues[5];
+
+ /* if N is 0 we should return NULL */
+ if (N < 1.0)
+ PG_RETURN_NULL();
+
+ /* Note that Sxx is guaranteed to be non-negative */
+
+ /* per spec, return NULL for a vertical line */
+ if (Sxx == 0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_FLOAT8(Sxy / Sxx);
+}
+
+Datum
+float8_regr_intercept(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ float8 *transvalues;
+ float8 N,
+ Sx,
+ Sxx,
+ Sy,
+ Sxy;
+
+ transvalues = check_float8_array(transarray, "float8_regr_intercept", 6);
+ N = transvalues[0];
+ Sx = transvalues[1];
+ Sxx = transvalues[2];
+ Sy = transvalues[3];
+ Sxy = transvalues[5];
+
+ /* if N is 0 we should return NULL */
+ if (N < 1.0)
+ PG_RETURN_NULL();
+
+ /* Note that Sxx is guaranteed to be non-negative */
+
+ /* per spec, return NULL for a vertical line */
+ if (Sxx == 0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_FLOAT8((Sy - Sx * Sxy / Sxx) / N);
+}
+
+
+/*
+ * ====================================
+ * MIXED-PRECISION ARITHMETIC OPERATORS
+ * ====================================
+ */
+
+/*
+ * float48pl - returns arg1 + arg2
+ * float48mi - returns arg1 - arg2
+ * float48mul - returns arg1 * arg2
+ * float48div - returns arg1 / arg2
+ */
+Datum
+float48pl(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+
+ PG_RETURN_FLOAT8(float8_pl((float8) arg1, arg2));
+}
+
+Datum
+float48mi(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+
+ PG_RETURN_FLOAT8(float8_mi((float8) arg1, arg2));
+}
+
+Datum
+float48mul(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+
+ PG_RETURN_FLOAT8(float8_mul((float8) arg1, arg2));
+}
+
+Datum
+float48div(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+
+ PG_RETURN_FLOAT8(float8_div((float8) arg1, arg2));
+}
+
+/*
+ * float84pl - returns arg1 + arg2
+ * float84mi - returns arg1 - arg2
+ * float84mul - returns arg1 * arg2
+ * float84div - returns arg1 / arg2
+ */
+Datum
+float84pl(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+
+ PG_RETURN_FLOAT8(float8_pl(arg1, (float8) arg2));
+}
+
+Datum
+float84mi(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+
+ PG_RETURN_FLOAT8(float8_mi(arg1, (float8) arg2));
+}
+
+Datum
+float84mul(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+
+ PG_RETURN_FLOAT8(float8_mul(arg1, (float8) arg2));
+}
+
+Datum
+float84div(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+
+ PG_RETURN_FLOAT8(float8_div(arg1, (float8) arg2));
+}
+
+/*
+ * ====================
+ * COMPARISON OPERATORS
+ * ====================
+ */
+
+/*
+ * float48{eq,ne,lt,le,gt,ge} - float4/float8 comparison operations
+ */
+Datum
+float48eq(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+
+ PG_RETURN_BOOL(float8_eq((float8) arg1, arg2));
+}
+
+Datum
+float48ne(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+
+ PG_RETURN_BOOL(float8_ne((float8) arg1, arg2));
+}
+
+Datum
+float48lt(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+
+ PG_RETURN_BOOL(float8_lt((float8) arg1, arg2));
+}
+
+Datum
+float48le(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+
+ PG_RETURN_BOOL(float8_le((float8) arg1, arg2));
+}
+
+Datum
+float48gt(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+
+ PG_RETURN_BOOL(float8_gt((float8) arg1, arg2));
+}
+
+Datum
+float48ge(PG_FUNCTION_ARGS)
+{
+ float4 arg1 = PG_GETARG_FLOAT4(0);
+ float8 arg2 = PG_GETARG_FLOAT8(1);
+
+ PG_RETURN_BOOL(float8_ge((float8) arg1, arg2));
+}
+
+/*
+ * float84{eq,ne,lt,le,gt,ge} - float8/float4 comparison operations
+ */
+Datum
+float84eq(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+
+ PG_RETURN_BOOL(float8_eq(arg1, (float8) arg2));
+}
+
+Datum
+float84ne(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+
+ PG_RETURN_BOOL(float8_ne(arg1, (float8) arg2));
+}
+
+Datum
+float84lt(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+
+ PG_RETURN_BOOL(float8_lt(arg1, (float8) arg2));
+}
+
+Datum
+float84le(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+
+ PG_RETURN_BOOL(float8_le(arg1, (float8) arg2));
+}
+
+Datum
+float84gt(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+
+ PG_RETURN_BOOL(float8_gt(arg1, (float8) arg2));
+}
+
+Datum
+float84ge(PG_FUNCTION_ARGS)
+{
+ float8 arg1 = PG_GETARG_FLOAT8(0);
+ float4 arg2 = PG_GETARG_FLOAT4(1);
+
+ PG_RETURN_BOOL(float8_ge(arg1, (float8) arg2));
+}
+
+/*
+ * Implements the float8 version of the width_bucket() function
+ * defined by SQL2003. See also width_bucket_numeric().
+ *
+ * 'bound1' and 'bound2' are the lower and upper bounds of the
+ * histogram's range, respectively. 'count' is the number of buckets
+ * in the histogram. width_bucket() returns an integer indicating the
+ * bucket number that 'operand' belongs to in an equiwidth histogram
+ * with the specified characteristics. An operand smaller than the
+ * lower bound is assigned to bucket 0. An operand greater than the
+ * upper bound is assigned to an additional bucket (with number
+ * count+1). We don't allow "NaN" for any of the float8 inputs, and we
+ * don't allow either of the histogram bounds to be +/- infinity.
+ */
+Datum
+width_bucket_float8(PG_FUNCTION_ARGS)
+{
+ float8 operand = PG_GETARG_FLOAT8(0);
+ float8 bound1 = PG_GETARG_FLOAT8(1);
+ float8 bound2 = PG_GETARG_FLOAT8(2);
+ int32 count = PG_GETARG_INT32(3);
+ int32 result;
+
+ if (count <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
+ errmsg("count must be greater than zero")));
+
+ if (isnan(operand) || isnan(bound1) || isnan(bound2))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
+ errmsg("operand, lower bound, and upper bound cannot be NaN")));
+
+ /* Note that we allow "operand" to be infinite */
+ if (isinf(bound1) || isinf(bound2))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
+ errmsg("lower and upper bounds must be finite")));
+
+ if (bound1 < bound2)
+ {
+ if (operand < bound1)
+ result = 0;
+ else if (operand >= bound2)
+ {
+ if (pg_add_s32_overflow(count, 1, &result))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+ }
+ else
+ result = ((float8) count * (operand - bound1) / (bound2 - bound1)) + 1;
+ }
+ else if (bound1 > bound2)
+ {
+ if (operand > bound1)
+ result = 0;
+ else if (operand <= bound2)
+ {
+ if (pg_add_s32_overflow(count, 1, &result))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+ }
+ else
+ result = ((float8) count * (bound1 - operand) / (bound1 - bound2)) + 1;
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
+ errmsg("lower bound cannot equal upper bound")));
+ result = 0; /* keep the compiler quiet */
+ }
+
+ PG_RETURN_INT32(result);
+}
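
A quick worked example of the bucket computation above (editorial, not part of the patch): with operand = 5.35, bound1 = 0.024, bound2 = 10.06 and count = 5, the ascending branch computes 5 * (5.35 - 0.024) / (10.06 - 0.024) + 1 ≈ 3.65, which the int32 assignment truncates to bucket 3.
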
diff --git a/src/backend/utils/adt/format_type.c b/src/backend/utils/adt/format_type.c
new file mode 100644
index 0000000..2918fdb
--- /dev/null
+++ b/src/backend/utils/adt/format_type.c
@@ -0,0 +1,480 @@
+/*-------------------------------------------------------------------------
+ *
+ * format_type.c
+ * Display type names "nicely".
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/format_type.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <ctype.h>
+
+#include "access/htup_details.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_type.h"
+#include "mb/pg_wchar.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/lsyscache.h"
+#include "utils/numeric.h"
+#include "utils/syscache.h"
+
+static char *printTypmod(const char *typname, int32 typmod, Oid typmodout);
+
+
+/*
+ * SQL function: format_type(type_oid, typemod)
+ *
+ * `type_oid' is from pg_type.oid, `typemod' is from
+ * pg_attribute.atttypmod. This function will get the type name and
+ * format it and the modifier to canonical SQL format, if the type is
+ * a standard type. Otherwise you just get pg_type.typname back,
+ * double quoted if it contains funny characters or matches a keyword.
+ *
+ * If typemod is NULL then we are formatting a type name in a context where
+ * no typemod is available, eg a function argument or result type. This
+ * yields a slightly different result from specifying typemod = -1 in some
+ * cases. Given typemod = -1 we feel compelled to produce an output that
+ * the parser will interpret as having typemod -1, so that pg_dump will
+ * produce CREATE TABLE commands that recreate the original state. But
+ * given NULL typemod, we assume that the parser's interpretation of
+ * typemod doesn't matter, and so we are willing to output a slightly
+ * "prettier" representation of the same type. For example, type = bpchar
+ * and typemod = NULL gets you "character", whereas typemod = -1 gets you
+ * "bpchar" --- the former will be interpreted as character(1) by the
+ * parser, which does not yield typemod -1.
+ *
+ * XXX encoding a meaning in typemod = NULL is ugly; it'd have been
+ * cleaner to make two functions of one and two arguments respectively.
+ * Not worth changing it now, however.
+ */
+Datum
+format_type(PG_FUNCTION_ARGS)
+{
+ Oid type_oid;
+ int32 typemod;
+ char *result;
+ bits16 flags = FORMAT_TYPE_ALLOW_INVALID;
+
+ /* Since this function is not strict, we must test for null args */
+ if (PG_ARGISNULL(0))
+ PG_RETURN_NULL();
+
+ type_oid = PG_GETARG_OID(0);
+
+ if (PG_ARGISNULL(1))
+ typemod = -1;
+ else
+ {
+ typemod = PG_GETARG_INT32(1);
+ flags |= FORMAT_TYPE_TYPEMOD_GIVEN;
+ }
+
+ result = format_type_extended(type_oid, typemod, flags);
+
+ PG_RETURN_TEXT_P(cstring_to_text(result));
+}
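
The bpchar example from the comment above, expressed as calls to format_type_extended() (an editorial sketch, not part of the patch; it assumes pg_catalog is in the search path, so no schema qualification is added):

    char	   *pretty = format_type_extended(BPCHAROID, -1, 0);
    						/* "character": typemod treated as unknown */
    char	   *exact = format_type_extended(BPCHAROID, -1,
    										 FORMAT_TYPE_TYPEMOD_GIVEN);
    						/* "bpchar": parser will read this back with typemod -1 */
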
+
+/*
+ * format_type_extended
+ * Generate a possibly-qualified type name.
+ *
+ * The default behavior is to only qualify if the type is not in the search
+ * path, to ignore the given typmod, and to raise an error if a non-existent
+ * type_oid is given.
+ *
+ * The following bits in 'flags' modify the behavior:
+ * - FORMAT_TYPE_TYPEMOD_GIVEN
+ * include the typmod in the output (typmod could still be -1 though)
+ * - FORMAT_TYPE_ALLOW_INVALID
+ * if the type OID is invalid or unknown, return ??? or such instead
+ * of failing
+ * - FORMAT_TYPE_INVALID_AS_NULL
+ * if the type OID is invalid or unknown, return NULL instead of ???
+ * or such
+ * - FORMAT_TYPE_FORCE_QUALIFY
+ * always schema-qualify type names, regardless of search_path
+ *
+ * Note that TYPEMOD_GIVEN is not interchangeable with "typemod == -1";
+ * see the comments above for format_type().
+ *
+ * Returns a palloc'd string, or NULL.
+ */
+char *
+format_type_extended(Oid type_oid, int32 typemod, bits16 flags)
+{
+ HeapTuple tuple;
+ Form_pg_type typeform;
+ Oid array_base_type;
+ bool is_array;
+ char *buf;
+ bool with_typemod;
+
+ if (type_oid == InvalidOid)
+ {
+ if ((flags & FORMAT_TYPE_INVALID_AS_NULL) != 0)
+ return NULL;
+ else if ((flags & FORMAT_TYPE_ALLOW_INVALID) != 0)
+ return pstrdup("-");
+ }
+
+ tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(type_oid));
+ if (!HeapTupleIsValid(tuple))
+ {
+ if ((flags & FORMAT_TYPE_INVALID_AS_NULL) != 0)
+ return NULL;
+ else if ((flags & FORMAT_TYPE_ALLOW_INVALID) != 0)
+ return pstrdup("???");
+ else
+ elog(ERROR, "cache lookup failed for type %u", type_oid);
+ }
+ typeform = (Form_pg_type) GETSTRUCT(tuple);
+
+ /*
+ * Check if it's a "true" array type. Pseudo-array types such as "name"
+ * shouldn't get deconstructed. Also check the toast property, and don't
+ * deconstruct "plain storage" array types --- this is because we don't
+ * want to show oidvector as oid[].
+ */
+ array_base_type = typeform->typelem;
+
+ if (IsTrueArrayType(typeform) &&
+ typeform->typstorage != TYPSTORAGE_PLAIN)
+ {
+ /* Switch our attention to the array element type */
+ ReleaseSysCache(tuple);
+ tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(array_base_type));
+ if (!HeapTupleIsValid(tuple))
+ {
+ if ((flags & FORMAT_TYPE_INVALID_AS_NULL) != 0)
+ return NULL;
+ else if ((flags & FORMAT_TYPE_ALLOW_INVALID) != 0)
+ return pstrdup("???[]");
+ else
+ elog(ERROR, "cache lookup failed for type %u", type_oid);
+ }
+ typeform = (Form_pg_type) GETSTRUCT(tuple);
+ type_oid = array_base_type;
+ is_array = true;
+ }
+ else
+ is_array = false;
+
+ with_typemod = (flags & FORMAT_TYPE_TYPEMOD_GIVEN) != 0 && (typemod >= 0);
+
+ /*
+ * See if we want to special-case the output for certain built-in types.
+ * Note that these special cases should all correspond to special
+ * productions in gram.y, to ensure that the type name will be taken as a
+ * system type, not a user type of the same name.
+ *
+ * If we do not provide a special-case output here, the type name will be
+ * handled the same way as a user type name --- in particular, it will be
+ * double-quoted if it matches any lexer keyword. This behavior is
+ * essential for some cases, such as types "bit" and "char".
+ */
+ buf = NULL; /* flag for no special case */
+
+ switch (type_oid)
+ {
+ case BITOID:
+ if (with_typemod)
+ buf = printTypmod("bit", typemod, typeform->typmodout);
+ else if ((flags & FORMAT_TYPE_TYPEMOD_GIVEN) != 0)
+ {
+ /*
+ * bit with typmod -1 is not the same as BIT, which means
+ * BIT(1) per SQL spec. Report it as the quoted typename so
+ * that parser will not assign a bogus typmod.
+ */
+ }
+ else
+ buf = pstrdup("bit");
+ break;
+
+ case BOOLOID:
+ buf = pstrdup("boolean");
+ break;
+
+ case BPCHAROID:
+ if (with_typemod)
+ buf = printTypmod("character", typemod, typeform->typmodout);
+ else if ((flags & FORMAT_TYPE_TYPEMOD_GIVEN) != 0)
+ {
+ /*
+ * bpchar with typmod -1 is not the same as CHARACTER, which
+ * means CHARACTER(1) per SQL spec. Report it as bpchar so
+ * that parser will not assign a bogus typmod.
+ */
+ }
+ else
+ buf = pstrdup("character");
+ break;
+
+ case FLOAT4OID:
+ buf = pstrdup("real");
+ break;
+
+ case FLOAT8OID:
+ buf = pstrdup("double precision");
+ break;
+
+ case INT2OID:
+ buf = pstrdup("smallint");
+ break;
+
+ case INT4OID:
+ buf = pstrdup("integer");
+ break;
+
+ case INT8OID:
+ buf = pstrdup("bigint");
+ break;
+
+ case NUMERICOID:
+ if (with_typemod)
+ buf = printTypmod("numeric", typemod, typeform->typmodout);
+ else
+ buf = pstrdup("numeric");
+ break;
+
+ case INTERVALOID:
+ if (with_typemod)
+ buf = printTypmod("interval", typemod, typeform->typmodout);
+ else
+ buf = pstrdup("interval");
+ break;
+
+ case TIMEOID:
+ if (with_typemod)
+ buf = printTypmod("time", typemod, typeform->typmodout);
+ else
+ buf = pstrdup("time without time zone");
+ break;
+
+ case TIMETZOID:
+ if (with_typemod)
+ buf = printTypmod("time", typemod, typeform->typmodout);
+ else
+ buf = pstrdup("time with time zone");
+ break;
+
+ case TIMESTAMPOID:
+ if (with_typemod)
+ buf = printTypmod("timestamp", typemod, typeform->typmodout);
+ else
+ buf = pstrdup("timestamp without time zone");
+ break;
+
+ case TIMESTAMPTZOID:
+ if (with_typemod)
+ buf = printTypmod("timestamp", typemod, typeform->typmodout);
+ else
+ buf = pstrdup("timestamp with time zone");
+ break;
+
+ case VARBITOID:
+ if (with_typemod)
+ buf = printTypmod("bit varying", typemod, typeform->typmodout);
+ else
+ buf = pstrdup("bit varying");
+ break;
+
+ case VARCHAROID:
+ if (with_typemod)
+ buf = printTypmod("character varying", typemod, typeform->typmodout);
+ else
+ buf = pstrdup("character varying");
+ break;
+ }
+
+ if (buf == NULL)
+ {
+ /*
+ * Default handling: report the name as it appears in the catalog.
+ * Here, we must qualify the name if it is not visible in the search
+ * path or if caller requests it; and we must double-quote it if it's
+ * not a standard identifier or if it matches any keyword.
+ */
+ char *nspname;
+ char *typname;
+
+ if ((flags & FORMAT_TYPE_FORCE_QUALIFY) == 0 &&
+ TypeIsVisible(type_oid))
+ nspname = NULL;
+ else
+ nspname = get_namespace_name_or_temp(typeform->typnamespace);
+
+ typname = NameStr(typeform->typname);
+
+ buf = quote_qualified_identifier(nspname, typname);
+
+ if (with_typemod)
+ buf = printTypmod(buf, typemod, typeform->typmodout);
+ }
+
+ if (is_array)
+ buf = psprintf("%s[]", buf);
+
+ ReleaseSysCache(tuple);
+
+ return buf;
+}
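
A short editorial sketch (not part of the patch) of the two invalid-OID flags described above:

    /* Returns NULL rather than raising an error. */
    char	   *s1 = format_type_extended(InvalidOid, -1, FORMAT_TYPE_INVALID_AS_NULL);
    /* Returns a palloc'd "-" placeholder instead. */
    char	   *s2 = format_type_extended(InvalidOid, -1, FORMAT_TYPE_ALLOW_INVALID);
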
+
+/*
+ * This version is for use within the backend in error messages, etc.
+ * One difference is that it will fail for an invalid type.
+ *
+ * The result is always a palloc'd string.
+ */
+char *
+format_type_be(Oid type_oid)
+{
+ return format_type_extended(type_oid, -1, 0);
+}
+
+/*
+ * This version returns a name that is always qualified (unless it's one
+ * of the SQL-keyword type names, such as TIMESTAMP WITH TIME ZONE).
+ */
+char *
+format_type_be_qualified(Oid type_oid)
+{
+ return format_type_extended(type_oid, -1, FORMAT_TYPE_FORCE_QUALIFY);
+}
+
+/*
+ * This version allows a nondefault typemod to be specified.
+ */
+char *
+format_type_with_typemod(Oid type_oid, int32 typemod)
+{
+ return format_type_extended(type_oid, typemod, FORMAT_TYPE_TYPEMOD_GIVEN);
+}
+
+/*
+ * Add typmod decoration to the basic type name
+ */
+static char *
+printTypmod(const char *typname, int32 typmod, Oid typmodout)
+{
+ char *res;
+
+ /* Shouldn't be called if typmod is -1 */
+ Assert(typmod >= 0);
+
+ if (typmodout == InvalidOid)
+ {
+ /* Default behavior: just print the integer typmod with parens */
+ res = psprintf("%s(%d)", typname, (int) typmod);
+ }
+ else
+ {
+ /* Use the type-specific typmodout procedure */
+ char *tmstr;
+
+ tmstr = DatumGetCString(OidFunctionCall1(typmodout,
+ Int32GetDatum(typmod)));
+ res = psprintf("%s%s", typname, tmstr);
+ }
+
+ return res;
+}
+
+
+/*
+ * type_maximum_size --- determine maximum width of a variable-width column
+ *
+ * If the max width is indeterminate, return -1. In particular, we return
+ * -1 for any type not known to this routine. We assume the caller has
+ * already determined that the type is a variable-width type, so it's not
+ * necessary to look up the type's pg_type tuple here.
+ *
+ * This may appear unrelated to format_type(), but in fact the two routines
+ * share knowledge of the encoding of typmod for different types, so it's
+ * convenient to keep them together. (XXX now that most of this knowledge
+ * has been pushed out of format_type into the typmodout functions, it's
+ * interesting to wonder if it's worth trying to factor this code too...)
+ */
+int32
+type_maximum_size(Oid type_oid, int32 typemod)
+{
+ if (typemod < 0)
+ return -1;
+
+ switch (type_oid)
+ {
+ case BPCHAROID:
+ case VARCHAROID:
+ /* typemod includes varlena header */
+
+ /* typemod is in characters not bytes */
+ return (typemod - VARHDRSZ) *
+ pg_encoding_max_length(GetDatabaseEncoding())
+ + VARHDRSZ;
+
+ case NUMERICOID:
+ return numeric_maximum_size(typemod);
+
+ case VARBITOID:
+ case BITOID:
+ /* typemod is the (max) number of bits */
+ return (typemod + (BITS_PER_BYTE - 1)) / BITS_PER_BYTE
+ + 2 * sizeof(int32);
+ }
+
+ /* Unknown type, or unlimited-width type such as 'text' */
+ return -1;
+}
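
A worked example of the computation above (editorial, not part of the patch): for varchar(10) the stored typmod is 10 + VARHDRSZ = 14, since, as the comment notes, the typmod includes the varlena header (VARHDRSZ is 4 bytes). In a UTF-8 database, where pg_encoding_max_length() is 4, type_maximum_size(VARCHAROID, 14) therefore returns (14 - 4) * 4 + 4 = 44 bytes.
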
+
+
+/*
+ * oidvectortypes - converts a vector of type OIDs to "typname" list
+ */
+Datum
+oidvectortypes(PG_FUNCTION_ARGS)
+{
+ oidvector *oidArray = (oidvector *) PG_GETARG_POINTER(0);
+ char *result;
+ int numargs = oidArray->dim1;
+ int num;
+ size_t total;
+ size_t left;
+
+ total = 20 * numargs + 1;
+ result = palloc(total);
+ result[0] = '\0';
+ left = total - 1;
+
+ for (num = 0; num < numargs; num++)
+ {
+ char *typename = format_type_extended(oidArray->values[num], -1,
+ FORMAT_TYPE_ALLOW_INVALID);
+ size_t slen = strlen(typename);
+
+ if (left < (slen + 2))
+ {
+ total += slen + 2;
+ result = repalloc(result, total);
+ left += slen + 2;
+ }
+
+ if (num > 0)
+ {
+ strcat(result, ", ");
+ left -= 2;
+ }
+ strcat(result, typename);
+ left -= slen;
+ }
+
+ PG_RETURN_TEXT_P(cstring_to_text(result));
+}
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c
new file mode 100644
index 0000000..7b1f080
--- /dev/null
+++ b/src/backend/utils/adt/formatting.c
@@ -0,0 +1,6729 @@
+/* -----------------------------------------------------------------------
+ * formatting.c
+ *
+ * src/backend/utils/adt/formatting.c
+ *
+ *
+ * Portions Copyright (c) 1999-2022, PostgreSQL Global Development Group
+ *
+ *
+ * TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER();
+ *
+ * The PostgreSQL routines for a timestamp/int/float/numeric formatting,
+ * inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines.
+ *
+ *
+ * Cache & Memory:
+ * The routines maintain an internal cache of parsed format pictures.
+ *
+ * The cache uses a static buffer and is persistent across transactions. If
+ * the format picture is bigger than the cache buffer, the parser is invoked
+ * every time instead.
+ *
+ * NOTE for the Number version:
+ * Everything in this version is implemented as keywords (suffixes are
+ * not used), because a format picture applies to only *one* item (a
+ * number). This differs from the timestamp version, where each keyword
+ * can carry a suffix.
+ *
+ * NOTE for the Timestamp routines:
+ * This module does *not* use the POSIX 'struct tm' type, but rather the
+ * PgSQL type, in which tm_mon is one-based (*not* zero-based) and the
+ * year is the full year number rather than being based on 1900.
+ * The module supports AD / BC / AM / PM.
+ *
+ * Supported types for to_char():
+ *
+ * Timestamp, Numeric, int4, int8, float4, float8
+ *
+ * Supported types for reverse conversion:
+ *
+ * Timestamp - to_timestamp()
+ * Date - to_date()
+ * Numeric - to_number()
+ *
+ *
+ * Karel Zak
+ *
+ * TODO
+ * - better number building (formatting) / parsing; the current code
+ * isn't ideal
+ * - use Assert()
+ * - add support for Roman numeral to standard number conversion
+ * - add support for number spelling
+ * - add support for string to string formatting (we must be better
+ * than Oracle :-),
+ * to_char('Hello', 'X X X X X') -> 'H e l l o'
+ *
+ * -----------------------------------------------------------------------
+ */
+
+#ifdef DEBUG_TO_FROM_CHAR
+#define DEBUG_elog_output DEBUG3
+#endif
+
+#include "postgres.h"
+
+#include <ctype.h>
+#include <unistd.h>
+#include <math.h>
+#include <float.h>
+#include <limits.h>
+
+/*
+ * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
+ * declare them in <wchar.h>, so include that too.
+ */
+#include <wchar.h>
+#ifdef HAVE_WCTYPE_H
+#include <wctype.h>
+#endif
+
+#ifdef USE_ICU
+#include <unicode/ustring.h>
+#endif
+
+#include "catalog/pg_collation.h"
+#include "catalog/pg_type.h"
+#include "mb/pg_wchar.h"
+#include "parser/scansup.h"
+#include "utils/builtins.h"
+#include "utils/date.h"
+#include "utils/datetime.h"
+#include "utils/float.h"
+#include "utils/formatting.h"
+#include "utils/memutils.h"
+#include "utils/numeric.h"
+#include "utils/pg_locale.h"
+
+/* ----------
+ * Convenience macros for error handling
+ * ----------
+ *
+ * The two macros below help to handle errors in functions that take a
+ * 'bool *have_error' argument. When this argument is not NULL, the
+ * function is expected to suppress ereports where possible and instead
+ * return some default value and set the *have_error flag.
+ *
+ * The RETURN_ERROR() macro is intended to wrap ereport() calls. When the
+ * have_error function argument is not NULL, then instead of ereport'ing we
+ * set the *have_error flag and jump to the on_error label, where it is
+ * expected that resources are freed and some 'default' value returned.
+ *
+ * CHECK_ERROR jumps to the on_error label when the *have_error flag is
+ * defined and set. It is meant for an immediate exit from the function on
+ * error, right after calling another function that takes 'bool *have_error'.
+ */
+#define RETURN_ERROR(throw_error) \
+do { \
+ if (have_error) \
+ { \
+ *have_error = true; \
+ goto on_error; \
+ } \
+ else \
+ { \
+ throw_error; \
+ } \
+} while (0)
+
+#define CHECK_ERROR \
+do { \
+ if (have_error && *have_error) \
+ goto on_error; \
+} while (0)
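
A hypothetical caller illustrating the intended pattern (editorial sketch, not part of the patch; the function name and parsing logic are made up for illustration):

    static int
    parse_two_digits(const char *s, bool *have_error)
    {
    	int			result = 0;

    	if (!isdigit((unsigned char) s[0]) || !isdigit((unsigned char) s[1]))
    		RETURN_ERROR(ereport(ERROR,
    							 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
    							  errmsg("invalid value for two-digit field"))));

    	result = (s[0] - '0') * 10 + (s[1] - '0');

    on_error:
    	return result;
    }

A caller passing a non-NULL have_error would then invoke CHECK_ERROR right after this call, to bail out as soon as *have_error has been set.
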
+
+/* ----------
+ * Routines flags
+ * ----------
+ */
+#define DCH_FLAG 0x1 /* DATE-TIME flag */
+#define NUM_FLAG 0x2 /* NUMBER flag */
+#define STD_FLAG 0x4 /* STANDARD flag */
+
+/* ----------
+ * KeyWord Index (ascii from position 32 (' ') to 126 (~))
+ * ----------
+ */
+#define KeyWord_INDEX_SIZE ('~' - ' ')
+#define KeyWord_INDEX_FILTER(_c) ((_c) <= ' ' || (_c) >= '~' ? 0 : 1)
+
+/* ----------
+ * Maximal length of one node
+ * ----------
+ */
+#define DCH_MAX_ITEM_SIZ 12 /* max localized day name */
+#define NUM_MAX_ITEM_SIZ 8 /* roman number (RN has 15 chars) */
+
+
+/* ----------
+ * Format parser structs
+ * ----------
+ */
+typedef struct
+{
+ const char *name; /* suffix string */
+ int len, /* suffix length */
+ id, /* used in node->suffix */
+ type; /* prefix / postfix */
+} KeySuffix;
+
+/* ----------
+ * FromCharDateMode
+ * ----------
+ *
+ * This value is used to nominate one of several distinct (and mutually
+ * exclusive) date conventions that a keyword can belong to.
+ */
+typedef enum
+{
+ FROM_CHAR_DATE_NONE = 0, /* Value does not affect date mode. */
+ FROM_CHAR_DATE_GREGORIAN, /* Gregorian (day, month, year) style date */
+ FROM_CHAR_DATE_ISOWEEK /* ISO 8601 week date */
+} FromCharDateMode;
+
+typedef struct
+{
+ const char *name;
+ int len;
+ int id;
+ bool is_digit;
+ FromCharDateMode date_mode;
+} KeyWord;
+
+typedef struct
+{
+ uint8 type; /* NODE_TYPE_XXX, see below */
+ char character[MAX_MULTIBYTE_CHAR_LEN + 1]; /* if type is CHAR */
+ uint8 suffix; /* keyword prefix/suffix code, if any */
+ const KeyWord *key; /* if type is ACTION */
+} FormatNode;
+
+#define NODE_TYPE_END 1
+#define NODE_TYPE_ACTION 2
+#define NODE_TYPE_CHAR 3
+#define NODE_TYPE_SEPARATOR 4
+#define NODE_TYPE_SPACE 5
+
+#define SUFFTYPE_PREFIX 1
+#define SUFFTYPE_POSTFIX 2
+
+#define CLOCK_24_HOUR 0
+#define CLOCK_12_HOUR 1
+
+
+/* ----------
+ * Full months
+ * ----------
+ */
+static const char *const months_full[] = {
+ "January", "February", "March", "April", "May", "June", "July",
+ "August", "September", "October", "November", "December", NULL
+};
+
+static const char *const days_short[] = {
+ "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL
+};
+
+/* ----------
+ * AD / BC
+ * ----------
+ * There is no 0 AD. Years go from 1 BC to 1 AD, so we make it
+ * positive and map year == -1 to year zero, and shift all negative
+ * years up one. For interval years, we just return the year.
+ */
+#define ADJUST_YEAR(year, is_interval) ((is_interval) ? (year) : ((year) <= 0 ? -((year) - 1) : (year)))
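
A quick check of the macro (editorial): ADJUST_YEAR(2024, false) yields 2024, ADJUST_YEAR(0, false) yields 1, ADJUST_YEAR(-1, false) yields 2, and with is_interval true the year passes through unchanged.
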
+
+#define A_D_STR "A.D."
+#define a_d_STR "a.d."
+#define AD_STR "AD"
+#define ad_STR "ad"
+
+#define B_C_STR "B.C."
+#define b_c_STR "b.c."
+#define BC_STR "BC"
+#define bc_STR "bc"
+
+/*
+ * AD / BC strings for seq_search.
+ *
+ * These are given in two variants, a long form with periods and a standard
+ * form without.
+ *
+ * The array is laid out such that matches for AD have an even index, and
+ * matches for BC have an odd index. So the boolean value for BC is given by
+ * taking the array index of the match, modulo 2.
+ */
+static const char *const adbc_strings[] = {ad_STR, bc_STR, AD_STR, BC_STR, NULL};
+static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_STR, NULL};
+
+/* ----------
+ * AM / PM
+ * ----------
+ */
+#define A_M_STR "A.M."
+#define a_m_STR "a.m."
+#define AM_STR "AM"
+#define am_STR "am"
+
+#define P_M_STR "P.M."
+#define p_m_STR "p.m."
+#define PM_STR "PM"
+#define pm_STR "pm"
+
+/*
+ * AM / PM strings for seq_search.
+ *
+ * These are given in two variants, a long form with periods and a standard
+ * form without.
+ *
+ * The array is laid out such that matches for AM have an even index, and
+ * matches for PM have an odd index. So the boolean value for PM is given by
+ * taking the array index of the match, modulo 2.
+ */
+static const char *const ampm_strings[] = {am_STR, pm_STR, AM_STR, PM_STR, NULL};
+static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_STR, NULL};
+
+/* ----------
+ * Months as Roman numerals
+ * (Must be in reverse order for seq_search (in FROM_CHAR), because
+ * 'VIII' must have higher precedence than 'V')
+ * ----------
+ */
+static const char *const rm_months_upper[] =
+{"XII", "XI", "X", "IX", "VIII", "VII", "VI", "V", "IV", "III", "II", "I", NULL};
+
+static const char *const rm_months_lower[] =
+{"xii", "xi", "x", "ix", "viii", "vii", "vi", "v", "iv", "iii", "ii", "i", NULL};
+
+/* ----------
+ * Roman numbers
+ * ----------
+ */
+static const char *const rm1[] = {"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", NULL};
+static const char *const rm10[] = {"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC", NULL};
+static const char *const rm100[] = {"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM", NULL};
+
+/* ----------
+ * Ordinal postfixes
+ * ----------
+ */
+static const char *const numTH[] = {"ST", "ND", "RD", "TH", NULL};
+static const char *const numth[] = {"st", "nd", "rd", "th", NULL};
+
+/* ----------
+ * Flags & Options:
+ * ----------
+ */
+#define TH_UPPER 1
+#define TH_LOWER 2
+
+/* ----------
+ * Number description struct
+ * ----------
+ */
+typedef struct
+{
+ int pre, /* (count) digits before the decimal point */
+ post, /* (count) digits after the decimal point */
+ lsign, /* want locale sign */
+ flag, /* number parameters */
+ pre_lsign_num, /* tmp value for lsign */
+ multi, /* multiplier for 'V' */
+ zero_start, /* position of first zero */
+ zero_end, /* position of last zero */
+ need_locale; /* needs the locale */
+} NUMDesc;
+
+/* ----------
+ * Flags for NUMBER version
+ * ----------
+ */
+#define NUM_F_DECIMAL (1 << 1)
+#define NUM_F_LDECIMAL (1 << 2)
+#define NUM_F_ZERO (1 << 3)
+#define NUM_F_BLANK (1 << 4)
+#define NUM_F_FILLMODE (1 << 5)
+#define NUM_F_LSIGN (1 << 6)
+#define NUM_F_BRACKET (1 << 7)
+#define NUM_F_MINUS (1 << 8)
+#define NUM_F_PLUS (1 << 9)
+#define NUM_F_ROMAN (1 << 10)
+#define NUM_F_MULTI (1 << 11)
+#define NUM_F_PLUS_POST (1 << 12)
+#define NUM_F_MINUS_POST (1 << 13)
+#define NUM_F_EEEE (1 << 14)
+
+#define NUM_LSIGN_PRE (-1)
+#define NUM_LSIGN_POST 1
+#define NUM_LSIGN_NONE 0
+
+/* ----------
+ * Tests
+ * ----------
+ */
+#define IS_DECIMAL(_f) ((_f)->flag & NUM_F_DECIMAL)
+#define IS_LDECIMAL(_f) ((_f)->flag & NUM_F_LDECIMAL)
+#define IS_ZERO(_f) ((_f)->flag & NUM_F_ZERO)
+#define IS_BLANK(_f) ((_f)->flag & NUM_F_BLANK)
+#define IS_FILLMODE(_f) ((_f)->flag & NUM_F_FILLMODE)
+#define IS_BRACKET(_f) ((_f)->flag & NUM_F_BRACKET)
+#define IS_MINUS(_f) ((_f)->flag & NUM_F_MINUS)
+#define IS_LSIGN(_f) ((_f)->flag & NUM_F_LSIGN)
+#define IS_PLUS(_f) ((_f)->flag & NUM_F_PLUS)
+#define IS_ROMAN(_f) ((_f)->flag & NUM_F_ROMAN)
+#define IS_MULTI(_f) ((_f)->flag & NUM_F_MULTI)
+#define IS_EEEE(_f) ((_f)->flag & NUM_F_EEEE)
+
+/* ----------
+ * Format picture cache
+ *
+ * We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long;
+ * likewise number format pictures up to NUM_CACHE_SIZE bytes long.
+ *
+ * For simplicity, the cache entries are fixed-size, so they allow for the
+ * worst case of a FormatNode for each byte in the picture string.
+ *
+ * The CACHE_SIZE constants are computed to make sizeof(DCHCacheEntry) and
+ * sizeof(NUMCacheEntry) be powers of 2, or just less than that, so that
+ * we don't waste too much space by palloc'ing them individually. Be sure
+ * to adjust those macros if you add fields to those structs.
+ *
+ * The max number of entries in each cache is DCH_CACHE_ENTRIES
+ * resp. NUM_CACHE_ENTRIES.
+ * ----------
+ */
+#define DCH_CACHE_OVERHEAD \
+ MAXALIGN(sizeof(bool) + sizeof(int))
+#define NUM_CACHE_OVERHEAD \
+ MAXALIGN(sizeof(bool) + sizeof(int) + sizeof(NUMDesc))
+
+#define DCH_CACHE_SIZE \
+ ((2048 - DCH_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
+#define NUM_CACHE_SIZE \
+ ((1024 - NUM_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
+
+#define DCH_CACHE_ENTRIES 20
+#define NUM_CACHE_ENTRIES 20
+
+typedef struct
+{
+ FormatNode format[DCH_CACHE_SIZE + 1];
+ char str[DCH_CACHE_SIZE + 1];
+ bool std;
+ bool valid;
+ int age;
+} DCHCacheEntry;
+
+typedef struct
+{
+ FormatNode format[NUM_CACHE_SIZE + 1];
+ char str[NUM_CACHE_SIZE + 1];
+ bool valid;
+ int age;
+ NUMDesc Num;
+} NUMCacheEntry;
+
+/* global cache for date/time format pictures */
+static DCHCacheEntry *DCHCache[DCH_CACHE_ENTRIES];
+static int n_DCHCache = 0; /* current number of entries */
+static int DCHCounter = 0; /* aging-event counter */
+
+/* global cache for number format pictures */
+static NUMCacheEntry *NUMCache[NUM_CACHE_ENTRIES];
+static int n_NUMCache = 0; /* current number of entries */
+static int NUMCounter = 0; /* aging-event counter */
+
+/* ----------
+ * For char->date/time conversion
+ * ----------
+ */
+typedef struct
+{
+ FromCharDateMode mode;
+ int hh,
+ pm,
+ mi,
+ ss,
+ ssss,
+ d, /* stored as 1-7, Sunday = 1, 0 means missing */
+ dd,
+ ddd,
+ mm,
+ ms,
+ year,
+ bc,
+ ww,
+ w,
+ cc,
+ j,
+ us,
+ yysz, /* is it YY or YYYY ? */
+ clock, /* 12 or 24 hour clock? */
+ tzsign, /* +1, -1 or 0 if timezone info is absent */
+ tzh,
+ tzm,
+ ff; /* fractional precision */
+} TmFromChar;
+
+#define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar))
+
+/* ----------
+ * Debug
+ * ----------
+ */
+#ifdef DEBUG_TO_FROM_CHAR
+#define DEBUG_TMFC(_X) \
+ elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d", \
+ (_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \
+ (_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \
+ (_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \
+ (_X)->yysz, (_X)->clock)
+#define DEBUG_TM(_X) \
+ elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nnisdst %d\nmon %d\n",\
+ (_X)->tm_sec, (_X)->tm_year,\
+ (_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\
+ (_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon)
+#else
+#define DEBUG_TMFC(_X)
+#define DEBUG_TM(_X)
+#endif
+
+/* ----------
+ * Datetime to char conversion
+ *
+ * To support intervals as well as timestamps, we use a custom "tm" struct
+ * that is almost like struct pg_tm, but has a 64-bit tm_hour field.
+ * We omit the tm_isdst and tm_zone fields, which are not used here.
+ * ----------
+ */
+struct fmt_tm
+{
+ int tm_sec;
+ int tm_min;
+ int64 tm_hour;
+ int tm_mday;
+ int tm_mon;
+ int tm_year;
+ int tm_wday;
+ int tm_yday;
+ long int tm_gmtoff;
+};
+
+typedef struct TmToChar
+{
+ struct fmt_tm tm; /* almost the classic 'tm' struct */
+ fsec_t fsec; /* fractional seconds */
+ const char *tzn; /* timezone */
+} TmToChar;
+
+#define tmtcTm(_X) (&(_X)->tm)
+#define tmtcTzn(_X) ((_X)->tzn)
+#define tmtcFsec(_X) ((_X)->fsec)
+
+/* Note: this is used to copy pg_tm to fmt_tm, so not quite a bitwise copy */
+#define COPY_tm(_DST, _SRC) \
+do { \
+ (_DST)->tm_sec = (_SRC)->tm_sec; \
+ (_DST)->tm_min = (_SRC)->tm_min; \
+ (_DST)->tm_hour = (_SRC)->tm_hour; \
+ (_DST)->tm_mday = (_SRC)->tm_mday; \
+ (_DST)->tm_mon = (_SRC)->tm_mon; \
+ (_DST)->tm_year = (_SRC)->tm_year; \
+ (_DST)->tm_wday = (_SRC)->tm_wday; \
+ (_DST)->tm_yday = (_SRC)->tm_yday; \
+ (_DST)->tm_gmtoff = (_SRC)->tm_gmtoff; \
+} while(0)
+
+/* Caution: this is used to zero both pg_tm and fmt_tm structs */
+#define ZERO_tm(_X) \
+do { \
+ memset(_X, 0, sizeof(*(_X))); \
+ (_X)->tm_mday = (_X)->tm_mon = 1; \
+} while(0)
+
+#define ZERO_tmtc(_X) \
+do { \
+ ZERO_tm( tmtcTm(_X) ); \
+ tmtcFsec(_X) = 0; \
+ tmtcTzn(_X) = NULL; \
+} while(0)
+
+/*
+ * to_char(time) appears to to_char() as an interval, so this check
+ * is really for interval and time data types.
+ */
+#define INVALID_FOR_INTERVAL \
+do { \
+ if (is_interval) \
+ ereport(ERROR, \
+ (errcode(ERRCODE_INVALID_DATETIME_FORMAT), \
+ errmsg("invalid format specification for an interval value"), \
+ errhint("Intervals are not tied to specific calendar dates."))); \
+} while(0)
+
+/*****************************************************************************
+ * KeyWord definitions
+ *****************************************************************************/
+
+/* ----------
+ * Suffixes (FormatNode.suffix is an OR of these codes)
+ * ----------
+ */
+#define DCH_S_FM 0x01
+#define DCH_S_TH 0x02
+#define DCH_S_th 0x04
+#define DCH_S_SP 0x08
+#define DCH_S_TM 0x10
+
+/* ----------
+ * Suffix tests
+ * ----------
+ */
+#define S_THth(_s) ((((_s) & DCH_S_TH) || ((_s) & DCH_S_th)) ? 1 : 0)
+#define S_TH(_s) (((_s) & DCH_S_TH) ? 1 : 0)
+#define S_th(_s) (((_s) & DCH_S_th) ? 1 : 0)
+#define S_TH_TYPE(_s) (((_s) & DCH_S_TH) ? TH_UPPER : TH_LOWER)
+
+/* Oracle toggles FM behavior, we don't; see docs. */
+#define S_FM(_s) (((_s) & DCH_S_FM) ? 1 : 0)
+#define S_SP(_s) (((_s) & DCH_S_SP) ? 1 : 0)
+#define S_TM(_s) (((_s) & DCH_S_TM) ? 1 : 0)
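+
+/*
+ * Example: a node parsed from the picture "FMDDth" carries suffix
+ * (DCH_S_FM | DCH_S_th), so S_FM() and S_THth() are true for it while
+ * S_TH() is false.
+ */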
+
+/* ----------
+ * Suffixes definition for DATE-TIME TO/FROM CHAR
+ * ----------
+ */
+#define TM_SUFFIX_LEN 2
+
+static const KeySuffix DCH_suff[] = {
+ {"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX},
+ {"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX},
+ {"TM", TM_SUFFIX_LEN, DCH_S_TM, SUFFTYPE_PREFIX},
+ {"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX},
+ {"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX},
+ {"th", 2, DCH_S_th, SUFFTYPE_POSTFIX},
+ {"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX},
+ /* last */
+ {NULL, 0, 0, 0}
+};
+
+
+/* ----------
+ * Format-pictures (KeyWord).
+ *
+ * The keyword table is sorted alphabetically, *BUT* keywords sharing a
+ * common prefix are ordered from longest (most specific) to shortest:
+ *
+ * (example: "DDD","DD","Day","D" )
+ *
+ * (This ordering is needed because the parser does a sequential prefix
+ * search over input that has no explicit keyword terminator. Which keyword
+ * starts the string "HH12blabla" -- "HH" or "HH12"? We must try "HH12"
+ * first; "HH" also matches, but it is the wrong answer.)
+ *
+ * (!)
+ * - Each keyword's position in the table must match its position in the
+ *   enum DCH_poz / NUM_poz.
+ * (!)
+ *
+ * For fast search we use 'int index[]', an ASCII table covering positions
+ * 32 (' ') to 126 (~); each slot holds the DCH_ / NUM_ enum value of the
+ * first keyword starting with that character, or -1 if no keyword starts
+ * with it. Search example for string "MM":
+ * 1) look at index['M' - 32],
+ * 2) take the keyword's starting position (enum DCH_MI) from the index,
+ * 3) run a sequential search in keywords[] from that position.
+ *
+ * ----------
+ */
+
+typedef enum
+{
+ DCH_A_D,
+ DCH_A_M,
+ DCH_AD,
+ DCH_AM,
+ DCH_B_C,
+ DCH_BC,
+ DCH_CC,
+ DCH_DAY,
+ DCH_DDD,
+ DCH_DD,
+ DCH_DY,
+ DCH_Day,
+ DCH_Dy,
+ DCH_D,
+ DCH_FF1,
+ DCH_FF2,
+ DCH_FF3,
+ DCH_FF4,
+ DCH_FF5,
+ DCH_FF6,
+ DCH_FX, /* global suffix */
+ DCH_HH24,
+ DCH_HH12,
+ DCH_HH,
+ DCH_IDDD,
+ DCH_ID,
+ DCH_IW,
+ DCH_IYYY,
+ DCH_IYY,
+ DCH_IY,
+ DCH_I,
+ DCH_J,
+ DCH_MI,
+ DCH_MM,
+ DCH_MONTH,
+ DCH_MON,
+ DCH_MS,
+ DCH_Month,
+ DCH_Mon,
+ DCH_OF,
+ DCH_P_M,
+ DCH_PM,
+ DCH_Q,
+ DCH_RM,
+ DCH_SSSSS,
+ DCH_SSSS,
+ DCH_SS,
+ DCH_TZH,
+ DCH_TZM,
+ DCH_TZ,
+ DCH_US,
+ DCH_WW,
+ DCH_W,
+ DCH_Y_YYY,
+ DCH_YYYY,
+ DCH_YYY,
+ DCH_YY,
+ DCH_Y,
+ DCH_a_d,
+ DCH_a_m,
+ DCH_ad,
+ DCH_am,
+ DCH_b_c,
+ DCH_bc,
+ DCH_cc,
+ DCH_day,
+ DCH_ddd,
+ DCH_dd,
+ DCH_dy,
+ DCH_d,
+ DCH_ff1,
+ DCH_ff2,
+ DCH_ff3,
+ DCH_ff4,
+ DCH_ff5,
+ DCH_ff6,
+ DCH_fx,
+ DCH_hh24,
+ DCH_hh12,
+ DCH_hh,
+ DCH_iddd,
+ DCH_id,
+ DCH_iw,
+ DCH_iyyy,
+ DCH_iyy,
+ DCH_iy,
+ DCH_i,
+ DCH_j,
+ DCH_mi,
+ DCH_mm,
+ DCH_month,
+ DCH_mon,
+ DCH_ms,
+ DCH_of,
+ DCH_p_m,
+ DCH_pm,
+ DCH_q,
+ DCH_rm,
+ DCH_sssss,
+ DCH_ssss,
+ DCH_ss,
+ DCH_tzh,
+ DCH_tzm,
+ DCH_tz,
+ DCH_us,
+ DCH_ww,
+ DCH_w,
+ DCH_y_yyy,
+ DCH_yyyy,
+ DCH_yyy,
+ DCH_yy,
+ DCH_y,
+
+ /* last */
+ _DCH_last_
+} DCH_poz;
+
+typedef enum
+{
+ NUM_COMMA,
+ NUM_DEC,
+ NUM_0,
+ NUM_9,
+ NUM_B,
+ NUM_C,
+ NUM_D,
+ NUM_E,
+ NUM_FM,
+ NUM_G,
+ NUM_L,
+ NUM_MI,
+ NUM_PL,
+ NUM_PR,
+ NUM_RN,
+ NUM_SG,
+ NUM_SP,
+ NUM_S,
+ NUM_TH,
+ NUM_V,
+ NUM_b,
+ NUM_c,
+ NUM_d,
+ NUM_e,
+ NUM_fm,
+ NUM_g,
+ NUM_l,
+ NUM_mi,
+ NUM_pl,
+ NUM_pr,
+ NUM_rn,
+ NUM_sg,
+ NUM_sp,
+ NUM_s,
+ NUM_th,
+ NUM_v,
+
+ /* last */
+ _NUM_last_
+} NUM_poz;
+
+/* ----------
+ * KeyWords for DATE-TIME version
+ * ----------
+ */
+static const KeyWord DCH_keywords[] = {
+/* name, len, id, is_digit, date_mode */
+ {"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE}, /* A */
+ {"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE},
+ {"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE},
+ {"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE},
+ {"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE}, /* B */
+ {"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE},
+ {"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* C */
+ {"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE}, /* D */
+ {"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
+ {"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
+ {"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE},
+ {"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE},
+ {"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE},
+ {"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
+ {"FF1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* F */
+ {"FF2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
+ {"FF3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
+ {"FF4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
+ {"FF5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
+ {"FF6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
+ {"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
+ {"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* H */
+ {"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
+ {"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
+ {"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* I */
+ {"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
+ {"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
+ {"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
+ {"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
+ {"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
+ {"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
+ {"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */
+ {"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* M */
+ {"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
+ {"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN},
+ {"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN},
+ {"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
+ {"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN},
+ {"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN},
+ {"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* O */
+ {"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE}, /* P */
+ {"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE},
+ {"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */
+ {"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */
+ {"SSSSS", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* S */
+ {"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
+ {"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
+ {"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* T */
+ {"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
+ {"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE},
+ {"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* U */
+ {"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* W */
+ {"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
+ {"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* Y */
+ {"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
+ {"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
+ {"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
+ {"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
+ {"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE}, /* a */
+ {"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE},
+ {"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE},
+ {"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE},
+ {"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE}, /* b */
+ {"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE},
+ {"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* c */
+ {"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE}, /* d */
+ {"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
+ {"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
+ {"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE},
+ {"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
+ {"ff1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* f */
+ {"ff2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
+ {"ff3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
+ {"ff4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
+ {"ff5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
+ {"ff6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
+ {"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
+ {"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* h */
+ {"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
+ {"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
+ {"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* i */
+ {"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
+ {"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
+ {"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
+ {"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
+ {"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
+ {"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
+ {"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */
+ {"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* m */
+ {"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
+ {"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN},
+ {"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN},
+ {"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
+ {"of", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* o */
+ {"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE}, /* p */
+ {"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE},
+ {"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */
+ {"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */
+ {"sssss", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* s */
+ {"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
+ {"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
+ {"tzh", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* t */
+ {"tzm", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
+ {"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE},
+ {"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* u */
+ {"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* w */
+ {"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
+ {"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* y */
+ {"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
+ {"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
+ {"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
+ {"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
+
+ /* last */
+ {NULL, 0, 0, 0, 0}
+};
+
+/* ----------
+ * KeyWords for NUMBER version
+ *
+ * The is_digit and date_mode fields are not relevant here.
+ * ----------
+ */
+static const KeyWord NUM_keywords[] = {
+/* name, len, id is in Index */
+ {",", 1, NUM_COMMA}, /* , */
+ {".", 1, NUM_DEC}, /* . */
+ {"0", 1, NUM_0}, /* 0 */
+ {"9", 1, NUM_9}, /* 9 */
+ {"B", 1, NUM_B}, /* B */
+ {"C", 1, NUM_C}, /* C */
+ {"D", 1, NUM_D}, /* D */
+ {"EEEE", 4, NUM_E}, /* E */
+ {"FM", 2, NUM_FM}, /* F */
+ {"G", 1, NUM_G}, /* G */
+ {"L", 1, NUM_L}, /* L */
+ {"MI", 2, NUM_MI}, /* M */
+ {"PL", 2, NUM_PL}, /* P */
+ {"PR", 2, NUM_PR},
+ {"RN", 2, NUM_RN}, /* R */
+ {"SG", 2, NUM_SG}, /* S */
+ {"SP", 2, NUM_SP},
+ {"S", 1, NUM_S},
+ {"TH", 2, NUM_TH}, /* T */
+ {"V", 1, NUM_V}, /* V */
+ {"b", 1, NUM_B}, /* b */
+ {"c", 1, NUM_C}, /* c */
+ {"d", 1, NUM_D}, /* d */
+ {"eeee", 4, NUM_E}, /* e */
+ {"fm", 2, NUM_FM}, /* f */
+ {"g", 1, NUM_G}, /* g */
+ {"l", 1, NUM_L}, /* l */
+ {"mi", 2, NUM_MI}, /* m */
+ {"pl", 2, NUM_PL}, /* p */
+ {"pr", 2, NUM_PR},
+ {"rn", 2, NUM_rn}, /* r */
+ {"sg", 2, NUM_SG}, /* s */
+ {"sp", 2, NUM_SP},
+ {"s", 1, NUM_S},
+ {"th", 2, NUM_th}, /* t */
+ {"v", 1, NUM_V}, /* v */
+
+ /* last */
+ {NULL, 0, 0}
+};
+
+
+/* ----------
+ * KeyWords index for DATE-TIME version
+ * ----------
+ */
+static const int DCH_index[KeyWord_INDEX_SIZE] = {
+/*
+0 1 2 3 4 5 6 7 8 9
+*/
+ /*---- first 0..31 chars are skipped ----*/
+
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1,
+ DCH_FF1, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF,
+ DCH_P_M, DCH_Q, DCH_RM, DCH_SSSSS, DCH_TZH, DCH_US, -1, DCH_WW, -1, DCH_Y_YYY,
+ -1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc,
+ DCH_day, -1, DCH_ff1, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi,
+ -1, DCH_of, DCH_p_m, DCH_q, DCH_rm, DCH_sssss, DCH_tzh, DCH_us, -1, DCH_ww,
+ -1, DCH_y_yyy, -1, -1, -1, -1
+
+ /*---- chars over 126 are skipped ----*/
+};
+
+/* ----------
+ * KeyWords index for NUMBER version
+ * ----------
+ */
+static const int NUM_index[KeyWord_INDEX_SIZE] = {
+/*
+0 1 2 3 4 5 6 7 8 9
+*/
+ /*---- first 0..31 chars are skipped ----*/
+
+ -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1,
+ -1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1,
+ -1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E,
+ NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1,
+ NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c,
+ NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi,
+ -1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1,
+ -1, -1, -1, -1, -1, -1
+
+ /*---- chars over 126 are skipped ----*/
+};
+
+/* ----------
+ * Number processor struct
+ * ----------
+ */
+typedef struct NUMProc
+{
+ bool is_to_char;
+ NUMDesc *Num; /* number description */
+
+ int sign, /* '-' or '+' */
+ sign_wrote, /* has the sign been written yet? */
+ num_count, /* number of digits written so far */
+ num_in, /* are we inside the number yet? */
+ num_curr, /* current position in the number */
+ out_pre_spaces, /* spaces before the first digit */
+
+ read_dec, /* to_number: was the decimal point read? */
+ read_post, /* to_number: number of digits after the decimal point */
+ read_pre; /* to_number: number of digits before the decimal point */
+
+ char *number, /* string with number */
+ *number_p, /* pointer to current number position */
+ *inout, /* in / out buffer */
+ *inout_p, /* pointer to current inout position */
+ *last_relevant, /* last relevant digit after the decimal point */
+
+ *L_negative_sign, /* Locale */
+ *L_positive_sign,
+ *decimal,
+ *L_thousands_sep,
+ *L_currency_symbol;
+} NUMProc;
+
+/* Return flags for DCH_from_char() */
+#define DCH_DATED 0x01
+#define DCH_TIMED 0x02
+#define DCH_ZONED 0x04
+
+/* ----------
+ * Functions
+ * ----------
+ */
+static const KeyWord *index_seq_search(const char *str, const KeyWord *kw,
+ const int *index);
+static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type);
+static bool is_separator_char(const char *str);
+static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
+static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
+ const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num);
+
+static void DCH_to_char(FormatNode *node, bool is_interval,
+ TmToChar *in, char *out, Oid collid);
+static void DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
+ Oid collid, bool std, bool *have_error);
+
+#ifdef DEBUG_TO_FROM_CHAR
+static void dump_index(const KeyWord *k, const int *index);
+static void dump_node(FormatNode *node, int max);
+#endif
+
+static const char *get_th(char *num, int type);
+static char *str_numth(char *dest, char *num, int type);
+static int adjust_partial_year_to_2020(int year);
+static int strspace_len(const char *str);
+static void from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode,
+ bool *have_error);
+static void from_char_set_int(int *dest, const int value, const FormatNode *node,
+ bool *have_error);
+static int from_char_parse_int_len(int *dest, const char **src, const int len,
+ FormatNode *node, bool *have_error);
+static int from_char_parse_int(int *dest, const char **src, FormatNode *node,
+ bool *have_error);
+static int seq_search_ascii(const char *name, const char *const *array, int *len);
+static int seq_search_localized(const char *name, char **array, int *len,
+ Oid collid);
+static int from_char_seq_search(int *dest, const char **src,
+ const char *const *array,
+ char **localized_array, Oid collid,
+ FormatNode *node, bool *have_error);
+static void do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
+ struct pg_tm *tm, fsec_t *fsec, int *fprec,
+ uint32 *flags, bool *have_error);
+static char *fill_str(char *str, int c, int max);
+static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
+static char *int_to_roman(int number);
+static void NUM_prepare_locale(NUMProc *Np);
+static char *get_last_relevant_decnum(char *num);
+static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
+static void NUM_numpart_to_char(NUMProc *Np, int id);
+static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
+ char *number, int input_len, int to_char_out_pre_spaces,
+ int sign, bool is_to_char, Oid collid);
+static DCHCacheEntry *DCH_cache_getnew(const char *str, bool std);
+static DCHCacheEntry *DCH_cache_search(const char *str, bool std);
+static DCHCacheEntry *DCH_cache_fetch(const char *str, bool std);
+static NUMCacheEntry *NUM_cache_getnew(const char *str);
+static NUMCacheEntry *NUM_cache_search(const char *str);
+static NUMCacheEntry *NUM_cache_fetch(const char *str);
+
+
+/* ----------
+ * Fast sequential search: the index selects the starting point for the
+ * sequential scan, which makes rejecting non-matching strings very fast.
+ * (Binary search cannot be used here, because the keyword's end position
+ * in the format string is not known in advance.)
+ * ----------
+ */
+static const KeyWord *
+index_seq_search(const char *str, const KeyWord *kw, const int *index)
+{
+ int poz;
+
+ if (!KeyWord_INDEX_FILTER(*str))
+ return NULL;
+
+ if ((poz = *(index + (*str - ' '))) > -1)
+ {
+ const KeyWord *k = kw + poz;
+
+ do
+ {
+ if (strncmp(str, k->name, k->len) == 0)
+ return k;
+ k++;
+ if (!k->name)
+ return NULL;
+ } while (*str == *k->name);
+ }
+ return NULL;
+}
+
+static const KeySuffix *
+suff_search(const char *str, const KeySuffix *suf, int type)
+{
+ const KeySuffix *s;
+
+ for (s = suf; s->name != NULL; s++)
+ {
+ if (s->type != type)
+ continue;
+
+ if (strncmp(str, s->name, s->len) == 0)
+ return s;
+ }
+ return NULL;
+}
+
+static bool
+is_separator_char(const char *str)
+{
+ /* ASCII printable character, but not letter or digit */
+ return (*str > 0x20 && *str < 0x7F &&
+ !(*str >= 'A' && *str <= 'Z') &&
+ !(*str >= 'a' && *str <= 'z') &&
+ !(*str >= '0' && *str <= '9'));
+}
+
+/* ----------
+ * Prepare NUMDesc (number description struct) via FormatNode struct
+ * ----------
+ */
+static void
+NUMDesc_prepare(NUMDesc *num, FormatNode *n)
+{
+ if (n->type != NODE_TYPE_ACTION)
+ return;
+
+ if (IS_EEEE(num) && n->key->id != NUM_E)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("\"EEEE\" must be the last pattern used")));
+
+ switch (n->key->id)
+ {
+ case NUM_9:
+ if (IS_BRACKET(num))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("\"9\" must be ahead of \"PR\"")));
+ if (IS_MULTI(num))
+ {
+ ++num->multi;
+ break;
+ }
+ if (IS_DECIMAL(num))
+ ++num->post;
+ else
+ ++num->pre;
+ break;
+
+ case NUM_0:
+ if (IS_BRACKET(num))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("\"0\" must be ahead of \"PR\"")));
+ if (!IS_ZERO(num) && !IS_DECIMAL(num))
+ {
+ num->flag |= NUM_F_ZERO;
+ num->zero_start = num->pre + 1;
+ }
+ if (!IS_DECIMAL(num))
+ ++num->pre;
+ else
+ ++num->post;
+
+ num->zero_end = num->pre + num->post;
+ break;
+
+ case NUM_B:
+ if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num)))
+ num->flag |= NUM_F_BLANK;
+ break;
+
+ case NUM_D:
+ num->flag |= NUM_F_LDECIMAL;
+ num->need_locale = true;
+ /* FALLTHROUGH */
+ case NUM_DEC:
+ if (IS_DECIMAL(num))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("multiple decimal points")));
+ if (IS_MULTI(num))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("cannot use \"V\" and decimal point together")));
+ num->flag |= NUM_F_DECIMAL;
+ break;
+
+ case NUM_FM:
+ num->flag |= NUM_F_FILLMODE;
+ break;
+
+ case NUM_S:
+ if (IS_LSIGN(num))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("cannot use \"S\" twice")));
+ if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together")));
+ if (!IS_DECIMAL(num))
+ {
+ num->lsign = NUM_LSIGN_PRE;
+ num->pre_lsign_num = num->pre;
+ num->need_locale = true;
+ num->flag |= NUM_F_LSIGN;
+ }
+ else if (num->lsign == NUM_LSIGN_NONE)
+ {
+ num->lsign = NUM_LSIGN_POST;
+ num->need_locale = true;
+ num->flag |= NUM_F_LSIGN;
+ }
+ break;
+
+ case NUM_MI:
+ if (IS_LSIGN(num))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("cannot use \"S\" and \"MI\" together")));
+ num->flag |= NUM_F_MINUS;
+ if (IS_DECIMAL(num))
+ num->flag |= NUM_F_MINUS_POST;
+ break;
+
+ case NUM_PL:
+ if (IS_LSIGN(num))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("cannot use \"S\" and \"PL\" together")));
+ num->flag |= NUM_F_PLUS;
+ if (IS_DECIMAL(num))
+ num->flag |= NUM_F_PLUS_POST;
+ break;
+
+ case NUM_SG:
+ if (IS_LSIGN(num))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("cannot use \"S\" and \"SG\" together")));
+ num->flag |= NUM_F_MINUS;
+ num->flag |= NUM_F_PLUS;
+ break;
+
+ case NUM_PR:
+ if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together")));
+ num->flag |= NUM_F_BRACKET;
+ break;
+
+ case NUM_rn:
+ case NUM_RN:
+ num->flag |= NUM_F_ROMAN;
+ break;
+
+ case NUM_L:
+ case NUM_G:
+ num->need_locale = true;
+ break;
+
+ case NUM_V:
+ if (IS_DECIMAL(num))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("cannot use \"V\" and decimal point together")));
+ num->flag |= NUM_F_MULTI;
+ break;
+
+ case NUM_E:
+ if (IS_EEEE(num))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("cannot use \"EEEE\" twice")));
+ if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) ||
+ IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) ||
+ IS_ROMAN(num) || IS_MULTI(num))
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("\"EEEE\" is incompatible with other formats"),
+ errdetail("\"EEEE\" may only be used together with digit and decimal point patterns.")));
+ num->flag |= NUM_F_EEEE;
+ break;
+ }
+}
+
+/* ----------
+ * Format parser: search for keywords and keyword suffixes, and build the
+ * format-node tree.
+ *
+ * Used for both the DATE-TIME and NUMBER versions.
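+ *
+ * For example, with the DCH flags the picture "YYYY-MM-DD" becomes the node
+ * sequence ACTION(YYYY), SEPARATOR('-'), ACTION(MM), SEPARATOR('-'),
+ * ACTION(DD), followed by a NODE_TYPE_END terminator.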
+ * ----------
+ */
+static void
+parse_format(FormatNode *node, const char *str, const KeyWord *kw,
+ const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num)
+{
+ FormatNode *n;
+
+#ifdef DEBUG_TO_FROM_CHAR
+ elog(DEBUG_elog_output, "to_char/number(): run parser");
+#endif
+
+ n = node;
+
+ while (*str)
+ {
+ int suffix = 0;
+ const KeySuffix *s;
+
+ /*
+ * Prefix
+ */
+ if ((flags & DCH_FLAG) &&
+ (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
+ {
+ suffix |= s->id;
+ if (s->len)
+ str += s->len;
+ }
+
+ /*
+ * Keyword
+ */
+ if (*str && (n->key = index_seq_search(str, kw, index)) != NULL)
+ {
+ n->type = NODE_TYPE_ACTION;
+ n->suffix = suffix;
+ if (n->key->len)
+ str += n->key->len;
+
+ /*
+ * NUM version: Prepare global NUMDesc struct
+ */
+ if (flags & NUM_FLAG)
+ NUMDesc_prepare(Num, n);
+
+ /*
+ * Postfix
+ */
+ if ((flags & DCH_FLAG) && *str &&
+ (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
+ {
+ n->suffix |= s->id;
+ if (s->len)
+ str += s->len;
+ }
+
+ n++;
+ }
+ else if (*str)
+ {
+ int chlen;
+
+ if ((flags & STD_FLAG) && *str != '"')
+ {
+ /*
+ * Standard mode, allow only the following separators: "-./,':; ".
+ * However, we support double quotes even in standard mode
+ * (see below). This is our extension of standard mode.
+ */
+ if (strchr("-./,':; ", *str) == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+ errmsg("invalid datetime format separator: \"%s\"",
+ pnstrdup(str, pg_mblen(str)))));
+
+ if (*str == ' ')
+ n->type = NODE_TYPE_SPACE;
+ else
+ n->type = NODE_TYPE_SEPARATOR;
+
+ n->character[0] = *str;
+ n->character[1] = '\0';
+ n->key = NULL;
+ n->suffix = 0;
+ n++;
+ str++;
+ }
+ else if (*str == '"')
+ {
+ /*
+ * Process double-quoted literal string, if any
+ */
+ str++;
+ while (*str)
+ {
+ if (*str == '"')
+ {
+ str++;
+ break;
+ }
+ /* backslash quotes the next character, if any */
+ if (*str == '\\' && *(str + 1))
+ str++;
+ chlen = pg_mblen(str);
+ n->type = NODE_TYPE_CHAR;
+ memcpy(n->character, str, chlen);
+ n->character[chlen] = '\0';
+ n->key = NULL;
+ n->suffix = 0;
+ n++;
+ str += chlen;
+ }
+ }
+ else
+ {
+ /*
+ * Outside double-quoted strings, backslash is only special if
+ * it immediately precedes a double quote.
+ */
+ if (*str == '\\' && *(str + 1) == '"')
+ str++;
+ chlen = pg_mblen(str);
+
+ if ((flags & DCH_FLAG) && is_separator_char(str))
+ n->type = NODE_TYPE_SEPARATOR;
+ else if (isspace((unsigned char) *str))
+ n->type = NODE_TYPE_SPACE;
+ else
+ n->type = NODE_TYPE_CHAR;
+
+ memcpy(n->character, str, chlen);
+ n->character[chlen] = '\0';
+ n->key = NULL;
+ n->suffix = 0;
+ n++;
+ str += chlen;
+ }
+ }
+ }
+
+ n->type = NODE_TYPE_END;
+ n->suffix = 0;
+}
+
+/* ----------
+ * DEBUG: Dump the FormatNode tree
+ * ----------
+ */
+#ifdef DEBUG_TO_FROM_CHAR
+
+#define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " "))
+#define DUMP_FM(_suf) (S_FM(_suf) ? "FM" : " ")
+
+static void
+dump_node(FormatNode *node, int max)
+{
+ FormatNode *n;
+ int a;
+
+ elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT");
+
+ for (a = 0, n = node; a <= max; n++, a++)
+ {
+ if (n->type == NODE_TYPE_ACTION)
+ elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)",
+ a, n->key->name, DUMP_THth(n->suffix), DUMP_FM(n->suffix));
+ else if (n->type == NODE_TYPE_CHAR)
+ elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'",
+ a, n->character);
+ else if (n->type == NODE_TYPE_END)
+ {
+ elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", a);
+ return;
+ }
+ else
+ elog(DEBUG_elog_output, "%d:\t unknown NODE!", a);
+ }
+}
+#endif /* DEBUG */
+
+/*****************************************************************************
+ * Private utils
+ *****************************************************************************/
+
+/* ----------
+ * Return the ordinal suffix (ST/ND/RD/TH) appropriate for a numeric string
+ * type --> TH_UPPER or TH_LOWER
+ * ----------
+ */
+static const char *
+get_th(char *num, int type)
+{
+ int len = strlen(num),
+ last;
+
+ last = *(num + (len - 1));
+ if (!isdigit((unsigned char) last))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("\"%s\" is not a number", num)));
+
+ /*
+ * All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get
+ * 'ST/st', 'ND/nd', 'RD/rd', respectively
+ */
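+ /* e.g. "21" gets ST/st, "22" ND/nd, "23" RD/rd, but "11".."13" get TH/th */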
+ if ((len > 1) && (num[len - 2] == '1'))
+ last = 0;
+
+ switch (last)
+ {
+ case '1':
+ if (type == TH_UPPER)
+ return numTH[0];
+ return numth[0];
+ case '2':
+ if (type == TH_UPPER)
+ return numTH[1];
+ return numth[1];
+ case '3':
+ if (type == TH_UPPER)
+ return numTH[2];
+ return numth[2];
+ default:
+ if (type == TH_UPPER)
+ return numTH[3];
+ return numth[3];
+ }
+}
+
+/* ----------
+ * Convert a numeric string to an ordinal string by appending its suffix
+ * type --> TH_UPPER or TH_LOWER
+ * ----------
+ */
+static char *
+str_numth(char *dest, char *num, int type)
+{
+ if (dest != num)
+ strcpy(dest, num);
+ strcat(dest, get_th(num, type));
+ return dest;
+}
+
+/*****************************************************************************
+ * upper/lower/initcap functions
+ *****************************************************************************/
+
+#ifdef USE_ICU
+
+typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ const char *locale,
+ UErrorCode *pErrorCode);
+
+static int32_t
+icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
+ UChar **buff_dest, UChar *buff_source, int32_t len_source)
+{
+ UErrorCode status;
+ int32_t len_dest;
+
+ len_dest = len_source; /* try first with same length */
+ *buff_dest = palloc(len_dest * sizeof(**buff_dest));
+ status = U_ZERO_ERROR;
+ len_dest = func(*buff_dest, len_dest, buff_source, len_source,
+ mylocale->info.icu.locale, &status);
+ if (status == U_BUFFER_OVERFLOW_ERROR)
+ {
+ /* try again with adjusted length */
+ pfree(*buff_dest);
+ *buff_dest = palloc(len_dest * sizeof(**buff_dest));
+ status = U_ZERO_ERROR;
+ len_dest = func(*buff_dest, len_dest, buff_source, len_source,
+ mylocale->info.icu.locale, &status);
+ }
+ if (U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("case conversion failed: %s", u_errorName(status))));
+ return len_dest;
+}
+
+static int32_t
+u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ const char *locale,
+ UErrorCode *pErrorCode)
+{
+ return u_strToTitle(dest, destCapacity, src, srcLength,
+ NULL, locale, pErrorCode);
+}
+
+#endif /* USE_ICU */
+
+/*
+ * If the system provides the needed functions for wide-character manipulation
+ * (which are all standardized by C99), then we implement upper/lower/initcap
+ * using wide-character functions, if necessary. Otherwise we use the
+ * traditional <ctype.h> functions, which of course will not work as desired
+ * in multibyte character sets. Note that in either case we are effectively
+ * assuming that the database character encoding matches the encoding implied
+ * by LC_CTYPE.
+ *
+ * If the system provides locale_t and associated functions (which are
+ * standardized by Open Group's XBD), we can support collations that are
+ * neither default nor C. The code is written to handle both combinations
+ * of have-wide-characters and have-locale_t, though it's rather unlikely
+ * a platform would have the latter without the former.
+ */
+
+/*
+ * collation-aware, wide-character-aware lower function
+ *
+ * We pass the number of bytes so we can pass varlena and char*
+ * to this function. The result is a palloc'd, null-terminated string.
+ */
+char *
+str_tolower(const char *buff, size_t nbytes, Oid collid)
+{
+ char *result;
+
+ if (!buff)
+ return NULL;
+
+ if (!OidIsValid(collid))
+ {
+ /*
+ * This typically means that the parser could not resolve a conflict
+ * of implicit collations, so report it that way.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for %s function",
+ "lower()"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+ }
+
+ /* C/POSIX collations use this path regardless of database encoding */
+ if (lc_ctype_is_c(collid))
+ {
+ result = asc_tolower(buff, nbytes);
+ }
+ else
+ {
+ pg_locale_t mylocale;
+
+ mylocale = pg_newlocale_from_collation(collid);
+
+#ifdef USE_ICU
+ if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
+ {
+ int32_t len_uchar;
+ int32_t len_conv;
+ UChar *buff_uchar;
+ UChar *buff_conv;
+
+ len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
+ len_conv = icu_convert_case(u_strToLower, mylocale,
+ &buff_conv, buff_uchar, len_uchar);
+ icu_from_uchar(&result, buff_conv, len_conv);
+ pfree(buff_uchar);
+ pfree(buff_conv);
+ }
+ else
+#endif
+ {
+ if (pg_database_encoding_max_length() > 1)
+ {
+ wchar_t *workspace;
+ size_t curr_char;
+ size_t result_size;
+
+ /* Overflow paranoia */
+ if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of memory")));
+
+ /* Output workspace cannot have more codes than input bytes */
+ workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
+
+ char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
+
+ for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+ {
+#ifdef HAVE_LOCALE_T
+ if (mylocale)
+ workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
+ else
+#endif
+ workspace[curr_char] = towlower(workspace[curr_char]);
+ }
+
+ /*
+ * Make result large enough; case change might change number
+ * of bytes
+ */
+ result_size = curr_char * pg_database_encoding_max_length() + 1;
+ result = palloc(result_size);
+
+ wchar2char(result, workspace, result_size, mylocale);
+ pfree(workspace);
+ }
+ else
+ {
+ char *p;
+
+ result = pnstrdup(buff, nbytes);
+
+ /*
+ * Note: we assume that tolower_l() will not be so broken as
+ * to need an isupper_l() guard test. When using the default
+ * collation, we apply the traditional Postgres behavior that
+ * forces ASCII-style treatment of I/i, but in non-default
+ * collations you get exactly what the collation says.
+ */
+ for (p = result; *p; p++)
+ {
+#ifdef HAVE_LOCALE_T
+ if (mylocale)
+ *p = tolower_l((unsigned char) *p, mylocale->info.lt);
+ else
+#endif
+ *p = pg_tolower((unsigned char) *p);
+ }
+ }
+ }
+ }
+
+ return result;
+}
+
+/*
+ * collation-aware, wide-character-aware upper function
+ *
+ * We pass the number of bytes so we can pass varlena and char*
+ * to this function. The result is a palloc'd, null-terminated string.
+ */
+char *
+str_toupper(const char *buff, size_t nbytes, Oid collid)
+{
+ char *result;
+
+ if (!buff)
+ return NULL;
+
+ if (!OidIsValid(collid))
+ {
+ /*
+ * This typically means that the parser could not resolve a conflict
+ * of implicit collations, so report it that way.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for %s function",
+ "upper()"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+ }
+
+ /* C/POSIX collations use this path regardless of database encoding */
+ if (lc_ctype_is_c(collid))
+ {
+ result = asc_toupper(buff, nbytes);
+ }
+ else
+ {
+ pg_locale_t mylocale;
+
+ mylocale = pg_newlocale_from_collation(collid);
+
+#ifdef USE_ICU
+ if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
+ {
+ int32_t len_uchar,
+ len_conv;
+ UChar *buff_uchar;
+ UChar *buff_conv;
+
+ len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
+ len_conv = icu_convert_case(u_strToUpper, mylocale,
+ &buff_conv, buff_uchar, len_uchar);
+ icu_from_uchar(&result, buff_conv, len_conv);
+ pfree(buff_uchar);
+ pfree(buff_conv);
+ }
+ else
+#endif
+ {
+ if (pg_database_encoding_max_length() > 1)
+ {
+ wchar_t *workspace;
+ size_t curr_char;
+ size_t result_size;
+
+ /* Overflow paranoia */
+ if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of memory")));
+
+ /* Output workspace cannot have more codes than input bytes */
+ workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
+
+ char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
+
+ for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+ {
+#ifdef HAVE_LOCALE_T
+ if (mylocale)
+ workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
+ else
+#endif
+ workspace[curr_char] = towupper(workspace[curr_char]);
+ }
+
+ /*
+ * Make result large enough; case change might change number
+ * of bytes
+ */
+ result_size = curr_char * pg_database_encoding_max_length() + 1;
+ result = palloc(result_size);
+
+ wchar2char(result, workspace, result_size, mylocale);
+ pfree(workspace);
+ }
+ else
+ {
+ char *p;
+
+ result = pnstrdup(buff, nbytes);
+
+ /*
+ * Note: we assume that toupper_l() will not be so broken as
+ * to need an islower_l() guard test. When using the default
+ * collation, we apply the traditional Postgres behavior that
+ * forces ASCII-style treatment of I/i, but in non-default
+ * collations you get exactly what the collation says.
+ */
+ for (p = result; *p; p++)
+ {
+#ifdef HAVE_LOCALE_T
+ if (mylocale)
+ *p = toupper_l((unsigned char) *p, mylocale->info.lt);
+ else
+#endif
+ *p = pg_toupper((unsigned char) *p);
+ }
+ }
+ }
+ }
+
+ return result;
+}
+
+/*
+ * collation-aware, wide-character-aware initcap function
+ *
+ * We pass the number of bytes so we can pass varlena and char*
+ * to this function. The result is a palloc'd, null-terminated string.
+ */
+char *
+str_initcap(const char *buff, size_t nbytes, Oid collid)
+{
+ char *result;
+ int wasalnum = false;
+
+ if (!buff)
+ return NULL;
+
+ if (!OidIsValid(collid))
+ {
+ /*
+ * This typically means that the parser could not resolve a conflict
+ * of implicit collations, so report it that way.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for %s function",
+ "initcap()"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+ }
+
+ /* C/POSIX collations use this path regardless of database encoding */
+ if (lc_ctype_is_c(collid))
+ {
+ result = asc_initcap(buff, nbytes);
+ }
+ else
+ {
+ pg_locale_t mylocale;
+
+ mylocale = pg_newlocale_from_collation(collid);
+
+#ifdef USE_ICU
+ if (mylocale && mylocale->provider == COLLPROVIDER_ICU)
+ {
+ int32_t len_uchar,
+ len_conv;
+ UChar *buff_uchar;
+ UChar *buff_conv;
+
+ len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
+ len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
+ &buff_conv, buff_uchar, len_uchar);
+ icu_from_uchar(&result, buff_conv, len_conv);
+ pfree(buff_uchar);
+ pfree(buff_conv);
+ }
+ else
+#endif
+ {
+ if (pg_database_encoding_max_length() > 1)
+ {
+ wchar_t *workspace;
+ size_t curr_char;
+ size_t result_size;
+
+ /* Overflow paranoia */
+ if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of memory")));
+
+ /* Output workspace cannot have more codes than input bytes */
+ workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
+
+ char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
+
+ for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+ {
+#ifdef HAVE_LOCALE_T
+ if (mylocale)
+ {
+ if (wasalnum)
+ workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
+ else
+ workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
+ wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
+ }
+ else
+#endif
+ {
+ if (wasalnum)
+ workspace[curr_char] = towlower(workspace[curr_char]);
+ else
+ workspace[curr_char] = towupper(workspace[curr_char]);
+ wasalnum = iswalnum(workspace[curr_char]);
+ }
+ }
+
+ /*
+ * Make result large enough; case change might change number
+ * of bytes
+ */
+ result_size = curr_char * pg_database_encoding_max_length() + 1;
+ result = palloc(result_size);
+
+ wchar2char(result, workspace, result_size, mylocale);
+ pfree(workspace);
+ }
+ else
+ {
+ char *p;
+
+ result = pnstrdup(buff, nbytes);
+
+ /*
+ * Note: we assume that toupper_l()/tolower_l() will not be so
+ * broken as to need guard tests. When using the default
+ * collation, we apply the traditional Postgres behavior that
+ * forces ASCII-style treatment of I/i, but in non-default
+ * collations you get exactly what the collation says.
+ */
+ for (p = result; *p; p++)
+ {
+#ifdef HAVE_LOCALE_T
+ if (mylocale)
+ {
+ if (wasalnum)
+ *p = tolower_l((unsigned char) *p, mylocale->info.lt);
+ else
+ *p = toupper_l((unsigned char) *p, mylocale->info.lt);
+ wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
+ }
+ else
+#endif
+ {
+ if (wasalnum)
+ *p = pg_tolower((unsigned char) *p);
+ else
+ *p = pg_toupper((unsigned char) *p);
+ wasalnum = isalnum((unsigned char) *p);
+ }
+ }
+ }
+ }
+ }
+
+ return result;
+}
+
+/*
+ * ASCII-only lower function
+ *
+ * We pass the number of bytes so we can pass varlena and char*
+ * to this function. The result is a palloc'd, null-terminated string.
+ */
+char *
+asc_tolower(const char *buff, size_t nbytes)
+{
+ char *result;
+ char *p;
+
+ if (!buff)
+ return NULL;
+
+ result = pnstrdup(buff, nbytes);
+
+ for (p = result; *p; p++)
+ *p = pg_ascii_tolower((unsigned char) *p);
+
+ return result;
+}
+
+/*
+ * ASCII-only upper function
+ *
+ * We pass the number of bytes so we can pass varlena and char*
+ * to this function. The result is a palloc'd, null-terminated string.
+ */
+char *
+asc_toupper(const char *buff, size_t nbytes)
+{
+ char *result;
+ char *p;
+
+ if (!buff)
+ return NULL;
+
+ result = pnstrdup(buff, nbytes);
+
+ for (p = result; *p; p++)
+ *p = pg_ascii_toupper((unsigned char) *p);
+
+ return result;
+}
+
+/*
+ * ASCII-only initcap function
+ *
+ * We pass the number of bytes so we can pass varlena and char*
+ * to this function. The result is a palloc'd, null-terminated string.
+ */
+char *
+asc_initcap(const char *buff, size_t nbytes)
+{
+ char *result;
+ char *p;
+ int wasalnum = false;
+
+ if (!buff)
+ return NULL;
+
+ result = pnstrdup(buff, nbytes);
+
+ for (p = result; *p; p++)
+ {
+ char c;
+
+ if (wasalnum)
+ *p = c = pg_ascii_tolower((unsigned char) *p);
+ else
+ *p = c = pg_ascii_toupper((unsigned char) *p);
+ /* we don't trust isalnum() here */
+ wasalnum = ((c >= 'A' && c <= 'Z') ||
+ (c >= 'a' && c <= 'z') ||
+ (c >= '0' && c <= '9'));
+ }
+
+ return result;
+}
+
+/* convenience routines for when the input is null-terminated */
+
+static char *
+str_tolower_z(const char *buff, Oid collid)
+{
+ return str_tolower(buff, strlen(buff), collid);
+}
+
+static char *
+str_toupper_z(const char *buff, Oid collid)
+{
+ return str_toupper(buff, strlen(buff), collid);
+}
+
+static char *
+str_initcap_z(const char *buff, Oid collid)
+{
+ return str_initcap(buff, strlen(buff), collid);
+}
+
+static char *
+asc_tolower_z(const char *buff)
+{
+ return asc_tolower(buff, strlen(buff));
+}
+
+static char *
+asc_toupper_z(const char *buff)
+{
+ return asc_toupper(buff, strlen(buff));
+}
+
+/* asc_initcap_z is not currently needed */
+
+
+/* ----------
+ * Skip TM / th in FROM_CHAR
+ *
+ * If S_THth is on, skip two chars, assuming there are two available
+ * ----------
+ */
+#define SKIP_THth(ptr, _suf) \
+ do { \
+ if (S_THth(_suf)) \
+ { \
+ if (*(ptr)) (ptr) += pg_mblen(ptr); \
+ if (*(ptr)) (ptr) += pg_mblen(ptr); \
+ } \
+ } while (0)
+
+
+#ifdef DEBUG_TO_FROM_CHAR
+/* ----------
+ * DEBUG: called for debugging and index checking; shows the ASCII char
+ * and the defined keyword for each used index position.
+ * ----------
+ */
+static void
+dump_index(const KeyWord *k, const int *index)
+{
+ int i,
+ count = 0,
+ free_i = 0;
+
+ elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:");
+
+ for (i = 0; i < KeyWord_INDEX_SIZE; i++)
+ {
+ if (index[i] != -1)
+ {
+ elog(DEBUG_elog_output, "\t%c: %s, ", i + 32, k[index[i]].name);
+ count++;
+ }
+ else
+ {
+ free_i++;
+ elog(DEBUG_elog_output, "\t(%d) %c %d", i, i + 32, index[i]);
+ }
+ }
+ elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d",
+ count, free_i);
+}
+#endif /* DEBUG */
+
+/* ----------
+ * Return true if the next format picture is not a digit value
+ * ----------
+ */
+static bool
+is_next_separator(FormatNode *n)
+{
+ if (n->type == NODE_TYPE_END)
+ return false;
+
+ if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix))
+ return true;
+
+ /*
+ * Next node
+ */
+ n++;
+
+ /* end of format string is treated like a non-digit separator */
+ if (n->type == NODE_TYPE_END)
+ return true;
+
+ if (n->type == NODE_TYPE_ACTION)
+ {
+ if (n->key->is_digit)
+ return false;
+
+ return true;
+ }
+ else if (n->character[1] == '\0' &&
+ isdigit((unsigned char) n->character[0]))
+ return false;
+
+ return true; /* some non-digit input (separator) */
+}
+
+
+static int
+adjust_partial_year_to_2020(int year)
+{
+ /*
+ * Adjust all dates toward 2020; this is effectively what happens when we
+ * assume '70' is 1970 and '69' is 2069.
+ */
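+ /*
+ * For example: 8 -> 2008, 69 -> 2069, 70 -> 1970, 99 -> 1999,
+ * 450 -> 2450, 600 -> 1600; values of 1000 and up (e.g. 1850) are
+ * returned unchanged.
+ */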
+ /* Force 0-69 into the 2000's */
+ if (year < 70)
+ return year + 2000;
+ /* Force 70-99 into the 1900's */
+ else if (year < 100)
+ return year + 1900;
+ /* Force 100-519 into the 2000's */
+ else if (year < 520)
+ return year + 2000;
+ /* Force 520-999 into the 1000's */
+ else if (year < 1000)
+ return year + 1000;
+ else
+ return year;
+}
+
+
+static int
+strspace_len(const char *str)
+{
+ int len = 0;
+
+ while (*str && isspace((unsigned char) *str))
+ {
+ str++;
+ len++;
+ }
+ return len;
+}
+
+/*
+ * Set the date mode of a from-char conversion.
+ *
+ * Puke if the date mode has already been set, and the caller attempts to set
+ * it to a conflicting mode.
+ *
+ * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
+ */
+static void
+from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode, bool *have_error)
+{
+ if (mode != FROM_CHAR_DATE_NONE)
+ {
+ if (tmfc->mode == FROM_CHAR_DATE_NONE)
+ tmfc->mode = mode;
+ else if (tmfc->mode != mode)
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+ errmsg("invalid combination of date conventions"),
+ errhint("Do not mix Gregorian and ISO week date "
+ "conventions in a formatting template."))));
+ }
+
+on_error:
+ return;
+}
+
+/*
+ * Set the integer pointed to by 'dest' to the given value.
+ *
+ * Puke if the destination integer has previously been set to some other
+ * non-zero value.
+ *
+ * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
+ */
+static void
+from_char_set_int(int *dest, const int value, const FormatNode *node,
+ bool *have_error)
+{
+ if (*dest != 0 && *dest != value)
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+ errmsg("conflicting values for \"%s\" field in "
+ "formatting string",
+ node->key->name),
+ errdetail("This value contradicts a previous setting "
+ "for the same field type."))));
+ *dest = value;
+
+on_error:
+ return;
+}
+
+/*
+ * Read a single integer from the source string, into the int pointed to by
+ * 'dest'. If 'dest' is NULL, the result is discarded.
+ *
+ * In fixed-width mode (the node does not have the FM suffix), consume at most
+ * 'len' characters. However, any leading whitespace isn't counted in 'len'.
+ *
+ * We use strtol() to recover the integer value from the source string, in
+ * accordance with the given FormatNode.
+ *
+ * If the conversion completes successfully, src will have been advanced to
+ * point at the character immediately following the last character used in the
+ * conversion.
+ *
+ * Return the number of characters consumed.
+ *
+ * Note that from_char_parse_int() provides a more convenient wrapper where
+ * the length of the field is the same as the length of the format keyword (as
+ * with DD and MI).
+ *
+ * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set
+ * and -1 is returned.
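+ *
+ * For example, when parsing the input "0512" against the picture "DDMM",
+ * the DD node is treated as fixed-width (its successor MM is a digit
+ * keyword), so it consumes exactly "05" and leaves "12" for MM; with
+ * "FMDDMM" the FM suffix would instead let strtol() consume all available
+ * digits.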
+ */
+static int
+from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node,
+ bool *have_error)
+{
+ long result;
+ char copy[DCH_MAX_ITEM_SIZ + 1];
+ const char *init = *src;
+ int used;
+
+ /*
+ * Skip any whitespace before parsing the integer.
+ */
+ *src += strspace_len(*src);
+
+ Assert(len <= DCH_MAX_ITEM_SIZ);
+ used = (int) strlcpy(copy, *src, len + 1);
+
+ if (S_FM(node->suffix) || is_next_separator(node))
+ {
+ /*
+ * This node is in Fill Mode, or the next node is known to be a
+ * non-digit value, so we just slurp as many characters as we can get.
+ */
+ char *endptr;
+
+ errno = 0;
+ result = strtol(init, &endptr, 10);
+ *src = endptr;
+ }
+ else
+ {
+ /*
+ * We need to pull exactly the number of characters given in 'len' out
+ * of the string, and convert those.
+ */
+ char *last;
+
+ if (used < len)
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+ errmsg("source string too short for \"%s\" "
+ "formatting field",
+ node->key->name),
+ errdetail("Field requires %d characters, "
+ "but only %d remain.",
+ len, used),
+ errhint("If your source string is not fixed-width, "
+ "try using the \"FM\" modifier."))));
+
+ errno = 0;
+ result = strtol(copy, &last, 10);
+ used = last - copy;
+
+ if (used > 0 && used < len)
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+ errmsg("invalid value \"%s\" for \"%s\"",
+ copy, node->key->name),
+ errdetail("Field requires %d characters, "
+ "but only %d could be parsed.",
+ len, used),
+ errhint("If your source string is not fixed-width, "
+ "try using the \"FM\" modifier."))));
+
+ *src += used;
+ }
+
+ if (*src == init)
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+ errmsg("invalid value \"%s\" for \"%s\"",
+ copy, node->key->name),
+ errdetail("Value must be an integer."))));
+
+ if (errno == ERANGE || result < INT_MIN || result > INT_MAX)
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("value for \"%s\" in source string is out of range",
+ node->key->name),
+ errdetail("Value must be in the range %d to %d.",
+ INT_MIN, INT_MAX))));
+
+ if (dest != NULL)
+ {
+ from_char_set_int(dest, (int) result, node, have_error);
+ CHECK_ERROR;
+ }
+
+ return *src - init;
+
+on_error:
+ return -1;
+}
+
+/*
+ * Call from_char_parse_int_len(), using the length of the format keyword as
+ * the expected length of the field.
+ *
+ * Don't call this function if the field differs in length from the format
+ * keyword (as with HH24; the keyword length is 4, but the field length is 2).
+ * In such cases, call from_char_parse_int_len() instead to specify the
+ * required length explicitly.
+ */
+static int
+from_char_parse_int(int *dest, const char **src, FormatNode *node, bool *have_error)
+{
+ return from_char_parse_int_len(dest, src, node->key->len, node, have_error);
+}
+
+/*
+ * Sequentially search null-terminated "array" for a case-insensitive match
+ * to the initial character(s) of "name".
+ *
+ * Returns array index of match, or -1 for no match.
+ *
+ * *len is set to the length of the match, or 0 for no match.
+ *
+ * Case-insensitivity is defined per pg_ascii_tolower, so this is only
+ * suitable for comparisons to ASCII strings.
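+ *
+ * Example: with an array like {"JAN", "FEB", "MAR", ...}, searching the
+ * input "mar 12" returns index 2 and sets *len = 3.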
+ */
+static int
+seq_search_ascii(const char *name, const char *const *array, int *len)
+{
+ unsigned char firstc;
+ const char *const *a;
+
+ *len = 0;
+
+ /* empty string can't match anything */
+ if (!*name)
+ return -1;
+
+ /* we handle first char specially to gain some speed */
+ firstc = pg_ascii_tolower((unsigned char) *name);
+
+ for (a = array; *a != NULL; a++)
+ {
+ const char *p;
+ const char *n;
+
+ /* compare first chars */
+ if (pg_ascii_tolower((unsigned char) **a) != firstc)
+ continue;
+
+ /* compare rest of string */
+ for (p = *a + 1, n = name + 1;; p++, n++)
+ {
+ /* return success if we matched whole array entry */
+ if (*p == '\0')
+ {
+ *len = n - name;
+ return a - array;
+ }
+ /* else, must have another character in "name" ... */
+ if (*n == '\0')
+ break;
+ /* ... and it must match */
+ if (pg_ascii_tolower((unsigned char) *p) !=
+ pg_ascii_tolower((unsigned char) *n))
+ break;
+ }
+ }
+
+ return -1;
+}
+
+/*
+ * Sequentially search an array of possibly non-English words for
+ * a case-insensitive match to the initial character(s) of "name".
+ *
+ * This has the same API as seq_search_ascii(), but we use a more general
+ * case-folding transformation to achieve case-insensitivity. Case folding
+ * is done per the rules of the collation identified by "collid".
+ *
+ * The array is treated as const, but we don't declare it that way because
+ * the arrays exported by pg_locale.c aren't const.
+ */
+static int
+seq_search_localized(const char *name, char **array, int *len, Oid collid)
+{
+ char **a;
+ char *upper_name;
+ char *lower_name;
+
+ *len = 0;
+
+ /* empty string can't match anything */
+ if (!*name)
+ return -1;
+
+ /*
+ * The case-folding processing done below is fairly expensive, so before
+ * doing that, make a quick pass to see if there is an exact match.
+ */
+ for (a = array; *a != NULL; a++)
+ {
+ int element_len = strlen(*a);
+
+ if (strncmp(name, *a, element_len) == 0)
+ {
+ *len = element_len;
+ return a - array;
+ }
+ }
+
+ /*
+ * Fold to upper case, then to lower case, so that we can match reliably
+ * even in languages in which case conversions are not injective.
+ */
+ upper_name = str_toupper(unconstify(char *, name), strlen(name), collid);
+ lower_name = str_tolower(upper_name, strlen(upper_name), collid);
+ pfree(upper_name);
+
+ for (a = array; *a != NULL; a++)
+ {
+ char *upper_element;
+ char *lower_element;
+ int element_len;
+
+ /* Likewise upper/lower-case array element */
+ upper_element = str_toupper(*a, strlen(*a), collid);
+ lower_element = str_tolower(upper_element, strlen(upper_element),
+ collid);
+ pfree(upper_element);
+ element_len = strlen(lower_element);
+
+ /* Match? */
+ if (strncmp(lower_name, lower_element, element_len) == 0)
+ {
+ *len = element_len;
+ pfree(lower_element);
+ pfree(lower_name);
+ return a - array;
+ }
+ pfree(lower_element);
+ }
+
+ pfree(lower_name);
+ return -1;
+}
+
+/*
+ * Perform a sequential search in 'array' (or 'localized_array', if that's
+ * not NULL) for an entry matching the first character(s) of the 'src'
+ * string case-insensitively.
+ *
+ * The 'array' is presumed to be English words (all-ASCII), but
+ * if 'localized_array' is supplied, that might be non-English
+ * so we need a more expensive case-folding transformation
+ * (which will follow the rules of the collation 'collid').
+ *
+ * If a match is found, copy the array index of the match into the integer
+ * pointed to by 'dest', advance 'src' to the end of the part of the string
+ * which matched, and return the number of characters consumed.
+ *
+ * If the string doesn't match, throw an error if 'have_error' is NULL,
+ * otherwise set '*have_error' and return -1.
+ *
+ * 'node' is used only for error reports: node->key->name identifies the
+ * field type we were searching for.
+ */
+static int
+from_char_seq_search(int *dest, const char **src, const char *const *array,
+ char **localized_array, Oid collid,
+ FormatNode *node, bool *have_error)
+{
+ int len;
+
+ if (localized_array == NULL)
+ *dest = seq_search_ascii(*src, array, &len);
+ else
+ *dest = seq_search_localized(*src, localized_array, &len, collid);
+
+ if (len <= 0)
+ {
+ /*
+ * In the error report, truncate the string at the next whitespace (if
+ * any) to avoid including irrelevant data.
+ */
+ char *copy = pstrdup(*src);
+ char *c;
+
+ for (c = copy; *c; c++)
+ {
+ if (scanner_isspace(*c))
+ {
+ *c = '\0';
+ break;
+ }
+ }
+
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+ errmsg("invalid value \"%s\" for \"%s\"",
+ copy, node->key->name),
+ errdetail("The given value did not match any of "
+ "the allowed values for this field."))));
+ }
+ *src += len;
+ return len;
+
+on_error:
+ return -1;
+}
+
+/* ----------
+ * Process a TmToChar struct as denoted by a list of FormatNodes.
+ * The formatted data is written to the string pointed to by 'out'.
+ * ----------
+ */
+static void
+DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid)
+{
+ FormatNode *n;
+ char *s;
+ struct fmt_tm *tm = &in->tm;
+ int i;
+
+ /* cache localized days and months */
+ cache_locale_time();
+
+ s = out;
+ for (n = node; n->type != NODE_TYPE_END; n++)
+ {
+ if (n->type != NODE_TYPE_ACTION)
+ {
+ strcpy(s, n->character);
+ s += strlen(s);
+ continue;
+ }
+
+ switch (n->key->id)
+ {
+ case DCH_A_M:
+ case DCH_P_M:
+ strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
+ ? P_M_STR : A_M_STR);
+ s += strlen(s);
+ break;
+ case DCH_AM:
+ case DCH_PM:
+ strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
+ ? PM_STR : AM_STR);
+ s += strlen(s);
+ break;
+ case DCH_a_m:
+ case DCH_p_m:
+ strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
+ ? p_m_STR : a_m_STR);
+ s += strlen(s);
+ break;
+ case DCH_am:
+ case DCH_pm:
+ strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
+ ? pm_STR : am_STR);
+ s += strlen(s);
+ break;
+ case DCH_HH:
+ case DCH_HH12:
+
+ /*
+ * display time as shown on a 12-hour clock, even for
+ * intervals
+ */
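+ /* e.g. tm_hour = 0 or 12 prints as "12"; tm_hour = 13 prints as "01" */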
+ sprintf(s, "%0*lld", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
+ tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ?
+ (long long) (HOURS_PER_DAY / 2) :
+ (long long) (tm->tm_hour % (HOURS_PER_DAY / 2)));
+ if (S_THth(n->suffix))
+ str_numth(s, s, S_TH_TYPE(n->suffix));
+ s += strlen(s);
+ break;
+ case DCH_HH24:
+ sprintf(s, "%0*lld", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
+ (long long) tm->tm_hour);
+ if (S_THth(n->suffix))
+ str_numth(s, s, S_TH_TYPE(n->suffix));
+ s += strlen(s);
+ break;
+ case DCH_MI:
+ sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3,
+ tm->tm_min);
+ if (S_THth(n->suffix))
+ str_numth(s, s, S_TH_TYPE(n->suffix));
+ s += strlen(s);
+ break;
+ case DCH_SS:
+ sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3,
+ tm->tm_sec);
+ if (S_THth(n->suffix))
+ str_numth(s, s, S_TH_TYPE(n->suffix));
+ s += strlen(s);
+ break;
+
+#define DCH_to_char_fsec(frac_fmt, frac_val) \
+ sprintf(s, frac_fmt, (int) (frac_val)); \
+ if (S_THth(n->suffix)) \
+ str_numth(s, s, S_TH_TYPE(n->suffix)); \
+ s += strlen(s)
+
+ case DCH_FF1: /* tenth of second */
+ DCH_to_char_fsec("%01d", in->fsec / 100000);
+ break;
+ case DCH_FF2: /* hundredth of second */
+ DCH_to_char_fsec("%02d", in->fsec / 10000);
+ break;
+ case DCH_FF3:
+ case DCH_MS: /* millisecond */
+ DCH_to_char_fsec("%03d", in->fsec / 1000);
+ break;
+ case DCH_FF4: /* tenth of a millisecond */
+ DCH_to_char_fsec("%04d", in->fsec / 100);
+ break;
+ case DCH_FF5: /* hundredth of a millisecond */
+ DCH_to_char_fsec("%05d", in->fsec / 10);
+ break;
+ case DCH_FF6:
+ case DCH_US: /* microsecond */
+ DCH_to_char_fsec("%06d", in->fsec);
+ break;
+#undef DCH_to_char_fsec
+ case DCH_SSSS:
+ sprintf(s, "%lld",
+ (long long) (tm->tm_hour * SECS_PER_HOUR +
+ tm->tm_min * SECS_PER_MINUTE +
+ tm->tm_sec));
+ if (S_THth(n->suffix))
+ str_numth(s, s, S_TH_TYPE(n->suffix));
+ s += strlen(s);
+ break;
+ case DCH_tz:
+ INVALID_FOR_INTERVAL;
+ if (tmtcTzn(in))
+ {
+ /* We assume here that timezone names aren't localized */
+ char *p = asc_tolower_z(tmtcTzn(in));
+
+ strcpy(s, p);
+ pfree(p);
+ s += strlen(s);
+ }
+ break;
+ case DCH_TZ:
+ INVALID_FOR_INTERVAL;
+ if (tmtcTzn(in))
+ {
+ strcpy(s, tmtcTzn(in));
+ s += strlen(s);
+ }
+ break;
+ case DCH_TZH:
+ INVALID_FOR_INTERVAL;
+ sprintf(s, "%c%02d",
+ (tm->tm_gmtoff >= 0) ? '+' : '-',
+ abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
+ s += strlen(s);
+ break;
+ case DCH_TZM:
+ INVALID_FOR_INTERVAL;
+ sprintf(s, "%02d",
+ (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
+ s += strlen(s);
+ break;
+ case DCH_OF:
+ INVALID_FOR_INTERVAL;
+ sprintf(s, "%c%0*d",
+ (tm->tm_gmtoff >= 0) ? '+' : '-',
+ S_FM(n->suffix) ? 0 : 2,
+ abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
+ s += strlen(s);
+ if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0)
+ {
+ sprintf(s, ":%02d",
+ (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
+ s += strlen(s);
+ }
+ break;
+ case DCH_A_D:
+ case DCH_B_C:
+ INVALID_FOR_INTERVAL;
+ strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR));
+ s += strlen(s);
+ break;
+ case DCH_AD:
+ case DCH_BC:
+ INVALID_FOR_INTERVAL;
+ strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR));
+ s += strlen(s);
+ break;
+ case DCH_a_d:
+ case DCH_b_c:
+ INVALID_FOR_INTERVAL;
+ strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR));
+ s += strlen(s);
+ break;
+ case DCH_ad:
+ case DCH_bc:
+ INVALID_FOR_INTERVAL;
+ strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR));
+ s += strlen(s);
+ break;
+ case DCH_MONTH:
+ INVALID_FOR_INTERVAL;
+ if (!tm->tm_mon)
+ break;
+ if (S_TM(n->suffix))
+ {
+ char *str = str_toupper_z(localized_full_months[tm->tm_mon - 1], collid);
+
+ if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
+ strcpy(s, str);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("localized string format value too long")));
+ }
+ else
+ sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
+ asc_toupper_z(months_full[tm->tm_mon - 1]));
+ s += strlen(s);
+ break;
+ case DCH_Month:
+ INVALID_FOR_INTERVAL;
+ if (!tm->tm_mon)
+ break;
+ if (S_TM(n->suffix))
+ {
+ char *str = str_initcap_z(localized_full_months[tm->tm_mon - 1], collid);
+
+ if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
+ strcpy(s, str);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("localized string format value too long")));
+ }
+ else
+ sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
+ months_full[tm->tm_mon - 1]);
+ s += strlen(s);
+ break;
+ case DCH_month:
+ INVALID_FOR_INTERVAL;
+ if (!tm->tm_mon)
+ break;
+ if (S_TM(n->suffix))
+ {
+ char *str = str_tolower_z(localized_full_months[tm->tm_mon - 1], collid);
+
+ if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
+ strcpy(s, str);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("localized string format value too long")));
+ }
+ else
+ sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
+ asc_tolower_z(months_full[tm->tm_mon - 1]));
+ s += strlen(s);
+ break;
+ case DCH_MON:
+ INVALID_FOR_INTERVAL;
+ if (!tm->tm_mon)
+ break;
+ if (S_TM(n->suffix))
+ {
+ char *str = str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid);
+
+ if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
+ strcpy(s, str);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("localized string format value too long")));
+ }
+ else
+ strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
+ s += strlen(s);
+ break;
+ case DCH_Mon:
+ INVALID_FOR_INTERVAL;
+ if (!tm->tm_mon)
+ break;
+ if (S_TM(n->suffix))
+ {
+ char *str = str_initcap_z(localized_abbrev_months[tm->tm_mon - 1], collid);
+
+ if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
+ strcpy(s, str);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("localized string format value too long")));
+ }
+ else
+ strcpy(s, months[tm->tm_mon - 1]);
+ s += strlen(s);
+ break;
+ case DCH_mon:
+ INVALID_FOR_INTERVAL;
+ if (!tm->tm_mon)
+ break;
+ if (S_TM(n->suffix))
+ {
+ char *str = str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid);
+
+ if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
+ strcpy(s, str);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("localized string format value too long")));
+ }
+ else
+ strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
+ s += strlen(s);
+ break;
+ case DCH_MM:
+ sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3,
+ tm->tm_mon);
+ if (S_THth(n->suffix))
+ str_numth(s, s, S_TH_TYPE(n->suffix));
+ s += strlen(s);
+ break;
+ case DCH_DAY:
+ INVALID_FOR_INTERVAL;
+ if (S_TM(n->suffix))
+ {
+ char *str = str_toupper_z(localized_full_days[tm->tm_wday], collid);
+
+ if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
+ strcpy(s, str);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("localized string format value too long")));
+ }
+ else
+ sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
+ asc_toupper_z(days[tm->tm_wday]));
+ s += strlen(s);
+ break;
+ case DCH_Day:
+ INVALID_FOR_INTERVAL;
+ if (S_TM(n->suffix))
+ {
+ char *str = str_initcap_z(localized_full_days[tm->tm_wday], collid);
+
+ if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
+ strcpy(s, str);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("localized string format value too long")));
+ }
+ else
+ sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
+ days[tm->tm_wday]);
+ s += strlen(s);
+ break;
+ case DCH_day:
+ INVALID_FOR_INTERVAL;
+ if (S_TM(n->suffix))
+ {
+ char *str = str_tolower_z(localized_full_days[tm->tm_wday], collid);
+
+ if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
+ strcpy(s, str);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("localized string format value too long")));
+ }
+ else
+ sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
+ asc_tolower_z(days[tm->tm_wday]));
+ s += strlen(s);
+ break;
+ case DCH_DY:
+ INVALID_FOR_INTERVAL;
+ if (S_TM(n->suffix))
+ {
+ char *str = str_toupper_z(localized_abbrev_days[tm->tm_wday], collid);
+
+ if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
+ strcpy(s, str);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("localized string format value too long")));
+ }
+ else
+ strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
+ s += strlen(s);
+ break;
+ case DCH_Dy:
+ INVALID_FOR_INTERVAL;
+ if (S_TM(n->suffix))
+ {
+ char *str = str_initcap_z(localized_abbrev_days[tm->tm_wday], collid);
+
+ if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
+ strcpy(s, str);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("localized string format value too long")));
+ }
+ else
+ strcpy(s, days_short[tm->tm_wday]);
+ s += strlen(s);
+ break;
+ case DCH_dy:
+ INVALID_FOR_INTERVAL;
+ if (S_TM(n->suffix))
+ {
+ char *str = str_tolower_z(localized_abbrev_days[tm->tm_wday], collid);
+
+ if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
+ strcpy(s, str);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("localized string format value too long")));
+ }
+ else
+ strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
+ s += strlen(s);
+ break;
+ case DCH_DDD:
+ case DCH_IDDD:
+ sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3,
+ (n->key->id == DCH_DDD) ?
+ tm->tm_yday :
+ date2isoyearday(tm->tm_year, tm->tm_mon, tm->tm_mday));
+ if (S_THth(n->suffix))
+ str_numth(s, s, S_TH_TYPE(n->suffix));
+ s += strlen(s);
+ break;
+ case DCH_DD:
+ sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday);
+ if (S_THth(n->suffix))
+ str_numth(s, s, S_TH_TYPE(n->suffix));
+ s += strlen(s);
+ break;
+ case DCH_D:
+ INVALID_FOR_INTERVAL;
+ sprintf(s, "%d", tm->tm_wday + 1);
+ if (S_THth(n->suffix))
+ str_numth(s, s, S_TH_TYPE(n->suffix));
+ s += strlen(s);
+ break;
+ case DCH_ID:
+ INVALID_FOR_INTERVAL;
+ sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday);
+ if (S_THth(n->suffix))
+ str_numth(s, s, S_TH_TYPE(n->suffix));
+ s += strlen(s);
+ break;
+ case DCH_WW:
+ sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
+ (tm->tm_yday - 1) / 7 + 1);
+ if (S_THth(n->suffix))
+ str_numth(s, s, S_TH_TYPE(n->suffix));
+ s += strlen(s);
+ break;
+ case DCH_IW:
+ sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
+ date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday));
+ if (S_THth(n->suffix))
+ str_numth(s, s, S_TH_TYPE(n->suffix));
+ s += strlen(s);
+ break;
+ case DCH_Q:
+ if (!tm->tm_mon)
+ break;
+ sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1);
+ if (S_THth(n->suffix))
+ str_numth(s, s, S_TH_TYPE(n->suffix));
+ s += strlen(s);
+ break;
+ case DCH_CC:
+ if (is_interval) /* straight calculation */
+ i = tm->tm_year / 100;
+ else
+ {
+ if (tm->tm_year > 0)
+ /* Century 20 == 1901 - 2000 */
+ i = (tm->tm_year - 1) / 100 + 1;
+ else
+ /* Century 6BC == 600BC - 501BC */
+ i = tm->tm_year / 100 - 1;
+ }
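+ /*
+ * E.g. to_char(date '2000-06-01', 'CC') should give '20', while
+ * to_char(date '2001-06-01', 'CC') gives '21'.
+ */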
+ if (i <= 99 && i >= -99)
+ sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i);
+ else
+ sprintf(s, "%d", i);
+ if (S_THth(n->suffix))
+ str_numth(s, s, S_TH_TYPE(n->suffix));
+ s += strlen(s);
+ break;
+ case DCH_Y_YYY:
+ i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000;
+ sprintf(s, "%d,%03d", i,
+ ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000));
+ if (S_THth(n->suffix))
+ str_numth(s, s, S_TH_TYPE(n->suffix));
+ s += strlen(s);
+ break;
+ case DCH_YYYY:
+ case DCH_IYYY:
+ sprintf(s, "%0*d",
+ S_FM(n->suffix) ? 0 :
+ (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5,
+ (n->key->id == DCH_YYYY ?
+ ADJUST_YEAR(tm->tm_year, is_interval) :
+ ADJUST_YEAR(date2isoyear(tm->tm_year,
+ tm->tm_mon,
+ tm->tm_mday),
+ is_interval)));
+ if (S_THth(n->suffix))
+ str_numth(s, s, S_TH_TYPE(n->suffix));
+ s += strlen(s);
+ break;
+ case DCH_YYY:
+ case DCH_IYY:
+ sprintf(s, "%0*d",
+ S_FM(n->suffix) ? 0 :
+ (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4,
+ (n->key->id == DCH_YYY ?
+ ADJUST_YEAR(tm->tm_year, is_interval) :
+ ADJUST_YEAR(date2isoyear(tm->tm_year,
+ tm->tm_mon,
+ tm->tm_mday),
+ is_interval)) % 1000);
+ if (S_THth(n->suffix))
+ str_numth(s, s, S_TH_TYPE(n->suffix));
+ s += strlen(s);
+ break;
+ case DCH_YY:
+ case DCH_IY:
+ sprintf(s, "%0*d",
+ S_FM(n->suffix) ? 0 :
+ (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3,
+ (n->key->id == DCH_YY ?
+ ADJUST_YEAR(tm->tm_year, is_interval) :
+ ADJUST_YEAR(date2isoyear(tm->tm_year,
+ tm->tm_mon,
+ tm->tm_mday),
+ is_interval)) % 100);
+ if (S_THth(n->suffix))
+ str_numth(s, s, S_TH_TYPE(n->suffix));
+ s += strlen(s);
+ break;
+ case DCH_Y:
+ case DCH_I:
+ sprintf(s, "%1d",
+ (n->key->id == DCH_Y ?
+ ADJUST_YEAR(tm->tm_year, is_interval) :
+ ADJUST_YEAR(date2isoyear(tm->tm_year,
+ tm->tm_mon,
+ tm->tm_mday),
+ is_interval)) % 10);
+ if (S_THth(n->suffix))
+ str_numth(s, s, S_TH_TYPE(n->suffix));
+ s += strlen(s);
+ break;
+ case DCH_RM:
+ /* FALLTHROUGH */
+ case DCH_rm:
+
+ /*
+ * For intervals, values like '12 months' will have been reduced to
+ * 0 months and some number of years; these still need to be
+ * processed here.
+ */
+ if (!tm->tm_mon && !tm->tm_year)
+ break;
+ else
+ {
+ int mon = 0;
+ const char *const *months;
+
+ if (n->key->id == DCH_RM)
+ months = rm_months_upper;
+ else
+ months = rm_months_lower;
+
+ /*
+ * Compute the position in the roman-numeral array. Note
+ * that the contents of the array are reversed, December
+ * being first and January last.
+ */
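+ /*
+ * For example, March has tm_mon = 3, so mon = 12 - 3 = 9 points
+ * at "III" in the reversed array; to_char(date '2024-03-01',
+ * 'FMRM') should therefore print 'III'.
+ */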
+ if (tm->tm_mon == 0)
+ {
+ /*
+ * Special case: the interval consists of full years only, so
+ * the month component has been reduced to zero.
+ */
+ mon = tm->tm_year >= 0 ? 0 : MONTHS_PER_YEAR - 1;
+ }
+ else if (tm->tm_mon < 0)
+ {
+ /*
+ * Negative case. In this case, the calculation is
+ * reversed, where -1 means December, -2 November,
+ * etc.
+ */
+ mon = -1 * (tm->tm_mon + 1);
+ }
+ else
+ {
+ /*
+ * Common case, with a strictly positive value. The
+ * position in the array matches with the value of
+ * tm_mon.
+ */
+ mon = MONTHS_PER_YEAR - tm->tm_mon;
+ }
+
+ sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
+ months[mon]);
+ s += strlen(s);
+ }
+ break;
+ case DCH_W:
+ sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1);
+ if (S_THth(n->suffix))
+ str_numth(s, s, S_TH_TYPE(n->suffix));
+ s += strlen(s);
+ break;
+ case DCH_J:
+ sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
+ if (S_THth(n->suffix))
+ str_numth(s, s, S_TH_TYPE(n->suffix));
+ s += strlen(s);
+ break;
+ }
+ }
+
+ *s = '\0';
+}
+
+/*
+ * Process the string 'in' as denoted by the array of FormatNodes 'node[]'.
+ * The TmFromChar struct pointed to by 'out' is populated with the results.
+ *
+ * 'collid' identifies the collation to use, if needed.
+ * 'std' specifies standard parsing mode.
+ * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
+ *
+ * Note: we currently don't have any to_interval() function, so there
+ * is no need here for INVALID_FOR_INTERVAL checks.
+ */
+static void
+DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
+ Oid collid, bool std, bool *have_error)
+{
+ FormatNode *n;
+ const char *s;
+ int len,
+ value;
+ bool fx_mode = std;
+
+ /* number of extra skipped characters (more than given in format string) */
+ int extra_skip = 0;
+
+ /* cache localized days and months */
+ cache_locale_time();
+
+ for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
+ {
+ /*
+ * Ignore spaces at the beginning of the string and before fields when
+ * not in FX (fixed width) mode.
+ */
+ if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) &&
+ (n->type == NODE_TYPE_ACTION || n == node))
+ {
+ while (*s != '\0' && isspace((unsigned char) *s))
+ {
+ s++;
+ extra_skip++;
+ }
+ }
+
+ if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR)
+ {
+ if (std)
+ {
+ /*
+ * Standard mode requires strict matching between format
+ * string separators/spaces and input string.
+ */
+ Assert(n->character[0] && !n->character[1]);
+
+ if (*s == n->character[0])
+ s++;
+ else
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+ errmsg("unmatched format separator \"%c\"",
+ n->character[0]))));
+ }
+ else if (!fx_mode)
+ {
+ /*
+ * In non-FX (fixed format) mode, one space or separator in the
+ * format string matches one space or separator in the input
+ * string, or matches nothing if there is no space or separator
+ * at the current position of the input string.
+ */
+ extra_skip--;
+ if (isspace((unsigned char) *s) || is_separator_char(s))
+ {
+ s++;
+ extra_skip++;
+ }
+ }
+ else
+ {
+ /*
+ * In FX mode, a space or separator in the format string consumes
+ * exactly one character from the input string. Notice we don't
+ * insist that the consumed character match the format's
+ * character.
+ */
+ s += pg_mblen(s);
+ }
+ continue;
+ }
+ else if (n->type != NODE_TYPE_ACTION)
+ {
+ /*
+ * Text character, so consume one character from input string.
+ * Notice we don't insist that the consumed character match the
+ * format's character.
+ */
+ if (!fx_mode)
+ {
+ /*
+ * In non-FX mode we might already have skipped some extra
+ * characters (more than the format string specified). In that
+ * case we don't skip an input string character here, because it
+ * might be part of a field.
+ */
+ if (extra_skip > 0)
+ extra_skip--;
+ else
+ s += pg_mblen(s);
+ }
+ else
+ {
+ int chlen = pg_mblen(s);
+
+ /*
+ * Standard mode requires strict match of format characters.
+ */
+ if (std && n->type == NODE_TYPE_CHAR &&
+ strncmp(s, n->character, chlen) != 0)
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+ errmsg("unmatched format character \"%s\"",
+ n->character))));
+
+ s += chlen;
+ }
+ continue;
+ }
+
+ from_char_set_mode(out, n->key->date_mode, have_error);
+ CHECK_ERROR;
+
+ switch (n->key->id)
+ {
+ case DCH_FX:
+ fx_mode = true;
+ break;
+ case DCH_A_M:
+ case DCH_P_M:
+ case DCH_a_m:
+ case DCH_p_m:
+ from_char_seq_search(&value, &s, ampm_strings_long,
+ NULL, InvalidOid,
+ n, have_error);
+ CHECK_ERROR;
+ from_char_set_int(&out->pm, value % 2, n, have_error);
+ CHECK_ERROR;
+ out->clock = CLOCK_12_HOUR;
+ break;
+ case DCH_AM:
+ case DCH_PM:
+ case DCH_am:
+ case DCH_pm:
+ from_char_seq_search(&value, &s, ampm_strings,
+ NULL, InvalidOid,
+ n, have_error);
+ CHECK_ERROR;
+ from_char_set_int(&out->pm, value % 2, n, have_error);
+ CHECK_ERROR;
+ out->clock = CLOCK_12_HOUR;
+ break;
+ case DCH_HH:
+ case DCH_HH12:
+ from_char_parse_int_len(&out->hh, &s, 2, n, have_error);
+ CHECK_ERROR;
+ out->clock = CLOCK_12_HOUR;
+ SKIP_THth(s, n->suffix);
+ break;
+ case DCH_HH24:
+ from_char_parse_int_len(&out->hh, &s, 2, n, have_error);
+ CHECK_ERROR;
+ SKIP_THth(s, n->suffix);
+ break;
+ case DCH_MI:
+ from_char_parse_int(&out->mi, &s, n, have_error);
+ CHECK_ERROR;
+ SKIP_THth(s, n->suffix);
+ break;
+ case DCH_SS:
+ from_char_parse_int(&out->ss, &s, n, have_error);
+ CHECK_ERROR;
+ SKIP_THth(s, n->suffix);
+ break;
+ case DCH_MS: /* millisecond */
+ len = from_char_parse_int_len(&out->ms, &s, 3, n, have_error);
+ CHECK_ERROR;
+
+ /*
+ * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25
+ */
+ out->ms *= len == 1 ? 100 :
+ len == 2 ? 10 : 1;
+
+ SKIP_THth(s, n->suffix);
+ break;
+ case DCH_FF1:
+ case DCH_FF2:
+ case DCH_FF3:
+ case DCH_FF4:
+ case DCH_FF5:
+ case DCH_FF6:
+ out->ff = n->key->id - DCH_FF1 + 1;
+ /* fall through */
+ case DCH_US: /* microsecond */
+ len = from_char_parse_int_len(&out->us, &s,
+ n->key->id == DCH_US ? 6 :
+ out->ff, n, have_error);
+ CHECK_ERROR;
+
+ out->us *= len == 1 ? 100000 :
+ len == 2 ? 10000 :
+ len == 3 ? 1000 :
+ len == 4 ? 100 :
+ len == 5 ? 10 : 1;
+
+ SKIP_THth(s, n->suffix);
+ break;
+ case DCH_SSSS:
+ from_char_parse_int(&out->ssss, &s, n, have_error);
+ CHECK_ERROR;
+ SKIP_THth(s, n->suffix);
+ break;
+ case DCH_tz:
+ case DCH_TZ:
+ case DCH_OF:
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("formatting field \"%s\" is only supported in to_char",
+ n->key->name))));
+ CHECK_ERROR;
+ break;
+ case DCH_TZH:
+
+ /*
+ * The value of TZH might be negative, and the problem is that
+ * the minus sign might already have been swallowed as a
+ * separator. So, if we have skipped more characters than the
+ * format string specified, treat the last skipped minus sign as
+ * belonging to TZH.
+ */
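+ /*
+ * For example, to_timestamp('2000 -10', 'YYYY TZH') should match
+ * -10 to TZH, while to_timestamp('2000 -10', 'YYYY  TZH') (two
+ * separators before TZH) matches 10, the minus sign having been
+ * counted as one of the separators.
+ */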
+ if (*s == '+' || *s == '-' || *s == ' ')
+ {
+ out->tzsign = *s == '-' ? -1 : +1;
+ s++;
+ }
+ else
+ {
+ if (extra_skip > 0 && *(s - 1) == '-')
+ out->tzsign = -1;
+ else
+ out->tzsign = +1;
+ }
+
+ from_char_parse_int_len(&out->tzh, &s, 2, n, have_error);
+ CHECK_ERROR;
+ break;
+ case DCH_TZM:
+ /* assign positive timezone sign if TZH was not seen before */
+ if (!out->tzsign)
+ out->tzsign = +1;
+ from_char_parse_int_len(&out->tzm, &s, 2, n, have_error);
+ CHECK_ERROR;
+ break;
+ case DCH_A_D:
+ case DCH_B_C:
+ case DCH_a_d:
+ case DCH_b_c:
+ from_char_seq_search(&value, &s, adbc_strings_long,
+ NULL, InvalidOid,
+ n, have_error);
+ CHECK_ERROR;
+ from_char_set_int(&out->bc, value % 2, n, have_error);
+ CHECK_ERROR;
+ break;
+ case DCH_AD:
+ case DCH_BC:
+ case DCH_ad:
+ case DCH_bc:
+ from_char_seq_search(&value, &s, adbc_strings,
+ NULL, InvalidOid,
+ n, have_error);
+ CHECK_ERROR;
+ from_char_set_int(&out->bc, value % 2, n, have_error);
+ CHECK_ERROR;
+ break;
+ case DCH_MONTH:
+ case DCH_Month:
+ case DCH_month:
+ from_char_seq_search(&value, &s, months_full,
+ S_TM(n->suffix) ? localized_full_months : NULL,
+ collid,
+ n, have_error);
+ CHECK_ERROR;
+ from_char_set_int(&out->mm, value + 1, n, have_error);
+ CHECK_ERROR;
+ break;
+ case DCH_MON:
+ case DCH_Mon:
+ case DCH_mon:
+ from_char_seq_search(&value, &s, months,
+ S_TM(n->suffix) ? localized_abbrev_months : NULL,
+ collid,
+ n, have_error);
+ CHECK_ERROR;
+ from_char_set_int(&out->mm, value + 1, n, have_error);
+ CHECK_ERROR;
+ break;
+ case DCH_MM:
+ from_char_parse_int(&out->mm, &s, n, have_error);
+ CHECK_ERROR;
+ SKIP_THth(s, n->suffix);
+ break;
+ case DCH_DAY:
+ case DCH_Day:
+ case DCH_day:
+ from_char_seq_search(&value, &s, days,
+ S_TM(n->suffix) ? localized_full_days : NULL,
+ collid,
+ n, have_error);
+ CHECK_ERROR;
+ from_char_set_int(&out->d, value, n, have_error);
+ CHECK_ERROR;
+ out->d++;
+ break;
+ case DCH_DY:
+ case DCH_Dy:
+ case DCH_dy:
+ from_char_seq_search(&value, &s, days_short,
+ S_TM(n->suffix) ? localized_abbrev_days : NULL,
+ collid,
+ n, have_error);
+ CHECK_ERROR;
+ from_char_set_int(&out->d, value, n, have_error);
+ CHECK_ERROR;
+ out->d++;
+ break;
+ case DCH_DDD:
+ from_char_parse_int(&out->ddd, &s, n, have_error);
+ CHECK_ERROR;
+ SKIP_THth(s, n->suffix);
+ break;
+ case DCH_IDDD:
+ from_char_parse_int_len(&out->ddd, &s, 3, n, have_error);
+ CHECK_ERROR;
+ SKIP_THth(s, n->suffix);
+ break;
+ case DCH_DD:
+ from_char_parse_int(&out->dd, &s, n, have_error);
+ CHECK_ERROR;
+ SKIP_THth(s, n->suffix);
+ break;
+ case DCH_D:
+ from_char_parse_int(&out->d, &s, n, have_error);
+ CHECK_ERROR;
+ SKIP_THth(s, n->suffix);
+ break;
+ case DCH_ID:
+ from_char_parse_int_len(&out->d, &s, 1, n, have_error);
+ CHECK_ERROR;
+ /* Shift numbering to match Gregorian where Sunday = 1 */
+ if (++out->d > 7)
+ out->d = 1;
+ SKIP_THth(s, n->suffix);
+ break;
+ case DCH_WW:
+ case DCH_IW:
+ from_char_parse_int(&out->ww, &s, n, have_error);
+ CHECK_ERROR;
+ SKIP_THth(s, n->suffix);
+ break;
+ case DCH_Q:
+
+ /*
+ * We ignore 'Q' when converting to date because it is unclear
+ * which date in the quarter to use, and some people specify
+ * both quarter and month, so if it was honored it might
+ * conflict with the supplied month. That is also why we don't
+ * throw an error.
+ *
+ * We still parse the source string for an integer, but it
+ * isn't stored anywhere in 'out'.
+ */
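+ /*
+ * For instance, to_date('2023-3', 'YYYY-Q') should simply give
+ * 2023-01-01; the '3' is consumed but does not affect the result.
+ */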
+ from_char_parse_int((int *) NULL, &s, n, have_error);
+ CHECK_ERROR;
+ SKIP_THth(s, n->suffix);
+ break;
+ case DCH_CC:
+ from_char_parse_int(&out->cc, &s, n, have_error);
+ CHECK_ERROR;
+ SKIP_THth(s, n->suffix);
+ break;
+ case DCH_Y_YYY:
+ {
+ int matched,
+ years,
+ millennia,
+ nch;
+
+ matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch);
+ if (matched < 2)
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+ errmsg("invalid input string for \"Y,YYY\""))));
+ years += (millennia * 1000);
+ from_char_set_int(&out->year, years, n, have_error);
+ CHECK_ERROR;
+ out->yysz = 4;
+ s += nch;
+ SKIP_THth(s, n->suffix);
+ }
+ break;
+ case DCH_YYYY:
+ case DCH_IYYY:
+ from_char_parse_int(&out->year, &s, n, have_error);
+ CHECK_ERROR;
+ out->yysz = 4;
+ SKIP_THth(s, n->suffix);
+ break;
+ case DCH_YYY:
+ case DCH_IYY:
+ len = from_char_parse_int(&out->year, &s, n, have_error);
+ CHECK_ERROR;
+ if (len < 4)
+ out->year = adjust_partial_year_to_2020(out->year);
+ out->yysz = 3;
+ SKIP_THth(s, n->suffix);
+ break;
+ case DCH_YY:
+ case DCH_IY:
+ len = from_char_parse_int(&out->year, &s, n, have_error);
+ CHECK_ERROR;
+ if (len < 4)
+ out->year = adjust_partial_year_to_2020(out->year);
+ out->yysz = 2;
+ SKIP_THth(s, n->suffix);
+ break;
+ case DCH_Y:
+ case DCH_I:
+ len = from_char_parse_int(&out->year, &s, n, have_error);
+ CHECK_ERROR;
+ if (len < 4)
+ out->year = adjust_partial_year_to_2020(out->year);
+ out->yysz = 1;
+ SKIP_THth(s, n->suffix);
+ break;
+ case DCH_RM:
+ case DCH_rm:
+ from_char_seq_search(&value, &s, rm_months_lower,
+ NULL, InvalidOid,
+ n, have_error);
+ CHECK_ERROR;
+ from_char_set_int(&out->mm, MONTHS_PER_YEAR - value,
+ n, have_error);
+ CHECK_ERROR;
+ break;
+ case DCH_W:
+ from_char_parse_int(&out->w, &s, n, have_error);
+ CHECK_ERROR;
+ SKIP_THth(s, n->suffix);
+ break;
+ case DCH_J:
+ from_char_parse_int(&out->j, &s, n, have_error);
+ CHECK_ERROR;
+ SKIP_THth(s, n->suffix);
+ break;
+ }
+
+ /* Ignore all spaces after fields */
+ if (!fx_mode)
+ {
+ extra_skip = 0;
+ while (*s != '\0' && isspace((unsigned char) *s))
+ {
+ s++;
+ extra_skip++;
+ }
+ }
+ }
+
+ /*
+ * Standard parsing mode doesn't allow unmatched format patterns or
+ * trailing characters in the input string.
+ */
+ if (std)
+ {
+ if (n->type != NODE_TYPE_END)
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+ errmsg("input string is too short for datetime format"))));
+
+ while (*s != '\0' && isspace((unsigned char) *s))
+ s++;
+
+ if (*s != '\0')
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+ errmsg("trailing characters remain in input string "
+ "after datetime format"))));
+ }
+
+on_error:
+ return;
+}
+
+/*
+ * The invariant for DCH cache entry management is that DCHCounter is equal
+ * to the maximum age value among the existing entries, and we increment it
+ * whenever an access occurs. If we approach overflow, deal with that by
+ * halving all the age values, so that we retain a fairly accurate idea of
+ * which entries are oldest.
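+ *
+ * For instance, if two entries have ages 1000 and 2000 when the counter
+ * nears INT_MAX, halving leaves them at 500 and 1000, so their relative
+ * order (and hence the oldest-entry choice) is preserved.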
+ */
+static inline void
+DCH_prevent_counter_overflow(void)
+{
+ if (DCHCounter >= (INT_MAX - 1))
+ {
+ for (int i = 0; i < n_DCHCache; i++)
+ DCHCache[i]->age >>= 1;
+ DCHCounter >>= 1;
+ }
+}
+
+/*
+ * Get mask of date/time/zone components present in format nodes.
+ *
+ * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
+ */
+static int
+DCH_datetime_type(FormatNode *node, bool *have_error)
+{
+ FormatNode *n;
+ int flags = 0;
+
+ for (n = node; n->type != NODE_TYPE_END; n++)
+ {
+ if (n->type != NODE_TYPE_ACTION)
+ continue;
+
+ switch (n->key->id)
+ {
+ case DCH_FX:
+ break;
+ case DCH_A_M:
+ case DCH_P_M:
+ case DCH_a_m:
+ case DCH_p_m:
+ case DCH_AM:
+ case DCH_PM:
+ case DCH_am:
+ case DCH_pm:
+ case DCH_HH:
+ case DCH_HH12:
+ case DCH_HH24:
+ case DCH_MI:
+ case DCH_SS:
+ case DCH_MS: /* millisecond */
+ case DCH_US: /* microsecond */
+ case DCH_FF1:
+ case DCH_FF2:
+ case DCH_FF3:
+ case DCH_FF4:
+ case DCH_FF5:
+ case DCH_FF6:
+ case DCH_SSSS:
+ flags |= DCH_TIMED;
+ break;
+ case DCH_tz:
+ case DCH_TZ:
+ case DCH_OF:
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("formatting field \"%s\" is only supported in to_char",
+ n->key->name))));
+ flags |= DCH_ZONED;
+ break;
+ case DCH_TZH:
+ case DCH_TZM:
+ flags |= DCH_ZONED;
+ break;
+ case DCH_A_D:
+ case DCH_B_C:
+ case DCH_a_d:
+ case DCH_b_c:
+ case DCH_AD:
+ case DCH_BC:
+ case DCH_ad:
+ case DCH_bc:
+ case DCH_MONTH:
+ case DCH_Month:
+ case DCH_month:
+ case DCH_MON:
+ case DCH_Mon:
+ case DCH_mon:
+ case DCH_MM:
+ case DCH_DAY:
+ case DCH_Day:
+ case DCH_day:
+ case DCH_DY:
+ case DCH_Dy:
+ case DCH_dy:
+ case DCH_DDD:
+ case DCH_IDDD:
+ case DCH_DD:
+ case DCH_D:
+ case DCH_ID:
+ case DCH_WW:
+ case DCH_Q:
+ case DCH_CC:
+ case DCH_Y_YYY:
+ case DCH_YYYY:
+ case DCH_IYYY:
+ case DCH_YYY:
+ case DCH_IYY:
+ case DCH_YY:
+ case DCH_IY:
+ case DCH_Y:
+ case DCH_I:
+ case DCH_RM:
+ case DCH_rm:
+ case DCH_W:
+ case DCH_J:
+ flags |= DCH_DATED;
+ break;
+ }
+ }
+
+on_error:
+ return flags;
+}
+
+/* select a DCHCacheEntry to hold the given format picture */
+static DCHCacheEntry *
+DCH_cache_getnew(const char *str, bool std)
+{
+ DCHCacheEntry *ent;
+
+ /* Ensure we can advance DCHCounter below */
+ DCH_prevent_counter_overflow();
+
+ /*
+ * If cache is full, remove oldest entry (or recycle first not-valid one)
+ */
+ if (n_DCHCache >= DCH_CACHE_ENTRIES)
+ {
+ DCHCacheEntry *old = DCHCache[0];
+
+#ifdef DEBUG_TO_FROM_CHAR
+ elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache);
+#endif
+ if (old->valid)
+ {
+ for (int i = 1; i < DCH_CACHE_ENTRIES; i++)
+ {
+ ent = DCHCache[i];
+ if (!ent->valid)
+ {
+ old = ent;
+ break;
+ }
+ if (ent->age < old->age)
+ old = ent;
+ }
+ }
+#ifdef DEBUG_TO_FROM_CHAR
+ elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age);
+#endif
+ old->valid = false;
+ strlcpy(old->str, str, DCH_CACHE_SIZE + 1);
+ old->age = (++DCHCounter);
+ /* caller is expected to fill format, then set valid */
+ return old;
+ }
+ else
+ {
+#ifdef DEBUG_TO_FROM_CHAR
+ elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache);
+#endif
+ Assert(DCHCache[n_DCHCache] == NULL);
+ DCHCache[n_DCHCache] = ent = (DCHCacheEntry *)
+ MemoryContextAllocZero(TopMemoryContext, sizeof(DCHCacheEntry));
+ ent->valid = false;
+ strlcpy(ent->str, str, DCH_CACHE_SIZE + 1);
+ ent->std = std;
+ ent->age = (++DCHCounter);
+ /* caller is expected to fill format, then set valid */
+ ++n_DCHCache;
+ return ent;
+ }
+}
+
+/* look for an existing DCHCacheEntry matching the given format picture */
+static DCHCacheEntry *
+DCH_cache_search(const char *str, bool std)
+{
+ /* Ensure we can advance DCHCounter below */
+ DCH_prevent_counter_overflow();
+
+ for (int i = 0; i < n_DCHCache; i++)
+ {
+ DCHCacheEntry *ent = DCHCache[i];
+
+ if (ent->valid && strcmp(ent->str, str) == 0 && ent->std == std)
+ {
+ ent->age = (++DCHCounter);
+ return ent;
+ }
+ }
+
+ return NULL;
+}
+
+/* Find or create a DCHCacheEntry for the given format picture */
+static DCHCacheEntry *
+DCH_cache_fetch(const char *str, bool std)
+{
+ DCHCacheEntry *ent;
+
+ if ((ent = DCH_cache_search(str, std)) == NULL)
+ {
+ /*
+ * Not in the cache, must run parser and save a new format-picture to
+ * the cache. Do not mark the cache entry valid until parsing
+ * succeeds.
+ */
+ ent = DCH_cache_getnew(str, std);
+
+ parse_format(ent->format, str, DCH_keywords, DCH_suff, DCH_index,
+ DCH_FLAG | (std ? STD_FLAG : 0), NULL);
+
+ ent->valid = true;
+ }
+ return ent;
+}
+
+/*
+ * Format a date/time or interval into a string according to fmt.
+ * We parse fmt into a list of FormatNodes. This is then passed to DCH_to_char
+ * for formatting.
+ */
+static text *
+datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
+{
+ FormatNode *format;
+ char *fmt_str,
+ *result;
+ bool incache;
+ int fmt_len;
+ text *res;
+
+ /*
+ * Convert fmt to C string
+ */
+ fmt_str = text_to_cstring(fmt);
+ fmt_len = strlen(fmt_str);
+
+ /*
+ * Allocate workspace for result as C string
+ */
+ result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1);
+ *result = '\0';
+
+ if (fmt_len > DCH_CACHE_SIZE)
+ {
+ /*
+ * If the format picture is bigger than the static cache, allocate
+ * fresh memory and bypass the cache (always run the parser).
+ */
+ incache = false;
+
+ format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
+
+ parse_format(format, fmt_str, DCH_keywords,
+ DCH_suff, DCH_index, DCH_FLAG, NULL);
+ }
+ else
+ {
+ /*
+ * Use cache buffers
+ */
+ DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false);
+
+ incache = true;
+ format = ent->format;
+ }
+
+ /* The real work is here */
+ DCH_to_char(format, is_interval, tmtc, result, collid);
+
+ if (!incache)
+ pfree(format);
+
+ pfree(fmt_str);
+
+ /* convert C-string result to TEXT format */
+ res = cstring_to_text(result);
+
+ pfree(result);
+ return res;
+}
+
+/****************************************************************************
+ * Public routines
+ ***************************************************************************/
+
+/* -------------------
+ * TIMESTAMP to_char()
+ * -------------------
+ */
+Datum
+timestamp_to_char(PG_FUNCTION_ARGS)
+{
+ Timestamp dt = PG_GETARG_TIMESTAMP(0);
+ text *fmt = PG_GETARG_TEXT_PP(1),
+ *res;
+ TmToChar tmtc;
+ struct pg_tm tt;
+ struct fmt_tm *tm;
+ int thisdate;
+
+ if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
+ PG_RETURN_NULL();
+
+ ZERO_tmtc(&tmtc);
+ tm = tmtcTm(&tmtc);
+
+ if (timestamp2tm(dt, NULL, &tt, &tmtcFsec(&tmtc), NULL, NULL) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ /* calculate wday and yday, because timestamp2tm doesn't */
+ thisdate = date2j(tt.tm_year, tt.tm_mon, tt.tm_mday);
+ tt.tm_wday = (thisdate + 1) % 7;
+ tt.tm_yday = thisdate - date2j(tt.tm_year, 1, 1) + 1;
+
+ COPY_tm(tm, &tt);
+
+ if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(res);
+}
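+
+/*
+ * Usage example: to_char(timestamp '2002-04-20 17:31:12.66', 'HH12:MI:SS')
+ * should yield '05:31:12'.
+ */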
+
+Datum
+timestamptz_to_char(PG_FUNCTION_ARGS)
+{
+ TimestampTz dt = PG_GETARG_TIMESTAMP(0);
+ text *fmt = PG_GETARG_TEXT_PP(1),
+ *res;
+ TmToChar tmtc;
+ int tz;
+ struct pg_tm tt;
+ struct fmt_tm *tm;
+ int thisdate;
+
+ if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
+ PG_RETURN_NULL();
+
+ ZERO_tmtc(&tmtc);
+ tm = tmtcTm(&tmtc);
+
+ if (timestamp2tm(dt, &tz, &tt, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ /* calculate wday and yday, because timestamp2tm doesn't */
+ thisdate = date2j(tt.tm_year, tt.tm_mon, tt.tm_mday);
+ tt.tm_wday = (thisdate + 1) % 7;
+ tt.tm_yday = thisdate - date2j(tt.tm_year, 1, 1) + 1;
+
+ COPY_tm(tm, &tt);
+
+ if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(res);
+}
+
+
+/* -------------------
+ * INTERVAL to_char()
+ * -------------------
+ */
+Datum
+interval_to_char(PG_FUNCTION_ARGS)
+{
+ Interval *it = PG_GETARG_INTERVAL_P(0);
+ text *fmt = PG_GETARG_TEXT_PP(1),
+ *res;
+ TmToChar tmtc;
+ struct fmt_tm *tm;
+ struct pg_itm tt,
+ *itm = &tt;
+
+ if (VARSIZE_ANY_EXHDR(fmt) <= 0)
+ PG_RETURN_NULL();
+
+ ZERO_tmtc(&tmtc);
+ tm = tmtcTm(&tmtc);
+
+ interval2itm(*it, itm);
+ tmtc.fsec = itm->tm_usec;
+ tm->tm_sec = itm->tm_sec;
+ tm->tm_min = itm->tm_min;
+ tm->tm_hour = itm->tm_hour;
+ tm->tm_mday = itm->tm_mday;
+ tm->tm_mon = itm->tm_mon;
+ tm->tm_year = itm->tm_year;
+
+ /* wday is meaningless, yday approximates the total span in days */
+ tm->tm_yday = (tm->tm_year * MONTHS_PER_YEAR + tm->tm_mon) * DAYS_PER_MONTH + tm->tm_mday;
+
+ if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION())))
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(res);
+}
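+
+/*
+ * Usage example: to_char(interval '15h 2m 12s', 'HH24:MI:SS') should
+ * yield '15:02:12'.
+ */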
+
+/* ---------------------
+ * TO_TIMESTAMP()
+ *
+ * Make a Timestamp from date_str, which is formatted according to 'fmt'
+ * (to_timestamp is the inverse of to_char())
+ * ---------------------
+ */
+Datum
+to_timestamp(PG_FUNCTION_ARGS)
+{
+ text *date_txt = PG_GETARG_TEXT_PP(0);
+ text *fmt = PG_GETARG_TEXT_PP(1);
+ Oid collid = PG_GET_COLLATION();
+ Timestamp result;
+ int tz;
+ struct pg_tm tm;
+ fsec_t fsec;
+ int fprec;
+
+ do_to_timestamp(date_txt, fmt, collid, false,
+ &tm, &fsec, &fprec, NULL, NULL);
+
+ /* Use the specified time zone, if any. */
+ if (tm.tm_zone)
+ {
+ int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), &tz);
+
+ if (dterr)
+ DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
+ }
+ else
+ tz = DetermineTimeZoneOffset(&tm, session_timezone);
+
+ if (tm2timestamp(&tm, fsec, &tz, &result) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ /* Use the specified fractional precision, if any. */
+ if (fprec)
+ AdjustTimestampForTypmod(&result, fprec);
+
+ PG_RETURN_TIMESTAMP(result);
+}
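+
+/*
+ * Usage example: to_timestamp('05 Dec 2000', 'DD Mon YYYY') should yield
+ * 2000-12-05 00:00:00 in the session time zone.
+ */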
+
+/* ----------
+ * TO_DATE
+ * Make a Date from date_str, which is formatted according to 'fmt'
+ * ----------
+ */
+Datum
+to_date(PG_FUNCTION_ARGS)
+{
+ text *date_txt = PG_GETARG_TEXT_PP(0);
+ text *fmt = PG_GETARG_TEXT_PP(1);
+ Oid collid = PG_GET_COLLATION();
+ DateADT result;
+ struct pg_tm tm;
+ fsec_t fsec;
+
+ do_to_timestamp(date_txt, fmt, collid, false,
+ &tm, &fsec, NULL, NULL, NULL);
+
+ /* Prevent overflow in Julian-day routines */
+ if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("date out of range: \"%s\"",
+ text_to_cstring(date_txt))));
+
+ result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE;
+
+ /* Now check for just-out-of-range dates */
+ if (!IS_VALID_DATE(result))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("date out of range: \"%s\"",
+ text_to_cstring(date_txt))));
+
+ PG_RETURN_DATEADT(result);
+}
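+
+/*
+ * Usage example: to_date('05 Dec 2000', 'DD Mon YYYY') should yield
+ * 2000-12-05.
+ */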
+
+/*
+ * Convert the 'date_txt' input to a datetime type using argument 'fmt'
+ * as a format string. The collation 'collid' may be used for case-folding
+ * rules in some cases. 'strict' specifies standard parsing mode.
+ *
+ * The actual data type (returned in 'typid', 'typmod') is determined by
+ * the presence of date/time/zone components in the format string.
+ *
+ * When a timezone component is present, the corresponding offset is
+ * returned in '*tz'.
+ *
+ * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set
+ * and a zero value is returned.
+ */
+Datum
+parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict,
+ Oid *typid, int32 *typmod, int *tz,
+ bool *have_error)
+{
+ struct pg_tm tm;
+ fsec_t fsec;
+ int fprec;
+ uint32 flags;
+
+ do_to_timestamp(date_txt, fmt, collid, strict,
+ &tm, &fsec, &fprec, &flags, have_error);
+ CHECK_ERROR;
+
+ *typmod = fprec ? fprec : -1; /* fractional part precision */
+
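+ /*
+ * For example, a format of 'YYYY-MM-DD' sets only DCH_DATED and
+ * yields a date; 'HH24:MI:SS' sets only DCH_TIMED and yields a time;
+ * 'YYYY-MM-DD HH24:MI:SS TZH:TZM' sets all three flags and yields a
+ * timestamptz.
+ */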
+ if (flags & DCH_DATED)
+ {
+ if (flags & DCH_TIMED)
+ {
+ if (flags & DCH_ZONED)
+ {
+ TimestampTz result;
+
+ if (tm.tm_zone)
+ {
+ int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
+
+ if (dterr)
+ DateTimeParseError(dterr, text_to_cstring(date_txt), "timestamptz");
+ }
+ else
+ {
+ /*
+ * Time zone is present in the format string, but not in the
+ * input string. Assuming do_to_timestamp() triggers no error,
+ * this should be possible only in the non-strict case.
+ */
+ Assert(!strict);
+
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+ errmsg("missing time zone in input string for type timestamptz"))));
+ }
+
+ if (tm2timestamp(&tm, fsec, tz, &result) != 0)
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamptz out of range"))));
+
+ AdjustTimestampForTypmod(&result, *typmod);
+
+ *typid = TIMESTAMPTZOID;
+ return TimestampTzGetDatum(result);
+ }
+ else
+ {
+ Timestamp result;
+
+ if (tm2timestamp(&tm, fsec, NULL, &result) != 0)
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range"))));
+
+ AdjustTimestampForTypmod(&result, *typmod);
+
+ *typid = TIMESTAMPOID;
+ return TimestampGetDatum(result);
+ }
+ }
+ else
+ {
+ if (flags & DCH_ZONED)
+ {
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+ errmsg("datetime format is zoned but not timed"))));
+ }
+ else
+ {
+ DateADT result;
+
+ /* Prevent overflow in Julian-day routines */
+ if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("date out of range: \"%s\"",
+ text_to_cstring(date_txt)))));
+
+ result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) -
+ POSTGRES_EPOCH_JDATE;
+
+ /* Now check for just-out-of-range dates */
+ if (!IS_VALID_DATE(result))
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("date out of range: \"%s\"",
+ text_to_cstring(date_txt)))));
+
+ *typid = DATEOID;
+ return DateADTGetDatum(result);
+ }
+ }
+ }
+ else if (flags & DCH_TIMED)
+ {
+ if (flags & DCH_ZONED)
+ {
+ TimeTzADT *result = palloc(sizeof(TimeTzADT));
+
+ if (tm.tm_zone)
+ {
+ int dterr = DecodeTimezone(unconstify(char *, tm.tm_zone), tz);
+
+ if (dterr)
+ RETURN_ERROR(DateTimeParseError(dterr, text_to_cstring(date_txt), "timetz"));
+ }
+ else
+ {
+ /*
+ * Time zone is present in the format string, but not in the
+ * input string. Assuming do_to_timestamp() triggers no error,
+ * this should be possible only in the non-strict case.
+ */
+ Assert(!strict);
+
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+ errmsg("missing time zone in input string for type timetz"))));
+ }
+
+ if (tm2timetz(&tm, fsec, *tz, result) != 0)
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timetz out of range"))));
+
+ AdjustTimeForTypmod(&result->time, *typmod);
+
+ *typid = TIMETZOID;
+ return TimeTzADTPGetDatum(result);
+ }
+ else
+ {
+ TimeADT result;
+
+ if (tm2time(&tm, fsec, &result) != 0)
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("time out of range"))));
+
+ AdjustTimeForTypmod(&result, *typmod);
+
+ *typid = TIMEOID;
+ return TimeADTGetDatum(result);
+ }
+ }
+ else
+ {
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+ errmsg("datetime format is not dated and not timed"))));
+ }
+
+on_error:
+ return (Datum) 0;
+}
+
+/*
+ * do_to_timestamp: shared code for to_timestamp and to_date
+ *
+ * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm,
+ * fractional seconds, and fractional precision.
+ *
+ * 'collid' identifies the collation to use, if needed.
+ * 'std' specifies standard parsing mode.
+ * Bit mask of date/time/zone components found in 'fmt' is returned in 'flags',
+ * if that is not NULL.
+ * If 'have_error' is NULL, then errors are thrown, else '*have_error' is set.
+ *
+ * We parse 'fmt' into a list of FormatNodes, which is then passed to
+ * DCH_from_char to populate a TmFromChar with the parsed contents of
+ * 'date_txt'.
+ *
+ * The TmFromChar is then analysed and converted into the final results in
+ * struct 'tm', 'fsec', and 'fprec'.
+ */
+static void
+do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
+ struct pg_tm *tm, fsec_t *fsec, int *fprec,
+ uint32 *flags, bool *have_error)
+{
+ FormatNode *format = NULL;
+ TmFromChar tmfc;
+ int fmt_len;
+ char *date_str;
+ int fmask;
+ bool incache = false;
+
+ Assert(tm != NULL);
+ Assert(fsec != NULL);
+
+ date_str = text_to_cstring(date_txt);
+
+ ZERO_tmfc(&tmfc);
+ ZERO_tm(tm);
+ *fsec = 0;
+ if (fprec)
+ *fprec = 0;
+ if (flags)
+ *flags = 0;
+ fmask = 0; /* bit mask for ValidateDate() */
+
+ fmt_len = VARSIZE_ANY_EXHDR(fmt);
+
+ if (fmt_len)
+ {
+ char *fmt_str;
+
+ fmt_str = text_to_cstring(fmt);
+
+ if (fmt_len > DCH_CACHE_SIZE)
+ {
+ /*
+ * If the format picture is bigger than the static cache, allocate
+ * fresh memory and bypass the cache (always run the parser).
+ */
+ format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
+
+ parse_format(format, fmt_str, DCH_keywords, DCH_suff, DCH_index,
+ DCH_FLAG | (std ? STD_FLAG : 0), NULL);
+ }
+ else
+ {
+ /*
+ * Use cache buffers
+ */
+ DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, std);
+
+ incache = true;
+ format = ent->format;
+ }
+
+#ifdef DEBUG_TO_FROM_CHAR
+ /* dump_node(format, fmt_len); */
+ /* dump_index(DCH_keywords, DCH_index); */
+#endif
+
+ DCH_from_char(format, date_str, &tmfc, collid, std, have_error);
+ CHECK_ERROR;
+
+ pfree(fmt_str);
+
+ if (flags)
+ *flags = DCH_datetime_type(format, have_error);
+
+ if (!incache)
+ {
+ pfree(format);
+ format = NULL;
+ }
+
+ CHECK_ERROR;
+ }
+
+ DEBUG_TMFC(&tmfc);
+
+ /*
+ * Convert to_date/to_timestamp input fields to standard 'tm'
+ */
+ if (tmfc.ssss)
+ {
+ int x = tmfc.ssss;
+
+ tm->tm_hour = x / SECS_PER_HOUR;
+ x %= SECS_PER_HOUR;
+ tm->tm_min = x / SECS_PER_MINUTE;
+ x %= SECS_PER_MINUTE;
+ tm->tm_sec = x;
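+ /* e.g. ssss = 3661 becomes 01:01:01 (3661 = 1*3600 + 1*60 + 1) */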
+ }
+
+ if (tmfc.ss)
+ tm->tm_sec = tmfc.ss;
+ if (tmfc.mi)
+ tm->tm_min = tmfc.mi;
+ if (tmfc.hh)
+ tm->tm_hour = tmfc.hh;
+
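+ /*
+ * For example, an input of '12:30 AM' parsed with 'HH12:MI AM' should
+ * give 00:30, while '12:30 PM' stays 12:30; hours outside 1..12 are
+ * rejected on the 12-hour clock.
+ */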
+ if (tmfc.clock == CLOCK_12_HOUR)
+ {
+ if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2)
+ {
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+ errmsg("hour \"%d\" is invalid for the 12-hour clock",
+ tm->tm_hour),
+ errhint("Use the 24-hour clock, or give an hour between 1 and 12."))));
+ }
+
+ if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2)
+ tm->tm_hour += HOURS_PER_DAY / 2;
+ else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2)
+ tm->tm_hour = 0;
+ }
+
+ if (tmfc.year)
+ {
+ /*
+ * If CC and YY (or Y) are provided, use YY as 2 low-order digits for
+ * the year in the given century. Keep in mind that the 21st century
+ * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from
+ * 600BC to 501BC.
+ */
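+ /*
+ * For example, to_date('14 21', 'YY CC') should yield 2014-01-01,
+ * i.e. year 14 of the 21st century.
+ */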
+ if (tmfc.cc && tmfc.yysz <= 2)
+ {
+ if (tmfc.bc)
+ tmfc.cc = -tmfc.cc;
+ tm->tm_year = tmfc.year % 100;
+ if (tm->tm_year)
+ {
+ if (tmfc.cc >= 0)
+ tm->tm_year += (tmfc.cc - 1) * 100;
+ else
+ tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1;
+ }
+ else
+ {
+ /* find century year for dates ending in "00" */
+ tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1);
+ }
+ }
+ else
+ {
+ /* If a 4-digit year is provided, we use that and ignore CC. */
+ tm->tm_year = tmfc.year;
+ if (tmfc.bc)
+ tm->tm_year = -tm->tm_year;
+ /* correct for our representation of BC years */
+ if (tm->tm_year < 0)
+ tm->tm_year++;
+ }
+ fmask |= DTK_M(YEAR);
+ }
+ else if (tmfc.cc)
+ {
+ /* use first year of century */
+ if (tmfc.bc)
+ tmfc.cc = -tmfc.cc;
+ if (tmfc.cc >= 0)
+ /* +1 because 21st century started in 2001 */
+ tm->tm_year = (tmfc.cc - 1) * 100 + 1;
+ else
+ /* +1 because year == 599 is 600 BC */
+ tm->tm_year = tmfc.cc * 100 + 1;
+ fmask |= DTK_M(YEAR);
+ }
+
+ if (tmfc.j)
+ {
+ j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
+ fmask |= DTK_DATE_M;
+ }
+
+ if (tmfc.ww)
+ {
+ if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
+ {
+ /*
+ * If tmfc.d is not set, then the date is left at the beginning of
+ * the ISO week (Monday).
+ */
+ if (tmfc.d)
+ isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
+ else
+ isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
+ fmask |= DTK_DATE_M;
+ }
+ else
+ tmfc.ddd = (tmfc.ww - 1) * 7 + 1;
+ }
+
+ if (tmfc.w)
+ tmfc.dd = (tmfc.w - 1) * 7 + 1;
+ if (tmfc.dd)
+ {
+ tm->tm_mday = tmfc.dd;
+ fmask |= DTK_M(DAY);
+ }
+ if (tmfc.mm)
+ {
+ tm->tm_mon = tmfc.mm;
+ fmask |= DTK_M(MONTH);
+ }
+
+ if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1))
+ {
+ /*
+ * The month and day fields have not been set, so we use the
+ * day-of-year field to populate them. Depending on the date mode,
+ * this field may be interpreted as a Gregorian day-of-year, or an ISO
+ * week date day-of-year.
+ */
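+ /*
+ * For example, to_date('2024 228', 'YYYY DDD') should resolve to
+ * 2024-08-15.
+ */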
+
+ if (!tm->tm_year && !tmfc.bc)
+ {
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
+ errmsg("cannot calculate day of year without year information"))));
+ }
+
+ if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
+ {
+ int j0; /* zeroth day of the ISO year, in Julian */
+
+ j0 = isoweek2j(tm->tm_year, 1) - 1;
+
+ j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
+ fmask |= DTK_DATE_M;
+ }
+ else
+ {
+ const int *y;
+ int i;
+
+ static const int ysum[2][13] = {
+ {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
+ {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}};
+
+ y = ysum[isleap(tm->tm_year)];
+
+ for (i = 1; i <= MONTHS_PER_YEAR; i++)
+ {
+ if (tmfc.ddd <= y[i])
+ break;
+ }
+ if (tm->tm_mon <= 1)
+ tm->tm_mon = i;
+
+ if (tm->tm_mday <= 1)
+ tm->tm_mday = tmfc.ddd - y[i - 1];
+
+ fmask |= DTK_M(MONTH) | DTK_M(DAY);
+ }
+ }
+
+ if (tmfc.ms)
+ *fsec += tmfc.ms * 1000;
+ if (tmfc.us)
+ *fsec += tmfc.us;
+ if (fprec)
+ *fprec = tmfc.ff; /* fractional precision, if specified */
+
+ /* Range-check date fields according to bit mask computed above */
+ if (fmask != 0)
+ {
+ /* We already dealt with AD/BC, so pass isjulian = true */
+ int dterr = ValidateDate(fmask, true, false, false, tm);
+
+ if (dterr != 0)
+ {
+ /*
+ * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate
+ * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an
+ * irrelevant hint about datestyle.
+ */
+ RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"));
+ }
+ }
+
+ /* Range-check time fields too */
+ if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY ||
+ tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR ||
+ tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE ||
+ *fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC)
+ {
+ RETURN_ERROR(DateTimeParseError(DTERR_FIELD_OVERFLOW, date_str, "timestamp"));
+ }
+
+ /* Save parsed time-zone into tm->tm_zone if it was specified */
+ if (tmfc.tzsign)
+ {
+ char *tz;
+
+ if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR ||
+ tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR)
+ {
+ RETURN_ERROR(DateTimeParseError(DTERR_TZDISP_OVERFLOW, date_str, "timestamp"));
+ }
+
+ tz = psprintf("%c%02d:%02d",
+ tmfc.tzsign > 0 ? '+' : '-', tmfc.tzh, tmfc.tzm);
+
+ tm->tm_zone = tz;
+ }
+
+ DEBUG_TM(tm);
+
+on_error:
+
+ if (format && !incache)
+ pfree(format);
+
+ pfree(date_str);
+}
+
+
+/**********************************************************************
+ * the NUMBER version part
+ *********************************************************************/
+
+
+static char *
+fill_str(char *str, int c, int max)
+{
+ memset(str, c, max);
+ *(str + max) = '\0';
+ return str;
+}
+
+#define zeroize_NUM(_n) \
+do { \
+ (_n)->flag = 0; \
+ (_n)->lsign = 0; \
+ (_n)->pre = 0; \
+ (_n)->post = 0; \
+ (_n)->pre_lsign_num = 0; \
+ (_n)->need_locale = 0; \
+ (_n)->multi = 0; \
+ (_n)->zero_start = 0; \
+ (_n)->zero_end = 0; \
+} while(0)
+
+/* This works the same as DCH_prevent_counter_overflow */
+static inline void
+NUM_prevent_counter_overflow(void)
+{
+ if (NUMCounter >= (INT_MAX - 1))
+ {
+ for (int i = 0; i < n_NUMCache; i++)
+ NUMCache[i]->age >>= 1;
+ NUMCounter >>= 1;
+ }
+}
+
+/* select a NUMCacheEntry to hold the given format picture */
+static NUMCacheEntry *
+NUM_cache_getnew(const char *str)
+{
+ NUMCacheEntry *ent;
+
+ /* Ensure we can advance NUMCounter below */
+ NUM_prevent_counter_overflow();
+
+ /*
+ * If cache is full, remove oldest entry (or recycle first not-valid one)
+ */
+ if (n_NUMCache >= NUM_CACHE_ENTRIES)
+ {
+ NUMCacheEntry *old = NUMCache[0];
+
+#ifdef DEBUG_TO_FROM_CHAR
+ elog(DEBUG_elog_output, "Cache is full (%d)", n_NUMCache);
+#endif
+ if (old->valid)
+ {
+ for (int i = 1; i < NUM_CACHE_ENTRIES; i++)
+ {
+ ent = NUMCache[i];
+ if (!ent->valid)
+ {
+ old = ent;
+ break;
+ }
+ if (ent->age < old->age)
+ old = ent;
+ }
+ }
+#ifdef DEBUG_TO_FROM_CHAR
+ elog(DEBUG_elog_output, "OLD: \"%s\" AGE: %d", old->str, old->age);
+#endif
+ old->valid = false;
+ strlcpy(old->str, str, NUM_CACHE_SIZE + 1);
+ old->age = (++NUMCounter);
+ /* caller is expected to fill format and Num, then set valid */
+ return old;
+ }
+ else
+ {
+#ifdef DEBUG_TO_FROM_CHAR
+ elog(DEBUG_elog_output, "NEW (%d)", n_NUMCache);
+#endif
+ Assert(NUMCache[n_NUMCache] == NULL);
+ NUMCache[n_NUMCache] = ent = (NUMCacheEntry *)
+ MemoryContextAllocZero(TopMemoryContext, sizeof(NUMCacheEntry));
+ ent->valid = false;
+ strlcpy(ent->str, str, NUM_CACHE_SIZE + 1);
+ ent->age = (++NUMCounter);
+ /* caller is expected to fill format and Num, then set valid */
+ ++n_NUMCache;
+ return ent;
+ }
+}
+
+/* look for an existing NUMCacheEntry matching the given format picture */
+static NUMCacheEntry *
+NUM_cache_search(const char *str)
+{
+ /* Ensure we can advance NUMCounter below */
+ NUM_prevent_counter_overflow();
+
+ for (int i = 0; i < n_NUMCache; i++)
+ {
+ NUMCacheEntry *ent = NUMCache[i];
+
+ if (ent->valid && strcmp(ent->str, str) == 0)
+ {
+ ent->age = (++NUMCounter);
+ return ent;
+ }
+ }
+
+ return NULL;
+}
+
+/* Find or create a NUMCacheEntry for the given format picture */
+static NUMCacheEntry *
+NUM_cache_fetch(const char *str)
+{
+ NUMCacheEntry *ent;
+
+ if ((ent = NUM_cache_search(str)) == NULL)
+ {
+ /*
+ * Not in the cache, must run parser and save a new format-picture to
+ * the cache. Do not mark the cache entry valid until parsing
+ * succeeds.
+ */
+ ent = NUM_cache_getnew(str);
+
+ zeroize_NUM(&ent->Num);
+
+ parse_format(ent->format, str, NUM_keywords,
+ NULL, NUM_index, NUM_FLAG, &ent->Num);
+
+ ent->valid = true;
+ }
+ return ent;
+}
+
+/* ----------
+ * Cache routine for NUM to_char version
+ * ----------
+ */
+static FormatNode *
+NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree)
+{
+ FormatNode *format = NULL;
+ char *str;
+
+ str = text_to_cstring(pars_str);
+
+ if (len > NUM_CACHE_SIZE)
+ {
+ /*
+ * If the format picture is bigger than the static cache, allocate
+ * fresh memory and bypass the cache (always run the parser).
+ */
+ format = (FormatNode *) palloc((len + 1) * sizeof(FormatNode));
+
+ *shouldFree = true;
+
+ zeroize_NUM(Num);
+
+ parse_format(format, str, NUM_keywords,
+ NULL, NUM_index, NUM_FLAG, Num);
+ }
+ else
+ {
+ /*
+ * Use cache buffers
+ */
+ NUMCacheEntry *ent = NUM_cache_fetch(str);
+
+ *shouldFree = false;
+
+ format = ent->format;
+
+ /*
+ * Copy cache to used struct
+ */
+ Num->flag = ent->Num.flag;
+ Num->lsign = ent->Num.lsign;
+ Num->pre = ent->Num.pre;
+ Num->post = ent->Num.post;
+ Num->pre_lsign_num = ent->Num.pre_lsign_num;
+ Num->need_locale = ent->Num.need_locale;
+ Num->multi = ent->Num.multi;
+ Num->zero_start = ent->Num.zero_start;
+ Num->zero_end = ent->Num.zero_end;
+ }
+
+#ifdef DEBUG_TO_FROM_CHAR
+ /* dump_node(format, len); */
+ dump_index(NUM_keywords, NUM_index);
+#endif
+
+ pfree(str);
+ return format;
+}
+
+
+static char *
+int_to_roman(int number)
+{
+ int len,
+ num;
+ char *p,
+ *result,
+ numstr[12];
+
+ result = (char *) palloc(16);
+ *result = '\0';
+
+ if (number > 3999 || number < 1)
+ {
+ fill_str(result, '#', 15);
+ return result;
+ }
+ len = snprintf(numstr, sizeof(numstr), "%d", number);
+
+ for (p = numstr; *p != '\0'; p++, --len)
+ {
+ num = *p - ('0' + 1);
+ if (num < 0)
+ continue;
+
+ if (len > 3)
+ {
+ while (num-- != -1)
+ strcat(result, "M");
+ }
+ else
+ {
+ if (len == 3)
+ strcat(result, rm100[num]);
+ else if (len == 2)
+ strcat(result, rm10[num]);
+ else if (len == 1)
+ strcat(result, rm1[num]);
+ }
+ }
+ return result;
+}
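+
+/*
+ * For example, int_to_roman(1984) should return "MCMLXXXIV", while values
+ * outside 1..3999 yield a string of '#' characters.
+ */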
+
+
+
+/* ----------
+ * Locale
+ * ----------
+ */
+static void
+NUM_prepare_locale(NUMProc *Np)
+{
+ if (Np->Num->need_locale)
+ {
+ struct lconv *lconv;
+
+ /*
+ * Get locales
+ */
+ lconv = PGLC_localeconv();
+
+ /*
+ * Positive / Negative number sign
+ */
+ if (lconv->negative_sign && *lconv->negative_sign)
+ Np->L_negative_sign = lconv->negative_sign;
+ else
+ Np->L_negative_sign = "-";
+
+ if (lconv->positive_sign && *lconv->positive_sign)
+ Np->L_positive_sign = lconv->positive_sign;
+ else
+ Np->L_positive_sign = "+";
+
+ /*
+ * Number decimal point
+ */
+ if (lconv->decimal_point && *lconv->decimal_point)
+ Np->decimal = lconv->decimal_point;
+
+ else
+ Np->decimal = ".";
+
+ if (!IS_LDECIMAL(Np->Num))
+ Np->decimal = ".";
+
+ /*
+ * Number thousands separator
+ *
+ * Some locales (e.g. broken glibc pt_BR) have a comma for the decimal
+ * point but "" for thousands_sep, so we set thousands_sep here too.
+ * http://archives.postgresql.org/pgsql-hackers/2007-11/msg00772.php
+ */
+ if (lconv->thousands_sep && *lconv->thousands_sep)
+ Np->L_thousands_sep = lconv->thousands_sep;
+ /* Make sure thousands separator doesn't match decimal point symbol. */
+ else if (strcmp(Np->decimal, ",") != 0)
+ Np->L_thousands_sep = ",";
+ else
+ Np->L_thousands_sep = ".";
+
+ /*
+ * Currency symbol
+ */
+ if (lconv->currency_symbol && *lconv->currency_symbol)
+ Np->L_currency_symbol = lconv->currency_symbol;
+ else
+ Np->L_currency_symbol = " ";
+ }
+ else
+ {
+ /*
+ * Default values
+ */
+ Np->L_negative_sign = "-";
+ Np->L_positive_sign = "+";
+ Np->decimal = ".";
+
+ Np->L_thousands_sep = ",";
+ Np->L_currency_symbol = " ";
+ }
+}
+
+/* ----------
+ * Return a pointer to the last relevant digit after the decimal point
+ * 12.0500 --> last relevant is '5'
+ * 12.0000 --> last relevant is '.'
+ * If there is no decimal point, return NULL (which will result in same
+ * behavior as if FM hadn't been specified).
+ * ----------
+ */
+static char *
+get_last_relevant_decnum(char *num)
+{
+ char *result,
+ *p = strchr(num, '.');
+
+#ifdef DEBUG_TO_FROM_CHAR
+ elog(DEBUG_elog_output, "get_last_relevant_decnum()");
+#endif
+
+ if (!p)
+ return NULL;
+
+ result = p;
+
+ while (*(++p))
+ {
+ if (*p != '0')
+ result = p;
+ }
+
+ return result;
+}
+
+/*
+ * These macros are used in NUM_processor() and its subsidiary routines.
+ * OVERLOAD_TEST: true if we've reached end of input string
+ * AMOUNT_TEST(s): true if at least s bytes remain in string
+ */
+#define OVERLOAD_TEST (Np->inout_p >= Np->inout + input_len)
+#define AMOUNT_TEST(s) (Np->inout_p <= Np->inout + (input_len - (s)))
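+
+/*
+ * For illustration: with input_len = 5 and Np->inout_p pointing at the
+ * fourth byte (Np->inout + 3), two bytes remain, so AMOUNT_TEST(2) is true
+ * while AMOUNT_TEST(3) is false; OVERLOAD_TEST becomes true once inout_p
+ * reaches Np->inout + 5.
+ */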
+
+/* ----------
+ * Number extraction for TO_NUMBER()
+ * ----------
+ */
+static void
+NUM_numpart_from_char(NUMProc *Np, int id, int input_len)
+{
+ bool isread = false;
+
+#ifdef DEBUG_TO_FROM_CHAR
+ elog(DEBUG_elog_output, " --- scan start --- id=%s",
+ (id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???");
+#endif
+
+ if (OVERLOAD_TEST)
+ return;
+
+ if (*Np->inout_p == ' ')
+ Np->inout_p++;
+
+ if (OVERLOAD_TEST)
+ return;
+
+ /*
+ * read sign before number
+ */
+ if (*Np->number == ' ' && (id == NUM_0 || id == NUM_9) &&
+ (Np->read_pre + Np->read_post) == 0)
+ {
+#ifdef DEBUG_TO_FROM_CHAR
+ elog(DEBUG_elog_output, "Try read sign (%c), locale positive: %s, negative: %s",
+ *Np->inout_p, Np->L_positive_sign, Np->L_negative_sign);
+#endif
+
+ /*
+ * locale sign
+ */
+ if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_PRE)
+ {
+ int x = 0;
+
+#ifdef DEBUG_TO_FROM_CHAR
+ elog(DEBUG_elog_output, "Try read locale pre-sign (%c)", *Np->inout_p);
+#endif
+ if ((x = strlen(Np->L_negative_sign)) &&
+ AMOUNT_TEST(x) &&
+ strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
+ {
+ Np->inout_p += x;
+ *Np->number = '-';
+ }
+ else if ((x = strlen(Np->L_positive_sign)) &&
+ AMOUNT_TEST(x) &&
+ strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
+ {
+ Np->inout_p += x;
+ *Np->number = '+';
+ }
+ }
+ else
+ {
+#ifdef DEBUG_TO_FROM_CHAR
+ elog(DEBUG_elog_output, "Try read simple sign (%c)", *Np->inout_p);
+#endif
+
+ /*
+ * simple + - < >
+ */
+ if (*Np->inout_p == '-' || (IS_BRACKET(Np->Num) &&
+ *Np->inout_p == '<'))
+ {
+ *Np->number = '-'; /* set - */
+ Np->inout_p++;
+ }
+ else if (*Np->inout_p == '+')
+ {
+ *Np->number = '+'; /* set + */
+ Np->inout_p++;
+ }
+ }
+ }
+
+ if (OVERLOAD_TEST)
+ return;
+
+#ifdef DEBUG_TO_FROM_CHAR
+ elog(DEBUG_elog_output, "Scan for numbers (%c), current number: '%s'", *Np->inout_p, Np->number);
+#endif
+
+ /*
+ * read digit or decimal point
+ */
+ if (isdigit((unsigned char) *Np->inout_p))
+ {
+ if (Np->read_dec && Np->read_post == Np->Num->post)
+ return;
+
+ *Np->number_p = *Np->inout_p;
+ Np->number_p++;
+
+ if (Np->read_dec)
+ Np->read_post++;
+ else
+ Np->read_pre++;
+
+ isread = true;
+
+#ifdef DEBUG_TO_FROM_CHAR
+ elog(DEBUG_elog_output, "Read digit (%c)", *Np->inout_p);
+#endif
+ }
+ else if (IS_DECIMAL(Np->Num) && Np->read_dec == false)
+ {
+ /*
+ * We need not test IS_LDECIMAL(Np->Num) explicitly here, because
+ * Np->decimal is always just "." if we don't have a D format token.
+ * So we just unconditionally match to Np->decimal.
+ */
+ int x = strlen(Np->decimal);
+
+#ifdef DEBUG_TO_FROM_CHAR
+ elog(DEBUG_elog_output, "Try read decimal point (%c)",
+ *Np->inout_p);
+#endif
+ if (x && AMOUNT_TEST(x) && strncmp(Np->inout_p, Np->decimal, x) == 0)
+ {
+ Np->inout_p += x - 1;
+ *Np->number_p = '.';
+ Np->number_p++;
+ Np->read_dec = true;
+ isread = true;
+ }
+ }
+
+ if (OVERLOAD_TEST)
+ return;
+
+ /*
+ * Read sign behind "last" number
+ *
+	 * We need sign detection here because determining the exact position of
+	 * a post-sign is difficult:
+	 *
+	 *   FM9999.9999999S -> 123.001-
+	 *   9.9S            -> .5-
+	 *   FM9.999999MI    -> 5.01-
+ */
+ if (*Np->number == ' ' && Np->read_pre + Np->read_post > 0)
+ {
+ /*
+		 * The locale sign (NUM_S) is always anchored behind the last number,
+		 * if: a locale sign is expected, the last read char was NUM_0/9 or
+		 * NUM_DEC, and the next char is not a digit.
+ */
+ if (IS_LSIGN(Np->Num) && isread &&
+ (Np->inout_p + 1) < Np->inout + input_len &&
+ !isdigit((unsigned char) *(Np->inout_p + 1)))
+ {
+ int x;
+ char *tmp = Np->inout_p++;
+
+#ifdef DEBUG_TO_FROM_CHAR
+ elog(DEBUG_elog_output, "Try read locale post-sign (%c)", *Np->inout_p);
+#endif
+ if ((x = strlen(Np->L_negative_sign)) &&
+ AMOUNT_TEST(x) &&
+ strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
+ {
+				Np->inout_p += x - 1;	/* -1 .. NUM_processor() does inout_p++ */
+ *Np->number = '-';
+ }
+ else if ((x = strlen(Np->L_positive_sign)) &&
+ AMOUNT_TEST(x) &&
+ strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
+ {
+				Np->inout_p += x - 1;	/* -1 .. NUM_processor() does inout_p++ */
+ *Np->number = '+';
+ }
+ if (*Np->number == ' ')
+ /* no sign read */
+ Np->inout_p = tmp;
+ }
+
+ /*
+		 * Try to read a non-locale sign; this happens only if the format is
+		 * not exact and we cannot determine the sign position of MI/PL/SG,
+		 * for example:
+		 *
+		 *   FM9.999999MI -> 5.01-
+		 *
+		 * The "IS_LSIGN(Np->Num)==false" test prevents reading wrong formats
+		 * like to_number('1 -', '9S'), where the sign is not anchored to the
+		 * last number.
+ */
+ else if (isread == false && IS_LSIGN(Np->Num) == false &&
+ (IS_PLUS(Np->Num) || IS_MINUS(Np->Num)))
+ {
+#ifdef DEBUG_TO_FROM_CHAR
+ elog(DEBUG_elog_output, "Try read simple post-sign (%c)", *Np->inout_p);
+#endif
+
+ /*
+ * simple + -
+ */
+ if (*Np->inout_p == '-' || *Np->inout_p == '+')
+				/* NUM_processor() does inout_p++ */
+ *Np->number = *Np->inout_p;
+ }
+ }
+}
+
+#define IS_PREDEC_SPACE(_n) \
+ (IS_ZERO((_n)->Num)==false && \
+ (_n)->number == (_n)->number_p && \
+ *(_n)->number == '0' && \
+ (_n)->Num->post != 0)
+
+/* ----------
+ * Add digit or sign to number-string
+ * ----------
+ */
+static void
+NUM_numpart_to_char(NUMProc *Np, int id)
+{
+ int end;
+
+ if (IS_ROMAN(Np->Num))
+ return;
+
+	/* Note: in this elog() output, '\0' is not yet set in 'inout' */
+
+#ifdef DEBUG_TO_FROM_CHAR
+
+ /*
+	 * Np->num_curr is the number of the current item in the format picture;
+	 * it is not the current position in inout!
+ */
+ elog(DEBUG_elog_output,
+ "SIGN_WROTE: %d, CURRENT: %d, NUMBER_P: \"%s\", INOUT: \"%s\"",
+ Np->sign_wrote,
+ Np->num_curr,
+ Np->number_p,
+ Np->inout);
+#endif
+ Np->num_in = false;
+
+ /*
+	 * Write the sign if a real number will be written to the output.
+	 * Note: IS_PREDEC_SPACE() handles "9.9" --> " .1"
+ */
+ if (Np->sign_wrote == false &&
+ (Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start == Np->num_curr)) &&
+ (IS_PREDEC_SPACE(Np) == false || (Np->last_relevant && *Np->last_relevant == '.')))
+ {
+ if (IS_LSIGN(Np->Num))
+ {
+ if (Np->Num->lsign == NUM_LSIGN_PRE)
+ {
+ if (Np->sign == '-')
+ strcpy(Np->inout_p, Np->L_negative_sign);
+ else
+ strcpy(Np->inout_p, Np->L_positive_sign);
+ Np->inout_p += strlen(Np->inout_p);
+ Np->sign_wrote = true;
+ }
+ }
+ else if (IS_BRACKET(Np->Num))
+ {
+ *Np->inout_p = Np->sign == '+' ? ' ' : '<';
+ ++Np->inout_p;
+ Np->sign_wrote = true;
+ }
+ else if (Np->sign == '+')
+ {
+ if (!IS_FILLMODE(Np->Num))
+ {
+ *Np->inout_p = ' '; /* Write + */
+ ++Np->inout_p;
+ }
+ Np->sign_wrote = true;
+ }
+ else if (Np->sign == '-')
+ { /* Write - */
+ *Np->inout_p = '-';
+ ++Np->inout_p;
+ Np->sign_wrote = true;
+ }
+ }
+
+
+ /*
+ * digits / FM / Zero / Dec. point
+ */
+ if (id == NUM_9 || id == NUM_0 || id == NUM_D || id == NUM_DEC)
+ {
+ if (Np->num_curr < Np->out_pre_spaces &&
+ (Np->Num->zero_start > Np->num_curr || !IS_ZERO(Np->Num)))
+ {
+ /*
+ * Write blank space
+ */
+ if (!IS_FILLMODE(Np->Num))
+ {
+ *Np->inout_p = ' '; /* Write ' ' */
+ ++Np->inout_p;
+ }
+ }
+ else if (IS_ZERO(Np->Num) &&
+ Np->num_curr < Np->out_pre_spaces &&
+ Np->Num->zero_start <= Np->num_curr)
+ {
+ /*
+ * Write ZERO
+ */
+ *Np->inout_p = '0'; /* Write '0' */
+ ++Np->inout_p;
+ Np->num_in = true;
+ }
+ else
+ {
+ /*
+ * Write Decimal point
+ */
+ if (*Np->number_p == '.')
+ {
+ if (!Np->last_relevant || *Np->last_relevant != '.')
+ {
+ strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */
+ Np->inout_p += strlen(Np->inout_p);
+ }
+
+ /*
+					 * Oracle: 'n' -- FM9.9 --> 'n.'
+ */
+ else if (IS_FILLMODE(Np->Num) &&
+ Np->last_relevant && *Np->last_relevant == '.')
+ {
+ strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */
+ Np->inout_p += strlen(Np->inout_p);
+ }
+ }
+ else
+ {
+ /*
+ * Write Digits
+ */
+ if (Np->last_relevant && Np->number_p > Np->last_relevant &&
+ id != NUM_0)
+ ;
+
+ /*
+ * '0.1' -- 9.9 --> ' .1'
+ */
+ else if (IS_PREDEC_SPACE(Np))
+ {
+ if (!IS_FILLMODE(Np->Num))
+ {
+ *Np->inout_p = ' ';
+ ++Np->inout_p;
+ }
+
+ /*
+ * '0' -- FM9.9 --> '0.'
+ */
+ else if (Np->last_relevant && *Np->last_relevant == '.')
+ {
+ *Np->inout_p = '0';
+ ++Np->inout_p;
+ }
+ }
+ else
+ {
+ *Np->inout_p = *Np->number_p; /* Write DIGIT */
+ ++Np->inout_p;
+ Np->num_in = true;
+ }
+ }
+			/* do not exceed string length */
+ if (*Np->number_p)
+ ++Np->number_p;
+ }
+
+ end = Np->num_count + (Np->out_pre_spaces ? 1 : 0) + (IS_DECIMAL(Np->Num) ? 1 : 0);
+
+ if (Np->last_relevant && Np->last_relevant == Np->number_p)
+ end = Np->num_curr;
+
+ if (Np->num_curr + 1 == end)
+ {
+ if (Np->sign_wrote == true && IS_BRACKET(Np->Num))
+ {
+ *Np->inout_p = Np->sign == '+' ? ' ' : '>';
+ ++Np->inout_p;
+ }
+ else if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_POST)
+ {
+ if (Np->sign == '-')
+ strcpy(Np->inout_p, Np->L_negative_sign);
+ else
+ strcpy(Np->inout_p, Np->L_positive_sign);
+ Np->inout_p += strlen(Np->inout_p);
+ }
+ }
+ }
+
+ ++Np->num_curr;
+}
+
+/*
+ * Skip over "n" input characters, but only if they aren't numeric data
+ */
+static void
+NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len)
+{
+ while (n-- > 0)
+ {
+ if (OVERLOAD_TEST)
+ break; /* end of input */
+ if (strchr("0123456789.,+-", *Np->inout_p) != NULL)
+ break; /* it's a data character */
+ Np->inout_p += pg_mblen(Np->inout_p);
+ }
+}
+
+static char *
+NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
+ char *number, int input_len, int to_char_out_pre_spaces,
+ int sign, bool is_to_char, Oid collid)
+{
+ FormatNode *n;
+ NUMProc _Np,
+ *Np = &_Np;
+ const char *pattern;
+ int pattern_len;
+
+ MemSet(Np, 0, sizeof(NUMProc));
+
+ Np->Num = Num;
+ Np->is_to_char = is_to_char;
+ Np->number = number;
+ Np->inout = inout;
+ Np->last_relevant = NULL;
+ Np->read_post = 0;
+ Np->read_pre = 0;
+ Np->read_dec = false;
+
+ if (Np->Num->zero_start)
+ --Np->Num->zero_start;
+
+ if (IS_EEEE(Np->Num))
+ {
+ if (!Np->is_to_char)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("\"EEEE\" not supported for input")));
+ return strcpy(inout, number);
+ }
+
+ /*
+ * Roman correction
+ */
+ if (IS_ROMAN(Np->Num))
+ {
+ if (!Np->is_to_char)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("\"RN\" not supported for input")));
+
+ Np->Num->lsign = Np->Num->pre_lsign_num = Np->Num->post =
+ Np->Num->pre = Np->out_pre_spaces = Np->sign = 0;
+
+ if (IS_FILLMODE(Np->Num))
+ {
+ Np->Num->flag = 0;
+ Np->Num->flag |= NUM_F_FILLMODE;
+ }
+ else
+ Np->Num->flag = 0;
+ Np->Num->flag |= NUM_F_ROMAN;
+ }
+
+ /*
+ * Sign
+ */
+ if (is_to_char)
+ {
+ Np->sign = sign;
+
+		/* MI/PL/SG - write the sign itself, not as part of the number */
+ if (IS_PLUS(Np->Num) || IS_MINUS(Np->Num))
+ {
+ if (IS_PLUS(Np->Num) && IS_MINUS(Np->Num) == false)
+ Np->sign_wrote = false; /* need sign */
+ else
+ Np->sign_wrote = true; /* needn't sign */
+ }
+ else
+ {
+ if (Np->sign != '-')
+ {
+ if (IS_BRACKET(Np->Num) && IS_FILLMODE(Np->Num))
+ Np->Num->flag &= ~NUM_F_BRACKET;
+ if (IS_MINUS(Np->Num))
+ Np->Num->flag &= ~NUM_F_MINUS;
+ }
+ else if (Np->sign != '+' && IS_PLUS(Np->Num))
+ Np->Num->flag &= ~NUM_F_PLUS;
+
+ if (Np->sign == '+' && IS_FILLMODE(Np->Num) && IS_LSIGN(Np->Num) == false)
+ Np->sign_wrote = true; /* needn't sign */
+ else
+ Np->sign_wrote = false; /* need sign */
+
+ if (Np->Num->lsign == NUM_LSIGN_PRE && Np->Num->pre == Np->Num->pre_lsign_num)
+ Np->Num->lsign = NUM_LSIGN_POST;
+ }
+ }
+ else
+ Np->sign = false;
+
+ /*
+ * Count
+ */
+ Np->num_count = Np->Num->post + Np->Num->pre - 1;
+
+ if (is_to_char)
+ {
+ Np->out_pre_spaces = to_char_out_pre_spaces;
+
+ if (IS_FILLMODE(Np->Num) && IS_DECIMAL(Np->Num))
+ {
+ Np->last_relevant = get_last_relevant_decnum(Np->number);
+
+ /*
+ * If any '0' specifiers are present, make sure we don't strip
+ * those digits. But don't advance last_relevant beyond the last
+ * character of the Np->number string, which is a hazard if the
+ * number got shortened due to precision limitations.
+ */
+ if (Np->last_relevant && Np->Num->zero_end > Np->out_pre_spaces)
+ {
+ int last_zero_pos;
+ char *last_zero;
+
+ /* note that Np->number cannot be zero-length here */
+ last_zero_pos = strlen(Np->number) - 1;
+ last_zero_pos = Min(last_zero_pos,
+ Np->Num->zero_end - Np->out_pre_spaces);
+ last_zero = Np->number + last_zero_pos;
+ if (Np->last_relevant < last_zero)
+ Np->last_relevant = last_zero;
+ }
+ }
+
+ if (Np->sign_wrote == false && Np->out_pre_spaces == 0)
+ ++Np->num_count;
+ }
+ else
+ {
+ Np->out_pre_spaces = 0;
+ *Np->number = ' '; /* sign space */
+ *(Np->number + 1) = '\0';
+ }
+
+ Np->num_in = 0;
+ Np->num_curr = 0;
+
+#ifdef DEBUG_TO_FROM_CHAR
+ elog(DEBUG_elog_output,
+ "\n\tSIGN: '%c'\n\tNUM: '%s'\n\tPRE: %d\n\tPOST: %d\n\tNUM_COUNT: %d\n\tNUM_PRE: %d\n\tSIGN_WROTE: %s\n\tZERO: %s\n\tZERO_START: %d\n\tZERO_END: %d\n\tLAST_RELEVANT: %s\n\tBRACKET: %s\n\tPLUS: %s\n\tMINUS: %s\n\tFILLMODE: %s\n\tROMAN: %s\n\tEEEE: %s",
+ Np->sign,
+ Np->number,
+ Np->Num->pre,
+ Np->Num->post,
+ Np->num_count,
+ Np->out_pre_spaces,
+ Np->sign_wrote ? "Yes" : "No",
+ IS_ZERO(Np->Num) ? "Yes" : "No",
+ Np->Num->zero_start,
+ Np->Num->zero_end,
+ Np->last_relevant ? Np->last_relevant : "<not set>",
+ IS_BRACKET(Np->Num) ? "Yes" : "No",
+ IS_PLUS(Np->Num) ? "Yes" : "No",
+ IS_MINUS(Np->Num) ? "Yes" : "No",
+ IS_FILLMODE(Np->Num) ? "Yes" : "No",
+ IS_ROMAN(Np->Num) ? "Yes" : "No",
+ IS_EEEE(Np->Num) ? "Yes" : "No"
+ );
+#endif
+
+ /*
+ * Locale
+ */
+ NUM_prepare_locale(Np);
+
+ /*
+	 * Main processing loop
+ */
+ if (Np->is_to_char)
+ Np->number_p = Np->number;
+ else
+ Np->number_p = Np->number + 1; /* first char is space for sign */
+
+ for (n = node, Np->inout_p = Np->inout; n->type != NODE_TYPE_END; n++)
+ {
+ if (!Np->is_to_char)
+ {
+ /*
+ * Check at least one byte remains to be scanned. (In actions
+ * below, must use AMOUNT_TEST if we want to read more bytes than
+ * that.)
+ */
+ if (OVERLOAD_TEST)
+ break;
+ }
+
+ /*
+ * Format pictures actions
+ */
+ if (n->type == NODE_TYPE_ACTION)
+ {
+ /*
+ * Create/read digit/zero/blank/sign/special-case
+ *
+			 * 'NUM_S' note: The locale sign is anchored to the number, and we
+			 * read/write it when we work with the first or last digit
+			 * (NUM_0/NUM_9). This is why NUM_S is missing from the switch().
+ *
+ * Notice the "Np->inout_p++" at the bottom of the loop. This is
+ * why most of the actions advance inout_p one less than you might
+ * expect. In cases where we don't want that increment to happen,
+ * a switch case ends with "continue" not "break".
+ */
+ switch (n->key->id)
+ {
+ case NUM_9:
+ case NUM_0:
+ case NUM_DEC:
+ case NUM_D:
+ if (Np->is_to_char)
+ {
+ NUM_numpart_to_char(Np, n->key->id);
+ continue; /* for() */
+ }
+ else
+ {
+ NUM_numpart_from_char(Np, n->key->id, input_len);
+ break; /* switch() case: */
+ }
+
+ case NUM_COMMA:
+ if (Np->is_to_char)
+ {
+ if (!Np->num_in)
+ {
+ if (IS_FILLMODE(Np->Num))
+ continue;
+ else
+ *Np->inout_p = ' ';
+ }
+ else
+ *Np->inout_p = ',';
+ }
+ else
+ {
+ if (!Np->num_in)
+ {
+ if (IS_FILLMODE(Np->Num))
+ continue;
+ }
+ if (*Np->inout_p != ',')
+ continue;
+ }
+ break;
+
+ case NUM_G:
+ pattern = Np->L_thousands_sep;
+ pattern_len = strlen(pattern);
+ if (Np->is_to_char)
+ {
+ if (!Np->num_in)
+ {
+ if (IS_FILLMODE(Np->Num))
+ continue;
+ else
+ {
+ /* just in case there are MB chars */
+ pattern_len = pg_mbstrlen(pattern);
+ memset(Np->inout_p, ' ', pattern_len);
+ Np->inout_p += pattern_len - 1;
+ }
+ }
+ else
+ {
+ strcpy(Np->inout_p, pattern);
+ Np->inout_p += pattern_len - 1;
+ }
+ }
+ else
+ {
+ if (!Np->num_in)
+ {
+ if (IS_FILLMODE(Np->Num))
+ continue;
+ }
+
+ /*
+ * Because L_thousands_sep typically contains data
+ * characters (either '.' or ','), we can't use
+ * NUM_eat_non_data_chars here. Instead skip only if
+ * the input matches L_thousands_sep.
+ */
+ if (AMOUNT_TEST(pattern_len) &&
+ strncmp(Np->inout_p, pattern, pattern_len) == 0)
+ Np->inout_p += pattern_len - 1;
+ else
+ continue;
+ }
+ break;
+
+ case NUM_L:
+ pattern = Np->L_currency_symbol;
+ if (Np->is_to_char)
+ {
+ strcpy(Np->inout_p, pattern);
+ Np->inout_p += strlen(pattern) - 1;
+ }
+ else
+ {
+ NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len);
+ continue;
+ }
+ break;
+
+ case NUM_RN:
+ if (IS_FILLMODE(Np->Num))
+ {
+ strcpy(Np->inout_p, Np->number_p);
+ Np->inout_p += strlen(Np->inout_p) - 1;
+ }
+ else
+ {
+ sprintf(Np->inout_p, "%15s", Np->number_p);
+ Np->inout_p += strlen(Np->inout_p) - 1;
+ }
+ break;
+
+ case NUM_rn:
+ if (IS_FILLMODE(Np->Num))
+ {
+ strcpy(Np->inout_p, asc_tolower_z(Np->number_p));
+ Np->inout_p += strlen(Np->inout_p) - 1;
+ }
+ else
+ {
+ sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p));
+ Np->inout_p += strlen(Np->inout_p) - 1;
+ }
+ break;
+
+ case NUM_th:
+ if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
+ Np->sign == '-' || IS_DECIMAL(Np->Num))
+ continue;
+
+ if (Np->is_to_char)
+ {
+ strcpy(Np->inout_p, get_th(Np->number, TH_LOWER));
+ Np->inout_p += 1;
+ }
+ else
+ {
+ /* All variants of 'th' occupy 2 characters */
+ NUM_eat_non_data_chars(Np, 2, input_len);
+ continue;
+ }
+ break;
+
+ case NUM_TH:
+ if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
+ Np->sign == '-' || IS_DECIMAL(Np->Num))
+ continue;
+
+ if (Np->is_to_char)
+ {
+ strcpy(Np->inout_p, get_th(Np->number, TH_UPPER));
+ Np->inout_p += 1;
+ }
+ else
+ {
+ /* All variants of 'TH' occupy 2 characters */
+ NUM_eat_non_data_chars(Np, 2, input_len);
+ continue;
+ }
+ break;
+
+ case NUM_MI:
+ if (Np->is_to_char)
+ {
+ if (Np->sign == '-')
+ *Np->inout_p = '-';
+ else if (IS_FILLMODE(Np->Num))
+ continue;
+ else
+ *Np->inout_p = ' ';
+ }
+ else
+ {
+ if (*Np->inout_p == '-')
+ *Np->number = '-';
+ else
+ {
+ NUM_eat_non_data_chars(Np, 1, input_len);
+ continue;
+ }
+ }
+ break;
+
+ case NUM_PL:
+ if (Np->is_to_char)
+ {
+ if (Np->sign == '+')
+ *Np->inout_p = '+';
+ else if (IS_FILLMODE(Np->Num))
+ continue;
+ else
+ *Np->inout_p = ' ';
+ }
+ else
+ {
+ if (*Np->inout_p == '+')
+ *Np->number = '+';
+ else
+ {
+ NUM_eat_non_data_chars(Np, 1, input_len);
+ continue;
+ }
+ }
+ break;
+
+ case NUM_SG:
+ if (Np->is_to_char)
+ *Np->inout_p = Np->sign;
+ else
+ {
+ if (*Np->inout_p == '-')
+ *Np->number = '-';
+ else if (*Np->inout_p == '+')
+ *Np->number = '+';
+ else
+ {
+ NUM_eat_non_data_chars(Np, 1, input_len);
+ continue;
+ }
+ }
+ break;
+
+ default:
+ continue;
+ break;
+ }
+ }
+ else
+ {
+ /*
+ * In TO_CHAR, non-pattern characters in the format are copied to
+ * the output. In TO_NUMBER, we skip one input character for each
+ * non-pattern format character, whether or not it matches the
+ * format character.
+ */
+ if (Np->is_to_char)
+ {
+ strcpy(Np->inout_p, n->character);
+ Np->inout_p += strlen(Np->inout_p);
+ }
+ else
+ {
+ Np->inout_p += pg_mblen(Np->inout_p);
+ }
+ continue;
+ }
+ Np->inout_p++;
+ }
+
+ if (Np->is_to_char)
+ {
+ *Np->inout_p = '\0';
+ return Np->inout;
+ }
+ else
+ {
+ if (*(Np->number_p - 1) == '.')
+ *(Np->number_p - 1) = '\0';
+ else
+ *Np->number_p = '\0';
+
+ /*
+ * Correction - precision of dec. number
+ */
+ Np->Num->post = Np->read_post;
+
+#ifdef DEBUG_TO_FROM_CHAR
+ elog(DEBUG_elog_output, "TO_NUMBER (number): '%s'", Np->number);
+#endif
+ return Np->number;
+ }
+}
+
+/* ----------
+ * MACRO: Start part of NUM - for all NUM's to_char variants
+ *	(sorry, but I hate copying the same code - a macro is better...)
+ * ----------
+ */
+#define NUM_TOCHAR_prepare \
+do { \
+ int len = VARSIZE_ANY_EXHDR(fmt); \
+ if (len <= 0 || len >= (INT_MAX-VARHDRSZ)/NUM_MAX_ITEM_SIZ) \
+ PG_RETURN_TEXT_P(cstring_to_text("")); \
+ result = (text *) palloc0((len * NUM_MAX_ITEM_SIZ) + 1 + VARHDRSZ); \
+ format = NUM_cache(len, &Num, fmt, &shouldFree); \
+} while (0)
+
+/* ----------
+ * MACRO: Finish part of NUM
+ * ----------
+ */
+#define NUM_TOCHAR_finish \
+do { \
+ int len; \
+ \
+ NUM_processor(format, &Num, VARDATA(result), numstr, 0, out_pre_spaces, sign, true, PG_GET_COLLATION()); \
+ \
+ if (shouldFree) \
+ pfree(format); \
+ \
+ /* \
+ * Convert null-terminated representation of result to standard text. \
+ * The result is usually much bigger than it needs to be, but there \
+ * seems little point in realloc'ing it smaller. \
+ */ \
+ len = strlen(VARDATA(result)); \
+ SET_VARSIZE(result, len + VARHDRSZ); \
+} while (0)
+
+/* -------------------
+ * NUMERIC to_number() (convert string to numeric)
+ * -------------------
+ */
+Datum
+numeric_to_number(PG_FUNCTION_ARGS)
+{
+ text *value = PG_GETARG_TEXT_PP(0);
+ text *fmt = PG_GETARG_TEXT_PP(1);
+ NUMDesc Num;
+ Datum result;
+ FormatNode *format;
+ char *numstr;
+ bool shouldFree;
+ int len = 0;
+ int scale,
+ precision;
+
+ len = VARSIZE_ANY_EXHDR(fmt);
+
+ if (len <= 0 || len >= INT_MAX / NUM_MAX_ITEM_SIZ)
+ PG_RETURN_NULL();
+
+ format = NUM_cache(len, &Num, fmt, &shouldFree);
+
+ numstr = (char *) palloc((len * NUM_MAX_ITEM_SIZ) + 1);
+
+ NUM_processor(format, &Num, VARDATA_ANY(value), numstr,
+ VARSIZE_ANY_EXHDR(value), 0, 0, false, PG_GET_COLLATION());
+
+ scale = Num.post;
+ precision = Num.pre + Num.multi + scale;
+
+ if (shouldFree)
+ pfree(format);
+
+ result = DirectFunctionCall3(numeric_in,
+ CStringGetDatum(numstr),
+ ObjectIdGetDatum(InvalidOid),
+ Int32GetDatum(((precision << 16) | scale) + VARHDRSZ));
+
+ if (IS_MULTI(&Num))
+ {
+ Numeric x;
+ Numeric a = int64_to_numeric(10);
+ Numeric b = int64_to_numeric(-Num.multi);
+
+ x = DatumGetNumeric(DirectFunctionCall2(numeric_power,
+ NumericGetDatum(a),
+ NumericGetDatum(b)));
+ result = DirectFunctionCall2(numeric_mul,
+ result,
+ NumericGetDatum(x));
+ }
+
+ pfree(numstr);
+ return result;
+}
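+
+/*
+ * Usage sketch (illustrative; the resulting numeric goes through numeric_in
+ * with the scale/precision computed above):
+ *
+ *   SELECT to_number('12,454.8-', '99G999D9S');   -- expected: -12454.8
+ *
+ * Here G consumes the thousands separator, D the decimal point, and S the
+ * trailing locale sign.
+ */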
+
+/* ------------------
+ * NUMERIC to_char()
+ * ------------------
+ */
+Datum
+numeric_to_char(PG_FUNCTION_ARGS)
+{
+ Numeric value = PG_GETARG_NUMERIC(0);
+ text *fmt = PG_GETARG_TEXT_PP(1);
+ NUMDesc Num;
+ FormatNode *format;
+ text *result;
+ bool shouldFree;
+ int out_pre_spaces = 0,
+ sign = 0;
+ char *numstr,
+ *orgnum,
+ *p;
+ Numeric x;
+
+ NUM_TOCHAR_prepare;
+
+ /*
+	 * Type-dependent part (numeric)
+ */
+ if (IS_ROMAN(&Num))
+ {
+ x = DatumGetNumeric(DirectFunctionCall2(numeric_round,
+ NumericGetDatum(value),
+ Int32GetDatum(0)));
+ numstr =
+ int_to_roman(DatumGetInt32(DirectFunctionCall1(numeric_int4,
+ NumericGetDatum(x))));
+ }
+ else if (IS_EEEE(&Num))
+ {
+ orgnum = numeric_out_sci(value, Num.post);
+
+ /*
+ * numeric_out_sci() does not emit a sign for positive numbers. We
+ * need to add a space in this case so that positive and negative
+ * numbers are aligned. Also must check for NaN/infinity cases, which
+ * we handle the same way as in float8_to_char.
+ */
+ if (strcmp(orgnum, "NaN") == 0 ||
+ strcmp(orgnum, "Infinity") == 0 ||
+ strcmp(orgnum, "-Infinity") == 0)
+ {
+ /*
+ * Allow 6 characters for the leading sign, the decimal point,
+ * "e", the exponent's sign and two exponent digits.
+ */
+ numstr = (char *) palloc(Num.pre + Num.post + 7);
+ fill_str(numstr, '#', Num.pre + Num.post + 6);
+ *numstr = ' ';
+ *(numstr + Num.pre + 1) = '.';
+ }
+ else if (*orgnum != '-')
+ {
+ numstr = (char *) palloc(strlen(orgnum) + 2);
+ *numstr = ' ';
+ strcpy(numstr + 1, orgnum);
+ }
+ else
+ {
+ numstr = orgnum;
+ }
+ }
+ else
+ {
+ int numstr_pre_len;
+ Numeric val = value;
+
+ if (IS_MULTI(&Num))
+ {
+ Numeric a = int64_to_numeric(10);
+ Numeric b = int64_to_numeric(Num.multi);
+
+ x = DatumGetNumeric(DirectFunctionCall2(numeric_power,
+ NumericGetDatum(a),
+ NumericGetDatum(b)));
+ val = DatumGetNumeric(DirectFunctionCall2(numeric_mul,
+ NumericGetDatum(value),
+ NumericGetDatum(x)));
+ Num.pre += Num.multi;
+ }
+
+ x = DatumGetNumeric(DirectFunctionCall2(numeric_round,
+ NumericGetDatum(val),
+ Int32GetDatum(Num.post)));
+ orgnum = DatumGetCString(DirectFunctionCall1(numeric_out,
+ NumericGetDatum(x)));
+
+ if (*orgnum == '-')
+ {
+ sign = '-';
+ numstr = orgnum + 1;
+ }
+ else
+ {
+ sign = '+';
+ numstr = orgnum;
+ }
+
+ if ((p = strchr(numstr, '.')))
+ numstr_pre_len = p - numstr;
+ else
+ numstr_pre_len = strlen(numstr);
+
+ /* needs padding? */
+ if (numstr_pre_len < Num.pre)
+ out_pre_spaces = Num.pre - numstr_pre_len;
+ /* overflowed prefix digit format? */
+ else if (numstr_pre_len > Num.pre)
+ {
+ numstr = (char *) palloc(Num.pre + Num.post + 2);
+ fill_str(numstr, '#', Num.pre + Num.post + 1);
+ *(numstr + Num.pre) = '.';
+ }
+ }
+
+ NUM_TOCHAR_finish;
+ PG_RETURN_TEXT_P(result);
+}
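+
+/*
+ * Usage sketch (illustrative):
+ *
+ *   SELECT to_char(-125.8, '999D99S');    -- expected: '125.80-'
+ *   SELECT to_char(125::numeric, '999');  -- expected: ' 125' (space for sign)
+ *
+ * A value whose integer part does not fit the picture is rendered as '#'
+ * characters, per the "overflowed prefix digit format" branch above.
+ */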
+
+/* ---------------
+ * INT4 to_char()
+ * ---------------
+ */
+Datum
+int4_to_char(PG_FUNCTION_ARGS)
+{
+ int32 value = PG_GETARG_INT32(0);
+ text *fmt = PG_GETARG_TEXT_PP(1);
+ NUMDesc Num;
+ FormatNode *format;
+ text *result;
+ bool shouldFree;
+ int out_pre_spaces = 0,
+ sign = 0;
+ char *numstr,
+ *orgnum;
+
+ NUM_TOCHAR_prepare;
+
+ /*
+	 * Type-dependent part (int32)
+ */
+ if (IS_ROMAN(&Num))
+ numstr = int_to_roman(value);
+ else if (IS_EEEE(&Num))
+ {
+ /* we can do it easily because float8 won't lose any precision */
+ float8 val = (float8) value;
+
+ orgnum = (char *) psprintf("%+.*e", Num.post, val);
+
+ /*
+ * Swap a leading positive sign for a space.
+ */
+ if (*orgnum == '+')
+ *orgnum = ' ';
+
+ numstr = orgnum;
+ }
+ else
+ {
+ int numstr_pre_len;
+
+ if (IS_MULTI(&Num))
+ {
+ orgnum = DatumGetCString(DirectFunctionCall1(int4out,
+ Int32GetDatum(value * ((int32) pow((double) 10, (double) Num.multi)))));
+ Num.pre += Num.multi;
+ }
+ else
+ {
+ orgnum = DatumGetCString(DirectFunctionCall1(int4out,
+ Int32GetDatum(value)));
+ }
+
+ if (*orgnum == '-')
+ {
+ sign = '-';
+ orgnum++;
+ }
+ else
+ sign = '+';
+
+ numstr_pre_len = strlen(orgnum);
+
+ /* post-decimal digits? Pad out with zeros. */
+ if (Num.post)
+ {
+ numstr = (char *) palloc(numstr_pre_len + Num.post + 2);
+ strcpy(numstr, orgnum);
+ *(numstr + numstr_pre_len) = '.';
+ memset(numstr + numstr_pre_len + 1, '0', Num.post);
+ *(numstr + numstr_pre_len + Num.post + 1) = '\0';
+ }
+ else
+ numstr = orgnum;
+
+ /* needs padding? */
+ if (numstr_pre_len < Num.pre)
+ out_pre_spaces = Num.pre - numstr_pre_len;
+ /* overflowed prefix digit format? */
+ else if (numstr_pre_len > Num.pre)
+ {
+ numstr = (char *) palloc(Num.pre + Num.post + 2);
+ fill_str(numstr, '#', Num.pre + Num.post + 1);
+ *(numstr + Num.pre) = '.';
+ }
+ }
+
+ NUM_TOCHAR_finish;
+ PG_RETURN_TEXT_P(result);
+}
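+
+/*
+ * Usage sketch (illustrative):
+ *
+ *   SELECT to_char(485, '999');   -- expected: ' 485'
+ *   SELECT to_char(485, 'RN');    -- expected: 'CDLXXXV', right-justified in 15 characters
+ */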
+
+/* ---------------
+ * INT8 to_char()
+ * ---------------
+ */
+Datum
+int8_to_char(PG_FUNCTION_ARGS)
+{
+ int64 value = PG_GETARG_INT64(0);
+ text *fmt = PG_GETARG_TEXT_PP(1);
+ NUMDesc Num;
+ FormatNode *format;
+ text *result;
+ bool shouldFree;
+ int out_pre_spaces = 0,
+ sign = 0;
+ char *numstr,
+ *orgnum;
+
+ NUM_TOCHAR_prepare;
+
+ /*
+	 * Type-dependent part (int64)
+ */
+ if (IS_ROMAN(&Num))
+ {
+		/* Currently we don't support int8 conversion to Roman numerals... */
+ numstr = int_to_roman(DatumGetInt32(DirectFunctionCall1(int84, Int64GetDatum(value))));
+ }
+ else if (IS_EEEE(&Num))
+ {
+ /* to avoid loss of precision, must go via numeric not float8 */
+ orgnum = numeric_out_sci(int64_to_numeric(value),
+ Num.post);
+
+ /*
+ * numeric_out_sci() does not emit a sign for positive numbers. We
+ * need to add a space in this case so that positive and negative
+ * numbers are aligned. We don't have to worry about NaN/inf here.
+ */
+ if (*orgnum != '-')
+ {
+ numstr = (char *) palloc(strlen(orgnum) + 2);
+ *numstr = ' ';
+ strcpy(numstr + 1, orgnum);
+ }
+ else
+ {
+ numstr = orgnum;
+ }
+ }
+ else
+ {
+ int numstr_pre_len;
+
+ if (IS_MULTI(&Num))
+ {
+ double multi = pow((double) 10, (double) Num.multi);
+
+ value = DatumGetInt64(DirectFunctionCall2(int8mul,
+ Int64GetDatum(value),
+ DirectFunctionCall1(dtoi8,
+ Float8GetDatum(multi))));
+ Num.pre += Num.multi;
+ }
+
+ orgnum = DatumGetCString(DirectFunctionCall1(int8out,
+ Int64GetDatum(value)));
+
+ if (*orgnum == '-')
+ {
+ sign = '-';
+ orgnum++;
+ }
+ else
+ sign = '+';
+
+ numstr_pre_len = strlen(orgnum);
+
+ /* post-decimal digits? Pad out with zeros. */
+ if (Num.post)
+ {
+ numstr = (char *) palloc(numstr_pre_len + Num.post + 2);
+ strcpy(numstr, orgnum);
+ *(numstr + numstr_pre_len) = '.';
+ memset(numstr + numstr_pre_len + 1, '0', Num.post);
+ *(numstr + numstr_pre_len + Num.post + 1) = '\0';
+ }
+ else
+ numstr = orgnum;
+
+ /* needs padding? */
+ if (numstr_pre_len < Num.pre)
+ out_pre_spaces = Num.pre - numstr_pre_len;
+ /* overflowed prefix digit format? */
+ else if (numstr_pre_len > Num.pre)
+ {
+ numstr = (char *) palloc(Num.pre + Num.post + 2);
+ fill_str(numstr, '#', Num.pre + Num.post + 1);
+ *(numstr + Num.pre) = '.';
+ }
+ }
+
+ NUM_TOCHAR_finish;
+ PG_RETURN_TEXT_P(result);
+}
+
+/* -----------------
+ * FLOAT4 to_char()
+ * -----------------
+ */
+Datum
+float4_to_char(PG_FUNCTION_ARGS)
+{
+ float4 value = PG_GETARG_FLOAT4(0);
+ text *fmt = PG_GETARG_TEXT_PP(1);
+ NUMDesc Num;
+ FormatNode *format;
+ text *result;
+ bool shouldFree;
+ int out_pre_spaces = 0,
+ sign = 0;
+ char *numstr,
+ *p;
+
+ NUM_TOCHAR_prepare;
+
+ if (IS_ROMAN(&Num))
+ numstr = int_to_roman((int) rint(value));
+ else if (IS_EEEE(&Num))
+ {
+ if (isnan(value) || isinf(value))
+ {
+ /*
+ * Allow 6 characters for the leading sign, the decimal point,
+ * "e", the exponent's sign and two exponent digits.
+ */
+ numstr = (char *) palloc(Num.pre + Num.post + 7);
+ fill_str(numstr, '#', Num.pre + Num.post + 6);
+ *numstr = ' ';
+ *(numstr + Num.pre + 1) = '.';
+ }
+ else
+ {
+ numstr = psprintf("%+.*e", Num.post, value);
+
+ /*
+ * Swap a leading positive sign for a space.
+ */
+ if (*numstr == '+')
+ *numstr = ' ';
+ }
+ }
+ else
+ {
+ float4 val = value;
+ char *orgnum;
+ int numstr_pre_len;
+
+ if (IS_MULTI(&Num))
+ {
+ float multi = pow((double) 10, (double) Num.multi);
+
+ val = value * multi;
+ Num.pre += Num.multi;
+ }
+
+ orgnum = psprintf("%.0f", fabs(val));
+ numstr_pre_len = strlen(orgnum);
+
+ /* adjust post digits to fit max float digits */
+ if (numstr_pre_len >= FLT_DIG)
+ Num.post = 0;
+ else if (numstr_pre_len + Num.post > FLT_DIG)
+ Num.post = FLT_DIG - numstr_pre_len;
+ orgnum = psprintf("%.*f", Num.post, val);
+
+ if (*orgnum == '-')
+ { /* < 0 */
+ sign = '-';
+ numstr = orgnum + 1;
+ }
+ else
+ {
+ sign = '+';
+ numstr = orgnum;
+ }
+
+ if ((p = strchr(numstr, '.')))
+ numstr_pre_len = p - numstr;
+ else
+ numstr_pre_len = strlen(numstr);
+
+ /* needs padding? */
+ if (numstr_pre_len < Num.pre)
+ out_pre_spaces = Num.pre - numstr_pre_len;
+ /* overflowed prefix digit format? */
+ else if (numstr_pre_len > Num.pre)
+ {
+ numstr = (char *) palloc(Num.pre + Num.post + 2);
+ fill_str(numstr, '#', Num.pre + Num.post + 1);
+ *(numstr + Num.pre) = '.';
+ }
+ }
+
+ NUM_TOCHAR_finish;
+ PG_RETURN_TEXT_P(result);
+}
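+
+/*
+ * Usage sketch (illustrative): because of the FLT_DIG clamp above, float4
+ * output keeps at most FLT_DIG significant digits, e.g.
+ *
+ *   SELECT to_char(125.8::real, '999D9');   -- expected: ' 125.8'
+ */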
+
+/* -----------------
+ * FLOAT8 to_char()
+ * -----------------
+ */
+Datum
+float8_to_char(PG_FUNCTION_ARGS)
+{
+ float8 value = PG_GETARG_FLOAT8(0);
+ text *fmt = PG_GETARG_TEXT_PP(1);
+ NUMDesc Num;
+ FormatNode *format;
+ text *result;
+ bool shouldFree;
+ int out_pre_spaces = 0,
+ sign = 0;
+ char *numstr,
+ *p;
+
+ NUM_TOCHAR_prepare;
+
+ if (IS_ROMAN(&Num))
+ numstr = int_to_roman((int) rint(value));
+ else if (IS_EEEE(&Num))
+ {
+ if (isnan(value) || isinf(value))
+ {
+ /*
+ * Allow 6 characters for the leading sign, the decimal point,
+ * "e", the exponent's sign and two exponent digits.
+ */
+ numstr = (char *) palloc(Num.pre + Num.post + 7);
+ fill_str(numstr, '#', Num.pre + Num.post + 6);
+ *numstr = ' ';
+ *(numstr + Num.pre + 1) = '.';
+ }
+ else
+ {
+ numstr = psprintf("%+.*e", Num.post, value);
+
+ /*
+ * Swap a leading positive sign for a space.
+ */
+ if (*numstr == '+')
+ *numstr = ' ';
+ }
+ }
+ else
+ {
+ float8 val = value;
+ char *orgnum;
+ int numstr_pre_len;
+
+ if (IS_MULTI(&Num))
+ {
+ double multi = pow((double) 10, (double) Num.multi);
+
+ val = value * multi;
+ Num.pre += Num.multi;
+ }
+
+ orgnum = psprintf("%.0f", fabs(val));
+ numstr_pre_len = strlen(orgnum);
+
+ /* adjust post digits to fit max double digits */
+ if (numstr_pre_len >= DBL_DIG)
+ Num.post = 0;
+ else if (numstr_pre_len + Num.post > DBL_DIG)
+ Num.post = DBL_DIG - numstr_pre_len;
+ orgnum = psprintf("%.*f", Num.post, val);
+
+ if (*orgnum == '-')
+ { /* < 0 */
+ sign = '-';
+ numstr = orgnum + 1;
+ }
+ else
+ {
+ sign = '+';
+ numstr = orgnum;
+ }
+
+ if ((p = strchr(numstr, '.')))
+ numstr_pre_len = p - numstr;
+ else
+ numstr_pre_len = strlen(numstr);
+
+ /* needs padding? */
+ if (numstr_pre_len < Num.pre)
+ out_pre_spaces = Num.pre - numstr_pre_len;
+ /* overflowed prefix digit format? */
+ else if (numstr_pre_len > Num.pre)
+ {
+ numstr = (char *) palloc(Num.pre + Num.post + 2);
+ fill_str(numstr, '#', Num.pre + Num.post + 1);
+ *(numstr + Num.pre) = '.';
+ }
+ }
+
+ NUM_TOCHAR_finish;
+ PG_RETURN_TEXT_P(result);
+}
diff --git a/src/backend/utils/adt/genfile.c b/src/backend/utils/adt/genfile.c
new file mode 100644
index 0000000..5d2679d
--- /dev/null
+++ b/src/backend/utils/adt/genfile.c
@@ -0,0 +1,709 @@
+/*-------------------------------------------------------------------------
+ *
+ * genfile.c
+ * Functions for direct access to files
+ *
+ *
+ * Copyright (c) 2004-2022, PostgreSQL Global Development Group
+ *
+ * Author: Andreas Pflug <pgadmin@pse-consulting.de>
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/genfile.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <dirent.h>
+
+#include "access/htup_details.h"
+#include "access/xlog_internal.h"
+#include "catalog/pg_authid.h"
+#include "catalog/pg_tablespace_d.h"
+#include "catalog/pg_type.h"
+#include "funcapi.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "postmaster/syslogger.h"
+#include "replication/slot.h"
+#include "storage/fd.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/syscache.h"
+#include "utils/timestamp.h"
+
+
+/*
+ * Convert a "text" filename argument to C string, and check it's allowable.
+ *
+ * Filename may be absolute or relative to the DataDir, but we only allow
+ * absolute paths that match DataDir or Log_directory.
+ *
+ * This does a privilege check against the 'pg_read_server_files' role, so
+ * this function is really only appropriate for callers who are only checking
+ * 'read' access. Do not use this function if you are looking for a check
+ * for 'write' or 'program' access without updating it to access the type
+ * of check as an argument and checking the appropriate role membership.
+ */
+static char *
+convert_and_check_filename(text *arg)
+{
+ char *filename;
+
+ filename = text_to_cstring(arg);
+ canonicalize_path(filename); /* filename can change length here */
+
+ /*
+ * Roles with privileges of the 'pg_read_server_files' role are allowed to
+ * access any files on the server as the PG user, so no need to do any
+ * further checks here.
+ */
+ if (has_privs_of_role(GetUserId(), ROLE_PG_READ_SERVER_FILES))
+ return filename;
+
+ /*
+ * User isn't a member of the pg_read_server_files role, so check if it's
+ * allowable
+ */
+ if (is_absolute_path(filename))
+ {
+ /*
+ * Allow absolute paths if within DataDir or Log_directory, even
+ * though Log_directory might be outside DataDir.
+ */
+ if (!path_is_prefix_of_path(DataDir, filename) &&
+ (!is_absolute_path(Log_directory) ||
+ !path_is_prefix_of_path(Log_directory, filename)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("absolute path not allowed")));
+ }
+ else if (!path_is_relative_and_below_cwd(filename))
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("path must be in or below the current directory")));
+
+ return filename;
+}
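+
+/*
+ * For illustration (the paths are examples only): a caller without the
+ * pg_read_server_files privilege may pass a relative path such as
+ * 'postmaster.pid' or an absolute path under DataDir or Log_directory,
+ * whereas something like '/etc/passwd' is rejected with "absolute path not
+ * allowed".
+ */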
+
+
+/*
+ * Read a section of a file, returning it as bytea
+ *
+ * Caller is responsible for all permissions checking.
+ *
+ * We read the whole of the file when bytes_to_read is negative.
+ */
+static bytea *
+read_binary_file(const char *filename, int64 seek_offset, int64 bytes_to_read,
+ bool missing_ok)
+{
+ bytea *buf;
+ size_t nbytes = 0;
+ FILE *file;
+
+ /* clamp request size to what we can actually deliver */
+ if (bytes_to_read > (int64) (MaxAllocSize - VARHDRSZ))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("requested length too large")));
+
+ if ((file = AllocateFile(filename, PG_BINARY_R)) == NULL)
+ {
+ if (missing_ok && errno == ENOENT)
+ return NULL;
+ else
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open file \"%s\" for reading: %m",
+ filename)));
+ }
+
+ if (fseeko(file, (off_t) seek_offset,
+ (seek_offset >= 0) ? SEEK_SET : SEEK_END) != 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not seek in file \"%s\": %m", filename)));
+
+ if (bytes_to_read >= 0)
+ {
+		/* If passed an explicit read size, just do it */
+ buf = (bytea *) palloc((Size) bytes_to_read + VARHDRSZ);
+
+ nbytes = fread(VARDATA(buf), 1, (size_t) bytes_to_read, file);
+ }
+ else
+ {
+ /* Negative read size, read rest of file */
+ StringInfoData sbuf;
+
+ initStringInfo(&sbuf);
+ /* Leave room in the buffer for the varlena length word */
+ sbuf.len += VARHDRSZ;
+ Assert(sbuf.len < sbuf.maxlen);
+
+ while (!(feof(file) || ferror(file)))
+ {
+ size_t rbytes;
+
+ /* Minimum amount to read at a time */
+#define MIN_READ_SIZE 4096
+
+ /*
+ * If not at end of file, and sbuf.len is equal to MaxAllocSize -
+ * 1, then either the file is too large, or there is nothing left
+ * to read. Attempt to read one more byte to see if the end of
+ * file has been reached. If not, the file is too large; we'd
+ * rather give the error message for that ourselves.
+ */
+ if (sbuf.len == MaxAllocSize - 1)
+ {
+ char rbuf[1];
+
+ if (fread(rbuf, 1, 1, file) != 0 || !feof(file))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("file length too large")));
+ else
+ break;
+ }
+
+ /* OK, ensure that we can read at least MIN_READ_SIZE */
+ enlargeStringInfo(&sbuf, MIN_READ_SIZE);
+
+ /*
+ * stringinfo.c likes to allocate in powers of 2, so it's likely
+ * that much more space is available than we asked for. Use all
+ * of it, rather than making more fread calls than necessary.
+ */
+ rbytes = fread(sbuf.data + sbuf.len, 1,
+ (size_t) (sbuf.maxlen - sbuf.len - 1), file);
+ sbuf.len += rbytes;
+ nbytes += rbytes;
+ }
+
+ /* Now we can commandeer the stringinfo's buffer as the result */
+ buf = (bytea *) sbuf.data;
+ }
+
+ if (ferror(file))
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m", filename)));
+
+ SET_VARSIZE(buf, nbytes + VARHDRSZ);
+
+ FreeFile(file);
+
+ return buf;
+}
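+
+/*
+ * For illustration: a negative seek_offset is taken relative to the end of
+ * the file, so read_binary_file(path, -100, -1, false) is expected to
+ * return (up to) the last 100 bytes of the file.
+ */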
+
+/*
+ * Similar to read_binary_file, but we verify that the contents are valid
+ * in the database encoding.
+ */
+static text *
+read_text_file(const char *filename, int64 seek_offset, int64 bytes_to_read,
+ bool missing_ok)
+{
+ bytea *buf;
+
+ buf = read_binary_file(filename, seek_offset, bytes_to_read, missing_ok);
+
+ if (buf != NULL)
+ {
+ /* Make sure the input is valid */
+ pg_verifymbstr(VARDATA(buf), VARSIZE(buf) - VARHDRSZ, false);
+
+ /* OK, we can cast it to text safely */
+ return (text *) buf;
+ }
+ else
+ return NULL;
+}
+
+/*
+ * Read a section of a file, returning it as text
+ *
+ * This function is kept to support adminpack 1.0.
+ */
+Datum
+pg_read_file(PG_FUNCTION_ARGS)
+{
+ text *filename_t = PG_GETARG_TEXT_PP(0);
+ int64 seek_offset = 0;
+ int64 bytes_to_read = -1;
+ bool missing_ok = false;
+ char *filename;
+ text *result;
+
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be superuser to read files with adminpack 1.0"),
+ /* translator: %s is a SQL function name */
+ errhint("Consider using %s, which is part of core, instead.",
+ "pg_read_file()")));
+
+ /* handle optional arguments */
+ if (PG_NARGS() >= 3)
+ {
+ seek_offset = PG_GETARG_INT64(1);
+ bytes_to_read = PG_GETARG_INT64(2);
+
+ if (bytes_to_read < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("requested length cannot be negative")));
+ }
+ if (PG_NARGS() >= 4)
+ missing_ok = PG_GETARG_BOOL(3);
+
+ filename = convert_and_check_filename(filename_t);
+
+ result = read_text_file(filename, seek_offset, bytes_to_read, missing_ok);
+ if (result)
+ PG_RETURN_TEXT_P(result);
+ else
+ PG_RETURN_NULL();
+}
+
+/*
+ * Read a section of a file, returning it as text
+ *
+ * No superuser check done here; instead privileges are handled by the
+ * GRANT system.
+ */
+Datum
+pg_read_file_v2(PG_FUNCTION_ARGS)
+{
+ text *filename_t = PG_GETARG_TEXT_PP(0);
+ int64 seek_offset = 0;
+ int64 bytes_to_read = -1;
+ bool missing_ok = false;
+ char *filename;
+ text *result;
+
+ /* handle optional arguments */
+ if (PG_NARGS() >= 3)
+ {
+ seek_offset = PG_GETARG_INT64(1);
+ bytes_to_read = PG_GETARG_INT64(2);
+
+ if (bytes_to_read < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("requested length cannot be negative")));
+ }
+ if (PG_NARGS() >= 4)
+ missing_ok = PG_GETARG_BOOL(3);
+
+ filename = convert_and_check_filename(filename_t);
+
+ result = read_text_file(filename, seek_offset, bytes_to_read, missing_ok);
+ if (result)
+ PG_RETURN_TEXT_P(result);
+ else
+ PG_RETURN_NULL();
+}
+
+/*
+ * Read a section of a file, returning it as bytea
+ */
+Datum
+pg_read_binary_file(PG_FUNCTION_ARGS)
+{
+ text *filename_t = PG_GETARG_TEXT_PP(0);
+ int64 seek_offset = 0;
+ int64 bytes_to_read = -1;
+ bool missing_ok = false;
+ char *filename;
+ bytea *result;
+
+ /* handle optional arguments */
+ if (PG_NARGS() >= 3)
+ {
+ seek_offset = PG_GETARG_INT64(1);
+ bytes_to_read = PG_GETARG_INT64(2);
+
+ if (bytes_to_read < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("requested length cannot be negative")));
+ }
+ if (PG_NARGS() >= 4)
+ missing_ok = PG_GETARG_BOOL(3);
+
+ filename = convert_and_check_filename(filename_t);
+
+ result = read_binary_file(filename, seek_offset,
+ bytes_to_read, missing_ok);
+ if (result)
+ PG_RETURN_BYTEA_P(result);
+ else
+ PG_RETURN_NULL();
+}
+
+
+/*
+ * Wrapper functions for the 1 and 3 argument variants of pg_read_file_v2()
+ * and pg_read_binary_file().
+ *
+ * These are necessary to pass the sanity check in opr_sanity, which checks
+ * that all built-in functions that share the implementing C function take
+ * the same number of arguments.
+ */
+Datum
+pg_read_file_off_len(PG_FUNCTION_ARGS)
+{
+ return pg_read_file_v2(fcinfo);
+}
+
+Datum
+pg_read_file_all(PG_FUNCTION_ARGS)
+{
+ return pg_read_file_v2(fcinfo);
+}
+
+Datum
+pg_read_binary_file_off_len(PG_FUNCTION_ARGS)
+{
+ return pg_read_binary_file(fcinfo);
+}
+
+Datum
+pg_read_binary_file_all(PG_FUNCTION_ARGS)
+{
+ return pg_read_binary_file(fcinfo);
+}
+
+/*
+ * stat a file
+ */
+Datum
+pg_stat_file(PG_FUNCTION_ARGS)
+{
+ text *filename_t = PG_GETARG_TEXT_PP(0);
+ char *filename;
+ struct stat fst;
+ Datum values[6];
+ bool isnull[6];
+ HeapTuple tuple;
+ TupleDesc tupdesc;
+ bool missing_ok = false;
+
+ /* check the optional argument */
+ if (PG_NARGS() == 2)
+ missing_ok = PG_GETARG_BOOL(1);
+
+ filename = convert_and_check_filename(filename_t);
+
+ if (stat(filename, &fst) < 0)
+ {
+ if (missing_ok && errno == ENOENT)
+ PG_RETURN_NULL();
+ else
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m", filename)));
+ }
+
+ /*
+ * This record type had better match the output parameters declared for me
+ * in pg_proc.h.
+ */
+ tupdesc = CreateTemplateTupleDesc(6);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1,
+ "size", INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 2,
+ "access", TIMESTAMPTZOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 3,
+ "modification", TIMESTAMPTZOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 4,
+ "change", TIMESTAMPTZOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 5,
+ "creation", TIMESTAMPTZOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 6,
+ "isdir", BOOLOID, -1, 0);
+ BlessTupleDesc(tupdesc);
+
+ memset(isnull, false, sizeof(isnull));
+
+ values[0] = Int64GetDatum((int64) fst.st_size);
+ values[1] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_atime));
+ values[2] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_mtime));
+ /* Unix has file status change time, while Win32 has creation time */
+#if !defined(WIN32) && !defined(__CYGWIN__)
+ values[3] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_ctime));
+ isnull[4] = true;
+#else
+ isnull[3] = true;
+ values[4] = TimestampTzGetDatum(time_t_to_timestamptz(fst.st_ctime));
+#endif
+ values[5] = BoolGetDatum(S_ISDIR(fst.st_mode));
+
+ tuple = heap_form_tuple(tupdesc, values, isnull);
+
+ pfree(filename);
+
+ PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
+}
+
+/*
+ * stat a file (1 argument version)
+ *
+ * note: this wrapper is necessary to pass the sanity check in opr_sanity,
+ * which checks that all built-in functions that share the implementing C
+ * function take the same number of arguments
+ */
+Datum
+pg_stat_file_1arg(PG_FUNCTION_ARGS)
+{
+ return pg_stat_file(fcinfo);
+}
+
+/*
+ * List a directory (returns the filenames only)
+ */
+Datum
+pg_ls_dir(PG_FUNCTION_ARGS)
+{
+ ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+ char *location;
+ bool missing_ok = false;
+ bool include_dot_dirs = false;
+ DIR *dirdesc;
+ struct dirent *de;
+
+ location = convert_and_check_filename(PG_GETARG_TEXT_PP(0));
+
+ /* check the optional arguments */
+ if (PG_NARGS() == 3)
+ {
+ if (!PG_ARGISNULL(1))
+ missing_ok = PG_GETARG_BOOL(1);
+ if (!PG_ARGISNULL(2))
+ include_dot_dirs = PG_GETARG_BOOL(2);
+ }
+
+ InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC);
+
+ dirdesc = AllocateDir(location);
+ if (!dirdesc)
+ {
+ /* Return empty tuplestore if appropriate */
+ if (missing_ok && errno == ENOENT)
+ return (Datum) 0;
+ /* Otherwise, we can let ReadDir() throw the error */
+ }
+
+ while ((de = ReadDir(dirdesc, location)) != NULL)
+ {
+ Datum values[1];
+ bool nulls[1];
+
+ if (!include_dot_dirs &&
+ (strcmp(de->d_name, ".") == 0 ||
+ strcmp(de->d_name, "..") == 0))
+ continue;
+
+ values[0] = CStringGetTextDatum(de->d_name);
+ nulls[0] = false;
+
+ tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+ values, nulls);
+ }
+
+ FreeDir(dirdesc);
+ return (Datum) 0;
+}
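+
+/*
+ * Usage sketch (illustrative; directory names are examples only):
+ *
+ *   SELECT * FROM pg_ls_dir('pg_wal');
+ *   SELECT * FROM pg_ls_dir('no_such_dir', true, false);  -- empty set, no error
+ */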
+
+/*
+ * List a directory (1 argument version)
+ *
+ * note: this wrapper is necessary to pass the sanity check in opr_sanity,
+ * which checks that all built-in functions that share the implementing C
+ * function take the same number of arguments.
+ */
+Datum
+pg_ls_dir_1arg(PG_FUNCTION_ARGS)
+{
+ return pg_ls_dir(fcinfo);
+}
+
+/*
+ * Generic function to return a directory listing of files.
+ *
+ * If the directory isn't there, silently return an empty set if missing_ok.
+ * Other unreadable-directory cases throw an error.
+ */
+static Datum
+pg_ls_dir_files(FunctionCallInfo fcinfo, const char *dir, bool missing_ok)
+{
+ ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+ DIR *dirdesc;
+ struct dirent *de;
+
+ InitMaterializedSRF(fcinfo, 0);
+
+ /*
+ * Now walk the directory. Note that we must do this within a single SRF
+ * call, not leave the directory open across multiple calls, since we
+ * can't count on the SRF being run to completion.
+ */
+ dirdesc = AllocateDir(dir);
+ if (!dirdesc)
+ {
+ /* Return empty tuplestore if appropriate */
+ if (missing_ok && errno == ENOENT)
+ return (Datum) 0;
+ /* Otherwise, we can let ReadDir() throw the error */
+ }
+
+ while ((de = ReadDir(dirdesc, dir)) != NULL)
+ {
+ Datum values[3];
+ bool nulls[3];
+ char path[MAXPGPATH * 2];
+ struct stat attrib;
+
+ /* Skip hidden files */
+ if (de->d_name[0] == '.')
+ continue;
+
+ /* Get the file info */
+ snprintf(path, sizeof(path), "%s/%s", dir, de->d_name);
+ if (stat(path, &attrib) < 0)
+ {
+ /* Ignore concurrently-deleted files, else complain */
+ if (errno == ENOENT)
+ continue;
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m", path)));
+ }
+
+ /* Ignore anything but regular files */
+ if (!S_ISREG(attrib.st_mode))
+ continue;
+
+ values[0] = CStringGetTextDatum(de->d_name);
+ values[1] = Int64GetDatum((int64) attrib.st_size);
+ values[2] = TimestampTzGetDatum(time_t_to_timestamptz(attrib.st_mtime));
+ memset(nulls, 0, sizeof(nulls));
+
+ tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
+ }
+
+ FreeDir(dirdesc);
+ return (Datum) 0;
+}
+
+/* Function to return the list of files in the log directory */
+Datum
+pg_ls_logdir(PG_FUNCTION_ARGS)
+{
+ return pg_ls_dir_files(fcinfo, Log_directory, false);
+}
+
+/* Function to return the list of files in the WAL directory */
+Datum
+pg_ls_waldir(PG_FUNCTION_ARGS)
+{
+ return pg_ls_dir_files(fcinfo, XLOGDIR, false);
+}
+
+/*
+ * Generic function to return the list of files in pgsql_tmp
+ */
+static Datum
+pg_ls_tmpdir(FunctionCallInfo fcinfo, Oid tblspc)
+{
+ char path[MAXPGPATH];
+
+ if (!SearchSysCacheExists1(TABLESPACEOID, ObjectIdGetDatum(tblspc)))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("tablespace with OID %u does not exist",
+ tblspc)));
+
+ TempTablespacePath(path, tblspc);
+ return pg_ls_dir_files(fcinfo, path, true);
+}
+
+/*
+ * Function to return the list of temporary files in the pg_default tablespace's
+ * pgsql_tmp directory
+ */
+Datum
+pg_ls_tmpdir_noargs(PG_FUNCTION_ARGS)
+{
+ return pg_ls_tmpdir(fcinfo, DEFAULTTABLESPACE_OID);
+}
+
+/*
+ * Function to return the list of temporary files in the specified tablespace's
+ * pgsql_tmp directory
+ */
+Datum
+pg_ls_tmpdir_1arg(PG_FUNCTION_ARGS)
+{
+ return pg_ls_tmpdir(fcinfo, PG_GETARG_OID(0));
+}
+
+/*
+ * Function to return the list of files in the WAL archive status directory.
+ */
+Datum
+pg_ls_archive_statusdir(PG_FUNCTION_ARGS)
+{
+ return pg_ls_dir_files(fcinfo, XLOGDIR "/archive_status", true);
+}
+
+/*
+ * Function to return the list of files in the pg_logical/snapshots directory.
+ */
+Datum
+pg_ls_logicalsnapdir(PG_FUNCTION_ARGS)
+{
+ return pg_ls_dir_files(fcinfo, "pg_logical/snapshots", false);
+}
+
+/*
+ * Function to return the list of files in the pg_logical/mappings directory.
+ */
+Datum
+pg_ls_logicalmapdir(PG_FUNCTION_ARGS)
+{
+ return pg_ls_dir_files(fcinfo, "pg_logical/mappings", false);
+}
+
+/*
+ * Function to return the list of files in the pg_replslot/<replication_slot>
+ * directory.
+ */
+Datum
+pg_ls_replslotdir(PG_FUNCTION_ARGS)
+{
+ text *slotname_t;
+ char path[MAXPGPATH];
+ char *slotname;
+
+ slotname_t = PG_GETARG_TEXT_PP(0);
+
+ slotname = text_to_cstring(slotname_t);
+
+ if (!SearchNamedReplicationSlot(slotname, true))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("replication slot \"%s\" does not exist",
+ slotname)));
+
+ snprintf(path, sizeof(path), "pg_replslot/%s", slotname);
+ return pg_ls_dir_files(fcinfo, path, false);
+}
diff --git a/src/backend/utils/adt/geo_ops.c b/src/backend/utils/adt/geo_ops.c
new file mode 100644
index 0000000..f1b632e
--- /dev/null
+++ b/src/backend/utils/adt/geo_ops.c
@@ -0,0 +1,5519 @@
+/*-------------------------------------------------------------------------
+ *
+ * geo_ops.c
+ * 2D geometric operations
+ *
+ * This module implements the geometric functions and operators. The
+ * geometric types are (from simple to more complicated):
+ *
+ * - point
+ * - line
+ * - line segment
+ * - box
+ * - circle
+ * - polygon
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/geo_ops.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+#include <limits.h>
+#include <float.h>
+#include <ctype.h>
+
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "utils/float.h"
+#include "utils/fmgrprotos.h"
+#include "utils/geo_decls.h"
+
+/*
+ * * Type constructors have this form:
+ * void type_construct(Type *result, ...);
+ *
+ * * Operators commonly have signatures such as
+ * void type1_operator_type2(Type *result, Type1 *obj1, Type2 *obj2);
+ *
+ * Common operators are:
+ * * Intersection point:
+ * bool type1_interpt_type2(Point *result, Type1 *obj1, Type2 *obj2);
+ * Return whether the two objects intersect. If *result is not NULL,
+ * it is set to the intersection point.
+ *
+ * * Containment:
+ * bool type1_contain_type2(Type1 *obj1, Type2 *obj2);
+ * Return whether obj1 contains obj2.
+ * bool type1_contain_type2(Type1 *contains_obj, Type1 *contained_obj);
+ * Return whether obj1 contains obj2 (used when types are the same)
+ *
+ * * Distance of closest point in or on obj1 to obj2:
+ * float8 type1_closept_type2(Point *result, Type1 *obj1, Type2 *obj2);
+ * Returns the shortest distance between two objects. If *result is not
+ * NULL, it is set to the closest point in or on obj1 to obj2.
+ *
+ * These functions may be used to implement multiple SQL-level operators. For
+ * example, determining whether two lines are parallel is done by checking
+ * whether they don't intersect.
+ */
+
+/*
+ * Internal routines
+ */
+
+enum path_delim
+{
+ PATH_NONE, PATH_OPEN, PATH_CLOSED
+};
+
+/* Routines for points */
+static inline void point_construct(Point *result, float8 x, float8 y);
+static inline void point_add_point(Point *result, Point *pt1, Point *pt2);
+static inline void point_sub_point(Point *result, Point *pt1, Point *pt2);
+static inline void point_mul_point(Point *result, Point *pt1, Point *pt2);
+static inline void point_div_point(Point *result, Point *pt1, Point *pt2);
+static inline bool point_eq_point(Point *pt1, Point *pt2);
+static inline float8 point_dt(Point *pt1, Point *pt2);
+static inline float8 point_sl(Point *pt1, Point *pt2);
+static int point_inside(Point *p, int npts, Point *plist);
+
+/* Routines for lines */
+static inline void line_construct(LINE *result, Point *pt, float8 m);
+static inline float8 line_sl(LINE *line);
+static inline float8 line_invsl(LINE *line);
+static bool line_interpt_line(Point *result, LINE *l1, LINE *l2);
+static bool line_contain_point(LINE *line, Point *point);
+static float8 line_closept_point(Point *result, LINE *line, Point *pt);
+
+/* Routines for line segments */
+static inline void statlseg_construct(LSEG *lseg, Point *pt1, Point *pt2);
+static inline float8 lseg_sl(LSEG *lseg);
+static inline float8 lseg_invsl(LSEG *lseg);
+static bool lseg_interpt_line(Point *result, LSEG *lseg, LINE *line);
+static bool lseg_interpt_lseg(Point *result, LSEG *l1, LSEG *l2);
+static int lseg_crossing(float8 x, float8 y, float8 px, float8 py);
+static bool lseg_contain_point(LSEG *lseg, Point *point);
+static float8 lseg_closept_point(Point *result, LSEG *lseg, Point *pt);
+static float8 lseg_closept_line(Point *result, LSEG *lseg, LINE *line);
+static float8 lseg_closept_lseg(Point *result, LSEG *on_lseg, LSEG *to_lseg);
+
+/* Routines for boxes */
+static inline void box_construct(BOX *result, Point *pt1, Point *pt2);
+static void box_cn(Point *center, BOX *box);
+static bool box_ov(BOX *box1, BOX *box2);
+static float8 box_ar(BOX *box);
+static float8 box_ht(BOX *box);
+static float8 box_wd(BOX *box);
+static bool box_contain_point(BOX *box, Point *point);
+static bool box_contain_box(BOX *contains_box, BOX *contained_box);
+static bool box_contain_lseg(BOX *box, LSEG *lseg);
+static bool box_interpt_lseg(Point *result, BOX *box, LSEG *lseg);
+static float8 box_closept_point(Point *result, BOX *box, Point *point);
+static float8 box_closept_lseg(Point *result, BOX *box, LSEG *lseg);
+
+/* Routines for circles */
+static float8 circle_ar(CIRCLE *circle);
+
+/* Routines for polygons */
+static void make_bound_box(POLYGON *poly);
+static void poly_to_circle(CIRCLE *result, POLYGON *poly);
+static bool lseg_inside_poly(Point *a, Point *b, POLYGON *poly, int start);
+static bool poly_contain_poly(POLYGON *contains_poly, POLYGON *contained_poly);
+static bool plist_same(int npts, Point *p1, Point *p2);
+static float8 dist_ppoly_internal(Point *pt, POLYGON *poly);
+
+/* Routines for encoding and decoding */
+static float8 single_decode(char *num, char **endptr_p,
+ const char *type_name, const char *orig_string);
+static void single_encode(float8 x, StringInfo str);
+static void pair_decode(char *str, float8 *x, float8 *y, char **endptr_p,
+ const char *type_name, const char *orig_string);
+static void pair_encode(float8 x, float8 y, StringInfo str);
+static int pair_count(char *s, char delim);
+static void path_decode(char *str, bool opentype, int npts, Point *p,
+ bool *isopen, char **endptr_p,
+ const char *type_name, const char *orig_string);
+static char *path_encode(enum path_delim path_delim, int npts, Point *pt);
+
+
+/*
+ * Delimiters for input and output strings.
+ * LDELIM, RDELIM, and DELIM are left, right, and separator delimiters, respectively.
+ * LDELIM_EP, RDELIM_EP are left and right delimiters for paths with endpoints.
+ */
+
+#define LDELIM '('
+#define RDELIM ')'
+#define DELIM ','
+#define LDELIM_EP '['
+#define RDELIM_EP ']'
+#define LDELIM_C '<'
+#define RDELIM_C '>'
+#define LDELIM_L '{'
+#define RDELIM_L '}'
+
+
+/*
+ * Geometric data types are composed of points.
+ * This code tries to support a common format throughout the data types,
+ * to allow for more predictable usage and data type conversion.
+ * The fundamental unit is the point. Other units are line segments,
+ * open paths, boxes, closed paths, and polygons (which should be considered
+ * non-intersecting closed paths).
+ *
+ * Data representation is as follows:
+ * point: (x,y)
+ * line segment: [(x1,y1),(x2,y2)]
+ * box: (x1,y1),(x2,y2)
+ * open path: [(x1,y1),...,(xn,yn)]
+ * closed path: ((x1,y1),...,(xn,yn))
+ * polygon: ((x1,y1),...,(xn,yn))
+ *
+ * For boxes, the points are opposite corners with the first point at the top right.
+ * For closed paths and polygons, the points should be reordered to allow
+ * fast and correct equality comparisons.
+ *
+ * XXX perhaps points in complex shapes should be reordered internally
+ * to allow faster internal operations, but should keep track of input order
+ * and restore that order for text output - tgl 97/01/16
+ */
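+
+/*
+ * For illustration only (example values, not dictated by the code): with the
+ * representations above, a unit square and pieces of it could be written as
+ *	point:			(0,0)
+ *	line segment:	[(0,0),(1,1)]
+ *	box:			(1,1),(0,0)
+ *	open path:		[(0,0),(1,0),(1,1)]
+ *	closed path:	((0,0),(1,0),(1,1),(0,1))
+ *	polygon:		((0,0),(1,0),(1,1),(0,1))
+ */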
+
+static float8
+single_decode(char *num, char **endptr_p,
+ const char *type_name, const char *orig_string)
+{
+ return float8in_internal(num, endptr_p, type_name, orig_string);
+} /* single_decode() */
+
+static void
+single_encode(float8 x, StringInfo str)
+{
+ char *xstr = float8out_internal(x);
+
+ appendStringInfoString(str, xstr);
+ pfree(xstr);
+} /* single_encode() */
+
+static void
+pair_decode(char *str, float8 *x, float8 *y, char **endptr_p,
+ const char *type_name, const char *orig_string)
+{
+ bool has_delim;
+
+ while (isspace((unsigned char) *str))
+ str++;
+ if ((has_delim = (*str == LDELIM)))
+ str++;
+
+ *x = float8in_internal(str, &str, type_name, orig_string);
+
+ if (*str++ != DELIM)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ type_name, orig_string)));
+
+ *y = float8in_internal(str, &str, type_name, orig_string);
+
+ if (has_delim)
+ {
+ if (*str++ != RDELIM)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ type_name, orig_string)));
+ while (isspace((unsigned char) *str))
+ str++;
+ }
+
+ /* report stopping point if wanted, else complain if not end of string */
+ if (endptr_p)
+ *endptr_p = str;
+ else if (*str != '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ type_name, orig_string)));
+}
+
+static void
+pair_encode(float8 x, float8 y, StringInfo str)
+{
+ char *xstr = float8out_internal(x);
+ char *ystr = float8out_internal(y);
+
+ appendStringInfo(str, "%s,%s", xstr, ystr);
+ pfree(xstr);
+ pfree(ystr);
+}
+
+static void
+path_decode(char *str, bool opentype, int npts, Point *p,
+ bool *isopen, char **endptr_p,
+ const char *type_name, const char *orig_string)
+{
+ int depth = 0;
+ char *cp;
+ int i;
+
+ while (isspace((unsigned char) *str))
+ str++;
+ if ((*isopen = (*str == LDELIM_EP)))
+ {
+ /* no open delimiter allowed? */
+ if (!opentype)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ type_name, orig_string)));
+ depth++;
+ str++;
+ }
+ else if (*str == LDELIM)
+ {
+ cp = (str + 1);
+ while (isspace((unsigned char) *cp))
+ cp++;
+ if (*cp == LDELIM)
+ {
+ depth++;
+ str = cp;
+ }
+ else if (strrchr(str, LDELIM) == str)
+ {
+ depth++;
+ str = cp;
+ }
+ }
+
+ for (i = 0; i < npts; i++)
+ {
+ pair_decode(str, &(p->x), &(p->y), &str, type_name, orig_string);
+ if (*str == DELIM)
+ str++;
+ p++;
+ }
+
+ while (depth > 0)
+ {
+ if (*str == RDELIM || (*str == RDELIM_EP && *isopen && depth == 1))
+ {
+ depth--;
+ str++;
+ while (isspace((unsigned char) *str))
+ str++;
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ type_name, orig_string)));
+ }
+
+ /* report stopping point if wanted, else complain if not end of string */
+ if (endptr_p)
+ *endptr_p = str;
+ else if (*str != '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ type_name, orig_string)));
+} /* path_decode() */
+
+static char *
+path_encode(enum path_delim path_delim, int npts, Point *pt)
+{
+ StringInfoData str;
+ int i;
+
+ initStringInfo(&str);
+
+ switch (path_delim)
+ {
+ case PATH_CLOSED:
+ appendStringInfoChar(&str, LDELIM);
+ break;
+ case PATH_OPEN:
+ appendStringInfoChar(&str, LDELIM_EP);
+ break;
+ case PATH_NONE:
+ break;
+ }
+
+ for (i = 0; i < npts; i++)
+ {
+ if (i > 0)
+ appendStringInfoChar(&str, DELIM);
+ appendStringInfoChar(&str, LDELIM);
+ pair_encode(pt->x, pt->y, &str);
+ appendStringInfoChar(&str, RDELIM);
+ pt++;
+ }
+
+ switch (path_delim)
+ {
+ case PATH_CLOSED:
+ appendStringInfoChar(&str, RDELIM);
+ break;
+ case PATH_OPEN:
+ appendStringInfoChar(&str, RDELIM_EP);
+ break;
+ case PATH_NONE:
+ break;
+ }
+
+ return str.data;
+} /* path_encode() */
+
+/*-------------------------------------------------------------
+ * pair_count - count the number of points
+ * allow the following notation:
+ * '((1,2),(3,4))'
+ * '(1,3,2,4)'
+ * require an odd number of delim characters in the string
+ *-------------------------------------------------------------*/
+static int
+pair_count(char *s, char delim)
+{
+ int ndelim = 0;
+
+ while ((s = strchr(s, delim)) != NULL)
+ {
+ ndelim++;
+ s++;
+ }
+ return (ndelim % 2) ? ((ndelim + 1) / 2) : -1;
+}
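+
+/*
+ * Worked example (illustrative input only): in '((1,2),(3,4))' the delimiter
+ * ',' occurs three times, so pair_count() reports (3 + 1) / 2 = 2 points;
+ * in '(1,2,3)' it occurs twice, an even count, so -1 is returned to flag
+ * malformed input.
+ */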
+
+
+/***********************************************************************
+ **
+ ** Routines for two-dimensional boxes.
+ **
+ ***********************************************************************/
+
+/*----------------------------------------------------------
+ * Formatting and conversion routines.
+ *---------------------------------------------------------*/
+
+/* box_in - convert a string to internal form.
+ *
+ * External format: (two corners of box)
+ * "(f8, f8), (f8, f8)"
+ * also supports the older style "(f8, f8, f8, f8)"
+ */
+Datum
+box_in(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+ BOX *box = (BOX *) palloc(sizeof(BOX));
+ bool isopen;
+ float8 x,
+ y;
+
+ path_decode(str, false, 2, &(box->high), &isopen, NULL, "box", str);
+
+ /* reorder corners if necessary... */
+ if (float8_lt(box->high.x, box->low.x))
+ {
+ x = box->high.x;
+ box->high.x = box->low.x;
+ box->low.x = x;
+ }
+ if (float8_lt(box->high.y, box->low.y))
+ {
+ y = box->high.y;
+ box->high.y = box->low.y;
+ box->low.y = y;
+ }
+
+ PG_RETURN_BOX_P(box);
+}
+
+/* box_out - convert a box to external form.
+ */
+Datum
+box_out(PG_FUNCTION_ARGS)
+{
+ BOX *box = PG_GETARG_BOX_P(0);
+
+ PG_RETURN_CSTRING(path_encode(PATH_NONE, 2, &(box->high)));
+}
+
+/*
+ * box_recv - converts external binary format to box
+ */
+Datum
+box_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ BOX *box;
+ float8 x,
+ y;
+
+ box = (BOX *) palloc(sizeof(BOX));
+
+ box->high.x = pq_getmsgfloat8(buf);
+ box->high.y = pq_getmsgfloat8(buf);
+ box->low.x = pq_getmsgfloat8(buf);
+ box->low.y = pq_getmsgfloat8(buf);
+
+ /* reorder corners if necessary... */
+ if (float8_lt(box->high.x, box->low.x))
+ {
+ x = box->high.x;
+ box->high.x = box->low.x;
+ box->low.x = x;
+ }
+ if (float8_lt(box->high.y, box->low.y))
+ {
+ y = box->high.y;
+ box->high.y = box->low.y;
+ box->low.y = y;
+ }
+
+ PG_RETURN_BOX_P(box);
+}
+
+/*
+ * box_send - converts box to binary format
+ */
+Datum
+box_send(PG_FUNCTION_ARGS)
+{
+ BOX *box = PG_GETARG_BOX_P(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendfloat8(&buf, box->high.x);
+ pq_sendfloat8(&buf, box->high.y);
+ pq_sendfloat8(&buf, box->low.x);
+ pq_sendfloat8(&buf, box->low.y);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+
+/* box_construct - fill in a new box.
+ */
+static inline void
+box_construct(BOX *result, Point *pt1, Point *pt2)
+{
+ if (float8_gt(pt1->x, pt2->x))
+ {
+ result->high.x = pt1->x;
+ result->low.x = pt2->x;
+ }
+ else
+ {
+ result->high.x = pt2->x;
+ result->low.x = pt1->x;
+ }
+ if (float8_gt(pt1->y, pt2->y))
+ {
+ result->high.y = pt1->y;
+ result->low.y = pt2->y;
+ }
+ else
+ {
+ result->high.y = pt2->y;
+ result->low.y = pt1->y;
+ }
+}
+
+
+/*----------------------------------------------------------
+ * Relational operators for BOXes.
+ * <, >, <=, >=, and == are based on box area.
+ *---------------------------------------------------------*/
+
+/* box_same - are two boxes identical?
+ */
+Datum
+box_same(PG_FUNCTION_ARGS)
+{
+ BOX *box1 = PG_GETARG_BOX_P(0);
+ BOX *box2 = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_BOOL(point_eq_point(&box1->high, &box2->high) &&
+ point_eq_point(&box1->low, &box2->low));
+}
+
+/* box_overlap - does box1 overlap box2?
+ */
+Datum
+box_overlap(PG_FUNCTION_ARGS)
+{
+ BOX *box1 = PG_GETARG_BOX_P(0);
+ BOX *box2 = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_BOOL(box_ov(box1, box2));
+}
+
+static bool
+box_ov(BOX *box1, BOX *box2)
+{
+ return (FPle(box1->low.x, box2->high.x) &&
+ FPle(box2->low.x, box1->high.x) &&
+ FPle(box1->low.y, box2->high.y) &&
+ FPle(box2->low.y, box1->high.y));
+}
+
+/* box_left - is box1 strictly left of box2?
+ */
+Datum
+box_left(PG_FUNCTION_ARGS)
+{
+ BOX *box1 = PG_GETARG_BOX_P(0);
+ BOX *box2 = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_BOOL(FPlt(box1->high.x, box2->low.x));
+}
+
+/* box_overleft - is the right edge of box1 at or left of
+ * the right edge of box2?
+ *
+ * This is "less than or equal" for the end of a time range,
+ * when time ranges are stored as rectangles.
+ */
+Datum
+box_overleft(PG_FUNCTION_ARGS)
+{
+ BOX *box1 = PG_GETARG_BOX_P(0);
+ BOX *box2 = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_BOOL(FPle(box1->high.x, box2->high.x));
+}
+
+/* box_right - is box1 strictly right of box2?
+ */
+Datum
+box_right(PG_FUNCTION_ARGS)
+{
+ BOX *box1 = PG_GETARG_BOX_P(0);
+ BOX *box2 = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_BOOL(FPgt(box1->low.x, box2->high.x));
+}
+
+/* box_overright - is the left edge of box1 at or right of
+ * the left edge of box2?
+ *
+ * This is "greater than or equal" for time ranges, when time ranges
+ * are stored as rectangles.
+ */
+Datum
+box_overright(PG_FUNCTION_ARGS)
+{
+ BOX *box1 = PG_GETARG_BOX_P(0);
+ BOX *box2 = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_BOOL(FPge(box1->low.x, box2->low.x));
+}
+
+/* box_below - is box1 strictly below box2?
+ */
+Datum
+box_below(PG_FUNCTION_ARGS)
+{
+ BOX *box1 = PG_GETARG_BOX_P(0);
+ BOX *box2 = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_BOOL(FPlt(box1->high.y, box2->low.y));
+}
+
+/* box_overbelow - is the upper edge of box1 at or below
+ * the upper edge of box2?
+ */
+Datum
+box_overbelow(PG_FUNCTION_ARGS)
+{
+ BOX *box1 = PG_GETARG_BOX_P(0);
+ BOX *box2 = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_BOOL(FPle(box1->high.y, box2->high.y));
+}
+
+/* box_above - is box1 strictly above box2?
+ */
+Datum
+box_above(PG_FUNCTION_ARGS)
+{
+ BOX *box1 = PG_GETARG_BOX_P(0);
+ BOX *box2 = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_BOOL(FPgt(box1->low.y, box2->high.y));
+}
+
+/* box_overabove - is the lower edge of box1 at or above
+ * the lower edge of box2?
+ */
+Datum
+box_overabove(PG_FUNCTION_ARGS)
+{
+ BOX *box1 = PG_GETARG_BOX_P(0);
+ BOX *box2 = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_BOOL(FPge(box1->low.y, box2->low.y));
+}
+
+/* box_contained - is box1 contained by box2?
+ */
+Datum
+box_contained(PG_FUNCTION_ARGS)
+{
+ BOX *box1 = PG_GETARG_BOX_P(0);
+ BOX *box2 = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_BOOL(box_contain_box(box2, box1));
+}
+
+/* box_contain - does box1 contain box2?
+ */
+Datum
+box_contain(PG_FUNCTION_ARGS)
+{
+ BOX *box1 = PG_GETARG_BOX_P(0);
+ BOX *box2 = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_BOOL(box_contain_box(box1, box2));
+}
+
+/*
+ * Check whether the second box is in the first box or on its border
+ */
+static bool
+box_contain_box(BOX *contains_box, BOX *contained_box)
+{
+ return FPge(contains_box->high.x, contained_box->high.x) &&
+ FPle(contains_box->low.x, contained_box->low.x) &&
+ FPge(contains_box->high.y, contained_box->high.y) &&
+ FPle(contains_box->low.y, contained_box->low.y);
+}
+
+
+/* box_positionop -
+ * is box1 entirely {above,below} box2?
+ *
+ * box_below_eq and box_above_eq are obsolete versions that (probably
+ * erroneously) accept the equal-boundaries case. Since these are not
+ * in sync with the box_left and box_right code, they are deprecated and
+ * not supported in the PG 8.1 rtree operator class extension.
+ */
+Datum
+box_below_eq(PG_FUNCTION_ARGS)
+{
+ BOX *box1 = PG_GETARG_BOX_P(0);
+ BOX *box2 = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_BOOL(FPle(box1->high.y, box2->low.y));
+}
+
+Datum
+box_above_eq(PG_FUNCTION_ARGS)
+{
+ BOX *box1 = PG_GETARG_BOX_P(0);
+ BOX *box2 = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_BOOL(FPge(box1->low.y, box2->high.y));
+}
+
+
+/* box_relop - is area(box1) relop area(box2), within
+ * our accuracy constraint?
+ */
+Datum
+box_lt(PG_FUNCTION_ARGS)
+{
+ BOX *box1 = PG_GETARG_BOX_P(0);
+ BOX *box2 = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_BOOL(FPlt(box_ar(box1), box_ar(box2)));
+}
+
+Datum
+box_gt(PG_FUNCTION_ARGS)
+{
+ BOX *box1 = PG_GETARG_BOX_P(0);
+ BOX *box2 = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_BOOL(FPgt(box_ar(box1), box_ar(box2)));
+}
+
+Datum
+box_eq(PG_FUNCTION_ARGS)
+{
+ BOX *box1 = PG_GETARG_BOX_P(0);
+ BOX *box2 = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_BOOL(FPeq(box_ar(box1), box_ar(box2)));
+}
+
+Datum
+box_le(PG_FUNCTION_ARGS)
+{
+ BOX *box1 = PG_GETARG_BOX_P(0);
+ BOX *box2 = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_BOOL(FPle(box_ar(box1), box_ar(box2)));
+}
+
+Datum
+box_ge(PG_FUNCTION_ARGS)
+{
+ BOX *box1 = PG_GETARG_BOX_P(0);
+ BOX *box2 = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_BOOL(FPge(box_ar(box1), box_ar(box2)));
+}
+
+
+/*----------------------------------------------------------
+ * "Arithmetic" operators on boxes.
+ *---------------------------------------------------------*/
+
+/* box_area - returns the area of the box.
+ */
+Datum
+box_area(PG_FUNCTION_ARGS)
+{
+ BOX *box = PG_GETARG_BOX_P(0);
+
+ PG_RETURN_FLOAT8(box_ar(box));
+}
+
+
+/* box_width - returns the width of the box
+ * (horizontal magnitude).
+ */
+Datum
+box_width(PG_FUNCTION_ARGS)
+{
+ BOX *box = PG_GETARG_BOX_P(0);
+
+ PG_RETURN_FLOAT8(box_wd(box));
+}
+
+
+/* box_height - returns the height of the box
+ * (vertical magnitude).
+ */
+Datum
+box_height(PG_FUNCTION_ARGS)
+{
+ BOX *box = PG_GETARG_BOX_P(0);
+
+ PG_RETURN_FLOAT8(box_ht(box));
+}
+
+
+/* box_distance - returns the distance between the
+ * center points of two boxes.
+ */
+Datum
+box_distance(PG_FUNCTION_ARGS)
+{
+ BOX *box1 = PG_GETARG_BOX_P(0);
+ BOX *box2 = PG_GETARG_BOX_P(1);
+ Point a,
+ b;
+
+ box_cn(&a, box1);
+ box_cn(&b, box2);
+
+ PG_RETURN_FLOAT8(point_dt(&a, &b));
+}
+
+
+/* box_center - returns the center point of the box.
+ */
+Datum
+box_center(PG_FUNCTION_ARGS)
+{
+ BOX *box = PG_GETARG_BOX_P(0);
+ Point *result = (Point *) palloc(sizeof(Point));
+
+ box_cn(result, box);
+
+ PG_RETURN_POINT_P(result);
+}
+
+
+/* box_ar - returns the area of the box.
+ */
+static float8
+box_ar(BOX *box)
+{
+ return float8_mul(box_wd(box), box_ht(box));
+}
+
+
+/* box_cn - stores the centerpoint of the box into *center.
+ */
+static void
+box_cn(Point *center, BOX *box)
+{
+ center->x = float8_div(float8_pl(box->high.x, box->low.x), 2.0);
+ center->y = float8_div(float8_pl(box->high.y, box->low.y), 2.0);
+}
+
+
+/* box_wd - returns the width (length) of the box
+ * (horizontal magnitude).
+ */
+static float8
+box_wd(BOX *box)
+{
+ return float8_mi(box->high.x, box->low.x);
+}
+
+
+/* box_ht - returns the height of the box
+ * (vertical magnitude).
+ */
+static float8
+box_ht(BOX *box)
+{
+ return float8_mi(box->high.y, box->low.y);
+}
+
+
+/*----------------------------------------------------------
+ * Funky operations.
+ *---------------------------------------------------------*/
+
+/* box_intersect -
+ * returns the overlapping portion of two boxes,
+ * or NULL if they do not intersect.
+ */
+Datum
+box_intersect(PG_FUNCTION_ARGS)
+{
+ BOX *box1 = PG_GETARG_BOX_P(0);
+ BOX *box2 = PG_GETARG_BOX_P(1);
+ BOX *result;
+
+ if (!box_ov(box1, box2))
+ PG_RETURN_NULL();
+
+ result = (BOX *) palloc(sizeof(BOX));
+
+ result->high.x = float8_min(box1->high.x, box2->high.x);
+ result->low.x = float8_max(box1->low.x, box2->low.x);
+ result->high.y = float8_min(box1->high.y, box2->high.y);
+ result->low.y = float8_max(box1->low.y, box2->low.y);
+
+ PG_RETURN_BOX_P(result);
+}
+
+
+/* box_diagonal -
+ * returns a line segment which happens to be the
+ * positive-slope diagonal of "box".
+ */
+Datum
+box_diagonal(PG_FUNCTION_ARGS)
+{
+ BOX *box = PG_GETARG_BOX_P(0);
+ LSEG *result = (LSEG *) palloc(sizeof(LSEG));
+
+ statlseg_construct(result, &box->high, &box->low);
+
+ PG_RETURN_LSEG_P(result);
+}
+
+/***********************************************************************
+ **
+ ** Routines for 2D lines.
+ **
+ ***********************************************************************/
+
+static bool
+line_decode(char *s, const char *str, LINE *line)
+{
+ /* s was already advanced over leading '{' */
+ line->A = single_decode(s, &s, "line", str);
+ if (*s++ != DELIM)
+ return false;
+ line->B = single_decode(s, &s, "line", str);
+ if (*s++ != DELIM)
+ return false;
+ line->C = single_decode(s, &s, "line", str);
+ if (*s++ != RDELIM_L)
+ return false;
+ while (isspace((unsigned char) *s))
+ s++;
+ if (*s != '\0')
+ return false;
+ return true;
+}
+
+Datum
+line_in(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+ LINE *line = (LINE *) palloc(sizeof(LINE));
+ LSEG lseg;
+ bool isopen;
+ char *s;
+
+ s = str;
+ while (isspace((unsigned char) *s))
+ s++;
+ if (*s == LDELIM_L)
+ {
+ if (!line_decode(s + 1, str, line))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "line", str)));
+ if (FPzero(line->A) && FPzero(line->B))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid line specification: A and B cannot both be zero")));
+ }
+ else
+ {
+ path_decode(s, true, 2, &lseg.p[0], &isopen, NULL, "line", str);
+ if (point_eq_point(&lseg.p[0], &lseg.p[1]))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid line specification: must be two distinct points")));
+ line_construct(line, &lseg.p[0], lseg_sl(&lseg));
+ }
+
+ PG_RETURN_LINE_P(line);
+}
+
+
+Datum
+line_out(PG_FUNCTION_ARGS)
+{
+ LINE *line = PG_GETARG_LINE_P(0);
+ char *astr = float8out_internal(line->A);
+ char *bstr = float8out_internal(line->B);
+ char *cstr = float8out_internal(line->C);
+
+ PG_RETURN_CSTRING(psprintf("%c%s%c%s%c%s%c", LDELIM_L, astr, DELIM, bstr,
+ DELIM, cstr, RDELIM_L));
+}
+
+/*
+ * line_recv - converts external binary format to line
+ */
+Datum
+line_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ LINE *line;
+
+ line = (LINE *) palloc(sizeof(LINE));
+
+ line->A = pq_getmsgfloat8(buf);
+ line->B = pq_getmsgfloat8(buf);
+ line->C = pq_getmsgfloat8(buf);
+
+ if (FPzero(line->A) && FPzero(line->B))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("invalid line specification: A and B cannot both be zero")));
+
+ PG_RETURN_LINE_P(line);
+}
+
+/*
+ * line_send - converts line to binary format
+ */
+Datum
+line_send(PG_FUNCTION_ARGS)
+{
+ LINE *line = PG_GETARG_LINE_P(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendfloat8(&buf, line->A);
+ pq_sendfloat8(&buf, line->B);
+ pq_sendfloat8(&buf, line->C);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+
+/*----------------------------------------------------------
+ * Conversion routines from one line formula to internal.
+ * Internal form: Ax+By+C=0
+ *---------------------------------------------------------*/
+
+/*
+ * Fill already-allocated LINE struct from the point and the slope
+ */
+static inline void
+line_construct(LINE *result, Point *pt, float8 m)
+{
+ if (isinf(m))
+ {
+ /* vertical - use "x = C" */
+ result->A = -1.0;
+ result->B = 0.0;
+ result->C = pt->x;
+ }
+ else if (m == 0)
+ {
+ /* horizontal - use "y = C" */
+ result->A = 0.0;
+ result->B = -1.0;
+ result->C = pt->y;
+ }
+ else
+ {
+ /* use "mx - y + yinter = 0" */
+ result->A = m;
+ result->B = -1.0;
+ result->C = float8_mi(pt->y, float8_mul(m, pt->x));
+ /* on some platforms the preceding expression produces -0; normalize it to plain zero */
+ if (result->C == 0.0)
+ result->C = 0.0;
+ }
+}
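+
+/*
+ * Rough sketch of how the three cases above play out (example values only):
+ * slope 1.0 through (0,0) yields A = 1, B = -1, C = 0 ("x - y = 0");
+ * slope 0.0 through (0,2) yields A = 0, B = -1, C = 2 ("y = 2");
+ * an infinite slope through (3,0) yields A = -1, B = 0, C = 3 ("x = 3").
+ */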
+
+/* line_construct_pp()
+ * two points
+ */
+Datum
+line_construct_pp(PG_FUNCTION_ARGS)
+{
+ Point *pt1 = PG_GETARG_POINT_P(0);
+ Point *pt2 = PG_GETARG_POINT_P(1);
+ LINE *result = (LINE *) palloc(sizeof(LINE));
+
+ if (point_eq_point(pt1, pt2))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid line specification: must be two distinct points")));
+
+ line_construct(result, pt1, point_sl(pt1, pt2));
+
+ PG_RETURN_LINE_P(result);
+}
+
+
+/*----------------------------------------------------------
+ * Relative position routines.
+ *---------------------------------------------------------*/
+
+Datum
+line_intersect(PG_FUNCTION_ARGS)
+{
+ LINE *l1 = PG_GETARG_LINE_P(0);
+ LINE *l2 = PG_GETARG_LINE_P(1);
+
+ PG_RETURN_BOOL(line_interpt_line(NULL, l1, l2));
+}
+
+Datum
+line_parallel(PG_FUNCTION_ARGS)
+{
+ LINE *l1 = PG_GETARG_LINE_P(0);
+ LINE *l2 = PG_GETARG_LINE_P(1);
+
+ PG_RETURN_BOOL(!line_interpt_line(NULL, l1, l2));
+}
+
+Datum
+line_perp(PG_FUNCTION_ARGS)
+{
+ LINE *l1 = PG_GETARG_LINE_P(0);
+ LINE *l2 = PG_GETARG_LINE_P(1);
+
+ if (FPzero(l1->A))
+ PG_RETURN_BOOL(FPzero(l2->B));
+ if (FPzero(l2->A))
+ PG_RETURN_BOOL(FPzero(l1->B));
+ if (FPzero(l1->B))
+ PG_RETURN_BOOL(FPzero(l2->A));
+ if (FPzero(l2->B))
+ PG_RETURN_BOOL(FPzero(l1->A));
+
+ PG_RETURN_BOOL(FPeq(float8_div(float8_mul(l1->A, l2->A),
+ float8_mul(l1->B, l2->B)), -1.0));
+}
+
+Datum
+line_vertical(PG_FUNCTION_ARGS)
+{
+ LINE *line = PG_GETARG_LINE_P(0);
+
+ PG_RETURN_BOOL(FPzero(line->B));
+}
+
+Datum
+line_horizontal(PG_FUNCTION_ARGS)
+{
+ LINE *line = PG_GETARG_LINE_P(0);
+
+ PG_RETURN_BOOL(FPzero(line->A));
+}
+
+
+/*
+ * Check whether the two lines are the same
+ */
+Datum
+line_eq(PG_FUNCTION_ARGS)
+{
+ LINE *l1 = PG_GETARG_LINE_P(0);
+ LINE *l2 = PG_GETARG_LINE_P(1);
+ float8 ratio;
+
+ /* If any NaNs are involved, insist on exact equality */
+ if (unlikely(isnan(l1->A) || isnan(l1->B) || isnan(l1->C) ||
+ isnan(l2->A) || isnan(l2->B) || isnan(l2->C)))
+ {
+ PG_RETURN_BOOL(float8_eq(l1->A, l2->A) &&
+ float8_eq(l1->B, l2->B) &&
+ float8_eq(l1->C, l2->C));
+ }
+
+ /* Otherwise, lines whose parameters are proportional are the same */
+ if (!FPzero(l2->A))
+ ratio = float8_div(l1->A, l2->A);
+ else if (!FPzero(l2->B))
+ ratio = float8_div(l1->B, l2->B);
+ else if (!FPzero(l2->C))
+ ratio = float8_div(l1->C, l2->C);
+ else
+ ratio = 1.0;
+
+ PG_RETURN_BOOL(FPeq(l1->A, float8_mul(ratio, l2->A)) &&
+ FPeq(l1->B, float8_mul(ratio, l2->B)) &&
+ FPeq(l1->C, float8_mul(ratio, l2->C)));
+}
+
+
+/*----------------------------------------------------------
+ * Line arithmetic routines.
+ *---------------------------------------------------------*/
+
+/*
+ * Return slope of the line
+ */
+static inline float8
+line_sl(LINE *line)
+{
+ if (FPzero(line->A))
+ return 0.0;
+ if (FPzero(line->B))
+ return get_float8_infinity();
+ return float8_div(line->A, -line->B);
+}
+
+
+/*
+ * Return inverse slope of the line
+ */
+static inline float8
+line_invsl(LINE *line)
+{
+ if (FPzero(line->A))
+ return get_float8_infinity();
+ if (FPzero(line->B))
+ return 0.0;
+ return float8_div(line->B, line->A);
+}
+
+
+/* line_distance()
+ * Distance between two lines.
+ */
+Datum
+line_distance(PG_FUNCTION_ARGS)
+{
+ LINE *l1 = PG_GETARG_LINE_P(0);
+ LINE *l2 = PG_GETARG_LINE_P(1);
+ float8 ratio;
+
+ if (line_interpt_line(NULL, l1, l2)) /* intersecting? */
+ PG_RETURN_FLOAT8(0.0);
+
+ if (!FPzero(l1->A) && !isnan(l1->A) && !FPzero(l2->A) && !isnan(l2->A))
+ ratio = float8_div(l1->A, l2->A);
+ else if (!FPzero(l1->B) && !isnan(l1->B) && !FPzero(l2->B) && !isnan(l2->B))
+ ratio = float8_div(l1->B, l2->B);
+ else
+ ratio = 1.0;
+
+ PG_RETURN_FLOAT8(float8_div(fabs(float8_mi(l1->C,
+ float8_mul(ratio, l2->C))),
+ HYPOT(l1->A, l1->B)));
+}
+
+/* line_interpt()
+ * Point where two lines l1, l2 intersect (if any)
+ */
+Datum
+line_interpt(PG_FUNCTION_ARGS)
+{
+ LINE *l1 = PG_GETARG_LINE_P(0);
+ LINE *l2 = PG_GETARG_LINE_P(1);
+ Point *result;
+
+ result = (Point *) palloc(sizeof(Point));
+
+ if (!line_interpt_line(result, l1, l2))
+ PG_RETURN_NULL();
+ PG_RETURN_POINT_P(result);
+}
+
+/*
+ * Internal version of line_interpt
+ *
+ * Return whether two lines intersect. If *result is not NULL, it is set to
+ * the intersection point.
+ *
+ * NOTE: If the lines are identical then we will find they are parallel
+ * and report "no intersection". This is a little weird, but since
+ * there's no *unique* intersection, maybe it's appropriate behavior.
+ *
+ * If the lines have NaN constants, we will return true, and the intersection
+ * point would have NaN coordinates. We shouldn't return false in this case
+ * because that would mean the lines are parallel.
+ */
+static bool
+line_interpt_line(Point *result, LINE *l1, LINE *l2)
+{
+ float8 x,
+ y;
+
+ if (!FPzero(l1->B))
+ {
+ if (FPeq(l2->A, float8_mul(l1->A, float8_div(l2->B, l1->B))))
+ return false;
+
+ x = float8_div(float8_mi(float8_mul(l1->B, l2->C),
+ float8_mul(l2->B, l1->C)),
+ float8_mi(float8_mul(l1->A, l2->B),
+ float8_mul(l2->A, l1->B)));
+ y = float8_div(-float8_pl(float8_mul(l1->A, x), l1->C), l1->B);
+ }
+ else if (!FPzero(l2->B))
+ {
+ if (FPeq(l1->A, float8_mul(l2->A, float8_div(l1->B, l2->B))))
+ return false;
+
+ x = float8_div(float8_mi(float8_mul(l2->B, l1->C),
+ float8_mul(l1->B, l2->C)),
+ float8_mi(float8_mul(l2->A, l1->B),
+ float8_mul(l1->A, l2->B)));
+ y = float8_div(-float8_pl(float8_mul(l2->A, x), l2->C), l2->B);
+ }
+ else
+ return false;
+
+ /* On some platforms the preceding expressions produce -0; normalize such values to plain zero. */
+ if (x == 0.0)
+ x = 0.0;
+ if (y == 0.0)
+ y = 0.0;
+
+ if (result != NULL)
+ point_construct(result, x, y);
+
+ return true;
+}
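+
+/*
+ * Worked example (illustrative values only): for l1 = {1, -1, 0} ("y = x")
+ * and l2 = {-1, -1, 2} ("y = 2 - x"), l1->B is nonzero, the lines are not
+ * parallel, and the formulas above give
+ *	x = (B1*C2 - B2*C1) / (A1*B2 - A2*B1) = (-2 - 0) / (-1 - 1) = 1
+ *	y = -(A1*x + C1) / B1 = -(1 + 0) / (-1) = 1
+ * so the reported intersection point is (1,1).
+ */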
+
+
+/***********************************************************************
+ **
+ ** Routines for 2D paths (sequences of line segments, also
+ ** called `polylines').
+ **
+ ** This is not a general package for geometric paths,
+ ** which of course include polygons; the emphasis here
+ ** is on (for example) usefulness in wire layout.
+ **
+ ***********************************************************************/
+
+/*----------------------------------------------------------
+ * String to path / path to string conversion.
+ * External format:
+ * "((xcoord, ycoord),... )"
+ * "[(xcoord, ycoord),... ]"
+ * "(xcoord, ycoord),... "
+ * "[xcoord, ycoord,... ]"
+ * Also support older format:
+ * "(closed, npts, xcoord, ycoord,... )"
+ *---------------------------------------------------------*/
+
+Datum
+path_area(PG_FUNCTION_ARGS)
+{
+ PATH *path = PG_GETARG_PATH_P(0);
+ float8 area = 0.0;
+ int i,
+ j;
+
+ if (!path->closed)
+ PG_RETURN_NULL();
+
+ for (i = 0; i < path->npts; i++)
+ {
+ j = (i + 1) % path->npts;
+ area = float8_pl(area, float8_mul(path->p[i].x, path->p[j].y));
+ area = float8_mi(area, float8_mul(path->p[i].y, path->p[j].x));
+ }
+
+ PG_RETURN_FLOAT8(float8_div(fabs(area), 2.0));
+}
+
+
+Datum
+path_in(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+ PATH *path;
+ bool isopen;
+ char *s;
+ int npts;
+ int size;
+ int base_size;
+ int depth = 0;
+
+ if ((npts = pair_count(str, ',')) <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "path", str)));
+
+ s = str;
+ while (isspace((unsigned char) *s))
+ s++;
+
+ /* skip single leading paren */
+ if ((*s == LDELIM) && (strrchr(s, LDELIM) == s))
+ {
+ s++;
+ depth++;
+ }
+
+ base_size = sizeof(path->p[0]) * npts;
+ size = offsetof(PATH, p) + base_size;
+
+ /* Check for integer overflow */
+ if (base_size / npts != sizeof(path->p[0]) || size <= base_size)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("too many points requested")));
+
+ path = (PATH *) palloc(size);
+
+ SET_VARSIZE(path, size);
+ path->npts = npts;
+
+ path_decode(s, true, npts, &(path->p[0]), &isopen, &s, "path", str);
+
+ if (depth >= 1)
+ {
+ if (*s++ != RDELIM)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "path", str)));
+ while (isspace((unsigned char) *s))
+ s++;
+ }
+ if (*s != '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "path", str)));
+
+ path->closed = (!isopen);
+ /* prevent instability in unused pad bytes */
+ path->dummy = 0;
+
+ PG_RETURN_PATH_P(path);
+}
+
+
+Datum
+path_out(PG_FUNCTION_ARGS)
+{
+ PATH *path = PG_GETARG_PATH_P(0);
+
+ PG_RETURN_CSTRING(path_encode(path->closed ? PATH_CLOSED : PATH_OPEN, path->npts, path->p));
+}
+
+/*
+ * path_recv - converts external binary format to path
+ *
+ * External representation is closed flag (a boolean byte), int32 number
+ * of points, and the points.
+ */
+Datum
+path_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ PATH *path;
+ int closed;
+ int32 npts;
+ int32 i;
+ int size;
+
+ closed = pq_getmsgbyte(buf);
+ npts = pq_getmsgint(buf, sizeof(int32));
+ if (npts <= 0 || npts >= (int32) ((INT_MAX - offsetof(PATH, p)) / sizeof(Point)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("invalid number of points in external \"path\" value")));
+
+ size = offsetof(PATH, p) + sizeof(path->p[0]) * npts;
+ path = (PATH *) palloc(size);
+
+ SET_VARSIZE(path, size);
+ path->npts = npts;
+ path->closed = (closed ? 1 : 0);
+ /* prevent instability in unused pad bytes */
+ path->dummy = 0;
+
+ for (i = 0; i < npts; i++)
+ {
+ path->p[i].x = pq_getmsgfloat8(buf);
+ path->p[i].y = pq_getmsgfloat8(buf);
+ }
+
+ PG_RETURN_PATH_P(path);
+}
+
+/*
+ * path_send - converts path to binary format
+ */
+Datum
+path_send(PG_FUNCTION_ARGS)
+{
+ PATH *path = PG_GETARG_PATH_P(0);
+ StringInfoData buf;
+ int32 i;
+
+ pq_begintypsend(&buf);
+ pq_sendbyte(&buf, path->closed ? 1 : 0);
+ pq_sendint32(&buf, path->npts);
+ for (i = 0; i < path->npts; i++)
+ {
+ pq_sendfloat8(&buf, path->p[i].x);
+ pq_sendfloat8(&buf, path->p[i].y);
+ }
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+
+/*----------------------------------------------------------
+ * Relational operators.
+ * These are based on the path cardinality,
+ * as stupid as that sounds.
+ *
+ * Better relops and access methods coming soon.
+ *---------------------------------------------------------*/
+
+Datum
+path_n_lt(PG_FUNCTION_ARGS)
+{
+ PATH *p1 = PG_GETARG_PATH_P(0);
+ PATH *p2 = PG_GETARG_PATH_P(1);
+
+ PG_RETURN_BOOL(p1->npts < p2->npts);
+}
+
+Datum
+path_n_gt(PG_FUNCTION_ARGS)
+{
+ PATH *p1 = PG_GETARG_PATH_P(0);
+ PATH *p2 = PG_GETARG_PATH_P(1);
+
+ PG_RETURN_BOOL(p1->npts > p2->npts);
+}
+
+Datum
+path_n_eq(PG_FUNCTION_ARGS)
+{
+ PATH *p1 = PG_GETARG_PATH_P(0);
+ PATH *p2 = PG_GETARG_PATH_P(1);
+
+ PG_RETURN_BOOL(p1->npts == p2->npts);
+}
+
+Datum
+path_n_le(PG_FUNCTION_ARGS)
+{
+ PATH *p1 = PG_GETARG_PATH_P(0);
+ PATH *p2 = PG_GETARG_PATH_P(1);
+
+ PG_RETURN_BOOL(p1->npts <= p2->npts);
+}
+
+Datum
+path_n_ge(PG_FUNCTION_ARGS)
+{
+ PATH *p1 = PG_GETARG_PATH_P(0);
+ PATH *p2 = PG_GETARG_PATH_P(1);
+
+ PG_RETURN_BOOL(p1->npts >= p2->npts);
+}
+
+/*----------------------------------------------------------
+ * Conversion operators.
+ *---------------------------------------------------------*/
+
+Datum
+path_isclosed(PG_FUNCTION_ARGS)
+{
+ PATH *path = PG_GETARG_PATH_P(0);
+
+ PG_RETURN_BOOL(path->closed);
+}
+
+Datum
+path_isopen(PG_FUNCTION_ARGS)
+{
+ PATH *path = PG_GETARG_PATH_P(0);
+
+ PG_RETURN_BOOL(!path->closed);
+}
+
+Datum
+path_npoints(PG_FUNCTION_ARGS)
+{
+ PATH *path = PG_GETARG_PATH_P(0);
+
+ PG_RETURN_INT32(path->npts);
+}
+
+
+Datum
+path_close(PG_FUNCTION_ARGS)
+{
+ PATH *path = PG_GETARG_PATH_P_COPY(0);
+
+ path->closed = true;
+
+ PG_RETURN_PATH_P(path);
+}
+
+Datum
+path_open(PG_FUNCTION_ARGS)
+{
+ PATH *path = PG_GETARG_PATH_P_COPY(0);
+
+ path->closed = false;
+
+ PG_RETURN_PATH_P(path);
+}
+
+
+/* path_inter -
+ * Does p1 intersect p2 at any point?
+ *	Use bounding boxes for a quick (O(n)) check, then do an
+ *	O(n^2) iterative edge check.
+ */
+Datum
+path_inter(PG_FUNCTION_ARGS)
+{
+ PATH *p1 = PG_GETARG_PATH_P(0);
+ PATH *p2 = PG_GETARG_PATH_P(1);
+ BOX b1,
+ b2;
+ int i,
+ j;
+ LSEG seg1,
+ seg2;
+
+ Assert(p1->npts > 0 && p2->npts > 0);
+
+ b1.high.x = b1.low.x = p1->p[0].x;
+ b1.high.y = b1.low.y = p1->p[0].y;
+ for (i = 1; i < p1->npts; i++)
+ {
+ b1.high.x = float8_max(p1->p[i].x, b1.high.x);
+ b1.high.y = float8_max(p1->p[i].y, b1.high.y);
+ b1.low.x = float8_min(p1->p[i].x, b1.low.x);
+ b1.low.y = float8_min(p1->p[i].y, b1.low.y);
+ }
+ b2.high.x = b2.low.x = p2->p[0].x;
+ b2.high.y = b2.low.y = p2->p[0].y;
+ for (i = 1; i < p2->npts; i++)
+ {
+ b2.high.x = float8_max(p2->p[i].x, b2.high.x);
+ b2.high.y = float8_max(p2->p[i].y, b2.high.y);
+ b2.low.x = float8_min(p2->p[i].x, b2.low.x);
+ b2.low.y = float8_min(p2->p[i].y, b2.low.y);
+ }
+ if (!box_ov(&b1, &b2))
+ PG_RETURN_BOOL(false);
+
+ /* pairwise check lseg intersections */
+ for (i = 0; i < p1->npts; i++)
+ {
+ int iprev;
+
+ if (i > 0)
+ iprev = i - 1;
+ else
+ {
+ if (!p1->closed)
+ continue;
+ iprev = p1->npts - 1; /* include the closure segment */
+ }
+
+ for (j = 0; j < p2->npts; j++)
+ {
+ int jprev;
+
+ if (j > 0)
+ jprev = j - 1;
+ else
+ {
+ if (!p2->closed)
+ continue;
+ jprev = p2->npts - 1; /* include the closure segment */
+ }
+
+ statlseg_construct(&seg1, &p1->p[iprev], &p1->p[i]);
+ statlseg_construct(&seg2, &p2->p[jprev], &p2->p[j]);
+ if (lseg_interpt_lseg(NULL, &seg1, &seg2))
+ PG_RETURN_BOOL(true);
+ }
+ }
+
+ /* if we dropped through, no two segs intersected */
+ PG_RETURN_BOOL(false);
+}
+
+/* path_distance()
+ * This essentially does a Cartesian product of the lsegs in the
+ * two paths, and finds the min distance between any two lsegs.
+ */
+Datum
+path_distance(PG_FUNCTION_ARGS)
+{
+ PATH *p1 = PG_GETARG_PATH_P(0);
+ PATH *p2 = PG_GETARG_PATH_P(1);
+ float8 min = 0.0; /* initialize to keep compiler quiet */
+ bool have_min = false;
+ float8 tmp;
+ int i,
+ j;
+ LSEG seg1,
+ seg2;
+
+ for (i = 0; i < p1->npts; i++)
+ {
+ int iprev;
+
+ if (i > 0)
+ iprev = i - 1;
+ else
+ {
+ if (!p1->closed)
+ continue;
+ iprev = p1->npts - 1; /* include the closure segment */
+ }
+
+ for (j = 0; j < p2->npts; j++)
+ {
+ int jprev;
+
+ if (j > 0)
+ jprev = j - 1;
+ else
+ {
+ if (!p2->closed)
+ continue;
+ jprev = p2->npts - 1; /* include the closure segment */
+ }
+
+ statlseg_construct(&seg1, &p1->p[iprev], &p1->p[i]);
+ statlseg_construct(&seg2, &p2->p[jprev], &p2->p[j]);
+
+ tmp = lseg_closept_lseg(NULL, &seg1, &seg2);
+ if (!have_min || float8_lt(tmp, min))
+ {
+ min = tmp;
+ have_min = true;
+ }
+ }
+ }
+
+ if (!have_min)
+ PG_RETURN_NULL();
+
+ PG_RETURN_FLOAT8(min);
+}
+
+
+/*----------------------------------------------------------
+ * "Arithmetic" operations.
+ *---------------------------------------------------------*/
+
+Datum
+path_length(PG_FUNCTION_ARGS)
+{
+ PATH *path = PG_GETARG_PATH_P(0);
+ float8 result = 0.0;
+ int i;
+
+ for (i = 0; i < path->npts; i++)
+ {
+ int iprev;
+
+ if (i > 0)
+ iprev = i - 1;
+ else
+ {
+ if (!path->closed)
+ continue;
+ iprev = path->npts - 1; /* include the closure segment */
+ }
+
+ result = float8_pl(result, point_dt(&path->p[iprev], &path->p[i]));
+ }
+
+ PG_RETURN_FLOAT8(result);
+}
+
+/***********************************************************************
+ **
+ ** Routines for 2D points.
+ **
+ ***********************************************************************/
+
+/*----------------------------------------------------------
+ * String to point, point to string conversion.
+ * External format:
+ * "(x,y)"
+ * "x,y"
+ *---------------------------------------------------------*/
+
+Datum
+point_in(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+ Point *point = (Point *) palloc(sizeof(Point));
+
+ pair_decode(str, &point->x, &point->y, NULL, "point", str);
+ PG_RETURN_POINT_P(point);
+}
+
+Datum
+point_out(PG_FUNCTION_ARGS)
+{
+ Point *pt = PG_GETARG_POINT_P(0);
+
+ PG_RETURN_CSTRING(path_encode(PATH_NONE, 1, pt));
+}
+
+/*
+ * point_recv - converts external binary format to point
+ */
+Datum
+point_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ Point *point;
+
+ point = (Point *) palloc(sizeof(Point));
+ point->x = pq_getmsgfloat8(buf);
+ point->y = pq_getmsgfloat8(buf);
+ PG_RETURN_POINT_P(point);
+}
+
+/*
+ * point_send - converts point to binary format
+ */
+Datum
+point_send(PG_FUNCTION_ARGS)
+{
+ Point *pt = PG_GETARG_POINT_P(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendfloat8(&buf, pt->x);
+ pq_sendfloat8(&buf, pt->y);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+
+/*
+ * Initialize a point
+ */
+static inline void
+point_construct(Point *result, float8 x, float8 y)
+{
+ result->x = x;
+ result->y = y;
+}
+
+
+/*----------------------------------------------------------
+ * Relational operators for Points.
+ * Since we do have a sense of coordinates being
+ * "equal" to a given accuracy (point_vert, point_horiz),
+ * the other ops must preserve that sense. This means
+ * that results may, strictly speaking, be a lie (unless
+ * EPSILON = 0.0).
+ *---------------------------------------------------------*/
+
+Datum
+point_left(PG_FUNCTION_ARGS)
+{
+ Point *pt1 = PG_GETARG_POINT_P(0);
+ Point *pt2 = PG_GETARG_POINT_P(1);
+
+ PG_RETURN_BOOL(FPlt(pt1->x, pt2->x));
+}
+
+Datum
+point_right(PG_FUNCTION_ARGS)
+{
+ Point *pt1 = PG_GETARG_POINT_P(0);
+ Point *pt2 = PG_GETARG_POINT_P(1);
+
+ PG_RETURN_BOOL(FPgt(pt1->x, pt2->x));
+}
+
+Datum
+point_above(PG_FUNCTION_ARGS)
+{
+ Point *pt1 = PG_GETARG_POINT_P(0);
+ Point *pt2 = PG_GETARG_POINT_P(1);
+
+ PG_RETURN_BOOL(FPgt(pt1->y, pt2->y));
+}
+
+Datum
+point_below(PG_FUNCTION_ARGS)
+{
+ Point *pt1 = PG_GETARG_POINT_P(0);
+ Point *pt2 = PG_GETARG_POINT_P(1);
+
+ PG_RETURN_BOOL(FPlt(pt1->y, pt2->y));
+}
+
+Datum
+point_vert(PG_FUNCTION_ARGS)
+{
+ Point *pt1 = PG_GETARG_POINT_P(0);
+ Point *pt2 = PG_GETARG_POINT_P(1);
+
+ PG_RETURN_BOOL(FPeq(pt1->x, pt2->x));
+}
+
+Datum
+point_horiz(PG_FUNCTION_ARGS)
+{
+ Point *pt1 = PG_GETARG_POINT_P(0);
+ Point *pt2 = PG_GETARG_POINT_P(1);
+
+ PG_RETURN_BOOL(FPeq(pt1->y, pt2->y));
+}
+
+Datum
+point_eq(PG_FUNCTION_ARGS)
+{
+ Point *pt1 = PG_GETARG_POINT_P(0);
+ Point *pt2 = PG_GETARG_POINT_P(1);
+
+ PG_RETURN_BOOL(point_eq_point(pt1, pt2));
+}
+
+Datum
+point_ne(PG_FUNCTION_ARGS)
+{
+ Point *pt1 = PG_GETARG_POINT_P(0);
+ Point *pt2 = PG_GETARG_POINT_P(1);
+
+ PG_RETURN_BOOL(!point_eq_point(pt1, pt2));
+}
+
+
+/*
+ * Check whether the two points are the same
+ */
+static inline bool
+point_eq_point(Point *pt1, Point *pt2)
+{
+ /* If any NaNs are involved, insist on exact equality */
+ if (unlikely(isnan(pt1->x) || isnan(pt1->y) ||
+ isnan(pt2->x) || isnan(pt2->y)))
+ return (float8_eq(pt1->x, pt2->x) && float8_eq(pt1->y, pt2->y));
+
+ return (FPeq(pt1->x, pt2->x) && FPeq(pt1->y, pt2->y));
+}
+
+
+/*----------------------------------------------------------
+ * "Arithmetic" operators on points.
+ *---------------------------------------------------------*/
+
+Datum
+point_distance(PG_FUNCTION_ARGS)
+{
+ Point *pt1 = PG_GETARG_POINT_P(0);
+ Point *pt2 = PG_GETARG_POINT_P(1);
+
+ PG_RETURN_FLOAT8(point_dt(pt1, pt2));
+}
+
+static inline float8
+point_dt(Point *pt1, Point *pt2)
+{
+ return HYPOT(float8_mi(pt1->x, pt2->x), float8_mi(pt1->y, pt2->y));
+}
+
+Datum
+point_slope(PG_FUNCTION_ARGS)
+{
+ Point *pt1 = PG_GETARG_POINT_P(0);
+ Point *pt2 = PG_GETARG_POINT_P(1);
+
+ PG_RETURN_FLOAT8(point_sl(pt1, pt2));
+}
+
+
+/*
+ * Return slope of two points
+ *
+ * Note that this function returns Inf when the points are the same.
+ */
+static inline float8
+point_sl(Point *pt1, Point *pt2)
+{
+ if (FPeq(pt1->x, pt2->x))
+ return get_float8_infinity();
+ if (FPeq(pt1->y, pt2->y))
+ return 0.0;
+ return float8_div(float8_mi(pt1->y, pt2->y), float8_mi(pt1->x, pt2->x));
+}
+
+
+/*
+ * Return inverse slope of two points
+ *
+ * Note that this function returns 0.0 when the points are the same.
+ */
+static inline float8
+point_invsl(Point *pt1, Point *pt2)
+{
+ if (FPeq(pt1->x, pt2->x))
+ return 0.0;
+ if (FPeq(pt1->y, pt2->y))
+ return get_float8_infinity();
+ return float8_div(float8_mi(pt1->x, pt2->x), float8_mi(pt2->y, pt1->y));
+}
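+
+/*
+ * Illustrative example (arbitrary values): for pt1 = (0,0) and pt2 = (2,1),
+ * point_sl() returns 0.5 while point_invsl() returns -2.0, the slope of a
+ * line perpendicular to the one through the two points.
+ */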
+
+
+/***********************************************************************
+ **
+ ** Routines for 2D line segments.
+ **
+ ***********************************************************************/
+
+/*----------------------------------------------------------
+ * String to lseg, lseg to string conversion.
+ * External forms: "[(x1, y1), (x2, y2)]"
+ * "(x1, y1), (x2, y2)"
+ * "x1, y1, x2, y2"
+ * closed form ok "((x1, y1), (x2, y2))"
+ * (old form) "(x1, y1, x2, y2)"
+ *---------------------------------------------------------*/
+
+Datum
+lseg_in(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+ LSEG *lseg = (LSEG *) palloc(sizeof(LSEG));
+ bool isopen;
+
+ path_decode(str, true, 2, &lseg->p[0], &isopen, NULL, "lseg", str);
+ PG_RETURN_LSEG_P(lseg);
+}
+
+
+Datum
+lseg_out(PG_FUNCTION_ARGS)
+{
+ LSEG *ls = PG_GETARG_LSEG_P(0);
+
+ PG_RETURN_CSTRING(path_encode(PATH_OPEN, 2, &ls->p[0]));
+}
+
+/*
+ * lseg_recv - converts external binary format to lseg
+ */
+Datum
+lseg_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ LSEG *lseg;
+
+ lseg = (LSEG *) palloc(sizeof(LSEG));
+
+ lseg->p[0].x = pq_getmsgfloat8(buf);
+ lseg->p[0].y = pq_getmsgfloat8(buf);
+ lseg->p[1].x = pq_getmsgfloat8(buf);
+ lseg->p[1].y = pq_getmsgfloat8(buf);
+
+ PG_RETURN_LSEG_P(lseg);
+}
+
+/*
+ * lseg_send - converts lseg to binary format
+ */
+Datum
+lseg_send(PG_FUNCTION_ARGS)
+{
+ LSEG *ls = PG_GETARG_LSEG_P(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendfloat8(&buf, ls->p[0].x);
+ pq_sendfloat8(&buf, ls->p[0].y);
+ pq_sendfloat8(&buf, ls->p[1].x);
+ pq_sendfloat8(&buf, ls->p[1].y);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+
+/* lseg_construct -
+ * form a LSEG from two Points.
+ */
+Datum
+lseg_construct(PG_FUNCTION_ARGS)
+{
+ Point *pt1 = PG_GETARG_POINT_P(0);
+ Point *pt2 = PG_GETARG_POINT_P(1);
+ LSEG *result = (LSEG *) palloc(sizeof(LSEG));
+
+ statlseg_construct(result, pt1, pt2);
+
+ PG_RETURN_LSEG_P(result);
+}
+
+/* like lseg_construct, but assume space already allocated */
+static inline void
+statlseg_construct(LSEG *lseg, Point *pt1, Point *pt2)
+{
+ lseg->p[0].x = pt1->x;
+ lseg->p[0].y = pt1->y;
+ lseg->p[1].x = pt2->x;
+ lseg->p[1].y = pt2->y;
+}
+
+
+/*
+ * Return slope of the line segment
+ */
+static inline float8
+lseg_sl(LSEG *lseg)
+{
+ return point_sl(&lseg->p[0], &lseg->p[1]);
+}
+
+
+/*
+ * Return inverse slope of the line segment
+ */
+static inline float8
+lseg_invsl(LSEG *lseg)
+{
+ return point_invsl(&lseg->p[0], &lseg->p[1]);
+}
+
+
+Datum
+lseg_length(PG_FUNCTION_ARGS)
+{
+ LSEG *lseg = PG_GETARG_LSEG_P(0);
+
+ PG_RETURN_FLOAT8(point_dt(&lseg->p[0], &lseg->p[1]));
+}
+
+/*----------------------------------------------------------
+ * Relative position routines.
+ *---------------------------------------------------------*/
+
+/* lseg_intersect -
+ *	find intersection of the two lines, and see if it falls on
+ *	both segments.
+ */
+Datum
+lseg_intersect(PG_FUNCTION_ARGS)
+{
+ LSEG *l1 = PG_GETARG_LSEG_P(0);
+ LSEG *l2 = PG_GETARG_LSEG_P(1);
+
+ PG_RETURN_BOOL(lseg_interpt_lseg(NULL, l1, l2));
+}
+
+
+Datum
+lseg_parallel(PG_FUNCTION_ARGS)
+{
+ LSEG *l1 = PG_GETARG_LSEG_P(0);
+ LSEG *l2 = PG_GETARG_LSEG_P(1);
+
+ PG_RETURN_BOOL(FPeq(lseg_sl(l1), lseg_sl(l2)));
+}
+
+/*
+ * Determine if two line segments are perpendicular.
+ */
+Datum
+lseg_perp(PG_FUNCTION_ARGS)
+{
+ LSEG *l1 = PG_GETARG_LSEG_P(0);
+ LSEG *l2 = PG_GETARG_LSEG_P(1);
+
+ PG_RETURN_BOOL(FPeq(lseg_sl(l1), lseg_invsl(l2)));
+}
+
+Datum
+lseg_vertical(PG_FUNCTION_ARGS)
+{
+ LSEG *lseg = PG_GETARG_LSEG_P(0);
+
+ PG_RETURN_BOOL(FPeq(lseg->p[0].x, lseg->p[1].x));
+}
+
+Datum
+lseg_horizontal(PG_FUNCTION_ARGS)
+{
+ LSEG *lseg = PG_GETARG_LSEG_P(0);
+
+ PG_RETURN_BOOL(FPeq(lseg->p[0].y, lseg->p[1].y));
+}
+
+
+Datum
+lseg_eq(PG_FUNCTION_ARGS)
+{
+ LSEG *l1 = PG_GETARG_LSEG_P(0);
+ LSEG *l2 = PG_GETARG_LSEG_P(1);
+
+ PG_RETURN_BOOL(point_eq_point(&l1->p[0], &l2->p[0]) &&
+ point_eq_point(&l1->p[1], &l2->p[1]));
+}
+
+Datum
+lseg_ne(PG_FUNCTION_ARGS)
+{
+ LSEG *l1 = PG_GETARG_LSEG_P(0);
+ LSEG *l2 = PG_GETARG_LSEG_P(1);
+
+ PG_RETURN_BOOL(!point_eq_point(&l1->p[0], &l2->p[0]) ||
+ !point_eq_point(&l1->p[1], &l2->p[1]));
+}
+
+Datum
+lseg_lt(PG_FUNCTION_ARGS)
+{
+ LSEG *l1 = PG_GETARG_LSEG_P(0);
+ LSEG *l2 = PG_GETARG_LSEG_P(1);
+
+ PG_RETURN_BOOL(FPlt(point_dt(&l1->p[0], &l1->p[1]),
+ point_dt(&l2->p[0], &l2->p[1])));
+}
+
+Datum
+lseg_le(PG_FUNCTION_ARGS)
+{
+ LSEG *l1 = PG_GETARG_LSEG_P(0);
+ LSEG *l2 = PG_GETARG_LSEG_P(1);
+
+ PG_RETURN_BOOL(FPle(point_dt(&l1->p[0], &l1->p[1]),
+ point_dt(&l2->p[0], &l2->p[1])));
+}
+
+Datum
+lseg_gt(PG_FUNCTION_ARGS)
+{
+ LSEG *l1 = PG_GETARG_LSEG_P(0);
+ LSEG *l2 = PG_GETARG_LSEG_P(1);
+
+ PG_RETURN_BOOL(FPgt(point_dt(&l1->p[0], &l1->p[1]),
+ point_dt(&l2->p[0], &l2->p[1])));
+}
+
+Datum
+lseg_ge(PG_FUNCTION_ARGS)
+{
+ LSEG *l1 = PG_GETARG_LSEG_P(0);
+ LSEG *l2 = PG_GETARG_LSEG_P(1);
+
+ PG_RETURN_BOOL(FPge(point_dt(&l1->p[0], &l1->p[1]),
+ point_dt(&l2->p[0], &l2->p[1])));
+}
+
+
+/*----------------------------------------------------------
+ * Line arithmetic routines.
+ *---------------------------------------------------------*/
+
+/* lseg_distance -
+ * If two segments don't intersect, then the closest
+ * point will be from one of the endpoints to the other
+ * segment.
+ */
+Datum
+lseg_distance(PG_FUNCTION_ARGS)
+{
+ LSEG *l1 = PG_GETARG_LSEG_P(0);
+ LSEG *l2 = PG_GETARG_LSEG_P(1);
+
+ PG_RETURN_FLOAT8(lseg_closept_lseg(NULL, l1, l2));
+}
+
+
+Datum
+lseg_center(PG_FUNCTION_ARGS)
+{
+ LSEG *lseg = PG_GETARG_LSEG_P(0);
+ Point *result;
+
+ result = (Point *) palloc(sizeof(Point));
+
+ result->x = float8_div(float8_pl(lseg->p[0].x, lseg->p[1].x), 2.0);
+ result->y = float8_div(float8_pl(lseg->p[0].y, lseg->p[1].y), 2.0);
+
+ PG_RETURN_POINT_P(result);
+}
+
+
+/*
+ * Return whether the two segments intersect. If *result is not NULL,
+ * it is set to the intersection point.
+ *
+ * This function is almost perfectly symmetric, even though it doesn't look
+ * like it. See lseg_interpt_line() for the other half of it.
+ */
+static bool
+lseg_interpt_lseg(Point *result, LSEG *l1, LSEG *l2)
+{
+ Point interpt;
+ LINE tmp;
+
+ line_construct(&tmp, &l2->p[0], lseg_sl(l2));
+ if (!lseg_interpt_line(&interpt, l1, &tmp))
+ return false;
+
+ /*
+ * If the line intersection point isn't within l2, there is no valid
+ * segment intersection point at all.
+ */
+ if (!lseg_contain_point(l2, &interpt))
+ return false;
+
+ if (result != NULL)
+ *result = interpt;
+
+ return true;
+}
+
+Datum
+lseg_interpt(PG_FUNCTION_ARGS)
+{
+ LSEG *l1 = PG_GETARG_LSEG_P(0);
+ LSEG *l2 = PG_GETARG_LSEG_P(1);
+ Point *result;
+
+ result = (Point *) palloc(sizeof(Point));
+
+ if (!lseg_interpt_lseg(result, l1, l2))
+ PG_RETURN_NULL();
+ PG_RETURN_POINT_P(result);
+}
+
+/***********************************************************************
+ **
+ ** Routines for position comparisons of differently-typed
+ ** 2D objects.
+ **
+ ***********************************************************************/
+
+/*---------------------------------------------------------------------
+ * dist_
+ * Minimum distance from one object to another.
+ *-------------------------------------------------------------------*/
+
+/*
+ * Distance from a point to a line
+ */
+Datum
+dist_pl(PG_FUNCTION_ARGS)
+{
+ Point *pt = PG_GETARG_POINT_P(0);
+ LINE *line = PG_GETARG_LINE_P(1);
+
+ PG_RETURN_FLOAT8(line_closept_point(NULL, line, pt));
+}
+
+/*
+ * Distance from a line to a point
+ */
+Datum
+dist_lp(PG_FUNCTION_ARGS)
+{
+ LINE *line = PG_GETARG_LINE_P(0);
+ Point *pt = PG_GETARG_POINT_P(1);
+
+ PG_RETURN_FLOAT8(line_closept_point(NULL, line, pt));
+}
+
+/*
+ * Distance from a point to a lseg
+ */
+Datum
+dist_ps(PG_FUNCTION_ARGS)
+{
+ Point *pt = PG_GETARG_POINT_P(0);
+ LSEG *lseg = PG_GETARG_LSEG_P(1);
+
+ PG_RETURN_FLOAT8(lseg_closept_point(NULL, lseg, pt));
+}
+
+/*
+ * Distance from a lseg to a point
+ */
+Datum
+dist_sp(PG_FUNCTION_ARGS)
+{
+ LSEG *lseg = PG_GETARG_LSEG_P(0);
+ Point *pt = PG_GETARG_POINT_P(1);
+
+ PG_RETURN_FLOAT8(lseg_closept_point(NULL, lseg, pt));
+}
+
+static float8
+dist_ppath_internal(Point *pt, PATH *path)
+{
+ float8 result = 0.0; /* keep compiler quiet */
+ bool have_min = false;
+ float8 tmp;
+ int i;
+ LSEG lseg;
+
+ Assert(path->npts > 0);
+
+ /*
+ * The distance from a point to a path is the smallest distance from the
+ * point to any of its constituent segments.
+ */
+ for (i = 0; i < path->npts; i++)
+ {
+ int iprev;
+
+ if (i > 0)
+ iprev = i - 1;
+ else
+ {
+ if (!path->closed)
+ continue;
+ iprev = path->npts - 1; /* Include the closure segment */
+ }
+
+ statlseg_construct(&lseg, &path->p[iprev], &path->p[i]);
+ tmp = lseg_closept_point(NULL, &lseg, pt);
+ if (!have_min || float8_lt(tmp, result))
+ {
+ result = tmp;
+ have_min = true;
+ }
+ }
+
+ return result;
+}
+
+/*
+ * Distance from a point to a path
+ */
+Datum
+dist_ppath(PG_FUNCTION_ARGS)
+{
+ Point *pt = PG_GETARG_POINT_P(0);
+ PATH *path = PG_GETARG_PATH_P(1);
+
+ PG_RETURN_FLOAT8(dist_ppath_internal(pt, path));
+}
+
+/*
+ * Distance from a path to a point
+ */
+Datum
+dist_pathp(PG_FUNCTION_ARGS)
+{
+ PATH *path = PG_GETARG_PATH_P(0);
+ Point *pt = PG_GETARG_POINT_P(1);
+
+ PG_RETURN_FLOAT8(dist_ppath_internal(pt, path));
+}
+
+/*
+ * Distance from a point to a box
+ */
+Datum
+dist_pb(PG_FUNCTION_ARGS)
+{
+ Point *pt = PG_GETARG_POINT_P(0);
+ BOX *box = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_FLOAT8(box_closept_point(NULL, box, pt));
+}
+
+/*
+ * Distance from a box to a point
+ */
+Datum
+dist_bp(PG_FUNCTION_ARGS)
+{
+ BOX *box = PG_GETARG_BOX_P(0);
+ Point *pt = PG_GETARG_POINT_P(1);
+
+ PG_RETURN_FLOAT8(box_closept_point(NULL, box, pt));
+}
+
+/*
+ * Distance from a lseg to a line
+ */
+Datum
+dist_sl(PG_FUNCTION_ARGS)
+{
+ LSEG *lseg = PG_GETARG_LSEG_P(0);
+ LINE *line = PG_GETARG_LINE_P(1);
+
+ PG_RETURN_FLOAT8(lseg_closept_line(NULL, lseg, line));
+}
+
+/*
+ * Distance from a line to a lseg
+ */
+Datum
+dist_ls(PG_FUNCTION_ARGS)
+{
+ LINE *line = PG_GETARG_LINE_P(0);
+ LSEG *lseg = PG_GETARG_LSEG_P(1);
+
+ PG_RETURN_FLOAT8(lseg_closept_line(NULL, lseg, line));
+}
+
+/*
+ * Distance from a lseg to a box
+ */
+Datum
+dist_sb(PG_FUNCTION_ARGS)
+{
+ LSEG *lseg = PG_GETARG_LSEG_P(0);
+ BOX *box = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_FLOAT8(box_closept_lseg(NULL, box, lseg));
+}
+
+/*
+ * Distance from a box to a lseg
+ */
+Datum
+dist_bs(PG_FUNCTION_ARGS)
+{
+ BOX *box = PG_GETARG_BOX_P(0);
+ LSEG *lseg = PG_GETARG_LSEG_P(1);
+
+ PG_RETURN_FLOAT8(box_closept_lseg(NULL, box, lseg));
+}
+
+static float8
+dist_cpoly_internal(CIRCLE *circle, POLYGON *poly)
+{
+ float8 result;
+
+ /* calculate distance to center, and subtract radius */
+ result = float8_mi(dist_ppoly_internal(&circle->center, poly),
+ circle->radius);
+ if (result < 0.0)
+ result = 0.0;
+
+ return result;
+}
+
+/*
+ * Distance from a circle to a polygon
+ */
+Datum
+dist_cpoly(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle = PG_GETARG_CIRCLE_P(0);
+ POLYGON *poly = PG_GETARG_POLYGON_P(1);
+
+ PG_RETURN_FLOAT8(dist_cpoly_internal(circle, poly));
+}
+
+/*
+ * Distance from a polygon to a circle
+ */
+Datum
+dist_polyc(PG_FUNCTION_ARGS)
+{
+ POLYGON *poly = PG_GETARG_POLYGON_P(0);
+ CIRCLE *circle = PG_GETARG_CIRCLE_P(1);
+
+ PG_RETURN_FLOAT8(dist_cpoly_internal(circle, poly));
+}
+
+/*
+ * Distance from a point to a polygon
+ */
+Datum
+dist_ppoly(PG_FUNCTION_ARGS)
+{
+ Point *point = PG_GETARG_POINT_P(0);
+ POLYGON *poly = PG_GETARG_POLYGON_P(1);
+
+ PG_RETURN_FLOAT8(dist_ppoly_internal(point, poly));
+}
+
+Datum
+dist_polyp(PG_FUNCTION_ARGS)
+{
+ POLYGON *poly = PG_GETARG_POLYGON_P(0);
+ Point *point = PG_GETARG_POINT_P(1);
+
+ PG_RETURN_FLOAT8(dist_ppoly_internal(point, poly));
+}
+
+static float8
+dist_ppoly_internal(Point *pt, POLYGON *poly)
+{
+ float8 result;
+ float8 d;
+ int i;
+ LSEG seg;
+
+ if (point_inside(pt, poly->npts, poly->p) != 0)
+ return 0.0;
+
+ /* initialize distance with segment between first and last points */
+ seg.p[0].x = poly->p[0].x;
+ seg.p[0].y = poly->p[0].y;
+ seg.p[1].x = poly->p[poly->npts - 1].x;
+ seg.p[1].y = poly->p[poly->npts - 1].y;
+ result = lseg_closept_point(NULL, &seg, pt);
+
+ /* check distances for other segments */
+ for (i = 0; i < poly->npts - 1; i++)
+ {
+ seg.p[0].x = poly->p[i].x;
+ seg.p[0].y = poly->p[i].y;
+ seg.p[1].x = poly->p[i + 1].x;
+ seg.p[1].y = poly->p[i + 1].y;
+ d = lseg_closept_point(NULL, &seg, pt);
+ if (float8_lt(d, result))
+ result = d;
+ }
+
+ return result;
+}
+
+
+/*---------------------------------------------------------------------
+ * interpt_
+ * Intersection point of objects.
+ * We choose to ignore the "point" of intersection between
+ * lines and boxes, since there are typically two.
+ *-------------------------------------------------------------------*/
+
+/*
+ * Return whether the line segment intersects the line. If *result is not
+ * NULL, it is set to the intersection point.
+ */
+static bool
+lseg_interpt_line(Point *result, LSEG *lseg, LINE *line)
+{
+ Point interpt;
+ LINE tmp;
+
+ /*
+ * First, we promote the line segment to a line, because we know how to
+ * find the intersection point of two lines. If they don't have an
+ * intersection point, we are done.
+ */
+ line_construct(&tmp, &lseg->p[0], lseg_sl(lseg));
+ if (!line_interpt_line(&interpt, &tmp, line))
+ return false;
+
+ /*
+ * Then, we check whether the intersection point is actually on the line
+ * segment.
+ */
+ if (!lseg_contain_point(lseg, &interpt))
+ return false;
+ if (result != NULL)
+ {
+ /*
+ * If there is an intersection, then check explicitly for matching
+ * endpoints since there may be rounding effects with annoying LSB
+ * residue.
+ */
+ if (point_eq_point(&lseg->p[0], &interpt))
+ *result = lseg->p[0];
+ else if (point_eq_point(&lseg->p[1], &interpt))
+ *result = lseg->p[1];
+ else
+ *result = interpt;
+ }
+
+ return true;
+}
+
+/*---------------------------------------------------------------------
+ * close_
+ * Point of closest proximity between objects.
+ *-------------------------------------------------------------------*/
+
+/*
+ * If *result is not NULL, it is set to the point where a perpendicular
+ * dropped from the given point meets the line.  Returns the distance
+ * between the point and that closest point.
+ */
+static float8
+line_closept_point(Point *result, LINE *line, Point *point)
+{
+ Point closept;
+ LINE tmp;
+
+ /*
+ * We drop a perpendicular to find the intersection point. Ordinarily we
+ * should always find it, but that can fail in the presence of NaN
+ * coordinates, and perhaps even from simple roundoff issues.
+ */
+ line_construct(&tmp, point, line_invsl(line));
+ if (!line_interpt_line(&closept, &tmp, line))
+ {
+ if (result != NULL)
+ *result = *point;
+
+ return get_float8_nan();
+ }
+
+ if (result != NULL)
+ *result = closept;
+
+ return point_dt(&closept, point);
+}
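+
+/*
+ * Worked example (illustrative values only): for the line {1, -1, 0}
+ * ("y = x") and the point (2,0), the perpendicular through the point has
+ * slope -1, meets the line at (1,1), and the returned distance is sqrt(2).
+ */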
+
+Datum
+close_pl(PG_FUNCTION_ARGS)
+{
+ Point *pt = PG_GETARG_POINT_P(0);
+ LINE *line = PG_GETARG_LINE_P(1);
+ Point *result;
+
+ result = (Point *) palloc(sizeof(Point));
+
+ if (isnan(line_closept_point(result, line, pt)))
+ PG_RETURN_NULL();
+
+ PG_RETURN_POINT_P(result);
+}
+
+
+/*
+ * Closest point on line segment to specified point.
+ *
+ * If *result is not NULL, set it to the closest point on the line segment
+ * to the point. Returns the distance between the two points.
+ */
+static float8
+lseg_closept_point(Point *result, LSEG *lseg, Point *pt)
+{
+ Point closept;
+ LINE tmp;
+
+ /*
+ * To find the closest point, we draw a perpendicular line from the point
+ * to the line segment.
+ */
+ line_construct(&tmp, pt, point_invsl(&lseg->p[0], &lseg->p[1]));
+ lseg_closept_line(&closept, lseg, &tmp);
+
+ if (result != NULL)
+ *result = closept;
+
+ return point_dt(&closept, pt);
+}
+
+Datum
+close_ps(PG_FUNCTION_ARGS)
+{
+ Point *pt = PG_GETARG_POINT_P(0);
+ LSEG *lseg = PG_GETARG_LSEG_P(1);
+ Point *result;
+
+ result = (Point *) palloc(sizeof(Point));
+
+ if (isnan(lseg_closept_point(result, lseg, pt)))
+ PG_RETURN_NULL();
+
+ PG_RETURN_POINT_P(result);
+}
+
+
+/*
+ * Closest point on line segment to line segment
+ */
+static float8
+lseg_closept_lseg(Point *result, LSEG *on_lseg, LSEG *to_lseg)
+{
+ Point point;
+ float8 dist,
+ d;
+
+ /* First, we handle the case when the line segments are intersecting. */
+ if (lseg_interpt_lseg(result, on_lseg, to_lseg))
+ return 0.0;
+
+ /*
+ * Then, we find the closest points from the endpoints of the second line
+ * segment, and keep the closest one.
+ */
+ dist = lseg_closept_point(result, on_lseg, &to_lseg->p[0]);
+ d = lseg_closept_point(&point, on_lseg, &to_lseg->p[1]);
+ if (float8_lt(d, dist))
+ {
+ dist = d;
+ if (result != NULL)
+ *result = point;
+ }
+
+ /* The closest point can still be one of the endpoints, so we test them. */
+ d = lseg_closept_point(NULL, to_lseg, &on_lseg->p[0]);
+ if (float8_lt(d, dist))
+ {
+ dist = d;
+ if (result != NULL)
+ *result = on_lseg->p[0];
+ }
+ d = lseg_closept_point(NULL, to_lseg, &on_lseg->p[1]);
+ if (float8_lt(d, dist))
+ {
+ dist = d;
+ if (result != NULL)
+ *result = on_lseg->p[1];
+ }
+
+ return dist;
+}
+
+Datum
+close_lseg(PG_FUNCTION_ARGS)
+{
+ LSEG *l1 = PG_GETARG_LSEG_P(0);
+ LSEG *l2 = PG_GETARG_LSEG_P(1);
+ Point *result;
+
+ if (lseg_sl(l1) == lseg_sl(l2))
+ PG_RETURN_NULL();
+
+ result = (Point *) palloc(sizeof(Point));
+
+ if (isnan(lseg_closept_lseg(result, l2, l1)))
+ PG_RETURN_NULL();
+
+ PG_RETURN_POINT_P(result);
+}
+
+
+/*
+ * Closest point on or in box to specified point.
+ *
+ * If result is not NULL, *result is set to the closest point on the box to
+ * the given point.  Returns the distance between the two points.
+ */
+static float8
+box_closept_point(Point *result, BOX *box, Point *pt)
+{
+ float8 dist,
+ d;
+ Point point,
+ closept;
+ LSEG lseg;
+
+ if (box_contain_point(box, pt))
+ {
+ if (result != NULL)
+ *result = *pt;
+
+ return 0.0;
+ }
+
+ /* pairwise check lseg distances */
+ point.x = box->low.x;
+ point.y = box->high.y;
+ statlseg_construct(&lseg, &box->low, &point);
+ dist = lseg_closept_point(result, &lseg, pt);
+
+ statlseg_construct(&lseg, &box->high, &point);
+ d = lseg_closept_point(&closept, &lseg, pt);
+ if (float8_lt(d, dist))
+ {
+ dist = d;
+ if (result != NULL)
+ *result = closept;
+ }
+
+ point.x = box->high.x;
+ point.y = box->low.y;
+ statlseg_construct(&lseg, &box->low, &point);
+ d = lseg_closept_point(&closept, &lseg, pt);
+ if (float8_lt(d, dist))
+ {
+ dist = d;
+ if (result != NULL)
+ *result = closept;
+ }
+
+ statlseg_construct(&lseg, &box->high, &point);
+ d = lseg_closept_point(&closept, &lseg, pt);
+ if (float8_lt(d, dist))
+ {
+ dist = d;
+ if (result != NULL)
+ *result = closept;
+ }
+
+ return dist;
+}
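+
+/*
+ * Illustrative example for box_closept_point()/close_pb (assuming exact
+ * arithmetic): for the box with corners (0,0) and (2,2) and the point (5,1),
+ * the closest boundary point is (2,1) on the right edge, at distance 3; the
+ * other edges are farther away (5 for the left edge, sqrt(10) for the top
+ * and bottom edges).
+ */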
+
+Datum
+close_pb(PG_FUNCTION_ARGS)
+{
+ Point *pt = PG_GETARG_POINT_P(0);
+ BOX *box = PG_GETARG_BOX_P(1);
+ Point *result;
+
+ result = (Point *) palloc(sizeof(Point));
+
+ if (isnan(box_closept_point(result, box, pt)))
+ PG_RETURN_NULL();
+
+ PG_RETURN_POINT_P(result);
+}
+
+/*
+ * Closest point on line segment to line.
+ *
+ * Returns the distance between the line and the closest point of the line
+ * segment to the line.  If result is not NULL, *result is set to that point.
+ *
+ * NOTE: When the segment is parallel to the line, when its endpoints compare
+ * equal under FPeq(), in the presence of NaN or infinite coordinates, or
+ * perhaps even because of simple roundoff issues, there may not be a single
+ * closest point.  We are likely to set the result to the second endpoint in
+ * these cases.
+ */
+static float8
+lseg_closept_line(Point *result, LSEG *lseg, LINE *line)
+{
+ float8 dist1,
+ dist2;
+
+ if (lseg_interpt_line(result, lseg, line))
+ return 0.0;
+
+ dist1 = line_closept_point(NULL, line, &lseg->p[0]);
+ dist2 = line_closept_point(NULL, line, &lseg->p[1]);
+
+ if (dist1 < dist2)
+ {
+ if (result != NULL)
+ *result = lseg->p[0];
+
+ return dist1;
+ }
+ else
+ {
+ if (result != NULL)
+ *result = lseg->p[1];
+
+ return dist2;
+ }
+}
+
+Datum
+close_ls(PG_FUNCTION_ARGS)
+{
+ LINE *line = PG_GETARG_LINE_P(0);
+ LSEG *lseg = PG_GETARG_LSEG_P(1);
+ Point *result;
+
+ if (lseg_sl(lseg) == line_sl(line))
+ PG_RETURN_NULL();
+
+ result = (Point *) palloc(sizeof(Point));
+
+ if (isnan(lseg_closept_line(result, lseg, line)))
+ PG_RETURN_NULL();
+
+ PG_RETURN_POINT_P(result);
+}
+
+
+/*
+ * Closest point on or in box to line segment.
+ *
+ * Returns the distance between the line segment and the closest point on or
+ * in the box.  If result is not NULL, *result is set to that point.
+ */
+static float8
+box_closept_lseg(Point *result, BOX *box, LSEG *lseg)
+{
+ float8 dist,
+ d;
+ Point point,
+ closept;
+ LSEG bseg;
+
+ if (box_interpt_lseg(result, box, lseg))
+ return 0.0;
+
+ /* pairwise check lseg distances */
+ point.x = box->low.x;
+ point.y = box->high.y;
+ statlseg_construct(&bseg, &box->low, &point);
+ dist = lseg_closept_lseg(result, &bseg, lseg);
+
+ statlseg_construct(&bseg, &box->high, &point);
+ d = lseg_closept_lseg(&closept, &bseg, lseg);
+ if (float8_lt(d, dist))
+ {
+ dist = d;
+ if (result != NULL)
+ *result = closept;
+ }
+
+ point.x = box->high.x;
+ point.y = box->low.y;
+ statlseg_construct(&bseg, &box->low, &point);
+ d = lseg_closept_lseg(&closept, &bseg, lseg);
+ if (float8_lt(d, dist))
+ {
+ dist = d;
+ if (result != NULL)
+ *result = closept;
+ }
+
+ statlseg_construct(&bseg, &box->high, &point);
+ d = lseg_closept_lseg(&closept, &bseg, lseg);
+ if (float8_lt(d, dist))
+ {
+ dist = d;
+ if (result != NULL)
+ *result = closept;
+ }
+
+ return dist;
+}
+
+Datum
+close_sb(PG_FUNCTION_ARGS)
+{
+ LSEG *lseg = PG_GETARG_LSEG_P(0);
+ BOX *box = PG_GETARG_BOX_P(1);
+ Point *result;
+
+ result = (Point *) palloc(sizeof(Point));
+
+ if (isnan(box_closept_lseg(result, box, lseg)))
+ PG_RETURN_NULL();
+
+ PG_RETURN_POINT_P(result);
+}
+
+
+/*---------------------------------------------------------------------
+ * on_
+ * Whether one object lies completely within another.
+ *-------------------------------------------------------------------*/
+
+/*
+ * Does the point satisfy the equation?
+ */
+static bool
+line_contain_point(LINE *line, Point *point)
+{
+ return FPzero(float8_pl(float8_pl(float8_mul(line->A, point->x),
+ float8_mul(line->B, point->y)),
+ line->C));
+}
+
+Datum
+on_pl(PG_FUNCTION_ARGS)
+{
+ Point *pt = PG_GETARG_POINT_P(0);
+ LINE *line = PG_GETARG_LINE_P(1);
+
+ PG_RETURN_BOOL(line_contain_point(line, pt));
+}
+
+
+/*
+ * Determine colinearity by detecting a triangle inequality.
+ * This algorithm seems to behave nicely even with lsb residues - tgl 1997-07-09
+ */
+static bool
+lseg_contain_point(LSEG *lseg, Point *pt)
+{
+ return FPeq(point_dt(pt, &lseg->p[0]) +
+ point_dt(pt, &lseg->p[1]),
+ point_dt(&lseg->p[0], &lseg->p[1]));
+}
+
+/*
+ * Illustrative example for the triangle-inequality test above: for the
+ * segment ((0,0),(2,2)), the point (1,1) gives sqrt(2) + sqrt(2) =
+ * 2*sqrt(2), exactly the segment length, so it is reported as on the
+ * segment; the point (1,2) gives sqrt(5) + 1, which exceeds 2*sqrt(2), so
+ * it is not.
+ */
+
+Datum
+on_ps(PG_FUNCTION_ARGS)
+{
+ Point *pt = PG_GETARG_POINT_P(0);
+ LSEG *lseg = PG_GETARG_LSEG_P(1);
+
+ PG_RETURN_BOOL(lseg_contain_point(lseg, pt));
+}
+
+
+/*
+ * Check whether the point is in the box or on its border
+ */
+static bool
+box_contain_point(BOX *box, Point *point)
+{
+ return box->high.x >= point->x && box->low.x <= point->x &&
+ box->high.y >= point->y && box->low.y <= point->y;
+}
+
+Datum
+on_pb(PG_FUNCTION_ARGS)
+{
+ Point *pt = PG_GETARG_POINT_P(0);
+ BOX *box = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_BOOL(box_contain_point(box, pt));
+}
+
+Datum
+box_contain_pt(PG_FUNCTION_ARGS)
+{
+ BOX *box = PG_GETARG_BOX_P(0);
+ Point *pt = PG_GETARG_POINT_P(1);
+
+ PG_RETURN_BOOL(box_contain_point(box, pt));
+}
+
+/* on_ppath -
+ * Whether a point lies within (on) a polyline.
+ * If open, we have to (groan) check each segment.
+ * (uses same algorithm as for point intersecting segment - tgl 1997-07-09)
+ * If closed, we use the old O(n) ray method for point-in-polygon.
+ * The ray is horizontal, from pt out to the right.
+ * Each segment that crosses the ray counts as an
+ * intersection; note that an endpoint or edge may touch
+ * but not cross.
+ * (we can do p-in-p in lg(n), but it takes preprocessing)
+ */
+Datum
+on_ppath(PG_FUNCTION_ARGS)
+{
+ Point *pt = PG_GETARG_POINT_P(0);
+ PATH *path = PG_GETARG_PATH_P(1);
+ int i,
+ n;
+ float8 a,
+ b;
+
+ /*-- OPEN --*/
+ if (!path->closed)
+ {
+ n = path->npts - 1;
+ a = point_dt(pt, &path->p[0]);
+ for (i = 0; i < n; i++)
+ {
+ b = point_dt(pt, &path->p[i + 1]);
+ if (FPeq(float8_pl(a, b), point_dt(&path->p[i], &path->p[i + 1])))
+ PG_RETURN_BOOL(true);
+ a = b;
+ }
+ PG_RETURN_BOOL(false);
+ }
+
+ /*-- CLOSED --*/
+ PG_RETURN_BOOL(point_inside(pt, path->npts, path->p) != 0);
+}
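+
+/*
+ * Illustrative example for the open-path branch above: for the path
+ * '[(0,0),(2,2),(4,0)]' and the point (1,1), the distances to (0,0) and
+ * (2,2) sum to 2*sqrt(2), exactly the length of the first segment, so the
+ * function returns true on the first iteration.
+ */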
+
+
+/*
+ * Check whether the line segment is on the line or close enough
+ *
+ * It is, if both of its points are on the line or close enough.
+ */
+Datum
+on_sl(PG_FUNCTION_ARGS)
+{
+ LSEG *lseg = PG_GETARG_LSEG_P(0);
+ LINE *line = PG_GETARG_LINE_P(1);
+
+ PG_RETURN_BOOL(line_contain_point(line, &lseg->p[0]) &&
+ line_contain_point(line, &lseg->p[1]));
+}
+
+
+/*
+ * Check whether the line segment is in the box or on its border
+ *
+ * It is, if both of its points are in the box or on its border.
+ */
+static bool
+box_contain_lseg(BOX *box, LSEG *lseg)
+{
+ return box_contain_point(box, &lseg->p[0]) &&
+ box_contain_point(box, &lseg->p[1]);
+}
+
+Datum
+on_sb(PG_FUNCTION_ARGS)
+{
+ LSEG *lseg = PG_GETARG_LSEG_P(0);
+ BOX *box = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_BOOL(box_contain_lseg(box, lseg));
+}
+
+/*---------------------------------------------------------------------
+ * inter_
+ * Whether one object intersects another.
+ *-------------------------------------------------------------------*/
+
+Datum
+inter_sl(PG_FUNCTION_ARGS)
+{
+ LSEG *lseg = PG_GETARG_LSEG_P(0);
+ LINE *line = PG_GETARG_LINE_P(1);
+
+ PG_RETURN_BOOL(lseg_interpt_line(NULL, lseg, line));
+}
+
+
+/*
+ * Do line segment and box intersect?
+ *
+ * Segment completely inside box counts as intersection.
+ * If you want only segments crossing box boundaries,
+ * try converting box to path first.
+ *
+ * When they overlap and result is not NULL, this function also sets *result
+ * to the closest point on the line segment to the center of the box.  That
+ * choice is somewhat arbitrary, but it may be the best we can do, since
+ * there are typically two intersection points.
+ *
+ * Optimize for non-intersection by checking for box intersection first.
+ * - thomas 1998-01-30
+ */
+static bool
+box_interpt_lseg(Point *result, BOX *box, LSEG *lseg)
+{
+ BOX lbox;
+ LSEG bseg;
+ Point point;
+
+ lbox.low.x = float8_min(lseg->p[0].x, lseg->p[1].x);
+ lbox.low.y = float8_min(lseg->p[0].y, lseg->p[1].y);
+ lbox.high.x = float8_max(lseg->p[0].x, lseg->p[1].x);
+ lbox.high.y = float8_max(lseg->p[0].y, lseg->p[1].y);
+
+ /* nothing close to overlap? then not going to intersect */
+ if (!box_ov(&lbox, box))
+ return false;
+
+ if (result != NULL)
+ {
+ box_cn(&point, box);
+ lseg_closept_point(result, lseg, &point);
+ }
+
+ /* an endpoint of segment is inside box? then clearly intersects */
+ if (box_contain_point(box, &lseg->p[0]) ||
+ box_contain_point(box, &lseg->p[1]))
+ return true;
+
+ /* pairwise check lseg intersections */
+ point.x = box->low.x;
+ point.y = box->high.y;
+ statlseg_construct(&bseg, &box->low, &point);
+ if (lseg_interpt_lseg(NULL, &bseg, lseg))
+ return true;
+
+ statlseg_construct(&bseg, &box->high, &point);
+ if (lseg_interpt_lseg(NULL, &bseg, lseg))
+ return true;
+
+ point.x = box->high.x;
+ point.y = box->low.y;
+ statlseg_construct(&bseg, &box->low, &point);
+ if (lseg_interpt_lseg(NULL, &bseg, lseg))
+ return true;
+
+ statlseg_construct(&bseg, &box->high, &point);
+ if (lseg_interpt_lseg(NULL, &bseg, lseg))
+ return true;
+
+ /* if we dropped through, no two segs intersected */
+ return false;
+}
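+
+/*
+ * Illustrative example for box_interpt_lseg() (assuming exact arithmetic):
+ * for the box with corners (0,0) and (2,2) and the segment ((1,1),(5,5)),
+ * the bounding boxes overlap and the segment endpoint (1,1) lies inside the
+ * box, so true is returned; if a result pointer is supplied, it is set to
+ * (1,1), the point of the segment closest to the box's center, which here
+ * is the center itself.
+ */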
+
+Datum
+inter_sb(PG_FUNCTION_ARGS)
+{
+ LSEG *lseg = PG_GETARG_LSEG_P(0);
+ BOX *box = PG_GETARG_BOX_P(1);
+
+ PG_RETURN_BOOL(box_interpt_lseg(NULL, box, lseg));
+}
+
+
+/* inter_lb()
+ * Do line and box intersect?
+ */
+Datum
+inter_lb(PG_FUNCTION_ARGS)
+{
+ LINE *line = PG_GETARG_LINE_P(0);
+ BOX *box = PG_GETARG_BOX_P(1);
+ LSEG bseg;
+ Point p1,
+ p2;
+
+ /* pairwise check lseg intersections */
+ p1.x = box->low.x;
+ p1.y = box->low.y;
+ p2.x = box->low.x;
+ p2.y = box->high.y;
+ statlseg_construct(&bseg, &p1, &p2);
+ if (lseg_interpt_line(NULL, &bseg, line))
+ PG_RETURN_BOOL(true);
+ p1.x = box->high.x;
+ p1.y = box->high.y;
+ statlseg_construct(&bseg, &p1, &p2);
+ if (lseg_interpt_line(NULL, &bseg, line))
+ PG_RETURN_BOOL(true);
+ p2.x = box->high.x;
+ p2.y = box->low.y;
+ statlseg_construct(&bseg, &p1, &p2);
+ if (lseg_interpt_line(NULL, &bseg, line))
+ PG_RETURN_BOOL(true);
+ p1.x = box->low.x;
+ p1.y = box->low.y;
+ statlseg_construct(&bseg, &p1, &p2);
+ if (lseg_interpt_line(NULL, &bseg, line))
+ PG_RETURN_BOOL(true);
+
+ /* if we dropped through, no intersection */
+ PG_RETURN_BOOL(false);
+}
+
+/*------------------------------------------------------------------
+ * The following routines define a data type and operator class for
+ * POLYGONS, part of which (the polygon's bounding box) is built on
+ * top of the BOX data type.
+ *
+ * make_bound_box - create the bounding box for the input polygon
+ *------------------------------------------------------------------*/
+
+/*---------------------------------------------------------------------
+ * Make the smallest bounding box for the given polygon.
+ *---------------------------------------------------------------------*/
+static void
+make_bound_box(POLYGON *poly)
+{
+ int i;
+ float8 x1,
+ y1,
+ x2,
+ y2;
+
+ Assert(poly->npts > 0);
+
+ x1 = x2 = poly->p[0].x;
+ y2 = y1 = poly->p[0].y;
+ for (i = 1; i < poly->npts; i++)
+ {
+ if (float8_lt(poly->p[i].x, x1))
+ x1 = poly->p[i].x;
+ if (float8_gt(poly->p[i].x, x2))
+ x2 = poly->p[i].x;
+ if (float8_lt(poly->p[i].y, y1))
+ y1 = poly->p[i].y;
+ if (float8_gt(poly->p[i].y, y2))
+ y2 = poly->p[i].y;
+ }
+
+ poly->boundbox.low.x = x1;
+ poly->boundbox.high.x = x2;
+ poly->boundbox.low.y = y1;
+ poly->boundbox.high.y = y2;
+}
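+
+/*
+ * Illustrative example: for a polygon with vertices (0,0), (3,1) and (1,4),
+ * the code above sets the bounding box to low = (0,0), high = (3,4).
+ */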
+
+/*------------------------------------------------------------------
+ * poly_in - read in the polygon from a string specification
+ *
+ * External format:
+ * "((x0,y0),...,(xn,yn))"
+ * "x0,y0,...,xn,yn"
+ * also supports the older style "(x1,...,xn,y1,...yn)"
+ *------------------------------------------------------------------*/
+Datum
+poly_in(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+ POLYGON *poly;
+ int npts;
+ int size;
+ int base_size;
+ bool isopen;
+
+ if ((npts = pair_count(str, ',')) <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "polygon", str)));
+
+ base_size = sizeof(poly->p[0]) * npts;
+ size = offsetof(POLYGON, p) + base_size;
+
+ /* Check for integer overflow */
+ if (base_size / npts != sizeof(poly->p[0]) || size <= base_size)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("too many points requested")));
+
+ poly = (POLYGON *) palloc0(size); /* zero any holes */
+
+ SET_VARSIZE(poly, size);
+ poly->npts = npts;
+
+ path_decode(str, false, npts, &(poly->p[0]), &isopen, NULL, "polygon", str);
+
+ make_bound_box(poly);
+
+ PG_RETURN_POLYGON_P(poly);
+}
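+
+/*
+ * Illustrative usage at the SQL level (a made-up value, not taken from the
+ * regression tests):
+ *
+ *		SELECT '((0,0),(4,0),(4,4),(0,4))'::polygon;
+ *
+ * Per the accepted external formats listed above, the same square can also
+ * be written without the inner parentheses, as '0,0,4,0,4,4,0,4'.
+ */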
+
+/*---------------------------------------------------------------
+ * poly_out - convert internal POLYGON representation to the
+ * character string format "((f8,f8),...,(f8,f8))"
+ *---------------------------------------------------------------*/
+Datum
+poly_out(PG_FUNCTION_ARGS)
+{
+ POLYGON *poly = PG_GETARG_POLYGON_P(0);
+
+ PG_RETURN_CSTRING(path_encode(PATH_CLOSED, poly->npts, poly->p));
+}
+
+/*
+ * poly_recv - converts external binary format to polygon
+ *
+ * External representation is int32 number of points, and the points.
+ * We recompute the bounding box on read, instead of trusting it to
+ * be valid. (Checking it would take just as long, so may as well
+ * omit it from external representation.)
+ */
+Datum
+poly_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ POLYGON *poly;
+ int32 npts;
+ int32 i;
+ int size;
+
+ npts = pq_getmsgint(buf, sizeof(int32));
+ if (npts <= 0 || npts >= (int32) ((INT_MAX - offsetof(POLYGON, p)) / sizeof(Point)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("invalid number of points in external \"polygon\" value")));
+
+ size = offsetof(POLYGON, p) + sizeof(poly->p[0]) * npts;
+ poly = (POLYGON *) palloc0(size); /* zero any holes */
+
+ SET_VARSIZE(poly, size);
+ poly->npts = npts;
+
+ for (i = 0; i < npts; i++)
+ {
+ poly->p[i].x = pq_getmsgfloat8(buf);
+ poly->p[i].y = pq_getmsgfloat8(buf);
+ }
+
+ make_bound_box(poly);
+
+ PG_RETURN_POLYGON_P(poly);
+}
+
+/*
+ * poly_send - converts polygon to binary format
+ */
+Datum
+poly_send(PG_FUNCTION_ARGS)
+{
+ POLYGON *poly = PG_GETARG_POLYGON_P(0);
+ StringInfoData buf;
+ int32 i;
+
+ pq_begintypsend(&buf);
+ pq_sendint32(&buf, poly->npts);
+ for (i = 0; i < poly->npts; i++)
+ {
+ pq_sendfloat8(&buf, poly->p[i].x);
+ pq_sendfloat8(&buf, poly->p[i].y);
+ }
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+
+/*-------------------------------------------------------
+ * Is polygon A strictly left of polygon B? i.e. is
+ * the right most point of A left of the left most point
+ * of B?
+ *-------------------------------------------------------*/
+Datum
+poly_left(PG_FUNCTION_ARGS)
+{
+ POLYGON *polya = PG_GETARG_POLYGON_P(0);
+ POLYGON *polyb = PG_GETARG_POLYGON_P(1);
+ bool result;
+
+ result = polya->boundbox.high.x < polyb->boundbox.low.x;
+
+ /*
+ * Avoid leaking memory for toasted inputs ... needed for rtree indexes
+ */
+ PG_FREE_IF_COPY(polya, 0);
+ PG_FREE_IF_COPY(polyb, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+/*-------------------------------------------------------
+ * Is polygon A overlapping or left of polygon B? i.e. is
+ * the right most point of A at or left of the right most point
+ * of B?
+ *-------------------------------------------------------*/
+Datum
+poly_overleft(PG_FUNCTION_ARGS)
+{
+ POLYGON *polya = PG_GETARG_POLYGON_P(0);
+ POLYGON *polyb = PG_GETARG_POLYGON_P(1);
+ bool result;
+
+ result = polya->boundbox.high.x <= polyb->boundbox.high.x;
+
+ /*
+ * Avoid leaking memory for toasted inputs ... needed for rtree indexes
+ */
+ PG_FREE_IF_COPY(polya, 0);
+ PG_FREE_IF_COPY(polyb, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+/*-------------------------------------------------------
+ * Is polygon A strictly right of polygon B? i.e. is
+ * the left most point of A right of the right most point
+ * of B?
+ *-------------------------------------------------------*/
+Datum
+poly_right(PG_FUNCTION_ARGS)
+{
+ POLYGON *polya = PG_GETARG_POLYGON_P(0);
+ POLYGON *polyb = PG_GETARG_POLYGON_P(1);
+ bool result;
+
+ result = polya->boundbox.low.x > polyb->boundbox.high.x;
+
+ /*
+ * Avoid leaking memory for toasted inputs ... needed for rtree indexes
+ */
+ PG_FREE_IF_COPY(polya, 0);
+ PG_FREE_IF_COPY(polyb, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+/*-------------------------------------------------------
+ * Is polygon A overlapping or right of polygon B? i.e. is
+ * the left most point of A at or right of the left most point
+ * of B?
+ *-------------------------------------------------------*/
+Datum
+poly_overright(PG_FUNCTION_ARGS)
+{
+ POLYGON *polya = PG_GETARG_POLYGON_P(0);
+ POLYGON *polyb = PG_GETARG_POLYGON_P(1);
+ bool result;
+
+ result = polya->boundbox.low.x >= polyb->boundbox.low.x;
+
+ /*
+ * Avoid leaking memory for toasted inputs ... needed for rtree indexes
+ */
+ PG_FREE_IF_COPY(polya, 0);
+ PG_FREE_IF_COPY(polyb, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+/*-------------------------------------------------------
+ * Is polygon A strictly below polygon B? i.e. is
+ * the upper most point of A below the lower most point
+ * of B?
+ *-------------------------------------------------------*/
+Datum
+poly_below(PG_FUNCTION_ARGS)
+{
+ POLYGON *polya = PG_GETARG_POLYGON_P(0);
+ POLYGON *polyb = PG_GETARG_POLYGON_P(1);
+ bool result;
+
+ result = polya->boundbox.high.y < polyb->boundbox.low.y;
+
+ /*
+ * Avoid leaking memory for toasted inputs ... needed for rtree indexes
+ */
+ PG_FREE_IF_COPY(polya, 0);
+ PG_FREE_IF_COPY(polyb, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+/*-------------------------------------------------------
+ * Is polygon A overlapping or below polygon B? i.e. is
+ * the upper most point of A at or below the upper most point
+ * of B?
+ *-------------------------------------------------------*/
+Datum
+poly_overbelow(PG_FUNCTION_ARGS)
+{
+ POLYGON *polya = PG_GETARG_POLYGON_P(0);
+ POLYGON *polyb = PG_GETARG_POLYGON_P(1);
+ bool result;
+
+ result = polya->boundbox.high.y <= polyb->boundbox.high.y;
+
+ /*
+ * Avoid leaking memory for toasted inputs ... needed for rtree indexes
+ */
+ PG_FREE_IF_COPY(polya, 0);
+ PG_FREE_IF_COPY(polyb, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+/*-------------------------------------------------------
+ * Is polygon A strictly above polygon B? i.e. is
+ * the lower most point of A above the upper most point
+ * of B?
+ *-------------------------------------------------------*/
+Datum
+poly_above(PG_FUNCTION_ARGS)
+{
+ POLYGON *polya = PG_GETARG_POLYGON_P(0);
+ POLYGON *polyb = PG_GETARG_POLYGON_P(1);
+ bool result;
+
+ result = polya->boundbox.low.y > polyb->boundbox.high.y;
+
+ /*
+ * Avoid leaking memory for toasted inputs ... needed for rtree indexes
+ */
+ PG_FREE_IF_COPY(polya, 0);
+ PG_FREE_IF_COPY(polyb, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+/*-------------------------------------------------------
+ * Is polygon A overlapping or above polygon B? i.e. is
+ * the lower most point of A at or above the lower most point
+ * of B?
+ *-------------------------------------------------------*/
+Datum
+poly_overabove(PG_FUNCTION_ARGS)
+{
+ POLYGON *polya = PG_GETARG_POLYGON_P(0);
+ POLYGON *polyb = PG_GETARG_POLYGON_P(1);
+ bool result;
+
+ result = polya->boundbox.low.y >= polyb->boundbox.low.y;
+
+ /*
+ * Avoid leaking memory for toasted inputs ... needed for rtree indexes
+ */
+ PG_FREE_IF_COPY(polya, 0);
+ PG_FREE_IF_COPY(polyb, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+
+/*-------------------------------------------------------
+ * Is polygon A the same as polygon B? i.e. are all the
+ * points the same?
+ * Check all points for matches in both forward and reverse
+ * direction since polygons are non-directional and are
+ * closed shapes.
+ *-------------------------------------------------------*/
+Datum
+poly_same(PG_FUNCTION_ARGS)
+{
+ POLYGON *polya = PG_GETARG_POLYGON_P(0);
+ POLYGON *polyb = PG_GETARG_POLYGON_P(1);
+ bool result;
+
+ if (polya->npts != polyb->npts)
+ result = false;
+ else
+ result = plist_same(polya->npts, polya->p, polyb->p);
+
+ /*
+ * Avoid leaking memory for toasted inputs ... needed for rtree indexes
+ */
+ PG_FREE_IF_COPY(polya, 0);
+ PG_FREE_IF_COPY(polyb, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+/*-----------------------------------------------------------------
+ * Determine if polygon A overlaps polygon B
+ *-----------------------------------------------------------------*/
+static bool
+poly_overlap_internal(POLYGON *polya, POLYGON *polyb)
+{
+ bool result;
+
+ Assert(polya->npts > 0 && polyb->npts > 0);
+
+ /* Quick check by bounding box */
+ result = box_ov(&polya->boundbox, &polyb->boundbox);
+
+ /*
+	 * Brute-force algorithm: try to find intersecting edges; if any pair
+	 * intersects, the polygons overlap.  Otherwise, check whether one
+	 * polygon lies inside the other by testing a single point of each.
+ */
+ if (result)
+ {
+ int ia,
+ ib;
+ LSEG sa,
+ sb;
+
+ /* Init first of polya's edge with last point */
+ sa.p[0] = polya->p[polya->npts - 1];
+ result = false;
+
+ for (ia = 0; ia < polya->npts && !result; ia++)
+ {
+ /* Second point of polya's edge is a current one */
+ sa.p[1] = polya->p[ia];
+
+ /* Init first of polyb's edge with last point */
+ sb.p[0] = polyb->p[polyb->npts - 1];
+
+ for (ib = 0; ib < polyb->npts && !result; ib++)
+ {
+ sb.p[1] = polyb->p[ib];
+ result = lseg_interpt_lseg(NULL, &sa, &sb);
+ sb.p[0] = sb.p[1];
+ }
+
+ /*
+ * move current endpoint to the first point of next edge
+ */
+ sa.p[0] = sa.p[1];
+ }
+
+ if (!result)
+ {
+ result = (point_inside(polya->p, polyb->npts, polyb->p) ||
+ point_inside(polyb->p, polya->npts, polya->p));
+ }
+ }
+
+ return result;
+}
+
+Datum
+poly_overlap(PG_FUNCTION_ARGS)
+{
+ POLYGON *polya = PG_GETARG_POLYGON_P(0);
+ POLYGON *polyb = PG_GETARG_POLYGON_P(1);
+ bool result;
+
+ result = poly_overlap_internal(polya, polyb);
+
+ /*
+ * Avoid leaking memory for toasted inputs ... needed for rtree indexes
+ */
+ PG_FREE_IF_COPY(polya, 0);
+ PG_FREE_IF_COPY(polyb, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+/*
+ * Tests a special kind of segment for lying inside or outside the polygon.
+ * Special kind means:
+ *	- point a should be on segment s
+ *	- segment (a,b) should not be contained by s
+ * Returns true if:
+ *	- segment (a,b) is collinear with s and (a,b) is in the polygon
+ *	- segment (a,b) is not collinear with s.  Note: that doesn't
+ *	  mean that the segment is in the polygon!
+ */
+
+static bool
+touched_lseg_inside_poly(Point *a, Point *b, LSEG *s, POLYGON *poly, int start)
+{
+ /* point a is on s, b is not */
+ LSEG t;
+
+ t.p[0] = *a;
+ t.p[1] = *b;
+
+ if (point_eq_point(a, s->p))
+ {
+ if (lseg_contain_point(&t, s->p + 1))
+ return lseg_inside_poly(b, s->p + 1, poly, start);
+ }
+ else if (point_eq_point(a, s->p + 1))
+ {
+ if (lseg_contain_point(&t, s->p))
+ return lseg_inside_poly(b, s->p, poly, start);
+ }
+ else if (lseg_contain_point(&t, s->p))
+ {
+ return lseg_inside_poly(b, s->p, poly, start);
+ }
+ else if (lseg_contain_point(&t, s->p + 1))
+ {
+ return lseg_inside_poly(b, s->p + 1, poly, start);
+ }
+
+	return true;				/* may not be true, but that will be checked later */
+}
+
+/*
+ * Returns true if segment (a,b) is inside the polygon.  The start argument
+ * is an optimization: the function checks the polygon's edges starting from
+ * index start.
+ */
+static bool
+lseg_inside_poly(Point *a, Point *b, POLYGON *poly, int start)
+{
+ LSEG s,
+ t;
+ int i;
+ bool res = true,
+ intersection = false;
+
+ /* since this function recurses, it could be driven to stack overflow */
+ check_stack_depth();
+
+ t.p[0] = *a;
+ t.p[1] = *b;
+ s.p[0] = poly->p[(start == 0) ? (poly->npts - 1) : (start - 1)];
+
+ for (i = start; i < poly->npts && res; i++)
+ {
+ Point interpt;
+
+ CHECK_FOR_INTERRUPTS();
+
+ s.p[1] = poly->p[i];
+
+ if (lseg_contain_point(&s, t.p))
+ {
+ if (lseg_contain_point(&s, t.p + 1))
+ return true; /* t is contained by s */
+
+ /* Y-cross */
+ res = touched_lseg_inside_poly(t.p, t.p + 1, &s, poly, i + 1);
+ }
+ else if (lseg_contain_point(&s, t.p + 1))
+ {
+ /* Y-cross */
+ res = touched_lseg_inside_poly(t.p + 1, t.p, &s, poly, i + 1);
+ }
+ else if (lseg_interpt_lseg(&interpt, &t, &s))
+ {
+ /*
+ * segments are X-crossing, go to check each subsegment
+ */
+
+ intersection = true;
+ res = lseg_inside_poly(t.p, &interpt, poly, i + 1);
+ if (res)
+ res = lseg_inside_poly(t.p + 1, &interpt, poly, i + 1);
+ }
+
+ s.p[0] = s.p[1];
+ }
+
+ if (res && !intersection)
+ {
+ Point p;
+
+ /*
+		 * If no X-crossing intersection was found, check the central point of
+		 * the tested segment.  Otherwise we have already checked all of the
+		 * subsegments.
+ */
+ p.x = float8_div(float8_pl(t.p[0].x, t.p[1].x), 2.0);
+ p.y = float8_div(float8_pl(t.p[0].y, t.p[1].y), 2.0);
+
+ res = point_inside(&p, poly->npts, poly->p);
+ }
+
+ return res;
+}
+
+/*
+ * Check whether the first polygon contains the second
+ */
+static bool
+poly_contain_poly(POLYGON *contains_poly, POLYGON *contained_poly)
+{
+ int i;
+ LSEG s;
+
+ Assert(contains_poly->npts > 0 && contained_poly->npts > 0);
+
+ /*
+ * Quick check to see if contained's bounding box is contained in
+ * contains' bb.
+ */
+ if (!box_contain_box(&contains_poly->boundbox, &contained_poly->boundbox))
+ return false;
+
+ s.p[0] = contained_poly->p[contained_poly->npts - 1];
+
+ for (i = 0; i < contained_poly->npts; i++)
+ {
+ s.p[1] = contained_poly->p[i];
+ if (!lseg_inside_poly(s.p, s.p + 1, contains_poly, 0))
+ return false;
+ s.p[0] = s.p[1];
+ }
+
+ return true;
+}
+
+Datum
+poly_contain(PG_FUNCTION_ARGS)
+{
+ POLYGON *polya = PG_GETARG_POLYGON_P(0);
+ POLYGON *polyb = PG_GETARG_POLYGON_P(1);
+ bool result;
+
+ result = poly_contain_poly(polya, polyb);
+
+ /*
+ * Avoid leaking memory for toasted inputs ... needed for rtree indexes
+ */
+ PG_FREE_IF_COPY(polya, 0);
+ PG_FREE_IF_COPY(polyb, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+
+/*-----------------------------------------------------------------
+ * Determine if polygon A is contained by polygon B
+ *-----------------------------------------------------------------*/
+Datum
+poly_contained(PG_FUNCTION_ARGS)
+{
+ POLYGON *polya = PG_GETARG_POLYGON_P(0);
+ POLYGON *polyb = PG_GETARG_POLYGON_P(1);
+ bool result;
+
+ /* Just switch the arguments and pass it off to poly_contain */
+ result = poly_contain_poly(polyb, polya);
+
+ /*
+ * Avoid leaking memory for toasted inputs ... needed for rtree indexes
+ */
+ PG_FREE_IF_COPY(polya, 0);
+ PG_FREE_IF_COPY(polyb, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+
+Datum
+poly_contain_pt(PG_FUNCTION_ARGS)
+{
+ POLYGON *poly = PG_GETARG_POLYGON_P(0);
+ Point *p = PG_GETARG_POINT_P(1);
+
+ PG_RETURN_BOOL(point_inside(p, poly->npts, poly->p) != 0);
+}
+
+Datum
+pt_contained_poly(PG_FUNCTION_ARGS)
+{
+ Point *p = PG_GETARG_POINT_P(0);
+ POLYGON *poly = PG_GETARG_POLYGON_P(1);
+
+ PG_RETURN_BOOL(point_inside(p, poly->npts, poly->p) != 0);
+}
+
+
+Datum
+poly_distance(PG_FUNCTION_ARGS)
+{
+ POLYGON *polya = PG_GETARG_POLYGON_P(0);
+ POLYGON *polyb = PG_GETARG_POLYGON_P(1);
+ float8 min = 0.0; /* initialize to keep compiler quiet */
+ bool have_min = false;
+ float8 tmp;
+ int i,
+ j;
+ LSEG seg1,
+ seg2;
+
+ /*
+ * Distance is zero if polygons overlap. We must check this because the
+ * path distance will not give the right answer if one poly is entirely
+ * within the other.
+ */
+ if (poly_overlap_internal(polya, polyb))
+ PG_RETURN_FLOAT8(0.0);
+
+ /*
+ * When they don't overlap, the distance calculation is identical to that
+ * for closed paths (i.e., we needn't care about the fact that polygons
+ * include their contained areas). See path_distance().
+ */
+ for (i = 0; i < polya->npts; i++)
+ {
+ int iprev;
+
+ if (i > 0)
+ iprev = i - 1;
+ else
+ iprev = polya->npts - 1;
+
+ for (j = 0; j < polyb->npts; j++)
+ {
+ int jprev;
+
+ if (j > 0)
+ jprev = j - 1;
+ else
+ jprev = polyb->npts - 1;
+
+ statlseg_construct(&seg1, &polya->p[iprev], &polya->p[i]);
+ statlseg_construct(&seg2, &polyb->p[jprev], &polyb->p[j]);
+
+ tmp = lseg_closept_lseg(NULL, &seg1, &seg2);
+ if (!have_min || float8_lt(tmp, min))
+ {
+ min = tmp;
+ have_min = true;
+ }
+ }
+ }
+
+ if (!have_min)
+ PG_RETURN_NULL();
+
+ PG_RETURN_FLOAT8(min);
+}
+
+
+/***********************************************************************
+ **
+ ** Routines for 2D points.
+ **
+ ***********************************************************************/
+
+Datum
+construct_point(PG_FUNCTION_ARGS)
+{
+ float8 x = PG_GETARG_FLOAT8(0);
+ float8 y = PG_GETARG_FLOAT8(1);
+ Point *result;
+
+ result = (Point *) palloc(sizeof(Point));
+
+ point_construct(result, x, y);
+
+ PG_RETURN_POINT_P(result);
+}
+
+
+static inline void
+point_add_point(Point *result, Point *pt1, Point *pt2)
+{
+ point_construct(result,
+ float8_pl(pt1->x, pt2->x),
+ float8_pl(pt1->y, pt2->y));
+}
+
+Datum
+point_add(PG_FUNCTION_ARGS)
+{
+ Point *p1 = PG_GETARG_POINT_P(0);
+ Point *p2 = PG_GETARG_POINT_P(1);
+ Point *result;
+
+ result = (Point *) palloc(sizeof(Point));
+
+ point_add_point(result, p1, p2);
+
+ PG_RETURN_POINT_P(result);
+}
+
+
+static inline void
+point_sub_point(Point *result, Point *pt1, Point *pt2)
+{
+ point_construct(result,
+ float8_mi(pt1->x, pt2->x),
+ float8_mi(pt1->y, pt2->y));
+}
+
+Datum
+point_sub(PG_FUNCTION_ARGS)
+{
+ Point *p1 = PG_GETARG_POINT_P(0);
+ Point *p2 = PG_GETARG_POINT_P(1);
+ Point *result;
+
+ result = (Point *) palloc(sizeof(Point));
+
+ point_sub_point(result, p1, p2);
+
+ PG_RETURN_POINT_P(result);
+}
+
+
+static inline void
+point_mul_point(Point *result, Point *pt1, Point *pt2)
+{
+ point_construct(result,
+ float8_mi(float8_mul(pt1->x, pt2->x),
+ float8_mul(pt1->y, pt2->y)),
+ float8_pl(float8_mul(pt1->x, pt2->y),
+ float8_mul(pt1->y, pt2->x)));
+}
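+
+/*
+ * Point multiplication treats the points as complex numbers, per the formula
+ * above.  For illustration: (1,2) * (3,4) = (1*3 - 2*4, 1*4 + 2*3) =
+ * (-5,10).
+ */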
+
+Datum
+point_mul(PG_FUNCTION_ARGS)
+{
+ Point *p1 = PG_GETARG_POINT_P(0);
+ Point *p2 = PG_GETARG_POINT_P(1);
+ Point *result;
+
+ result = (Point *) palloc(sizeof(Point));
+
+ point_mul_point(result, p1, p2);
+
+ PG_RETURN_POINT_P(result);
+}
+
+
+static inline void
+point_div_point(Point *result, Point *pt1, Point *pt2)
+{
+ float8 div;
+
+ div = float8_pl(float8_mul(pt2->x, pt2->x), float8_mul(pt2->y, pt2->y));
+
+ point_construct(result,
+ float8_div(float8_pl(float8_mul(pt1->x, pt2->x),
+ float8_mul(pt1->y, pt2->y)), div),
+ float8_div(float8_mi(float8_mul(pt1->y, pt2->x),
+ float8_mul(pt1->x, pt2->y)), div));
+}
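+
+/*
+ * Point division is complex division, per the formula above.  For
+ * illustration: (1,2) / (3,4) has div = 3*3 + 4*4 = 25 and yields
+ * (11/25, 2/25) = (0.44, 0.08); multiplying that back by (3,4) recovers
+ * (1,2).
+ */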
+
+Datum
+point_div(PG_FUNCTION_ARGS)
+{
+ Point *p1 = PG_GETARG_POINT_P(0);
+ Point *p2 = PG_GETARG_POINT_P(1);
+ Point *result;
+
+ result = (Point *) palloc(sizeof(Point));
+
+ point_div_point(result, p1, p2);
+
+ PG_RETURN_POINT_P(result);
+}
+
+
+/***********************************************************************
+ **
+ ** Routines for 2D boxes.
+ **
+ ***********************************************************************/
+
+Datum
+points_box(PG_FUNCTION_ARGS)
+{
+ Point *p1 = PG_GETARG_POINT_P(0);
+ Point *p2 = PG_GETARG_POINT_P(1);
+ BOX *result;
+
+ result = (BOX *) palloc(sizeof(BOX));
+
+ box_construct(result, p1, p2);
+
+ PG_RETURN_BOX_P(result);
+}
+
+Datum
+box_add(PG_FUNCTION_ARGS)
+{
+ BOX *box = PG_GETARG_BOX_P(0);
+ Point *p = PG_GETARG_POINT_P(1);
+ BOX *result;
+
+ result = (BOX *) palloc(sizeof(BOX));
+
+ point_add_point(&result->high, &box->high, p);
+ point_add_point(&result->low, &box->low, p);
+
+ PG_RETURN_BOX_P(result);
+}
+
+Datum
+box_sub(PG_FUNCTION_ARGS)
+{
+ BOX *box = PG_GETARG_BOX_P(0);
+ Point *p = PG_GETARG_POINT_P(1);
+ BOX *result;
+
+ result = (BOX *) palloc(sizeof(BOX));
+
+ point_sub_point(&result->high, &box->high, p);
+ point_sub_point(&result->low, &box->low, p);
+
+ PG_RETURN_BOX_P(result);
+}
+
+Datum
+box_mul(PG_FUNCTION_ARGS)
+{
+ BOX *box = PG_GETARG_BOX_P(0);
+ Point *p = PG_GETARG_POINT_P(1);
+ BOX *result;
+ Point high,
+ low;
+
+ result = (BOX *) palloc(sizeof(BOX));
+
+ point_mul_point(&high, &box->high, p);
+ point_mul_point(&low, &box->low, p);
+
+ box_construct(result, &high, &low);
+
+ PG_RETURN_BOX_P(result);
+}
+
+Datum
+box_div(PG_FUNCTION_ARGS)
+{
+ BOX *box = PG_GETARG_BOX_P(0);
+ Point *p = PG_GETARG_POINT_P(1);
+ BOX *result;
+ Point high,
+ low;
+
+ result = (BOX *) palloc(sizeof(BOX));
+
+ point_div_point(&high, &box->high, p);
+ point_div_point(&low, &box->low, p);
+
+ box_construct(result, &high, &low);
+
+ PG_RETURN_BOX_P(result);
+}
+
+/*
+ * Convert point to empty box
+ */
+Datum
+point_box(PG_FUNCTION_ARGS)
+{
+ Point *pt = PG_GETARG_POINT_P(0);
+ BOX *box;
+
+ box = (BOX *) palloc(sizeof(BOX));
+
+ box->high.x = pt->x;
+ box->low.x = pt->x;
+ box->high.y = pt->y;
+ box->low.y = pt->y;
+
+ PG_RETURN_BOX_P(box);
+}
+
+/*
+ * Smallest bounding box that includes both of the given boxes
+ */
+Datum
+boxes_bound_box(PG_FUNCTION_ARGS)
+{
+ BOX *box1 = PG_GETARG_BOX_P(0),
+ *box2 = PG_GETARG_BOX_P(1),
+ *container;
+
+ container = (BOX *) palloc(sizeof(BOX));
+
+ container->high.x = float8_max(box1->high.x, box2->high.x);
+ container->low.x = float8_min(box1->low.x, box2->low.x);
+ container->high.y = float8_max(box1->high.y, box2->high.y);
+ container->low.y = float8_min(box1->low.y, box2->low.y);
+
+ PG_RETURN_BOX_P(container);
+}
+
+
+/***********************************************************************
+ **
+ ** Routines for 2D paths.
+ **
+ ***********************************************************************/
+
+/* path_add()
+ * Concatenate two paths (only if they are both open).
+ */
+Datum
+path_add(PG_FUNCTION_ARGS)
+{
+ PATH *p1 = PG_GETARG_PATH_P(0);
+ PATH *p2 = PG_GETARG_PATH_P(1);
+ PATH *result;
+ int size,
+ base_size;
+ int i;
+
+ if (p1->closed || p2->closed)
+ PG_RETURN_NULL();
+
+ base_size = sizeof(p1->p[0]) * (p1->npts + p2->npts);
+ size = offsetof(PATH, p) + base_size;
+
+ /* Check for integer overflow */
+ if (base_size / sizeof(p1->p[0]) != (p1->npts + p2->npts) ||
+ size <= base_size)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("too many points requested")));
+
+ result = (PATH *) palloc(size);
+
+ SET_VARSIZE(result, size);
+ result->npts = (p1->npts + p2->npts);
+ result->closed = p1->closed;
+ /* prevent instability in unused pad bytes */
+ result->dummy = 0;
+
+ for (i = 0; i < p1->npts; i++)
+ {
+ result->p[i].x = p1->p[i].x;
+ result->p[i].y = p1->p[i].y;
+ }
+ for (i = 0; i < p2->npts; i++)
+ {
+ result->p[i + p1->npts].x = p2->p[i].x;
+ result->p[i + p1->npts].y = p2->p[i].y;
+ }
+
+ PG_RETURN_PATH_P(result);
+}
+
+/* path_add_pt()
+ * Translation operators.
+ */
+Datum
+path_add_pt(PG_FUNCTION_ARGS)
+{
+ PATH *path = PG_GETARG_PATH_P_COPY(0);
+ Point *point = PG_GETARG_POINT_P(1);
+ int i;
+
+ for (i = 0; i < path->npts; i++)
+ point_add_point(&path->p[i], &path->p[i], point);
+
+ PG_RETURN_PATH_P(path);
+}
+
+Datum
+path_sub_pt(PG_FUNCTION_ARGS)
+{
+ PATH *path = PG_GETARG_PATH_P_COPY(0);
+ Point *point = PG_GETARG_POINT_P(1);
+ int i;
+
+ for (i = 0; i < path->npts; i++)
+ point_sub_point(&path->p[i], &path->p[i], point);
+
+ PG_RETURN_PATH_P(path);
+}
+
+/* path_mul_pt()
+ * Rotation and scaling operators.
+ */
+Datum
+path_mul_pt(PG_FUNCTION_ARGS)
+{
+ PATH *path = PG_GETARG_PATH_P_COPY(0);
+ Point *point = PG_GETARG_POINT_P(1);
+ int i;
+
+ for (i = 0; i < path->npts; i++)
+ point_mul_point(&path->p[i], &path->p[i], point);
+
+ PG_RETURN_PATH_P(path);
+}
+
+Datum
+path_div_pt(PG_FUNCTION_ARGS)
+{
+ PATH *path = PG_GETARG_PATH_P_COPY(0);
+ Point *point = PG_GETARG_POINT_P(1);
+ int i;
+
+ for (i = 0; i < path->npts; i++)
+ point_div_point(&path->p[i], &path->p[i], point);
+
+ PG_RETURN_PATH_P(path);
+}
+
+
+Datum
+path_poly(PG_FUNCTION_ARGS)
+{
+ PATH *path = PG_GETARG_PATH_P(0);
+ POLYGON *poly;
+ int size;
+ int i;
+
+ /* This is not very consistent --- other similar cases return NULL ... */
+ if (!path->closed)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("open path cannot be converted to polygon")));
+
+ /*
+ * Never overflows: the old size fit in MaxAllocSize, and the new size is
+ * just a small constant larger.
+ */
+ size = offsetof(POLYGON, p) + sizeof(poly->p[0]) * path->npts;
+ poly = (POLYGON *) palloc(size);
+
+ SET_VARSIZE(poly, size);
+ poly->npts = path->npts;
+
+ for (i = 0; i < path->npts; i++)
+ {
+ poly->p[i].x = path->p[i].x;
+ poly->p[i].y = path->p[i].y;
+ }
+
+ make_bound_box(poly);
+
+ PG_RETURN_POLYGON_P(poly);
+}
+
+
+/***********************************************************************
+ **
+ ** Routines for 2D polygons.
+ **
+ ***********************************************************************/
+
+Datum
+poly_npoints(PG_FUNCTION_ARGS)
+{
+ POLYGON *poly = PG_GETARG_POLYGON_P(0);
+
+ PG_RETURN_INT32(poly->npts);
+}
+
+
+Datum
+poly_center(PG_FUNCTION_ARGS)
+{
+ POLYGON *poly = PG_GETARG_POLYGON_P(0);
+ Point *result;
+ CIRCLE circle;
+
+ result = (Point *) palloc(sizeof(Point));
+
+ poly_to_circle(&circle, poly);
+ *result = circle.center;
+
+ PG_RETURN_POINT_P(result);
+}
+
+
+Datum
+poly_box(PG_FUNCTION_ARGS)
+{
+ POLYGON *poly = PG_GETARG_POLYGON_P(0);
+ BOX *box;
+
+ box = (BOX *) palloc(sizeof(BOX));
+ *box = poly->boundbox;
+
+ PG_RETURN_BOX_P(box);
+}
+
+
+/* box_poly()
+ * Convert a box to a polygon.
+ */
+Datum
+box_poly(PG_FUNCTION_ARGS)
+{
+ BOX *box = PG_GETARG_BOX_P(0);
+ POLYGON *poly;
+ int size;
+
+ /* map four corners of the box to a polygon */
+ size = offsetof(POLYGON, p) + sizeof(poly->p[0]) * 4;
+ poly = (POLYGON *) palloc(size);
+
+ SET_VARSIZE(poly, size);
+ poly->npts = 4;
+
+ poly->p[0].x = box->low.x;
+ poly->p[0].y = box->low.y;
+ poly->p[1].x = box->low.x;
+ poly->p[1].y = box->high.y;
+ poly->p[2].x = box->high.x;
+ poly->p[2].y = box->high.y;
+ poly->p[3].x = box->high.x;
+ poly->p[3].y = box->low.y;
+
+ box_construct(&poly->boundbox, &box->high, &box->low);
+
+ PG_RETURN_POLYGON_P(poly);
+}
+
+
+Datum
+poly_path(PG_FUNCTION_ARGS)
+{
+ POLYGON *poly = PG_GETARG_POLYGON_P(0);
+ PATH *path;
+ int size;
+ int i;
+
+ /*
+ * Never overflows: the old size fit in MaxAllocSize, and the new size is
+ * smaller by a small constant.
+ */
+ size = offsetof(PATH, p) + sizeof(path->p[0]) * poly->npts;
+ path = (PATH *) palloc(size);
+
+ SET_VARSIZE(path, size);
+ path->npts = poly->npts;
+ path->closed = true;
+ /* prevent instability in unused pad bytes */
+ path->dummy = 0;
+
+ for (i = 0; i < poly->npts; i++)
+ {
+ path->p[i].x = poly->p[i].x;
+ path->p[i].y = poly->p[i].y;
+ }
+
+ PG_RETURN_PATH_P(path);
+}
+
+
+/***********************************************************************
+ **
+ ** Routines for circles.
+ **
+ ***********************************************************************/
+
+/*----------------------------------------------------------
+ * Formatting and conversion routines.
+ *---------------------------------------------------------*/
+
+/* circle_in - convert a string to internal form.
+ *
+ * External format: (center and radius of circle)
+ * "<(f8,f8),f8>"
+ * also supports quick entry style "f8,f8,f8"
+ */
+Datum
+circle_in(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+ CIRCLE *circle = (CIRCLE *) palloc(sizeof(CIRCLE));
+ char *s,
+ *cp;
+ int depth = 0;
+
+ s = str;
+ while (isspace((unsigned char) *s))
+ s++;
+ if (*s == LDELIM_C)
+ depth++, s++;
+ else if (*s == LDELIM)
+ {
+ /* If there are two left parens, consume the first one */
+ cp = (s + 1);
+ while (isspace((unsigned char) *cp))
+ cp++;
+ if (*cp == LDELIM)
+ depth++, s = cp;
+ }
+
+ /* pair_decode will consume parens around the pair, if any */
+ pair_decode(s, &circle->center.x, &circle->center.y, &s, "circle", str);
+
+ if (*s == DELIM)
+ s++;
+
+ circle->radius = single_decode(s, &s, "circle", str);
+ /* We have to accept NaN. */
+ if (circle->radius < 0.0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "circle", str)));
+
+ while (depth > 0)
+ {
+ if ((*s == RDELIM) || ((*s == RDELIM_C) && (depth == 1)))
+ {
+ depth--;
+ s++;
+ while (isspace((unsigned char) *s))
+ s++;
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "circle", str)));
+ }
+
+ if (*s != '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "circle", str)));
+
+ PG_RETURN_CIRCLE_P(circle);
+}
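+
+/*
+ * Illustrative usage at the SQL level (made-up values): per the external
+ * formats listed above, both '<(1,2),3>'::circle and '1,2,3'::circle denote
+ * the circle centered at (1,2) with radius 3.
+ */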
+
+/* circle_out - convert a circle to external form.
+ */
+Datum
+circle_out(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle = PG_GETARG_CIRCLE_P(0);
+ StringInfoData str;
+
+ initStringInfo(&str);
+
+ appendStringInfoChar(&str, LDELIM_C);
+ appendStringInfoChar(&str, LDELIM);
+ pair_encode(circle->center.x, circle->center.y, &str);
+ appendStringInfoChar(&str, RDELIM);
+ appendStringInfoChar(&str, DELIM);
+ single_encode(circle->radius, &str);
+ appendStringInfoChar(&str, RDELIM_C);
+
+ PG_RETURN_CSTRING(str.data);
+}
+
+/*
+ * circle_recv - converts external binary format to circle
+ */
+Datum
+circle_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ CIRCLE *circle;
+
+ circle = (CIRCLE *) palloc(sizeof(CIRCLE));
+
+ circle->center.x = pq_getmsgfloat8(buf);
+ circle->center.y = pq_getmsgfloat8(buf);
+ circle->radius = pq_getmsgfloat8(buf);
+
+ /* We have to accept NaN. */
+ if (circle->radius < 0.0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("invalid radius in external \"circle\" value")));
+
+ PG_RETURN_CIRCLE_P(circle);
+}
+
+/*
+ * circle_send - converts circle to binary format
+ */
+Datum
+circle_send(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle = PG_GETARG_CIRCLE_P(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendfloat8(&buf, circle->center.x);
+ pq_sendfloat8(&buf, circle->center.y);
+ pq_sendfloat8(&buf, circle->radius);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+
+/*----------------------------------------------------------
+ * Relational operators for CIRCLEs.
+ * <, >, <=, >=, and == are based on circle area.
+ *---------------------------------------------------------*/
+
+/* circles identical?
+ *
+ * We consider NaN values to be equal to each other so that such circles
+ * can be found.
+ */
+Datum
+circle_same(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0);
+ CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1);
+
+ PG_RETURN_BOOL(((isnan(circle1->radius) && isnan(circle2->radius)) ||
+ FPeq(circle1->radius, circle2->radius)) &&
+ point_eq_point(&circle1->center, &circle2->center));
+}
+
+/* circle_overlap - does circle1 overlap circle2?
+ */
+Datum
+circle_overlap(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0);
+ CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1);
+
+ PG_RETURN_BOOL(FPle(point_dt(&circle1->center, &circle2->center),
+ float8_pl(circle1->radius, circle2->radius)));
+}
+
+/* circle_overleft - is the right edge of circle1 at or left of
+ * the right edge of circle2?
+ */
+Datum
+circle_overleft(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0);
+ CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1);
+
+ PG_RETURN_BOOL(FPle(float8_pl(circle1->center.x, circle1->radius),
+ float8_pl(circle2->center.x, circle2->radius)));
+}
+
+/* circle_left - is circle1 strictly left of circle2?
+ */
+Datum
+circle_left(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0);
+ CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1);
+
+ PG_RETURN_BOOL(FPlt(float8_pl(circle1->center.x, circle1->radius),
+ float8_mi(circle2->center.x, circle2->radius)));
+}
+
+/* circle_right - is circle1 strictly right of circle2?
+ */
+Datum
+circle_right(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0);
+ CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1);
+
+ PG_RETURN_BOOL(FPgt(float8_mi(circle1->center.x, circle1->radius),
+ float8_pl(circle2->center.x, circle2->radius)));
+}
+
+/* circle_overright - is the left edge of circle1 at or right of
+ * the left edge of circle2?
+ */
+Datum
+circle_overright(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0);
+ CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1);
+
+ PG_RETURN_BOOL(FPge(float8_mi(circle1->center.x, circle1->radius),
+ float8_mi(circle2->center.x, circle2->radius)));
+}
+
+/* circle_contained - is circle1 contained by circle2?
+ */
+Datum
+circle_contained(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0);
+ CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1);
+
+ PG_RETURN_BOOL(FPle(point_dt(&circle1->center, &circle2->center),
+ float8_mi(circle2->radius, circle1->radius)));
+}
+
+/* circle_contain - does circle1 contain circle2?
+ */
+Datum
+circle_contain(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0);
+ CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1);
+
+ PG_RETURN_BOOL(FPle(point_dt(&circle1->center, &circle2->center),
+ float8_mi(circle1->radius, circle2->radius)));
+}
+
+
+/* circle_below - is circle1 strictly below circle2?
+ */
+Datum
+circle_below(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0);
+ CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1);
+
+ PG_RETURN_BOOL(FPlt(float8_pl(circle1->center.y, circle1->radius),
+ float8_mi(circle2->center.y, circle2->radius)));
+}
+
+/* circle_above - is circle1 strictly above circle2?
+ */
+Datum
+circle_above(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0);
+ CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1);
+
+ PG_RETURN_BOOL(FPgt(float8_mi(circle1->center.y, circle1->radius),
+ float8_pl(circle2->center.y, circle2->radius)));
+}
+
+/* circle_overbelow - is the upper edge of circle1 at or below
+ * the upper edge of circle2?
+ */
+Datum
+circle_overbelow(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0);
+ CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1);
+
+ PG_RETURN_BOOL(FPle(float8_pl(circle1->center.y, circle1->radius),
+ float8_pl(circle2->center.y, circle2->radius)));
+}
+
+/* circle_overabove - is the lower edge of circle1 at or above
+ * the lower edge of circle2?
+ */
+Datum
+circle_overabove(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0);
+ CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1);
+
+ PG_RETURN_BOOL(FPge(float8_mi(circle1->center.y, circle1->radius),
+ float8_mi(circle2->center.y, circle2->radius)));
+}
+
+
+/* circle_relop - is area(circle1) relop area(circle2), within
+ * our accuracy constraint?
+ */
+Datum
+circle_eq(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0);
+ CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1);
+
+ PG_RETURN_BOOL(FPeq(circle_ar(circle1), circle_ar(circle2)));
+}
+
+Datum
+circle_ne(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0);
+ CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1);
+
+ PG_RETURN_BOOL(FPne(circle_ar(circle1), circle_ar(circle2)));
+}
+
+Datum
+circle_lt(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0);
+ CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1);
+
+ PG_RETURN_BOOL(FPlt(circle_ar(circle1), circle_ar(circle2)));
+}
+
+Datum
+circle_gt(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0);
+ CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1);
+
+ PG_RETURN_BOOL(FPgt(circle_ar(circle1), circle_ar(circle2)));
+}
+
+Datum
+circle_le(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0);
+ CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1);
+
+ PG_RETURN_BOOL(FPle(circle_ar(circle1), circle_ar(circle2)));
+}
+
+Datum
+circle_ge(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0);
+ CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1);
+
+ PG_RETURN_BOOL(FPge(circle_ar(circle1), circle_ar(circle2)));
+}
+
+
+/*----------------------------------------------------------
+ * "Arithmetic" operators on circles.
+ *---------------------------------------------------------*/
+
+/* circle_add_pt()
+ * Translation operator.
+ */
+Datum
+circle_add_pt(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle = PG_GETARG_CIRCLE_P(0);
+ Point *point = PG_GETARG_POINT_P(1);
+ CIRCLE *result;
+
+ result = (CIRCLE *) palloc(sizeof(CIRCLE));
+
+ point_add_point(&result->center, &circle->center, point);
+ result->radius = circle->radius;
+
+ PG_RETURN_CIRCLE_P(result);
+}
+
+Datum
+circle_sub_pt(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle = PG_GETARG_CIRCLE_P(0);
+ Point *point = PG_GETARG_POINT_P(1);
+ CIRCLE *result;
+
+ result = (CIRCLE *) palloc(sizeof(CIRCLE));
+
+ point_sub_point(&result->center, &circle->center, point);
+ result->radius = circle->radius;
+
+ PG_RETURN_CIRCLE_P(result);
+}
+
+
+/* circle_mul_pt()
+ * Rotation and scaling operators.
+ */
+Datum
+circle_mul_pt(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle = PG_GETARG_CIRCLE_P(0);
+ Point *point = PG_GETARG_POINT_P(1);
+ CIRCLE *result;
+
+ result = (CIRCLE *) palloc(sizeof(CIRCLE));
+
+ point_mul_point(&result->center, &circle->center, point);
+ result->radius = float8_mul(circle->radius, HYPOT(point->x, point->y));
+
+ PG_RETURN_CIRCLE_P(result);
+}
+
+Datum
+circle_div_pt(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle = PG_GETARG_CIRCLE_P(0);
+ Point *point = PG_GETARG_POINT_P(1);
+ CIRCLE *result;
+
+ result = (CIRCLE *) palloc(sizeof(CIRCLE));
+
+ point_div_point(&result->center, &circle->center, point);
+ result->radius = float8_div(circle->radius, HYPOT(point->x, point->y));
+
+ PG_RETURN_CIRCLE_P(result);
+}
+
+
+/* circle_area - returns the area of the circle.
+ */
+Datum
+circle_area(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle = PG_GETARG_CIRCLE_P(0);
+
+ PG_RETURN_FLOAT8(circle_ar(circle));
+}
+
+
+/* circle_diameter - returns the diameter of the circle.
+ */
+Datum
+circle_diameter(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle = PG_GETARG_CIRCLE_P(0);
+
+ PG_RETURN_FLOAT8(float8_mul(circle->radius, 2.0));
+}
+
+
+/* circle_radius - returns the radius of the circle.
+ */
+Datum
+circle_radius(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle = PG_GETARG_CIRCLE_P(0);
+
+ PG_RETURN_FLOAT8(circle->radius);
+}
+
+
+/* circle_distance - returns the distance between
+ * two circles.
+ */
+Datum
+circle_distance(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle1 = PG_GETARG_CIRCLE_P(0);
+ CIRCLE *circle2 = PG_GETARG_CIRCLE_P(1);
+ float8 result;
+
+ result = float8_mi(point_dt(&circle1->center, &circle2->center),
+ float8_pl(circle1->radius, circle2->radius));
+ if (result < 0.0)
+ result = 0.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+
+Datum
+circle_contain_pt(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle = PG_GETARG_CIRCLE_P(0);
+ Point *point = PG_GETARG_POINT_P(1);
+ float8 d;
+
+ d = point_dt(&circle->center, point);
+ PG_RETURN_BOOL(d <= circle->radius);
+}
+
+
+Datum
+pt_contained_circle(PG_FUNCTION_ARGS)
+{
+ Point *point = PG_GETARG_POINT_P(0);
+ CIRCLE *circle = PG_GETARG_CIRCLE_P(1);
+ float8 d;
+
+ d = point_dt(&circle->center, point);
+ PG_RETURN_BOOL(d <= circle->radius);
+}
+
+
+/* dist_pc - returns the distance between
+ * a point and a circle.
+ */
+Datum
+dist_pc(PG_FUNCTION_ARGS)
+{
+ Point *point = PG_GETARG_POINT_P(0);
+ CIRCLE *circle = PG_GETARG_CIRCLE_P(1);
+ float8 result;
+
+ result = float8_mi(point_dt(point, &circle->center),
+ circle->radius);
+ if (result < 0.0)
+ result = 0.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+/*
+ * Distance from a circle to a point
+ */
+Datum
+dist_cpoint(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle = PG_GETARG_CIRCLE_P(0);
+ Point *point = PG_GETARG_POINT_P(1);
+ float8 result;
+
+ result = float8_mi(point_dt(point, &circle->center), circle->radius);
+ if (result < 0.0)
+ result = 0.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+/* circle_center - returns the center point of the circle.
+ */
+Datum
+circle_center(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle = PG_GETARG_CIRCLE_P(0);
+ Point *result;
+
+ result = (Point *) palloc(sizeof(Point));
+ result->x = circle->center.x;
+ result->y = circle->center.y;
+
+ PG_RETURN_POINT_P(result);
+}
+
+
+/* circle_ar - returns the area of the circle.
+ */
+static float8
+circle_ar(CIRCLE *circle)
+{
+ return float8_mul(float8_mul(circle->radius, circle->radius), M_PI);
+}
+
+
+/*----------------------------------------------------------
+ * Conversion operators.
+ *---------------------------------------------------------*/
+
+Datum
+cr_circle(PG_FUNCTION_ARGS)
+{
+ Point *center = PG_GETARG_POINT_P(0);
+ float8 radius = PG_GETARG_FLOAT8(1);
+ CIRCLE *result;
+
+ result = (CIRCLE *) palloc(sizeof(CIRCLE));
+
+ result->center.x = center->x;
+ result->center.y = center->y;
+ result->radius = radius;
+
+ PG_RETURN_CIRCLE_P(result);
+}
+
+Datum
+circle_box(PG_FUNCTION_ARGS)
+{
+ CIRCLE *circle = PG_GETARG_CIRCLE_P(0);
+ BOX *box;
+ float8 delta;
+
+ box = (BOX *) palloc(sizeof(BOX));
+
+ delta = float8_div(circle->radius, sqrt(2.0));
+
+ box->high.x = float8_pl(circle->center.x, delta);
+ box->low.x = float8_mi(circle->center.x, delta);
+ box->high.y = float8_pl(circle->center.y, delta);
+ box->low.y = float8_mi(circle->center.y, delta);
+
+ PG_RETURN_BOX_P(box);
+}
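+
+/*
+ * Illustrative example for circle_box(): a circle centered at (0,0) with
+ * radius 2 gives delta = 2/sqrt(2) = sqrt(2), so the result is the square
+ * from (-sqrt(2),-sqrt(2)) to (sqrt(2),sqrt(2)), the largest axis-aligned
+ * box inscribed in the circle.
+ */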
+
+/* box_circle()
+ * Convert a box to a circle.
+ */
+Datum
+box_circle(PG_FUNCTION_ARGS)
+{
+ BOX *box = PG_GETARG_BOX_P(0);
+ CIRCLE *circle;
+
+ circle = (CIRCLE *) palloc(sizeof(CIRCLE));
+
+ circle->center.x = float8_div(float8_pl(box->high.x, box->low.x), 2.0);
+ circle->center.y = float8_div(float8_pl(box->high.y, box->low.y), 2.0);
+
+ circle->radius = point_dt(&circle->center, &box->high);
+
+ PG_RETURN_CIRCLE_P(circle);
+}
+
+
+Datum
+circle_poly(PG_FUNCTION_ARGS)
+{
+ int32 npts = PG_GETARG_INT32(0);
+ CIRCLE *circle = PG_GETARG_CIRCLE_P(1);
+ POLYGON *poly;
+ int base_size,
+ size;
+ int i;
+ float8 angle;
+ float8 anglestep;
+
+ if (FPzero(circle->radius))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot convert circle with radius zero to polygon")));
+
+ if (npts < 2)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("must request at least 2 points")));
+
+ base_size = sizeof(poly->p[0]) * npts;
+ size = offsetof(POLYGON, p) + base_size;
+
+ /* Check for integer overflow */
+ if (base_size / npts != sizeof(poly->p[0]) || size <= base_size)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("too many points requested")));
+
+ poly = (POLYGON *) palloc0(size); /* zero any holes */
+ SET_VARSIZE(poly, size);
+ poly->npts = npts;
+
+ anglestep = float8_div(2.0 * M_PI, npts);
+
+ for (i = 0; i < npts; i++)
+ {
+ angle = float8_mul(anglestep, i);
+
+ poly->p[i].x = float8_mi(circle->center.x,
+ float8_mul(circle->radius, cos(angle)));
+ poly->p[i].y = float8_pl(circle->center.y,
+ float8_mul(circle->radius, sin(angle)));
+ }
+
+ make_bound_box(poly);
+
+ PG_RETURN_POLYGON_P(poly);
+}
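+
+/*
+ * Illustrative example for circle_poly(): converting the unit circle
+ * centered at (0,0) with npts = 4 yields the vertices (-1,0), (0,1), (1,0)
+ * and (0,-1), i.e. a square inscribed in the circle.
+ */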
+
+/*
+ * Convert polygon to circle
+ *
+ * The result must be preallocated.
+ *
+ * XXX This algorithm should use weighted means of line segments
+ * rather than straight average values of points - tgl 97/01/21.
+ */
+static void
+poly_to_circle(CIRCLE *result, POLYGON *poly)
+{
+ int i;
+
+ Assert(poly->npts > 0);
+
+ result->center.x = 0;
+ result->center.y = 0;
+ result->radius = 0;
+
+ for (i = 0; i < poly->npts; i++)
+ point_add_point(&result->center, &result->center, &poly->p[i]);
+ result->center.x = float8_div(result->center.x, poly->npts);
+ result->center.y = float8_div(result->center.y, poly->npts);
+
+ for (i = 0; i < poly->npts; i++)
+ result->radius = float8_pl(result->radius,
+ point_dt(&poly->p[i], &result->center));
+ result->radius = float8_div(result->radius, poly->npts);
+}
+
+Datum
+poly_circle(PG_FUNCTION_ARGS)
+{
+ POLYGON *poly = PG_GETARG_POLYGON_P(0);
+ CIRCLE *result;
+
+ result = (CIRCLE *) palloc(sizeof(CIRCLE));
+
+ poly_to_circle(result, poly);
+
+ PG_RETURN_CIRCLE_P(result);
+}
+
+
+/***********************************************************************
+ **
+ ** Private routines for multiple types.
+ **
+ ***********************************************************************/
+
+/*
+ * Test to see if the point is inside the polygon, returns 1/0, or 2 if
+ * the point is on the polygon.
+ * Code adapted but not copied from integer-based routines in WN: A
+ * Server for the HTTP, version 1.15.1, file wn/image.c
+ * http://hopf.math.northwestern.edu/index.html
+ * Description of algorithm: http://www.linuxjournal.com/article/2197
+ * http://www.linuxjournal.com/article/2029
+ */
+
+#define POINT_ON_POLYGON INT_MAX
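+
+/*
+ * For illustration (hypothetical values): testing the point (1,1) against
+ * the square (0,0),(2,0),(2,2),(0,2) shifts the vertices to (-1,-1),
+ * (1,-1), (1,1) and (-1,1).  Only the edge from (1,-1) to (1,1) crosses
+ * the positive X-axis (contributing +2); the other edges contribute 0,
+ * so total_cross is nonzero and the point is reported as inside.
+ */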
+
+static int
+point_inside(Point *p, int npts, Point *plist)
+{
+ float8 x0,
+ y0;
+ float8 prev_x,
+ prev_y;
+ int i = 0;
+ float8 x,
+ y;
+ int cross,
+ total_cross = 0;
+
+ Assert(npts > 0);
+
+ /* compute first polygon point relative to single point */
+ x0 = float8_mi(plist[0].x, p->x);
+ y0 = float8_mi(plist[0].y, p->y);
+
+ prev_x = x0;
+ prev_y = y0;
+ /* loop over polygon points and aggregate total_cross */
+ for (i = 1; i < npts; i++)
+ {
+ /* compute next polygon point relative to single point */
+ x = float8_mi(plist[i].x, p->x);
+ y = float8_mi(plist[i].y, p->y);
+
+ /* compute previous to current point crossing */
+ if ((cross = lseg_crossing(x, y, prev_x, prev_y)) == POINT_ON_POLYGON)
+ return 2;
+ total_cross += cross;
+
+ prev_x = x;
+ prev_y = y;
+ }
+
+ /* now do the first point */
+ if ((cross = lseg_crossing(x0, y0, prev_x, prev_y)) == POINT_ON_POLYGON)
+ return 2;
+ total_cross += cross;
+
+ if (total_cross != 0)
+ return 1;
+ return 0;
+}
+
+
+/* lseg_crossing()
+ * Returns +/-2 if line segment crosses the positive X-axis in a +/- direction.
+ * Returns +/-1 if one point is on the positive X-axis.
+ * Returns 0 if both points are on the positive X-axis, or there is no crossing.
+ * Returns POINT_ON_POLYGON if the segment contains (0,0).
+ * Wow, that is one confusing API, but it is used above, and when summed,
+ * can tell us whether a point is in a polygon.
+ */
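+
+/*
+ * For illustration (hypothetical values): a segment from (1,-1) to (1,1)
+ * crosses the positive X-axis upward and returns +2, while the reverse
+ * segment returns -2.  A segment that merely ends on the positive X-axis,
+ * such as (1,-1) to (1,0), returns +/-1, so that two such half-crossings
+ * sum to a full crossing.  A segment with (0,0) on it returns
+ * POINT_ON_POLYGON.
+ */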
+
+static int
+lseg_crossing(float8 x, float8 y, float8 prev_x, float8 prev_y)
+{
+ float8 z;
+ int y_sign;
+
+ if (FPzero(y))
+ { /* y == 0, on X axis */
+ if (FPzero(x)) /* (x,y) is (0,0)? */
+ return POINT_ON_POLYGON;
+ else if (FPgt(x, 0))
+ { /* x > 0 */
+ if (FPzero(prev_y)) /* y and prev_y are zero */
+ /* prev_x > 0? */
+ return FPgt(prev_x, 0.0) ? 0 : POINT_ON_POLYGON;
+ return FPlt(prev_y, 0.0) ? 1 : -1;
+ }
+ else
+ { /* x < 0, x not on positive X axis */
+ if (FPzero(prev_y))
+ /* prev_x < 0? */
+ return FPlt(prev_x, 0.0) ? 0 : POINT_ON_POLYGON;
+ return 0;
+ }
+ }
+ else
+ { /* y != 0 */
+ /* compute y crossing direction from previous point */
+ y_sign = FPgt(y, 0.0) ? 1 : -1;
+
+ if (FPzero(prev_y))
+ /* previous point was on X axis, so new point is either off or on */
+ return FPlt(prev_x, 0.0) ? 0 : y_sign;
+ else if ((y_sign < 0 && FPlt(prev_y, 0.0)) ||
+ (y_sign > 0 && FPgt(prev_y, 0.0)))
+ /* both above or below X axis */
+ return 0; /* same sign */
+ else
+ { /* y and prev_y cross X-axis */
+ if (FPge(x, 0.0) && FPgt(prev_x, 0.0))
+ /* both non-negative so cross positive X-axis */
+ return 2 * y_sign;
+ if (FPlt(x, 0.0) && FPle(prev_x, 0.0))
+ /* both non-positive so do not cross positive X-axis */
+ return 0;
+
+ /* x and y cross axes, see URL above point_inside() */
+ z = float8_mi(float8_mul(float8_mi(x, prev_x), y),
+ float8_mul(float8_mi(y, prev_y), x));
+ if (FPzero(z))
+ return POINT_ON_POLYGON;
+ if ((y_sign < 0 && FPlt(z, 0.0)) ||
+ (y_sign > 0 && FPgt(z, 0.0)))
+ return 0;
+ return 2 * y_sign;
+ }
+ }
+}
+
+
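+/*
+ * plist_same - check whether two closed lists of npts points describe the
+ * same polygon boundary.  The lists may start at different vertices and may
+ * run in opposite directions, so we look for a match for p1[0] within p2,
+ * then compare the remaining points both forwards and backwards from there.
+ */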
+static bool
+plist_same(int npts, Point *p1, Point *p2)
+{
+ int i,
+ ii,
+ j;
+
+ /* find match for first point */
+ for (i = 0; i < npts; i++)
+ {
+ if (point_eq_point(&p2[i], &p1[0]))
+ {
+
+ /* match found? then look forward through remaining points */
+ for (ii = 1, j = i + 1; ii < npts; ii++, j++)
+ {
+ if (j >= npts)
+ j = 0;
+ if (!point_eq_point(&p2[j], &p1[ii]))
+ break;
+ }
+ if (ii == npts)
+ return true;
+
+ /* match not found forwards? then look backwards */
+ for (ii = 1, j = i - 1; ii < npts; ii++, j--)
+ {
+ if (j < 0)
+ j = (npts - 1);
+ if (!point_eq_point(&p2[j], &p1[ii]))
+ break;
+ }
+ if (ii == npts)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+
+/*-------------------------------------------------------------------------
+ * Determine the hypotenuse.
+ *
+ * If required, x and y are swapped to make x the larger number. The
+ * traditional formula of x^2+y^2 is rearranged to factor x outside the
+ * sqrt. This allows computation of the hypotenuse for significantly
+ * larger values, and with a higher precision than when using the naive
+ * formula. In particular, this cannot overflow unless the final result
+ * would be out-of-range.
+ *
+ * sqrt( x^2 + y^2 ) = sqrt( x^2( 1 + y^2/x^2) )
+ * = x * sqrt( 1 + y^2/x^2 )
+ * = x * sqrt( 1 + y/x * y/x )
+ *
+ * It is expected that this routine will eventually be replaced with the
+ * C99 hypot() function.
+ *
+ * This implementation conforms to IEEE Std 1003.1 and GLIBC, in that the
+ * case of hypot(inf,nan) results in INF, and not NAN.
+ *-----------------------------------------------------------------------
+ */
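+
+/*
+ * For illustration (hypothetical values): pg_hypot(3e200, 4e200) swaps the
+ * arguments so that x = 4e200, computes yx = 0.75, and returns
+ * 4e200 * sqrt(1 + 0.5625) = 5e200, whereas evaluating x*x + y*y directly
+ * would already overflow.
+ */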
+float8
+pg_hypot(float8 x, float8 y)
+{
+ float8 yx,
+ result;
+
+ /* Handle INF and NaN properly */
+ if (isinf(x) || isinf(y))
+ return get_float8_infinity();
+
+ if (isnan(x) || isnan(y))
+ return get_float8_nan();
+
+ /* Else, drop any minus signs */
+ x = fabs(x);
+ y = fabs(y);
+
+ /* Swap x and y if needed to make x the larger one */
+ if (x < y)
+ {
+ float8 temp = x;
+
+ x = y;
+ y = temp;
+ }
+
+ /*
+ * If y is zero, the hypotenuse is x. This test saves a few cycles in
+ * such cases, but more importantly it also protects against
+ * divide-by-zero errors, since now x >= y.
+ */
+ if (y == 0.0)
+ return x;
+
+ /* Determine the hypotenuse */
+ yx = y / x;
+ result = x * sqrt(1.0 + (yx * yx));
+
+ if (unlikely(isinf(result)))
+ float_overflow_error();
+ if (unlikely(result == 0.0))
+ float_underflow_error();
+
+ return result;
+}
diff --git a/src/backend/utils/adt/geo_selfuncs.c b/src/backend/utils/adt/geo_selfuncs.c
new file mode 100644
index 0000000..9deccab
--- /dev/null
+++ b/src/backend/utils/adt/geo_selfuncs.c
@@ -0,0 +1,96 @@
+/*-------------------------------------------------------------------------
+ *
+ * geo_selfuncs.c
+ * Selectivity routines registered in the operator catalog in the
+ * "oprrest" and "oprjoin" attributes.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/geo_selfuncs.c
+ *
+ * XXX These are totally bogus. Perhaps someone will make them do
+ * something reasonable, someday.
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "utils/builtins.h"
+#include "utils/geo_decls.h"
+
+
+/*
+ * Selectivity functions for geometric operators. These are bogus -- unless
+ * we know the actual key distribution in the index, we can't make a good
+ * prediction of the selectivity of these operators.
+ *
+ * Note: the values used here may look unreasonably small. Perhaps they
+ * are. For now, we want to make sure that the optimizer will make use
+ * of a geometric index if one is available, so the selectivity had better
+ * be fairly small.
+ *
+ * In general, GiST needs to search multiple subtrees in order to guarantee
+ * that all occurrences of the same key have been found. Because of this,
+ * the estimated cost for scanning the index ought to be higher than the
+ * output selectivity would indicate. gistcostestimate(), over in selfuncs.c,
+ * ought to be adjusted accordingly --- but until we can generate somewhat
+ * realistic numbers here, it hardly matters...
+ */
+
+
+/*
+ * Selectivity for operators that depend on area, such as "overlap".
+ */
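+
+/*
+ * For illustration: returning 0.005 here tells the planner to assume that
+ * roughly 0.5% of the rows satisfy an area-based qualifier such as an
+ * overlap test, which is small enough to make a geometric index look
+ * attractive when one exists.
+ */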
+
+Datum
+areasel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(0.005);
+}
+
+Datum
+areajoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(0.005);
+}
+
+/*
+ * positionsel
+ *
+ * How likely is a box to be strictly left of (right of, above, below)
+ * a given box?
+ */
+
+Datum
+positionsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(0.1);
+}
+
+Datum
+positionjoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(0.1);
+}
+
+/*
+ * contsel -- How likely is a box to contain (be contained by) a given box?
+ *
+ * This is a tighter constraint than "overlap", so produce a smaller
+ * estimate than areasel does.
+ */
+
+Datum
+contsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(0.001);
+}
+
+Datum
+contjoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(0.001);
+}
diff --git a/src/backend/utils/adt/geo_spgist.c b/src/backend/utils/adt/geo_spgist.c
new file mode 100644
index 0000000..5dec466
--- /dev/null
+++ b/src/backend/utils/adt/geo_spgist.c
@@ -0,0 +1,885 @@
+/*-------------------------------------------------------------------------
+ *
+ * geo_spgist.c
+ * SP-GiST implementation of 4-dimensional quad tree over boxes
+ *
+ * This module provides SP-GiST implementation for boxes using quad tree
+ * analogy in 4-dimensional space. SP-GiST doesn't allow indexing of
+ * overlapping objects. We are making 2D objects never-overlapping in
+ * 4D space. This technique has some benefits compared to traditional
+ * R-Tree which is implemented as GiST. The performance tests reveal
+ * that this technique is especially beneficial with heavily overlapping
+ * objects, so-called "spaghetti data".
+ *
+ * Unlike the original quad tree, we are splitting the tree into 16
+ * quadrants in 4D space. It is easier to imagine it as splitting space
+ * two times into 4:
+ *
+ * | |
+ * | |
+ * | -----+-----
+ * | |
+ * | |
+ * -------------+-------------
+ * |
+ * |
+ * |
+ * |
+ * |
+ *
+ * We are using the box datatype as the prefix, but we are treating boxes
+ * as points in 4-dimensional space, because 2D boxes are not enough
+ * to represent the quadrant boundaries in 4D space. They are, however,
+ * sufficient to point out the additional boundaries of the next
+ * quadrant.
+ *
+ * We are using traversal values provided by SP-GiST to calculate and
+ * to store the bounds of the quadrants, while traversing into the tree.
+ * Traversal value has all the boundaries in the 4D space, and is capable
+ * of transferring the required boundaries to the following traversal
+ * values. In conclusion, three things are necessary to calculate the
+ * next traversal value:
+ *
+ * (1) the traversal value of the parent
+ * (2) the quadrant of the current node
+ * (3) the prefix of the current node
+ *
+ * If we visualize them on our simplified drawing (see the drawing above);
+ * transferred boundaries of (1) would be the outer axis, relevant part
+ * of (2) would be the up right part of the other axis, and (3) would be
+ * the inner axis.
+ *
+ * For example, consider the case of overlapping. When recursion
+ * descends deeper and deeper down the tree, all quadrants in
+ * the current node will be checked for overlapping. The boundaries
+ * will be re-calculated for all quadrants. Overlap check answers
+ * the question: can any box from this quadrant overlap with the given
+ * box? If yes, then this quadrant will be walked. If no, then this
+ * quadrant will be skipped.
+ *
+ * This method provides restrictions for minimum and maximum values of
+ * every dimension of every corner of the box on every level of the tree
+ * except the root. For the root node, we are setting the boundaries
+ * that we don't yet have as infinity.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/geo_spgist.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/spgist.h"
+#include "access/spgist_private.h"
+#include "access/stratnum.h"
+#include "catalog/pg_type.h"
+#include "utils/float.h"
+#include "utils/fmgroids.h"
+#include "utils/fmgrprotos.h"
+#include "utils/geo_decls.h"
+
+/*
+ * Comparator for qsort
+ *
+ * We don't need to use the floating point macros in here, because this
+ * is only going to be used in a place that affects the performance
+ * of the index, not its correctness.
+ */
+static int
+compareDoubles(const void *a, const void *b)
+{
+ float8 x = *(float8 *) a;
+ float8 y = *(float8 *) b;
+
+ if (x == y)
+ return 0;
+ return (x > y) ? 1 : -1;
+}
+
+typedef struct
+{
+ float8 low;
+ float8 high;
+} Range;
+
+typedef struct
+{
+ Range left;
+ Range right;
+} RangeBox;
+
+typedef struct
+{
+ RangeBox range_box_x;
+ RangeBox range_box_y;
+} RectBox;
+
+/*
+ * Calculate the quadrant
+ *
+ * The quadrant is an 8-bit unsigned integer with the 4 least significant
+ * bits in use. This function accepts BOXes as input. They are not cast to
+ * RangeBoxes, yet. All 4 bits are set by comparing a corner of the box.
+ * This makes 16 quadrants in total.
+ */
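+
+/*
+ * For illustration (hypothetical values): with a centroid box of
+ * (0,0),(10,10) and an input box of (2,3),(12,8), low.x 2 > 0 sets 0x8,
+ * high.x 12 > 10 sets 0x4, low.y 3 > 0 sets 0x2, and high.y 8 > 10 fails,
+ * so the box is assigned to quadrant 0xE (14).
+ */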
+static uint8
+getQuadrant(BOX *centroid, BOX *inBox)
+{
+ uint8 quadrant = 0;
+
+ if (inBox->low.x > centroid->low.x)
+ quadrant |= 0x8;
+
+ if (inBox->high.x > centroid->high.x)
+ quadrant |= 0x4;
+
+ if (inBox->low.y > centroid->low.y)
+ quadrant |= 0x2;
+
+ if (inBox->high.y > centroid->high.y)
+ quadrant |= 0x1;
+
+ return quadrant;
+}
+
+/*
+ * Get RangeBox using BOX
+ *
+ * We are turning the BOX into our own structure to emphasize its function
+ * of representing a point in 4D space. It also is more convenient to
+ * access the values with this structure.
+ */
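+
+/*
+ * For illustration (hypothetical values): a box with low = (1,2) and
+ * high = (3,4) becomes the RangeBox { left = {1, 3}, right = {2, 4} },
+ * i.e. the 4D point (low.x, high.x, low.y, high.y) = (1, 3, 2, 4).
+ */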
+static RangeBox *
+getRangeBox(BOX *box)
+{
+ RangeBox *range_box = (RangeBox *) palloc(sizeof(RangeBox));
+
+ range_box->left.low = box->low.x;
+ range_box->left.high = box->high.x;
+
+ range_box->right.low = box->low.y;
+ range_box->right.high = box->high.y;
+
+ return range_box;
+}
+
+/*
+ * Initialize the traversal value
+ *
+ * In the beginning, we don't have any restrictions. We have to
+ * initialize the struct to cover the whole 4D space.
+ */
+static RectBox *
+initRectBox(void)
+{
+ RectBox *rect_box = (RectBox *) palloc(sizeof(RectBox));
+ float8 infinity = get_float8_infinity();
+
+ rect_box->range_box_x.left.low = -infinity;
+ rect_box->range_box_x.left.high = infinity;
+
+ rect_box->range_box_x.right.low = -infinity;
+ rect_box->range_box_x.right.high = infinity;
+
+ rect_box->range_box_y.left.low = -infinity;
+ rect_box->range_box_y.left.high = infinity;
+
+ rect_box->range_box_y.right.low = -infinity;
+ rect_box->range_box_y.right.high = infinity;
+
+ return rect_box;
+}
+
+/*
+ * Calculate the next traversal value
+ *
+ * All centroids are bounded by RectBox, but SP-GiST only keeps
+ * boxes. When we are traversing the tree, we must calculate RectBox,
+ * using centroid and quadrant.
+ */
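+
+/*
+ * For illustration: continuing the getQuadrant example, descending into
+ * quadrant 0xE narrows the parent's bounds so that low.x, high.x and low.y
+ * of any box in the subtree must lie above the corresponding centroid
+ * values, while high.y must lie at or below the centroid's high.y; all
+ * other bounds are inherited unchanged from the parent traversal value.
+ */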
+static RectBox *
+nextRectBox(RectBox *rect_box, RangeBox *centroid, uint8 quadrant)
+{
+ RectBox *next_rect_box = (RectBox *) palloc(sizeof(RectBox));
+
+ memcpy(next_rect_box, rect_box, sizeof(RectBox));
+
+ if (quadrant & 0x8)
+ next_rect_box->range_box_x.left.low = centroid->left.low;
+ else
+ next_rect_box->range_box_x.left.high = centroid->left.low;
+
+ if (quadrant & 0x4)
+ next_rect_box->range_box_x.right.low = centroid->left.high;
+ else
+ next_rect_box->range_box_x.right.high = centroid->left.high;
+
+ if (quadrant & 0x2)
+ next_rect_box->range_box_y.left.low = centroid->right.low;
+ else
+ next_rect_box->range_box_y.left.high = centroid->right.low;
+
+ if (quadrant & 0x1)
+ next_rect_box->range_box_y.right.low = centroid->right.high;
+ else
+ next_rect_box->range_box_y.right.high = centroid->right.high;
+
+ return next_rect_box;
+}
+
+/* Can any range from range_box overlap with this argument? */
+static bool
+overlap2D(RangeBox *range_box, Range *query)
+{
+ return FPge(range_box->right.high, query->low) &&
+ FPle(range_box->left.low, query->high);
+}
+
+/* Can any rectangle from rect_box overlap with this argument? */
+static bool
+overlap4D(RectBox *rect_box, RangeBox *query)
+{
+ return overlap2D(&rect_box->range_box_x, &query->left) &&
+ overlap2D(&rect_box->range_box_y, &query->right);
+}
+
+/* Can any range from range_box contain this argument? */
+static bool
+contain2D(RangeBox *range_box, Range *query)
+{
+ return FPge(range_box->right.high, query->high) &&
+ FPle(range_box->left.low, query->low);
+}
+
+/* Can any rectangle from rect_box contain this argument? */
+static bool
+contain4D(RectBox *rect_box, RangeBox *query)
+{
+ return contain2D(&rect_box->range_box_x, &query->left) &&
+ contain2D(&rect_box->range_box_y, &query->right);
+}
+
+/* Can any range from range_box be contained by this argument? */
+static bool
+contained2D(RangeBox *range_box, Range *query)
+{
+ return FPle(range_box->left.low, query->high) &&
+ FPge(range_box->left.high, query->low) &&
+ FPle(range_box->right.low, query->high) &&
+ FPge(range_box->right.high, query->low);
+}
+
+/* Can any rectangle from rect_box be contained by this argument? */
+static bool
+contained4D(RectBox *rect_box, RangeBox *query)
+{
+ return contained2D(&rect_box->range_box_x, &query->left) &&
+ contained2D(&rect_box->range_box_y, &query->right);
+}
+
+/* Can any range from range_box be lower than this argument? */
+static bool
+lower2D(RangeBox *range_box, Range *query)
+{
+ return FPlt(range_box->left.low, query->low) &&
+ FPlt(range_box->right.low, query->low);
+}
+
+/* Can any range from range_box not extend to the right side of the query? */
+static bool
+overLower2D(RangeBox *range_box, Range *query)
+{
+ return FPle(range_box->left.low, query->high) &&
+ FPle(range_box->right.low, query->high);
+}
+
+/* Can any range from range_box be higher than this argument? */
+static bool
+higher2D(RangeBox *range_box, Range *query)
+{
+ return FPgt(range_box->left.high, query->high) &&
+ FPgt(range_box->right.high, query->high);
+}
+
+/* Can any range from range_box not extend to the left side of the query? */
+static bool
+overHigher2D(RangeBox *range_box, Range *query)
+{
+ return FPge(range_box->left.high, query->low) &&
+ FPge(range_box->right.high, query->low);
+}
+
+/* Can any rectangle from rect_box be left of this argument? */
+static bool
+left4D(RectBox *rect_box, RangeBox *query)
+{
+ return lower2D(&rect_box->range_box_x, &query->left);
+}
+
+/* Can any rectangle from rect_box not extend to the right of this argument? */
+static bool
+overLeft4D(RectBox *rect_box, RangeBox *query)
+{
+ return overLower2D(&rect_box->range_box_x, &query->left);
+}
+
+/* Can any rectangle from rect_box be right of this argument? */
+static bool
+right4D(RectBox *rect_box, RangeBox *query)
+{
+ return higher2D(&rect_box->range_box_x, &query->left);
+}
+
+/* Can any rectangle from rect_box not extend to the left of this argument? */
+static bool
+overRight4D(RectBox *rect_box, RangeBox *query)
+{
+ return overHigher2D(&rect_box->range_box_x, &query->left);
+}
+
+/* Can any rectangle from rect_box be below this argument? */
+static bool
+below4D(RectBox *rect_box, RangeBox *query)
+{
+ return lower2D(&rect_box->range_box_y, &query->right);
+}
+
+/* Can any rectangle from rect_box not extend above this argument? */
+static bool
+overBelow4D(RectBox *rect_box, RangeBox *query)
+{
+ return overLower2D(&rect_box->range_box_y, &query->right);
+}
+
+/* Can any rectangle from rect_box be above this argument? */
+static bool
+above4D(RectBox *rect_box, RangeBox *query)
+{
+ return higher2D(&rect_box->range_box_y, &query->right);
+}
+
+/* Can any rectangle from rect_box not extend below this argument? */
+static bool
+overAbove4D(RectBox *rect_box, RangeBox *query)
+{
+ return overHigher2D(&rect_box->range_box_y, &query->right);
+}
+
+/* Lower bound for the distance between point and rect_box */
+static double
+pointToRectBoxDistance(Point *point, RectBox *rect_box)
+{
+ double dx;
+ double dy;
+
+ if (point->x < rect_box->range_box_x.left.low)
+ dx = rect_box->range_box_x.left.low - point->x;
+ else if (point->x > rect_box->range_box_x.right.high)
+ dx = point->x - rect_box->range_box_x.right.high;
+ else
+ dx = 0;
+
+ if (point->y < rect_box->range_box_y.left.low)
+ dy = rect_box->range_box_y.left.low - point->y;
+ else if (point->y > rect_box->range_box_y.right.high)
+ dy = point->y - rect_box->range_box_y.right.high;
+ else
+ dy = 0;
+
+ return HYPOT(dx, dy);
+}
+
+
+/*
+ * SP-GiST config function
+ */
+Datum
+spg_box_quad_config(PG_FUNCTION_ARGS)
+{
+ spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1);
+
+ cfg->prefixType = BOXOID;
+ cfg->labelType = VOIDOID; /* We don't need node labels. */
+ cfg->canReturnData = true;
+ cfg->longValuesOK = false;
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * SP-GiST choose function
+ */
+Datum
+spg_box_quad_choose(PG_FUNCTION_ARGS)
+{
+ spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0);
+ spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1);
+ BOX *centroid = DatumGetBoxP(in->prefixDatum),
+ *box = DatumGetBoxP(in->leafDatum);
+
+ out->resultType = spgMatchNode;
+ out->result.matchNode.restDatum = BoxPGetDatum(box);
+
+ /* nodeN will be set by core, when allTheSame. */
+ if (!in->allTheSame)
+ out->result.matchNode.nodeN = getQuadrant(centroid, box);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * SP-GiST pick-split function
+ *
+ * It splits a list of boxes into quadrants by choosing a central 4D
+ * point as the median of the coordinates of the boxes.
+ */
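+
+/*
+ * For illustration (hypothetical values): with four input boxes whose
+ * sorted low.x values are {0, 1, 5, 9}, the median element (index
+ * nTuples / 2, here the value 5) becomes the centroid's low.x; the same
+ * is done independently for high.x, low.y and high.y.
+ */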
+Datum
+spg_box_quad_picksplit(PG_FUNCTION_ARGS)
+{
+ spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0);
+ spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1);
+ BOX *centroid;
+ int median,
+ i;
+ float8 *lowXs = palloc(sizeof(float8) * in->nTuples);
+ float8 *highXs = palloc(sizeof(float8) * in->nTuples);
+ float8 *lowYs = palloc(sizeof(float8) * in->nTuples);
+ float8 *highYs = palloc(sizeof(float8) * in->nTuples);
+
+ /* Calculate median of all 4D coordinates */
+ for (i = 0; i < in->nTuples; i++)
+ {
+ BOX *box = DatumGetBoxP(in->datums[i]);
+
+ lowXs[i] = box->low.x;
+ highXs[i] = box->high.x;
+ lowYs[i] = box->low.y;
+ highYs[i] = box->high.y;
+ }
+
+ qsort(lowXs, in->nTuples, sizeof(float8), compareDoubles);
+ qsort(highXs, in->nTuples, sizeof(float8), compareDoubles);
+ qsort(lowYs, in->nTuples, sizeof(float8), compareDoubles);
+ qsort(highYs, in->nTuples, sizeof(float8), compareDoubles);
+
+ median = in->nTuples / 2;
+
+ centroid = palloc(sizeof(BOX));
+
+ centroid->low.x = lowXs[median];
+ centroid->high.x = highXs[median];
+ centroid->low.y = lowYs[median];
+ centroid->high.y = highYs[median];
+
+ /* Fill the output */
+ out->hasPrefix = true;
+ out->prefixDatum = BoxPGetDatum(centroid);
+
+ out->nNodes = 16;
+ out->nodeLabels = NULL; /* We don't need node labels. */
+
+ out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples);
+ out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples);
+
+ /*
+ * Assign ranges to corresponding nodes according to quadrants relative to
+ * the "centroid" range
+ */
+ for (i = 0; i < in->nTuples; i++)
+ {
+ BOX *box = DatumGetBoxP(in->datums[i]);
+ uint8 quadrant = getQuadrant(centroid, box);
+
+ out->leafTupleDatums[i] = BoxPGetDatum(box);
+ out->mapTuplesToNodes[i] = quadrant;
+ }
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Check if result of consistent method based on bounding box is exact.
+ */
+static bool
+is_bounding_box_test_exact(StrategyNumber strategy)
+{
+ switch (strategy)
+ {
+ case RTLeftStrategyNumber:
+ case RTOverLeftStrategyNumber:
+ case RTOverRightStrategyNumber:
+ case RTRightStrategyNumber:
+ case RTOverBelowStrategyNumber:
+ case RTBelowStrategyNumber:
+ case RTAboveStrategyNumber:
+ case RTOverAboveStrategyNumber:
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+/*
+ * Get bounding box for ScanKey.
+ */
+static BOX *
+spg_box_quad_get_scankey_bbox(ScanKey sk, bool *recheck)
+{
+ switch (sk->sk_subtype)
+ {
+ case BOXOID:
+ return DatumGetBoxP(sk->sk_argument);
+
+ case POLYGONOID:
+ if (recheck && !is_bounding_box_test_exact(sk->sk_strategy))
+ *recheck = true;
+ return &DatumGetPolygonP(sk->sk_argument)->boundbox;
+
+ default:
+ elog(ERROR, "unrecognized scankey subtype: %d", sk->sk_subtype);
+ return NULL;
+ }
+}
+
+/*
+ * SP-GiST inner consistent function
+ */
+Datum
+spg_box_quad_inner_consistent(PG_FUNCTION_ARGS)
+{
+ spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0);
+ spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1);
+ int i;
+ MemoryContext old_ctx;
+ RectBox *rect_box;
+ uint8 quadrant;
+ RangeBox *centroid,
+ **queries;
+
+ /*
+ * Use the saved traversal value, or initialize an unbounded one if we have
+ * just begun to walk the tree.
+ */
+ if (in->traversalValue)
+ rect_box = in->traversalValue;
+ else
+ rect_box = initRectBox();
+
+ if (in->allTheSame)
+ {
+ /* Report that all nodes should be visited */
+ out->nNodes = in->nNodes;
+ out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
+ for (i = 0; i < in->nNodes; i++)
+ out->nodeNumbers[i] = i;
+
+ if (in->norderbys > 0 && in->nNodes > 0)
+ {
+ double *distances = palloc(sizeof(double) * in->norderbys);
+ int j;
+
+ for (j = 0; j < in->norderbys; j++)
+ {
+ Point *pt = DatumGetPointP(in->orderbys[j].sk_argument);
+
+ distances[j] = pointToRectBoxDistance(pt, rect_box);
+ }
+
+ out->distances = (double **) palloc(sizeof(double *) * in->nNodes);
+ out->distances[0] = distances;
+
+ for (i = 1; i < in->nNodes; i++)
+ {
+ out->distances[i] = palloc(sizeof(double) * in->norderbys);
+ memcpy(out->distances[i], distances,
+ sizeof(double) * in->norderbys);
+ }
+ }
+
+ PG_RETURN_VOID();
+ }
+
+ /*
+ * We are casting the prefix and queries to RangeBoxes for ease of the
+ * following operations.
+ */
+ centroid = getRangeBox(DatumGetBoxP(in->prefixDatum));
+ queries = (RangeBox **) palloc(in->nkeys * sizeof(RangeBox *));
+ for (i = 0; i < in->nkeys; i++)
+ {
+ BOX *box = spg_box_quad_get_scankey_bbox(&in->scankeys[i], NULL);
+
+ queries[i] = getRangeBox(box);
+ }
+
+ /* Allocate enough memory for nodes */
+ out->nNodes = 0;
+ out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
+ out->traversalValues = (void **) palloc(sizeof(void *) * in->nNodes);
+ if (in->norderbys > 0)
+ out->distances = (double **) palloc(sizeof(double *) * in->nNodes);
+
+ /*
+ * We switch memory context, because we want to allocate memory for new
+ * traversal values (next_rect_box) and pass these pieces of memory to
+ * further call of this function.
+ */
+ old_ctx = MemoryContextSwitchTo(in->traversalMemoryContext);
+
+ for (quadrant = 0; quadrant < in->nNodes; quadrant++)
+ {
+ RectBox *next_rect_box = nextRectBox(rect_box, centroid, quadrant);
+ bool flag = true;
+
+ for (i = 0; i < in->nkeys; i++)
+ {
+ StrategyNumber strategy = in->scankeys[i].sk_strategy;
+
+ switch (strategy)
+ {
+ case RTOverlapStrategyNumber:
+ flag = overlap4D(next_rect_box, queries[i]);
+ break;
+
+ case RTContainsStrategyNumber:
+ flag = contain4D(next_rect_box, queries[i]);
+ break;
+
+ case RTSameStrategyNumber:
+ case RTContainedByStrategyNumber:
+ flag = contained4D(next_rect_box, queries[i]);
+ break;
+
+ case RTLeftStrategyNumber:
+ flag = left4D(next_rect_box, queries[i]);
+ break;
+
+ case RTOverLeftStrategyNumber:
+ flag = overLeft4D(next_rect_box, queries[i]);
+ break;
+
+ case RTRightStrategyNumber:
+ flag = right4D(next_rect_box, queries[i]);
+ break;
+
+ case RTOverRightStrategyNumber:
+ flag = overRight4D(next_rect_box, queries[i]);
+ break;
+
+ case RTAboveStrategyNumber:
+ flag = above4D(next_rect_box, queries[i]);
+ break;
+
+ case RTOverAboveStrategyNumber:
+ flag = overAbove4D(next_rect_box, queries[i]);
+ break;
+
+ case RTBelowStrategyNumber:
+ flag = below4D(next_rect_box, queries[i]);
+ break;
+
+ case RTOverBelowStrategyNumber:
+ flag = overBelow4D(next_rect_box, queries[i]);
+ break;
+
+ default:
+ elog(ERROR, "unrecognized strategy: %d", strategy);
+ }
+
+ /* If any check fails, we have found our answer. */
+ if (!flag)
+ break;
+ }
+
+ if (flag)
+ {
+ out->traversalValues[out->nNodes] = next_rect_box;
+ out->nodeNumbers[out->nNodes] = quadrant;
+
+ if (in->norderbys > 0)
+ {
+ double *distances = palloc(sizeof(double) * in->norderbys);
+ int j;
+
+ out->distances[out->nNodes] = distances;
+
+ for (j = 0; j < in->norderbys; j++)
+ {
+ Point *pt = DatumGetPointP(in->orderbys[j].sk_argument);
+
+ distances[j] = pointToRectBoxDistance(pt, next_rect_box);
+ }
+ }
+
+ out->nNodes++;
+ }
+ else
+ {
+ /*
+ * If this node is not selected, we don't need to keep the next
+ * traversal value in the memory context.
+ */
+ pfree(next_rect_box);
+ }
+ }
+
+ /* Switch back */
+ MemoryContextSwitchTo(old_ctx);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * SP-GiST leaf consistent function
+ */
+Datum
+spg_box_quad_leaf_consistent(PG_FUNCTION_ARGS)
+{
+ spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0);
+ spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1);
+ Datum leaf = in->leafDatum;
+ bool flag = true;
+ int i;
+
+ /* All tests are exact. */
+ out->recheck = false;
+
+ /*
+ * Don't return leafValue unless told to; this is used for both box and
+ * polygon opclasses, and in the latter case the leaf datum is not even of
+ * the right type to return.
+ */
+ if (in->returnData)
+ out->leafValue = leaf;
+
+ /* Perform the required comparison(s) */
+ for (i = 0; i < in->nkeys; i++)
+ {
+ StrategyNumber strategy = in->scankeys[i].sk_strategy;
+ BOX *box = spg_box_quad_get_scankey_bbox(&in->scankeys[i],
+ &out->recheck);
+ Datum query = BoxPGetDatum(box);
+
+ switch (strategy)
+ {
+ case RTOverlapStrategyNumber:
+ flag = DatumGetBool(DirectFunctionCall2(box_overlap, leaf,
+ query));
+ break;
+
+ case RTContainsStrategyNumber:
+ flag = DatumGetBool(DirectFunctionCall2(box_contain, leaf,
+ query));
+ break;
+
+ case RTContainedByStrategyNumber:
+ flag = DatumGetBool(DirectFunctionCall2(box_contained, leaf,
+ query));
+ break;
+
+ case RTSameStrategyNumber:
+ flag = DatumGetBool(DirectFunctionCall2(box_same, leaf,
+ query));
+ break;
+
+ case RTLeftStrategyNumber:
+ flag = DatumGetBool(DirectFunctionCall2(box_left, leaf,
+ query));
+ break;
+
+ case RTOverLeftStrategyNumber:
+ flag = DatumGetBool(DirectFunctionCall2(box_overleft, leaf,
+ query));
+ break;
+
+ case RTRightStrategyNumber:
+ flag = DatumGetBool(DirectFunctionCall2(box_right, leaf,
+ query));
+ break;
+
+ case RTOverRightStrategyNumber:
+ flag = DatumGetBool(DirectFunctionCall2(box_overright, leaf,
+ query));
+ break;
+
+ case RTAboveStrategyNumber:
+ flag = DatumGetBool(DirectFunctionCall2(box_above, leaf,
+ query));
+ break;
+
+ case RTOverAboveStrategyNumber:
+ flag = DatumGetBool(DirectFunctionCall2(box_overabove, leaf,
+ query));
+ break;
+
+ case RTBelowStrategyNumber:
+ flag = DatumGetBool(DirectFunctionCall2(box_below, leaf,
+ query));
+ break;
+
+ case RTOverBelowStrategyNumber:
+ flag = DatumGetBool(DirectFunctionCall2(box_overbelow, leaf,
+ query));
+ break;
+
+ default:
+ elog(ERROR, "unrecognized strategy: %d", strategy);
+ }
+
+ /* If any check fails, we have found our answer. */
+ if (!flag)
+ break;
+ }
+
+ if (flag && in->norderbys > 0)
+ {
+ Oid distfnoid = in->orderbys[0].sk_func.fn_oid;
+
+ out->distances = spg_key_orderbys_distances(leaf, false,
+ in->orderbys, in->norderbys);
+
+ /* Recheck is necessary when computing distance to polygon */
+ out->recheckDistances = distfnoid == F_DIST_POLYP;
+ }
+
+ PG_RETURN_BOOL(flag);
+}
+
+
+/*
+ * SP-GiST config function for 2-D types that are lossily represented by their
+ * bounding boxes
+ */
+Datum
+spg_bbox_quad_config(PG_FUNCTION_ARGS)
+{
+ spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1);
+
+ cfg->prefixType = BOXOID; /* A type represented by its bounding box */
+ cfg->labelType = VOIDOID; /* We don't need node labels. */
+ cfg->leafType = BOXOID;
+ cfg->canReturnData = false;
+ cfg->longValuesOK = false;
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * SP-GiST compress function for polygons
+ */
+Datum
+spg_poly_quad_compress(PG_FUNCTION_ARGS)
+{
+ POLYGON *polygon = PG_GETARG_POLYGON_P(0);
+ BOX *box;
+
+ box = (BOX *) palloc(sizeof(BOX));
+ *box = polygon->boundbox;
+
+ PG_RETURN_BOX_P(box);
+}
diff --git a/src/backend/utils/adt/hbafuncs.c b/src/backend/utils/adt/hbafuncs.c
new file mode 100644
index 0000000..c336599
--- /dev/null
+++ b/src/backend/utils/adt/hbafuncs.c
@@ -0,0 +1,564 @@
+/*-------------------------------------------------------------------------
+ *
+ * hbafuncs.c
+ * Support functions for SQL views of authentication files.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/hbafuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/objectaddress.h"
+#include "common/ip.h"
+#include "funcapi.h"
+#include "libpq/hba.h"
+#include "miscadmin.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/guc.h"
+
+
+static ArrayType *get_hba_options(HbaLine *hba);
+static void fill_hba_line(Tuplestorestate *tuple_store, TupleDesc tupdesc,
+ int lineno, HbaLine *hba, const char *err_msg);
+static void fill_hba_view(Tuplestorestate *tuple_store, TupleDesc tupdesc);
+static void fill_ident_line(Tuplestorestate *tuple_store, TupleDesc tupdesc,
+ int lineno, IdentLine *ident, const char *err_msg);
+static void fill_ident_view(Tuplestorestate *tuple_store, TupleDesc tupdesc);
+
+
+/*
+ * This macro specifies the maximum number of authentication options
+ * that are possible with any given authentication method that is supported.
+ * Currently LDAP supports 11, and there are 3 that are not dependent on
+ * the auth method here. It may not actually be possible to set all of them
+ * at the same time, but we'll set the macro value high enough to be
+ * conservative and avoid warnings from static analysis tools.
+ */
+#define MAX_HBA_OPTIONS 14
+
+/*
+ * Create a text array listing the options specified in the HBA line.
+ * Return NULL if no options are specified.
+ */
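+
+/*
+ * For illustration (hypothetical values): an LDAP entry with a server,
+ * port and base DN configured would yield an array such as
+ * {ldapserver=ldap.example.net, ldapport=389, ldapbasedn=dc=example,dc=net},
+ * which is what the pg_hba_file_rules view exposes in its "options" column.
+ */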
+static ArrayType *
+get_hba_options(HbaLine *hba)
+{
+ int noptions;
+ Datum options[MAX_HBA_OPTIONS];
+
+ noptions = 0;
+
+ if (hba->auth_method == uaGSS || hba->auth_method == uaSSPI)
+ {
+ if (hba->include_realm)
+ options[noptions++] =
+ CStringGetTextDatum("include_realm=true");
+
+ if (hba->krb_realm)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("krb_realm=%s", hba->krb_realm));
+ }
+
+ if (hba->usermap)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("map=%s", hba->usermap));
+
+ if (hba->clientcert != clientCertOff)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("clientcert=%s", (hba->clientcert == clientCertCA) ? "verify-ca" : "verify-full"));
+
+ if (hba->pamservice)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("pamservice=%s", hba->pamservice));
+
+ if (hba->auth_method == uaLDAP)
+ {
+ if (hba->ldapserver)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("ldapserver=%s", hba->ldapserver));
+
+ if (hba->ldapport)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("ldapport=%d", hba->ldapport));
+
+ if (hba->ldaptls)
+ options[noptions++] =
+ CStringGetTextDatum("ldaptls=true");
+
+ if (hba->ldapprefix)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("ldapprefix=%s", hba->ldapprefix));
+
+ if (hba->ldapsuffix)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("ldapsuffix=%s", hba->ldapsuffix));
+
+ if (hba->ldapbasedn)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("ldapbasedn=%s", hba->ldapbasedn));
+
+ if (hba->ldapbinddn)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("ldapbinddn=%s", hba->ldapbinddn));
+
+ if (hba->ldapbindpasswd)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("ldapbindpasswd=%s",
+ hba->ldapbindpasswd));
+
+ if (hba->ldapsearchattribute)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("ldapsearchattribute=%s",
+ hba->ldapsearchattribute));
+
+ if (hba->ldapsearchfilter)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("ldapsearchfilter=%s",
+ hba->ldapsearchfilter));
+
+ if (hba->ldapscope)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("ldapscope=%d", hba->ldapscope));
+ }
+
+ if (hba->auth_method == uaRADIUS)
+ {
+ if (hba->radiusservers_s)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("radiusservers=%s", hba->radiusservers_s));
+
+ if (hba->radiussecrets_s)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("radiussecrets=%s", hba->radiussecrets_s));
+
+ if (hba->radiusidentifiers_s)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("radiusidentifiers=%s", hba->radiusidentifiers_s));
+
+ if (hba->radiusports_s)
+ options[noptions++] =
+ CStringGetTextDatum(psprintf("radiusports=%s", hba->radiusports_s));
+ }
+
+ /* If you add more options, consider increasing MAX_HBA_OPTIONS. */
+ Assert(noptions <= MAX_HBA_OPTIONS);
+
+ if (noptions > 0)
+ return construct_array(options, noptions, TEXTOID, -1, false, TYPALIGN_INT);
+ else
+ return NULL;
+}
+
+/* Number of columns in pg_hba_file_rules view */
+#define NUM_PG_HBA_FILE_RULES_ATTS 9
+
+/*
+ * fill_hba_line
+ * Build one row of pg_hba_file_rules view, add it to tuplestore.
+ *
+ * tuple_store: where to store data
+ * tupdesc: tuple descriptor for the view
+ * lineno: pg_hba.conf line number (must always be valid)
+ * hba: parsed line data (can be NULL, in which case err_msg should be set)
+ * err_msg: error message (NULL if none)
+ *
+ * Note: leaks memory, but we don't care since this is run in a short-lived
+ * memory context.
+ */
+static void
+fill_hba_line(Tuplestorestate *tuple_store, TupleDesc tupdesc,
+ int lineno, HbaLine *hba, const char *err_msg)
+{
+ Datum values[NUM_PG_HBA_FILE_RULES_ATTS];
+ bool nulls[NUM_PG_HBA_FILE_RULES_ATTS];
+ char buffer[NI_MAXHOST];
+ HeapTuple tuple;
+ int index;
+ ListCell *lc;
+ const char *typestr;
+ const char *addrstr;
+ const char *maskstr;
+ ArrayType *options;
+
+ Assert(tupdesc->natts == NUM_PG_HBA_FILE_RULES_ATTS);
+
+ memset(values, 0, sizeof(values));
+ memset(nulls, 0, sizeof(nulls));
+ index = 0;
+
+ /* line_number */
+ values[index++] = Int32GetDatum(lineno);
+
+ if (hba != NULL)
+ {
+ /* type */
+ /* Avoid a default: case so compiler will warn about missing cases */
+ typestr = NULL;
+ switch (hba->conntype)
+ {
+ case ctLocal:
+ typestr = "local";
+ break;
+ case ctHost:
+ typestr = "host";
+ break;
+ case ctHostSSL:
+ typestr = "hostssl";
+ break;
+ case ctHostNoSSL:
+ typestr = "hostnossl";
+ break;
+ case ctHostGSS:
+ typestr = "hostgssenc";
+ break;
+ case ctHostNoGSS:
+ typestr = "hostnogssenc";
+ break;
+ }
+ if (typestr)
+ values[index++] = CStringGetTextDatum(typestr);
+ else
+ nulls[index++] = true;
+
+ /* database */
+ if (hba->databases)
+ {
+ /*
+ * Flatten AuthToken list to string list. It might seem that we
+ * should re-quote any quoted tokens, but that has been rejected
+ * on the grounds that it makes it harder to compare the array
+ * elements to other system catalogs. That makes entries like
+ * "all" or "samerole" formally ambiguous ... but users who name
+ * databases/roles that way are inflicting their own pain.
+ */
+ List *names = NIL;
+
+ foreach(lc, hba->databases)
+ {
+ AuthToken *tok = lfirst(lc);
+
+ names = lappend(names, tok->string);
+ }
+ values[index++] = PointerGetDatum(strlist_to_textarray(names));
+ }
+ else
+ nulls[index++] = true;
+
+ /* user */
+ if (hba->roles)
+ {
+ /* Flatten AuthToken list to string list; see comment above */
+ List *roles = NIL;
+
+ foreach(lc, hba->roles)
+ {
+ AuthToken *tok = lfirst(lc);
+
+ roles = lappend(roles, tok->string);
+ }
+ values[index++] = PointerGetDatum(strlist_to_textarray(roles));
+ }
+ else
+ nulls[index++] = true;
+
+ /* address and netmask */
+ /* Avoid a default: case so compiler will warn about missing cases */
+ addrstr = maskstr = NULL;
+ switch (hba->ip_cmp_method)
+ {
+ case ipCmpMask:
+ if (hba->hostname)
+ {
+ addrstr = hba->hostname;
+ }
+ else
+ {
+ /*
+ * Note: if pg_getnameinfo_all fails, it'll set buffer to
+ * "???", which we want to return.
+ */
+ if (hba->addrlen > 0)
+ {
+ if (pg_getnameinfo_all(&hba->addr, hba->addrlen,
+ buffer, sizeof(buffer),
+ NULL, 0,
+ NI_NUMERICHOST) == 0)
+ clean_ipv6_addr(hba->addr.ss_family, buffer);
+ addrstr = pstrdup(buffer);
+ }
+ if (hba->masklen > 0)
+ {
+ if (pg_getnameinfo_all(&hba->mask, hba->masklen,
+ buffer, sizeof(buffer),
+ NULL, 0,
+ NI_NUMERICHOST) == 0)
+ clean_ipv6_addr(hba->mask.ss_family, buffer);
+ maskstr = pstrdup(buffer);
+ }
+ }
+ break;
+ case ipCmpAll:
+ addrstr = "all";
+ break;
+ case ipCmpSameHost:
+ addrstr = "samehost";
+ break;
+ case ipCmpSameNet:
+ addrstr = "samenet";
+ break;
+ }
+ if (addrstr)
+ values[index++] = CStringGetTextDatum(addrstr);
+ else
+ nulls[index++] = true;
+ if (maskstr)
+ values[index++] = CStringGetTextDatum(maskstr);
+ else
+ nulls[index++] = true;
+
+ /* auth_method */
+ values[index++] = CStringGetTextDatum(hba_authname(hba->auth_method));
+
+ /* options */
+ options = get_hba_options(hba);
+ if (options)
+ values[index++] = PointerGetDatum(options);
+ else
+ nulls[index++] = true;
+ }
+ else
+ {
+ /* no parsing result, so set relevant fields to nulls */
+ memset(&nulls[1], true, (NUM_PG_HBA_FILE_RULES_ATTS - 2) * sizeof(bool));
+ }
+
+ /* error */
+ if (err_msg)
+ values[NUM_PG_HBA_FILE_RULES_ATTS - 1] = CStringGetTextDatum(err_msg);
+ else
+ nulls[NUM_PG_HBA_FILE_RULES_ATTS - 1] = true;
+
+ tuple = heap_form_tuple(tupdesc, values, nulls);
+ tuplestore_puttuple(tuple_store, tuple);
+}
+
+/*
+ * fill_hba_view
+ * Read the pg_hba.conf file and fill the tuplestore with view records.
+ */
+static void
+fill_hba_view(Tuplestorestate *tuple_store, TupleDesc tupdesc)
+{
+ FILE *file;
+ List *hba_lines = NIL;
+ ListCell *line;
+ MemoryContext linecxt;
+ MemoryContext hbacxt;
+ MemoryContext oldcxt;
+
+ /*
+ * In the unlikely event that we can't open pg_hba.conf, we throw an
+ * error, rather than trying to report it via some sort of view entry.
+ * (Most other error conditions should result in a message in a view
+ * entry.)
+ */
+ file = AllocateFile(HbaFileName, "r");
+ if (file == NULL)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open configuration file \"%s\": %m",
+ HbaFileName)));
+
+ linecxt = tokenize_auth_file(HbaFileName, file, &hba_lines, DEBUG3);
+ FreeFile(file);
+
+ /* Now parse all the lines */
+ hbacxt = AllocSetContextCreate(CurrentMemoryContext,
+ "hba parser context",
+ ALLOCSET_SMALL_SIZES);
+ oldcxt = MemoryContextSwitchTo(hbacxt);
+ foreach(line, hba_lines)
+ {
+ TokenizedAuthLine *tok_line = (TokenizedAuthLine *) lfirst(line);
+ HbaLine *hbaline = NULL;
+
+ /* don't parse lines that already have errors */
+ if (tok_line->err_msg == NULL)
+ hbaline = parse_hba_line(tok_line, DEBUG3);
+
+ fill_hba_line(tuple_store, tupdesc, tok_line->line_num,
+ hbaline, tok_line->err_msg);
+ }
+
+ /* Free tokenizer memory */
+ MemoryContextDelete(linecxt);
+ /* Free parse_hba_line memory */
+ MemoryContextSwitchTo(oldcxt);
+ MemoryContextDelete(hbacxt);
+}
+
+/*
+ * pg_hba_file_rules
+ *
+ * SQL-accessible set-returning function to return all the entries in the
+ * pg_hba.conf file.
+ */
+Datum
+pg_hba_file_rules(PG_FUNCTION_ARGS)
+{
+ ReturnSetInfo *rsi;
+
+ /*
+ * Build tuplestore to hold the result rows. We must use the Materialize
+ * mode to be safe against HBA file changes while the cursor is open. It's
+ * also more efficient than having to look up our current position in the
+ * parsed list every time.
+ */
+ InitMaterializedSRF(fcinfo, 0);
+
+ /* Fill the tuplestore */
+ rsi = (ReturnSetInfo *) fcinfo->resultinfo;
+ fill_hba_view(rsi->setResult, rsi->setDesc);
+
+ PG_RETURN_NULL();
+}
+
+/* Number of columns in pg_ident_file_mappings view */
+#define NUM_PG_IDENT_FILE_MAPPINGS_ATTS 5
+
+/*
+ * fill_ident_line: build one row of pg_ident_file_mappings view, add it to
+ * tuplestore
+ *
+ * tuple_store: where to store data
+ * tupdesc: tuple descriptor for the view
+ * lineno: pg_ident.conf line number (must always be valid)
+ * ident: parsed line data (can be NULL, in which case err_msg should be set)
+ * err_msg: error message (NULL if none)
+ *
+ * Note: leaks memory, but we don't care since this is run in a short-lived
+ * memory context.
+ */
+static void
+fill_ident_line(Tuplestorestate *tuple_store, TupleDesc tupdesc,
+ int lineno, IdentLine *ident, const char *err_msg)
+{
+ Datum values[NUM_PG_IDENT_FILE_MAPPINGS_ATTS];
+ bool nulls[NUM_PG_IDENT_FILE_MAPPINGS_ATTS];
+ HeapTuple tuple;
+ int index;
+
+ Assert(tupdesc->natts == NUM_PG_IDENT_FILE_MAPPINGS_ATTS);
+
+ memset(values, 0, sizeof(values));
+ memset(nulls, 0, sizeof(nulls));
+ index = 0;
+
+ /* line_number */
+ values[index++] = Int32GetDatum(lineno);
+
+ if (ident != NULL)
+ {
+ values[index++] = CStringGetTextDatum(ident->usermap);
+ values[index++] = CStringGetTextDatum(ident->ident_user);
+ values[index++] = CStringGetTextDatum(ident->pg_role);
+ }
+ else
+ {
+ /* no parsing result, so set relevant fields to nulls */
+ memset(&nulls[1], true, (NUM_PG_IDENT_FILE_MAPPINGS_ATTS - 2) * sizeof(bool));
+ }
+
+ /* error */
+ if (err_msg)
+ values[NUM_PG_IDENT_FILE_MAPPINGS_ATTS - 1] = CStringGetTextDatum(err_msg);
+ else
+ nulls[NUM_PG_IDENT_FILE_MAPPINGS_ATTS - 1] = true;
+
+ tuple = heap_form_tuple(tupdesc, values, nulls);
+ tuplestore_puttuple(tuple_store, tuple);
+}
+
+/*
+ * Read the pg_ident.conf file and fill the tuplestore with view records.
+ */
+static void
+fill_ident_view(Tuplestorestate *tuple_store, TupleDesc tupdesc)
+{
+ FILE *file;
+ List *ident_lines = NIL;
+ ListCell *line;
+ MemoryContext linecxt;
+ MemoryContext identcxt;
+ MemoryContext oldcxt;
+
+ /*
+ * In the unlikely event that we can't open pg_ident.conf, we throw an
+ * error, rather than trying to report it via some sort of view entry.
+ * (Most other error conditions should result in a message in a view
+ * entry.)
+ */
+ file = AllocateFile(IdentFileName, "r");
+ if (file == NULL)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open usermap file \"%s\": %m",
+ IdentFileName)));
+
+ linecxt = tokenize_auth_file(IdentFileName, file, &ident_lines, DEBUG3);
+ FreeFile(file);
+
+ /* Now parse all the lines */
+ identcxt = AllocSetContextCreate(CurrentMemoryContext,
+ "ident parser context",
+ ALLOCSET_SMALL_SIZES);
+ oldcxt = MemoryContextSwitchTo(identcxt);
+ foreach(line, ident_lines)
+ {
+ TokenizedAuthLine *tok_line = (TokenizedAuthLine *) lfirst(line);
+ IdentLine *identline = NULL;
+
+ /* don't parse lines that already have errors */
+ if (tok_line->err_msg == NULL)
+ identline = parse_ident_line(tok_line, DEBUG3);
+
+ fill_ident_line(tuple_store, tupdesc, tok_line->line_num, identline,
+ tok_line->err_msg);
+ }
+
+ /* Free tokenizer memory */
+ MemoryContextDelete(linecxt);
+ /* Free parse_ident_line memory */
+ MemoryContextSwitchTo(oldcxt);
+ MemoryContextDelete(identcxt);
+}
+
+/*
+ * SQL-accessible SRF to return all the entries in the pg_ident.conf file.
+ */
+Datum
+pg_ident_file_mappings(PG_FUNCTION_ARGS)
+{
+ ReturnSetInfo *rsi;
+
+ /*
+ * Build tuplestore to hold the result rows. We must use the Materialize
+ * mode to be safe against HBA file changes while the cursor is open. It's
+ * also more efficient than having to look up our current position in the
+ * parsed list every time.
+ */
+ InitMaterializedSRF(fcinfo, 0);
+
+ /* Fill the tuplestore */
+ rsi = (ReturnSetInfo *) fcinfo->resultinfo;
+ fill_ident_view(rsi->setResult, rsi->setDesc);
+
+ PG_RETURN_NULL();
+}
diff --git a/src/backend/utils/adt/inet_cidr_ntop.c b/src/backend/utils/adt/inet_cidr_ntop.c
new file mode 100644
index 0000000..5f74c05
--- /dev/null
+++ b/src/backend/utils/adt/inet_cidr_ntop.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * src/backend/utils/adt/inet_cidr_ntop.c
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "Id: inet_net_ntop.c,v 1.1.2.2 2004/03/09 09:17:27 marka Exp $";
+#endif
+
+#include "postgres.h"
+
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include "utils/builtins.h"
+#include "utils/inet.h"
+
+
+#ifdef SPRINTF_CHAR
+#define SPRINTF(x) strlen(sprintf/**/x)
+#else
+#define SPRINTF(x) ((size_t)sprintf x)
+#endif
+
+static char *inet_cidr_ntop_ipv4(const u_char *src, int bits,
+ char *dst, size_t size);
+static char *inet_cidr_ntop_ipv6(const u_char *src, int bits,
+ char *dst, size_t size);
+
+/*
+ * char *
+ * pg_inet_cidr_ntop(af, src, bits, dst, size)
+ * convert network number from network to presentation format.
+ * generates CIDR style result always.
+ * return:
+ * pointer to dst, or NULL if an error occurred (check errno).
+ * author:
+ * Paul Vixie (ISC), July 1996
+ */
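+
+/*
+ * For illustration (hypothetical values): given the four network-order
+ * octets 192, 5, 5, 240 and bits = 28, the text written to dst is
+ * "192.5.5.240/28"; see also the note about the partial fourth octet in
+ * inet_cidr_ntop_ipv4() below.
+ */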
+char *
+pg_inet_cidr_ntop(int af, const void *src, int bits, char *dst, size_t size)
+{
+ switch (af)
+ {
+ case PGSQL_AF_INET:
+ return inet_cidr_ntop_ipv4(src, bits, dst, size);
+ case PGSQL_AF_INET6:
+ return inet_cidr_ntop_ipv6(src, bits, dst, size);
+ default:
+ errno = EAFNOSUPPORT;
+ return NULL;
+ }
+}
+
+
+/*
+ * static char *
+ * inet_cidr_ntop_ipv4(src, bits, dst, size)
+ * convert IPv4 network number from network to presentation format.
+ * generates CIDR style result always.
+ * return:
+ * pointer to dst, or NULL if an error occurred (check errno).
+ * note:
+ * network byte order assumed. this means 192.5.5.240/28 has
+ * 0b11110000 in its fourth octet.
+ * author:
+ * Paul Vixie (ISC), July 1996
+ */
+static char *
+inet_cidr_ntop_ipv4(const u_char *src, int bits, char *dst, size_t size)
+{
+ char *odst = dst;
+ char *t;
+ u_int m;
+ int b;
+
+ if (bits < 0 || bits > 32)
+ {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ if (bits == 0)
+ {
+ if (size < sizeof "0")
+ goto emsgsize;
+ *dst++ = '0';
+ size--;
+ *dst = '\0';
+ }
+
+ /* Format whole octets. */
+ for (b = bits / 8; b > 0; b--)
+ {
+ if (size <= sizeof "255.")
+ goto emsgsize;
+ t = dst;
+ dst += SPRINTF((dst, "%u", *src++));
+ if (b > 1)
+ {
+ *dst++ = '.';
+ *dst = '\0';
+ }
+ size -= (size_t) (dst - t);
+ }
+
+ /* Format partial octet. */
+ b = bits % 8;
+ if (b > 0)
+ {
+ if (size <= sizeof ".255")
+ goto emsgsize;
+ t = dst;
+ if (dst != odst)
+ *dst++ = '.';
+ m = ((1 << b) - 1) << (8 - b);
+ dst += SPRINTF((dst, "%u", *src & m));
+ size -= (size_t) (dst - t);
+ }
+
+ /* Format CIDR /width. */
+ if (size <= sizeof "/32")
+ goto emsgsize;
+ dst += SPRINTF((dst, "/%u", bits));
+ return odst;
+
+emsgsize:
+ errno = EMSGSIZE;
+ return NULL;
+}
+
+/*
+ * static char *
+ * inet_cidr_ntop_ipv6(src, bits, dst, size)
+ * convert IPv6 network number from network to presentation format.
+ * generates CIDR style result always. Picks the shortest representation
+ * unless the IP is really IPv4.
+ * always prints specified number of bits (bits).
+ * return:
+ * pointer to dst, or NULL if an error occurred (check errno).
+ * note:
+ * network byte order assumed. this means 192.5.5.240/28 has
+ * 0b11110000 in its fourth octet.
+ * author:
+ * Vadim Kogan (UCB), June 2001
+ * Original version (IPv4) by Paul Vixie (ISC), July 1996
+ */
+
+static char *
+inet_cidr_ntop_ipv6(const u_char *src, int bits, char *dst, size_t size)
+{
+ u_int m;
+ int b;
+ int p;
+ int zero_s,
+ zero_l,
+ tmp_zero_s,
+ tmp_zero_l;
+ int i;
+ int is_ipv4 = 0;
+ unsigned char inbuf[16];
+ char outbuf[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255/128")];
+ char *cp;
+ int words;
+ u_char *s;
+
+ if (bits < 0 || bits > 128)
+ {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ cp = outbuf;
+
+ if (bits == 0)
+ {
+ *cp++ = ':';
+ *cp++ = ':';
+ *cp = '\0';
+ }
+ else
+ {
+ /* Copy src to private buffer. Zero host part. */
+ p = (bits + 7) / 8;
+ memcpy(inbuf, src, p);
+ memset(inbuf + p, 0, 16 - p);
+ b = bits % 8;
+ if (b != 0)
+ {
+ m = ((u_int) ~0) << (8 - b);
+ inbuf[p - 1] &= m;
+ }
+
+ s = inbuf;
+
+ /* how many words need to be displayed in output */
+ words = (bits + 15) / 16;
+ if (words == 1)
+ words = 2;
+
+ /* Find the longest substring of zero's */
+ zero_s = zero_l = tmp_zero_s = tmp_zero_l = 0;
+ for (i = 0; i < (words * 2); i += 2)
+ {
+ if ((s[i] | s[i + 1]) == 0)
+ {
+ if (tmp_zero_l == 0)
+ tmp_zero_s = i / 2;
+ tmp_zero_l++;
+ }
+ else
+ {
+ if (tmp_zero_l && zero_l < tmp_zero_l)
+ {
+ zero_s = tmp_zero_s;
+ zero_l = tmp_zero_l;
+ tmp_zero_l = 0;
+ }
+ }
+ }
+
+ if (tmp_zero_l && zero_l < tmp_zero_l)
+ {
+ zero_s = tmp_zero_s;
+ zero_l = tmp_zero_l;
+ }
+
+ if (zero_l != words && zero_s == 0 && ((zero_l == 6) ||
+ ((zero_l == 5 && s[10] == 0xff && s[11] == 0xff) ||
+ ((zero_l == 7 && s[14] != 0 && s[15] != 1)))))
+ is_ipv4 = 1;
+
+ /* Format whole words. */
+ for (p = 0; p < words; p++)
+ {
+ if (zero_l != 0 && p >= zero_s && p < zero_s + zero_l)
+ {
+ /* Time to skip some zeros */
+ if (p == zero_s)
+ *cp++ = ':';
+ if (p == words - 1)
+ *cp++ = ':';
+ s++;
+ s++;
+ continue;
+ }
+
+ if (is_ipv4 && p > 5)
+ {
+ *cp++ = (p == 6) ? ':' : '.';
+ cp += SPRINTF((cp, "%u", *s++));
+ /* we can potentially drop the last octet */
+ if (p != 7 || bits > 120)
+ {
+ *cp++ = '.';
+ cp += SPRINTF((cp, "%u", *s++));
+ }
+ }
+ else
+ {
+ if (cp != outbuf)
+ *cp++ = ':';
+ cp += SPRINTF((cp, "%x", *s * 256 + s[1]));
+ s += 2;
+ }
+ }
+ }
+ /* Format CIDR /width. */
+ (void) SPRINTF((cp, "/%u", bits));
+ if (strlen(outbuf) + 1 > size)
+ goto emsgsize;
+ strcpy(dst, outbuf);
+
+ return dst;
+
+emsgsize:
+ errno = EMSGSIZE;
+ return NULL;
+}
diff --git a/src/backend/utils/adt/inet_net_pton.c b/src/backend/utils/adt/inet_net_pton.c
new file mode 100644
index 0000000..d3221a1
--- /dev/null
+++ b/src/backend/utils/adt/inet_net_pton.c
@@ -0,0 +1,564 @@
+/*
+ * Copyright (c) 2004 by Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (c) 1996,1999 by Internet Software Consortium.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
+ * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ * src/backend/utils/adt/inet_net_pton.c
+ */
+
+#if defined(LIBC_SCCS) && !defined(lint)
+static const char rcsid[] = "Id: inet_net_pton.c,v 1.4.2.3 2004/03/17 00:40:11 marka Exp $";
+#endif
+
+#include "postgres.h"
+
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <assert.h>
+#include <ctype.h>
+
+#include "utils/builtins.h" /* pgrminclude ignore */ /* needed on some
+ * platforms */
+#include "utils/inet.h"
+
+
+static int inet_net_pton_ipv4(const char *src, u_char *dst);
+static int inet_cidr_pton_ipv4(const char *src, u_char *dst, size_t size);
+static int inet_net_pton_ipv6(const char *src, u_char *dst);
+static int inet_cidr_pton_ipv6(const char *src, u_char *dst, size_t size);
+
+
+/*
+ * int
+ * pg_inet_net_pton(af, src, dst, size)
+ * convert network number from presentation to network format.
+ * accepts hex octets, hex strings, decimal octets, and /CIDR.
+ * "size" is in bytes and describes "dst".
+ * return:
+ * number of bits, either imputed classfully or specified with /CIDR,
+ * or -1 if some failure occurred (check errno). ENOENT means it was
+ * not a valid network specification.
+ * author:
+ * Paul Vixie (ISC), June 1996
+ *
+ * Changes:
+ * I added the inet_cidr_pton function (also from Paul) and changed
+ * the names to reflect their current use.
+ *
+ */
+int
+pg_inet_net_pton(int af, const char *src, void *dst, size_t size)
+{
+ switch (af)
+ {
+ case PGSQL_AF_INET:
+ return size == -1 ?
+ inet_net_pton_ipv4(src, dst) :
+ inet_cidr_pton_ipv4(src, dst, size);
+ case PGSQL_AF_INET6:
+ return size == -1 ?
+ inet_net_pton_ipv6(src, dst) :
+ inet_cidr_pton_ipv6(src, dst, size);
+ default:
+ errno = EAFNOSUPPORT;
+ return -1;
+ }
+}
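A hedged usage sketch of the dispatcher above (not part of the patch; the input string and buffer are made up): passing a real buffer size selects the CIDR-style parser, so a /24 network ends up as four octets plus the returned prefix length.

    unsigned char addr[4];
    int           bits;

    bits = pg_inet_net_pton(PGSQL_AF_INET, "192.168.0.0/24", addr, sizeof(addr));
    if (bits < 0)
        elog(ERROR, "could not parse network: %m");
    /* here bits == 24 and addr[] holds {192, 168, 0, 0} */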
+
+/*
+ * static int
+ * inet_cidr_pton_ipv4(src, dst, size)
+ * convert IPv4 network number from presentation to network format.
+ * accepts hex octets, hex strings, decimal octets, and /CIDR.
+ * "size" is in bytes and describes "dst".
+ * return:
+ * number of bits, either imputed classfully or specified with /CIDR,
+ * or -1 if some failure occurred (check errno). ENOENT means it was
+ * not an IPv4 network specification.
+ * note:
+ * network byte order assumed. this means 192.5.5.240/28 has
+ * 0b11110000 in its fourth octet.
+ * author:
+ * Paul Vixie (ISC), June 1996
+ */
+static int
+inet_cidr_pton_ipv4(const char *src, u_char *dst, size_t size)
+{
+ static const char xdigits[] = "0123456789abcdef";
+ static const char digits[] = "0123456789";
+ int n,
+ ch,
+ tmp = 0,
+ dirty,
+ bits;
+ const u_char *odst = dst;
+
+ ch = *src++;
+ if (ch == '0' && (src[0] == 'x' || src[0] == 'X')
+ && isxdigit((unsigned char) src[1]))
+ {
+ /* Hexadecimal: Eat nybble string. */
+ if (size <= 0U)
+ goto emsgsize;
+ dirty = 0;
+ src++; /* skip x or X. */
+ while ((ch = *src++) != '\0' && isxdigit((unsigned char) ch))
+ {
+ if (isupper((unsigned char) ch))
+ ch = tolower((unsigned char) ch);
+ n = strchr(xdigits, ch) - xdigits;
+ assert(n >= 0 && n <= 15);
+ if (dirty == 0)
+ tmp = n;
+ else
+ tmp = (tmp << 4) | n;
+ if (++dirty == 2)
+ {
+ if (size-- <= 0U)
+ goto emsgsize;
+ *dst++ = (u_char) tmp;
+ dirty = 0;
+ }
+ }
+ if (dirty)
+ { /* Odd trailing nybble? */
+ if (size-- <= 0U)
+ goto emsgsize;
+ *dst++ = (u_char) (tmp << 4);
+ }
+ }
+ else if (isdigit((unsigned char) ch))
+ {
+ /* Decimal: eat dotted digit string. */
+ for (;;)
+ {
+ tmp = 0;
+ do
+ {
+ n = strchr(digits, ch) - digits;
+ assert(n >= 0 && n <= 9);
+ tmp *= 10;
+ tmp += n;
+ if (tmp > 255)
+ goto enoent;
+ } while ((ch = *src++) != '\0' &&
+ isdigit((unsigned char) ch));
+ if (size-- <= 0U)
+ goto emsgsize;
+ *dst++ = (u_char) tmp;
+ if (ch == '\0' || ch == '/')
+ break;
+ if (ch != '.')
+ goto enoent;
+ ch = *src++;
+ if (!isdigit((unsigned char) ch))
+ goto enoent;
+ }
+ }
+ else
+ goto enoent;
+
+ bits = -1;
+ if (ch == '/' && isdigit((unsigned char) src[0]) && dst > odst)
+ {
+ /* CIDR width specifier. Nothing can follow it. */
+ ch = *src++; /* Skip over the /. */
+ bits = 0;
+ do
+ {
+ n = strchr(digits, ch) - digits;
+ assert(n >= 0 && n <= 9);
+ bits *= 10;
+ bits += n;
+ } while ((ch = *src++) != '\0' && isdigit((unsigned char) ch));
+ if (ch != '\0')
+ goto enoent;
+ if (bits > 32)
+ goto emsgsize;
+ }
+
+ /* Fiery death and destruction unless we prefetched EOS. */
+ if (ch != '\0')
+ goto enoent;
+
+ /* If nothing was written to the destination, we found no address. */
+ if (dst == odst)
+ goto enoent;
+ /* If no CIDR spec was given, infer width from net class. */
+ if (bits == -1)
+ {
+ if (*odst >= 240) /* Class E */
+ bits = 32;
+ else if (*odst >= 224) /* Class D */
+ bits = 8;
+ else if (*odst >= 192) /* Class C */
+ bits = 24;
+ else if (*odst >= 128) /* Class B */
+ bits = 16;
+ else
+ /* Class A */
+ bits = 8;
+ /* If imputed mask is narrower than specified octets, widen. */
+ if (bits < ((dst - odst) * 8))
+ bits = (dst - odst) * 8;
+
+ /*
+ * If there are no additional bits specified for a class D address
+ * adjust bits to 4.
+ */
+ if (bits == 8 && *odst == 224)
+ bits = 4;
+ }
+ /* Extend network to cover the actual mask. */
+ while (bits > ((dst - odst) * 8))
+ {
+ if (size-- <= 0U)
+ goto emsgsize;
+ *dst++ = '\0';
+ }
+ return bits;
+
+enoent:
+ errno = ENOENT;
+ return -1;
+
+emsgsize:
+ errno = EMSGSIZE;
+ return -1;
+}
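A few illustrative inputs for the classful-inference path above (expected results read off the class tests and the class D adjustment; not asserted by the patch):

    /*
     * "10"       -> one octet, first octet < 128 (class A)  ->  8 bits
     * "128.1"    -> two octets, class B imputed             -> 16 bits
     * "192.5.5"  -> three octets, class C imputed           -> 24 bits
     * "224"      -> class D with no extra octets, adjusted  ->  4 bits
     */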
+
+/*
+ * static int
+ * inet_net_pton_ipv4(src, dst)
+ * convert network address from presentation to network format.
+ * accepts inet_pton()'s IPv4 input plus an optional trailing "/CIDR".
+ * "dst" is assumed large enough for an IPv4 address (4 octets).
+ * return:
+ * the /CIDR prefix length (defaulting to /32 when all four octets are
+ * given), or -1 if an error occurred (inspect errno; ENOENT means bad
+ * format).
+ * note:
+ * 192.5.5.1/28 has a nonzero host part, which means it isn't a network
+ * as called for by inet_cidr_pton() but it can be a host address with
+ * an included netmask.
+ * author:
+ * Paul Vixie (ISC), October 1998
+ */
+static int
+inet_net_pton_ipv4(const char *src, u_char *dst)
+{
+ static const char digits[] = "0123456789";
+ const u_char *odst = dst;
+ int n,
+ ch,
+ tmp,
+ bits;
+ size_t size = 4;
+
+ /* Get the mantissa. */
+ while (ch = *src++, isdigit((unsigned char) ch))
+ {
+ tmp = 0;
+ do
+ {
+ n = strchr(digits, ch) - digits;
+ assert(n >= 0 && n <= 9);
+ tmp *= 10;
+ tmp += n;
+ if (tmp > 255)
+ goto enoent;
+ } while ((ch = *src++) != '\0' && isdigit((unsigned char) ch));
+ if (size-- == 0)
+ goto emsgsize;
+ *dst++ = (u_char) tmp;
+ if (ch == '\0' || ch == '/')
+ break;
+ if (ch != '.')
+ goto enoent;
+ }
+
+ /* Get the prefix length if any. */
+ bits = -1;
+ if (ch == '/' && isdigit((unsigned char) src[0]) && dst > odst)
+ {
+ /* CIDR width specifier. Nothing can follow it. */
+ ch = *src++; /* Skip over the /. */
+ bits = 0;
+ do
+ {
+ n = strchr(digits, ch) - digits;
+ assert(n >= 0 && n <= 9);
+ bits *= 10;
+ bits += n;
+ } while ((ch = *src++) != '\0' && isdigit((unsigned char) ch));
+ if (ch != '\0')
+ goto enoent;
+ if (bits > 32)
+ goto emsgsize;
+ }
+
+ /* Fiery death and destruction unless we prefetched EOS. */
+ if (ch != '\0')
+ goto enoent;
+
+ /* Prefix length can default to /32 only if all four octets spec'd. */
+ if (bits == -1)
+ {
+ if (dst - odst == 4)
+ bits = 32;
+ else
+ goto enoent;
+ }
+
+ /* If nothing was written to the destination, we found no address. */
+ if (dst == odst)
+ goto enoent;
+
+ /* If prefix length overspecifies mantissa, life is bad. */
+ if ((bits / 8) > (dst - odst))
+ goto enoent;
+
+ /* Extend address to four octets. */
+ while (size-- > 0)
+ *dst++ = 0;
+
+ return bits;
+
+enoent:
+ errno = ENOENT;
+ return -1;
+
+emsgsize:
+ errno = EMSGSIZE;
+ return -1;
+}
+
+static int
+getbits(const char *src, int *bitsp)
+{
+ static const char digits[] = "0123456789";
+ int n;
+ int val;
+ char ch;
+
+ val = 0;
+ n = 0;
+ while ((ch = *src++) != '\0')
+ {
+ const char *pch;
+
+ pch = strchr(digits, ch);
+ if (pch != NULL)
+ {
+ if (n++ != 0 && val == 0) /* no leading zeros */
+ return 0;
+ val *= 10;
+ val += (pch - digits);
+ if (val > 128) /* range */
+ return 0;
+ continue;
+ }
+ return 0;
+ }
+ if (n == 0)
+ return 0;
+ *bitsp = val;
+ return 1;
+}
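Illustrative behaviour of getbits() (a sketch based on the code above; the calls are conceptual since the function is static to this file):

    int     b;

    getbits("64", &b);      /* returns 1, sets b = 64 */
    getbits("012", &b);     /* returns 0: leading zero rejected */
    getbits("129", &b);     /* returns 0: prefix length above 128 */
    getbits("", &b);        /* returns 0: at least one digit required */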
+
+static int
+getv4(const char *src, u_char *dst, int *bitsp)
+{
+ static const char digits[] = "0123456789";
+ u_char *odst = dst;
+ int n;
+ u_int val;
+ char ch;
+
+ val = 0;
+ n = 0;
+ while ((ch = *src++) != '\0')
+ {
+ const char *pch;
+
+ pch = strchr(digits, ch);
+ if (pch != NULL)
+ {
+ if (n++ != 0 && val == 0) /* no leading zeros */
+ return 0;
+ val *= 10;
+ val += (pch - digits);
+ if (val > 255) /* range */
+ return 0;
+ continue;
+ }
+ if (ch == '.' || ch == '/')
+ {
+ if (dst - odst > 3) /* too many octets? */
+ return 0;
+ *dst++ = val;
+ if (ch == '/')
+ return getbits(src, bitsp);
+ val = 0;
+ n = 0;
+ continue;
+ }
+ return 0;
+ }
+ if (n == 0)
+ return 0;
+ if (dst - odst > 3) /* too many octets? */
+ return 0;
+ *dst++ = val;
+ return 1;
+}
+
+static int
+inet_net_pton_ipv6(const char *src, u_char *dst)
+{
+ return inet_cidr_pton_ipv6(src, dst, 16);
+}
+
+#define NS_IN6ADDRSZ 16
+#define NS_INT16SZ 2
+#define NS_INADDRSZ 4
+
+static int
+inet_cidr_pton_ipv6(const char *src, u_char *dst, size_t size)
+{
+ static const char xdigits_l[] = "0123456789abcdef",
+ xdigits_u[] = "0123456789ABCDEF";
+ u_char tmp[NS_IN6ADDRSZ],
+ *tp,
+ *endp,
+ *colonp;
+ const char *xdigits,
+ *curtok;
+ int ch,
+ saw_xdigit;
+ u_int val;
+ int digits;
+ int bits;
+
+ if (size < NS_IN6ADDRSZ)
+ goto emsgsize;
+
+ memset((tp = tmp), '\0', NS_IN6ADDRSZ);
+ endp = tp + NS_IN6ADDRSZ;
+ colonp = NULL;
+ /* Leading :: requires some special handling. */
+ if (*src == ':')
+ if (*++src != ':')
+ goto enoent;
+ curtok = src;
+ saw_xdigit = 0;
+ val = 0;
+ digits = 0;
+ bits = -1;
+ while ((ch = *src++) != '\0')
+ {
+ const char *pch;
+
+ if ((pch = strchr((xdigits = xdigits_l), ch)) == NULL)
+ pch = strchr((xdigits = xdigits_u), ch);
+ if (pch != NULL)
+ {
+ val <<= 4;
+ val |= (pch - xdigits);
+ if (++digits > 4)
+ goto enoent;
+ saw_xdigit = 1;
+ continue;
+ }
+ if (ch == ':')
+ {
+ curtok = src;
+ if (!saw_xdigit)
+ {
+ if (colonp)
+ goto enoent;
+ colonp = tp;
+ continue;
+ }
+ else if (*src == '\0')
+ goto enoent;
+ if (tp + NS_INT16SZ > endp)
+ goto enoent;
+ *tp++ = (u_char) (val >> 8) & 0xff;
+ *tp++ = (u_char) val & 0xff;
+ saw_xdigit = 0;
+ digits = 0;
+ val = 0;
+ continue;
+ }
+ if (ch == '.' && ((tp + NS_INADDRSZ) <= endp) &&
+ getv4(curtok, tp, &bits) > 0)
+ {
+ tp += NS_INADDRSZ;
+ saw_xdigit = 0;
+ break; /* '\0' was seen by inet_pton4(). */
+ }
+ if (ch == '/' && getbits(src, &bits) > 0)
+ break;
+ goto enoent;
+ }
+ if (saw_xdigit)
+ {
+ if (tp + NS_INT16SZ > endp)
+ goto enoent;
+ *tp++ = (u_char) (val >> 8) & 0xff;
+ *tp++ = (u_char) val & 0xff;
+ }
+ if (bits == -1)
+ bits = 128;
+
+ endp = tmp + 16;
+
+ if (colonp != NULL)
+ {
+ /*
+ * Since some memmove()'s erroneously fail to handle overlapping
+ * regions, we'll do the shift by hand.
+ */
+ const int n = tp - colonp;
+ int i;
+
+ if (tp == endp)
+ goto enoent;
+ for (i = 1; i <= n; i++)
+ {
+ endp[-i] = colonp[n - i];
+ colonp[n - i] = 0;
+ }
+ tp = endp;
+ }
+ if (tp != endp)
+ goto enoent;
+
+ /*
+ * Copy out the result.
+ */
+ memcpy(dst, tmp, NS_IN6ADDRSZ);
+
+ return bits;
+
+enoent:
+ errno = ENOENT;
+ return -1;
+
+emsgsize:
+ errno = EMSGSIZE;
+ return -1;
+}
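A worked example of the "::" expansion done by the hand-rolled shift above (illustrative byte layout, not a test from the patch): the groups parsed after the double colon are slid to the end of the buffer and the gap is zero-filled.

    /*
     * input: "1:2::7:8/64"
     *   parsed so far:  00 01 00 02 00 07 00 08        (colonp at offset 4)
     *   after shift:    00 01 00 02 00 00 00 00 00 00 00 00 00 07 00 08
     *   return value:   64 (from the trailing "/64")
     */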
diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c
new file mode 100644
index 0000000..ff1f46e
--- /dev/null
+++ b/src/backend/utils/adt/int.c
@@ -0,0 +1,1648 @@
+/*-------------------------------------------------------------------------
+ *
+ * int.c
+ * Functions for the built-in integer types (except int8).
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/int.c
+ *
+ *-------------------------------------------------------------------------
+ */
+/*
+ * OLD COMMENTS
+ * I/O routines:
+ * int2in, int2out, int2recv, int2send
+ * int4in, int4out, int4recv, int4send
+ * int2vectorin, int2vectorout, int2vectorrecv, int2vectorsend
+ * Boolean operators:
+ * inteq, intne, intlt, intle, intgt, intge
+ * Arithmetic operators:
+ * intpl, intmi, int4mul, intdiv, intmod
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+#include <limits.h>
+#include <math.h>
+
+#include "catalog/pg_type.h"
+#include "common/int.h"
+#include "funcapi.h"
+#include "libpq/pqformat.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "optimizer/optimizer.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+
+#define Int2VectorSize(n) (offsetof(int2vector, values) + (n) * sizeof(int16))
+
+typedef struct
+{
+ int32 current;
+ int32 finish;
+ int32 step;
+} generate_series_fctx;
+
+
+/*****************************************************************************
+ * USER I/O ROUTINES *
+ *****************************************************************************/
+
+/*
+ * int2in - converts "num" to short
+ */
+Datum
+int2in(PG_FUNCTION_ARGS)
+{
+ char *num = PG_GETARG_CSTRING(0);
+
+ PG_RETURN_INT16(pg_strtoint16(num));
+}
+
+/*
+ * int2out - converts short to "num"
+ */
+Datum
+int2out(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ char *result = (char *) palloc(7); /* sign, 5 digits, '\0' */
+
+ pg_itoa(arg1, result);
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * int2recv - converts external binary format to int2
+ */
+Datum
+int2recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+
+ PG_RETURN_INT16((int16) pq_getmsgint(buf, sizeof(int16)));
+}
+
+/*
+ * int2send - converts int2 to binary format
+ */
+Datum
+int2send(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendint16(&buf, arg1);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/*
+ * construct int2vector given a raw array of int2s
+ *
+ * If int2s is NULL then caller must fill values[] afterward
+ */
+int2vector *
+buildint2vector(const int16 *int2s, int n)
+{
+ int2vector *result;
+
+ result = (int2vector *) palloc0(Int2VectorSize(n));
+
+ if (n > 0 && int2s)
+ memcpy(result->values, int2s, n * sizeof(int16));
+
+ /*
+ * Attach standard array header. For historical reasons, we set the index
+ * lower bound to 0 not 1.
+ */
+ SET_VARSIZE(result, Int2VectorSize(n));
+ result->ndim = 1;
+ result->dataoffset = 0; /* never any nulls */
+ result->elemtype = INT2OID;
+ result->dim1 = n;
+ result->lbound1 = 0;
+
+ return result;
+}
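A minimal usage sketch for buildint2vector() (the values are made up for illustration):

    int16       cols[3] = {1, 3, 2};
    int2vector *iv = buildint2vector(cols, 3);

    /* iv->dim1 == 3, iv->values[] == {1, 3, 2}, index lower bound 0 */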
+
+/*
+ * int2vectorin - converts "num num ..." to internal form
+ */
+Datum
+int2vectorin(PG_FUNCTION_ARGS)
+{
+ char *intString = PG_GETARG_CSTRING(0);
+ int2vector *result;
+ int nalloc;
+ int n;
+
+ nalloc = 32; /* arbitrary initial size guess */
+ result = (int2vector *) palloc0(Int2VectorSize(nalloc));
+
+ for (n = 0;; n++)
+ {
+ long l;
+ char *endp;
+
+ while (*intString && isspace((unsigned char) *intString))
+ intString++;
+ if (*intString == '\0')
+ break;
+
+ if (n >= nalloc)
+ {
+ nalloc *= 2;
+ result = (int2vector *) repalloc(result, Int2VectorSize(nalloc));
+ }
+
+ errno = 0;
+ l = strtol(intString, &endp, 10);
+
+ if (intString == endp)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "smallint", intString)));
+
+ if (errno == ERANGE || l < SHRT_MIN || l > SHRT_MAX)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value \"%s\" is out of range for type %s", intString,
+ "smallint")));
+
+ if (*endp && *endp != ' ')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "smallint", intString)));
+
+ result->values[n] = l;
+ intString = endp;
+ }
+
+ SET_VARSIZE(result, Int2VectorSize(n));
+ result->ndim = 1;
+ result->dataoffset = 0; /* never any nulls */
+ result->elemtype = INT2OID;
+ result->dim1 = n;
+ result->lbound1 = 0;
+
+ PG_RETURN_POINTER(result);
+}
+
+/*
+ * int2vectorout - converts internal form to "num num ..."
+ */
+Datum
+int2vectorout(PG_FUNCTION_ARGS)
+{
+ int2vector *int2Array = (int2vector *) PG_GETARG_POINTER(0);
+ int num,
+ nnums = int2Array->dim1;
+ char *rp;
+ char *result;
+
+ /* assumes sign, 5 digits, ' ' */
+ rp = result = (char *) palloc(nnums * 7 + 1);
+ for (num = 0; num < nnums; num++)
+ {
+ if (num != 0)
+ *rp++ = ' ';
+ rp += pg_itoa(int2Array->values[num], rp);
+ }
+ *rp = '\0';
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * int2vectorrecv - converts external binary format to int2vector
+ */
+Datum
+int2vectorrecv(PG_FUNCTION_ARGS)
+{
+ LOCAL_FCINFO(locfcinfo, 3);
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ int2vector *result;
+
+ /*
+ * Normally one would call array_recv() using DirectFunctionCall3, but
+ * that does not work since array_recv wants to cache some data using
+ * fcinfo->flinfo->fn_extra. So we need to pass it our own flinfo
+ * parameter.
+ */
+ InitFunctionCallInfoData(*locfcinfo, fcinfo->flinfo, 3,
+ InvalidOid, NULL, NULL);
+
+ locfcinfo->args[0].value = PointerGetDatum(buf);
+ locfcinfo->args[0].isnull = false;
+ locfcinfo->args[1].value = ObjectIdGetDatum(INT2OID);
+ locfcinfo->args[1].isnull = false;
+ locfcinfo->args[2].value = Int32GetDatum(-1);
+ locfcinfo->args[2].isnull = false;
+
+ result = (int2vector *) DatumGetPointer(array_recv(locfcinfo));
+
+ Assert(!locfcinfo->isnull);
+
+ /* sanity checks: int2vector must be 1-D, 0-based, no nulls */
+ if (ARR_NDIM(result) != 1 ||
+ ARR_HASNULL(result) ||
+ ARR_ELEMTYPE(result) != INT2OID ||
+ ARR_LBOUND(result)[0] != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("invalid int2vector data")));
+
+ PG_RETURN_POINTER(result);
+}
+
+/*
+ * int2vectorsend - converts int2vector to binary format
+ */
+Datum
+int2vectorsend(PG_FUNCTION_ARGS)
+{
+ return array_send(fcinfo);
+}
+
+
+/*****************************************************************************
+ * PUBLIC ROUTINES *
+ *****************************************************************************/
+
+/*
+ * int4in - converts "num" to int4
+ */
+Datum
+int4in(PG_FUNCTION_ARGS)
+{
+ char *num = PG_GETARG_CSTRING(0);
+
+ PG_RETURN_INT32(pg_strtoint32(num));
+}
+
+/*
+ * int4out - converts int4 to "num"
+ */
+Datum
+int4out(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ char *result = (char *) palloc(12); /* sign, 10 digits, '\0' */
+
+ pg_ltoa(arg1, result);
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * int4recv - converts external binary format to int4
+ */
+Datum
+int4recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+
+ PG_RETURN_INT32((int32) pq_getmsgint(buf, sizeof(int32)));
+}
+
+/*
+ * int4send - converts int4 to binary format
+ */
+Datum
+int4send(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendint32(&buf, arg1);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+
+/*
+ * ===================
+ * CONVERSION ROUTINES
+ * ===================
+ */
+
+Datum
+i2toi4(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+
+ PG_RETURN_INT32((int32) arg1);
+}
+
+Datum
+i4toi2(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+
+ if (unlikely(arg1 < SHRT_MIN) || unlikely(arg1 > SHRT_MAX))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("smallint out of range")));
+
+ PG_RETURN_INT16((int16) arg1);
+}
+
+/* Cast int4 -> bool */
+Datum
+int4_bool(PG_FUNCTION_ARGS)
+{
+ if (PG_GETARG_INT32(0) == 0)
+ PG_RETURN_BOOL(false);
+ else
+ PG_RETURN_BOOL(true);
+}
+
+/* Cast bool -> int4 */
+Datum
+bool_int4(PG_FUNCTION_ARGS)
+{
+ if (PG_GETARG_BOOL(0) == false)
+ PG_RETURN_INT32(0);
+ else
+ PG_RETURN_INT32(1);
+}
+
+/*
+ * ============================
+ * COMPARISON OPERATOR ROUTINES
+ * ============================
+ */
+
+/*
+ * inteq - returns 1 iff arg1 == arg2
+ * intne - returns 1 iff arg1 != arg2
+ * intlt - returns 1 iff arg1 < arg2
+ * intle - returns 1 iff arg1 <= arg2
+ * intgt - returns 1 iff arg1 > arg2
+ * intge - returns 1 iff arg1 >= arg2
+ */
+
+Datum
+int4eq(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_BOOL(arg1 == arg2);
+}
+
+Datum
+int4ne(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_BOOL(arg1 != arg2);
+}
+
+Datum
+int4lt(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_BOOL(arg1 < arg2);
+}
+
+Datum
+int4le(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_BOOL(arg1 <= arg2);
+}
+
+Datum
+int4gt(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_BOOL(arg1 > arg2);
+}
+
+Datum
+int4ge(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_BOOL(arg1 >= arg2);
+}
+
+Datum
+int2eq(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_BOOL(arg1 == arg2);
+}
+
+Datum
+int2ne(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_BOOL(arg1 != arg2);
+}
+
+Datum
+int2lt(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_BOOL(arg1 < arg2);
+}
+
+Datum
+int2le(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_BOOL(arg1 <= arg2);
+}
+
+Datum
+int2gt(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_BOOL(arg1 > arg2);
+}
+
+Datum
+int2ge(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_BOOL(arg1 >= arg2);
+}
+
+Datum
+int24eq(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_BOOL(arg1 == arg2);
+}
+
+Datum
+int24ne(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_BOOL(arg1 != arg2);
+}
+
+Datum
+int24lt(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_BOOL(arg1 < arg2);
+}
+
+Datum
+int24le(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_BOOL(arg1 <= arg2);
+}
+
+Datum
+int24gt(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_BOOL(arg1 > arg2);
+}
+
+Datum
+int24ge(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_BOOL(arg1 >= arg2);
+}
+
+Datum
+int42eq(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_BOOL(arg1 == arg2);
+}
+
+Datum
+int42ne(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_BOOL(arg1 != arg2);
+}
+
+Datum
+int42lt(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_BOOL(arg1 < arg2);
+}
+
+Datum
+int42le(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_BOOL(arg1 <= arg2);
+}
+
+Datum
+int42gt(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_BOOL(arg1 > arg2);
+}
+
+Datum
+int42ge(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_BOOL(arg1 >= arg2);
+}
+
+
+/*----------------------------------------------------------
+ * in_range functions for int4 and int2,
+ * including cross-data-type comparisons.
+ *
+ * Note: we provide separate intN_int8 functions for performance
+ * reasons. This forces also providing intN_int2, else cases with a
+ * smallint offset value would fail to resolve which function to use.
+ * But that's an unlikely situation, so don't duplicate code for it.
+ *---------------------------------------------------------*/
+
+Datum
+in_range_int4_int4(PG_FUNCTION_ARGS)
+{
+ int32 val = PG_GETARG_INT32(0);
+ int32 base = PG_GETARG_INT32(1);
+ int32 offset = PG_GETARG_INT32(2);
+ bool sub = PG_GETARG_BOOL(3);
+ bool less = PG_GETARG_BOOL(4);
+ int32 sum;
+
+ if (offset < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE),
+ errmsg("invalid preceding or following size in window function")));
+
+ if (sub)
+ offset = -offset; /* cannot overflow */
+
+ if (unlikely(pg_add_s32_overflow(base, offset, &sum)))
+ {
+ /*
+ * If sub is false, the true sum is surely more than val, so correct
+ * answer is the same as "less". If sub is true, the true sum is
+ * surely less than val, so the answer is "!less".
+ */
+ PG_RETURN_BOOL(sub ? !less : less);
+ }
+
+ if (less)
+ PG_RETURN_BOOL(val <= sum);
+ else
+ PG_RETURN_BOOL(val >= sum);
+}
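To make the frame convention concrete, a hedged illustration of the argument meanings (these functions are normally invoked by the executor for RANGE offset PRECEDING/FOLLOWING window frames; the calls below are conceptual):

    /*
     * in_range(val, base, offset, sub, less)
     *   sub  = true   -> compare val against base - offset
     *   sub  = false  -> compare val against base + offset
     *   less = true   -> test val <= bound, otherwise val >= bound
     *
     * e.g. in_range_int4_int4(7, 10, 5, true, false)  checks 7 >= 10 - 5 -> true
     *      in_range_int4_int4(7, 10, 5, false, true)  checks 7 <= 10 + 5 -> true
     */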
+
+Datum
+in_range_int4_int2(PG_FUNCTION_ARGS)
+{
+ /* Doesn't seem worth duplicating code for, so just invoke int4_int4 */
+ return DirectFunctionCall5(in_range_int4_int4,
+ PG_GETARG_DATUM(0),
+ PG_GETARG_DATUM(1),
+ Int32GetDatum((int32) PG_GETARG_INT16(2)),
+ PG_GETARG_DATUM(3),
+ PG_GETARG_DATUM(4));
+}
+
+Datum
+in_range_int4_int8(PG_FUNCTION_ARGS)
+{
+ /* We must do all the math in int64 */
+ int64 val = (int64) PG_GETARG_INT32(0);
+ int64 base = (int64) PG_GETARG_INT32(1);
+ int64 offset = PG_GETARG_INT64(2);
+ bool sub = PG_GETARG_BOOL(3);
+ bool less = PG_GETARG_BOOL(4);
+ int64 sum;
+
+ if (offset < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE),
+ errmsg("invalid preceding or following size in window function")));
+
+ if (sub)
+ offset = -offset; /* cannot overflow */
+
+ if (unlikely(pg_add_s64_overflow(base, offset, &sum)))
+ {
+ /*
+ * If sub is false, the true sum is surely more than val, so correct
+ * answer is the same as "less". If sub is true, the true sum is
+ * surely less than val, so the answer is "!less".
+ */
+ PG_RETURN_BOOL(sub ? !less : less);
+ }
+
+ if (less)
+ PG_RETURN_BOOL(val <= sum);
+ else
+ PG_RETURN_BOOL(val >= sum);
+}
+
+Datum
+in_range_int2_int4(PG_FUNCTION_ARGS)
+{
+ /* We must do all the math in int32 */
+ int32 val = (int32) PG_GETARG_INT16(0);
+ int32 base = (int32) PG_GETARG_INT16(1);
+ int32 offset = PG_GETARG_INT32(2);
+ bool sub = PG_GETARG_BOOL(3);
+ bool less = PG_GETARG_BOOL(4);
+ int32 sum;
+
+ if (offset < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE),
+ errmsg("invalid preceding or following size in window function")));
+
+ if (sub)
+ offset = -offset; /* cannot overflow */
+
+ if (unlikely(pg_add_s32_overflow(base, offset, &sum)))
+ {
+ /*
+ * If sub is false, the true sum is surely more than val, so correct
+ * answer is the same as "less". If sub is true, the true sum is
+ * surely less than val, so the answer is "!less".
+ */
+ PG_RETURN_BOOL(sub ? !less : less);
+ }
+
+ if (less)
+ PG_RETURN_BOOL(val <= sum);
+ else
+ PG_RETURN_BOOL(val >= sum);
+}
+
+Datum
+in_range_int2_int2(PG_FUNCTION_ARGS)
+{
+ /* Doesn't seem worth duplicating code for, so just invoke int2_int4 */
+ return DirectFunctionCall5(in_range_int2_int4,
+ PG_GETARG_DATUM(0),
+ PG_GETARG_DATUM(1),
+ Int32GetDatum((int32) PG_GETARG_INT16(2)),
+ PG_GETARG_DATUM(3),
+ PG_GETARG_DATUM(4));
+}
+
+Datum
+in_range_int2_int8(PG_FUNCTION_ARGS)
+{
+ /* Doesn't seem worth duplicating code for, so just invoke int4_int8 */
+ return DirectFunctionCall5(in_range_int4_int8,
+ Int32GetDatum((int32) PG_GETARG_INT16(0)),
+ Int32GetDatum((int32) PG_GETARG_INT16(1)),
+ PG_GETARG_DATUM(2),
+ PG_GETARG_DATUM(3),
+ PG_GETARG_DATUM(4));
+}
+
+
+/*
+ * int[24]pl - returns arg1 + arg2
+ * int[24]mi - returns arg1 - arg2
+ * int[24]mul - returns arg1 * arg2
+ * int[24]div - returns arg1 / arg2
+ */
+
+Datum
+int4um(PG_FUNCTION_ARGS)
+{
+ int32 arg = PG_GETARG_INT32(0);
+
+ if (unlikely(arg == PG_INT32_MIN))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+ PG_RETURN_INT32(-arg);
+}
+
+Datum
+int4up(PG_FUNCTION_ARGS)
+{
+ int32 arg = PG_GETARG_INT32(0);
+
+ PG_RETURN_INT32(arg);
+}
+
+Datum
+int4pl(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+ int32 result;
+
+ if (unlikely(pg_add_s32_overflow(arg1, arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+ PG_RETURN_INT32(result);
+}
+
+Datum
+int4mi(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+ int32 result;
+
+ if (unlikely(pg_sub_s32_overflow(arg1, arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+ PG_RETURN_INT32(result);
+}
+
+Datum
+int4mul(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+ int32 result;
+
+ if (unlikely(pg_mul_s32_overflow(arg1, arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+ PG_RETURN_INT32(result);
+}
+
+Datum
+int4div(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+ int32 result;
+
+ if (arg2 == 0)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+ /* ensure compiler realizes we mustn't reach the division (gcc bug) */
+ PG_RETURN_NULL();
+ }
+
+ /*
+ * INT_MIN / -1 is problematic, since the result can't be represented on a
+ * two's-complement machine. Some machines produce INT_MIN, some produce
+ * zero, some throw an exception. We can dodge the problem by recognizing
+ * that division by -1 is the same as negation.
+ */
+ if (arg2 == -1)
+ {
+ if (unlikely(arg1 == PG_INT32_MIN))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+ result = -arg1;
+ PG_RETURN_INT32(result);
+ }
+
+ /* No overflow is possible */
+
+ result = arg1 / arg2;
+
+ PG_RETURN_INT32(result);
+}
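Restating the -1 special case with concrete numbers (an illustration of the comment above):

    /*
     * PG_INT32_MIN / -1 would be +2147483648, which does not fit in int32,
     * so it is reported as "integer out of range" rather than left to
     * whatever the hardware happens to do.
     */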
+
+Datum
+int4inc(PG_FUNCTION_ARGS)
+{
+ int32 arg = PG_GETARG_INT32(0);
+ int32 result;
+
+ if (unlikely(pg_add_s32_overflow(arg, 1, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+
+ PG_RETURN_INT32(result);
+}
+
+Datum
+int2um(PG_FUNCTION_ARGS)
+{
+ int16 arg = PG_GETARG_INT16(0);
+
+ if (unlikely(arg == PG_INT16_MIN))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("smallint out of range")));
+ PG_RETURN_INT16(-arg);
+}
+
+Datum
+int2up(PG_FUNCTION_ARGS)
+{
+ int16 arg = PG_GETARG_INT16(0);
+
+ PG_RETURN_INT16(arg);
+}
+
+Datum
+int2pl(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+ int16 result;
+
+ if (unlikely(pg_add_s16_overflow(arg1, arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("smallint out of range")));
+ PG_RETURN_INT16(result);
+}
+
+Datum
+int2mi(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+ int16 result;
+
+ if (unlikely(pg_sub_s16_overflow(arg1, arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("smallint out of range")));
+ PG_RETURN_INT16(result);
+}
+
+Datum
+int2mul(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+ int16 result;
+
+ if (unlikely(pg_mul_s16_overflow(arg1, arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("smallint out of range")));
+
+ PG_RETURN_INT16(result);
+}
+
+Datum
+int2div(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+ int16 result;
+
+ if (arg2 == 0)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+ /* ensure compiler realizes we mustn't reach the division (gcc bug) */
+ PG_RETURN_NULL();
+ }
+
+ /*
+ * SHRT_MIN / -1 is problematic, since the result can't be represented on
+ * a two's-complement machine. Some machines produce SHRT_MIN, some
+ * produce zero, some throw an exception. We can dodge the problem by
+ * recognizing that division by -1 is the same as negation.
+ */
+ if (arg2 == -1)
+ {
+ if (unlikely(arg1 == PG_INT16_MIN))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("smallint out of range")));
+ result = -arg1;
+ PG_RETURN_INT16(result);
+ }
+
+ /* No overflow is possible */
+
+ result = arg1 / arg2;
+
+ PG_RETURN_INT16(result);
+}
+
+Datum
+int24pl(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+ int32 result;
+
+ if (unlikely(pg_add_s32_overflow((int32) arg1, arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+ PG_RETURN_INT32(result);
+}
+
+Datum
+int24mi(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+ int32 result;
+
+ if (unlikely(pg_sub_s32_overflow((int32) arg1, arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+ PG_RETURN_INT32(result);
+}
+
+Datum
+int24mul(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+ int32 result;
+
+ if (unlikely(pg_mul_s32_overflow((int32) arg1, arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+ PG_RETURN_INT32(result);
+}
+
+Datum
+int24div(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ if (unlikely(arg2 == 0))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+ /* ensure compiler realizes we mustn't reach the division (gcc bug) */
+ PG_RETURN_NULL();
+ }
+
+ /* No overflow is possible */
+ PG_RETURN_INT32((int32) arg1 / arg2);
+}
+
+Datum
+int42pl(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+ int32 result;
+
+ if (unlikely(pg_add_s32_overflow(arg1, (int32) arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+ PG_RETURN_INT32(result);
+}
+
+Datum
+int42mi(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+ int32 result;
+
+ if (unlikely(pg_sub_s32_overflow(arg1, (int32) arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+ PG_RETURN_INT32(result);
+}
+
+Datum
+int42mul(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+ int32 result;
+
+ if (unlikely(pg_mul_s32_overflow(arg1, (int32) arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+ PG_RETURN_INT32(result);
+}
+
+Datum
+int42div(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+ int32 result;
+
+ if (unlikely(arg2 == 0))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+ /* ensure compiler realizes we mustn't reach the division (gcc bug) */
+ PG_RETURN_NULL();
+ }
+
+ /*
+ * INT_MIN / -1 is problematic, since the result can't be represented on a
+ * two's-complement machine. Some machines produce INT_MIN, some produce
+ * zero, some throw an exception. We can dodge the problem by recognizing
+ * that division by -1 is the same as negation.
+ */
+ if (arg2 == -1)
+ {
+ if (unlikely(arg1 == PG_INT32_MIN))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+ result = -arg1;
+ PG_RETURN_INT32(result);
+ }
+
+ /* No overflow is possible */
+
+ result = arg1 / arg2;
+
+ PG_RETURN_INT32(result);
+}
+
+Datum
+int4mod(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ if (unlikely(arg2 == 0))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+ /* ensure compiler realizes we mustn't reach the division (gcc bug) */
+ PG_RETURN_NULL();
+ }
+
+ /*
+ * Some machines throw a floating-point exception for INT_MIN % -1, which
+ * is a bit silly since the correct answer is perfectly well-defined,
+ * namely zero.
+ */
+ if (arg2 == -1)
+ PG_RETURN_INT32(0);
+
+ /* No overflow is possible */
+
+ PG_RETURN_INT32(arg1 % arg2);
+}
+
+Datum
+int2mod(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+
+ if (unlikely(arg2 == 0))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+ /* ensure compiler realizes we mustn't reach the division (gcc bug) */
+ PG_RETURN_NULL();
+ }
+
+ /*
+ * Some machines throw a floating-point exception for INT_MIN % -1, which
+ * is a bit silly since the correct answer is perfectly well-defined,
+ * namely zero. (It's not clear this ever happens when dealing with
+ * int16, but we might as well have the test for safety.)
+ */
+ if (arg2 == -1)
+ PG_RETURN_INT16(0);
+
+ /* No overflow is possible */
+
+ PG_RETURN_INT16(arg1 % arg2);
+}
+
+
+/* int[24]abs()
+ * Absolute value
+ */
+Datum
+int4abs(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int32 result;
+
+ if (unlikely(arg1 == PG_INT32_MIN))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+ result = (arg1 < 0) ? -arg1 : arg1;
+ PG_RETURN_INT32(result);
+}
+
+Datum
+int2abs(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int16 result;
+
+ if (unlikely(arg1 == PG_INT16_MIN))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("smallint out of range")));
+ result = (arg1 < 0) ? -arg1 : arg1;
+ PG_RETURN_INT16(result);
+}
+
+/*
+ * Greatest Common Divisor
+ *
+ * Returns the largest positive integer that exactly divides both inputs.
+ * Special cases:
+ * - gcd(x, 0) = gcd(0, x) = abs(x)
+ * because 0 is divisible by anything
+ * - gcd(0, 0) = 0
+ * complies with the previous definition and is a common convention
+ *
+ * Special care must be taken if either input is INT_MIN --- gcd(0, INT_MIN),
+ * gcd(INT_MIN, 0) and gcd(INT_MIN, INT_MIN) are all equal to abs(INT_MIN),
+ * which cannot be represented as a 32-bit signed integer.
+ */
+static int32
+int4gcd_internal(int32 arg1, int32 arg2)
+{
+ int32 swap;
+ int32 a1,
+ a2;
+
+ /*
+ * Put the greater absolute value in arg1.
+ *
+ * This would happen automatically in the loop below, but doing it up
+ * front avoids an expensive modulo operation and simplifies the
+ * special-case handling for INT_MIN below.
+ *
+ * We do this in negative space in order to handle INT_MIN.
+ */
+ a1 = (arg1 < 0) ? arg1 : -arg1;
+ a2 = (arg2 < 0) ? arg2 : -arg2;
+ if (a1 > a2)
+ {
+ swap = arg1;
+ arg1 = arg2;
+ arg2 = swap;
+ }
+
+ /* Special care needs to be taken with INT_MIN. See comments above. */
+ if (arg1 == PG_INT32_MIN)
+ {
+ if (arg2 == 0 || arg2 == PG_INT32_MIN)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+
+ /*
+ * Some machines throw a floating-point exception for INT_MIN % -1,
+ * which is a bit silly since the correct answer is perfectly
+ * well-defined, namely zero. Guard against this and just return the
+ * result, gcd(INT_MIN, -1) = 1.
+ */
+ if (arg2 == -1)
+ return 1;
+ }
+
+ /* Use the Euclidean algorithm to find the GCD */
+ while (arg2 != 0)
+ {
+ swap = arg2;
+ arg2 = arg1 % arg2;
+ arg1 = swap;
+ }
+
+ /*
+ * Make sure the result is positive. (We know we don't have INT_MIN
+ * anymore).
+ */
+ if (arg1 < 0)
+ arg1 = -arg1;
+
+ return arg1;
+}
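Some illustrative values for int4gcd_internal(), following the negative-space handling described above (a sketch, not a regression test):

    /*
     * int4gcd_internal(12, 18)            == 6
     * int4gcd_internal(-12, 18)           == 6   (result is always positive)
     * int4gcd_internal(0, -7)             == 7
     * int4gcd_internal(PG_INT32_MIN, -1)  == 1   (special-cased above)
     * int4gcd_internal(PG_INT32_MIN, 0)   -> error, |INT_MIN| is not representable
     */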
+
+Datum
+int4gcd(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+ int32 result;
+
+ result = int4gcd_internal(arg1, arg2);
+
+ PG_RETURN_INT32(result);
+}
+
+/*
+ * Least Common Multiple
+ */
+Datum
+int4lcm(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+ int32 gcd;
+ int32 result;
+
+ /*
+ * Handle lcm(x, 0) = lcm(0, x) = 0 as a special case. This prevents a
+ * division-by-zero error below when x is zero, and an overflow error from
+ * the GCD computation when x = INT_MIN.
+ */
+ if (arg1 == 0 || arg2 == 0)
+ PG_RETURN_INT32(0);
+
+ /* lcm(x, y) = abs(x / gcd(x, y) * y) */
+ gcd = int4gcd_internal(arg1, arg2);
+ arg1 = arg1 / gcd;
+
+ if (unlikely(pg_mul_s32_overflow(arg1, arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+
+ /* If the result is INT_MIN, it cannot be represented. */
+ if (unlikely(result == PG_INT32_MIN))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+
+ if (result < 0)
+ result = -result;
+
+ PG_RETURN_INT32(result);
+}
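Worked numbers for the lcm(x, y) = abs(x / gcd(x, y) * y) formula used above (illustrative):

    /*
     * lcm(4, 6)  : gcd = 2,  4 / 2 =  2,   2 * 6 =  12, abs -> 12
     * lcm(-4, 6) : gcd = 2, -4 / 2 = -2,  -2 * 6 = -12, abs -> 12
     * lcm(x, 0) and lcm(0, x) short-circuit to 0 before any division
     */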
+
+Datum
+int2larger(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_INT16((arg1 > arg2) ? arg1 : arg2);
+}
+
+Datum
+int2smaller(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_INT16((arg1 < arg2) ? arg1 : arg2);
+}
+
+Datum
+int4larger(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_INT32((arg1 > arg2) ? arg1 : arg2);
+}
+
+Datum
+int4smaller(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_INT32((arg1 < arg2) ? arg1 : arg2);
+}
+
+/*
+ * Bit-pushing operators
+ *
+ * int[24]and - returns arg1 & arg2
+ * int[24]or - returns arg1 | arg2
+ * int[24]xor - returns arg1 # arg2
+ * int[24]not - returns ~arg1
+ * int[24]shl - returns arg1 << arg2
+ * int[24]shr - returns arg1 >> arg2
+ */
+
+Datum
+int4and(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_INT32(arg1 & arg2);
+}
+
+Datum
+int4or(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_INT32(arg1 | arg2);
+}
+
+Datum
+int4xor(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_INT32(arg1 ^ arg2);
+}
+
+Datum
+int4shl(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_INT32(arg1 << arg2);
+}
+
+Datum
+int4shr(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_INT32(arg1 >> arg2);
+}
+
+Datum
+int4not(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+
+ PG_RETURN_INT32(~arg1);
+}
+
+Datum
+int2and(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_INT16(arg1 & arg2);
+}
+
+Datum
+int2or(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_INT16(arg1 | arg2);
+}
+
+Datum
+int2xor(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_INT16(arg1 ^ arg2);
+}
+
+Datum
+int2not(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+
+ PG_RETURN_INT16(~arg1);
+}
+
+
+Datum
+int2shl(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_INT16(arg1 << arg2);
+}
+
+Datum
+int2shr(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_INT16(arg1 >> arg2);
+}
+
+/*
+ * non-persistent numeric series generator
+ */
+Datum
+generate_series_int4(PG_FUNCTION_ARGS)
+{
+ return generate_series_step_int4(fcinfo);
+}
+
+Datum
+generate_series_step_int4(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ generate_series_fctx *fctx;
+ int32 result;
+ MemoryContext oldcontext;
+
+ /* stuff done only on the first call of the function */
+ if (SRF_IS_FIRSTCALL())
+ {
+ int32 start = PG_GETARG_INT32(0);
+ int32 finish = PG_GETARG_INT32(1);
+ int32 step = 1;
+
+ /* see if we were given an explicit step size */
+ if (PG_NARGS() == 3)
+ step = PG_GETARG_INT32(2);
+ if (step == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("step size cannot equal zero")));
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ /*
+ * switch to memory context appropriate for multiple function calls
+ */
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ /* allocate memory for user context */
+ fctx = (generate_series_fctx *) palloc(sizeof(generate_series_fctx));
+
+ /*
+ * Use fctx to keep state from call to call. Seed current with the
+ * original start value
+ */
+ fctx->current = start;
+ fctx->finish = finish;
+ fctx->step = step;
+
+ funcctx->user_fctx = fctx;
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ /* stuff done on every call of the function */
+ funcctx = SRF_PERCALL_SETUP();
+
+ /*
+ * get the saved state and use current as the result for this iteration
+ */
+ fctx = funcctx->user_fctx;
+ result = fctx->current;
+
+ if ((fctx->step > 0 && fctx->current <= fctx->finish) ||
+ (fctx->step < 0 && fctx->current >= fctx->finish))
+ {
+ /*
+ * Increment current in preparation for next iteration. If next-value
+ * computation overflows, this is the final result.
+ */
+ if (pg_add_s32_overflow(fctx->current, fctx->step, &fctx->current))
+ fctx->step = 0;
+
+ /* do when there is more left to send */
+ SRF_RETURN_NEXT(funcctx, Int32GetDatum(result));
+ }
+ else
+ /* do when there is no more left */
+ SRF_RETURN_DONE(funcctx);
+}
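An illustrative call-by-call trace of the overflow handling above (once the next value would overflow, step is forced to 0, which ends the set on the following call):

    /*
     * generate_series(2147483645, 2147483647):
     *   call 1: returns 2147483645, next value 2147483646
     *   call 2: returns 2147483646, next value 2147483647
     *   call 3: returns 2147483647, next value overflows -> step := 0
     *   call 4: step == 0 satisfies neither loop condition -> SRF done
     */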
+
+/*
+ * Planner support function for generate_series(int4, int4 [, int4])
+ */
+Datum
+generate_series_int4_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+ Node *ret = NULL;
+
+ if (IsA(rawreq, SupportRequestRows))
+ {
+ /* Try to estimate the number of rows returned */
+ SupportRequestRows *req = (SupportRequestRows *) rawreq;
+
+ if (is_funcclause(req->node)) /* be paranoid */
+ {
+ List *args = ((FuncExpr *) req->node)->args;
+ Node *arg1,
+ *arg2,
+ *arg3;
+
+ /* We can use estimated argument values here */
+ arg1 = estimate_expression_value(req->root, linitial(args));
+ arg2 = estimate_expression_value(req->root, lsecond(args));
+ if (list_length(args) >= 3)
+ arg3 = estimate_expression_value(req->root, lthird(args));
+ else
+ arg3 = NULL;
+
+ /*
+ * If any argument is constant NULL, we can safely assume that
+ * zero rows are returned. Otherwise, if they're all non-NULL
+ * constants, we can calculate the number of rows that will be
+ * returned. Use double arithmetic to avoid overflow hazards.
+ */
+ if ((IsA(arg1, Const) &&
+ ((Const *) arg1)->constisnull) ||
+ (IsA(arg2, Const) &&
+ ((Const *) arg2)->constisnull) ||
+ (arg3 != NULL && IsA(arg3, Const) &&
+ ((Const *) arg3)->constisnull))
+ {
+ req->rows = 0;
+ ret = (Node *) req;
+ }
+ else if (IsA(arg1, Const) &&
+ IsA(arg2, Const) &&
+ (arg3 == NULL || IsA(arg3, Const)))
+ {
+ double start,
+ finish,
+ step;
+
+ start = DatumGetInt32(((Const *) arg1)->constvalue);
+ finish = DatumGetInt32(((Const *) arg2)->constvalue);
+ step = arg3 ? DatumGetInt32(((Const *) arg3)->constvalue) : 1;
+
+ /* This equation works for either sign of step */
+ if (step != 0)
+ {
+ req->rows = floor((finish - start + step) / step);
+ ret = (Node *) req;
+ }
+ }
+ }
+ }
+
+ PG_RETURN_POINTER(ret);
+}
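Row-count examples for the floor((finish - start + step) / step) estimate used by the support function above (illustrative):

    /*
     * generate_series(1, 10)      -> floor((10 - 1 + 1) / 1)   = 10 rows
     * generate_series(1, 10, 3)   -> floor((10 - 1 + 3) / 3)   =  4 rows  (1, 4, 7, 10)
     * generate_series(10, 1, -4)  -> floor((1 - 10 - 4) / -4)  =  3 rows  (10, 6, 2)
     */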
diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c
new file mode 100644
index 0000000..98d4323
--- /dev/null
+++ b/src/backend/utils/adt/int8.c
@@ -0,0 +1,1524 @@
+/*-------------------------------------------------------------------------
+ *
+ * int8.c
+ * Internal 64-bit integer operations
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/int8.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+#include <limits.h>
+#include <math.h>
+
+#include "common/int.h"
+#include "funcapi.h"
+#include "libpq/pqformat.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "optimizer/optimizer.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+
+
+typedef struct
+{
+ int64 current;
+ int64 finish;
+ int64 step;
+} generate_series_fctx;
+
+
+/***********************************************************************
+ **
+ ** Routines for 64-bit integers.
+ **
+ ***********************************************************************/
+
+/*----------------------------------------------------------
+ * Formatting and conversion routines.
+ *---------------------------------------------------------*/
+
+/* int8in()
+ */
+Datum
+int8in(PG_FUNCTION_ARGS)
+{
+ char *num = PG_GETARG_CSTRING(0);
+
+ PG_RETURN_INT64(pg_strtoint64(num));
+}
+
+
+/* int8out()
+ */
+Datum
+int8out(PG_FUNCTION_ARGS)
+{
+ int64 val = PG_GETARG_INT64(0);
+ char buf[MAXINT8LEN + 1];
+ char *result;
+ int len;
+
+ len = pg_lltoa(val, buf) + 1;
+
+ /*
+ * Since the length is already known, we do a manual palloc() and memcpy()
+ * to avoid the strlen() call that would otherwise be done in pstrdup().
+ */
+ result = palloc(len);
+ memcpy(result, buf, len);
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * int8recv - converts external binary format to int8
+ */
+Datum
+int8recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+
+ PG_RETURN_INT64(pq_getmsgint64(buf));
+}
+
+/*
+ * int8send - converts int8 to binary format
+ */
+Datum
+int8send(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendint64(&buf, arg1);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+
+/*----------------------------------------------------------
+ * Relational operators for int8s, including cross-data-type comparisons.
+ *---------------------------------------------------------*/
+
+/* int8relop()
+ * Is val1 relop val2?
+ */
+Datum
+int8eq(PG_FUNCTION_ARGS)
+{
+ int64 val1 = PG_GETARG_INT64(0);
+ int64 val2 = PG_GETARG_INT64(1);
+
+ PG_RETURN_BOOL(val1 == val2);
+}
+
+Datum
+int8ne(PG_FUNCTION_ARGS)
+{
+ int64 val1 = PG_GETARG_INT64(0);
+ int64 val2 = PG_GETARG_INT64(1);
+
+ PG_RETURN_BOOL(val1 != val2);
+}
+
+Datum
+int8lt(PG_FUNCTION_ARGS)
+{
+ int64 val1 = PG_GETARG_INT64(0);
+ int64 val2 = PG_GETARG_INT64(1);
+
+ PG_RETURN_BOOL(val1 < val2);
+}
+
+Datum
+int8gt(PG_FUNCTION_ARGS)
+{
+ int64 val1 = PG_GETARG_INT64(0);
+ int64 val2 = PG_GETARG_INT64(1);
+
+ PG_RETURN_BOOL(val1 > val2);
+}
+
+Datum
+int8le(PG_FUNCTION_ARGS)
+{
+ int64 val1 = PG_GETARG_INT64(0);
+ int64 val2 = PG_GETARG_INT64(1);
+
+ PG_RETURN_BOOL(val1 <= val2);
+}
+
+Datum
+int8ge(PG_FUNCTION_ARGS)
+{
+ int64 val1 = PG_GETARG_INT64(0);
+ int64 val2 = PG_GETARG_INT64(1);
+
+ PG_RETURN_BOOL(val1 >= val2);
+}
+
+/* int84relop()
+ * Is 64-bit val1 relop 32-bit val2?
+ */
+Datum
+int84eq(PG_FUNCTION_ARGS)
+{
+ int64 val1 = PG_GETARG_INT64(0);
+ int32 val2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_BOOL(val1 == val2);
+}
+
+Datum
+int84ne(PG_FUNCTION_ARGS)
+{
+ int64 val1 = PG_GETARG_INT64(0);
+ int32 val2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_BOOL(val1 != val2);
+}
+
+Datum
+int84lt(PG_FUNCTION_ARGS)
+{
+ int64 val1 = PG_GETARG_INT64(0);
+ int32 val2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_BOOL(val1 < val2);
+}
+
+Datum
+int84gt(PG_FUNCTION_ARGS)
+{
+ int64 val1 = PG_GETARG_INT64(0);
+ int32 val2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_BOOL(val1 > val2);
+}
+
+Datum
+int84le(PG_FUNCTION_ARGS)
+{
+ int64 val1 = PG_GETARG_INT64(0);
+ int32 val2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_BOOL(val1 <= val2);
+}
+
+Datum
+int84ge(PG_FUNCTION_ARGS)
+{
+ int64 val1 = PG_GETARG_INT64(0);
+ int32 val2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_BOOL(val1 >= val2);
+}
+
+/* int48relop()
+ * Is 32-bit val1 relop 64-bit val2?
+ */
+Datum
+int48eq(PG_FUNCTION_ARGS)
+{
+ int32 val1 = PG_GETARG_INT32(0);
+ int64 val2 = PG_GETARG_INT64(1);
+
+ PG_RETURN_BOOL(val1 == val2);
+}
+
+Datum
+int48ne(PG_FUNCTION_ARGS)
+{
+ int32 val1 = PG_GETARG_INT32(0);
+ int64 val2 = PG_GETARG_INT64(1);
+
+ PG_RETURN_BOOL(val1 != val2);
+}
+
+Datum
+int48lt(PG_FUNCTION_ARGS)
+{
+ int32 val1 = PG_GETARG_INT32(0);
+ int64 val2 = PG_GETARG_INT64(1);
+
+ PG_RETURN_BOOL(val1 < val2);
+}
+
+Datum
+int48gt(PG_FUNCTION_ARGS)
+{
+ int32 val1 = PG_GETARG_INT32(0);
+ int64 val2 = PG_GETARG_INT64(1);
+
+ PG_RETURN_BOOL(val1 > val2);
+}
+
+Datum
+int48le(PG_FUNCTION_ARGS)
+{
+ int32 val1 = PG_GETARG_INT32(0);
+ int64 val2 = PG_GETARG_INT64(1);
+
+ PG_RETURN_BOOL(val1 <= val2);
+}
+
+Datum
+int48ge(PG_FUNCTION_ARGS)
+{
+ int32 val1 = PG_GETARG_INT32(0);
+ int64 val2 = PG_GETARG_INT64(1);
+
+ PG_RETURN_BOOL(val1 >= val2);
+}
+
+/* int82relop()
+ * Is 64-bit val1 relop 16-bit val2?
+ */
+Datum
+int82eq(PG_FUNCTION_ARGS)
+{
+ int64 val1 = PG_GETARG_INT64(0);
+ int16 val2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_BOOL(val1 == val2);
+}
+
+Datum
+int82ne(PG_FUNCTION_ARGS)
+{
+ int64 val1 = PG_GETARG_INT64(0);
+ int16 val2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_BOOL(val1 != val2);
+}
+
+Datum
+int82lt(PG_FUNCTION_ARGS)
+{
+ int64 val1 = PG_GETARG_INT64(0);
+ int16 val2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_BOOL(val1 < val2);
+}
+
+Datum
+int82gt(PG_FUNCTION_ARGS)
+{
+ int64 val1 = PG_GETARG_INT64(0);
+ int16 val2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_BOOL(val1 > val2);
+}
+
+Datum
+int82le(PG_FUNCTION_ARGS)
+{
+ int64 val1 = PG_GETARG_INT64(0);
+ int16 val2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_BOOL(val1 <= val2);
+}
+
+Datum
+int82ge(PG_FUNCTION_ARGS)
+{
+ int64 val1 = PG_GETARG_INT64(0);
+ int16 val2 = PG_GETARG_INT16(1);
+
+ PG_RETURN_BOOL(val1 >= val2);
+}
+
+/* int28relop()
+ * Is 16-bit val1 relop 64-bit val2?
+ */
+Datum
+int28eq(PG_FUNCTION_ARGS)
+{
+ int16 val1 = PG_GETARG_INT16(0);
+ int64 val2 = PG_GETARG_INT64(1);
+
+ PG_RETURN_BOOL(val1 == val2);
+}
+
+Datum
+int28ne(PG_FUNCTION_ARGS)
+{
+ int16 val1 = PG_GETARG_INT16(0);
+ int64 val2 = PG_GETARG_INT64(1);
+
+ PG_RETURN_BOOL(val1 != val2);
+}
+
+Datum
+int28lt(PG_FUNCTION_ARGS)
+{
+ int16 val1 = PG_GETARG_INT16(0);
+ int64 val2 = PG_GETARG_INT64(1);
+
+ PG_RETURN_BOOL(val1 < val2);
+}
+
+Datum
+int28gt(PG_FUNCTION_ARGS)
+{
+ int16 val1 = PG_GETARG_INT16(0);
+ int64 val2 = PG_GETARG_INT64(1);
+
+ PG_RETURN_BOOL(val1 > val2);
+}
+
+Datum
+int28le(PG_FUNCTION_ARGS)
+{
+ int16 val1 = PG_GETARG_INT16(0);
+ int64 val2 = PG_GETARG_INT64(1);
+
+ PG_RETURN_BOOL(val1 <= val2);
+}
+
+Datum
+int28ge(PG_FUNCTION_ARGS)
+{
+ int16 val1 = PG_GETARG_INT16(0);
+ int64 val2 = PG_GETARG_INT64(1);
+
+ PG_RETURN_BOOL(val1 >= val2);
+}
+
+/*
+ * in_range support function for int8.
+ *
+ * Note: we needn't supply int8_int4 or int8_int2 variants, as implicit
+ * coercion of the offset value takes care of those scenarios just as well.
+ */
+Datum
+in_range_int8_int8(PG_FUNCTION_ARGS)
+{
+ int64 val = PG_GETARG_INT64(0);
+ int64 base = PG_GETARG_INT64(1);
+ int64 offset = PG_GETARG_INT64(2);
+ bool sub = PG_GETARG_BOOL(3);
+ bool less = PG_GETARG_BOOL(4);
+ int64 sum;
+
+ if (offset < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE),
+ errmsg("invalid preceding or following size in window function")));
+
+ if (sub)
+ offset = -offset; /* cannot overflow */
+
+ if (unlikely(pg_add_s64_overflow(base, offset, &sum)))
+ {
+ /*
+ * If sub is false, the true sum is surely more than val, so correct
+ * answer is the same as "less". If sub is true, the true sum is
+ * surely less than val, so the answer is "!less".
+ */
+ PG_RETURN_BOOL(sub ? !less : less);
+ }
+
+ if (less)
+ PG_RETURN_BOOL(val <= sum);
+ else
+ PG_RETURN_BOOL(val >= sum);
+}
+
+
+/*----------------------------------------------------------
+ * Arithmetic operators on 64-bit integers.
+ *---------------------------------------------------------*/
+
+Datum
+int8um(PG_FUNCTION_ARGS)
+{
+ int64 arg = PG_GETARG_INT64(0);
+ int64 result;
+
+ if (unlikely(arg == PG_INT64_MIN))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+ result = -arg;
+ PG_RETURN_INT64(result);
+}
+
+Datum
+int8up(PG_FUNCTION_ARGS)
+{
+ int64 arg = PG_GETARG_INT64(0);
+
+ PG_RETURN_INT64(arg);
+}
+
+Datum
+int8pl(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int64 arg2 = PG_GETARG_INT64(1);
+ int64 result;
+
+ if (unlikely(pg_add_s64_overflow(arg1, arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+ PG_RETURN_INT64(result);
+}
+
+Datum
+int8mi(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int64 arg2 = PG_GETARG_INT64(1);
+ int64 result;
+
+ if (unlikely(pg_sub_s64_overflow(arg1, arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+ PG_RETURN_INT64(result);
+}
+
+Datum
+int8mul(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int64 arg2 = PG_GETARG_INT64(1);
+ int64 result;
+
+ if (unlikely(pg_mul_s64_overflow(arg1, arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+ PG_RETURN_INT64(result);
+}
+
+Datum
+int8div(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int64 arg2 = PG_GETARG_INT64(1);
+ int64 result;
+
+ if (arg2 == 0)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+ /* ensure compiler realizes we mustn't reach the division (gcc bug) */
+ PG_RETURN_NULL();
+ }
+
+ /*
+ * INT64_MIN / -1 is problematic, since the result can't be represented on
+ * a two's-complement machine. Some machines produce INT64_MIN, some
+ * produce zero, some throw an exception. We can dodge the problem by
+ * recognizing that division by -1 is the same as negation.
+ */
+ if (arg2 == -1)
+ {
+ if (unlikely(arg1 == PG_INT64_MIN))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+ result = -arg1;
+ PG_RETURN_INT64(result);
+ }
+
+ /* No overflow is possible */
+
+ result = arg1 / arg2;
+
+ PG_RETURN_INT64(result);
+}
+
+/* int8abs()
+ * Absolute value
+ */
+Datum
+int8abs(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int64 result;
+
+ if (unlikely(arg1 == PG_INT64_MIN))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+ result = (arg1 < 0) ? -arg1 : arg1;
+ PG_RETURN_INT64(result);
+}
+
+/* int8mod()
+ * Modulo operation.
+ */
+Datum
+int8mod(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int64 arg2 = PG_GETARG_INT64(1);
+
+ if (unlikely(arg2 == 0))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+ /* ensure compiler realizes we mustn't reach the division (gcc bug) */
+ PG_RETURN_NULL();
+ }
+
+ /*
+ * Some machines throw a floating-point exception for INT64_MIN % -1,
+ * which is a bit silly since the correct answer is perfectly
+ * well-defined, namely zero.
+ */
+ if (arg2 == -1)
+ PG_RETURN_INT64(0);
+
+ /* No overflow is possible */
+
+ PG_RETURN_INT64(arg1 % arg2);
+}
+
+/*
+ * Greatest Common Divisor
+ *
+ * Returns the largest positive integer that exactly divides both inputs.
+ * Special cases:
+ * - gcd(x, 0) = gcd(0, x) = abs(x)
+ * because 0 is divisible by anything
+ * - gcd(0, 0) = 0
+ * complies with the previous definition and is a common convention
+ *
+ * Special care must be taken if either input is INT64_MIN ---
+ * gcd(0, INT64_MIN), gcd(INT64_MIN, 0) and gcd(INT64_MIN, INT64_MIN) are
+ * all equal to abs(INT64_MIN), which cannot be represented as a 64-bit signed
+ * integer.
+ */
+static int64
+int8gcd_internal(int64 arg1, int64 arg2)
+{
+ int64 swap;
+ int64 a1,
+ a2;
+
+ /*
+ * Put the greater absolute value in arg1.
+ *
+ * This would happen automatically in the loop below, but avoids an
+ * expensive modulo operation, and simplifies the special-case handling
+ * for INT64_MIN below.
+ *
+ * We do this in negative space in order to handle INT64_MIN.
+ */
+ a1 = (arg1 < 0) ? arg1 : -arg1;
+ a2 = (arg2 < 0) ? arg2 : -arg2;
+ if (a1 > a2)
+ {
+ swap = arg1;
+ arg1 = arg2;
+ arg2 = swap;
+ }
+
+ /* Special care needs to be taken with INT64_MIN. See comments above. */
+ if (arg1 == PG_INT64_MIN)
+ {
+ if (arg2 == 0 || arg2 == PG_INT64_MIN)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+
+ /*
+ * Some machines throw a floating-point exception for INT64_MIN % -1,
+ * which is a bit silly since the correct answer is perfectly
+ * well-defined, namely zero. Guard against this and just return the
+ * result, gcd(INT64_MIN, -1) = 1.
+ */
+ if (arg2 == -1)
+ return 1;
+ }
+
+ /* Use the Euclidean algorithm to find the GCD */
+ while (arg2 != 0)
+ {
+ swap = arg2;
+ arg2 = arg1 % arg2;
+ arg1 = swap;
+ }
+
+ /*
+ * Make sure the result is positive. (We know we don't have INT64_MIN
+ * anymore).
+ */
+ if (arg1 < 0)
+ arg1 = -arg1;
+
+ return arg1;
+}
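+
+/*
+ * Worked example of the Euclidean loop above: for inputs 54 and 24 the
+ * iterations compute 54 % 24 = 6 and then 24 % 6 = 0, leaving arg1 = 6,
+ * which is gcd(54, 24).
+ */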
+
+Datum
+int8gcd(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int64 arg2 = PG_GETARG_INT64(1);
+ int64 result;
+
+ result = int8gcd_internal(arg1, arg2);
+
+ PG_RETURN_INT64(result);
+}
+
+/*
+ * Least Common Multiple
+ */
+Datum
+int8lcm(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int64 arg2 = PG_GETARG_INT64(1);
+ int64 gcd;
+ int64 result;
+
+ /*
+ * Handle lcm(x, 0) = lcm(0, x) = 0 as a special case. This prevents a
+ * division-by-zero error below when x is zero, and an overflow error from
+ * the GCD computation when x = INT64_MIN.
+ */
+ if (arg1 == 0 || arg2 == 0)
+ PG_RETURN_INT64(0);
+
+ /* lcm(x, y) = abs(x / gcd(x, y) * y) */
+ gcd = int8gcd_internal(arg1, arg2);
+ arg1 = arg1 / gcd;
+
+ if (unlikely(pg_mul_s64_overflow(arg1, arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+
+ /* If the result is INT64_MIN, it cannot be represented. */
+ if (unlikely(result == PG_INT64_MIN))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+
+ if (result < 0)
+ result = -result;
+
+ PG_RETURN_INT64(result);
+}
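+
+/*
+ * For example, with arg1 = 12 and arg2 = 18 the GCD is 6, so the result
+ * is abs(12 / 6 * 18) = 36. Dividing by the GCD before multiplying keeps
+ * the intermediate value as small as possible.
+ */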
+
+Datum
+int8inc(PG_FUNCTION_ARGS)
+{
+ /*
+ * When int8 is pass-by-reference, we provide this special case to avoid
+ * palloc overhead for COUNT(): when called as an aggregate, we know that
+ * the argument is modifiable local storage, so just update it in-place.
+ * (If int8 is pass-by-value, then of course this is useless as well as
+ * incorrect, so just ifdef it out.)
+ */
+#ifndef USE_FLOAT8_BYVAL /* controls int8 too */
+ if (AggCheckCallContext(fcinfo, NULL))
+ {
+ int64 *arg = (int64 *) PG_GETARG_POINTER(0);
+
+ if (unlikely(pg_add_s64_overflow(*arg, 1, arg)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+
+ PG_RETURN_POINTER(arg);
+ }
+ else
+#endif
+ {
+ /* Not called as an aggregate, so just do it the dumb way */
+ int64 arg = PG_GETARG_INT64(0);
+ int64 result;
+
+ if (unlikely(pg_add_s64_overflow(arg, 1, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+
+ PG_RETURN_INT64(result);
+ }
+}
+
+Datum
+int8dec(PG_FUNCTION_ARGS)
+{
+ /*
+ * When int8 is pass-by-reference, we provide this special case to avoid
+ * palloc overhead for COUNT(): when called as an aggregate, we know that
+ * the argument is modifiable local storage, so just update it in-place.
+ * (If int8 is pass-by-value, then of course this is useless as well as
+ * incorrect, so just ifdef it out.)
+ */
+#ifndef USE_FLOAT8_BYVAL /* controls int8 too */
+ if (AggCheckCallContext(fcinfo, NULL))
+ {
+ int64 *arg = (int64 *) PG_GETARG_POINTER(0);
+
+ if (unlikely(pg_sub_s64_overflow(*arg, 1, arg)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+ PG_RETURN_POINTER(arg);
+ }
+ else
+#endif
+ {
+ /* Not called as an aggregate, so just do it the dumb way */
+ int64 arg = PG_GETARG_INT64(0);
+ int64 result;
+
+ if (unlikely(pg_sub_s64_overflow(arg, 1, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+
+ PG_RETURN_INT64(result);
+ }
+}
+
+
+/*
+ * These functions are exactly like int8inc/int8dec but are used for
+ * aggregates that count only non-null values. Since the functions are
+ * declared strict, the null checks happen before we ever get here, and all we
+ * need do is increment the state value. We could actually make these pg_proc
+ * entries point right at int8inc/int8dec, but then the opr_sanity regression
+ * test would complain about mismatched entries for a built-in function.
+ */
+
+Datum
+int8inc_any(PG_FUNCTION_ARGS)
+{
+ return int8inc(fcinfo);
+}
+
+Datum
+int8inc_float8_float8(PG_FUNCTION_ARGS)
+{
+ return int8inc(fcinfo);
+}
+
+Datum
+int8dec_any(PG_FUNCTION_ARGS)
+{
+ return int8dec(fcinfo);
+}
+
+/*
+ * int8inc_support
+ * prosupport function for int8inc() and int8inc_any()
+ */
+Datum
+int8inc_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+ if (IsA(rawreq, SupportRequestWFuncMonotonic))
+ {
+ SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq;
+ MonotonicFunction monotonic = MONOTONICFUNC_NONE;
+ int frameOptions = req->window_clause->frameOptions;
+
+ /* If there is no ORDER BY clause then all rows are peers */
+ if (req->window_clause->orderClause == NIL)
+ monotonic = MONOTONICFUNC_BOTH;
+ else
+ {
+ /*
+ * Otherwise take into account the frame options. When the frame
+ * bound is the start of the window then the resulting value can
+ * never decrease, therefore it is monotonically increasing.
+ */
+ if (frameOptions & FRAMEOPTION_START_UNBOUNDED_PRECEDING)
+ monotonic |= MONOTONICFUNC_INCREASING;
+
+ /*
+ * Likewise, if the frame bound is the end of the window then the
+ * resulting value can never increase, therefore it is monotonically
+ * decreasing.
+ */
+ if (frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING)
+ monotonic |= MONOTONICFUNC_DECREASING;
+ }
+
+ req->monotonic = monotonic;
+ PG_RETURN_POINTER(req);
+ }
+
+ PG_RETURN_POINTER(NULL);
+}
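+
+/*
+ * For instance, in
+ * SELECT * FROM (SELECT x, count(*) OVER (ORDER BY x) AS c FROM tab) s
+ * WHERE c <= 10;
+ * the default frame starts at UNBOUNDED PRECEDING, so the count is
+ * reported as monotonically increasing, which lets the planner stop
+ * evaluating the window aggregate once the qual can no longer be
+ * satisfied.
+ */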
+
+
+Datum
+int8larger(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int64 arg2 = PG_GETARG_INT64(1);
+ int64 result;
+
+ result = ((arg1 > arg2) ? arg1 : arg2);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+int8smaller(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int64 arg2 = PG_GETARG_INT64(1);
+ int64 result;
+
+ result = ((arg1 < arg2) ? arg1 : arg2);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+int84pl(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+ int64 result;
+
+ if (unlikely(pg_add_s64_overflow(arg1, (int64) arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+ PG_RETURN_INT64(result);
+}
+
+Datum
+int84mi(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+ int64 result;
+
+ if (unlikely(pg_sub_s64_overflow(arg1, (int64) arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+ PG_RETURN_INT64(result);
+}
+
+Datum
+int84mul(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+ int64 result;
+
+ if (unlikely(pg_mul_s64_overflow(arg1, (int64) arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+ PG_RETURN_INT64(result);
+}
+
+Datum
+int84div(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+ int64 result;
+
+ if (arg2 == 0)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+ /* ensure compiler realizes we mustn't reach the division (gcc bug) */
+ PG_RETURN_NULL();
+ }
+
+ /*
+ * INT64_MIN / -1 is problematic, since the result can't be represented on
+ * a two's-complement machine. Some machines produce INT64_MIN, some
+ * produce zero, some throw an exception. We can dodge the problem by
+ * recognizing that division by -1 is the same as negation.
+ */
+ if (arg2 == -1)
+ {
+ if (unlikely(arg1 == PG_INT64_MIN))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+ result = -arg1;
+ PG_RETURN_INT64(result);
+ }
+
+ /* No overflow is possible */
+
+ result = arg1 / arg2;
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+int48pl(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int64 arg2 = PG_GETARG_INT64(1);
+ int64 result;
+
+ if (unlikely(pg_add_s64_overflow((int64) arg1, arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+ PG_RETURN_INT64(result);
+}
+
+Datum
+int48mi(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int64 arg2 = PG_GETARG_INT64(1);
+ int64 result;
+
+ if (unlikely(pg_sub_s64_overflow((int64) arg1, arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+ PG_RETURN_INT64(result);
+}
+
+Datum
+int48mul(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int64 arg2 = PG_GETARG_INT64(1);
+ int64 result;
+
+ if (unlikely(pg_mul_s64_overflow((int64) arg1, arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+ PG_RETURN_INT64(result);
+}
+
+Datum
+int48div(PG_FUNCTION_ARGS)
+{
+ int32 arg1 = PG_GETARG_INT32(0);
+ int64 arg2 = PG_GETARG_INT64(1);
+
+ if (unlikely(arg2 == 0))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+ /* ensure compiler realizes we mustn't reach the division (gcc bug) */
+ PG_RETURN_NULL();
+ }
+
+ /* No overflow is possible */
+ PG_RETURN_INT64((int64) arg1 / arg2);
+}
+
+Datum
+int82pl(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+ int64 result;
+
+ if (unlikely(pg_add_s64_overflow(arg1, (int64) arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+ PG_RETURN_INT64(result);
+}
+
+Datum
+int82mi(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+ int64 result;
+
+ if (unlikely(pg_sub_s64_overflow(arg1, (int64) arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+ PG_RETURN_INT64(result);
+}
+
+Datum
+int82mul(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+ int64 result;
+
+ if (unlikely(pg_mul_s64_overflow(arg1, (int64) arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+ PG_RETURN_INT64(result);
+}
+
+Datum
+int82div(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int16 arg2 = PG_GETARG_INT16(1);
+ int64 result;
+
+ if (unlikely(arg2 == 0))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+ /* ensure compiler realizes we mustn't reach the division (gcc bug) */
+ PG_RETURN_NULL();
+ }
+
+ /*
+ * INT64_MIN / -1 is problematic, since the result can't be represented on
+ * a two's-complement machine. Some machines produce INT64_MIN, some
+ * produce zero, some throw an exception. We can dodge the problem by
+ * recognizing that division by -1 is the same as negation.
+ */
+ if (arg2 == -1)
+ {
+ if (unlikely(arg1 == PG_INT64_MIN))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+ result = -arg1;
+ PG_RETURN_INT64(result);
+ }
+
+ /* No overflow is possible */
+
+ result = arg1 / arg2;
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+int28pl(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int64 arg2 = PG_GETARG_INT64(1);
+ int64 result;
+
+ if (unlikely(pg_add_s64_overflow((int64) arg1, arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+ PG_RETURN_INT64(result);
+}
+
+Datum
+int28mi(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int64 arg2 = PG_GETARG_INT64(1);
+ int64 result;
+
+ if (unlikely(pg_sub_s64_overflow((int64) arg1, arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+ PG_RETURN_INT64(result);
+}
+
+Datum
+int28mul(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int64 arg2 = PG_GETARG_INT64(1);
+ int64 result;
+
+ if (unlikely(pg_mul_s64_overflow((int64) arg1, arg2, &result)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+ PG_RETURN_INT64(result);
+}
+
+Datum
+int28div(PG_FUNCTION_ARGS)
+{
+ int16 arg1 = PG_GETARG_INT16(0);
+ int64 arg2 = PG_GETARG_INT64(1);
+
+ if (unlikely(arg2 == 0))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+ /* ensure compiler realizes we mustn't reach the division (gcc bug) */
+ PG_RETURN_NULL();
+ }
+
+ /* No overflow is possible */
+ PG_RETURN_INT64((int64) arg1 / arg2);
+}
+
+/* Bitwise operators
+ *
+ * int8and - returns arg1 & arg2
+ * int8or - returns arg1 | arg2
+ * int8xor - returns arg1 # arg2
+ * int8not - returns ~arg1
+ * int8shl - returns arg1 << arg2
+ * int8shr - returns arg1 >> arg2
+ */
+
+Datum
+int8and(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int64 arg2 = PG_GETARG_INT64(1);
+
+ PG_RETURN_INT64(arg1 & arg2);
+}
+
+Datum
+int8or(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int64 arg2 = PG_GETARG_INT64(1);
+
+ PG_RETURN_INT64(arg1 | arg2);
+}
+
+Datum
+int8xor(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int64 arg2 = PG_GETARG_INT64(1);
+
+ PG_RETURN_INT64(arg1 ^ arg2);
+}
+
+Datum
+int8not(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+
+ PG_RETURN_INT64(~arg1);
+}
+
+Datum
+int8shl(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_INT64(arg1 << arg2);
+}
+
+Datum
+int8shr(PG_FUNCTION_ARGS)
+{
+ int64 arg1 = PG_GETARG_INT64(0);
+ int32 arg2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_INT64(arg1 >> arg2);
+}
+
+/*----------------------------------------------------------
+ * Conversion operators.
+ *---------------------------------------------------------*/
+
+Datum
+int48(PG_FUNCTION_ARGS)
+{
+ int32 arg = PG_GETARG_INT32(0);
+
+ PG_RETURN_INT64((int64) arg);
+}
+
+Datum
+int84(PG_FUNCTION_ARGS)
+{
+ int64 arg = PG_GETARG_INT64(0);
+
+ if (unlikely(arg < PG_INT32_MIN) || unlikely(arg > PG_INT32_MAX))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+
+ PG_RETURN_INT32((int32) arg);
+}
+
+Datum
+int28(PG_FUNCTION_ARGS)
+{
+ int16 arg = PG_GETARG_INT16(0);
+
+ PG_RETURN_INT64((int64) arg);
+}
+
+Datum
+int82(PG_FUNCTION_ARGS)
+{
+ int64 arg = PG_GETARG_INT64(0);
+
+ if (unlikely(arg < PG_INT16_MIN) || unlikely(arg > PG_INT16_MAX))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("smallint out of range")));
+
+ PG_RETURN_INT16((int16) arg);
+}
+
+Datum
+i8tod(PG_FUNCTION_ARGS)
+{
+ int64 arg = PG_GETARG_INT64(0);
+ float8 result;
+
+ result = arg;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+/* dtoi8()
+ * Convert float8 to 8-byte integer.
+ */
+Datum
+dtoi8(PG_FUNCTION_ARGS)
+{
+ float8 num = PG_GETARG_FLOAT8(0);
+
+ /*
+ * Get rid of any fractional part in the input. This is so we don't fail
+ * on just-out-of-range values that would round into range. Note
+ * assumption that rint() will pass through a NaN or Inf unchanged.
+ */
+ num = rint(num);
+
+ /* Range check */
+ if (unlikely(isnan(num) || !FLOAT8_FITS_IN_INT64(num)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+
+ PG_RETURN_INT64((int64) num);
+}
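+
+/*
+ * For example, dtoi8(1e18) yields 1000000000000000000, whereas dtoi8(1e19)
+ * raises the error above because the rounded value does not fit in int64.
+ */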
+
+Datum
+i8tof(PG_FUNCTION_ARGS)
+{
+ int64 arg = PG_GETARG_INT64(0);
+ float4 result;
+
+ result = arg;
+
+ PG_RETURN_FLOAT4(result);
+}
+
+/* ftoi8()
+ * Convert float4 to 8-byte integer.
+ */
+Datum
+ftoi8(PG_FUNCTION_ARGS)
+{
+ float4 num = PG_GETARG_FLOAT4(0);
+
+ /*
+ * Get rid of any fractional part in the input. This is so we don't fail
+ * on just-out-of-range values that would round into range. Note
+ * assumption that rint() will pass through a NaN or Inf unchanged.
+ */
+ num = rint(num);
+
+ /* Range check */
+ if (unlikely(isnan(num) || !FLOAT4_FITS_IN_INT64(num)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+
+ PG_RETURN_INT64((int64) num);
+}
+
+Datum
+i8tooid(PG_FUNCTION_ARGS)
+{
+ int64 arg = PG_GETARG_INT64(0);
+
+ if (unlikely(arg < 0) || unlikely(arg > PG_UINT32_MAX))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("OID out of range")));
+
+ PG_RETURN_OID((Oid) arg);
+}
+
+Datum
+oidtoi8(PG_FUNCTION_ARGS)
+{
+ Oid arg = PG_GETARG_OID(0);
+
+ PG_RETURN_INT64((int64) arg);
+}
+
+/*
+ * non-persistent numeric series generator
+ */
+Datum
+generate_series_int8(PG_FUNCTION_ARGS)
+{
+ return generate_series_step_int8(fcinfo);
+}
+
+Datum
+generate_series_step_int8(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ generate_series_fctx *fctx;
+ int64 result;
+ MemoryContext oldcontext;
+
+ /* stuff done only on the first call of the function */
+ if (SRF_IS_FIRSTCALL())
+ {
+ int64 start = PG_GETARG_INT64(0);
+ int64 finish = PG_GETARG_INT64(1);
+ int64 step = 1;
+
+ /* see if we were given an explicit step size */
+ if (PG_NARGS() == 3)
+ step = PG_GETARG_INT64(2);
+ if (step == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("step size cannot equal zero")));
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ /*
+ * switch to memory context appropriate for multiple function calls
+ */
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ /* allocate memory for user context */
+ fctx = (generate_series_fctx *) palloc(sizeof(generate_series_fctx));
+
+ /*
+ * Use fctx to keep state from call to call. Seed current with the
+ * original start value
+ */
+ fctx->current = start;
+ fctx->finish = finish;
+ fctx->step = step;
+
+ funcctx->user_fctx = fctx;
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ /* stuff done on every call of the function */
+ funcctx = SRF_PERCALL_SETUP();
+
+ /*
+ * get the saved state and use current as the result for this iteration
+ */
+ fctx = funcctx->user_fctx;
+ result = fctx->current;
+
+ if ((fctx->step > 0 && fctx->current <= fctx->finish) ||
+ (fctx->step < 0 && fctx->current >= fctx->finish))
+ {
+ /*
+ * Increment current in preparation for next iteration. If next-value
+ * computation overflows, this is the final result.
+ */
+ if (pg_add_s64_overflow(fctx->current, fctx->step, &fctx->current))
+ fctx->step = 0;
+
+ /* do when there is more left to send */
+ SRF_RETURN_NEXT(funcctx, Int64GetDatum(result));
+ }
+ else
+ /* do when there is no more left */
+ SRF_RETURN_DONE(funcctx);
+}
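+
+/*
+ * For example, generate_series(1::int8, 10::int8, 3) emits 1, 4, 7 and 10,
+ * then stops because the next candidate value (13) is past the finish.
+ */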
+
+/*
+ * Planner support function for generate_series(int8, int8 [, int8])
+ */
+Datum
+generate_series_int8_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+ Node *ret = NULL;
+
+ if (IsA(rawreq, SupportRequestRows))
+ {
+ /* Try to estimate the number of rows returned */
+ SupportRequestRows *req = (SupportRequestRows *) rawreq;
+
+ if (is_funcclause(req->node)) /* be paranoid */
+ {
+ List *args = ((FuncExpr *) req->node)->args;
+ Node *arg1,
+ *arg2,
+ *arg3;
+
+ /* We can use estimated argument values here */
+ arg1 = estimate_expression_value(req->root, linitial(args));
+ arg2 = estimate_expression_value(req->root, lsecond(args));
+ if (list_length(args) >= 3)
+ arg3 = estimate_expression_value(req->root, lthird(args));
+ else
+ arg3 = NULL;
+
+ /*
+ * If any argument is constant NULL, we can safely assume that
+ * zero rows are returned. Otherwise, if they're all non-NULL
+ * constants, we can calculate the number of rows that will be
+ * returned. Use double arithmetic to avoid overflow hazards.
+ */
+ if ((IsA(arg1, Const) &&
+ ((Const *) arg1)->constisnull) ||
+ (IsA(arg2, Const) &&
+ ((Const *) arg2)->constisnull) ||
+ (arg3 != NULL && IsA(arg3, Const) &&
+ ((Const *) arg3)->constisnull))
+ {
+ req->rows = 0;
+ ret = (Node *) req;
+ }
+ else if (IsA(arg1, Const) &&
+ IsA(arg2, Const) &&
+ (arg3 == NULL || IsA(arg3, Const)))
+ {
+ double start,
+ finish,
+ step;
+
+ start = DatumGetInt64(((Const *) arg1)->constvalue);
+ finish = DatumGetInt64(((Const *) arg2)->constvalue);
+ step = arg3 ? DatumGetInt64(((Const *) arg3)->constvalue) : 1;
+
+ /* This equation works for either sign of step */
+ if (step != 0)
+ {
+ req->rows = floor((finish - start + step) / step);
+ ret = (Node *) req;
+ }
+ }
+ }
+ }
+
+ PG_RETURN_POINTER(ret);
+}
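+
+/*
+ * For instance, constant arguments (1, 10, 3) produce an estimate of
+ * floor((10 - 1 + 3) / 3) = 4 rows, matching the number of values the
+ * SRF actually returns.
+ */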
diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c
new file mode 100644
index 0000000..2fececd
--- /dev/null
+++ b/src/backend/utils/adt/json.c
@@ -0,0 +1,1375 @@
+/*-------------------------------------------------------------------------
+ *
+ * json.c
+ * JSON data type support.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/json.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/pg_type.h"
+#include "funcapi.h"
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "parser/parse_coerce.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/date.h"
+#include "utils/datetime.h"
+#include "utils/json.h"
+#include "utils/jsonfuncs.h"
+#include "utils/lsyscache.h"
+#include "utils/typcache.h"
+
+typedef enum /* type categories for datum_to_json */
+{
+ JSONTYPE_NULL, /* null, so we didn't bother to identify */
+ JSONTYPE_BOOL, /* boolean (built-in types only) */
+ JSONTYPE_NUMERIC, /* numeric (ditto) */
+ JSONTYPE_DATE, /* we use special formatting for datetimes */
+ JSONTYPE_TIMESTAMP,
+ JSONTYPE_TIMESTAMPTZ,
+ JSONTYPE_JSON, /* JSON itself (and JSONB) */
+ JSONTYPE_ARRAY, /* array */
+ JSONTYPE_COMPOSITE, /* composite */
+ JSONTYPE_CAST, /* something with an explicit cast to JSON */
+ JSONTYPE_OTHER /* all else */
+} JsonTypeCategory;
+
+typedef struct JsonAggState
+{
+ StringInfo str;
+ JsonTypeCategory key_category;
+ Oid key_output_func;
+ JsonTypeCategory val_category;
+ Oid val_output_func;
+} JsonAggState;
+
+static void composite_to_json(Datum composite, StringInfo result,
+ bool use_line_feeds);
+static void array_dim_to_json(StringInfo result, int dim, int ndims, int *dims,
+ Datum *vals, bool *nulls, int *valcount,
+ JsonTypeCategory tcategory, Oid outfuncoid,
+ bool use_line_feeds);
+static void array_to_json_internal(Datum array, StringInfo result,
+ bool use_line_feeds);
+static void json_categorize_type(Oid typoid,
+ JsonTypeCategory *tcategory,
+ Oid *outfuncoid);
+static void datum_to_json(Datum val, bool is_null, StringInfo result,
+ JsonTypeCategory tcategory, Oid outfuncoid,
+ bool key_scalar);
+static void add_json(Datum val, bool is_null, StringInfo result,
+ Oid val_type, bool key_scalar);
+static text *catenate_stringinfo_string(StringInfo buffer, const char *addon);
+
+/*
+ * Input.
+ */
+Datum
+json_in(PG_FUNCTION_ARGS)
+{
+ char *json = PG_GETARG_CSTRING(0);
+ text *result = cstring_to_text(json);
+ JsonLexContext *lex;
+
+ /* validate it */
+ lex = makeJsonLexContext(result, false);
+ pg_parse_json_or_ereport(lex, &nullSemAction);
+
+ /* Internal representation is the same as text, for now */
+ PG_RETURN_TEXT_P(result);
+}
+
+/*
+ * Output.
+ */
+Datum
+json_out(PG_FUNCTION_ARGS)
+{
+ /* we needn't detoast because text_to_cstring will handle that */
+ Datum txt = PG_GETARG_DATUM(0);
+
+ PG_RETURN_CSTRING(TextDatumGetCString(txt));
+}
+
+/*
+ * Binary send.
+ */
+Datum
+json_send(PG_FUNCTION_ARGS)
+{
+ text *t = PG_GETARG_TEXT_PP(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/*
+ * Binary receive.
+ */
+Datum
+json_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ char *str;
+ int nbytes;
+ JsonLexContext *lex;
+
+ str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
+
+ /* Validate it. */
+ lex = makeJsonLexContextCstringLen(str, nbytes, GetDatabaseEncoding(), false);
+ pg_parse_json_or_ereport(lex, &nullSemAction);
+
+ PG_RETURN_TEXT_P(cstring_to_text_with_len(str, nbytes));
+}
+
+/*
+ * Determine how we want to print values of a given type in datum_to_json.
+ *
+ * Given the datatype OID, return its JsonTypeCategory, as well as the type's
+ * output function OID. If the returned category is JSONTYPE_CAST, we
+ * return the OID of the type->JSON cast function instead.
+ */
+static void
+json_categorize_type(Oid typoid,
+ JsonTypeCategory *tcategory,
+ Oid *outfuncoid)
+{
+ bool typisvarlena;
+
+ /* Look through any domain */
+ typoid = getBaseType(typoid);
+
+ *outfuncoid = InvalidOid;
+
+ /*
+ * We need to get the output function for everything except date and
+ * timestamp types, array and composite types, booleans, and non-builtin
+ * types where there's a cast to json.
+ */
+
+ switch (typoid)
+ {
+ case BOOLOID:
+ *tcategory = JSONTYPE_BOOL;
+ break;
+
+ case INT2OID:
+ case INT4OID:
+ case INT8OID:
+ case FLOAT4OID:
+ case FLOAT8OID:
+ case NUMERICOID:
+ getTypeOutputInfo(typoid, outfuncoid, &typisvarlena);
+ *tcategory = JSONTYPE_NUMERIC;
+ break;
+
+ case DATEOID:
+ *tcategory = JSONTYPE_DATE;
+ break;
+
+ case TIMESTAMPOID:
+ *tcategory = JSONTYPE_TIMESTAMP;
+ break;
+
+ case TIMESTAMPTZOID:
+ *tcategory = JSONTYPE_TIMESTAMPTZ;
+ break;
+
+ case JSONOID:
+ case JSONBOID:
+ getTypeOutputInfo(typoid, outfuncoid, &typisvarlena);
+ *tcategory = JSONTYPE_JSON;
+ break;
+
+ default:
+ /* Check for arrays and composites */
+ if (OidIsValid(get_element_type(typoid)) || typoid == ANYARRAYOID
+ || typoid == ANYCOMPATIBLEARRAYOID || typoid == RECORDARRAYOID)
+ *tcategory = JSONTYPE_ARRAY;
+ else if (type_is_rowtype(typoid)) /* includes RECORDOID */
+ *tcategory = JSONTYPE_COMPOSITE;
+ else
+ {
+ /* It's probably the general case ... */
+ *tcategory = JSONTYPE_OTHER;
+ /* but let's look for a cast to json, if it's not built-in */
+ if (typoid >= FirstNormalObjectId)
+ {
+ Oid castfunc;
+ CoercionPathType ctype;
+
+ ctype = find_coercion_pathway(JSONOID, typoid,
+ COERCION_EXPLICIT,
+ &castfunc);
+ if (ctype == COERCION_PATH_FUNC && OidIsValid(castfunc))
+ {
+ *tcategory = JSONTYPE_CAST;
+ *outfuncoid = castfunc;
+ }
+ else
+ {
+ /* non-builtin type with no cast */
+ getTypeOutputInfo(typoid, outfuncoid, &typisvarlena);
+ }
+ }
+ else
+ {
+ /* any other builtin type */
+ getTypeOutputInfo(typoid, outfuncoid, &typisvarlena);
+ }
+ }
+ break;
+ }
+}
+
+/*
+ * Turn a Datum into JSON text, appending the string to "result".
+ *
+ * tcategory and outfuncoid are from a previous call to json_categorize_type,
+ * except that if is_null is true then they can be invalid.
+ *
+ * If key_scalar is true, the value is being printed as a key, so insist
+ * it's of an acceptable type, and force it to be quoted.
+ */
+static void
+datum_to_json(Datum val, bool is_null, StringInfo result,
+ JsonTypeCategory tcategory, Oid outfuncoid,
+ bool key_scalar)
+{
+ char *outputstr;
+ text *jsontext;
+
+ check_stack_depth();
+
+ /* callers are expected to ensure that null keys are not passed in */
+ Assert(!(key_scalar && is_null));
+
+ if (is_null)
+ {
+ appendStringInfoString(result, "null");
+ return;
+ }
+
+ if (key_scalar &&
+ (tcategory == JSONTYPE_ARRAY ||
+ tcategory == JSONTYPE_COMPOSITE ||
+ tcategory == JSONTYPE_JSON ||
+ tcategory == JSONTYPE_CAST))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("key value must be scalar, not array, composite, or json")));
+
+ switch (tcategory)
+ {
+ case JSONTYPE_ARRAY:
+ array_to_json_internal(val, result, false);
+ break;
+ case JSONTYPE_COMPOSITE:
+ composite_to_json(val, result, false);
+ break;
+ case JSONTYPE_BOOL:
+ outputstr = DatumGetBool(val) ? "true" : "false";
+ if (key_scalar)
+ escape_json(result, outputstr);
+ else
+ appendStringInfoString(result, outputstr);
+ break;
+ case JSONTYPE_NUMERIC:
+ outputstr = OidOutputFunctionCall(outfuncoid, val);
+
+ /*
+ * Don't call escape_json for a non-key if it's a valid JSON
+ * number.
+ */
+ if (!key_scalar && IsValidJsonNumber(outputstr, strlen(outputstr)))
+ appendStringInfoString(result, outputstr);
+ else
+ escape_json(result, outputstr);
+ pfree(outputstr);
+ break;
+ case JSONTYPE_DATE:
+ {
+ char buf[MAXDATELEN + 1];
+
+ JsonEncodeDateTime(buf, val, DATEOID, NULL);
+ appendStringInfo(result, "\"%s\"", buf);
+ }
+ break;
+ case JSONTYPE_TIMESTAMP:
+ {
+ char buf[MAXDATELEN + 1];
+
+ JsonEncodeDateTime(buf, val, TIMESTAMPOID, NULL);
+ appendStringInfo(result, "\"%s\"", buf);
+ }
+ break;
+ case JSONTYPE_TIMESTAMPTZ:
+ {
+ char buf[MAXDATELEN + 1];
+
+ JsonEncodeDateTime(buf, val, TIMESTAMPTZOID, NULL);
+ appendStringInfo(result, "\"%s\"", buf);
+ }
+ break;
+ case JSONTYPE_JSON:
+ /* JSON and JSONB output will already be escaped */
+ outputstr = OidOutputFunctionCall(outfuncoid, val);
+ appendStringInfoString(result, outputstr);
+ pfree(outputstr);
+ break;
+ case JSONTYPE_CAST:
+ /* outfuncoid refers to a cast function, not an output function */
+ jsontext = DatumGetTextPP(OidFunctionCall1(outfuncoid, val));
+ outputstr = text_to_cstring(jsontext);
+ appendStringInfoString(result, outputstr);
+ pfree(outputstr);
+ pfree(jsontext);
+ break;
+ default:
+ outputstr = OidOutputFunctionCall(outfuncoid, val);
+ escape_json(result, outputstr);
+ pfree(outputstr);
+ break;
+ }
+}
+
+/*
+ * Encode 'value' of datetime type 'typid' into JSON string in ISO format using
+ * optionally preallocated buffer 'buf'. Optional 'tzp' determines time-zone
+ * offset (in seconds) in which we want to show timestamptz.
+ */
+char *
+JsonEncodeDateTime(char *buf, Datum value, Oid typid, const int *tzp)
+{
+ if (!buf)
+ buf = palloc(MAXDATELEN + 1);
+
+ switch (typid)
+ {
+ case DATEOID:
+ {
+ DateADT date;
+ struct pg_tm tm;
+
+ date = DatumGetDateADT(value);
+
+ /* Same as date_out(), but forcing DateStyle */
+ if (DATE_NOT_FINITE(date))
+ EncodeSpecialDate(date, buf);
+ else
+ {
+ j2date(date + POSTGRES_EPOCH_JDATE,
+ &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
+ EncodeDateOnly(&tm, USE_XSD_DATES, buf);
+ }
+ }
+ break;
+ case TIMEOID:
+ {
+ TimeADT time = DatumGetTimeADT(value);
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+
+ /* Same as time_out(), but forcing DateStyle */
+ time2tm(time, tm, &fsec);
+ EncodeTimeOnly(tm, fsec, false, 0, USE_XSD_DATES, buf);
+ }
+ break;
+ case TIMETZOID:
+ {
+ TimeTzADT *time = DatumGetTimeTzADTP(value);
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+ int tz;
+
+ /* Same as timetz_out(), but forcing DateStyle */
+ timetz2tm(time, tm, &fsec, &tz);
+ EncodeTimeOnly(tm, fsec, true, tz, USE_XSD_DATES, buf);
+ }
+ break;
+ case TIMESTAMPOID:
+ {
+ Timestamp timestamp;
+ struct pg_tm tm;
+ fsec_t fsec;
+
+ timestamp = DatumGetTimestamp(value);
+ /* Same as timestamp_out(), but forcing DateStyle */
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ EncodeSpecialTimestamp(timestamp, buf);
+ else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
+ EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ }
+ break;
+ case TIMESTAMPTZOID:
+ {
+ TimestampTz timestamp;
+ struct pg_tm tm;
+ int tz;
+ fsec_t fsec;
+ const char *tzn = NULL;
+
+ timestamp = DatumGetTimestampTz(value);
+
+ /*
+ * If a time zone is specified, we apply the time-zone shift,
+ * convert timestamptz to pg_tm as if it were without a time
+ * zone, and then use the specified time zone for converting
+ * the timestamp into a string.
+ */
+ if (tzp)
+ {
+ tz = *tzp;
+ timestamp -= (TimestampTz) tz * USECS_PER_SEC;
+ }
+
+ /* Same as timestamptz_out(), but forcing DateStyle */
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ EncodeSpecialTimestamp(timestamp, buf);
+ else if (timestamp2tm(timestamp, tzp ? NULL : &tz, &tm, &fsec,
+ tzp ? NULL : &tzn, NULL) == 0)
+ {
+ if (tzp)
+ tm.tm_isdst = 1; /* set time-zone presence flag */
+
+ EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ }
+ break;
+ default:
+ elog(ERROR, "unknown jsonb value datetime type oid %u", typid);
+ return NULL;
+ }
+
+ return buf;
+}
+
+/*
+ * Process a single dimension of an array.
+ * If it's the innermost dimension, output the values, otherwise call
+ * ourselves recursively to process the next dimension.
+ */
+static void
+array_dim_to_json(StringInfo result, int dim, int ndims, int *dims, Datum *vals,
+ bool *nulls, int *valcount, JsonTypeCategory tcategory,
+ Oid outfuncoid, bool use_line_feeds)
+{
+ int i;
+ const char *sep;
+
+ Assert(dim < ndims);
+
+ sep = use_line_feeds ? ",\n " : ",";
+
+ appendStringInfoChar(result, '[');
+
+ for (i = 1; i <= dims[dim]; i++)
+ {
+ if (i > 1)
+ appendStringInfoString(result, sep);
+
+ if (dim + 1 == ndims)
+ {
+ datum_to_json(vals[*valcount], nulls[*valcount], result, tcategory,
+ outfuncoid, false);
+ (*valcount)++;
+ }
+ else
+ {
+ /*
+ * Do we want line feeds on inner dimensions of arrays? For now
+ * we'll say no.
+ */
+ array_dim_to_json(result, dim + 1, ndims, dims, vals, nulls,
+ valcount, tcategory, outfuncoid, false);
+ }
+ }
+
+ appendStringInfoChar(result, ']');
+}
+
+/*
+ * Turn an array into JSON.
+ */
+static void
+array_to_json_internal(Datum array, StringInfo result, bool use_line_feeds)
+{
+ ArrayType *v = DatumGetArrayTypeP(array);
+ Oid element_type = ARR_ELEMTYPE(v);
+ int *dim;
+ int ndim;
+ int nitems;
+ int count = 0;
+ Datum *elements;
+ bool *nulls;
+ int16 typlen;
+ bool typbyval;
+ char typalign;
+ JsonTypeCategory tcategory;
+ Oid outfuncoid;
+
+ ndim = ARR_NDIM(v);
+ dim = ARR_DIMS(v);
+ nitems = ArrayGetNItems(ndim, dim);
+
+ if (nitems <= 0)
+ {
+ appendStringInfoString(result, "[]");
+ return;
+ }
+
+ get_typlenbyvalalign(element_type,
+ &typlen, &typbyval, &typalign);
+
+ json_categorize_type(element_type,
+ &tcategory, &outfuncoid);
+
+ deconstruct_array(v, element_type, typlen, typbyval,
+ typalign, &elements, &nulls,
+ &nitems);
+
+ array_dim_to_json(result, 0, ndim, dim, elements, nulls, &count, tcategory,
+ outfuncoid, use_line_feeds);
+
+ pfree(elements);
+ pfree(nulls);
+}
+
+/*
+ * Turn a composite / record into JSON.
+ */
+static void
+composite_to_json(Datum composite, StringInfo result, bool use_line_feeds)
+{
+ HeapTupleHeader td;
+ Oid tupType;
+ int32 tupTypmod;
+ TupleDesc tupdesc;
+ HeapTupleData tmptup,
+ *tuple;
+ int i;
+ bool needsep = false;
+ const char *sep;
+
+ sep = use_line_feeds ? ",\n " : ",";
+
+ td = DatumGetHeapTupleHeader(composite);
+
+ /* Extract rowtype info and find a tupdesc */
+ tupType = HeapTupleHeaderGetTypeId(td);
+ tupTypmod = HeapTupleHeaderGetTypMod(td);
+ tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
+
+ /* Build a temporary HeapTuple control structure */
+ tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
+ tmptup.t_data = td;
+ tuple = &tmptup;
+
+ appendStringInfoChar(result, '{');
+
+ for (i = 0; i < tupdesc->natts; i++)
+ {
+ Datum val;
+ bool isnull;
+ char *attname;
+ JsonTypeCategory tcategory;
+ Oid outfuncoid;
+ Form_pg_attribute att = TupleDescAttr(tupdesc, i);
+
+ if (att->attisdropped)
+ continue;
+
+ if (needsep)
+ appendStringInfoString(result, sep);
+ needsep = true;
+
+ attname = NameStr(att->attname);
+ escape_json(result, attname);
+ appendStringInfoChar(result, ':');
+
+ val = heap_getattr(tuple, i + 1, tupdesc, &isnull);
+
+ if (isnull)
+ {
+ tcategory = JSONTYPE_NULL;
+ outfuncoid = InvalidOid;
+ }
+ else
+ json_categorize_type(att->atttypid, &tcategory, &outfuncoid);
+
+ datum_to_json(val, isnull, result, tcategory, outfuncoid, false);
+ }
+
+ appendStringInfoChar(result, '}');
+ ReleaseTupleDesc(tupdesc);
+}
+
+/*
+ * Append JSON text for "val" to "result".
+ *
+ * This is just a thin wrapper around datum_to_json. If the same type will be
+ * printed many times, avoid using this; better to do the json_categorize_type
+ * lookups only once.
+ */
+static void
+add_json(Datum val, bool is_null, StringInfo result,
+ Oid val_type, bool key_scalar)
+{
+ JsonTypeCategory tcategory;
+ Oid outfuncoid;
+
+ if (val_type == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("could not determine input data type")));
+
+ if (is_null)
+ {
+ tcategory = JSONTYPE_NULL;
+ outfuncoid = InvalidOid;
+ }
+ else
+ json_categorize_type(val_type,
+ &tcategory, &outfuncoid);
+
+ datum_to_json(val, is_null, result, tcategory, outfuncoid, key_scalar);
+}
+
+/*
+ * SQL function array_to_json(row)
+ */
+Datum
+array_to_json(PG_FUNCTION_ARGS)
+{
+ Datum array = PG_GETARG_DATUM(0);
+ StringInfo result;
+
+ result = makeStringInfo();
+
+ array_to_json_internal(array, result, false);
+
+ PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
+}
+
+/*
+ * SQL function array_to_json(row, prettybool)
+ */
+Datum
+array_to_json_pretty(PG_FUNCTION_ARGS)
+{
+ Datum array = PG_GETARG_DATUM(0);
+ bool use_line_feeds = PG_GETARG_BOOL(1);
+ StringInfo result;
+
+ result = makeStringInfo();
+
+ array_to_json_internal(array, result, use_line_feeds);
+
+ PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
+}
+
+/*
+ * SQL function row_to_json(row)
+ */
+Datum
+row_to_json(PG_FUNCTION_ARGS)
+{
+ Datum array = PG_GETARG_DATUM(0);
+ StringInfo result;
+
+ result = makeStringInfo();
+
+ composite_to_json(array, result, false);
+
+ PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
+}
+
+/*
+ * SQL function row_to_json(row, prettybool)
+ */
+Datum
+row_to_json_pretty(PG_FUNCTION_ARGS)
+{
+ Datum array = PG_GETARG_DATUM(0);
+ bool use_line_feeds = PG_GETARG_BOOL(1);
+ StringInfo result;
+
+ result = makeStringInfo();
+
+ composite_to_json(array, result, use_line_feeds);
+
+ PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
+}
+
+/*
+ * SQL function to_json(anyvalue)
+ */
+Datum
+to_json(PG_FUNCTION_ARGS)
+{
+ Datum val = PG_GETARG_DATUM(0);
+ Oid val_type = get_fn_expr_argtype(fcinfo->flinfo, 0);
+ StringInfo result;
+ JsonTypeCategory tcategory;
+ Oid outfuncoid;
+
+ if (val_type == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("could not determine input data type")));
+
+ json_categorize_type(val_type,
+ &tcategory, &outfuncoid);
+
+ result = makeStringInfo();
+
+ datum_to_json(val, false, result, tcategory, outfuncoid, false);
+
+ PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
+}
+
+/*
+ * json_agg transition function
+ *
+ * aggregate input column as a json array value.
+ */
+Datum
+json_agg_transfn(PG_FUNCTION_ARGS)
+{
+ MemoryContext aggcontext,
+ oldcontext;
+ JsonAggState *state;
+ Datum val;
+
+ if (!AggCheckCallContext(fcinfo, &aggcontext))
+ {
+ /* cannot be called directly because of internal-type argument */
+ elog(ERROR, "json_agg_transfn called in non-aggregate context");
+ }
+
+ if (PG_ARGISNULL(0))
+ {
+ Oid arg_type = get_fn_expr_argtype(fcinfo->flinfo, 1);
+
+ if (arg_type == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("could not determine input data type")));
+
+ /*
+ * Make this state object in a context where it will persist for the
+ * duration of the aggregate call. MemoryContextSwitchTo is only
+ * needed the first time, as the StringInfo routines make sure they
+ * use the right context to enlarge the object if necessary.
+ */
+ oldcontext = MemoryContextSwitchTo(aggcontext);
+ state = (JsonAggState *) palloc(sizeof(JsonAggState));
+ state->str = makeStringInfo();
+ MemoryContextSwitchTo(oldcontext);
+
+ appendStringInfoChar(state->str, '[');
+ json_categorize_type(arg_type, &state->val_category,
+ &state->val_output_func);
+ }
+ else
+ {
+ state = (JsonAggState *) PG_GETARG_POINTER(0);
+ appendStringInfoString(state->str, ", ");
+ }
+
+ /* fast path for NULLs */
+ if (PG_ARGISNULL(1))
+ {
+ datum_to_json((Datum) 0, true, state->str, JSONTYPE_NULL,
+ InvalidOid, false);
+ PG_RETURN_POINTER(state);
+ }
+
+ val = PG_GETARG_DATUM(1);
+
+ /* add some whitespace if structured type and not first item */
+ if (!PG_ARGISNULL(0) &&
+ (state->val_category == JSONTYPE_ARRAY ||
+ state->val_category == JSONTYPE_COMPOSITE))
+ {
+ appendStringInfoString(state->str, "\n ");
+ }
+
+ datum_to_json(val, false, state->str, state->val_category,
+ state->val_output_func, false);
+
+ /*
+ * The transition type for json_agg() is declared to be "internal", which
+ * is a pass-by-value type the same size as a pointer. So we can safely
+ * pass the JsonAggState pointer through nodeAgg.c's machinations.
+ */
+ PG_RETURN_POINTER(state);
+}
+
+/*
+ * json_agg final function
+ */
+Datum
+json_agg_finalfn(PG_FUNCTION_ARGS)
+{
+ JsonAggState *state;
+
+ /* cannot be called directly because of internal-type argument */
+ Assert(AggCheckCallContext(fcinfo, NULL));
+
+ state = PG_ARGISNULL(0) ?
+ NULL :
+ (JsonAggState *) PG_GETARG_POINTER(0);
+
+ /* NULL result for no rows in, as is standard with aggregates */
+ if (state == NULL)
+ PG_RETURN_NULL();
+
+ /* Else return state with appropriate array terminator added */
+ PG_RETURN_TEXT_P(catenate_stringinfo_string(state->str, "]"));
+}
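+
+/*
+ * For example, SELECT json_agg(x) FROM (VALUES (1), (2), (3)) t(x)
+ * returns the json value [1, 2, 3].
+ */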
+
+/*
+ * json_object_agg transition function.
+ *
+ * aggregate two input columns as a single json object value.
+ */
+Datum
+json_object_agg_transfn(PG_FUNCTION_ARGS)
+{
+ MemoryContext aggcontext,
+ oldcontext;
+ JsonAggState *state;
+ Datum arg;
+
+ if (!AggCheckCallContext(fcinfo, &aggcontext))
+ {
+ /* cannot be called directly because of internal-type argument */
+ elog(ERROR, "json_object_agg_transfn called in non-aggregate context");
+ }
+
+ if (PG_ARGISNULL(0))
+ {
+ Oid arg_type;
+
+ /*
+ * Make the StringInfo in a context where it will persist for the
+ * duration of the aggregate call. Switching context is only needed
+ * for this initial step, as the StringInfo routines make sure they
+ * use the right context to enlarge the object if necessary.
+ */
+ oldcontext = MemoryContextSwitchTo(aggcontext);
+ state = (JsonAggState *) palloc(sizeof(JsonAggState));
+ state->str = makeStringInfo();
+ MemoryContextSwitchTo(oldcontext);
+
+ arg_type = get_fn_expr_argtype(fcinfo->flinfo, 1);
+
+ if (arg_type == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("could not determine data type for argument %d", 1)));
+
+ json_categorize_type(arg_type, &state->key_category,
+ &state->key_output_func);
+
+ arg_type = get_fn_expr_argtype(fcinfo->flinfo, 2);
+
+ if (arg_type == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("could not determine data type for argument %d", 2)));
+
+ json_categorize_type(arg_type, &state->val_category,
+ &state->val_output_func);
+
+ appendStringInfoString(state->str, "{ ");
+ }
+ else
+ {
+ state = (JsonAggState *) PG_GETARG_POINTER(0);
+ appendStringInfoString(state->str, ", ");
+ }
+
+ /*
+ * Note: since json_object_agg() is declared as taking type "any", the
+ * parser will not do any type conversion on unknown-type literals (that
+ * is, undecorated strings or NULLs). Such values will arrive here as
+ * type UNKNOWN, which fortunately does not matter to us, since
+ * unknownout() works fine.
+ */
+
+ if (PG_ARGISNULL(1))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("field name must not be null")));
+
+ arg = PG_GETARG_DATUM(1);
+
+ datum_to_json(arg, false, state->str, state->key_category,
+ state->key_output_func, true);
+
+ appendStringInfoString(state->str, " : ");
+
+ if (PG_ARGISNULL(2))
+ arg = (Datum) 0;
+ else
+ arg = PG_GETARG_DATUM(2);
+
+ datum_to_json(arg, PG_ARGISNULL(2), state->str, state->val_category,
+ state->val_output_func, false);
+
+ PG_RETURN_POINTER(state);
+}
+
+/*
+ * json_object_agg final function.
+ */
+Datum
+json_object_agg_finalfn(PG_FUNCTION_ARGS)
+{
+ JsonAggState *state;
+
+ /* cannot be called directly because of internal-type argument */
+ Assert(AggCheckCallContext(fcinfo, NULL));
+
+ state = PG_ARGISNULL(0) ? NULL : (JsonAggState *) PG_GETARG_POINTER(0);
+
+ /* NULL result for no rows in, as is standard with aggregates */
+ if (state == NULL)
+ PG_RETURN_NULL();
+
+ /* Else return state with appropriate object terminator added */
+ PG_RETURN_TEXT_P(catenate_stringinfo_string(state->str, " }"));
+}
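+
+/*
+ * For example, SELECT json_object_agg(k, v)
+ * FROM (VALUES ('a', 1), ('b', 2)) t(k, v)
+ * returns { "a" : 1, "b" : 2 }.
+ */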
+
+/*
+ * Helper function for aggregates: return given StringInfo's contents plus
+ * specified trailing string, as a text datum. We need this because aggregate
+ * final functions are not allowed to modify the aggregate state.
+ */
+static text *
+catenate_stringinfo_string(StringInfo buffer, const char *addon)
+{
+ /* custom version of cstring_to_text_with_len */
+ int buflen = buffer->len;
+ int addlen = strlen(addon);
+ text *result = (text *) palloc(buflen + addlen + VARHDRSZ);
+
+ SET_VARSIZE(result, buflen + addlen + VARHDRSZ);
+ memcpy(VARDATA(result), buffer->data, buflen);
+ memcpy(VARDATA(result) + buflen, addon, addlen);
+
+ return result;
+}
+
+/*
+ * SQL function json_build_object(variadic "any")
+ */
+Datum
+json_build_object(PG_FUNCTION_ARGS)
+{
+ int nargs;
+ int i;
+ const char *sep = "";
+ StringInfo result;
+ Datum *args;
+ bool *nulls;
+ Oid *types;
+
+ /* fetch argument values to build the object */
+ nargs = extract_variadic_args(fcinfo, 0, false, &args, &types, &nulls);
+
+ if (nargs < 0)
+ PG_RETURN_NULL();
+
+ if (nargs % 2 != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("argument list must have even number of elements"),
+ /* translator: %s is a SQL function name */
+ errhint("The arguments of %s must consist of alternating keys and values.",
+ "json_build_object()")));
+
+ result = makeStringInfo();
+
+ appendStringInfoChar(result, '{');
+
+ for (i = 0; i < nargs; i += 2)
+ {
+ appendStringInfoString(result, sep);
+ sep = ", ";
+
+ /* process key */
+ if (nulls[i])
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("argument %d cannot be null", i + 1),
+ errhint("Object keys should be text.")));
+
+ add_json(args[i], false, result, types[i], true);
+
+ appendStringInfoString(result, " : ");
+
+ /* process value */
+ add_json(args[i + 1], nulls[i + 1], result, types[i + 1], false);
+ }
+
+ appendStringInfoChar(result, '}');
+
+ PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
+}
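+
+/*
+ * For example, json_build_object('a', 1, 'b', true) produces
+ * {"a" : 1, "b" : true}: keys are always quoted, while numeric and
+ * boolean values are emitted unquoted.
+ */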
+
+/*
+ * degenerate case of json_build_object where it gets 0 arguments.
+ */
+Datum
+json_build_object_noargs(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_TEXT_P(cstring_to_text_with_len("{}", 2));
+}
+
+/*
+ * SQL function json_build_array(variadic "any")
+ */
+Datum
+json_build_array(PG_FUNCTION_ARGS)
+{
+ int nargs;
+ int i;
+ const char *sep = "";
+ StringInfo result;
+ Datum *args;
+ bool *nulls;
+ Oid *types;
+
+ /* fetch argument values to build the array */
+ nargs = extract_variadic_args(fcinfo, 0, false, &args, &types, &nulls);
+
+ if (nargs < 0)
+ PG_RETURN_NULL();
+
+ result = makeStringInfo();
+
+ appendStringInfoChar(result, '[');
+
+ for (i = 0; i < nargs; i++)
+ {
+ appendStringInfoString(result, sep);
+ sep = ", ";
+ add_json(args[i], nulls[i], result, types[i], false);
+ }
+
+ appendStringInfoChar(result, ']');
+
+ PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
+}
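+
+/*
+ * For example, json_build_array(1, 'two', NULL) produces [1, "two", null].
+ */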
+
+/*
+ * degenerate case of json_build_array where it gets 0 arguments.
+ */
+Datum
+json_build_array_noargs(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_TEXT_P(cstring_to_text_with_len("[]", 2));
+}
+
+/*
+ * SQL function json_object(text[])
+ *
+ * take a one or two dimensional array of text as key/value pairs
+ * for a json object.
+ */
+Datum
+json_object(PG_FUNCTION_ARGS)
+{
+ ArrayType *in_array = PG_GETARG_ARRAYTYPE_P(0);
+ int ndims = ARR_NDIM(in_array);
+ StringInfoData result;
+ Datum *in_datums;
+ bool *in_nulls;
+ int in_count,
+ count,
+ i;
+ text *rval;
+ char *v;
+
+ switch (ndims)
+ {
+ case 0:
+ PG_RETURN_DATUM(CStringGetTextDatum("{}"));
+ break;
+
+ case 1:
+ if ((ARR_DIMS(in_array)[0]) % 2)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("array must have even number of elements")));
+ break;
+
+ case 2:
+ if ((ARR_DIMS(in_array)[1]) != 2)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("array must have two columns")));
+ break;
+
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("wrong number of array subscripts")));
+ }
+
+ deconstruct_array(in_array,
+ TEXTOID, -1, false, TYPALIGN_INT,
+ &in_datums, &in_nulls, &in_count);
+
+ count = in_count / 2;
+
+ initStringInfo(&result);
+
+ appendStringInfoChar(&result, '{');
+
+ for (i = 0; i < count; ++i)
+ {
+ if (in_nulls[i * 2])
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("null value not allowed for object key")));
+
+ v = TextDatumGetCString(in_datums[i * 2]);
+ if (i > 0)
+ appendStringInfoString(&result, ", ");
+ escape_json(&result, v);
+ appendStringInfoString(&result, " : ");
+ pfree(v);
+ if (in_nulls[i * 2 + 1])
+ appendStringInfoString(&result, "null");
+ else
+ {
+ v = TextDatumGetCString(in_datums[i * 2 + 1]);
+ escape_json(&result, v);
+ pfree(v);
+ }
+ }
+
+ appendStringInfoChar(&result, '}');
+
+ pfree(in_datums);
+ pfree(in_nulls);
+
+ rval = cstring_to_text_with_len(result.data, result.len);
+ pfree(result.data);
+
+ PG_RETURN_TEXT_P(rval);
+}
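+
+/*
+ * For example, json_object('{a, 1, b, 2}') and json_object('{{a, 1}, {b, 2}}')
+ * both produce {"a" : "1", "b" : "2"}; every value is emitted as a JSON
+ * string, since the input is text[].
+ */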
+
+/*
+ * SQL function json_object(text[], text[])
+ *
+ * take separate key and value arrays of text to construct a json object
+ * pairwise.
+ */
+Datum
+json_object_two_arg(PG_FUNCTION_ARGS)
+{
+ ArrayType *key_array = PG_GETARG_ARRAYTYPE_P(0);
+ ArrayType *val_array = PG_GETARG_ARRAYTYPE_P(1);
+ int nkdims = ARR_NDIM(key_array);
+ int nvdims = ARR_NDIM(val_array);
+ StringInfoData result;
+ Datum *key_datums,
+ *val_datums;
+ bool *key_nulls,
+ *val_nulls;
+ int key_count,
+ val_count,
+ i;
+ text *rval;
+ char *v;
+
+ if (nkdims > 1 || nkdims != nvdims)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("wrong number of array subscripts")));
+
+ if (nkdims == 0)
+ PG_RETURN_DATUM(CStringGetTextDatum("{}"));
+
+ deconstruct_array(key_array,
+ TEXTOID, -1, false, TYPALIGN_INT,
+ &key_datums, &key_nulls, &key_count);
+
+ deconstruct_array(val_array,
+ TEXTOID, -1, false, TYPALIGN_INT,
+ &val_datums, &val_nulls, &val_count);
+
+ if (key_count != val_count)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("mismatched array dimensions")));
+
+ initStringInfo(&result);
+
+ appendStringInfoChar(&result, '{');
+
+ for (i = 0; i < key_count; ++i)
+ {
+ if (key_nulls[i])
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("null value not allowed for object key")));
+
+ v = TextDatumGetCString(key_datums[i]);
+ if (i > 0)
+ appendStringInfoString(&result, ", ");
+ escape_json(&result, v);
+ appendStringInfoString(&result, " : ");
+ pfree(v);
+ if (val_nulls[i])
+ appendStringInfoString(&result, "null");
+ else
+ {
+ v = TextDatumGetCString(val_datums[i]);
+ escape_json(&result, v);
+ pfree(v);
+ }
+ }
+
+ appendStringInfoChar(&result, '}');
+
+ pfree(key_datums);
+ pfree(key_nulls);
+ pfree(val_datums);
+ pfree(val_nulls);
+
+ rval = cstring_to_text_with_len(result.data, result.len);
+ pfree(result.data);
+
+ PG_RETURN_TEXT_P(rval);
+}
+
+
+/*
+ * Produce a JSON string literal, properly escaping characters in the text.
+ */
+void
+escape_json(StringInfo buf, const char *str)
+{
+ const char *p;
+
+ appendStringInfoCharMacro(buf, '"');
+ for (p = str; *p; p++)
+ {
+ switch (*p)
+ {
+ case '\b':
+ appendStringInfoString(buf, "\\b");
+ break;
+ case '\f':
+ appendStringInfoString(buf, "\\f");
+ break;
+ case '\n':
+ appendStringInfoString(buf, "\\n");
+ break;
+ case '\r':
+ appendStringInfoString(buf, "\\r");
+ break;
+ case '\t':
+ appendStringInfoString(buf, "\\t");
+ break;
+ case '"':
+ appendStringInfoString(buf, "\\\"");
+ break;
+ case '\\':
+ appendStringInfoString(buf, "\\\\");
+ break;
+ default:
+ if ((unsigned char) *p < ' ')
+ appendStringInfo(buf, "\\u%04x", (int) *p);
+ else
+ appendStringInfoCharMacro(buf, *p);
+ break;
+ }
+ }
+ appendStringInfoCharMacro(buf, '"');
+}
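+
+/*
+ * For example, an input string containing a double quote and a newline,
+ * such as: say "hi" followed by a line break, is appended to the buffer
+ * as "say \"hi\"\n" -- the special characters are replaced by escape
+ * sequences and the whole value is wrapped in double quotes.
+ */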
+
+/*
+ * SQL function json_typeof(json) -> text
+ *
+ * Returns the type of the outermost JSON value as TEXT. Possible types are
+ * "object", "array", "string", "number", "boolean", and "null".
+ *
+ * Performs a single call to json_lex() to get the first token of the supplied
+ * value. This initial token uniquely determines the value's type. As our
+ * input must already have been validated by json_in() or json_recv(), the
+ * initial token should never be JSON_TOKEN_OBJECT_END, JSON_TOKEN_ARRAY_END,
+ * JSON_TOKEN_COLON, JSON_TOKEN_COMMA, or JSON_TOKEN_END.
+ */
+Datum
+json_typeof(PG_FUNCTION_ARGS)
+{
+ text *json;
+
+ JsonLexContext *lex;
+ JsonTokenType tok;
+ char *type;
+ JsonParseErrorType result;
+
+ json = PG_GETARG_TEXT_PP(0);
+ lex = makeJsonLexContext(json, false);
+
+ /* Lex exactly one token from the input and check its type. */
+ result = json_lex(lex);
+ if (result != JSON_SUCCESS)
+ json_ereport_error(result, lex);
+ tok = lex->token_type;
+ switch (tok)
+ {
+ case JSON_TOKEN_OBJECT_START:
+ type = "object";
+ break;
+ case JSON_TOKEN_ARRAY_START:
+ type = "array";
+ break;
+ case JSON_TOKEN_STRING:
+ type = "string";
+ break;
+ case JSON_TOKEN_NUMBER:
+ type = "number";
+ break;
+ case JSON_TOKEN_TRUE:
+ case JSON_TOKEN_FALSE:
+ type = "boolean";
+ break;
+ case JSON_TOKEN_NULL:
+ type = "null";
+ break;
+ default:
+ elog(ERROR, "unexpected json token: %d", tok);
+ }
+
+ PG_RETURN_TEXT_P(cstring_to_text(type));
+}
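+
+/*
+ * For example, json_typeof('[1, 2, 3]') returns 'array' and
+ * json_typeof('-1.0') returns 'number'.
+ */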
diff --git a/src/backend/utils/adt/jsonb.c b/src/backend/utils/adt/jsonb.c
new file mode 100644
index 0000000..d253ae6
--- /dev/null
+++ b/src/backend/utils/adt/jsonb.c
@@ -0,0 +1,2086 @@
+/*-------------------------------------------------------------------------
+ *
+ * jsonb.c
+ * I/O routines for jsonb type
+ *
+ * Copyright (c) 2014-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/jsonb.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "access/transam.h"
+#include "catalog/pg_type.h"
+#include "funcapi.h"
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "parser/parse_coerce.h"
+#include "utils/builtins.h"
+#include "utils/date.h"
+#include "utils/datetime.h"
+#include "utils/json.h"
+#include "utils/jsonb.h"
+#include "utils/jsonfuncs.h"
+#include "utils/lsyscache.h"
+#include "utils/syscache.h"
+#include "utils/typcache.h"
+
+typedef struct JsonbInState
+{
+ JsonbParseState *parseState;
+ JsonbValue *res;
+} JsonbInState;
+
+/* unlike with json categories, we need to treat json and jsonb differently */
+typedef enum /* type categories for datum_to_jsonb */
+{
+ JSONBTYPE_NULL, /* null, so we didn't bother to identify */
+ JSONBTYPE_BOOL, /* boolean (built-in types only) */
+ JSONBTYPE_NUMERIC, /* numeric (ditto) */
+ JSONBTYPE_DATE, /* we use special formatting for datetimes */
+ JSONBTYPE_TIMESTAMP, /* we use special formatting for timestamp */
+ JSONBTYPE_TIMESTAMPTZ, /* ... and timestamptz */
+ JSONBTYPE_JSON, /* JSON */
+ JSONBTYPE_JSONB, /* JSONB */
+ JSONBTYPE_ARRAY, /* array */
+ JSONBTYPE_COMPOSITE, /* composite */
+ JSONBTYPE_JSONCAST, /* something with an explicit cast to JSON */
+ JSONBTYPE_OTHER /* all else */
+} JsonbTypeCategory;
+
+typedef struct JsonbAggState
+{
+ JsonbInState *res;
+ JsonbTypeCategory key_category;
+ Oid key_output_func;
+ JsonbTypeCategory val_category;
+ Oid val_output_func;
+} JsonbAggState;
+
+static inline Datum jsonb_from_cstring(char *json, int len);
+static size_t checkStringLen(size_t len);
+static void jsonb_in_object_start(void *pstate);
+static void jsonb_in_object_end(void *pstate);
+static void jsonb_in_array_start(void *pstate);
+static void jsonb_in_array_end(void *pstate);
+static void jsonb_in_object_field_start(void *pstate, char *fname, bool isnull);
+static void jsonb_put_escaped_value(StringInfo out, JsonbValue *scalarVal);
+static void jsonb_in_scalar(void *pstate, char *token, JsonTokenType tokentype);
+static void jsonb_categorize_type(Oid typoid,
+ JsonbTypeCategory *tcategory,
+ Oid *outfuncoid);
+static void composite_to_jsonb(Datum composite, JsonbInState *result);
+static void array_dim_to_jsonb(JsonbInState *result, int dim, int ndims, int *dims,
+ Datum *vals, bool *nulls, int *valcount,
+ JsonbTypeCategory tcategory, Oid outfuncoid);
+static void array_to_jsonb_internal(Datum array, JsonbInState *result);
+static void datum_to_jsonb(Datum val, bool is_null, JsonbInState *result,
+ JsonbTypeCategory tcategory, Oid outfuncoid,
+ bool key_scalar);
+static void add_jsonb(Datum val, bool is_null, JsonbInState *result,
+ Oid val_type, bool key_scalar);
+static JsonbParseState *clone_parse_state(JsonbParseState *state);
+static char *JsonbToCStringWorker(StringInfo out, JsonbContainer *in, int estimated_len, bool indent);
+static void add_indent(StringInfo out, bool indent, int level);
+
+/*
+ * jsonb type input function
+ */
+Datum
+jsonb_in(PG_FUNCTION_ARGS)
+{
+ char *json = PG_GETARG_CSTRING(0);
+
+ return jsonb_from_cstring(json, strlen(json));
+}
+
+/*
+ * jsonb type recv function
+ *
+ * The type is sent as text in binary mode, so this is almost the same
+ * as the input function, but it's prefixed with a version number so we
+ * can change the binary format sent in future if necessary. For now,
+ * only version 1 is supported.
+ */
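+/*
+ * A rough sketch of the version-1 wire format consumed below:
+ *
+ *    1 byte           version number (must be 1)
+ *    remaining bytes  the jsonb value rendered as JSON text
+ */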
+Datum
+jsonb_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ int version = pq_getmsgint(buf, 1);
+ char *str;
+ int nbytes;
+
+ if (version == 1)
+ str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
+ else
+ elog(ERROR, "unsupported jsonb version number %d", version);
+
+ return jsonb_from_cstring(str, nbytes);
+}
+
+/*
+ * jsonb type output function
+ */
+Datum
+jsonb_out(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ char *out;
+
+ out = JsonbToCString(NULL, &jb->root, VARSIZE(jb));
+
+ PG_RETURN_CSTRING(out);
+}
+
+/*
+ * jsonb type send function
+ *
+ * Just send jsonb as a version number, then a string of text
+ */
+Datum
+jsonb_send(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ StringInfoData buf;
+ StringInfo jtext = makeStringInfo();
+ int version = 1;
+
+ (void) JsonbToCString(jtext, &jb->root, VARSIZE(jb));
+
+ pq_begintypsend(&buf);
+ pq_sendint8(&buf, version);
+ pq_sendtext(&buf, jtext->data, jtext->len);
+ pfree(jtext->data);
+ pfree(jtext);
+
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/*
+ * Get the type name of a jsonb container.
+ */
+static const char *
+JsonbContainerTypeName(JsonbContainer *jbc)
+{
+ JsonbValue scalar;
+
+ if (JsonbExtractScalar(jbc, &scalar))
+ return JsonbTypeName(&scalar);
+ else if (JsonContainerIsArray(jbc))
+ return "array";
+ else if (JsonContainerIsObject(jbc))
+ return "object";
+ else
+ {
+ elog(ERROR, "invalid jsonb container type: 0x%08x", jbc->header);
+ return "unknown";
+ }
+}
+
+/*
+ * Get the type name of a jsonb value.
+ */
+const char *
+JsonbTypeName(JsonbValue *jbv)
+{
+ switch (jbv->type)
+ {
+ case jbvBinary:
+ return JsonbContainerTypeName(jbv->val.binary.data);
+ case jbvObject:
+ return "object";
+ case jbvArray:
+ return "array";
+ case jbvNumeric:
+ return "number";
+ case jbvString:
+ return "string";
+ case jbvBool:
+ return "boolean";
+ case jbvNull:
+ return "null";
+ case jbvDatetime:
+ switch (jbv->val.datetime.typid)
+ {
+ case DATEOID:
+ return "date";
+ case TIMEOID:
+ return "time without time zone";
+ case TIMETZOID:
+ return "time with time zone";
+ case TIMESTAMPOID:
+ return "timestamp without time zone";
+ case TIMESTAMPTZOID:
+ return "timestamp with time zone";
+ default:
+ elog(ERROR, "unrecognized jsonb value datetime type: %d",
+ jbv->val.datetime.typid);
+ }
+ return "unknown";
+ default:
+ elog(ERROR, "unrecognized jsonb value type: %d", jbv->type);
+ return "unknown";
+ }
+}
+
+/*
+ * SQL function jsonb_typeof(jsonb) -> text
+ *
+ * This function is here because the analog json function is in json.c, since
+ * it uses the json parser internals not exposed elsewhere.
+ */
+Datum
+jsonb_typeof(PG_FUNCTION_ARGS)
+{
+ Jsonb *in = PG_GETARG_JSONB_P(0);
+ const char *result = JsonbContainerTypeName(&in->root);
+
+ PG_RETURN_TEXT_P(cstring_to_text(result));
+}
+
+/*
+ * jsonb_from_cstring
+ *
+ * Turns json string into a jsonb Datum.
+ *
+ * Uses the json parser (with hooks) to construct a jsonb.
+ */
+static inline Datum
+jsonb_from_cstring(char *json, int len)
+{
+ JsonLexContext *lex;
+ JsonbInState state;
+ JsonSemAction sem;
+
+ memset(&state, 0, sizeof(state));
+ memset(&sem, 0, sizeof(sem));
+ lex = makeJsonLexContextCstringLen(json, len, GetDatabaseEncoding(), true);
+
+ sem.semstate = (void *) &state;
+
+ sem.object_start = jsonb_in_object_start;
+ sem.array_start = jsonb_in_array_start;
+ sem.object_end = jsonb_in_object_end;
+ sem.array_end = jsonb_in_array_end;
+ sem.scalar = jsonb_in_scalar;
+ sem.object_field_start = jsonb_in_object_field_start;
+
+ pg_parse_json_or_ereport(lex, &sem);
+
+ /* after parsing, state.res holds the composed jsonb structure */
+ PG_RETURN_POINTER(JsonbValueToJsonb(state.res));
+}
+
+static size_t
+checkStringLen(size_t len)
+{
+ if (len > JENTRY_OFFLENMASK)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("string too long to represent as jsonb string"),
+ errdetail("Due to an implementation restriction, jsonb strings cannot exceed %d bytes.",
+ JENTRY_OFFLENMASK)));
+
+ return len;
+}
+
+static void
+jsonb_in_object_start(void *pstate)
+{
+ JsonbInState *_state = (JsonbInState *) pstate;
+
+ _state->res = pushJsonbValue(&_state->parseState, WJB_BEGIN_OBJECT, NULL);
+}
+
+static void
+jsonb_in_object_end(void *pstate)
+{
+ JsonbInState *_state = (JsonbInState *) pstate;
+
+ _state->res = pushJsonbValue(&_state->parseState, WJB_END_OBJECT, NULL);
+}
+
+static void
+jsonb_in_array_start(void *pstate)
+{
+ JsonbInState *_state = (JsonbInState *) pstate;
+
+ _state->res = pushJsonbValue(&_state->parseState, WJB_BEGIN_ARRAY, NULL);
+}
+
+static void
+jsonb_in_array_end(void *pstate)
+{
+ JsonbInState *_state = (JsonbInState *) pstate;
+
+ _state->res = pushJsonbValue(&_state->parseState, WJB_END_ARRAY, NULL);
+}
+
+static void
+jsonb_in_object_field_start(void *pstate, char *fname, bool isnull)
+{
+ JsonbInState *_state = (JsonbInState *) pstate;
+ JsonbValue v;
+
+ Assert(fname != NULL);
+ v.type = jbvString;
+ v.val.string.len = checkStringLen(strlen(fname));
+ v.val.string.val = fname;
+
+ _state->res = pushJsonbValue(&_state->parseState, WJB_KEY, &v);
+}
+
+static void
+jsonb_put_escaped_value(StringInfo out, JsonbValue *scalarVal)
+{
+ switch (scalarVal->type)
+ {
+ case jbvNull:
+ appendBinaryStringInfo(out, "null", 4);
+ break;
+ case jbvString:
+ escape_json(out, pnstrdup(scalarVal->val.string.val, scalarVal->val.string.len));
+ break;
+ case jbvNumeric:
+ appendStringInfoString(out,
+ DatumGetCString(DirectFunctionCall1(numeric_out,
+ PointerGetDatum(scalarVal->val.numeric))));
+ break;
+ case jbvBool:
+ if (scalarVal->val.boolean)
+ appendBinaryStringInfo(out, "true", 4);
+ else
+ appendBinaryStringInfo(out, "false", 5);
+ break;
+ default:
+ elog(ERROR, "unknown jsonb scalar type");
+ }
+}
+
+/*
+ * For jsonb we always want the de-escaped value - that's what's in token
+ */
+static void
+jsonb_in_scalar(void *pstate, char *token, JsonTokenType tokentype)
+{
+ JsonbInState *_state = (JsonbInState *) pstate;
+ JsonbValue v;
+ Datum numd;
+
+ switch (tokentype)
+ {
+
+ case JSON_TOKEN_STRING:
+ Assert(token != NULL);
+ v.type = jbvString;
+ v.val.string.len = checkStringLen(strlen(token));
+ v.val.string.val = token;
+ break;
+ case JSON_TOKEN_NUMBER:
+
+ /*
+ * No need to check size of numeric values, because maximum
+ * numeric size is well below the JsonbValue restriction
+ */
+ Assert(token != NULL);
+ v.type = jbvNumeric;
+ numd = DirectFunctionCall3(numeric_in,
+ CStringGetDatum(token),
+ ObjectIdGetDatum(InvalidOid),
+ Int32GetDatum(-1));
+ v.val.numeric = DatumGetNumeric(numd);
+ break;
+ case JSON_TOKEN_TRUE:
+ v.type = jbvBool;
+ v.val.boolean = true;
+ break;
+ case JSON_TOKEN_FALSE:
+ v.type = jbvBool;
+ v.val.boolean = false;
+ break;
+ case JSON_TOKEN_NULL:
+ v.type = jbvNull;
+ break;
+ default:
+ /* should not be possible */
+ elog(ERROR, "invalid json token type");
+ break;
+ }
+
+ if (_state->parseState == NULL)
+ {
+ /* single scalar */
+ JsonbValue va;
+
+ va.type = jbvArray;
+ va.val.array.rawScalar = true;
+ va.val.array.nElems = 1;
+
+ _state->res = pushJsonbValue(&_state->parseState, WJB_BEGIN_ARRAY, &va);
+ _state->res = pushJsonbValue(&_state->parseState, WJB_ELEM, &v);
+ _state->res = pushJsonbValue(&_state->parseState, WJB_END_ARRAY, NULL);
+ }
+ else
+ {
+ JsonbValue *o = &_state->parseState->contVal;
+
+ switch (o->type)
+ {
+ case jbvArray:
+ _state->res = pushJsonbValue(&_state->parseState, WJB_ELEM, &v);
+ break;
+ case jbvObject:
+ _state->res = pushJsonbValue(&_state->parseState, WJB_VALUE, &v);
+ break;
+ default:
+ elog(ERROR, "unexpected parent of nested structure");
+ }
+ }
+}
+
+/*
+ * JsonbToCString
+ * Converts jsonb value to a C-string.
+ *
+ * If the 'out' argument is non-null, the resulting C-string is stored inside
+ * that StringInfo buffer. The resulting string is always returned.
+ *
+ * A typical case for passing the StringInfo in rather than NULL is where the
+ * caller wants access to the len attribute without having to call strlen, e.g.
+ * if they are converting it to a text* object.
+ */
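+/*
+ * For example, jsonb_send() above passes a StringInfo here so that it can use
+ * the accumulated length directly when building the wire message, instead of
+ * calling strlen() on the returned string.
+ */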
+char *
+JsonbToCString(StringInfo out, JsonbContainer *in, int estimated_len)
+{
+ return JsonbToCStringWorker(out, in, estimated_len, false);
+}
+
+/*
+ * same thing but with indentation turned on
+ */
+char *
+JsonbToCStringIndent(StringInfo out, JsonbContainer *in, int estimated_len)
+{
+ return JsonbToCStringWorker(out, in, estimated_len, true);
+}
+
+/*
+ * common worker for above two functions
+ */
+static char *
+JsonbToCStringWorker(StringInfo out, JsonbContainer *in, int estimated_len, bool indent)
+{
+ bool first = true;
+ JsonbIterator *it;
+ JsonbValue v;
+ JsonbIteratorToken type = WJB_DONE;
+ int level = 0;
+ bool redo_switch = false;
+
+ /* If we are indenting, don't add a space after a comma */
+ int ispaces = indent ? 1 : 2;
+
+ /*
+ * Don't indent the very first item. This gets set to the indent flag at
+ * the bottom of the loop.
+ */
+ bool use_indent = false;
+ bool raw_scalar = false;
+ bool last_was_key = false;
+
+ if (out == NULL)
+ out = makeStringInfo();
+
+ enlargeStringInfo(out, (estimated_len >= 0) ? estimated_len : 64);
+
+ it = JsonbIteratorInit(in);
+
+ while (redo_switch ||
+ ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE))
+ {
+ redo_switch = false;
+ switch (type)
+ {
+ case WJB_BEGIN_ARRAY:
+ if (!first)
+ appendBinaryStringInfo(out, ", ", ispaces);
+
+ if (!v.val.array.rawScalar)
+ {
+ add_indent(out, use_indent && !last_was_key, level);
+ appendStringInfoCharMacro(out, '[');
+ }
+ else
+ raw_scalar = true;
+
+ first = true;
+ level++;
+ break;
+ case WJB_BEGIN_OBJECT:
+ if (!first)
+ appendBinaryStringInfo(out, ", ", ispaces);
+
+ add_indent(out, use_indent && !last_was_key, level);
+ appendStringInfoCharMacro(out, '{');
+
+ first = true;
+ level++;
+ break;
+ case WJB_KEY:
+ if (!first)
+ appendBinaryStringInfo(out, ", ", ispaces);
+ first = true;
+
+ add_indent(out, use_indent, level);
+
+ /* json rules guarantee this is a string */
+ jsonb_put_escaped_value(out, &v);
+ appendBinaryStringInfo(out, ": ", 2);
+
+ type = JsonbIteratorNext(&it, &v, false);
+ if (type == WJB_VALUE)
+ {
+ first = false;
+ jsonb_put_escaped_value(out, &v);
+ }
+ else
+ {
+ Assert(type == WJB_BEGIN_OBJECT || type == WJB_BEGIN_ARRAY);
+
+ /*
+ * We need to rerun the current switch() since we need to
+ * output the object which we just got from the iterator
+ * before calling the iterator again.
+ */
+ redo_switch = true;
+ }
+ break;
+ case WJB_ELEM:
+ if (!first)
+ appendBinaryStringInfo(out, ", ", ispaces);
+ first = false;
+
+ if (!raw_scalar)
+ add_indent(out, use_indent, level);
+ jsonb_put_escaped_value(out, &v);
+ break;
+ case WJB_END_ARRAY:
+ level--;
+ if (!raw_scalar)
+ {
+ add_indent(out, use_indent, level);
+ appendStringInfoCharMacro(out, ']');
+ }
+ first = false;
+ break;
+ case WJB_END_OBJECT:
+ level--;
+ add_indent(out, use_indent, level);
+ appendStringInfoCharMacro(out, '}');
+ first = false;
+ break;
+ default:
+ elog(ERROR, "unknown jsonb iterator token type");
+ }
+ use_indent = indent;
+ last_was_key = redo_switch;
+ }
+
+ Assert(level == 0);
+
+ return out->data;
+}
+
+static void
+add_indent(StringInfo out, bool indent, int level)
+{
+ if (indent)
+ {
+ int i;
+
+ appendStringInfoCharMacro(out, '\n');
+ for (i = 0; i < level; i++)
+ appendBinaryStringInfo(out, "    ", 4);
+ }
+}
+
+
+/*
+ * Determine how we want to render values of a given type in datum_to_jsonb.
+ *
+ * Given the datatype OID, return its JsonbTypeCategory, as well as the type's
+ * output function OID. If the returned category is JSONBTYPE_JSONCAST,
+ * we return the OID of the relevant cast function instead.
+ */
+static void
+jsonb_categorize_type(Oid typoid,
+ JsonbTypeCategory *tcategory,
+ Oid *outfuncoid)
+{
+ bool typisvarlena;
+
+ /* Look through any domain */
+ typoid = getBaseType(typoid);
+
+ *outfuncoid = InvalidOid;
+
+ /*
+ * We need to get the output function for everything except date and
+ * timestamp types, booleans, array and composite types, json and jsonb,
+ * and non-builtin types where there's a cast to json. In this last case
+ * we return the oid of the cast function instead.
+ */
+
+ switch (typoid)
+ {
+ case BOOLOID:
+ *tcategory = JSONBTYPE_BOOL;
+ break;
+
+ case INT2OID:
+ case INT4OID:
+ case INT8OID:
+ case FLOAT4OID:
+ case FLOAT8OID:
+ case NUMERICOID:
+ getTypeOutputInfo(typoid, outfuncoid, &typisvarlena);
+ *tcategory = JSONBTYPE_NUMERIC;
+ break;
+
+ case DATEOID:
+ *tcategory = JSONBTYPE_DATE;
+ break;
+
+ case TIMESTAMPOID:
+ *tcategory = JSONBTYPE_TIMESTAMP;
+ break;
+
+ case TIMESTAMPTZOID:
+ *tcategory = JSONBTYPE_TIMESTAMPTZ;
+ break;
+
+ case JSONBOID:
+ *tcategory = JSONBTYPE_JSONB;
+ break;
+
+ case JSONOID:
+ *tcategory = JSONBTYPE_JSON;
+ break;
+
+ default:
+ /* Check for arrays and composites */
+ if (OidIsValid(get_element_type(typoid)) || typoid == ANYARRAYOID
+ || typoid == ANYCOMPATIBLEARRAYOID || typoid == RECORDARRAYOID)
+ *tcategory = JSONBTYPE_ARRAY;
+ else if (type_is_rowtype(typoid)) /* includes RECORDOID */
+ *tcategory = JSONBTYPE_COMPOSITE;
+ else
+ {
+ /* It's probably the general case ... */
+ *tcategory = JSONBTYPE_OTHER;
+
+ /*
+ * but first let's look for a cast to json (note: not to
+ * jsonb) if it's not built-in.
+ */
+ if (typoid >= FirstNormalObjectId)
+ {
+ Oid castfunc;
+ CoercionPathType ctype;
+
+ ctype = find_coercion_pathway(JSONOID, typoid,
+ COERCION_EXPLICIT, &castfunc);
+ if (ctype == COERCION_PATH_FUNC && OidIsValid(castfunc))
+ {
+ *tcategory = JSONBTYPE_JSONCAST;
+ *outfuncoid = castfunc;
+ }
+ else
+ {
+ /* not a cast type, so just get the usual output func */
+ getTypeOutputInfo(typoid, outfuncoid, &typisvarlena);
+ }
+ }
+ else
+ {
+ /* any other builtin type */
+ getTypeOutputInfo(typoid, outfuncoid, &typisvarlena);
+ }
+ break;
+ }
+ }
+}
+
+/*
+ * Turn a Datum into jsonb, adding it to the result JsonbInState.
+ *
+ * tcategory and outfuncoid are from a previous call to jsonb_categorize_type,
+ * except that if is_null is true then they can be invalid.
+ *
+ * If key_scalar is true, the value is stored as a key, so insist
+ * it's of an acceptable type, and force it to be a jbvString.
+ */
+static void
+datum_to_jsonb(Datum val, bool is_null, JsonbInState *result,
+ JsonbTypeCategory tcategory, Oid outfuncoid,
+ bool key_scalar)
+{
+ char *outputstr;
+ bool numeric_error;
+ JsonbValue jb;
+ bool scalar_jsonb = false;
+
+ check_stack_depth();
+
+ /* Convert val to a JsonbValue in jb (in most cases) */
+ if (is_null)
+ {
+ Assert(!key_scalar);
+ jb.type = jbvNull;
+ }
+ else if (key_scalar &&
+ (tcategory == JSONBTYPE_ARRAY ||
+ tcategory == JSONBTYPE_COMPOSITE ||
+ tcategory == JSONBTYPE_JSON ||
+ tcategory == JSONBTYPE_JSONB ||
+ tcategory == JSONBTYPE_JSONCAST))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("key value must be scalar, not array, composite, or json")));
+ }
+ else
+ {
+ if (tcategory == JSONBTYPE_JSONCAST)
+ val = OidFunctionCall1(outfuncoid, val);
+
+ switch (tcategory)
+ {
+ case JSONBTYPE_ARRAY:
+ array_to_jsonb_internal(val, result);
+ break;
+ case JSONBTYPE_COMPOSITE:
+ composite_to_jsonb(val, result);
+ break;
+ case JSONBTYPE_BOOL:
+ if (key_scalar)
+ {
+ outputstr = DatumGetBool(val) ? "true" : "false";
+ jb.type = jbvString;
+ jb.val.string.len = strlen(outputstr);
+ jb.val.string.val = outputstr;
+ }
+ else
+ {
+ jb.type = jbvBool;
+ jb.val.boolean = DatumGetBool(val);
+ }
+ break;
+ case JSONBTYPE_NUMERIC:
+ outputstr = OidOutputFunctionCall(outfuncoid, val);
+ if (key_scalar)
+ {
+ /* always quote keys */
+ jb.type = jbvString;
+ jb.val.string.len = strlen(outputstr);
+ jb.val.string.val = outputstr;
+ }
+ else
+ {
+ /*
+ * Make it numeric if it's a valid JSON number, otherwise
+ * a string. Invalid numeric output will always have an
+ * 'N' or 'n' in it (I think).
+ */
+ numeric_error = (strchr(outputstr, 'N') != NULL ||
+ strchr(outputstr, 'n') != NULL);
+ if (!numeric_error)
+ {
+ Datum numd;
+
+ jb.type = jbvNumeric;
+ numd = DirectFunctionCall3(numeric_in,
+ CStringGetDatum(outputstr),
+ ObjectIdGetDatum(InvalidOid),
+ Int32GetDatum(-1));
+ jb.val.numeric = DatumGetNumeric(numd);
+ pfree(outputstr);
+ }
+ else
+ {
+ jb.type = jbvString;
+ jb.val.string.len = strlen(outputstr);
+ jb.val.string.val = outputstr;
+ }
+ }
+ break;
+ case JSONBTYPE_DATE:
+ jb.type = jbvString;
+ jb.val.string.val = JsonEncodeDateTime(NULL, val,
+ DATEOID, NULL);
+ jb.val.string.len = strlen(jb.val.string.val);
+ break;
+ case JSONBTYPE_TIMESTAMP:
+ jb.type = jbvString;
+ jb.val.string.val = JsonEncodeDateTime(NULL, val,
+ TIMESTAMPOID, NULL);
+ jb.val.string.len = strlen(jb.val.string.val);
+ break;
+ case JSONBTYPE_TIMESTAMPTZ:
+ jb.type = jbvString;
+ jb.val.string.val = JsonEncodeDateTime(NULL, val,
+ TIMESTAMPTZOID, NULL);
+ jb.val.string.len = strlen(jb.val.string.val);
+ break;
+ case JSONBTYPE_JSONCAST:
+ case JSONBTYPE_JSON:
+ {
+ /* parse the json right into the existing result object */
+ JsonLexContext *lex;
+ JsonSemAction sem;
+ text *json = DatumGetTextPP(val);
+
+ lex = makeJsonLexContext(json, true);
+
+ memset(&sem, 0, sizeof(sem));
+
+ sem.semstate = (void *) result;
+
+ sem.object_start = jsonb_in_object_start;
+ sem.array_start = jsonb_in_array_start;
+ sem.object_end = jsonb_in_object_end;
+ sem.array_end = jsonb_in_array_end;
+ sem.scalar = jsonb_in_scalar;
+ sem.object_field_start = jsonb_in_object_field_start;
+
+ pg_parse_json_or_ereport(lex, &sem);
+ }
+ break;
+ case JSONBTYPE_JSONB:
+ {
+ Jsonb *jsonb = DatumGetJsonbP(val);
+ JsonbIterator *it;
+
+ it = JsonbIteratorInit(&jsonb->root);
+
+ if (JB_ROOT_IS_SCALAR(jsonb))
+ {
+ (void) JsonbIteratorNext(&it, &jb, true);
+ Assert(jb.type == jbvArray);
+ (void) JsonbIteratorNext(&it, &jb, true);
+ scalar_jsonb = true;
+ }
+ else
+ {
+ JsonbIteratorToken type;
+
+ while ((type = JsonbIteratorNext(&it, &jb, false))
+ != WJB_DONE)
+ {
+ if (type == WJB_END_ARRAY || type == WJB_END_OBJECT ||
+ type == WJB_BEGIN_ARRAY || type == WJB_BEGIN_OBJECT)
+ result->res = pushJsonbValue(&result->parseState,
+ type, NULL);
+ else
+ result->res = pushJsonbValue(&result->parseState,
+ type, &jb);
+ }
+ }
+ }
+ break;
+ default:
+ outputstr = OidOutputFunctionCall(outfuncoid, val);
+ jb.type = jbvString;
+ jb.val.string.len = checkStringLen(strlen(outputstr));
+ jb.val.string.val = outputstr;
+ break;
+ }
+ }
+
+ /* Now insert jb into result, unless we did it recursively */
+ if (!is_null && !scalar_jsonb &&
+ tcategory >= JSONBTYPE_JSON && tcategory <= JSONBTYPE_JSONCAST)
+ {
+ /* work has been done recursively */
+ return;
+ }
+ else if (result->parseState == NULL)
+ {
+ /* single root scalar */
+ JsonbValue va;
+
+ va.type = jbvArray;
+ va.val.array.rawScalar = true;
+ va.val.array.nElems = 1;
+
+ result->res = pushJsonbValue(&result->parseState, WJB_BEGIN_ARRAY, &va);
+ result->res = pushJsonbValue(&result->parseState, WJB_ELEM, &jb);
+ result->res = pushJsonbValue(&result->parseState, WJB_END_ARRAY, NULL);
+ }
+ else
+ {
+ JsonbValue *o = &result->parseState->contVal;
+
+ switch (o->type)
+ {
+ case jbvArray:
+ result->res = pushJsonbValue(&result->parseState, WJB_ELEM, &jb);
+ break;
+ case jbvObject:
+ result->res = pushJsonbValue(&result->parseState,
+ key_scalar ? WJB_KEY : WJB_VALUE,
+ &jb);
+ break;
+ default:
+ elog(ERROR, "unexpected parent of nested structure");
+ }
+ }
+}
+
+/*
+ * Process a single dimension of an array.
+ * If it's the innermost dimension, output the values, otherwise call
+ * ourselves recursively to process the next dimension.
+ */
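+/*
+ * For illustration (a sketch of the expected output): a 2-D integer array
+ * such as '{{1,2},{3,4}}' produces the nested jsonb array [[1, 2], [3, 4]];
+ * each recursion level handles one array dimension.
+ */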
+static void
+array_dim_to_jsonb(JsonbInState *result, int dim, int ndims, int *dims, Datum *vals,
+ bool *nulls, int *valcount, JsonbTypeCategory tcategory,
+ Oid outfuncoid)
+{
+ int i;
+
+ Assert(dim < ndims);
+
+ result->res = pushJsonbValue(&result->parseState, WJB_BEGIN_ARRAY, NULL);
+
+ for (i = 1; i <= dims[dim]; i++)
+ {
+ if (dim + 1 == ndims)
+ {
+ datum_to_jsonb(vals[*valcount], nulls[*valcount], result, tcategory,
+ outfuncoid, false);
+ (*valcount)++;
+ }
+ else
+ {
+ array_dim_to_jsonb(result, dim + 1, ndims, dims, vals, nulls,
+ valcount, tcategory, outfuncoid);
+ }
+ }
+
+ result->res = pushJsonbValue(&result->parseState, WJB_END_ARRAY, NULL);
+}
+
+/*
+ * Turn an array into JSON.
+ */
+static void
+array_to_jsonb_internal(Datum array, JsonbInState *result)
+{
+ ArrayType *v = DatumGetArrayTypeP(array);
+ Oid element_type = ARR_ELEMTYPE(v);
+ int *dim;
+ int ndim;
+ int nitems;
+ int count = 0;
+ Datum *elements;
+ bool *nulls;
+ int16 typlen;
+ bool typbyval;
+ char typalign;
+ JsonbTypeCategory tcategory;
+ Oid outfuncoid;
+
+ ndim = ARR_NDIM(v);
+ dim = ARR_DIMS(v);
+ nitems = ArrayGetNItems(ndim, dim);
+
+ if (nitems <= 0)
+ {
+ result->res = pushJsonbValue(&result->parseState, WJB_BEGIN_ARRAY, NULL);
+ result->res = pushJsonbValue(&result->parseState, WJB_END_ARRAY, NULL);
+ return;
+ }
+
+ get_typlenbyvalalign(element_type,
+ &typlen, &typbyval, &typalign);
+
+ jsonb_categorize_type(element_type,
+ &tcategory, &outfuncoid);
+
+ deconstruct_array(v, element_type, typlen, typbyval,
+ typalign, &elements, &nulls,
+ &nitems);
+
+ array_dim_to_jsonb(result, 0, ndim, dim, elements, nulls, &count, tcategory,
+ outfuncoid);
+
+ pfree(elements);
+ pfree(nulls);
+}
+
+/*
+ * Turn a composite / record into JSON.
+ */
+static void
+composite_to_jsonb(Datum composite, JsonbInState *result)
+{
+ HeapTupleHeader td;
+ Oid tupType;
+ int32 tupTypmod;
+ TupleDesc tupdesc;
+ HeapTupleData tmptup,
+ *tuple;
+ int i;
+
+ td = DatumGetHeapTupleHeader(composite);
+
+ /* Extract rowtype info and find a tupdesc */
+ tupType = HeapTupleHeaderGetTypeId(td);
+ tupTypmod = HeapTupleHeaderGetTypMod(td);
+ tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
+
+ /* Build a temporary HeapTuple control structure */
+ tmptup.t_len = HeapTupleHeaderGetDatumLength(td);
+ tmptup.t_data = td;
+ tuple = &tmptup;
+
+ result->res = pushJsonbValue(&result->parseState, WJB_BEGIN_OBJECT, NULL);
+
+ for (i = 0; i < tupdesc->natts; i++)
+ {
+ Datum val;
+ bool isnull;
+ char *attname;
+ JsonbTypeCategory tcategory;
+ Oid outfuncoid;
+ JsonbValue v;
+ Form_pg_attribute att = TupleDescAttr(tupdesc, i);
+
+ if (att->attisdropped)
+ continue;
+
+ attname = NameStr(att->attname);
+
+ v.type = jbvString;
+ /* don't need checkStringLen here - can't exceed maximum name length */
+ v.val.string.len = strlen(attname);
+ v.val.string.val = attname;
+
+ result->res = pushJsonbValue(&result->parseState, WJB_KEY, &v);
+
+ val = heap_getattr(tuple, i + 1, tupdesc, &isnull);
+
+ if (isnull)
+ {
+ tcategory = JSONBTYPE_NULL;
+ outfuncoid = InvalidOid;
+ }
+ else
+ jsonb_categorize_type(att->atttypid, &tcategory, &outfuncoid);
+
+ datum_to_jsonb(val, isnull, result, tcategory, outfuncoid, false);
+ }
+
+ result->res = pushJsonbValue(&result->parseState, WJB_END_OBJECT, NULL);
+ ReleaseTupleDesc(tupdesc);
+}
+
+/*
+ * Append JSON text for "val" to "result".
+ *
+ * This is just a thin wrapper around datum_to_jsonb. If the same type will be
+ * printed many times, avoid using this; better to do the jsonb_categorize_type
+ * lookups only once.
+ */
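+/*
+ * The aggregate transition functions below illustrate the cached-lookup
+ * pattern: they call jsonb_categorize_type() once, when the JsonbAggState is
+ * first set up, and then pass the saved category and output function to
+ * datum_to_jsonb() for every subsequent row.
+ */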
+
+static void
+add_jsonb(Datum val, bool is_null, JsonbInState *result,
+ Oid val_type, bool key_scalar)
+{
+ JsonbTypeCategory tcategory;
+ Oid outfuncoid;
+
+ if (val_type == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("could not determine input data type")));
+
+ if (is_null)
+ {
+ tcategory = JSONBTYPE_NULL;
+ outfuncoid = InvalidOid;
+ }
+ else
+ jsonb_categorize_type(val_type,
+ &tcategory, &outfuncoid);
+
+ datum_to_jsonb(val, is_null, result, tcategory, outfuncoid, key_scalar);
+}
+
+/*
+ * SQL function to_jsonb(anyvalue)
+ */
+Datum
+to_jsonb(PG_FUNCTION_ARGS)
+{
+ Datum val = PG_GETARG_DATUM(0);
+ Oid val_type = get_fn_expr_argtype(fcinfo->flinfo, 0);
+ JsonbInState result;
+ JsonbTypeCategory tcategory;
+ Oid outfuncoid;
+
+ if (val_type == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("could not determine input data type")));
+
+ jsonb_categorize_type(val_type,
+ &tcategory, &outfuncoid);
+
+ memset(&result, 0, sizeof(JsonbInState));
+
+ datum_to_jsonb(val, false, &result, tcategory, outfuncoid, false);
+
+ PG_RETURN_POINTER(JsonbValueToJsonb(result.res));
+}
+
+/*
+ * SQL function jsonb_build_object(variadic "any")
+ */
+Datum
+jsonb_build_object(PG_FUNCTION_ARGS)
+{
+ int nargs;
+ int i;
+ JsonbInState result;
+ Datum *args;
+ bool *nulls;
+ Oid *types;
+
+ /* build argument values to build the object */
+ nargs = extract_variadic_args(fcinfo, 0, true, &args, &types, &nulls);
+
+ if (nargs < 0)
+ PG_RETURN_NULL();
+
+ if (nargs % 2 != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("argument list must have even number of elements"),
+ /* translator: %s is a SQL function name */
+ errhint("The arguments of %s must consist of alternating keys and values.",
+ "jsonb_build_object()")));
+
+ memset(&result, 0, sizeof(JsonbInState));
+
+ result.res = pushJsonbValue(&result.parseState, WJB_BEGIN_OBJECT, NULL);
+
+ for (i = 0; i < nargs; i += 2)
+ {
+ /* process key */
+ if (nulls[i])
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("argument %d: key must not be null", i + 1)));
+
+ add_jsonb(args[i], false, &result, types[i], true);
+
+ /* process value */
+ add_jsonb(args[i + 1], nulls[i + 1], &result, types[i + 1], false);
+ }
+
+ result.res = pushJsonbValue(&result.parseState, WJB_END_OBJECT, NULL);
+
+ PG_RETURN_POINTER(JsonbValueToJsonb(result.res));
+}
+
+/*
+ * degenerate case of jsonb_build_object where it gets 0 arguments.
+ */
+Datum
+jsonb_build_object_noargs(PG_FUNCTION_ARGS)
+{
+ JsonbInState result;
+
+ memset(&result, 0, sizeof(JsonbInState));
+
+ (void) pushJsonbValue(&result.parseState, WJB_BEGIN_OBJECT, NULL);
+ result.res = pushJsonbValue(&result.parseState, WJB_END_OBJECT, NULL);
+
+ PG_RETURN_POINTER(JsonbValueToJsonb(result.res));
+}
+
+/*
+ * SQL function jsonb_build_array(variadic "any")
+ */
+Datum
+jsonb_build_array(PG_FUNCTION_ARGS)
+{
+ int nargs;
+ int i;
+ JsonbInState result;
+ Datum *args;
+ bool *nulls;
+ Oid *types;
+
+ /* build argument values to build the array */
+ nargs = extract_variadic_args(fcinfo, 0, true, &args, &types, &nulls);
+
+ if (nargs < 0)
+ PG_RETURN_NULL();
+
+ memset(&result, 0, sizeof(JsonbInState));
+
+ result.res = pushJsonbValue(&result.parseState, WJB_BEGIN_ARRAY, NULL);
+
+ for (i = 0; i < nargs; i++)
+ add_jsonb(args[i], nulls[i], &result, types[i], false);
+
+ result.res = pushJsonbValue(&result.parseState, WJB_END_ARRAY, NULL);
+
+ PG_RETURN_POINTER(JsonbValueToJsonb(result.res));
+}
+
+/*
+ * degenerate case of jsonb_build_array where it gets 0 arguments.
+ */
+Datum
+jsonb_build_array_noargs(PG_FUNCTION_ARGS)
+{
+ JsonbInState result;
+
+ memset(&result, 0, sizeof(JsonbInState));
+
+ (void) pushJsonbValue(&result.parseState, WJB_BEGIN_ARRAY, NULL);
+ result.res = pushJsonbValue(&result.parseState, WJB_END_ARRAY, NULL);
+
+ PG_RETURN_POINTER(JsonbValueToJsonb(result.res));
+}
+
+
+/*
+ * SQL function jsonb_object(text[])
+ *
+ * take a one- or two-dimensional array of text as name/value pairs
+ * for a jsonb object.
+ *
+ */
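+/*
+ * For illustration, expected behavior:
+ *
+ *    jsonb_object('{a, 1, b, 2}')      -> {"a": "1", "b": "2"}
+ *    jsonb_object('{{a, 1}, {b, 2}}')  -> {"a": "1", "b": "2"}
+ *
+ * Note that all values end up as jsonb strings (or nulls); no numeric
+ * conversion is attempted.
+ */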
+Datum
+jsonb_object(PG_FUNCTION_ARGS)
+{
+ ArrayType *in_array = PG_GETARG_ARRAYTYPE_P(0);
+ int ndims = ARR_NDIM(in_array);
+ Datum *in_datums;
+ bool *in_nulls;
+ int in_count,
+ count,
+ i;
+ JsonbInState result;
+
+ memset(&result, 0, sizeof(JsonbInState));
+
+ (void) pushJsonbValue(&result.parseState, WJB_BEGIN_OBJECT, NULL);
+
+ switch (ndims)
+ {
+ case 0:
+ goto close_object;
+ break;
+
+ case 1:
+ if ((ARR_DIMS(in_array)[0]) % 2)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("array must have even number of elements")));
+ break;
+
+ case 2:
+ if ((ARR_DIMS(in_array)[1]) != 2)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("array must have two columns")));
+ break;
+
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("wrong number of array subscripts")));
+ }
+
+ deconstruct_array(in_array,
+ TEXTOID, -1, false, TYPALIGN_INT,
+ &in_datums, &in_nulls, &in_count);
+
+ count = in_count / 2;
+
+ for (i = 0; i < count; ++i)
+ {
+ JsonbValue v;
+ char *str;
+ int len;
+
+ if (in_nulls[i * 2])
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("null value not allowed for object key")));
+
+ str = TextDatumGetCString(in_datums[i * 2]);
+ len = strlen(str);
+
+ v.type = jbvString;
+
+ v.val.string.len = len;
+ v.val.string.val = str;
+
+ (void) pushJsonbValue(&result.parseState, WJB_KEY, &v);
+
+ if (in_nulls[i * 2 + 1])
+ {
+ v.type = jbvNull;
+ }
+ else
+ {
+ str = TextDatumGetCString(in_datums[i * 2 + 1]);
+ len = strlen(str);
+
+ v.type = jbvString;
+
+ v.val.string.len = len;
+ v.val.string.val = str;
+ }
+
+ (void) pushJsonbValue(&result.parseState, WJB_VALUE, &v);
+ }
+
+ pfree(in_datums);
+ pfree(in_nulls);
+
+close_object:
+ result.res = pushJsonbValue(&result.parseState, WJB_END_OBJECT, NULL);
+
+ PG_RETURN_POINTER(JsonbValueToJsonb(result.res));
+}
+
+/*
+ * SQL function jsonb_object(text[], text[])
+ *
+ * take separate name and value arrays of text to construct a jsonb object
+ * pairwise.
+ */
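+/*
+ * For illustration (expected behavior): jsonb_object('{a, b}', '{1, 2}')
+ * yields {"a": "1", "b": "2"}; the two arrays must have matching lengths.
+ */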
+Datum
+jsonb_object_two_arg(PG_FUNCTION_ARGS)
+{
+ ArrayType *key_array = PG_GETARG_ARRAYTYPE_P(0);
+ ArrayType *val_array = PG_GETARG_ARRAYTYPE_P(1);
+ int nkdims = ARR_NDIM(key_array);
+ int nvdims = ARR_NDIM(val_array);
+ Datum *key_datums,
+ *val_datums;
+ bool *key_nulls,
+ *val_nulls;
+ int key_count,
+ val_count,
+ i;
+ JsonbInState result;
+
+ memset(&result, 0, sizeof(JsonbInState));
+
+ (void) pushJsonbValue(&result.parseState, WJB_BEGIN_OBJECT, NULL);
+
+ if (nkdims > 1 || nkdims != nvdims)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("wrong number of array subscripts")));
+
+ if (nkdims == 0)
+ goto close_object;
+
+ deconstruct_array(key_array,
+ TEXTOID, -1, false, TYPALIGN_INT,
+ &key_datums, &key_nulls, &key_count);
+
+ deconstruct_array(val_array,
+ TEXTOID, -1, false, TYPALIGN_INT,
+ &val_datums, &val_nulls, &val_count);
+
+ if (key_count != val_count)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("mismatched array dimensions")));
+
+ for (i = 0; i < key_count; ++i)
+ {
+ JsonbValue v;
+ char *str;
+ int len;
+
+ if (key_nulls[i])
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("null value not allowed for object key")));
+
+ str = TextDatumGetCString(key_datums[i]);
+ len = strlen(str);
+
+ v.type = jbvString;
+
+ v.val.string.len = len;
+ v.val.string.val = str;
+
+ (void) pushJsonbValue(&result.parseState, WJB_KEY, &v);
+
+ if (val_nulls[i])
+ {
+ v.type = jbvNull;
+ }
+ else
+ {
+ str = TextDatumGetCString(val_datums[i]);
+ len = strlen(str);
+
+ v.type = jbvString;
+
+ v.val.string.len = len;
+ v.val.string.val = str;
+ }
+
+ (void) pushJsonbValue(&result.parseState, WJB_VALUE, &v);
+ }
+
+ pfree(key_datums);
+ pfree(key_nulls);
+ pfree(val_datums);
+ pfree(val_nulls);
+
+close_object:
+ result.res = pushJsonbValue(&result.parseState, WJB_END_OBJECT, NULL);
+
+ PG_RETURN_POINTER(JsonbValueToJsonb(result.res));
+}
+
+
+/*
+ * shallow clone of a parse state, suitable for use in aggregate
+ * final functions that will only append to the values rather than
+ * change them.
+ */
+static JsonbParseState *
+clone_parse_state(JsonbParseState *state)
+{
+ JsonbParseState *result,
+ *icursor,
+ *ocursor;
+
+ if (state == NULL)
+ return NULL;
+
+ result = palloc(sizeof(JsonbParseState));
+ icursor = state;
+ ocursor = result;
+ for (;;)
+ {
+ ocursor->contVal = icursor->contVal;
+ ocursor->size = icursor->size;
+ icursor = icursor->next;
+ if (icursor == NULL)
+ break;
+ ocursor->next = palloc(sizeof(JsonbParseState));
+ ocursor = ocursor->next;
+ }
+ ocursor->next = NULL;
+
+ return result;
+}
+
+
+/*
+ * jsonb_agg aggregate function
+ */
+Datum
+jsonb_agg_transfn(PG_FUNCTION_ARGS)
+{
+ MemoryContext oldcontext,
+ aggcontext;
+ JsonbAggState *state;
+ JsonbInState elem;
+ Datum val;
+ JsonbInState *result;
+ bool single_scalar = false;
+ JsonbIterator *it;
+ Jsonb *jbelem;
+ JsonbValue v;
+ JsonbIteratorToken type;
+
+ if (!AggCheckCallContext(fcinfo, &aggcontext))
+ {
+ /* cannot be called directly because of internal-type argument */
+ elog(ERROR, "jsonb_agg_transfn called in non-aggregate context");
+ }
+
+ /* set up the accumulator on the first go round */
+
+ if (PG_ARGISNULL(0))
+ {
+ Oid arg_type = get_fn_expr_argtype(fcinfo->flinfo, 1);
+
+ if (arg_type == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("could not determine input data type")));
+
+ oldcontext = MemoryContextSwitchTo(aggcontext);
+ state = palloc(sizeof(JsonbAggState));
+ result = palloc0(sizeof(JsonbInState));
+ state->res = result;
+ result->res = pushJsonbValue(&result->parseState,
+ WJB_BEGIN_ARRAY, NULL);
+ MemoryContextSwitchTo(oldcontext);
+
+ jsonb_categorize_type(arg_type, &state->val_category,
+ &state->val_output_func);
+ }
+ else
+ {
+ state = (JsonbAggState *) PG_GETARG_POINTER(0);
+ result = state->res;
+ }
+
+ /* turn the argument into jsonb in the normal function context */
+
+ val = PG_ARGISNULL(1) ? (Datum) 0 : PG_GETARG_DATUM(1);
+
+ memset(&elem, 0, sizeof(JsonbInState));
+
+ datum_to_jsonb(val, PG_ARGISNULL(1), &elem, state->val_category,
+ state->val_output_func, false);
+
+ jbelem = JsonbValueToJsonb(elem.res);
+
+ /* switch to the aggregate context for accumulation operations */
+
+ oldcontext = MemoryContextSwitchTo(aggcontext);
+
+ it = JsonbIteratorInit(&jbelem->root);
+
+ while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+ {
+ switch (type)
+ {
+ case WJB_BEGIN_ARRAY:
+ if (v.val.array.rawScalar)
+ single_scalar = true;
+ else
+ result->res = pushJsonbValue(&result->parseState,
+ type, NULL);
+ break;
+ case WJB_END_ARRAY:
+ if (!single_scalar)
+ result->res = pushJsonbValue(&result->parseState,
+ type, NULL);
+ break;
+ case WJB_BEGIN_OBJECT:
+ case WJB_END_OBJECT:
+ result->res = pushJsonbValue(&result->parseState,
+ type, NULL);
+ break;
+ case WJB_ELEM:
+ case WJB_KEY:
+ case WJB_VALUE:
+ if (v.type == jbvString)
+ {
+ /* copy string values in the aggregate context */
+ char *buf = palloc(v.val.string.len + 1);
+
+ snprintf(buf, v.val.string.len + 1, "%s", v.val.string.val);
+ v.val.string.val = buf;
+ }
+ else if (v.type == jbvNumeric)
+ {
+ /* same for numeric */
+ v.val.numeric =
+ DatumGetNumeric(DirectFunctionCall1(numeric_uplus,
+ NumericGetDatum(v.val.numeric)));
+ }
+ result->res = pushJsonbValue(&result->parseState,
+ type, &v);
+ break;
+ default:
+ elog(ERROR, "unknown jsonb iterator token type");
+ }
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+
+ PG_RETURN_POINTER(state);
+}
+
+Datum
+jsonb_agg_finalfn(PG_FUNCTION_ARGS)
+{
+ JsonbAggState *arg;
+ JsonbInState result;
+ Jsonb *out;
+
+ /* cannot be called directly because of internal-type argument */
+ Assert(AggCheckCallContext(fcinfo, NULL));
+
+ if (PG_ARGISNULL(0))
+ PG_RETURN_NULL(); /* returns null iff no input values */
+
+ arg = (JsonbAggState *) PG_GETARG_POINTER(0);
+
+ /*
+ * We need to do a shallow clone of the argument in case the final
+ * function is called more than once, so we avoid changing the argument. A
+ * shallow clone is sufficient as we aren't going to change any of the
+ * values, just add the final array end marker.
+ */
+
+ result.parseState = clone_parse_state(arg->res->parseState);
+
+ result.res = pushJsonbValue(&result.parseState,
+ WJB_END_ARRAY, NULL);
+
+ out = JsonbValueToJsonb(result.res);
+
+ PG_RETURN_POINTER(out);
+}
+
+/*
+ * jsonb_object_agg aggregate function
+ */
+Datum
+jsonb_object_agg_transfn(PG_FUNCTION_ARGS)
+{
+ MemoryContext oldcontext,
+ aggcontext;
+ JsonbInState elem;
+ JsonbAggState *state;
+ Datum val;
+ JsonbInState *result;
+ bool single_scalar;
+ JsonbIterator *it;
+ Jsonb *jbkey,
+ *jbval;
+ JsonbValue v;
+ JsonbIteratorToken type;
+
+ if (!AggCheckCallContext(fcinfo, &aggcontext))
+ {
+ /* cannot be called directly because of internal-type argument */
+ elog(ERROR, "jsonb_object_agg_transfn called in non-aggregate context");
+ }
+
+ /* set up the accumulator on the first go round */
+
+ if (PG_ARGISNULL(0))
+ {
+ Oid arg_type;
+
+ oldcontext = MemoryContextSwitchTo(aggcontext);
+ state = palloc(sizeof(JsonbAggState));
+ result = palloc0(sizeof(JsonbInState));
+ state->res = result;
+ result->res = pushJsonbValue(&result->parseState,
+ WJB_BEGIN_OBJECT, NULL);
+ MemoryContextSwitchTo(oldcontext);
+
+ arg_type = get_fn_expr_argtype(fcinfo->flinfo, 1);
+
+ if (arg_type == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("could not determine input data type")));
+
+ jsonb_categorize_type(arg_type, &state->key_category,
+ &state->key_output_func);
+
+ arg_type = get_fn_expr_argtype(fcinfo->flinfo, 2);
+
+ if (arg_type == InvalidOid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("could not determine input data type")));
+
+ jsonb_categorize_type(arg_type, &state->val_category,
+ &state->val_output_func);
+ }
+ else
+ {
+ state = (JsonbAggState *) PG_GETARG_POINTER(0);
+ result = state->res;
+ }
+
+ /* turn the argument into jsonb in the normal function context */
+
+ if (PG_ARGISNULL(1))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("field name must not be null")));
+
+ val = PG_GETARG_DATUM(1);
+
+ memset(&elem, 0, sizeof(JsonbInState));
+
+ datum_to_jsonb(val, false, &elem, state->key_category,
+ state->key_output_func, true);
+
+ jbkey = JsonbValueToJsonb(elem.res);
+
+ val = PG_ARGISNULL(2) ? (Datum) 0 : PG_GETARG_DATUM(2);
+
+ memset(&elem, 0, sizeof(JsonbInState));
+
+ datum_to_jsonb(val, PG_ARGISNULL(2), &elem, state->val_category,
+ state->val_output_func, false);
+
+ jbval = JsonbValueToJsonb(elem.res);
+
+ it = JsonbIteratorInit(&jbkey->root);
+
+ /* switch to the aggregate context for accumulation operations */
+
+ oldcontext = MemoryContextSwitchTo(aggcontext);
+
+ /*
+ * keys should be scalar, and we should have already checked for that
+ * above when calling datum_to_jsonb, so here we only need to handle the
+ * tokens that a raw-scalar pseudo-array produces.
+ */
+
+ while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+ {
+ switch (type)
+ {
+ case WJB_BEGIN_ARRAY:
+ if (!v.val.array.rawScalar)
+ elog(ERROR, "unexpected structure for key");
+ break;
+ case WJB_ELEM:
+ if (v.type == jbvString)
+ {
+ /* copy string values in the aggregate context */
+ char *buf = palloc(v.val.string.len + 1);
+
+ snprintf(buf, v.val.string.len + 1, "%s", v.val.string.val);
+ v.val.string.val = buf;
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("object keys must be strings")));
+ }
+ result->res = pushJsonbValue(&result->parseState,
+ WJB_KEY, &v);
+ break;
+ case WJB_END_ARRAY:
+ break;
+ default:
+ elog(ERROR, "unexpected structure for key");
+ break;
+ }
+ }
+
+ it = JsonbIteratorInit(&jbval->root);
+
+ single_scalar = false;
+
+ /*
+ * values can be anything, including structured and null, so we treat them
+ * as in jsonb_agg_transfn above, except that single scalars are always pushed as
+ * WJB_VALUE items.
+ */
+
+ while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+ {
+ switch (type)
+ {
+ case WJB_BEGIN_ARRAY:
+ if (v.val.array.rawScalar)
+ single_scalar = true;
+ else
+ result->res = pushJsonbValue(&result->parseState,
+ type, NULL);
+ break;
+ case WJB_END_ARRAY:
+ if (!single_scalar)
+ result->res = pushJsonbValue(&result->parseState,
+ type, NULL);
+ break;
+ case WJB_BEGIN_OBJECT:
+ case WJB_END_OBJECT:
+ result->res = pushJsonbValue(&result->parseState,
+ type, NULL);
+ break;
+ case WJB_ELEM:
+ case WJB_KEY:
+ case WJB_VALUE:
+ if (v.type == jbvString)
+ {
+ /* copy string values in the aggregate context */
+ char *buf = palloc(v.val.string.len + 1);
+
+ snprintf(buf, v.val.string.len + 1, "%s", v.val.string.val);
+ v.val.string.val = buf;
+ }
+ else if (v.type == jbvNumeric)
+ {
+ /* same for numeric */
+ v.val.numeric =
+ DatumGetNumeric(DirectFunctionCall1(numeric_uplus,
+ NumericGetDatum(v.val.numeric)));
+ }
+ result->res = pushJsonbValue(&result->parseState,
+ single_scalar ? WJB_VALUE : type,
+ &v);
+ break;
+ default:
+ elog(ERROR, "unknown jsonb iterator token type");
+ }
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+
+ PG_RETURN_POINTER(state);
+}
+
+Datum
+jsonb_object_agg_finalfn(PG_FUNCTION_ARGS)
+{
+ JsonbAggState *arg;
+ JsonbInState result;
+ Jsonb *out;
+
+ /* cannot be called directly because of internal-type argument */
+ Assert(AggCheckCallContext(fcinfo, NULL));
+
+ if (PG_ARGISNULL(0))
+ PG_RETURN_NULL(); /* returns null iff no input values */
+
+ arg = (JsonbAggState *) PG_GETARG_POINTER(0);
+
+ /*
+ * We need to do a shallow clone of the argument's res field in case the
+ * final function is called more than once, so we avoid changing the
+ * aggregate state value. A shallow clone is sufficient as we aren't
+ * going to change any of the values, just add the final object end
+ * marker.
+ */
+
+ result.parseState = clone_parse_state(arg->res->parseState);
+
+ result.res = pushJsonbValue(&result.parseState,
+ WJB_END_OBJECT, NULL);
+
+ out = JsonbValueToJsonb(result.res);
+
+ PG_RETURN_POINTER(out);
+}
+
+
+/*
+ * Extract scalar value from raw-scalar pseudo-array jsonb.
+ */
+bool
+JsonbExtractScalar(JsonbContainer *jbc, JsonbValue *res)
+{
+ JsonbIterator *it;
+ JsonbIteratorToken tok PG_USED_FOR_ASSERTS_ONLY;
+ JsonbValue tmp;
+
+ if (!JsonContainerIsArray(jbc) || !JsonContainerIsScalar(jbc))
+ {
+ /* inform caller about actual type of container */
+ res->type = (JsonContainerIsArray(jbc)) ? jbvArray : jbvObject;
+ return false;
+ }
+
+ /*
+ * A root scalar is stored as an array of one element, so we get the array
+ * and then its first (and only) member.
+ */
+ it = JsonbIteratorInit(jbc);
+
+ tok = JsonbIteratorNext(&it, &tmp, true);
+ Assert(tok == WJB_BEGIN_ARRAY);
+ Assert(tmp.val.array.nElems == 1 && tmp.val.array.rawScalar);
+
+ tok = JsonbIteratorNext(&it, res, true);
+ Assert(tok == WJB_ELEM);
+ Assert(IsAJsonbScalar(res));
+
+ tok = JsonbIteratorNext(&it, &tmp, true);
+ Assert(tok == WJB_END_ARRAY);
+
+ tok = JsonbIteratorNext(&it, &tmp, true);
+ Assert(tok == WJB_DONE);
+
+ return true;
+}
+
+/*
+ * Emit correct, translatable cast error message
+ */
+static void
+cannotCastJsonbValue(enum jbvType type, const char *sqltype)
+{
+ static const struct
+ {
+ enum jbvType type;
+ const char *msg;
+ }
+ messages[] =
+ {
+ {jbvNull, gettext_noop("cannot cast jsonb null to type %s")},
+ {jbvString, gettext_noop("cannot cast jsonb string to type %s")},
+ {jbvNumeric, gettext_noop("cannot cast jsonb numeric to type %s")},
+ {jbvBool, gettext_noop("cannot cast jsonb boolean to type %s")},
+ {jbvArray, gettext_noop("cannot cast jsonb array to type %s")},
+ {jbvObject, gettext_noop("cannot cast jsonb object to type %s")},
+ {jbvBinary, gettext_noop("cannot cast jsonb array or object to type %s")}
+ };
+ int i;
+
+ for (i = 0; i < lengthof(messages); i++)
+ if (messages[i].type == type)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg(messages[i].msg, sqltype)));
+
+ /* should be unreachable */
+ elog(ERROR, "unknown jsonb type: %d", (int) type);
+}
+
+Datum
+jsonb_bool(PG_FUNCTION_ARGS)
+{
+ Jsonb *in = PG_GETARG_JSONB_P(0);
+ JsonbValue v;
+
+ if (!JsonbExtractScalar(&in->root, &v) || v.type != jbvBool)
+ cannotCastJsonbValue(v.type, "boolean");
+
+ PG_FREE_IF_COPY(in, 0);
+
+ PG_RETURN_BOOL(v.val.boolean);
+}
+
+Datum
+jsonb_numeric(PG_FUNCTION_ARGS)
+{
+ Jsonb *in = PG_GETARG_JSONB_P(0);
+ JsonbValue v;
+ Numeric retValue;
+
+ if (!JsonbExtractScalar(&in->root, &v) || v.type != jbvNumeric)
+ cannotCastJsonbValue(v.type, "numeric");
+
+ /*
+ * v.val.numeric points into jsonb body, so we need to make a copy to
+ * return
+ */
+ retValue = DatumGetNumericCopy(NumericGetDatum(v.val.numeric));
+
+ PG_FREE_IF_COPY(in, 0);
+
+ PG_RETURN_NUMERIC(retValue);
+}
+
+Datum
+jsonb_int2(PG_FUNCTION_ARGS)
+{
+ Jsonb *in = PG_GETARG_JSONB_P(0);
+ JsonbValue v;
+ Datum retValue;
+
+ if (!JsonbExtractScalar(&in->root, &v) || v.type != jbvNumeric)
+ cannotCastJsonbValue(v.type, "smallint");
+
+ retValue = DirectFunctionCall1(numeric_int2,
+ NumericGetDatum(v.val.numeric));
+
+ PG_FREE_IF_COPY(in, 0);
+
+ PG_RETURN_DATUM(retValue);
+}
+
+Datum
+jsonb_int4(PG_FUNCTION_ARGS)
+{
+ Jsonb *in = PG_GETARG_JSONB_P(0);
+ JsonbValue v;
+ Datum retValue;
+
+ if (!JsonbExtractScalar(&in->root, &v) || v.type != jbvNumeric)
+ cannotCastJsonbValue(v.type, "integer");
+
+ retValue = DirectFunctionCall1(numeric_int4,
+ NumericGetDatum(v.val.numeric));
+
+ PG_FREE_IF_COPY(in, 0);
+
+ PG_RETURN_DATUM(retValue);
+}
+
+Datum
+jsonb_int8(PG_FUNCTION_ARGS)
+{
+ Jsonb *in = PG_GETARG_JSONB_P(0);
+ JsonbValue v;
+ Datum retValue;
+
+ if (!JsonbExtractScalar(&in->root, &v) || v.type != jbvNumeric)
+ cannotCastJsonbValue(v.type, "bigint");
+
+ retValue = DirectFunctionCall1(numeric_int8,
+ NumericGetDatum(v.val.numeric));
+
+ PG_FREE_IF_COPY(in, 0);
+
+ PG_RETURN_DATUM(retValue);
+}
+
+Datum
+jsonb_float4(PG_FUNCTION_ARGS)
+{
+ Jsonb *in = PG_GETARG_JSONB_P(0);
+ JsonbValue v;
+ Datum retValue;
+
+ if (!JsonbExtractScalar(&in->root, &v) || v.type != jbvNumeric)
+ cannotCastJsonbValue(v.type, "real");
+
+ retValue = DirectFunctionCall1(numeric_float4,
+ NumericGetDatum(v.val.numeric));
+
+ PG_FREE_IF_COPY(in, 0);
+
+ PG_RETURN_DATUM(retValue);
+}
+
+Datum
+jsonb_float8(PG_FUNCTION_ARGS)
+{
+ Jsonb *in = PG_GETARG_JSONB_P(0);
+ JsonbValue v;
+ Datum retValue;
+
+ if (!JsonbExtractScalar(&in->root, &v) || v.type != jbvNumeric)
+ cannotCastJsonbValue(v.type, "double precision");
+
+ retValue = DirectFunctionCall1(numeric_float8,
+ NumericGetDatum(v.val.numeric));
+
+ PG_FREE_IF_COPY(in, 0);
+
+ PG_RETURN_DATUM(retValue);
+}
diff --git a/src/backend/utils/adt/jsonb_gin.c b/src/backend/utils/adt/jsonb_gin.c
new file mode 100644
index 0000000..6c086b4
--- /dev/null
+++ b/src/backend/utils/adt/jsonb_gin.c
@@ -0,0 +1,1411 @@
+/*-------------------------------------------------------------------------
+ *
+ * jsonb_gin.c
+ * GIN support functions for jsonb
+ *
+ * Copyright (c) 2014-2022, PostgreSQL Global Development Group
+ *
+ * We provide two opclasses for jsonb indexing: jsonb_ops and jsonb_path_ops.
+ * For their description see json.sgml and comments in jsonb.h.
+ *
+ * The operators support, among others, "jsonb @? jsonpath" and
+ * "jsonb @@ jsonpath". Expressions containing these operators are easily
+ * expressed through each other.
+ *
+ * jb @? 'path' <=> jb @@ 'EXISTS(path)'
+ * jb @@ 'expr' <=> jb @? '$ ? (expr)'
+ *
+ * Thus, below we consider only the @@ operator; everything said about it
+ * applies to the @? operator via the equivalent jb @@ 'EXISTS(path)' form.
+ *
+ * The result of jsonpath query extraction is a tree whose leaf nodes are
+ * index entries and whose non-leaf nodes are AND/OR logical expressions.
+ * Basically, we extract the following statements out of a jsonpath:
+ *
+ * 1) "accessors_chain = const",
+ * 2) "EXISTS(accessors_chain)".
+ *
+ * An accessors chain may consist of .key, [*] and [index] accessors.
+ * jsonb_ops additionally supports .* and .**.
+ *
+ * For now, both jsonb_ops and jsonb_path_ops support only statements of
+ * the 1st kind.  jsonb_ops might also support statements of the 2nd kind,
+ * but since we have no statistics, keys extracted from an accessors chain
+ * are likely to be non-selective.  Therefore, we choose not to confuse the
+ * optimizer and skip statements of the 2nd kind altogether.  That might
+ * change in future versions.
+ *
+ * In jsonb_ops, a statement of the 1st kind is split into an expression of
+ * AND'ed keys and the const.  Sometimes the const might be interpreted as
+ * either a value or a key in jsonb_ops.  In that case a statement of the
+ * 1st kind is decomposed into the expression below.
+ *
+ * key1 AND key2 AND ... AND keyN AND (const_as_value OR const_as_key)
+ *
+ * jsonb_path_ops transforms each statement of the 1st kind into the single
+ * hash entry shown below.
+ *
+ * HASH(key1, key2, ... , keyN, const)
+ *
+ * Although statements of the 2nd kind are not supported by either jsonb_ops
+ * or jsonb_path_ops, EXISTS(path) expressions may still be supported when
+ * statements of the 1st kind can be extracted out of their filters.
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/jsonb_gin.c
+ *
+ *-------------------------------------------------------------------------
+ */
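+/*
+ * A worked example (an illustrative sketch following the rules above): for
+ * the query  jb @@ '$.a.b == "c"'  the accessors chain is .a.b and the const
+ * is the string "c".  jsonb_ops extracts the key entries "a" and "b" plus an
+ * entry for "c"; since in lax mode "c" could match either a value or a key,
+ * the resulting expression is roughly
+ *
+ *    "a" AND "b" AND ("c" as value OR "c" as key)
+ *
+ * jsonb_path_ops instead extracts the single entry HASH("a", "b", "c").
+ */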
+
+#include "postgres.h"
+
+#include "access/gin.h"
+#include "access/stratnum.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_type.h"
+#include "common/hashfn.h"
+#include "miscadmin.h"
+#include "utils/builtins.h"
+#include "utils/jsonb.h"
+#include "utils/jsonpath.h"
+#include "utils/varlena.h"
+
+typedef struct PathHashStack
+{
+ uint32 hash;
+ struct PathHashStack *parent;
+} PathHashStack;
+
+/* Buffer for GIN entries */
+typedef struct GinEntries
+{
+ Datum *buf;
+ int count;
+ int allocated;
+} GinEntries;
+
+typedef enum JsonPathGinNodeType
+{
+ JSP_GIN_OR,
+ JSP_GIN_AND,
+ JSP_GIN_ENTRY
+} JsonPathGinNodeType;
+
+typedef struct JsonPathGinNode JsonPathGinNode;
+
+/* Node in jsonpath expression tree */
+struct JsonPathGinNode
+{
+ JsonPathGinNodeType type;
+ union
+ {
+ int nargs; /* valid for OR and AND nodes */
+ int entryIndex; /* index in GinEntries array, valid for ENTRY
+ * nodes after entries output */
+ Datum entryDatum; /* path hash or key name/scalar, valid for
+ * ENTRY nodes before entries output */
+ } val;
+ JsonPathGinNode *args[FLEXIBLE_ARRAY_MEMBER]; /* valid for OR and AND
+ * nodes */
+};
+
+/*
+ * jsonb_ops entry extracted from jsonpath item. Corresponding path item
+ * may be: '.key', '.*', '.**', '[index]' or '[*]'.
+ * Entry type is stored in 'type' field.
+ */
+typedef struct JsonPathGinPathItem
+{
+ struct JsonPathGinPathItem *parent;
+ Datum keyName; /* key name (for '.key' path item) or NULL */
+ JsonPathItemType type; /* type of jsonpath item */
+} JsonPathGinPathItem;
+
+/* GIN representation of the extracted json path */
+typedef union JsonPathGinPath
+{
+ JsonPathGinPathItem *items; /* list of path items (jsonb_ops) */
+ uint32 hash; /* hash of the path (jsonb_path_ops) */
+} JsonPathGinPath;
+
+typedef struct JsonPathGinContext JsonPathGinContext;
+
+/* Callback, which stores information about path item into JsonPathGinPath */
+typedef bool (*JsonPathGinAddPathItemFunc) (JsonPathGinPath *path,
+ JsonPathItem *jsp);
+
+/*
+ * Callback, which extracts set of nodes from statement of 1st kind
+ * (scalar != NULL) or statement of 2nd kind (scalar == NULL).
+ */
+typedef List *(*JsonPathGinExtractNodesFunc) (JsonPathGinContext *cxt,
+ JsonPathGinPath path,
+ JsonbValue *scalar,
+ List *nodes);
+
+/* Context for jsonpath entries extraction */
+struct JsonPathGinContext
+{
+ JsonPathGinAddPathItemFunc add_path_item;
+ JsonPathGinExtractNodesFunc extract_nodes;
+ bool lax;
+};
+
+static Datum make_text_key(char flag, const char *str, int len);
+static Datum make_scalar_key(const JsonbValue *scalarVal, bool is_key);
+
+static JsonPathGinNode *extract_jsp_bool_expr(JsonPathGinContext *cxt,
+ JsonPathGinPath path, JsonPathItem *jsp, bool not);
+
+
+/* Initialize GinEntries struct */
+static void
+init_gin_entries(GinEntries *entries, int preallocated)
+{
+ entries->allocated = preallocated;
+ entries->buf = preallocated ? palloc(sizeof(Datum) * preallocated) : NULL;
+ entries->count = 0;
+}
+
+/* Add new entry to GinEntries */
+static int
+add_gin_entry(GinEntries *entries, Datum entry)
+{
+ int id = entries->count;
+
+ if (entries->count >= entries->allocated)
+ {
+ if (entries->allocated)
+ {
+ entries->allocated *= 2;
+ entries->buf = repalloc(entries->buf,
+ sizeof(Datum) * entries->allocated);
+ }
+ else
+ {
+ entries->allocated = 8;
+ entries->buf = palloc(sizeof(Datum) * entries->allocated);
+ }
+ }
+
+ entries->buf[entries->count++] = entry;
+
+ return id;
+}
+
+/*
+ *
+ * jsonb_ops GIN opclass support functions
+ *
+ */
+
+Datum
+gin_compare_jsonb(PG_FUNCTION_ARGS)
+{
+ text *arg1 = PG_GETARG_TEXT_PP(0);
+ text *arg2 = PG_GETARG_TEXT_PP(1);
+ int32 result;
+ char *a1p,
+ *a2p;
+ int len1,
+ len2;
+
+ a1p = VARDATA_ANY(arg1);
+ a2p = VARDATA_ANY(arg2);
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ /* Compare text as bttextcmp does, but always using C collation */
+ result = varstr_cmp(a1p, len1, a2p, len2, C_COLLATION_OID);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_INT32(result);
+}
+
+Datum
+gin_extract_jsonb(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb = (Jsonb *) PG_GETARG_JSONB_P(0);
+ int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
+ int total = JB_ROOT_COUNT(jb);
+ JsonbIterator *it;
+ JsonbValue v;
+ JsonbIteratorToken r;
+ GinEntries entries;
+
+ /* If the root level is empty, we certainly have no keys */
+ if (total == 0)
+ {
+ *nentries = 0;
+ PG_RETURN_POINTER(NULL);
+ }
+
+ /* Otherwise, use 2 * root count as initial estimate of result size */
+ init_gin_entries(&entries, 2 * total);
+
+ it = JsonbIteratorInit(&jb->root);
+
+ while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+ {
+ switch (r)
+ {
+ case WJB_KEY:
+ add_gin_entry(&entries, make_scalar_key(&v, true));
+ break;
+ case WJB_ELEM:
+ /* Pretend string array elements are keys, see jsonb.h */
+ add_gin_entry(&entries, make_scalar_key(&v, v.type == jbvString));
+ break;
+ case WJB_VALUE:
+ add_gin_entry(&entries, make_scalar_key(&v, false));
+ break;
+ default:
+ /* we can ignore structural items */
+ break;
+ }
+ }
+
+ *nentries = entries.count;
+
+ PG_RETURN_POINTER(entries.buf);
+}
+
+/* Append JsonPathGinPathItem to JsonPathGinPath (jsonb_ops) */
+static bool
+jsonb_ops__add_path_item(JsonPathGinPath *path, JsonPathItem *jsp)
+{
+ JsonPathGinPathItem *pentry;
+ Datum keyName;
+
+ switch (jsp->type)
+ {
+ case jpiRoot:
+ path->items = NULL; /* reset path */
+ return true;
+
+ case jpiKey:
+ {
+ int len;
+ char *key = jspGetString(jsp, &len);
+
+ keyName = make_text_key(JGINFLAG_KEY, key, len);
+ break;
+ }
+
+ case jpiAny:
+ case jpiAnyKey:
+ case jpiAnyArray:
+ case jpiIndexArray:
+ keyName = PointerGetDatum(NULL);
+ break;
+
+ default:
+ /* other path items like item methods are not supported */
+ return false;
+ }
+
+ pentry = palloc(sizeof(*pentry));
+
+ pentry->type = jsp->type;
+ pentry->keyName = keyName;
+ pentry->parent = path->items;
+
+ path->items = pentry;
+
+ return true;
+}
+
+/* Combine existing path hash with next key hash (jsonb_path_ops) */
+static bool
+jsonb_path_ops__add_path_item(JsonPathGinPath *path, JsonPathItem *jsp)
+{
+ switch (jsp->type)
+ {
+ case jpiRoot:
+ path->hash = 0; /* reset path hash */
+ return true;
+
+ case jpiKey:
+ {
+ JsonbValue jbv;
+
+ jbv.type = jbvString;
+ jbv.val.string.val = jspGetString(jsp, &jbv.val.string.len);
+
+ JsonbHashScalarValue(&jbv, &path->hash);
+ return true;
+ }
+
+ case jpiIndexArray:
+ case jpiAnyArray:
+ return true; /* path hash is unchanged */
+
+ default:
+ /* other items (wildcard paths, item methods) are not supported */
+ return false;
+ }
+}
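+
+/*
+ * For illustration: for the jsonpath $.a.b, jpiRoot resets the hash to 0,
+ * then the keys "a" and "b" are folded in successively via
+ * JsonbHashScalarValue(), mirroring how gin_extract_jsonb_path() builds
+ * hashes while walking the document. Array accessors leave the hash
+ * untouched.
+ */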
+
+static JsonPathGinNode *
+make_jsp_entry_node(Datum entry)
+{
+ JsonPathGinNode *node = palloc(offsetof(JsonPathGinNode, args));
+
+ node->type = JSP_GIN_ENTRY;
+ node->val.entryDatum = entry;
+
+ return node;
+}
+
+static JsonPathGinNode *
+make_jsp_entry_node_scalar(JsonbValue *scalar, bool iskey)
+{
+ return make_jsp_entry_node(make_scalar_key(scalar, iskey));
+}
+
+static JsonPathGinNode *
+make_jsp_expr_node(JsonPathGinNodeType type, int nargs)
+{
+ JsonPathGinNode *node = palloc(offsetof(JsonPathGinNode, args) +
+ sizeof(node->args[0]) * nargs);
+
+ node->type = type;
+ node->val.nargs = nargs;
+
+ return node;
+}
+
+static JsonPathGinNode *
+make_jsp_expr_node_args(JsonPathGinNodeType type, List *args)
+{
+ JsonPathGinNode *node = make_jsp_expr_node(type, list_length(args));
+ ListCell *lc;
+ int i = 0;
+
+ foreach(lc, args)
+ node->args[i++] = lfirst(lc);
+
+ return node;
+}
+
+static JsonPathGinNode *
+make_jsp_expr_node_binary(JsonPathGinNodeType type,
+ JsonPathGinNode *arg1, JsonPathGinNode *arg2)
+{
+ JsonPathGinNode *node = make_jsp_expr_node(type, 2);
+
+ node->args[0] = arg1;
+ node->args[1] = arg2;
+
+ return node;
+}
+
+/* Append a list of nodes from the jsonpath (jsonb_ops). */
+static List *
+jsonb_ops__extract_nodes(JsonPathGinContext *cxt, JsonPathGinPath path,
+ JsonbValue *scalar, List *nodes)
+{
+ JsonPathGinPathItem *pentry;
+
+ if (scalar)
+ {
+ JsonPathGinNode *node;
+
+ /*
+ * Append path entry nodes only if scalar is provided. See header
+ * comment for details.
+ */
+ for (pentry = path.items; pentry; pentry = pentry->parent)
+ {
+ if (pentry->type == jpiKey) /* only keys are indexed */
+ nodes = lappend(nodes, make_jsp_entry_node(pentry->keyName));
+ }
+
+ /* Append scalar node for equality queries. */
+ if (scalar->type == jbvString)
+ {
+ JsonPathGinPathItem *last = path.items;
+ GinTernaryValue key_entry;
+
+ /*
+ * Because jsonb_ops pretends that string array elements are keys, we
+ * may need to extract a key entry, a non-key entry, or both. In the
+ * latter case we build an OR-node. This can happen in lax mode, where
+ * arrays are automatically unwrapped, or in strict mode for jpiAny
+ * items.
+ */
+
+ if (cxt->lax)
+ key_entry = GIN_MAYBE;
+ else if (!last) /* root ($) */
+ key_entry = GIN_FALSE;
+ else if (last->type == jpiAnyArray || last->type == jpiIndexArray)
+ key_entry = GIN_TRUE;
+ else if (last->type == jpiAny)
+ key_entry = GIN_MAYBE;
+ else
+ key_entry = GIN_FALSE;
+
+ if (key_entry == GIN_MAYBE)
+ {
+ JsonPathGinNode *n1 = make_jsp_entry_node_scalar(scalar, true);
+ JsonPathGinNode *n2 = make_jsp_entry_node_scalar(scalar, false);
+
+ node = make_jsp_expr_node_binary(JSP_GIN_OR, n1, n2);
+ }
+ else
+ {
+ node = make_jsp_entry_node_scalar(scalar,
+ key_entry == GIN_TRUE);
+ }
+ }
+ else
+ {
+ node = make_jsp_entry_node_scalar(scalar, false);
+ }
+
+ nodes = lappend(nodes, node);
+ }
+
+ return nodes;
+}
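+
+/*
+ * Example: for the lax-mode query '$.tags == "x"', the value "x" may have
+ * been indexed either as a pseudo-key (if "tags" is an array of strings) or
+ * as an ordinary value, so key_entry is GIN_MAYBE and an OR of both entry
+ * variants is emitted, in addition to the key entry for "tags".
+ */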
+
+/* Append a list of nodes from the jsonpath (jsonb_path_ops). */
+static List *
+jsonb_path_ops__extract_nodes(JsonPathGinContext *cxt, JsonPathGinPath path,
+ JsonbValue *scalar, List *nodes)
+{
+ if (scalar)
+ {
+ /* append path hash node for equality queries */
+ uint32 hash = path.hash;
+
+ JsonbHashScalarValue(scalar, &hash);
+
+ return lappend(nodes,
+ make_jsp_entry_node(UInt32GetDatum(hash)));
+ }
+ else
+ {
+ /* jsonb_path_ops doesn't support EXISTS queries => nothing to append */
+ return nodes;
+ }
+}
+
+/*
+ * Extract a list of expression nodes that need to be AND-ed by the caller.
+ * Extracted expression is 'path == scalar' if 'scalar' is non-NULL, and
+ * 'EXISTS(path)' otherwise.
+ */
+static List *
+extract_jsp_path_expr_nodes(JsonPathGinContext *cxt, JsonPathGinPath path,
+ JsonPathItem *jsp, JsonbValue *scalar)
+{
+ JsonPathItem next;
+ List *nodes = NIL;
+
+ for (;;)
+ {
+ switch (jsp->type)
+ {
+ case jpiCurrent:
+ break;
+
+ case jpiFilter:
+ {
+ JsonPathItem arg;
+ JsonPathGinNode *filter;
+
+ jspGetArg(jsp, &arg);
+
+ filter = extract_jsp_bool_expr(cxt, path, &arg, false);
+
+ if (filter)
+ nodes = lappend(nodes, filter);
+
+ break;
+ }
+
+ default:
+ if (!cxt->add_path_item(&path, jsp))
+
+ /*
+ * Path is not supported by the index opclass, return only
+ * the extracted filter nodes.
+ */
+ return nodes;
+ break;
+ }
+
+ if (!jspGetNext(jsp, &next))
+ break;
+
+ jsp = &next;
+ }
+
+ /*
+ * Append nodes from the path expression itself to the already extracted
+ * list of filter nodes.
+ */
+ return cxt->extract_nodes(cxt, path, scalar, nodes);
+}
+
+/*
+ * Extract an expression node from one of following jsonpath path expressions:
+ * EXISTS(jsp) (when 'scalar' is NULL)
+ * jsp == scalar (when 'scalar' is not NULL).
+ *
+ * The current path (@) is passed in 'path'.
+ */
+static JsonPathGinNode *
+extract_jsp_path_expr(JsonPathGinContext *cxt, JsonPathGinPath path,
+ JsonPathItem *jsp, JsonbValue *scalar)
+{
+ /* extract a list of nodes to be AND-ed */
+ List *nodes = extract_jsp_path_expr_nodes(cxt, path, jsp, scalar);
+
+ if (list_length(nodes) <= 0)
+ /* no nodes were extracted => full scan is needed for this path */
+ return NULL;
+
+ if (list_length(nodes) == 1)
+ return linitial(nodes); /* avoid extra AND-node */
+
+ /* construct AND-node for path with filters */
+ return make_jsp_expr_node_args(JSP_GIN_AND, nodes);
+}
+
+/* Recursively extract nodes from the boolean jsonpath expression. */
+static JsonPathGinNode *
+extract_jsp_bool_expr(JsonPathGinContext *cxt, JsonPathGinPath path,
+ JsonPathItem *jsp, bool not)
+{
+ check_stack_depth();
+
+ switch (jsp->type)
+ {
+ case jpiAnd: /* expr && expr */
+ case jpiOr: /* expr || expr */
+ {
+ JsonPathItem arg;
+ JsonPathGinNode *larg;
+ JsonPathGinNode *rarg;
+ JsonPathGinNodeType type;
+
+ jspGetLeftArg(jsp, &arg);
+ larg = extract_jsp_bool_expr(cxt, path, &arg, not);
+
+ jspGetRightArg(jsp, &arg);
+ rarg = extract_jsp_bool_expr(cxt, path, &arg, not);
+
+ if (!larg || !rarg)
+ {
+ if (jsp->type == jpiOr)
+ return NULL;
+
+ return larg ? larg : rarg;
+ }
+
+ type = not ^ (jsp->type == jpiAnd) ? JSP_GIN_AND : JSP_GIN_OR;
+
+ return make_jsp_expr_node_binary(type, larg, rarg);
+ }
+
+ case jpiNot: /* !expr */
+ {
+ JsonPathItem arg;
+
+ jspGetArg(jsp, &arg);
+
+ /* extract child expression inverting 'not' flag */
+ return extract_jsp_bool_expr(cxt, path, &arg, !not);
+ }
+
+ case jpiExists: /* EXISTS(path) */
+ {
+ JsonPathItem arg;
+
+ if (not)
+ return NULL; /* NOT EXISTS is not supported */
+
+ jspGetArg(jsp, &arg);
+
+ return extract_jsp_path_expr(cxt, path, &arg, NULL);
+ }
+
+ case jpiNotEqual:
+
+ /*
+ * The 'not' == true case is not supported here, because '!(path !=
+ * scalar)' is not equivalent to 'path == scalar' in the general case,
+ * owing to sequence comparison semantics: 'path == scalar' means
+ * 'EXISTS (path, @ == scalar)', whereas '!(path != scalar)' means
+ * 'FOR_ALL(path, @ == scalar)'. So we would have to translate '!(path
+ * != scalar)' into the GIN query 'path == scalar || EMPTY(path)', but
+ * 'EMPTY(path)' queries are not supported by either jsonb opclass.
+ * (In strict mode we could omit the 'EMPTY(path)' part if the path can
+ * return exactly one item, i.e. it contains no wildcard accessors or
+ * item methods like .keyvalue().)
+ */
+ return NULL;
+
+ case jpiEqual: /* path == scalar */
+ {
+ JsonPathItem left_item;
+ JsonPathItem right_item;
+ JsonPathItem *path_item;
+ JsonPathItem *scalar_item;
+ JsonbValue scalar;
+
+ if (not)
+ return NULL;
+
+ jspGetLeftArg(jsp, &left_item);
+ jspGetRightArg(jsp, &right_item);
+
+ if (jspIsScalar(left_item.type))
+ {
+ scalar_item = &left_item;
+ path_item = &right_item;
+ }
+ else if (jspIsScalar(right_item.type))
+ {
+ scalar_item = &right_item;
+ path_item = &left_item;
+ }
+ else
+ return NULL; /* at least one operand should be a scalar */
+
+ switch (scalar_item->type)
+ {
+ case jpiNull:
+ scalar.type = jbvNull;
+ break;
+ case jpiBool:
+ scalar.type = jbvBool;
+ scalar.val.boolean = !!*scalar_item->content.value.data;
+ break;
+ case jpiNumeric:
+ scalar.type = jbvNumeric;
+ scalar.val.numeric =
+ (Numeric) scalar_item->content.value.data;
+ break;
+ case jpiString:
+ scalar.type = jbvString;
+ scalar.val.string.val = scalar_item->content.value.data;
+ scalar.val.string.len =
+ scalar_item->content.value.datalen;
+ break;
+ default:
+ elog(ERROR, "invalid scalar jsonpath item type: %d",
+ scalar_item->type);
+ return NULL;
+ }
+
+ return extract_jsp_path_expr(cxt, path, path_item, &scalar);
+ }
+
+ default:
+ return NULL; /* not a boolean expression */
+ }
+}
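+
+/*
+ * A rough sketch of the trees built here (jsonb_ops): the query
+ * jb @? '$.a ? (@.b == 1 || @.c == 2)' becomes roughly
+ * OR(AND(key "a", key "b", value 1), AND(key "a", key "c", value 2)),
+ * where every leaf is a JSP_GIN_ENTRY holding a GIN key datum. The path
+ * leading into a filter is re-extracted inside each equality branch.
+ */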
+
+/* Recursively emit all GIN entries found in the node tree */
+static void
+emit_jsp_gin_entries(JsonPathGinNode *node, GinEntries *entries)
+{
+ check_stack_depth();
+
+ switch (node->type)
+ {
+ case JSP_GIN_ENTRY:
+ /* replace datum with its index in the array */
+ node->val.entryIndex = add_gin_entry(entries, node->val.entryDatum);
+ break;
+
+ case JSP_GIN_OR:
+ case JSP_GIN_AND:
+ {
+ int i;
+
+ for (i = 0; i < node->val.nargs; i++)
+ emit_jsp_gin_entries(node->args[i], entries);
+
+ break;
+ }
+ }
+}
+
+/*
+ * Recursively extract GIN entries from jsonpath query.
+ * Root expression node is put into (*extra_data)[0].
+ */
+static Datum *
+extract_jsp_query(JsonPath *jp, StrategyNumber strat, bool pathOps,
+ int32 *nentries, Pointer **extra_data)
+{
+ JsonPathGinContext cxt;
+ JsonPathItem root;
+ JsonPathGinNode *node;
+ JsonPathGinPath path = {0};
+ GinEntries entries = {0};
+
+ cxt.lax = (jp->header & JSONPATH_LAX) != 0;
+
+ if (pathOps)
+ {
+ cxt.add_path_item = jsonb_path_ops__add_path_item;
+ cxt.extract_nodes = jsonb_path_ops__extract_nodes;
+ }
+ else
+ {
+ cxt.add_path_item = jsonb_ops__add_path_item;
+ cxt.extract_nodes = jsonb_ops__extract_nodes;
+ }
+
+ jspInit(&root, jp);
+
+ node = strat == JsonbJsonpathExistsStrategyNumber
+ ? extract_jsp_path_expr(&cxt, path, &root, NULL)
+ : extract_jsp_bool_expr(&cxt, path, &root, false);
+
+ if (!node)
+ {
+ *nentries = 0;
+ return NULL;
+ }
+
+ emit_jsp_gin_entries(node, &entries);
+
+ *nentries = entries.count;
+ if (!*nentries)
+ return NULL;
+
+ *extra_data = palloc0(sizeof(**extra_data) * entries.count);
+ **extra_data = (Pointer) node;
+
+ return entries.buf;
+}
+
+/*
+ * Recursively execute jsonpath expression.
+ * 'check' is a bool[] or a GinTernaryValue[] depending on 'ternary' flag.
+ */
+static GinTernaryValue
+execute_jsp_gin_node(JsonPathGinNode *node, void *check, bool ternary)
+{
+ GinTernaryValue res;
+ GinTernaryValue v;
+ int i;
+
+ switch (node->type)
+ {
+ case JSP_GIN_AND:
+ res = GIN_TRUE;
+ for (i = 0; i < node->val.nargs; i++)
+ {
+ v = execute_jsp_gin_node(node->args[i], check, ternary);
+ if (v == GIN_FALSE)
+ return GIN_FALSE;
+ else if (v == GIN_MAYBE)
+ res = GIN_MAYBE;
+ }
+ return res;
+
+ case JSP_GIN_OR:
+ res = GIN_FALSE;
+ for (i = 0; i < node->val.nargs; i++)
+ {
+ v = execute_jsp_gin_node(node->args[i], check, ternary);
+ if (v == GIN_TRUE)
+ return GIN_TRUE;
+ else if (v == GIN_MAYBE)
+ res = GIN_MAYBE;
+ }
+ return res;
+
+ case JSP_GIN_ENTRY:
+ {
+ int index = node->val.entryIndex;
+
+ if (ternary)
+ return ((GinTernaryValue *) check)[index];
+ else
+ return ((bool *) check)[index] ? GIN_TRUE : GIN_FALSE;
+ }
+
+ default:
+ elog(ERROR, "invalid jsonpath gin node type: %d", node->type);
+ return GIN_FALSE; /* keep compiler quiet */
+ }
+}
+
+Datum
+gin_extract_jsonb_query(PG_FUNCTION_ARGS)
+{
+ int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
+ StrategyNumber strategy = PG_GETARG_UINT16(2);
+ int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
+ Datum *entries;
+
+ if (strategy == JsonbContainsStrategyNumber)
+ {
+ /* Query is a jsonb, so just apply gin_extract_jsonb... */
+ entries = (Datum *)
+ DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb,
+ PG_GETARG_DATUM(0),
+ PointerGetDatum(nentries)));
+ /* ...although "contains {}" requires a full index scan */
+ if (*nentries == 0)
+ *searchMode = GIN_SEARCH_MODE_ALL;
+ }
+ else if (strategy == JsonbExistsStrategyNumber)
+ {
+ /* Query is a text string, which we treat as a key */
+ text *query = PG_GETARG_TEXT_PP(0);
+
+ *nentries = 1;
+ entries = (Datum *) palloc(sizeof(Datum));
+ entries[0] = make_text_key(JGINFLAG_KEY,
+ VARDATA_ANY(query),
+ VARSIZE_ANY_EXHDR(query));
+ }
+ else if (strategy == JsonbExistsAnyStrategyNumber ||
+ strategy == JsonbExistsAllStrategyNumber)
+ {
+ /* Query is a text array; each element is treated as a key */
+ ArrayType *query = PG_GETARG_ARRAYTYPE_P(0);
+ Datum *key_datums;
+ bool *key_nulls;
+ int key_count;
+ int i,
+ j;
+
+ deconstruct_array(query,
+ TEXTOID, -1, false, TYPALIGN_INT,
+ &key_datums, &key_nulls, &key_count);
+
+ entries = (Datum *) palloc(sizeof(Datum) * key_count);
+
+ for (i = 0, j = 0; i < key_count; i++)
+ {
+ /* Nulls in the array are ignored */
+ if (key_nulls[i])
+ continue;
+ /* We rely on the array elements not being toasted */
+ entries[j++] = make_text_key(JGINFLAG_KEY,
+ VARDATA_ANY(key_datums[i]),
+ VARSIZE_ANY_EXHDR(key_datums[i]));
+ }
+
+ *nentries = j;
+ /* ExistsAll with no keys should match everything */
+ if (j == 0 && strategy == JsonbExistsAllStrategyNumber)
+ *searchMode = GIN_SEARCH_MODE_ALL;
+ }
+ else if (strategy == JsonbJsonpathPredicateStrategyNumber ||
+ strategy == JsonbJsonpathExistsStrategyNumber)
+ {
+ JsonPath *jp = PG_GETARG_JSONPATH_P(0);
+ Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
+
+ entries = extract_jsp_query(jp, strategy, false, nentries, extra_data);
+
+ if (!entries)
+ *searchMode = GIN_SEARCH_MODE_ALL;
+ }
+ else
+ {
+ elog(ERROR, "unrecognized strategy number: %d", strategy);
+ entries = NULL; /* keep compiler quiet */
+ }
+
+ PG_RETURN_POINTER(entries);
+}
+
+Datum
+gin_consistent_jsonb(PG_FUNCTION_ARGS)
+{
+ bool *check = (bool *) PG_GETARG_POINTER(0);
+ StrategyNumber strategy = PG_GETARG_UINT16(1);
+
+ /* Jsonb *query = PG_GETARG_JSONB_P(2); */
+ int32 nkeys = PG_GETARG_INT32(3);
+
+ Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
+ bool *recheck = (bool *) PG_GETARG_POINTER(5);
+ bool res = true;
+ int32 i;
+
+ if (strategy == JsonbContainsStrategyNumber)
+ {
+ /*
+ * We must always recheck, since we can't tell from the index whether
+ * the positions of the matched items match the structure of the query
+ * object. (Even if we could, we'd also have to worry about hashed
+ * keys and the index's failure to distinguish keys from string array
+ * elements.) However, the tuple certainly doesn't match unless it
+ * contains all the query keys.
+ */
+ *recheck = true;
+ for (i = 0; i < nkeys; i++)
+ {
+ if (!check[i])
+ {
+ res = false;
+ break;
+ }
+ }
+ }
+ else if (strategy == JsonbExistsStrategyNumber)
+ {
+ /*
+ * Although the key is certainly present in the index, we must recheck
+ * because (1) the key might be hashed, and (2) the index match might
+ * be for a key that's not at top level of the JSON object. For (1),
+ * we could look at the query key to see if it's hashed and not
+ * recheck if not, but the index lacks enough info to tell about (2).
+ */
+ *recheck = true;
+ res = true;
+ }
+ else if (strategy == JsonbExistsAnyStrategyNumber)
+ {
+ /* As for plain exists, we must recheck */
+ *recheck = true;
+ res = true;
+ }
+ else if (strategy == JsonbExistsAllStrategyNumber)
+ {
+ /* As for plain exists, we must recheck */
+ *recheck = true;
+ /* ... but unless all the keys are present, we can say "false" */
+ for (i = 0; i < nkeys; i++)
+ {
+ if (!check[i])
+ {
+ res = false;
+ break;
+ }
+ }
+ }
+ else if (strategy == JsonbJsonpathPredicateStrategyNumber ||
+ strategy == JsonbJsonpathExistsStrategyNumber)
+ {
+ *recheck = true;
+
+ if (nkeys > 0)
+ {
+ Assert(extra_data && extra_data[0]);
+ res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check,
+ false) != GIN_FALSE;
+ }
+ }
+ else
+ elog(ERROR, "unrecognized strategy number: %d", strategy);
+
+ PG_RETURN_BOOL(res);
+}
+
+Datum
+gin_triconsistent_jsonb(PG_FUNCTION_ARGS)
+{
+ GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
+ StrategyNumber strategy = PG_GETARG_UINT16(1);
+
+ /* Jsonb *query = PG_GETARG_JSONB_P(2); */
+ int32 nkeys = PG_GETARG_INT32(3);
+ Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
+ GinTernaryValue res = GIN_MAYBE;
+ int32 i;
+
+ /*
+ * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this
+ * corresponds to always forcing recheck in the regular consistent
+ * function, for the reasons listed there.
+ */
+ if (strategy == JsonbContainsStrategyNumber ||
+ strategy == JsonbExistsAllStrategyNumber)
+ {
+ /* All extracted keys must be present */
+ for (i = 0; i < nkeys; i++)
+ {
+ if (check[i] == GIN_FALSE)
+ {
+ res = GIN_FALSE;
+ break;
+ }
+ }
+ }
+ else if (strategy == JsonbExistsStrategyNumber ||
+ strategy == JsonbExistsAnyStrategyNumber)
+ {
+ /* At least one extracted key must be present */
+ res = GIN_FALSE;
+ for (i = 0; i < nkeys; i++)
+ {
+ if (check[i] == GIN_TRUE ||
+ check[i] == GIN_MAYBE)
+ {
+ res = GIN_MAYBE;
+ break;
+ }
+ }
+ }
+ else if (strategy == JsonbJsonpathPredicateStrategyNumber ||
+ strategy == JsonbJsonpathExistsStrategyNumber)
+ {
+ if (nkeys > 0)
+ {
+ Assert(extra_data && extra_data[0]);
+ res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check,
+ true);
+
+ /* Should always recheck the result */
+ if (res == GIN_TRUE)
+ res = GIN_MAYBE;
+ }
+ }
+ else
+ elog(ERROR, "unrecognized strategy number: %d", strategy);
+
+ PG_RETURN_GIN_TERNARY_VALUE(res);
+}
+
+/*
+ *
+ * jsonb_path_ops GIN opclass support functions
+ *
+ * In a jsonb_path_ops index, the GIN keys are uint32 hashes, one per JSON
+ * value; but the JSON key(s) leading to each value are also included in its
+ * hash computation. This means we can only support containment queries,
+ * but the index can distinguish, for example, {"foo": 42} from {"bar": 42}
+ * since different hashes will be generated.
+ *
+ */
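+
+/*
+ * For example, in {"foo": {"bar": 42}} the single extracted entry is the
+ * hash obtained by folding "foo", then "bar", then the value 42 into one
+ * uint32 (see the PathHashStack logic below), so it cannot collide with the
+ * entry for {"baz": {"bar": 42}} except by hash accident.
+ */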
+
+Datum
+gin_extract_jsonb_path(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
+ int total = JB_ROOT_COUNT(jb);
+ JsonbIterator *it;
+ JsonbValue v;
+ JsonbIteratorToken r;
+ PathHashStack tail;
+ PathHashStack *stack;
+ GinEntries entries;
+
+ /* If the root level is empty, we certainly have no keys */
+ if (total == 0)
+ {
+ *nentries = 0;
+ PG_RETURN_POINTER(NULL);
+ }
+
+ /* Otherwise, use 2 * root count as initial estimate of result size */
+ init_gin_entries(&entries, 2 * total);
+
+ /* We keep a stack of partial hashes corresponding to parent key levels */
+ tail.parent = NULL;
+ tail.hash = 0;
+ stack = &tail;
+
+ it = JsonbIteratorInit(&jb->root);
+
+ while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+ {
+ PathHashStack *parent;
+
+ switch (r)
+ {
+ case WJB_BEGIN_ARRAY:
+ case WJB_BEGIN_OBJECT:
+ /* Push a stack level for this object */
+ parent = stack;
+ stack = (PathHashStack *) palloc(sizeof(PathHashStack));
+
+ /*
+ * We pass forward hashes from outer nesting levels so that
+ * the hashes for nested values will include outer keys as
+ * well as their own keys.
+ *
+ * Nesting an array within another array will not alter
+ * innermost scalar element hash values, but that seems
+ * inconsequential.
+ */
+ stack->hash = parent->hash;
+ stack->parent = parent;
+ break;
+ case WJB_KEY:
+ /* mix this key into the current outer hash */
+ JsonbHashScalarValue(&v, &stack->hash);
+ /* hash is now ready to incorporate the value */
+ break;
+ case WJB_ELEM:
+ case WJB_VALUE:
+ /* mix the element or value's hash into the prepared hash */
+ JsonbHashScalarValue(&v, &stack->hash);
+ /* and emit an index entry */
+ add_gin_entry(&entries, UInt32GetDatum(stack->hash));
+ /* reset hash for next key, value, or sub-object */
+ stack->hash = stack->parent->hash;
+ break;
+ case WJB_END_ARRAY:
+ case WJB_END_OBJECT:
+ /* Pop the stack */
+ parent = stack->parent;
+ pfree(stack);
+ stack = parent;
+ /* reset hash for next key, value, or sub-object */
+ if (stack->parent)
+ stack->hash = stack->parent->hash;
+ else
+ stack->hash = 0;
+ break;
+ default:
+ elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r);
+ }
+ }
+
+ *nentries = entries.count;
+
+ PG_RETURN_POINTER(entries.buf);
+}
+
+Datum
+gin_extract_jsonb_query_path(PG_FUNCTION_ARGS)
+{
+ int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
+ StrategyNumber strategy = PG_GETARG_UINT16(2);
+ int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
+ Datum *entries;
+
+ if (strategy == JsonbContainsStrategyNumber)
+ {
+ /* Query is a jsonb, so just apply gin_extract_jsonb_path ... */
+ entries = (Datum *)
+ DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb_path,
+ PG_GETARG_DATUM(0),
+ PointerGetDatum(nentries)));
+
+ /* ... although "contains {}" requires a full index scan */
+ if (*nentries == 0)
+ *searchMode = GIN_SEARCH_MODE_ALL;
+ }
+ else if (strategy == JsonbJsonpathPredicateStrategyNumber ||
+ strategy == JsonbJsonpathExistsStrategyNumber)
+ {
+ JsonPath *jp = PG_GETARG_JSONPATH_P(0);
+ Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
+
+ entries = extract_jsp_query(jp, strategy, true, nentries, extra_data);
+
+ if (!entries)
+ *searchMode = GIN_SEARCH_MODE_ALL;
+ }
+ else
+ {
+ elog(ERROR, "unrecognized strategy number: %d", strategy);
+ entries = NULL;
+ }
+
+ PG_RETURN_POINTER(entries);
+}
+
+Datum
+gin_consistent_jsonb_path(PG_FUNCTION_ARGS)
+{
+ bool *check = (bool *) PG_GETARG_POINTER(0);
+ StrategyNumber strategy = PG_GETARG_UINT16(1);
+
+ /* Jsonb *query = PG_GETARG_JSONB_P(2); */
+ int32 nkeys = PG_GETARG_INT32(3);
+ Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
+ bool *recheck = (bool *) PG_GETARG_POINTER(5);
+ bool res = true;
+ int32 i;
+
+ if (strategy == JsonbContainsStrategyNumber)
+ {
+ /*
+ * jsonb_path_ops is necessarily lossy, not only because of hash
+ * collisions but also because it doesn't preserve complete
+ * information about the structure of the JSON object. Besides, there
+ * are some special rules around the containment of raw scalars in
+ * arrays that are not handled here. So we must always recheck a
+ * match. However, if not all of the keys are present, the tuple
+ * certainly doesn't match.
+ */
+ *recheck = true;
+ for (i = 0; i < nkeys; i++)
+ {
+ if (!check[i])
+ {
+ res = false;
+ break;
+ }
+ }
+ }
+ else if (strategy == JsonbJsonpathPredicateStrategyNumber ||
+ strategy == JsonbJsonpathExistsStrategyNumber)
+ {
+ *recheck = true;
+
+ if (nkeys > 0)
+ {
+ Assert(extra_data && extra_data[0]);
+ res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check,
+ false) != GIN_FALSE;
+ }
+ }
+ else
+ elog(ERROR, "unrecognized strategy number: %d", strategy);
+
+ PG_RETURN_BOOL(res);
+}
+
+Datum
+gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS)
+{
+ GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
+ StrategyNumber strategy = PG_GETARG_UINT16(1);
+
+ /* Jsonb *query = PG_GETARG_JSONB_P(2); */
+ int32 nkeys = PG_GETARG_INT32(3);
+ Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
+ GinTernaryValue res = GIN_MAYBE;
+ int32 i;
+
+ if (strategy == JsonbContainsStrategyNumber)
+ {
+ /*
+ * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE;
+ * this corresponds to always forcing recheck in the regular
+ * consistent function, for the reasons listed there.
+ */
+ for (i = 0; i < nkeys; i++)
+ {
+ if (check[i] == GIN_FALSE)
+ {
+ res = GIN_FALSE;
+ break;
+ }
+ }
+ }
+ else if (strategy == JsonbJsonpathPredicateStrategyNumber ||
+ strategy == JsonbJsonpathExistsStrategyNumber)
+ {
+ if (nkeys > 0)
+ {
+ Assert(extra_data && extra_data[0]);
+ res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check,
+ true);
+
+ /* Should always recheck the result */
+ if (res == GIN_TRUE)
+ res = GIN_MAYBE;
+ }
+ }
+ else
+ elog(ERROR, "unrecognized strategy number: %d", strategy);
+
+ PG_RETURN_GIN_TERNARY_VALUE(res);
+}
+
+/*
+ * Construct a jsonb_ops GIN key from a flag byte and a textual representation
+ * (which need not be null-terminated). This function is responsible
+ * for hashing overlength text representations; it will add the
+ * JGINFLAG_HASHED bit to the flag value if it does that.
+ */
+static Datum
+make_text_key(char flag, const char *str, int len)
+{
+ text *item;
+ char hashbuf[10];
+
+ if (len > JGIN_MAXLENGTH)
+ {
+ uint32 hashval;
+
+ hashval = DatumGetUInt32(hash_any((const unsigned char *) str, len));
+ snprintf(hashbuf, sizeof(hashbuf), "%08x", hashval);
+ str = hashbuf;
+ len = 8;
+ flag |= JGINFLAG_HASHED;
+ }
+
+ /*
+ * Now build the text Datum. For simplicity we build a 4-byte-header
+ * varlena text Datum here, but we expect it will get converted to short
+ * header format when stored in the index.
+ */
+ item = (text *) palloc(VARHDRSZ + len + 1);
+ SET_VARSIZE(item, VARHDRSZ + len + 1);
+
+ *VARDATA(item) = flag;
+
+ memcpy(VARDATA(item) + 1, str, len);
+
+ return PointerGetDatum(item);
+}
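+
+/*
+ * Illustration of the key layout: the object key "foo" becomes a text datum
+ * whose payload is the flag byte JGINFLAG_KEY followed by the bytes "foo";
+ * a key longer than JGIN_MAXLENGTH is replaced by 8 hex digits of its hash
+ * and additionally carries JGINFLAG_HASHED.
+ */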
+
+/*
+ * Create a textual representation of a JsonbValue that will serve as a GIN
+ * key in a jsonb_ops index. is_key is true if the JsonbValue is a key,
+ * or if it is a string array element (since we pretend those are keys,
+ * see jsonb.h).
+ */
+static Datum
+make_scalar_key(const JsonbValue *scalarVal, bool is_key)
+{
+ Datum item;
+ char *cstr;
+
+ switch (scalarVal->type)
+ {
+ case jbvNull:
+ Assert(!is_key);
+ item = make_text_key(JGINFLAG_NULL, "", 0);
+ break;
+ case jbvBool:
+ Assert(!is_key);
+ item = make_text_key(JGINFLAG_BOOL,
+ scalarVal->val.boolean ? "t" : "f", 1);
+ break;
+ case jbvNumeric:
+ Assert(!is_key);
+
+ /*
+ * A normalized textual representation, free of trailing zeroes,
+ * is required so that numerically equal values will produce equal
+ * strings.
+ *
+ * It isn't ideal that numerics are stored in a relatively bulky
+ * textual format. However, it's a notationally convenient way of
+ * storing a "union" type in the GIN B-Tree, and indexing Jsonb
+ * strings takes precedence.
+ */
+ cstr = numeric_normalize(scalarVal->val.numeric);
+ item = make_text_key(JGINFLAG_NUM, cstr, strlen(cstr));
+ pfree(cstr);
+ break;
+ case jbvString:
+ item = make_text_key(is_key ? JGINFLAG_KEY : JGINFLAG_STR,
+ scalarVal->val.string.val,
+ scalarVal->val.string.len);
+ break;
+ default:
+ elog(ERROR, "unrecognized jsonb scalar type: %d", scalarVal->type);
+ item = 0; /* keep compiler quiet */
+ break;
+ }
+
+ return item;
+}
diff --git a/src/backend/utils/adt/jsonb_op.c b/src/backend/utils/adt/jsonb_op.c
new file mode 100644
index 0000000..9270520
--- /dev/null
+++ b/src/backend/utils/adt/jsonb_op.c
@@ -0,0 +1,338 @@
+/*-------------------------------------------------------------------------
+ *
+ * jsonb_op.c
+ * Special operators for jsonb only, used by various index access methods
+ *
+ * Copyright (c) 2014-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/jsonb_op.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/pg_type.h"
+#include "miscadmin.h"
+#include "utils/builtins.h"
+#include "utils/jsonb.h"
+
+Datum
+jsonb_exists(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ text *key = PG_GETARG_TEXT_PP(1);
+ JsonbValue kval;
+ JsonbValue *v = NULL;
+
+ /*
+ * We only match Object keys (which are naturally always Strings), or
+ * string elements in arrays. In particular, we do not match non-string
+ * scalar elements. Existence of a key/element is only considered at the
+ * top level. No recursion occurs.
+ */
+ kval.type = jbvString;
+ kval.val.string.val = VARDATA_ANY(key);
+ kval.val.string.len = VARSIZE_ANY_EXHDR(key);
+
+ v = findJsonbValueFromContainer(&jb->root,
+ JB_FOBJECT | JB_FARRAY,
+ &kval);
+
+ PG_RETURN_BOOL(v != NULL);
+}
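+
+/*
+ * This backs the ? operator, e.g. '{"a": 1, "b": null}'::jsonb ? 'a' is
+ * true, while the same document ? 'c' is false. Array example:
+ * '["x", "y"]'::jsonb ? 'x' is true, but non-string elements are never
+ * matched.
+ */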
+
+Datum
+jsonb_exists_any(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ ArrayType *keys = PG_GETARG_ARRAYTYPE_P(1);
+ int i;
+ Datum *key_datums;
+ bool *key_nulls;
+ int elem_count;
+
+ deconstruct_array(keys, TEXTOID, -1, false, TYPALIGN_INT,
+ &key_datums, &key_nulls, &elem_count);
+
+ for (i = 0; i < elem_count; i++)
+ {
+ JsonbValue strVal;
+
+ if (key_nulls[i])
+ continue;
+
+ strVal.type = jbvString;
+ /* We rely on the array elements not being toasted */
+ strVal.val.string.val = VARDATA_ANY(key_datums[i]);
+ strVal.val.string.len = VARSIZE_ANY_EXHDR(key_datums[i]);
+
+ if (findJsonbValueFromContainer(&jb->root,
+ JB_FOBJECT | JB_FARRAY,
+ &strVal) != NULL)
+ PG_RETURN_BOOL(true);
+ }
+
+ PG_RETURN_BOOL(false);
+}
+
+Datum
+jsonb_exists_all(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ ArrayType *keys = PG_GETARG_ARRAYTYPE_P(1);
+ int i;
+ Datum *key_datums;
+ bool *key_nulls;
+ int elem_count;
+
+ deconstruct_array(keys, TEXTOID, -1, false, TYPALIGN_INT,
+ &key_datums, &key_nulls, &elem_count);
+
+ for (i = 0; i < elem_count; i++)
+ {
+ JsonbValue strVal;
+
+ if (key_nulls[i])
+ continue;
+
+ strVal.type = jbvString;
+ /* We rely on the array elements not being toasted */
+ strVal.val.string.val = VARDATA_ANY(key_datums[i]);
+ strVal.val.string.len = VARSIZE_ANY_EXHDR(key_datums[i]);
+
+ if (findJsonbValueFromContainer(&jb->root,
+ JB_FOBJECT | JB_FARRAY,
+ &strVal) == NULL)
+ PG_RETURN_BOOL(false);
+ }
+
+ PG_RETURN_BOOL(true);
+}
+
+Datum
+jsonb_contains(PG_FUNCTION_ARGS)
+{
+ Jsonb *val = PG_GETARG_JSONB_P(0);
+ Jsonb *tmpl = PG_GETARG_JSONB_P(1);
+
+ JsonbIterator *it1,
+ *it2;
+
+ if (JB_ROOT_IS_OBJECT(val) != JB_ROOT_IS_OBJECT(tmpl))
+ PG_RETURN_BOOL(false);
+
+ it1 = JsonbIteratorInit(&val->root);
+ it2 = JsonbIteratorInit(&tmpl->root);
+
+ PG_RETURN_BOOL(JsonbDeepContains(&it1, &it2));
+}
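+
+/*
+ * This backs the @> operator, e.g. '{"a": 1, "b": 2}'::jsonb @> '{"a": 1}'
+ * is true. The top-level type check above means an object can never contain
+ * an array or vice versa; the finer containment rules live in
+ * JsonbDeepContains().
+ */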
+
+Datum
+jsonb_contained(PG_FUNCTION_ARGS)
+{
+ /* Commutator of "contains" */
+ Jsonb *tmpl = PG_GETARG_JSONB_P(0);
+ Jsonb *val = PG_GETARG_JSONB_P(1);
+
+ JsonbIterator *it1,
+ *it2;
+
+ if (JB_ROOT_IS_OBJECT(val) != JB_ROOT_IS_OBJECT(tmpl))
+ PG_RETURN_BOOL(false);
+
+ it1 = JsonbIteratorInit(&val->root);
+ it2 = JsonbIteratorInit(&tmpl->root);
+
+ PG_RETURN_BOOL(JsonbDeepContains(&it1, &it2));
+}
+
+Datum
+jsonb_ne(PG_FUNCTION_ARGS)
+{
+ Jsonb *jba = PG_GETARG_JSONB_P(0);
+ Jsonb *jbb = PG_GETARG_JSONB_P(1);
+ bool res;
+
+ res = (compareJsonbContainers(&jba->root, &jbb->root) != 0);
+
+ PG_FREE_IF_COPY(jba, 0);
+ PG_FREE_IF_COPY(jbb, 1);
+ PG_RETURN_BOOL(res);
+}
+
+/*
+ * B-Tree operator class operators, support function
+ */
+Datum
+jsonb_lt(PG_FUNCTION_ARGS)
+{
+ Jsonb *jba = PG_GETARG_JSONB_P(0);
+ Jsonb *jbb = PG_GETARG_JSONB_P(1);
+ bool res;
+
+ res = (compareJsonbContainers(&jba->root, &jbb->root) < 0);
+
+ PG_FREE_IF_COPY(jba, 0);
+ PG_FREE_IF_COPY(jbb, 1);
+ PG_RETURN_BOOL(res);
+}
+
+Datum
+jsonb_gt(PG_FUNCTION_ARGS)
+{
+ Jsonb *jba = PG_GETARG_JSONB_P(0);
+ Jsonb *jbb = PG_GETARG_JSONB_P(1);
+ bool res;
+
+ res = (compareJsonbContainers(&jba->root, &jbb->root) > 0);
+
+ PG_FREE_IF_COPY(jba, 0);
+ PG_FREE_IF_COPY(jbb, 1);
+ PG_RETURN_BOOL(res);
+}
+
+Datum
+jsonb_le(PG_FUNCTION_ARGS)
+{
+ Jsonb *jba = PG_GETARG_JSONB_P(0);
+ Jsonb *jbb = PG_GETARG_JSONB_P(1);
+ bool res;
+
+ res = (compareJsonbContainers(&jba->root, &jbb->root) <= 0);
+
+ PG_FREE_IF_COPY(jba, 0);
+ PG_FREE_IF_COPY(jbb, 1);
+ PG_RETURN_BOOL(res);
+}
+
+Datum
+jsonb_ge(PG_FUNCTION_ARGS)
+{
+ Jsonb *jba = PG_GETARG_JSONB_P(0);
+ Jsonb *jbb = PG_GETARG_JSONB_P(1);
+ bool res;
+
+ res = (compareJsonbContainers(&jba->root, &jbb->root) >= 0);
+
+ PG_FREE_IF_COPY(jba, 0);
+ PG_FREE_IF_COPY(jbb, 1);
+ PG_RETURN_BOOL(res);
+}
+
+Datum
+jsonb_eq(PG_FUNCTION_ARGS)
+{
+ Jsonb *jba = PG_GETARG_JSONB_P(0);
+ Jsonb *jbb = PG_GETARG_JSONB_P(1);
+ bool res;
+
+ res = (compareJsonbContainers(&jba->root, &jbb->root) == 0);
+
+ PG_FREE_IF_COPY(jba, 0);
+ PG_FREE_IF_COPY(jbb, 1);
+ PG_RETURN_BOOL(res);
+}
+
+Datum
+jsonb_cmp(PG_FUNCTION_ARGS)
+{
+ Jsonb *jba = PG_GETARG_JSONB_P(0);
+ Jsonb *jbb = PG_GETARG_JSONB_P(1);
+ int res;
+
+ res = compareJsonbContainers(&jba->root, &jbb->root);
+
+ PG_FREE_IF_COPY(jba, 0);
+ PG_FREE_IF_COPY(jbb, 1);
+ PG_RETURN_INT32(res);
+}
+
+/*
+ * Hash operator class jsonb hashing function
+ */
+Datum
+jsonb_hash(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ JsonbIterator *it;
+ JsonbValue v;
+ JsonbIteratorToken r;
+ uint32 hash = 0;
+
+ if (JB_ROOT_COUNT(jb) == 0)
+ PG_RETURN_INT32(0);
+
+ it = JsonbIteratorInit(&jb->root);
+
+ while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+ {
+ switch (r)
+ {
+ /* Rotation is left to JsonbHashScalarValue() */
+ case WJB_BEGIN_ARRAY:
+ hash ^= JB_FARRAY;
+ break;
+ case WJB_BEGIN_OBJECT:
+ hash ^= JB_FOBJECT;
+ break;
+ case WJB_KEY:
+ case WJB_VALUE:
+ case WJB_ELEM:
+ JsonbHashScalarValue(&v, &hash);
+ break;
+ case WJB_END_ARRAY:
+ case WJB_END_OBJECT:
+ break;
+ default:
+ elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r);
+ }
+ }
+
+ PG_FREE_IF_COPY(jb, 0);
+ PG_RETURN_INT32(hash);
+}
+
+Datum
+jsonb_hash_extended(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ uint64 seed = PG_GETARG_INT64(1);
+ JsonbIterator *it;
+ JsonbValue v;
+ JsonbIteratorToken r;
+ uint64 hash = 0;
+
+ if (JB_ROOT_COUNT(jb) == 0)
+ PG_RETURN_UINT64(seed);
+
+ it = JsonbIteratorInit(&jb->root);
+
+ while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+ {
+ switch (r)
+ {
+ /* Rotation is left to JsonbHashScalarValueExtended() */
+ case WJB_BEGIN_ARRAY:
+ hash ^= ((uint64) JB_FARRAY) << 32 | JB_FARRAY;
+ break;
+ case WJB_BEGIN_OBJECT:
+ hash ^= ((uint64) JB_FOBJECT) << 32 | JB_FOBJECT;
+ break;
+ case WJB_KEY:
+ case WJB_VALUE:
+ case WJB_ELEM:
+ JsonbHashScalarValueExtended(&v, &hash, seed);
+ break;
+ case WJB_END_ARRAY:
+ case WJB_END_OBJECT:
+ break;
+ default:
+ elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r);
+ }
+ }
+
+ PG_FREE_IF_COPY(jb, 0);
+ PG_RETURN_UINT64(hash);
+}
diff --git a/src/backend/utils/adt/jsonb_util.c b/src/backend/utils/adt/jsonb_util.c
new file mode 100644
index 0000000..6044275
--- /dev/null
+++ b/src/backend/utils/adt/jsonb_util.c
@@ -0,0 +1,1968 @@
+/*-------------------------------------------------------------------------
+ *
+ * jsonb_util.c
+ * converting between Jsonb and JsonbValues, and iterating.
+ *
+ * Copyright (c) 2014-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/jsonb_util.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/pg_collation.h"
+#include "catalog/pg_type.h"
+#include "common/hashfn.h"
+#include "common/jsonapi.h"
+#include "miscadmin.h"
+#include "port/pg_bitutils.h"
+#include "utils/builtins.h"
+#include "utils/datetime.h"
+#include "utils/json.h"
+#include "utils/jsonb.h"
+#include "utils/memutils.h"
+#include "utils/varlena.h"
+
+/*
+ * Maximum number of elements in an array (or key/value pairs in an object).
+ * This is limited by two things: the size of the JEntry array must fit
+ * in MaxAllocSize, and the number of elements (or pairs) must fit in the bits
+ * reserved for that in the JsonbContainer.header field.
+ *
+ * (The total size of an array's or object's elements is also limited by
+ * JENTRY_OFFLENMASK, but we're not concerned about that here.)
+ */
+#define JSONB_MAX_ELEMS (Min(MaxAllocSize / sizeof(JsonbValue), JB_CMASK))
+#define JSONB_MAX_PAIRS (Min(MaxAllocSize / sizeof(JsonbPair), JB_CMASK))
+
+static void fillJsonbValue(JsonbContainer *container, int index,
+ char *base_addr, uint32 offset,
+ JsonbValue *result);
+static bool equalsJsonbScalarValue(JsonbValue *a, JsonbValue *b);
+static int compareJsonbScalarValue(JsonbValue *a, JsonbValue *b);
+static Jsonb *convertToJsonb(JsonbValue *val);
+static void convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
+static void convertJsonbArray(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
+static void convertJsonbObject(StringInfo buffer, JEntry *header, JsonbValue *val, int level);
+static void convertJsonbScalar(StringInfo buffer, JEntry *header, JsonbValue *scalarVal);
+
+static int reserveFromBuffer(StringInfo buffer, int len);
+static void appendToBuffer(StringInfo buffer, const char *data, int len);
+static void copyToBuffer(StringInfo buffer, int offset, const char *data, int len);
+static short padBufferToInt(StringInfo buffer);
+
+static JsonbIterator *iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent);
+static JsonbIterator *freeAndGetParent(JsonbIterator *it);
+static JsonbParseState *pushState(JsonbParseState **pstate);
+static void appendKey(JsonbParseState *pstate, JsonbValue *scalarVal);
+static void appendValue(JsonbParseState *pstate, JsonbValue *scalarVal);
+static void appendElement(JsonbParseState *pstate, JsonbValue *scalarVal);
+static int lengthCompareJsonbStringValue(const void *a, const void *b);
+static int lengthCompareJsonbString(const char *val1, int len1,
+ const char *val2, int len2);
+static int lengthCompareJsonbPair(const void *a, const void *b, void *arg);
+static void uniqueifyJsonbObject(JsonbValue *object);
+static JsonbValue *pushJsonbValueScalar(JsonbParseState **pstate,
+ JsonbIteratorToken seq,
+ JsonbValue *scalarVal);
+
+void
+JsonbToJsonbValue(Jsonb *jsonb, JsonbValue *val)
+{
+ val->type = jbvBinary;
+ val->val.binary.data = &jsonb->root;
+ val->val.binary.len = VARSIZE(jsonb) - VARHDRSZ;
+}
+
+/*
+ * Turn an in-memory JsonbValue into a Jsonb for on-disk storage.
+ *
+ * Generally we find it more convenient to directly iterate through the Jsonb
+ * representation and only really convert nested scalar values.
+ * JsonbIteratorNext() does this, so that clients of the iteration code don't
+ * have to directly deal with the binary representation (JsonbDeepContains() is
+ * a notable exception, although all exceptions are internal to this module).
+ * In general, functions that accept a JsonbValue argument are concerned with
+ * the manipulation of scalar values, or simple containers of scalar values,
+ * where it would be inconvenient to deal with a great amount of other state.
+ */
+Jsonb *
+JsonbValueToJsonb(JsonbValue *val)
+{
+ Jsonb *out;
+
+ if (IsAJsonbScalar(val))
+ {
+ /* Scalar value */
+ JsonbParseState *pstate = NULL;
+ JsonbValue *res;
+ JsonbValue scalarArray;
+
+ scalarArray.type = jbvArray;
+ scalarArray.val.array.rawScalar = true;
+ scalarArray.val.array.nElems = 1;
+
+ pushJsonbValue(&pstate, WJB_BEGIN_ARRAY, &scalarArray);
+ pushJsonbValue(&pstate, WJB_ELEM, val);
+ res = pushJsonbValue(&pstate, WJB_END_ARRAY, NULL);
+
+ out = convertToJsonb(res);
+ }
+ else if (val->type == jbvObject || val->type == jbvArray)
+ {
+ out = convertToJsonb(val);
+ }
+ else
+ {
+ Assert(val->type == jbvBinary);
+ out = palloc(VARHDRSZ + val->val.binary.len);
+ SET_VARSIZE(out, VARHDRSZ + val->val.binary.len);
+ memcpy(VARDATA(out), val->val.binary.data, val->val.binary.len);
+ }
+
+ return out;
+}
+
+/*
+ * Get the offset of the variable-length portion of a Jsonb node within
+ * the variable-length-data part of its container. The node is identified
+ * by index within the container's JEntry array.
+ */
+uint32
+getJsonbOffset(const JsonbContainer *jc, int index)
+{
+ uint32 offset = 0;
+ int i;
+
+ /*
+ * Start offset of this entry is equal to the end offset of the previous
+ * entry. Walk backwards to the most recent entry stored as an end
+ * offset, returning that offset plus any lengths in between.
+ */
+ for (i = index - 1; i >= 0; i--)
+ {
+ offset += JBE_OFFLENFLD(jc->children[i]);
+ if (JBE_HAS_OFF(jc->children[i]))
+ break;
+ }
+
+ return offset;
+}
+
+/*
+ * Get the length of the variable-length portion of a Jsonb node.
+ * The node is identified by index within the container's JEntry array.
+ */
+uint32
+getJsonbLength(const JsonbContainer *jc, int index)
+{
+ uint32 off;
+ uint32 len;
+
+ /*
+ * If the length is stored directly in the JEntry, just return it.
+ * Otherwise, get the begin offset of the entry, and subtract that from
+ * the stored end+1 offset.
+ */
+ if (JBE_HAS_OFF(jc->children[index]))
+ {
+ off = getJsonbOffset(jc, index);
+ len = JBE_OFFLENFLD(jc->children[index]) - off;
+ }
+ else
+ len = JBE_OFFLENFLD(jc->children[index]);
+
+ return len;
+}
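+
+/*
+ * A small example of the offset/length scheme used by the two functions
+ * above: if children[0] and children[1] store plain lengths 3 and 5 (no
+ * offset flag), then getJsonbOffset(jc, 2) walks back and returns 3 + 5 = 8;
+ * if children[2] instead stores an end offset, getJsonbLength(jc, 2)
+ * subtracts that start offset from it.
+ */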
+
+/*
+ * BT comparator worker function. Returns an integer less than, equal to, or
+ * greater than zero, indicating whether a is less than, equal to, or greater
+ * than b. Consistent with the requirements for a B-Tree operator class.
+ *
+ * Strings are compared lexically, in contrast with other places where we
+ * use much simpler comparator logic when merely searching through strings.
+ * Since this is called from B-Tree support function 1, we're careful not to
+ * leak memory here.
+ */
+int
+compareJsonbContainers(JsonbContainer *a, JsonbContainer *b)
+{
+ JsonbIterator *ita,
+ *itb;
+ int res = 0;
+
+ ita = JsonbIteratorInit(a);
+ itb = JsonbIteratorInit(b);
+
+ do
+ {
+ JsonbValue va,
+ vb;
+ JsonbIteratorToken ra,
+ rb;
+
+ ra = JsonbIteratorNext(&ita, &va, false);
+ rb = JsonbIteratorNext(&itb, &vb, false);
+
+ if (ra == rb)
+ {
+ if (ra == WJB_DONE)
+ {
+ /* Decisively equal */
+ break;
+ }
+
+ if (ra == WJB_END_ARRAY || ra == WJB_END_OBJECT)
+ {
+ /*
+ * There is no array or object to compare at this stage of
+ * processing. jbvArray/jbvObject values are compared
+ * initially, at the WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT
+ * tokens.
+ */
+ continue;
+ }
+
+ if (va.type == vb.type)
+ {
+ switch (va.type)
+ {
+ case jbvString:
+ case jbvNull:
+ case jbvNumeric:
+ case jbvBool:
+ res = compareJsonbScalarValue(&va, &vb);
+ break;
+ case jbvArray:
+
+ /*
+ * This could be a "raw scalar" pseudo array. That's
+ * a special case here though, since we still want the
+ * general type-based comparisons to apply, and as far
+ * as we're concerned a pseudo array is just a scalar.
+ */
+ if (va.val.array.rawScalar != vb.val.array.rawScalar)
+ res = (va.val.array.rawScalar) ? -1 : 1;
+ if (va.val.array.nElems != vb.val.array.nElems)
+ res = (va.val.array.nElems > vb.val.array.nElems) ? 1 : -1;
+ break;
+ case jbvObject:
+ if (va.val.object.nPairs != vb.val.object.nPairs)
+ res = (va.val.object.nPairs > vb.val.object.nPairs) ? 1 : -1;
+ break;
+ case jbvBinary:
+ elog(ERROR, "unexpected jbvBinary value");
+ break;
+ case jbvDatetime:
+ elog(ERROR, "unexpected jbvDatetime value");
+ break;
+ }
+ }
+ else
+ {
+ /* Type-defined order */
+ res = (va.type > vb.type) ? 1 : -1;
+ }
+ }
+ else
+ {
+ /*
+ * It's safe to assume that the types differed, and that the va
+ * and vb values passed were set.
+ *
+ * If the two values were of the same container type, then there'd
+ * have been a chance to observe the variation in the number of
+ * elements/pairs (when processing WJB_BEGIN_OBJECT, say). They're
+ * either two heterogeneously-typed containers, or a container and
+ * some scalar type.
+ *
+ * We don't have to consider the WJB_END_ARRAY and WJB_END_OBJECT
+ * cases here, because we would have seen the corresponding
+ * WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT tokens first, and
+ * concluded that they don't match.
+ */
+ Assert(ra != WJB_END_ARRAY && ra != WJB_END_OBJECT);
+ Assert(rb != WJB_END_ARRAY && rb != WJB_END_OBJECT);
+
+ Assert(va.type != vb.type);
+ Assert(va.type != jbvBinary);
+ Assert(vb.type != jbvBinary);
+ /* Type-defined order */
+ res = (va.type > vb.type) ? 1 : -1;
+ }
+ }
+ while (res == 0);
+
+ while (ita != NULL)
+ {
+ JsonbIterator *i = ita->parent;
+
+ pfree(ita);
+ ita = i;
+ }
+ while (itb != NULL)
+ {
+ JsonbIterator *i = itb->parent;
+
+ pfree(itb);
+ itb = i;
+ }
+
+ return res;
+}
+
+/*
+ * Find value in object (i.e. the "value" part of some key/value pair in an
+ * object), or find a matching element if we're looking through an array. Do
+ * so on the basis of equality of the object keys only, or alternatively
+ * element values only, with a caller-supplied value "key". The "flags"
+ * argument allows the caller to specify which container types are of interest.
+ *
+ * This exported utility function exists to facilitate various cases concerned
+ * with "containment". If asked to look through an object, the caller had
+ * better pass a Jsonb String, because their keys can only be strings.
+ * Otherwise, for an array, any type of JsonbValue will do.
+ *
+ * In order to proceed with the search, it is necessary for callers to have
+ * both specified an interest in exactly one particular container type with an
+ * appropriate flag, as well as having the pointed-to Jsonb container be of
+ * one of those same container types at the top level. (Actually, we just do
+ * whichever makes sense to save callers the trouble of figuring it out - at
+ * most one can make sense, because the container either points to an array
+ * (possibly a "raw scalar" pseudo array) or an object.)
+ *
+ * Note that we can return a jbvBinary JsonbValue if this is called on an
+ * object, but we never do so on an array. If the caller asks to look through
+ * a container type that is not of the type pointed to by the container,
+ * immediately fall through and return NULL. If we cannot find the value,
+ * return NULL. Otherwise, return palloc()'d copy of value.
+ */
+JsonbValue *
+findJsonbValueFromContainer(JsonbContainer *container, uint32 flags,
+ JsonbValue *key)
+{
+ JEntry *children = container->children;
+ int count = JsonContainerSize(container);
+
+ Assert((flags & ~(JB_FARRAY | JB_FOBJECT)) == 0);
+
+ /* Quick out without a palloc cycle if object/array is empty */
+ if (count <= 0)
+ return NULL;
+
+ if ((flags & JB_FARRAY) && JsonContainerIsArray(container))
+ {
+ JsonbValue *result = palloc(sizeof(JsonbValue));
+ char *base_addr = (char *) (children + count);
+ uint32 offset = 0;
+ int i;
+
+ for (i = 0; i < count; i++)
+ {
+ fillJsonbValue(container, i, base_addr, offset, result);
+
+ if (key->type == result->type)
+ {
+ if (equalsJsonbScalarValue(key, result))
+ return result;
+ }
+
+ JBE_ADVANCE_OFFSET(offset, children[i]);
+ }
+
+ pfree(result);
+ }
+ else if ((flags & JB_FOBJECT) && JsonContainerIsObject(container))
+ {
+ /* Object key passed by caller must be a string */
+ Assert(key->type == jbvString);
+
+ return getKeyJsonValueFromContainer(container, key->val.string.val,
+ key->val.string.len, NULL);
+ }
+
+ /* Not found */
+ return NULL;
+}
+
+/*
+ * Find value by key in Jsonb object and fetch it into 'res', which is also
+ * returned.
+ *
+ * 'res' can be passed in as NULL, in which case it's newly palloc'ed here.
+ */
+JsonbValue *
+getKeyJsonValueFromContainer(JsonbContainer *container,
+ const char *keyVal, int keyLen, JsonbValue *res)
+{
+ JEntry *children = container->children;
+ int count = JsonContainerSize(container);
+ char *baseAddr;
+ uint32 stopLow,
+ stopHigh;
+
+ Assert(JsonContainerIsObject(container));
+
+ /* Quick out without a palloc cycle if object is empty */
+ if (count <= 0)
+ return NULL;
+
+ /*
+ * Binary search the container. Since we know this is an object, account
+ * for *pairs* of JEntrys.
+ */
+ baseAddr = (char *) (children + count * 2);
+ stopLow = 0;
+ stopHigh = count;
+ while (stopLow < stopHigh)
+ {
+ uint32 stopMiddle;
+ int difference;
+ const char *candidateVal;
+ int candidateLen;
+
+ stopMiddle = stopLow + (stopHigh - stopLow) / 2;
+
+ candidateVal = baseAddr + getJsonbOffset(container, stopMiddle);
+ candidateLen = getJsonbLength(container, stopMiddle);
+
+ difference = lengthCompareJsonbString(candidateVal, candidateLen,
+ keyVal, keyLen);
+
+ if (difference == 0)
+ {
+ /* Found our key, return corresponding value */
+ int index = stopMiddle + count;
+
+ if (!res)
+ res = palloc(sizeof(JsonbValue));
+
+ fillJsonbValue(container, index, baseAddr,
+ getJsonbOffset(container, index),
+ res);
+
+ return res;
+ }
+ else
+ {
+ if (difference < 0)
+ stopLow = stopMiddle + 1;
+ else
+ stopHigh = stopMiddle;
+ }
+ }
+
+ /* Not found */
+ return NULL;
+}
+
+/*
+ * Get i-th value of a Jsonb array.
+ *
+ * Returns palloc()'d copy of the value, or NULL if it does not exist.
+ */
+JsonbValue *
+getIthJsonbValueFromContainer(JsonbContainer *container, uint32 i)
+{
+ JsonbValue *result;
+ char *base_addr;
+ uint32 nelements;
+
+ if (!JsonContainerIsArray(container))
+ elog(ERROR, "not a jsonb array");
+
+ nelements = JsonContainerSize(container);
+ base_addr = (char *) &container->children[nelements];
+
+ if (i >= nelements)
+ return NULL;
+
+ result = palloc(sizeof(JsonbValue));
+
+ fillJsonbValue(container, i, base_addr,
+ getJsonbOffset(container, i),
+ result);
+
+ return result;
+}
+
+/*
+ * A helper function to fill in a JsonbValue to represent an element of an
+ * array, or a key or value of an object.
+ *
+ * The node's JEntry is at container->children[index], and its variable-length
+ * data is at base_addr + offset. We make the caller determine the offset
+ * since in many cases the caller can amortize that work across multiple
+ * children. When it can't, it can just call getJsonbOffset().
+ *
+ * A nested array or object will be returned as jbvBinary, i.e. it won't be
+ * expanded.
+ */
+static void
+fillJsonbValue(JsonbContainer *container, int index,
+ char *base_addr, uint32 offset,
+ JsonbValue *result)
+{
+ JEntry entry = container->children[index];
+
+ if (JBE_ISNULL(entry))
+ {
+ result->type = jbvNull;
+ }
+ else if (JBE_ISSTRING(entry))
+ {
+ result->type = jbvString;
+ result->val.string.val = base_addr + offset;
+ result->val.string.len = getJsonbLength(container, index);
+ Assert(result->val.string.len >= 0);
+ }
+ else if (JBE_ISNUMERIC(entry))
+ {
+ result->type = jbvNumeric;
+ result->val.numeric = (Numeric) (base_addr + INTALIGN(offset));
+ }
+ else if (JBE_ISBOOL_TRUE(entry))
+ {
+ result->type = jbvBool;
+ result->val.boolean = true;
+ }
+ else if (JBE_ISBOOL_FALSE(entry))
+ {
+ result->type = jbvBool;
+ result->val.boolean = false;
+ }
+ else
+ {
+ Assert(JBE_ISCONTAINER(entry));
+ result->type = jbvBinary;
+ /* Remove alignment padding from data pointer and length */
+ result->val.binary.data = (JsonbContainer *) (base_addr + INTALIGN(offset));
+ result->val.binary.len = getJsonbLength(container, index) -
+ (INTALIGN(offset) - offset);
+ }
+}
+
+/*
+ * Push JsonbValue into JsonbParseState.
+ *
+ * Used when parsing JSON tokens to form Jsonb, or when converting an in-memory
+ * JsonbValue to a Jsonb.
+ *
+ * Initial state of *JsonbParseState is NULL, since it'll be allocated here
+ * originally (caller will get JsonbParseState back by reference).
+ *
+ * Only sequential tokens pertaining to non-container types should pass a
+ * JsonbValue. There is one exception -- WJB_BEGIN_ARRAY callers may pass a
+ * "raw scalar" pseudo array to append it - the actual scalar should be passed
+ * next and it will be added as the only member of the array.
+ *
+ * Values of type jbvBinary, which are rolled up arrays and objects,
+ * are unpacked before being added to the result.
+ */
+JsonbValue *
+pushJsonbValue(JsonbParseState **pstate, JsonbIteratorToken seq,
+ JsonbValue *jbval)
+{
+ JsonbIterator *it;
+ JsonbValue *res = NULL;
+ JsonbValue v;
+ JsonbIteratorToken tok;
+ int i;
+
+ if (jbval && (seq == WJB_ELEM || seq == WJB_VALUE) && jbval->type == jbvObject)
+ {
+ pushJsonbValue(pstate, WJB_BEGIN_OBJECT, NULL);
+ for (i = 0; i < jbval->val.object.nPairs; i++)
+ {
+ pushJsonbValue(pstate, WJB_KEY, &jbval->val.object.pairs[i].key);
+ pushJsonbValue(pstate, WJB_VALUE, &jbval->val.object.pairs[i].value);
+ }
+
+ return pushJsonbValue(pstate, WJB_END_OBJECT, NULL);
+ }
+
+ if (jbval && (seq == WJB_ELEM || seq == WJB_VALUE) && jbval->type == jbvArray)
+ {
+ pushJsonbValue(pstate, WJB_BEGIN_ARRAY, NULL);
+ for (i = 0; i < jbval->val.array.nElems; i++)
+ {
+ pushJsonbValue(pstate, WJB_ELEM, &jbval->val.array.elems[i]);
+ }
+
+ return pushJsonbValue(pstate, WJB_END_ARRAY, NULL);
+ }
+
+ if (!jbval || (seq != WJB_ELEM && seq != WJB_VALUE) ||
+ jbval->type != jbvBinary)
+ {
+ /* drop through */
+ return pushJsonbValueScalar(pstate, seq, jbval);
+ }
+
+ /* unpack the binary and add each piece to the pstate */
+ it = JsonbIteratorInit(jbval->val.binary.data);
+
+ if ((jbval->val.binary.data->header & JB_FSCALAR) && *pstate)
+ {
+ tok = JsonbIteratorNext(&it, &v, true);
+ Assert(tok == WJB_BEGIN_ARRAY);
+ Assert(v.type == jbvArray && v.val.array.rawScalar);
+
+ tok = JsonbIteratorNext(&it, &v, true);
+ Assert(tok == WJB_ELEM);
+
+ res = pushJsonbValueScalar(pstate, seq, &v);
+
+ tok = JsonbIteratorNext(&it, &v, true);
+ Assert(tok == WJB_END_ARRAY);
+ Assert(it == NULL);
+
+ return res;
+ }
+
+ while ((tok = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+ res = pushJsonbValueScalar(pstate, tok,
+ tok < WJB_BEGIN_ARRAY ||
+ (tok == WJB_BEGIN_ARRAY &&
+ v.val.array.rawScalar) ? &v : NULL);
+
+ return res;
+}
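+
+/*
+ * Typical usage sketch: start with a NULL JsonbParseState pointer, push
+ * WJB_BEGIN_OBJECT, then alternating WJB_KEY/WJB_VALUE pairs, and finally
+ * WJB_END_OBJECT; the JsonbValue returned by the closing push is the
+ * completed tree, which JsonbValueToJsonb() can then serialize (see its
+ * raw-scalar case above for the array flavor of the same pattern).
+ */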
+
+/*
+ * Do the actual pushing, with only scalar or pseudo-scalar-array values
+ * accepted.
+ */
+static JsonbValue *
+pushJsonbValueScalar(JsonbParseState **pstate, JsonbIteratorToken seq,
+ JsonbValue *scalarVal)
+{
+ JsonbValue *result = NULL;
+
+ switch (seq)
+ {
+ case WJB_BEGIN_ARRAY:
+ Assert(!scalarVal || scalarVal->val.array.rawScalar);
+ *pstate = pushState(pstate);
+ result = &(*pstate)->contVal;
+ (*pstate)->contVal.type = jbvArray;
+ (*pstate)->contVal.val.array.nElems = 0;
+ (*pstate)->contVal.val.array.rawScalar = (scalarVal &&
+ scalarVal->val.array.rawScalar);
+ if (scalarVal && scalarVal->val.array.nElems > 0)
+ {
+ /* Assume that this array is still really a scalar */
+ Assert(scalarVal->type == jbvArray);
+ (*pstate)->size = scalarVal->val.array.nElems;
+ }
+ else
+ {
+ (*pstate)->size = 4;
+ }
+ (*pstate)->contVal.val.array.elems = palloc(sizeof(JsonbValue) *
+ (*pstate)->size);
+ break;
+ case WJB_BEGIN_OBJECT:
+ Assert(!scalarVal);
+ *pstate = pushState(pstate);
+ result = &(*pstate)->contVal;
+ (*pstate)->contVal.type = jbvObject;
+ (*pstate)->contVal.val.object.nPairs = 0;
+ (*pstate)->size = 4;
+ (*pstate)->contVal.val.object.pairs = palloc(sizeof(JsonbPair) *
+ (*pstate)->size);
+ break;
+ case WJB_KEY:
+ Assert(scalarVal->type == jbvString);
+ appendKey(*pstate, scalarVal);
+ break;
+ case WJB_VALUE:
+ Assert(IsAJsonbScalar(scalarVal));
+ appendValue(*pstate, scalarVal);
+ break;
+ case WJB_ELEM:
+ Assert(IsAJsonbScalar(scalarVal));
+ appendElement(*pstate, scalarVal);
+ break;
+ case WJB_END_OBJECT:
+ uniqueifyJsonbObject(&(*pstate)->contVal);
+ /* fall through! */
+ case WJB_END_ARRAY:
+ /* Steps here common to WJB_END_OBJECT case */
+ Assert(!scalarVal);
+ result = &(*pstate)->contVal;
+
+ /*
+ * Pop stack and push current array/object as value in parent
+ * array/object
+ */
+ *pstate = (*pstate)->next;
+ if (*pstate)
+ {
+ switch ((*pstate)->contVal.type)
+ {
+ case jbvArray:
+ appendElement(*pstate, result);
+ break;
+ case jbvObject:
+ appendValue(*pstate, result);
+ break;
+ default:
+ elog(ERROR, "invalid jsonb container type");
+ }
+ }
+ break;
+ default:
+ elog(ERROR, "unrecognized jsonb sequential processing token");
+ }
+
+ return result;
+}
+
+/*
+ * pushJsonbValue() worker: Iteration-like forming of Jsonb
+ */
+static JsonbParseState *
+pushState(JsonbParseState **pstate)
+{
+ JsonbParseState *ns = palloc(sizeof(JsonbParseState));
+
+ ns->next = *pstate;
+ return ns;
+}
+
+/*
+ * pushJsonbValue() worker: Append a pair key to state when generating a Jsonb
+ */
+static void
+appendKey(JsonbParseState *pstate, JsonbValue *string)
+{
+ JsonbValue *object = &pstate->contVal;
+
+ Assert(object->type == jbvObject);
+ Assert(string->type == jbvString);
+
+ if (object->val.object.nPairs >= JSONB_MAX_PAIRS)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("number of jsonb object pairs exceeds the maximum allowed (%zu)",
+ JSONB_MAX_PAIRS)));
+
+ if (object->val.object.nPairs >= pstate->size)
+ {
+ pstate->size *= 2;
+ object->val.object.pairs = repalloc(object->val.object.pairs,
+ sizeof(JsonbPair) * pstate->size);
+ }
+
+ object->val.object.pairs[object->val.object.nPairs].key = *string;
+ object->val.object.pairs[object->val.object.nPairs].order = object->val.object.nPairs;
+}
+
+/*
+ * pushJsonbValue() worker: Append a pair value to state when generating a
+ * Jsonb
+ */
+static void
+appendValue(JsonbParseState *pstate, JsonbValue *scalarVal)
+{
+ JsonbValue *object = &pstate->contVal;
+
+ Assert(object->type == jbvObject);
+
+ object->val.object.pairs[object->val.object.nPairs++].value = *scalarVal;
+}
+
+/*
+ * pushJsonbValue() worker: Append an element to state when generating a Jsonb
+ */
+static void
+appendElement(JsonbParseState *pstate, JsonbValue *scalarVal)
+{
+ JsonbValue *array = &pstate->contVal;
+
+ Assert(array->type == jbvArray);
+
+ if (array->val.array.nElems >= JSONB_MAX_ELEMS)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("number of jsonb array elements exceeds the maximum allowed (%zu)",
+ JSONB_MAX_ELEMS)));
+
+ if (array->val.array.nElems >= pstate->size)
+ {
+ pstate->size *= 2;
+ array->val.array.elems = repalloc(array->val.array.elems,
+ sizeof(JsonbValue) * pstate->size);
+ }
+
+ array->val.array.elems[array->val.array.nElems++] = *scalarVal;
+}
+
+/*
+ * Given a JsonbContainer, expand to JsonbIterator to iterate over items
+ * fully expanded to in-memory representation for manipulation.
+ *
+ * See JsonbIteratorNext() for notes on memory management.
+ */
+JsonbIterator *
+JsonbIteratorInit(JsonbContainer *container)
+{
+ return iteratorFromContainer(container, NULL);
+}
+
+/*
+ * Get next JsonbValue while iterating
+ *
+ * Caller should initially pass their own, original iterator. They may get
+ * back a child iterator palloc()'d here instead. The function can be relied
+ * on to free those child iterators, lest the memory allocated for highly
+ * nested objects become unreasonable, but only if callers don't end iteration
+ * early (by breaking upon having found something in a search, for example).
+ *
+ * Callers in such a scenario that are particularly sensitive to leaking
+ * memory in a long-lived context may walk the ancestral tree from the final
+ * iterator we left them with to its oldest ancestor, pfree()ing as they go.
+ * They do not have to free any other memory previously allocated for iterators
+ * but not accessible as direct ancestors of the iterator they're last passed
+ * back.
+ *
+ * Returns "Jsonb sequential processing" token value. Iterator "state"
+ * reflects the current stage of the process in a less granular fashion, and is
+ * mostly used here to track things internally with respect to particular
+ * iterators.
+ *
+ * Clients of this function should not have to handle any jbvBinary values
+ * (since recursive calls will deal with this), provided skipNested is false.
+ * It is our job to expand the jbvBinary representation without bothering them
+ * with it. However, clients should not take it upon themselves to touch array
+ * or Object element/pair buffers, since their element/pair pointers are
+ * garbage. Also, *val will not be set when returning WJB_END_ARRAY or
+ * WJB_END_OBJECT, on the assumption that it's only useful to access values
+ * when recursing in.
+ */
+JsonbIteratorToken
+JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested)
+{
+ if (*it == NULL)
+ return WJB_DONE;
+
+ /*
+ * When stepping into a nested container, we jump back here to start
+ * processing the child. We will not recurse further in one call, because
+ * processing the child will always begin in JBI_ARRAY_START or
+ * JBI_OBJECT_START state.
+ */
+recurse:
+ switch ((*it)->state)
+ {
+ case JBI_ARRAY_START:
+ /* Set v to array on first array call */
+ val->type = jbvArray;
+ val->val.array.nElems = (*it)->nElems;
+
+ /*
+ * v->val.array.elems is not actually set, because we aren't doing
+ * a full conversion
+ */
+ val->val.array.rawScalar = (*it)->isScalar;
+ (*it)->curIndex = 0;
+ (*it)->curDataOffset = 0;
+ (*it)->curValueOffset = 0; /* not actually used */
+ /* Set state for next call */
+ (*it)->state = JBI_ARRAY_ELEM;
+ return WJB_BEGIN_ARRAY;
+
+ case JBI_ARRAY_ELEM:
+ if ((*it)->curIndex >= (*it)->nElems)
+ {
+ /*
+ * All elements within array already processed. Report this
+ * to caller, and give it back original parent iterator (which
+ * independently tracks iteration progress at its level of
+ * nesting).
+ */
+ *it = freeAndGetParent(*it);
+ return WJB_END_ARRAY;
+ }
+
+ fillJsonbValue((*it)->container, (*it)->curIndex,
+ (*it)->dataProper, (*it)->curDataOffset,
+ val);
+
+ JBE_ADVANCE_OFFSET((*it)->curDataOffset,
+ (*it)->children[(*it)->curIndex]);
+ (*it)->curIndex++;
+
+ if (!IsAJsonbScalar(val) && !skipNested)
+ {
+ /* Recurse into container. */
+ *it = iteratorFromContainer(val->val.binary.data, *it);
+ goto recurse;
+ }
+ else
+ {
+ /*
+ * Scalar item in array, or a container and caller didn't want
+ * us to recurse into it.
+ */
+ return WJB_ELEM;
+ }
+
+ case JBI_OBJECT_START:
+ /* Set v to object on first object call */
+ val->type = jbvObject;
+ val->val.object.nPairs = (*it)->nElems;
+
+ /*
+ * v->val.object.pairs is not actually set, because we aren't
+ * doing a full conversion
+ */
+ (*it)->curIndex = 0;
+ (*it)->curDataOffset = 0;
+ (*it)->curValueOffset = getJsonbOffset((*it)->container,
+ (*it)->nElems);
+ /* Set state for next call */
+ (*it)->state = JBI_OBJECT_KEY;
+ return WJB_BEGIN_OBJECT;
+
+ case JBI_OBJECT_KEY:
+ if ((*it)->curIndex >= (*it)->nElems)
+ {
+ /*
+ * All pairs within object already processed. Report this to
+ * caller, and give it back original containing iterator
+ * (which independently tracks iteration progress at its level
+ * of nesting).
+ */
+ *it = freeAndGetParent(*it);
+ return WJB_END_OBJECT;
+ }
+ else
+ {
+ /* Return key of a key/value pair. */
+ fillJsonbValue((*it)->container, (*it)->curIndex,
+ (*it)->dataProper, (*it)->curDataOffset,
+ val);
+ if (val->type != jbvString)
+ elog(ERROR, "unexpected jsonb type as object key");
+
+ /* Set state for next call */
+ (*it)->state = JBI_OBJECT_VALUE;
+ return WJB_KEY;
+ }
+
+ case JBI_OBJECT_VALUE:
+ /* Set state for next call */
+ (*it)->state = JBI_OBJECT_KEY;
+
+ fillJsonbValue((*it)->container, (*it)->curIndex + (*it)->nElems,
+ (*it)->dataProper, (*it)->curValueOffset,
+ val);
+
+ JBE_ADVANCE_OFFSET((*it)->curDataOffset,
+ (*it)->children[(*it)->curIndex]);
+ JBE_ADVANCE_OFFSET((*it)->curValueOffset,
+ (*it)->children[(*it)->curIndex + (*it)->nElems]);
+ (*it)->curIndex++;
+
+ /*
+ * Value may be a container, in which case we recurse with new,
+ * child iterator (unless the caller asked not to, by passing
+ * skipNested).
+ */
+ if (!IsAJsonbScalar(val) && !skipNested)
+ {
+ *it = iteratorFromContainer(val->val.binary.data, *it);
+ goto recurse;
+ }
+ else
+ return WJB_VALUE;
+ }
+
+ elog(ERROR, "invalid iterator state");
+ return -1;
+}
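+
+/*
+ * A minimal usage sketch of the iterator protocol above (jb is assumed to be
+ * a Jsonb * obtained by the caller, e.g. via PG_GETARG_JSONB_P()):
+ *
+ *		JsonbIterator *it = JsonbIteratorInit(&jb->root);
+ *		JsonbValue	v;
+ *		JsonbIteratorToken tok;
+ *
+ *		while ((tok = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+ *		{
+ *			if (tok == WJB_KEY || tok == WJB_VALUE || tok == WJB_ELEM)
+ *			{
+ *				... inspect the scalar in v ...
+ *			}
+ *			... WJB_BEGIN_/WJB_END_ tokens only report structure ...
+ *		}
+ *
+ * With skipNested = false, nested containers are expanded in place, so v
+ * always holds a scalar for WJB_VALUE and WJB_ELEM tokens.
+ */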
+
+/*
+ * Initialize an iterator for iterating all elements in a container.
+ */
+static JsonbIterator *
+iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent)
+{
+ JsonbIterator *it;
+
+ it = palloc0(sizeof(JsonbIterator));
+ it->container = container;
+ it->parent = parent;
+ it->nElems = JsonContainerSize(container);
+
+ /* Array starts just after header */
+ it->children = container->children;
+
+ switch (container->header & (JB_FARRAY | JB_FOBJECT))
+ {
+ case JB_FARRAY:
+ it->dataProper =
+ (char *) it->children + it->nElems * sizeof(JEntry);
+ it->isScalar = JsonContainerIsScalar(container);
+ /* This is either a "raw scalar", or an array */
+ Assert(!it->isScalar || it->nElems == 1);
+
+ it->state = JBI_ARRAY_START;
+ break;
+
+ case JB_FOBJECT:
+ it->dataProper =
+ (char *) it->children + it->nElems * sizeof(JEntry) * 2;
+ it->state = JBI_OBJECT_START;
+ break;
+
+ default:
+ elog(ERROR, "unknown type of jsonb container");
+ }
+
+ return it;
+}
+
+/*
+ * JsonbIteratorNext() worker: Return parent, while freeing memory for current
+ * iterator
+ */
+static JsonbIterator *
+freeAndGetParent(JsonbIterator *it)
+{
+ JsonbIterator *v = it->parent;
+
+ pfree(it);
+ return v;
+}
+
+/*
+ * Worker for "contains" operator's function
+ *
+ * Formally speaking, containment is top-down, unordered subtree isomorphism.
+ *
+ * Takes iterators that belong to some container type. These iterators
+ * "belong" to those values in the sense that they've just been initialized in
+ * respect of them by the caller (perhaps in a nested fashion).
+ *
+ * "val" is lhs Jsonb, and mContained is rhs Jsonb when called from top level.
+ * We determine if mContained is contained within val.
+ */
+bool
+JsonbDeepContains(JsonbIterator **val, JsonbIterator **mContained)
+{
+ JsonbValue vval,
+ vcontained;
+ JsonbIteratorToken rval,
+ rcont;
+
+ /*
+ * Guard against stack overflow due to overly complex Jsonb.
+ *
+ * Functions called here independently take this precaution, but that
+ * might not be sufficient since this is also a recursive function.
+ */
+ check_stack_depth();
+
+ rval = JsonbIteratorNext(val, &vval, false);
+ rcont = JsonbIteratorNext(mContained, &vcontained, false);
+
+ if (rval != rcont)
+ {
+ /*
+ * The differing return values can immediately be taken as indicating
+ * two differing container types at this nesting level, which is
+ * sufficient reason to give up entirely (but it should be the case
+ * that they're both some container type).
+ */
+ Assert(rval == WJB_BEGIN_OBJECT || rval == WJB_BEGIN_ARRAY);
+ Assert(rcont == WJB_BEGIN_OBJECT || rcont == WJB_BEGIN_ARRAY);
+ return false;
+ }
+ else if (rcont == WJB_BEGIN_OBJECT)
+ {
+ Assert(vval.type == jbvObject);
+ Assert(vcontained.type == jbvObject);
+
+ /*
+ * If the lhs has fewer pairs than the rhs, it can't possibly contain
+ * the rhs. (This conclusion is safe only because we de-duplicate
+ * keys in all Jsonb objects; thus there can be no corresponding
+ * optimization in the array case.) The case probably won't arise
+ * often, but since it's such a cheap check we may as well make it.
+ */
+ if (vval.val.object.nPairs < vcontained.val.object.nPairs)
+ return false;
+
+ /* Work through rhs "is it contained within?" object */
+ for (;;)
+ {
+ JsonbValue *lhsVal; /* lhsVal is from pair in lhs object */
+ JsonbValue lhsValBuf;
+
+ rcont = JsonbIteratorNext(mContained, &vcontained, false);
+
+ /*
+ * When we get through caller's rhs "is it contained within?"
+ * object without failing to find one of its values, it's
+ * contained.
+ */
+ if (rcont == WJB_END_OBJECT)
+ return true;
+
+ Assert(rcont == WJB_KEY);
+ Assert(vcontained.type == jbvString);
+
+ /* First, find value by key... */
+ lhsVal =
+ getKeyJsonValueFromContainer((*val)->container,
+ vcontained.val.string.val,
+ vcontained.val.string.len,
+ &lhsValBuf);
+ if (!lhsVal)
+ return false;
+
+ /*
+ * ...at this stage it is apparent that there is at least a key
+ * match for this rhs pair.
+ */
+ rcont = JsonbIteratorNext(mContained, &vcontained, true);
+
+ Assert(rcont == WJB_VALUE);
+
+ /*
+ * Compare rhs pair's value with lhs pair's value just found using
+ * key
+ */
+ if (lhsVal->type != vcontained.type)
+ {
+ return false;
+ }
+ else if (IsAJsonbScalar(lhsVal))
+ {
+ if (!equalsJsonbScalarValue(lhsVal, &vcontained))
+ return false;
+ }
+ else
+ {
+ /* Nested container value (object or array) */
+ JsonbIterator *nestval,
+ *nestContained;
+
+ Assert(lhsVal->type == jbvBinary);
+ Assert(vcontained.type == jbvBinary);
+
+ nestval = JsonbIteratorInit(lhsVal->val.binary.data);
+ nestContained = JsonbIteratorInit(vcontained.val.binary.data);
+
+ /*
+ * Match "value" side of rhs datum object's pair recursively.
+ * It's a nested structure.
+ *
+ * Note that nesting still has to "match up" at the right
+ * nesting sub-levels. However, there need only be zero or
+ * more matching pairs (or elements) at each nesting level
+ * (provided the *rhs* pairs/elements *all* match on each
+ * level), which enables searching nested structures for a
+ * single String or other primitive type sub-datum quite
+ * effectively (provided the user constructed the rhs nested
+ * structure such that we "know where to look").
+ *
+ * In other words, the mapping of container nodes in the rhs
+ * "vcontained" Jsonb to internal nodes on the lhs is
+ * injective, and parent-child edges on the rhs must be mapped
+ * to parent-child edges on the lhs to satisfy the condition
+ * of containment (plus of course the mapped nodes must be
+ * equal).
+ */
+ if (!JsonbDeepContains(&nestval, &nestContained))
+ return false;
+ }
+ }
+ }
+ else if (rcont == WJB_BEGIN_ARRAY)
+ {
+ JsonbValue *lhsConts = NULL;
+ uint32 nLhsElems = vval.val.array.nElems;
+
+ Assert(vval.type == jbvArray);
+ Assert(vcontained.type == jbvArray);
+
+ /*
+ * Handle distinction between "raw scalar" pseudo arrays, and real
+ * arrays.
+ *
+ * A raw scalar may contain another raw scalar, and an array may
+ * contain a raw scalar, but a raw scalar may not contain an array. We
+ * don't do something like this for the object case, since objects can
+ * only contain pairs, never raw scalars (a pair is represented by an
+ * rhs object argument with a single contained pair).
+ */
+ if (vval.val.array.rawScalar && !vcontained.val.array.rawScalar)
+ return false;
+
+ /* Work through rhs "is it contained within?" array */
+ for (;;)
+ {
+ rcont = JsonbIteratorNext(mContained, &vcontained, true);
+
+ /*
+ * When we get through caller's rhs "is it contained within?"
+ * array without failing to find one of its values, it's
+ * contained.
+ */
+ if (rcont == WJB_END_ARRAY)
+ return true;
+
+ Assert(rcont == WJB_ELEM);
+
+ if (IsAJsonbScalar(&vcontained))
+ {
+ if (!findJsonbValueFromContainer((*val)->container,
+ JB_FARRAY,
+ &vcontained))
+ return false;
+ }
+ else
+ {
+ uint32 i;
+
+ /*
+ * If this is first container found in rhs array (at this
+ * depth), initialize temp lhs array of containers
+ */
+ if (lhsConts == NULL)
+ {
+ uint32 j = 0;
+
+ /* Make room for all possible values */
+ lhsConts = palloc(sizeof(JsonbValue) * nLhsElems);
+
+ for (i = 0; i < nLhsElems; i++)
+ {
+ /* Store all lhs elements in temp array */
+ rcont = JsonbIteratorNext(val, &vval, true);
+ Assert(rcont == WJB_ELEM);
+
+ if (vval.type == jbvBinary)
+ lhsConts[j++] = vval;
+ }
+
+ /* No container elements in temp array, so give up now */
+ if (j == 0)
+ return false;
+
+ /* We may have only partially filled array */
+ nLhsElems = j;
+ }
+
+ /* XXX: Nested array containment is O(N^2) */
+ for (i = 0; i < nLhsElems; i++)
+ {
+ /* Nested container value (object or array) */
+ JsonbIterator *nestval,
+ *nestContained;
+ bool contains;
+
+ nestval = JsonbIteratorInit(lhsConts[i].val.binary.data);
+ nestContained = JsonbIteratorInit(vcontained.val.binary.data);
+
+ contains = JsonbDeepContains(&nestval, &nestContained);
+
+ if (nestval)
+ pfree(nestval);
+ if (nestContained)
+ pfree(nestContained);
+ if (contains)
+ break;
+ }
+
+ /*
+ * Report that the rhs container value is not contained if we
+ * couldn't match it against *some* lhs container
+ */
+ if (i == nLhsElems)
+ return false;
+ }
+ }
+ }
+ else
+ {
+ elog(ERROR, "invalid jsonb container type");
+ }
+
+ elog(ERROR, "unexpectedly fell off end of jsonb container");
+ return false;
+}
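+
+/*
+ * For illustration, the containment semantics implemented above back the
+ * SQL-level @> operator; a few representative cases:
+ *
+ *		'{"a": {"b": 1, "c": 2}}'::jsonb @> '{"a": {"b": 1}}'	-- true
+ *		'[1, 2, [3, 4]]'::jsonb @> '[[3]]'						-- true
+ *		'[1, 2, [3, 4]]'::jsonb @> '[3]'						-- false
+ *		'"foo"'::jsonb @> '"foo"'								-- true
+ *
+ * Array elements may match in any order, but nesting levels must line up.
+ */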
+
+/*
+ * Hash a JsonbValue scalar value, mixing the hash value into an existing
+ * hash provided by the caller.
+ *
+ * Some callers may wish to independently XOR in JB_FOBJECT and JB_FARRAY
+ * flags.
+ */
+void
+JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash)
+{
+ uint32 tmp;
+
+ /* Compute hash value for scalarVal */
+ switch (scalarVal->type)
+ {
+ case jbvNull:
+ tmp = 0x01;
+ break;
+ case jbvString:
+ tmp = DatumGetUInt32(hash_any((const unsigned char *) scalarVal->val.string.val,
+ scalarVal->val.string.len));
+ break;
+ case jbvNumeric:
+ /* Must hash equal numerics to equal hash codes */
+ tmp = DatumGetUInt32(DirectFunctionCall1(hash_numeric,
+ NumericGetDatum(scalarVal->val.numeric)));
+ break;
+ case jbvBool:
+ tmp = scalarVal->val.boolean ? 0x02 : 0x04;
+
+ break;
+ default:
+ elog(ERROR, "invalid jsonb scalar type");
+ tmp = 0; /* keep compiler quiet */
+ break;
+ }
+
+ /*
+ * Combine hash values of successive keys, values and elements by rotating
+ * the previous value left 1 bit, then XOR'ing in the new
+ * key/value/element's hash value.
+ */
+ *hash = pg_rotate_left32(*hash, 1);
+ *hash ^= tmp;
+}
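+
+/*
+ * A sketch of how a caller (for instance a hash opclass support function,
+ * as in jsonb_op.c) might combine per-scalar hashes over a whole container
+ * using the function above; variable names here are hypothetical:
+ *
+ *		JsonbIterator *it = JsonbIteratorInit(&jb->root);
+ *		JsonbValue	v;
+ *		JsonbIteratorToken tok;
+ *		uint32		hash = 0;
+ *
+ *		while ((tok = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+ *		{
+ *			switch (tok)
+ *			{
+ *				case WJB_BEGIN_ARRAY:
+ *				case WJB_BEGIN_OBJECT:
+ *					... optionally mix in JB_FARRAY / JB_FOBJECT ...
+ *					break;
+ *				case WJB_KEY:
+ *				case WJB_VALUE:
+ *				case WJB_ELEM:
+ *					JsonbHashScalarValue(&v, &hash);
+ *					break;
+ *				default:
+ *					break;
+ *			}
+ *		}
+ */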
+
+/*
+ * Hash a value to a 64-bit value, with a seed. Otherwise, similar to
+ * JsonbHashScalarValue.
+ */
+void
+JsonbHashScalarValueExtended(const JsonbValue *scalarVal, uint64 *hash,
+ uint64 seed)
+{
+ uint64 tmp;
+
+ switch (scalarVal->type)
+ {
+ case jbvNull:
+ tmp = seed + 0x01;
+ break;
+ case jbvString:
+ tmp = DatumGetUInt64(hash_any_extended((const unsigned char *) scalarVal->val.string.val,
+ scalarVal->val.string.len,
+ seed));
+ break;
+ case jbvNumeric:
+ tmp = DatumGetUInt64(DirectFunctionCall2(hash_numeric_extended,
+ NumericGetDatum(scalarVal->val.numeric),
+ UInt64GetDatum(seed)));
+ break;
+ case jbvBool:
+ if (seed)
+ tmp = DatumGetUInt64(DirectFunctionCall2(hashcharextended,
+ BoolGetDatum(scalarVal->val.boolean),
+ UInt64GetDatum(seed)));
+ else
+ tmp = scalarVal->val.boolean ? 0x02 : 0x04;
+
+ break;
+ default:
+ elog(ERROR, "invalid jsonb scalar type");
+ break;
+ }
+
+ *hash = ROTATE_HIGH_AND_LOW_32BITS(*hash);
+ *hash ^= tmp;
+}
+
+/*
+ * Are two scalar JsonbValues of the same type a and b equal?
+ */
+static bool
+equalsJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar)
+{
+ if (aScalar->type == bScalar->type)
+ {
+ switch (aScalar->type)
+ {
+ case jbvNull:
+ return true;
+ case jbvString:
+ return lengthCompareJsonbStringValue(aScalar, bScalar) == 0;
+ case jbvNumeric:
+ return DatumGetBool(DirectFunctionCall2(numeric_eq,
+ PointerGetDatum(aScalar->val.numeric),
+ PointerGetDatum(bScalar->val.numeric)));
+ case jbvBool:
+ return aScalar->val.boolean == bScalar->val.boolean;
+
+ default:
+ elog(ERROR, "invalid jsonb scalar type");
+ }
+ }
+ elog(ERROR, "jsonb scalar type mismatch");
+ return false;
+}
+
+/*
+ * Compare two scalar JsonbValues, returning -1, 0, or 1.
+ *
+ * Strings are compared using the default collation. Used by B-tree
+ * operators, where a lexical sort order is generally expected.
+ */
+static int
+compareJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar)
+{
+ if (aScalar->type == bScalar->type)
+ {
+ switch (aScalar->type)
+ {
+ case jbvNull:
+ return 0;
+ case jbvString:
+ return varstr_cmp(aScalar->val.string.val,
+ aScalar->val.string.len,
+ bScalar->val.string.val,
+ bScalar->val.string.len,
+ DEFAULT_COLLATION_OID);
+ case jbvNumeric:
+ return DatumGetInt32(DirectFunctionCall2(numeric_cmp,
+ PointerGetDatum(aScalar->val.numeric),
+ PointerGetDatum(bScalar->val.numeric)));
+ case jbvBool:
+ if (aScalar->val.boolean == bScalar->val.boolean)
+ return 0;
+ else if (aScalar->val.boolean > bScalar->val.boolean)
+ return 1;
+ else
+ return -1;
+ default:
+ elog(ERROR, "invalid jsonb scalar type");
+ }
+ }
+ elog(ERROR, "jsonb scalar type mismatch");
+ return -1;
+}
+
+
+/*
+ * Functions for manipulating the resizable buffer used by convertToJsonb
+ * and its subroutines.
+ */
+
+/*
+ * Reserve 'len' bytes, at the end of the buffer, enlarging it if necessary.
+ * Returns the offset to the reserved area. The caller is expected to fill
+ * the reserved area later with copyToBuffer().
+ */
+static int
+reserveFromBuffer(StringInfo buffer, int len)
+{
+ int offset;
+
+ /* Make more room if needed */
+ enlargeStringInfo(buffer, len);
+
+ /* remember current offset */
+ offset = buffer->len;
+
+ /* reserve the space */
+ buffer->len += len;
+
+ /*
+ * Keep a trailing null in place, even though it's not useful for us; it
+ * seems best to preserve the invariants of StringInfos.
+ */
+ buffer->data[buffer->len] = '\0';
+
+ return offset;
+}
+
+/*
+ * Copy 'len' bytes to a previously reserved area in buffer.
+ */
+static void
+copyToBuffer(StringInfo buffer, int offset, const char *data, int len)
+{
+ memcpy(buffer->data + offset, data, len);
+}
+
+/*
+ * A shorthand for reserveFromBuffer + copyToBuffer.
+ */
+static void
+appendToBuffer(StringInfo buffer, const char *data, int len)
+{
+ int offset;
+
+ offset = reserveFromBuffer(buffer, len);
+ copyToBuffer(buffer, offset, data, len);
+}
+
+
+/*
+ * Append padding, so that the length of the StringInfo is int-aligned.
+ * Returns the number of padding bytes appended.
+ */
+static short
+padBufferToInt(StringInfo buffer)
+{
+ int padlen,
+ p,
+ offset;
+
+ padlen = INTALIGN(buffer->len) - buffer->len;
+
+ offset = reserveFromBuffer(buffer, padlen);
+
+ /* padlen must be small, so this is probably faster than a memset */
+ for (p = 0; p < padlen; p++)
+ buffer->data[offset + p] = '\0';
+
+ return padlen;
+}
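+
+/*
+ * For example, if buffer->len is 6 when padBufferToInt() is called, two zero
+ * bytes are appended (INTALIGN(6) = 8, assuming the usual 4-byte int
+ * alignment) and 2 is returned; if the length is already a multiple of 4,
+ * nothing is appended and 0 is returned.
+ */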
+
+/*
+ * Given a JsonbValue, convert to Jsonb. The result is palloc'd.
+ */
+static Jsonb *
+convertToJsonb(JsonbValue *val)
+{
+ StringInfoData buffer;
+ JEntry jentry;
+ Jsonb *res;
+
+ /* Should not already have binary representation */
+ Assert(val->type != jbvBinary);
+
+ /* Allocate an output buffer. It will be enlarged as needed */
+ initStringInfo(&buffer);
+
+ /* Make room for the varlena header */
+ reserveFromBuffer(&buffer, VARHDRSZ);
+
+ convertJsonbValue(&buffer, &jentry, val, 0);
+
+ /*
+ * Note: the JEntry of the root is discarded. Therefore the root
+ * JsonbContainer struct must contain enough information to tell what kind
+ * of value it is.
+ */
+
+ res = (Jsonb *) buffer.data;
+
+ SET_VARSIZE(res, buffer.len);
+
+ return res;
+}
+
+/*
+ * Subroutine of convertToJsonb: serialize a single JsonbValue into buffer.
+ *
+ * The JEntry header for this node is returned in *header. It is filled in
+ * with the length of this value and appropriate type bits. If we wish to
+ * store an end offset rather than a length, it is the caller's responsibility
+ * to adjust for that.
+ *
+ * If the value is an array or an object, this recurses. 'level' is only used
+ * for debugging purposes.
+ */
+static void
+convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level)
+{
+ check_stack_depth();
+
+ if (!val)
+ return;
+
+ /*
+ * A JsonbValue passed as val should never have a type of jbvBinary, and
+ * neither should any of its sub-components. Those values will be produced
+ * by convertJsonbArray and convertJsonbObject, the results of which will
+ * not be passed back to this function as an argument.
+ */
+
+ if (IsAJsonbScalar(val))
+ convertJsonbScalar(buffer, header, val);
+ else if (val->type == jbvArray)
+ convertJsonbArray(buffer, header, val, level);
+ else if (val->type == jbvObject)
+ convertJsonbObject(buffer, header, val, level);
+ else
+ elog(ERROR, "unknown type of jsonb container to convert");
+}
+
+static void
+convertJsonbArray(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
+{
+ int base_offset;
+ int jentry_offset;
+ int i;
+ int totallen;
+ uint32 header;
+ int nElems = val->val.array.nElems;
+
+ /* Remember where in the buffer this array starts. */
+ base_offset = buffer->len;
+
+ /* Align to 4-byte boundary (any padding counts as part of my data) */
+ padBufferToInt(buffer);
+
+ /*
+ * Construct the header Jentry and store it in the beginning of the
+ * variable-length payload.
+ */
+ header = nElems | JB_FARRAY;
+ if (val->val.array.rawScalar)
+ {
+ Assert(nElems == 1);
+ Assert(level == 0);
+ header |= JB_FSCALAR;
+ }
+
+ appendToBuffer(buffer, (char *) &header, sizeof(uint32));
+
+ /* Reserve space for the JEntries of the elements. */
+ jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nElems);
+
+ totallen = 0;
+ for (i = 0; i < nElems; i++)
+ {
+ JsonbValue *elem = &val->val.array.elems[i];
+ int len;
+ JEntry meta;
+
+ /*
+ * Convert element, producing a JEntry and appending its
+ * variable-length data to buffer
+ */
+ convertJsonbValue(buffer, &meta, elem, level + 1);
+
+ len = JBE_OFFLENFLD(meta);
+ totallen += len;
+
+ /*
+ * Bail out if total variable-length data exceeds what will fit in a
+ * JEntry length field. We check this in each iteration, not just
+ * once at the end, to forestall possible integer overflow.
+ */
+ if (totallen > JENTRY_OFFLENMASK)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
+ JENTRY_OFFLENMASK)));
+
+ /*
+ * Convert each JB_OFFSET_STRIDE'th length to an offset.
+ */
+ if ((i % JB_OFFSET_STRIDE) == 0)
+ meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
+
+ copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
+ jentry_offset += sizeof(JEntry);
+ }
+
+ /* Total data size is everything we've appended to buffer */
+ totallen = buffer->len - base_offset;
+
+ /* Check length again, since we didn't include the metadata above */
+ if (totallen > JENTRY_OFFLENMASK)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("total size of jsonb array elements exceeds the maximum of %u bytes",
+ JENTRY_OFFLENMASK)));
+
+ /* Initialize the header of this node in the container's JEntry array */
+ *pheader = JENTRY_ISCONTAINER | totallen;
+}
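+
+/*
+ * To make the length/offset scheme above concrete: most JEntries store only
+ * the length of their variable-length data, but every JB_OFFSET_STRIDE'th
+ * entry instead stores the end offset within the container's data area, with
+ * JENTRY_HAS_OFF set.  Assuming a stride of 32, entries 0 and 32 carry
+ * offsets while entries 1-31 carry lengths; a reader reconstructs any
+ * entry's offset by starting from the nearest preceding offset entry and
+ * summing lengths.  This keeps random access cheap without making every
+ * entry offset-based, since all-offset entries would make the data much
+ * less compressible.
+ */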
+
+static void
+convertJsonbObject(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level)
+{
+ int base_offset;
+ int jentry_offset;
+ int i;
+ int totallen;
+ uint32 header;
+ int nPairs = val->val.object.nPairs;
+
+ /* Remember where in the buffer this object starts. */
+ base_offset = buffer->len;
+
+ /* Align to 4-byte boundary (any padding counts as part of my data) */
+ padBufferToInt(buffer);
+
+ /*
+ * Construct the header Jentry and store it in the beginning of the
+ * variable-length payload.
+ */
+ header = nPairs | JB_FOBJECT;
+ appendToBuffer(buffer, (char *) &header, sizeof(uint32));
+
+ /* Reserve space for the JEntries of the keys and values. */
+ jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nPairs * 2);
+
+ /*
+ * Iterate over the keys, then over the values, since that is the ordering
+ * we want in the on-disk representation.
+ */
+ totallen = 0;
+ for (i = 0; i < nPairs; i++)
+ {
+ JsonbPair *pair = &val->val.object.pairs[i];
+ int len;
+ JEntry meta;
+
+ /*
+ * Convert key, producing a JEntry and appending its variable-length
+ * data to buffer
+ */
+ convertJsonbScalar(buffer, &meta, &pair->key);
+
+ len = JBE_OFFLENFLD(meta);
+ totallen += len;
+
+ /*
+ * Bail out if total variable-length data exceeds what will fit in a
+ * JEntry length field. We check this in each iteration, not just
+ * once at the end, to forestall possible integer overflow.
+ */
+ if (totallen > JENTRY_OFFLENMASK)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
+ JENTRY_OFFLENMASK)));
+
+ /*
+ * Convert each JB_OFFSET_STRIDE'th length to an offset.
+ */
+ if ((i % JB_OFFSET_STRIDE) == 0)
+ meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
+
+ copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
+ jentry_offset += sizeof(JEntry);
+ }
+ for (i = 0; i < nPairs; i++)
+ {
+ JsonbPair *pair = &val->val.object.pairs[i];
+ int len;
+ JEntry meta;
+
+ /*
+ * Convert value, producing a JEntry and appending its variable-length
+ * data to buffer
+ */
+ convertJsonbValue(buffer, &meta, &pair->value, level + 1);
+
+ len = JBE_OFFLENFLD(meta);
+ totallen += len;
+
+ /*
+ * Bail out if total variable-length data exceeds what will fit in a
+ * JEntry length field. We check this in each iteration, not just
+ * once at the end, to forestall possible integer overflow.
+ */
+ if (totallen > JENTRY_OFFLENMASK)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
+ JENTRY_OFFLENMASK)));
+
+ /*
+ * Convert each JB_OFFSET_STRIDE'th length to an offset.
+ */
+ if (((i + nPairs) % JB_OFFSET_STRIDE) == 0)
+ meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF;
+
+ copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry));
+ jentry_offset += sizeof(JEntry);
+ }
+
+ /* Total data size is everything we've appended to buffer */
+ totallen = buffer->len - base_offset;
+
+ /* Check length again, since we didn't include the metadata above */
+ if (totallen > JENTRY_OFFLENMASK)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("total size of jsonb object elements exceeds the maximum of %u bytes",
+ JENTRY_OFFLENMASK)));
+
+ /* Initialize the header of this node in the container's JEntry array */
+ *pheader = JENTRY_ISCONTAINER | totallen;
+}
+
+static void
+convertJsonbScalar(StringInfo buffer, JEntry *jentry, JsonbValue *scalarVal)
+{
+ int numlen;
+ short padlen;
+
+ switch (scalarVal->type)
+ {
+ case jbvNull:
+ *jentry = JENTRY_ISNULL;
+ break;
+
+ case jbvString:
+ appendToBuffer(buffer, scalarVal->val.string.val, scalarVal->val.string.len);
+
+ *jentry = scalarVal->val.string.len;
+ break;
+
+ case jbvNumeric:
+ numlen = VARSIZE_ANY(scalarVal->val.numeric);
+ padlen = padBufferToInt(buffer);
+
+ appendToBuffer(buffer, (char *) scalarVal->val.numeric, numlen);
+
+ *jentry = JENTRY_ISNUMERIC | (padlen + numlen);
+ break;
+
+ case jbvBool:
+ *jentry = (scalarVal->val.boolean) ?
+ JENTRY_ISBOOL_TRUE : JENTRY_ISBOOL_FALSE;
+ break;
+
+ case jbvDatetime:
+ {
+ char buf[MAXDATELEN + 1];
+ size_t len;
+
+ JsonEncodeDateTime(buf,
+ scalarVal->val.datetime.value,
+ scalarVal->val.datetime.typid,
+ &scalarVal->val.datetime.tz);
+ len = strlen(buf);
+ appendToBuffer(buffer, buf, len);
+
+ *jentry = len;
+ }
+ break;
+
+ default:
+ elog(ERROR, "invalid jsonb scalar type");
+ }
+}
+
+/*
+ * Compare two jbvString JsonbValue values, a and b.
+ *
+ * This is a special qsort() comparator used to sort strings in certain
+ * internal contexts where it is sufficient to have a well-defined sort order.
+ * In particular, object pair keys are sorted according to this criteria to
+ * facilitate cheap binary searches where we don't care about lexical sort
+ * order.
+ *
+ * a and b are first sorted based on their length. If a tie-breaker is
+ * required, only then do we consider string binary equality.
+ */
+static int
+lengthCompareJsonbStringValue(const void *a, const void *b)
+{
+ const JsonbValue *va = (const JsonbValue *) a;
+ const JsonbValue *vb = (const JsonbValue *) b;
+
+ Assert(va->type == jbvString);
+ Assert(vb->type == jbvString);
+
+ return lengthCompareJsonbString(va->val.string.val, va->val.string.len,
+ vb->val.string.val, vb->val.string.len);
+}
+
+/*
+ * Subroutine for lengthCompareJsonbStringValue
+ *
+ * This is also useful separately to implement binary search on
+ * JsonbContainers.
+ */
+static int
+lengthCompareJsonbString(const char *val1, int len1, const char *val2, int len2)
+{
+ if (len1 == len2)
+ return memcmp(val1, val2, len1);
+ else
+ return len1 > len2 ? 1 : -1;
+}
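+
+/*
+ * For example, under this ordering "z" sorts before "ab" (shorter strings
+ * first), while "ab" vs. "ac" falls through to memcmp() and compares
+ * byte-wise.  This is not a lexical ordering, but it is well-defined and
+ * cheap, which is all the binary search over object keys requires.
+ */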
+
+/*
+ * qsort_arg() comparator to compare JsonbPair values.
+ *
+ * Third argument 'binequal' may point to a bool. If it's set, *binequal is set
+ * to true iff a and b have full binary equality, since some callers have an
+ * interest in whether the two values are equal or merely equivalent.
+ *
+ * N.B.: String comparisons here are "length-wise".
+ *
+ * Pairs with equal keys are ordered such that the order field is respected.
+ */
+static int
+lengthCompareJsonbPair(const void *a, const void *b, void *binequal)
+{
+ const JsonbPair *pa = (const JsonbPair *) a;
+ const JsonbPair *pb = (const JsonbPair *) b;
+ int res;
+
+ res = lengthCompareJsonbStringValue(&pa->key, &pb->key);
+ if (res == 0 && binequal)
+ *((bool *) binequal) = true;
+
+ /*
+ * For pairs with equal keys, sort by descending order field so that the
+ * uniqueifying pass, which keeps the first element of each run, prefers
+ * the most recently pushed pair.
+ */
+ if (res == 0)
+ res = (pa->order > pb->order) ? -1 : 1;
+
+ return res;
+}
+
+/*
+ * Sort and unique-ify pairs in JsonbValue object
+ */
+static void
+uniqueifyJsonbObject(JsonbValue *object)
+{
+ bool hasNonUniq = false;
+
+ Assert(object->type == jbvObject);
+
+ if (object->val.object.nPairs > 1)
+ qsort_arg(object->val.object.pairs, object->val.object.nPairs, sizeof(JsonbPair),
+ lengthCompareJsonbPair, &hasNonUniq);
+
+ if (hasNonUniq)
+ {
+ JsonbPair *ptr = object->val.object.pairs + 1,
+ *res = object->val.object.pairs;
+
+ while (ptr - object->val.object.pairs < object->val.object.nPairs)
+ {
+ /* Avoid copying over duplicate */
+ if (lengthCompareJsonbStringValue(ptr, res) != 0)
+ {
+ res++;
+ if (ptr != res)
+ memcpy(res, ptr, sizeof(JsonbPair));
+ }
+ ptr++;
+ }
+
+ object->val.object.nPairs = res + 1 - object->val.object.pairs;
+ }
+}
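+
+/*
+ * For illustration, the de-duplication above is what makes duplicate keys in
+ * the input resolve to the last value specified, e.g.
+ *
+ *		SELECT '{"a": 1, "a": 2}'::jsonb;	-- yields {"a": 2}
+ *
+ * since pairs with equal keys are sorted with the most recently pushed pair
+ * first and only the first of each run is kept.
+ */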
diff --git a/src/backend/utils/adt/jsonbsubs.c b/src/backend/utils/adt/jsonbsubs.c
new file mode 100644
index 0000000..16f1354
--- /dev/null
+++ b/src/backend/utils/adt/jsonbsubs.c
@@ -0,0 +1,416 @@
+/*-------------------------------------------------------------------------
+ *
+ * jsonbsubs.c
+ * Subscripting support functions for jsonb.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/jsonbsubs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "executor/execExpr.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/subscripting.h"
+#include "parser/parse_coerce.h"
+#include "parser/parse_expr.h"
+#include "utils/jsonb.h"
+#include "utils/jsonfuncs.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+
+
+/* SubscriptingRefState.workspace for jsonb subscripting execution */
+typedef struct JsonbSubWorkspace
+{
+ bool expectArray; /* jsonb root is expected to be an array */
+ Oid *indexOid; /* OID of coerced subscript expression, could
+ * be only integer or text */
+ Datum *index; /* Subscript values in Datum format */
+} JsonbSubWorkspace;
+
+
+/*
+ * Finish parse analysis of a SubscriptingRef expression for a jsonb.
+ *
+ * Transform the subscript expressions, coerce them to integer or text as
+ * appropriate, and determine the result type of the SubscriptingRef node.
+ */
+static void
+jsonb_subscript_transform(SubscriptingRef *sbsref,
+ List *indirection,
+ ParseState *pstate,
+ bool isSlice,
+ bool isAssignment)
+{
+ List *upperIndexpr = NIL;
+ ListCell *idx;
+
+ /*
+ * Transform and convert the subscript expressions. Jsonb subscripting
+ * does not support slices, so look only at the upper index.
+ */
+ foreach(idx, indirection)
+ {
+ A_Indices *ai = lfirst_node(A_Indices, idx);
+ Node *subExpr;
+
+ if (isSlice)
+ {
+ Node *expr = ai->uidx ? ai->uidx : ai->lidx;
+
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("jsonb subscript does not support slices"),
+ parser_errposition(pstate, exprLocation(expr))));
+ }
+
+ if (ai->uidx)
+ {
+ Oid subExprType = InvalidOid,
+ targetType = UNKNOWNOID;
+
+ subExpr = transformExpr(pstate, ai->uidx, pstate->p_expr_kind);
+ subExprType = exprType(subExpr);
+
+ if (subExprType != UNKNOWNOID)
+ {
+ Oid targets[2] = {INT4OID, TEXTOID};
+
+ /*
+ * Jsonb can handle multiple subscript types, but a subscript that
+ * could be coerced to more than one target type must be rejected,
+ * as with overloaded functions. This could possibly be extended
+ * with jsonpath in the future.
+ */
+ for (int i = 0; i < 2; i++)
+ {
+ if (can_coerce_type(1, &subExprType, &targets[i], COERCION_IMPLICIT))
+ {
+ /*
+ * One type has already succeeded, it means there are
+ * two coercion targets possible, failure.
+ */
+ if (targetType != UNKNOWNOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("subscript type %s is not supported", format_type_be(subExprType)),
+ errhint("jsonb subscript must be coercible to only one type, integer or text."),
+ parser_errposition(pstate, exprLocation(subExpr))));
+
+ targetType = targets[i];
+ }
+ }
+
+ /*
+ * No suitable types were found, failure.
+ */
+ if (targetType == UNKNOWNOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("subscript type %s is not supported", format_type_be(subExprType)),
+ errhint("jsonb subscript must be coercible to either integer or text."),
+ parser_errposition(pstate, exprLocation(subExpr))));
+ }
+ else
+ targetType = TEXTOID;
+
+ /*
+ * We know from can_coerce_type that the coercion will succeed, so
+ * coerce_type could be used. Note the implicit coercion context,
+ * which is required to handle subscripts of different types,
+ * similar to overloaded functions.
+ */
+ subExpr = coerce_type(pstate,
+ subExpr, subExprType,
+ targetType, -1,
+ COERCION_IMPLICIT,
+ COERCE_IMPLICIT_CAST,
+ -1);
+ if (subExpr == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("jsonb subscript must have text type"),
+ parser_errposition(pstate, exprLocation(subExpr))));
+ }
+ else
+ {
+ /*
+ * Slice with omitted upper bound. Should not happen as we already
+ * errored out on slice earlier, but handle this just in case.
+ */
+ Assert(isSlice && ai->is_slice);
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("jsonb subscript does not support slices"),
+ parser_errposition(pstate, exprLocation(ai->uidx))));
+ }
+
+ upperIndexpr = lappend(upperIndexpr, subExpr);
+ }
+
+ /* store the transformed lists into the SubscriptRef node */
+ sbsref->refupperindexpr = upperIndexpr;
+ sbsref->reflowerindexpr = NIL;
+
+ /* Determine the result type of the subscripting operation; always jsonb */
+ sbsref->refrestype = JSONBOID;
+ sbsref->reftypmod = -1;
+}
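+
+/*
+ * For illustration, after this transform the supported forms look like
+ * (jcol being any jsonb column):
+ *
+ *		SELECT ('{"a": {"b": [10, 20, 30]}}'::jsonb)['a']['b'][2];	-- 30
+ *		SELECT ('["x", "y"]'::jsonb)[0];							-- "x"
+ *
+ * Integer subscripts are accepted for array access and text subscripts for
+ * object access; slices such as jcol[1:2] are rejected with the error above.
+ */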
+
+/*
+ * During execution, process the subscripts in a SubscriptingRef expression.
+ *
+ * The subscript expressions are already evaluated in Datum form in the
+ * SubscriptingRefState's arrays. Check and convert them as necessary.
+ *
+ * If any subscript is NULL, we throw an error in assignment cases; in fetch
+ * cases we set the result to NULL and return false (instructing the caller
+ * to skip the rest of the SubscriptingRef sequence).
+ */
+static bool
+jsonb_subscript_check_subscripts(ExprState *state,
+ ExprEvalStep *op,
+ ExprContext *econtext)
+{
+ SubscriptingRefState *sbsrefstate = op->d.sbsref_subscript.state;
+ JsonbSubWorkspace *workspace = (JsonbSubWorkspace *) sbsrefstate->workspace;
+
+ /*
+ * If the first subscript is an integer, the source jsonb is expected to
+ * be an array. This information is not used directly here; all such
+ * cases are handled within the corresponding jsonb assign functions. But
+ * if the source jsonb is NULL, the expected type is used to construct an
+ * empty source.
+ */
+ if (sbsrefstate->numupper > 0 && sbsrefstate->upperprovided[0] &&
+ !sbsrefstate->upperindexnull[0] && workspace->indexOid[0] == INT4OID)
+ workspace->expectArray = true;
+
+ /* Process upper subscripts */
+ for (int i = 0; i < sbsrefstate->numupper; i++)
+ {
+ if (sbsrefstate->upperprovided[i])
+ {
+ /* If any index expr yields NULL, result is NULL or error */
+ if (sbsrefstate->upperindexnull[i])
+ {
+ if (sbsrefstate->isassignment)
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("jsonb subscript in assignment must not be null")));
+ *op->resnull = true;
+ return false;
+ }
+
+ /*
+ * The jsonb fetch and assign functions need the path in text form,
+ * so convert the subscript if it's not already text.
+ */
+ if (workspace->indexOid[i] == INT4OID)
+ {
+ Datum datum = sbsrefstate->upperindex[i];
+ char *cs = DatumGetCString(DirectFunctionCall1(int4out, datum));
+
+ workspace->index[i] = CStringGetTextDatum(cs);
+ }
+ else
+ workspace->index[i] = sbsrefstate->upperindex[i];
+ }
+ }
+
+ return true;
+}
+
+/*
+ * Evaluate SubscriptingRef fetch for a jsonb element.
+ *
+ * Source container is in step's result variable (it's known not NULL, since
+ * we set fetch_strict to true).
+ */
+static void
+jsonb_subscript_fetch(ExprState *state,
+ ExprEvalStep *op,
+ ExprContext *econtext)
+{
+ SubscriptingRefState *sbsrefstate = op->d.sbsref.state;
+ JsonbSubWorkspace *workspace = (JsonbSubWorkspace *) sbsrefstate->workspace;
+ Jsonb *jsonbSource;
+
+ /* Should not get here if source jsonb (or any subscript) is null */
+ Assert(!(*op->resnull));
+
+ jsonbSource = DatumGetJsonbP(*op->resvalue);
+ *op->resvalue = jsonb_get_element(jsonbSource,
+ workspace->index,
+ sbsrefstate->numupper,
+ op->resnull,
+ false);
+}
+
+/*
+ * Evaluate SubscriptingRef assignment for a jsonb element assignment.
+ *
+ * Input container (possibly null) is in result area, replacement value is in
+ * SubscriptingRefState's replacevalue/replacenull.
+ */
+static void
+jsonb_subscript_assign(ExprState *state,
+ ExprEvalStep *op,
+ ExprContext *econtext)
+{
+ SubscriptingRefState *sbsrefstate = op->d.sbsref.state;
+ JsonbSubWorkspace *workspace = (JsonbSubWorkspace *) sbsrefstate->workspace;
+ Jsonb *jsonbSource;
+ JsonbValue replacevalue;
+
+ if (sbsrefstate->replacenull)
+ replacevalue.type = jbvNull;
+ else
+ JsonbToJsonbValue(DatumGetJsonbP(sbsrefstate->replacevalue),
+ &replacevalue);
+
+ /*
+ * If the input container is null, set up an empty jsonb and
+ * proceed with the assignment.
+ */
+ if (*op->resnull)
+ {
+ JsonbValue newSource;
+
+ /*
+ * To avoid surprising results, set up an empty jsonb array if an
+ * array is expected (i.e. the first subscript is an integer),
+ * otherwise an empty jsonb object.
+ */
+ if (workspace->expectArray)
+ {
+ newSource.type = jbvArray;
+ newSource.val.array.nElems = 0;
+ newSource.val.array.rawScalar = false;
+ }
+ else
+ {
+ newSource.type = jbvObject;
+ newSource.val.object.nPairs = 0;
+ }
+
+ jsonbSource = JsonbValueToJsonb(&newSource);
+ *op->resnull = false;
+ }
+ else
+ jsonbSource = DatumGetJsonbP(*op->resvalue);
+
+ *op->resvalue = jsonb_set_element(jsonbSource,
+ workspace->index,
+ sbsrefstate->numupper,
+ &replacevalue);
+ /* The result is never NULL, so no need to change *op->resnull */
+}
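+
+/*
+ * For illustration, the NULL-source handling above means that (t.jcol being
+ * a jsonb column that is currently NULL):
+ *
+ *		UPDATE t SET jcol['a'] = '1';	-- jcol becomes {"a": 1}
+ *		UPDATE t SET jcol[0] = '1';		-- jcol becomes [1]
+ *
+ * because an integer first subscript makes us seed an empty array, and any
+ * other first subscript seeds an empty object.
+ */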
+
+/*
+ * Compute old jsonb element value for a SubscriptingRef assignment
+ * expression. Will only be called if the new-value subexpression
+ * contains SubscriptingRef or FieldStore. This is the same as the
+ * regular fetch case, except that we have to handle a null jsonb,
+ * and the value should be stored into the SubscriptingRefState's
+ * prevvalue/prevnull fields.
+ */
+static void
+jsonb_subscript_fetch_old(ExprState *state,
+ ExprEvalStep *op,
+ ExprContext *econtext)
+{
+ SubscriptingRefState *sbsrefstate = op->d.sbsref.state;
+
+ if (*op->resnull)
+ {
+ /* whole jsonb is null, so any element is too */
+ sbsrefstate->prevvalue = (Datum) 0;
+ sbsrefstate->prevnull = true;
+ }
+ else
+ {
+ Jsonb *jsonbSource = DatumGetJsonbP(*op->resvalue);
+
+ sbsrefstate->prevvalue = jsonb_get_element(jsonbSource,
+ sbsrefstate->upperindex,
+ sbsrefstate->numupper,
+ &sbsrefstate->prevnull,
+ false);
+ }
+}
+
+/*
+ * Set up execution state for a jsonb subscript operation. Unlike array
+ * subscripting, there is no limit on the number of subscripts, since the
+ * jsonb type itself has no nesting limit.
+ */
+static void
+jsonb_exec_setup(const SubscriptingRef *sbsref,
+ SubscriptingRefState *sbsrefstate,
+ SubscriptExecSteps *methods)
+{
+ JsonbSubWorkspace *workspace;
+ ListCell *lc;
+ int nupper = sbsref->refupperindexpr->length;
+ char *ptr;
+
+ /* Allocate type-specific workspace with space for per-subscript data */
+ workspace = palloc0(MAXALIGN(sizeof(JsonbSubWorkspace)) +
+ nupper * (sizeof(Datum) + sizeof(Oid)));
+ workspace->expectArray = false;
+ ptr = ((char *) workspace) + MAXALIGN(sizeof(JsonbSubWorkspace));
+
+ /*
+ * This coding assumes sizeof(Datum) >= sizeof(Oid), else we might
+ * misalign the indexOid pointer
+ */
+ workspace->index = (Datum *) ptr;
+ ptr += nupper * sizeof(Datum);
+ workspace->indexOid = (Oid *) ptr;
+
+ sbsrefstate->workspace = workspace;
+
+ /* Collect subscript data types necessary at execution time */
+ foreach(lc, sbsref->refupperindexpr)
+ {
+ Node *expr = lfirst(lc);
+ int i = foreach_current_index(lc);
+
+ workspace->indexOid[i] = exprType(expr);
+ }
+
+ /*
+ * Pass back pointers to appropriate step execution functions.
+ */
+ methods->sbs_check_subscripts = jsonb_subscript_check_subscripts;
+ methods->sbs_fetch = jsonb_subscript_fetch;
+ methods->sbs_assign = jsonb_subscript_assign;
+ methods->sbs_fetch_old = jsonb_subscript_fetch_old;
+}
+
+/*
+ * jsonb_subscript_handler
+ * Subscripting handler for jsonb.
+ */
+Datum
+jsonb_subscript_handler(PG_FUNCTION_ARGS)
+{
+ static const SubscriptRoutines sbsroutines = {
+ .transform = jsonb_subscript_transform,
+ .exec_setup = jsonb_exec_setup,
+ .fetch_strict = true, /* fetch returns NULL for NULL inputs */
+ .fetch_leakproof = true, /* fetch returns NULL for bad subscript */
+ .store_leakproof = false /* ... but assignment throws error */
+ };
+
+ PG_RETURN_POINTER(&sbsroutines);
+}
diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c
new file mode 100644
index 0000000..d473625
--- /dev/null
+++ b/src/backend/utils/adt/jsonfuncs.c
@@ -0,0 +1,5546 @@
+/*-------------------------------------------------------------------------
+ *
+ * jsonfuncs.c
+ * Functions to process JSON data types.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/jsonfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <limits.h>
+
+#include "access/htup_details.h"
+#include "catalog/pg_type.h"
+#include "common/jsonapi.h"
+#include "common/string.h"
+#include "fmgr.h"
+#include "funcapi.h"
+#include "lib/stringinfo.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/hsearch.h"
+#include "utils/json.h"
+#include "utils/jsonb.h"
+#include "utils/jsonfuncs.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/syscache.h"
+#include "utils/typcache.h"
+
+/* Operations available for setPath */
+#define JB_PATH_CREATE 0x0001
+#define JB_PATH_DELETE 0x0002
+#define JB_PATH_REPLACE 0x0004
+#define JB_PATH_INSERT_BEFORE 0x0008
+#define JB_PATH_INSERT_AFTER 0x0010
+#define JB_PATH_CREATE_OR_INSERT \
+ (JB_PATH_INSERT_BEFORE | JB_PATH_INSERT_AFTER | JB_PATH_CREATE)
+#define JB_PATH_FILL_GAPS 0x0020
+#define JB_PATH_CONSISTENT_POSITION 0x0040
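+
+/*
+ * Roughly, jsonb_set() passes JB_PATH_CREATE or JB_PATH_REPLACE depending on
+ * its create_if_missing argument, jsonb_insert() passes
+ * JB_PATH_INSERT_BEFORE or JB_PATH_INSERT_AFTER, path deletion (the #-
+ * operator) passes JB_PATH_DELETE, and subscripting assignment additionally
+ * uses JB_PATH_FILL_GAPS and JB_PATH_CONSISTENT_POSITION so that out-of-range
+ * array indexes are padded with nulls and appends land where the subscript
+ * says.
+ */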
+
+/* state for json_object_keys */
+typedef struct OkeysState
+{
+ JsonLexContext *lex;
+ char **result;
+ int result_size;
+ int result_count;
+ int sent_count;
+} OkeysState;
+
+/* state for iterate_json_values function */
+typedef struct IterateJsonStringValuesState
+{
+ JsonLexContext *lex;
+ JsonIterateStringValuesAction action; /* an action that will be applied
+ * to each json value */
+ void *action_state; /* any necessary context for iteration */
+ uint32 flags; /* what kind of elements from a json we want
+ * to iterate */
+} IterateJsonStringValuesState;
+
+/* state for transform_json_string_values function */
+typedef struct TransformJsonStringValuesState
+{
+ JsonLexContext *lex;
+ StringInfo strval; /* resulting json */
+ JsonTransformStringValuesAction action; /* an action that will be applied
+ * to each json value */
+ void *action_state; /* any necessary context for transformation */
+} TransformJsonStringValuesState;
+
+/* state for json_get* functions */
+typedef struct GetState
+{
+ JsonLexContext *lex;
+ text *tresult;
+ char *result_start;
+ bool normalize_results;
+ bool next_scalar;
+ int npath; /* length of each path-related array */
+ char **path_names; /* field name(s) being sought */
+ int *path_indexes; /* array index(es) being sought */
+ bool *pathok; /* is path matched to current depth? */
+ int *array_cur_index; /* current element index at each path
+ * level */
+} GetState;
+
+/* state for json_array_length */
+typedef struct AlenState
+{
+ JsonLexContext *lex;
+ int count;
+} AlenState;
+
+/* state for json_each */
+typedef struct EachState
+{
+ JsonLexContext *lex;
+ Tuplestorestate *tuple_store;
+ TupleDesc ret_tdesc;
+ MemoryContext tmp_cxt;
+ char *result_start;
+ bool normalize_results;
+ bool next_scalar;
+ char *normalized_scalar;
+} EachState;
+
+/* state for json_array_elements */
+typedef struct ElementsState
+{
+ JsonLexContext *lex;
+ const char *function_name;
+ Tuplestorestate *tuple_store;
+ TupleDesc ret_tdesc;
+ MemoryContext tmp_cxt;
+ char *result_start;
+ bool normalize_results;
+ bool next_scalar;
+ char *normalized_scalar;
+} ElementsState;
+
+/* state for get_json_object_as_hash */
+typedef struct JHashState
+{
+ JsonLexContext *lex;
+ const char *function_name;
+ HTAB *hash;
+ char *saved_scalar;
+ char *save_json_start;
+ JsonTokenType saved_token_type;
+} JHashState;
+
+/* hashtable element */
+typedef struct JsonHashEntry
+{
+ char fname[NAMEDATALEN]; /* hash key (MUST BE FIRST) */
+ char *val;
+ JsonTokenType type;
+} JsonHashEntry;
+
+/* structure to cache type I/O metadata needed for populate_scalar() */
+typedef struct ScalarIOData
+{
+ Oid typioparam;
+ FmgrInfo typiofunc;
+} ScalarIOData;
+
+/* these two structures are used recursively */
+typedef struct ColumnIOData ColumnIOData;
+typedef struct RecordIOData RecordIOData;
+
+/* structure to cache metadata needed for populate_array() */
+typedef struct ArrayIOData
+{
+ ColumnIOData *element_info; /* metadata cache */
+ Oid element_type; /* array element type id */
+ int32 element_typmod; /* array element type modifier */
+} ArrayIOData;
+
+/* structure to cache metadata needed for populate_composite() */
+typedef struct CompositeIOData
+{
+ /*
+ * We use a pointer to RecordIOData here because the variable-length
+ * struct RecordIOData can't be used directly in the ColumnIOData.io union
+ */
+ RecordIOData *record_io; /* metadata cache for populate_record() */
+ TupleDesc tupdesc; /* cached tuple descriptor */
+ /* these fields differ from target type only if domain over composite: */
+ Oid base_typid; /* base type id */
+ int32 base_typmod; /* base type modifier */
+ /* this field is used only if target type is domain over composite: */
+ void *domain_info; /* opaque cache for domain checks */
+} CompositeIOData;
+
+/* structure to cache metadata needed for populate_domain() */
+typedef struct DomainIOData
+{
+ ColumnIOData *base_io; /* metadata cache */
+ Oid base_typid; /* base type id */
+ int32 base_typmod; /* base type modifier */
+ void *domain_info; /* opaque cache for domain checks */
+} DomainIOData;
+
+/* enumeration type categories */
+typedef enum TypeCat
+{
+ TYPECAT_SCALAR = 's',
+ TYPECAT_ARRAY = 'a',
+ TYPECAT_COMPOSITE = 'c',
+ TYPECAT_COMPOSITE_DOMAIN = 'C',
+ TYPECAT_DOMAIN = 'd'
+} TypeCat;
+
+/* these two are stolen from hstore / record_out, used in populate_record* */
+
+/* structure to cache record metadata needed for populate_record_field() */
+struct ColumnIOData
+{
+ Oid typid; /* column type id */
+ int32 typmod; /* column type modifier */
+ TypeCat typcat; /* column type category */
+ ScalarIOData scalar_io; /* metadata cache for direct conversion
+ * through input function */
+ union
+ {
+ ArrayIOData array;
+ CompositeIOData composite;
+ DomainIOData domain;
+ } io; /* metadata cache for various column type
+ * categories */
+};
+
+/* structure to cache record metadata needed for populate_record() */
+struct RecordIOData
+{
+ Oid record_type;
+ int32 record_typmod;
+ int ncolumns;
+ ColumnIOData columns[FLEXIBLE_ARRAY_MEMBER];
+};
+
+/* per-query cache for populate_record_worker and populate_recordset_worker */
+typedef struct PopulateRecordCache
+{
+ Oid argtype; /* declared type of the record argument */
+ ColumnIOData c; /* metadata cache for populate_composite() */
+ MemoryContext fn_mcxt; /* where this is stored */
+} PopulateRecordCache;
+
+/* per-call state for populate_recordset */
+typedef struct PopulateRecordsetState
+{
+ JsonLexContext *lex;
+ const char *function_name;
+ HTAB *json_hash;
+ char *saved_scalar;
+ char *save_json_start;
+ JsonTokenType saved_token_type;
+ Tuplestorestate *tuple_store;
+ HeapTupleHeader rec;
+ PopulateRecordCache *cache;
+} PopulateRecordsetState;
+
+/* common data for populate_array_json() and populate_array_dim_jsonb() */
+typedef struct PopulateArrayContext
+{
+ ArrayBuildState *astate; /* array build state */
+ ArrayIOData *aio; /* metadata cache */
+ MemoryContext acxt; /* array build memory context */
+ MemoryContext mcxt; /* cache memory context */
+ const char *colname; /* for diagnostics only */
+ int *dims; /* dimensions */
+ int *sizes; /* current dimension counters */
+ int ndims; /* number of dimensions */
+} PopulateArrayContext;
+
+/* state for populate_array_json() */
+typedef struct PopulateArrayState
+{
+ JsonLexContext *lex; /* json lexer */
+ PopulateArrayContext *ctx; /* context */
+ char *element_start; /* start of the current array element */
+ char *element_scalar; /* current array element token if it is a
+ * scalar */
+ JsonTokenType element_type; /* current array element type */
+} PopulateArrayState;
+
+/* state for json_strip_nulls */
+typedef struct StripnullState
+{
+ JsonLexContext *lex;
+ StringInfo strval;
+ bool skip_next_null;
+} StripnullState;
+
+/* structure for generalized json/jsonb value passing */
+typedef struct JsValue
+{
+ bool is_json; /* json/jsonb */
+ union
+ {
+ struct
+ {
+ char *str; /* json string */
+ int len; /* json string length or -1 if null-terminated */
+ JsonTokenType type; /* json type */
+ } json; /* json value */
+
+ JsonbValue *jsonb; /* jsonb value */
+ } val;
+} JsValue;
+
+typedef struct JsObject
+{
+ bool is_json; /* json/jsonb */
+ union
+ {
+ HTAB *json_hash;
+ JsonbContainer *jsonb_cont;
+ } val;
+} JsObject;
+
+/* useful macros for testing JsValue properties */
+#define JsValueIsNull(jsv) \
+ ((jsv)->is_json ? \
+ (!(jsv)->val.json.str || (jsv)->val.json.type == JSON_TOKEN_NULL) : \
+ (!(jsv)->val.jsonb || (jsv)->val.jsonb->type == jbvNull))
+
+#define JsValueIsString(jsv) \
+ ((jsv)->is_json ? (jsv)->val.json.type == JSON_TOKEN_STRING \
+ : ((jsv)->val.jsonb && (jsv)->val.jsonb->type == jbvString))
+
+#define JsObjectIsEmpty(jso) \
+ ((jso)->is_json \
+ ? hash_get_num_entries((jso)->val.json_hash) == 0 \
+ : ((jso)->val.jsonb_cont == NULL || \
+ JsonContainerSize((jso)->val.jsonb_cont) == 0))
+
+#define JsObjectFree(jso) \
+ do { \
+ if ((jso)->is_json) \
+ hash_destroy((jso)->val.json_hash); \
+ } while (0)
+
+static int report_json_context(JsonLexContext *lex);
+
+/* semantic action functions for json_object_keys */
+static void okeys_object_field_start(void *state, char *fname, bool isnull);
+static void okeys_array_start(void *state);
+static void okeys_scalar(void *state, char *token, JsonTokenType tokentype);
+
+/* semantic action functions for json_get* functions */
+static void get_object_start(void *state);
+static void get_object_end(void *state);
+static void get_object_field_start(void *state, char *fname, bool isnull);
+static void get_object_field_end(void *state, char *fname, bool isnull);
+static void get_array_start(void *state);
+static void get_array_end(void *state);
+static void get_array_element_start(void *state, bool isnull);
+static void get_array_element_end(void *state, bool isnull);
+static void get_scalar(void *state, char *token, JsonTokenType tokentype);
+
+/* common worker function for json getter functions */
+static Datum get_path_all(FunctionCallInfo fcinfo, bool as_text);
+static text *get_worker(text *json, char **tpath, int *ipath, int npath,
+ bool normalize_results);
+static Datum get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text);
+static text *JsonbValueAsText(JsonbValue *v);
+
+/* semantic action functions for json_array_length */
+static void alen_object_start(void *state);
+static void alen_scalar(void *state, char *token, JsonTokenType tokentype);
+static void alen_array_element_start(void *state, bool isnull);
+
+/* common workers for json{b}_each* functions */
+static Datum each_worker(FunctionCallInfo fcinfo, bool as_text);
+static Datum each_worker_jsonb(FunctionCallInfo fcinfo, const char *funcname,
+ bool as_text);
+
+/* semantic action functions for json_each */
+static void each_object_field_start(void *state, char *fname, bool isnull);
+static void each_object_field_end(void *state, char *fname, bool isnull);
+static void each_array_start(void *state);
+static void each_scalar(void *state, char *token, JsonTokenType tokentype);
+
+/* common workers for json{b}_array_elements_* functions */
+static Datum elements_worker(FunctionCallInfo fcinfo, const char *funcname,
+ bool as_text);
+static Datum elements_worker_jsonb(FunctionCallInfo fcinfo, const char *funcname,
+ bool as_text);
+
+/* semantic action functions for json_array_elements */
+static void elements_object_start(void *state);
+static void elements_array_element_start(void *state, bool isnull);
+static void elements_array_element_end(void *state, bool isnull);
+static void elements_scalar(void *state, char *token, JsonTokenType tokentype);
+
+/* turn a json object into a hash table */
+static HTAB *get_json_object_as_hash(char *json, int len, const char *funcname);
+
+/* semantic actions for populate_array_json */
+static void populate_array_object_start(void *_state);
+static void populate_array_array_end(void *_state);
+static void populate_array_element_start(void *_state, bool isnull);
+static void populate_array_element_end(void *_state, bool isnull);
+static void populate_array_scalar(void *_state, char *token, JsonTokenType tokentype);
+
+/* semantic action functions for get_json_object_as_hash */
+static void hash_object_field_start(void *state, char *fname, bool isnull);
+static void hash_object_field_end(void *state, char *fname, bool isnull);
+static void hash_array_start(void *state);
+static void hash_scalar(void *state, char *token, JsonTokenType tokentype);
+
+/* semantic action functions for populate_recordset */
+static void populate_recordset_object_field_start(void *state, char *fname, bool isnull);
+static void populate_recordset_object_field_end(void *state, char *fname, bool isnull);
+static void populate_recordset_scalar(void *state, char *token, JsonTokenType tokentype);
+static void populate_recordset_object_start(void *state);
+static void populate_recordset_object_end(void *state);
+static void populate_recordset_array_start(void *state);
+static void populate_recordset_array_element_start(void *state, bool isnull);
+
+/* semantic action functions for json_strip_nulls */
+static void sn_object_start(void *state);
+static void sn_object_end(void *state);
+static void sn_array_start(void *state);
+static void sn_array_end(void *state);
+static void sn_object_field_start(void *state, char *fname, bool isnull);
+static void sn_array_element_start(void *state, bool isnull);
+static void sn_scalar(void *state, char *token, JsonTokenType tokentype);
+
+/* worker functions for populate_record, to_record, populate_recordset and to_recordset */
+static Datum populate_recordset_worker(FunctionCallInfo fcinfo, const char *funcname,
+ bool is_json, bool have_record_arg);
+static Datum populate_record_worker(FunctionCallInfo fcinfo, const char *funcname,
+ bool is_json, bool have_record_arg);
+
+/* helper functions for populate_record[set] */
+static HeapTupleHeader populate_record(TupleDesc tupdesc, RecordIOData **record_p,
+ HeapTupleHeader defaultval, MemoryContext mcxt,
+ JsObject *obj);
+static void get_record_type_from_argument(FunctionCallInfo fcinfo,
+ const char *funcname,
+ PopulateRecordCache *cache);
+static void get_record_type_from_query(FunctionCallInfo fcinfo,
+ const char *funcname,
+ PopulateRecordCache *cache);
+static void JsValueToJsObject(JsValue *jsv, JsObject *jso);
+static Datum populate_composite(CompositeIOData *io, Oid typid,
+ const char *colname, MemoryContext mcxt,
+ HeapTupleHeader defaultval, JsValue *jsv, bool isnull);
+static Datum populate_scalar(ScalarIOData *io, Oid typid, int32 typmod, JsValue *jsv);
+static void prepare_column_cache(ColumnIOData *column, Oid typid, int32 typmod,
+ MemoryContext mcxt, bool need_scalar);
+static Datum populate_record_field(ColumnIOData *col, Oid typid, int32 typmod,
+ const char *colname, MemoryContext mcxt, Datum defaultval,
+ JsValue *jsv, bool *isnull);
+static RecordIOData *allocate_record_info(MemoryContext mcxt, int ncolumns);
+static bool JsObjectGetField(JsObject *obj, char *field, JsValue *jsv);
+static void populate_recordset_record(PopulateRecordsetState *state, JsObject *obj);
+static void populate_array_json(PopulateArrayContext *ctx, char *json, int len);
+static void populate_array_dim_jsonb(PopulateArrayContext *ctx, JsonbValue *jbv,
+ int ndim);
+static void populate_array_report_expected_array(PopulateArrayContext *ctx, int ndim);
+static void populate_array_assign_ndims(PopulateArrayContext *ctx, int ndims);
+static void populate_array_check_dimension(PopulateArrayContext *ctx, int ndim);
+static void populate_array_element(PopulateArrayContext *ctx, int ndim, JsValue *jsv);
+static Datum populate_array(ArrayIOData *aio, const char *colname,
+ MemoryContext mcxt, JsValue *jsv);
+static Datum populate_domain(DomainIOData *io, Oid typid, const char *colname,
+ MemoryContext mcxt, JsValue *jsv, bool isnull);
+
+/* functions supporting jsonb_delete, jsonb_set and jsonb_concat */
+static JsonbValue *IteratorConcat(JsonbIterator **it1, JsonbIterator **it2,
+ JsonbParseState **state);
+static JsonbValue *setPath(JsonbIterator **it, Datum *path_elems,
+ bool *path_nulls, int path_len,
+ JsonbParseState **st, int level, JsonbValue *newval,
+ int op_type);
+static void setPathObject(JsonbIterator **it, Datum *path_elems,
+ bool *path_nulls, int path_len, JsonbParseState **st,
+ int level,
+ JsonbValue *newval, uint32 npairs, int op_type);
+static void setPathArray(JsonbIterator **it, Datum *path_elems,
+ bool *path_nulls, int path_len, JsonbParseState **st,
+ int level,
+ JsonbValue *newval, uint32 nelems, int op_type);
+
+/* function supporting iterate_json_values */
+static void iterate_values_scalar(void *state, char *token, JsonTokenType tokentype);
+static void iterate_values_object_field_start(void *state, char *fname, bool isnull);
+
+/* functions supporting transform_json_string_values */
+static void transform_string_values_object_start(void *state);
+static void transform_string_values_object_end(void *state);
+static void transform_string_values_array_start(void *state);
+static void transform_string_values_array_end(void *state);
+static void transform_string_values_object_field_start(void *state, char *fname, bool isnull);
+static void transform_string_values_array_element_start(void *state, bool isnull);
+static void transform_string_values_scalar(void *state, char *token, JsonTokenType tokentype);
+
+/*
+ * pg_parse_json_or_ereport
+ *
+ * This function is like pg_parse_json, except that it does not return a
+ * JsonParseErrorType. Instead, in case of any failure, this function will
+ * ereport(ERROR).
+ */
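+/*
+ * Typical usage (a sketch; "state" and "my_scalar" are placeholders for
+ * whatever the caller provides): build a lexer and a zero-filled
+ * JsonSemAction, fill in only the callbacks of interest, and let this
+ * wrapper turn any parse failure into an error:
+ *
+ *     JsonLexContext *lex = makeJsonLexContext(json, true);
+ *     JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
+ *
+ *     sem->semstate = (void *) state;
+ *     sem->scalar = my_scalar;
+ *     pg_parse_json_or_ereport(lex, sem);
+ *
+ * Callbacks left NULL (courtesy of palloc0) are simply not invoked.
+ */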
+void
+pg_parse_json_or_ereport(JsonLexContext *lex, JsonSemAction *sem)
+{
+ JsonParseErrorType result;
+
+ result = pg_parse_json(lex, sem);
+ if (result != JSON_SUCCESS)
+ json_ereport_error(result, lex);
+}
+
+/*
+ * makeJsonLexContext
+ *
+ * This is like makeJsonLexContextCstringLen, but it accepts a text value
+ * directly.
+ */
+JsonLexContext *
+makeJsonLexContext(text *json, bool need_escapes)
+{
+ /*
+ * Most callers pass a detoasted datum, but it's not clear that they all
+ * do. pg_detoast_datum_packed() is cheap insurance.
+ */
+ json = pg_detoast_datum_packed(json);
+
+ return makeJsonLexContextCstringLen(VARDATA_ANY(json),
+ VARSIZE_ANY_EXHDR(json),
+ GetDatabaseEncoding(),
+ need_escapes);
+}
+
+/*
+ * SQL functions json_object_keys and jsonb_object_keys
+ *
+ * Return the set of keys for the object argument.
+ *
+ * These SRFs operate in value-per-call mode. They process the
+ * object during the first call, and the keys are simply stashed
+ * in an array, whose size is expanded as necessary. This is probably
+ * safe enough for a list of keys of a single object, since they are
+ * limited in size to NAMEDATALEN and the number of keys is unlikely to
+ * be so huge that it has major memory implications.
+ */
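+/*
+ * For example:
+ *     SELECT jsonb_object_keys('{"a": 1, "b": {"c": 2}}');
+ * returns the two rows 'a' and 'b'; only top-level keys are reported.
+ */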
+Datum
+jsonb_object_keys(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ OkeysState *state;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ MemoryContext oldcontext;
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ bool skipNested = false;
+ JsonbIterator *it;
+ JsonbValue v;
+ JsonbIteratorToken r;
+
+ if (JB_ROOT_IS_SCALAR(jb))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot call %s on a scalar",
+ "jsonb_object_keys")));
+ else if (JB_ROOT_IS_ARRAY(jb))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot call %s on an array",
+ "jsonb_object_keys")));
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ state = palloc(sizeof(OkeysState));
+
+ state->result_size = JB_ROOT_COUNT(jb);
+ state->result_count = 0;
+ state->sent_count = 0;
+ state->result = palloc(state->result_size * sizeof(char *));
+
+ it = JsonbIteratorInit(&jb->root);
+
+ while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE)
+ {
+ skipNested = true;
+
+ if (r == WJB_KEY)
+ {
+ char *cstr;
+
+ cstr = palloc((v.val.string.len + 1) * sizeof(char));
+ memcpy(cstr, v.val.string.val, v.val.string.len);
+ cstr[v.val.string.len] = '\0';
+ state->result[state->result_count++] = cstr;
+ }
+ }
+
+ MemoryContextSwitchTo(oldcontext);
+ funcctx->user_fctx = (void *) state;
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ state = (OkeysState *) funcctx->user_fctx;
+
+ if (state->sent_count < state->result_count)
+ {
+ char *nxt = state->result[state->sent_count++];
+
+ SRF_RETURN_NEXT(funcctx, CStringGetTextDatum(nxt));
+ }
+
+ SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * Report a JSON error.
+ */
+void
+json_ereport_error(JsonParseErrorType error, JsonLexContext *lex)
+{
+ if (error == JSON_UNICODE_HIGH_ESCAPE ||
+ error == JSON_UNICODE_CODE_POINT_ZERO)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
+ errmsg("unsupported Unicode escape sequence"),
+ errdetail_internal("%s", json_errdetail(error, lex)),
+ report_json_context(lex)));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s", "json"),
+ errdetail_internal("%s", json_errdetail(error, lex)),
+ report_json_context(lex)));
+}
+
+/*
+ * Report a CONTEXT line for bogus JSON input.
+ *
+ * lex->token_terminator must be set to identify the spot where we detected
+ * the error. Note that lex->token_start might be NULL, in case the error was
+ * detected at EOF.
+ *
+ * The return value isn't meaningful, but we make it non-void so that this
+ * can be invoked inside ereport().
+ */
+static int
+report_json_context(JsonLexContext *lex)
+{
+ const char *context_start;
+ const char *context_end;
+ const char *line_start;
+ char *ctxt;
+ int ctxtlen;
+ const char *prefix;
+ const char *suffix;
+
+ /* Choose boundaries for the part of the input we will display */
+ line_start = lex->line_start;
+ context_start = line_start;
+ context_end = lex->token_terminator;
+ Assert(context_end >= context_start);
+
+ /* Advance until we are close enough to context_end */
+ while (context_end - context_start >= 50)
+ {
+ /* Advance to next multibyte character */
+ if (IS_HIGHBIT_SET(*context_start))
+ context_start += pg_mblen(context_start);
+ else
+ context_start++;
+ }
+
+ /*
+ * We add "..." to indicate that the excerpt doesn't start at the
+ * beginning of the line ... but if we're within 3 characters of the
+ * beginning of the line, we might as well just show the whole line.
+ */
+ if (context_start - line_start <= 3)
+ context_start = line_start;
+
+ /* Get a null-terminated copy of the data to present */
+ ctxtlen = context_end - context_start;
+ ctxt = palloc(ctxtlen + 1);
+ memcpy(ctxt, context_start, ctxtlen);
+ ctxt[ctxtlen] = '\0';
+
+ /*
+ * Show the context, prefixing "..." if not starting at start of line, and
+ * suffixing "..." if not ending at end of line.
+ */
+ prefix = (context_start > line_start) ? "..." : "";
+ suffix = (lex->token_type != JSON_TOKEN_END &&
+ context_end - lex->input < lex->input_length &&
+ *context_end != '\n' && *context_end != '\r') ? "..." : "";
+
+ return errcontext("JSON data, line %d: %s%s%s",
+ lex->line_number, prefix, ctxt, suffix);
+}
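+
+/*
+ * For example, parsing the invalid input {"a": 1,} produces an error whose
+ * context line reads
+ *     CONTEXT:  JSON data, line 1: {"a": 1,}
+ * with the excerpt trimmed to roughly the 50 characters preceding the spot
+ * where the error was detected.
+ */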
+
+
+Datum
+json_object_keys(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ OkeysState *state;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ text *json = PG_GETARG_TEXT_PP(0);
+ JsonLexContext *lex = makeJsonLexContext(json, true);
+ JsonSemAction *sem;
+ MemoryContext oldcontext;
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ state = palloc(sizeof(OkeysState));
+ sem = palloc0(sizeof(JsonSemAction));
+
+ state->lex = lex;
+ state->result_size = 256;
+ state->result_count = 0;
+ state->sent_count = 0;
+ state->result = palloc(256 * sizeof(char *));
+
+ sem->semstate = (void *) state;
+ sem->array_start = okeys_array_start;
+ sem->scalar = okeys_scalar;
+ sem->object_field_start = okeys_object_field_start;
+ /* remainder are all NULL, courtesy of palloc0 above */
+
+ pg_parse_json_or_ereport(lex, sem);
+ /* keys are now in state->result */
+
+ pfree(lex->strval->data);
+ pfree(lex->strval);
+ pfree(lex);
+ pfree(sem);
+
+ MemoryContextSwitchTo(oldcontext);
+ funcctx->user_fctx = (void *) state;
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ state = (OkeysState *) funcctx->user_fctx;
+
+ if (state->sent_count < state->result_count)
+ {
+ char *nxt = state->result[state->sent_count++];
+
+ SRF_RETURN_NEXT(funcctx, CStringGetTextDatum(nxt));
+ }
+
+ SRF_RETURN_DONE(funcctx);
+}
+
+static void
+okeys_object_field_start(void *state, char *fname, bool isnull)
+{
+ OkeysState *_state = (OkeysState *) state;
+
+ /* only collecting keys for the top level object */
+ if (_state->lex->lex_level != 1)
+ return;
+
+ /* enlarge result array if necessary */
+ if (_state->result_count >= _state->result_size)
+ {
+ _state->result_size *= 2;
+ _state->result = (char **)
+ repalloc(_state->result, sizeof(char *) * _state->result_size);
+ }
+
+ /* save a copy of the field name */
+ _state->result[_state->result_count++] = pstrdup(fname);
+}
+
+static void
+okeys_array_start(void *state)
+{
+ OkeysState *_state = (OkeysState *) state;
+
+ /* top level must be a json object */
+ if (_state->lex->lex_level == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot call %s on an array",
+ "json_object_keys")));
+}
+
+static void
+okeys_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+ OkeysState *_state = (OkeysState *) state;
+
+ /* top level must be a json object */
+ if (_state->lex->lex_level == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot call %s on a scalar",
+ "json_object_keys")));
+}
+
+/*
+ * json and jsonb getter functions
+ * these implement the -> ->> #> and #>> operators
+ * and the json{b?}_extract_path*(json, text, ...) functions
+ */
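+/*
+ * For example:
+ *     '{"a": {"b": [10, 20]}}'::jsonb -> 'a'        yields '{"b": [10, 20]}'
+ *     '{"a": {"b": [10, 20]}}'::jsonb #> '{a,b,1}'  yields '20'
+ *     '{"a": {"b": [10, 20]}}'::jsonb #>> '{a,b,1}' yields the text value 20
+ * The ->> and #>> variants return text, with strings de-escaped and JSON
+ * nulls returned as SQL NULLs; -> and #> return json/jsonb.
+ */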
+
+
+Datum
+json_object_field(PG_FUNCTION_ARGS)
+{
+ text *json = PG_GETARG_TEXT_PP(0);
+ text *fname = PG_GETARG_TEXT_PP(1);
+ char *fnamestr = text_to_cstring(fname);
+ text *result;
+
+ result = get_worker(json, &fnamestr, NULL, 1, false);
+
+ if (result != NULL)
+ PG_RETURN_TEXT_P(result);
+ else
+ PG_RETURN_NULL();
+}
+
+Datum
+jsonb_object_field(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ text *key = PG_GETARG_TEXT_PP(1);
+ JsonbValue *v;
+ JsonbValue vbuf;
+
+ if (!JB_ROOT_IS_OBJECT(jb))
+ PG_RETURN_NULL();
+
+ v = getKeyJsonValueFromContainer(&jb->root,
+ VARDATA_ANY(key),
+ VARSIZE_ANY_EXHDR(key),
+ &vbuf);
+
+ if (v != NULL)
+ PG_RETURN_JSONB_P(JsonbValueToJsonb(v));
+
+ PG_RETURN_NULL();
+}
+
+Datum
+json_object_field_text(PG_FUNCTION_ARGS)
+{
+ text *json = PG_GETARG_TEXT_PP(0);
+ text *fname = PG_GETARG_TEXT_PP(1);
+ char *fnamestr = text_to_cstring(fname);
+ text *result;
+
+ result = get_worker(json, &fnamestr, NULL, 1, true);
+
+ if (result != NULL)
+ PG_RETURN_TEXT_P(result);
+ else
+ PG_RETURN_NULL();
+}
+
+Datum
+jsonb_object_field_text(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ text *key = PG_GETARG_TEXT_PP(1);
+ JsonbValue *v;
+ JsonbValue vbuf;
+
+ if (!JB_ROOT_IS_OBJECT(jb))
+ PG_RETURN_NULL();
+
+ v = getKeyJsonValueFromContainer(&jb->root,
+ VARDATA_ANY(key),
+ VARSIZE_ANY_EXHDR(key),
+ &vbuf);
+
+ if (v != NULL && v->type != jbvNull)
+ PG_RETURN_TEXT_P(JsonbValueAsText(v));
+
+ PG_RETURN_NULL();
+}
+
+Datum
+json_array_element(PG_FUNCTION_ARGS)
+{
+ text *json = PG_GETARG_TEXT_PP(0);
+ int element = PG_GETARG_INT32(1);
+ text *result;
+
+ result = get_worker(json, NULL, &element, 1, false);
+
+ if (result != NULL)
+ PG_RETURN_TEXT_P(result);
+ else
+ PG_RETURN_NULL();
+}
+
+Datum
+jsonb_array_element(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ int element = PG_GETARG_INT32(1);
+ JsonbValue *v;
+
+ if (!JB_ROOT_IS_ARRAY(jb))
+ PG_RETURN_NULL();
+
+ /* Handle negative subscript */
+ if (element < 0)
+ {
+ uint32 nelements = JB_ROOT_COUNT(jb);
+
+ if (-element > nelements)
+ PG_RETURN_NULL();
+ else
+ element += nelements;
+ }
+
+ v = getIthJsonbValueFromContainer(&jb->root, element);
+ if (v != NULL)
+ PG_RETURN_JSONB_P(JsonbValueToJsonb(v));
+
+ PG_RETURN_NULL();
+}
+
+Datum
+json_array_element_text(PG_FUNCTION_ARGS)
+{
+ text *json = PG_GETARG_TEXT_PP(0);
+ int element = PG_GETARG_INT32(1);
+ text *result;
+
+ result = get_worker(json, NULL, &element, 1, true);
+
+ if (result != NULL)
+ PG_RETURN_TEXT_P(result);
+ else
+ PG_RETURN_NULL();
+}
+
+Datum
+jsonb_array_element_text(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ int element = PG_GETARG_INT32(1);
+ JsonbValue *v;
+
+ if (!JB_ROOT_IS_ARRAY(jb))
+ PG_RETURN_NULL();
+
+ /* Handle negative subscript */
+ if (element < 0)
+ {
+ uint32 nelements = JB_ROOT_COUNT(jb);
+
+ if (-element > nelements)
+ PG_RETURN_NULL();
+ else
+ element += nelements;
+ }
+
+ v = getIthJsonbValueFromContainer(&jb->root, element);
+
+ if (v != NULL && v->type != jbvNull)
+ PG_RETURN_TEXT_P(JsonbValueAsText(v));
+
+ PG_RETURN_NULL();
+}
+
+Datum
+json_extract_path(PG_FUNCTION_ARGS)
+{
+ return get_path_all(fcinfo, false);
+}
+
+Datum
+json_extract_path_text(PG_FUNCTION_ARGS)
+{
+ return get_path_all(fcinfo, true);
+}
+
+/*
+ * common routine for extract_path functions
+ */
+static Datum
+get_path_all(FunctionCallInfo fcinfo, bool as_text)
+{
+ text *json = PG_GETARG_TEXT_PP(0);
+ ArrayType *path = PG_GETARG_ARRAYTYPE_P(1);
+ text *result;
+ Datum *pathtext;
+ bool *pathnulls;
+ int npath;
+ char **tpath;
+ int *ipath;
+ int i;
+
+ /*
+ * If the array contains any null elements, return NULL, on the grounds
+ * that you'd have gotten NULL if any RHS value were NULL in a nested
+ * series of applications of the -> operator. (Note: because we also
+ * return NULL for error cases such as no-such-field, this is true
+ * regardless of the contents of the rest of the array.)
+ */
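+ /* For example, '{"a": {"b": 1}}'::json #> array['a', NULL] yields NULL. */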
+ if (array_contains_nulls(path))
+ PG_RETURN_NULL();
+
+ deconstruct_array(path, TEXTOID, -1, false, TYPALIGN_INT,
+ &pathtext, &pathnulls, &npath);
+
+ tpath = palloc(npath * sizeof(char *));
+ ipath = palloc(npath * sizeof(int));
+
+ for (i = 0; i < npath; i++)
+ {
+ Assert(!pathnulls[i]);
+ tpath[i] = TextDatumGetCString(pathtext[i]);
+
+ /*
+ * We have no idea at this stage what structure the document has, so just
+ * convert anything in the path that we can to an integer, and set all the
+ * other integers to INT_MIN, which will never match.
+ */
+ if (*tpath[i] != '\0')
+ {
+ int ind;
+ char *endptr;
+
+ errno = 0;
+ ind = strtoint(tpath[i], &endptr, 10);
+ if (endptr == tpath[i] || *endptr != '\0' || errno != 0)
+ ipath[i] = INT_MIN;
+ else
+ ipath[i] = ind;
+ }
+ else
+ ipath[i] = INT_MIN;
+ }
+
+ result = get_worker(json, tpath, ipath, npath, as_text);
+
+ if (result != NULL)
+ PG_RETURN_TEXT_P(result);
+ else
+ PG_RETURN_NULL();
+}
+
+/*
+ * get_worker
+ *
+ * common worker for all the json getter functions
+ *
+ * json: JSON object (in text form)
+ * tpath[]: field name(s) to extract
+ * ipath[]: array index(es) (zero-based) to extract, accepts negatives
+ * npath: length of tpath[] and/or ipath[]
+ * normalize_results: true to de-escape string and null scalars
+ *
+ * tpath can be NULL, or any one tpath[] entry can be NULL, if an object
+ * field is not to be matched at that nesting level. Similarly, ipath can
+ * be NULL, or any one ipath[] entry can be INT_MIN if an array element is
+ * not to be matched at that nesting level (a json datum should never be
+ * large enough to have -INT_MIN elements due to MaxAllocSize restriction).
+ */
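+/*
+ * For example, json_extract_path('{"a": [10, 20]}', 'a', '1') reaches this
+ * function with tpath = {"a", "1"}, ipath = {INT_MIN, 1} and npath = 2, and
+ * gets back the JSON text '20'.
+ */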
+static text *
+get_worker(text *json,
+ char **tpath,
+ int *ipath,
+ int npath,
+ bool normalize_results)
+{
+ JsonLexContext *lex = makeJsonLexContext(json, true);
+ JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
+ GetState *state = palloc0(sizeof(GetState));
+
+ Assert(npath >= 0);
+
+ state->lex = lex;
+ /* is it "_as_text" variant? */
+ state->normalize_results = normalize_results;
+ state->npath = npath;
+ state->path_names = tpath;
+ state->path_indexes = ipath;
+ state->pathok = palloc0(sizeof(bool) * npath);
+ state->array_cur_index = palloc(sizeof(int) * npath);
+
+ if (npath > 0)
+ state->pathok[0] = true;
+
+ sem->semstate = (void *) state;
+
+ /*
+ * Not all variants need all the semantic routines. For maximum efficiency,
+ * only set the ones that are actually needed.
+ */
+ sem->scalar = get_scalar;
+ if (npath == 0)
+ {
+ sem->object_start = get_object_start;
+ sem->object_end = get_object_end;
+ sem->array_start = get_array_start;
+ sem->array_end = get_array_end;
+ }
+ if (tpath != NULL)
+ {
+ sem->object_field_start = get_object_field_start;
+ sem->object_field_end = get_object_field_end;
+ }
+ if (ipath != NULL)
+ {
+ sem->array_start = get_array_start;
+ sem->array_element_start = get_array_element_start;
+ sem->array_element_end = get_array_element_end;
+ }
+
+ pg_parse_json_or_ereport(lex, sem);
+
+ return state->tresult;
+}
+
+static void
+get_object_start(void *state)
+{
+ GetState *_state = (GetState *) state;
+ int lex_level = _state->lex->lex_level;
+
+ if (lex_level == 0 && _state->npath == 0)
+ {
+ /*
+ * Special case: we should match the entire object. We only need this
+ * at outermost level because at nested levels the match will have
+ * been started by the outer field or array element callback.
+ */
+ _state->result_start = _state->lex->token_start;
+ }
+}
+
+static void
+get_object_end(void *state)
+{
+ GetState *_state = (GetState *) state;
+ int lex_level = _state->lex->lex_level;
+
+ if (lex_level == 0 && _state->npath == 0)
+ {
+ /* Special case: return the entire object */
+ char *start = _state->result_start;
+ int len = _state->lex->prev_token_terminator - start;
+
+ _state->tresult = cstring_to_text_with_len(start, len);
+ }
+}
+
+static void
+get_object_field_start(void *state, char *fname, bool isnull)
+{
+ GetState *_state = (GetState *) state;
+ bool get_next = false;
+ int lex_level = _state->lex->lex_level;
+
+ if (lex_level <= _state->npath &&
+ _state->pathok[lex_level - 1] &&
+ _state->path_names != NULL &&
+ _state->path_names[lex_level - 1] != NULL &&
+ strcmp(fname, _state->path_names[lex_level - 1]) == 0)
+ {
+ if (lex_level < _state->npath)
+ {
+ /* if not at end of path just mark path ok */
+ _state->pathok[lex_level] = true;
+ }
+ else
+ {
+ /* end of path, so we want this value */
+ get_next = true;
+ }
+ }
+
+ if (get_next)
+ {
+ /* this object overrides any previous matching object */
+ _state->tresult = NULL;
+ _state->result_start = NULL;
+
+ if (_state->normalize_results &&
+ _state->lex->token_type == JSON_TOKEN_STRING)
+ {
+ /* for as_text variants, tell get_scalar to set it for us */
+ _state->next_scalar = true;
+ }
+ else
+ {
+ /* for non-as_text variants, just note the json starting point */
+ _state->result_start = _state->lex->token_start;
+ }
+ }
+}
+
+static void
+get_object_field_end(void *state, char *fname, bool isnull)
+{
+ GetState *_state = (GetState *) state;
+ bool get_last = false;
+ int lex_level = _state->lex->lex_level;
+
+ /* same tests as in get_object_field_start */
+ if (lex_level <= _state->npath &&
+ _state->pathok[lex_level - 1] &&
+ _state->path_names != NULL &&
+ _state->path_names[lex_level - 1] != NULL &&
+ strcmp(fname, _state->path_names[lex_level - 1]) == 0)
+ {
+ if (lex_level < _state->npath)
+ {
+ /* done with this field so reset pathok */
+ _state->pathok[lex_level] = false;
+ }
+ else
+ {
+ /* end of path, so we want this value */
+ get_last = true;
+ }
+ }
+
+ /* for as_text scalar case, our work is already done */
+ if (get_last && _state->result_start != NULL)
+ {
+ /*
+ * make a text object from the json text running from the previously
+ * noted start up to the end of the previous token (the lexer is by now
+ * ahead of us on whatever came after what we're interested in).
+ */
+ if (isnull && _state->normalize_results)
+ _state->tresult = (text *) NULL;
+ else
+ {
+ char *start = _state->result_start;
+ int len = _state->lex->prev_token_terminator - start;
+
+ _state->tresult = cstring_to_text_with_len(start, len);
+ }
+
+ /* this should be unnecessary but let's do it for cleanliness: */
+ _state->result_start = NULL;
+ }
+}
+
+static void
+get_array_start(void *state)
+{
+ GetState *_state = (GetState *) state;
+ int lex_level = _state->lex->lex_level;
+
+ if (lex_level < _state->npath)
+ {
+ /* Initialize counting of elements in this array */
+ _state->array_cur_index[lex_level] = -1;
+
+ /* INT_MIN value is reserved to represent invalid subscript */
+ if (_state->path_indexes[lex_level] < 0 &&
+ _state->path_indexes[lex_level] != INT_MIN)
+ {
+ /* Negative subscript -- convert to the equivalent non-negative subscript */
+ JsonParseErrorType error;
+ int nelements;
+
+ error = json_count_array_elements(_state->lex, &nelements);
+ if (error != JSON_SUCCESS)
+ json_ereport_error(error, _state->lex);
+
+ if (-_state->path_indexes[lex_level] <= nelements)
+ _state->path_indexes[lex_level] += nelements;
+ }
+ }
+ else if (lex_level == 0 && _state->npath == 0)
+ {
+ /*
+ * Special case: we should match the entire array. We only need this
+ * at the outermost level because at nested levels the match will have
+ * been started by the outer field or array element callback.
+ */
+ _state->result_start = _state->lex->token_start;
+ }
+}
+
+static void
+get_array_end(void *state)
+{
+ GetState *_state = (GetState *) state;
+ int lex_level = _state->lex->lex_level;
+
+ if (lex_level == 0 && _state->npath == 0)
+ {
+ /* Special case: return the entire array */
+ char *start = _state->result_start;
+ int len = _state->lex->prev_token_terminator - start;
+
+ _state->tresult = cstring_to_text_with_len(start, len);
+ }
+}
+
+static void
+get_array_element_start(void *state, bool isnull)
+{
+ GetState *_state = (GetState *) state;
+ bool get_next = false;
+ int lex_level = _state->lex->lex_level;
+
+ /* Update array element counter */
+ if (lex_level <= _state->npath)
+ _state->array_cur_index[lex_level - 1]++;
+
+ if (lex_level <= _state->npath &&
+ _state->pathok[lex_level - 1] &&
+ _state->path_indexes != NULL &&
+ _state->array_cur_index[lex_level - 1] == _state->path_indexes[lex_level - 1])
+ {
+ if (lex_level < _state->npath)
+ {
+ /* if not at end of path just mark path ok */
+ _state->pathok[lex_level] = true;
+ }
+ else
+ {
+ /* end of path, so we want this value */
+ get_next = true;
+ }
+ }
+
+ /* same logic as for objects */
+ if (get_next)
+ {
+ _state->tresult = NULL;
+ _state->result_start = NULL;
+
+ if (_state->normalize_results &&
+ _state->lex->token_type == JSON_TOKEN_STRING)
+ {
+ _state->next_scalar = true;
+ }
+ else
+ {
+ _state->result_start = _state->lex->token_start;
+ }
+ }
+}
+
+static void
+get_array_element_end(void *state, bool isnull)
+{
+ GetState *_state = (GetState *) state;
+ bool get_last = false;
+ int lex_level = _state->lex->lex_level;
+
+ /* same tests as in get_array_element_start */
+ if (lex_level <= _state->npath &&
+ _state->pathok[lex_level - 1] &&
+ _state->path_indexes != NULL &&
+ _state->array_cur_index[lex_level - 1] == _state->path_indexes[lex_level - 1])
+ {
+ if (lex_level < _state->npath)
+ {
+ /* done with this element so reset pathok */
+ _state->pathok[lex_level] = false;
+ }
+ else
+ {
+ /* end of path, so we want this value */
+ get_last = true;
+ }
+ }
+
+ /* same logic as for objects */
+ if (get_last && _state->result_start != NULL)
+ {
+ if (isnull && _state->normalize_results)
+ _state->tresult = (text *) NULL;
+ else
+ {
+ char *start = _state->result_start;
+ int len = _state->lex->prev_token_terminator - start;
+
+ _state->tresult = cstring_to_text_with_len(start, len);
+ }
+
+ _state->result_start = NULL;
+ }
+}
+
+static void
+get_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+ GetState *_state = (GetState *) state;
+ int lex_level = _state->lex->lex_level;
+
+ /* Check for whole-object match */
+ if (lex_level == 0 && _state->npath == 0)
+ {
+ if (_state->normalize_results && tokentype == JSON_TOKEN_STRING)
+ {
+ /* we want the de-escaped string */
+ _state->next_scalar = true;
+ }
+ else if (_state->normalize_results && tokentype == JSON_TOKEN_NULL)
+ {
+ _state->tresult = (text *) NULL;
+ }
+ else
+ {
+ /*
+ * This is a bit hokey: we will suppress whitespace after the
+ * scalar token, but not whitespace before it. Probably not worth
+ * doing our own space-skipping to avoid that.
+ */
+ char *start = _state->lex->input;
+ int len = _state->lex->prev_token_terminator - start;
+
+ _state->tresult = cstring_to_text_with_len(start, len);
+ }
+ }
+
+ if (_state->next_scalar)
+ {
+ /* a de-escaped text value is wanted, so supply it */
+ _state->tresult = cstring_to_text(token);
+ /* make sure the next call to get_scalar doesn't overwrite it */
+ _state->next_scalar = false;
+ }
+}
+
+Datum
+jsonb_extract_path(PG_FUNCTION_ARGS)
+{
+ return get_jsonb_path_all(fcinfo, false);
+}
+
+Datum
+jsonb_extract_path_text(PG_FUNCTION_ARGS)
+{
+ return get_jsonb_path_all(fcinfo, true);
+}
+
+static Datum
+get_jsonb_path_all(FunctionCallInfo fcinfo, bool as_text)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ ArrayType *path = PG_GETARG_ARRAYTYPE_P(1);
+ Datum *pathtext;
+ bool *pathnulls;
+ bool isnull;
+ int npath;
+ Datum res;
+
+ /*
+ * If the array contains any null elements, return NULL, on the grounds
+ * that you'd have gotten NULL if any RHS value were NULL in a nested
+ * series of applications of the -> operator. (Note: because we also
+ * return NULL for error cases such as no-such-field, this is true
+ * regardless of the contents of the rest of the array.)
+ */
+ if (array_contains_nulls(path))
+ PG_RETURN_NULL();
+
+ deconstruct_array(path, TEXTOID, -1, false, TYPALIGN_INT,
+ &pathtext, &pathnulls, &npath);
+
+ res = jsonb_get_element(jb, pathtext, npath, &isnull, as_text);
+
+ if (isnull)
+ PG_RETURN_NULL();
+ else
+ PG_RETURN_DATUM(res);
+}
+
+Datum
+jsonb_get_element(Jsonb *jb, Datum *path, int npath, bool *isnull, bool as_text)
+{
+ JsonbContainer *container = &jb->root;
+ JsonbValue *jbvp = NULL;
+ int i;
+ bool have_object = false,
+ have_array = false;
+
+ *isnull = false;
+
+ /* Identify whether we have object, array, or scalar at top-level */
+ if (JB_ROOT_IS_OBJECT(jb))
+ have_object = true;
+ else if (JB_ROOT_IS_ARRAY(jb) && !JB_ROOT_IS_SCALAR(jb))
+ have_array = true;
+ else
+ {
+ Assert(JB_ROOT_IS_ARRAY(jb) && JB_ROOT_IS_SCALAR(jb));
+ /* Extract the scalar value, if it is what we'll return */
+ if (npath <= 0)
+ jbvp = getIthJsonbValueFromContainer(container, 0);
+ }
+
+ /*
+ * If the array is empty, return the entire LHS object, on the grounds
+ * that we should do zero field or element extractions. For the
+ * non-scalar case we can just hand back the object without much work. For
+ * the scalar case, fall through and deal with the value below the loop.
+ * (This inconsistency arises because there's no easy way to generate a
+ * JsonbValue directly for root-level containers.)
+ */
+ if (npath <= 0 && jbvp == NULL)
+ {
+ if (as_text)
+ {
+ return PointerGetDatum(cstring_to_text(JsonbToCString(NULL,
+ container,
+ VARSIZE(jb))));
+ }
+ else
+ {
+ /* not text mode - just hand back the jsonb */
+ PG_RETURN_JSONB_P(jb);
+ }
+ }
+
+ for (i = 0; i < npath; i++)
+ {
+ if (have_object)
+ {
+ text *subscr = DatumGetTextPP(path[i]);
+
+ jbvp = getKeyJsonValueFromContainer(container,
+ VARDATA_ANY(subscr),
+ VARSIZE_ANY_EXHDR(subscr),
+ NULL);
+ }
+ else if (have_array)
+ {
+ int lindex;
+ uint32 index;
+ char *indextext = TextDatumGetCString(path[i]);
+ char *endptr;
+
+ errno = 0;
+ lindex = strtoint(indextext, &endptr, 10);
+ if (endptr == indextext || *endptr != '\0' || errno != 0)
+ {
+ *isnull = true;
+ return PointerGetDatum(NULL);
+ }
+
+ if (lindex >= 0)
+ {
+ index = (uint32) lindex;
+ }
+ else
+ {
+ /* Handle negative subscript */
+ uint32 nelements;
+
+ /* Container must be array, but make sure */
+ if (!JsonContainerIsArray(container))
+ elog(ERROR, "not a jsonb array");
+
+ nelements = JsonContainerSize(container);
+
+ if (lindex == INT_MIN || -lindex > nelements)
+ {
+ *isnull = true;
+ return PointerGetDatum(NULL);
+ }
+ else
+ index = nelements + lindex;
+ }
+
+ jbvp = getIthJsonbValueFromContainer(container, index);
+ }
+ else
+ {
+ /* scalar, extraction yields a null */
+ *isnull = true;
+ return PointerGetDatum(NULL);
+ }
+
+ if (jbvp == NULL)
+ {
+ *isnull = true;
+ return PointerGetDatum(NULL);
+ }
+ else if (i == npath - 1)
+ break;
+
+ if (jbvp->type == jbvBinary)
+ {
+ container = jbvp->val.binary.data;
+ have_object = JsonContainerIsObject(container);
+ have_array = JsonContainerIsArray(container);
+ Assert(!JsonContainerIsScalar(container));
+ }
+ else
+ {
+ Assert(IsAJsonbScalar(jbvp));
+ have_object = false;
+ have_array = false;
+ }
+ }
+
+ if (as_text)
+ {
+ if (jbvp->type == jbvNull)
+ {
+ *isnull = true;
+ return PointerGetDatum(NULL);
+ }
+
+ return PointerGetDatum(JsonbValueAsText(jbvp));
+ }
+ else
+ {
+ Jsonb *res = JsonbValueToJsonb(jbvp);
+
+ /* not text mode - just hand back the jsonb */
+ PG_RETURN_JSONB_P(res);
+ }
+}
+
+Datum
+jsonb_set_element(Jsonb *jb, Datum *path, int path_len,
+ JsonbValue *newval)
+{
+ JsonbValue *res;
+ JsonbParseState *state = NULL;
+ JsonbIterator *it;
+ bool *path_nulls = palloc0(path_len * sizeof(bool));
+
+ if (newval->type == jbvArray && newval->val.array.rawScalar)
+ *newval = newval->val.array.elems[0];
+
+ it = JsonbIteratorInit(&jb->root);
+
+ res = setPath(&it, path, path_nulls, path_len, &state, 0, newval,
+ JB_PATH_CREATE | JB_PATH_FILL_GAPS |
+ JB_PATH_CONSISTENT_POSITION);
+
+ pfree(path_nulls);
+
+ PG_RETURN_JSONB_P(JsonbValueToJsonb(res));
+}
+
+static void
+push_null_elements(JsonbParseState **ps, int num)
+{
+ JsonbValue null;
+
+ null.type = jbvNull;
+
+ while (num-- > 0)
+ pushJsonbValue(ps, WJB_ELEM, &null);
+}
+
+/*
+ * Prepare a new structure containing nested empty objects and arrays
+ * corresponding to the specified path, and assign a new value at the end of
+ * this path. E.g. the path [a][0][b] with the new value 1 will produce the
+ * structure {a: [{b: 1}]}.
+ *
+ * The caller is responsible for making sure such a path does not exist yet.
+ */
+static void
+push_path(JsonbParseState **st, int level, Datum *path_elems,
+ bool *path_nulls, int path_len, JsonbValue *newval)
+{
+ /*
+ * tpath contains the expected type of an empty jsonb created at each level
+ * at or above the current one, either jbvObject or jbvArray. Since it only
+ * holds information about the path slice from level to the end, the access
+ * index must be normalized by level.
+ */
+ enum jbvType *tpath = palloc0((path_len - level) * sizeof(enum jbvType));
+ JsonbValue newkey;
+
+ /*
+ * Create first part of the chain with beginning tokens. For the current
+ * level WJB_BEGIN_OBJECT/WJB_BEGIN_ARRAY was already created, so start
+ * with the next one.
+ */
+ for (int i = level + 1; i < path_len; i++)
+ {
+ char *c,
+ *badp;
+ int lindex;
+
+ if (path_nulls[i])
+ break;
+
+ /*
+ * Try to convert to an integer to find out the expected type, object
+ * or array.
+ */
+ c = TextDatumGetCString(path_elems[i]);
+ errno = 0;
+ lindex = strtoint(c, &badp, 10);
+ if (badp == c || *badp != '\0' || errno != 0)
+ {
+ /* text, an object is expected */
+ newkey.type = jbvString;
+ newkey.val.string.val = c;
+ newkey.val.string.len = strlen(c);
+
+ (void) pushJsonbValue(st, WJB_BEGIN_OBJECT, NULL);
+ (void) pushJsonbValue(st, WJB_KEY, &newkey);
+
+ tpath[i - level] = jbvObject;
+ }
+ else
+ {
+ /* integer, an array is expected */
+ (void) pushJsonbValue(st, WJB_BEGIN_ARRAY, NULL);
+
+ push_null_elements(st, lindex);
+
+ tpath[i - level] = jbvArray;
+ }
+ }
+
+ /* Insert an actual value for either an object or array */
+ if (tpath[(path_len - level) - 1] == jbvArray)
+ {
+ (void) pushJsonbValue(st, WJB_ELEM, newval);
+ }
+ else
+ (void) pushJsonbValue(st, WJB_VALUE, newval);
+
+ /*
+ * Close everything up to the last but one level. The last one will be
+ * closed outside of this function.
+ */
+ for (int i = path_len - 1; i > level; i--)
+ {
+ if (path_nulls[i])
+ break;
+
+ if (tpath[i - level] == jbvObject)
+ (void) pushJsonbValue(st, WJB_END_OBJECT, NULL);
+ else
+ (void) pushJsonbValue(st, WJB_END_ARRAY, NULL);
+ }
+}
+
+/*
+ * Return the text representation of the given JsonbValue.
+ */
+static text *
+JsonbValueAsText(JsonbValue *v)
+{
+ switch (v->type)
+ {
+ case jbvNull:
+ return NULL;
+
+ case jbvBool:
+ return v->val.boolean ?
+ cstring_to_text_with_len("true", 4) :
+ cstring_to_text_with_len("false", 5);
+
+ case jbvString:
+ return cstring_to_text_with_len(v->val.string.val,
+ v->val.string.len);
+
+ case jbvNumeric:
+ {
+ Datum cstr;
+
+ cstr = DirectFunctionCall1(numeric_out,
+ PointerGetDatum(v->val.numeric));
+
+ return cstring_to_text(DatumGetCString(cstr));
+ }
+
+ case jbvBinary:
+ {
+ StringInfoData jtext;
+
+ initStringInfo(&jtext);
+ (void) JsonbToCString(&jtext, v->val.binary.data,
+ v->val.binary.len);
+
+ return cstring_to_text_with_len(jtext.data, jtext.len);
+ }
+
+ default:
+ elog(ERROR, "unrecognized jsonb type: %d", (int) v->type);
+ return NULL;
+ }
+}
+
+/*
+ * SQL function json_array_length(json) -> int
+ */
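+/*
+ * For example, json_array_length('[1, 2, [3, 4]]') returns 3; only top-level
+ * elements are counted.
+ */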
+Datum
+json_array_length(PG_FUNCTION_ARGS)
+{
+ text *json = PG_GETARG_TEXT_PP(0);
+ AlenState *state;
+ JsonLexContext *lex;
+ JsonSemAction *sem;
+
+ lex = makeJsonLexContext(json, false);
+ state = palloc0(sizeof(AlenState));
+ sem = palloc0(sizeof(JsonSemAction));
+
+ /* palloc0 does this for us */
+#if 0
+ state->count = 0;
+#endif
+ state->lex = lex;
+
+ sem->semstate = (void *) state;
+ sem->object_start = alen_object_start;
+ sem->scalar = alen_scalar;
+ sem->array_element_start = alen_array_element_start;
+
+ pg_parse_json_or_ereport(lex, sem);
+
+ PG_RETURN_INT32(state->count);
+}
+
+Datum
+jsonb_array_length(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+
+ if (JB_ROOT_IS_SCALAR(jb))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot get array length of a scalar")));
+ else if (!JB_ROOT_IS_ARRAY(jb))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot get array length of a non-array")));
+
+ PG_RETURN_INT32(JB_ROOT_COUNT(jb));
+}
+
+/*
+ * These next two checks ensure that the json is an array (since it can't be
+ * a scalar or an object).
+ */
+
+static void
+alen_object_start(void *state)
+{
+ AlenState *_state = (AlenState *) state;
+
+ /* json structure check */
+ if (_state->lex->lex_level == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot get array length of a non-array")));
+}
+
+static void
+alen_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+ AlenState *_state = (AlenState *) state;
+
+ /* json structure check */
+ if (_state->lex->lex_level == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot get array length of a scalar")));
+}
+
+static void
+alen_array_element_start(void *state, bool isnull)
+{
+ AlenState *_state = (AlenState *) state;
+
+ /* just count up all the level 1 elements */
+ if (_state->lex->lex_level == 1)
+ _state->count++;
+}
+
+/*
+ * SQL functions json_each and json_each_text
+ *
+ * decompose a json object into key value pairs.
+ *
+ * Unlike json_object_keys() these SRFs operate in materialize mode,
+ * stashing results into a Tuplestore object as they go.
+ * The construction of tuples is done using a temporary memory context
+ * that is cleared out after each tuple is built.
+ */
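+/*
+ * For example:
+ *     SELECT * FROM json_each('{"a": "x", "b": 1}');
+ * returns the rows ('a', '"x"') and ('b', '1'); json_each_text returns
+ * ('a', 'x') and ('b', '1') instead, with strings de-escaped and JSON nulls
+ * turned into SQL NULLs.
+ */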
+Datum
+json_each(PG_FUNCTION_ARGS)
+{
+ return each_worker(fcinfo, false);
+}
+
+Datum
+jsonb_each(PG_FUNCTION_ARGS)
+{
+ return each_worker_jsonb(fcinfo, "jsonb_each", false);
+}
+
+Datum
+json_each_text(PG_FUNCTION_ARGS)
+{
+ return each_worker(fcinfo, true);
+}
+
+Datum
+jsonb_each_text(PG_FUNCTION_ARGS)
+{
+ return each_worker_jsonb(fcinfo, "jsonb_each_text", true);
+}
+
+static Datum
+each_worker_jsonb(FunctionCallInfo fcinfo, const char *funcname, bool as_text)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ ReturnSetInfo *rsi;
+ MemoryContext old_cxt,
+ tmp_cxt;
+ bool skipNested = false;
+ JsonbIterator *it;
+ JsonbValue v;
+ JsonbIteratorToken r;
+
+ if (!JB_ROOT_IS_OBJECT(jb))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot call %s on a non-object",
+ funcname)));
+
+ rsi = (ReturnSetInfo *) fcinfo->resultinfo;
+ InitMaterializedSRF(fcinfo, MAT_SRF_BLESS);
+
+ tmp_cxt = AllocSetContextCreate(CurrentMemoryContext,
+ "jsonb_each temporary cxt",
+ ALLOCSET_DEFAULT_SIZES);
+
+ it = JsonbIteratorInit(&jb->root);
+
+ while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE)
+ {
+ skipNested = true;
+
+ if (r == WJB_KEY)
+ {
+ text *key;
+ Datum values[2];
+ bool nulls[2] = {false, false};
+
+ /* Use the tmp context so we can clean up after each tuple is done */
+ old_cxt = MemoryContextSwitchTo(tmp_cxt);
+
+ key = cstring_to_text_with_len(v.val.string.val, v.val.string.len);
+
+ /*
+ * The next thing the iterator fetches should be the value, no
+ * matter what shape it is.
+ */
+ r = JsonbIteratorNext(&it, &v, skipNested);
+ Assert(r != WJB_DONE);
+
+ values[0] = PointerGetDatum(key);
+
+ if (as_text)
+ {
+ if (v.type == jbvNull)
+ {
+ /* a json null is an sql null in text mode */
+ nulls[1] = true;
+ values[1] = (Datum) NULL;
+ }
+ else
+ values[1] = PointerGetDatum(JsonbValueAsText(&v));
+ }
+ else
+ {
+ /* Not in text mode, just return the Jsonb */
+ Jsonb *val = JsonbValueToJsonb(&v);
+
+ values[1] = PointerGetDatum(val);
+ }
+
+ tuplestore_putvalues(rsi->setResult, rsi->setDesc, values, nulls);
+
+ /* clean up and switch back */
+ MemoryContextSwitchTo(old_cxt);
+ MemoryContextReset(tmp_cxt);
+ }
+ }
+
+ MemoryContextDelete(tmp_cxt);
+
+ PG_RETURN_NULL();
+}
+
+
+static Datum
+each_worker(FunctionCallInfo fcinfo, bool as_text)
+{
+ text *json = PG_GETARG_TEXT_PP(0);
+ JsonLexContext *lex;
+ JsonSemAction *sem;
+ ReturnSetInfo *rsi;
+ EachState *state;
+
+ lex = makeJsonLexContext(json, true);
+ state = palloc0(sizeof(EachState));
+ sem = palloc0(sizeof(JsonSemAction));
+
+ rsi = (ReturnSetInfo *) fcinfo->resultinfo;
+
+ InitMaterializedSRF(fcinfo, MAT_SRF_BLESS);
+ state->tuple_store = rsi->setResult;
+ state->ret_tdesc = rsi->setDesc;
+
+ sem->semstate = (void *) state;
+ sem->array_start = each_array_start;
+ sem->scalar = each_scalar;
+ sem->object_field_start = each_object_field_start;
+ sem->object_field_end = each_object_field_end;
+
+ state->normalize_results = as_text;
+ state->next_scalar = false;
+ state->lex = lex;
+ state->tmp_cxt = AllocSetContextCreate(CurrentMemoryContext,
+ "json_each temporary cxt",
+ ALLOCSET_DEFAULT_SIZES);
+
+ pg_parse_json_or_ereport(lex, sem);
+
+ MemoryContextDelete(state->tmp_cxt);
+
+ PG_RETURN_NULL();
+}
+
+
+static void
+each_object_field_start(void *state, char *fname, bool isnull)
+{
+ EachState *_state = (EachState *) state;
+
+ /* save a pointer to where the value starts */
+ if (_state->lex->lex_level == 1)
+ {
+ /*
+ * next_scalar will be reset in the object_field_end handler, and
+ * since we know the value is a scalar there is no danger of it being
+ * on while recursing down the tree.
+ */
+ if (_state->normalize_results && _state->lex->token_type == JSON_TOKEN_STRING)
+ _state->next_scalar = true;
+ else
+ _state->result_start = _state->lex->token_start;
+ }
+}
+
+static void
+each_object_field_end(void *state, char *fname, bool isnull)
+{
+ EachState *_state = (EachState *) state;
+ MemoryContext old_cxt;
+ int len;
+ text *val;
+ HeapTuple tuple;
+ Datum values[2];
+ bool nulls[2] = {false, false};
+
+ /* skip over nested objects */
+ if (_state->lex->lex_level != 1)
+ return;
+
+ /* use the tmp context so we can clean up after each tuple is done */
+ old_cxt = MemoryContextSwitchTo(_state->tmp_cxt);
+
+ values[0] = CStringGetTextDatum(fname);
+
+ if (isnull && _state->normalize_results)
+ {
+ nulls[1] = true;
+ values[1] = (Datum) 0;
+ }
+ else if (_state->next_scalar)
+ {
+ values[1] = CStringGetTextDatum(_state->normalized_scalar);
+ _state->next_scalar = false;
+ }
+ else
+ {
+ len = _state->lex->prev_token_terminator - _state->result_start;
+ val = cstring_to_text_with_len(_state->result_start, len);
+ values[1] = PointerGetDatum(val);
+ }
+
+ tuple = heap_form_tuple(_state->ret_tdesc, values, nulls);
+
+ tuplestore_puttuple(_state->tuple_store, tuple);
+
+ /* clean up and switch back */
+ MemoryContextSwitchTo(old_cxt);
+ MemoryContextReset(_state->tmp_cxt);
+}
+
+static void
+each_array_start(void *state)
+{
+ EachState *_state = (EachState *) state;
+
+ /* json structure check */
+ if (_state->lex->lex_level == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot deconstruct an array as an object")));
+}
+
+static void
+each_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+ EachState *_state = (EachState *) state;
+
+ /* json structure check */
+ if (_state->lex->lex_level == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot deconstruct a scalar")));
+
+ /* supply de-escaped value if required */
+ if (_state->next_scalar)
+ _state->normalized_scalar = token;
+}
+
+/*
+ * SQL functions json_array_elements and json_array_elements_text
+ *
+ * get the elements from a json array
+ *
+ * a lot of this processing is similar to the json_each* functions
+ */
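+/*
+ * For example:
+ *     SELECT * FROM json_array_elements('[1, "a", {"b": 2}]');
+ * returns the rows '1', '"a"' and '{"b": 2}'; the _text variants return
+ * '1', 'a' and '{"b": 2}' instead.
+ */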
+
+Datum
+jsonb_array_elements(PG_FUNCTION_ARGS)
+{
+ return elements_worker_jsonb(fcinfo, "jsonb_array_elements", false);
+}
+
+Datum
+jsonb_array_elements_text(PG_FUNCTION_ARGS)
+{
+ return elements_worker_jsonb(fcinfo, "jsonb_array_elements_text", true);
+}
+
+static Datum
+elements_worker_jsonb(FunctionCallInfo fcinfo, const char *funcname,
+ bool as_text)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ ReturnSetInfo *rsi;
+ MemoryContext old_cxt,
+ tmp_cxt;
+ bool skipNested = false;
+ JsonbIterator *it;
+ JsonbValue v;
+ JsonbIteratorToken r;
+
+ if (JB_ROOT_IS_SCALAR(jb))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot extract elements from a scalar")));
+ else if (!JB_ROOT_IS_ARRAY(jb))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot extract elements from an object")));
+
+ rsi = (ReturnSetInfo *) fcinfo->resultinfo;
+
+ InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC | MAT_SRF_BLESS);
+
+ tmp_cxt = AllocSetContextCreate(CurrentMemoryContext,
+ "jsonb_array_elements temporary cxt",
+ ALLOCSET_DEFAULT_SIZES);
+
+ it = JsonbIteratorInit(&jb->root);
+
+ while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE)
+ {
+ skipNested = true;
+
+ if (r == WJB_ELEM)
+ {
+ Datum values[1];
+ bool nulls[1] = {false};
+
+ /* use the tmp context so we can clean up after each tuple is done */
+ old_cxt = MemoryContextSwitchTo(tmp_cxt);
+
+ if (as_text)
+ {
+ if (v.type == jbvNull)
+ {
+ /* a json null is an sql null in text mode */
+ nulls[0] = true;
+ values[0] = (Datum) NULL;
+ }
+ else
+ values[0] = PointerGetDatum(JsonbValueAsText(&v));
+ }
+ else
+ {
+ /* Not in text mode, just return the Jsonb */
+ Jsonb *val = JsonbValueToJsonb(&v);
+
+ values[0] = PointerGetDatum(val);
+ }
+
+ tuplestore_putvalues(rsi->setResult, rsi->setDesc, values, nulls);
+
+ /* clean up and switch back */
+ MemoryContextSwitchTo(old_cxt);
+ MemoryContextReset(tmp_cxt);
+ }
+ }
+
+ MemoryContextDelete(tmp_cxt);
+
+ PG_RETURN_NULL();
+}
+
+Datum
+json_array_elements(PG_FUNCTION_ARGS)
+{
+ return elements_worker(fcinfo, "json_array_elements", false);
+}
+
+Datum
+json_array_elements_text(PG_FUNCTION_ARGS)
+{
+ return elements_worker(fcinfo, "json_array_elements_text", true);
+}
+
+static Datum
+elements_worker(FunctionCallInfo fcinfo, const char *funcname, bool as_text)
+{
+ text *json = PG_GETARG_TEXT_PP(0);
+
+ /* elements only needs escaped strings when as_text */
+ JsonLexContext *lex = makeJsonLexContext(json, as_text);
+ JsonSemAction *sem;
+ ReturnSetInfo *rsi;
+ ElementsState *state;
+
+ state = palloc0(sizeof(ElementsState));
+ sem = palloc0(sizeof(JsonSemAction));
+
+ InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC | MAT_SRF_BLESS);
+ rsi = (ReturnSetInfo *) fcinfo->resultinfo;
+ state->tuple_store = rsi->setResult;
+ state->ret_tdesc = rsi->setDesc;
+
+ sem->semstate = (void *) state;
+ sem->object_start = elements_object_start;
+ sem->scalar = elements_scalar;
+ sem->array_element_start = elements_array_element_start;
+ sem->array_element_end = elements_array_element_end;
+
+ state->function_name = funcname;
+ state->normalize_results = as_text;
+ state->next_scalar = false;
+ state->lex = lex;
+ state->tmp_cxt = AllocSetContextCreate(CurrentMemoryContext,
+ "json_array_elements temporary cxt",
+ ALLOCSET_DEFAULT_SIZES);
+
+ pg_parse_json_or_ereport(lex, sem);
+
+ MemoryContextDelete(state->tmp_cxt);
+
+ PG_RETURN_NULL();
+}
+
+static void
+elements_array_element_start(void *state, bool isnull)
+{
+ ElementsState *_state = (ElementsState *) state;
+
+ /* save a pointer to where the value starts */
+ if (_state->lex->lex_level == 1)
+ {
+ /*
+ * next_scalar will be reset in the array_element_end handler, and
+ * since we know the value is a scalar there is no danger of it being
+ * on while recursing down the tree.
+ */
+ if (_state->normalize_results && _state->lex->token_type == JSON_TOKEN_STRING)
+ _state->next_scalar = true;
+ else
+ _state->result_start = _state->lex->token_start;
+ }
+}
+
+static void
+elements_array_element_end(void *state, bool isnull)
+{
+ ElementsState *_state = (ElementsState *) state;
+ MemoryContext old_cxt;
+ int len;
+ text *val;
+ HeapTuple tuple;
+ Datum values[1];
+ bool nulls[1] = {false};
+
+ /* skip over nested objects */
+ if (_state->lex->lex_level != 1)
+ return;
+
+ /* use the tmp context so we can clean up after each tuple is done */
+ old_cxt = MemoryContextSwitchTo(_state->tmp_cxt);
+
+ if (isnull && _state->normalize_results)
+ {
+ nulls[0] = true;
+ values[0] = (Datum) NULL;
+ }
+ else if (_state->next_scalar)
+ {
+ values[0] = CStringGetTextDatum(_state->normalized_scalar);
+ _state->next_scalar = false;
+ }
+ else
+ {
+ len = _state->lex->prev_token_terminator - _state->result_start;
+ val = cstring_to_text_with_len(_state->result_start, len);
+ values[0] = PointerGetDatum(val);
+ }
+
+ tuple = heap_form_tuple(_state->ret_tdesc, values, nulls);
+
+ tuplestore_puttuple(_state->tuple_store, tuple);
+
+ /* clean up and switch back */
+ MemoryContextSwitchTo(old_cxt);
+ MemoryContextReset(_state->tmp_cxt);
+}
+
+static void
+elements_object_start(void *state)
+{
+ ElementsState *_state = (ElementsState *) state;
+
+ /* json structure check */
+ if (_state->lex->lex_level == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot call %s on a non-array",
+ _state->function_name)));
+}
+
+static void
+elements_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+ ElementsState *_state = (ElementsState *) state;
+
+ /* json structure check */
+ if (_state->lex->lex_level == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot call %s on a scalar",
+ _state->function_name)));
+
+ /* supply de-escaped value if required */
+ if (_state->next_scalar)
+ _state->normalized_scalar = token;
+}
+
+/*
+ * SQL functions json{b}_populate_record and json{b}_to_record
+ *
+ * set fields in a record from the argument json
+ *
+ * Code adapted shamelessly from hstore's populate_record
+ * which is in turn partly adapted from record_out.
+ *
+ * The json is decomposed into a hash table, in which each
+ * field in the record is then looked up by name. For jsonb
+ * we fetch the values directly from the object.
+ */
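+/*
+ * For example, given CREATE TYPE twoints AS (a int, b int):
+ *     SELECT * FROM json_populate_record(null::twoints, '{"a": 1, "c": 3}');
+ * returns (1, NULL); json keys with no matching column are ignored, and
+ * columns with no matching key keep the value from the base record argument.
+ */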
+Datum
+jsonb_populate_record(PG_FUNCTION_ARGS)
+{
+ return populate_record_worker(fcinfo, "jsonb_populate_record",
+ false, true);
+}
+
+Datum
+jsonb_to_record(PG_FUNCTION_ARGS)
+{
+ return populate_record_worker(fcinfo, "jsonb_to_record",
+ false, false);
+}
+
+Datum
+json_populate_record(PG_FUNCTION_ARGS)
+{
+ return populate_record_worker(fcinfo, "json_populate_record",
+ true, true);
+}
+
+Datum
+json_to_record(PG_FUNCTION_ARGS)
+{
+ return populate_record_worker(fcinfo, "json_to_record",
+ true, false);
+}
+
+/* helper function for diagnostics */
+static void
+populate_array_report_expected_array(PopulateArrayContext *ctx, int ndim)
+{
+ if (ndim <= 0)
+ {
+ if (ctx->colname)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("expected JSON array"),
+ errhint("See the value of key \"%s\".", ctx->colname)));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("expected JSON array")));
+ }
+ else
+ {
+ StringInfoData indices;
+ int i;
+
+ initStringInfo(&indices);
+
+ Assert(ctx->ndims > 0 && ndim < ctx->ndims);
+
+ for (i = 0; i < ndim; i++)
+ appendStringInfo(&indices, "[%d]", ctx->sizes[i]);
+
+ if (ctx->colname)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("expected JSON array"),
+ errhint("See the array element %s of key \"%s\".",
+ indices.data, ctx->colname)));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("expected JSON array"),
+ errhint("See the array element %s.",
+ indices.data)));
+ }
+}
+
+/* set the number of dimensions of the populated array when it becomes known */
+static void
+populate_array_assign_ndims(PopulateArrayContext *ctx, int ndims)
+{
+ int i;
+
+ Assert(ctx->ndims <= 0);
+
+ if (ndims <= 0)
+ populate_array_report_expected_array(ctx, ndims);
+
+ ctx->ndims = ndims;
+ ctx->dims = palloc(sizeof(int) * ndims);
+ ctx->sizes = palloc0(sizeof(int) * ndims);
+
+ for (i = 0; i < ndims; i++)
+ ctx->dims[i] = -1; /* dimensions are unknown yet */
+}
+
+/* check the populated subarray dimension */
+static void
+populate_array_check_dimension(PopulateArrayContext *ctx, int ndim)
+{
+ int dim = ctx->sizes[ndim]; /* current dimension counter */
+
+ if (ctx->dims[ndim] == -1)
+ ctx->dims[ndim] = dim; /* assign dimension if not yet known */
+ else if (ctx->dims[ndim] != dim)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed JSON array"),
+ errdetail("Multidimensional arrays must have "
+ "sub-arrays with matching dimensions.")));
+
+ /* reset the current array dimension size counter */
+ ctx->sizes[ndim] = 0;
+
+ /* increment the parent dimension counter if it is a nested sub-array */
+ if (ndim > 0)
+ ctx->sizes[ndim - 1]++;
+}
+
+static void
+populate_array_element(PopulateArrayContext *ctx, int ndim, JsValue *jsv)
+{
+ Datum element;
+ bool element_isnull;
+
+ /* populate the array element */
+ element = populate_record_field(ctx->aio->element_info,
+ ctx->aio->element_type,
+ ctx->aio->element_typmod,
+ NULL, ctx->mcxt, PointerGetDatum(NULL),
+ jsv, &element_isnull);
+
+ accumArrayResult(ctx->astate, element, element_isnull,
+ ctx->aio->element_type, ctx->acxt);
+
+ Assert(ndim > 0);
+ ctx->sizes[ndim - 1]++; /* increment current dimension counter */
+}
+
+/* json object start handler for populate_array_json() */
+static void
+populate_array_object_start(void *_state)
+{
+ PopulateArrayState *state = (PopulateArrayState *) _state;
+ int ndim = state->lex->lex_level;
+
+ if (state->ctx->ndims <= 0)
+ populate_array_assign_ndims(state->ctx, ndim);
+ else if (ndim < state->ctx->ndims)
+ populate_array_report_expected_array(state->ctx, ndim);
+}
+
+/* json array end handler for populate_array_json() */
+static void
+populate_array_array_end(void *_state)
+{
+ PopulateArrayState *state = (PopulateArrayState *) _state;
+ PopulateArrayContext *ctx = state->ctx;
+ int ndim = state->lex->lex_level;
+
+ if (ctx->ndims <= 0)
+ populate_array_assign_ndims(ctx, ndim + 1);
+
+ if (ndim < ctx->ndims)
+ populate_array_check_dimension(ctx, ndim);
+}
+
+/* json array element start handler for populate_array_json() */
+static void
+populate_array_element_start(void *_state, bool isnull)
+{
+ PopulateArrayState *state = (PopulateArrayState *) _state;
+ int ndim = state->lex->lex_level;
+
+ if (state->ctx->ndims <= 0 || ndim == state->ctx->ndims)
+ {
+ /* remember current array element start */
+ state->element_start = state->lex->token_start;
+ state->element_type = state->lex->token_type;
+ state->element_scalar = NULL;
+ }
+}
+
+/* json array element end handler for populate_array_json() */
+static void
+populate_array_element_end(void *_state, bool isnull)
+{
+ PopulateArrayState *state = (PopulateArrayState *) _state;
+ PopulateArrayContext *ctx = state->ctx;
+ int ndim = state->lex->lex_level;
+
+ Assert(ctx->ndims > 0);
+
+ if (ndim == ctx->ndims)
+ {
+ JsValue jsv;
+
+ jsv.is_json = true;
+ jsv.val.json.type = state->element_type;
+
+ if (isnull)
+ {
+ Assert(jsv.val.json.type == JSON_TOKEN_NULL);
+ jsv.val.json.str = NULL;
+ jsv.val.json.len = 0;
+ }
+ else if (state->element_scalar)
+ {
+ jsv.val.json.str = state->element_scalar;
+ jsv.val.json.len = -1; /* null-terminated */
+ }
+ else
+ {
+ jsv.val.json.str = state->element_start;
+ jsv.val.json.len = (state->lex->prev_token_terminator -
+ state->element_start) * sizeof(char);
+ }
+
+ populate_array_element(ctx, ndim, &jsv);
+ }
+}
+
+/* json scalar handler for populate_array_json() */
+static void
+populate_array_scalar(void *_state, char *token, JsonTokenType tokentype)
+{
+ PopulateArrayState *state = (PopulateArrayState *) _state;
+ PopulateArrayContext *ctx = state->ctx;
+ int ndim = state->lex->lex_level;
+
+ if (ctx->ndims <= 0)
+ populate_array_assign_ndims(ctx, ndim);
+ else if (ndim < ctx->ndims)
+ populate_array_report_expected_array(ctx, ndim);
+
+ if (ndim == ctx->ndims)
+ {
+ /* remember the scalar element token */
+ state->element_scalar = token;
+ /* element_type must already be set in populate_array_element_start() */
+ Assert(state->element_type == tokentype);
+ }
+}
+
+/* parse a json array and populate array */
+static void
+populate_array_json(PopulateArrayContext *ctx, char *json, int len)
+{
+ PopulateArrayState state;
+ JsonSemAction sem;
+
+ state.lex = makeJsonLexContextCstringLen(json, len, GetDatabaseEncoding(), true);
+ state.ctx = ctx;
+
+ memset(&sem, 0, sizeof(sem));
+ sem.semstate = (void *) &state;
+ sem.object_start = populate_array_object_start;
+ sem.array_end = populate_array_array_end;
+ sem.array_element_start = populate_array_element_start;
+ sem.array_element_end = populate_array_element_end;
+ sem.scalar = populate_array_scalar;
+
+ pg_parse_json_or_ereport(state.lex, &sem);
+
+ /* number of dimensions should be already known */
+ Assert(ctx->ndims > 0 && ctx->dims);
+
+ pfree(state.lex);
+}
+
+/*
+ * populate_array_dim_jsonb() -- Iterate recursively through jsonb sub-array
+ * elements and accumulate result using given ArrayBuildState.
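+ *
+ * Illustrative behavior (hypothetical input, for orientation only): a jsonb
+ * value '[[1,2],[3,4]]' yields ndims = 2 with dims = {2,2}, whereas
+ * '[[1,2],[3]]' fails the dimension check with "malformed JSON array".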
+ */
+static void
+populate_array_dim_jsonb(PopulateArrayContext *ctx, /* context */
+ JsonbValue *jbv, /* jsonb sub-array */
+ int ndim) /* current dimension */
+{
+ JsonbContainer *jbc = jbv->val.binary.data;
+ JsonbIterator *it;
+ JsonbIteratorToken tok;
+ JsonbValue val;
+ JsValue jsv;
+
+ check_stack_depth();
+
+ if (jbv->type != jbvBinary || !JsonContainerIsArray(jbc))
+ populate_array_report_expected_array(ctx, ndim - 1);
+
+ Assert(!JsonContainerIsScalar(jbc));
+
+ it = JsonbIteratorInit(jbc);
+
+ tok = JsonbIteratorNext(&it, &val, true);
+ Assert(tok == WJB_BEGIN_ARRAY);
+
+ tok = JsonbIteratorNext(&it, &val, true);
+
+ /*
+ * If the number of dimensions is not yet known, and we have either found
+ * the end of the array or determined that the first child element is not
+ * an array, then assign the number of dimensions now.
+ */
+ if (ctx->ndims <= 0 &&
+ (tok == WJB_END_ARRAY ||
+ (tok == WJB_ELEM &&
+ (val.type != jbvBinary ||
+ !JsonContainerIsArray(val.val.binary.data)))))
+ populate_array_assign_ndims(ctx, ndim);
+
+ jsv.is_json = false;
+ jsv.val.jsonb = &val;
+
+ /* process all the array elements */
+ while (tok == WJB_ELEM)
+ {
+ /*
+ * Recurse only if the number of dimensions is still unknown or if it is
+ * not the innermost dimension.
+ */
+ if (ctx->ndims > 0 && ndim >= ctx->ndims)
+ populate_array_element(ctx, ndim, &jsv);
+ else
+ {
+ /* populate child sub-array */
+ populate_array_dim_jsonb(ctx, &val, ndim + 1);
+
+ /* number of dimensions should be already known */
+ Assert(ctx->ndims > 0 && ctx->dims);
+
+ populate_array_check_dimension(ctx, ndim);
+ }
+
+ tok = JsonbIteratorNext(&it, &val, true);
+ }
+
+ Assert(tok == WJB_END_ARRAY);
+
+ /* free iterator, iterating until WJB_DONE */
+ tok = JsonbIteratorNext(&it, &val, true);
+ Assert(tok == WJB_DONE && !it);
+}
+
+/* recursively populate an array from json/jsonb */
+static Datum
+populate_array(ArrayIOData *aio,
+ const char *colname,
+ MemoryContext mcxt,
+ JsValue *jsv)
+{
+ PopulateArrayContext ctx;
+ Datum result;
+ int *lbs;
+ int i;
+
+ ctx.aio = aio;
+ ctx.mcxt = mcxt;
+ ctx.acxt = CurrentMemoryContext;
+ ctx.astate = initArrayResult(aio->element_type, ctx.acxt, true);
+ ctx.colname = colname;
+ ctx.ndims = 0; /* unknown yet */
+ ctx.dims = NULL;
+ ctx.sizes = NULL;
+
+ if (jsv->is_json)
+ populate_array_json(&ctx, jsv->val.json.str,
+ jsv->val.json.len >= 0 ? jsv->val.json.len
+ : strlen(jsv->val.json.str));
+ else
+ {
+ populate_array_dim_jsonb(&ctx, jsv->val.jsonb, 1);
+ ctx.dims[0] = ctx.sizes[0];
+ }
+
+ Assert(ctx.ndims > 0);
+
+ lbs = palloc(sizeof(int) * ctx.ndims);
+
+ for (i = 0; i < ctx.ndims; i++)
+ lbs[i] = 1;
+
+ result = makeMdArrayResult(ctx.astate, ctx.ndims, ctx.dims, lbs,
+ ctx.acxt, true);
+
+ pfree(ctx.dims);
+ pfree(ctx.sizes);
+ pfree(lbs);
+
+ return result;
+}
+
+static void
+JsValueToJsObject(JsValue *jsv, JsObject *jso)
+{
+ jso->is_json = jsv->is_json;
+
+ if (jsv->is_json)
+ {
+ /* convert plain-text json into a hash table */
+ jso->val.json_hash =
+ get_json_object_as_hash(jsv->val.json.str,
+ jsv->val.json.len >= 0
+ ? jsv->val.json.len
+ : strlen(jsv->val.json.str),
+ "populate_composite");
+ }
+ else
+ {
+ JsonbValue *jbv = jsv->val.jsonb;
+
+ if (jbv->type == jbvBinary &&
+ JsonContainerIsObject(jbv->val.binary.data))
+ {
+ jso->val.jsonb_cont = jbv->val.binary.data;
+ }
+ else
+ {
+ bool is_scalar;
+
+ is_scalar = IsAJsonbScalar(jbv) ||
+ (jbv->type == jbvBinary &&
+ JsonContainerIsScalar(jbv->val.binary.data));
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ is_scalar
+ ? errmsg("cannot call %s on a scalar",
+ "populate_composite")
+ : errmsg("cannot call %s on an array",
+ "populate_composite")));
+ }
+ }
+}
+
+/* acquire or update cached tuple descriptor for a composite type */
+static void
+update_cached_tupdesc(CompositeIOData *io, MemoryContext mcxt)
+{
+ if (!io->tupdesc ||
+ io->tupdesc->tdtypeid != io->base_typid ||
+ io->tupdesc->tdtypmod != io->base_typmod)
+ {
+ TupleDesc tupdesc = lookup_rowtype_tupdesc(io->base_typid,
+ io->base_typmod);
+ MemoryContext oldcxt;
+
+ if (io->tupdesc)
+ FreeTupleDesc(io->tupdesc);
+
+ /* copy tuple desc without constraints into cache memory context */
+ oldcxt = MemoryContextSwitchTo(mcxt);
+ io->tupdesc = CreateTupleDescCopy(tupdesc);
+ MemoryContextSwitchTo(oldcxt);
+
+ ReleaseTupleDesc(tupdesc);
+ }
+}
+
+/* recursively populate a composite (row type) value from json/jsonb */
+static Datum
+populate_composite(CompositeIOData *io,
+ Oid typid,
+ const char *colname,
+ MemoryContext mcxt,
+ HeapTupleHeader defaultval,
+ JsValue *jsv,
+ bool isnull)
+{
+ Datum result;
+
+ /* acquire/update cached tuple descriptor */
+ update_cached_tupdesc(io, mcxt);
+
+ if (isnull)
+ result = (Datum) 0;
+ else
+ {
+ HeapTupleHeader tuple;
+ JsObject jso;
+
+ /* prepare input value */
+ JsValueToJsObject(jsv, &jso);
+
+ /* populate resulting record tuple */
+ tuple = populate_record(io->tupdesc, &io->record_io,
+ defaultval, mcxt, &jso);
+ result = HeapTupleHeaderGetDatum(tuple);
+
+ JsObjectFree(&jso);
+ }
+
+ /*
+ * If it's domain over composite, check domain constraints. (This should
+ * probably get refactored so that we can see the TYPECAT value, but for
+ * now, we can tell by comparing typid to base_typid.)
+ */
+ if (typid != io->base_typid && typid != RECORDOID)
+ domain_check(result, isnull, typid, &io->domain_info, mcxt);
+
+ return result;
+}
+
+/* populate non-null scalar value from json/jsonb value */
+static Datum
+populate_scalar(ScalarIOData *io, Oid typid, int32 typmod, JsValue *jsv)
+{
+ Datum res;
+ char *str = NULL;
+ char *json = NULL;
+
+ if (jsv->is_json)
+ {
+ int len = jsv->val.json.len;
+
+ json = jsv->val.json.str;
+ Assert(json);
+ if (len >= 0)
+ {
+ /* Need to copy non-null-terminated string */
+ str = palloc((len + 1) * sizeof(char));
+ memcpy(str, json, len);
+ str[len] = '\0';
+ }
+ else
+ str = json; /* string is already null-terminated */
+
+ /* If converting to json/jsonb, make string into valid JSON literal */
+ if ((typid == JSONOID || typid == JSONBOID) &&
+ jsv->val.json.type == JSON_TOKEN_STRING)
+ {
+ StringInfoData buf;
+
+ initStringInfo(&buf);
+ escape_json(&buf, str);
+ /* free temporary buffer */
+ if (str != json)
+ pfree(str);
+ str = buf.data;
+ }
+ }
+ else
+ {
+ JsonbValue *jbv = jsv->val.jsonb;
+
+ if (typid == JSONBOID)
+ {
+ Jsonb *jsonb = JsonbValueToJsonb(jbv); /* directly use jsonb */
+
+ return JsonbPGetDatum(jsonb);
+ }
+ /* convert jsonb to string for typio call */
+ else if (typid == JSONOID && jbv->type != jbvBinary)
+ {
+ /*
+ * Convert scalar jsonb (non-scalars are passed here as jbvBinary)
+ * to json string, preserving quotes around top-level strings.
+ */
+ Jsonb *jsonb = JsonbValueToJsonb(jbv);
+
+ str = JsonbToCString(NULL, &jsonb->root, VARSIZE(jsonb));
+ }
+ else if (jbv->type == jbvString) /* quotes are stripped */
+ str = pnstrdup(jbv->val.string.val, jbv->val.string.len);
+ else if (jbv->type == jbvBool)
+ str = pstrdup(jbv->val.boolean ? "true" : "false");
+ else if (jbv->type == jbvNumeric)
+ str = DatumGetCString(DirectFunctionCall1(numeric_out,
+ PointerGetDatum(jbv->val.numeric)));
+ else if (jbv->type == jbvBinary)
+ str = JsonbToCString(NULL, jbv->val.binary.data,
+ jbv->val.binary.len);
+ else
+ elog(ERROR, "unrecognized jsonb type: %d", (int) jbv->type);
+ }
+
+ res = InputFunctionCall(&io->typiofunc, str, io->typioparam, typmod);
+
+ /* free temporary buffer */
+ if (str != json)
+ pfree(str);
+
+ return res;
+}
+
+static Datum
+populate_domain(DomainIOData *io,
+ Oid typid,
+ const char *colname,
+ MemoryContext mcxt,
+ JsValue *jsv,
+ bool isnull)
+{
+ Datum res;
+
+ if (isnull)
+ res = (Datum) 0;
+ else
+ {
+ res = populate_record_field(io->base_io,
+ io->base_typid, io->base_typmod,
+ colname, mcxt, PointerGetDatum(NULL),
+ jsv, &isnull);
+ Assert(!isnull);
+ }
+
+ domain_check(res, isnull, typid, &io->domain_info, mcxt);
+
+ return res;
+}
+
+/* prepare column metadata cache for the given type */
+static void
+prepare_column_cache(ColumnIOData *column,
+ Oid typid,
+ int32 typmod,
+ MemoryContext mcxt,
+ bool need_scalar)
+{
+ HeapTuple tup;
+ Form_pg_type type;
+
+ column->typid = typid;
+ column->typmod = typmod;
+
+ tup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid));
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for type %u", typid);
+
+ type = (Form_pg_type) GETSTRUCT(tup);
+
+ if (type->typtype == TYPTYPE_DOMAIN)
+ {
+ /*
+ * We can move directly to the bottom base type; domain_check() will
+ * take care of checking all constraints for a stack of domains.
+ */
+ Oid base_typid;
+ int32 base_typmod = typmod;
+
+ base_typid = getBaseTypeAndTypmod(typid, &base_typmod);
+ if (get_typtype(base_typid) == TYPTYPE_COMPOSITE)
+ {
+ /* domain over composite has its own code path */
+ column->typcat = TYPECAT_COMPOSITE_DOMAIN;
+ column->io.composite.record_io = NULL;
+ column->io.composite.tupdesc = NULL;
+ column->io.composite.base_typid = base_typid;
+ column->io.composite.base_typmod = base_typmod;
+ column->io.composite.domain_info = NULL;
+ }
+ else
+ {
+ /* domain over anything else */
+ column->typcat = TYPECAT_DOMAIN;
+ column->io.domain.base_typid = base_typid;
+ column->io.domain.base_typmod = base_typmod;
+ column->io.domain.base_io =
+ MemoryContextAllocZero(mcxt, sizeof(ColumnIOData));
+ column->io.domain.domain_info = NULL;
+ }
+ }
+ else if (type->typtype == TYPTYPE_COMPOSITE || typid == RECORDOID)
+ {
+ column->typcat = TYPECAT_COMPOSITE;
+ column->io.composite.record_io = NULL;
+ column->io.composite.tupdesc = NULL;
+ column->io.composite.base_typid = typid;
+ column->io.composite.base_typmod = typmod;
+ column->io.composite.domain_info = NULL;
+ }
+ else if (IsTrueArrayType(type))
+ {
+ column->typcat = TYPECAT_ARRAY;
+ column->io.array.element_info = MemoryContextAllocZero(mcxt,
+ sizeof(ColumnIOData));
+ column->io.array.element_type = type->typelem;
+ /* array element typemod stored in attribute's typmod */
+ column->io.array.element_typmod = typmod;
+ }
+ else
+ {
+ column->typcat = TYPECAT_SCALAR;
+ need_scalar = true;
+ }
+
+ /* caller can force us to look up scalar_io info even for non-scalars */
+ if (need_scalar)
+ {
+ Oid typioproc;
+
+ getTypeInputInfo(typid, &typioproc, &column->scalar_io.typioparam);
+ fmgr_info_cxt(typioproc, &column->scalar_io.typiofunc, mcxt);
+ }
+
+ ReleaseSysCache(tup);
+}
+
+/* recursively populate a record field or an array element from a json/jsonb value */
+static Datum
+populate_record_field(ColumnIOData *col,
+ Oid typid,
+ int32 typmod,
+ const char *colname,
+ MemoryContext mcxt,
+ Datum defaultval,
+ JsValue *jsv,
+ bool *isnull)
+{
+ TypeCat typcat;
+
+ check_stack_depth();
+
+ /*
+ * Prepare column metadata cache for the given type. Force lookup of the
+ * scalar_io data so that the json string hack below will work.
+ */
+ if (col->typid != typid || col->typmod != typmod)
+ prepare_column_cache(col, typid, typmod, mcxt, true);
+
+ *isnull = JsValueIsNull(jsv);
+
+ typcat = col->typcat;
+
+ /* try to convert json string to a non-scalar type through input function */
+ if (JsValueIsString(jsv) &&
+ (typcat == TYPECAT_ARRAY ||
+ typcat == TYPECAT_COMPOSITE ||
+ typcat == TYPECAT_COMPOSITE_DOMAIN))
+ typcat = TYPECAT_SCALAR;
+
+ /* we must perform domain checks for NULLs, otherwise exit immediately */
+ if (*isnull &&
+ typcat != TYPECAT_DOMAIN &&
+ typcat != TYPECAT_COMPOSITE_DOMAIN)
+ return (Datum) 0;
+
+ switch (typcat)
+ {
+ case TYPECAT_SCALAR:
+ return populate_scalar(&col->scalar_io, typid, typmod, jsv);
+
+ case TYPECAT_ARRAY:
+ return populate_array(&col->io.array, colname, mcxt, jsv);
+
+ case TYPECAT_COMPOSITE:
+ case TYPECAT_COMPOSITE_DOMAIN:
+ return populate_composite(&col->io.composite, typid,
+ colname, mcxt,
+ DatumGetPointer(defaultval)
+ ? DatumGetHeapTupleHeader(defaultval)
+ : NULL,
+ jsv, *isnull);
+
+ case TYPECAT_DOMAIN:
+ return populate_domain(&col->io.domain, typid, colname, mcxt,
+ jsv, *isnull);
+
+ default:
+ elog(ERROR, "unrecognized type category '%c'", typcat);
+ return (Datum) 0;
+ }
+}
+
+static RecordIOData *
+allocate_record_info(MemoryContext mcxt, int ncolumns)
+{
+ RecordIOData *data = (RecordIOData *)
+ MemoryContextAlloc(mcxt,
+ offsetof(RecordIOData, columns) +
+ ncolumns * sizeof(ColumnIOData));
+
+ data->record_type = InvalidOid;
+ data->record_typmod = 0;
+ data->ncolumns = ncolumns;
+ MemSet(data->columns, 0, sizeof(ColumnIOData) * ncolumns);
+
+ return data;
+}
+
+static bool
+JsObjectGetField(JsObject *obj, char *field, JsValue *jsv)
+{
+ jsv->is_json = obj->is_json;
+
+ if (jsv->is_json)
+ {
+ JsonHashEntry *hashentry = hash_search(obj->val.json_hash, field,
+ HASH_FIND, NULL);
+
+ jsv->val.json.type = hashentry ? hashentry->type : JSON_TOKEN_NULL;
+ jsv->val.json.str = jsv->val.json.type == JSON_TOKEN_NULL ? NULL :
+ hashentry->val;
+ jsv->val.json.len = jsv->val.json.str ? -1 : 0; /* null-terminated */
+
+ return hashentry != NULL;
+ }
+ else
+ {
+ jsv->val.jsonb = !obj->val.jsonb_cont ? NULL :
+ getKeyJsonValueFromContainer(obj->val.jsonb_cont, field, strlen(field),
+ NULL);
+
+ return jsv->val.jsonb != NULL;
+ }
+}
+
+/* populate a record tuple from json/jsonb value */
+static HeapTupleHeader
+populate_record(TupleDesc tupdesc,
+ RecordIOData **record_p,
+ HeapTupleHeader defaultval,
+ MemoryContext mcxt,
+ JsObject *obj)
+{
+ RecordIOData *record = *record_p;
+ Datum *values;
+ bool *nulls;
+ HeapTuple res;
+ int ncolumns = tupdesc->natts;
+ int i;
+
+ /*
+ * if the input json is empty, we can only skip the rest if we were passed
+ * in a non-null record, since otherwise there may be issues with domain
+ * nulls.
+ */
+ if (defaultval && JsObjectIsEmpty(obj))
+ return defaultval;
+
+ /* (re)allocate metadata cache */
+ if (record == NULL ||
+ record->ncolumns != ncolumns)
+ *record_p = record = allocate_record_info(mcxt, ncolumns);
+
+ /* invalidate metadata cache if the record type has changed */
+ if (record->record_type != tupdesc->tdtypeid ||
+ record->record_typmod != tupdesc->tdtypmod)
+ {
+ MemSet(record, 0, offsetof(RecordIOData, columns) +
+ ncolumns * sizeof(ColumnIOData));
+ record->record_type = tupdesc->tdtypeid;
+ record->record_typmod = tupdesc->tdtypmod;
+ record->ncolumns = ncolumns;
+ }
+
+ values = (Datum *) palloc(ncolumns * sizeof(Datum));
+ nulls = (bool *) palloc(ncolumns * sizeof(bool));
+
+ if (defaultval)
+ {
+ HeapTupleData tuple;
+
+ /* Build a temporary HeapTuple control structure */
+ tuple.t_len = HeapTupleHeaderGetDatumLength(defaultval);
+ ItemPointerSetInvalid(&(tuple.t_self));
+ tuple.t_tableOid = InvalidOid;
+ tuple.t_data = defaultval;
+
+ /* Break down the tuple into fields */
+ heap_deform_tuple(&tuple, tupdesc, values, nulls);
+ }
+ else
+ {
+ for (i = 0; i < ncolumns; ++i)
+ {
+ values[i] = (Datum) 0;
+ nulls[i] = true;
+ }
+ }
+
+ for (i = 0; i < ncolumns; ++i)
+ {
+ Form_pg_attribute att = TupleDescAttr(tupdesc, i);
+ char *colname = NameStr(att->attname);
+ JsValue field = {0};
+ bool found;
+
+ /* Ignore dropped columns in datatype */
+ if (att->attisdropped)
+ {
+ nulls[i] = true;
+ continue;
+ }
+
+ found = JsObjectGetField(obj, colname, &field);
+
+ /*
+ * we can't just skip here if the key wasn't found since we might have
+ * a domain to deal with. If we were passed in a non-null record
+ * datum, we assume that the existing values are valid (if they're
+ * not, then it's not our fault), but if we were passed in a null,
+ * then every field which we don't populate needs to be run through
+ * the input function just in case it's a domain type.
+ */
+ if (defaultval && !found)
+ continue;
+
+ values[i] = populate_record_field(&record->columns[i],
+ att->atttypid,
+ att->atttypmod,
+ colname,
+ mcxt,
+ nulls[i] ? (Datum) 0 : values[i],
+ &field,
+ &nulls[i]);
+ }
+
+ res = heap_form_tuple(tupdesc, values, nulls);
+
+ pfree(values);
+ pfree(nulls);
+
+ return res->t_data;
+}
+
+/*
+ * Setup for json{b}_populate_record{set}: result type will be same as first
+ * argument's type --- unless first argument is "null::record", which we can't
+ * extract type info from; we handle that later.
+ */
+static void
+get_record_type_from_argument(FunctionCallInfo fcinfo,
+ const char *funcname,
+ PopulateRecordCache *cache)
+{
+ cache->argtype = get_fn_expr_argtype(fcinfo->flinfo, 0);
+ prepare_column_cache(&cache->c,
+ cache->argtype, -1,
+ cache->fn_mcxt, false);
+ if (cache->c.typcat != TYPECAT_COMPOSITE &&
+ cache->c.typcat != TYPECAT_COMPOSITE_DOMAIN)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ /* translator: %s is a function name, eg json_to_record */
+ errmsg("first argument of %s must be a row type",
+ funcname)));
+}
+
+/*
+ * Setup for json{b}_to_record{set}: result type is specified by calling
+ * query. We'll also use this code for json{b}_populate_record{set},
+ * if we discover that the first argument is a null of type RECORD.
+ *
+ * Here it is syntactically impossible to specify the target type
+ * as domain-over-composite.
+ */
+static void
+get_record_type_from_query(FunctionCallInfo fcinfo,
+ const char *funcname,
+ PopulateRecordCache *cache)
+{
+ TupleDesc tupdesc;
+ MemoryContext old_cxt;
+
+ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ /* translator: %s is a function name, eg json_to_record */
+ errmsg("could not determine row type for result of %s",
+ funcname),
+ errhint("Provide a non-null record argument, "
+ "or call the function in the FROM clause "
+ "using a column definition list.")));
+
+ Assert(tupdesc);
+ cache->argtype = tupdesc->tdtypeid;
+
+ /* If we go through this more than once, avoid memory leak */
+ if (cache->c.io.composite.tupdesc)
+ FreeTupleDesc(cache->c.io.composite.tupdesc);
+
+ /* Save identified tupdesc */
+ old_cxt = MemoryContextSwitchTo(cache->fn_mcxt);
+ cache->c.io.composite.tupdesc = CreateTupleDescCopy(tupdesc);
+ cache->c.io.composite.base_typid = tupdesc->tdtypeid;
+ cache->c.io.composite.base_typmod = tupdesc->tdtypmod;
+ MemoryContextSwitchTo(old_cxt);
+}
+
+/*
+ * common worker for json{b}_populate_record() and json{b}_to_record()
+ * is_json and have_record_arg identify the specific function
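+ *
+ * Illustrative SQL-level calls that are routed here (the row type and values
+ * are hypothetical examples):
+ *
+ *   SELECT * FROM json_populate_record(null::myrowtype, '{"a": 1, "b": "x"}');
+ *   SELECT * FROM json_to_record('{"a": 1, "b": "x"}') AS t(a int, b text);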
+ */
+static Datum
+populate_record_worker(FunctionCallInfo fcinfo, const char *funcname,
+ bool is_json, bool have_record_arg)
+{
+ int json_arg_num = have_record_arg ? 1 : 0;
+ JsValue jsv = {0};
+ HeapTupleHeader rec;
+ Datum rettuple;
+ JsonbValue jbv;
+ MemoryContext fnmcxt = fcinfo->flinfo->fn_mcxt;
+ PopulateRecordCache *cache = fcinfo->flinfo->fn_extra;
+
+ /*
+ * If first time through, identify input/result record type. Note that
+ * this stanza looks only at fcinfo context, which can't change during the
+ * query; so we may not be able to fully resolve a RECORD input type yet.
+ */
+ if (!cache)
+ {
+ fcinfo->flinfo->fn_extra = cache =
+ MemoryContextAllocZero(fnmcxt, sizeof(*cache));
+ cache->fn_mcxt = fnmcxt;
+
+ if (have_record_arg)
+ get_record_type_from_argument(fcinfo, funcname, cache);
+ else
+ get_record_type_from_query(fcinfo, funcname, cache);
+ }
+
+ /* Collect record arg if we have one */
+ if (!have_record_arg)
+ rec = NULL; /* it's json{b}_to_record() */
+ else if (!PG_ARGISNULL(0))
+ {
+ rec = PG_GETARG_HEAPTUPLEHEADER(0);
+
+ /*
+ * When declared arg type is RECORD, identify actual record type from
+ * the tuple itself.
+ */
+ if (cache->argtype == RECORDOID)
+ {
+ cache->c.io.composite.base_typid = HeapTupleHeaderGetTypeId(rec);
+ cache->c.io.composite.base_typmod = HeapTupleHeaderGetTypMod(rec);
+ }
+ }
+ else
+ {
+ rec = NULL;
+
+ /*
+ * When declared arg type is RECORD, identify actual record type from
+ * calling query, or fail if we can't.
+ */
+ if (cache->argtype == RECORDOID)
+ {
+ get_record_type_from_query(fcinfo, funcname, cache);
+ /* This can't change argtype, which is important for next time */
+ Assert(cache->argtype == RECORDOID);
+ }
+ }
+
+ /* If no JSON argument, just return the record (if any) unchanged */
+ if (PG_ARGISNULL(json_arg_num))
+ {
+ if (rec)
+ PG_RETURN_POINTER(rec);
+ else
+ PG_RETURN_NULL();
+ }
+
+ jsv.is_json = is_json;
+
+ if (is_json)
+ {
+ text *json = PG_GETARG_TEXT_PP(json_arg_num);
+
+ jsv.val.json.str = VARDATA_ANY(json);
+ jsv.val.json.len = VARSIZE_ANY_EXHDR(json);
+ jsv.val.json.type = JSON_TOKEN_INVALID; /* not used in
+ * populate_composite() */
+ }
+ else
+ {
+ Jsonb *jb = PG_GETARG_JSONB_P(json_arg_num);
+
+ jsv.val.jsonb = &jbv;
+
+ /* fill binary jsonb value pointing to jb */
+ jbv.type = jbvBinary;
+ jbv.val.binary.data = &jb->root;
+ jbv.val.binary.len = VARSIZE(jb) - VARHDRSZ;
+ }
+
+ rettuple = populate_composite(&cache->c.io.composite, cache->argtype,
+ NULL, fnmcxt, rec, &jsv, false);
+
+ PG_RETURN_DATUM(rettuple);
+}
+
+/*
+ * get_json_object_as_hash
+ *
+ * decompose a json object into a hash table.
+ */
+static HTAB *
+get_json_object_as_hash(char *json, int len, const char *funcname)
+{
+ HASHCTL ctl;
+ HTAB *tab;
+ JHashState *state;
+ JsonLexContext *lex = makeJsonLexContextCstringLen(json, len, GetDatabaseEncoding(), true);
+ JsonSemAction *sem;
+
+ ctl.keysize = NAMEDATALEN;
+ ctl.entrysize = sizeof(JsonHashEntry);
+ ctl.hcxt = CurrentMemoryContext;
+ tab = hash_create("json object hashtable",
+ 100,
+ &ctl,
+ HASH_ELEM | HASH_STRINGS | HASH_CONTEXT);
+
+ state = palloc0(sizeof(JHashState));
+ sem = palloc0(sizeof(JsonSemAction));
+
+ state->function_name = funcname;
+ state->hash = tab;
+ state->lex = lex;
+
+ sem->semstate = (void *) state;
+ sem->array_start = hash_array_start;
+ sem->scalar = hash_scalar;
+ sem->object_field_start = hash_object_field_start;
+ sem->object_field_end = hash_object_field_end;
+
+ pg_parse_json_or_ereport(lex, sem);
+
+ return tab;
+}
+
+static void
+hash_object_field_start(void *state, char *fname, bool isnull)
+{
+ JHashState *_state = (JHashState *) state;
+
+ if (_state->lex->lex_level > 1)
+ return;
+
+ /* remember token type */
+ _state->saved_token_type = _state->lex->token_type;
+
+ if (_state->lex->token_type == JSON_TOKEN_ARRAY_START ||
+ _state->lex->token_type == JSON_TOKEN_OBJECT_START)
+ {
+ /* remember start position of the whole text of the subobject */
+ _state->save_json_start = _state->lex->token_start;
+ }
+ else
+ {
+ /* must be a scalar */
+ _state->save_json_start = NULL;
+ }
+}
+
+static void
+hash_object_field_end(void *state, char *fname, bool isnull)
+{
+ JHashState *_state = (JHashState *) state;
+ JsonHashEntry *hashentry;
+ bool found;
+
+ /*
+ * Ignore nested fields.
+ */
+ if (_state->lex->lex_level > 1)
+ return;
+
+ /*
+ * Ignore field names >= NAMEDATALEN - they can't match a record field.
+ * (Note: without this test, the hash code would truncate the string at
+ * NAMEDATALEN-1, and could then match against a similarly-truncated
+ * record field name. That would be a reasonable behavior, but this code
+ * has previously insisted on exact equality, so we keep this behavior.)
+ */
+ if (strlen(fname) >= NAMEDATALEN)
+ return;
+
+ hashentry = hash_search(_state->hash, fname, HASH_ENTER, &found);
+
+ /*
+ * found being true indicates a duplicate. We don't do anything about
+ * that; a later field with the same name overrides the earlier field.
+ */
+
+ hashentry->type = _state->saved_token_type;
+ Assert(isnull == (hashentry->type == JSON_TOKEN_NULL));
+
+ if (_state->save_json_start != NULL)
+ {
+ int len = _state->lex->prev_token_terminator - _state->save_json_start;
+ char *val = palloc((len + 1) * sizeof(char));
+
+ memcpy(val, _state->save_json_start, len);
+ val[len] = '\0';
+ hashentry->val = val;
+ }
+ else
+ {
+ /* must have had a scalar instead */
+ hashentry->val = _state->saved_scalar;
+ }
+}
+
+static void
+hash_array_start(void *state)
+{
+ JHashState *_state = (JHashState *) state;
+
+ if (_state->lex->lex_level == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot call %s on an array", _state->function_name)));
+}
+
+static void
+hash_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+ JHashState *_state = (JHashState *) state;
+
+ if (_state->lex->lex_level == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot call %s on a scalar", _state->function_name)));
+
+ if (_state->lex->lex_level == 1)
+ {
+ _state->saved_scalar = token;
+ /* saved_token_type must already be set in hash_object_field_start() */
+ Assert(_state->saved_token_type == tokentype);
+ }
+}
+
+
+/*
+ * SQL function json_populate_recordset
+ *
+ * set fields in a set of records from the argument json,
+ * which must be an array of objects.
+ *
+ * similar to json_populate_record, but the tuple-building code
+ * is pushed down into the semantic action handlers so it's done
+ * per object in the array.
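+ *
+ * Illustrative call (the row type is a hypothetical example):
+ *
+ *   SELECT * FROM json_populate_recordset(null::myrowtype,
+ *                                         '[{"a": 1}, {"a": 2, "b": "x"}]');
+ *
+ * This yields one output row per array element; fields missing from a given
+ * object are taken from the base record, or are NULL if the base is NULL.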
+ */
+Datum
+jsonb_populate_recordset(PG_FUNCTION_ARGS)
+{
+ return populate_recordset_worker(fcinfo, "jsonb_populate_recordset",
+ false, true);
+}
+
+Datum
+jsonb_to_recordset(PG_FUNCTION_ARGS)
+{
+ return populate_recordset_worker(fcinfo, "jsonb_to_recordset",
+ false, false);
+}
+
+Datum
+json_populate_recordset(PG_FUNCTION_ARGS)
+{
+ return populate_recordset_worker(fcinfo, "json_populate_recordset",
+ true, true);
+}
+
+Datum
+json_to_recordset(PG_FUNCTION_ARGS)
+{
+ return populate_recordset_worker(fcinfo, "json_to_recordset",
+ true, false);
+}
+
+static void
+populate_recordset_record(PopulateRecordsetState *state, JsObject *obj)
+{
+ PopulateRecordCache *cache = state->cache;
+ HeapTupleHeader tuphead;
+ HeapTupleData tuple;
+
+ /* acquire/update cached tuple descriptor */
+ update_cached_tupdesc(&cache->c.io.composite, cache->fn_mcxt);
+
+ /* replace record fields from json */
+ tuphead = populate_record(cache->c.io.composite.tupdesc,
+ &cache->c.io.composite.record_io,
+ state->rec,
+ cache->fn_mcxt,
+ obj);
+
+ /* if it's domain over composite, check domain constraints */
+ if (cache->c.typcat == TYPECAT_COMPOSITE_DOMAIN)
+ domain_check(HeapTupleHeaderGetDatum(tuphead), false,
+ cache->argtype,
+ &cache->c.io.composite.domain_info,
+ cache->fn_mcxt);
+
+ /* ok, save into tuplestore */
+ tuple.t_len = HeapTupleHeaderGetDatumLength(tuphead);
+ ItemPointerSetInvalid(&(tuple.t_self));
+ tuple.t_tableOid = InvalidOid;
+ tuple.t_data = tuphead;
+
+ tuplestore_puttuple(state->tuple_store, &tuple);
+}
+
+/*
+ * common worker for json{b}_populate_recordset() and json{b}_to_recordset()
+ * is_json and have_record_arg identify the specific function
+ */
+static Datum
+populate_recordset_worker(FunctionCallInfo fcinfo, const char *funcname,
+ bool is_json, bool have_record_arg)
+{
+ int json_arg_num = have_record_arg ? 1 : 0;
+ ReturnSetInfo *rsi;
+ MemoryContext old_cxt;
+ HeapTupleHeader rec;
+ PopulateRecordCache *cache = fcinfo->flinfo->fn_extra;
+ PopulateRecordsetState *state;
+
+ rsi = (ReturnSetInfo *) fcinfo->resultinfo;
+
+ if (!rsi || !IsA(rsi, ReturnSetInfo))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("set-valued function called in context that cannot accept a set")));
+
+ if (!(rsi->allowedModes & SFRM_Materialize))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("materialize mode required, but it is not allowed in this context")));
+
+ rsi->returnMode = SFRM_Materialize;
+
+ /*
+ * If first time through, identify input/result record type. Note that
+ * this stanza looks only at fcinfo context, which can't change during the
+ * query; so we may not be able to fully resolve a RECORD input type yet.
+ */
+ if (!cache)
+ {
+ fcinfo->flinfo->fn_extra = cache =
+ MemoryContextAllocZero(fcinfo->flinfo->fn_mcxt, sizeof(*cache));
+ cache->fn_mcxt = fcinfo->flinfo->fn_mcxt;
+
+ if (have_record_arg)
+ get_record_type_from_argument(fcinfo, funcname, cache);
+ else
+ get_record_type_from_query(fcinfo, funcname, cache);
+ }
+
+ /* Collect record arg if we have one */
+ if (!have_record_arg)
+ rec = NULL; /* it's json{b}_to_recordset() */
+ else if (!PG_ARGISNULL(0))
+ {
+ rec = PG_GETARG_HEAPTUPLEHEADER(0);
+
+ /*
+ * When declared arg type is RECORD, identify actual record type from
+ * the tuple itself.
+ */
+ if (cache->argtype == RECORDOID)
+ {
+ cache->c.io.composite.base_typid = HeapTupleHeaderGetTypeId(rec);
+ cache->c.io.composite.base_typmod = HeapTupleHeaderGetTypMod(rec);
+ }
+ }
+ else
+ {
+ rec = NULL;
+
+ /*
+ * When declared arg type is RECORD, identify actual record type from
+ * calling query, or fail if we can't.
+ */
+ if (cache->argtype == RECORDOID)
+ {
+ get_record_type_from_query(fcinfo, funcname, cache);
+ /* This can't change argtype, which is important for next time */
+ Assert(cache->argtype == RECORDOID);
+ }
+ }
+
+ /* if the json is null send back an empty set */
+ if (PG_ARGISNULL(json_arg_num))
+ PG_RETURN_NULL();
+
+ /*
+ * Forcibly update the cached tupdesc, to ensure we have the right tupdesc
+ * to return even if the JSON contains no rows.
+ */
+ update_cached_tupdesc(&cache->c.io.composite, cache->fn_mcxt);
+
+ state = palloc0(sizeof(PopulateRecordsetState));
+
+ /* make tuplestore in a sufficiently long-lived memory context */
+ old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory);
+ state->tuple_store = tuplestore_begin_heap(rsi->allowedModes &
+ SFRM_Materialize_Random,
+ false, work_mem);
+ MemoryContextSwitchTo(old_cxt);
+
+ state->function_name = funcname;
+ state->cache = cache;
+ state->rec = rec;
+
+ if (is_json)
+ {
+ text *json = PG_GETARG_TEXT_PP(json_arg_num);
+ JsonLexContext *lex;
+ JsonSemAction *sem;
+
+ sem = palloc0(sizeof(JsonSemAction));
+
+ lex = makeJsonLexContext(json, true);
+
+ sem->semstate = (void *) state;
+ sem->array_start = populate_recordset_array_start;
+ sem->array_element_start = populate_recordset_array_element_start;
+ sem->scalar = populate_recordset_scalar;
+ sem->object_field_start = populate_recordset_object_field_start;
+ sem->object_field_end = populate_recordset_object_field_end;
+ sem->object_start = populate_recordset_object_start;
+ sem->object_end = populate_recordset_object_end;
+
+ state->lex = lex;
+
+ pg_parse_json_or_ereport(lex, sem);
+ }
+ else
+ {
+ Jsonb *jb = PG_GETARG_JSONB_P(json_arg_num);
+ JsonbIterator *it;
+ JsonbValue v;
+ bool skipNested = false;
+ JsonbIteratorToken r;
+
+ if (JB_ROOT_IS_SCALAR(jb) || !JB_ROOT_IS_ARRAY(jb))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot call %s on a non-array",
+ funcname)));
+
+ it = JsonbIteratorInit(&jb->root);
+
+ while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE)
+ {
+ skipNested = true;
+
+ if (r == WJB_ELEM)
+ {
+ JsObject obj;
+
+ if (v.type != jbvBinary ||
+ !JsonContainerIsObject(v.val.binary.data))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("argument of %s must be an array of objects",
+ funcname)));
+
+ obj.is_json = false;
+ obj.val.jsonb_cont = v.val.binary.data;
+
+ populate_recordset_record(state, &obj);
+ }
+ }
+ }
+
+ /*
+ * Note: we must copy the cached tupdesc because the executor will free
+ * the passed-back setDesc, but we want to hang onto the cache in case
+ * we're called again in the same query.
+ */
+ rsi->setResult = state->tuple_store;
+ rsi->setDesc = CreateTupleDescCopy(cache->c.io.composite.tupdesc);
+
+ PG_RETURN_NULL();
+}
+
+static void
+populate_recordset_object_start(void *state)
+{
+ PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
+ int lex_level = _state->lex->lex_level;
+ HASHCTL ctl;
+
+ /* Reject object at top level: we must have an array at level 0 */
+ if (lex_level == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot call %s on an object",
+ _state->function_name)));
+
+ /* Nested objects require no special processing */
+ if (lex_level > 1)
+ return;
+
+ /* Object at level 1: set up a new hash table for this object */
+ ctl.keysize = NAMEDATALEN;
+ ctl.entrysize = sizeof(JsonHashEntry);
+ ctl.hcxt = CurrentMemoryContext;
+ _state->json_hash = hash_create("json object hashtable",
+ 100,
+ &ctl,
+ HASH_ELEM | HASH_STRINGS | HASH_CONTEXT);
+}
+
+static void
+populate_recordset_object_end(void *state)
+{
+ PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
+ JsObject obj;
+
+ /* Nested objects require no special processing */
+ if (_state->lex->lex_level > 1)
+ return;
+
+ obj.is_json = true;
+ obj.val.json_hash = _state->json_hash;
+
+ /* Otherwise, construct and return a tuple based on this level-1 object */
+ populate_recordset_record(_state, &obj);
+
+ /* Done with hash for this object */
+ hash_destroy(_state->json_hash);
+ _state->json_hash = NULL;
+}
+
+static void
+populate_recordset_array_element_start(void *state, bool isnull)
+{
+ PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
+
+ if (_state->lex->lex_level == 1 &&
+ _state->lex->token_type != JSON_TOKEN_OBJECT_START)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("argument of %s must be an array of objects",
+ _state->function_name)));
+}
+
+static void
+populate_recordset_array_start(void *state)
+{
+ /* nothing to do */
+}
+
+static void
+populate_recordset_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+ PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
+
+ if (_state->lex->lex_level == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot call %s on a scalar",
+ _state->function_name)));
+
+ if (_state->lex->lex_level == 2)
+ _state->saved_scalar = token;
+}
+
+static void
+populate_recordset_object_field_start(void *state, char *fname, bool isnull)
+{
+ PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
+
+ if (_state->lex->lex_level > 2)
+ return;
+
+ _state->saved_token_type = _state->lex->token_type;
+
+ if (_state->lex->token_type == JSON_TOKEN_ARRAY_START ||
+ _state->lex->token_type == JSON_TOKEN_OBJECT_START)
+ {
+ _state->save_json_start = _state->lex->token_start;
+ }
+ else
+ {
+ _state->save_json_start = NULL;
+ }
+}
+
+static void
+populate_recordset_object_field_end(void *state, char *fname, bool isnull)
+{
+ PopulateRecordsetState *_state = (PopulateRecordsetState *) state;
+ JsonHashEntry *hashentry;
+ bool found;
+
+ /*
+ * Ignore nested fields.
+ */
+ if (_state->lex->lex_level > 2)
+ return;
+
+ /*
+ * Ignore field names >= NAMEDATALEN - they can't match a record field.
+ * (Note: without this test, the hash code would truncate the string at
+ * NAMEDATALEN-1, and could then match against a similarly-truncated
+ * record field name. That would be a reasonable behavior, but this code
+ * has previously insisted on exact equality, so we keep this behavior.)
+ */
+ if (strlen(fname) >= NAMEDATALEN)
+ return;
+
+ hashentry = hash_search(_state->json_hash, fname, HASH_ENTER, &found);
+
+ /*
+ * found being true indicates a duplicate. We don't do anything about
+ * that; a later field with the same name overrides the earlier field.
+ */
+
+ hashentry->type = _state->saved_token_type;
+ Assert(isnull == (hashentry->type == JSON_TOKEN_NULL));
+
+ if (_state->save_json_start != NULL)
+ {
+ int len = _state->lex->prev_token_terminator - _state->save_json_start;
+ char *val = palloc((len + 1) * sizeof(char));
+
+ memcpy(val, _state->save_json_start, len);
+ val[len] = '\0';
+ hashentry->val = val;
+ }
+ else
+ {
+ /* must have had a scalar instead */
+ hashentry->val = _state->saved_scalar;
+ }
+}
+
+/*
+ * Semantic actions for json_strip_nulls.
+ *
+ * Simply repeat the input on the output unless we encounter
+ * a null object field. State for this is set when the field
+ * is started and reset when the scalar action (which must be next)
+ * is called.
+ */
+
+static void
+sn_object_start(void *state)
+{
+ StripnullState *_state = (StripnullState *) state;
+
+ appendStringInfoCharMacro(_state->strval, '{');
+}
+
+static void
+sn_object_end(void *state)
+{
+ StripnullState *_state = (StripnullState *) state;
+
+ appendStringInfoCharMacro(_state->strval, '}');
+}
+
+static void
+sn_array_start(void *state)
+{
+ StripnullState *_state = (StripnullState *) state;
+
+ appendStringInfoCharMacro(_state->strval, '[');
+}
+
+static void
+sn_array_end(void *state)
+{
+ StripnullState *_state = (StripnullState *) state;
+
+ appendStringInfoCharMacro(_state->strval, ']');
+}
+
+static void
+sn_object_field_start(void *state, char *fname, bool isnull)
+{
+ StripnullState *_state = (StripnullState *) state;
+
+ if (isnull)
+ {
+ /*
+ * The next thing must be a scalar or isnull couldn't be true, so
+ * there is no danger of this state being carried down into a nested
+ * object or array. The flag will be reset in the scalar action.
+ */
+ _state->skip_next_null = true;
+ return;
+ }
+
+ if (_state->strval->data[_state->strval->len - 1] != '{')
+ appendStringInfoCharMacro(_state->strval, ',');
+
+ /*
+ * Unfortunately we don't have the quoted and escaped string any more, so
+ * we have to re-escape it.
+ */
+ escape_json(_state->strval, fname);
+
+ appendStringInfoCharMacro(_state->strval, ':');
+}
+
+static void
+sn_array_element_start(void *state, bool isnull)
+{
+ StripnullState *_state = (StripnullState *) state;
+
+ if (_state->strval->data[_state->strval->len - 1] != '[')
+ appendStringInfoCharMacro(_state->strval, ',');
+}
+
+static void
+sn_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+ StripnullState *_state = (StripnullState *) state;
+
+ if (_state->skip_next_null)
+ {
+ Assert(tokentype == JSON_TOKEN_NULL);
+ _state->skip_next_null = false;
+ return;
+ }
+
+ if (tokentype == JSON_TOKEN_STRING)
+ escape_json(_state->strval, token);
+ else
+ appendStringInfoString(_state->strval, token);
+}
+
+/*
+ * SQL function json_strip_nulls(json) -> json
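+ *
+ * Illustrative behavior (example input):
+ *
+ *   SELECT json_strip_nulls('{"a": 1, "b": null, "c": [null]}');
+ *   -- {"a":1,"c":[null]}
+ *
+ * Only null object fields are removed; null array elements are kept.
+ * jsonb_strip_nulls() below behaves the same way for jsonb input.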
+ */
+Datum
+json_strip_nulls(PG_FUNCTION_ARGS)
+{
+ text *json = PG_GETARG_TEXT_PP(0);
+ StripnullState *state;
+ JsonLexContext *lex;
+ JsonSemAction *sem;
+
+ lex = makeJsonLexContext(json, true);
+ state = palloc0(sizeof(StripnullState));
+ sem = palloc0(sizeof(JsonSemAction));
+
+ state->strval = makeStringInfo();
+ state->skip_next_null = false;
+ state->lex = lex;
+
+ sem->semstate = (void *) state;
+ sem->object_start = sn_object_start;
+ sem->object_end = sn_object_end;
+ sem->array_start = sn_array_start;
+ sem->array_end = sn_array_end;
+ sem->scalar = sn_scalar;
+ sem->array_element_start = sn_array_element_start;
+ sem->object_field_start = sn_object_field_start;
+
+ pg_parse_json_or_ereport(lex, sem);
+
+ PG_RETURN_TEXT_P(cstring_to_text_with_len(state->strval->data,
+ state->strval->len));
+}
+
+/*
+ * SQL function jsonb_strip_nulls(jsonb) -> jsonb
+ */
+Datum
+jsonb_strip_nulls(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ JsonbIterator *it;
+ JsonbParseState *parseState = NULL;
+ JsonbValue *res = NULL;
+ JsonbValue v,
+ k;
+ JsonbIteratorToken type;
+ bool last_was_key = false;
+
+ if (JB_ROOT_IS_SCALAR(jb))
+ PG_RETURN_POINTER(jb);
+
+ it = JsonbIteratorInit(&jb->root);
+
+ while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+ {
+ Assert(!(type == WJB_KEY && last_was_key));
+
+ if (type == WJB_KEY)
+ {
+ /* stash the key until we know if it has a null value */
+ k = v;
+ last_was_key = true;
+ continue;
+ }
+
+ if (last_was_key)
+ {
+ /* if the last element was a key this one can't be */
+ last_was_key = false;
+
+ /* skip this field if value is null */
+ if (type == WJB_VALUE && v.type == jbvNull)
+ continue;
+
+ /* otherwise, do a delayed push of the key */
+ (void) pushJsonbValue(&parseState, WJB_KEY, &k);
+ }
+
+ if (type == WJB_VALUE || type == WJB_ELEM)
+ res = pushJsonbValue(&parseState, type, &v);
+ else
+ res = pushJsonbValue(&parseState, type, NULL);
+ }
+
+ Assert(res != NULL);
+
+ PG_RETURN_POINTER(JsonbValueToJsonb(res));
+}
+
+/*
+ * SQL function jsonb_pretty (jsonb)
+ *
+ * Pretty-printed text for the jsonb
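+ *
+ * Illustrative call (example input):
+ *
+ *   SELECT jsonb_pretty('{"a": [1, 2], "b": null}');
+ *
+ * returns the value re-serialized with each field or array element on its
+ * own line, indented according to nesting depth.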
+ */
+Datum
+jsonb_pretty(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ StringInfo str = makeStringInfo();
+
+ JsonbToCStringIndent(str, &jb->root, VARSIZE(jb));
+
+ PG_RETURN_TEXT_P(cstring_to_text_with_len(str->data, str->len));
+}
+
+/*
+ * SQL function jsonb_concat (jsonb, jsonb)
+ *
+ * function for || operator
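+ *
+ * Illustrative behavior (example inputs):
+ *
+ *   '{"a": 1}'::jsonb || '{"b": 2}'   -> {"a": 1, "b": 2}
+ *   '[1, 2]'::jsonb || '[3]'          -> [1, 2, 3]
+ *   '{"a": 1}'::jsonb || '[2]'        -> [{"a": 1}, 2]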
+ */
+Datum
+jsonb_concat(PG_FUNCTION_ARGS)
+{
+ Jsonb *jb1 = PG_GETARG_JSONB_P(0);
+ Jsonb *jb2 = PG_GETARG_JSONB_P(1);
+ JsonbParseState *state = NULL;
+ JsonbValue *res;
+ JsonbIterator *it1,
+ *it2;
+
+ /*
+ * If one of the jsonb values is empty, just return the other if it's not
+ * a scalar and both are of the same kind. If one is a scalar or they are
+ * of different kinds, we need to perform the concatenation even if one of
+ * them is empty.
+ */
+ if (JB_ROOT_IS_OBJECT(jb1) == JB_ROOT_IS_OBJECT(jb2))
+ {
+ if (JB_ROOT_COUNT(jb1) == 0 && !JB_ROOT_IS_SCALAR(jb2))
+ PG_RETURN_JSONB_P(jb2);
+ else if (JB_ROOT_COUNT(jb2) == 0 && !JB_ROOT_IS_SCALAR(jb1))
+ PG_RETURN_JSONB_P(jb1);
+ }
+
+ it1 = JsonbIteratorInit(&jb1->root);
+ it2 = JsonbIteratorInit(&jb2->root);
+
+ res = IteratorConcat(&it1, &it2, &state);
+
+ Assert(res != NULL);
+
+ PG_RETURN_JSONB_P(JsonbValueToJsonb(res));
+}
+
+
+/*
+ * SQL function jsonb_delete (jsonb, text)
+ *
+ * return a copy of the jsonb with the indicated item
+ * removed.
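+ *
+ * Illustrative behavior of the "-" operator backed by this function
+ * (example inputs):
+ *
+ *   '{"a": 1, "b": 2}'::jsonb - 'a'   -> {"b": 2}
+ *   '["a", "b", "a"]'::jsonb - 'a'    -> ["b"]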
+ */
+Datum
+jsonb_delete(PG_FUNCTION_ARGS)
+{
+ Jsonb *in = PG_GETARG_JSONB_P(0);
+ text *key = PG_GETARG_TEXT_PP(1);
+ char *keyptr = VARDATA_ANY(key);
+ int keylen = VARSIZE_ANY_EXHDR(key);
+ JsonbParseState *state = NULL;
+ JsonbIterator *it;
+ JsonbValue v,
+ *res = NULL;
+ bool skipNested = false;
+ JsonbIteratorToken r;
+
+ if (JB_ROOT_IS_SCALAR(in))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot delete from scalar")));
+
+ if (JB_ROOT_COUNT(in) == 0)
+ PG_RETURN_JSONB_P(in);
+
+ it = JsonbIteratorInit(&in->root);
+
+ while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE)
+ {
+ skipNested = true;
+
+ if ((r == WJB_ELEM || r == WJB_KEY) &&
+ (v.type == jbvString && keylen == v.val.string.len &&
+ memcmp(keyptr, v.val.string.val, keylen) == 0))
+ {
+ /* skip corresponding value as well */
+ if (r == WJB_KEY)
+ (void) JsonbIteratorNext(&it, &v, true);
+
+ continue;
+ }
+
+ res = pushJsonbValue(&state, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
+ }
+
+ Assert(res != NULL);
+
+ PG_RETURN_JSONB_P(JsonbValueToJsonb(res));
+}
+
+/*
+ * SQL function jsonb_delete (jsonb, variadic text[])
+ *
+ * return a copy of the jsonb with the indicated items
+ * removed.
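+ *
+ * Illustrative behavior (example input):
+ *
+ *   '{"a": 1, "b": 2, "c": 3}'::jsonb - '{a,c}'::text[]   -> {"b": 2}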
+ */
+Datum
+jsonb_delete_array(PG_FUNCTION_ARGS)
+{
+ Jsonb *in = PG_GETARG_JSONB_P(0);
+ ArrayType *keys = PG_GETARG_ARRAYTYPE_P(1);
+ Datum *keys_elems;
+ bool *keys_nulls;
+ int keys_len;
+ JsonbParseState *state = NULL;
+ JsonbIterator *it;
+ JsonbValue v,
+ *res = NULL;
+ bool skipNested = false;
+ JsonbIteratorToken r;
+
+ if (ARR_NDIM(keys) > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("wrong number of array subscripts")));
+
+ if (JB_ROOT_IS_SCALAR(in))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot delete from scalar")));
+
+ if (JB_ROOT_COUNT(in) == 0)
+ PG_RETURN_JSONB_P(in);
+
+ deconstruct_array(keys, TEXTOID, -1, false, TYPALIGN_INT,
+ &keys_elems, &keys_nulls, &keys_len);
+
+ if (keys_len == 0)
+ PG_RETURN_JSONB_P(in);
+
+ it = JsonbIteratorInit(&in->root);
+
+ while ((r = JsonbIteratorNext(&it, &v, skipNested)) != WJB_DONE)
+ {
+ skipNested = true;
+
+ if ((r == WJB_ELEM || r == WJB_KEY) && v.type == jbvString)
+ {
+ int i;
+ bool found = false;
+
+ for (i = 0; i < keys_len; i++)
+ {
+ char *keyptr;
+ int keylen;
+
+ if (keys_nulls[i])
+ continue;
+
+ /* We rely on the array elements not being toasted */
+ keyptr = VARDATA_ANY(keys_elems[i]);
+ keylen = VARSIZE_ANY_EXHDR(keys_elems[i]);
+ if (keylen == v.val.string.len &&
+ memcmp(keyptr, v.val.string.val, keylen) == 0)
+ {
+ found = true;
+ break;
+ }
+ }
+ if (found)
+ {
+ /* skip corresponding value as well */
+ if (r == WJB_KEY)
+ (void) JsonbIteratorNext(&it, &v, true);
+
+ continue;
+ }
+ }
+
+ res = pushJsonbValue(&state, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
+ }
+
+ Assert(res != NULL);
+
+ PG_RETURN_JSONB_P(JsonbValueToJsonb(res));
+}
+
+/*
+ * SQL function jsonb_delete (jsonb, int)
+ *
+ * return a copy of the jsonb with the indicated item
+ * removed. Negative int means count back from the
+ * end of the items.
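+ *
+ * Illustrative behavior (example inputs):
+ *
+ *   '["a", "b", "c"]'::jsonb - 1    -> ["a", "c"]
+ *   '["a", "b", "c"]'::jsonb - -1   -> ["a", "b"]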
+ */
+Datum
+jsonb_delete_idx(PG_FUNCTION_ARGS)
+{
+ Jsonb *in = PG_GETARG_JSONB_P(0);
+ int idx = PG_GETARG_INT32(1);
+ JsonbParseState *state = NULL;
+ JsonbIterator *it;
+ uint32 i = 0,
+ n;
+ JsonbValue v,
+ *res = NULL;
+ JsonbIteratorToken r;
+
+ if (JB_ROOT_IS_SCALAR(in))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot delete from scalar")));
+
+ if (JB_ROOT_IS_OBJECT(in))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot delete from object using integer index")));
+
+ if (JB_ROOT_COUNT(in) == 0)
+ PG_RETURN_JSONB_P(in);
+
+ it = JsonbIteratorInit(&in->root);
+
+ r = JsonbIteratorNext(&it, &v, false);
+ Assert(r == WJB_BEGIN_ARRAY);
+ n = v.val.array.nElems;
+
+ if (idx < 0)
+ {
+ if (-idx > n)
+ idx = n;
+ else
+ idx = n + idx;
+ }
+
+ if (idx >= n)
+ PG_RETURN_JSONB_P(in);
+
+ pushJsonbValue(&state, r, NULL);
+
+ while ((r = JsonbIteratorNext(&it, &v, true)) != WJB_DONE)
+ {
+ if (r == WJB_ELEM)
+ {
+ if (i++ == idx)
+ continue;
+ }
+
+ res = pushJsonbValue(&state, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
+ }
+
+ Assert(res != NULL);
+
+ PG_RETURN_JSONB_P(JsonbValueToJsonb(res));
+}
+
+/*
+ * SQL function jsonb_set(jsonb, text[], jsonb, boolean)
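+ *
+ * Illustrative calls (example inputs):
+ *
+ *   SELECT jsonb_set('{"a": 1, "b": 2}', '{b}', '"x"');
+ *   -- {"a": 1, "b": "x"}
+ *   SELECT jsonb_set('{"a": 1}', '{c}', '3', true);
+ *   -- {"a": 1, "c": 3}   (create = true adds the missing key)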
+ */
+Datum
+jsonb_set(PG_FUNCTION_ARGS)
+{
+ Jsonb *in = PG_GETARG_JSONB_P(0);
+ ArrayType *path = PG_GETARG_ARRAYTYPE_P(1);
+ Jsonb *newjsonb = PG_GETARG_JSONB_P(2);
+ JsonbValue newval;
+ bool create = PG_GETARG_BOOL(3);
+ JsonbValue *res = NULL;
+ Datum *path_elems;
+ bool *path_nulls;
+ int path_len;
+ JsonbIterator *it;
+ JsonbParseState *st = NULL;
+
+ JsonbToJsonbValue(newjsonb, &newval);
+
+ if (ARR_NDIM(path) > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("wrong number of array subscripts")));
+
+ if (JB_ROOT_IS_SCALAR(in))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot set path in scalar")));
+
+ if (JB_ROOT_COUNT(in) == 0 && !create)
+ PG_RETURN_JSONB_P(in);
+
+ deconstruct_array(path, TEXTOID, -1, false, TYPALIGN_INT,
+ &path_elems, &path_nulls, &path_len);
+
+ if (path_len == 0)
+ PG_RETURN_JSONB_P(in);
+
+ it = JsonbIteratorInit(&in->root);
+
+ res = setPath(&it, path_elems, path_nulls, path_len, &st,
+ 0, &newval, create ? JB_PATH_CREATE : JB_PATH_REPLACE);
+
+ Assert(res != NULL);
+
+ PG_RETURN_JSONB_P(JsonbValueToJsonb(res));
+}
+
+
+/*
+ * SQL function jsonb_set_lax(jsonb, text[], jsonb, boolean, text)
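+ *
+ * Like jsonb_set(), except that an SQL NULL new value is handled according
+ * to the null_value_treatment argument.  Illustrative call (example input):
+ *
+ *   SELECT jsonb_set_lax('{"a": 1, "b": 2}', '{b}', NULL, true, 'delete_key');
+ *   -- {"a": 1}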
+ */
+Datum
+jsonb_set_lax(PG_FUNCTION_ARGS)
+{
+ /* Jsonb *in = PG_GETARG_JSONB_P(0); */
+ /* ArrayType *path = PG_GETARG_ARRAYTYPE_P(1); */
+ /* Jsonb *newval = PG_GETARG_JSONB_P(2); */
+ /* bool create = PG_GETARG_BOOL(3); */
+ text *handle_null;
+ char *handle_val;
+
+ if (PG_ARGISNULL(0) || PG_ARGISNULL(1) || PG_ARGISNULL(3))
+ PG_RETURN_NULL();
+
+ /* could happen if they pass in an explicit NULL */
+ if (PG_ARGISNULL(4))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("null_value_treatment must be \"delete_key\", \"return_target\", \"use_json_null\", or \"raise_exception\"")));
+
+ /* if the new value isn't an SQL NULL just call jsonb_set */
+ if (!PG_ARGISNULL(2))
+ return jsonb_set(fcinfo);
+
+ handle_null = PG_GETARG_TEXT_P(4);
+ handle_val = text_to_cstring(handle_null);
+
+ if (strcmp(handle_val, "raise_exception") == 0)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("JSON value must not be null"),
+ errdetail("Exception was raised because null_value_treatment is \"raise_exception\"."),
+ errhint("To avoid, either change the null_value_treatment argument or ensure that an SQL NULL is not passed.")));
+ return (Datum) 0; /* silence stupider compilers */
+ }
+ else if (strcmp(handle_val, "use_json_null") == 0)
+ {
+ Datum newval;
+
+ newval = DirectFunctionCall1(jsonb_in, CStringGetDatum("null"));
+
+ fcinfo->args[2].value = newval;
+ fcinfo->args[2].isnull = false;
+ return jsonb_set(fcinfo);
+ }
+ else if (strcmp(handle_val, "delete_key") == 0)
+ {
+ return jsonb_delete_path(fcinfo);
+ }
+ else if (strcmp(handle_val, "return_target") == 0)
+ {
+ Jsonb *in = PG_GETARG_JSONB_P(0);
+
+ PG_RETURN_JSONB_P(in);
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("null_value_treatment must be \"delete_key\", \"return_target\", \"use_json_null\", or \"raise_exception\"")));
+ return (Datum) 0; /* silence stupider compilers */
+ }
+}
+
+/*
+ * SQL function jsonb_delete_path(jsonb, text[])
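+ *
+ * Illustrative behavior of the "#-" operator backed by this function
+ * (example input):
+ *
+ *   '["a", {"b": 1, "c": 2}]'::jsonb #- '{1,b}'   -> ["a", {"c": 2}]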
+ */
+Datum
+jsonb_delete_path(PG_FUNCTION_ARGS)
+{
+ Jsonb *in = PG_GETARG_JSONB_P(0);
+ ArrayType *path = PG_GETARG_ARRAYTYPE_P(1);
+ JsonbValue *res = NULL;
+ Datum *path_elems;
+ bool *path_nulls;
+ int path_len;
+ JsonbIterator *it;
+ JsonbParseState *st = NULL;
+
+ if (ARR_NDIM(path) > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("wrong number of array subscripts")));
+
+ if (JB_ROOT_IS_SCALAR(in))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot delete path in scalar")));
+
+ if (JB_ROOT_COUNT(in) == 0)
+ PG_RETURN_JSONB_P(in);
+
+ deconstruct_array(path, TEXTOID, -1, false, TYPALIGN_INT,
+ &path_elems, &path_nulls, &path_len);
+
+ if (path_len == 0)
+ PG_RETURN_JSONB_P(in);
+
+ it = JsonbIteratorInit(&in->root);
+
+ res = setPath(&it, path_elems, path_nulls, path_len, &st,
+ 0, NULL, JB_PATH_DELETE);
+
+ Assert(res != NULL);
+
+ PG_RETURN_JSONB_P(JsonbValueToJsonb(res));
+}
+
+/*
+ * SQL function jsonb_insert(jsonb, text[], jsonb, boolean)
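+ *
+ * Illustrative calls (example inputs):
+ *
+ *   SELECT jsonb_insert('{"a": [0, 1, 2]}', '{a, 1}', '"new"');
+ *   -- {"a": [0, "new", 1, 2]}
+ *   SELECT jsonb_insert('{"a": [0, 1, 2]}', '{a, 1}', '"new"', true);
+ *   -- {"a": [0, 1, "new", 2]}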
+ */
+Datum
+jsonb_insert(PG_FUNCTION_ARGS)
+{
+ Jsonb *in = PG_GETARG_JSONB_P(0);
+ ArrayType *path = PG_GETARG_ARRAYTYPE_P(1);
+ Jsonb *newjsonb = PG_GETARG_JSONB_P(2);
+ JsonbValue newval;
+ bool after = PG_GETARG_BOOL(3);
+ JsonbValue *res = NULL;
+ Datum *path_elems;
+ bool *path_nulls;
+ int path_len;
+ JsonbIterator *it;
+ JsonbParseState *st = NULL;
+
+ JsonbToJsonbValue(newjsonb, &newval);
+
+ if (ARR_NDIM(path) > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("wrong number of array subscripts")));
+
+ if (JB_ROOT_IS_SCALAR(in))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot set path in scalar")));
+
+ deconstruct_array(path, TEXTOID, -1, false, TYPALIGN_INT,
+ &path_elems, &path_nulls, &path_len);
+
+ if (path_len == 0)
+ PG_RETURN_JSONB_P(in);
+
+ it = JsonbIteratorInit(&in->root);
+
+ res = setPath(&it, path_elems, path_nulls, path_len, &st, 0, &newval,
+ after ? JB_PATH_INSERT_AFTER : JB_PATH_INSERT_BEFORE);
+
+ Assert(res != NULL);
+
+ PG_RETURN_JSONB_P(JsonbValueToJsonb(res));
+}
+
+/*
+ * Iterate over all jsonb objects and merge them into one.
+ * The logic of this function is copied from the equivalent hstore function,
+ * except for the case when both it1 and it2 represent a jbvObject.
+ * In that case we just append the contents of it2 to it1 without any
+ * verification.
+ */
+static JsonbValue *
+IteratorConcat(JsonbIterator **it1, JsonbIterator **it2,
+ JsonbParseState **state)
+{
+ JsonbValue v1,
+ v2,
+ *res = NULL;
+ JsonbIteratorToken r1,
+ r2,
+ rk1,
+ rk2;
+
+ rk1 = JsonbIteratorNext(it1, &v1, false);
+ rk2 = JsonbIteratorNext(it2, &v2, false);
+
+ /*
+ * JsonbIteratorNext reports raw scalars as if they were single-element
+ * arrays; hence we only need consider "object" and "array" cases here.
+ */
+ if (rk1 == WJB_BEGIN_OBJECT && rk2 == WJB_BEGIN_OBJECT)
+ {
+ /*
+ * Both inputs are objects.
+ *
+ * Append all the tokens from v1 to res, except last WJB_END_OBJECT
+ * (because res will not be finished yet).
+ */
+ pushJsonbValue(state, rk1, NULL);
+ while ((r1 = JsonbIteratorNext(it1, &v1, true)) != WJB_END_OBJECT)
+ pushJsonbValue(state, r1, &v1);
+
+ /*
+ * Append all the tokens from v2 to res, including last WJB_END_OBJECT
+ * (the concatenation will be completed). Any duplicate keys will
+ * automatically override the value from the first object.
+ */
+ while ((r2 = JsonbIteratorNext(it2, &v2, true)) != WJB_DONE)
+ res = pushJsonbValue(state, r2, r2 != WJB_END_OBJECT ? &v2 : NULL);
+ }
+ else if (rk1 == WJB_BEGIN_ARRAY && rk2 == WJB_BEGIN_ARRAY)
+ {
+ /*
+ * Both inputs are arrays.
+ */
+ pushJsonbValue(state, rk1, NULL);
+
+ while ((r1 = JsonbIteratorNext(it1, &v1, true)) != WJB_END_ARRAY)
+ {
+ Assert(r1 == WJB_ELEM);
+ pushJsonbValue(state, r1, &v1);
+ }
+
+ while ((r2 = JsonbIteratorNext(it2, &v2, true)) != WJB_END_ARRAY)
+ {
+ Assert(r2 == WJB_ELEM);
+ pushJsonbValue(state, WJB_ELEM, &v2);
+ }
+
+ res = pushJsonbValue(state, WJB_END_ARRAY, NULL /* signal to sort */ );
+ }
+ else if (rk1 == WJB_BEGIN_OBJECT)
+ {
+ /*
+ * We have object || array.
+ */
+ Assert(rk2 == WJB_BEGIN_ARRAY);
+
+ pushJsonbValue(state, WJB_BEGIN_ARRAY, NULL);
+
+ pushJsonbValue(state, WJB_BEGIN_OBJECT, NULL);
+ while ((r1 = JsonbIteratorNext(it1, &v1, true)) != WJB_DONE)
+ pushJsonbValue(state, r1, r1 != WJB_END_OBJECT ? &v1 : NULL);
+
+ while ((r2 = JsonbIteratorNext(it2, &v2, true)) != WJB_DONE)
+ res = pushJsonbValue(state, r2, r2 != WJB_END_ARRAY ? &v2 : NULL);
+ }
+ else
+ {
+ /*
+ * We have array || object.
+ */
+ Assert(rk1 == WJB_BEGIN_ARRAY);
+ Assert(rk2 == WJB_BEGIN_OBJECT);
+
+ pushJsonbValue(state, WJB_BEGIN_ARRAY, NULL);
+
+ while ((r1 = JsonbIteratorNext(it1, &v1, true)) != WJB_END_ARRAY)
+ pushJsonbValue(state, r1, &v1);
+
+ pushJsonbValue(state, WJB_BEGIN_OBJECT, NULL);
+ while ((r2 = JsonbIteratorNext(it2, &v2, true)) != WJB_DONE)
+ pushJsonbValue(state, r2, r2 != WJB_END_OBJECT ? &v2 : NULL);
+
+ res = pushJsonbValue(state, WJB_END_ARRAY, NULL);
+ }
+
+ return res;
+}
+
+/*
+ * Do most of the heavy work for jsonb_set/jsonb_insert
+ *
+ * If JB_PATH_DELETE bit is set in op_type, the element is to be removed.
+ *
+ * If any bit mentioned in JB_PATH_CREATE_OR_INSERT is set in op_type,
+ * we create the new value if the key or array index does not exist.
+ *
+ * Bits JB_PATH_INSERT_BEFORE and JB_PATH_INSERT_AFTER in op_type
+ * behave as JB_PATH_CREATE if the new value is inserted into a JsonbObject.
+ *
+ * If the JB_PATH_FILL_GAPS bit is set, the assignment logic changes when the
+ * target is an array. The assignment index is then not restricted by the
+ * number of elements in the array, and any empty slots between the last
+ * existing element and the new one are filled with nulls. If the index is
+ * negative, it is still treated as an index counted from the end of the
+ * array. If part of the path is missing, and that part is more than just the
+ * last element, this flag instructs us to create the whole chain of
+ * corresponding objects and insert the value.
+ *
+ * JB_PATH_CONSISTENT_POSITION for an array indicates that the caller wants
+ * existing values to keep their indices. Prepending a new value because of a
+ * negative index out of range would shift existing elements forward, so in
+ * that case an error is raised instead.
+ *
+ * All path elements before the last must already exist
+ * whatever bits in op_type are set, or nothing is done.
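+ *
+ * Illustrative walk-through (example input): jsonb_set('{"a": {"b": 1}}',
+ * '{a,b}', '2') enters here at level 0, descends via setPathObject() for key
+ * "a", recurses into the nested object, and replaces the value of "b" at
+ * level 1, producing {"a": {"b": 2}}.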
+ */
+static JsonbValue *
+setPath(JsonbIterator **it, Datum *path_elems,
+ bool *path_nulls, int path_len,
+ JsonbParseState **st, int level, JsonbValue *newval, int op_type)
+{
+ JsonbValue v;
+ JsonbIteratorToken r;
+ JsonbValue *res;
+
+ check_stack_depth();
+
+ if (path_nulls[level])
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("path element at position %d is null",
+ level + 1)));
+
+ r = JsonbIteratorNext(it, &v, false);
+
+ switch (r)
+ {
+ case WJB_BEGIN_ARRAY:
+
+ /*
+ * If instructed, complain about attempts to replace within a raw
+ * scalar value. This happens even at the last level of the path,
+ * because the last path key should also correspond to an object or
+ * an array, not a raw scalar.
+ */
+ if ((op_type & JB_PATH_FILL_GAPS) && (level <= path_len - 1) &&
+ v.val.array.rawScalar)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot replace existing key"),
+ errdetail("The path assumes key is a composite object, "
+ "but it is a scalar value.")));
+
+ (void) pushJsonbValue(st, r, NULL);
+ setPathArray(it, path_elems, path_nulls, path_len, st, level,
+ newval, v.val.array.nElems, op_type);
+ r = JsonbIteratorNext(it, &v, false);
+ Assert(r == WJB_END_ARRAY);
+ res = pushJsonbValue(st, r, NULL);
+ break;
+ case WJB_BEGIN_OBJECT:
+ (void) pushJsonbValue(st, r, NULL);
+ setPathObject(it, path_elems, path_nulls, path_len, st, level,
+ newval, v.val.object.nPairs, op_type);
+ r = JsonbIteratorNext(it, &v, true);
+ Assert(r == WJB_END_OBJECT);
+ res = pushJsonbValue(st, r, NULL);
+ break;
+ case WJB_ELEM:
+ case WJB_VALUE:
+
+ /*
+ * If instructed, complain about attempts to replace within a
+ * scalar value. This happens even at the last level of the path,
+ * because the last path key should also correspond to an object or
+ * an array, not an element or value.
+ */
+ if ((op_type & JB_PATH_FILL_GAPS) && (level <= path_len - 1))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot replace existing key"),
+ errdetail("The path assumes key is a composite object, "
+ "but it is a scalar value.")));
+
+ res = pushJsonbValue(st, r, &v);
+ break;
+ default:
+ elog(ERROR, "unrecognized iterator result: %d", (int) r);
+ res = NULL; /* keep compiler quiet */
+ break;
+ }
+
+ return res;
+}
+
+/*
+ * Object walker for setPath
+ */
+static void
+setPathObject(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
+ int path_len, JsonbParseState **st, int level,
+ JsonbValue *newval, uint32 npairs, int op_type)
+{
+ text *pathelem = NULL;
+ int i;
+ JsonbValue k,
+ v;
+ bool done = false;
+
+ if (level >= path_len || path_nulls[level])
+ done = true;
+ else
+ {
+ /* The path Datum could be toasted, in which case we must detoast it */
+ pathelem = DatumGetTextPP(path_elems[level]);
+ }
+
+ /* empty object is a special case for create */
+ if ((npairs == 0) && (op_type & JB_PATH_CREATE_OR_INSERT) &&
+ (level == path_len - 1))
+ {
+ JsonbValue newkey;
+
+ newkey.type = jbvString;
+ newkey.val.string.val = VARDATA_ANY(pathelem);
+ newkey.val.string.len = VARSIZE_ANY_EXHDR(pathelem);
+
+ (void) pushJsonbValue(st, WJB_KEY, &newkey);
+ (void) pushJsonbValue(st, WJB_VALUE, newval);
+ }
+
+ for (i = 0; i < npairs; i++)
+ {
+ JsonbIteratorToken r = JsonbIteratorNext(it, &k, true);
+
+ Assert(r == WJB_KEY);
+
+ if (!done &&
+ k.val.string.len == VARSIZE_ANY_EXHDR(pathelem) &&
+ memcmp(k.val.string.val, VARDATA_ANY(pathelem),
+ k.val.string.len) == 0)
+ {
+ done = true;
+
+ if (level == path_len - 1)
+ {
+ /*
+ * If called from jsonb_insert(), it forbids redefining an
+ * existing value.
+ */
+ if (op_type & (JB_PATH_INSERT_BEFORE | JB_PATH_INSERT_AFTER))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot replace existing key"),
+ errhint("Try using the function jsonb_set "
+ "to replace key value.")));
+
+ r = JsonbIteratorNext(it, &v, true); /* skip value */
+ if (!(op_type & JB_PATH_DELETE))
+ {
+ (void) pushJsonbValue(st, WJB_KEY, &k);
+ (void) pushJsonbValue(st, WJB_VALUE, newval);
+ }
+ }
+ else
+ {
+ (void) pushJsonbValue(st, r, &k);
+ setPath(it, path_elems, path_nulls, path_len,
+ st, level + 1, newval, op_type);
+ }
+ }
+ else
+ {
+ if ((op_type & JB_PATH_CREATE_OR_INSERT) && !done &&
+ level == path_len - 1 && i == npairs - 1)
+ {
+ JsonbValue newkey;
+
+ newkey.type = jbvString;
+ newkey.val.string.val = VARDATA_ANY(pathelem);
+ newkey.val.string.len = VARSIZE_ANY_EXHDR(pathelem);
+
+ (void) pushJsonbValue(st, WJB_KEY, &newkey);
+ (void) pushJsonbValue(st, WJB_VALUE, newval);
+ }
+
+ (void) pushJsonbValue(st, r, &k);
+ r = JsonbIteratorNext(it, &v, false);
+ (void) pushJsonbValue(st, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
+ if (r == WJB_BEGIN_ARRAY || r == WJB_BEGIN_OBJECT)
+ {
+ int walking_level = 1;
+
+ while (walking_level != 0)
+ {
+ r = JsonbIteratorNext(it, &v, false);
+
+ if (r == WJB_BEGIN_ARRAY || r == WJB_BEGIN_OBJECT)
+ ++walking_level;
+ if (r == WJB_END_ARRAY || r == WJB_END_OBJECT)
+ --walking_level;
+
+ (void) pushJsonbValue(st, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
+ }
+ }
+ }
+ }
+
+ /*--
+ * If we got here, there are only a few possibilities:
+ * - no target path was found, and an open object with some keys/values was
+ *   pushed into the state
+ * - the object is empty, and only WJB_BEGIN_OBJECT has been pushed
+ *
+ * In both cases, if instructed to create the path when it is not present,
+ * generate the whole chain of empty objects and insert the new value
+ * there.
+ */
+ if (!done && (op_type & JB_PATH_FILL_GAPS) && (level < path_len - 1))
+ {
+ JsonbValue newkey;
+
+ newkey.type = jbvString;
+ newkey.val.string.val = VARDATA_ANY(pathelem);
+ newkey.val.string.len = VARSIZE_ANY_EXHDR(pathelem);
+
+ (void) pushJsonbValue(st, WJB_KEY, &newkey);
+ (void) push_path(st, level, path_elems, path_nulls,
+ path_len, newval);
+
+ /* Result is closed with WJB_END_OBJECT outside of this function */
+ }
+}
+
+/*
+ * Array walker for setPath
+ */
+static void
+setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
+ int path_len, JsonbParseState **st, int level,
+ JsonbValue *newval, uint32 nelems, int op_type)
+{
+ JsonbValue v;
+ int idx,
+ i;
+ bool done = false;
+
+ /* pick correct index */
+ if (level < path_len && !path_nulls[level])
+ {
+ char *c = TextDatumGetCString(path_elems[level]);
+ char *badp;
+
+ errno = 0;
+ idx = strtoint(c, &badp, 10);
+ if (badp == c || *badp != '\0' || errno != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("path element at position %d is not an integer: \"%s\"",
+ level + 1, c)));
+ }
+ else
+ idx = nelems;
+
+ if (idx < 0)
+ {
+ if (-idx > nelems)
+ {
+ /*
+ * If asked to keep element positions consistent, it's not allowed
+ * to prepend to the array.
+ */
+ if (op_type & JB_PATH_CONSISTENT_POSITION)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("path element at position %d is out of range: %d",
+ level + 1, idx)));
+ else
+ idx = INT_MIN;
+ }
+ else
+ idx = nelems + idx;
+ }
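+
+ /*
+ * For example (illustrative): with nelems = 3, a path element of "-1"
+ * resolves to idx = 2 (the last element), while "-5" either raises an
+ * error (JB_PATH_CONSISTENT_POSITION) or becomes INT_MIN, meaning
+ * "prepend".
+ */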
+
+ /*
+ * Filling the gaps means no limit is imposed on the positive index;
+ * we can set any element. Otherwise, limit the index by nelems.
+ */
+ if (!(op_type & JB_PATH_FILL_GAPS))
+ {
+ if (idx > 0 && idx > nelems)
+ idx = nelems;
+ }
+
+ /*
+ * If we're creating and idx == INT_MIN, we prepend the new value to the
+ * array. We do the same if the array is empty, in which case we don't
+ * really care what the idx value is.
+ */
+ if ((idx == INT_MIN || nelems == 0) && (level == path_len - 1) &&
+ (op_type & JB_PATH_CREATE_OR_INSERT))
+ {
+ Assert(newval != NULL);
+
+ if (op_type & JB_PATH_FILL_GAPS && nelems == 0 && idx > 0)
+ push_null_elements(st, idx);
+
+ (void) pushJsonbValue(st, WJB_ELEM, newval);
+
+ done = true;
+ }
+
+ /* iterate over the array elements */
+ for (i = 0; i < nelems; i++)
+ {
+ JsonbIteratorToken r;
+
+ if (i == idx && level < path_len)
+ {
+ done = true;
+
+ if (level == path_len - 1)
+ {
+ r = JsonbIteratorNext(it, &v, true); /* skip */
+
+ if (op_type & (JB_PATH_INSERT_BEFORE | JB_PATH_CREATE))
+ (void) pushJsonbValue(st, WJB_ELEM, newval);
+
+ /*
+ * Keep the current value only in the case of JB_PATH_INSERT_BEFORE
+ * or JB_PATH_INSERT_AFTER, because otherwise it is being deleted or
+ * replaced.
+ */
+ if (op_type & (JB_PATH_INSERT_AFTER | JB_PATH_INSERT_BEFORE))
+ (void) pushJsonbValue(st, r, &v);
+
+ if (op_type & (JB_PATH_INSERT_AFTER | JB_PATH_REPLACE))
+ (void) pushJsonbValue(st, WJB_ELEM, newval);
+ }
+ else
+ (void) setPath(it, path_elems, path_nulls, path_len,
+ st, level + 1, newval, op_type);
+ }
+ else
+ {
+ r = JsonbIteratorNext(it, &v, false);
+
+ (void) pushJsonbValue(st, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
+
+ if (r == WJB_BEGIN_ARRAY || r == WJB_BEGIN_OBJECT)
+ {
+ int walking_level = 1;
+
+ while (walking_level != 0)
+ {
+ r = JsonbIteratorNext(it, &v, false);
+
+ if (r == WJB_BEGIN_ARRAY || r == WJB_BEGIN_OBJECT)
+ ++walking_level;
+ if (r == WJB_END_ARRAY || r == WJB_END_OBJECT)
+ --walking_level;
+
+ (void) pushJsonbValue(st, r, r < WJB_BEGIN_ARRAY ? &v : NULL);
+ }
+ }
+ }
+ }
+
+ if ((op_type & JB_PATH_CREATE_OR_INSERT) && !done && level == path_len - 1)
+ {
+ /*
+ * If asked to fill the gaps, idx could be bigger than nelems, so
+ * pad with nulls before the new element if that's the case.
+ */
+ if (op_type & JB_PATH_FILL_GAPS && idx > nelems)
+ push_null_elements(st, idx - nelems);
+
+ (void) pushJsonbValue(st, WJB_ELEM, newval);
+ done = true;
+ }
+
+ /*--
+ * If we got here, there are only a few possibilities:
+ * - no target path was found, and an open array with some elements was
+ *   pushed into the state
+ * - the array is empty, and only WJB_BEGIN_ARRAY has been pushed
+ *
+ * In both cases, if instructed to create the path when it is not present,
+ * generate the whole chain of empty objects and insert the new value
+ * there.
+ */
+ if (!done && (op_type & JB_PATH_FILL_GAPS) && (level < path_len - 1))
+ {
+ if (idx > 0)
+ push_null_elements(st, idx - nelems);
+
+ (void) push_path(st, level, path_elems, path_nulls,
+ path_len, newval);
+
+ /* Result is closed with WJB_END_ARRAY outside of this function */
+ }
+}
+
+/*
+ * Parse information about what elements of a jsonb document we want to iterate
+ * in functions iterate_json(b)_values. This information is presented in jsonb
+ * format, so that it can be easily extended in the future.
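+ *
+ * For example (illustrative), the flag document '["string", "key"]' yields
+ * jtiString | jtiKey, and the scalar '"all"' yields jtiAll.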
+ */
+uint32
+parse_jsonb_index_flags(Jsonb *jb)
+{
+ JsonbIterator *it;
+ JsonbValue v;
+ JsonbIteratorToken type;
+ uint32 flags = 0;
+
+ it = JsonbIteratorInit(&jb->root);
+
+ type = JsonbIteratorNext(&it, &v, false);
+
+ /*
+ * We iterate over the array (a scalar is internally represented as an
+ * array, so we accept it too) to check all its elements. Flag names are
+ * chosen to match those used by jsonb_typeof.
+ */
+ if (type != WJB_BEGIN_ARRAY)
+ ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("wrong flag type, only arrays and scalars are allowed")));
+
+ while ((type = JsonbIteratorNext(&it, &v, false)) == WJB_ELEM)
+ {
+ if (v.type != jbvString)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("flag array element is not a string"),
+ errhint("Possible values are: \"string\", \"numeric\", \"boolean\", \"key\", and \"all\".")));
+
+ if (v.val.string.len == 3 &&
+ pg_strncasecmp(v.val.string.val, "all", 3) == 0)
+ flags |= jtiAll;
+ else if (v.val.string.len == 3 &&
+ pg_strncasecmp(v.val.string.val, "key", 3) == 0)
+ flags |= jtiKey;
+ else if (v.val.string.len == 6 &&
+ pg_strncasecmp(v.val.string.val, "string", 6) == 0)
+ flags |= jtiString;
+ else if (v.val.string.len == 7 &&
+ pg_strncasecmp(v.val.string.val, "numeric", 7) == 0)
+ flags |= jtiNumeric;
+ else if (v.val.string.len == 7 &&
+ pg_strncasecmp(v.val.string.val, "boolean", 7) == 0)
+ flags |= jtiBool;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("wrong flag in flag array: \"%s\"",
+ pnstrdup(v.val.string.val, v.val.string.len)),
+ errhint("Possible values are: \"string\", \"numeric\", \"boolean\", \"key\", and \"all\".")));
+ }
+
+ /* expect end of array now */
+ if (type != WJB_END_ARRAY)
+ elog(ERROR, "unexpected end of flag array");
+
+ /* get final WJB_DONE and free iterator */
+ type = JsonbIteratorNext(&it, &v, false);
+ if (type != WJB_DONE)
+ elog(ERROR, "unexpected end of flag array");
+
+ return flags;
+}
+
+/*
+ * Iterate over jsonb values or elements, specified by flags, and pass them
+ * together with an iteration state to a specified JsonIterateStringValuesAction.
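+ *
+ * For instance (illustrative), with flags = jtiKey | jtiString the action is
+ * invoked once for every object key and once for every string value or array
+ * element, while numeric and boolean values are skipped.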
+ */
+void
+iterate_jsonb_values(Jsonb *jb, uint32 flags, void *state,
+ JsonIterateStringValuesAction action)
+{
+ JsonbIterator *it;
+ JsonbValue v;
+ JsonbIteratorToken type;
+
+ it = JsonbIteratorInit(&jb->root);
+
+ /*
+ * Just iterate recursively over the jsonb and invoke the callback on
+ * all matching elements.
+ */
+ while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+ {
+ if (type == WJB_KEY)
+ {
+ if (flags & jtiKey)
+ action(state, v.val.string.val, v.val.string.len);
+
+ continue;
+ }
+ else if (!(type == WJB_VALUE || type == WJB_ELEM))
+ {
+ /* do not call callback for composite JsonbValue */
+ continue;
+ }
+
+ /* JsonbValue is a value of object or element of array */
+ switch (v.type)
+ {
+ case jbvString:
+ if (flags & jtiString)
+ action(state, v.val.string.val, v.val.string.len);
+ break;
+ case jbvNumeric:
+ if (flags & jtiNumeric)
+ {
+ char *val;
+
+ val = DatumGetCString(DirectFunctionCall1(numeric_out,
+ NumericGetDatum(v.val.numeric)));
+
+ action(state, val, strlen(val));
+ pfree(val);
+ }
+ break;
+ case jbvBool:
+ if (flags & jtiBool)
+ {
+ if (v.val.boolean)
+ action(state, "true", 4);
+ else
+ action(state, "false", 5);
+ }
+ break;
+ default:
+ /* do not call callback for composite JsonbValue */
+ break;
+ }
+ }
+}
+
+/*
+ * Iterate over json values and elements, specified by flags, and pass them
+ * together with an iteration state to a specified JsonIterateStringValuesAction.
+ */
+void
+iterate_json_values(text *json, uint32 flags, void *action_state,
+ JsonIterateStringValuesAction action)
+{
+ JsonLexContext *lex = makeJsonLexContext(json, true);
+ JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
+ IterateJsonStringValuesState *state = palloc0(sizeof(IterateJsonStringValuesState));
+
+ state->lex = lex;
+ state->action = action;
+ state->action_state = action_state;
+ state->flags = flags;
+
+ sem->semstate = (void *) state;
+ sem->scalar = iterate_values_scalar;
+ sem->object_field_start = iterate_values_object_field_start;
+
+ pg_parse_json_or_ereport(lex, sem);
+}
+
+/*
+ * An auxiliary function for iterate_json_values to invoke the specified
+ * JsonIterateStringValuesAction for matching scalar values.
+ */
+static void
+iterate_values_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+ IterateJsonStringValuesState *_state = (IterateJsonStringValuesState *) state;
+
+ switch (tokentype)
+ {
+ case JSON_TOKEN_STRING:
+ if (_state->flags & jtiString)
+ _state->action(_state->action_state, token, strlen(token));
+ break;
+ case JSON_TOKEN_NUMBER:
+ if (_state->flags & jtiNumeric)
+ _state->action(_state->action_state, token, strlen(token));
+ break;
+ case JSON_TOKEN_TRUE:
+ case JSON_TOKEN_FALSE:
+ if (_state->flags & jtiBool)
+ _state->action(_state->action_state, token, strlen(token));
+ break;
+ default:
+ /* do not call callback for any other token */
+ break;
+ }
+}
+
+static void
+iterate_values_object_field_start(void *state, char *fname, bool isnull)
+{
+ IterateJsonStringValuesState *_state = (IterateJsonStringValuesState *) state;
+
+ if (_state->flags & jtiKey)
+ {
+ char *val = pstrdup(fname);
+
+ _state->action(_state->action_state, val, strlen(val));
+ }
+}
+
+/*
+ * Iterate over a jsonb and apply a specified JsonTransformStringValuesAction
+ * to every string value or element. Any necessary context for the action
+ * can be passed in the action_state variable. The function returns a copy
+ * of the original jsonb with the string values transformed.
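+ *
+ * A caller might, for example, pass an action that lower-cases or rewrites
+ * each string; the action receives the current string and returns the
+ * replacement text (illustrative description, not a specific caller).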
+ */
+Jsonb *
+transform_jsonb_string_values(Jsonb *jsonb, void *action_state,
+ JsonTransformStringValuesAction transform_action)
+{
+ JsonbIterator *it;
+ JsonbValue v,
+ *res = NULL;
+ JsonbIteratorToken type;
+ JsonbParseState *st = NULL;
+ text *out;
+ bool is_scalar = false;
+
+ it = JsonbIteratorInit(&jsonb->root);
+ is_scalar = it->isScalar;
+
+ while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
+ {
+ if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
+ {
+ out = transform_action(action_state, v.val.string.val, v.val.string.len);
+ /* out is probably not toasted, but let's be sure */
+ out = pg_detoast_datum_packed(out);
+ v.val.string.val = VARDATA_ANY(out);
+ v.val.string.len = VARSIZE_ANY_EXHDR(out);
+ res = pushJsonbValue(&st, type, type < WJB_BEGIN_ARRAY ? &v : NULL);
+ }
+ else
+ {
+ res = pushJsonbValue(&st, type, (type == WJB_KEY ||
+ type == WJB_VALUE ||
+ type == WJB_ELEM) ? &v : NULL);
+ }
+ }
+
+ if (res->type == jbvArray)
+ res->val.array.rawScalar = is_scalar;
+
+ return JsonbValueToJsonb(res);
+}
+
+/*
+ * Iterate over a json and apply a specified JsonTransformStringValuesAction
+ * to every string value or element. Any necessary context for the action
+ * can be passed in the action_state variable. The function returns a text
+ * datum containing a copy of the original json with the string values
+ * transformed.
+ */
+text *
+transform_json_string_values(text *json, void *action_state,
+ JsonTransformStringValuesAction transform_action)
+{
+ JsonLexContext *lex = makeJsonLexContext(json, true);
+ JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
+ TransformJsonStringValuesState *state = palloc0(sizeof(TransformJsonStringValuesState));
+
+ state->lex = lex;
+ state->strval = makeStringInfo();
+ state->action = transform_action;
+ state->action_state = action_state;
+
+ sem->semstate = (void *) state;
+ sem->object_start = transform_string_values_object_start;
+ sem->object_end = transform_string_values_object_end;
+ sem->array_start = transform_string_values_array_start;
+ sem->array_end = transform_string_values_array_end;
+ sem->scalar = transform_string_values_scalar;
+ sem->array_element_start = transform_string_values_array_element_start;
+ sem->object_field_start = transform_string_values_object_field_start;
+
+ pg_parse_json_or_ereport(lex, sem);
+
+ return cstring_to_text_with_len(state->strval->data, state->strval->len);
+}
+
+/*
+ * Set of auxiliary functions for transform_json_string_values to invoke a
+ * specified JsonTransformStringValuesAction for all string values and leave
+ * everything else untouched.
+ */
+static void
+transform_string_values_object_start(void *state)
+{
+ TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state;
+
+ appendStringInfoCharMacro(_state->strval, '{');
+}
+
+static void
+transform_string_values_object_end(void *state)
+{
+ TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state;
+
+ appendStringInfoCharMacro(_state->strval, '}');
+}
+
+static void
+transform_string_values_array_start(void *state)
+{
+ TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state;
+
+ appendStringInfoCharMacro(_state->strval, '[');
+}
+
+static void
+transform_string_values_array_end(void *state)
+{
+ TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state;
+
+ appendStringInfoCharMacro(_state->strval, ']');
+}
+
+static void
+transform_string_values_object_field_start(void *state, char *fname, bool isnull)
+{
+ TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state;
+
+ if (_state->strval->data[_state->strval->len - 1] != '{')
+ appendStringInfoCharMacro(_state->strval, ',');
+
+ /*
+ * Unfortunately we don't have the quoted and escaped string any more, so
+ * we have to re-escape it.
+ */
+ escape_json(_state->strval, fname);
+ appendStringInfoCharMacro(_state->strval, ':');
+}
+
+static void
+transform_string_values_array_element_start(void *state, bool isnull)
+{
+ TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state;
+
+ if (_state->strval->data[_state->strval->len - 1] != '[')
+ appendStringInfoCharMacro(_state->strval, ',');
+}
+
+static void
+transform_string_values_scalar(void *state, char *token, JsonTokenType tokentype)
+{
+ TransformJsonStringValuesState *_state = (TransformJsonStringValuesState *) state;
+
+ if (tokentype == JSON_TOKEN_STRING)
+ {
+ text *out = _state->action(_state->action_state, token, strlen(token));
+
+ escape_json(_state->strval, text_to_cstring(out));
+ }
+ else
+ appendStringInfoString(_state->strval, token);
+}
diff --git a/src/backend/utils/adt/jsonpath.c b/src/backend/utils/adt/jsonpath.c
new file mode 100644
index 0000000..f921044
--- /dev/null
+++ b/src/backend/utils/adt/jsonpath.c
@@ -0,0 +1,1080 @@
+/*-------------------------------------------------------------------------
+ *
+ * jsonpath.c
+ * Input/output and supporting routines for jsonpath
+ *
+ * A jsonpath expression is a chain of path items. The first path item is $,
+ * $var, a literal or an arithmetic expression. Subsequent path items are
+ * accessors (.key, .*, [subscripts], [*]), filters (? (predicate)) and
+ * methods (.type(), .size() etc.).
+ *
+ * For instance, the structure of path items for the simple expression:
+ *
+ * $.a[*].type()
+ *
+ * is pretty evident:
+ *
+ * $ => .a => [*] => .type()
+ *
+ * Some path items, such as arithmetic operations, predicates or array
+ * subscripts, may comprise subtrees. For instance, the more complex
+ * expression
+ *
+ * ($.a + $[1 to 5, 7] ? (@ > 3).double()).type()
+ *
+ * has the following structure of path items:
+ *
+ * + => .type()
+ * ___/ \___
+ * / \
+ * $ => .a $ => [] => ? => .double()
+ * _||_ |
+ * / \ >
+ * to to / \
+ * / \ / @ 3
+ * 1 5 7
+ *
+ * The binary encoding of jsonpath is a sequence of 4-byte aligned,
+ * variable-length path items connected by links. Every item has a header
+ * consisting of the item type (enum JsonPathItemType) and the offset of the
+ * next item (zero means there is no next item). After the header, an item
+ * may have a payload depending on its type. For instance, the payload of a
+ * '.key' accessor item is the length of the key name followed by the key
+ * name itself; the payload of a '>' arithmetic operator item is the offsets
+ * of its left and right operands.
+ *
+ * So, the binary representation of the sample expression above is:
+ * (bottom arrows are next links, top lines are argument links)
+ *
+ * _____
+ * _____ ___/____ \ __
+ * _ /_ \ _____/__/____ \ \ __ _ /_ \
+ * / / \ \ / / / \ \ \ / \ / / \ \
+ * +(LR) $ .a $ [](* to *, * to *) 1 5 7 ?(A) >(LR) @ 3 .double() .type()
+ * | | ^ | ^| ^| ^ ^
+ * | |__| |__||________________________||___________________| |
+ * |_______________________________________________________________________|
+ *
+ * Copyright (c) 2019-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/jsonpath.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "funcapi.h"
+#include "lib/stringinfo.h"
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "utils/builtins.h"
+#include "utils/json.h"
+#include "utils/jsonpath.h"
+
+
+static Datum jsonPathFromCstring(char *in, int len);
+static char *jsonPathToCstring(StringInfo out, JsonPath *in,
+ int estimated_len);
+static int flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
+ int nestingLevel, bool insideArraySubscript);
+static void alignStringInfoInt(StringInfo buf);
+static int32 reserveSpaceForItemPointer(StringInfo buf);
+static void printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
+ bool printBracketes);
+static int operationPriority(JsonPathItemType op);
+
+
+/**************************** INPUT/OUTPUT ********************************/
+
+/*
+ * jsonpath type input function
+ */
+Datum
+jsonpath_in(PG_FUNCTION_ARGS)
+{
+ char *in = PG_GETARG_CSTRING(0);
+ int len = strlen(in);
+
+ return jsonPathFromCstring(in, len);
+}
+
+/*
+ * jsonpath type recv function
+ *
+ * The type is sent as text in binary mode, so this is almost the same
+ * as the input function, but it's prefixed with a version number so we
+ * can change the binary format sent in future if necessary. For now,
+ * only version 1 is supported.
+ */
+Datum
+jsonpath_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ int version = pq_getmsgint(buf, 1);
+ char *str;
+ int nbytes;
+
+ if (version == JSONPATH_VERSION)
+ str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
+ else
+ elog(ERROR, "unsupported jsonpath version number: %d", version);
+
+ return jsonPathFromCstring(str, nbytes);
+}
+
+/*
+ * jsonpath type output function
+ */
+Datum
+jsonpath_out(PG_FUNCTION_ARGS)
+{
+ JsonPath *in = PG_GETARG_JSONPATH_P(0);
+
+ PG_RETURN_CSTRING(jsonPathToCstring(NULL, in, VARSIZE(in)));
+}
+
+/*
+ * jsonpath type send function
+ *
+ * Just send jsonpath as a version number, then a string of text
+ */
+Datum
+jsonpath_send(PG_FUNCTION_ARGS)
+{
+ JsonPath *in = PG_GETARG_JSONPATH_P(0);
+ StringInfoData buf;
+ StringInfoData jtext;
+ int version = JSONPATH_VERSION;
+
+ initStringInfo(&jtext);
+ (void) jsonPathToCstring(&jtext, in, VARSIZE(in));
+
+ pq_begintypsend(&buf);
+ pq_sendint8(&buf, version);
+ pq_sendtext(&buf, jtext.data, jtext.len);
+ pfree(jtext.data);
+
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/*
+ * Converts C-string to a jsonpath value.
+ *
+ * Uses the jsonpath parser to turn the string into an AST, then
+ * flattenJsonPathParseItem() makes a second pass to turn the AST into the
+ * binary representation of the jsonpath.
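+ *
+ * For instance (illustrative), the text '$.a.b' parses into a three-item
+ * chain ($ => .a => .b), which is then flattened into three consecutive
+ * binary items linked by "next" offsets.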
+ */
+static Datum
+jsonPathFromCstring(char *in, int len)
+{
+ JsonPathParseResult *jsonpath = parsejsonpath(in, len);
+ JsonPath *res;
+ StringInfoData buf;
+
+ initStringInfo(&buf);
+ enlargeStringInfo(&buf, 4 * len /* estimation */ );
+
+ appendStringInfoSpaces(&buf, JSONPATH_HDRSZ);
+
+ if (!jsonpath)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"", "jsonpath",
+ in)));
+
+ flattenJsonPathParseItem(&buf, jsonpath->expr, 0, false);
+
+ res = (JsonPath *) buf.data;
+ SET_VARSIZE(res, buf.len);
+ res->header = JSONPATH_VERSION;
+ if (jsonpath->lax)
+ res->header |= JSONPATH_LAX;
+
+ PG_RETURN_JSONPATH_P(res);
+}
+
+/*
+ * Converts jsonpath value to a C-string.
+ *
+ * If the 'out' argument is non-null, the resulting C-string is stored in
+ * that StringInfo. The resulting string is always returned.
+ */
+static char *
+jsonPathToCstring(StringInfo out, JsonPath *in, int estimated_len)
+{
+ StringInfoData buf;
+ JsonPathItem v;
+
+ if (!out)
+ {
+ out = &buf;
+ initStringInfo(out);
+ }
+ enlargeStringInfo(out, estimated_len);
+
+ if (!(in->header & JSONPATH_LAX))
+ appendBinaryStringInfo(out, "strict ", 7);
+
+ jspInit(&v, in);
+ printJsonPathItem(out, &v, false, true);
+
+ return out->data;
+}
+
+/*
+ * Recursive function converting given jsonpath parse item and all its
+ * children into a binary representation.
+ */
+static int
+flattenJsonPathParseItem(StringInfo buf, JsonPathParseItem *item,
+ int nestingLevel, bool insideArraySubscript)
+{
+ /* position from beginning of jsonpath data */
+ int32 pos = buf->len - JSONPATH_HDRSZ;
+ int32 chld;
+ int32 next;
+ int argNestingLevel = 0;
+
+ check_stack_depth();
+ CHECK_FOR_INTERRUPTS();
+
+ appendStringInfoChar(buf, (char) (item->type));
+
+ /*
+ * We align the buffer to int32 because a series of int32 values often
+ * follows the header, and we want to read them directly by dereferencing
+ * an int32 pointer (see jspInitByBuffer()).
+ */
+ alignStringInfoInt(buf);
+
+ /*
+ * Reserve space for the next-item pointer. The actual value will be
+ * recorded later, after the next and child items have been processed.
+ */
+ next = reserveSpaceForItemPointer(buf);
+
+ switch (item->type)
+ {
+ case jpiString:
+ case jpiVariable:
+ case jpiKey:
+ appendBinaryStringInfo(buf, (char *) &item->value.string.len,
+ sizeof(item->value.string.len));
+ appendBinaryStringInfo(buf, item->value.string.val,
+ item->value.string.len);
+ appendStringInfoChar(buf, '\0');
+ break;
+ case jpiNumeric:
+ appendBinaryStringInfo(buf, (char *) item->value.numeric,
+ VARSIZE(item->value.numeric));
+ break;
+ case jpiBool:
+ appendBinaryStringInfo(buf, (char *) &item->value.boolean,
+ sizeof(item->value.boolean));
+ break;
+ case jpiAnd:
+ case jpiOr:
+ case jpiEqual:
+ case jpiNotEqual:
+ case jpiLess:
+ case jpiGreater:
+ case jpiLessOrEqual:
+ case jpiGreaterOrEqual:
+ case jpiAdd:
+ case jpiSub:
+ case jpiMul:
+ case jpiDiv:
+ case jpiMod:
+ case jpiStartsWith:
+ {
+ /*
+ * First, reserve space for the left/right arg positions, then
+ * record both args and set their actual positions in the
+ * reserved places.
+ */
+ int32 left = reserveSpaceForItemPointer(buf);
+ int32 right = reserveSpaceForItemPointer(buf);
+
+ chld = !item->value.args.left ? pos :
+ flattenJsonPathParseItem(buf, item->value.args.left,
+ nestingLevel + argNestingLevel,
+ insideArraySubscript);
+ *(int32 *) (buf->data + left) = chld - pos;
+
+ chld = !item->value.args.right ? pos :
+ flattenJsonPathParseItem(buf, item->value.args.right,
+ nestingLevel + argNestingLevel,
+ insideArraySubscript);
+ *(int32 *) (buf->data + right) = chld - pos;
+ }
+ break;
+ case jpiLikeRegex:
+ {
+ int32 offs;
+
+ appendBinaryStringInfo(buf,
+ (char *) &item->value.like_regex.flags,
+ sizeof(item->value.like_regex.flags));
+ offs = reserveSpaceForItemPointer(buf);
+ appendBinaryStringInfo(buf,
+ (char *) &item->value.like_regex.patternlen,
+ sizeof(item->value.like_regex.patternlen));
+ appendBinaryStringInfo(buf, item->value.like_regex.pattern,
+ item->value.like_regex.patternlen);
+ appendStringInfoChar(buf, '\0');
+
+ chld = flattenJsonPathParseItem(buf, item->value.like_regex.expr,
+ nestingLevel,
+ insideArraySubscript);
+ *(int32 *) (buf->data + offs) = chld - pos;
+ }
+ break;
+ case jpiFilter:
+ argNestingLevel++;
+ /* FALLTHROUGH */
+ case jpiIsUnknown:
+ case jpiNot:
+ case jpiPlus:
+ case jpiMinus:
+ case jpiExists:
+ case jpiDatetime:
+ {
+ int32 arg = reserveSpaceForItemPointer(buf);
+
+ chld = !item->value.arg ? pos :
+ flattenJsonPathParseItem(buf, item->value.arg,
+ nestingLevel + argNestingLevel,
+ insideArraySubscript);
+ *(int32 *) (buf->data + arg) = chld - pos;
+ }
+ break;
+ case jpiNull:
+ break;
+ case jpiRoot:
+ break;
+ case jpiAnyArray:
+ case jpiAnyKey:
+ break;
+ case jpiCurrent:
+ if (nestingLevel <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("@ is not allowed in root expressions")));
+ break;
+ case jpiLast:
+ if (!insideArraySubscript)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("LAST is allowed only in array subscripts")));
+ break;
+ case jpiIndexArray:
+ {
+ int32 nelems = item->value.array.nelems;
+ int offset;
+ int i;
+
+ appendBinaryStringInfo(buf, (char *) &nelems, sizeof(nelems));
+
+ offset = buf->len;
+
+ appendStringInfoSpaces(buf, sizeof(int32) * 2 * nelems);
+
+ for (i = 0; i < nelems; i++)
+ {
+ int32 *ppos;
+ int32 topos;
+ int32 frompos =
+ flattenJsonPathParseItem(buf,
+ item->value.array.elems[i].from,
+ nestingLevel, true) - pos;
+
+ if (item->value.array.elems[i].to)
+ topos = flattenJsonPathParseItem(buf,
+ item->value.array.elems[i].to,
+ nestingLevel, true) - pos;
+ else
+ topos = 0;
+
+ ppos = (int32 *) &buf->data[offset + i * 2 * sizeof(int32)];
+
+ ppos[0] = frompos;
+ ppos[1] = topos;
+ }
+ }
+ break;
+ case jpiAny:
+ appendBinaryStringInfo(buf,
+ (char *) &item->value.anybounds.first,
+ sizeof(item->value.anybounds.first));
+ appendBinaryStringInfo(buf,
+ (char *) &item->value.anybounds.last,
+ sizeof(item->value.anybounds.last));
+ break;
+ case jpiType:
+ case jpiSize:
+ case jpiAbs:
+ case jpiFloor:
+ case jpiCeiling:
+ case jpiDouble:
+ case jpiKeyValue:
+ break;
+ default:
+ elog(ERROR, "unrecognized jsonpath item type: %d", item->type);
+ }
+
+ if (item->next)
+ {
+ chld = flattenJsonPathParseItem(buf, item->next, nestingLevel,
+ insideArraySubscript) - pos;
+ *(int32 *) (buf->data + next) = chld;
+ }
+
+ return pos;
+}
+
+/*
+ * Align StringInfo to int by adding zero padding bytes
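+ *
+ * For example (illustrative), if buf->len is 13 and int alignment is 4,
+ * INTALIGN(13) is 16, so three zero bytes are appended.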
+ */
+static void
+alignStringInfoInt(StringInfo buf)
+{
+ switch (INTALIGN(buf->len) - buf->len)
+ {
+ case 3:
+ appendStringInfoCharMacro(buf, 0);
+ /* FALLTHROUGH */
+ case 2:
+ appendStringInfoCharMacro(buf, 0);
+ /* FALLTHROUGH */
+ case 1:
+ appendStringInfoCharMacro(buf, 0);
+ /* FALLTHROUGH */
+ default:
+ break;
+ }
+}
+
+/*
+ * Reserve space for an int32 JsonPathItem pointer. A zero pointer is written
+ * for now; the actual value will be recorded at '(int32 *) &buf->data[pos]'
+ * later.
+ */
+static int32
+reserveSpaceForItemPointer(StringInfo buf)
+{
+ int32 pos = buf->len;
+ int32 ptr = 0;
+
+ appendBinaryStringInfo(buf, (char *) &ptr, sizeof(ptr));
+
+ return pos;
+}
+
+/*
+ * Prints the text representation of the given jsonpath item and all its
+ * children.
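+ *
+ * 'printBracketes' requests surrounding parentheses; recursive calls decide
+ * it via operationPriority(), so e.g. '$.a + $.b * 2' (illustrative) prints
+ * without extra parentheses around the multiplication.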
+ */
+static void
+printJsonPathItem(StringInfo buf, JsonPathItem *v, bool inKey,
+ bool printBracketes)
+{
+ JsonPathItem elem;
+ int i;
+
+ check_stack_depth();
+ CHECK_FOR_INTERRUPTS();
+
+ switch (v->type)
+ {
+ case jpiNull:
+ appendStringInfoString(buf, "null");
+ break;
+ case jpiKey:
+ if (inKey)
+ appendStringInfoChar(buf, '.');
+ escape_json(buf, jspGetString(v, NULL));
+ break;
+ case jpiString:
+ escape_json(buf, jspGetString(v, NULL));
+ break;
+ case jpiVariable:
+ appendStringInfoChar(buf, '$');
+ escape_json(buf, jspGetString(v, NULL));
+ break;
+ case jpiNumeric:
+ if (jspHasNext(v))
+ appendStringInfoChar(buf, '(');
+ appendStringInfoString(buf,
+ DatumGetCString(DirectFunctionCall1(numeric_out,
+ NumericGetDatum(jspGetNumeric(v)))));
+ if (jspHasNext(v))
+ appendStringInfoChar(buf, ')');
+ break;
+ case jpiBool:
+ if (jspGetBool(v))
+ appendBinaryStringInfo(buf, "true", 4);
+ else
+ appendBinaryStringInfo(buf, "false", 5);
+ break;
+ case jpiAnd:
+ case jpiOr:
+ case jpiEqual:
+ case jpiNotEqual:
+ case jpiLess:
+ case jpiGreater:
+ case jpiLessOrEqual:
+ case jpiGreaterOrEqual:
+ case jpiAdd:
+ case jpiSub:
+ case jpiMul:
+ case jpiDiv:
+ case jpiMod:
+ case jpiStartsWith:
+ if (printBracketes)
+ appendStringInfoChar(buf, '(');
+ jspGetLeftArg(v, &elem);
+ printJsonPathItem(buf, &elem, false,
+ operationPriority(elem.type) <=
+ operationPriority(v->type));
+ appendStringInfoChar(buf, ' ');
+ appendStringInfoString(buf, jspOperationName(v->type));
+ appendStringInfoChar(buf, ' ');
+ jspGetRightArg(v, &elem);
+ printJsonPathItem(buf, &elem, false,
+ operationPriority(elem.type) <=
+ operationPriority(v->type));
+ if (printBracketes)
+ appendStringInfoChar(buf, ')');
+ break;
+ case jpiLikeRegex:
+ if (printBracketes)
+ appendStringInfoChar(buf, '(');
+
+ jspInitByBuffer(&elem, v->base, v->content.like_regex.expr);
+ printJsonPathItem(buf, &elem, false,
+ operationPriority(elem.type) <=
+ operationPriority(v->type));
+
+ appendBinaryStringInfo(buf, " like_regex ", 12);
+
+ escape_json(buf, v->content.like_regex.pattern);
+
+ if (v->content.like_regex.flags)
+ {
+ appendBinaryStringInfo(buf, " flag \"", 7);
+
+ if (v->content.like_regex.flags & JSP_REGEX_ICASE)
+ appendStringInfoChar(buf, 'i');
+ if (v->content.like_regex.flags & JSP_REGEX_DOTALL)
+ appendStringInfoChar(buf, 's');
+ if (v->content.like_regex.flags & JSP_REGEX_MLINE)
+ appendStringInfoChar(buf, 'm');
+ if (v->content.like_regex.flags & JSP_REGEX_WSPACE)
+ appendStringInfoChar(buf, 'x');
+ if (v->content.like_regex.flags & JSP_REGEX_QUOTE)
+ appendStringInfoChar(buf, 'q');
+
+ appendStringInfoChar(buf, '"');
+ }
+
+ if (printBracketes)
+ appendStringInfoChar(buf, ')');
+ break;
+ case jpiPlus:
+ case jpiMinus:
+ if (printBracketes)
+ appendStringInfoChar(buf, '(');
+ appendStringInfoChar(buf, v->type == jpiPlus ? '+' : '-');
+ jspGetArg(v, &elem);
+ printJsonPathItem(buf, &elem, false,
+ operationPriority(elem.type) <=
+ operationPriority(v->type));
+ if (printBracketes)
+ appendStringInfoChar(buf, ')');
+ break;
+ case jpiFilter:
+ appendBinaryStringInfo(buf, "?(", 2);
+ jspGetArg(v, &elem);
+ printJsonPathItem(buf, &elem, false, false);
+ appendStringInfoChar(buf, ')');
+ break;
+ case jpiNot:
+ appendBinaryStringInfo(buf, "!(", 2);
+ jspGetArg(v, &elem);
+ printJsonPathItem(buf, &elem, false, false);
+ appendStringInfoChar(buf, ')');
+ break;
+ case jpiIsUnknown:
+ appendStringInfoChar(buf, '(');
+ jspGetArg(v, &elem);
+ printJsonPathItem(buf, &elem, false, false);
+ appendBinaryStringInfo(buf, ") is unknown", 12);
+ break;
+ case jpiExists:
+ appendBinaryStringInfo(buf, "exists (", 8);
+ jspGetArg(v, &elem);
+ printJsonPathItem(buf, &elem, false, false);
+ appendStringInfoChar(buf, ')');
+ break;
+ case jpiCurrent:
+ Assert(!inKey);
+ appendStringInfoChar(buf, '@');
+ break;
+ case jpiRoot:
+ Assert(!inKey);
+ appendStringInfoChar(buf, '$');
+ break;
+ case jpiLast:
+ appendBinaryStringInfo(buf, "last", 4);
+ break;
+ case jpiAnyArray:
+ appendBinaryStringInfo(buf, "[*]", 3);
+ break;
+ case jpiAnyKey:
+ if (inKey)
+ appendStringInfoChar(buf, '.');
+ appendStringInfoChar(buf, '*');
+ break;
+ case jpiIndexArray:
+ appendStringInfoChar(buf, '[');
+ for (i = 0; i < v->content.array.nelems; i++)
+ {
+ JsonPathItem from;
+ JsonPathItem to;
+ bool range = jspGetArraySubscript(v, &from, &to, i);
+
+ if (i)
+ appendStringInfoChar(buf, ',');
+
+ printJsonPathItem(buf, &from, false, false);
+
+ if (range)
+ {
+ appendBinaryStringInfo(buf, " to ", 4);
+ printJsonPathItem(buf, &to, false, false);
+ }
+ }
+ appendStringInfoChar(buf, ']');
+ break;
+ case jpiAny:
+ if (inKey)
+ appendStringInfoChar(buf, '.');
+
+ if (v->content.anybounds.first == 0 &&
+ v->content.anybounds.last == PG_UINT32_MAX)
+ appendBinaryStringInfo(buf, "**", 2);
+ else if (v->content.anybounds.first == v->content.anybounds.last)
+ {
+ if (v->content.anybounds.first == PG_UINT32_MAX)
+ appendStringInfoString(buf, "**{last}");
+ else
+ appendStringInfo(buf, "**{%u}",
+ v->content.anybounds.first);
+ }
+ else if (v->content.anybounds.first == PG_UINT32_MAX)
+ appendStringInfo(buf, "**{last to %u}",
+ v->content.anybounds.last);
+ else if (v->content.anybounds.last == PG_UINT32_MAX)
+ appendStringInfo(buf, "**{%u to last}",
+ v->content.anybounds.first);
+ else
+ appendStringInfo(buf, "**{%u to %u}",
+ v->content.anybounds.first,
+ v->content.anybounds.last);
+ break;
+ case jpiType:
+ appendBinaryStringInfo(buf, ".type()", 7);
+ break;
+ case jpiSize:
+ appendBinaryStringInfo(buf, ".size()", 7);
+ break;
+ case jpiAbs:
+ appendBinaryStringInfo(buf, ".abs()", 6);
+ break;
+ case jpiFloor:
+ appendBinaryStringInfo(buf, ".floor()", 8);
+ break;
+ case jpiCeiling:
+ appendBinaryStringInfo(buf, ".ceiling()", 10);
+ break;
+ case jpiDouble:
+ appendBinaryStringInfo(buf, ".double()", 9);
+ break;
+ case jpiDatetime:
+ appendBinaryStringInfo(buf, ".datetime(", 10);
+ if (v->content.arg)
+ {
+ jspGetArg(v, &elem);
+ printJsonPathItem(buf, &elem, false, false);
+ }
+ appendStringInfoChar(buf, ')');
+ break;
+ case jpiKeyValue:
+ appendBinaryStringInfo(buf, ".keyvalue()", 11);
+ break;
+ default:
+ elog(ERROR, "unrecognized jsonpath item type: %d", v->type);
+ }
+
+ if (jspGetNext(v, &elem))
+ printJsonPathItem(buf, &elem, true, true);
+}
+
+const char *
+jspOperationName(JsonPathItemType type)
+{
+ switch (type)
+ {
+ case jpiAnd:
+ return "&&";
+ case jpiOr:
+ return "||";
+ case jpiEqual:
+ return "==";
+ case jpiNotEqual:
+ return "!=";
+ case jpiLess:
+ return "<";
+ case jpiGreater:
+ return ">";
+ case jpiLessOrEqual:
+ return "<=";
+ case jpiGreaterOrEqual:
+ return ">=";
+ case jpiPlus:
+ case jpiAdd:
+ return "+";
+ case jpiMinus:
+ case jpiSub:
+ return "-";
+ case jpiMul:
+ return "*";
+ case jpiDiv:
+ return "/";
+ case jpiMod:
+ return "%";
+ case jpiStartsWith:
+ return "starts with";
+ case jpiLikeRegex:
+ return "like_regex";
+ case jpiType:
+ return "type";
+ case jpiSize:
+ return "size";
+ case jpiKeyValue:
+ return "keyvalue";
+ case jpiDouble:
+ return "double";
+ case jpiAbs:
+ return "abs";
+ case jpiFloor:
+ return "floor";
+ case jpiCeiling:
+ return "ceiling";
+ case jpiDatetime:
+ return "datetime";
+ default:
+ elog(ERROR, "unrecognized jsonpath item type: %d", type);
+ return NULL;
+ }
+}
+
+static int
+operationPriority(JsonPathItemType op)
+{
+ switch (op)
+ {
+ case jpiOr:
+ return 0;
+ case jpiAnd:
+ return 1;
+ case jpiEqual:
+ case jpiNotEqual:
+ case jpiLess:
+ case jpiGreater:
+ case jpiLessOrEqual:
+ case jpiGreaterOrEqual:
+ case jpiStartsWith:
+ return 2;
+ case jpiAdd:
+ case jpiSub:
+ return 3;
+ case jpiMul:
+ case jpiDiv:
+ case jpiMod:
+ return 4;
+ case jpiPlus:
+ case jpiMinus:
+ return 5;
+ default:
+ return 6;
+ }
+}
+
+/******************* Support functions for JsonPath *************************/
+
+/*
+ * Support macros to read stored values
+ */
+
+#define read_byte(v, b, p) do { \
+ (v) = *(uint8*)((b) + (p)); \
+ (p) += 1; \
+} while(0) \
+
+#define read_int32(v, b, p) do { \
+ (v) = *(uint32*)((b) + (p)); \
+ (p) += sizeof(int32); \
+} while(0) \
+
+#define read_int32_n(v, b, p, n) do { \
+ (v) = (void *)((b) + (p)); \
+ (p) += sizeof(int32) * (n); \
+} while(0) \
+
+/*
+ * Read root node and fill root node representation
+ */
+void
+jspInit(JsonPathItem *v, JsonPath *js)
+{
+ Assert((js->header & ~JSONPATH_LAX) == JSONPATH_VERSION);
+ jspInitByBuffer(v, js->data, 0);
+}
+
+/*
+ * Read node from buffer and fill its representation
+ */
+void
+jspInitByBuffer(JsonPathItem *v, char *base, int32 pos)
+{
+ v->base = base + pos;
+
+ read_byte(v->type, base, pos);
+ pos = INTALIGN((uintptr_t) (base + pos)) - (uintptr_t) base;
+ read_int32(v->nextPos, base, pos);
+
+ switch (v->type)
+ {
+ case jpiNull:
+ case jpiRoot:
+ case jpiCurrent:
+ case jpiAnyArray:
+ case jpiAnyKey:
+ case jpiType:
+ case jpiSize:
+ case jpiAbs:
+ case jpiFloor:
+ case jpiCeiling:
+ case jpiDouble:
+ case jpiKeyValue:
+ case jpiLast:
+ break;
+ case jpiKey:
+ case jpiString:
+ case jpiVariable:
+ read_int32(v->content.value.datalen, base, pos);
+ /* FALLTHROUGH */
+ case jpiNumeric:
+ case jpiBool:
+ v->content.value.data = base + pos;
+ break;
+ case jpiAnd:
+ case jpiOr:
+ case jpiAdd:
+ case jpiSub:
+ case jpiMul:
+ case jpiDiv:
+ case jpiMod:
+ case jpiEqual:
+ case jpiNotEqual:
+ case jpiLess:
+ case jpiGreater:
+ case jpiLessOrEqual:
+ case jpiGreaterOrEqual:
+ case jpiStartsWith:
+ read_int32(v->content.args.left, base, pos);
+ read_int32(v->content.args.right, base, pos);
+ break;
+ case jpiLikeRegex:
+ read_int32(v->content.like_regex.flags, base, pos);
+ read_int32(v->content.like_regex.expr, base, pos);
+ read_int32(v->content.like_regex.patternlen, base, pos);
+ v->content.like_regex.pattern = base + pos;
+ break;
+ case jpiNot:
+ case jpiExists:
+ case jpiIsUnknown:
+ case jpiPlus:
+ case jpiMinus:
+ case jpiFilter:
+ case jpiDatetime:
+ read_int32(v->content.arg, base, pos);
+ break;
+ case jpiIndexArray:
+ read_int32(v->content.array.nelems, base, pos);
+ read_int32_n(v->content.array.elems, base, pos,
+ v->content.array.nelems * 2);
+ break;
+ case jpiAny:
+ read_int32(v->content.anybounds.first, base, pos);
+ read_int32(v->content.anybounds.last, base, pos);
+ break;
+ default:
+ elog(ERROR, "unrecognized jsonpath item type: %d", v->type);
+ }
+}
+
+void
+jspGetArg(JsonPathItem *v, JsonPathItem *a)
+{
+ Assert(v->type == jpiFilter ||
+ v->type == jpiNot ||
+ v->type == jpiIsUnknown ||
+ v->type == jpiExists ||
+ v->type == jpiPlus ||
+ v->type == jpiMinus ||
+ v->type == jpiDatetime);
+
+ jspInitByBuffer(a, v->base, v->content.arg);
+}
+
+bool
+jspGetNext(JsonPathItem *v, JsonPathItem *a)
+{
+ if (jspHasNext(v))
+ {
+ Assert(v->type == jpiString ||
+ v->type == jpiNumeric ||
+ v->type == jpiBool ||
+ v->type == jpiNull ||
+ v->type == jpiKey ||
+ v->type == jpiAny ||
+ v->type == jpiAnyArray ||
+ v->type == jpiAnyKey ||
+ v->type == jpiIndexArray ||
+ v->type == jpiFilter ||
+ v->type == jpiCurrent ||
+ v->type == jpiExists ||
+ v->type == jpiRoot ||
+ v->type == jpiVariable ||
+ v->type == jpiLast ||
+ v->type == jpiAdd ||
+ v->type == jpiSub ||
+ v->type == jpiMul ||
+ v->type == jpiDiv ||
+ v->type == jpiMod ||
+ v->type == jpiPlus ||
+ v->type == jpiMinus ||
+ v->type == jpiEqual ||
+ v->type == jpiNotEqual ||
+ v->type == jpiGreater ||
+ v->type == jpiGreaterOrEqual ||
+ v->type == jpiLess ||
+ v->type == jpiLessOrEqual ||
+ v->type == jpiAnd ||
+ v->type == jpiOr ||
+ v->type == jpiNot ||
+ v->type == jpiIsUnknown ||
+ v->type == jpiType ||
+ v->type == jpiSize ||
+ v->type == jpiAbs ||
+ v->type == jpiFloor ||
+ v->type == jpiCeiling ||
+ v->type == jpiDouble ||
+ v->type == jpiDatetime ||
+ v->type == jpiKeyValue ||
+ v->type == jpiStartsWith ||
+ v->type == jpiLikeRegex);
+
+ if (a)
+ jspInitByBuffer(a, v->base, v->nextPos);
+ return true;
+ }
+
+ return false;
+}
+
+void
+jspGetLeftArg(JsonPathItem *v, JsonPathItem *a)
+{
+ Assert(v->type == jpiAnd ||
+ v->type == jpiOr ||
+ v->type == jpiEqual ||
+ v->type == jpiNotEqual ||
+ v->type == jpiLess ||
+ v->type == jpiGreater ||
+ v->type == jpiLessOrEqual ||
+ v->type == jpiGreaterOrEqual ||
+ v->type == jpiAdd ||
+ v->type == jpiSub ||
+ v->type == jpiMul ||
+ v->type == jpiDiv ||
+ v->type == jpiMod ||
+ v->type == jpiStartsWith);
+
+ jspInitByBuffer(a, v->base, v->content.args.left);
+}
+
+void
+jspGetRightArg(JsonPathItem *v, JsonPathItem *a)
+{
+ Assert(v->type == jpiAnd ||
+ v->type == jpiOr ||
+ v->type == jpiEqual ||
+ v->type == jpiNotEqual ||
+ v->type == jpiLess ||
+ v->type == jpiGreater ||
+ v->type == jpiLessOrEqual ||
+ v->type == jpiGreaterOrEqual ||
+ v->type == jpiAdd ||
+ v->type == jpiSub ||
+ v->type == jpiMul ||
+ v->type == jpiDiv ||
+ v->type == jpiMod ||
+ v->type == jpiStartsWith);
+
+ jspInitByBuffer(a, v->base, v->content.args.right);
+}
+
+bool
+jspGetBool(JsonPathItem *v)
+{
+ Assert(v->type == jpiBool);
+
+ return (bool) *v->content.value.data;
+}
+
+Numeric
+jspGetNumeric(JsonPathItem *v)
+{
+ Assert(v->type == jpiNumeric);
+
+ return (Numeric) v->content.value.data;
+}
+
+char *
+jspGetString(JsonPathItem *v, int32 *len)
+{
+ Assert(v->type == jpiKey ||
+ v->type == jpiString ||
+ v->type == jpiVariable);
+
+ if (len)
+ *len = v->content.value.datalen;
+ return v->content.value.data;
+}
+
+bool
+jspGetArraySubscript(JsonPathItem *v, JsonPathItem *from, JsonPathItem *to,
+ int i)
+{
+ Assert(v->type == jpiIndexArray);
+
+ jspInitByBuffer(from, v->base, v->content.array.elems[i].from);
+
+ if (!v->content.array.elems[i].to)
+ return false;
+
+ jspInitByBuffer(to, v->base, v->content.array.elems[i].to);
+
+ return true;
+}
diff --git a/src/backend/utils/adt/jsonpath_exec.c b/src/backend/utils/adt/jsonpath_exec.c
new file mode 100644
index 0000000..c8368ea
--- /dev/null
+++ b/src/backend/utils/adt/jsonpath_exec.c
@@ -0,0 +1,2812 @@
+/*-------------------------------------------------------------------------
+ *
+ * jsonpath_exec.c
+ * Routines for SQL/JSON path execution.
+ *
+ * Jsonpath is executed in the global context stored in JsonPathExecContext,
+ * which is passed to almost every function involved in the execution. The
+ * entry point for jsonpath execution is the executeJsonPath() function, which
+ * initializes the execution context, including the initial JsonPathItem and
+ * JsonbValue, flags, and the stack for calculation of @ in filters.
+ *
+ * The result of jsonpath query execution is an enum JsonPathExecResult and,
+ * on success, a sequence of JsonbValue written to JsonValueList *found, which
+ * is passed through the jsonpath items. When found == NULL, we're inside an
+ * exists-query and are interested only in whether the result is empty. In
+ * that case execution stops once the first result item is found, and the
+ * only execution result is the JsonPathExecResult. Its possible values are:
+ * - jperOk -- result sequence is not empty
+ * - jperNotFound -- result sequence is empty
+ * - jperError -- error occurred during execution
+ *
+ * Jsonpath is executed recursively (see executeItem()) starting from the
+ * first path item (which in turn might be, for instance, an arithmetic
+ * expression evaluated separately). On each step, a single JsonbValue
+ * obtained from the previous path item is processed. The result of the
+ * processing is a sequence of JsonbValue (possibly empty), which is passed
+ * to the next path item one by one. When there is no next path item, the
+ * JsonbValue is added to the 'found' list. When found == NULL, the execution
+ * functions just return jperOk (see executeNextItem()).
+ *
+ * Many jsonpath operations require automatic unwrapping of arrays in lax
+ * mode. So, if the input value is an array, the corresponding operation is
+ * applied not to the array itself but to each of its members one by one.
+ * The executeItemOptUnwrapTarget() function has an 'unwrap' argument, which
+ * indicates whether unwrapping of the array is needed. When unwrap == true,
+ * each array member is passed to executeItemOptUnwrapTarget() again but with
+ * unwrap == false, in order to avoid subsequent array unwrapping.
+ *
+ * All boolean expressions (predicates) are evaluated by the executeBoolItem()
+ * function, which returns the tri-state JsonPathBool. When an error occurs
+ * during predicate execution, it returns jpbUnknown. According to the
+ * standard, predicates can appear only inside filters, but we also support
+ * their usage as a whole jsonpath expression; this lets us implement the @@
+ * operator. In that case the resulting JsonPathBool is transformed into a
+ * jsonb bool or null.
+ *
+ * Arithmetic and boolean expressions are evaluated recursively from the top
+ * of the expression tree down to the leaves. Therefore, for binary arithmetic
+ * expressions we calculate the operands first, then check that the results
+ * are numeric singleton lists, compute the result, and pass it to the next
+ * path item.
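+ *
+ * For example (illustrative), evaluating '$.a + 3' against '{"a": 2}' first
+ * executes '$.a' to obtain the singleton 2, checks that both operands are
+ * numeric singletons, and then emits the single result 5.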
+ *
+ * Copyright (c) 2019-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/jsonpath_exec.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "catalog/pg_collation.h"
+#include "catalog/pg_type.h"
+#include "funcapi.h"
+#include "lib/stringinfo.h"
+#include "miscadmin.h"
+#include "regex/regex.h"
+#include "utils/builtins.h"
+#include "utils/date.h"
+#include "utils/datetime.h"
+#include "utils/datum.h"
+#include "utils/float.h"
+#include "utils/formatting.h"
+#include "utils/guc.h"
+#include "utils/json.h"
+#include "utils/jsonpath.h"
+#include "utils/timestamp.h"
+#include "utils/varlena.h"
+
+/*
+ * Represents the "base object" and its "id" for .keyvalue() evaluation.
+ */
+typedef struct JsonBaseObjectInfo
+{
+ JsonbContainer *jbc;
+ int id;
+} JsonBaseObjectInfo;
+
+/*
+ * Context of jsonpath execution.
+ */
+typedef struct JsonPathExecContext
+{
+ Jsonb *vars; /* variables to substitute into jsonpath */
+ JsonbValue *root; /* for $ evaluation */
+ JsonbValue *current; /* for @ evaluation */
+ JsonBaseObjectInfo baseObject; /* "base object" for .keyvalue()
+ * evaluation */
+ int lastGeneratedObjectId; /* "id" counter for .keyvalue()
+ * evaluation */
+ int innermostArraySize; /* for LAST array index evaluation */
+ bool laxMode; /* true for "lax" mode, false for "strict"
+ * mode */
+ bool ignoreStructuralErrors; /* with "true" structural errors such
+ * as absence of required json item or
+ * unexpected json item type are
+ * ignored */
+ bool throwErrors; /* with "false" all suppressible errors are
+ * suppressed */
+ bool useTz;
+} JsonPathExecContext;
+
+/* Context for LIKE_REGEX execution. */
+typedef struct JsonLikeRegexContext
+{
+ text *regex;
+ int cflags;
+} JsonLikeRegexContext;
+
+/* Result of jsonpath predicate evaluation */
+typedef enum JsonPathBool
+{
+ jpbFalse = 0,
+ jpbTrue = 1,
+ jpbUnknown = 2
+} JsonPathBool;
+
+/* Result of jsonpath expression evaluation */
+typedef enum JsonPathExecResult
+{
+ jperOk = 0,
+ jperNotFound = 1,
+ jperError = 2
+} JsonPathExecResult;
+
+#define jperIsError(jper) ((jper) == jperError)
+
+/*
+ * List of jsonb values with shortcut for single-value list.
+ */
+typedef struct JsonValueList
+{
+ JsonbValue *singleton;
+ List *list;
+} JsonValueList;
+
+typedef struct JsonValueListIterator
+{
+ JsonbValue *value;
+ List *list;
+ ListCell *next;
+} JsonValueListIterator;
+
+/* the strict/lax flag is decomposed into four [un]wrap/error flags */
+#define jspStrictAbsenseOfErrors(cxt) (!(cxt)->laxMode)
+#define jspAutoUnwrap(cxt) ((cxt)->laxMode)
+#define jspAutoWrap(cxt) ((cxt)->laxMode)
+#define jspIgnoreStructuralErrors(cxt) ((cxt)->ignoreStructuralErrors)
+#define jspThrowErrors(cxt) ((cxt)->throwErrors)
+
+/* Convenience macro: return or throw error depending on context */
+#define RETURN_ERROR(throw_error) \
+do { \
+ if (jspThrowErrors(cxt)) \
+ throw_error; \
+ else \
+ return jperError; \
+} while (0)
+
+typedef JsonPathBool (*JsonPathPredicateCallback) (JsonPathItem *jsp,
+ JsonbValue *larg,
+ JsonbValue *rarg,
+ void *param);
+typedef Numeric (*BinaryArithmFunc) (Numeric num1, Numeric num2, bool *error);
+
+static JsonPathExecResult executeJsonPath(JsonPath *path, Jsonb *vars,
+ Jsonb *json, bool throwErrors,
+ JsonValueList *result, bool useTz);
+static JsonPathExecResult executeItem(JsonPathExecContext *cxt,
+ JsonPathItem *jsp, JsonbValue *jb, JsonValueList *found);
+static JsonPathExecResult executeItemOptUnwrapTarget(JsonPathExecContext *cxt,
+ JsonPathItem *jsp, JsonbValue *jb,
+ JsonValueList *found, bool unwrap);
+static JsonPathExecResult executeItemUnwrapTargetArray(JsonPathExecContext *cxt,
+ JsonPathItem *jsp, JsonbValue *jb,
+ JsonValueList *found, bool unwrapElements);
+static JsonPathExecResult executeNextItem(JsonPathExecContext *cxt,
+ JsonPathItem *cur, JsonPathItem *next,
+ JsonbValue *v, JsonValueList *found, bool copy);
+static JsonPathExecResult executeItemOptUnwrapResult(JsonPathExecContext *cxt, JsonPathItem *jsp, JsonbValue *jb,
+ bool unwrap, JsonValueList *found);
+static JsonPathExecResult executeItemOptUnwrapResultNoThrow(JsonPathExecContext *cxt, JsonPathItem *jsp,
+ JsonbValue *jb, bool unwrap, JsonValueList *found);
+static JsonPathBool executeBoolItem(JsonPathExecContext *cxt,
+ JsonPathItem *jsp, JsonbValue *jb, bool canHaveNext);
+static JsonPathBool executeNestedBoolItem(JsonPathExecContext *cxt,
+ JsonPathItem *jsp, JsonbValue *jb);
+static JsonPathExecResult executeAnyItem(JsonPathExecContext *cxt,
+ JsonPathItem *jsp, JsonbContainer *jbc, JsonValueList *found,
+ uint32 level, uint32 first, uint32 last,
+ bool ignoreStructuralErrors, bool unwrapNext);
+static JsonPathBool executePredicate(JsonPathExecContext *cxt,
+ JsonPathItem *pred, JsonPathItem *larg, JsonPathItem *rarg,
+ JsonbValue *jb, bool unwrapRightArg,
+ JsonPathPredicateCallback exec, void *param);
+static JsonPathExecResult executeBinaryArithmExpr(JsonPathExecContext *cxt,
+ JsonPathItem *jsp, JsonbValue *jb,
+ BinaryArithmFunc func, JsonValueList *found);
+static JsonPathExecResult executeUnaryArithmExpr(JsonPathExecContext *cxt,
+ JsonPathItem *jsp, JsonbValue *jb, PGFunction func,
+ JsonValueList *found);
+static JsonPathBool executeStartsWith(JsonPathItem *jsp,
+ JsonbValue *whole, JsonbValue *initial, void *param);
+static JsonPathBool executeLikeRegex(JsonPathItem *jsp, JsonbValue *str,
+ JsonbValue *rarg, void *param);
+static JsonPathExecResult executeNumericItemMethod(JsonPathExecContext *cxt,
+ JsonPathItem *jsp, JsonbValue *jb, bool unwrap, PGFunction func,
+ JsonValueList *found);
+static JsonPathExecResult executeDateTimeMethod(JsonPathExecContext *cxt, JsonPathItem *jsp,
+ JsonbValue *jb, JsonValueList *found);
+static JsonPathExecResult executeKeyValueMethod(JsonPathExecContext *cxt,
+ JsonPathItem *jsp, JsonbValue *jb, JsonValueList *found);
+static JsonPathExecResult appendBoolResult(JsonPathExecContext *cxt,
+ JsonPathItem *jsp, JsonValueList *found, JsonPathBool res);
+static void getJsonPathItem(JsonPathExecContext *cxt, JsonPathItem *item,
+ JsonbValue *value);
+static void getJsonPathVariable(JsonPathExecContext *cxt,
+ JsonPathItem *variable, Jsonb *vars, JsonbValue *value);
+static int JsonbArraySize(JsonbValue *jb);
+static JsonPathBool executeComparison(JsonPathItem *cmp, JsonbValue *lv,
+ JsonbValue *rv, void *p);
+static JsonPathBool compareItems(int32 op, JsonbValue *jb1, JsonbValue *jb2,
+ bool useTz);
+static int compareNumeric(Numeric a, Numeric b);
+static JsonbValue *copyJsonbValue(JsonbValue *src);
+static JsonPathExecResult getArrayIndex(JsonPathExecContext *cxt,
+ JsonPathItem *jsp, JsonbValue *jb, int32 *index);
+static JsonBaseObjectInfo setBaseObject(JsonPathExecContext *cxt,
+ JsonbValue *jbv, int32 id);
+static void JsonValueListAppend(JsonValueList *jvl, JsonbValue *jbv);
+static int JsonValueListLength(const JsonValueList *jvl);
+static bool JsonValueListIsEmpty(JsonValueList *jvl);
+static JsonbValue *JsonValueListHead(JsonValueList *jvl);
+static List *JsonValueListGetList(JsonValueList *jvl);
+static void JsonValueListInitIterator(const JsonValueList *jvl,
+ JsonValueListIterator *it);
+static JsonbValue *JsonValueListNext(const JsonValueList *jvl,
+ JsonValueListIterator *it);
+static int JsonbType(JsonbValue *jb);
+static JsonbValue *JsonbInitBinary(JsonbValue *jbv, Jsonb *jb);
+static JsonbValue *getScalar(JsonbValue *scalar, enum jbvType type);
+static JsonbValue *wrapItemsInArray(const JsonValueList *items);
+static int compareDatetime(Datum val1, Oid typid1, Datum val2, Oid typid2,
+ bool useTz, bool *have_error);
+
+/****************** User interface to JsonPath executor ********************/
+
+/*
+ * jsonb_path_exists
+ * Returns true if the jsonpath returns at least one item for the specified
+ * jsonb value. This function and jsonb_path_match() are used to implement
+ * the @? and @@ operators, which in turn are intended to have index
+ * support. Thus, it's desirable to make it easier to achieve consistency
+ * between index scan results and sequential scan results. So, we throw as
+ * few errors as possible. For this function, such behavior also matches
+ * the behavior of the JSON_EXISTS() clause of SQL/JSON. jsonb_path_match()
+ * has no analog in SQL/JSON, so we define its behavior on our own.
+ */
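+/*
+ * For example (illustrative values):
+ *
+ *   SELECT jsonb_path_exists('{"a":[1,2,3,4,5]}',
+ *                            '$.a[*] ? (@ >= 2 && @ <= 4)');   -- true
+ *   SELECT '{"a":[1,2,3,4,5]}'::jsonb @? '$.a[*] ? (@ >= 2)';  -- true
+ */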
+static Datum
+jsonb_path_exists_internal(FunctionCallInfo fcinfo, bool tz)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ JsonPath *jp = PG_GETARG_JSONPATH_P(1);
+ JsonPathExecResult res;
+ Jsonb *vars = NULL;
+ bool silent = true;
+
+ if (PG_NARGS() == 4)
+ {
+ vars = PG_GETARG_JSONB_P(2);
+ silent = PG_GETARG_BOOL(3);
+ }
+
+ res = executeJsonPath(jp, vars, jb, !silent, NULL, tz);
+
+ PG_FREE_IF_COPY(jb, 0);
+ PG_FREE_IF_COPY(jp, 1);
+
+ if (jperIsError(res))
+ PG_RETURN_NULL();
+
+ PG_RETURN_BOOL(res == jperOk);
+}
+
+Datum
+jsonb_path_exists(PG_FUNCTION_ARGS)
+{
+ return jsonb_path_exists_internal(fcinfo, false);
+}
+
+Datum
+jsonb_path_exists_tz(PG_FUNCTION_ARGS)
+{
+ return jsonb_path_exists_internal(fcinfo, true);
+}
+
+/*
+ * jsonb_path_exists_opr
+ * Implementation of operator "jsonb @? jsonpath" (2-argument version of
+ * jsonb_path_exists()).
+ */
+Datum
+jsonb_path_exists_opr(PG_FUNCTION_ARGS)
+{
+ /* just call the other one -- it can handle both cases */
+ return jsonb_path_exists_internal(fcinfo, false);
+}
+
+/*
+ * jsonb_path_match
+ * Returns the jsonpath predicate result item for the specified jsonb value.
+ * See the jsonb_path_exists() comment for details regarding error handling.
+ */
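+/*
+ * For example (illustrative values):
+ *
+ *   SELECT jsonb_path_match('{"a":[1,2,3,4,5]}',
+ *                           'exists($.a[*] ? (@ >= 2 && @ <= 4))');  -- true
+ *   SELECT '{"a":[1,2,3,4,5]}'::jsonb @@ '$.a[*] > 2';               -- true
+ */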
+static Datum
+jsonb_path_match_internal(FunctionCallInfo fcinfo, bool tz)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ JsonPath *jp = PG_GETARG_JSONPATH_P(1);
+ JsonValueList found = {0};
+ Jsonb *vars = NULL;
+ bool silent = true;
+
+ if (PG_NARGS() == 4)
+ {
+ vars = PG_GETARG_JSONB_P(2);
+ silent = PG_GETARG_BOOL(3);
+ }
+
+ (void) executeJsonPath(jp, vars, jb, !silent, &found, tz);
+
+ PG_FREE_IF_COPY(jb, 0);
+ PG_FREE_IF_COPY(jp, 1);
+
+ if (JsonValueListLength(&found) == 1)
+ {
+ JsonbValue *jbv = JsonValueListHead(&found);
+
+ if (jbv->type == jbvBool)
+ PG_RETURN_BOOL(jbv->val.boolean);
+
+ if (jbv->type == jbvNull)
+ PG_RETURN_NULL();
+ }
+
+ if (!silent)
+ ereport(ERROR,
+ (errcode(ERRCODE_SINGLETON_SQL_JSON_ITEM_REQUIRED),
+ errmsg("single boolean result is expected")));
+
+ PG_RETURN_NULL();
+}
+
+Datum
+jsonb_path_match(PG_FUNCTION_ARGS)
+{
+ return jsonb_path_match_internal(fcinfo, false);
+}
+
+Datum
+jsonb_path_match_tz(PG_FUNCTION_ARGS)
+{
+ return jsonb_path_match_internal(fcinfo, true);
+}
+
+/*
+ * jsonb_path_match_opr
+ * Implementation of operator "jsonb @@ jsonpath" (2-argument version of
+ * jsonb_path_match()).
+ */
+Datum
+jsonb_path_match_opr(PG_FUNCTION_ARGS)
+{
+ /* just call the other one -- it can handle both cases */
+ return jsonb_path_match_internal(fcinfo, false);
+}
+
+/*
+ * jsonb_path_query
+ * Executes the jsonpath for the given jsonb document and returns the
+ * result as a rowset.
+ */
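+/*
+ * For example (illustrative values):
+ *
+ *   SELECT * FROM jsonb_path_query('{"a":[1,2,3,4,5]}',
+ *                                  '$.a[*] ? (@ >= 2 && @ <= 4)');
+ *
+ * returns the rows 2, 3 and 4.
+ */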
+static Datum
+jsonb_path_query_internal(FunctionCallInfo fcinfo, bool tz)
+{
+ FuncCallContext *funcctx;
+ List *found;
+ JsonbValue *v;
+ ListCell *c;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ JsonPath *jp;
+ Jsonb *jb;
+ MemoryContext oldcontext;
+ Jsonb *vars;
+ bool silent;
+ JsonValueList found = {0};
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ jb = PG_GETARG_JSONB_P_COPY(0);
+ jp = PG_GETARG_JSONPATH_P_COPY(1);
+ vars = PG_GETARG_JSONB_P_COPY(2);
+ silent = PG_GETARG_BOOL(3);
+
+ (void) executeJsonPath(jp, vars, jb, !silent, &found, tz);
+
+ funcctx->user_fctx = JsonValueListGetList(&found);
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ found = funcctx->user_fctx;
+
+ c = list_head(found);
+
+ if (c == NULL)
+ SRF_RETURN_DONE(funcctx);
+
+ v = lfirst(c);
+ funcctx->user_fctx = list_delete_first(found);
+
+ SRF_RETURN_NEXT(funcctx, JsonbPGetDatum(JsonbValueToJsonb(v)));
+}
+
+Datum
+jsonb_path_query(PG_FUNCTION_ARGS)
+{
+ return jsonb_path_query_internal(fcinfo, false);
+}
+
+Datum
+jsonb_path_query_tz(PG_FUNCTION_ARGS)
+{
+ return jsonb_path_query_internal(fcinfo, true);
+}
+
+/*
+ * jsonb_path_query_array
+ * Executes the jsonpath for the given jsonb document and returns the
+ * result as a jsonb array.
+ */
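+/*
+ * For example (illustrative values):
+ *
+ *   SELECT jsonb_path_query_array('{"a":[1,2,3,4,5]}',
+ *                                 '$.a[*] ? (@ >= 2 && @ <= 4)');
+ *
+ * returns the single jsonb value [2, 3, 4].
+ */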
+static Datum
+jsonb_path_query_array_internal(FunctionCallInfo fcinfo, bool tz)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ JsonPath *jp = PG_GETARG_JSONPATH_P(1);
+ JsonValueList found = {0};
+ Jsonb *vars = PG_GETARG_JSONB_P(2);
+ bool silent = PG_GETARG_BOOL(3);
+
+ (void) executeJsonPath(jp, vars, jb, !silent, &found, tz);
+
+ PG_RETURN_JSONB_P(JsonbValueToJsonb(wrapItemsInArray(&found)));
+}
+
+Datum
+jsonb_path_query_array(PG_FUNCTION_ARGS)
+{
+ return jsonb_path_query_array_internal(fcinfo, false);
+}
+
+Datum
+jsonb_path_query_array_tz(PG_FUNCTION_ARGS)
+{
+ return jsonb_path_query_array_internal(fcinfo, true);
+}
+
+/*
+ * jsonb_path_query_first
+ * Executes the jsonpath for the given jsonb document and returns the first
+ * result item. If there are no items, NULL is returned.
+ */
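+/*
+ * For example (illustrative values):
+ *
+ *   SELECT jsonb_path_query_first('{"a":[1,2,3,4,5]}',
+ *                                 '$.a[*] ? (@ >= 2 && @ <= 4)');
+ *
+ * returns 2.
+ */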
+static Datum
+jsonb_path_query_first_internal(FunctionCallInfo fcinfo, bool tz)
+{
+ Jsonb *jb = PG_GETARG_JSONB_P(0);
+ JsonPath *jp = PG_GETARG_JSONPATH_P(1);
+ JsonValueList found = {0};
+ Jsonb *vars = PG_GETARG_JSONB_P(2);
+ bool silent = PG_GETARG_BOOL(3);
+
+ (void) executeJsonPath(jp, vars, jb, !silent, &found, tz);
+
+ if (JsonValueListLength(&found) >= 1)
+ PG_RETURN_JSONB_P(JsonbValueToJsonb(JsonValueListHead(&found)));
+ else
+ PG_RETURN_NULL();
+}
+
+Datum
+jsonb_path_query_first(PG_FUNCTION_ARGS)
+{
+ return jsonb_path_query_first_internal(fcinfo, false);
+}
+
+Datum
+jsonb_path_query_first_tz(PG_FUNCTION_ARGS)
+{
+ return jsonb_path_query_first_internal(fcinfo, true);
+}
+
+/********************Execute functions for JsonPath**************************/
+
+/*
+ * Interface to jsonpath executor
+ *
+ * 'path' - jsonpath to be executed
+ * 'vars' - variables to be substituted into the jsonpath
+ * 'json' - target document for jsonpath evaluation
+ * 'throwErrors' - whether we should throw suppressible errors
+ * 'result' - list to store result items into
+ * 'useTz' - whether casts that require time zone usage are allowed
+ *
+ * Returns jperError if a recoverable error happens during processing,
+ * otherwise jperOk or jperNotFound.
+ *
+ * Note that the jsonb and jsonpath values must stay available and untoasted
+ * while the executor runs, because JsonPathItem, JsonbValue and result items
+ * may hold pointers into the input values. A caller that only needs to
+ * check whether the document matches the jsonpath does not provide a result
+ * arg; in that case the executor stops at the first positive result and,
+ * where possible, does not examine the rest. Otherwise it tries to find all
+ * the satisfying result items.
+ */
+static JsonPathExecResult
+executeJsonPath(JsonPath *path, Jsonb *vars, Jsonb *json, bool throwErrors,
+ JsonValueList *result, bool useTz)
+{
+ JsonPathExecContext cxt;
+ JsonPathExecResult res;
+ JsonPathItem jsp;
+ JsonbValue jbv;
+
+ jspInit(&jsp, path);
+
+ if (!JsonbExtractScalar(&json->root, &jbv))
+ JsonbInitBinary(&jbv, json);
+
+ if (vars && !JsonContainerIsObject(&vars->root))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("\"vars\" argument is not an object"),
+ errdetail("Jsonpath parameters should be encoded as key-value pairs of \"vars\" object.")));
+ }
+
+ cxt.vars = vars;
+ cxt.laxMode = (path->header & JSONPATH_LAX) != 0;
+ cxt.ignoreStructuralErrors = cxt.laxMode;
+ cxt.root = &jbv;
+ cxt.current = &jbv;
+ cxt.baseObject.jbc = NULL;
+ cxt.baseObject.id = 0;
+ cxt.lastGeneratedObjectId = vars ? 2 : 1;
+ cxt.innermostArraySize = -1;
+ cxt.throwErrors = throwErrors;
+ cxt.useTz = useTz;
+
+ if (jspStrictAbsenseOfErrors(&cxt) && !result)
+ {
+ /*
+ * In strict mode we must get a complete list of values to check that
+ * there are no errors at all.
+ */
+ JsonValueList vals = {0};
+
+ res = executeItem(&cxt, &jsp, &jbv, &vals);
+
+ if (jperIsError(res))
+ return res;
+
+ return JsonValueListIsEmpty(&vals) ? jperNotFound : jperOk;
+ }
+
+ res = executeItem(&cxt, &jsp, &jbv, result);
+
+ Assert(!throwErrors || !jperIsError(res));
+
+ return res;
+}
+
+/*
+ * Execute jsonpath with automatic unwrapping of current item in lax mode.
+ */
+static JsonPathExecResult
+executeItem(JsonPathExecContext *cxt, JsonPathItem *jsp,
+ JsonbValue *jb, JsonValueList *found)
+{
+ return executeItemOptUnwrapTarget(cxt, jsp, jb, found, jspAutoUnwrap(cxt));
+}
+
+/*
+ * Main jsonpath executor function: walks the jsonpath structure, finds the
+ * relevant parts of the jsonb and evaluates expressions over them.
+ * When 'unwrap' is true, the current SQL/JSON item is unwrapped if it is an
+ * array.
+ */
+static JsonPathExecResult
+executeItemOptUnwrapTarget(JsonPathExecContext *cxt, JsonPathItem *jsp,
+ JsonbValue *jb, JsonValueList *found, bool unwrap)
+{
+ JsonPathItem elem;
+ JsonPathExecResult res = jperNotFound;
+ JsonBaseObjectInfo baseObject;
+
+ check_stack_depth();
+ CHECK_FOR_INTERRUPTS();
+
+ switch (jsp->type)
+ {
+ /* all boolean item types: */
+ case jpiAnd:
+ case jpiOr:
+ case jpiNot:
+ case jpiIsUnknown:
+ case jpiEqual:
+ case jpiNotEqual:
+ case jpiLess:
+ case jpiGreater:
+ case jpiLessOrEqual:
+ case jpiGreaterOrEqual:
+ case jpiExists:
+ case jpiStartsWith:
+ case jpiLikeRegex:
+ {
+ JsonPathBool st = executeBoolItem(cxt, jsp, jb, true);
+
+ res = appendBoolResult(cxt, jsp, found, st);
+ break;
+ }
+
+ case jpiKey:
+ if (JsonbType(jb) == jbvObject)
+ {
+ JsonbValue *v;
+ JsonbValue key;
+
+ key.type = jbvString;
+ key.val.string.val = jspGetString(jsp, &key.val.string.len);
+
+ v = findJsonbValueFromContainer(jb->val.binary.data,
+ JB_FOBJECT, &key);
+
+ if (v != NULL)
+ {
+ res = executeNextItem(cxt, jsp, NULL,
+ v, found, false);
+
+ /* free value if it was not added to found list */
+ if (jspHasNext(jsp) || !found)
+ pfree(v);
+ }
+ else if (!jspIgnoreStructuralErrors(cxt))
+ {
+ Assert(found);
+
+ if (!jspThrowErrors(cxt))
+ return jperError;
+
+ ereport(ERROR,
+ (errcode(ERRCODE_SQL_JSON_MEMBER_NOT_FOUND),
+ errmsg("JSON object does not contain key \"%s\"",
+ pnstrdup(key.val.string.val,
+ key.val.string.len))));
+ }
+ }
+ else if (unwrap && JsonbType(jb) == jbvArray)
+ return executeItemUnwrapTargetArray(cxt, jsp, jb, found, false);
+ else if (!jspIgnoreStructuralErrors(cxt))
+ {
+ Assert(found);
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_SQL_JSON_MEMBER_NOT_FOUND),
+ errmsg("jsonpath member accessor can only be applied to an object"))));
+ }
+ break;
+
+ case jpiRoot:
+ jb = cxt->root;
+ baseObject = setBaseObject(cxt, jb, 0);
+ res = executeNextItem(cxt, jsp, NULL, jb, found, true);
+ cxt->baseObject = baseObject;
+ break;
+
+ case jpiCurrent:
+ res = executeNextItem(cxt, jsp, NULL, cxt->current,
+ found, true);
+ break;
+
+ case jpiAnyArray:
+ if (JsonbType(jb) == jbvArray)
+ {
+ bool hasNext = jspGetNext(jsp, &elem);
+
+ res = executeItemUnwrapTargetArray(cxt, hasNext ? &elem : NULL,
+ jb, found, jspAutoUnwrap(cxt));
+ }
+ else if (jspAutoWrap(cxt))
+ res = executeNextItem(cxt, jsp, NULL, jb, found, true);
+ else if (!jspIgnoreStructuralErrors(cxt))
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_SQL_JSON_ARRAY_NOT_FOUND),
+ errmsg("jsonpath wildcard array accessor can only be applied to an array"))));
+ break;
+
+ case jpiIndexArray:
+ if (JsonbType(jb) == jbvArray || jspAutoWrap(cxt))
+ {
+ int innermostArraySize = cxt->innermostArraySize;
+ int i;
+ int size = JsonbArraySize(jb);
+ bool singleton = size < 0;
+ bool hasNext = jspGetNext(jsp, &elem);
+
+ if (singleton)
+ size = 1;
+
+ cxt->innermostArraySize = size; /* for LAST evaluation */
+
+ for (i = 0; i < jsp->content.array.nelems; i++)
+ {
+ JsonPathItem from;
+ JsonPathItem to;
+ int32 index;
+ int32 index_from;
+ int32 index_to;
+ bool range = jspGetArraySubscript(jsp, &from,
+ &to, i);
+
+ res = getArrayIndex(cxt, &from, jb, &index_from);
+
+ if (jperIsError(res))
+ break;
+
+ if (range)
+ {
+ res = getArrayIndex(cxt, &to, jb, &index_to);
+
+ if (jperIsError(res))
+ break;
+ }
+ else
+ index_to = index_from;
+
+ if (!jspIgnoreStructuralErrors(cxt) &&
+ (index_from < 0 ||
+ index_from > index_to ||
+ index_to >= size))
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_SQL_JSON_SUBSCRIPT),
+ errmsg("jsonpath array subscript is out of bounds"))));
+
+ if (index_from < 0)
+ index_from = 0;
+
+ if (index_to >= size)
+ index_to = size - 1;
+
+ res = jperNotFound;
+
+ for (index = index_from; index <= index_to; index++)
+ {
+ JsonbValue *v;
+ bool copy;
+
+ if (singleton)
+ {
+ v = jb;
+ copy = true;
+ }
+ else
+ {
+ v = getIthJsonbValueFromContainer(jb->val.binary.data,
+ (uint32) index);
+
+ if (v == NULL)
+ continue;
+
+ copy = false;
+ }
+
+ if (!hasNext && !found)
+ return jperOk;
+
+ res = executeNextItem(cxt, jsp, &elem, v, found,
+ copy);
+
+ if (jperIsError(res))
+ break;
+
+ if (res == jperOk && !found)
+ break;
+ }
+
+ if (jperIsError(res))
+ break;
+
+ if (res == jperOk && !found)
+ break;
+ }
+
+ cxt->innermostArraySize = innermostArraySize;
+ }
+ else if (!jspIgnoreStructuralErrors(cxt))
+ {
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_SQL_JSON_ARRAY_NOT_FOUND),
+ errmsg("jsonpath array accessor can only be applied to an array"))));
+ }
+ break;
+
+ case jpiLast:
+ {
+ JsonbValue tmpjbv;
+ JsonbValue *lastjbv;
+ int last;
+ bool hasNext = jspGetNext(jsp, &elem);
+
+ if (cxt->innermostArraySize < 0)
+ elog(ERROR, "evaluating jsonpath LAST outside of array subscript");
+
+ if (!hasNext && !found)
+ {
+ res = jperOk;
+ break;
+ }
+
+ last = cxt->innermostArraySize - 1;
+
+ lastjbv = hasNext ? &tmpjbv : palloc(sizeof(*lastjbv));
+
+ lastjbv->type = jbvNumeric;
+ lastjbv->val.numeric = int64_to_numeric(last);
+
+ res = executeNextItem(cxt, jsp, &elem,
+ lastjbv, found, hasNext);
+ }
+ break;
+
+ case jpiAnyKey:
+ if (JsonbType(jb) == jbvObject)
+ {
+ bool hasNext = jspGetNext(jsp, &elem);
+
+ if (jb->type != jbvBinary)
+ elog(ERROR, "invalid jsonb object type: %d", jb->type);
+
+ return executeAnyItem
+ (cxt, hasNext ? &elem : NULL,
+ jb->val.binary.data, found, 1, 1, 1,
+ false, jspAutoUnwrap(cxt));
+ }
+ else if (unwrap && JsonbType(jb) == jbvArray)
+ return executeItemUnwrapTargetArray(cxt, jsp, jb, found, false);
+ else if (!jspIgnoreStructuralErrors(cxt))
+ {
+ Assert(found);
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_SQL_JSON_OBJECT_NOT_FOUND),
+ errmsg("jsonpath wildcard member accessor can only be applied to an object"))));
+ }
+ break;
+
+ case jpiAdd:
+ return executeBinaryArithmExpr(cxt, jsp, jb,
+ numeric_add_opt_error, found);
+
+ case jpiSub:
+ return executeBinaryArithmExpr(cxt, jsp, jb,
+ numeric_sub_opt_error, found);
+
+ case jpiMul:
+ return executeBinaryArithmExpr(cxt, jsp, jb,
+ numeric_mul_opt_error, found);
+
+ case jpiDiv:
+ return executeBinaryArithmExpr(cxt, jsp, jb,
+ numeric_div_opt_error, found);
+
+ case jpiMod:
+ return executeBinaryArithmExpr(cxt, jsp, jb,
+ numeric_mod_opt_error, found);
+
+ case jpiPlus:
+ return executeUnaryArithmExpr(cxt, jsp, jb, NULL, found);
+
+ case jpiMinus:
+ return executeUnaryArithmExpr(cxt, jsp, jb, numeric_uminus,
+ found);
+
+ case jpiFilter:
+ {
+ JsonPathBool st;
+
+ if (unwrap && JsonbType(jb) == jbvArray)
+ return executeItemUnwrapTargetArray(cxt, jsp, jb, found,
+ false);
+
+ jspGetArg(jsp, &elem);
+ st = executeNestedBoolItem(cxt, &elem, jb);
+ if (st != jpbTrue)
+ res = jperNotFound;
+ else
+ res = executeNextItem(cxt, jsp, NULL,
+ jb, found, true);
+ break;
+ }
+
+ case jpiAny:
+ {
+ bool hasNext = jspGetNext(jsp, &elem);
+
+ /* first try without any intermediate steps */
+ if (jsp->content.anybounds.first == 0)
+ {
+ bool savedIgnoreStructuralErrors;
+
+ savedIgnoreStructuralErrors = cxt->ignoreStructuralErrors;
+ cxt->ignoreStructuralErrors = true;
+ res = executeNextItem(cxt, jsp, &elem,
+ jb, found, true);
+ cxt->ignoreStructuralErrors = savedIgnoreStructuralErrors;
+
+ if (res == jperOk && !found)
+ break;
+ }
+
+ if (jb->type == jbvBinary)
+ res = executeAnyItem
+ (cxt, hasNext ? &elem : NULL,
+ jb->val.binary.data, found,
+ 1,
+ jsp->content.anybounds.first,
+ jsp->content.anybounds.last,
+ true, jspAutoUnwrap(cxt));
+ break;
+ }
+
+ case jpiNull:
+ case jpiBool:
+ case jpiNumeric:
+ case jpiString:
+ case jpiVariable:
+ {
+ JsonbValue vbuf;
+ JsonbValue *v;
+ bool hasNext = jspGetNext(jsp, &elem);
+
+ if (!hasNext && !found && jsp->type != jpiVariable)
+ {
+ /*
+ * Skip evaluation, but not for variables. We must
+ * trigger an error for the missing variable.
+ */
+ res = jperOk;
+ break;
+ }
+
+ v = hasNext ? &vbuf : palloc(sizeof(*v));
+
+ baseObject = cxt->baseObject;
+ getJsonPathItem(cxt, jsp, v);
+
+ res = executeNextItem(cxt, jsp, &elem,
+ v, found, hasNext);
+ cxt->baseObject = baseObject;
+ }
+ break;
+
+ case jpiType:
+ {
+ JsonbValue *jbv = palloc(sizeof(*jbv));
+
+ jbv->type = jbvString;
+ jbv->val.string.val = pstrdup(JsonbTypeName(jb));
+ jbv->val.string.len = strlen(jbv->val.string.val);
+
+ res = executeNextItem(cxt, jsp, NULL, jbv,
+ found, false);
+ }
+ break;
+
+ case jpiSize:
+ {
+ int size = JsonbArraySize(jb);
+
+ if (size < 0)
+ {
+ if (!jspAutoWrap(cxt))
+ {
+ if (!jspIgnoreStructuralErrors(cxt))
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_SQL_JSON_ARRAY_NOT_FOUND),
+ errmsg("jsonpath item method .%s() can only be applied to an array",
+ jspOperationName(jsp->type)))));
+ break;
+ }
+
+ size = 1;
+ }
+
+ jb = palloc(sizeof(*jb));
+
+ jb->type = jbvNumeric;
+ jb->val.numeric = int64_to_numeric(size);
+
+ res = executeNextItem(cxt, jsp, NULL, jb, found, false);
+ }
+ break;
+
+ case jpiAbs:
+ return executeNumericItemMethod(cxt, jsp, jb, unwrap, numeric_abs,
+ found);
+
+ case jpiFloor:
+ return executeNumericItemMethod(cxt, jsp, jb, unwrap, numeric_floor,
+ found);
+
+ case jpiCeiling:
+ return executeNumericItemMethod(cxt, jsp, jb, unwrap, numeric_ceil,
+ found);
+
+ case jpiDouble:
+ {
+ JsonbValue jbv;
+
+ if (unwrap && JsonbType(jb) == jbvArray)
+ return executeItemUnwrapTargetArray(cxt, jsp, jb, found,
+ false);
+
+ if (jb->type == jbvNumeric)
+ {
+ char *tmp = DatumGetCString(DirectFunctionCall1(numeric_out,
+ NumericGetDatum(jb->val.numeric)));
+ double val;
+ bool have_error = false;
+
+ val = float8in_internal_opt_error(tmp,
+ NULL,
+ "double precision",
+ tmp,
+ &have_error);
+
+ if (have_error || isinf(val) || isnan(val))
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_NON_NUMERIC_SQL_JSON_ITEM),
+ errmsg("numeric argument of jsonpath item method .%s() is out of range for type double precision",
+ jspOperationName(jsp->type)))));
+ res = jperOk;
+ }
+ else if (jb->type == jbvString)
+ {
+ /* cast string as double */
+ double val;
+ char *tmp = pnstrdup(jb->val.string.val,
+ jb->val.string.len);
+ bool have_error = false;
+
+ val = float8in_internal_opt_error(tmp,
+ NULL,
+ "double precision",
+ tmp,
+ &have_error);
+
+ if (have_error || isinf(val) || isnan(val))
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_NON_NUMERIC_SQL_JSON_ITEM),
+ errmsg("string argument of jsonpath item method .%s() is not a valid representation of a double precision number",
+ jspOperationName(jsp->type)))));
+
+ jb = &jbv;
+ jb->type = jbvNumeric;
+ jb->val.numeric = DatumGetNumeric(DirectFunctionCall1(float8_numeric,
+ Float8GetDatum(val)));
+ res = jperOk;
+ }
+
+ if (res == jperNotFound)
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_NON_NUMERIC_SQL_JSON_ITEM),
+ errmsg("jsonpath item method .%s() can only be applied to a string or numeric value",
+ jspOperationName(jsp->type)))));
+
+ res = executeNextItem(cxt, jsp, NULL, jb, found, true);
+ }
+ break;
+
+ case jpiDatetime:
+ if (unwrap && JsonbType(jb) == jbvArray)
+ return executeItemUnwrapTargetArray(cxt, jsp, jb, found, false);
+
+ return executeDateTimeMethod(cxt, jsp, jb, found);
+
+ case jpiKeyValue:
+ if (unwrap && JsonbType(jb) == jbvArray)
+ return executeItemUnwrapTargetArray(cxt, jsp, jb, found, false);
+
+ return executeKeyValueMethod(cxt, jsp, jb, found);
+
+ default:
+ elog(ERROR, "unrecognized jsonpath item type: %d", jsp->type);
+ }
+
+ return res;
+}
+
+/*
+ * Unwrap current array item and execute jsonpath for each of its elements.
+ */
+static JsonPathExecResult
+executeItemUnwrapTargetArray(JsonPathExecContext *cxt, JsonPathItem *jsp,
+ JsonbValue *jb, JsonValueList *found,
+ bool unwrapElements)
+{
+ if (jb->type != jbvBinary)
+ {
+ Assert(jb->type != jbvArray);
+ elog(ERROR, "invalid jsonb array value type: %d", jb->type);
+ }
+
+ return executeAnyItem
+ (cxt, jsp, jb->val.binary.data, found, 1, 1, 1,
+ false, unwrapElements);
+}
+
+/*
+ * Execute the next jsonpath item if it exists. Otherwise put "v" into the
+ * "found" list, if one is provided.
+ */
+static JsonPathExecResult
+executeNextItem(JsonPathExecContext *cxt,
+ JsonPathItem *cur, JsonPathItem *next,
+ JsonbValue *v, JsonValueList *found, bool copy)
+{
+ JsonPathItem elem;
+ bool hasNext;
+
+ if (!cur)
+ hasNext = next != NULL;
+ else if (next)
+ hasNext = jspHasNext(cur);
+ else
+ {
+ next = &elem;
+ hasNext = jspGetNext(cur, next);
+ }
+
+ if (hasNext)
+ return executeItem(cxt, next, v, found);
+
+ if (found)
+ JsonValueListAppend(found, copy ? copyJsonbValue(v) : v);
+
+ return jperOk;
+}
+
+/*
+ * Same as executeItem(), but when "unwrap == true" automatically unwraps
+ * each array item from the resulting sequence in lax mode.
+ */
+static JsonPathExecResult
+executeItemOptUnwrapResult(JsonPathExecContext *cxt, JsonPathItem *jsp,
+ JsonbValue *jb, bool unwrap,
+ JsonValueList *found)
+{
+ if (unwrap && jspAutoUnwrap(cxt))
+ {
+ JsonValueList seq = {0};
+ JsonValueListIterator it;
+ JsonPathExecResult res = executeItem(cxt, jsp, jb, &seq);
+ JsonbValue *item;
+
+ if (jperIsError(res))
+ return res;
+
+ JsonValueListInitIterator(&seq, &it);
+ while ((item = JsonValueListNext(&seq, &it)))
+ {
+ Assert(item->type != jbvArray);
+
+ if (JsonbType(item) == jbvArray)
+ executeItemUnwrapTargetArray(cxt, NULL, item, found, false);
+ else
+ JsonValueListAppend(found, item);
+ }
+
+ return jperOk;
+ }
+
+ return executeItem(cxt, jsp, jb, found);
+}
+
+/*
+ * Same as executeItemOptUnwrapResult(), but with error suppression.
+ */
+static JsonPathExecResult
+executeItemOptUnwrapResultNoThrow(JsonPathExecContext *cxt,
+ JsonPathItem *jsp,
+ JsonbValue *jb, bool unwrap,
+ JsonValueList *found)
+{
+ JsonPathExecResult res;
+ bool throwErrors = cxt->throwErrors;
+
+ cxt->throwErrors = false;
+ res = executeItemOptUnwrapResult(cxt, jsp, jb, unwrap, found);
+ cxt->throwErrors = throwErrors;
+
+ return res;
+}
+
+/* Execute boolean-valued jsonpath expression. */
+static JsonPathBool
+executeBoolItem(JsonPathExecContext *cxt, JsonPathItem *jsp,
+ JsonbValue *jb, bool canHaveNext)
+{
+ JsonPathItem larg;
+ JsonPathItem rarg;
+ JsonPathBool res;
+ JsonPathBool res2;
+
+ if (!canHaveNext && jspHasNext(jsp))
+ elog(ERROR, "boolean jsonpath item cannot have next item");
+
+ switch (jsp->type)
+ {
+ case jpiAnd:
+ jspGetLeftArg(jsp, &larg);
+ res = executeBoolItem(cxt, &larg, jb, false);
+
+ if (res == jpbFalse)
+ return jpbFalse;
+
+ /*
+ * SQL/JSON says that we should check the second arg in case of
+ * jperError.
+ */
+
+ jspGetRightArg(jsp, &rarg);
+ res2 = executeBoolItem(cxt, &rarg, jb, false);
+
+ return res2 == jpbTrue ? res : res2;
+
+ case jpiOr:
+ jspGetLeftArg(jsp, &larg);
+ res = executeBoolItem(cxt, &larg, jb, false);
+
+ if (res == jpbTrue)
+ return jpbTrue;
+
+ jspGetRightArg(jsp, &rarg);
+ res2 = executeBoolItem(cxt, &rarg, jb, false);
+
+ return res2 == jpbFalse ? res : res2;
+
+ case jpiNot:
+ jspGetArg(jsp, &larg);
+
+ res = executeBoolItem(cxt, &larg, jb, false);
+
+ if (res == jpbUnknown)
+ return jpbUnknown;
+
+ return res == jpbTrue ? jpbFalse : jpbTrue;
+
+ case jpiIsUnknown:
+ jspGetArg(jsp, &larg);
+ res = executeBoolItem(cxt, &larg, jb, false);
+ return res == jpbUnknown ? jpbTrue : jpbFalse;
+
+ case jpiEqual:
+ case jpiNotEqual:
+ case jpiLess:
+ case jpiGreater:
+ case jpiLessOrEqual:
+ case jpiGreaterOrEqual:
+ jspGetLeftArg(jsp, &larg);
+ jspGetRightArg(jsp, &rarg);
+ return executePredicate(cxt, jsp, &larg, &rarg, jb, true,
+ executeComparison, cxt);
+
+ case jpiStartsWith: /* 'whole STARTS WITH initial' */
+ jspGetLeftArg(jsp, &larg); /* 'whole' */
+ jspGetRightArg(jsp, &rarg); /* 'initial' */
+ return executePredicate(cxt, jsp, &larg, &rarg, jb, false,
+ executeStartsWith, NULL);
+
+ case jpiLikeRegex: /* 'expr LIKE_REGEX pattern FLAGS flags' */
+ {
+ /*
+ * 'expr' is a sequence-returning expression. 'pattern' is a
+ * regex string literal. SQL/JSON standard requires XQuery
+ * regexes, but we use Postgres regexes here. 'flags' is a
+ * string literal converted to integer flags at compile-time.
+ */
+ JsonLikeRegexContext lrcxt = {0};
+
+ jspInitByBuffer(&larg, jsp->base,
+ jsp->content.like_regex.expr);
+
+ return executePredicate(cxt, jsp, &larg, NULL, jb, false,
+ executeLikeRegex, &lrcxt);
+ }
+
+ case jpiExists:
+ jspGetArg(jsp, &larg);
+
+ if (jspStrictAbsenseOfErrors(cxt))
+ {
+ /*
+ * In strict mode we must get a complete list of values to
+ * check that there are no errors at all.
+ */
+ JsonValueList vals = {0};
+ JsonPathExecResult res =
+ executeItemOptUnwrapResultNoThrow(cxt, &larg, jb,
+ false, &vals);
+
+ if (jperIsError(res))
+ return jpbUnknown;
+
+ return JsonValueListIsEmpty(&vals) ? jpbFalse : jpbTrue;
+ }
+ else
+ {
+ JsonPathExecResult res =
+ executeItemOptUnwrapResultNoThrow(cxt, &larg, jb,
+ false, NULL);
+
+ if (jperIsError(res))
+ return jpbUnknown;
+
+ return res == jperOk ? jpbTrue : jpbFalse;
+ }
+
+ default:
+ elog(ERROR, "invalid boolean jsonpath item type: %d", jsp->type);
+ return jpbUnknown;
+ }
+}
+
+/*
+ * Execute nested (filters etc.) boolean expression pushing current SQL/JSON
+ * item onto the stack.
+ */
+static JsonPathBool
+executeNestedBoolItem(JsonPathExecContext *cxt, JsonPathItem *jsp,
+ JsonbValue *jb)
+{
+ JsonbValue *prev;
+ JsonPathBool res;
+
+ prev = cxt->current;
+ cxt->current = jb;
+ res = executeBoolItem(cxt, jsp, jb, false);
+ cxt->current = prev;
+
+ return res;
+}
+
+/*
+ * Implementation of several jsonpath nodes:
+ * - jpiAny (.** accessor),
+ * - jpiAnyKey (.* accessor),
+ * - jpiAnyArray ([*] accessor)
+ */
+static JsonPathExecResult
+executeAnyItem(JsonPathExecContext *cxt, JsonPathItem *jsp, JsonbContainer *jbc,
+ JsonValueList *found, uint32 level, uint32 first, uint32 last,
+ bool ignoreStructuralErrors, bool unwrapNext)
+{
+ JsonPathExecResult res = jperNotFound;
+ JsonbIterator *it;
+ int32 r;
+ JsonbValue v;
+
+ check_stack_depth();
+
+ if (level > last)
+ return res;
+
+ it = JsonbIteratorInit(jbc);
+
+ /*
+ * Recursively iterate over jsonb objects/arrays
+ */
+ while ((r = JsonbIteratorNext(&it, &v, true)) != WJB_DONE)
+ {
+ if (r == WJB_KEY)
+ {
+ r = JsonbIteratorNext(&it, &v, true);
+ Assert(r == WJB_VALUE);
+ }
+
+ if (r == WJB_VALUE || r == WJB_ELEM)
+ {
+
+ if (level >= first ||
+ (first == PG_UINT32_MAX && last == PG_UINT32_MAX &&
+ v.type != jbvBinary)) /* leaves only requested */
+ {
+ /* check expression */
+ if (jsp)
+ {
+ if (ignoreStructuralErrors)
+ {
+ bool savedIgnoreStructuralErrors;
+
+ savedIgnoreStructuralErrors = cxt->ignoreStructuralErrors;
+ cxt->ignoreStructuralErrors = true;
+ res = executeItemOptUnwrapTarget(cxt, jsp, &v, found, unwrapNext);
+ cxt->ignoreStructuralErrors = savedIgnoreStructuralErrors;
+ }
+ else
+ res = executeItemOptUnwrapTarget(cxt, jsp, &v, found, unwrapNext);
+
+ if (jperIsError(res))
+ break;
+
+ if (res == jperOk && !found)
+ break;
+ }
+ else if (found)
+ JsonValueListAppend(found, copyJsonbValue(&v));
+ else
+ return jperOk;
+ }
+
+ if (level < last && v.type == jbvBinary)
+ {
+ res = executeAnyItem
+ (cxt, jsp, v.val.binary.data, found,
+ level + 1, first, last,
+ ignoreStructuralErrors, unwrapNext);
+
+ if (jperIsError(res))
+ break;
+
+ if (res == jperOk && found == NULL)
+ break;
+ }
+ }
+ }
+
+ return res;
+}
+
+/*
+ * Execute unary or binary predicate.
+ *
+ * Predicates have existence semantics, because their operands are item
+ * sequences. Pairs of items from the left and right operand's sequences are
+ * checked. TRUE is returned only if some pair satisfying the condition is found.
+ * In strict mode, even if the desired pair has already been found, all pairs
+ * still need to be examined to check the absence of errors. If any error
+ * occurs, UNKNOWN (analogous to SQL NULL) is returned.
+ */
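+/*
+ * For example (illustrative values): evaluating the predicate '$.a[*] > 2'
+ * against '{"a":[1,2,3]}' checks the pairs (1,2), (2,2) and (3,2) and
+ * returns TRUE because the pair (3,2) satisfies the condition.
+ */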
+static JsonPathBool
+executePredicate(JsonPathExecContext *cxt, JsonPathItem *pred,
+ JsonPathItem *larg, JsonPathItem *rarg, JsonbValue *jb,
+ bool unwrapRightArg, JsonPathPredicateCallback exec,
+ void *param)
+{
+ JsonPathExecResult res;
+ JsonValueListIterator lseqit;
+ JsonValueList lseq = {0};
+ JsonValueList rseq = {0};
+ JsonbValue *lval;
+ bool error = false;
+ bool found = false;
+
+ /* Left argument is always auto-unwrapped. */
+ res = executeItemOptUnwrapResultNoThrow(cxt, larg, jb, true, &lseq);
+ if (jperIsError(res))
+ return jpbUnknown;
+
+ if (rarg)
+ {
+ /* Right argument is conditionally auto-unwrapped. */
+ res = executeItemOptUnwrapResultNoThrow(cxt, rarg, jb,
+ unwrapRightArg, &rseq);
+ if (jperIsError(res))
+ return jpbUnknown;
+ }
+
+ JsonValueListInitIterator(&lseq, &lseqit);
+ while ((lval = JsonValueListNext(&lseq, &lseqit)))
+ {
+ JsonValueListIterator rseqit;
+ JsonbValue *rval;
+ bool first = true;
+
+ JsonValueListInitIterator(&rseq, &rseqit);
+ if (rarg)
+ rval = JsonValueListNext(&rseq, &rseqit);
+ else
+ rval = NULL;
+
+ /* Loop over right arg sequence or do single pass otherwise */
+ while (rarg ? (rval != NULL) : first)
+ {
+ JsonPathBool res = exec(pred, lval, rval, param);
+
+ if (res == jpbUnknown)
+ {
+ if (jspStrictAbsenseOfErrors(cxt))
+ return jpbUnknown;
+
+ error = true;
+ }
+ else if (res == jpbTrue)
+ {
+ if (!jspStrictAbsenseOfErrors(cxt))
+ return jpbTrue;
+
+ found = true;
+ }
+
+ first = false;
+ if (rarg)
+ rval = JsonValueListNext(&rseq, &rseqit);
+ }
+ }
+
+ if (found) /* possible only in strict mode */
+ return jpbTrue;
+
+ if (error) /* possible only in lax mode */
+ return jpbUnknown;
+
+ return jpbFalse;
+}
+
+/*
+ * Execute binary arithmetic expression on singleton numeric operands.
+ * Array operands are automatically unwrapped in lax mode.
+ */
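+/*
+ * For example (illustrative values):
+ *
+ *   jsonb_path_query('[2]', '$[0] + 3')       yields 5
+ *   jsonb_path_query('{"x":[2]}', '$.x + 3')  yields 5 in lax mode, since the
+ *                                             array operand is unwrapped to
+ *                                             its single element
+ */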
+static JsonPathExecResult
+executeBinaryArithmExpr(JsonPathExecContext *cxt, JsonPathItem *jsp,
+ JsonbValue *jb, BinaryArithmFunc func,
+ JsonValueList *found)
+{
+ JsonPathExecResult jper;
+ JsonPathItem elem;
+ JsonValueList lseq = {0};
+ JsonValueList rseq = {0};
+ JsonbValue *lval;
+ JsonbValue *rval;
+ Numeric res;
+
+ jspGetLeftArg(jsp, &elem);
+
+ /*
+ * XXX: By the standard, only operands of multiplicative expressions are
+ * unwrapped. We extend this to other binary arithmetic expressions too.
+ */
+ jper = executeItemOptUnwrapResult(cxt, &elem, jb, true, &lseq);
+ if (jperIsError(jper))
+ return jper;
+
+ jspGetRightArg(jsp, &elem);
+
+ jper = executeItemOptUnwrapResult(cxt, &elem, jb, true, &rseq);
+ if (jperIsError(jper))
+ return jper;
+
+ if (JsonValueListLength(&lseq) != 1 ||
+ !(lval = getScalar(JsonValueListHead(&lseq), jbvNumeric)))
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_SINGLETON_SQL_JSON_ITEM_REQUIRED),
+ errmsg("left operand of jsonpath operator %s is not a single numeric value",
+ jspOperationName(jsp->type)))));
+
+ if (JsonValueListLength(&rseq) != 1 ||
+ !(rval = getScalar(JsonValueListHead(&rseq), jbvNumeric)))
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_SINGLETON_SQL_JSON_ITEM_REQUIRED),
+ errmsg("right operand of jsonpath operator %s is not a single numeric value",
+ jspOperationName(jsp->type)))));
+
+ if (jspThrowErrors(cxt))
+ {
+ res = func(lval->val.numeric, rval->val.numeric, NULL);
+ }
+ else
+ {
+ bool error = false;
+
+ res = func(lval->val.numeric, rval->val.numeric, &error);
+
+ if (error)
+ return jperError;
+ }
+
+ if (!jspGetNext(jsp, &elem) && !found)
+ return jperOk;
+
+ lval = palloc(sizeof(*lval));
+ lval->type = jbvNumeric;
+ lval->val.numeric = res;
+
+ return executeNextItem(cxt, jsp, &elem, lval, found, false);
+}
+
+/*
+ * Execute unary arithmetic expression for each numeric item in its operand's
+ * sequence. Array operand is automatically unwrapped in lax mode.
+ */
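+/*
+ * For example (illustrative values): in lax mode
+ *
+ *   jsonb_path_query_array('{"x":[1,2,3]}', '- $.x[*]')
+ *
+ * yields [-1, -2, -3], applying the negation to each element of the sequence.
+ */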
+static JsonPathExecResult
+executeUnaryArithmExpr(JsonPathExecContext *cxt, JsonPathItem *jsp,
+ JsonbValue *jb, PGFunction func, JsonValueList *found)
+{
+ JsonPathExecResult jper;
+ JsonPathExecResult jper2;
+ JsonPathItem elem;
+ JsonValueList seq = {0};
+ JsonValueListIterator it;
+ JsonbValue *val;
+ bool hasNext;
+
+ jspGetArg(jsp, &elem);
+ jper = executeItemOptUnwrapResult(cxt, &elem, jb, true, &seq);
+
+ if (jperIsError(jper))
+ return jper;
+
+ jper = jperNotFound;
+
+ hasNext = jspGetNext(jsp, &elem);
+
+ JsonValueListInitIterator(&seq, &it);
+ while ((val = JsonValueListNext(&seq, &it)))
+ {
+ if ((val = getScalar(val, jbvNumeric)))
+ {
+ if (!found && !hasNext)
+ return jperOk;
+ }
+ else
+ {
+ if (!found && !hasNext)
+ continue; /* skip non-numerics processing */
+
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_SQL_JSON_NUMBER_NOT_FOUND),
+ errmsg("operand of unary jsonpath operator %s is not a numeric value",
+ jspOperationName(jsp->type)))));
+ }
+
+ if (func)
+ val->val.numeric =
+ DatumGetNumeric(DirectFunctionCall1(func,
+ NumericGetDatum(val->val.numeric)));
+
+ jper2 = executeNextItem(cxt, jsp, &elem, val, found, false);
+
+ if (jperIsError(jper2))
+ return jper2;
+
+ if (jper2 == jperOk)
+ {
+ if (!found)
+ return jperOk;
+ jper = jperOk;
+ }
+ }
+
+ return jper;
+}
+
+/*
+ * STARTS_WITH predicate callback.
+ *
+ * Check if the 'whole' string starts with the 'initial' string.
+ */
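+/*
+ * For example (illustrative values):
+ *
+ *   jsonb_path_query('["John Smith", "Mary Stone"]',
+ *                    '$[*] ? (@ starts with "John")')
+ *
+ * returns "John Smith".
+ */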
+static JsonPathBool
+executeStartsWith(JsonPathItem *jsp, JsonbValue *whole, JsonbValue *initial,
+ void *param)
+{
+ if (!(whole = getScalar(whole, jbvString)))
+ return jpbUnknown; /* error */
+
+ if (!(initial = getScalar(initial, jbvString)))
+ return jpbUnknown; /* error */
+
+ if (whole->val.string.len >= initial->val.string.len &&
+ !memcmp(whole->val.string.val,
+ initial->val.string.val,
+ initial->val.string.len))
+ return jpbTrue;
+
+ return jpbFalse;
+}
+
+/*
+ * LIKE_REGEX predicate callback.
+ *
+ * Check if the string matches the regex pattern.
+ */
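+/*
+ * For example (illustrative values):
+ *
+ *   jsonb_path_query_array('["abc", "xyz"]', '$[*] ? (@ like_regex "^ab")')
+ *
+ * returns ["abc"].
+ */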
+static JsonPathBool
+executeLikeRegex(JsonPathItem *jsp, JsonbValue *str, JsonbValue *rarg,
+ void *param)
+{
+ JsonLikeRegexContext *cxt = param;
+
+ if (!(str = getScalar(str, jbvString)))
+ return jpbUnknown;
+
+ /* Cache regex text and converted flags. */
+ if (!cxt->regex)
+ {
+ cxt->regex =
+ cstring_to_text_with_len(jsp->content.like_regex.pattern,
+ jsp->content.like_regex.patternlen);
+ cxt->cflags = jspConvertRegexFlags(jsp->content.like_regex.flags);
+ }
+
+ if (RE_compile_and_execute(cxt->regex, str->val.string.val,
+ str->val.string.len,
+ cxt->cflags, DEFAULT_COLLATION_OID, 0, NULL))
+ return jpbTrue;
+
+ return jpbFalse;
+}
+
+/*
+ * Execute numeric item methods (.abs(), .floor(), .ceiling()) using the specified
+ * user function 'func'.
+ */
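+/*
+ * For example (illustrative values):
+ *
+ *   jsonb_path_query('{"h": 1.7}', '$.h.ceiling()')  yields 2
+ *   jsonb_path_query('{"h": 1.7}', '$.h.floor()')    yields 1
+ *   jsonb_path_query('{"z": -0.3}', '$.z.abs()')     yields 0.3
+ */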
+static JsonPathExecResult
+executeNumericItemMethod(JsonPathExecContext *cxt, JsonPathItem *jsp,
+ JsonbValue *jb, bool unwrap, PGFunction func,
+ JsonValueList *found)
+{
+ JsonPathItem next;
+ Datum datum;
+
+ if (unwrap && JsonbType(jb) == jbvArray)
+ return executeItemUnwrapTargetArray(cxt, jsp, jb, found, false);
+
+ if (!(jb = getScalar(jb, jbvNumeric)))
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_NON_NUMERIC_SQL_JSON_ITEM),
+ errmsg("jsonpath item method .%s() can only be applied to a numeric value",
+ jspOperationName(jsp->type)))));
+
+ datum = DirectFunctionCall1(func, NumericGetDatum(jb->val.numeric));
+
+ if (!jspGetNext(jsp, &next) && !found)
+ return jperOk;
+
+ jb = palloc(sizeof(*jb));
+ jb->type = jbvNumeric;
+ jb->val.numeric = DatumGetNumeric(datum);
+
+ return executeNextItem(cxt, jsp, &next, jb, found, false);
+}
+
+/*
+ * Implementation of the .datetime() method.
+ *
+ * Converts a string into a date/time value. The actual type is determined at run time.
+ * If an argument is provided, this argument is used as a template string.
+ * Otherwise, the first fitting ISO format is selected.
+ */
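+/*
+ * For example (illustrative values):
+ *
+ *   jsonb_path_query('"2023-08-15"', '$.datetime()')
+ *
+ * recognizes the string as a date, while
+ *
+ *   jsonb_path_query('"15-08-2023"', '$.datetime("DD-MM-YYYY")')
+ *
+ * parses it with the supplied template and yields the same date.
+ */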
+static JsonPathExecResult
+executeDateTimeMethod(JsonPathExecContext *cxt, JsonPathItem *jsp,
+ JsonbValue *jb, JsonValueList *found)
+{
+ JsonbValue jbvbuf;
+ Datum value;
+ text *datetime;
+ Oid collid;
+ Oid typid;
+ int32 typmod = -1;
+ int tz = 0;
+ bool hasNext;
+ JsonPathExecResult res = jperNotFound;
+ JsonPathItem elem;
+
+ if (!(jb = getScalar(jb, jbvString)))
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_SQL_JSON_DATETIME_FUNCTION),
+ errmsg("jsonpath item method .%s() can only be applied to a string",
+ jspOperationName(jsp->type)))));
+
+ datetime = cstring_to_text_with_len(jb->val.string.val,
+ jb->val.string.len);
+
+ /*
+ * At some point we might wish to have callers supply the collation to
+ * use, but right now it's unclear that they'd be able to do better than
+ * DEFAULT_COLLATION_OID anyway.
+ */
+ collid = DEFAULT_COLLATION_OID;
+
+ if (jsp->content.arg)
+ {
+ text *template;
+ char *template_str;
+ int template_len;
+ bool have_error = false;
+
+ jspGetArg(jsp, &elem);
+
+ if (elem.type != jpiString)
+ elog(ERROR, "invalid jsonpath item type for .datetime() argument");
+
+ template_str = jspGetString(&elem, &template_len);
+
+ template = cstring_to_text_with_len(template_str,
+ template_len);
+
+ value = parse_datetime(datetime, template, collid, true,
+ &typid, &typmod, &tz,
+ jspThrowErrors(cxt) ? NULL : &have_error);
+
+ if (have_error)
+ res = jperError;
+ else
+ res = jperOk;
+ }
+ else
+ {
+ /*
+ * Per the SQL/JSON standard, enumerate ISO formats for: date, timetz,
+ * time, timestamptz, timestamp.
+ *
+ * We also support ISO 8601 format (with "T") for timestamps, because
+ * to_json[b]() functions use this format.
+ */
+ static const char *fmt_str[] =
+ {
+ "yyyy-mm-dd", /* date */
+ "HH24:MI:SS.USTZH:TZM", /* timetz */
+ "HH24:MI:SS.USTZH",
+ "HH24:MI:SSTZH:TZM",
+ "HH24:MI:SSTZH",
+ "HH24:MI:SS.US", /* time without tz */
+ "HH24:MI:SS",
+ "yyyy-mm-dd HH24:MI:SS.USTZH:TZM", /* timestamptz */
+ "yyyy-mm-dd HH24:MI:SS.USTZH",
+ "yyyy-mm-dd HH24:MI:SSTZH:TZM",
+ "yyyy-mm-dd HH24:MI:SSTZH",
+ "yyyy-mm-dd\"T\"HH24:MI:SS.USTZH:TZM",
+ "yyyy-mm-dd\"T\"HH24:MI:SS.USTZH",
+ "yyyy-mm-dd\"T\"HH24:MI:SSTZH:TZM",
+ "yyyy-mm-dd\"T\"HH24:MI:SSTZH",
+ "yyyy-mm-dd HH24:MI:SS.US", /* timestamp without tz */
+ "yyyy-mm-dd HH24:MI:SS",
+ "yyyy-mm-dd\"T\"HH24:MI:SS.US",
+ "yyyy-mm-dd\"T\"HH24:MI:SS"
+ };
+
+ /* cache for format texts */
+ static text *fmt_txt[lengthof(fmt_str)] = {0};
+ int i;
+
+ /* loop until datetime format fits */
+ for (i = 0; i < lengthof(fmt_str); i++)
+ {
+ bool have_error = false;
+
+ if (!fmt_txt[i])
+ {
+ MemoryContext oldcxt =
+ MemoryContextSwitchTo(TopMemoryContext);
+
+ fmt_txt[i] = cstring_to_text(fmt_str[i]);
+ MemoryContextSwitchTo(oldcxt);
+ }
+
+ value = parse_datetime(datetime, fmt_txt[i], collid, true,
+ &typid, &typmod, &tz,
+ &have_error);
+
+ if (!have_error)
+ {
+ res = jperOk;
+ break;
+ }
+ }
+
+ if (res == jperNotFound)
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_SQL_JSON_DATETIME_FUNCTION),
+ errmsg("datetime format is not recognized: \"%s\"",
+ text_to_cstring(datetime)),
+ errhint("Use a datetime template argument to specify the input data format."))));
+ }
+
+ pfree(datetime);
+
+ if (jperIsError(res))
+ return res;
+
+ hasNext = jspGetNext(jsp, &elem);
+
+ if (!hasNext && !found)
+ return res;
+
+ jb = hasNext ? &jbvbuf : palloc(sizeof(*jb));
+
+ jb->type = jbvDatetime;
+ jb->val.datetime.value = value;
+ jb->val.datetime.typid = typid;
+ jb->val.datetime.typmod = typmod;
+ jb->val.datetime.tz = tz;
+
+ return executeNextItem(cxt, jsp, &elem, jb, found, hasNext);
+}
+
+/*
+ * Implementation of .keyvalue() method.
+ *
+ * The .keyvalue() method returns a sequence of the object's key-value pairs
+ * in the following format: '{ "key": key, "value": value, "id": id }'.
+ *
+ * The "id" field is an object identifier constructed from two parts: the
+ * base object id and the object's binary offset within the base object's
+ * jsonb:
+ * id = 10000000000 * base_object_id + obj_offset_in_base_object
+ * (a worked example follows this comment).
+ *
+ * 10000000000 (10^10) is the first round decimal number greater than 2^32
+ * (the maximal offset in jsonb). A decimal multiplier is used here to
+ * improve the readability of identifiers.
+ *
+ * The base object is usually the root object of the path: the context item
+ * '$' or a path variable '$var'; literals can't produce objects for now.
+ * But if the path contains generated objects (.keyvalue() itself, for
+ * example), then they become the base object for the subsequent .keyvalue().
+ *
+ * The id of '$' is 0. The id of '$var' is its ordinal (positive) number in
+ * the list of variables (see getJsonPathVariable()). Ids for generated
+ * objects are assigned using the global counter
+ * JsonPathExecContext.lastGeneratedObjectId.
+ */
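+/*
+ * Worked example of the id formula above (illustrative offsets): an object
+ * located 120 bytes into base object #3 yields
+ * id = 3 * 10000000000 + 120 = 30000000120 for each of its key-value pairs.
+ */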
+static JsonPathExecResult
+executeKeyValueMethod(JsonPathExecContext *cxt, JsonPathItem *jsp,
+ JsonbValue *jb, JsonValueList *found)
+{
+ JsonPathExecResult res = jperNotFound;
+ JsonPathItem next;
+ JsonbContainer *jbc;
+ JsonbValue key;
+ JsonbValue val;
+ JsonbValue idval;
+ JsonbValue keystr;
+ JsonbValue valstr;
+ JsonbValue idstr;
+ JsonbIterator *it;
+ JsonbIteratorToken tok;
+ int64 id;
+ bool hasNext;
+
+ if (JsonbType(jb) != jbvObject || jb->type != jbvBinary)
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_SQL_JSON_OBJECT_NOT_FOUND),
+ errmsg("jsonpath item method .%s() can only be applied to an object",
+ jspOperationName(jsp->type)))));
+
+ jbc = jb->val.binary.data;
+
+ if (!JsonContainerSize(jbc))
+ return jperNotFound; /* no key-value pairs */
+
+ hasNext = jspGetNext(jsp, &next);
+
+ keystr.type = jbvString;
+ keystr.val.string.val = "key";
+ keystr.val.string.len = 3;
+
+ valstr.type = jbvString;
+ valstr.val.string.val = "value";
+ valstr.val.string.len = 5;
+
+ idstr.type = jbvString;
+ idstr.val.string.val = "id";
+ idstr.val.string.len = 2;
+
+ /* construct object id from its base object and offset inside that */
+ id = jb->type != jbvBinary ? 0 :
+ (int64) ((char *) jbc - (char *) cxt->baseObject.jbc);
+ id += (int64) cxt->baseObject.id * INT64CONST(10000000000);
+
+ idval.type = jbvNumeric;
+ idval.val.numeric = int64_to_numeric(id);
+
+ it = JsonbIteratorInit(jbc);
+
+ while ((tok = JsonbIteratorNext(&it, &key, true)) != WJB_DONE)
+ {
+ JsonBaseObjectInfo baseObject;
+ JsonbValue obj;
+ JsonbParseState *ps;
+ JsonbValue *keyval;
+ Jsonb *jsonb;
+
+ if (tok != WJB_KEY)
+ continue;
+
+ res = jperOk;
+
+ if (!hasNext && !found)
+ break;
+
+ tok = JsonbIteratorNext(&it, &val, true);
+ Assert(tok == WJB_VALUE);
+
+ ps = NULL;
+ pushJsonbValue(&ps, WJB_BEGIN_OBJECT, NULL);
+
+ pushJsonbValue(&ps, WJB_KEY, &keystr);
+ pushJsonbValue(&ps, WJB_VALUE, &key);
+
+ pushJsonbValue(&ps, WJB_KEY, &valstr);
+ pushJsonbValue(&ps, WJB_VALUE, &val);
+
+ pushJsonbValue(&ps, WJB_KEY, &idstr);
+ pushJsonbValue(&ps, WJB_VALUE, &idval);
+
+ keyval = pushJsonbValue(&ps, WJB_END_OBJECT, NULL);
+
+ jsonb = JsonbValueToJsonb(keyval);
+
+ JsonbInitBinary(&obj, jsonb);
+
+ baseObject = setBaseObject(cxt, &obj, cxt->lastGeneratedObjectId++);
+
+ res = executeNextItem(cxt, jsp, &next, &obj, found, true);
+
+ cxt->baseObject = baseObject;
+
+ if (jperIsError(res))
+ return res;
+
+ if (res == jperOk && !found)
+ break;
+ }
+
+ return res;
+}
+
+/*
+ * Convert boolean execution status 'res' to a boolean JSON item and execute
+ * next jsonpath.
+ */
+static JsonPathExecResult
+appendBoolResult(JsonPathExecContext *cxt, JsonPathItem *jsp,
+ JsonValueList *found, JsonPathBool res)
+{
+ JsonPathItem next;
+ JsonbValue jbv;
+
+ if (!jspGetNext(jsp, &next) && !found)
+ return jperOk; /* found singleton boolean value */
+
+ if (res == jpbUnknown)
+ {
+ jbv.type = jbvNull;
+ }
+ else
+ {
+ jbv.type = jbvBool;
+ jbv.val.boolean = res == jpbTrue;
+ }
+
+ return executeNextItem(cxt, jsp, &next, &jbv, found, true);
+}
+
+/*
+ * Convert a jsonpath scalar or variable node to an actual jsonb value. For
+ * a variable node, the value is looked up via getJsonPathVariable().
+ */
+static void
+getJsonPathItem(JsonPathExecContext *cxt, JsonPathItem *item,
+ JsonbValue *value)
+{
+ switch (item->type)
+ {
+ case jpiNull:
+ value->type = jbvNull;
+ break;
+ case jpiBool:
+ value->type = jbvBool;
+ value->val.boolean = jspGetBool(item);
+ break;
+ case jpiNumeric:
+ value->type = jbvNumeric;
+ value->val.numeric = jspGetNumeric(item);
+ break;
+ case jpiString:
+ value->type = jbvString;
+ value->val.string.val = jspGetString(item,
+ &value->val.string.len);
+ break;
+ case jpiVariable:
+ getJsonPathVariable(cxt, item, cxt->vars, value);
+ return;
+ default:
+ elog(ERROR, "unexpected jsonpath item type");
+ }
+}
+
+/*
+ * Get the value of a variable passed to the jsonpath executor.
+ */
+static void
+getJsonPathVariable(JsonPathExecContext *cxt, JsonPathItem *variable,
+ Jsonb *vars, JsonbValue *value)
+{
+ char *varName;
+ int varNameLength;
+ JsonbValue tmp;
+ JsonbValue *v;
+
+ if (!vars)
+ {
+ value->type = jbvNull;
+ return;
+ }
+
+ Assert(variable->type == jpiVariable);
+ varName = jspGetString(variable, &varNameLength);
+ tmp.type = jbvString;
+ tmp.val.string.val = varName;
+ tmp.val.string.len = varNameLength;
+
+ v = findJsonbValueFromContainer(&vars->root, JB_FOBJECT, &tmp);
+
+ if (v)
+ {
+ *value = *v;
+ pfree(v);
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_OBJECT),
+ errmsg("could not find jsonpath variable \"%s\"",
+ pnstrdup(varName, varNameLength))));
+ }
+
+ JsonbInitBinary(&tmp, vars);
+ setBaseObject(cxt, &tmp, 1);
+}
+
+/**************** Support functions for JsonPath execution *****************/
+
+/*
+ * Returns the size of an array item, or -1 if item is not an array.
+ */
+static int
+JsonbArraySize(JsonbValue *jb)
+{
+ Assert(jb->type != jbvArray);
+
+ if (jb->type == jbvBinary)
+ {
+ JsonbContainer *jbc = jb->val.binary.data;
+
+ if (JsonContainerIsArray(jbc) && !JsonContainerIsScalar(jbc))
+ return JsonContainerSize(jbc);
+ }
+
+ return -1;
+}
+
+/* Comparison predicate callback. */
+static JsonPathBool
+executeComparison(JsonPathItem *cmp, JsonbValue *lv, JsonbValue *rv, void *p)
+{
+ JsonPathExecContext *cxt = (JsonPathExecContext *) p;
+
+ return compareItems(cmp->type, lv, rv, cxt->useTz);
+}
+
+/*
+ * Perform per-byte comparison of two strings.
+ */
+static int
+binaryCompareStrings(const char *s1, int len1,
+ const char *s2, int len2)
+{
+ int cmp;
+
+ cmp = memcmp(s1, s2, Min(len1, len2));
+
+ if (cmp != 0)
+ return cmp;
+
+ if (len1 == len2)
+ return 0;
+
+ return len1 < len2 ? -1 : 1;
+}
+
+/*
+ * Compare two strings in the current server encoding using Unicode codepoint
+ * collation.
+ */
+static int
+compareStrings(const char *mbstr1, int mblen1,
+ const char *mbstr2, int mblen2)
+{
+ if (GetDatabaseEncoding() == PG_SQL_ASCII ||
+ GetDatabaseEncoding() == PG_UTF8)
+ {
+ /*
+ * It's a known property of UTF-8 strings that their per-byte comparison
+ * result matches the codepoint comparison result. ASCII can be
+ * considered a special case of UTF-8.
+ */
+ return binaryCompareStrings(mbstr1, mblen1, mbstr2, mblen2);
+ }
+ else
+ {
+ char *utf8str1,
+ *utf8str2;
+ int cmp,
+ utf8len1,
+ utf8len2;
+
+ /*
+ * We have to convert other encodings to UTF-8 first, then compare.
+ * The input strings may not be null-terminated, and pg_server_to_any()
+ * may return them "as is"; so use strlen() only if a real conversion
+ * took place.
+ */
+ utf8str1 = pg_server_to_any(mbstr1, mblen1, PG_UTF8);
+ utf8str2 = pg_server_to_any(mbstr2, mblen2, PG_UTF8);
+ utf8len1 = (mbstr1 == utf8str1) ? mblen1 : strlen(utf8str1);
+ utf8len2 = (mbstr2 == utf8str2) ? mblen2 : strlen(utf8str2);
+
+ cmp = binaryCompareStrings(utf8str1, utf8len1, utf8str2, utf8len2);
+
+ /*
+ * If pg_server_to_any() did no real conversion, then we actually
+ * compared the original strings and we are already done.
+ */
+ if (mbstr1 == utf8str1 && mbstr2 == utf8str2)
+ return cmp;
+
+ /* Free memory if needed */
+ if (mbstr1 != utf8str1)
+ pfree(utf8str1);
+ if (mbstr2 != utf8str2)
+ pfree(utf8str2);
+
+ /*
+ * When all Unicode codepoints are equal, return the result of the binary
+ * comparison. In some edge cases the same characters may have different
+ * representations in the encoding, so our behavior could diverge from the
+ * standard. However, this allows us to use a simple binary comparison for
+ * the "==" operator, which is performance-critical in typical cases. To
+ * implement strict standard conformance in the future, we could normalize
+ * the input JSON strings.
+ */
+ if (cmp == 0)
+ return binaryCompareStrings(mbstr1, mblen1, mbstr2, mblen2);
+ else
+ return cmp;
+ }
+}
+
+/*
+ * Compare two SQL/JSON items using comparison operation 'op'.
+ */
+static JsonPathBool
+compareItems(int32 op, JsonbValue *jb1, JsonbValue *jb2, bool useTz)
+{
+ int cmp;
+ bool res;
+
+ if (jb1->type != jb2->type)
+ {
+ if (jb1->type == jbvNull || jb2->type == jbvNull)
+
+ /*
+ * Equality and order comparisons of nulls to non-nulls always return
+ * false, but the inequality comparison returns true.
+ */
+ return op == jpiNotEqual ? jpbTrue : jpbFalse;
+
+ /* Non-null items of different types are not comparable. */
+ return jpbUnknown;
+ }
+
+ switch (jb1->type)
+ {
+ case jbvNull:
+ cmp = 0;
+ break;
+ case jbvBool:
+ cmp = jb1->val.boolean == jb2->val.boolean ? 0 :
+ jb1->val.boolean ? 1 : -1;
+ break;
+ case jbvNumeric:
+ cmp = compareNumeric(jb1->val.numeric, jb2->val.numeric);
+ break;
+ case jbvString:
+ if (op == jpiEqual)
+ return jb1->val.string.len != jb2->val.string.len ||
+ memcmp(jb1->val.string.val,
+ jb2->val.string.val,
+ jb1->val.string.len) ? jpbFalse : jpbTrue;
+
+ cmp = compareStrings(jb1->val.string.val, jb1->val.string.len,
+ jb2->val.string.val, jb2->val.string.len);
+ break;
+ case jbvDatetime:
+ {
+ bool cast_error;
+
+ cmp = compareDatetime(jb1->val.datetime.value,
+ jb1->val.datetime.typid,
+ jb2->val.datetime.value,
+ jb2->val.datetime.typid,
+ useTz,
+ &cast_error);
+
+ if (cast_error)
+ return jpbUnknown;
+ }
+ break;
+
+ case jbvBinary:
+ case jbvArray:
+ case jbvObject:
+ return jpbUnknown; /* non-scalars are not comparable */
+
+ default:
+ elog(ERROR, "invalid jsonb value type %d", jb1->type);
+ }
+
+ switch (op)
+ {
+ case jpiEqual:
+ res = (cmp == 0);
+ break;
+ case jpiNotEqual:
+ res = (cmp != 0);
+ break;
+ case jpiLess:
+ res = (cmp < 0);
+ break;
+ case jpiGreater:
+ res = (cmp > 0);
+ break;
+ case jpiLessOrEqual:
+ res = (cmp <= 0);
+ break;
+ case jpiGreaterOrEqual:
+ res = (cmp >= 0);
+ break;
+ default:
+ elog(ERROR, "unrecognized jsonpath operation: %d", op);
+ return jpbUnknown;
+ }
+
+ return res ? jpbTrue : jpbFalse;
+}
+
+/* Compare two numerics */
+static int
+compareNumeric(Numeric a, Numeric b)
+{
+ return DatumGetInt32(DirectFunctionCall2(numeric_cmp,
+ NumericGetDatum(a),
+ NumericGetDatum(b)));
+}
+
+static JsonbValue *
+copyJsonbValue(JsonbValue *src)
+{
+ JsonbValue *dst = palloc(sizeof(*dst));
+
+ *dst = *src;
+
+ return dst;
+}
+
+/*
+ * Execute the array subscript expression and convert the resulting numeric
+ * item to an integer with truncation.
+ */
+static JsonPathExecResult
+getArrayIndex(JsonPathExecContext *cxt, JsonPathItem *jsp, JsonbValue *jb,
+ int32 *index)
+{
+ JsonbValue *jbv;
+ JsonValueList found = {0};
+ JsonPathExecResult res = executeItem(cxt, jsp, jb, &found);
+ Datum numeric_index;
+ bool have_error = false;
+
+ if (jperIsError(res))
+ return res;
+
+ if (JsonValueListLength(&found) != 1 ||
+ !(jbv = getScalar(JsonValueListHead(&found), jbvNumeric)))
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_SQL_JSON_SUBSCRIPT),
+ errmsg("jsonpath array subscript is not a single numeric value"))));
+
+ numeric_index = DirectFunctionCall2(numeric_trunc,
+ NumericGetDatum(jbv->val.numeric),
+ Int32GetDatum(0));
+
+ *index = numeric_int4_opt_error(DatumGetNumeric(numeric_index),
+ &have_error);
+
+ if (have_error)
+ RETURN_ERROR(ereport(ERROR,
+ (errcode(ERRCODE_INVALID_SQL_JSON_SUBSCRIPT),
+ errmsg("jsonpath array subscript is out of integer range"))));
+
+ return jperOk;
+}
+
+/* Save base object and its id needed for the execution of .keyvalue(). */
+static JsonBaseObjectInfo
+setBaseObject(JsonPathExecContext *cxt, JsonbValue *jbv, int32 id)
+{
+ JsonBaseObjectInfo baseObject = cxt->baseObject;
+
+ cxt->baseObject.jbc = jbv->type != jbvBinary ? NULL :
+ (JsonbContainer *) jbv->val.binary.data;
+ cxt->baseObject.id = id;
+
+ return baseObject;
+}
+
+static void
+JsonValueListAppend(JsonValueList *jvl, JsonbValue *jbv)
+{
+ if (jvl->singleton)
+ {
+ jvl->list = list_make2(jvl->singleton, jbv);
+ jvl->singleton = NULL;
+ }
+ else if (!jvl->list)
+ jvl->singleton = jbv;
+ else
+ jvl->list = lappend(jvl->list, jbv);
+}
+
+static int
+JsonValueListLength(const JsonValueList *jvl)
+{
+ return jvl->singleton ? 1 : list_length(jvl->list);
+}
+
+static bool
+JsonValueListIsEmpty(JsonValueList *jvl)
+{
+ return !jvl->singleton && list_length(jvl->list) <= 0;
+}
+
+static JsonbValue *
+JsonValueListHead(JsonValueList *jvl)
+{
+ return jvl->singleton ? jvl->singleton : linitial(jvl->list);
+}
+
+static List *
+JsonValueListGetList(JsonValueList *jvl)
+{
+ if (jvl->singleton)
+ return list_make1(jvl->singleton);
+
+ return jvl->list;
+}
+
+static void
+JsonValueListInitIterator(const JsonValueList *jvl, JsonValueListIterator *it)
+{
+ if (jvl->singleton)
+ {
+ it->value = jvl->singleton;
+ it->list = NIL;
+ it->next = NULL;
+ }
+ else if (jvl->list != NIL)
+ {
+ it->value = (JsonbValue *) linitial(jvl->list);
+ it->list = jvl->list;
+ it->next = list_second_cell(jvl->list);
+ }
+ else
+ {
+ it->value = NULL;
+ it->list = NIL;
+ it->next = NULL;
+ }
+}
+
+/*
+ * Get the next item from the sequence, advancing the iterator.
+ */
+static JsonbValue *
+JsonValueListNext(const JsonValueList *jvl, JsonValueListIterator *it)
+{
+ JsonbValue *result = it->value;
+
+ if (it->next)
+ {
+ it->value = lfirst(it->next);
+ it->next = lnext(it->list, it->next);
+ }
+ else
+ {
+ it->value = NULL;
+ }
+
+ return result;
+}
+
+/*
+ * Initialize a binary JsonbValue with the given jsonb container.
+ */
+static JsonbValue *
+JsonbInitBinary(JsonbValue *jbv, Jsonb *jb)
+{
+ jbv->type = jbvBinary;
+ jbv->val.binary.data = &jb->root;
+ jbv->val.binary.len = VARSIZE_ANY_EXHDR(jb);
+
+ return jbv;
+}
+
+/*
+ * Returns the jbv* type of a JsonbValue. Note that it never returns
+ * jbvBinary as is.
+ */
+static int
+JsonbType(JsonbValue *jb)
+{
+ int type = jb->type;
+
+ if (jb->type == jbvBinary)
+ {
+ JsonbContainer *jbc = (void *) jb->val.binary.data;
+
+ /* Scalars should always be extracted during jsonpath execution. */
+ Assert(!JsonContainerIsScalar(jbc));
+
+ if (JsonContainerIsObject(jbc))
+ type = jbvObject;
+ else if (JsonContainerIsArray(jbc))
+ type = jbvArray;
+ else
+ elog(ERROR, "invalid jsonb container type: 0x%08x", jbc->header);
+ }
+
+ return type;
+}
+
+/* Get scalar of given type or NULL on type mismatch */
+static JsonbValue *
+getScalar(JsonbValue *scalar, enum jbvType type)
+{
+ /* Scalars should always be extracted during jsonpath execution. */
+ Assert(scalar->type != jbvBinary ||
+ !JsonContainerIsScalar(scalar->val.binary.data));
+
+ return scalar->type == type ? scalar : NULL;
+}
+
+/* Construct a JSON array from the item list */
+static JsonbValue *
+wrapItemsInArray(const JsonValueList *items)
+{
+ JsonbParseState *ps = NULL;
+ JsonValueListIterator it;
+ JsonbValue *jbv;
+
+ pushJsonbValue(&ps, WJB_BEGIN_ARRAY, NULL);
+
+ JsonValueListInitIterator(items, &it);
+ while ((jbv = JsonValueListNext(items, &it)))
+ pushJsonbValue(&ps, WJB_ELEM, jbv);
+
+ return pushJsonbValue(&ps, WJB_END_ARRAY, NULL);
+}
+
+/* Error out if the time zone required for casting from type1 to type2 may not be used */
+static void
+checkTimezoneIsUsedForCast(bool useTz, const char *type1, const char *type2)
+{
+ if (!useTz)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot convert value from %s to %s without time zone usage",
+ type1, type2),
+ errhint("Use *_tz() function for time zone support.")));
+}
+
+/* Convert time datum to timetz datum */
+static Datum
+castTimeToTimeTz(Datum time, bool useTz)
+{
+ checkTimezoneIsUsedForCast(useTz, "time", "timetz");
+
+ return DirectFunctionCall1(time_timetz, time);
+}
+
+/*
+ * Compare date to timestamp.
+ * Note that this doesn't involve any timezone considerations.
+ */
+static int
+cmpDateToTimestamp(DateADT date1, Timestamp ts2, bool useTz)
+{
+ return date_cmp_timestamp_internal(date1, ts2);
+}
+
+/*
+ * Compare date to timestamptz.
+ */
+static int
+cmpDateToTimestampTz(DateADT date1, TimestampTz tstz2, bool useTz)
+{
+ checkTimezoneIsUsedForCast(useTz, "date", "timestamptz");
+
+ return date_cmp_timestamptz_internal(date1, tstz2);
+}
+
+/*
+ * Compare timestamp to timestamptz.
+ */
+static int
+cmpTimestampToTimestampTz(Timestamp ts1, TimestampTz tstz2, bool useTz)
+{
+ checkTimezoneIsUsedForCast(useTz, "timestamp", "timestamptz");
+
+ return timestamp_cmp_timestamptz_internal(ts1, tstz2);
+}
+
+/*
+ * Cross-type comparison of two datetime SQL/JSON items.  If the items are
+ * not comparable, the *cast_error flag is set; otherwise it is cleared.
+ * If the cast requires a timezone and timezone usage is not allowed, an
+ * explicit error is thrown.
+ */
+static int
+compareDatetime(Datum val1, Oid typid1, Datum val2, Oid typid2,
+ bool useTz, bool *cast_error)
+{
+ PGFunction cmpfunc;
+
+ *cast_error = false;
+
+ switch (typid1)
+ {
+ case DATEOID:
+ switch (typid2)
+ {
+ case DATEOID:
+ cmpfunc = date_cmp;
+
+ break;
+
+ case TIMESTAMPOID:
+ return cmpDateToTimestamp(DatumGetDateADT(val1),
+ DatumGetTimestamp(val2),
+ useTz);
+
+ case TIMESTAMPTZOID:
+ return cmpDateToTimestampTz(DatumGetDateADT(val1),
+ DatumGetTimestampTz(val2),
+ useTz);
+
+ case TIMEOID:
+ case TIMETZOID:
+ *cast_error = true; /* uncomparable types */
+ return 0;
+
+ default:
+ elog(ERROR, "unrecognized SQL/JSON datetime type oid: %u",
+ typid2);
+ }
+ break;
+
+ case TIMEOID:
+ switch (typid2)
+ {
+ case TIMEOID:
+ cmpfunc = time_cmp;
+
+ break;
+
+ case TIMETZOID:
+ val1 = castTimeToTimeTz(val1, useTz);
+ cmpfunc = timetz_cmp;
+
+ break;
+
+ case DATEOID:
+ case TIMESTAMPOID:
+ case TIMESTAMPTZOID:
+ *cast_error = true; /* uncomparable types */
+ return 0;
+
+ default:
+ elog(ERROR, "unrecognized SQL/JSON datetime type oid: %u",
+ typid2);
+ }
+ break;
+
+ case TIMETZOID:
+ switch (typid2)
+ {
+ case TIMEOID:
+ val2 = castTimeToTimeTz(val2, useTz);
+ cmpfunc = timetz_cmp;
+
+ break;
+
+ case TIMETZOID:
+ cmpfunc = timetz_cmp;
+
+ break;
+
+ case DATEOID:
+ case TIMESTAMPOID:
+ case TIMESTAMPTZOID:
+ *cast_error = true; /* uncomparable types */
+ return 0;
+
+ default:
+ elog(ERROR, "unrecognized SQL/JSON datetime type oid: %u",
+ typid2);
+ }
+ break;
+
+ case TIMESTAMPOID:
+ switch (typid2)
+ {
+ case DATEOID:
+ return -cmpDateToTimestamp(DatumGetDateADT(val2),
+ DatumGetTimestamp(val1),
+ useTz);
+
+ case TIMESTAMPOID:
+ cmpfunc = timestamp_cmp;
+
+ break;
+
+ case TIMESTAMPTZOID:
+ return cmpTimestampToTimestampTz(DatumGetTimestamp(val1),
+ DatumGetTimestampTz(val2),
+ useTz);
+
+ case TIMEOID:
+ case TIMETZOID:
+ *cast_error = true; /* uncomparable types */
+ return 0;
+
+ default:
+ elog(ERROR, "unrecognized SQL/JSON datetime type oid: %u",
+ typid2);
+ }
+ break;
+
+ case TIMESTAMPTZOID:
+ switch (typid2)
+ {
+ case DATEOID:
+ return -cmpDateToTimestampTz(DatumGetDateADT(val2),
+ DatumGetTimestampTz(val1),
+ useTz);
+
+ case TIMESTAMPOID:
+ return -cmpTimestampToTimestampTz(DatumGetTimestamp(val2),
+ DatumGetTimestampTz(val1),
+ useTz);
+
+ case TIMESTAMPTZOID:
+ cmpfunc = timestamp_cmp;
+
+ break;
+
+ case TIMEOID:
+ case TIMETZOID:
+ *cast_error = true; /* uncomparable types */
+ return 0;
+
+ default:
+ elog(ERROR, "unrecognized SQL/JSON datetime type oid: %u",
+ typid2);
+ }
+ break;
+
+ default:
+ elog(ERROR, "unrecognized SQL/JSON datetime type oid: %u", typid1);
+ }
+
+ if (*cast_error)
+ return 0; /* cast error */
+
+ return DatumGetInt32(DirectFunctionCall2(cmpfunc, val1, val2));
+}
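compareDatetime() reports incomparability through *cast_error rather than by raising an error, so callers can treat a type mismatch as "unknown" under jsonpath semantics; only a missing-but-required timezone escalates to ereport(). A minimal caller sketch (hypothetical helper inside jsonpath_exec.c; DATEOID and TIMESTAMPTZOID are the usual type OIDs from pg_type_d.h):

    static void
    exampleCompare(Datum d_date, Datum d_tstz)
    {
        bool    cast_error;
        int     cmp;

        /* useTz = false would raise ERRCODE_FEATURE_NOT_SUPPORTED here */
        cmp = compareDatetime(d_date, DATEOID, d_tstz, TIMESTAMPTZOID,
                              true, &cast_error);

        if (cast_error)
            elog(DEBUG1, "operands are not comparable");
        else
            elog(DEBUG1, "comparison result: %d", cmp);
    }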
diff --git a/src/backend/utils/adt/jsonpath_gram.c b/src/backend/utils/adt/jsonpath_gram.c
new file mode 100644
index 0000000..a09bcfd
--- /dev/null
+++ b/src/backend/utils/adt/jsonpath_gram.c
@@ -0,0 +1,2416 @@
+/* A Bison parser, made by GNU Bison 3.7.5. */
+
+/* Bison implementation for Yacc-like parsers in C
+
+ Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2021 Free Software Foundation,
+ Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+/* As a special exception, you may create a larger work that contains
+ part or all of the Bison parser skeleton and distribute that work
+ under terms of your choice, so long as that work isn't itself a
+ parser generator using the skeleton or a modified version thereof
+ as a parser skeleton. Alternatively, if you modify or redistribute
+ the parser skeleton itself, you may (at your option) remove this
+ special exception, which will cause the skeleton and the resulting
+ Bison output files to be licensed under the GNU General Public
+ License without this special exception.
+
+ This special exception was added by the Free Software Foundation in
+ version 2.2 of Bison. */
+
+/* C LALR(1) parser skeleton written by Richard Stallman, by
+ simplifying the original so-called "semantic" parser. */
+
+/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual,
+ especially those whose name start with YY_ or yy_. They are
+ private implementation details that can be changed or removed. */
+
+/* All symbols defined below should begin with yy or YY, to avoid
+ infringing on user name space. This should be done even for local
+ variables, as they might otherwise be expanded by user macros.
+ There are some unavoidable exceptions within include files to
+ define necessary library symbols; they are noted "INFRINGES ON
+ USER NAME SPACE" below. */
+
+/* Identify Bison output, and Bison version. */
+#define YYBISON 30705
+
+/* Bison version string. */
+#define YYBISON_VERSION "3.7.5"
+
+/* Skeleton name. */
+#define YYSKELETON_NAME "yacc.c"
+
+/* Pure parsers. */
+#define YYPURE 1
+
+/* Push parsers. */
+#define YYPUSH 0
+
+/* Pull parsers. */
+#define YYPULL 1
+
+
+/* Substitute the variable and function names. */
+#define yyparse jsonpath_yyparse
+#define yylex jsonpath_yylex
+#define yyerror jsonpath_yyerror
+#define yydebug jsonpath_yydebug
+#define yynerrs jsonpath_yynerrs
+
+/* First part of user prologue. */
+#line 1 "jsonpath_gram.y"
+
+/*-------------------------------------------------------------------------
+ *
+ * jsonpath_gram.y
+ * Grammar definitions for jsonpath datatype
+ *
+ * Transforms tokenized jsonpath into tree of JsonPathParseItem structs.
+ *
+ * Copyright (c) 2019-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/jsonpath_gram.y
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "catalog/pg_collation.h"
+#include "fmgr.h"
+#include "miscadmin.h"
+#include "nodes/pg_list.h"
+#include "regex/regex.h"
+#include "utils/builtins.h"
+#include "utils/jsonpath.h"
+
+/* struct JsonPathString is shared between scan and gram */
+typedef struct JsonPathString
+{
+ char *val;
+ int len;
+ int total;
+} JsonPathString;
+
+union YYSTYPE;
+
+/* flex 2.5.4 doesn't bother with a decl for this */
+int jsonpath_yylex(union YYSTYPE *yylval_param);
+int jsonpath_yyparse(JsonPathParseResult **result);
+void jsonpath_yyerror(JsonPathParseResult **result, const char *message);
+
+static JsonPathParseItem *makeItemType(JsonPathItemType type);
+static JsonPathParseItem *makeItemString(JsonPathString *s);
+static JsonPathParseItem *makeItemVariable(JsonPathString *s);
+static JsonPathParseItem *makeItemKey(JsonPathString *s);
+static JsonPathParseItem *makeItemNumeric(JsonPathString *s);
+static JsonPathParseItem *makeItemBool(bool val);
+static JsonPathParseItem *makeItemBinary(JsonPathItemType type,
+ JsonPathParseItem *la,
+ JsonPathParseItem *ra);
+static JsonPathParseItem *makeItemUnary(JsonPathItemType type,
+ JsonPathParseItem *a);
+static JsonPathParseItem *makeItemList(List *list);
+static JsonPathParseItem *makeIndexArray(List *list);
+static JsonPathParseItem *makeAny(int first, int last);
+static JsonPathParseItem *makeItemLikeRegex(JsonPathParseItem *expr,
+ JsonPathString *pattern,
+ JsonPathString *flags);
+
+/*
+ * Bison doesn't allocate anything that needs to live across parser calls,
+ * so we can easily have it use palloc instead of malloc. This prevents
+ * memory leaks if we error out during parsing. Note this only works with
+ * bison >= 2.0. However, in bison 1.875 the default is to use alloca()
+ * if possible, so there's not really much problem anyhow, at least if
+ * you're building with gcc.
+ */
+#define YYMALLOC palloc
+#define YYFREE pfree
+
+
+#line 148 "jsonpath_gram.c"
+
+# ifndef YY_CAST
+# ifdef __cplusplus
+# define YY_CAST(Type, Val) static_cast<Type> (Val)
+# define YY_REINTERPRET_CAST(Type, Val) reinterpret_cast<Type> (Val)
+# else
+# define YY_CAST(Type, Val) ((Type) (Val))
+# define YY_REINTERPRET_CAST(Type, Val) ((Type) (Val))
+# endif
+# endif
+# ifndef YY_NULLPTR
+# if defined __cplusplus
+# if 201103L <= __cplusplus
+# define YY_NULLPTR nullptr
+# else
+# define YY_NULLPTR 0
+# endif
+# else
+# define YY_NULLPTR ((void*)0)
+# endif
+# endif
+
+
+/* Debug traces. */
+#ifndef YYDEBUG
+# define YYDEBUG 0
+#endif
+#if YYDEBUG
+extern int jsonpath_yydebug;
+#endif
+
+/* Token kinds. */
+#ifndef YYTOKENTYPE
+# define YYTOKENTYPE
+ enum yytokentype
+ {
+ YYEMPTY = -2,
+ YYEOF = 0, /* "end of file" */
+ YYerror = 256, /* error */
+ YYUNDEF = 257, /* "invalid token" */
+ TO_P = 258, /* TO_P */
+ NULL_P = 259, /* NULL_P */
+ TRUE_P = 260, /* TRUE_P */
+ FALSE_P = 261, /* FALSE_P */
+ IS_P = 262, /* IS_P */
+ UNKNOWN_P = 263, /* UNKNOWN_P */
+ EXISTS_P = 264, /* EXISTS_P */
+ IDENT_P = 265, /* IDENT_P */
+ STRING_P = 266, /* STRING_P */
+ NUMERIC_P = 267, /* NUMERIC_P */
+ INT_P = 268, /* INT_P */
+ VARIABLE_P = 269, /* VARIABLE_P */
+ OR_P = 270, /* OR_P */
+ AND_P = 271, /* AND_P */
+ NOT_P = 272, /* NOT_P */
+ LESS_P = 273, /* LESS_P */
+ LESSEQUAL_P = 274, /* LESSEQUAL_P */
+ EQUAL_P = 275, /* EQUAL_P */
+ NOTEQUAL_P = 276, /* NOTEQUAL_P */
+ GREATEREQUAL_P = 277, /* GREATEREQUAL_P */
+ GREATER_P = 278, /* GREATER_P */
+ ANY_P = 279, /* ANY_P */
+ STRICT_P = 280, /* STRICT_P */
+ LAX_P = 281, /* LAX_P */
+ LAST_P = 282, /* LAST_P */
+ STARTS_P = 283, /* STARTS_P */
+ WITH_P = 284, /* WITH_P */
+ LIKE_REGEX_P = 285, /* LIKE_REGEX_P */
+ FLAG_P = 286, /* FLAG_P */
+ ABS_P = 287, /* ABS_P */
+ SIZE_P = 288, /* SIZE_P */
+ TYPE_P = 289, /* TYPE_P */
+ FLOOR_P = 290, /* FLOOR_P */
+ DOUBLE_P = 291, /* DOUBLE_P */
+ CEILING_P = 292, /* CEILING_P */
+ KEYVALUE_P = 293, /* KEYVALUE_P */
+ DATETIME_P = 294, /* DATETIME_P */
+ UMINUS = 295 /* UMINUS */
+ };
+ typedef enum yytokentype yytoken_kind_t;
+#endif
+
+/* Value type. */
+#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
+union YYSTYPE
+{
+#line 80 "jsonpath_gram.y"
+
+ JsonPathString str;
+ List *elems; /* list of JsonPathParseItem */
+ List *indexs; /* list of integers */
+ JsonPathParseItem *value;
+ JsonPathParseResult *result;
+ JsonPathItemType optype;
+ bool boolean;
+ int integer;
+
+#line 246 "jsonpath_gram.c"
+
+};
+typedef union YYSTYPE YYSTYPE;
+# define YYSTYPE_IS_TRIVIAL 1
+# define YYSTYPE_IS_DECLARED 1
+#endif
+
+
+
+int jsonpath_yyparse (JsonPathParseResult **result);
+
+
+/* Symbol kind. */
+enum yysymbol_kind_t
+{
+ YYSYMBOL_YYEMPTY = -2,
+ YYSYMBOL_YYEOF = 0, /* "end of file" */
+ YYSYMBOL_YYerror = 1, /* error */
+ YYSYMBOL_YYUNDEF = 2, /* "invalid token" */
+ YYSYMBOL_TO_P = 3, /* TO_P */
+ YYSYMBOL_NULL_P = 4, /* NULL_P */
+ YYSYMBOL_TRUE_P = 5, /* TRUE_P */
+ YYSYMBOL_FALSE_P = 6, /* FALSE_P */
+ YYSYMBOL_IS_P = 7, /* IS_P */
+ YYSYMBOL_UNKNOWN_P = 8, /* UNKNOWN_P */
+ YYSYMBOL_EXISTS_P = 9, /* EXISTS_P */
+ YYSYMBOL_IDENT_P = 10, /* IDENT_P */
+ YYSYMBOL_STRING_P = 11, /* STRING_P */
+ YYSYMBOL_NUMERIC_P = 12, /* NUMERIC_P */
+ YYSYMBOL_INT_P = 13, /* INT_P */
+ YYSYMBOL_VARIABLE_P = 14, /* VARIABLE_P */
+ YYSYMBOL_OR_P = 15, /* OR_P */
+ YYSYMBOL_AND_P = 16, /* AND_P */
+ YYSYMBOL_NOT_P = 17, /* NOT_P */
+ YYSYMBOL_LESS_P = 18, /* LESS_P */
+ YYSYMBOL_LESSEQUAL_P = 19, /* LESSEQUAL_P */
+ YYSYMBOL_EQUAL_P = 20, /* EQUAL_P */
+ YYSYMBOL_NOTEQUAL_P = 21, /* NOTEQUAL_P */
+ YYSYMBOL_GREATEREQUAL_P = 22, /* GREATEREQUAL_P */
+ YYSYMBOL_GREATER_P = 23, /* GREATER_P */
+ YYSYMBOL_ANY_P = 24, /* ANY_P */
+ YYSYMBOL_STRICT_P = 25, /* STRICT_P */
+ YYSYMBOL_LAX_P = 26, /* LAX_P */
+ YYSYMBOL_LAST_P = 27, /* LAST_P */
+ YYSYMBOL_STARTS_P = 28, /* STARTS_P */
+ YYSYMBOL_WITH_P = 29, /* WITH_P */
+ YYSYMBOL_LIKE_REGEX_P = 30, /* LIKE_REGEX_P */
+ YYSYMBOL_FLAG_P = 31, /* FLAG_P */
+ YYSYMBOL_ABS_P = 32, /* ABS_P */
+ YYSYMBOL_SIZE_P = 33, /* SIZE_P */
+ YYSYMBOL_TYPE_P = 34, /* TYPE_P */
+ YYSYMBOL_FLOOR_P = 35, /* FLOOR_P */
+ YYSYMBOL_DOUBLE_P = 36, /* DOUBLE_P */
+ YYSYMBOL_CEILING_P = 37, /* CEILING_P */
+ YYSYMBOL_KEYVALUE_P = 38, /* KEYVALUE_P */
+ YYSYMBOL_DATETIME_P = 39, /* DATETIME_P */
+ YYSYMBOL_40_ = 40, /* '+' */
+ YYSYMBOL_41_ = 41, /* '-' */
+ YYSYMBOL_42_ = 42, /* '*' */
+ YYSYMBOL_43_ = 43, /* '/' */
+ YYSYMBOL_44_ = 44, /* '%' */
+ YYSYMBOL_UMINUS = 45, /* UMINUS */
+ YYSYMBOL_46_ = 46, /* '(' */
+ YYSYMBOL_47_ = 47, /* ')' */
+ YYSYMBOL_48_ = 48, /* '$' */
+ YYSYMBOL_49_ = 49, /* '@' */
+ YYSYMBOL_50_ = 50, /* ',' */
+ YYSYMBOL_51_ = 51, /* '[' */
+ YYSYMBOL_52_ = 52, /* ']' */
+ YYSYMBOL_53_ = 53, /* '{' */
+ YYSYMBOL_54_ = 54, /* '}' */
+ YYSYMBOL_55_ = 55, /* '.' */
+ YYSYMBOL_56_ = 56, /* '?' */
+ YYSYMBOL_YYACCEPT = 57, /* $accept */
+ YYSYMBOL_result = 58, /* result */
+ YYSYMBOL_expr_or_predicate = 59, /* expr_or_predicate */
+ YYSYMBOL_mode = 60, /* mode */
+ YYSYMBOL_scalar_value = 61, /* scalar_value */
+ YYSYMBOL_comp_op = 62, /* comp_op */
+ YYSYMBOL_delimited_predicate = 63, /* delimited_predicate */
+ YYSYMBOL_predicate = 64, /* predicate */
+ YYSYMBOL_starts_with_initial = 65, /* starts_with_initial */
+ YYSYMBOL_path_primary = 66, /* path_primary */
+ YYSYMBOL_accessor_expr = 67, /* accessor_expr */
+ YYSYMBOL_expr = 68, /* expr */
+ YYSYMBOL_index_elem = 69, /* index_elem */
+ YYSYMBOL_index_list = 70, /* index_list */
+ YYSYMBOL_array_accessor = 71, /* array_accessor */
+ YYSYMBOL_any_level = 72, /* any_level */
+ YYSYMBOL_any_path = 73, /* any_path */
+ YYSYMBOL_accessor_op = 74, /* accessor_op */
+ YYSYMBOL_datetime_template = 75, /* datetime_template */
+ YYSYMBOL_opt_datetime_template = 76, /* opt_datetime_template */
+ YYSYMBOL_key = 77, /* key */
+ YYSYMBOL_key_name = 78, /* key_name */
+ YYSYMBOL_method = 79 /* method */
+};
+typedef enum yysymbol_kind_t yysymbol_kind_t;
+
+
+
+
+#ifdef short
+# undef short
+#endif
+
+/* On compilers that do not define __PTRDIFF_MAX__ etc., make sure
+ <limits.h> and (if available) <stdint.h> are included
+ so that the code can choose integer types of a good width. */
+
+#ifndef __PTRDIFF_MAX__
+# include <limits.h> /* INFRINGES ON USER NAME SPACE */
+# if defined __STDC_VERSION__ && 199901 <= __STDC_VERSION__
+# include <stdint.h> /* INFRINGES ON USER NAME SPACE */
+# define YY_STDINT_H
+# endif
+#endif
+
+/* Narrow types that promote to a signed type and that can represent a
+ signed or unsigned integer of at least N bits. In tables they can
+ save space and decrease cache pressure. Promoting to a signed type
+ helps avoid bugs in integer arithmetic. */
+
+#ifdef __INT_LEAST8_MAX__
+typedef __INT_LEAST8_TYPE__ yytype_int8;
+#elif defined YY_STDINT_H
+typedef int_least8_t yytype_int8;
+#else
+typedef signed char yytype_int8;
+#endif
+
+#ifdef __INT_LEAST16_MAX__
+typedef __INT_LEAST16_TYPE__ yytype_int16;
+#elif defined YY_STDINT_H
+typedef int_least16_t yytype_int16;
+#else
+typedef short yytype_int16;
+#endif
+
+/* Work around bug in HP-UX 11.23, which defines these macros
+ incorrectly for preprocessor constants. This workaround can likely
+ be removed in 2023, as HPE has promised support for HP-UX 11.23
+ (aka HP-UX 11i v2) only through the end of 2022; see Table 2 of
+ <https://h20195.www2.hpe.com/V2/getpdf.aspx/4AA4-7673ENW.pdf>. */
+#ifdef __hpux
+# undef UINT_LEAST8_MAX
+# undef UINT_LEAST16_MAX
+# define UINT_LEAST8_MAX 255
+# define UINT_LEAST16_MAX 65535
+#endif
+
+#if defined __UINT_LEAST8_MAX__ && __UINT_LEAST8_MAX__ <= __INT_MAX__
+typedef __UINT_LEAST8_TYPE__ yytype_uint8;
+#elif (!defined __UINT_LEAST8_MAX__ && defined YY_STDINT_H \
+ && UINT_LEAST8_MAX <= INT_MAX)
+typedef uint_least8_t yytype_uint8;
+#elif !defined __UINT_LEAST8_MAX__ && UCHAR_MAX <= INT_MAX
+typedef unsigned char yytype_uint8;
+#else
+typedef short yytype_uint8;
+#endif
+
+#if defined __UINT_LEAST16_MAX__ && __UINT_LEAST16_MAX__ <= __INT_MAX__
+typedef __UINT_LEAST16_TYPE__ yytype_uint16;
+#elif (!defined __UINT_LEAST16_MAX__ && defined YY_STDINT_H \
+ && UINT_LEAST16_MAX <= INT_MAX)
+typedef uint_least16_t yytype_uint16;
+#elif !defined __UINT_LEAST16_MAX__ && USHRT_MAX <= INT_MAX
+typedef unsigned short yytype_uint16;
+#else
+typedef int yytype_uint16;
+#endif
+
+#ifndef YYPTRDIFF_T
+# if defined __PTRDIFF_TYPE__ && defined __PTRDIFF_MAX__
+# define YYPTRDIFF_T __PTRDIFF_TYPE__
+# define YYPTRDIFF_MAXIMUM __PTRDIFF_MAX__
+# elif defined PTRDIFF_MAX
+# ifndef ptrdiff_t
+# include <stddef.h> /* INFRINGES ON USER NAME SPACE */
+# endif
+# define YYPTRDIFF_T ptrdiff_t
+# define YYPTRDIFF_MAXIMUM PTRDIFF_MAX
+# else
+# define YYPTRDIFF_T long
+# define YYPTRDIFF_MAXIMUM LONG_MAX
+# endif
+#endif
+
+#ifndef YYSIZE_T
+# ifdef __SIZE_TYPE__
+# define YYSIZE_T __SIZE_TYPE__
+# elif defined size_t
+# define YYSIZE_T size_t
+# elif defined __STDC_VERSION__ && 199901 <= __STDC_VERSION__
+# include <stddef.h> /* INFRINGES ON USER NAME SPACE */
+# define YYSIZE_T size_t
+# else
+# define YYSIZE_T unsigned
+# endif
+#endif
+
+#define YYSIZE_MAXIMUM \
+ YY_CAST (YYPTRDIFF_T, \
+ (YYPTRDIFF_MAXIMUM < YY_CAST (YYSIZE_T, -1) \
+ ? YYPTRDIFF_MAXIMUM \
+ : YY_CAST (YYSIZE_T, -1)))
+
+#define YYSIZEOF(X) YY_CAST (YYPTRDIFF_T, sizeof (X))
+
+
+/* Stored state numbers (used for stacks). */
+typedef yytype_uint8 yy_state_t;
+
+/* State numbers in computations. */
+typedef int yy_state_fast_t;
+
+#ifndef YY_
+# if defined YYENABLE_NLS && YYENABLE_NLS
+# if ENABLE_NLS
+# include <libintl.h> /* INFRINGES ON USER NAME SPACE */
+# define YY_(Msgid) dgettext ("bison-runtime", Msgid)
+# endif
+# endif
+# ifndef YY_
+# define YY_(Msgid) Msgid
+# endif
+#endif
+
+
+#ifndef YY_ATTRIBUTE_PURE
+# if defined __GNUC__ && 2 < __GNUC__ + (96 <= __GNUC_MINOR__)
+# define YY_ATTRIBUTE_PURE __attribute__ ((__pure__))
+# else
+# define YY_ATTRIBUTE_PURE
+# endif
+#endif
+
+#ifndef YY_ATTRIBUTE_UNUSED
+# if defined __GNUC__ && 2 < __GNUC__ + (7 <= __GNUC_MINOR__)
+# define YY_ATTRIBUTE_UNUSED __attribute__ ((__unused__))
+# else
+# define YY_ATTRIBUTE_UNUSED
+# endif
+#endif
+
+/* Suppress unused-variable warnings by "using" E. */
+#if ! defined lint || defined __GNUC__
+# define YY_USE(E) ((void) (E))
+#else
+# define YY_USE(E) /* empty */
+#endif
+
+#if defined __GNUC__ && ! defined __ICC && 407 <= __GNUC__ * 100 + __GNUC_MINOR__
+/* Suppress an incorrect diagnostic about yylval being uninitialized. */
+# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \
+ _Pragma ("GCC diagnostic push") \
+ _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"") \
+ _Pragma ("GCC diagnostic ignored \"-Wmaybe-uninitialized\"")
+# define YY_IGNORE_MAYBE_UNINITIALIZED_END \
+ _Pragma ("GCC diagnostic pop")
+#else
+# define YY_INITIAL_VALUE(Value) Value
+#endif
+#ifndef YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
+# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
+# define YY_IGNORE_MAYBE_UNINITIALIZED_END
+#endif
+#ifndef YY_INITIAL_VALUE
+# define YY_INITIAL_VALUE(Value) /* Nothing. */
+#endif
+
+#if defined __cplusplus && defined __GNUC__ && ! defined __ICC && 6 <= __GNUC__
+# define YY_IGNORE_USELESS_CAST_BEGIN \
+ _Pragma ("GCC diagnostic push") \
+ _Pragma ("GCC diagnostic ignored \"-Wuseless-cast\"")
+# define YY_IGNORE_USELESS_CAST_END \
+ _Pragma ("GCC diagnostic pop")
+#endif
+#ifndef YY_IGNORE_USELESS_CAST_BEGIN
+# define YY_IGNORE_USELESS_CAST_BEGIN
+# define YY_IGNORE_USELESS_CAST_END
+#endif
+
+
+#define YY_ASSERT(E) ((void) (0 && (E)))
+
+#if !defined yyoverflow
+
+/* The parser invokes alloca or malloc; define the necessary symbols. */
+
+# ifdef YYSTACK_USE_ALLOCA
+# if YYSTACK_USE_ALLOCA
+# ifdef __GNUC__
+# define YYSTACK_ALLOC __builtin_alloca
+# elif defined __BUILTIN_VA_ARG_INCR
+# include <alloca.h> /* INFRINGES ON USER NAME SPACE */
+# elif defined _AIX
+# define YYSTACK_ALLOC __alloca
+# elif defined _MSC_VER
+# include <malloc.h> /* INFRINGES ON USER NAME SPACE */
+# define alloca _alloca
+# else
+# define YYSTACK_ALLOC alloca
+# if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS
+# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
+ /* Use EXIT_SUCCESS as a witness for stdlib.h. */
+# ifndef EXIT_SUCCESS
+# define EXIT_SUCCESS 0
+# endif
+# endif
+# endif
+# endif
+# endif
+
+# ifdef YYSTACK_ALLOC
+ /* Pacify GCC's 'empty if-body' warning. */
+# define YYSTACK_FREE(Ptr) do { /* empty */; } while (0)
+# ifndef YYSTACK_ALLOC_MAXIMUM
+ /* The OS might guarantee only one guard page at the bottom of the stack,
+ and a page size can be as small as 4096 bytes. So we cannot safely
+ invoke alloca (N) if N exceeds 4096. Use a slightly smaller number
+ to allow for a few compiler-allocated temporary stack slots. */
+# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */
+# endif
+# else
+# define YYSTACK_ALLOC YYMALLOC
+# define YYSTACK_FREE YYFREE
+# ifndef YYSTACK_ALLOC_MAXIMUM
+# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM
+# endif
+# if (defined __cplusplus && ! defined EXIT_SUCCESS \
+ && ! ((defined YYMALLOC || defined malloc) \
+ && (defined YYFREE || defined free)))
+# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
+# ifndef EXIT_SUCCESS
+# define EXIT_SUCCESS 0
+# endif
+# endif
+# ifndef YYMALLOC
+# define YYMALLOC malloc
+# if ! defined malloc && ! defined EXIT_SUCCESS
+void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */
+# endif
+# endif
+# ifndef YYFREE
+# define YYFREE free
+# if ! defined free && ! defined EXIT_SUCCESS
+void free (void *); /* INFRINGES ON USER NAME SPACE */
+# endif
+# endif
+# endif
+#endif /* !defined yyoverflow */
+
+#if (! defined yyoverflow \
+ && (! defined __cplusplus \
+ || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL)))
+
+/* A type that is properly aligned for any stack member. */
+union yyalloc
+{
+ yy_state_t yyss_alloc;
+ YYSTYPE yyvs_alloc;
+};
+
+/* The size of the maximum gap between one aligned stack and the next. */
+# define YYSTACK_GAP_MAXIMUM (YYSIZEOF (union yyalloc) - 1)
+
+/* The size of an array large enough to hold all stacks, each with
+   N elements. */
+# define YYSTACK_BYTES(N) \
+ ((N) * (YYSIZEOF (yy_state_t) + YYSIZEOF (YYSTYPE)) \
+ + YYSTACK_GAP_MAXIMUM)
+
+# define YYCOPY_NEEDED 1
+
+/* Relocate STACK from its old location to the new one. The
+ local variables YYSIZE and YYSTACKSIZE give the old and new number of
+ elements in the stack, and YYPTR gives the new location of the
+ stack. Advance YYPTR to a properly aligned location for the next
+ stack. */
+# define YYSTACK_RELOCATE(Stack_alloc, Stack) \
+ do \
+ { \
+ YYPTRDIFF_T yynewbytes; \
+ YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \
+ Stack = &yyptr->Stack_alloc; \
+ yynewbytes = yystacksize * YYSIZEOF (*Stack) + YYSTACK_GAP_MAXIMUM; \
+ yyptr += yynewbytes / YYSIZEOF (*yyptr); \
+ } \
+ while (0)
+
+#endif
+
+#if defined YYCOPY_NEEDED && YYCOPY_NEEDED
+/* Copy COUNT objects from SRC to DST. The source and destination do
+ not overlap. */
+# ifndef YYCOPY
+# if defined __GNUC__ && 1 < __GNUC__
+# define YYCOPY(Dst, Src, Count) \
+ __builtin_memcpy (Dst, Src, YY_CAST (YYSIZE_T, (Count)) * sizeof (*(Src)))
+# else
+# define YYCOPY(Dst, Src, Count) \
+ do \
+ { \
+ YYPTRDIFF_T yyi; \
+ for (yyi = 0; yyi < (Count); yyi++) \
+ (Dst)[yyi] = (Src)[yyi]; \
+ } \
+ while (0)
+# endif
+# endif
+#endif /* !YYCOPY_NEEDED */
+
+/* YYFINAL -- State number of the termination state. */
+#define YYFINAL 5
+/* YYLAST -- Last index in YYTABLE. */
+#define YYLAST 239
+
+/* YYNTOKENS -- Number of terminals. */
+#define YYNTOKENS 57
+/* YYNNTS -- Number of nonterminals. */
+#define YYNNTS 23
+/* YYNRULES -- Number of rules. */
+#define YYNRULES 104
+/* YYNSTATES -- Number of states. */
+#define YYNSTATES 143
+
+/* YYMAXUTOK -- Last valid token kind. */
+#define YYMAXUTOK 295
+
+
+/* YYTRANSLATE(TOKEN-NUM) -- Symbol number corresponding to TOKEN-NUM
+ as returned by yylex, with out-of-bounds checking. */
+#define YYTRANSLATE(YYX) \
+ (0 <= (YYX) && (YYX) <= YYMAXUTOK \
+ ? YY_CAST (yysymbol_kind_t, yytranslate[YYX]) \
+ : YYSYMBOL_YYUNDEF)
+
+/* YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to TOKEN-NUM
+ as returned by yylex. */
+static const yytype_int8 yytranslate[] =
+{
+ 0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 48, 44, 2, 2,
+ 46, 47, 42, 40, 50, 41, 55, 43, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 56, 49, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 51, 2, 52, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 53, 2, 54, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 1, 2, 3, 4,
+ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
+ 35, 36, 37, 38, 39, 45
+};
+
+#if YYDEBUG
+ /* YYRLINE[YYN] -- Source line where rule number YYN was defined. */
+static const yytype_int16 yyrline[] =
+{
+ 0, 130, 130, 136, 140, 141, 145, 146, 147, 151,
+ 152, 153, 154, 155, 156, 157, 161, 162, 163, 164,
+ 165, 166, 170, 171, 175, 176, 177, 178, 179, 180,
+ 182, 184, 185, 190, 191, 195, 196, 197, 198, 202,
+ 203, 204, 205, 209, 210, 211, 212, 213, 214, 215,
+ 216, 217, 221, 222, 226, 227, 231, 232, 236, 237,
+ 241, 242, 243, 248, 249, 250, 251, 252, 253, 255,
+ 259, 263, 264, 268, 272, 273, 274, 275, 276, 277,
+ 278, 279, 280, 281, 282, 283, 284, 285, 286, 287,
+ 288, 289, 290, 291, 292, 293, 294, 295, 299, 300,
+ 301, 302, 303, 304, 305
+};
+#endif
+
+/** Accessing symbol of state STATE. */
+#define YY_ACCESSING_SYMBOL(State) YY_CAST (yysymbol_kind_t, yystos[State])
+
+#if YYDEBUG || 0
+/* The user-facing name of the symbol whose (internal) number is
+ YYSYMBOL. No bounds checking. */
+static const char *yysymbol_name (yysymbol_kind_t yysymbol) YY_ATTRIBUTE_UNUSED;
+
+/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
+ First, the terminals, then, starting at YYNTOKENS, nonterminals. */
+static const char *const yytname[] =
+{
+ "\"end of file\"", "error", "\"invalid token\"", "TO_P", "NULL_P",
+ "TRUE_P", "FALSE_P", "IS_P", "UNKNOWN_P", "EXISTS_P", "IDENT_P",
+ "STRING_P", "NUMERIC_P", "INT_P", "VARIABLE_P", "OR_P", "AND_P", "NOT_P",
+ "LESS_P", "LESSEQUAL_P", "EQUAL_P", "NOTEQUAL_P", "GREATEREQUAL_P",
+ "GREATER_P", "ANY_P", "STRICT_P", "LAX_P", "LAST_P", "STARTS_P",
+ "WITH_P", "LIKE_REGEX_P", "FLAG_P", "ABS_P", "SIZE_P", "TYPE_P",
+ "FLOOR_P", "DOUBLE_P", "CEILING_P", "KEYVALUE_P", "DATETIME_P", "'+'",
+ "'-'", "'*'", "'/'", "'%'", "UMINUS", "'('", "')'", "'$'", "'@'", "','",
+ "'['", "']'", "'{'", "'}'", "'.'", "'?'", "$accept", "result",
+ "expr_or_predicate", "mode", "scalar_value", "comp_op",
+ "delimited_predicate", "predicate", "starts_with_initial",
+ "path_primary", "accessor_expr", "expr", "index_elem", "index_list",
+ "array_accessor", "any_level", "any_path", "accessor_op",
+ "datetime_template", "opt_datetime_template", "key", "key_name",
+ "method", YY_NULLPTR
+};
+
+static const char *
+yysymbol_name (yysymbol_kind_t yysymbol)
+{
+ return yytname[yysymbol];
+}
+#endif
+
+#ifdef YYPRINT
+/* YYTOKNUM[NUM] -- (External) token number corresponding to the
+ (internal) symbol number NUM (which must be that of a token). */
+static const yytype_int16 yytoknum[] =
+{
+ 0, 256, 257, 258, 259, 260, 261, 262, 263, 264,
+ 265, 266, 267, 268, 269, 270, 271, 272, 273, 274,
+ 275, 276, 277, 278, 279, 280, 281, 282, 283, 284,
+ 285, 286, 287, 288, 289, 290, 291, 292, 293, 294,
+ 43, 45, 42, 47, 37, 295, 40, 41, 36, 64,
+ 44, 91, 93, 123, 125, 46, 63
+};
+#endif
+
+#define YYPACT_NINF (-44)
+
+#define yypact_value_is_default(Yyn) \
+ ((Yyn) == YYPACT_NINF)
+
+#define YYTABLE_NINF (-105)
+
+#define yytable_value_is_error(Yyn) \
+ 0
+
+ /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
+ STATE-NUM. */
+static const yytype_int16 yypact[] =
+{
+ 7, -44, -44, 18, 51, -44, -44, -44, -44, -43,
+ -44, -44, -44, -44, -3, -44, 114, 114, 51, -44,
+ -44, -44, -44, -44, 10, -44, -35, 195, 114, 51,
+ -44, 51, -44, -44, 14, 165, 51, 51, 68, 140,
+ -9, -44, -44, -44, -44, -44, -44, -44, -44, 37,
+ 60, 114, 114, 114, 114, 114, 114, 46, 20, 195,
+ 30, 3, -35, 59, -44, 24, -2, -44, -41, -44,
+ -44, -44, -44, -44, -44, -44, -44, -44, 31, -44,
+ -44, -44, -44, -44, -44, -44, 48, 50, 52, 61,
+ 67, 69, 78, 83, -44, -44, -44, -44, 84, 51,
+ 17, 100, 79, 79, -44, -44, -44, 62, -44, -44,
+ -35, 75, -44, -44, -44, 114, 114, -44, -8, 121,
+ 86, 54, -44, -44, -44, 123, -44, 62, -44, -44,
+ -44, -1, -44, -44, 88, -44, -44, -44, -8, -44,
+ -44, 82, -44
+};
+
+ /* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM.
+ Performed when YYTABLE does not specify something else to do. Zero
+ means the default is an error. */
+static const yytype_int8 yydefact[] =
+{
+ 8, 6, 7, 0, 0, 1, 10, 11, 12, 0,
+ 9, 13, 14, 15, 0, 38, 0, 0, 0, 36,
+ 37, 2, 35, 24, 5, 39, 43, 4, 0, 0,
+ 28, 0, 45, 46, 0, 0, 0, 0, 0, 0,
+ 0, 65, 42, 18, 20, 16, 17, 21, 19, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 22, 44, 27, 26, 0, 52, 54, 0, 76,
+ 77, 78, 79, 80, 81, 82, 74, 75, 60, 83,
+ 84, 93, 94, 95, 96, 97, 85, 86, 87, 88,
+ 89, 90, 92, 91, 64, 66, 63, 73, 0, 0,
+ 0, 31, 47, 48, 49, 50, 51, 25, 23, 22,
+ 0, 0, 41, 40, 56, 0, 0, 57, 0, 72,
+ 0, 0, 33, 34, 30, 0, 29, 53, 55, 58,
+ 59, 0, 70, 71, 0, 67, 69, 32, 0, 61,
+ 68, 0, 62
+};
+
+ /* YYPGOTO[NTERM-NUM]. */
+static const yytype_int8 yypgoto[] =
+{
+ -44, -44, -44, -44, -44, -44, 124, -14, -44, -44,
+ -44, -4, 21, -44, -44, 1, -44, -18, -44, -44,
+ -44, -44, -44
+};
+
+ /* YYDEFGOTO[NTERM-NUM]. */
+static const yytype_uint8 yydefgoto[] =
+{
+ 0, 3, 21, 4, 22, 56, 23, 24, 124, 25,
+ 26, 59, 67, 68, 41, 131, 95, 112, 133, 134,
+ 96, 97, 98
+};
+
+ /* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If
+ positive, shift that token. If negative, reduce the rule whose
+ number is the opposite. If YYTABLE_NINF, syntax error. */
+static const yytype_int16 yytable[] =
+{
+ 27, 115, 138, 28, 34, 129, 9, -3, 42, 116,
+ 111, 117, 32, 33, 35, 58, 38, 60, 5, 130,
+ 39, 40, 63, 64, 57, 36, 37, 35, 122, 36,
+ 37, 123, 1, 2, 66, 36, 37, 99, 51, 52,
+ 53, 54, 55, 29, 113, 36, 37, 102, 103, 104,
+ 105, 106, 107, 139, 38, 6, 7, 8, 39, 40,
+ 9, 61, 10, 11, 12, 13, 100, 109, 14, 36,
+ 37, 101, 6, 7, 8, 37, 114, 110, 15, 10,
+ 11, 12, 13, 126, 118, 121, 51, 52, 53, 54,
+ 55, 16, 17, 108, -98, 15, -99, 18, -100, 19,
+ 20, 136, 51, 52, 53, 54, 55, -101, 16, 17,
+ 65, 127, 66, -102, 31, -103, 19, 20, 6, 7,
+ 8, 53, 54, 55, -104, 10, 11, 12, 13, 119,
+ 120, 125, 132, 135, 137, 140, 142, 128, 30, 141,
+ 0, 15, 0, 69, 70, 71, 72, 73, 74, 75,
+ 76, 77, 0, 0, 16, 17, 0, 0, 0, 0,
+ 31, 0, 19, 20, 78, 79, 80, 81, 82, 83,
+ 84, 85, 86, 87, 88, 89, 90, 91, 92, 93,
+ 0, 0, 94, 43, 44, 45, 46, 47, 48, 0,
+ 0, 0, 0, 49, 0, 50, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 51, 52, 53, 54, 55,
+ 0, 0, 62, 43, 44, 45, 46, 47, 48, 0,
+ 0, 0, 0, 49, 0, 50, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 51, 52, 53, 54, 55
+};
+
+static const yytype_int16 yycheck[] =
+{
+ 4, 3, 3, 46, 18, 13, 9, 0, 26, 50,
+ 7, 52, 16, 17, 18, 29, 51, 31, 0, 27,
+ 55, 56, 36, 37, 28, 15, 16, 31, 11, 15,
+ 16, 14, 25, 26, 38, 15, 16, 46, 40, 41,
+ 42, 43, 44, 46, 62, 15, 16, 51, 52, 53,
+ 54, 55, 56, 54, 51, 4, 5, 6, 55, 56,
+ 9, 47, 11, 12, 13, 14, 29, 47, 17, 15,
+ 16, 11, 4, 5, 6, 16, 52, 47, 27, 11,
+ 12, 13, 14, 8, 53, 99, 40, 41, 42, 43,
+ 44, 40, 41, 47, 46, 27, 46, 46, 46, 48,
+ 49, 47, 40, 41, 42, 43, 44, 46, 40, 41,
+ 42, 115, 116, 46, 46, 46, 48, 49, 4, 5,
+ 6, 42, 43, 44, 46, 11, 12, 13, 14, 46,
+ 46, 31, 11, 47, 11, 47, 54, 116, 14, 138,
+ -1, 27, -1, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, -1, -1, 40, 41, -1, -1, -1, -1,
+ 46, -1, 48, 49, 24, 25, 26, 27, 28, 29,
+ 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
+ -1, -1, 42, 18, 19, 20, 21, 22, 23, -1,
+ -1, -1, -1, 28, -1, 30, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 40, 41, 42, 43, 44,
+ -1, -1, 47, 18, 19, 20, 21, 22, 23, -1,
+ -1, -1, -1, 28, -1, 30, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, 40, 41, 42, 43, 44
+};
+
+ /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
+ symbol of state STATE-NUM. */
+static const yytype_int8 yystos[] =
+{
+ 0, 25, 26, 58, 60, 0, 4, 5, 6, 9,
+ 11, 12, 13, 14, 17, 27, 40, 41, 46, 48,
+ 49, 59, 61, 63, 64, 66, 67, 68, 46, 46,
+ 63, 46, 68, 68, 64, 68, 15, 16, 51, 55,
+ 56, 71, 74, 18, 19, 20, 21, 22, 23, 28,
+ 30, 40, 41, 42, 43, 44, 62, 68, 64, 68,
+ 64, 47, 47, 64, 64, 42, 68, 69, 70, 3,
+ 4, 5, 6, 7, 8, 9, 10, 11, 24, 25,
+ 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+ 36, 37, 38, 39, 42, 73, 77, 78, 79, 46,
+ 29, 11, 68, 68, 68, 68, 68, 68, 47, 47,
+ 47, 7, 74, 74, 52, 3, 50, 52, 53, 46,
+ 46, 64, 11, 14, 65, 31, 8, 68, 69, 13,
+ 27, 72, 11, 75, 76, 47, 47, 11, 3, 54,
+ 47, 72, 54
+};
+
+ /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
+static const yytype_int8 yyr1[] =
+{
+ 0, 57, 58, 58, 59, 59, 60, 60, 60, 61,
+ 61, 61, 61, 61, 61, 61, 62, 62, 62, 62,
+ 62, 62, 63, 63, 64, 64, 64, 64, 64, 64,
+ 64, 64, 64, 65, 65, 66, 66, 66, 66, 67,
+ 67, 67, 67, 68, 68, 68, 68, 68, 68, 68,
+ 68, 68, 69, 69, 70, 70, 71, 71, 72, 72,
+ 73, 73, 73, 74, 74, 74, 74, 74, 74, 74,
+ 75, 76, 76, 77, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 78, 78,
+ 78, 78, 78, 78, 78, 78, 78, 78, 79, 79,
+ 79, 79, 79, 79, 79
+};
+
+ /* YYR2[YYN] -- Number of symbols on the right hand side of rule YYN. */
+static const yytype_int8 yyr2[] =
+{
+ 0, 2, 2, 0, 1, 1, 1, 1, 0, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 3, 4, 1, 3, 3, 3, 2, 5,
+ 4, 3, 5, 1, 1, 1, 1, 1, 1, 1,
+ 4, 4, 2, 1, 3, 2, 2, 3, 3, 3,
+ 3, 3, 1, 3, 1, 3, 3, 3, 1, 1,
+ 1, 4, 6, 2, 2, 1, 2, 4, 5, 4,
+ 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1
+};
+
+
+enum { YYENOMEM = -2 };
+
+#define yyerrok (yyerrstatus = 0)
+#define yyclearin (yychar = YYEMPTY)
+
+#define YYACCEPT goto yyacceptlab
+#define YYABORT goto yyabortlab
+#define YYERROR goto yyerrorlab
+
+
+#define YYRECOVERING() (!!yyerrstatus)
+
+#define YYBACKUP(Token, Value) \
+ do \
+ if (yychar == YYEMPTY) \
+ { \
+ yychar = (Token); \
+ yylval = (Value); \
+ YYPOPSTACK (yylen); \
+ yystate = *yyssp; \
+ goto yybackup; \
+ } \
+ else \
+ { \
+ yyerror (result, YY_("syntax error: cannot back up")); \
+ YYERROR; \
+ } \
+ while (0)
+
+/* Backward compatibility with an undocumented macro.
+ Use YYerror or YYUNDEF. */
+#define YYERRCODE YYUNDEF
+
+
+/* Enable debugging if requested. */
+#if YYDEBUG
+
+# ifndef YYFPRINTF
+# include <stdio.h> /* INFRINGES ON USER NAME SPACE */
+# define YYFPRINTF fprintf
+# endif
+
+# define YYDPRINTF(Args) \
+do { \
+ if (yydebug) \
+ YYFPRINTF Args; \
+} while (0)
+
+/* This macro is provided for backward compatibility. */
+# ifndef YY_LOCATION_PRINT
+# define YY_LOCATION_PRINT(File, Loc) ((void) 0)
+# endif
+
+
+# define YY_SYMBOL_PRINT(Title, Kind, Value, Location) \
+do { \
+ if (yydebug) \
+ { \
+ YYFPRINTF (stderr, "%s ", Title); \
+ yy_symbol_print (stderr, \
+ Kind, Value, result); \
+ YYFPRINTF (stderr, "\n"); \
+ } \
+} while (0)
+
+
+/*-----------------------------------.
+| Print this symbol's value on YYO. |
+`-----------------------------------*/
+
+static void
+yy_symbol_value_print (FILE *yyo,
+ yysymbol_kind_t yykind, YYSTYPE const * const yyvaluep, JsonPathParseResult **result)
+{
+ FILE *yyoutput = yyo;
+ YY_USE (yyoutput);
+ YY_USE (result);
+ if (!yyvaluep)
+ return;
+# ifdef YYPRINT
+ if (yykind < YYNTOKENS)
+ YYPRINT (yyo, yytoknum[yykind], *yyvaluep);
+# endif
+ YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
+ YY_USE (yykind);
+ YY_IGNORE_MAYBE_UNINITIALIZED_END
+}
+
+
+/*---------------------------.
+| Print this symbol on YYO. |
+`---------------------------*/
+
+static void
+yy_symbol_print (FILE *yyo,
+ yysymbol_kind_t yykind, YYSTYPE const * const yyvaluep, JsonPathParseResult **result)
+{
+ YYFPRINTF (yyo, "%s %s (",
+ yykind < YYNTOKENS ? "token" : "nterm", yysymbol_name (yykind));
+
+ yy_symbol_value_print (yyo, yykind, yyvaluep, result);
+ YYFPRINTF (yyo, ")");
+}
+
+/*------------------------------------------------------------------.
+| yy_stack_print -- Print the state stack from its BOTTOM up to its |
+| TOP (included). |
+`------------------------------------------------------------------*/
+
+static void
+yy_stack_print (yy_state_t *yybottom, yy_state_t *yytop)
+{
+ YYFPRINTF (stderr, "Stack now");
+ for (; yybottom <= yytop; yybottom++)
+ {
+ int yybot = *yybottom;
+ YYFPRINTF (stderr, " %d", yybot);
+ }
+ YYFPRINTF (stderr, "\n");
+}
+
+# define YY_STACK_PRINT(Bottom, Top) \
+do { \
+ if (yydebug) \
+ yy_stack_print ((Bottom), (Top)); \
+} while (0)
+
+
+/*------------------------------------------------.
+| Report that the YYRULE is going to be reduced. |
+`------------------------------------------------*/
+
+static void
+yy_reduce_print (yy_state_t *yyssp, YYSTYPE *yyvsp,
+ int yyrule, JsonPathParseResult **result)
+{
+ int yylno = yyrline[yyrule];
+ int yynrhs = yyr2[yyrule];
+ int yyi;
+ YYFPRINTF (stderr, "Reducing stack by rule %d (line %d):\n",
+ yyrule - 1, yylno);
+ /* The symbols being reduced. */
+ for (yyi = 0; yyi < yynrhs; yyi++)
+ {
+ YYFPRINTF (stderr, " $%d = ", yyi + 1);
+ yy_symbol_print (stderr,
+ YY_ACCESSING_SYMBOL (+yyssp[yyi + 1 - yynrhs]),
+ &yyvsp[(yyi + 1) - (yynrhs)], result);
+ YYFPRINTF (stderr, "\n");
+ }
+}
+
+# define YY_REDUCE_PRINT(Rule) \
+do { \
+ if (yydebug) \
+ yy_reduce_print (yyssp, yyvsp, Rule, result); \
+} while (0)
+
+/* Nonzero means print parse trace. It is left uninitialized so that
+ multiple parsers can coexist. */
+int yydebug;
+#else /* !YYDEBUG */
+# define YYDPRINTF(Args) ((void) 0)
+# define YY_SYMBOL_PRINT(Title, Kind, Value, Location)
+# define YY_STACK_PRINT(Bottom, Top)
+# define YY_REDUCE_PRINT(Rule)
+#endif /* !YYDEBUG */
+
+
+/* YYINITDEPTH -- initial size of the parser's stacks. */
+#ifndef YYINITDEPTH
+# define YYINITDEPTH 200
+#endif
+
+/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only
+ if the built-in stack extension method is used).
+
+ Do not make this value too large; the results are undefined if
+ YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH)
+ evaluated with infinite-precision integer arithmetic. */
+
+#ifndef YYMAXDEPTH
+# define YYMAXDEPTH 10000
+#endif
+
+
+
+
+
+
+/*-----------------------------------------------.
+| Release the memory associated to this symbol. |
+`-----------------------------------------------*/
+
+static void
+yydestruct (const char *yymsg,
+ yysymbol_kind_t yykind, YYSTYPE *yyvaluep, JsonPathParseResult **result)
+{
+ YY_USE (yyvaluep);
+ YY_USE (result);
+ if (!yymsg)
+ yymsg = "Deleting";
+ YY_SYMBOL_PRINT (yymsg, yykind, yyvaluep, yylocationp);
+
+ YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
+ YY_USE (yykind);
+ YY_IGNORE_MAYBE_UNINITIALIZED_END
+}
+
+
+
+
+
+
+/*----------.
+| yyparse. |
+`----------*/
+
+int
+yyparse (JsonPathParseResult **result)
+{
+/* Lookahead token kind. */
+int yychar;
+
+
+/* The semantic value of the lookahead symbol. */
+/* Default value used for initialization, for pacifying older GCCs
+ or non-GCC compilers. */
+YY_INITIAL_VALUE (static YYSTYPE yyval_default;)
+YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default);
+
+ /* Number of syntax errors so far. */
+ int yynerrs = 0;
+
+ yy_state_fast_t yystate = 0;
+ /* Number of tokens to shift before error messages enabled. */
+ int yyerrstatus = 0;
+
+ /* Refer to the stacks through separate pointers, to allow yyoverflow
+ to reallocate them elsewhere. */
+
+ /* Their size. */
+ YYPTRDIFF_T yystacksize = YYINITDEPTH;
+
+ /* The state stack: array, bottom, top. */
+ yy_state_t yyssa[YYINITDEPTH];
+ yy_state_t *yyss = yyssa;
+ yy_state_t *yyssp = yyss;
+
+ /* The semantic value stack: array, bottom, top. */
+ YYSTYPE yyvsa[YYINITDEPTH];
+ YYSTYPE *yyvs = yyvsa;
+ YYSTYPE *yyvsp = yyvs;
+
+ int yyn;
+ /* The return value of yyparse. */
+ int yyresult;
+ /* Lookahead symbol kind. */
+ yysymbol_kind_t yytoken = YYSYMBOL_YYEMPTY;
+ /* The variables used to return semantic value and location from the
+ action routines. */
+ YYSTYPE yyval;
+
+
+
+#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N))
+
+ /* The number of symbols on the RHS of the reduced rule.
+ Keep to zero when no symbol should be popped. */
+ int yylen = 0;
+
+ YYDPRINTF ((stderr, "Starting parse\n"));
+
+ yychar = YYEMPTY; /* Cause a token to be read. */
+ goto yysetstate;
+
+
+/*------------------------------------------------------------.
+| yynewstate -- push a new state, which is found in yystate. |
+`------------------------------------------------------------*/
+yynewstate:
+ /* In all cases, when you get here, the value and location stacks
+ have just been pushed. So pushing a state here evens the stacks. */
+ yyssp++;
+
+
+/*--------------------------------------------------------------------.
+| yysetstate -- set current state (the top of the stack) to yystate. |
+`--------------------------------------------------------------------*/
+yysetstate:
+ YYDPRINTF ((stderr, "Entering state %d\n", yystate));
+ YY_ASSERT (0 <= yystate && yystate < YYNSTATES);
+ YY_IGNORE_USELESS_CAST_BEGIN
+ *yyssp = YY_CAST (yy_state_t, yystate);
+ YY_IGNORE_USELESS_CAST_END
+ YY_STACK_PRINT (yyss, yyssp);
+
+ if (yyss + yystacksize - 1 <= yyssp)
+#if !defined yyoverflow && !defined YYSTACK_RELOCATE
+ goto yyexhaustedlab;
+#else
+ {
+ /* Get the current used size of the three stacks, in elements. */
+ YYPTRDIFF_T yysize = yyssp - yyss + 1;
+
+# if defined yyoverflow
+ {
+ /* Give user a chance to reallocate the stack. Use copies of
+ these so that the &'s don't force the real ones into
+ memory. */
+ yy_state_t *yyss1 = yyss;
+ YYSTYPE *yyvs1 = yyvs;
+
+ /* Each stack pointer address is followed by the size of the
+ data in use in that stack, in bytes. This used to be a
+ conditional around just the two extra args, but that might
+ be undefined if yyoverflow is a macro. */
+ yyoverflow (YY_("memory exhausted"),
+ &yyss1, yysize * YYSIZEOF (*yyssp),
+ &yyvs1, yysize * YYSIZEOF (*yyvsp),
+ &yystacksize);
+ yyss = yyss1;
+ yyvs = yyvs1;
+ }
+# else /* defined YYSTACK_RELOCATE */
+ /* Extend the stack our own way. */
+ if (YYMAXDEPTH <= yystacksize)
+ goto yyexhaustedlab;
+ yystacksize *= 2;
+ if (YYMAXDEPTH < yystacksize)
+ yystacksize = YYMAXDEPTH;
+
+ {
+ yy_state_t *yyss1 = yyss;
+ union yyalloc *yyptr =
+ YY_CAST (union yyalloc *,
+ YYSTACK_ALLOC (YY_CAST (YYSIZE_T, YYSTACK_BYTES (yystacksize))));
+ if (! yyptr)
+ goto yyexhaustedlab;
+ YYSTACK_RELOCATE (yyss_alloc, yyss);
+ YYSTACK_RELOCATE (yyvs_alloc, yyvs);
+# undef YYSTACK_RELOCATE
+ if (yyss1 != yyssa)
+ YYSTACK_FREE (yyss1);
+ }
+# endif
+
+ yyssp = yyss + yysize - 1;
+ yyvsp = yyvs + yysize - 1;
+
+ YY_IGNORE_USELESS_CAST_BEGIN
+ YYDPRINTF ((stderr, "Stack size increased to %ld\n",
+ YY_CAST (long, yystacksize)));
+ YY_IGNORE_USELESS_CAST_END
+
+ if (yyss + yystacksize - 1 <= yyssp)
+ YYABORT;
+ }
+#endif /* !defined yyoverflow && !defined YYSTACK_RELOCATE */
+
+ if (yystate == YYFINAL)
+ YYACCEPT;
+
+ goto yybackup;
+
+
+/*-----------.
+| yybackup. |
+`-----------*/
+yybackup:
+ /* Do appropriate processing given the current state. Read a
+ lookahead token if we need one and don't already have one. */
+
+ /* First try to decide what to do without reference to lookahead token. */
+ yyn = yypact[yystate];
+ if (yypact_value_is_default (yyn))
+ goto yydefault;
+
+ /* Not known => get a lookahead token if don't already have one. */
+
+ /* YYCHAR is either empty, or end-of-input, or a valid lookahead. */
+ if (yychar == YYEMPTY)
+ {
+ YYDPRINTF ((stderr, "Reading a token\n"));
+ yychar = yylex (&yylval);
+ }
+
+ if (yychar <= YYEOF)
+ {
+ yychar = YYEOF;
+ yytoken = YYSYMBOL_YYEOF;
+ YYDPRINTF ((stderr, "Now at end of input.\n"));
+ }
+ else if (yychar == YYerror)
+ {
+ /* The scanner already issued an error message, process directly
+ to error recovery. But do not keep the error token as
+ lookahead, it is too special and may lead us to an endless
+ loop in error recovery. */
+ yychar = YYUNDEF;
+ yytoken = YYSYMBOL_YYerror;
+ goto yyerrlab1;
+ }
+ else
+ {
+ yytoken = YYTRANSLATE (yychar);
+ YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc);
+ }
+
+ /* If the proper action on seeing token YYTOKEN is to reduce or to
+ detect an error, take that action. */
+ yyn += yytoken;
+ if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken)
+ goto yydefault;
+ yyn = yytable[yyn];
+ if (yyn <= 0)
+ {
+ if (yytable_value_is_error (yyn))
+ goto yyerrlab;
+ yyn = -yyn;
+ goto yyreduce;
+ }
+
+ /* Count tokens shifted since error; after three, turn off error
+ status. */
+ if (yyerrstatus)
+ yyerrstatus--;
+
+ /* Shift the lookahead token. */
+ YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc);
+ yystate = yyn;
+ YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
+ *++yyvsp = yylval;
+ YY_IGNORE_MAYBE_UNINITIALIZED_END
+
+ /* Discard the shifted token. */
+ yychar = YYEMPTY;
+ goto yynewstate;
+
+
+/*-----------------------------------------------------------.
+| yydefault -- do the default action for the current state. |
+`-----------------------------------------------------------*/
+yydefault:
+ yyn = yydefact[yystate];
+ if (yyn == 0)
+ goto yyerrlab;
+ goto yyreduce;
+
+
+/*-----------------------------.
+| yyreduce -- do a reduction. |
+`-----------------------------*/
+yyreduce:
+ /* yyn is the number of a rule to reduce with. */
+ yylen = yyr2[yyn];
+
+ /* If YYLEN is nonzero, implement the default value of the action:
+ '$$ = $1'.
+
+ Otherwise, the following line sets YYVAL to garbage.
+ This behavior is undocumented and Bison
+ users should not rely upon it. Assigning to YYVAL
+ unconditionally makes the parser a bit smaller, and it avoids a
+ GCC warning that YYVAL may be used uninitialized. */
+ yyval = yyvsp[1-yylen];
+
+
+ YY_REDUCE_PRINT (yyn);
+ switch (yyn)
+ {
+ case 2: /* result: mode expr_or_predicate */
+#line 130 "jsonpath_gram.y"
+ {
+ *result = palloc(sizeof(JsonPathParseResult));
+ (*result)->expr = (yyvsp[0].value);
+ (*result)->lax = (yyvsp[-1].boolean);
+ (void) yynerrs;
+ }
+#line 1451 "jsonpath_gram.c"
+ break;
+
+ case 3: /* result: %empty */
+#line 136 "jsonpath_gram.y"
+ { *result = NULL; }
+#line 1457 "jsonpath_gram.c"
+ break;
+
+ case 4: /* expr_or_predicate: expr */
+#line 140 "jsonpath_gram.y"
+ { (yyval.value) = (yyvsp[0].value); }
+#line 1463 "jsonpath_gram.c"
+ break;
+
+ case 5: /* expr_or_predicate: predicate */
+#line 141 "jsonpath_gram.y"
+ { (yyval.value) = (yyvsp[0].value); }
+#line 1469 "jsonpath_gram.c"
+ break;
+
+ case 6: /* mode: STRICT_P */
+#line 145 "jsonpath_gram.y"
+ { (yyval.boolean) = false; }
+#line 1475 "jsonpath_gram.c"
+ break;
+
+ case 7: /* mode: LAX_P */
+#line 146 "jsonpath_gram.y"
+ { (yyval.boolean) = true; }
+#line 1481 "jsonpath_gram.c"
+ break;
+
+ case 8: /* mode: %empty */
+#line 147 "jsonpath_gram.y"
+ { (yyval.boolean) = true; }
+#line 1487 "jsonpath_gram.c"
+ break;
+
+ case 9: /* scalar_value: STRING_P */
+#line 151 "jsonpath_gram.y"
+ { (yyval.value) = makeItemString(&(yyvsp[0].str)); }
+#line 1493 "jsonpath_gram.c"
+ break;
+
+ case 10: /* scalar_value: NULL_P */
+#line 152 "jsonpath_gram.y"
+ { (yyval.value) = makeItemString(NULL); }
+#line 1499 "jsonpath_gram.c"
+ break;
+
+ case 11: /* scalar_value: TRUE_P */
+#line 153 "jsonpath_gram.y"
+ { (yyval.value) = makeItemBool(true); }
+#line 1505 "jsonpath_gram.c"
+ break;
+
+ case 12: /* scalar_value: FALSE_P */
+#line 154 "jsonpath_gram.y"
+ { (yyval.value) = makeItemBool(false); }
+#line 1511 "jsonpath_gram.c"
+ break;
+
+ case 13: /* scalar_value: NUMERIC_P */
+#line 155 "jsonpath_gram.y"
+ { (yyval.value) = makeItemNumeric(&(yyvsp[0].str)); }
+#line 1517 "jsonpath_gram.c"
+ break;
+
+ case 14: /* scalar_value: INT_P */
+#line 156 "jsonpath_gram.y"
+ { (yyval.value) = makeItemNumeric(&(yyvsp[0].str)); }
+#line 1523 "jsonpath_gram.c"
+ break;
+
+ case 15: /* scalar_value: VARIABLE_P */
+#line 157 "jsonpath_gram.y"
+ { (yyval.value) = makeItemVariable(&(yyvsp[0].str)); }
+#line 1529 "jsonpath_gram.c"
+ break;
+
+ case 16: /* comp_op: EQUAL_P */
+#line 161 "jsonpath_gram.y"
+ { (yyval.optype) = jpiEqual; }
+#line 1535 "jsonpath_gram.c"
+ break;
+
+ case 17: /* comp_op: NOTEQUAL_P */
+#line 162 "jsonpath_gram.y"
+ { (yyval.optype) = jpiNotEqual; }
+#line 1541 "jsonpath_gram.c"
+ break;
+
+ case 18: /* comp_op: LESS_P */
+#line 163 "jsonpath_gram.y"
+ { (yyval.optype) = jpiLess; }
+#line 1547 "jsonpath_gram.c"
+ break;
+
+ case 19: /* comp_op: GREATER_P */
+#line 164 "jsonpath_gram.y"
+ { (yyval.optype) = jpiGreater; }
+#line 1553 "jsonpath_gram.c"
+ break;
+
+ case 20: /* comp_op: LESSEQUAL_P */
+#line 165 "jsonpath_gram.y"
+ { (yyval.optype) = jpiLessOrEqual; }
+#line 1559 "jsonpath_gram.c"
+ break;
+
+ case 21: /* comp_op: GREATEREQUAL_P */
+#line 166 "jsonpath_gram.y"
+ { (yyval.optype) = jpiGreaterOrEqual; }
+#line 1565 "jsonpath_gram.c"
+ break;
+
+ case 22: /* delimited_predicate: '(' predicate ')' */
+#line 170 "jsonpath_gram.y"
+ { (yyval.value) = (yyvsp[-1].value); }
+#line 1571 "jsonpath_gram.c"
+ break;
+
+ case 23: /* delimited_predicate: EXISTS_P '(' expr ')' */
+#line 171 "jsonpath_gram.y"
+ { (yyval.value) = makeItemUnary(jpiExists, (yyvsp[-1].value)); }
+#line 1577 "jsonpath_gram.c"
+ break;
+
+ case 24: /* predicate: delimited_predicate */
+#line 175 "jsonpath_gram.y"
+ { (yyval.value) = (yyvsp[0].value); }
+#line 1583 "jsonpath_gram.c"
+ break;
+
+ case 25: /* predicate: expr comp_op expr */
+#line 176 "jsonpath_gram.y"
+ { (yyval.value) = makeItemBinary((yyvsp[-1].optype), (yyvsp[-2].value), (yyvsp[0].value)); }
+#line 1589 "jsonpath_gram.c"
+ break;
+
+ case 26: /* predicate: predicate AND_P predicate */
+#line 177 "jsonpath_gram.y"
+ { (yyval.value) = makeItemBinary(jpiAnd, (yyvsp[-2].value), (yyvsp[0].value)); }
+#line 1595 "jsonpath_gram.c"
+ break;
+
+ case 27: /* predicate: predicate OR_P predicate */
+#line 178 "jsonpath_gram.y"
+ { (yyval.value) = makeItemBinary(jpiOr, (yyvsp[-2].value), (yyvsp[0].value)); }
+#line 1601 "jsonpath_gram.c"
+ break;
+
+ case 28: /* predicate: NOT_P delimited_predicate */
+#line 179 "jsonpath_gram.y"
+ { (yyval.value) = makeItemUnary(jpiNot, (yyvsp[0].value)); }
+#line 1607 "jsonpath_gram.c"
+ break;
+
+ case 29: /* predicate: '(' predicate ')' IS_P UNKNOWN_P */
+#line 181 "jsonpath_gram.y"
+ { (yyval.value) = makeItemUnary(jpiIsUnknown, (yyvsp[-3].value)); }
+#line 1613 "jsonpath_gram.c"
+ break;
+
+ case 30: /* predicate: expr STARTS_P WITH_P starts_with_initial */
+#line 183 "jsonpath_gram.y"
+ { (yyval.value) = makeItemBinary(jpiStartsWith, (yyvsp[-3].value), (yyvsp[0].value)); }
+#line 1619 "jsonpath_gram.c"
+ break;
+
+ case 31: /* predicate: expr LIKE_REGEX_P STRING_P */
+#line 184 "jsonpath_gram.y"
+ { (yyval.value) = makeItemLikeRegex((yyvsp[-2].value), &(yyvsp[0].str), NULL); }
+#line 1625 "jsonpath_gram.c"
+ break;
+
+ case 32: /* predicate: expr LIKE_REGEX_P STRING_P FLAG_P STRING_P */
+#line 186 "jsonpath_gram.y"
+ { (yyval.value) = makeItemLikeRegex((yyvsp[-4].value), &(yyvsp[-2].str), &(yyvsp[0].str)); }
+#line 1631 "jsonpath_gram.c"
+ break;
+
+ case 33: /* starts_with_initial: STRING_P */
+#line 190 "jsonpath_gram.y"
+ { (yyval.value) = makeItemString(&(yyvsp[0].str)); }
+#line 1637 "jsonpath_gram.c"
+ break;
+
+ case 34: /* starts_with_initial: VARIABLE_P */
+#line 191 "jsonpath_gram.y"
+ { (yyval.value) = makeItemVariable(&(yyvsp[0].str)); }
+#line 1643 "jsonpath_gram.c"
+ break;
+
+ case 35: /* path_primary: scalar_value */
+#line 195 "jsonpath_gram.y"
+ { (yyval.value) = (yyvsp[0].value); }
+#line 1649 "jsonpath_gram.c"
+ break;
+
+ case 36: /* path_primary: '$' */
+#line 196 "jsonpath_gram.y"
+ { (yyval.value) = makeItemType(jpiRoot); }
+#line 1655 "jsonpath_gram.c"
+ break;
+
+ case 37: /* path_primary: '@' */
+#line 197 "jsonpath_gram.y"
+ { (yyval.value) = makeItemType(jpiCurrent); }
+#line 1661 "jsonpath_gram.c"
+ break;
+
+ case 38: /* path_primary: LAST_P */
+#line 198 "jsonpath_gram.y"
+ { (yyval.value) = makeItemType(jpiLast); }
+#line 1667 "jsonpath_gram.c"
+ break;
+
+ case 39: /* accessor_expr: path_primary */
+#line 202 "jsonpath_gram.y"
+ { (yyval.elems) = list_make1((yyvsp[0].value)); }
+#line 1673 "jsonpath_gram.c"
+ break;
+
+ case 40: /* accessor_expr: '(' expr ')' accessor_op */
+#line 203 "jsonpath_gram.y"
+ { (yyval.elems) = list_make2((yyvsp[-2].value), (yyvsp[0].value)); }
+#line 1679 "jsonpath_gram.c"
+ break;
+
+ case 41: /* accessor_expr: '(' predicate ')' accessor_op */
+#line 204 "jsonpath_gram.y"
+ { (yyval.elems) = list_make2((yyvsp[-2].value), (yyvsp[0].value)); }
+#line 1685 "jsonpath_gram.c"
+ break;
+
+ case 42: /* accessor_expr: accessor_expr accessor_op */
+#line 205 "jsonpath_gram.y"
+ { (yyval.elems) = lappend((yyvsp[-1].elems), (yyvsp[0].value)); }
+#line 1691 "jsonpath_gram.c"
+ break;
+
+ case 43: /* expr: accessor_expr */
+#line 209 "jsonpath_gram.y"
+ { (yyval.value) = makeItemList((yyvsp[0].elems)); }
+#line 1697 "jsonpath_gram.c"
+ break;
+
+ case 44: /* expr: '(' expr ')' */
+#line 210 "jsonpath_gram.y"
+ { (yyval.value) = (yyvsp[-1].value); }
+#line 1703 "jsonpath_gram.c"
+ break;
+
+ case 45: /* expr: '+' expr */
+#line 211 "jsonpath_gram.y"
+ { (yyval.value) = makeItemUnary(jpiPlus, (yyvsp[0].value)); }
+#line 1709 "jsonpath_gram.c"
+ break;
+
+ case 46: /* expr: '-' expr */
+#line 212 "jsonpath_gram.y"
+ { (yyval.value) = makeItemUnary(jpiMinus, (yyvsp[0].value)); }
+#line 1715 "jsonpath_gram.c"
+ break;
+
+ case 47: /* expr: expr '+' expr */
+#line 213 "jsonpath_gram.y"
+ { (yyval.value) = makeItemBinary(jpiAdd, (yyvsp[-2].value), (yyvsp[0].value)); }
+#line 1721 "jsonpath_gram.c"
+ break;
+
+ case 48: /* expr: expr '-' expr */
+#line 214 "jsonpath_gram.y"
+ { (yyval.value) = makeItemBinary(jpiSub, (yyvsp[-2].value), (yyvsp[0].value)); }
+#line 1727 "jsonpath_gram.c"
+ break;
+
+ case 49: /* expr: expr '*' expr */
+#line 215 "jsonpath_gram.y"
+ { (yyval.value) = makeItemBinary(jpiMul, (yyvsp[-2].value), (yyvsp[0].value)); }
+#line 1733 "jsonpath_gram.c"
+ break;
+
+ case 50: /* expr: expr '/' expr */
+#line 216 "jsonpath_gram.y"
+ { (yyval.value) = makeItemBinary(jpiDiv, (yyvsp[-2].value), (yyvsp[0].value)); }
+#line 1739 "jsonpath_gram.c"
+ break;
+
+ case 51: /* expr: expr '%' expr */
+#line 217 "jsonpath_gram.y"
+ { (yyval.value) = makeItemBinary(jpiMod, (yyvsp[-2].value), (yyvsp[0].value)); }
+#line 1745 "jsonpath_gram.c"
+ break;
+
+ case 52: /* index_elem: expr */
+#line 221 "jsonpath_gram.y"
+ { (yyval.value) = makeItemBinary(jpiSubscript, (yyvsp[0].value), NULL); }
+#line 1751 "jsonpath_gram.c"
+ break;
+
+ case 53: /* index_elem: expr TO_P expr */
+#line 222 "jsonpath_gram.y"
+ { (yyval.value) = makeItemBinary(jpiSubscript, (yyvsp[-2].value), (yyvsp[0].value)); }
+#line 1757 "jsonpath_gram.c"
+ break;
+
+ case 54: /* index_list: index_elem */
+#line 226 "jsonpath_gram.y"
+ { (yyval.indexs) = list_make1((yyvsp[0].value)); }
+#line 1763 "jsonpath_gram.c"
+ break;
+
+ case 55: /* index_list: index_list ',' index_elem */
+#line 227 "jsonpath_gram.y"
+ { (yyval.indexs) = lappend((yyvsp[-2].indexs), (yyvsp[0].value)); }
+#line 1769 "jsonpath_gram.c"
+ break;
+
+ case 56: /* array_accessor: '[' '*' ']' */
+#line 231 "jsonpath_gram.y"
+ { (yyval.value) = makeItemType(jpiAnyArray); }
+#line 1775 "jsonpath_gram.c"
+ break;
+
+ case 57: /* array_accessor: '[' index_list ']' */
+#line 232 "jsonpath_gram.y"
+ { (yyval.value) = makeIndexArray((yyvsp[-1].indexs)); }
+#line 1781 "jsonpath_gram.c"
+ break;
+
+ case 58: /* any_level: INT_P */
+#line 236 "jsonpath_gram.y"
+ { (yyval.integer) = pg_strtoint32((yyvsp[0].str).val); }
+#line 1787 "jsonpath_gram.c"
+ break;
+
+ case 59: /* any_level: LAST_P */
+#line 237 "jsonpath_gram.y"
+ { (yyval.integer) = -1; }
+#line 1793 "jsonpath_gram.c"
+ break;
+
+ case 60: /* any_path: ANY_P */
+#line 241 "jsonpath_gram.y"
+ { (yyval.value) = makeAny(0, -1); }
+#line 1799 "jsonpath_gram.c"
+ break;
+
+ case 61: /* any_path: ANY_P '{' any_level '}' */
+#line 242 "jsonpath_gram.y"
+ { (yyval.value) = makeAny((yyvsp[-1].integer), (yyvsp[-1].integer)); }
+#line 1805 "jsonpath_gram.c"
+ break;
+
+ case 62: /* any_path: ANY_P '{' any_level TO_P any_level '}' */
+#line 244 "jsonpath_gram.y"
+ { (yyval.value) = makeAny((yyvsp[-3].integer), (yyvsp[-1].integer)); }
+#line 1811 "jsonpath_gram.c"
+ break;
+
+ case 63: /* accessor_op: '.' key */
+#line 248 "jsonpath_gram.y"
+ { (yyval.value) = (yyvsp[0].value); }
+#line 1817 "jsonpath_gram.c"
+ break;
+
+ case 64: /* accessor_op: '.' '*' */
+#line 249 "jsonpath_gram.y"
+ { (yyval.value) = makeItemType(jpiAnyKey); }
+#line 1823 "jsonpath_gram.c"
+ break;
+
+ case 65: /* accessor_op: array_accessor */
+#line 250 "jsonpath_gram.y"
+ { (yyval.value) = (yyvsp[0].value); }
+#line 1829 "jsonpath_gram.c"
+ break;
+
+ case 66: /* accessor_op: '.' any_path */
+#line 251 "jsonpath_gram.y"
+ { (yyval.value) = (yyvsp[0].value); }
+#line 1835 "jsonpath_gram.c"
+ break;
+
+ case 67: /* accessor_op: '.' method '(' ')' */
+#line 252 "jsonpath_gram.y"
+ { (yyval.value) = makeItemType((yyvsp[-2].optype)); }
+#line 1841 "jsonpath_gram.c"
+ break;
+
+ case 68: /* accessor_op: '.' DATETIME_P '(' opt_datetime_template ')' */
+#line 254 "jsonpath_gram.y"
+ { (yyval.value) = makeItemUnary(jpiDatetime, (yyvsp[-1].value)); }
+#line 1847 "jsonpath_gram.c"
+ break;
+
+ case 69: /* accessor_op: '?' '(' predicate ')' */
+#line 255 "jsonpath_gram.y"
+ { (yyval.value) = makeItemUnary(jpiFilter, (yyvsp[-1].value)); }
+#line 1853 "jsonpath_gram.c"
+ break;
+
+ case 70: /* datetime_template: STRING_P */
+#line 259 "jsonpath_gram.y"
+ { (yyval.value) = makeItemString(&(yyvsp[0].str)); }
+#line 1859 "jsonpath_gram.c"
+ break;
+
+ case 71: /* opt_datetime_template: datetime_template */
+#line 263 "jsonpath_gram.y"
+ { (yyval.value) = (yyvsp[0].value); }
+#line 1865 "jsonpath_gram.c"
+ break;
+
+ case 72: /* opt_datetime_template: %empty */
+#line 264 "jsonpath_gram.y"
+ { (yyval.value) = NULL; }
+#line 1871 "jsonpath_gram.c"
+ break;
+
+ case 73: /* key: key_name */
+#line 268 "jsonpath_gram.y"
+ { (yyval.value) = makeItemKey(&(yyvsp[0].str)); }
+#line 1877 "jsonpath_gram.c"
+ break;
+
+ case 98: /* method: ABS_P */
+#line 299 "jsonpath_gram.y"
+ { (yyval.optype) = jpiAbs; }
+#line 1883 "jsonpath_gram.c"
+ break;
+
+ case 99: /* method: SIZE_P */
+#line 300 "jsonpath_gram.y"
+ { (yyval.optype) = jpiSize; }
+#line 1889 "jsonpath_gram.c"
+ break;
+
+ case 100: /* method: TYPE_P */
+#line 301 "jsonpath_gram.y"
+ { (yyval.optype) = jpiType; }
+#line 1895 "jsonpath_gram.c"
+ break;
+
+ case 101: /* method: FLOOR_P */
+#line 302 "jsonpath_gram.y"
+ { (yyval.optype) = jpiFloor; }
+#line 1901 "jsonpath_gram.c"
+ break;
+
+ case 102: /* method: DOUBLE_P */
+#line 303 "jsonpath_gram.y"
+ { (yyval.optype) = jpiDouble; }
+#line 1907 "jsonpath_gram.c"
+ break;
+
+ case 103: /* method: CEILING_P */
+#line 304 "jsonpath_gram.y"
+ { (yyval.optype) = jpiCeiling; }
+#line 1913 "jsonpath_gram.c"
+ break;
+
+ case 104: /* method: KEYVALUE_P */
+#line 305 "jsonpath_gram.y"
+ { (yyval.optype) = jpiKeyValue; }
+#line 1919 "jsonpath_gram.c"
+ break;
+
+
+#line 1923 "jsonpath_gram.c"
+
+ default: break;
+ }
+ /* User semantic actions sometimes alter yychar, and that requires
+ that yytoken be updated with the new translation. We take the
+ approach of translating immediately before every use of yytoken.
+ One alternative is translating here after every semantic action,
+ but that translation would be missed if the semantic action invokes
+ YYABORT, YYACCEPT, or YYERROR immediately after altering yychar or
+ if it invokes YYBACKUP. In the case of YYABORT or YYACCEPT, an
+ incorrect destructor might then be invoked immediately. In the
+ case of YYERROR or YYBACKUP, subsequent parser actions might lead
+ to an incorrect destructor call or verbose syntax error message
+ before the lookahead is translated. */
+ YY_SYMBOL_PRINT ("-> $$ =", YY_CAST (yysymbol_kind_t, yyr1[yyn]), &yyval, &yyloc);
+
+ YYPOPSTACK (yylen);
+ yylen = 0;
+
+ *++yyvsp = yyval;
+
+ /* Now 'shift' the result of the reduction. Determine what state
+ that goes to, based on the state we popped back to and the rule
+ number reduced by. */
+ {
+ const int yylhs = yyr1[yyn] - YYNTOKENS;
+ const int yyi = yypgoto[yylhs] + *yyssp;
+ yystate = (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyssp
+ ? yytable[yyi]
+ : yydefgoto[yylhs]);
+ }
+
+ goto yynewstate;
+
+
+/*--------------------------------------.
+| yyerrlab -- here on detecting error. |
+`--------------------------------------*/
+yyerrlab:
+ /* Make sure we have latest lookahead translation. See comments at
+ user semantic actions for why this is necessary. */
+ yytoken = yychar == YYEMPTY ? YYSYMBOL_YYEMPTY : YYTRANSLATE (yychar);
+ /* If not already recovering from an error, report this error. */
+ if (!yyerrstatus)
+ {
+ ++yynerrs;
+ yyerror (result, YY_("syntax error"));
+ }
+
+ if (yyerrstatus == 3)
+ {
+ /* If just tried and failed to reuse lookahead token after an
+ error, discard it. */
+
+ if (yychar <= YYEOF)
+ {
+ /* Return failure if at end of input. */
+ if (yychar == YYEOF)
+ YYABORT;
+ }
+ else
+ {
+ yydestruct ("Error: discarding",
+ yytoken, &yylval, result);
+ yychar = YYEMPTY;
+ }
+ }
+
+ /* Else will try to reuse lookahead token after shifting the error
+ token. */
+ goto yyerrlab1;
+
+
+/*---------------------------------------------------.
+| yyerrorlab -- error raised explicitly by YYERROR. |
+`---------------------------------------------------*/
+yyerrorlab:
+ /* Pacify compilers when the user code never invokes YYERROR and the
+ label yyerrorlab therefore never appears in user code. */
+ if (0)
+ YYERROR;
+
+ /* Do not reclaim the symbols of the rule whose action triggered
+ this YYERROR. */
+ YYPOPSTACK (yylen);
+ yylen = 0;
+ YY_STACK_PRINT (yyss, yyssp);
+ yystate = *yyssp;
+ goto yyerrlab1;
+
+
+/*-------------------------------------------------------------.
+| yyerrlab1 -- common code for both syntax error and YYERROR. |
+`-------------------------------------------------------------*/
+yyerrlab1:
+ yyerrstatus = 3; /* Each real token shifted decrements this. */
+
+ /* Pop stack until we find a state that shifts the error token. */
+ for (;;)
+ {
+ yyn = yypact[yystate];
+ if (!yypact_value_is_default (yyn))
+ {
+ yyn += YYSYMBOL_YYerror;
+ if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYSYMBOL_YYerror)
+ {
+ yyn = yytable[yyn];
+ if (0 < yyn)
+ break;
+ }
+ }
+
+ /* Pop the current state because it cannot handle the error token. */
+ if (yyssp == yyss)
+ YYABORT;
+
+
+ yydestruct ("Error: popping",
+ YY_ACCESSING_SYMBOL (yystate), yyvsp, result);
+ YYPOPSTACK (1);
+ yystate = *yyssp;
+ YY_STACK_PRINT (yyss, yyssp);
+ }
+
+ YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
+ *++yyvsp = yylval;
+ YY_IGNORE_MAYBE_UNINITIALIZED_END
+
+
+ /* Shift the error token. */
+ YY_SYMBOL_PRINT ("Shifting", YY_ACCESSING_SYMBOL (yyn), yyvsp, yylsp);
+
+ yystate = yyn;
+ goto yynewstate;
+
+
+/*-------------------------------------.
+| yyacceptlab -- YYACCEPT comes here. |
+`-------------------------------------*/
+yyacceptlab:
+ yyresult = 0;
+ goto yyreturn;
+
+
+/*-----------------------------------.
+| yyabortlab -- YYABORT comes here. |
+`-----------------------------------*/
+yyabortlab:
+ yyresult = 1;
+ goto yyreturn;
+
+
+#if !defined yyoverflow
+/*-------------------------------------------------.
+| yyexhaustedlab -- memory exhaustion comes here. |
+`-------------------------------------------------*/
+yyexhaustedlab:
+ yyerror (result, YY_("memory exhausted"));
+ yyresult = 2;
+ goto yyreturn;
+#endif
+
+
+/*-------------------------------------------------------.
+| yyreturn -- parsing is finished, clean up and return. |
+`-------------------------------------------------------*/
+yyreturn:
+ if (yychar != YYEMPTY)
+ {
+ /* Make sure we have latest lookahead translation. See comments at
+ user semantic actions for why this is necessary. */
+ yytoken = YYTRANSLATE (yychar);
+ yydestruct ("Cleanup: discarding lookahead",
+ yytoken, &yylval, result);
+ }
+ /* Do not reclaim the symbols of the rule whose action triggered
+ this YYABORT or YYACCEPT. */
+ YYPOPSTACK (yylen);
+ YY_STACK_PRINT (yyss, yyssp);
+ while (yyssp != yyss)
+ {
+ yydestruct ("Cleanup: popping",
+ YY_ACCESSING_SYMBOL (+*yyssp), yyvsp, result);
+ YYPOPSTACK (1);
+ }
+#ifndef yyoverflow
+ if (yyss != yyssa)
+ YYSTACK_FREE (yyss);
+#endif
+
+ return yyresult;
+}
+
+#line 307 "jsonpath_gram.y"
+
+
+/*
+ * The helper functions below allocate and fill JsonPathParseItem's of various
+ * types.
+ */
+
+static JsonPathParseItem *
+makeItemType(JsonPathItemType type)
+{
+ JsonPathParseItem *v = palloc(sizeof(*v));
+
+ CHECK_FOR_INTERRUPTS();
+
+ v->type = type;
+ v->next = NULL;
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemString(JsonPathString *s)
+{
+ JsonPathParseItem *v;
+
+ if (s == NULL)
+ {
+ v = makeItemType(jpiNull);
+ }
+ else
+ {
+ v = makeItemType(jpiString);
+ v->value.string.val = s->val;
+ v->value.string.len = s->len;
+ }
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemVariable(JsonPathString *s)
+{
+ JsonPathParseItem *v;
+
+ v = makeItemType(jpiVariable);
+ v->value.string.val = s->val;
+ v->value.string.len = s->len;
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemKey(JsonPathString *s)
+{
+ JsonPathParseItem *v;
+
+ v = makeItemString(s);
+ v->type = jpiKey;
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemNumeric(JsonPathString *s)
+{
+ JsonPathParseItem *v;
+
+ v = makeItemType(jpiNumeric);
+ v->value.numeric =
+ DatumGetNumeric(DirectFunctionCall3(numeric_in,
+ CStringGetDatum(s->val),
+ ObjectIdGetDatum(InvalidOid),
+ Int32GetDatum(-1)));
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemBool(bool val)
+{
+ JsonPathParseItem *v = makeItemType(jpiBool);
+
+ v->value.boolean = val;
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemBinary(JsonPathItemType type, JsonPathParseItem *la, JsonPathParseItem *ra)
+{
+ JsonPathParseItem *v = makeItemType(type);
+
+ v->value.args.left = la;
+ v->value.args.right = ra;
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemUnary(JsonPathItemType type, JsonPathParseItem *a)
+{
+ JsonPathParseItem *v;
+
+ if (type == jpiPlus && a->type == jpiNumeric && !a->next)
+ return a;
+
+ if (type == jpiMinus && a->type == jpiNumeric && !a->next)
+ {
+ v = makeItemType(jpiNumeric);
+ v->value.numeric =
+ DatumGetNumeric(DirectFunctionCall1(numeric_uminus,
+ NumericGetDatum(a->value.numeric)));
+ return v;
+ }
+
+ v = makeItemType(type);
+
+ v->value.arg = a;
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemList(List *list)
+{
+ JsonPathParseItem *head,
+ *end;
+ ListCell *cell;
+
+ head = end = (JsonPathParseItem *) linitial(list);
+
+ if (list_length(list) == 1)
+ return head;
+
+ /* append items to the end of already existing list */
+ while (end->next)
+ end = end->next;
+
+ for_each_from(cell, list, 1)
+ {
+ JsonPathParseItem *c = (JsonPathParseItem *) lfirst(cell);
+
+ end->next = c;
+ end = c;
+ }
+
+ return head;
+}
+
+static JsonPathParseItem *
+makeIndexArray(List *list)
+{
+ JsonPathParseItem *v = makeItemType(jpiIndexArray);
+ ListCell *cell;
+ int i = 0;
+
+ Assert(list_length(list) > 0);
+ v->value.array.nelems = list_length(list);
+
+ v->value.array.elems = palloc(sizeof(v->value.array.elems[0]) *
+ v->value.array.nelems);
+
+ foreach(cell, list)
+ {
+ JsonPathParseItem *jpi = lfirst(cell);
+
+ Assert(jpi->type == jpiSubscript);
+
+ v->value.array.elems[i].from = jpi->value.args.left;
+ v->value.array.elems[i++].to = jpi->value.args.right;
+ }
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeAny(int first, int last)
+{
+ JsonPathParseItem *v = makeItemType(jpiAny);
+
+ v->value.anybounds.first = (first >= 0) ? first : PG_UINT32_MAX;
+ v->value.anybounds.last = (last >= 0) ? last : PG_UINT32_MAX;
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
+ JsonPathString *flags)
+{
+ JsonPathParseItem *v = makeItemType(jpiLikeRegex);
+ int i;
+ int cflags;
+
+ v->value.like_regex.expr = expr;
+ v->value.like_regex.pattern = pattern->val;
+ v->value.like_regex.patternlen = pattern->len;
+
+ /* Parse the flags string, convert to bitmask. Duplicate flags are OK. */
+ v->value.like_regex.flags = 0;
+ for (i = 0; flags && i < flags->len; i++)
+ {
+ switch (flags->val[i])
+ {
+ case 'i':
+ v->value.like_regex.flags |= JSP_REGEX_ICASE;
+ break;
+ case 's':
+ v->value.like_regex.flags |= JSP_REGEX_DOTALL;
+ break;
+ case 'm':
+ v->value.like_regex.flags |= JSP_REGEX_MLINE;
+ break;
+ case 'x':
+ v->value.like_regex.flags |= JSP_REGEX_WSPACE;
+ break;
+ case 'q':
+ v->value.like_regex.flags |= JSP_REGEX_QUOTE;
+ break;
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("invalid input syntax for type %s", "jsonpath"),
+ errdetail("Unrecognized flag character \"%.*s\" in LIKE_REGEX predicate.",
+ pg_mblen(flags->val + i), flags->val + i)));
+ break;
+ }
+ }
+
+ /* Convert flags to what RE_compile_and_cache needs */
+ cflags = jspConvertRegexFlags(v->value.like_regex.flags);
+
+ /* check regex validity */
+ (void) RE_compile_and_cache(cstring_to_text_with_len(pattern->val,
+ pattern->len),
+ cflags, DEFAULT_COLLATION_OID);
+
+ return v;
+}
+
+/*
+ * Convert from XQuery regex flags to those recognized by our regex library.
+ */
+int
+jspConvertRegexFlags(uint32 xflags)
+{
+ /* By default, XQuery is very nearly the same as Spencer's AREs */
+ int cflags = REG_ADVANCED;
+
+ /* Ignore-case means the same thing, too, modulo locale issues */
+ if (xflags & JSP_REGEX_ICASE)
+ cflags |= REG_ICASE;
+
+ /* Per XQuery spec, if 'q' is specified then 'm', 's', 'x' are ignored */
+ if (xflags & JSP_REGEX_QUOTE)
+ {
+ cflags &= ~REG_ADVANCED;
+ cflags |= REG_QUOTE;
+ }
+ else
+ {
+ /* Note that dotall mode is the default in POSIX */
+ if (!(xflags & JSP_REGEX_DOTALL))
+ cflags |= REG_NLSTOP;
+ if (xflags & JSP_REGEX_MLINE)
+ cflags |= REG_NLANCH;
+
+ /*
+ * XQuery's 'x' mode is related to Spencer's expanded mode, but it's
+ * not really enough alike to justify treating JSP_REGEX_WSPACE as
+ * REG_EXPANDED. For now we treat 'x' as unimplemented; perhaps in
+ * future we'll modify the regex library to have an option for
+ * XQuery-style ignore-whitespace mode.
+ */
+ if (xflags & JSP_REGEX_WSPACE)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("XQuery \"x\" flag (expanded regular expressions) is not implemented")));
+ }
+
+ /*
+ * We'll never need sub-match details at execution. While
+ * RE_compile_and_execute would set this flag anyway, force it on here to
+ * ensure that the regex cache entries created by makeItemLikeRegex are
+ * useful.
+ */
+ cflags |= REG_NOSUB;
+
+ return cflags;
+}
+
+/*
+ * jsonpath_scan.l is compiled as part of jsonpath_gram.y. Currently, this is
+ * unavoidable because jsonpath_gram does not create a .h file to export its
+ * token symbols. If these files ever grow large enough to be worth compiling
+ * separately, that could be fixed; but for now it seems like useless
+ * complication.
+ */
+
+#include "jsonpath_scan.c"
diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y
new file mode 100644
index 0000000..91e4308
--- /dev/null
+++ b/src/backend/utils/adt/jsonpath_gram.y
@@ -0,0 +1,606 @@
+%{
+/*-------------------------------------------------------------------------
+ *
+ * jsonpath_gram.y
+ * Grammar definitions for jsonpath datatype
+ *
+ * Transforms tokenized jsonpath into tree of JsonPathParseItem structs.
+ *
+ * Copyright (c) 2019-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/jsonpath_gram.y
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "catalog/pg_collation.h"
+#include "fmgr.h"
+#include "miscadmin.h"
+#include "nodes/pg_list.h"
+#include "regex/regex.h"
+#include "utils/builtins.h"
+#include "utils/jsonpath.h"
+
+/* struct JsonPathString is shared between scan and gram */
+typedef struct JsonPathString
+{
+ char *val;
+ int len;
+ int total;
+} JsonPathString;
+
+union YYSTYPE;
+
+/* flex 2.5.4 doesn't bother with a decl for this */
+int jsonpath_yylex(union YYSTYPE *yylval_param);
+int jsonpath_yyparse(JsonPathParseResult **result);
+void jsonpath_yyerror(JsonPathParseResult **result, const char *message);
+
+static JsonPathParseItem *makeItemType(JsonPathItemType type);
+static JsonPathParseItem *makeItemString(JsonPathString *s);
+static JsonPathParseItem *makeItemVariable(JsonPathString *s);
+static JsonPathParseItem *makeItemKey(JsonPathString *s);
+static JsonPathParseItem *makeItemNumeric(JsonPathString *s);
+static JsonPathParseItem *makeItemBool(bool val);
+static JsonPathParseItem *makeItemBinary(JsonPathItemType type,
+ JsonPathParseItem *la,
+ JsonPathParseItem *ra);
+static JsonPathParseItem *makeItemUnary(JsonPathItemType type,
+ JsonPathParseItem *a);
+static JsonPathParseItem *makeItemList(List *list);
+static JsonPathParseItem *makeIndexArray(List *list);
+static JsonPathParseItem *makeAny(int first, int last);
+static JsonPathParseItem *makeItemLikeRegex(JsonPathParseItem *expr,
+ JsonPathString *pattern,
+ JsonPathString *flags);
+
+/*
+ * Bison doesn't allocate anything that needs to live across parser calls,
+ * so we can easily have it use palloc instead of malloc. This prevents
+ * memory leaks if we error out during parsing. Note this only works with
+ * bison >= 2.0. However, in bison 1.875 the default is to use alloca()
+ * if possible, so there's not really much problem anyhow, at least if
+ * you're building with gcc.
+ */
+#define YYMALLOC palloc
+#define YYFREE pfree
+
+%}
+
+/* BISON Declarations */
+%pure-parser
+%expect 0
+%name-prefix="jsonpath_yy"
+%parse-param {JsonPathParseResult **result}
+
+%union
+{
+ JsonPathString str;
+ List *elems; /* list of JsonPathParseItem */
+	List			   *indexs;	/* list of JsonPathParseItem (jpiSubscript) */
+ JsonPathParseItem *value;
+ JsonPathParseResult *result;
+ JsonPathItemType optype;
+ bool boolean;
+ int integer;
+}
+
+%token <str> TO_P NULL_P TRUE_P FALSE_P IS_P UNKNOWN_P EXISTS_P
+%token <str> IDENT_P STRING_P NUMERIC_P INT_P VARIABLE_P
+%token <str> OR_P AND_P NOT_P
+%token <str> LESS_P LESSEQUAL_P EQUAL_P NOTEQUAL_P GREATEREQUAL_P GREATER_P
+%token <str> ANY_P STRICT_P LAX_P LAST_P STARTS_P WITH_P LIKE_REGEX_P FLAG_P
+%token <str> ABS_P SIZE_P TYPE_P FLOOR_P DOUBLE_P CEILING_P KEYVALUE_P
+%token <str> DATETIME_P
+
+%type <result> result
+
+%type <value> scalar_value path_primary expr array_accessor
+ any_path accessor_op key predicate delimited_predicate
+ index_elem starts_with_initial expr_or_predicate
+ datetime_template opt_datetime_template
+
+%type <elems> accessor_expr
+
+%type <indexs> index_list
+
+%type <optype> comp_op method
+
+%type <boolean> mode
+
+%type <str> key_name
+
+%type <integer> any_level
+
+%left OR_P
+%left AND_P
+%right NOT_P
+%left '+' '-'
+%left '*' '/' '%'
+%left UMINUS
+%nonassoc '(' ')'
+
+/* Grammar follows */
+%%
+
+result:
+ mode expr_or_predicate {
+ *result = palloc(sizeof(JsonPathParseResult));
+ (*result)->expr = $2;
+ (*result)->lax = $1;
+ (void) yynerrs;
+ }
+ | /* EMPTY */ { *result = NULL; }
+ ;
+
+expr_or_predicate:
+ expr { $$ = $1; }
+ | predicate { $$ = $1; }
+ ;
+
+mode:
+ STRICT_P { $$ = false; }
+ | LAX_P { $$ = true; }
+ | /* EMPTY */ { $$ = true; }
+ ;
+
+scalar_value:
+ STRING_P { $$ = makeItemString(&$1); }
+ | NULL_P { $$ = makeItemString(NULL); }
+ | TRUE_P { $$ = makeItemBool(true); }
+ | FALSE_P { $$ = makeItemBool(false); }
+ | NUMERIC_P { $$ = makeItemNumeric(&$1); }
+ | INT_P { $$ = makeItemNumeric(&$1); }
+ | VARIABLE_P { $$ = makeItemVariable(&$1); }
+ ;
+
+comp_op:
+ EQUAL_P { $$ = jpiEqual; }
+ | NOTEQUAL_P { $$ = jpiNotEqual; }
+ | LESS_P { $$ = jpiLess; }
+ | GREATER_P { $$ = jpiGreater; }
+ | LESSEQUAL_P { $$ = jpiLessOrEqual; }
+ | GREATEREQUAL_P { $$ = jpiGreaterOrEqual; }
+ ;
+
+delimited_predicate:
+ '(' predicate ')' { $$ = $2; }
+ | EXISTS_P '(' expr ')' { $$ = makeItemUnary(jpiExists, $3); }
+ ;
+
+predicate:
+ delimited_predicate { $$ = $1; }
+ | expr comp_op expr { $$ = makeItemBinary($2, $1, $3); }
+ | predicate AND_P predicate { $$ = makeItemBinary(jpiAnd, $1, $3); }
+ | predicate OR_P predicate { $$ = makeItemBinary(jpiOr, $1, $3); }
+ | NOT_P delimited_predicate { $$ = makeItemUnary(jpiNot, $2); }
+ | '(' predicate ')' IS_P UNKNOWN_P
+ { $$ = makeItemUnary(jpiIsUnknown, $2); }
+ | expr STARTS_P WITH_P starts_with_initial
+ { $$ = makeItemBinary(jpiStartsWith, $1, $4); }
+ | expr LIKE_REGEX_P STRING_P { $$ = makeItemLikeRegex($1, &$3, NULL); }
+ | expr LIKE_REGEX_P STRING_P FLAG_P STRING_P
+ { $$ = makeItemLikeRegex($1, &$3, &$5); }
+ ;
+
+starts_with_initial:
+ STRING_P { $$ = makeItemString(&$1); }
+ | VARIABLE_P { $$ = makeItemVariable(&$1); }
+ ;
+
+path_primary:
+ scalar_value { $$ = $1; }
+ | '$' { $$ = makeItemType(jpiRoot); }
+ | '@' { $$ = makeItemType(jpiCurrent); }
+ | LAST_P { $$ = makeItemType(jpiLast); }
+ ;
+
+accessor_expr:
+ path_primary { $$ = list_make1($1); }
+ | '(' expr ')' accessor_op { $$ = list_make2($2, $4); }
+ | '(' predicate ')' accessor_op { $$ = list_make2($2, $4); }
+ | accessor_expr accessor_op { $$ = lappend($1, $2); }
+ ;
+
+expr:
+ accessor_expr { $$ = makeItemList($1); }
+ | '(' expr ')' { $$ = $2; }
+ | '+' expr %prec UMINUS { $$ = makeItemUnary(jpiPlus, $2); }
+ | '-' expr %prec UMINUS { $$ = makeItemUnary(jpiMinus, $2); }
+ | expr '+' expr { $$ = makeItemBinary(jpiAdd, $1, $3); }
+ | expr '-' expr { $$ = makeItemBinary(jpiSub, $1, $3); }
+ | expr '*' expr { $$ = makeItemBinary(jpiMul, $1, $3); }
+ | expr '/' expr { $$ = makeItemBinary(jpiDiv, $1, $3); }
+ | expr '%' expr { $$ = makeItemBinary(jpiMod, $1, $3); }
+ ;
+
+index_elem:
+ expr { $$ = makeItemBinary(jpiSubscript, $1, NULL); }
+ | expr TO_P expr { $$ = makeItemBinary(jpiSubscript, $1, $3); }
+ ;
+
+index_list:
+ index_elem { $$ = list_make1($1); }
+ | index_list ',' index_elem { $$ = lappend($1, $3); }
+ ;
+
+array_accessor:
+ '[' '*' ']' { $$ = makeItemType(jpiAnyArray); }
+ | '[' index_list ']' { $$ = makeIndexArray($2); }
+ ;
+
+any_level:
+ INT_P { $$ = pg_strtoint32($1.val); }
+ | LAST_P { $$ = -1; }
+ ;
+
+any_path:
+ ANY_P { $$ = makeAny(0, -1); }
+ | ANY_P '{' any_level '}' { $$ = makeAny($3, $3); }
+ | ANY_P '{' any_level TO_P any_level '}'
+ { $$ = makeAny($3, $5); }
+ ;
+
+accessor_op:
+ '.' key { $$ = $2; }
+ | '.' '*' { $$ = makeItemType(jpiAnyKey); }
+ | array_accessor { $$ = $1; }
+ | '.' any_path { $$ = $2; }
+ | '.' method '(' ')' { $$ = makeItemType($2); }
+ | '.' DATETIME_P '(' opt_datetime_template ')'
+ { $$ = makeItemUnary(jpiDatetime, $4); }
+ | '?' '(' predicate ')' { $$ = makeItemUnary(jpiFilter, $3); }
+ ;
+
+datetime_template:
+ STRING_P { $$ = makeItemString(&$1); }
+ ;
+
+opt_datetime_template:
+ datetime_template { $$ = $1; }
+ | /* EMPTY */ { $$ = NULL; }
+ ;
+
+key:
+ key_name { $$ = makeItemKey(&$1); }
+ ;
+
+key_name:
+ IDENT_P
+ | STRING_P
+ | TO_P
+ | NULL_P
+ | TRUE_P
+ | FALSE_P
+ | IS_P
+ | UNKNOWN_P
+ | EXISTS_P
+ | STRICT_P
+ | LAX_P
+ | ABS_P
+ | SIZE_P
+ | TYPE_P
+ | FLOOR_P
+ | DOUBLE_P
+ | CEILING_P
+ | DATETIME_P
+ | KEYVALUE_P
+ | LAST_P
+ | STARTS_P
+ | WITH_P
+ | LIKE_REGEX_P
+ | FLAG_P
+ ;
+
+method:
+ ABS_P { $$ = jpiAbs; }
+ | SIZE_P { $$ = jpiSize; }
+ | TYPE_P { $$ = jpiType; }
+ | FLOOR_P { $$ = jpiFloor; }
+ | DOUBLE_P { $$ = jpiDouble; }
+ | CEILING_P { $$ = jpiCeiling; }
+ | KEYVALUE_P { $$ = jpiKeyValue; }
+ ;
+%%
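The %left/%right declarations above give the arithmetic productions in expr conventional precedence and associativity, so an expression such as 2 + 3 * 4 reduces to an addition whose right argument is the multiplication (jpiAdd over jpiMul), not the other way around. A minimal sketch of that tree shape, again with hypothetical simplified types rather than the real parse items:

/* Hypothetical simplified binary nodes illustrating precedence only. */
#include <stdio.h>
#include <stdlib.h>

typedef struct Node
{
	char		 op;			/* '+', '*', or 0 for a literal */
	double		 num;			/* literal value when op == 0 */
	struct Node *left;
	struct Node *right;
} Node;

static Node *
leaf(double num)
{
	Node	   *n = calloc(1, sizeof(Node));

	n->num = num;
	return n;
}

static Node *
binary(char op, Node *l, Node *r)
{
	Node	   *n = calloc(1, sizeof(Node));

	n->op = op;
	n->left = l;
	n->right = r;
	return n;
}

static double
eval(Node *n)
{
	if (n->op == 0)
		return n->num;
	return n->op == '+' ? eval(n->left) + eval(n->right)
						: eval(n->left) * eval(n->right);
}

int
main(void)
{
	/* "2 + 3 * 4" parses as jpiAdd(2, jpiMul(3, 4)) */
	Node	   *tree = binary('+', leaf(2), binary('*', leaf(3), leaf(4)));

	printf("%g\n", eval(tree));	/* prints 14, not 20 */
	return 0;
}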
+
+/*
+ * The helper functions below allocate and fill JsonPathParseItem's of various
+ * types.
+ */
+
+static JsonPathParseItem *
+makeItemType(JsonPathItemType type)
+{
+ JsonPathParseItem *v = palloc(sizeof(*v));
+
+ CHECK_FOR_INTERRUPTS();
+
+ v->type = type;
+ v->next = NULL;
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemString(JsonPathString *s)
+{
+ JsonPathParseItem *v;
+
+ if (s == NULL)
+ {
+ v = makeItemType(jpiNull);
+ }
+ else
+ {
+ v = makeItemType(jpiString);
+ v->value.string.val = s->val;
+ v->value.string.len = s->len;
+ }
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemVariable(JsonPathString *s)
+{
+ JsonPathParseItem *v;
+
+ v = makeItemType(jpiVariable);
+ v->value.string.val = s->val;
+ v->value.string.len = s->len;
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemKey(JsonPathString *s)
+{
+ JsonPathParseItem *v;
+
+ v = makeItemString(s);
+ v->type = jpiKey;
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemNumeric(JsonPathString *s)
+{
+ JsonPathParseItem *v;
+
+ v = makeItemType(jpiNumeric);
+ v->value.numeric =
+ DatumGetNumeric(DirectFunctionCall3(numeric_in,
+ CStringGetDatum(s->val),
+ ObjectIdGetDatum(InvalidOid),
+ Int32GetDatum(-1)));
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemBool(bool val)
+{
+ JsonPathParseItem *v = makeItemType(jpiBool);
+
+ v->value.boolean = val;
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemBinary(JsonPathItemType type, JsonPathParseItem *la, JsonPathParseItem *ra)
+{
+ JsonPathParseItem *v = makeItemType(type);
+
+ v->value.args.left = la;
+ v->value.args.right = ra;
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemUnary(JsonPathItemType type, JsonPathParseItem *a)
+{
+ JsonPathParseItem *v;
+
+ if (type == jpiPlus && a->type == jpiNumeric && !a->next)
+ return a;
+
+ if (type == jpiMinus && a->type == jpiNumeric && !a->next)
+ {
+ v = makeItemType(jpiNumeric);
+ v->value.numeric =
+ DatumGetNumeric(DirectFunctionCall1(numeric_uminus,
+ NumericGetDatum(a->value.numeric)));
+ return v;
+ }
+
+ v = makeItemType(type);
+
+ v->value.arg = a;
+
+ return v;
+}
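makeItemUnary() above folds a unary sign that is applied directly to a bare numeric literal (one with no next item): '+1' is returned unchanged, and '-1' becomes a negated jpiNumeric item via numeric_uminus, so no jpiPlus/jpiMinus node survives for plain literals; any other operand keeps the unary wrapper node. A standalone sketch of the same folding idea, using a plain double in place of the Numeric datum (hypothetical simplified types):

/* Hypothetical simplified items; double stands in for the Numeric datum. */
#include <stdio.h>
#include <stdlib.h>

typedef enum { LIT_NUMERIC, OP_MINUS } ItemType;

typedef struct Item
{
	ItemType	 type;
	double		 num;			/* LIT_NUMERIC only */
	struct Item *arg;			/* OP_MINUS only */
} Item;

static Item *
make_numeric(double num)
{
	Item	   *v = calloc(1, sizeof(Item));

	v->type = LIT_NUMERIC;
	v->num = num;
	return v;
}

/* Mimics makeItemUnary(jpiMinus, a): fold when the operand is a literal. */
static Item *
make_unary_minus(Item *a)
{
	if (a->type == LIT_NUMERIC)
	{
		a->num = -a->num;		/* fold: no minus node is kept */
		return a;
	}
	else
	{
		Item	   *v = calloc(1, sizeof(Item));

		v->type = OP_MINUS;
		v->arg = a;
		return v;
	}
}

int
main(void)
{
	Item	   *folded = make_unary_minus(make_numeric(1));

	printf("type=%d num=%g\n", (int) folded->type, folded->num);
	/* prints: type=0 num=-1  (a literal, not a minus node) */
	return 0;
}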
+
+static JsonPathParseItem *
+makeItemList(List *list)
+{
+ JsonPathParseItem *head,
+ *end;
+ ListCell *cell;
+
+ head = end = (JsonPathParseItem *) linitial(list);
+
+ if (list_length(list) == 1)
+ return head;
+
+ /* append items to the end of already existing list */
+ while (end->next)
+ end = end->next;
+
+ for_each_from(cell, list, 1)
+ {
+ JsonPathParseItem *c = (JsonPathParseItem *) lfirst(cell);
+
+ end->next = c;
+ end = c;
+ }
+
+ return head;
+}
+
+static JsonPathParseItem *
+makeIndexArray(List *list)
+{
+ JsonPathParseItem *v = makeItemType(jpiIndexArray);
+ ListCell *cell;
+ int i = 0;
+
+ Assert(list_length(list) > 0);
+ v->value.array.nelems = list_length(list);
+
+ v->value.array.elems = palloc(sizeof(v->value.array.elems[0]) *
+ v->value.array.nelems);
+
+ foreach(cell, list)
+ {
+ JsonPathParseItem *jpi = lfirst(cell);
+
+ Assert(jpi->type == jpiSubscript);
+
+ v->value.array.elems[i].from = jpi->value.args.left;
+ v->value.array.elems[i++].to = jpi->value.args.right;
+ }
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeAny(int first, int last)
+{
+ JsonPathParseItem *v = makeItemType(jpiAny);
+
+ v->value.anybounds.first = (first >= 0) ? first : PG_UINT32_MAX;
+ v->value.anybounds.last = (last >= 0) ? last : PG_UINT32_MAX;
+
+ return v;
+}
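makeAny() above encodes the .** level bounds as unsigned integers, mapping -1 (produced by the last keyword, and used as the upper bound of a bare .**) to PG_UINT32_MAX, meaning "unbounded". So .** becomes {0, UINT32_MAX}, .**{3} becomes {3, 3}, and .**{2 to last} becomes {2, UINT32_MAX}. A small standalone sketch of that mapping, using UINT32_MAX from <stdint.h> in place of PG_UINT32_MAX:

/* Illustration of the any_path bound encoding. */
#include <stdio.h>
#include <stdint.h>

typedef struct AnyBounds
{
	uint32_t	first;
	uint32_t	last;
} AnyBounds;

static AnyBounds
any_bounds(int first, int last)
{
	AnyBounds	b;

	b.first = (first >= 0) ? (uint32_t) first : UINT32_MAX;
	b.last = (last >= 0) ? (uint32_t) last : UINT32_MAX;
	return b;
}

int
main(void)
{
	AnyBounds	b1 = any_bounds(0, -1);	/* .**            */
	AnyBounds	b2 = any_bounds(3, 3);	/* .**{3}         */
	AnyBounds	b3 = any_bounds(2, -1);	/* .**{2 to last} */

	printf("%u..%u\n", b1.first, b1.last);
	printf("%u..%u\n", b2.first, b2.last);
	printf("%u..%u\n", b3.first, b3.last);
	return 0;
}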
+
+static JsonPathParseItem *
+makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
+ JsonPathString *flags)
+{
+ JsonPathParseItem *v = makeItemType(jpiLikeRegex);
+ int i;
+ int cflags;
+
+ v->value.like_regex.expr = expr;
+ v->value.like_regex.pattern = pattern->val;
+ v->value.like_regex.patternlen = pattern->len;
+
+ /* Parse the flags string, convert to bitmask. Duplicate flags are OK. */
+ v->value.like_regex.flags = 0;
+ for (i = 0; flags && i < flags->len; i++)
+ {
+ switch (flags->val[i])
+ {
+ case 'i':
+ v->value.like_regex.flags |= JSP_REGEX_ICASE;
+ break;
+ case 's':
+ v->value.like_regex.flags |= JSP_REGEX_DOTALL;
+ break;
+ case 'm':
+ v->value.like_regex.flags |= JSP_REGEX_MLINE;
+ break;
+ case 'x':
+ v->value.like_regex.flags |= JSP_REGEX_WSPACE;
+ break;
+ case 'q':
+ v->value.like_regex.flags |= JSP_REGEX_QUOTE;
+ break;
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("invalid input syntax for type %s", "jsonpath"),
+ errdetail("Unrecognized flag character \"%.*s\" in LIKE_REGEX predicate.",
+ pg_mblen(flags->val + i), flags->val + i)));
+ break;
+ }
+ }
+
+ /* Convert flags to what RE_compile_and_cache needs */
+ cflags = jspConvertRegexFlags(v->value.like_regex.flags);
+
+ /* check regex validity */
+ (void) RE_compile_and_cache(cstring_to_text_with_len(pattern->val,
+ pattern->len),
+ cflags, DEFAULT_COLLATION_OID);
+
+ return v;
+}
+
+/*
+ * Convert from XQuery regex flags to those recognized by our regex library.
+ */
+int
+jspConvertRegexFlags(uint32 xflags)
+{
+ /* By default, XQuery is very nearly the same as Spencer's AREs */
+ int cflags = REG_ADVANCED;
+
+ /* Ignore-case means the same thing, too, modulo locale issues */
+ if (xflags & JSP_REGEX_ICASE)
+ cflags |= REG_ICASE;
+
+ /* Per XQuery spec, if 'q' is specified then 'm', 's', 'x' are ignored */
+ if (xflags & JSP_REGEX_QUOTE)
+ {
+ cflags &= ~REG_ADVANCED;
+ cflags |= REG_QUOTE;
+ }
+ else
+ {
+ /* Note that dotall mode is the default in POSIX */
+ if (!(xflags & JSP_REGEX_DOTALL))
+ cflags |= REG_NLSTOP;
+ if (xflags & JSP_REGEX_MLINE)
+ cflags |= REG_NLANCH;
+
+ /*
+ * XQuery's 'x' mode is related to Spencer's expanded mode, but it's
+ * not really enough alike to justify treating JSP_REGEX_WSPACE as
+ * REG_EXPANDED. For now we treat 'x' as unimplemented; perhaps in
+ * future we'll modify the regex library to have an option for
+ * XQuery-style ignore-whitespace mode.
+ */
+ if (xflags & JSP_REGEX_WSPACE)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("XQuery \"x\" flag (expanded regular expressions) is not implemented")));
+ }
+
+ /*
+ * We'll never need sub-match details at execution. While
+ * RE_compile_and_execute would set this flag anyway, force it on here to
+ * ensure that the regex cache entries created by makeItemLikeRegex are
+ * useful.
+ */
+ cflags |= REG_NOSUB;
+
+ return cflags;
+}
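To summarize the conversion above with a concrete case: like_regex ... flag "iq" first accumulates JSP_REGEX_ICASE | JSP_REGEX_QUOTE while the flag string is scanned, and jspConvertRegexFlags() then returns REG_ICASE | REG_QUOTE | REG_NOSUB (the 'q' branch drops REG_ADVANCED, and 'm'/'s'/'x' would be ignored if present). A standalone sketch with hypothetical stand-in bit values rather than the real JSP_REGEX_*/REG_* constants:

/* Hypothetical flag bits standing in for JSP_REGEX_* and REG_*. */
#include <stdio.h>
#include <stdint.h>

enum
{
	X_ICASE = 1, X_DOTALL = 2, X_MLINE = 4, X_WSPACE = 8, X_QUOTE = 16
};
enum
{
	C_ADVANCED = 1, C_ICASE = 2, C_QUOTE = 4,
	C_NLSTOP = 8, C_NLANCH = 16, C_NOSUB = 32
};

/* Same shape as jspConvertRegexFlags(), minus the error on 'x'. */
static int
convert(uint32_t xflags)
{
	int			cflags = C_ADVANCED;

	if (xflags & X_ICASE)
		cflags |= C_ICASE;
	if (xflags & X_QUOTE)
	{
		cflags &= ~C_ADVANCED;
		cflags |= C_QUOTE;
	}
	else
	{
		if (!(xflags & X_DOTALL))
			cflags |= C_NLSTOP;
		if (xflags & X_MLINE)
			cflags |= C_NLANCH;
	}
	cflags |= C_NOSUB;
	return cflags;
}

int
main(void)
{
	printf("flags \"iq\" -> %#x\n", convert(X_ICASE | X_QUOTE));
	/* -> C_ICASE | C_QUOTE | C_NOSUB == 0x26 */
	return 0;
}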
+
+/*
+ * jsonpath_scan.l is compiled as part of jsonpath_gram.y. Currently, this is
+ * unavoidable because jsonpath_gram does not create a .h file to export its
+ * token symbols. If these files ever grow large enough to be worth compiling
+ * separately, that could be fixed; but for now it seems like useless
+ * complication.
+ */
+
+#include "jsonpath_scan.c"
diff --git a/src/backend/utils/adt/jsonpath_scan.c b/src/backend/utils/adt/jsonpath_scan.c
new file mode 100644
index 0000000..e1a9d2a
--- /dev/null
+++ b/src/backend/utils/adt/jsonpath_scan.c
@@ -0,0 +1,4873 @@
+#line 2 "jsonpath_scan.c"
+
+#line 4 "jsonpath_scan.c"
+
+#define YY_INT_ALIGNED short int
+
+/* A lexical scanner generated by flex */
+
+#define yy_create_buffer jsonpath_yy_create_buffer
+#define yy_delete_buffer jsonpath_yy_delete_buffer
+#define yy_scan_buffer jsonpath_yy_scan_buffer
+#define yy_scan_string jsonpath_yy_scan_string
+#define yy_scan_bytes jsonpath_yy_scan_bytes
+#define yy_init_buffer jsonpath_yy_init_buffer
+#define yy_flush_buffer jsonpath_yy_flush_buffer
+#define yy_load_buffer_state jsonpath_yy_load_buffer_state
+#define yy_switch_to_buffer jsonpath_yy_switch_to_buffer
+#define yypush_buffer_state jsonpath_yypush_buffer_state
+#define yypop_buffer_state jsonpath_yypop_buffer_state
+#define yyensure_buffer_stack jsonpath_yyensure_buffer_stack
+#define yy_flex_debug jsonpath_yy_flex_debug
+#define yyin jsonpath_yyin
+#define yyleng jsonpath_yyleng
+#define yylex jsonpath_yylex
+#define yylineno jsonpath_yylineno
+#define yyout jsonpath_yyout
+#define yyrestart jsonpath_yyrestart
+#define yytext jsonpath_yytext
+#define yywrap jsonpath_yywrap
+#define yyalloc jsonpath_yyalloc
+#define yyrealloc jsonpath_yyrealloc
+#define yyfree jsonpath_yyfree
+
+#define FLEX_SCANNER
+#define YY_FLEX_MAJOR_VERSION 2
+#define YY_FLEX_MINOR_VERSION 6
+#define YY_FLEX_SUBMINOR_VERSION 4
+#if YY_FLEX_SUBMINOR_VERSION > 0
+#define FLEX_BETA
+#endif
+
+#ifdef yy_create_buffer
+#define jsonpath_yy_create_buffer_ALREADY_DEFINED
+#else
+#define yy_create_buffer jsonpath_yy_create_buffer
+#endif
+
+#ifdef yy_delete_buffer
+#define jsonpath_yy_delete_buffer_ALREADY_DEFINED
+#else
+#define yy_delete_buffer jsonpath_yy_delete_buffer
+#endif
+
+#ifdef yy_scan_buffer
+#define jsonpath_yy_scan_buffer_ALREADY_DEFINED
+#else
+#define yy_scan_buffer jsonpath_yy_scan_buffer
+#endif
+
+#ifdef yy_scan_string
+#define jsonpath_yy_scan_string_ALREADY_DEFINED
+#else
+#define yy_scan_string jsonpath_yy_scan_string
+#endif
+
+#ifdef yy_scan_bytes
+#define jsonpath_yy_scan_bytes_ALREADY_DEFINED
+#else
+#define yy_scan_bytes jsonpath_yy_scan_bytes
+#endif
+
+#ifdef yy_init_buffer
+#define jsonpath_yy_init_buffer_ALREADY_DEFINED
+#else
+#define yy_init_buffer jsonpath_yy_init_buffer
+#endif
+
+#ifdef yy_flush_buffer
+#define jsonpath_yy_flush_buffer_ALREADY_DEFINED
+#else
+#define yy_flush_buffer jsonpath_yy_flush_buffer
+#endif
+
+#ifdef yy_load_buffer_state
+#define jsonpath_yy_load_buffer_state_ALREADY_DEFINED
+#else
+#define yy_load_buffer_state jsonpath_yy_load_buffer_state
+#endif
+
+#ifdef yy_switch_to_buffer
+#define jsonpath_yy_switch_to_buffer_ALREADY_DEFINED
+#else
+#define yy_switch_to_buffer jsonpath_yy_switch_to_buffer
+#endif
+
+#ifdef yypush_buffer_state
+#define jsonpath_yypush_buffer_state_ALREADY_DEFINED
+#else
+#define yypush_buffer_state jsonpath_yypush_buffer_state
+#endif
+
+#ifdef yypop_buffer_state
+#define jsonpath_yypop_buffer_state_ALREADY_DEFINED
+#else
+#define yypop_buffer_state jsonpath_yypop_buffer_state
+#endif
+
+#ifdef yyensure_buffer_stack
+#define jsonpath_yyensure_buffer_stack_ALREADY_DEFINED
+#else
+#define yyensure_buffer_stack jsonpath_yyensure_buffer_stack
+#endif
+
+#ifdef yylex
+#define jsonpath_yylex_ALREADY_DEFINED
+#else
+#define yylex jsonpath_yylex
+#endif
+
+#ifdef yyrestart
+#define jsonpath_yyrestart_ALREADY_DEFINED
+#else
+#define yyrestart jsonpath_yyrestart
+#endif
+
+#ifdef yylex_init
+#define jsonpath_yylex_init_ALREADY_DEFINED
+#else
+#define yylex_init jsonpath_yylex_init
+#endif
+
+#ifdef yylex_init_extra
+#define jsonpath_yylex_init_extra_ALREADY_DEFINED
+#else
+#define yylex_init_extra jsonpath_yylex_init_extra
+#endif
+
+#ifdef yylex_destroy
+#define jsonpath_yylex_destroy_ALREADY_DEFINED
+#else
+#define yylex_destroy jsonpath_yylex_destroy
+#endif
+
+#ifdef yyget_debug
+#define jsonpath_yyget_debug_ALREADY_DEFINED
+#else
+#define yyget_debug jsonpath_yyget_debug
+#endif
+
+#ifdef yyset_debug
+#define jsonpath_yyset_debug_ALREADY_DEFINED
+#else
+#define yyset_debug jsonpath_yyset_debug
+#endif
+
+#ifdef yyget_extra
+#define jsonpath_yyget_extra_ALREADY_DEFINED
+#else
+#define yyget_extra jsonpath_yyget_extra
+#endif
+
+#ifdef yyset_extra
+#define jsonpath_yyset_extra_ALREADY_DEFINED
+#else
+#define yyset_extra jsonpath_yyset_extra
+#endif
+
+#ifdef yyget_in
+#define jsonpath_yyget_in_ALREADY_DEFINED
+#else
+#define yyget_in jsonpath_yyget_in
+#endif
+
+#ifdef yyset_in
+#define jsonpath_yyset_in_ALREADY_DEFINED
+#else
+#define yyset_in jsonpath_yyset_in
+#endif
+
+#ifdef yyget_out
+#define jsonpath_yyget_out_ALREADY_DEFINED
+#else
+#define yyget_out jsonpath_yyget_out
+#endif
+
+#ifdef yyset_out
+#define jsonpath_yyset_out_ALREADY_DEFINED
+#else
+#define yyset_out jsonpath_yyset_out
+#endif
+
+#ifdef yyget_leng
+#define jsonpath_yyget_leng_ALREADY_DEFINED
+#else
+#define yyget_leng jsonpath_yyget_leng
+#endif
+
+#ifdef yyget_text
+#define jsonpath_yyget_text_ALREADY_DEFINED
+#else
+#define yyget_text jsonpath_yyget_text
+#endif
+
+#ifdef yyget_lineno
+#define jsonpath_yyget_lineno_ALREADY_DEFINED
+#else
+#define yyget_lineno jsonpath_yyget_lineno
+#endif
+
+#ifdef yyset_lineno
+#define jsonpath_yyset_lineno_ALREADY_DEFINED
+#else
+#define yyset_lineno jsonpath_yyset_lineno
+#endif
+
+#ifdef yywrap
+#define jsonpath_yywrap_ALREADY_DEFINED
+#else
+#define yywrap jsonpath_yywrap
+#endif
+
+#ifdef yyget_lval
+#define jsonpath_yyget_lval_ALREADY_DEFINED
+#else
+#define yyget_lval jsonpath_yyget_lval
+#endif
+
+#ifdef yyset_lval
+#define jsonpath_yyset_lval_ALREADY_DEFINED
+#else
+#define yyset_lval jsonpath_yyset_lval
+#endif
+
+#ifdef yyalloc
+#define jsonpath_yyalloc_ALREADY_DEFINED
+#else
+#define yyalloc jsonpath_yyalloc
+#endif
+
+#ifdef yyrealloc
+#define jsonpath_yyrealloc_ALREADY_DEFINED
+#else
+#define yyrealloc jsonpath_yyrealloc
+#endif
+
+#ifdef yyfree
+#define jsonpath_yyfree_ALREADY_DEFINED
+#else
+#define yyfree jsonpath_yyfree
+#endif
+
+#ifdef yytext
+#define jsonpath_yytext_ALREADY_DEFINED
+#else
+#define yytext jsonpath_yytext
+#endif
+
+#ifdef yyleng
+#define jsonpath_yyleng_ALREADY_DEFINED
+#else
+#define yyleng jsonpath_yyleng
+#endif
+
+#ifdef yyin
+#define jsonpath_yyin_ALREADY_DEFINED
+#else
+#define yyin jsonpath_yyin
+#endif
+
+#ifdef yyout
+#define jsonpath_yyout_ALREADY_DEFINED
+#else
+#define yyout jsonpath_yyout
+#endif
+
+#ifdef yy_flex_debug
+#define jsonpath_yy_flex_debug_ALREADY_DEFINED
+#else
+#define yy_flex_debug jsonpath_yy_flex_debug
+#endif
+
+#ifdef yylineno
+#define jsonpath_yylineno_ALREADY_DEFINED
+#else
+#define yylineno jsonpath_yylineno
+#endif
+
+/* First, we deal with platform-specific or compiler-specific issues. */
+
+/* begin standard C headers. */
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+
+/* end standard C headers. */
+
+/* flex integer type definitions */
+
+#ifndef FLEXINT_H
+#define FLEXINT_H
+
+/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
+
+#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+
+/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
+ * if you want the limit (max/min) macros for int types.
+ */
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS 1
+#endif
+
+#include <inttypes.h>
+typedef int8_t flex_int8_t;
+typedef uint8_t flex_uint8_t;
+typedef int16_t flex_int16_t;
+typedef uint16_t flex_uint16_t;
+typedef int32_t flex_int32_t;
+typedef uint32_t flex_uint32_t;
+#else
+typedef signed char flex_int8_t;
+typedef short int flex_int16_t;
+typedef int flex_int32_t;
+typedef unsigned char flex_uint8_t;
+typedef unsigned short int flex_uint16_t;
+typedef unsigned int flex_uint32_t;
+
+/* Limits of integral types. */
+#ifndef INT8_MIN
+#define INT8_MIN (-128)
+#endif
+#ifndef INT16_MIN
+#define INT16_MIN (-32767-1)
+#endif
+#ifndef INT32_MIN
+#define INT32_MIN (-2147483647-1)
+#endif
+#ifndef INT8_MAX
+#define INT8_MAX (127)
+#endif
+#ifndef INT16_MAX
+#define INT16_MAX (32767)
+#endif
+#ifndef INT32_MAX
+#define INT32_MAX (2147483647)
+#endif
+#ifndef UINT8_MAX
+#define UINT8_MAX (255U)
+#endif
+#ifndef UINT16_MAX
+#define UINT16_MAX (65535U)
+#endif
+#ifndef UINT32_MAX
+#define UINT32_MAX (4294967295U)
+#endif
+
+#ifndef SIZE_MAX
+#define SIZE_MAX (~(size_t)0)
+#endif
+
+#endif /* ! C99 */
+
+#endif /* ! FLEXINT_H */
+
+/* begin standard C++ headers. */
+
+/* TODO: this is always defined, so inline it */
+#define yyconst const
+
+#if defined(__GNUC__) && __GNUC__ >= 3
+#define yynoreturn __attribute__((__noreturn__))
+#else
+#define yynoreturn
+#endif
+
+/* Returned upon end-of-file. */
+#define YY_NULL 0
+
+/* Promotes a possibly negative, possibly signed char to an
+ * integer in range [0..255] for use as an array index.
+ */
+#define YY_SC_TO_UI(c) ((YY_CHAR) (c))
+
+/* Enter a start condition. This macro really ought to take a parameter,
+ * but we do it the disgusting crufty way forced on us by the ()-less
+ * definition of BEGIN.
+ */
+#define BEGIN (yy_start) = 1 + 2 *
+/* Translate the current start state into a value that can be later handed
+ * to BEGIN to return to the state. The YYSTATE alias is for lex
+ * compatibility.
+ */
+#define YY_START (((yy_start) - 1) / 2)
+#define YYSTATE YY_START
+/* Action number for EOF rule of a given start state. */
+#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
+/* Special action meaning "start processing a new file". */
+#define YY_NEW_FILE yyrestart( yyin )
+#define YY_END_OF_BUFFER_CHAR 0
+
+/* Size of default input buffer. */
+#ifndef YY_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k.
+ * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
+ * Ditto for the __ia64__ case accordingly.
+ */
+#define YY_BUF_SIZE 32768
+#else
+#define YY_BUF_SIZE 16384
+#endif /* __ia64__ */
+#endif
+
+/* The state buf must be large enough to hold one state per character in the main buffer.
+ */
+#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type))
+
+#ifndef YY_TYPEDEF_YY_BUFFER_STATE
+#define YY_TYPEDEF_YY_BUFFER_STATE
+typedef struct yy_buffer_state *YY_BUFFER_STATE;
+#endif
+
+#ifndef YY_TYPEDEF_YY_SIZE_T
+#define YY_TYPEDEF_YY_SIZE_T
+typedef size_t yy_size_t;
+#endif
+
+extern int yyleng;
+
+extern FILE *yyin, *yyout;
+
+#define EOB_ACT_CONTINUE_SCAN 0
+#define EOB_ACT_END_OF_FILE 1
+#define EOB_ACT_LAST_MATCH 2
+
+ #define YY_LESS_LINENO(n)
+ #define YY_LINENO_REWIND_TO(ptr)
+
+/* Return all but the first "n" matched characters back to the input stream. */
+#define yyless(n) \
+ do \
+ { \
+ /* Undo effects of setting up yytext. */ \
+ int yyless_macro_arg = (n); \
+ YY_LESS_LINENO(yyless_macro_arg);\
+ *yy_cp = (yy_hold_char); \
+ YY_RESTORE_YY_MORE_OFFSET \
+ (yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \
+ YY_DO_BEFORE_ACTION; /* set up yytext again */ \
+ } \
+ while ( 0 )
+#define unput(c) yyunput( c, (yytext_ptr) )
+
+#ifndef YY_STRUCT_YY_BUFFER_STATE
+#define YY_STRUCT_YY_BUFFER_STATE
+struct yy_buffer_state
+ {
+ FILE *yy_input_file;
+
+ char *yy_ch_buf; /* input buffer */
+ char *yy_buf_pos; /* current position in input buffer */
+
+ /* Size of input buffer in bytes, not including room for EOB
+ * characters.
+ */
+ int yy_buf_size;
+
+ /* Number of characters read into yy_ch_buf, not including EOB
+ * characters.
+ */
+ int yy_n_chars;
+
+ /* Whether we "own" the buffer - i.e., we know we created it,
+ * and can realloc() it to grow it, and should free() it to
+ * delete it.
+ */
+ int yy_is_our_buffer;
+
+ /* Whether this is an "interactive" input source; if so, and
+ * if we're using stdio for input, then we want to use getc()
+ * instead of fread(), to make sure we stop fetching input after
+ * each newline.
+ */
+ int yy_is_interactive;
+
+ /* Whether we're considered to be at the beginning of a line.
+ * If so, '^' rules will be active on the next match, otherwise
+ * not.
+ */
+ int yy_at_bol;
+
+ int yy_bs_lineno; /**< The line count. */
+ int yy_bs_column; /**< The column count. */
+
+ /* Whether to try to fill the input buffer when we reach the
+ * end of it.
+ */
+ int yy_fill_buffer;
+
+ int yy_buffer_status;
+
+#define YY_BUFFER_NEW 0
+#define YY_BUFFER_NORMAL 1
+ /* When an EOF's been seen but there's still some text to process
+ * then we mark the buffer as YY_EOF_PENDING, to indicate that we
+ * shouldn't try reading from the input source any more. We might
+ * still have a bunch of tokens to match, though, because of
+ * possible backing-up.
+ *
+ * When we actually see the EOF, we change the status to "new"
+ * (via yyrestart()), so that the user can continue scanning by
+ * just pointing yyin at a new input file.
+ */
+#define YY_BUFFER_EOF_PENDING 2
+
+ };
+#endif /* !YY_STRUCT_YY_BUFFER_STATE */
+
+/* Stack of input buffers. */
+static size_t yy_buffer_stack_top = 0; /**< index of top of stack. */
+static size_t yy_buffer_stack_max = 0; /**< capacity of stack. */
+static YY_BUFFER_STATE * yy_buffer_stack = NULL; /**< Stack as an array. */
+
+/* We provide macros for accessing buffer states in case in the
+ * future we want to put the buffer states in a more general
+ * "scanner state".
+ *
+ * Returns the top of the stack, or NULL.
+ */
+#define YY_CURRENT_BUFFER ( (yy_buffer_stack) \
+ ? (yy_buffer_stack)[(yy_buffer_stack_top)] \
+ : NULL)
+/* Same as previous macro, but useful when we know that the buffer stack is not
+ * NULL or when we need an lvalue. For internal use only.
+ */
+#define YY_CURRENT_BUFFER_LVALUE (yy_buffer_stack)[(yy_buffer_stack_top)]
+
+/* yy_hold_char holds the character lost when yytext is formed. */
+static char yy_hold_char;
+static int yy_n_chars; /* number of characters read into yy_ch_buf */
+int yyleng;
+
+/* Points to current character in buffer. */
+static char *yy_c_buf_p = NULL;
+static int yy_init = 0; /* whether we need to initialize */
+static int yy_start = 0; /* start state number */
+
+/* Flag which is used to allow yywrap()'s to do buffer switches
+ * instead of setting up a fresh yyin. A bit of a hack ...
+ */
+static int yy_did_buffer_switch_on_eof;
+
+void yyrestart ( FILE *input_file );
+void yy_switch_to_buffer ( YY_BUFFER_STATE new_buffer );
+YY_BUFFER_STATE yy_create_buffer ( FILE *file, int size );
+void yy_delete_buffer ( YY_BUFFER_STATE b );
+void yy_flush_buffer ( YY_BUFFER_STATE b );
+void yypush_buffer_state ( YY_BUFFER_STATE new_buffer );
+void yypop_buffer_state ( void );
+
+static void yyensure_buffer_stack ( void );
+static void yy_load_buffer_state ( void );
+static void yy_init_buffer ( YY_BUFFER_STATE b, FILE *file );
+#define YY_FLUSH_BUFFER yy_flush_buffer( YY_CURRENT_BUFFER )
+
+YY_BUFFER_STATE yy_scan_buffer ( char *base, yy_size_t size );
+YY_BUFFER_STATE yy_scan_string ( const char *yy_str );
+YY_BUFFER_STATE yy_scan_bytes ( const char *bytes, int len );
+
+void *yyalloc ( yy_size_t );
+void *yyrealloc ( void *, yy_size_t );
+void yyfree ( void * );
+
+#define yy_new_buffer yy_create_buffer
+#define yy_set_interactive(is_interactive) \
+ { \
+ if ( ! YY_CURRENT_BUFFER ){ \
+ yyensure_buffer_stack (); \
+ YY_CURRENT_BUFFER_LVALUE = \
+ yy_create_buffer( yyin, YY_BUF_SIZE ); \
+ } \
+ YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \
+ }
+#define yy_set_bol(at_bol) \
+ { \
+ if ( ! YY_CURRENT_BUFFER ){\
+ yyensure_buffer_stack (); \
+ YY_CURRENT_BUFFER_LVALUE = \
+ yy_create_buffer( yyin, YY_BUF_SIZE ); \
+ } \
+ YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \
+ }
+#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol)
+
+/* Begin user sect3 */
+
+#define jsonpath_yywrap() (/*CONSTCOND*/1)
+#define YY_SKIP_YYWRAP
+typedef flex_uint8_t YY_CHAR;
+
+FILE *yyin = NULL, *yyout = NULL;
+
+typedef const struct yy_trans_info *yy_state_type;
+
+extern int yylineno;
+int yylineno = 1;
+
+extern char *yytext;
+#ifdef yytext_ptr
+#undef yytext_ptr
+#endif
+#define yytext_ptr yytext
+
+static yy_state_type yy_get_previous_state ( void );
+static yy_state_type yy_try_NUL_trans ( yy_state_type current_state );
+static int yy_get_next_buffer ( void );
+static void yynoreturn yy_fatal_error ( const char* msg );
+
+/* Done after the current pattern has been matched and before the
+ * corresponding action - sets up yytext.
+ */
+#define YY_DO_BEFORE_ACTION \
+ (yytext_ptr) = yy_bp; \
+ yyleng = (int) (yy_cp - yy_bp); \
+ (yy_hold_char) = *yy_cp; \
+ *yy_cp = '\0'; \
+ (yy_c_buf_p) = yy_cp;
+#define YY_NUM_RULES 50
+#define YY_END_OF_BUFFER 51
+struct yy_trans_info
+ {
+ flex_int16_t yy_verify;
+ flex_int16_t yy_nxt;
+ };
+static const struct yy_trans_info yy_transition[10558] =
+ {
+ { 0, 0 }, { 0,10302 }, { 0, 0 }, { 0,10300 }, { 1,2580 },
+ { 2,2580 }, { 3,2580 }, { 4,2580 }, { 5,2580 }, { 6,2580 },
+ { 7,2580 }, { 8,2580 }, { 9,2838 }, { 10,2838 }, { 11,2580 },
+ { 12,2838 }, { 13,2838 }, { 14,2580 }, { 15,2580 }, { 16,2580 },
+ { 17,2580 }, { 18,2580 }, { 19,2580 }, { 20,2580 }, { 21,2580 },
+ { 22,2580 }, { 23,2580 }, { 24,2580 }, { 25,2580 }, { 26,2580 },
+ { 27,2580 }, { 28,2580 }, { 29,2580 }, { 30,2580 }, { 31,2580 },
+ { 32,2838 }, { 33,2642 }, { 34,2644 }, { 35,2672 }, { 36,2857 },
+ { 37,2672 }, { 38,2854 }, { 39,2580 }, { 40,2672 }, { 41,2672 },
+ { 42,3115 }, { 43,2672 }, { 44,2672 }, { 45,2672 }, { 46,3117 },
+
+ { 47,3119 }, { 48,3176 }, { 49,3434 }, { 50,3434 }, { 51,3434 },
+ { 52,3434 }, { 53,3434 }, { 54,3434 }, { 55,3434 }, { 56,3434 },
+ { 57,3434 }, { 58,2672 }, { 59,2580 }, { 60,3124 }, { 61,3127 },
+ { 62,3147 }, { 63,2672 }, { 64,2672 }, { 65,2580 }, { 66,2580 },
+ { 67,2580 }, { 68,2580 }, { 69,2580 }, { 70,2580 }, { 71,2580 },
+ { 72,2580 }, { 73,2580 }, { 74,2580 }, { 75,2580 }, { 76,2580 },
+ { 77,2580 }, { 78,2580 }, { 79,2580 }, { 80,2580 }, { 81,2580 },
+ { 82,2580 }, { 83,2580 }, { 84,2580 }, { 85,2580 }, { 86,2580 },
+ { 87,2580 }, { 88,2580 }, { 89,2580 }, { 90,2580 }, { 91,2672 },
+ { 92,3149 }, { 93,2672 }, { 94,2580 }, { 95,2580 }, { 96,2580 },
+
+ { 97,2580 }, { 98,2580 }, { 99,2580 }, { 100,2580 }, { 101,2580 },
+ { 102,2580 }, { 103,2580 }, { 104,2580 }, { 105,2580 }, { 106,2580 },
+ { 107,2580 }, { 108,2580 }, { 109,2580 }, { 110,2580 }, { 111,2580 },
+ { 112,2580 }, { 113,2580 }, { 114,2580 }, { 115,2580 }, { 116,2580 },
+ { 117,2580 }, { 118,2580 }, { 119,2580 }, { 120,2580 }, { 121,2580 },
+ { 122,2580 }, { 123,2672 }, { 124,3692 }, { 125,2672 }, { 126,2580 },
+ { 127,2580 }, { 128,2580 }, { 129,2580 }, { 130,2580 }, { 131,2580 },
+ { 132,2580 }, { 133,2580 }, { 134,2580 }, { 135,2580 }, { 136,2580 },
+ { 137,2580 }, { 138,2580 }, { 139,2580 }, { 140,2580 }, { 141,2580 },
+ { 142,2580 }, { 143,2580 }, { 144,2580 }, { 145,2580 }, { 146,2580 },
+
+ { 147,2580 }, { 148,2580 }, { 149,2580 }, { 150,2580 }, { 151,2580 },
+ { 152,2580 }, { 153,2580 }, { 154,2580 }, { 155,2580 }, { 156,2580 },
+ { 157,2580 }, { 158,2580 }, { 159,2580 }, { 160,2580 }, { 161,2580 },
+ { 162,2580 }, { 163,2580 }, { 164,2580 }, { 165,2580 }, { 166,2580 },
+ { 167,2580 }, { 168,2580 }, { 169,2580 }, { 170,2580 }, { 171,2580 },
+ { 172,2580 }, { 173,2580 }, { 174,2580 }, { 175,2580 }, { 176,2580 },
+ { 177,2580 }, { 178,2580 }, { 179,2580 }, { 180,2580 }, { 181,2580 },
+ { 182,2580 }, { 183,2580 }, { 184,2580 }, { 185,2580 }, { 186,2580 },
+ { 187,2580 }, { 188,2580 }, { 189,2580 }, { 190,2580 }, { 191,2580 },
+ { 192,2580 }, { 193,2580 }, { 194,2580 }, { 195,2580 }, { 196,2580 },
+
+ { 197,2580 }, { 198,2580 }, { 199,2580 }, { 200,2580 }, { 201,2580 },
+ { 202,2580 }, { 203,2580 }, { 204,2580 }, { 205,2580 }, { 206,2580 },
+ { 207,2580 }, { 208,2580 }, { 209,2580 }, { 210,2580 }, { 211,2580 },
+ { 212,2580 }, { 213,2580 }, { 214,2580 }, { 215,2580 }, { 216,2580 },
+ { 217,2580 }, { 218,2580 }, { 219,2580 }, { 220,2580 }, { 221,2580 },
+ { 222,2580 }, { 223,2580 }, { 224,2580 }, { 225,2580 }, { 226,2580 },
+ { 227,2580 }, { 228,2580 }, { 229,2580 }, { 230,2580 }, { 231,2580 },
+ { 232,2580 }, { 233,2580 }, { 234,2580 }, { 235,2580 }, { 236,2580 },
+ { 237,2580 }, { 238,2580 }, { 239,2580 }, { 240,2580 }, { 241,2580 },
+ { 242,2580 }, { 243,2580 }, { 244,2580 }, { 245,2580 }, { 246,2580 },
+
+ { 247,2580 }, { 248,2580 }, { 249,2580 }, { 250,2580 }, { 251,2580 },
+ { 252,2580 }, { 253,2580 }, { 254,2580 }, { 255,2580 }, { 256,2580 },
+ { 0, 0 }, { 0,10042 }, { 1,2322 }, { 2,2322 }, { 3,2322 },
+ { 4,2322 }, { 5,2322 }, { 6,2322 }, { 7,2322 }, { 8,2322 },
+ { 9,2580 }, { 10,2580 }, { 11,2322 }, { 12,2580 }, { 13,2580 },
+ { 14,2322 }, { 15,2322 }, { 16,2322 }, { 17,2322 }, { 18,2322 },
+ { 19,2322 }, { 20,2322 }, { 21,2322 }, { 22,2322 }, { 23,2322 },
+ { 24,2322 }, { 25,2322 }, { 26,2322 }, { 27,2322 }, { 28,2322 },
+ { 29,2322 }, { 30,2322 }, { 31,2322 }, { 32,2580 }, { 33,2384 },
+ { 34,2386 }, { 35,2414 }, { 36,2599 }, { 37,2414 }, { 38,2596 },
+
+ { 39,2322 }, { 40,2414 }, { 41,2414 }, { 42,2857 }, { 43,2414 },
+ { 44,2414 }, { 45,2414 }, { 46,2859 }, { 47,2861 }, { 48,2918 },
+ { 49,3176 }, { 50,3176 }, { 51,3176 }, { 52,3176 }, { 53,3176 },
+ { 54,3176 }, { 55,3176 }, { 56,3176 }, { 57,3176 }, { 58,2414 },
+ { 59,2322 }, { 60,2866 }, { 61,2869 }, { 62,2889 }, { 63,2414 },
+ { 64,2414 }, { 65,2322 }, { 66,2322 }, { 67,2322 }, { 68,2322 },
+ { 69,2322 }, { 70,2322 }, { 71,2322 }, { 72,2322 }, { 73,2322 },
+ { 74,2322 }, { 75,2322 }, { 76,2322 }, { 77,2322 }, { 78,2322 },
+ { 79,2322 }, { 80,2322 }, { 81,2322 }, { 82,2322 }, { 83,2322 },
+ { 84,2322 }, { 85,2322 }, { 86,2322 }, { 87,2322 }, { 88,2322 },
+
+ { 89,2322 }, { 90,2322 }, { 91,2414 }, { 92,2891 }, { 93,2414 },
+ { 94,2322 }, { 95,2322 }, { 96,2322 }, { 97,2322 }, { 98,2322 },
+ { 99,2322 }, { 100,2322 }, { 101,2322 }, { 102,2322 }, { 103,2322 },
+ { 104,2322 }, { 105,2322 }, { 106,2322 }, { 107,2322 }, { 108,2322 },
+ { 109,2322 }, { 110,2322 }, { 111,2322 }, { 112,2322 }, { 113,2322 },
+ { 114,2322 }, { 115,2322 }, { 116,2322 }, { 117,2322 }, { 118,2322 },
+ { 119,2322 }, { 120,2322 }, { 121,2322 }, { 122,2322 }, { 123,2414 },
+ { 124,3434 }, { 125,2414 }, { 126,2322 }, { 127,2322 }, { 128,2322 },
+ { 129,2322 }, { 130,2322 }, { 131,2322 }, { 132,2322 }, { 133,2322 },
+ { 134,2322 }, { 135,2322 }, { 136,2322 }, { 137,2322 }, { 138,2322 },
+
+ { 139,2322 }, { 140,2322 }, { 141,2322 }, { 142,2322 }, { 143,2322 },
+ { 144,2322 }, { 145,2322 }, { 146,2322 }, { 147,2322 }, { 148,2322 },
+ { 149,2322 }, { 150,2322 }, { 151,2322 }, { 152,2322 }, { 153,2322 },
+ { 154,2322 }, { 155,2322 }, { 156,2322 }, { 157,2322 }, { 158,2322 },
+ { 159,2322 }, { 160,2322 }, { 161,2322 }, { 162,2322 }, { 163,2322 },
+ { 164,2322 }, { 165,2322 }, { 166,2322 }, { 167,2322 }, { 168,2322 },
+ { 169,2322 }, { 170,2322 }, { 171,2322 }, { 172,2322 }, { 173,2322 },
+ { 174,2322 }, { 175,2322 }, { 176,2322 }, { 177,2322 }, { 178,2322 },
+ { 179,2322 }, { 180,2322 }, { 181,2322 }, { 182,2322 }, { 183,2322 },
+ { 184,2322 }, { 185,2322 }, { 186,2322 }, { 187,2322 }, { 188,2322 },
+
+ { 189,2322 }, { 190,2322 }, { 191,2322 }, { 192,2322 }, { 193,2322 },
+ { 194,2322 }, { 195,2322 }, { 196,2322 }, { 197,2322 }, { 198,2322 },
+ { 199,2322 }, { 200,2322 }, { 201,2322 }, { 202,2322 }, { 203,2322 },
+ { 204,2322 }, { 205,2322 }, { 206,2322 }, { 207,2322 }, { 208,2322 },
+ { 209,2322 }, { 210,2322 }, { 211,2322 }, { 212,2322 }, { 213,2322 },
+ { 214,2322 }, { 215,2322 }, { 216,2322 }, { 217,2322 }, { 218,2322 },
+ { 219,2322 }, { 220,2322 }, { 221,2322 }, { 222,2322 }, { 223,2322 },
+ { 224,2322 }, { 225,2322 }, { 226,2322 }, { 227,2322 }, { 228,2322 },
+ { 229,2322 }, { 230,2322 }, { 231,2322 }, { 232,2322 }, { 233,2322 },
+ { 234,2322 }, { 235,2322 }, { 236,2322 }, { 237,2322 }, { 238,2322 },
+
+ { 239,2322 }, { 240,2322 }, { 241,2322 }, { 242,2322 }, { 243,2322 },
+ { 244,2322 }, { 245,2322 }, { 246,2322 }, { 247,2322 }, { 248,2322 },
+ { 249,2322 }, { 250,2322 }, { 251,2322 }, { 252,2322 }, { 253,2322 },
+ { 254,2322 }, { 255,2322 }, { 256,2322 }, { 0, 0 }, { 0,9784 },
+ { 1,3208 }, { 2,3208 }, { 3,3208 }, { 4,3208 }, { 5,3208 },
+ { 6,3208 }, { 7,3208 }, { 8,3208 }, { 9,3208 }, { 10,3208 },
+ { 11,3208 }, { 12,3208 }, { 13,3208 }, { 14,3208 }, { 15,3208 },
+ { 16,3208 }, { 17,3208 }, { 18,3208 }, { 19,3208 }, { 20,3208 },
+ { 21,3208 }, { 22,3208 }, { 23,3208 }, { 24,3208 }, { 25,3208 },
+ { 26,3208 }, { 27,3208 }, { 28,3208 }, { 29,3208 }, { 30,3208 },
+
+ { 31,3208 }, { 32,3208 }, { 33,3208 }, { 34,3178 }, { 35,3208 },
+ { 36,3208 }, { 37,3208 }, { 38,3208 }, { 39,3208 }, { 40,3208 },
+ { 41,3208 }, { 42,3208 }, { 43,3208 }, { 44,3208 }, { 45,3208 },
+ { 46,3208 }, { 47,3208 }, { 48,3208 }, { 49,3208 }, { 50,3208 },
+ { 51,3208 }, { 52,3208 }, { 53,3208 }, { 54,3208 }, { 55,3208 },
+ { 56,3208 }, { 57,3208 }, { 58,3208 }, { 59,3208 }, { 60,3208 },
+ { 61,3208 }, { 62,3208 }, { 63,3208 }, { 64,3208 }, { 65,3208 },
+ { 66,3208 }, { 67,3208 }, { 68,3208 }, { 69,3208 }, { 70,3208 },
+ { 71,3208 }, { 72,3208 }, { 73,3208 }, { 74,3208 }, { 75,3208 },
+ { 76,3208 }, { 77,3208 }, { 78,3208 }, { 79,3208 }, { 80,3208 },
+
+ { 81,3208 }, { 82,3208 }, { 83,3208 }, { 84,3208 }, { 85,3208 },
+ { 86,3208 }, { 87,3208 }, { 88,3208 }, { 89,3208 }, { 90,3208 },
+ { 91,3208 }, { 92,3466 }, { 93,3208 }, { 94,3208 }, { 95,3208 },
+ { 96,3208 }, { 97,3208 }, { 98,3208 }, { 99,3208 }, { 100,3208 },
+ { 101,3208 }, { 102,3208 }, { 103,3208 }, { 104,3208 }, { 105,3208 },
+ { 106,3208 }, { 107,3208 }, { 108,3208 }, { 109,3208 }, { 110,3208 },
+ { 111,3208 }, { 112,3208 }, { 113,3208 }, { 114,3208 }, { 115,3208 },
+ { 116,3208 }, { 117,3208 }, { 118,3208 }, { 119,3208 }, { 120,3208 },
+ { 121,3208 }, { 122,3208 }, { 123,3208 }, { 124,3208 }, { 125,3208 },
+ { 126,3208 }, { 127,3208 }, { 128,3208 }, { 129,3208 }, { 130,3208 },
+
+ { 131,3208 }, { 132,3208 }, { 133,3208 }, { 134,3208 }, { 135,3208 },
+ { 136,3208 }, { 137,3208 }, { 138,3208 }, { 139,3208 }, { 140,3208 },
+ { 141,3208 }, { 142,3208 }, { 143,3208 }, { 144,3208 }, { 145,3208 },
+ { 146,3208 }, { 147,3208 }, { 148,3208 }, { 149,3208 }, { 150,3208 },
+ { 151,3208 }, { 152,3208 }, { 153,3208 }, { 154,3208 }, { 155,3208 },
+ { 156,3208 }, { 157,3208 }, { 158,3208 }, { 159,3208 }, { 160,3208 },
+ { 161,3208 }, { 162,3208 }, { 163,3208 }, { 164,3208 }, { 165,3208 },
+ { 166,3208 }, { 167,3208 }, { 168,3208 }, { 169,3208 }, { 170,3208 },
+ { 171,3208 }, { 172,3208 }, { 173,3208 }, { 174,3208 }, { 175,3208 },
+ { 176,3208 }, { 177,3208 }, { 178,3208 }, { 179,3208 }, { 180,3208 },
+
+ { 181,3208 }, { 182,3208 }, { 183,3208 }, { 184,3208 }, { 185,3208 },
+ { 186,3208 }, { 187,3208 }, { 188,3208 }, { 189,3208 }, { 190,3208 },
+ { 191,3208 }, { 192,3208 }, { 193,3208 }, { 194,3208 }, { 195,3208 },
+ { 196,3208 }, { 197,3208 }, { 198,3208 }, { 199,3208 }, { 200,3208 },
+ { 201,3208 }, { 202,3208 }, { 203,3208 }, { 204,3208 }, { 205,3208 },
+ { 206,3208 }, { 207,3208 }, { 208,3208 }, { 209,3208 }, { 210,3208 },
+ { 211,3208 }, { 212,3208 }, { 213,3208 }, { 214,3208 }, { 215,3208 },
+ { 216,3208 }, { 217,3208 }, { 218,3208 }, { 219,3208 }, { 220,3208 },
+ { 221,3208 }, { 222,3208 }, { 223,3208 }, { 224,3208 }, { 225,3208 },
+ { 226,3208 }, { 227,3208 }, { 228,3208 }, { 229,3208 }, { 230,3208 },
+
+ { 231,3208 }, { 232,3208 }, { 233,3208 }, { 234,3208 }, { 235,3208 },
+ { 236,3208 }, { 237,3208 }, { 238,3208 }, { 239,3208 }, { 240,3208 },
+ { 241,3208 }, { 242,3208 }, { 243,3208 }, { 244,3208 }, { 245,3208 },
+ { 246,3208 }, { 247,3208 }, { 248,3208 }, { 249,3208 }, { 250,3208 },
+ { 251,3208 }, { 252,3208 }, { 253,3208 }, { 254,3208 }, { 255,3208 },
+ { 256,3208 }, { 0, 0 }, { 0,9526 }, { 1,2950 }, { 2,2950 },
+ { 3,2950 }, { 4,2950 }, { 5,2950 }, { 6,2950 }, { 7,2950 },
+ { 8,2950 }, { 9,2950 }, { 10,2950 }, { 11,2950 }, { 12,2950 },
+ { 13,2950 }, { 14,2950 }, { 15,2950 }, { 16,2950 }, { 17,2950 },
+ { 18,2950 }, { 19,2950 }, { 20,2950 }, { 21,2950 }, { 22,2950 },
+
+ { 23,2950 }, { 24,2950 }, { 25,2950 }, { 26,2950 }, { 27,2950 },
+ { 28,2950 }, { 29,2950 }, { 30,2950 }, { 31,2950 }, { 32,2950 },
+ { 33,2950 }, { 34,2920 }, { 35,2950 }, { 36,2950 }, { 37,2950 },
+ { 38,2950 }, { 39,2950 }, { 40,2950 }, { 41,2950 }, { 42,2950 },
+ { 43,2950 }, { 44,2950 }, { 45,2950 }, { 46,2950 }, { 47,2950 },
+ { 48,2950 }, { 49,2950 }, { 50,2950 }, { 51,2950 }, { 52,2950 },
+ { 53,2950 }, { 54,2950 }, { 55,2950 }, { 56,2950 }, { 57,2950 },
+ { 58,2950 }, { 59,2950 }, { 60,2950 }, { 61,2950 }, { 62,2950 },
+ { 63,2950 }, { 64,2950 }, { 65,2950 }, { 66,2950 }, { 67,2950 },
+ { 68,2950 }, { 69,2950 }, { 70,2950 }, { 71,2950 }, { 72,2950 },
+
+ { 73,2950 }, { 74,2950 }, { 75,2950 }, { 76,2950 }, { 77,2950 },
+ { 78,2950 }, { 79,2950 }, { 80,2950 }, { 81,2950 }, { 82,2950 },
+ { 83,2950 }, { 84,2950 }, { 85,2950 }, { 86,2950 }, { 87,2950 },
+ { 88,2950 }, { 89,2950 }, { 90,2950 }, { 91,2950 }, { 92,3208 },
+ { 93,2950 }, { 94,2950 }, { 95,2950 }, { 96,2950 }, { 97,2950 },
+ { 98,2950 }, { 99,2950 }, { 100,2950 }, { 101,2950 }, { 102,2950 },
+ { 103,2950 }, { 104,2950 }, { 105,2950 }, { 106,2950 }, { 107,2950 },
+ { 108,2950 }, { 109,2950 }, { 110,2950 }, { 111,2950 }, { 112,2950 },
+ { 113,2950 }, { 114,2950 }, { 115,2950 }, { 116,2950 }, { 117,2950 },
+ { 118,2950 }, { 119,2950 }, { 120,2950 }, { 121,2950 }, { 122,2950 },
+
+ { 123,2950 }, { 124,2950 }, { 125,2950 }, { 126,2950 }, { 127,2950 },
+ { 128,2950 }, { 129,2950 }, { 130,2950 }, { 131,2950 }, { 132,2950 },
+ { 133,2950 }, { 134,2950 }, { 135,2950 }, { 136,2950 }, { 137,2950 },
+ { 138,2950 }, { 139,2950 }, { 140,2950 }, { 141,2950 }, { 142,2950 },
+ { 143,2950 }, { 144,2950 }, { 145,2950 }, { 146,2950 }, { 147,2950 },
+ { 148,2950 }, { 149,2950 }, { 150,2950 }, { 151,2950 }, { 152,2950 },
+ { 153,2950 }, { 154,2950 }, { 155,2950 }, { 156,2950 }, { 157,2950 },
+ { 158,2950 }, { 159,2950 }, { 160,2950 }, { 161,2950 }, { 162,2950 },
+ { 163,2950 }, { 164,2950 }, { 165,2950 }, { 166,2950 }, { 167,2950 },
+ { 168,2950 }, { 169,2950 }, { 170,2950 }, { 171,2950 }, { 172,2950 },
+
+ { 173,2950 }, { 174,2950 }, { 175,2950 }, { 176,2950 }, { 177,2950 },
+ { 178,2950 }, { 179,2950 }, { 180,2950 }, { 181,2950 }, { 182,2950 },
+ { 183,2950 }, { 184,2950 }, { 185,2950 }, { 186,2950 }, { 187,2950 },
+ { 188,2950 }, { 189,2950 }, { 190,2950 }, { 191,2950 }, { 192,2950 },
+ { 193,2950 }, { 194,2950 }, { 195,2950 }, { 196,2950 }, { 197,2950 },
+ { 198,2950 }, { 199,2950 }, { 200,2950 }, { 201,2950 }, { 202,2950 },
+ { 203,2950 }, { 204,2950 }, { 205,2950 }, { 206,2950 }, { 207,2950 },
+ { 208,2950 }, { 209,2950 }, { 210,2950 }, { 211,2950 }, { 212,2950 },
+ { 213,2950 }, { 214,2950 }, { 215,2950 }, { 216,2950 }, { 217,2950 },
+ { 218,2950 }, { 219,2950 }, { 220,2950 }, { 221,2950 }, { 222,2950 },
+
+ { 223,2950 }, { 224,2950 }, { 225,2950 }, { 226,2950 }, { 227,2950 },
+ { 228,2950 }, { 229,2950 }, { 230,2950 }, { 231,2950 }, { 232,2950 },
+ { 233,2950 }, { 234,2950 }, { 235,2950 }, { 236,2950 }, { 237,2950 },
+ { 238,2950 }, { 239,2950 }, { 240,2950 }, { 241,2950 }, { 242,2950 },
+ { 243,2950 }, { 244,2950 }, { 245,2950 }, { 246,2950 }, { 247,2950 },
+ { 248,2950 }, { 249,2950 }, { 250,2950 }, { 251,2950 }, { 252,2950 },
+ { 253,2950 }, { 254,2950 }, { 255,2950 }, { 256,2950 }, { 0, 0 },
+ { 0,9268 }, { 1,3208 }, { 2,3208 }, { 3,3208 }, { 4,3208 },
+ { 5,3208 }, { 6,3208 }, { 7,3208 }, { 8,3208 }, { 9,3466 },
+ { 10,3466 }, { 11,3208 }, { 12,3466 }, { 13,3466 }, { 14,3208 },
+
+ { 15,3208 }, { 16,3208 }, { 17,3208 }, { 18,3208 }, { 19,3208 },
+ { 20,3208 }, { 21,3208 }, { 22,3208 }, { 23,3208 }, { 24,3208 },
+ { 25,3208 }, { 26,3208 }, { 27,3208 }, { 28,3208 }, { 29,3208 },
+ { 30,3208 }, { 31,3208 }, { 32,3466 }, { 33,2664 }, { 34,2664 },
+ { 35,2664 }, { 36,2664 }, { 37,2664 }, { 38,2664 }, { 39,3208 },
+ { 40,2664 }, { 41,2664 }, { 42,2664 }, { 43,2664 }, { 44,2664 },
+ { 45,2664 }, { 46,2664 }, { 47,2684 }, { 48,3208 }, { 49,3208 },
+ { 50,3208 }, { 51,3208 }, { 52,3208 }, { 53,3208 }, { 54,3208 },
+ { 55,3208 }, { 56,3208 }, { 57,3208 }, { 58,2664 }, { 59,3208 },
+ { 60,2664 }, { 61,2664 }, { 62,2664 }, { 63,2664 }, { 64,2664 },
+
+ { 65,3208 }, { 66,3208 }, { 67,3208 }, { 68,3208 }, { 69,3208 },
+ { 70,3208 }, { 71,3208 }, { 72,3208 }, { 73,3208 }, { 74,3208 },
+ { 75,3208 }, { 76,3208 }, { 77,3208 }, { 78,3208 }, { 79,3208 },
+ { 80,3208 }, { 81,3208 }, { 82,3208 }, { 83,3208 }, { 84,3208 },
+ { 85,3208 }, { 86,3208 }, { 87,3208 }, { 88,3208 }, { 89,3208 },
+ { 90,3208 }, { 91,2664 }, { 92,2950 }, { 93,2664 }, { 94,3208 },
+ { 95,3208 }, { 96,3208 }, { 97,3208 }, { 98,3208 }, { 99,3208 },
+ { 100,3208 }, { 101,3208 }, { 102,3208 }, { 103,3208 }, { 104,3208 },
+ { 105,3208 }, { 106,3208 }, { 107,3208 }, { 108,3208 }, { 109,3208 },
+ { 110,3208 }, { 111,3208 }, { 112,3208 }, { 113,3208 }, { 114,3208 },
+
+ { 115,3208 }, { 116,3208 }, { 117,3208 }, { 118,3208 }, { 119,3208 },
+ { 120,3208 }, { 121,3208 }, { 122,3208 }, { 123,2664 }, { 124,2664 },
+ { 125,2664 }, { 126,3208 }, { 127,3208 }, { 128,3208 }, { 129,3208 },
+ { 130,3208 }, { 131,3208 }, { 132,3208 }, { 133,3208 }, { 134,3208 },
+ { 135,3208 }, { 136,3208 }, { 137,3208 }, { 138,3208 }, { 139,3208 },
+ { 140,3208 }, { 141,3208 }, { 142,3208 }, { 143,3208 }, { 144,3208 },
+ { 145,3208 }, { 146,3208 }, { 147,3208 }, { 148,3208 }, { 149,3208 },
+ { 150,3208 }, { 151,3208 }, { 152,3208 }, { 153,3208 }, { 154,3208 },
+ { 155,3208 }, { 156,3208 }, { 157,3208 }, { 158,3208 }, { 159,3208 },
+ { 160,3208 }, { 161,3208 }, { 162,3208 }, { 163,3208 }, { 164,3208 },
+
+ { 165,3208 }, { 166,3208 }, { 167,3208 }, { 168,3208 }, { 169,3208 },
+ { 170,3208 }, { 171,3208 }, { 172,3208 }, { 173,3208 }, { 174,3208 },
+ { 175,3208 }, { 176,3208 }, { 177,3208 }, { 178,3208 }, { 179,3208 },
+ { 180,3208 }, { 181,3208 }, { 182,3208 }, { 183,3208 }, { 184,3208 },
+ { 185,3208 }, { 186,3208 }, { 187,3208 }, { 188,3208 }, { 189,3208 },
+ { 190,3208 }, { 191,3208 }, { 192,3208 }, { 193,3208 }, { 194,3208 },
+ { 195,3208 }, { 196,3208 }, { 197,3208 }, { 198,3208 }, { 199,3208 },
+ { 200,3208 }, { 201,3208 }, { 202,3208 }, { 203,3208 }, { 204,3208 },
+ { 205,3208 }, { 206,3208 }, { 207,3208 }, { 208,3208 }, { 209,3208 },
+ { 210,3208 }, { 211,3208 }, { 212,3208 }, { 213,3208 }, { 214,3208 },
+
+ { 215,3208 }, { 216,3208 }, { 217,3208 }, { 218,3208 }, { 219,3208 },
+ { 220,3208 }, { 221,3208 }, { 222,3208 }, { 223,3208 }, { 224,3208 },
+ { 225,3208 }, { 226,3208 }, { 227,3208 }, { 228,3208 }, { 229,3208 },
+ { 230,3208 }, { 231,3208 }, { 232,3208 }, { 233,3208 }, { 234,3208 },
+ { 235,3208 }, { 236,3208 }, { 237,3208 }, { 238,3208 }, { 239,3208 },
+ { 240,3208 }, { 241,3208 }, { 242,3208 }, { 243,3208 }, { 244,3208 },
+ { 245,3208 }, { 246,3208 }, { 247,3208 }, { 248,3208 }, { 249,3208 },
+ { 250,3208 }, { 251,3208 }, { 252,3208 }, { 253,3208 }, { 254,3208 },
+ { 255,3208 }, { 256,3208 }, { 0, 0 }, { 0,9010 }, { 1,2950 },
+ { 2,2950 }, { 3,2950 }, { 4,2950 }, { 5,2950 }, { 6,2950 },
+
+ { 7,2950 }, { 8,2950 }, { 9,3208 }, { 10,3208 }, { 11,2950 },
+ { 12,3208 }, { 13,3208 }, { 14,2950 }, { 15,2950 }, { 16,2950 },
+ { 17,2950 }, { 18,2950 }, { 19,2950 }, { 20,2950 }, { 21,2950 },
+ { 22,2950 }, { 23,2950 }, { 24,2950 }, { 25,2950 }, { 26,2950 },
+ { 27,2950 }, { 28,2950 }, { 29,2950 }, { 30,2950 }, { 31,2950 },
+ { 32,3208 }, { 33,2406 }, { 34,2406 }, { 35,2406 }, { 36,2406 },
+ { 37,2406 }, { 38,2406 }, { 39,2950 }, { 40,2406 }, { 41,2406 },
+ { 42,2406 }, { 43,2406 }, { 44,2406 }, { 45,2406 }, { 46,2406 },
+ { 47,2426 }, { 48,2950 }, { 49,2950 }, { 50,2950 }, { 51,2950 },
+ { 52,2950 }, { 53,2950 }, { 54,2950 }, { 55,2950 }, { 56,2950 },
+
+ { 57,2950 }, { 58,2406 }, { 59,2950 }, { 60,2406 }, { 61,2406 },
+ { 62,2406 }, { 63,2406 }, { 64,2406 }, { 65,2950 }, { 66,2950 },
+ { 67,2950 }, { 68,2950 }, { 69,2950 }, { 70,2950 }, { 71,2950 },
+ { 72,2950 }, { 73,2950 }, { 74,2950 }, { 75,2950 }, { 76,2950 },
+ { 77,2950 }, { 78,2950 }, { 79,2950 }, { 80,2950 }, { 81,2950 },
+ { 82,2950 }, { 83,2950 }, { 84,2950 }, { 85,2950 }, { 86,2950 },
+ { 87,2950 }, { 88,2950 }, { 89,2950 }, { 90,2950 }, { 91,2406 },
+ { 92,2692 }, { 93,2406 }, { 94,2950 }, { 95,2950 }, { 96,2950 },
+ { 97,2950 }, { 98,2950 }, { 99,2950 }, { 100,2950 }, { 101,2950 },
+ { 102,2950 }, { 103,2950 }, { 104,2950 }, { 105,2950 }, { 106,2950 },
+
+ { 107,2950 }, { 108,2950 }, { 109,2950 }, { 110,2950 }, { 111,2950 },
+ { 112,2950 }, { 113,2950 }, { 114,2950 }, { 115,2950 }, { 116,2950 },
+ { 117,2950 }, { 118,2950 }, { 119,2950 }, { 120,2950 }, { 121,2950 },
+ { 122,2950 }, { 123,2406 }, { 124,2406 }, { 125,2406 }, { 126,2950 },
+ { 127,2950 }, { 128,2950 }, { 129,2950 }, { 130,2950 }, { 131,2950 },
+ { 132,2950 }, { 133,2950 }, { 134,2950 }, { 135,2950 }, { 136,2950 },
+ { 137,2950 }, { 138,2950 }, { 139,2950 }, { 140,2950 }, { 141,2950 },
+ { 142,2950 }, { 143,2950 }, { 144,2950 }, { 145,2950 }, { 146,2950 },
+ { 147,2950 }, { 148,2950 }, { 149,2950 }, { 150,2950 }, { 151,2950 },
+ { 152,2950 }, { 153,2950 }, { 154,2950 }, { 155,2950 }, { 156,2950 },
+
+ { 157,2950 }, { 158,2950 }, { 159,2950 }, { 160,2950 }, { 161,2950 },
+ { 162,2950 }, { 163,2950 }, { 164,2950 }, { 165,2950 }, { 166,2950 },
+ { 167,2950 }, { 168,2950 }, { 169,2950 }, { 170,2950 }, { 171,2950 },
+ { 172,2950 }, { 173,2950 }, { 174,2950 }, { 175,2950 }, { 176,2950 },
+ { 177,2950 }, { 178,2950 }, { 179,2950 }, { 180,2950 }, { 181,2950 },
+ { 182,2950 }, { 183,2950 }, { 184,2950 }, { 185,2950 }, { 186,2950 },
+ { 187,2950 }, { 188,2950 }, { 189,2950 }, { 190,2950 }, { 191,2950 },
+ { 192,2950 }, { 193,2950 }, { 194,2950 }, { 195,2950 }, { 196,2950 },
+ { 197,2950 }, { 198,2950 }, { 199,2950 }, { 200,2950 }, { 201,2950 },
+ { 202,2950 }, { 203,2950 }, { 204,2950 }, { 205,2950 }, { 206,2950 },
+
+ { 207,2950 }, { 208,2950 }, { 209,2950 }, { 210,2950 }, { 211,2950 },
+ { 212,2950 }, { 213,2950 }, { 214,2950 }, { 215,2950 }, { 216,2950 },
+ { 217,2950 }, { 218,2950 }, { 219,2950 }, { 220,2950 }, { 221,2950 },
+ { 222,2950 }, { 223,2950 }, { 224,2950 }, { 225,2950 }, { 226,2950 },
+ { 227,2950 }, { 228,2950 }, { 229,2950 }, { 230,2950 }, { 231,2950 },
+ { 232,2950 }, { 233,2950 }, { 234,2950 }, { 235,2950 }, { 236,2950 },
+ { 237,2950 }, { 238,2950 }, { 239,2950 }, { 240,2950 }, { 241,2950 },
+ { 242,2950 }, { 243,2950 }, { 244,2950 }, { 245,2950 }, { 246,2950 },
+ { 247,2950 }, { 248,2950 }, { 249,2950 }, { 250,2950 }, { 251,2950 },
+ { 252,2950 }, { 253,2950 }, { 254,2950 }, { 255,2950 }, { 256,2950 },
+
+ { 0, 0 }, { 0,8752 }, { 1,2176 }, { 2,2176 }, { 3,2176 },
+ { 4,2176 }, { 5,2176 }, { 6,2176 }, { 7,2176 }, { 8,2176 },
+ { 9,2176 }, { 10,2176 }, { 11,2176 }, { 12,2176 }, { 13,2176 },
+ { 14,2176 }, { 15,2176 }, { 16,2176 }, { 17,2176 }, { 18,2176 },
+ { 19,2176 }, { 20,2176 }, { 21,2176 }, { 22,2176 }, { 23,2176 },
+ { 24,2176 }, { 25,2176 }, { 26,2176 }, { 27,2176 }, { 28,2176 },
+ { 29,2176 }, { 30,2176 }, { 31,2176 }, { 32,2176 }, { 33,2176 },
+ { 34,2170 }, { 35,2176 }, { 36,2176 }, { 37,2176 }, { 38,2176 },
+ { 39,2176 }, { 40,2176 }, { 41,2176 }, { 42,2176 }, { 43,2176 },
+ { 44,2176 }, { 45,2176 }, { 46,2176 }, { 47,2176 }, { 48,2176 },
+
+ { 49,2176 }, { 50,2176 }, { 51,2176 }, { 52,2176 }, { 53,2176 },
+ { 54,2176 }, { 55,2176 }, { 56,2176 }, { 57,2176 }, { 58,2176 },
+ { 59,2176 }, { 60,2176 }, { 61,2176 }, { 62,2176 }, { 63,2176 },
+ { 64,2176 }, { 65,2176 }, { 66,2176 }, { 67,2176 }, { 68,2176 },
+ { 69,2176 }, { 70,2176 }, { 71,2176 }, { 72,2176 }, { 73,2176 },
+ { 74,2176 }, { 75,2176 }, { 76,2176 }, { 77,2176 }, { 78,2176 },
+ { 79,2176 }, { 80,2176 }, { 81,2176 }, { 82,2176 }, { 83,2176 },
+ { 84,2176 }, { 85,2176 }, { 86,2176 }, { 87,2176 }, { 88,2176 },
+ { 89,2176 }, { 90,2176 }, { 91,2176 }, { 92,2434 }, { 93,2176 },
+ { 94,2176 }, { 95,2176 }, { 96,2176 }, { 97,2176 }, { 98,2176 },
+
+ { 99,2176 }, { 100,2176 }, { 101,2176 }, { 102,2176 }, { 103,2176 },
+ { 104,2176 }, { 105,2176 }, { 106,2176 }, { 107,2176 }, { 108,2176 },
+ { 109,2176 }, { 110,2176 }, { 111,2176 }, { 112,2176 }, { 113,2176 },
+ { 114,2176 }, { 115,2176 }, { 116,2176 }, { 117,2176 }, { 118,2176 },
+ { 119,2176 }, { 120,2176 }, { 121,2176 }, { 122,2176 }, { 123,2176 },
+ { 124,2176 }, { 125,2176 }, { 126,2176 }, { 127,2176 }, { 128,2176 },
+ { 129,2176 }, { 130,2176 }, { 131,2176 }, { 132,2176 }, { 133,2176 },
+ { 134,2176 }, { 135,2176 }, { 136,2176 }, { 137,2176 }, { 138,2176 },
+ { 139,2176 }, { 140,2176 }, { 141,2176 }, { 142,2176 }, { 143,2176 },
+ { 144,2176 }, { 145,2176 }, { 146,2176 }, { 147,2176 }, { 148,2176 },
+
+ { 149,2176 }, { 150,2176 }, { 151,2176 }, { 152,2176 }, { 153,2176 },
+ { 154,2176 }, { 155,2176 }, { 156,2176 }, { 157,2176 }, { 158,2176 },
+ { 159,2176 }, { 160,2176 }, { 161,2176 }, { 162,2176 }, { 163,2176 },
+ { 164,2176 }, { 165,2176 }, { 166,2176 }, { 167,2176 }, { 168,2176 },
+ { 169,2176 }, { 170,2176 }, { 171,2176 }, { 172,2176 }, { 173,2176 },
+ { 174,2176 }, { 175,2176 }, { 176,2176 }, { 177,2176 }, { 178,2176 },
+ { 179,2176 }, { 180,2176 }, { 181,2176 }, { 182,2176 }, { 183,2176 },
+ { 184,2176 }, { 185,2176 }, { 186,2176 }, { 187,2176 }, { 188,2176 },
+ { 189,2176 }, { 190,2176 }, { 191,2176 }, { 192,2176 }, { 193,2176 },
+ { 194,2176 }, { 195,2176 }, { 196,2176 }, { 197,2176 }, { 198,2176 },
+
+ { 199,2176 }, { 200,2176 }, { 201,2176 }, { 202,2176 }, { 203,2176 },
+ { 204,2176 }, { 205,2176 }, { 206,2176 }, { 207,2176 }, { 208,2176 },
+ { 209,2176 }, { 210,2176 }, { 211,2176 }, { 212,2176 }, { 213,2176 },
+ { 214,2176 }, { 215,2176 }, { 216,2176 }, { 217,2176 }, { 218,2176 },
+ { 219,2176 }, { 220,2176 }, { 221,2176 }, { 222,2176 }, { 223,2176 },
+ { 224,2176 }, { 225,2176 }, { 226,2176 }, { 227,2176 }, { 228,2176 },
+ { 229,2176 }, { 230,2176 }, { 231,2176 }, { 232,2176 }, { 233,2176 },
+ { 234,2176 }, { 235,2176 }, { 236,2176 }, { 237,2176 }, { 238,2176 },
+ { 239,2176 }, { 240,2176 }, { 241,2176 }, { 242,2176 }, { 243,2176 },
+ { 244,2176 }, { 245,2176 }, { 246,2176 }, { 247,2176 }, { 248,2176 },
+
+ { 249,2176 }, { 250,2176 }, { 251,2176 }, { 252,2176 }, { 253,2176 },
+ { 254,2176 }, { 255,2176 }, { 256,2176 }, { 0, 0 }, { 0,8494 },
+ { 1,1918 }, { 2,1918 }, { 3,1918 }, { 4,1918 }, { 5,1918 },
+ { 6,1918 }, { 7,1918 }, { 8,1918 }, { 9,1918 }, { 10,1918 },
+ { 11,1918 }, { 12,1918 }, { 13,1918 }, { 14,1918 }, { 15,1918 },
+ { 16,1918 }, { 17,1918 }, { 18,1918 }, { 19,1918 }, { 20,1918 },
+ { 21,1918 }, { 22,1918 }, { 23,1918 }, { 24,1918 }, { 25,1918 },
+ { 26,1918 }, { 27,1918 }, { 28,1918 }, { 29,1918 }, { 30,1918 },
+ { 31,1918 }, { 32,1918 }, { 33,1918 }, { 34,1912 }, { 35,1918 },
+ { 36,1918 }, { 37,1918 }, { 38,1918 }, { 39,1918 }, { 40,1918 },
+
+ { 41,1918 }, { 42,1918 }, { 43,1918 }, { 44,1918 }, { 45,1918 },
+ { 46,1918 }, { 47,1918 }, { 48,1918 }, { 49,1918 }, { 50,1918 },
+ { 51,1918 }, { 52,1918 }, { 53,1918 }, { 54,1918 }, { 55,1918 },
+ { 56,1918 }, { 57,1918 }, { 58,1918 }, { 59,1918 }, { 60,1918 },
+ { 61,1918 }, { 62,1918 }, { 63,1918 }, { 64,1918 }, { 65,1918 },
+ { 66,1918 }, { 67,1918 }, { 68,1918 }, { 69,1918 }, { 70,1918 },
+ { 71,1918 }, { 72,1918 }, { 73,1918 }, { 74,1918 }, { 75,1918 },
+ { 76,1918 }, { 77,1918 }, { 78,1918 }, { 79,1918 }, { 80,1918 },
+ { 81,1918 }, { 82,1918 }, { 83,1918 }, { 84,1918 }, { 85,1918 },
+ { 86,1918 }, { 87,1918 }, { 88,1918 }, { 89,1918 }, { 90,1918 },
+
+ { 91,1918 }, { 92,2176 }, { 93,1918 }, { 94,1918 }, { 95,1918 },
+ { 96,1918 }, { 97,1918 }, { 98,1918 }, { 99,1918 }, { 100,1918 },
+ { 101,1918 }, { 102,1918 }, { 103,1918 }, { 104,1918 }, { 105,1918 },
+ { 106,1918 }, { 107,1918 }, { 108,1918 }, { 109,1918 }, { 110,1918 },
+ { 111,1918 }, { 112,1918 }, { 113,1918 }, { 114,1918 }, { 115,1918 },
+ { 116,1918 }, { 117,1918 }, { 118,1918 }, { 119,1918 }, { 120,1918 },
+ { 121,1918 }, { 122,1918 }, { 123,1918 }, { 124,1918 }, { 125,1918 },
+ { 126,1918 }, { 127,1918 }, { 128,1918 }, { 129,1918 }, { 130,1918 },
+ { 131,1918 }, { 132,1918 }, { 133,1918 }, { 134,1918 }, { 135,1918 },
+ { 136,1918 }, { 137,1918 }, { 138,1918 }, { 139,1918 }, { 140,1918 },
+
+ { 141,1918 }, { 142,1918 }, { 143,1918 }, { 144,1918 }, { 145,1918 },
+ { 146,1918 }, { 147,1918 }, { 148,1918 }, { 149,1918 }, { 150,1918 },
+ { 151,1918 }, { 152,1918 }, { 153,1918 }, { 154,1918 }, { 155,1918 },
+ { 156,1918 }, { 157,1918 }, { 158,1918 }, { 159,1918 }, { 160,1918 },
+ { 161,1918 }, { 162,1918 }, { 163,1918 }, { 164,1918 }, { 165,1918 },
+ { 166,1918 }, { 167,1918 }, { 168,1918 }, { 169,1918 }, { 170,1918 },
+ { 171,1918 }, { 172,1918 }, { 173,1918 }, { 174,1918 }, { 175,1918 },
+ { 176,1918 }, { 177,1918 }, { 178,1918 }, { 179,1918 }, { 180,1918 },
+ { 181,1918 }, { 182,1918 }, { 183,1918 }, { 184,1918 }, { 185,1918 },
+ { 186,1918 }, { 187,1918 }, { 188,1918 }, { 189,1918 }, { 190,1918 },
+
+ { 191,1918 }, { 192,1918 }, { 193,1918 }, { 194,1918 }, { 195,1918 },
+ { 196,1918 }, { 197,1918 }, { 198,1918 }, { 199,1918 }, { 200,1918 },
+ { 201,1918 }, { 202,1918 }, { 203,1918 }, { 204,1918 }, { 205,1918 },
+ { 206,1918 }, { 207,1918 }, { 208,1918 }, { 209,1918 }, { 210,1918 },
+ { 211,1918 }, { 212,1918 }, { 213,1918 }, { 214,1918 }, { 215,1918 },
+ { 216,1918 }, { 217,1918 }, { 218,1918 }, { 219,1918 }, { 220,1918 },
+ { 221,1918 }, { 222,1918 }, { 223,1918 }, { 224,1918 }, { 225,1918 },
+ { 226,1918 }, { 227,1918 }, { 228,1918 }, { 229,1918 }, { 230,1918 },
+ { 231,1918 }, { 232,1918 }, { 233,1918 }, { 234,1918 }, { 235,1918 },
+ { 236,1918 }, { 237,1918 }, { 238,1918 }, { 239,1918 }, { 240,1918 },
+
+ { 241,1918 }, { 242,1918 }, { 243,1918 }, { 244,1918 }, { 245,1918 },
+ { 246,1918 }, { 247,1918 }, { 248,1918 }, { 249,1918 }, { 250,1918 },
+ { 251,1918 }, { 252,1918 }, { 253,1918 }, { 254,1918 }, { 255,1918 },
+ { 256,1918 }, { 0, 0 }, { 0,8236 }, { 1,2468 }, { 2,2468 },
+ { 3,2468 }, { 4,2468 }, { 5,2468 }, { 6,2468 }, { 7,2468 },
+ { 8,2468 }, { 9,2468 }, { 10,2468 }, { 11,2468 }, { 12,2468 },
+ { 13,2468 }, { 14,2468 }, { 15,2468 }, { 16,2468 }, { 17,2468 },
+ { 18,2468 }, { 19,2468 }, { 20,2468 }, { 21,2468 }, { 22,2468 },
+ { 23,2468 }, { 24,2468 }, { 25,2468 }, { 26,2468 }, { 27,2468 },
+ { 28,2468 }, { 29,2468 }, { 30,2468 }, { 31,2468 }, { 32,2468 },
+
+ { 33,2468 }, { 34,2468 }, { 35,2468 }, { 36,2468 }, { 37,2468 },
+ { 38,2468 }, { 39,2468 }, { 40,2468 }, { 41,2468 }, { 42,2189 },
+ { 43,2468 }, { 44,2468 }, { 45,2468 }, { 46,2468 }, { 47,2468 },
+ { 48,2468 }, { 49,2468 }, { 50,2468 }, { 51,2468 }, { 52,2468 },
+ { 53,2468 }, { 54,2468 }, { 55,2468 }, { 56,2468 }, { 57,2468 },
+ { 58,2468 }, { 59,2468 }, { 60,2468 }, { 61,2468 }, { 62,2468 },
+ { 63,2468 }, { 64,2468 }, { 65,2468 }, { 66,2468 }, { 67,2468 },
+ { 68,2468 }, { 69,2468 }, { 70,2468 }, { 71,2468 }, { 72,2468 },
+ { 73,2468 }, { 74,2468 }, { 75,2468 }, { 76,2468 }, { 77,2468 },
+ { 78,2468 }, { 79,2468 }, { 80,2468 }, { 81,2468 }, { 82,2468 },
+
+ { 83,2468 }, { 84,2468 }, { 85,2468 }, { 86,2468 }, { 87,2468 },
+ { 88,2468 }, { 89,2468 }, { 90,2468 }, { 91,2468 }, { 92,2468 },
+ { 93,2468 }, { 94,2468 }, { 95,2468 }, { 96,2468 }, { 97,2468 },
+ { 98,2468 }, { 99,2468 }, { 100,2468 }, { 101,2468 }, { 102,2468 },
+ { 103,2468 }, { 104,2468 }, { 105,2468 }, { 106,2468 }, { 107,2468 },
+ { 108,2468 }, { 109,2468 }, { 110,2468 }, { 111,2468 }, { 112,2468 },
+ { 113,2468 }, { 114,2468 }, { 115,2468 }, { 116,2468 }, { 117,2468 },
+ { 118,2468 }, { 119,2468 }, { 120,2468 }, { 121,2468 }, { 122,2468 },
+ { 123,2468 }, { 124,2468 }, { 125,2468 }, { 126,2468 }, { 127,2468 },
+ { 128,2468 }, { 129,2468 }, { 130,2468 }, { 131,2468 }, { 132,2468 },
+
+ { 133,2468 }, { 134,2468 }, { 135,2468 }, { 136,2468 }, { 137,2468 },
+ { 138,2468 }, { 139,2468 }, { 140,2468 }, { 141,2468 }, { 142,2468 },
+ { 143,2468 }, { 144,2468 }, { 145,2468 }, { 146,2468 }, { 147,2468 },
+ { 148,2468 }, { 149,2468 }, { 150,2468 }, { 151,2468 }, { 152,2468 },
+ { 153,2468 }, { 154,2468 }, { 155,2468 }, { 156,2468 }, { 157,2468 },
+ { 158,2468 }, { 159,2468 }, { 160,2468 }, { 161,2468 }, { 162,2468 },
+ { 163,2468 }, { 164,2468 }, { 165,2468 }, { 166,2468 }, { 167,2468 },
+ { 168,2468 }, { 169,2468 }, { 170,2468 }, { 171,2468 }, { 172,2468 },
+ { 173,2468 }, { 174,2468 }, { 175,2468 }, { 176,2468 }, { 177,2468 },
+ { 178,2468 }, { 179,2468 }, { 180,2468 }, { 181,2468 }, { 182,2468 },
+
+ { 183,2468 }, { 184,2468 }, { 185,2468 }, { 186,2468 }, { 187,2468 },
+ { 188,2468 }, { 189,2468 }, { 190,2468 }, { 191,2468 }, { 192,2468 },
+ { 193,2468 }, { 194,2468 }, { 195,2468 }, { 196,2468 }, { 197,2468 },
+ { 198,2468 }, { 199,2468 }, { 200,2468 }, { 201,2468 }, { 202,2468 },
+ { 203,2468 }, { 204,2468 }, { 205,2468 }, { 206,2468 }, { 207,2468 },
+ { 208,2468 }, { 209,2468 }, { 210,2468 }, { 211,2468 }, { 212,2468 },
+ { 213,2468 }, { 214,2468 }, { 215,2468 }, { 216,2468 }, { 217,2468 },
+ { 218,2468 }, { 219,2468 }, { 220,2468 }, { 221,2468 }, { 222,2468 },
+ { 223,2468 }, { 224,2468 }, { 225,2468 }, { 226,2468 }, { 227,2468 },
+ { 228,2468 }, { 229,2468 }, { 230,2468 }, { 231,2468 }, { 232,2468 },
+
+ { 233,2468 }, { 234,2468 }, { 235,2468 }, { 236,2468 }, { 237,2468 },
+ { 238,2468 }, { 239,2468 }, { 240,2468 }, { 241,2468 }, { 242,2468 },
+ { 243,2468 }, { 244,2468 }, { 245,2468 }, { 246,2468 }, { 247,2468 },
+ { 248,2468 }, { 249,2468 }, { 250,2468 }, { 251,2468 }, { 252,2468 },
+ { 253,2468 }, { 254,2468 }, { 255,2468 }, { 256,2468 }, { 0, 0 },
+ { 0,7978 }, { 1,2210 }, { 2,2210 }, { 3,2210 }, { 4,2210 },
+ { 5,2210 }, { 6,2210 }, { 7,2210 }, { 8,2210 }, { 9,2210 },
+ { 10,2210 }, { 11,2210 }, { 12,2210 }, { 13,2210 }, { 14,2210 },
+ { 15,2210 }, { 16,2210 }, { 17,2210 }, { 18,2210 }, { 19,2210 },
+ { 20,2210 }, { 21,2210 }, { 22,2210 }, { 23,2210 }, { 24,2210 },
+
+ { 25,2210 }, { 26,2210 }, { 27,2210 }, { 28,2210 }, { 29,2210 },
+ { 30,2210 }, { 31,2210 }, { 32,2210 }, { 33,2210 }, { 34,2210 },
+ { 35,2210 }, { 36,2210 }, { 37,2210 }, { 38,2210 }, { 39,2210 },
+ { 40,2210 }, { 41,2210 }, { 42,1931 }, { 43,2210 }, { 44,2210 },
+ { 45,2210 }, { 46,2210 }, { 47,2210 }, { 48,2210 }, { 49,2210 },
+ { 50,2210 }, { 51,2210 }, { 52,2210 }, { 53,2210 }, { 54,2210 },
+ { 55,2210 }, { 56,2210 }, { 57,2210 }, { 58,2210 }, { 59,2210 },
+ { 60,2210 }, { 61,2210 }, { 62,2210 }, { 63,2210 }, { 64,2210 },
+ { 65,2210 }, { 66,2210 }, { 67,2210 }, { 68,2210 }, { 69,2210 },
+ { 70,2210 }, { 71,2210 }, { 72,2210 }, { 73,2210 }, { 74,2210 },
+
+ { 75,2210 }, { 76,2210 }, { 77,2210 }, { 78,2210 }, { 79,2210 },
+ { 80,2210 }, { 81,2210 }, { 82,2210 }, { 83,2210 }, { 84,2210 },
+ { 85,2210 }, { 86,2210 }, { 87,2210 }, { 88,2210 }, { 89,2210 },
+ { 90,2210 }, { 91,2210 }, { 92,2210 }, { 93,2210 }, { 94,2210 },
+ { 95,2210 }, { 96,2210 }, { 97,2210 }, { 98,2210 }, { 99,2210 },
+ { 100,2210 }, { 101,2210 }, { 102,2210 }, { 103,2210 }, { 104,2210 },
+ { 105,2210 }, { 106,2210 }, { 107,2210 }, { 108,2210 }, { 109,2210 },
+ { 110,2210 }, { 111,2210 }, { 112,2210 }, { 113,2210 }, { 114,2210 },
+ { 115,2210 }, { 116,2210 }, { 117,2210 }, { 118,2210 }, { 119,2210 },
+ { 120,2210 }, { 121,2210 }, { 122,2210 }, { 123,2210 }, { 124,2210 },
+
+ { 125,2210 }, { 126,2210 }, { 127,2210 }, { 128,2210 }, { 129,2210 },
+ { 130,2210 }, { 131,2210 }, { 132,2210 }, { 133,2210 }, { 134,2210 },
+ { 135,2210 }, { 136,2210 }, { 137,2210 }, { 138,2210 }, { 139,2210 },
+ { 140,2210 }, { 141,2210 }, { 142,2210 }, { 143,2210 }, { 144,2210 },
+ { 145,2210 }, { 146,2210 }, { 147,2210 }, { 148,2210 }, { 149,2210 },
+ { 150,2210 }, { 151,2210 }, { 152,2210 }, { 153,2210 }, { 154,2210 },
+ { 155,2210 }, { 156,2210 }, { 157,2210 }, { 158,2210 }, { 159,2210 },
+ { 160,2210 }, { 161,2210 }, { 162,2210 }, { 163,2210 }, { 164,2210 },
+ { 165,2210 }, { 166,2210 }, { 167,2210 }, { 168,2210 }, { 169,2210 },
+ { 170,2210 }, { 171,2210 }, { 172,2210 }, { 173,2210 }, { 174,2210 },
+
+ { 175,2210 }, { 176,2210 }, { 177,2210 }, { 178,2210 }, { 179,2210 },
+ { 180,2210 }, { 181,2210 }, { 182,2210 }, { 183,2210 }, { 184,2210 },
+ { 185,2210 }, { 186,2210 }, { 187,2210 }, { 188,2210 }, { 189,2210 },
+ { 190,2210 }, { 191,2210 }, { 192,2210 }, { 193,2210 }, { 194,2210 },
+ { 195,2210 }, { 196,2210 }, { 197,2210 }, { 198,2210 }, { 199,2210 },
+ { 200,2210 }, { 201,2210 }, { 202,2210 }, { 203,2210 }, { 204,2210 },
+ { 205,2210 }, { 206,2210 }, { 207,2210 }, { 208,2210 }, { 209,2210 },
+ { 210,2210 }, { 211,2210 }, { 212,2210 }, { 213,2210 }, { 214,2210 },
+ { 215,2210 }, { 216,2210 }, { 217,2210 }, { 218,2210 }, { 219,2210 },
+ { 220,2210 }, { 221,2210 }, { 222,2210 }, { 223,2210 }, { 224,2210 },
+
+ { 225,2210 }, { 226,2210 }, { 227,2210 }, { 228,2210 }, { 229,2210 },
+ { 230,2210 }, { 231,2210 }, { 232,2210 }, { 233,2210 }, { 234,2210 },
+ { 235,2210 }, { 236,2210 }, { 237,2210 }, { 238,2210 }, { 239,2210 },
+ { 240,2210 }, { 241,2210 }, { 242,2210 }, { 243,2210 }, { 244,2210 },
+ { 245,2210 }, { 246,2210 }, { 247,2210 }, { 248,2210 }, { 249,2210 },
+ { 250,2210 }, { 251,2210 }, { 252,2210 }, { 253,2210 }, { 254,2210 },
+ { 255,2210 }, { 256,2210 }, { 0, 49 }, { 0,7720 }, { 1,2210 },
+ { 2,2210 }, { 3,2210 }, { 4,2210 }, { 5,2210 }, { 6,2210 },
+ { 7,2210 }, { 8,2210 }, { 0, 0 }, { 0, 0 }, { 11,2210 },
+ { 0, 0 }, { 0, 0 }, { 14,2210 }, { 15,2210 }, { 16,2210 },
+
+ { 17,2210 }, { 18,2210 }, { 19,2210 }, { 20,2210 }, { 21,2210 },
+ { 22,2210 }, { 23,2210 }, { 24,2210 }, { 25,2210 }, { 26,2210 },
+ { 27,2210 }, { 28,2210 }, { 29,2210 }, { 30,2210 }, { 31,2210 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 39,2210 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 48,2210 }, { 49,2210 }, { 50,2210 }, { 51,2210 },
+ { 52,2210 }, { 53,2210 }, { 54,2210 }, { 55,2210 }, { 56,2210 },
+ { 57,2210 }, { 0, 0 }, { 59,2210 }, { 0, 0 }, { 0, 26 },
+ { 0,7658 }, { 0, 47 }, { 0,7656 }, { 65,2210 }, { 66,2210 },
+
+ { 67,2210 }, { 68,2210 }, { 69,2210 }, { 70,2210 }, { 71,2210 },
+ { 72,2210 }, { 73,2210 }, { 74,2210 }, { 75,2210 }, { 76,2210 },
+ { 77,2210 }, { 78,2210 }, { 79,2210 }, { 80,2210 }, { 81,2210 },
+ { 82,2210 }, { 83,2210 }, { 84,2210 }, { 85,2210 }, { 86,2210 },
+ { 87,2210 }, { 88,2210 }, { 89,2210 }, { 90,2210 }, { 0, 37 },
+ { 0,7628 }, { 0, 0 }, { 94,2210 }, { 95,2210 }, { 96,2210 },
+ { 97,2210 }, { 98,2210 }, { 99,2210 }, { 100,2210 }, { 101,2210 },
+ { 102,2210 }, { 103,2210 }, { 104,2210 }, { 105,2210 }, { 106,2210 },
+ { 107,2210 }, { 108,2210 }, { 109,2210 }, { 110,2210 }, { 111,2210 },
+ { 112,2210 }, { 113,2210 }, { 114,2210 }, { 115,2210 }, { 116,2210 },
+
+ { 117,2210 }, { 118,2210 }, { 119,2210 }, { 120,2210 }, { 121,2210 },
+ { 122,2210 }, { 61,1631 }, { 0, 0 }, { 0, 0 }, { 126,2210 },
+ { 127,2210 }, { 128,2210 }, { 129,2210 }, { 130,2210 }, { 131,2210 },
+ { 132,2210 }, { 133,2210 }, { 134,2210 }, { 135,2210 }, { 136,2210 },
+ { 137,2210 }, { 138,2210 }, { 139,2210 }, { 140,2210 }, { 141,2210 },
+ { 142,2210 }, { 143,2210 }, { 144,2210 }, { 145,2210 }, { 146,2210 },
+ { 147,2210 }, { 148,2210 }, { 149,2210 }, { 150,2210 }, { 151,2210 },
+ { 152,2210 }, { 153,2210 }, { 154,2210 }, { 155,2210 }, { 156,2210 },
+ { 157,2210 }, { 158,2210 }, { 159,2210 }, { 160,2210 }, { 161,2210 },
+ { 162,2210 }, { 163,2210 }, { 164,2210 }, { 165,2210 }, { 166,2210 },
+
+ { 167,2210 }, { 168,2210 }, { 169,2210 }, { 170,2210 }, { 171,2210 },
+ { 172,2210 }, { 173,2210 }, { 174,2210 }, { 175,2210 }, { 176,2210 },
+ { 177,2210 }, { 178,2210 }, { 179,2210 }, { 180,2210 }, { 181,2210 },
+ { 182,2210 }, { 183,2210 }, { 184,2210 }, { 185,2210 }, { 186,2210 },
+ { 187,2210 }, { 188,2210 }, { 189,2210 }, { 190,2210 }, { 191,2210 },
+ { 192,2210 }, { 193,2210 }, { 194,2210 }, { 195,2210 }, { 196,2210 },
+ { 197,2210 }, { 198,2210 }, { 199,2210 }, { 200,2210 }, { 201,2210 },
+ { 202,2210 }, { 203,2210 }, { 204,2210 }, { 205,2210 }, { 206,2210 },
+ { 207,2210 }, { 208,2210 }, { 209,2210 }, { 210,2210 }, { 211,2210 },
+ { 212,2210 }, { 213,2210 }, { 214,2210 }, { 215,2210 }, { 216,2210 },
+
+ { 217,2210 }, { 218,2210 }, { 219,2210 }, { 220,2210 }, { 221,2210 },
+ { 222,2210 }, { 223,2210 }, { 224,2210 }, { 225,2210 }, { 226,2210 },
+ { 227,2210 }, { 228,2210 }, { 229,2210 }, { 230,2210 }, { 231,2210 },
+ { 232,2210 }, { 233,2210 }, { 234,2210 }, { 235,2210 }, { 236,2210 },
+ { 237,2210 }, { 238,2210 }, { 239,2210 }, { 240,2210 }, { 241,2210 },
+ { 242,2210 }, { 243,2210 }, { 244,2210 }, { 245,2210 }, { 246,2210 },
+ { 247,2210 }, { 248,2210 }, { 249,2210 }, { 250,2210 }, { 251,2210 },
+ { 252,2210 }, { 253,2210 }, { 254,2210 }, { 255,2210 }, { 256,2210 },
+ { 0, 38 }, { 0,7462 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+
+ { 9,2210 }, { 10,2210 }, { 0, 0 }, { 12,2210 }, { 13,2210 },
+ { 0, 0 }, { 0, 37 }, { 0,7446 }, { 0, 0 }, { 0, 37 },
+ { 0,7443 }, { 1,2210 }, { 2,2210 }, { 3,2210 }, { 4,2210 },
+ { 5,2210 }, { 6,2210 }, { 7,2210 }, { 8,2210 }, { 0, 0 },
+ { 0, 0 }, { 11,2210 }, { 0, 0 }, { 32,2210 }, { 14,2210 },
+ { 15,2210 }, { 16,2210 }, { 17,2210 }, { 18,2210 }, { 19,2210 },
+ { 20,2210 }, { 21,2210 }, { 22,2210 }, { 23,2210 }, { 24,2210 },
+ { 25,2210 }, { 26,2210 }, { 27,2210 }, { 28,2210 }, { 29,2210 },
+ { 30,2210 }, { 31,2210 }, { 0, 0 }, { 0, 0 }, { 34,1418 },
+ { 38,1423 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39,2210 },
+
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48,2210 }, { 49,2210 },
+ { 50,2210 }, { 51,2210 }, { 52,2210 }, { 53,2210 }, { 54,2210 },
+ { 55,2210 }, { 56,2210 }, { 57,2210 }, { 0, 0 }, { 59,2210 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 65,2210 }, { 66,2210 }, { 67,2210 }, { 68,2210 }, { 69,2210 },
+ { 70,2210 }, { 71,2210 }, { 72,2210 }, { 73,2210 }, { 74,2210 },
+ { 75,2210 }, { 76,2210 }, { 77,2210 }, { 78,2210 }, { 79,2210 },
+ { 80,2210 }, { 81,2210 }, { 82,2210 }, { 83,2210 }, { 84,2210 },
+ { 85,2210 }, { 86,2210 }, { 87,2210 }, { 88,2210 }, { 89,2210 },
+
+ { 90,2210 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94,2210 },
+ { 95,2210 }, { 96,2210 }, { 97,2210 }, { 98,2210 }, { 99,2210 },
+ { 100,2210 }, { 101,2210 }, { 102,2210 }, { 103,2210 }, { 104,2210 },
+ { 105,2210 }, { 106,2210 }, { 107,2210 }, { 108,2210 }, { 109,2210 },
+ { 110,2210 }, { 111,2210 }, { 112,2210 }, { 113,2210 }, { 114,2210 },
+ { 115,2210 }, { 116,2210 }, { 117,2210 }, { 118,2210 }, { 119,2210 },
+ { 120,2210 }, { 121,2210 }, { 122,2210 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 126,2210 }, { 127,2210 }, { 128,2210 }, { 129,2210 },
+ { 130,2210 }, { 131,2210 }, { 132,2210 }, { 133,2210 }, { 134,2210 },
+ { 135,2210 }, { 136,2210 }, { 137,2210 }, { 138,2210 }, { 139,2210 },
+
+ { 140,2210 }, { 141,2210 }, { 142,2210 }, { 143,2210 }, { 144,2210 },
+ { 145,2210 }, { 146,2210 }, { 147,2210 }, { 148,2210 }, { 149,2210 },
+ { 150,2210 }, { 151,2210 }, { 152,2210 }, { 153,2210 }, { 154,2210 },
+ { 155,2210 }, { 156,2210 }, { 157,2210 }, { 158,2210 }, { 159,2210 },
+ { 160,2210 }, { 161,2210 }, { 162,2210 }, { 163,2210 }, { 164,2210 },
+ { 165,2210 }, { 166,2210 }, { 167,2210 }, { 168,2210 }, { 169,2210 },
+ { 170,2210 }, { 171,2210 }, { 172,2210 }, { 173,2210 }, { 174,2210 },
+ { 175,2210 }, { 176,2210 }, { 177,2210 }, { 178,2210 }, { 179,2210 },
+ { 180,2210 }, { 181,2210 }, { 182,2210 }, { 183,2210 }, { 184,2210 },
+ { 185,2210 }, { 186,2210 }, { 187,2210 }, { 188,2210 }, { 189,2210 },
+
+ { 190,2210 }, { 191,2210 }, { 192,2210 }, { 193,2210 }, { 194,2210 },
+ { 195,2210 }, { 196,2210 }, { 197,2210 }, { 198,2210 }, { 199,2210 },
+ { 200,2210 }, { 201,2210 }, { 202,2210 }, { 203,2210 }, { 204,2210 },
+ { 205,2210 }, { 206,2210 }, { 207,2210 }, { 208,2210 }, { 209,2210 },
+ { 210,2210 }, { 211,2210 }, { 212,2210 }, { 213,2210 }, { 214,2210 },
+ { 215,2210 }, { 216,2210 }, { 217,2210 }, { 218,2210 }, { 219,2210 },
+ { 220,2210 }, { 221,2210 }, { 222,2210 }, { 223,2210 }, { 224,2210 },
+ { 225,2210 }, { 226,2210 }, { 227,2210 }, { 228,2210 }, { 229,2210 },
+ { 230,2210 }, { 231,2210 }, { 232,2210 }, { 233,2210 }, { 234,2210 },
+ { 235,2210 }, { 236,2210 }, { 237,2210 }, { 238,2210 }, { 239,2210 },
+
+ { 240,2210 }, { 241,2210 }, { 242,2210 }, { 243,2210 }, { 244,2210 },
+ { 245,2210 }, { 246,2210 }, { 247,2210 }, { 248,2210 }, { 249,2210 },
+ { 250,2210 }, { 251,2210 }, { 252,2210 }, { 253,2210 }, { 254,2210 },
+ { 255,2210 }, { 256,2210 }, { 0, 37 }, { 0,7185 }, { 0, 37 },
+ { 0,7183 }, { 0, 37 }, { 0,7181 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 28 }, { 0,7176 }, { 0, 0 }, { 0, 37 },
+ { 0,7173 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 34 },
+
+ { 0,7153 }, { 0, 48 }, { 0,7151 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 42,1166 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 42,1164 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48,2208 }, { 49,2208 },
+ { 50,2208 }, { 51,2208 }, { 52,2208 }, { 53,2208 }, { 54,2208 },
+ { 55,2208 }, { 56,2208 }, { 57,2208 }, { 0, 42 }, { 0,7124 },
+ { 1,2407 }, { 2,2407 }, { 3,2407 }, { 4,2407 }, { 5,2407 },
+ { 6,2407 }, { 7,2407 }, { 8,2407 }, { 61,1161 }, { 62,1163 },
+ { 11,2407 }, { 61,1175 }, { 0, 0 }, { 14,2407 }, { 15,2407 },
+ { 16,2407 }, { 17,2407 }, { 18,2407 }, { 19,2407 }, { 20,2407 },
+
+ { 21,2407 }, { 22,2407 }, { 23,2407 }, { 24,2407 }, { 25,2407 },
+ { 26,2407 }, { 27,2407 }, { 28,2407 }, { 29,2407 }, { 30,2407 },
+ { 31,2407 }, { 61,1157 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39,2407 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 46,2665 }, { 0, 0 }, { 48,2407 }, { 49,2407 }, { 50,2407 },
+ { 51,2407 }, { 52,2407 }, { 53,2407 }, { 54,2407 }, { 55,2407 },
+ { 56,2407 }, { 57,2407 }, { 0, 0 }, { 59,2407 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65,2407 },
+ { 66,2407 }, { 67,2407 }, { 68,2407 }, { 69,2923 }, { 70,2407 },
+
+ { 71,2407 }, { 72,2407 }, { 73,2407 }, { 74,2407 }, { 75,2407 },
+ { 76,2407 }, { 77,2407 }, { 78,2407 }, { 79,2407 }, { 80,2407 },
+ { 81,2407 }, { 82,2407 }, { 83,2407 }, { 84,2407 }, { 85,2407 },
+ { 86,2407 }, { 87,2407 }, { 88,2407 }, { 89,2407 }, { 90,2407 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94,2407 }, { 95,2407 },
+ { 96,2407 }, { 97,2407 }, { 98,2407 }, { 99,2407 }, { 100,2407 },
+ { 101,2923 }, { 102,2407 }, { 103,2407 }, { 104,2407 }, { 105,2407 },
+ { 106,2407 }, { 107,2407 }, { 108,2407 }, { 109,2407 }, { 110,2407 },
+ { 111,2407 }, { 112,2407 }, { 113,2407 }, { 114,2407 }, { 115,2407 },
+ { 116,2407 }, { 117,2407 }, { 118,2407 }, { 119,2407 }, { 120,2407 },
+
+ { 121,2407 }, { 122,2407 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 126,2407 }, { 127,2407 }, { 128,2407 }, { 129,2407 }, { 130,2407 },
+ { 131,2407 }, { 132,2407 }, { 133,2407 }, { 134,2407 }, { 135,2407 },
+ { 136,2407 }, { 137,2407 }, { 138,2407 }, { 139,2407 }, { 140,2407 },
+ { 141,2407 }, { 142,2407 }, { 143,2407 }, { 144,2407 }, { 145,2407 },
+ { 146,2407 }, { 147,2407 }, { 148,2407 }, { 149,2407 }, { 150,2407 },
+ { 151,2407 }, { 152,2407 }, { 153,2407 }, { 154,2407 }, { 155,2407 },
+ { 156,2407 }, { 157,2407 }, { 158,2407 }, { 159,2407 }, { 160,2407 },
+ { 161,2407 }, { 162,2407 }, { 163,2407 }, { 164,2407 }, { 165,2407 },
+ { 166,2407 }, { 167,2407 }, { 168,2407 }, { 169,2407 }, { 170,2407 },
+
+ { 171,2407 }, { 172,2407 }, { 173,2407 }, { 174,2407 }, { 175,2407 },
+ { 176,2407 }, { 177,2407 }, { 178,2407 }, { 179,2407 }, { 180,2407 },
+ { 181,2407 }, { 182,2407 }, { 183,2407 }, { 184,2407 }, { 185,2407 },
+ { 186,2407 }, { 187,2407 }, { 188,2407 }, { 189,2407 }, { 190,2407 },
+ { 191,2407 }, { 192,2407 }, { 193,2407 }, { 194,2407 }, { 195,2407 },
+ { 196,2407 }, { 197,2407 }, { 198,2407 }, { 199,2407 }, { 200,2407 },
+ { 201,2407 }, { 202,2407 }, { 203,2407 }, { 204,2407 }, { 205,2407 },
+ { 206,2407 }, { 207,2407 }, { 208,2407 }, { 209,2407 }, { 210,2407 },
+ { 211,2407 }, { 212,2407 }, { 213,2407 }, { 214,2407 }, { 215,2407 },
+ { 216,2407 }, { 217,2407 }, { 218,2407 }, { 219,2407 }, { 220,2407 },
+
+ { 221,2407 }, { 222,2407 }, { 223,2407 }, { 224,2407 }, { 225,2407 },
+ { 226,2407 }, { 227,2407 }, { 228,2407 }, { 229,2407 }, { 230,2407 },
+ { 231,2407 }, { 232,2407 }, { 233,2407 }, { 234,2407 }, { 235,2407 },
+ { 236,2407 }, { 237,2407 }, { 238,2407 }, { 239,2407 }, { 240,2407 },
+ { 241,2407 }, { 242,2407 }, { 243,2407 }, { 244,2407 }, { 245,2407 },
+ { 246,2407 }, { 247,2407 }, { 248,2407 }, { 249,2407 }, { 250,2407 },
+ { 251,2407 }, { 252,2407 }, { 253,2407 }, { 254,2407 }, { 255,2407 },
+ { 256,2407 }, { 0, 42 }, { 0,6866 }, { 1,2149 }, { 2,2149 },
+ { 3,2149 }, { 4,2149 }, { 5,2149 }, { 6,2149 }, { 7,2149 },
+ { 8,2149 }, { 0, 0 }, { 0, 0 }, { 11,2149 }, { 0, 0 },
+
+ { 0, 0 }, { 14,2149 }, { 15,2149 }, { 16,2149 }, { 17,2149 },
+ { 18,2149 }, { 19,2149 }, { 20,2149 }, { 21,2149 }, { 22,2149 },
+ { 23,2149 }, { 24,2149 }, { 25,2149 }, { 26,2149 }, { 27,2149 },
+ { 28,2149 }, { 29,2149 }, { 30,2149 }, { 31,2149 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 39,2149 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 46,2407 }, { 0, 0 },
+ { 48,2923 }, { 49,2923 }, { 50,2923 }, { 51,2923 }, { 52,2923 },
+ { 53,2923 }, { 54,2923 }, { 55,2923 }, { 56,2923 }, { 57,2923 },
+ { 0, 0 }, { 59,2149 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+
+ { 0, 0 }, { 0, 0 }, { 65,2149 }, { 66,2149 }, { 67,2149 },
+ { 68,2149 }, { 69,2665 }, { 70,2149 }, { 71,2149 }, { 72,2149 },
+ { 73,2149 }, { 74,2149 }, { 75,2149 }, { 76,2149 }, { 77,2149 },
+ { 78,2149 }, { 79,2149 }, { 80,2149 }, { 81,2149 }, { 82,2149 },
+ { 83,2149 }, { 84,2149 }, { 85,2149 }, { 86,2149 }, { 87,2149 },
+ { 88,2149 }, { 89,2149 }, { 90,2149 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 94,2149 }, { 95,2149 }, { 96,2149 }, { 97,2149 },
+ { 98,2149 }, { 99,2149 }, { 100,2149 }, { 101,2665 }, { 102,2149 },
+ { 103,2149 }, { 104,2149 }, { 105,2149 }, { 106,2149 }, { 107,2149 },
+ { 108,2149 }, { 109,2149 }, { 110,2149 }, { 111,2149 }, { 112,2149 },
+
+ { 113,2149 }, { 114,2149 }, { 115,2149 }, { 116,2149 }, { 117,2149 },
+ { 118,2149 }, { 119,2149 }, { 120,2149 }, { 121,2149 }, { 122,2149 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 126,2149 }, { 127,2149 },
+ { 128,2149 }, { 129,2149 }, { 130,2149 }, { 131,2149 }, { 132,2149 },
+ { 133,2149 }, { 134,2149 }, { 135,2149 }, { 136,2149 }, { 137,2149 },
+ { 138,2149 }, { 139,2149 }, { 140,2149 }, { 141,2149 }, { 142,2149 },
+ { 143,2149 }, { 144,2149 }, { 145,2149 }, { 146,2149 }, { 147,2149 },
+ { 148,2149 }, { 149,2149 }, { 150,2149 }, { 151,2149 }, { 152,2149 },
+ { 153,2149 }, { 154,2149 }, { 155,2149 }, { 156,2149 }, { 157,2149 },
+ { 158,2149 }, { 159,2149 }, { 160,2149 }, { 161,2149 }, { 162,2149 },
+
+ { 163,2149 }, { 164,2149 }, { 165,2149 }, { 166,2149 }, { 167,2149 },
+ { 168,2149 }, { 169,2149 }, { 170,2149 }, { 171,2149 }, { 172,2149 },
+ { 173,2149 }, { 174,2149 }, { 175,2149 }, { 176,2149 }, { 177,2149 },
+ { 178,2149 }, { 179,2149 }, { 180,2149 }, { 181,2149 }, { 182,2149 },
+ { 183,2149 }, { 184,2149 }, { 185,2149 }, { 186,2149 }, { 187,2149 },
+ { 188,2149 }, { 189,2149 }, { 190,2149 }, { 191,2149 }, { 192,2149 },
+ { 193,2149 }, { 194,2149 }, { 195,2149 }, { 196,2149 }, { 197,2149 },
+ { 198,2149 }, { 199,2149 }, { 200,2149 }, { 201,2149 }, { 202,2149 },
+ { 203,2149 }, { 204,2149 }, { 205,2149 }, { 206,2149 }, { 207,2149 },
+ { 208,2149 }, { 209,2149 }, { 210,2149 }, { 211,2149 }, { 212,2149 },
+
+ { 213,2149 }, { 214,2149 }, { 215,2149 }, { 216,2149 }, { 217,2149 },
+ { 218,2149 }, { 219,2149 }, { 220,2149 }, { 221,2149 }, { 222,2149 },
+ { 223,2149 }, { 224,2149 }, { 225,2149 }, { 226,2149 }, { 227,2149 },
+ { 228,2149 }, { 229,2149 }, { 230,2149 }, { 231,2149 }, { 232,2149 },
+ { 233,2149 }, { 234,2149 }, { 235,2149 }, { 236,2149 }, { 237,2149 },
+ { 238,2149 }, { 239,2149 }, { 240,2149 }, { 241,2149 }, { 242,2149 },
+ { 243,2149 }, { 244,2149 }, { 245,2149 }, { 246,2149 }, { 247,2149 },
+ { 248,2149 }, { 249,2149 }, { 250,2149 }, { 251,2149 }, { 252,2149 },
+ { 253,2149 }, { 254,2149 }, { 255,2149 }, { 256,2149 }, { 0, 37 },
+ { 0,6608 }, { 0, 18 }, { 0,6606 }, { 0, 4 }, { 0,6604 },
+
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 4 }, { 0,6584 },
+ { 0, 19 }, { 0,6582 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 20 }, { 0,6576 }, { 1,2891 }, { 2,2891 },
+ { 3,2891 }, { 4,2891 }, { 5,2891 }, { 6,2891 }, { 7,2891 },
+ { 8,2891 }, { 9,2891 }, { 10,2891 }, { 11,2891 }, { 12,2891 },
+ { 13,2891 }, { 14,2891 }, { 15,2891 }, { 16,2891 }, { 17,2891 },
+ { 18,2891 }, { 19,2891 }, { 20,2891 }, { 21,2891 }, { 22,2891 },
+
+ { 23,2891 }, { 24,2891 }, { 25,2891 }, { 26,2891 }, { 27,2891 },
+ { 28,2891 }, { 29,2891 }, { 30,2891 }, { 31,2891 }, { 32,2891 },
+ { 33,2891 }, { 42, 801 }, { 35,2891 }, { 36,2891 }, { 37,2891 },
+ { 38,2891 }, { 39,2891 }, { 40,2891 }, { 41,2891 }, { 42,2891 },
+ { 43,2891 }, { 44,2891 }, { 45,2891 }, { 46,2891 }, { 47,2891 },
+ { 48,2891 }, { 49,2891 }, { 50,2891 }, { 51,2891 }, { 52,2891 },
+ { 53,2891 }, { 54,2891 }, { 55,2891 }, { 56,2891 }, { 57,2891 },
+ { 58,2891 }, { 59,2891 }, { 60,2891 }, { 61,2891 }, { 62,2891 },
+ { 63,2891 }, { 64,2891 }, { 65,2891 }, { 66,2891 }, { 67,2891 },
+ { 68,2891 }, { 69,2891 }, { 70,2891 }, { 71,2891 }, { 72,2891 },
+
+ { 73,2891 }, { 74,2891 }, { 75,2891 }, { 76,2891 }, { 77,2891 },
+ { 78,2891 }, { 79,2891 }, { 80,2891 }, { 81,2891 }, { 82,2891 },
+ { 83,2891 }, { 84,2891 }, { 85,2891 }, { 86,2891 }, { 87,2891 },
+ { 88,2891 }, { 89,2891 }, { 90,2891 }, { 91,2891 }, { 124, 640 },
+ { 93,2891 }, { 94,2891 }, { 95,2891 }, { 96,2891 }, { 97,2891 },
+ { 98,2891 }, { 99,2891 }, { 100,2891 }, { 101,2891 }, { 102,2891 },
+ { 103,2891 }, { 104,2891 }, { 105,2891 }, { 106,2891 }, { 107,2891 },
+ { 108,2891 }, { 109,2891 }, { 110,2891 }, { 111,2891 }, { 112,2891 },
+ { 113,2891 }, { 114,2891 }, { 115,2891 }, { 116,2891 }, { 117,2891 },
+ { 118,2891 }, { 119,2891 }, { 120,2891 }, { 121,2891 }, { 122,2891 },
+
+ { 123,2891 }, { 124,2891 }, { 125,2891 }, { 126,2891 }, { 127,2891 },
+ { 128,2891 }, { 129,2891 }, { 130,2891 }, { 131,2891 }, { 132,2891 },
+ { 133,2891 }, { 134,2891 }, { 135,2891 }, { 136,2891 }, { 137,2891 },
+ { 138,2891 }, { 139,2891 }, { 140,2891 }, { 141,2891 }, { 142,2891 },
+ { 143,2891 }, { 144,2891 }, { 145,2891 }, { 146,2891 }, { 147,2891 },
+ { 148,2891 }, { 149,2891 }, { 150,2891 }, { 151,2891 }, { 152,2891 },
+ { 153,2891 }, { 154,2891 }, { 155,2891 }, { 156,2891 }, { 157,2891 },
+ { 158,2891 }, { 159,2891 }, { 160,2891 }, { 161,2891 }, { 162,2891 },
+ { 163,2891 }, { 164,2891 }, { 165,2891 }, { 166,2891 }, { 167,2891 },
+ { 168,2891 }, { 169,2891 }, { 170,2891 }, { 171,2891 }, { 172,2891 },
+
+ { 173,2891 }, { 174,2891 }, { 175,2891 }, { 176,2891 }, { 177,2891 },
+ { 178,2891 }, { 179,2891 }, { 180,2891 }, { 181,2891 }, { 182,2891 },
+ { 183,2891 }, { 184,2891 }, { 185,2891 }, { 186,2891 }, { 187,2891 },
+ { 188,2891 }, { 189,2891 }, { 190,2891 }, { 191,2891 }, { 192,2891 },
+ { 193,2891 }, { 194,2891 }, { 195,2891 }, { 196,2891 }, { 197,2891 },
+ { 198,2891 }, { 199,2891 }, { 200,2891 }, { 201,2891 }, { 202,2891 },
+ { 203,2891 }, { 204,2891 }, { 205,2891 }, { 206,2891 }, { 207,2891 },
+ { 208,2891 }, { 209,2891 }, { 210,2891 }, { 211,2891 }, { 212,2891 },
+ { 213,2891 }, { 214,2891 }, { 215,2891 }, { 216,2891 }, { 217,2891 },
+ { 218,2891 }, { 219,2891 }, { 220,2891 }, { 221,2891 }, { 222,2891 },
+
+ { 223,2891 }, { 224,2891 }, { 225,2891 }, { 226,2891 }, { 227,2891 },
+ { 228,2891 }, { 229,2891 }, { 230,2891 }, { 231,2891 }, { 232,2891 },
+ { 233,2891 }, { 234,2891 }, { 235,2891 }, { 236,2891 }, { 237,2891 },
+ { 238,2891 }, { 239,2891 }, { 240,2891 }, { 241,2891 }, { 242,2891 },
+ { 243,2891 }, { 244,2891 }, { 245,2891 }, { 246,2891 }, { 247,2891 },
+ { 248,2891 }, { 249,2891 }, { 250,2891 }, { 251,2891 }, { 252,2891 },
+ { 253,2891 }, { 254,2891 }, { 255,2891 }, { 256,2891 }, { 0, 17 },
+ { 0,6318 }, { 1, 382 }, { 2, 382 }, { 3, 382 }, { 4, 382 },
+ { 5, 382 }, { 6, 382 }, { 7, 382 }, { 8, 382 }, { 9, 382 },
+ { 0, 0 }, { 11, 382 }, { 12, 382 }, { 13, 382 }, { 14, 382 },
+
+ { 15, 382 }, { 16, 382 }, { 17, 382 }, { 18, 382 }, { 19, 382 },
+ { 20, 382 }, { 21, 382 }, { 22, 382 }, { 23, 382 }, { 24, 382 },
+ { 25, 382 }, { 26, 382 }, { 27, 382 }, { 28, 382 }, { 29, 382 },
+ { 30, 382 }, { 31, 382 }, { 32, 382 }, { 33, 382 }, { 34, 382 },
+ { 35, 382 }, { 36, 382 }, { 37, 382 }, { 38, 382 }, { 39, 382 },
+ { 40, 382 }, { 41, 382 }, { 42, 382 }, { 43, 382 }, { 44, 382 },
+ { 45, 382 }, { 46, 382 }, { 47, 382 }, { 48, 382 }, { 49, 382 },
+ { 50, 382 }, { 51, 382 }, { 52, 382 }, { 53, 382 }, { 54, 382 },
+ { 55, 382 }, { 56, 382 }, { 57, 382 }, { 58, 382 }, { 59, 382 },
+ { 60, 382 }, { 61, 382 }, { 62, 382 }, { 63, 382 }, { 64, 382 },
+
+ { 65, 382 }, { 66, 382 }, { 67, 382 }, { 68, 382 }, { 69, 382 },
+ { 70, 382 }, { 71, 382 }, { 72, 382 }, { 73, 382 }, { 74, 382 },
+ { 75, 382 }, { 76, 382 }, { 77, 382 }, { 78, 382 }, { 79, 382 },
+ { 80, 382 }, { 81, 382 }, { 82, 382 }, { 83, 382 }, { 84, 382 },
+ { 85, 382 }, { 86, 382 }, { 87, 382 }, { 88, 382 }, { 89, 382 },
+ { 90, 382 }, { 91, 382 }, { 92, 382 }, { 93, 382 }, { 94, 382 },
+ { 95, 382 }, { 96, 382 }, { 97, 382 }, { 98, 518 }, { 99, 382 },
+ { 100, 382 }, { 101, 382 }, { 102, 520 }, { 103, 382 }, { 104, 382 },
+ { 105, 382 }, { 106, 382 }, { 107, 382 }, { 108, 382 }, { 109, 382 },
+ { 110, 522 }, { 111, 382 }, { 112, 382 }, { 113, 382 }, { 114, 524 },
+
+ { 115, 382 }, { 116, 531 }, { 117,2891 }, { 118, 533 }, { 119, 382 },
+ { 120,2929 }, { 121, 382 }, { 122, 382 }, { 123, 382 }, { 124, 382 },
+ { 125, 382 }, { 126, 382 }, { 127, 382 }, { 128, 382 }, { 129, 382 },
+ { 130, 382 }, { 131, 382 }, { 132, 382 }, { 133, 382 }, { 134, 382 },
+ { 135, 382 }, { 136, 382 }, { 137, 382 }, { 138, 382 }, { 139, 382 },
+ { 140, 382 }, { 141, 382 }, { 142, 382 }, { 143, 382 }, { 144, 382 },
+ { 145, 382 }, { 146, 382 }, { 147, 382 }, { 148, 382 }, { 149, 382 },
+ { 150, 382 }, { 151, 382 }, { 152, 382 }, { 153, 382 }, { 154, 382 },
+ { 155, 382 }, { 156, 382 }, { 157, 382 }, { 158, 382 }, { 159, 382 },
+ { 160, 382 }, { 161, 382 }, { 162, 382 }, { 163, 382 }, { 164, 382 },
+
+ { 165, 382 }, { 166, 382 }, { 167, 382 }, { 168, 382 }, { 169, 382 },
+ { 170, 382 }, { 171, 382 }, { 172, 382 }, { 173, 382 }, { 174, 382 },
+ { 175, 382 }, { 176, 382 }, { 177, 382 }, { 178, 382 }, { 179, 382 },
+ { 180, 382 }, { 181, 382 }, { 182, 382 }, { 183, 382 }, { 184, 382 },
+ { 185, 382 }, { 186, 382 }, { 187, 382 }, { 188, 382 }, { 189, 382 },
+ { 190, 382 }, { 191, 382 }, { 192, 382 }, { 193, 382 }, { 194, 382 },
+ { 195, 382 }, { 196, 382 }, { 197, 382 }, { 198, 382 }, { 199, 382 },
+ { 200, 382 }, { 201, 382 }, { 202, 382 }, { 203, 382 }, { 204, 382 },
+ { 205, 382 }, { 206, 382 }, { 207, 382 }, { 208, 382 }, { 209, 382 },
+ { 210, 382 }, { 211, 382 }, { 212, 382 }, { 213, 382 }, { 214, 382 },
+
+ { 215, 382 }, { 216, 382 }, { 217, 382 }, { 218, 382 }, { 219, 382 },
+ { 220, 382 }, { 221, 382 }, { 222, 382 }, { 223, 382 }, { 224, 382 },
+ { 225, 382 }, { 226, 382 }, { 227, 382 }, { 228, 382 }, { 229, 382 },
+ { 230, 382 }, { 231, 382 }, { 232, 382 }, { 233, 382 }, { 234, 382 },
+ { 235, 382 }, { 236, 382 }, { 237, 382 }, { 238, 382 }, { 239, 382 },
+ { 240, 382 }, { 241, 382 }, { 242, 382 }, { 243, 382 }, { 244, 382 },
+ { 245, 382 }, { 246, 382 }, { 247, 382 }, { 248, 382 }, { 249, 382 },
+ { 250, 382 }, { 251, 382 }, { 252, 382 }, { 253, 382 }, { 254, 382 },
+ { 255, 382 }, { 256, 382 }, { 0, 1 }, { 0,6060 }, { 1,2775 },
+ { 2,2775 }, { 3,2775 }, { 4,2775 }, { 5,2775 }, { 6,2775 },
+
+ { 7,2775 }, { 8,2775 }, { 0, 0 }, { 0, 0 }, { 11,2775 },
+ { 0, 23 }, { 0,6047 }, { 14,2775 }, { 15,2775 }, { 16,2775 },
+ { 17,2775 }, { 18,2775 }, { 19,2775 }, { 20,2775 }, { 21,2775 },
+ { 22,2775 }, { 23,2775 }, { 24,2775 }, { 25,2775 }, { 26,2775 },
+ { 27,2775 }, { 28,2775 }, { 29,2775 }, { 30,2775 }, { 31,2775 },
+ { 0, 32 }, { 0,6027 }, { 0, 36 }, { 0,6025 }, { 0, 24 },
+ { 0,6023 }, { 0, 0 }, { 39,2775 }, { 0, 27 }, { 0,6019 },
+ { 0, 39 }, { 0,6017 }, { 0, 29 }, { 0,6015 }, { 0, 31 },
+ { 0,6013 }, { 48,2775 }, { 49,2775 }, { 50,2775 }, { 51,2775 },
+ { 52,2775 }, { 53,2775 }, { 54,2775 }, { 55,2775 }, { 56,2775 },
+
+ { 57,2775 }, { 0, 0 }, { 59,2775 }, { 47, 266 }, { 0, 30 },
+ { 0,5998 }, { 0, 33 }, { 0,5996 }, { 65,2775 }, { 66,2775 },
+ { 67,2775 }, { 68,2775 }, { 69,2775 }, { 70,2775 }, { 71,2775 },
+ { 72,2775 }, { 73,2775 }, { 74,2775 }, { 75,2775 }, { 76,2775 },
+ { 77,2775 }, { 78,2775 }, { 79,2775 }, { 80,2775 }, { 81,2775 },
+ { 82,2775 }, { 83,2775 }, { 84,2775 }, { 85,2775 }, { 86,2775 },
+ { 87,2775 }, { 88,2775 }, { 89,2775 }, { 90,2775 }, { 0, 25 },
+ { 0,5968 }, { 0, 0 }, { 94,2775 }, { 95,2775 }, { 96,2775 },
+ { 97,2775 }, { 98,2775 }, { 99,2775 }, { 100,2775 }, { 101,2775 },
+ { 102,2775 }, { 103,2775 }, { 104,2775 }, { 105,2775 }, { 106,2775 },
+
+ { 107,2775 }, { 108,2775 }, { 109,2775 }, { 110,2775 }, { 111,2775 },
+ { 112,2775 }, { 113,2775 }, { 114,2775 }, { 115,2775 }, { 116,2775 },
+ { 117,2775 }, { 118,2775 }, { 119,2775 }, { 120,2775 }, { 121,2775 },
+ { 122,2775 }, { 0, 16 }, { 0,5936 }, { 0, 0 }, { 126,2775 },
+ { 127,2775 }, { 128,2775 }, { 129,2775 }, { 130,2775 }, { 131,2775 },
+ { 132,2775 }, { 133,2775 }, { 134,2775 }, { 135,2775 }, { 136,2775 },
+ { 137,2775 }, { 138,2775 }, { 139,2775 }, { 140,2775 }, { 141,2775 },
+ { 142,2775 }, { 143,2775 }, { 144,2775 }, { 145,2775 }, { 146,2775 },
+ { 147,2775 }, { 148,2775 }, { 149,2775 }, { 150,2775 }, { 151,2775 },
+ { 152,2775 }, { 153,2775 }, { 154,2775 }, { 155,2775 }, { 156,2775 },
+
+ { 157,2775 }, { 158,2775 }, { 159,2775 }, { 160,2775 }, { 161,2775 },
+ { 162,2775 }, { 163,2775 }, { 164,2775 }, { 165,2775 }, { 166,2775 },
+ { 167,2775 }, { 168,2775 }, { 169,2775 }, { 170,2775 }, { 171,2775 },
+ { 172,2775 }, { 173,2775 }, { 174,2775 }, { 175,2775 }, { 176,2775 },
+ { 177,2775 }, { 178,2775 }, { 179,2775 }, { 180,2775 }, { 181,2775 },
+ { 182,2775 }, { 183,2775 }, { 184,2775 }, { 185,2775 }, { 186,2775 },
+ { 187,2775 }, { 188,2775 }, { 189,2775 }, { 190,2775 }, { 191,2775 },
+ { 192,2775 }, { 193,2775 }, { 194,2775 }, { 195,2775 }, { 196,2775 },
+ { 197,2775 }, { 198,2775 }, { 199,2775 }, { 200,2775 }, { 201,2775 },
+ { 202,2775 }, { 203,2775 }, { 204,2775 }, { 205,2775 }, { 206,2775 },
+
+ { 207,2775 }, { 208,2775 }, { 209,2775 }, { 210,2775 }, { 211,2775 },
+ { 212,2775 }, { 213,2775 }, { 214,2775 }, { 215,2775 }, { 216,2775 },
+ { 217,2775 }, { 218,2775 }, { 219,2775 }, { 220,2775 }, { 221,2775 },
+ { 222,2775 }, { 223,2775 }, { 224,2775 }, { 225,2775 }, { 226,2775 },
+ { 227,2775 }, { 228,2775 }, { 229,2775 }, { 230,2775 }, { 231,2775 },
+ { 232,2775 }, { 233,2775 }, { 234,2775 }, { 235,2775 }, { 236,2775 },
+ { 237,2775 }, { 238,2775 }, { 239,2775 }, { 240,2775 }, { 241,2775 },
+ { 242,2775 }, { 243,2775 }, { 244,2775 }, { 245,2775 }, { 246,2775 },
+ { 247,2775 }, { 248,2775 }, { 249,2775 }, { 250,2775 }, { 251,2775 },
+ { 252,2775 }, { 253,2775 }, { 254,2775 }, { 255,2775 }, { 256,2775 },
+
+ { 0, 2 }, { 0,5802 }, { 0, 5 }, { 0,5800 }, { 0, 6 },
+ { 0,5798 }, { 0, 7 }, { 0,5796 }, { 0, 8 }, { 0,5794 },
+ { 9,2775 }, { 10,2775 }, { 0, 0 }, { 12,2775 }, { 13,2775 },
+ { 0, 9 }, { 0,5787 }, { 0, 10 }, { 0,5785 }, { 0, 3 },
+ { 0,5783 }, { 0, 21 }, { 0,5781 }, { 0, 45 }, { 0,5779 },
+ { 0, 12 }, { 0,5777 }, { 0, 46 }, { 0,5775 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 32,2775 }, { 0, 22 },
+ { 0,5768 }, { 1,2775 }, { 2,2775 }, { 3,2775 }, { 4,2775 },
+ { 5,2775 }, { 6,2775 }, { 7,2775 }, { 8,2775 }, { 9,2775 },
+ { 10,2775 }, { 11,2775 }, { 12,2775 }, { 13,2775 }, { 14,2775 },
+
+ { 15,2775 }, { 16,2775 }, { 17,2775 }, { 18,2775 }, { 19,2775 },
+ { 20,2775 }, { 21,2775 }, { 22,2775 }, { 23,2775 }, { 24,2775 },
+ { 25,2775 }, { 26,2775 }, { 27,2775 }, { 28,2775 }, { 29,2775 },
+ { 30,2775 }, { 31,2775 }, { 32,2775 }, { 33,2775 }, { 34,2775 },
+ { 35,2775 }, { 36,2775 }, { 37,2775 }, { 38,2775 }, { 39,2775 },
+ { 40,2775 }, { 41,2775 }, { 0, 0 }, { 43,2775 }, { 44,2775 },
+ { 45,2775 }, { 46,2775 }, { 47,2775 }, { 48,2775 }, { 49,2775 },
+ { 50,2775 }, { 51,2775 }, { 52,2775 }, { 53,2775 }, { 54,2775 },
+ { 55,2775 }, { 56,2775 }, { 57,2775 }, { 58,2775 }, { 59,2775 },
+ { 60,2775 }, { 61,2775 }, { 62,2775 }, { 63,2775 }, { 64,2775 },
+
+ { 65,2775 }, { 66,2775 }, { 67,2775 }, { 68,2775 }, { 69,2775 },
+ { 70,2775 }, { 71,2775 }, { 72,2775 }, { 73,2775 }, { 74,2775 },
+ { 75,2775 }, { 76,2775 }, { 77,2775 }, { 78,2775 }, { 79,2775 },
+ { 80,2775 }, { 81,2775 }, { 82,2775 }, { 83,2775 }, { 84,2775 },
+ { 85,2775 }, { 86,2775 }, { 87,2775 }, { 88,2775 }, { 89,2775 },
+ { 90,2775 }, { 91,2775 }, { 92,2775 }, { 93,2775 }, { 94,2775 },
+ { 95,2775 }, { 96,2775 }, { 97,2775 }, { 98,2775 }, { 99,2775 },
+ { 100,2775 }, { 101,2775 }, { 102,2775 }, { 103,2775 }, { 104,2775 },
+ { 105,2775 }, { 106,2775 }, { 107,2775 }, { 108,2775 }, { 109,2775 },
+ { 110,2775 }, { 111,2775 }, { 112,2775 }, { 113,2775 }, { 114,2775 },
+
+ { 115,2775 }, { 116,2775 }, { 117,2775 }, { 118,2775 }, { 119,2775 },
+ { 120,2775 }, { 121,2775 }, { 122,2775 }, { 123,2775 }, { 124,2775 },
+ { 125,2775 }, { 126,2775 }, { 127,2775 }, { 128,2775 }, { 129,2775 },
+ { 130,2775 }, { 131,2775 }, { 132,2775 }, { 133,2775 }, { 134,2775 },
+ { 135,2775 }, { 136,2775 }, { 137,2775 }, { 138,2775 }, { 139,2775 },
+ { 140,2775 }, { 141,2775 }, { 142,2775 }, { 143,2775 }, { 144,2775 },
+ { 145,2775 }, { 146,2775 }, { 147,2775 }, { 148,2775 }, { 149,2775 },
+ { 150,2775 }, { 151,2775 }, { 152,2775 }, { 153,2775 }, { 154,2775 },
+ { 155,2775 }, { 156,2775 }, { 157,2775 }, { 158,2775 }, { 159,2775 },
+ { 160,2775 }, { 161,2775 }, { 162,2775 }, { 163,2775 }, { 164,2775 },
+
+ { 165,2775 }, { 166,2775 }, { 167,2775 }, { 168,2775 }, { 169,2775 },
+ { 170,2775 }, { 171,2775 }, { 172,2775 }, { 173,2775 }, { 174,2775 },
+ { 175,2775 }, { 176,2775 }, { 177,2775 }, { 178,2775 }, { 179,2775 },
+ { 180,2775 }, { 181,2775 }, { 182,2775 }, { 183,2775 }, { 184,2775 },
+ { 185,2775 }, { 186,2775 }, { 187,2775 }, { 188,2775 }, { 189,2775 },
+ { 190,2775 }, { 191,2775 }, { 192,2775 }, { 193,2775 }, { 194,2775 },
+ { 195,2775 }, { 196,2775 }, { 197,2775 }, { 198,2775 }, { 199,2775 },
+ { 200,2775 }, { 201,2775 }, { 202,2775 }, { 203,2775 }, { 204,2775 },
+ { 205,2775 }, { 206,2775 }, { 207,2775 }, { 208,2775 }, { 209,2775 },
+ { 210,2775 }, { 211,2775 }, { 212,2775 }, { 213,2775 }, { 214,2775 },
+
+ { 215,2775 }, { 216,2775 }, { 217,2775 }, { 218,2775 }, { 219,2775 },
+ { 220,2775 }, { 221,2775 }, { 222,2775 }, { 223,2775 }, { 224,2775 },
+ { 225,2775 }, { 226,2775 }, { 227,2775 }, { 228,2775 }, { 229,2775 },
+ { 230,2775 }, { 231,2775 }, { 232,2775 }, { 233,2775 }, { 234,2775 },
+ { 235,2775 }, { 236,2775 }, { 237,2775 }, { 238,2775 }, { 239,2775 },
+ { 240,2775 }, { 241,2775 }, { 242,2775 }, { 243,2775 }, { 244,2775 },
+ { 245,2775 }, { 246,2775 }, { 247,2775 }, { 248,2775 }, { 249,2775 },
+ { 250,2775 }, { 251,2775 }, { 252,2775 }, { 253,2775 }, { 254,2775 },
+ { 255,2775 }, { 256,2775 }, { 0, 49 }, { 0,5510 }, { 1, 0 },
+ { 2, 0 }, { 3, 0 }, { 4, 0 }, { 5, 0 }, { 6, 0 },
+
+ { 7, 0 }, { 8, 0 }, { 0, 0 }, { 0, 0 }, { 11, 0 },
+ { 0, 0 }, { 0, 0 }, { 14, 0 }, { 15, 0 }, { 16, 0 },
+ { 17, 0 }, { 18, 0 }, { 19, 0 }, { 20, 0 }, { 21, 0 },
+ { 22, 0 }, { 23, 0 }, { 24, 0 }, { 25, 0 }, { 26, 0 },
+ { 27, 0 }, { 28, 0 }, { 29, 0 }, { 30, 0 }, { 31, 0 },
+ { 0, 11 }, { 0,5477 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 39, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 48, 0 }, { 49, 0 }, { 50, 0 }, { 51, 0 },
+ { 52, 0 }, { 53, 0 }, { 54, 0 }, { 55, 0 }, { 56, 0 },
+
+ { 57, 0 }, { 0, 0 }, { 59, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65, 0 }, { 66, 0 },
+ { 67, 0 }, { 68, 0 }, { 69, 0 }, { 70, 0 }, { 71, 0 },
+ { 72, 0 }, { 73, 0 }, { 74, 0 }, { 75, 0 }, { 76, 0 },
+ { 77, 0 }, { 78, 0 }, { 79, 0 }, { 80, 0 }, { 81, 0 },
+ { 82, 0 }, { 83, 0 }, { 84, 0 }, { 85, 0 }, { 86, 0 },
+ { 87, 0 }, { 88, 0 }, { 89, 0 }, { 90, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 94, 0 }, { 95, 0 }, { 96, 0 },
+ { 97, 0 }, { 98, 0 }, { 99, 0 }, { 100, 0 }, { 101, 0 },
+ { 102, 0 }, { 103, 0 }, { 104, 0 }, { 105, 0 }, { 106, 0 },
+
+ { 107, 0 }, { 108, 0 }, { 109, 0 }, { 110, 0 }, { 111, 0 },
+ { 112, 0 }, { 113, 0 }, { 114, 0 }, { 115, 0 }, { 116, 0 },
+ { 117, 0 }, { 118, 0 }, { 119, 0 }, { 120, 0 }, { 121, 0 },
+ { 122, 0 }, { 0, 0 }, { 0, 0 }, { 92,2052 }, { 126, 0 },
+ { 127, 0 }, { 128, 0 }, { 129, 0 }, { 130, 0 }, { 131, 0 },
+ { 132, 0 }, { 133, 0 }, { 134, 0 }, { 135, 0 }, { 136, 0 },
+ { 137, 0 }, { 138, 0 }, { 139, 0 }, { 140, 0 }, { 141, 0 },
+ { 142, 0 }, { 143, 0 }, { 144, 0 }, { 145, 0 }, { 146, 0 },
+ { 147, 0 }, { 148, 0 }, { 149, 0 }, { 150, 0 }, { 151, 0 },
+ { 152, 0 }, { 153, 0 }, { 154, 0 }, { 155, 0 }, { 156, 0 },
+
+ { 157, 0 }, { 158, 0 }, { 159, 0 }, { 160, 0 }, { 161, 0 },
+ { 162, 0 }, { 163, 0 }, { 164, 0 }, { 165, 0 }, { 166, 0 },
+ { 167, 0 }, { 168, 0 }, { 169, 0 }, { 170, 0 }, { 171, 0 },
+ { 172, 0 }, { 173, 0 }, { 174, 0 }, { 175, 0 }, { 176, 0 },
+ { 177, 0 }, { 178, 0 }, { 179, 0 }, { 180, 0 }, { 181, 0 },
+ { 182, 0 }, { 183, 0 }, { 184, 0 }, { 185, 0 }, { 186, 0 },
+ { 187, 0 }, { 188, 0 }, { 189, 0 }, { 190, 0 }, { 191, 0 },
+ { 192, 0 }, { 193, 0 }, { 194, 0 }, { 195, 0 }, { 196, 0 },
+ { 197, 0 }, { 198, 0 }, { 199, 0 }, { 200, 0 }, { 201, 0 },
+ { 202, 0 }, { 203, 0 }, { 204, 0 }, { 205, 0 }, { 206, 0 },
+
+ { 207, 0 }, { 208, 0 }, { 209, 0 }, { 210, 0 }, { 211, 0 },
+ { 212, 0 }, { 213, 0 }, { 214, 0 }, { 215, 0 }, { 216, 0 },
+ { 217, 0 }, { 218, 0 }, { 219, 0 }, { 220, 0 }, { 221, 0 },
+ { 222, 0 }, { 223, 0 }, { 224, 0 }, { 225, 0 }, { 226, 0 },
+ { 227, 0 }, { 228, 0 }, { 229, 0 }, { 230, 0 }, { 231, 0 },
+ { 232, 0 }, { 233, 0 }, { 234, 0 }, { 235, 0 }, { 236, 0 },
+ { 237, 0 }, { 238, 0 }, { 239, 0 }, { 240, 0 }, { 241, 0 },
+ { 242, 0 }, { 243, 0 }, { 244, 0 }, { 245, 0 }, { 246, 0 },
+ { 247, 0 }, { 248, 0 }, { 249, 0 }, { 250, 0 }, { 251, 0 },
+ { 252, 0 }, { 253, 0 }, { 254, 0 }, { 255, 0 }, { 256, 0 },
+
+ { 0, 38 }, { 0,5252 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 9, 0 }, { 10, 0 }, { 0, 0 }, { 12, 0 }, { 13, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 35 },
+ { 0,5233 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, { 4, 0 },
+ { 5, 0 }, { 6, 0 }, { 7, 0 }, { 8, 0 }, { 0, 0 },
+ { 0, 0 }, { 11, 0 }, { 0, 0 }, { 32, 0 }, { 14, 0 },
+ { 15, 0 }, { 16, 0 }, { 17, 0 }, { 18, 0 }, { 19, 0 },
+ { 20, 0 }, { 21, 0 }, { 22, 0 }, { 23, 0 }, { 24, 0 },
+ { 25, 0 }, { 26, 0 }, { 27, 0 }, { 28, 0 }, { 29, 0 },
+
+ { 30, 0 }, { 31, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48, 0 }, { 49, 0 },
+ { 50, 0 }, { 51, 0 }, { 52, 0 }, { 53, 0 }, { 54, 0 },
+ { 55, 0 }, { 56, 0 }, { 57, 0 }, { 0, 0 }, { 59, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 65, 0 }, { 66, 0 }, { 67, 0 }, { 68, 0 }, { 69, 0 },
+ { 70, 0 }, { 71, 0 }, { 72, 0 }, { 73, 0 }, { 74, 0 },
+ { 75, 0 }, { 76, 0 }, { 77, 0 }, { 78, 0 }, { 79, 0 },
+
+ { 80, 0 }, { 81, 0 }, { 82, 0 }, { 83, 0 }, { 84, 0 },
+ { 85, 0 }, { 86, 0 }, { 87, 0 }, { 88, 0 }, { 89, 0 },
+ { 90, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94, 0 },
+ { 95, 0 }, { 96, 0 }, { 97, 0 }, { 98, 0 }, { 99, 0 },
+ { 100, 0 }, { 101, 0 }, { 102, 0 }, { 103, 0 }, { 104, 0 },
+ { 105, 0 }, { 106, 0 }, { 107, 0 }, { 108, 0 }, { 109, 0 },
+ { 110, 0 }, { 111, 0 }, { 112, 0 }, { 113, 0 }, { 114, 0 },
+ { 115, 0 }, { 116, 0 }, { 117, 0 }, { 118, 0 }, { 119, 0 },
+ { 120, 0 }, { 121, 0 }, { 122, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 126, 0 }, { 127, 0 }, { 128, 0 }, { 129, 0 },
+
+ { 130, 0 }, { 131, 0 }, { 132, 0 }, { 133, 0 }, { 134, 0 },
+ { 135, 0 }, { 136, 0 }, { 137, 0 }, { 138, 0 }, { 139, 0 },
+ { 140, 0 }, { 141, 0 }, { 142, 0 }, { 143, 0 }, { 144, 0 },
+ { 145, 0 }, { 146, 0 }, { 147, 0 }, { 148, 0 }, { 149, 0 },
+ { 150, 0 }, { 151, 0 }, { 152, 0 }, { 153, 0 }, { 154, 0 },
+ { 155, 0 }, { 156, 0 }, { 157, 0 }, { 158, 0 }, { 159, 0 },
+ { 160, 0 }, { 161, 0 }, { 162, 0 }, { 163, 0 }, { 164, 0 },
+ { 165, 0 }, { 166, 0 }, { 167, 0 }, { 168, 0 }, { 169, 0 },
+ { 170, 0 }, { 171, 0 }, { 172, 0 }, { 173, 0 }, { 174, 0 },
+ { 175, 0 }, { 176, 0 }, { 177, 0 }, { 178, 0 }, { 179, 0 },
+
+ { 180, 0 }, { 181, 0 }, { 182, 0 }, { 183, 0 }, { 184, 0 },
+ { 185, 0 }, { 186, 0 }, { 187, 0 }, { 188, 0 }, { 189, 0 },
+ { 190, 0 }, { 191, 0 }, { 192, 0 }, { 193, 0 }, { 194, 0 },
+ { 195, 0 }, { 196, 0 }, { 197, 0 }, { 198, 0 }, { 199, 0 },
+ { 200, 0 }, { 201, 0 }, { 202, 0 }, { 203, 0 }, { 204, 0 },
+ { 205, 0 }, { 206, 0 }, { 207, 0 }, { 208, 0 }, { 209, 0 },
+ { 210, 0 }, { 211, 0 }, { 212, 0 }, { 213, 0 }, { 214, 0 },
+ { 215, 0 }, { 216, 0 }, { 217, 0 }, { 218, 0 }, { 219, 0 },
+ { 220, 0 }, { 221, 0 }, { 222, 0 }, { 223, 0 }, { 224, 0 },
+ { 225, 0 }, { 226, 0 }, { 227, 0 }, { 228, 0 }, { 229, 0 },
+
+ { 230, 0 }, { 231, 0 }, { 232, 0 }, { 233, 0 }, { 234, 0 },
+ { 235, 0 }, { 236, 0 }, { 237, 0 }, { 238, 0 }, { 239, 0 },
+ { 240, 0 }, { 241, 0 }, { 242, 0 }, { 243, 0 }, { 244, 0 },
+ { 245, 0 }, { 246, 0 }, { 247, 0 }, { 248, 0 }, { 249, 0 },
+ { 250, 0 }, { 251, 0 }, { 252, 0 }, { 253, 0 }, { 254, 0 },
+ { 255, 0 }, { 256, 0 }, { 0, 41 }, { 0,4975 }, { 1,-804 },
+ { 2,-804 }, { 3,-804 }, { 4,-804 }, { 5,-804 }, { 6,-804 },
+ { 7,-804 }, { 8,-804 }, { 0, 0 }, { 0, 0 }, { 11,-804 },
+ { 0, 0 }, { 0, 0 }, { 14,-804 }, { 15,-804 }, { 16,-804 },
+ { 17,-804 }, { 18,-804 }, { 19,-804 }, { 20,-804 }, { 21,-804 },
+
+ { 22,-804 }, { 23,-804 }, { 24,-804 }, { 25,-804 }, { 26,-804 },
+ { 27,-804 }, { 28,-804 }, { 29,-804 }, { 30,-804 }, { 31,-804 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 39,-804 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 48,2240 }, { 49,2240 }, { 50,2240 }, { 51,2240 },
+ { 52,2240 }, { 53,2240 }, { 54,2240 }, { 55,2240 }, { 56,2240 },
+ { 57,2240 }, { 0, 0 }, { 59,-804 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65,-804 }, { 66,-804 },
+ { 67,-804 }, { 68,-804 }, { 69,2498 }, { 70,-804 }, { 71,-804 },
+
+ { 72,-804 }, { 73,-804 }, { 74,-804 }, { 75,-804 }, { 76,-804 },
+ { 77,-804 }, { 78,-804 }, { 79,-804 }, { 80,-804 }, { 81,-804 },
+ { 82,-804 }, { 83,-804 }, { 84,-804 }, { 85,-804 }, { 86,-804 },
+ { 87,-804 }, { 88,-804 }, { 89,-804 }, { 90,-804 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 94,-804 }, { 95,-804 }, { 96,-804 },
+ { 97,-804 }, { 98,-804 }, { 99,-804 }, { 100,-804 }, { 101,2498 },
+ { 102,-804 }, { 103,-804 }, { 104,-804 }, { 105,-804 }, { 106,-804 },
+ { 107,-804 }, { 108,-804 }, { 109,-804 }, { 110,-804 }, { 111,-804 },
+ { 112,-804 }, { 113,-804 }, { 114,-804 }, { 115,-804 }, { 116,-804 },
+ { 117,-804 }, { 118,-804 }, { 119,-804 }, { 120,-804 }, { 121,-804 },
+
+ { 122,-804 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 126,-804 },
+ { 127,-804 }, { 128,-804 }, { 129,-804 }, { 130,-804 }, { 131,-804 },
+ { 132,-804 }, { 133,-804 }, { 134,-804 }, { 135,-804 }, { 136,-804 },
+ { 137,-804 }, { 138,-804 }, { 139,-804 }, { 140,-804 }, { 141,-804 },
+ { 142,-804 }, { 143,-804 }, { 144,-804 }, { 145,-804 }, { 146,-804 },
+ { 147,-804 }, { 148,-804 }, { 149,-804 }, { 150,-804 }, { 151,-804 },
+ { 152,-804 }, { 153,-804 }, { 154,-804 }, { 155,-804 }, { 156,-804 },
+ { 157,-804 }, { 158,-804 }, { 159,-804 }, { 160,-804 }, { 161,-804 },
+ { 162,-804 }, { 163,-804 }, { 164,-804 }, { 165,-804 }, { 166,-804 },
+ { 167,-804 }, { 168,-804 }, { 169,-804 }, { 170,-804 }, { 171,-804 },
+
+ { 172,-804 }, { 173,-804 }, { 174,-804 }, { 175,-804 }, { 176,-804 },
+ { 177,-804 }, { 178,-804 }, { 179,-804 }, { 180,-804 }, { 181,-804 },
+ { 182,-804 }, { 183,-804 }, { 184,-804 }, { 185,-804 }, { 186,-804 },
+ { 187,-804 }, { 188,-804 }, { 189,-804 }, { 190,-804 }, { 191,-804 },
+ { 192,-804 }, { 193,-804 }, { 194,-804 }, { 195,-804 }, { 196,-804 },
+ { 197,-804 }, { 198,-804 }, { 199,-804 }, { 200,-804 }, { 201,-804 },
+ { 202,-804 }, { 203,-804 }, { 204,-804 }, { 205,-804 }, { 206,-804 },
+ { 207,-804 }, { 208,-804 }, { 209,-804 }, { 210,-804 }, { 211,-804 },
+ { 212,-804 }, { 213,-804 }, { 214,-804 }, { 215,-804 }, { 216,-804 },
+ { 217,-804 }, { 218,-804 }, { 219,-804 }, { 220,-804 }, { 221,-804 },
+
+ { 222,-804 }, { 223,-804 }, { 224,-804 }, { 225,-804 }, { 226,-804 },
+ { 227,-804 }, { 228,-804 }, { 229,-804 }, { 230,-804 }, { 231,-804 },
+ { 232,-804 }, { 233,-804 }, { 234,-804 }, { 235,-804 }, { 236,-804 },
+ { 237,-804 }, { 238,-804 }, { 239,-804 }, { 240,-804 }, { 241,-804 },
+ { 242,-804 }, { 243,-804 }, { 244,-804 }, { 245,-804 }, { 246,-804 },
+ { 247,-804 }, { 248,-804 }, { 249,-804 }, { 250,-804 }, { 251,-804 },
+ { 252,-804 }, { 253,-804 }, { 254,-804 }, { 255,-804 }, { 256,-804 },
+ { 0, 44 }, { 0,4717 }, { 1,-793 }, { 2,-793 }, { 3,-793 },
+ { 4,-793 }, { 5,-793 }, { 6,-793 }, { 7,-793 }, { 8,-793 },
+ { 0, 0 }, { 0, 0 }, { 11,-793 }, { 0, 0 }, { 0, 0 },
+
+ { 14,-793 }, { 15,-793 }, { 16,-793 }, { 17,-793 }, { 18,-793 },
+ { 19,-793 }, { 20,-793 }, { 21,-793 }, { 22,-793 }, { 23,-793 },
+ { 24,-793 }, { 25,-793 }, { 26,-793 }, { 27,-793 }, { 28,-793 },
+ { 29,-793 }, { 30,-793 }, { 31,-793 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 39,-793 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48,-793 },
+ { 49,-793 }, { 50,-793 }, { 51,-793 }, { 52,-793 }, { 53,-793 },
+ { 54,-793 }, { 55,-793 }, { 56,-793 }, { 57,-793 }, { 0, 0 },
+ { 59,-793 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+
+ { 0, 0 }, { 65,-793 }, { 66,-793 }, { 67,-793 }, { 68,-793 },
+ { 69,-793 }, { 70,-793 }, { 71,-793 }, { 72,-793 }, { 73,-793 },
+ { 74,-793 }, { 75,-793 }, { 76,-793 }, { 77,-793 }, { 78,-793 },
+ { 79,-793 }, { 80,-793 }, { 81,-793 }, { 82,-793 }, { 83,-793 },
+ { 84,-793 }, { 85,-793 }, { 86,-793 }, { 87,-793 }, { 88,-793 },
+ { 89,-793 }, { 90,-793 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 94,-793 }, { 95,-793 }, { 96,-793 }, { 97,-793 }, { 98,-793 },
+ { 99,-793 }, { 100,-793 }, { 101,-793 }, { 102,-793 }, { 103,-793 },
+ { 104,-793 }, { 105,-793 }, { 106,-793 }, { 107,-793 }, { 108,-793 },
+ { 109,-793 }, { 110,-793 }, { 111,-793 }, { 112,-793 }, { 113,-793 },
+
+ { 114,-793 }, { 115,-793 }, { 116,-793 }, { 117,-793 }, { 118,-793 },
+ { 119,-793 }, { 120,-793 }, { 121,-793 }, { 122,-793 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 126,-793 }, { 127,-793 }, { 128,-793 },
+ { 129,-793 }, { 130,-793 }, { 131,-793 }, { 132,-793 }, { 133,-793 },
+ { 134,-793 }, { 135,-793 }, { 136,-793 }, { 137,-793 }, { 138,-793 },
+ { 139,-793 }, { 140,-793 }, { 141,-793 }, { 142,-793 }, { 143,-793 },
+ { 144,-793 }, { 145,-793 }, { 146,-793 }, { 147,-793 }, { 148,-793 },
+ { 149,-793 }, { 150,-793 }, { 151,-793 }, { 152,-793 }, { 153,-793 },
+ { 154,-793 }, { 155,-793 }, { 156,-793 }, { 157,-793 }, { 158,-793 },
+ { 159,-793 }, { 160,-793 }, { 161,-793 }, { 162,-793 }, { 163,-793 },
+
+ { 164,-793 }, { 165,-793 }, { 166,-793 }, { 167,-793 }, { 168,-793 },
+ { 169,-793 }, { 170,-793 }, { 171,-793 }, { 172,-793 }, { 173,-793 },
+ { 174,-793 }, { 175,-793 }, { 176,-793 }, { 177,-793 }, { 178,-793 },
+ { 179,-793 }, { 180,-793 }, { 181,-793 }, { 182,-793 }, { 183,-793 },
+ { 184,-793 }, { 185,-793 }, { 186,-793 }, { 187,-793 }, { 188,-793 },
+ { 189,-793 }, { 190,-793 }, { 191,-793 }, { 192,-793 }, { 193,-793 },
+ { 194,-793 }, { 195,-793 }, { 196,-793 }, { 197,-793 }, { 198,-793 },
+ { 199,-793 }, { 200,-793 }, { 201,-793 }, { 202,-793 }, { 203,-793 },
+ { 204,-793 }, { 205,-793 }, { 206,-793 }, { 207,-793 }, { 208,-793 },
+ { 209,-793 }, { 210,-793 }, { 211,-793 }, { 212,-793 }, { 213,-793 },
+
+ { 214,-793 }, { 215,-793 }, { 216,-793 }, { 217,-793 }, { 218,-793 },
+ { 219,-793 }, { 220,-793 }, { 221,-793 }, { 222,-793 }, { 223,-793 },
+ { 224,-793 }, { 225,-793 }, { 226,-793 }, { 227,-793 }, { 228,-793 },
+ { 229,-793 }, { 230,-793 }, { 231,-793 }, { 232,-793 }, { 233,-793 },
+ { 234,-793 }, { 235,-793 }, { 236,-793 }, { 237,-793 }, { 238,-793 },
+ { 239,-793 }, { 240,-793 }, { 241,-793 }, { 242,-793 }, { 243,-793 },
+ { 244,-793 }, { 245,-793 }, { 246,-793 }, { 247,-793 }, { 248,-793 },
+ { 249,-793 }, { 250,-793 }, { 251,-793 }, { 252,-793 }, { 253,-793 },
+ { 254,-793 }, { 255,-793 }, { 256,-793 }, { 0, 41 }, { 0,4459 },
+ { 1,-1320 }, { 2,-1320 }, { 3,-1320 }, { 4,-1320 }, { 5,-1320 },
+
+ { 6,-1320 }, { 7,-1320 }, { 8,-1320 }, { 0, 0 }, { 0, 0 },
+ { 11,-1320 }, { 0, 0 }, { 0, 0 }, { 14,-1320 }, { 15,-1320 },
+ { 16,-1320 }, { 17,-1320 }, { 18,-1320 }, { 19,-1320 }, { 20,-1320 },
+ { 21,-1320 }, { 22,-1320 }, { 23,-1320 }, { 24,-1320 }, { 25,-1320 },
+ { 26,-1320 }, { 27,-1320 }, { 28,-1320 }, { 29,-1320 }, { 30,-1320 },
+ { 31,-1320 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39,-1320 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 48,2041 }, { 49,2041 }, { 50,2041 },
+ { 51,2041 }, { 52,2041 }, { 53,2041 }, { 54,2041 }, { 55,2041 },
+
+ { 56,2041 }, { 57,2041 }, { 0, 0 }, { 59,-1320 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65,-1320 },
+ { 66,-1320 }, { 67,-1320 }, { 68,-1320 }, { 69,1982 }, { 70,-1320 },
+ { 71,-1320 }, { 72,-1320 }, { 73,-1320 }, { 74,-1320 }, { 75,-1320 },
+ { 76,-1320 }, { 77,-1320 }, { 78,-1320 }, { 79,-1320 }, { 80,-1320 },
+ { 81,-1320 }, { 82,-1320 }, { 83,-1320 }, { 84,-1320 }, { 85,-1320 },
+ { 86,-1320 }, { 87,-1320 }, { 88,-1320 }, { 89,-1320 }, { 90,-1320 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94,-1320 }, { 95,-1320 },
+ { 96,-1320 }, { 97,-1320 }, { 98,-1320 }, { 99,-1320 }, { 100,-1320 },
+ { 101,1982 }, { 102,-1320 }, { 103,-1320 }, { 104,-1320 }, { 105,-1320 },
+
+ { 106,-1320 }, { 107,-1320 }, { 108,-1320 }, { 109,-1320 }, { 110,-1320 },
+ { 111,-1320 }, { 112,-1320 }, { 113,-1320 }, { 114,-1320 }, { 115,-1320 },
+ { 116,-1320 }, { 117,-1320 }, { 118,-1320 }, { 119,-1320 }, { 120,-1320 },
+ { 121,-1320 }, { 122,-1320 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 126,-1320 }, { 127,-1320 }, { 128,-1320 }, { 129,-1320 }, { 130,-1320 },
+ { 131,-1320 }, { 132,-1320 }, { 133,-1320 }, { 134,-1320 }, { 135,-1320 },
+ { 136,-1320 }, { 137,-1320 }, { 138,-1320 }, { 139,-1320 }, { 140,-1320 },
+ { 141,-1320 }, { 142,-1320 }, { 143,-1320 }, { 144,-1320 }, { 145,-1320 },
+ { 146,-1320 }, { 147,-1320 }, { 148,-1320 }, { 149,-1320 }, { 150,-1320 },
+ { 151,-1320 }, { 152,-1320 }, { 153,-1320 }, { 154,-1320 }, { 155,-1320 },
+
+ { 156,-1320 }, { 157,-1320 }, { 158,-1320 }, { 159,-1320 }, { 160,-1320 },
+ { 161,-1320 }, { 162,-1320 }, { 163,-1320 }, { 164,-1320 }, { 165,-1320 },
+ { 166,-1320 }, { 167,-1320 }, { 168,-1320 }, { 169,-1320 }, { 170,-1320 },
+ { 171,-1320 }, { 172,-1320 }, { 173,-1320 }, { 174,-1320 }, { 175,-1320 },
+ { 176,-1320 }, { 177,-1320 }, { 178,-1320 }, { 179,-1320 }, { 180,-1320 },
+ { 181,-1320 }, { 182,-1320 }, { 183,-1320 }, { 184,-1320 }, { 185,-1320 },
+ { 186,-1320 }, { 187,-1320 }, { 188,-1320 }, { 189,-1320 }, { 190,-1320 },
+ { 191,-1320 }, { 192,-1320 }, { 193,-1320 }, { 194,-1320 }, { 195,-1320 },
+ { 196,-1320 }, { 197,-1320 }, { 198,-1320 }, { 199,-1320 }, { 200,-1320 },
+ { 201,-1320 }, { 202,-1320 }, { 203,-1320 }, { 204,-1320 }, { 205,-1320 },
+
+ { 206,-1320 }, { 207,-1320 }, { 208,-1320 }, { 209,-1320 }, { 210,-1320 },
+ { 211,-1320 }, { 212,-1320 }, { 213,-1320 }, { 214,-1320 }, { 215,-1320 },
+ { 216,-1320 }, { 217,-1320 }, { 218,-1320 }, { 219,-1320 }, { 220,-1320 },
+ { 221,-1320 }, { 222,-1320 }, { 223,-1320 }, { 224,-1320 }, { 225,-1320 },
+ { 226,-1320 }, { 227,-1320 }, { 228,-1320 }, { 229,-1320 }, { 230,-1320 },
+ { 231,-1320 }, { 232,-1320 }, { 233,-1320 }, { 234,-1320 }, { 235,-1320 },
+ { 236,-1320 }, { 237,-1320 }, { 238,-1320 }, { 239,-1320 }, { 240,-1320 },
+ { 241,-1320 }, { 242,-1320 }, { 243,-1320 }, { 244,-1320 }, { 245,-1320 },
+ { 246,-1320 }, { 247,-1320 }, { 248,-1320 }, { 249,-1320 }, { 250,-1320 },
+ { 251,-1320 }, { 252,-1320 }, { 253,-1320 }, { 254,-1320 }, { 255,-1320 },
+
+ { 256,-1320 }, { 0, 44 }, { 0,4201 }, { 1,-1309 }, { 2,-1309 },
+ { 3,-1309 }, { 4,-1309 }, { 5,-1309 }, { 6,-1309 }, { 7,-1309 },
+ { 8,-1309 }, { 0, 0 }, { 0, 0 }, { 11,-1309 }, { 0, 0 },
+ { 0, 0 }, { 14,-1309 }, { 15,-1309 }, { 16,-1309 }, { 17,-1309 },
+ { 18,-1309 }, { 19,-1309 }, { 20,-1309 }, { 21,-1309 }, { 22,-1309 },
+ { 23,-1309 }, { 24,-1309 }, { 25,-1309 }, { 26,-1309 }, { 27,-1309 },
+ { 28,-1309 }, { 29,-1309 }, { 30,-1309 }, { 31,-1309 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 39,-1309 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 43,2041 }, { 0, 0 }, { 45,2041 }, { 0, 0 }, { 0, 0 },
+
+ { 48,2100 }, { 49,2100 }, { 50,2100 }, { 51,2100 }, { 52,2100 },
+ { 53,2100 }, { 54,2100 }, { 55,2100 }, { 56,2100 }, { 57,2100 },
+ { 0, 0 }, { 59,-1309 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 65,-1309 }, { 66,-1309 }, { 67,-1309 },
+ { 68,-1309 }, { 69,-1309 }, { 70,-1309 }, { 71,-1309 }, { 72,-1309 },
+ { 73,-1309 }, { 74,-1309 }, { 75,-1309 }, { 76,-1309 }, { 77,-1309 },
+ { 78,-1309 }, { 79,-1309 }, { 80,-1309 }, { 81,-1309 }, { 82,-1309 },
+ { 83,-1309 }, { 84,-1309 }, { 85,-1309 }, { 86,-1309 }, { 87,-1309 },
+ { 88,-1309 }, { 89,-1309 }, { 90,-1309 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 94,-1309 }, { 95,-1309 }, { 96,-1309 }, { 97,-1309 },
+
+ { 98,-1309 }, { 99,-1309 }, { 100,-1309 }, { 101,-1309 }, { 102,-1309 },
+ { 103,-1309 }, { 104,-1309 }, { 105,-1309 }, { 106,-1309 }, { 107,-1309 },
+ { 108,-1309 }, { 109,-1309 }, { 110,-1309 }, { 111,-1309 }, { 112,-1309 },
+ { 113,-1309 }, { 114,-1309 }, { 115,-1309 }, { 116,-1309 }, { 117,-1309 },
+ { 118,-1309 }, { 119,-1309 }, { 120,-1309 }, { 121,-1309 }, { 122,-1309 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 126,-1309 }, { 127,-1309 },
+ { 128,-1309 }, { 129,-1309 }, { 130,-1309 }, { 131,-1309 }, { 132,-1309 },
+ { 133,-1309 }, { 134,-1309 }, { 135,-1309 }, { 136,-1309 }, { 137,-1309 },
+ { 138,-1309 }, { 139,-1309 }, { 140,-1309 }, { 141,-1309 }, { 142,-1309 },
+ { 143,-1309 }, { 144,-1309 }, { 145,-1309 }, { 146,-1309 }, { 147,-1309 },
+
+ { 148,-1309 }, { 149,-1309 }, { 150,-1309 }, { 151,-1309 }, { 152,-1309 },
+ { 153,-1309 }, { 154,-1309 }, { 155,-1309 }, { 156,-1309 }, { 157,-1309 },
+ { 158,-1309 }, { 159,-1309 }, { 160,-1309 }, { 161,-1309 }, { 162,-1309 },
+ { 163,-1309 }, { 164,-1309 }, { 165,-1309 }, { 166,-1309 }, { 167,-1309 },
+ { 168,-1309 }, { 169,-1309 }, { 170,-1309 }, { 171,-1309 }, { 172,-1309 },
+ { 173,-1309 }, { 174,-1309 }, { 175,-1309 }, { 176,-1309 }, { 177,-1309 },
+ { 178,-1309 }, { 179,-1309 }, { 180,-1309 }, { 181,-1309 }, { 182,-1309 },
+ { 183,-1309 }, { 184,-1309 }, { 185,-1309 }, { 186,-1309 }, { 187,-1309 },
+ { 188,-1309 }, { 189,-1309 }, { 190,-1309 }, { 191,-1309 }, { 192,-1309 },
+ { 193,-1309 }, { 194,-1309 }, { 195,-1309 }, { 196,-1309 }, { 197,-1309 },
+
+ { 198,-1309 }, { 199,-1309 }, { 200,-1309 }, { 201,-1309 }, { 202,-1309 },
+ { 203,-1309 }, { 204,-1309 }, { 205,-1309 }, { 206,-1309 }, { 207,-1309 },
+ { 208,-1309 }, { 209,-1309 }, { 210,-1309 }, { 211,-1309 }, { 212,-1309 },
+ { 213,-1309 }, { 214,-1309 }, { 215,-1309 }, { 216,-1309 }, { 217,-1309 },
+ { 218,-1309 }, { 219,-1309 }, { 220,-1309 }, { 221,-1309 }, { 222,-1309 },
+ { 223,-1309 }, { 224,-1309 }, { 225,-1309 }, { 226,-1309 }, { 227,-1309 },
+ { 228,-1309 }, { 229,-1309 }, { 230,-1309 }, { 231,-1309 }, { 232,-1309 },
+ { 233,-1309 }, { 234,-1309 }, { 235,-1309 }, { 236,-1309 }, { 237,-1309 },
+ { 238,-1309 }, { 239,-1309 }, { 240,-1309 }, { 241,-1309 }, { 242,-1309 },
+ { 243,-1309 }, { 244,-1309 }, { 245,-1309 }, { 246,-1309 }, { 247,-1309 },
+
+ { 248,-1309 }, { 249,-1309 }, { 250,-1309 }, { 251,-1309 }, { 252,-1309 },
+ { 253,-1309 }, { 254,-1309 }, { 255,-1309 }, { 256,-1309 }, { 0, 42 },
+ { 0,3943 }, { 1,-774 }, { 2,-774 }, { 3,-774 }, { 4,-774 },
+ { 5,-774 }, { 6,-774 }, { 7,-774 }, { 8,-774 }, { 0, 0 },
+ { 0, 0 }, { 11,-774 }, { 0, 0 }, { 0, 0 }, { 14,-774 },
+ { 15,-774 }, { 16,-774 }, { 17,-774 }, { 18,-774 }, { 19,-774 },
+ { 20,-774 }, { 21,-774 }, { 22,-774 }, { 23,-774 }, { 24,-774 },
+ { 25,-774 }, { 26,-774 }, { 27,-774 }, { 28,-774 }, { 29,-774 },
+ { 30,-774 }, { 31,-774 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39,-774 },
+
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 46,-516 }, { 0, 0 }, { 48, 0 }, { 49, 0 },
+ { 50, 0 }, { 51, 0 }, { 52, 0 }, { 53, 0 }, { 54, 0 },
+ { 55, 0 }, { 56, 0 }, { 57, 0 }, { 0, 0 }, { 59,-774 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 65,-774 }, { 66,-774 }, { 67,-774 }, { 68,-774 }, { 69,-258 },
+ { 70,-774 }, { 71,-774 }, { 72,-774 }, { 73,-774 }, { 74,-774 },
+ { 75,-774 }, { 76,-774 }, { 77,-774 }, { 78,-774 }, { 79,-774 },
+ { 80,-774 }, { 81,-774 }, { 82,-774 }, { 83,-774 }, { 84,-774 },
+ { 85,-774 }, { 86,-774 }, { 87,-774 }, { 88,-774 }, { 89,-774 },
+
+ { 90,-774 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94,-774 },
+ { 95,-774 }, { 96,-774 }, { 97,-774 }, { 98,-774 }, { 99,-774 },
+ { 100,-774 }, { 101,-258 }, { 102,-774 }, { 103,-774 }, { 104,-774 },
+ { 105,-774 }, { 106,-774 }, { 107,-774 }, { 108,-774 }, { 109,-774 },
+ { 110,-774 }, { 111,-774 }, { 112,-774 }, { 113,-774 }, { 114,-774 },
+ { 115,-774 }, { 116,-774 }, { 117,-774 }, { 118,-774 }, { 119,-774 },
+ { 120,-774 }, { 121,-774 }, { 122,-774 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 126,-774 }, { 127,-774 }, { 128,-774 }, { 129,-774 },
+ { 130,-774 }, { 131,-774 }, { 132,-774 }, { 133,-774 }, { 134,-774 },
+ { 135,-774 }, { 136,-774 }, { 137,-774 }, { 138,-774 }, { 139,-774 },
+
+ { 140,-774 }, { 141,-774 }, { 142,-774 }, { 143,-774 }, { 144,-774 },
+ { 145,-774 }, { 146,-774 }, { 147,-774 }, { 148,-774 }, { 149,-774 },
+ { 150,-774 }, { 151,-774 }, { 152,-774 }, { 153,-774 }, { 154,-774 },
+ { 155,-774 }, { 156,-774 }, { 157,-774 }, { 158,-774 }, { 159,-774 },
+ { 160,-774 }, { 161,-774 }, { 162,-774 }, { 163,-774 }, { 164,-774 },
+ { 165,-774 }, { 166,-774 }, { 167,-774 }, { 168,-774 }, { 169,-774 },
+ { 170,-774 }, { 171,-774 }, { 172,-774 }, { 173,-774 }, { 174,-774 },
+ { 175,-774 }, { 176,-774 }, { 177,-774 }, { 178,-774 }, { 179,-774 },
+ { 180,-774 }, { 181,-774 }, { 182,-774 }, { 183,-774 }, { 184,-774 },
+ { 185,-774 }, { 186,-774 }, { 187,-774 }, { 188,-774 }, { 189,-774 },
+
+ { 190,-774 }, { 191,-774 }, { 192,-774 }, { 193,-774 }, { 194,-774 },
+ { 195,-774 }, { 196,-774 }, { 197,-774 }, { 198,-774 }, { 199,-774 },
+ { 200,-774 }, { 201,-774 }, { 202,-774 }, { 203,-774 }, { 204,-774 },
+ { 205,-774 }, { 206,-774 }, { 207,-774 }, { 208,-774 }, { 209,-774 },
+ { 210,-774 }, { 211,-774 }, { 212,-774 }, { 213,-774 }, { 214,-774 },
+ { 215,-774 }, { 216,-774 }, { 217,-774 }, { 218,-774 }, { 219,-774 },
+ { 220,-774 }, { 221,-774 }, { 222,-774 }, { 223,-774 }, { 224,-774 },
+ { 225,-774 }, { 226,-774 }, { 227,-774 }, { 228,-774 }, { 229,-774 },
+ { 230,-774 }, { 231,-774 }, { 232,-774 }, { 233,-774 }, { 234,-774 },
+ { 235,-774 }, { 236,-774 }, { 237,-774 }, { 238,-774 }, { 239,-774 },
+
+ { 240,-774 }, { 241,-774 }, { 242,-774 }, { 243,-774 }, { 244,-774 },
+ { 245,-774 }, { 246,-774 }, { 247,-774 }, { 248,-774 }, { 249,-774 },
+ { 250,-774 }, { 251,-774 }, { 252,-774 }, { 253,-774 }, { 254,-774 },
+ { 255,-774 }, { 256,-774 }, { 0, 20 }, { 0,3685 }, { 1, 0 },
+ { 2, 0 }, { 3, 0 }, { 4, 0 }, { 5, 0 }, { 6, 0 },
+ { 7, 0 }, { 8, 0 }, { 9, 0 }, { 10, 0 }, { 11, 0 },
+ { 12, 0 }, { 13, 0 }, { 14, 0 }, { 15, 0 }, { 16, 0 },
+ { 17, 0 }, { 18, 0 }, { 19, 0 }, { 20, 0 }, { 21, 0 },
+ { 22, 0 }, { 23, 0 }, { 24, 0 }, { 25, 0 }, { 26, 0 },
+ { 27, 0 }, { 28, 0 }, { 29, 0 }, { 30, 0 }, { 31, 0 },
+
+ { 32, 0 }, { 33, 0 }, { 0, 0 }, { 35, 0 }, { 36, 0 },
+ { 37, 0 }, { 38, 0 }, { 39, 0 }, { 40, 0 }, { 41, 0 },
+ { 42, 0 }, { 43, 0 }, { 44, 0 }, { 45, 0 }, { 46, 0 },
+ { 47, 0 }, { 48, 0 }, { 49, 0 }, { 50, 0 }, { 51, 0 },
+ { 52, 0 }, { 53, 0 }, { 54, 0 }, { 55, 0 }, { 56, 0 },
+ { 57, 0 }, { 58, 0 }, { 59, 0 }, { 60, 0 }, { 61, 0 },
+ { 62, 0 }, { 63, 0 }, { 64, 0 }, { 65, 0 }, { 66, 0 },
+ { 67, 0 }, { 68, 0 }, { 69, 0 }, { 70, 0 }, { 71, 0 },
+ { 72, 0 }, { 73, 0 }, { 74, 0 }, { 75, 0 }, { 76, 0 },
+ { 77, 0 }, { 78, 0 }, { 79, 0 }, { 80, 0 }, { 81, 0 },
+
+ { 82, 0 }, { 83, 0 }, { 84, 0 }, { 85, 0 }, { 86, 0 },
+ { 87, 0 }, { 88, 0 }, { 89, 0 }, { 90, 0 }, { 91, 0 },
+ { 0, 0 }, { 93, 0 }, { 94, 0 }, { 95, 0 }, { 96, 0 },
+ { 97, 0 }, { 98, 0 }, { 99, 0 }, { 100, 0 }, { 101, 0 },
+ { 102, 0 }, { 103, 0 }, { 104, 0 }, { 105, 0 }, { 106, 0 },
+ { 107, 0 }, { 108, 0 }, { 109, 0 }, { 110, 0 }, { 111, 0 },
+ { 112, 0 }, { 113, 0 }, { 114, 0 }, { 115, 0 }, { 116, 0 },
+ { 117, 0 }, { 118, 0 }, { 119, 0 }, { 120, 0 }, { 121, 0 },
+ { 122, 0 }, { 123, 0 }, { 124, 0 }, { 125, 0 }, { 126, 0 },
+ { 127, 0 }, { 128, 0 }, { 129, 0 }, { 130, 0 }, { 131, 0 },
+
+ { 132, 0 }, { 133, 0 }, { 134, 0 }, { 135, 0 }, { 136, 0 },
+ { 137, 0 }, { 138, 0 }, { 139, 0 }, { 140, 0 }, { 141, 0 },
+ { 142, 0 }, { 143, 0 }, { 144, 0 }, { 145, 0 }, { 146, 0 },
+ { 147, 0 }, { 148, 0 }, { 149, 0 }, { 150, 0 }, { 151, 0 },
+ { 152, 0 }, { 153, 0 }, { 154, 0 }, { 155, 0 }, { 156, 0 },
+ { 157, 0 }, { 158, 0 }, { 159, 0 }, { 160, 0 }, { 161, 0 },
+ { 162, 0 }, { 163, 0 }, { 164, 0 }, { 165, 0 }, { 166, 0 },
+ { 167, 0 }, { 168, 0 }, { 169, 0 }, { 170, 0 }, { 171, 0 },
+ { 172, 0 }, { 173, 0 }, { 174, 0 }, { 175, 0 }, { 176, 0 },
+ { 177, 0 }, { 178, 0 }, { 179, 0 }, { 180, 0 }, { 181, 0 },
+
+ { 182, 0 }, { 183, 0 }, { 184, 0 }, { 185, 0 }, { 186, 0 },
+ { 187, 0 }, { 188, 0 }, { 189, 0 }, { 190, 0 }, { 191, 0 },
+ { 192, 0 }, { 193, 0 }, { 194, 0 }, { 195, 0 }, { 196, 0 },
+ { 197, 0 }, { 198, 0 }, { 199, 0 }, { 200, 0 }, { 201, 0 },
+ { 202, 0 }, { 203, 0 }, { 204, 0 }, { 205, 0 }, { 206, 0 },
+ { 207, 0 }, { 208, 0 }, { 209, 0 }, { 210, 0 }, { 211, 0 },
+ { 212, 0 }, { 213, 0 }, { 214, 0 }, { 215, 0 }, { 216, 0 },
+ { 217, 0 }, { 218, 0 }, { 219, 0 }, { 220, 0 }, { 221, 0 },
+ { 222, 0 }, { 223, 0 }, { 224, 0 }, { 225, 0 }, { 226, 0 },
+ { 227, 0 }, { 228, 0 }, { 229, 0 }, { 230, 0 }, { 231, 0 },
+
+ { 232, 0 }, { 233, 0 }, { 234, 0 }, { 235, 0 }, { 236, 0 },
+ { 237, 0 }, { 238, 0 }, { 239, 0 }, { 240, 0 }, { 241, 0 },
+ { 242, 0 }, { 243, 0 }, { 244, 0 }, { 245, 0 }, { 246, 0 },
+ { 247, 0 }, { 248, 0 }, { 249, 0 }, { 250, 0 }, { 251, 0 },
+ { 252, 0 }, { 253, 0 }, { 254, 0 }, { 255, 0 }, { 256, 0 },
+ { 0, 13 }, { 0,3427 }, { 0, 15 }, { 0,3425 }, { 0, 13 },
+ { 0,3423 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 14 }, { 0,3389 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48,1584 },
+ { 49,1584 }, { 50,1584 }, { 51,1584 }, { 52,1584 }, { 53,1584 },
+ { 54,1584 }, { 55,1584 }, { 56,1584 }, { 57,1584 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 65,1584 }, { 66,1584 }, { 67,1584 }, { 68,1584 },
+ { 69,1584 }, { 70,1584 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 48,1607 }, { 49,1607 }, { 50,1607 },
+ { 51,1607 }, { 52,1607 }, { 53,1607 }, { 54,1607 }, { 55,1607 },
+ { 56,1607 }, { 57,1607 }, { 0, 0 }, { 97,1584 }, { 98,1584 },
+ { 99,1584 }, { 100,1584 }, { 101,1584 }, { 102,1584 }, { 65,1607 },
+ { 66,1607 }, { 67,1607 }, { 68,1607 }, { 69,1607 }, { 70,1607 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 117,3091 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 123,1607 },
+
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 125,-2054 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 97,1607 }, { 98,1607 }, { 99,1607 }, { 100,1607 },
+ { 101,1607 }, { 102,1607 }, { 0, 1 }, { 0,3285 }, { 1, 0 },
+ { 2, 0 }, { 3, 0 }, { 4, 0 }, { 5, 0 }, { 6, 0 },
+ { 7, 0 }, { 8, 0 }, { 0, 0 }, { 0, 0 }, { 11, 0 },
+ { 0, 0 }, { 0, 0 }, { 14, 0 }, { 15, 0 }, { 16, 0 },
+ { 17, 0 }, { 18, 0 }, { 19, 0 }, { 20, 0 }, { 21, 0 },
+ { 22, 0 }, { 23, 0 }, { 24, 0 }, { 25, 0 }, { 26, 0 },
+ { 27, 0 }, { 28, 0 }, { 29, 0 }, { 30, 0 }, { 31, 0 },
+
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 39, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 48, 0 }, { 49, 0 }, { 50, 0 }, { 51, 0 },
+ { 52, 0 }, { 53, 0 }, { 54, 0 }, { 55, 0 }, { 56, 0 },
+ { 57, 0 }, { 0, 0 }, { 59, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65, 0 }, { 66, 0 },
+ { 67, 0 }, { 68, 0 }, { 69, 0 }, { 70, 0 }, { 71, 0 },
+ { 72, 0 }, { 73, 0 }, { 74, 0 }, { 75, 0 }, { 76, 0 },
+ { 77, 0 }, { 78, 0 }, { 79, 0 }, { 80, 0 }, { 81, 0 },
+
+ { 82, 0 }, { 83, 0 }, { 84, 0 }, { 85, 0 }, { 86, 0 },
+ { 87, 0 }, { 88, 0 }, { 89, 0 }, { 90, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 94, 0 }, { 95, 0 }, { 96, 0 },
+ { 97, 0 }, { 98, 0 }, { 99, 0 }, { 100, 0 }, { 101, 0 },
+ { 102, 0 }, { 103, 0 }, { 104, 0 }, { 105, 0 }, { 106, 0 },
+ { 107, 0 }, { 108, 0 }, { 109, 0 }, { 110, 0 }, { 111, 0 },
+ { 112, 0 }, { 113, 0 }, { 114, 0 }, { 115, 0 }, { 116, 0 },
+ { 117, 0 }, { 118, 0 }, { 119, 0 }, { 120, 0 }, { 121, 0 },
+ { 122, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 126, 0 },
+ { 127, 0 }, { 128, 0 }, { 129, 0 }, { 130, 0 }, { 131, 0 },
+
+ { 132, 0 }, { 133, 0 }, { 134, 0 }, { 135, 0 }, { 136, 0 },
+ { 137, 0 }, { 138, 0 }, { 139, 0 }, { 140, 0 }, { 141, 0 },
+ { 142, 0 }, { 143, 0 }, { 144, 0 }, { 145, 0 }, { 146, 0 },
+ { 147, 0 }, { 148, 0 }, { 149, 0 }, { 150, 0 }, { 151, 0 },
+ { 152, 0 }, { 153, 0 }, { 154, 0 }, { 155, 0 }, { 156, 0 },
+ { 157, 0 }, { 158, 0 }, { 159, 0 }, { 160, 0 }, { 161, 0 },
+ { 162, 0 }, { 163, 0 }, { 164, 0 }, { 165, 0 }, { 166, 0 },
+ { 167, 0 }, { 168, 0 }, { 169, 0 }, { 170, 0 }, { 171, 0 },
+ { 172, 0 }, { 173, 0 }, { 174, 0 }, { 175, 0 }, { 176, 0 },
+ { 177, 0 }, { 178, 0 }, { 179, 0 }, { 180, 0 }, { 181, 0 },
+
+ { 182, 0 }, { 183, 0 }, { 184, 0 }, { 185, 0 }, { 186, 0 },
+ { 187, 0 }, { 188, 0 }, { 189, 0 }, { 190, 0 }, { 191, 0 },
+ { 192, 0 }, { 193, 0 }, { 194, 0 }, { 195, 0 }, { 196, 0 },
+ { 197, 0 }, { 198, 0 }, { 199, 0 }, { 200, 0 }, { 201, 0 },
+ { 202, 0 }, { 203, 0 }, { 204, 0 }, { 205, 0 }, { 206, 0 },
+ { 207, 0 }, { 208, 0 }, { 209, 0 }, { 210, 0 }, { 211, 0 },
+ { 212, 0 }, { 213, 0 }, { 214, 0 }, { 215, 0 }, { 216, 0 },
+ { 217, 0 }, { 218, 0 }, { 219, 0 }, { 220, 0 }, { 221, 0 },
+ { 222, 0 }, { 223, 0 }, { 224, 0 }, { 225, 0 }, { 226, 0 },
+ { 227, 0 }, { 228, 0 }, { 229, 0 }, { 230, 0 }, { 231, 0 },
+
+ { 232, 0 }, { 233, 0 }, { 234, 0 }, { 235, 0 }, { 236, 0 },
+ { 237, 0 }, { 238, 0 }, { 239, 0 }, { 240, 0 }, { 241, 0 },
+ { 242, 0 }, { 243, 0 }, { 244, 0 }, { 245, 0 }, { 246, 0 },
+ { 247, 0 }, { 248, 0 }, { 249, 0 }, { 250, 0 }, { 251, 0 },
+ { 252, 0 }, { 253, 0 }, { 254, 0 }, { 255, 0 }, { 256, 0 },
+ { 0, 2 }, { 0,3027 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 9, 0 }, { 10, 0 }, { 0, 0 }, { 12, 0 }, { 13, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 32, 0 }, { 0, 22 },
+ { 0,2993 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, { 4, 0 },
+ { 5, 0 }, { 6, 0 }, { 7, 0 }, { 8, 0 }, { 9, 0 },
+ { 10, 0 }, { 11, 0 }, { 12, 0 }, { 13, 0 }, { 14, 0 },
+ { 15, 0 }, { 16, 0 }, { 17, 0 }, { 18, 0 }, { 19, 0 },
+ { 20, 0 }, { 21, 0 }, { 22, 0 }, { 23, 0 }, { 24, 0 },
+ { 25, 0 }, { 26, 0 }, { 27, 0 }, { 28, 0 }, { 29, 0 },
+ { 30, 0 }, { 31, 0 }, { 32, 0 }, { 33, 0 }, { 34, 0 },
+ { 35, 0 }, { 36, 0 }, { 37, 0 }, { 38, 0 }, { 39, 0 },
+
+ { 40, 0 }, { 41, 0 }, { 0, 0 }, { 43, 0 }, { 44, 0 },
+ { 45, 0 }, { 46, 0 }, { 47, 0 }, { 48, 0 }, { 49, 0 },
+ { 50, 0 }, { 51, 0 }, { 52, 0 }, { 53, 0 }, { 54, 0 },
+ { 55, 0 }, { 56, 0 }, { 57, 0 }, { 58, 0 }, { 59, 0 },
+ { 60, 0 }, { 61, 0 }, { 62, 0 }, { 63, 0 }, { 64, 0 },
+ { 65, 0 }, { 66, 0 }, { 67, 0 }, { 68, 0 }, { 69, 0 },
+ { 70, 0 }, { 71, 0 }, { 72, 0 }, { 73, 0 }, { 74, 0 },
+ { 75, 0 }, { 76, 0 }, { 77, 0 }, { 78, 0 }, { 79, 0 },
+ { 80, 0 }, { 81, 0 }, { 82, 0 }, { 83, 0 }, { 84, 0 },
+ { 85, 0 }, { 86, 0 }, { 87, 0 }, { 88, 0 }, { 89, 0 },
+
+ { 90, 0 }, { 91, 0 }, { 92, 0 }, { 93, 0 }, { 94, 0 },
+ { 95, 0 }, { 96, 0 }, { 97, 0 }, { 98, 0 }, { 99, 0 },
+ { 100, 0 }, { 101, 0 }, { 102, 0 }, { 103, 0 }, { 104, 0 },
+ { 105, 0 }, { 106, 0 }, { 107, 0 }, { 108, 0 }, { 109, 0 },
+ { 110, 0 }, { 111, 0 }, { 112, 0 }, { 113, 0 }, { 114, 0 },
+ { 115, 0 }, { 116, 0 }, { 117, 0 }, { 118, 0 }, { 119, 0 },
+ { 120, 0 }, { 121, 0 }, { 122, 0 }, { 123, 0 }, { 124, 0 },
+ { 125, 0 }, { 126, 0 }, { 127, 0 }, { 128, 0 }, { 129, 0 },
+ { 130, 0 }, { 131, 0 }, { 132, 0 }, { 133, 0 }, { 134, 0 },
+ { 135, 0 }, { 136, 0 }, { 137, 0 }, { 138, 0 }, { 139, 0 },
+
+ { 140, 0 }, { 141, 0 }, { 142, 0 }, { 143, 0 }, { 144, 0 },
+ { 145, 0 }, { 146, 0 }, { 147, 0 }, { 148, 0 }, { 149, 0 },
+ { 150, 0 }, { 151, 0 }, { 152, 0 }, { 153, 0 }, { 154, 0 },
+ { 155, 0 }, { 156, 0 }, { 157, 0 }, { 158, 0 }, { 159, 0 },
+ { 160, 0 }, { 161, 0 }, { 162, 0 }, { 163, 0 }, { 164, 0 },
+ { 165, 0 }, { 166, 0 }, { 167, 0 }, { 168, 0 }, { 169, 0 },
+ { 170, 0 }, { 171, 0 }, { 172, 0 }, { 173, 0 }, { 174, 0 },
+ { 175, 0 }, { 176, 0 }, { 177, 0 }, { 178, 0 }, { 179, 0 },
+ { 180, 0 }, { 181, 0 }, { 182, 0 }, { 183, 0 }, { 184, 0 },
+ { 185, 0 }, { 186, 0 }, { 187, 0 }, { 188, 0 }, { 189, 0 },
+
+ { 190, 0 }, { 191, 0 }, { 192, 0 }, { 193, 0 }, { 194, 0 },
+ { 195, 0 }, { 196, 0 }, { 197, 0 }, { 198, 0 }, { 199, 0 },
+ { 200, 0 }, { 201, 0 }, { 202, 0 }, { 203, 0 }, { 204, 0 },
+ { 205, 0 }, { 206, 0 }, { 207, 0 }, { 208, 0 }, { 209, 0 },
+ { 210, 0 }, { 211, 0 }, { 212, 0 }, { 213, 0 }, { 214, 0 },
+ { 215, 0 }, { 216, 0 }, { 217, 0 }, { 218, 0 }, { 219, 0 },
+ { 220, 0 }, { 221, 0 }, { 222, 0 }, { 223, 0 }, { 224, 0 },
+ { 225, 0 }, { 226, 0 }, { 227, 0 }, { 228, 0 }, { 229, 0 },
+ { 230, 0 }, { 231, 0 }, { 232, 0 }, { 233, 0 }, { 234, 0 },
+ { 235, 0 }, { 236, 0 }, { 237, 0 }, { 238, 0 }, { 239, 0 },
+
+ { 240, 0 }, { 241, 0 }, { 242, 0 }, { 243, 0 }, { 244, 0 },
+ { 245, 0 }, { 246, 0 }, { 247, 0 }, { 248, 0 }, { 249, 0 },
+ { 250, 0 }, { 251, 0 }, { 252, 0 }, { 253, 0 }, { 254, 0 },
+ { 255, 0 }, { 256, 0 }, { 0, 41 }, { 0,2735 }, { 1,-3044 },
+ { 2,-3044 }, { 3,-3044 }, { 4,-3044 }, { 5,-3044 }, { 6,-3044 },
+ { 7,-3044 }, { 8,-3044 }, { 0, 0 }, { 0, 0 }, { 11,-3044 },
+ { 0, 0 }, { 0, 0 }, { 14,-3044 }, { 15,-3044 }, { 16,-3044 },
+ { 17,-3044 }, { 18,-3044 }, { 19,-3044 }, { 20,-3044 }, { 21,-3044 },
+ { 22,-3044 }, { 23,-3044 }, { 24,-3044 }, { 25,-3044 }, { 26,-3044 },
+ { 27,-3044 }, { 28,-3044 }, { 29,-3044 }, { 30,-3044 }, { 31,-3044 },
+
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 39,-3044 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 48, 0 }, { 49, 0 }, { 50, 0 }, { 51, 0 },
+ { 52, 0 }, { 53, 0 }, { 54, 0 }, { 55, 0 }, { 56, 0 },
+ { 57, 0 }, { 0, 0 }, { 59,-3044 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65,-3044 }, { 66,-3044 },
+ { 67,-3044 }, { 68,-3044 }, { 69, 258 }, { 70,-3044 }, { 71,-3044 },
+ { 72,-3044 }, { 73,-3044 }, { 74,-3044 }, { 75,-3044 }, { 76,-3044 },
+ { 77,-3044 }, { 78,-3044 }, { 79,-3044 }, { 80,-3044 }, { 81,-3044 },
+
+ { 82,-3044 }, { 83,-3044 }, { 84,-3044 }, { 85,-3044 }, { 86,-3044 },
+ { 87,-3044 }, { 88,-3044 }, { 89,-3044 }, { 90,-3044 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 94,-3044 }, { 95,-3044 }, { 96,-3044 },
+ { 97,-3044 }, { 98,-3044 }, { 99,-3044 }, { 100,-3044 }, { 101, 258 },
+ { 102,-3044 }, { 103,-3044 }, { 104,-3044 }, { 105,-3044 }, { 106,-3044 },
+ { 107,-3044 }, { 108,-3044 }, { 109,-3044 }, { 110,-3044 }, { 111,-3044 },
+ { 112,-3044 }, { 113,-3044 }, { 114,-3044 }, { 115,-3044 }, { 116,-3044 },
+ { 117,-3044 }, { 118,-3044 }, { 119,-3044 }, { 120,-3044 }, { 121,-3044 },
+ { 122,-3044 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 126,-3044 },
+ { 127,-3044 }, { 128,-3044 }, { 129,-3044 }, { 130,-3044 }, { 131,-3044 },
+
+ { 132,-3044 }, { 133,-3044 }, { 134,-3044 }, { 135,-3044 }, { 136,-3044 },
+ { 137,-3044 }, { 138,-3044 }, { 139,-3044 }, { 140,-3044 }, { 141,-3044 },
+ { 142,-3044 }, { 143,-3044 }, { 144,-3044 }, { 145,-3044 }, { 146,-3044 },
+ { 147,-3044 }, { 148,-3044 }, { 149,-3044 }, { 150,-3044 }, { 151,-3044 },
+ { 152,-3044 }, { 153,-3044 }, { 154,-3044 }, { 155,-3044 }, { 156,-3044 },
+ { 157,-3044 }, { 158,-3044 }, { 159,-3044 }, { 160,-3044 }, { 161,-3044 },
+ { 162,-3044 }, { 163,-3044 }, { 164,-3044 }, { 165,-3044 }, { 166,-3044 },
+ { 167,-3044 }, { 168,-3044 }, { 169,-3044 }, { 170,-3044 }, { 171,-3044 },
+ { 172,-3044 }, { 173,-3044 }, { 174,-3044 }, { 175,-3044 }, { 176,-3044 },
+ { 177,-3044 }, { 178,-3044 }, { 179,-3044 }, { 180,-3044 }, { 181,-3044 },
+
+ { 182,-3044 }, { 183,-3044 }, { 184,-3044 }, { 185,-3044 }, { 186,-3044 },
+ { 187,-3044 }, { 188,-3044 }, { 189,-3044 }, { 190,-3044 }, { 191,-3044 },
+ { 192,-3044 }, { 193,-3044 }, { 194,-3044 }, { 195,-3044 }, { 196,-3044 },
+ { 197,-3044 }, { 198,-3044 }, { 199,-3044 }, { 200,-3044 }, { 201,-3044 },
+ { 202,-3044 }, { 203,-3044 }, { 204,-3044 }, { 205,-3044 }, { 206,-3044 },
+ { 207,-3044 }, { 208,-3044 }, { 209,-3044 }, { 210,-3044 }, { 211,-3044 },
+ { 212,-3044 }, { 213,-3044 }, { 214,-3044 }, { 215,-3044 }, { 216,-3044 },
+ { 217,-3044 }, { 218,-3044 }, { 219,-3044 }, { 220,-3044 }, { 221,-3044 },
+ { 222,-3044 }, { 223,-3044 }, { 224,-3044 }, { 225,-3044 }, { 226,-3044 },
+ { 227,-3044 }, { 228,-3044 }, { 229,-3044 }, { 230,-3044 }, { 231,-3044 },
+
+ { 232,-3044 }, { 233,-3044 }, { 234,-3044 }, { 235,-3044 }, { 236,-3044 },
+ { 237,-3044 }, { 238,-3044 }, { 239,-3044 }, { 240,-3044 }, { 241,-3044 },
+ { 242,-3044 }, { 243,-3044 }, { 244,-3044 }, { 245,-3044 }, { 246,-3044 },
+ { 247,-3044 }, { 248,-3044 }, { 249,-3044 }, { 250,-3044 }, { 251,-3044 },
+ { 252,-3044 }, { 253,-3044 }, { 254,-3044 }, { 255,-3044 }, { 256,-3044 },
+ { 0, 45 }, { 0,2477 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 43, 317 },
+ { 0, 0 }, { 45, 317 }, { 0, 0 }, { 0, 0 }, { 48, 799 },
+ { 49, 799 }, { 50, 799 }, { 51, 799 }, { 52, 799 }, { 53, 799 },
+ { 54, 799 }, { 55, 799 }, { 56, 799 }, { 57, 799 }, { 0, 41 },
+ { 0,2418 }, { 1,-3361 }, { 2,-3361 }, { 3,-3361 }, { 4,-3361 },
+ { 5,-3361 }, { 6,-3361 }, { 7,-3361 }, { 8,-3361 }, { 0, 0 },
+ { 0, 0 }, { 11,-3361 }, { 0, 0 }, { 0, 0 }, { 14,-3361 },
+
+ { 15,-3361 }, { 16,-3361 }, { 17,-3361 }, { 18,-3361 }, { 19,-3361 },
+ { 20,-3361 }, { 21,-3361 }, { 22,-3361 }, { 23,-3361 }, { 24,-3361 },
+ { 25,-3361 }, { 26,-3361 }, { 27,-3361 }, { 28,-3361 }, { 29,-3361 },
+ { 30,-3361 }, { 31,-3361 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39,-3361 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48, 0 }, { 49, 0 },
+ { 50, 0 }, { 51, 0 }, { 52, 0 }, { 53, 0 }, { 54, 0 },
+ { 55, 0 }, { 56, 0 }, { 57, 0 }, { 0, 0 }, { 59,-3361 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+
+ { 65,-3361 }, { 66,-3361 }, { 67,-3361 }, { 68,-3361 }, { 69, -59 },
+ { 70,-3361 }, { 71,-3361 }, { 72,-3361 }, { 73,-3361 }, { 74,-3361 },
+ { 75,-3361 }, { 76,-3361 }, { 77,-3361 }, { 78,-3361 }, { 79,-3361 },
+ { 80,-3361 }, { 81,-3361 }, { 82,-3361 }, { 83,-3361 }, { 84,-3361 },
+ { 85,-3361 }, { 86,-3361 }, { 87,-3361 }, { 88,-3361 }, { 89,-3361 },
+ { 90,-3361 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94,-3361 },
+ { 95,-3361 }, { 96,-3361 }, { 97,-3361 }, { 98,-3361 }, { 99,-3361 },
+ { 100,-3361 }, { 101, -59 }, { 102,-3361 }, { 103,-3361 }, { 104,-3361 },
+ { 105,-3361 }, { 106,-3361 }, { 107,-3361 }, { 108,-3361 }, { 109,-3361 },
+ { 110,-3361 }, { 111,-3361 }, { 112,-3361 }, { 113,-3361 }, { 114,-3361 },
+
+ { 115,-3361 }, { 116,-3361 }, { 117,-3361 }, { 118,-3361 }, { 119,-3361 },
+ { 120,-3361 }, { 121,-3361 }, { 122,-3361 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 126,-3361 }, { 127,-3361 }, { 128,-3361 }, { 129,-3361 },
+ { 130,-3361 }, { 131,-3361 }, { 132,-3361 }, { 133,-3361 }, { 134,-3361 },
+ { 135,-3361 }, { 136,-3361 }, { 137,-3361 }, { 138,-3361 }, { 139,-3361 },
+ { 140,-3361 }, { 141,-3361 }, { 142,-3361 }, { 143,-3361 }, { 144,-3361 },
+ { 145,-3361 }, { 146,-3361 }, { 147,-3361 }, { 148,-3361 }, { 149,-3361 },
+ { 150,-3361 }, { 151,-3361 }, { 152,-3361 }, { 153,-3361 }, { 154,-3361 },
+ { 155,-3361 }, { 156,-3361 }, { 157,-3361 }, { 158,-3361 }, { 159,-3361 },
+ { 160,-3361 }, { 161,-3361 }, { 162,-3361 }, { 163,-3361 }, { 164,-3361 },
+
+ { 165,-3361 }, { 166,-3361 }, { 167,-3361 }, { 168,-3361 }, { 169,-3361 },
+ { 170,-3361 }, { 171,-3361 }, { 172,-3361 }, { 173,-3361 }, { 174,-3361 },
+ { 175,-3361 }, { 176,-3361 }, { 177,-3361 }, { 178,-3361 }, { 179,-3361 },
+ { 180,-3361 }, { 181,-3361 }, { 182,-3361 }, { 183,-3361 }, { 184,-3361 },
+ { 185,-3361 }, { 186,-3361 }, { 187,-3361 }, { 188,-3361 }, { 189,-3361 },
+ { 190,-3361 }, { 191,-3361 }, { 192,-3361 }, { 193,-3361 }, { 194,-3361 },
+ { 195,-3361 }, { 196,-3361 }, { 197,-3361 }, { 198,-3361 }, { 199,-3361 },
+ { 200,-3361 }, { 201,-3361 }, { 202,-3361 }, { 203,-3361 }, { 204,-3361 },
+ { 205,-3361 }, { 206,-3361 }, { 207,-3361 }, { 208,-3361 }, { 209,-3361 },
+ { 210,-3361 }, { 211,-3361 }, { 212,-3361 }, { 213,-3361 }, { 214,-3361 },
+
+ { 215,-3361 }, { 216,-3361 }, { 217,-3361 }, { 218,-3361 }, { 219,-3361 },
+ { 220,-3361 }, { 221,-3361 }, { 222,-3361 }, { 223,-3361 }, { 224,-3361 },
+ { 225,-3361 }, { 226,-3361 }, { 227,-3361 }, { 228,-3361 }, { 229,-3361 },
+ { 230,-3361 }, { 231,-3361 }, { 232,-3361 }, { 233,-3361 }, { 234,-3361 },
+ { 235,-3361 }, { 236,-3361 }, { 237,-3361 }, { 238,-3361 }, { 239,-3361 },
+ { 240,-3361 }, { 241,-3361 }, { 242,-3361 }, { 243,-3361 }, { 244,-3361 },
+ { 245,-3361 }, { 246,-3361 }, { 247,-3361 }, { 248,-3361 }, { 249,-3361 },
+ { 250,-3361 }, { 251,-3361 }, { 252,-3361 }, { 253,-3361 }, { 254,-3361 },
+ { 255,-3361 }, { 256,-3361 }, { 0, 43 }, { 0,2160 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 48, 482 }, { 49, 482 }, { 50, 482 }, { 51, 482 },
+ { 52, 482 }, { 53, 482 }, { 54, 482 }, { 55, 482 }, { 56, 482 },
+
+ { 57, 482 }, { 0, 40 }, { 0,2101 }, { 1, 681 }, { 2, 681 },
+ { 3, 681 }, { 4, 681 }, { 5, 681 }, { 6, 681 }, { 7, 681 },
+ { 8, 681 }, { 0, 0 }, { 0, 0 }, { 11, 681 }, { 0, 0 },
+ { 0, 0 }, { 14, 681 }, { 15, 681 }, { 16, 681 }, { 17, 681 },
+ { 18, 681 }, { 19, 681 }, { 20, 681 }, { 21, 681 }, { 22, 681 },
+ { 23, 681 }, { 24, 681 }, { 25, 681 }, { 26, 681 }, { 27, 681 },
+ { 28, 681 }, { 29, 681 }, { 30, 681 }, { 31, 681 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 39, 681 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+
+ { 48, 939 }, { 49, 939 }, { 50, 939 }, { 51, 939 }, { 52, 939 },
+ { 53, 939 }, { 54, 939 }, { 55, 939 }, { 56, 939 }, { 57, 939 },
+ { 0, 0 }, { 59, 681 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 65, 681 }, { 66, 681 }, { 67, 681 },
+ { 68, 681 }, { 69, 681 }, { 70, 681 }, { 71, 681 }, { 72, 681 },
+ { 73, 681 }, { 74, 681 }, { 75, 681 }, { 76, 681 }, { 77, 681 },
+ { 78, 681 }, { 79, 681 }, { 80, 681 }, { 81, 681 }, { 82, 681 },
+ { 83, 681 }, { 84, 681 }, { 85, 681 }, { 86, 681 }, { 87, 681 },
+ { 88, 681 }, { 89, 681 }, { 90, 681 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 94, 681 }, { 95, 681 }, { 96, 681 }, { 97, 681 },
+
+ { 98, 681 }, { 99, 681 }, { 100, 681 }, { 101, 681 }, { 102, 681 },
+ { 103, 681 }, { 104, 681 }, { 105, 681 }, { 106, 681 }, { 107, 681 },
+ { 108, 681 }, { 109, 681 }, { 110, 681 }, { 111, 681 }, { 112, 681 },
+ { 113, 681 }, { 114, 681 }, { 115, 681 }, { 116, 681 }, { 117, 681 },
+ { 118, 681 }, { 119, 681 }, { 120, 681 }, { 121, 681 }, { 122, 681 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 126, 681 }, { 127, 681 },
+ { 128, 681 }, { 129, 681 }, { 130, 681 }, { 131, 681 }, { 132, 681 },
+ { 133, 681 }, { 134, 681 }, { 135, 681 }, { 136, 681 }, { 137, 681 },
+ { 138, 681 }, { 139, 681 }, { 140, 681 }, { 141, 681 }, { 142, 681 },
+ { 143, 681 }, { 144, 681 }, { 145, 681 }, { 146, 681 }, { 147, 681 },
+
+ { 148, 681 }, { 149, 681 }, { 150, 681 }, { 151, 681 }, { 152, 681 },
+ { 153, 681 }, { 154, 681 }, { 155, 681 }, { 156, 681 }, { 157, 681 },
+ { 158, 681 }, { 159, 681 }, { 160, 681 }, { 161, 681 }, { 162, 681 },
+ { 163, 681 }, { 164, 681 }, { 165, 681 }, { 166, 681 }, { 167, 681 },
+ { 168, 681 }, { 169, 681 }, { 170, 681 }, { 171, 681 }, { 172, 681 },
+ { 173, 681 }, { 174, 681 }, { 175, 681 }, { 176, 681 }, { 177, 681 },
+ { 178, 681 }, { 179, 681 }, { 180, 681 }, { 181, 681 }, { 182, 681 },
+ { 183, 681 }, { 184, 681 }, { 185, 681 }, { 186, 681 }, { 187, 681 },
+ { 188, 681 }, { 189, 681 }, { 190, 681 }, { 191, 681 }, { 192, 681 },
+ { 193, 681 }, { 194, 681 }, { 195, 681 }, { 196, 681 }, { 197, 681 },
+
+ { 198, 681 }, { 199, 681 }, { 200, 681 }, { 201, 681 }, { 202, 681 },
+ { 203, 681 }, { 204, 681 }, { 205, 681 }, { 206, 681 }, { 207, 681 },
+ { 208, 681 }, { 209, 681 }, { 210, 681 }, { 211, 681 }, { 212, 681 },
+ { 213, 681 }, { 214, 681 }, { 215, 681 }, { 216, 681 }, { 217, 681 },
+ { 218, 681 }, { 219, 681 }, { 220, 681 }, { 221, 681 }, { 222, 681 },
+ { 223, 681 }, { 224, 681 }, { 225, 681 }, { 226, 681 }, { 227, 681 },
+ { 228, 681 }, { 229, 681 }, { 230, 681 }, { 231, 681 }, { 232, 681 },
+ { 233, 681 }, { 234, 681 }, { 235, 681 }, { 236, 681 }, { 237, 681 },
+ { 238, 681 }, { 239, 681 }, { 240, 681 }, { 241, 681 }, { 242, 681 },
+ { 243, 681 }, { 244, 681 }, { 245, 681 }, { 246, 681 }, { 247, 681 },
+
+ { 248, 681 }, { 249, 681 }, { 250, 681 }, { 251, 681 }, { 252, 681 },
+ { 253, 681 }, { 254, 681 }, { 255, 681 }, { 256, 681 }, { 0, 13 },
+ { 0,1843 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 13 }, { 0,1820 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48, 939 }, { 49, 939 },
+ { 50, 939 }, { 51, 939 }, { 52, 939 }, { 53, 939 }, { 54, 939 },
+ { 55, 939 }, { 56, 939 }, { 57, 939 }, { 0, 0 }, { 0, 0 },
+ { 0, 14 }, { 0,1782 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 65, 939 }, { 66, 939 }, { 67, 939 }, { 68, 939 }, { 69, 939 },
+ { 70, 939 }, { 48, 939 }, { 49, 939 }, { 50, 939 }, { 51, 939 },
+ { 52, 939 }, { 53, 939 }, { 54, 939 }, { 55, 939 }, { 56, 939 },
+ { 57, 939 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65, 939 }, { 66, 939 },
+
+ { 67, 939 }, { 68, 939 }, { 69, 939 }, { 70, 939 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 97, 939 }, { 98, 939 }, { 99, 939 },
+ { 100, 939 }, { 101, 939 }, { 102, 939 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48,-3995 },
+ { 49,-3995 }, { 50,-3995 }, { 51,-3995 }, { 52,-3995 }, { 53,-3995 },
+ { 54,-3995 }, { 55,-3995 }, { 56,-3995 }, { 57,-3995 }, { 0, 0 },
+ { 97, 939 }, { 98, 939 }, { 99, 939 }, { 100, 939 }, { 101, 939 },
+ { 102, 939 }, { 65,-3995 }, { 66,-3995 }, { 67,-3995 }, { 68,-3995 },
+ { 69,-3995 }, { 70,-3995 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 97,-3995 }, { 98,-3995 },
+ { 99,-3995 }, { 100,-3995 }, { 101,-3995 }, { 102,-3995 }, { 0, 40 },
+ { 0,1678 }, { 1,-4097 }, { 2,-4097 }, { 3,-4097 }, { 4,-4097 },
+ { 5,-4097 }, { 6,-4097 }, { 7,-4097 }, { 8,-4097 }, { 0, 0 },
+ { 0, 0 }, { 11,-4097 }, { 0, 0 }, { 0, 0 }, { 14,-4097 },
+ { 15,-4097 }, { 16,-4097 }, { 17,-4097 }, { 18,-4097 }, { 19,-4097 },
+ { 20,-4097 }, { 21,-4097 }, { 22,-4097 }, { 23,-4097 }, { 24,-4097 },
+
+ { 25,-4097 }, { 26,-4097 }, { 27,-4097 }, { 28,-4097 }, { 29,-4097 },
+ { 30,-4097 }, { 31,-4097 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39,-4097 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48, 909 }, { 49, 909 },
+ { 50, 909 }, { 51, 909 }, { 52, 909 }, { 53, 909 }, { 54, 909 },
+ { 55, 909 }, { 56, 909 }, { 57, 909 }, { 0, 0 }, { 59,-4097 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 65,-4097 }, { 66,-4097 }, { 67,-4097 }, { 68,-4097 }, { 69,-4097 },
+ { 70,-4097 }, { 71,-4097 }, { 72,-4097 }, { 73,-4097 }, { 74,-4097 },
+
+ { 75,-4097 }, { 76,-4097 }, { 77,-4097 }, { 78,-4097 }, { 79,-4097 },
+ { 80,-4097 }, { 81,-4097 }, { 82,-4097 }, { 83,-4097 }, { 84,-4097 },
+ { 85,-4097 }, { 86,-4097 }, { 87,-4097 }, { 88,-4097 }, { 89,-4097 },
+ { 90,-4097 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94,-4097 },
+ { 95,-4097 }, { 96,-4097 }, { 97,-4097 }, { 98,-4097 }, { 99,-4097 },
+ { 100,-4097 }, { 101,-4097 }, { 102,-4097 }, { 103,-4097 }, { 104,-4097 },
+ { 105,-4097 }, { 106,-4097 }, { 107,-4097 }, { 108,-4097 }, { 109,-4097 },
+ { 110,-4097 }, { 111,-4097 }, { 112,-4097 }, { 113,-4097 }, { 114,-4097 },
+ { 115,-4097 }, { 116,-4097 }, { 117,-4097 }, { 118,-4097 }, { 119,-4097 },
+ { 120,-4097 }, { 121,-4097 }, { 122,-4097 }, { 0, 0 }, { 0, 0 },
+
+ { 0, 0 }, { 126,-4097 }, { 127,-4097 }, { 128,-4097 }, { 129,-4097 },
+ { 130,-4097 }, { 131,-4097 }, { 132,-4097 }, { 133,-4097 }, { 134,-4097 },
+ { 135,-4097 }, { 136,-4097 }, { 137,-4097 }, { 138,-4097 }, { 139,-4097 },
+ { 140,-4097 }, { 141,-4097 }, { 142,-4097 }, { 143,-4097 }, { 144,-4097 },
+ { 145,-4097 }, { 146,-4097 }, { 147,-4097 }, { 148,-4097 }, { 149,-4097 },
+ { 150,-4097 }, { 151,-4097 }, { 152,-4097 }, { 153,-4097 }, { 154,-4097 },
+ { 155,-4097 }, { 156,-4097 }, { 157,-4097 }, { 158,-4097 }, { 159,-4097 },
+ { 160,-4097 }, { 161,-4097 }, { 162,-4097 }, { 163,-4097 }, { 164,-4097 },
+ { 165,-4097 }, { 166,-4097 }, { 167,-4097 }, { 168,-4097 }, { 169,-4097 },
+ { 170,-4097 }, { 171,-4097 }, { 172,-4097 }, { 173,-4097 }, { 174,-4097 },
+
+ { 175,-4097 }, { 176,-4097 }, { 177,-4097 }, { 178,-4097 }, { 179,-4097 },
+ { 180,-4097 }, { 181,-4097 }, { 182,-4097 }, { 183,-4097 }, { 184,-4097 },
+ { 185,-4097 }, { 186,-4097 }, { 187,-4097 }, { 188,-4097 }, { 189,-4097 },
+ { 190,-4097 }, { 191,-4097 }, { 192,-4097 }, { 193,-4097 }, { 194,-4097 },
+ { 195,-4097 }, { 196,-4097 }, { 197,-4097 }, { 198,-4097 }, { 199,-4097 },
+ { 200,-4097 }, { 201,-4097 }, { 202,-4097 }, { 203,-4097 }, { 204,-4097 },
+ { 205,-4097 }, { 206,-4097 }, { 207,-4097 }, { 208,-4097 }, { 209,-4097 },
+ { 210,-4097 }, { 211,-4097 }, { 212,-4097 }, { 213,-4097 }, { 214,-4097 },
+ { 215,-4097 }, { 216,-4097 }, { 217,-4097 }, { 218,-4097 }, { 219,-4097 },
+ { 220,-4097 }, { 221,-4097 }, { 222,-4097 }, { 223,-4097 }, { 224,-4097 },
+
+ { 225,-4097 }, { 226,-4097 }, { 227,-4097 }, { 228,-4097 }, { 229,-4097 },
+ { 230,-4097 }, { 231,-4097 }, { 232,-4097 }, { 233,-4097 }, { 234,-4097 },
+ { 235,-4097 }, { 236,-4097 }, { 237,-4097 }, { 238,-4097 }, { 239,-4097 },
+ { 240,-4097 }, { 241,-4097 }, { 242,-4097 }, { 243,-4097 }, { 244,-4097 },
+ { 245,-4097 }, { 246,-4097 }, { 247,-4097 }, { 248,-4097 }, { 249,-4097 },
+ { 250,-4097 }, { 251,-4097 }, { 252,-4097 }, { 253,-4097 }, { 254,-4097 },
+ { 255,-4097 }, { 256,-4097 }, { 0, 46 }, { 0,1420 }, { 1,-4090 },
+ { 2,-4090 }, { 3,-4090 }, { 4,-4090 }, { 5,-4090 }, { 6,-4090 },
+ { 7,-4090 }, { 8,-4090 }, { 0, 0 }, { 0, 0 }, { 11,-4090 },
+ { 0, 0 }, { 0, 0 }, { 14,-4090 }, { 15,-4090 }, { 16,-4090 },
+
+ { 17,-4090 }, { 18,-4090 }, { 19,-4090 }, { 20,-4090 }, { 21,-4090 },
+ { 22,-4090 }, { 23,-4090 }, { 24,-4090 }, { 25,-4090 }, { 26,-4090 },
+ { 27,-4090 }, { 28,-4090 }, { 29,-4090 }, { 30,-4090 }, { 31,-4090 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 39,-4090 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 48,-4090 }, { 49,-4090 }, { 50,-4090 }, { 51,-4090 },
+ { 52,-4090 }, { 53,-4090 }, { 54,-4090 }, { 55,-4090 }, { 56,-4090 },
+ { 57,-4090 }, { 0, 0 }, { 59,-4090 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65,-4090 }, { 66,-4090 },
+
+ { 67,-4090 }, { 68,-4090 }, { 69,-4090 }, { 70,-4090 }, { 71,-4090 },
+ { 72,-4090 }, { 73,-4090 }, { 74,-4090 }, { 75,-4090 }, { 76,-4090 },
+ { 77,-4090 }, { 78,-4090 }, { 79,-4090 }, { 80,-4090 }, { 81,-4090 },
+ { 82,-4090 }, { 83,-4090 }, { 84,-4090 }, { 85,-4090 }, { 86,-4090 },
+ { 87,-4090 }, { 88,-4090 }, { 89,-4090 }, { 90,-4090 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 94,-4090 }, { 95,-4090 }, { 96,-4090 },
+ { 97,-4090 }, { 98,-4090 }, { 99,-4090 }, { 100,-4090 }, { 101,-4090 },
+ { 102,-4090 }, { 103,-4090 }, { 104,-4090 }, { 105,-4090 }, { 106,-4090 },
+ { 107,-4090 }, { 108,-4090 }, { 109,-4090 }, { 110,-4090 }, { 111,-4090 },
+ { 112,-4090 }, { 113,-4090 }, { 114,-4090 }, { 115,-4090 }, { 116,-4090 },
+
+ { 117,-4090 }, { 118,-4090 }, { 119,-4090 }, { 120,-4090 }, { 121,-4090 },
+ { 122,-4090 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 126,-4090 },
+ { 127,-4090 }, { 128,-4090 }, { 129,-4090 }, { 130,-4090 }, { 131,-4090 },
+ { 132,-4090 }, { 133,-4090 }, { 134,-4090 }, { 135,-4090 }, { 136,-4090 },
+ { 137,-4090 }, { 138,-4090 }, { 139,-4090 }, { 140,-4090 }, { 141,-4090 },
+ { 142,-4090 }, { 143,-4090 }, { 144,-4090 }, { 145,-4090 }, { 146,-4090 },
+ { 147,-4090 }, { 148,-4090 }, { 149,-4090 }, { 150,-4090 }, { 151,-4090 },
+ { 152,-4090 }, { 153,-4090 }, { 154,-4090 }, { 155,-4090 }, { 156,-4090 },
+ { 157,-4090 }, { 158,-4090 }, { 159,-4090 }, { 160,-4090 }, { 161,-4090 },
+ { 162,-4090 }, { 163,-4090 }, { 164,-4090 }, { 165,-4090 }, { 166,-4090 },
+
+ { 167,-4090 }, { 168,-4090 }, { 169,-4090 }, { 170,-4090 }, { 171,-4090 },
+ { 172,-4090 }, { 173,-4090 }, { 174,-4090 }, { 175,-4090 }, { 176,-4090 },
+ { 177,-4090 }, { 178,-4090 }, { 179,-4090 }, { 180,-4090 }, { 181,-4090 },
+ { 182,-4090 }, { 183,-4090 }, { 184,-4090 }, { 185,-4090 }, { 186,-4090 },
+ { 187,-4090 }, { 188,-4090 }, { 189,-4090 }, { 190,-4090 }, { 191,-4090 },
+ { 192,-4090 }, { 193,-4090 }, { 194,-4090 }, { 195,-4090 }, { 196,-4090 },
+ { 197,-4090 }, { 198,-4090 }, { 199,-4090 }, { 200,-4090 }, { 201,-4090 },
+ { 202,-4090 }, { 203,-4090 }, { 204,-4090 }, { 205,-4090 }, { 206,-4090 },
+ { 207,-4090 }, { 208,-4090 }, { 209,-4090 }, { 210,-4090 }, { 211,-4090 },
+ { 212,-4090 }, { 213,-4090 }, { 214,-4090 }, { 215,-4090 }, { 216,-4090 },
+
+ { 217,-4090 }, { 218,-4090 }, { 219,-4090 }, { 220,-4090 }, { 221,-4090 },
+ { 222,-4090 }, { 223,-4090 }, { 224,-4090 }, { 225,-4090 }, { 226,-4090 },
+ { 227,-4090 }, { 228,-4090 }, { 229,-4090 }, { 230,-4090 }, { 231,-4090 },
+ { 232,-4090 }, { 233,-4090 }, { 234,-4090 }, { 235,-4090 }, { 236,-4090 },
+ { 237,-4090 }, { 238,-4090 }, { 239,-4090 }, { 240,-4090 }, { 241,-4090 },
+ { 242,-4090 }, { 243,-4090 }, { 244,-4090 }, { 245,-4090 }, { 246,-4090 },
+ { 247,-4090 }, { 248,-4090 }, { 249,-4090 }, { 250,-4090 }, { 251,-4090 },
+ { 252,-4090 }, { 253,-4090 }, { 254,-4090 }, { 255,-4090 }, { 256,-4090 },
+ { 0, 40 }, { 0,1162 }, { 1,-258 }, { 2,-258 }, { 3,-258 },
+ { 4,-258 }, { 5,-258 }, { 6,-258 }, { 7,-258 }, { 8,-258 },
+
+ { 0, 0 }, { 0, 0 }, { 11,-258 }, { 0, 0 }, { 0, 0 },
+ { 14,-258 }, { 15,-258 }, { 16,-258 }, { 17,-258 }, { 18,-258 },
+ { 19,-258 }, { 20,-258 }, { 21,-258 }, { 22,-258 }, { 23,-258 },
+ { 24,-258 }, { 25,-258 }, { 26,-258 }, { 27,-258 }, { 28,-258 },
+ { 29,-258 }, { 30,-258 }, { 31,-258 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 39,-258 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 48, 0 },
+ { 49, 0 }, { 50, 0 }, { 51, 0 }, { 52, 0 }, { 53, 0 },
+ { 54, 0 }, { 55, 0 }, { 56, 0 }, { 57, 0 }, { 0, 0 },
+
+ { 59,-258 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 65,-258 }, { 66,-258 }, { 67,-258 }, { 68,-258 },
+ { 69,-258 }, { 70,-258 }, { 71,-258 }, { 72,-258 }, { 73,-258 },
+ { 74,-258 }, { 75,-258 }, { 76,-258 }, { 77,-258 }, { 78,-258 },
+ { 79,-258 }, { 80,-258 }, { 81,-258 }, { 82,-258 }, { 83,-258 },
+ { 84,-258 }, { 85,-258 }, { 86,-258 }, { 87,-258 }, { 88,-258 },
+ { 89,-258 }, { 90,-258 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 94,-258 }, { 95,-258 }, { 96,-258 }, { 97,-258 }, { 98,-258 },
+ { 99,-258 }, { 100,-258 }, { 101,-258 }, { 102,-258 }, { 103,-258 },
+ { 104,-258 }, { 105,-258 }, { 106,-258 }, { 107,-258 }, { 108,-258 },
+
+ { 109,-258 }, { 110,-258 }, { 111,-258 }, { 112,-258 }, { 113,-258 },
+ { 114,-258 }, { 115,-258 }, { 116,-258 }, { 117,-258 }, { 118,-258 },
+ { 119,-258 }, { 120,-258 }, { 121,-258 }, { 122,-258 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 126,-258 }, { 127,-258 }, { 128,-258 },
+ { 129,-258 }, { 130,-258 }, { 131,-258 }, { 132,-258 }, { 133,-258 },
+ { 134,-258 }, { 135,-258 }, { 136,-258 }, { 137,-258 }, { 138,-258 },
+ { 139,-258 }, { 140,-258 }, { 141,-258 }, { 142,-258 }, { 143,-258 },
+ { 144,-258 }, { 145,-258 }, { 146,-258 }, { 147,-258 }, { 148,-258 },
+ { 149,-258 }, { 150,-258 }, { 151,-258 }, { 152,-258 }, { 153,-258 },
+ { 154,-258 }, { 155,-258 }, { 156,-258 }, { 157,-258 }, { 158,-258 },
+
+ { 159,-258 }, { 160,-258 }, { 161,-258 }, { 162,-258 }, { 163,-258 },
+ { 164,-258 }, { 165,-258 }, { 166,-258 }, { 167,-258 }, { 168,-258 },
+ { 169,-258 }, { 170,-258 }, { 171,-258 }, { 172,-258 }, { 173,-258 },
+ { 174,-258 }, { 175,-258 }, { 176,-258 }, { 177,-258 }, { 178,-258 },
+ { 179,-258 }, { 180,-258 }, { 181,-258 }, { 182,-258 }, { 183,-258 },
+ { 184,-258 }, { 185,-258 }, { 186,-258 }, { 187,-258 }, { 188,-258 },
+ { 189,-258 }, { 190,-258 }, { 191,-258 }, { 192,-258 }, { 193,-258 },
+ { 194,-258 }, { 195,-258 }, { 196,-258 }, { 197,-258 }, { 198,-258 },
+ { 199,-258 }, { 200,-258 }, { 201,-258 }, { 202,-258 }, { 203,-258 },
+ { 204,-258 }, { 205,-258 }, { 206,-258 }, { 207,-258 }, { 208,-258 },
+
+ { 209,-258 }, { 210,-258 }, { 211,-258 }, { 212,-258 }, { 213,-258 },
+ { 214,-258 }, { 215,-258 }, { 216,-258 }, { 217,-258 }, { 218,-258 },
+ { 219,-258 }, { 220,-258 }, { 221,-258 }, { 222,-258 }, { 223,-258 },
+ { 224,-258 }, { 225,-258 }, { 226,-258 }, { 227,-258 }, { 228,-258 },
+ { 229,-258 }, { 230,-258 }, { 231,-258 }, { 232,-258 }, { 233,-258 },
+ { 234,-258 }, { 235,-258 }, { 236,-258 }, { 237,-258 }, { 238,-258 },
+ { 239,-258 }, { 240,-258 }, { 241,-258 }, { 242,-258 }, { 243,-258 },
+ { 244,-258 }, { 245,-258 }, { 246,-258 }, { 247,-258 }, { 248,-258 },
+ { 249,-258 }, { 250,-258 }, { 251,-258 }, { 252,-258 }, { 253,-258 },
+ { 254,-258 }, { 255,-258 }, { 256,-258 }, { 0, 13 }, { 0, 904 },
+
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 13 }, { 0, 881 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 48, 393 }, { 49, 393 }, { 50, 393 },
+
+ { 51, 393 }, { 52, 393 }, { 53, 393 }, { 54, 393 }, { 55, 393 },
+ { 56, 393 }, { 57, 393 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65, 393 },
+ { 66, 393 }, { 67, 393 }, { 68, 393 }, { 69, 393 }, { 70, 393 },
+ { 48, 393 }, { 49, 393 }, { 50, 393 }, { 51, 393 }, { 52, 393 },
+ { 53, 393 }, { 54, 393 }, { 55, 393 }, { 56, 393 }, { 57, 393 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 65, 393 }, { 66, 393 }, { 67, 393 },
+ { 68, 393 }, { 69, 393 }, { 70, 393 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 97, 393 }, { 98, 393 }, { 99, 393 }, { 100, 393 },
+
+ { 101, 393 }, { 102, 393 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 97, 393 },
+ { 98, 393 }, { 99, 393 }, { 100, 393 }, { 101, 393 }, { 102, 393 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 40 }, { 0, 769 },
+ { 1,-5006 }, { 2,-5006 }, { 3,-5006 }, { 4,-5006 }, { 5,-5006 },
+ { 6,-5006 }, { 7,-5006 }, { 8,-5006 }, { 0, 0 }, { 0, 0 },
+ { 11,-5006 }, { 0, 0 }, { 125,-4596 }, { 14,-5006 }, { 15,-5006 },
+
+ { 16,-5006 }, { 17,-5006 }, { 18,-5006 }, { 19,-5006 }, { 20,-5006 },
+ { 21,-5006 }, { 22,-5006 }, { 23,-5006 }, { 24,-5006 }, { 25,-5006 },
+ { 26,-5006 }, { 27,-5006 }, { 28,-5006 }, { 29,-5006 }, { 30,-5006 },
+ { 31,-5006 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 39,-5006 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 48, 0 }, { 49, 0 }, { 50, 0 },
+ { 51, 0 }, { 52, 0 }, { 53, 0 }, { 54, 0 }, { 55, 0 },
+ { 56, 0 }, { 57, 0 }, { 0, 0 }, { 59,-5006 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 65,-5006 },
+
+ { 66,-5006 }, { 67,-5006 }, { 68,-5006 }, { 69,-5006 }, { 70,-5006 },
+ { 71,-5006 }, { 72,-5006 }, { 73,-5006 }, { 74,-5006 }, { 75,-5006 },
+ { 76,-5006 }, { 77,-5006 }, { 78,-5006 }, { 79,-5006 }, { 80,-5006 },
+ { 81,-5006 }, { 82,-5006 }, { 83,-5006 }, { 84,-5006 }, { 85,-5006 },
+ { 86,-5006 }, { 87,-5006 }, { 88,-5006 }, { 89,-5006 }, { 90,-5006 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 94,-5006 }, { 95,-5006 },
+ { 96,-5006 }, { 97,-5006 }, { 98,-5006 }, { 99,-5006 }, { 100,-5006 },
+ { 101,-5006 }, { 102,-5006 }, { 103,-5006 }, { 104,-5006 }, { 105,-5006 },
+ { 106,-5006 }, { 107,-5006 }, { 108,-5006 }, { 109,-5006 }, { 110,-5006 },
+ { 111,-5006 }, { 112,-5006 }, { 113,-5006 }, { 114,-5006 }, { 115,-5006 },
+
+ { 116,-5006 }, { 117,-5006 }, { 118,-5006 }, { 119,-5006 }, { 120,-5006 },
+ { 121,-5006 }, { 122,-5006 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 126,-5006 }, { 127,-5006 }, { 128,-5006 }, { 129,-5006 }, { 130,-5006 },
+ { 131,-5006 }, { 132,-5006 }, { 133,-5006 }, { 134,-5006 }, { 135,-5006 },
+ { 136,-5006 }, { 137,-5006 }, { 138,-5006 }, { 139,-5006 }, { 140,-5006 },
+ { 141,-5006 }, { 142,-5006 }, { 143,-5006 }, { 144,-5006 }, { 145,-5006 },
+ { 146,-5006 }, { 147,-5006 }, { 148,-5006 }, { 149,-5006 }, { 150,-5006 },
+ { 151,-5006 }, { 152,-5006 }, { 153,-5006 }, { 154,-5006 }, { 155,-5006 },
+ { 156,-5006 }, { 157,-5006 }, { 158,-5006 }, { 159,-5006 }, { 160,-5006 },
+ { 161,-5006 }, { 162,-5006 }, { 163,-5006 }, { 164,-5006 }, { 165,-5006 },
+
+ { 166,-5006 }, { 167,-5006 }, { 168,-5006 }, { 169,-5006 }, { 170,-5006 },
+ { 171,-5006 }, { 172,-5006 }, { 173,-5006 }, { 174,-5006 }, { 175,-5006 },
+ { 176,-5006 }, { 177,-5006 }, { 178,-5006 }, { 179,-5006 }, { 180,-5006 },
+ { 181,-5006 }, { 182,-5006 }, { 183,-5006 }, { 184,-5006 }, { 185,-5006 },
+ { 186,-5006 }, { 187,-5006 }, { 188,-5006 }, { 189,-5006 }, { 190,-5006 },
+ { 191,-5006 }, { 192,-5006 }, { 193,-5006 }, { 194,-5006 }, { 195,-5006 },
+ { 196,-5006 }, { 197,-5006 }, { 198,-5006 }, { 199,-5006 }, { 200,-5006 },
+ { 201,-5006 }, { 202,-5006 }, { 203,-5006 }, { 204,-5006 }, { 205,-5006 },
+ { 206,-5006 }, { 207,-5006 }, { 208,-5006 }, { 209,-5006 }, { 210,-5006 },
+ { 211,-5006 }, { 212,-5006 }, { 213,-5006 }, { 214,-5006 }, { 215,-5006 },
+
+ { 216,-5006 }, { 217,-5006 }, { 218,-5006 }, { 219,-5006 }, { 220,-5006 },
+ { 221,-5006 }, { 222,-5006 }, { 223,-5006 }, { 224,-5006 }, { 225,-5006 },
+ { 226,-5006 }, { 227,-5006 }, { 228,-5006 }, { 229,-5006 }, { 230,-5006 },
+ { 231,-5006 }, { 232,-5006 }, { 233,-5006 }, { 234,-5006 }, { 235,-5006 },
+ { 236,-5006 }, { 237,-5006 }, { 238,-5006 }, { 239,-5006 }, { 240,-5006 },
+ { 241,-5006 }, { 242,-5006 }, { 243,-5006 }, { 244,-5006 }, { 245,-5006 },
+ { 246,-5006 }, { 247,-5006 }, { 248,-5006 }, { 249,-5006 }, { 250,-5006 },
+ { 251,-5006 }, { 252,-5006 }, { 253,-5006 }, { 254,-5006 }, { 255,-5006 },
+ { 256,-5006 }, { 0, 13 }, { 0, 511 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 13 },
+ { 0, 488 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 48,-4966 }, { 49,-4966 }, { 50,-4966 }, { 51,-4966 }, { 52,-4966 },
+ { 53,-4966 }, { 54,-4966 }, { 55,-4966 }, { 56,-4966 }, { 57,-4966 },
+
+ { 0, 0 }, { 0, 0 }, { 0, 13 }, { 0, 450 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 65,-4966 }, { 66,-4966 }, { 67,-4966 },
+ { 68,-4966 }, { 69,-4966 }, { 70,-4966 }, { 48, 38 }, { 49, 38 },
+ { 50, 38 }, { 51, 38 }, { 52, 38 }, { 53, 38 }, { 54, 38 },
+ { 55, 38 }, { 56, 38 }, { 57, 38 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 65, 38 }, { 66, 38 }, { 67, 38 }, { 68, 38 }, { 69, 38 },
+ { 70, 38 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 97,-4966 },
+ { 98,-4966 }, { 99,-4966 }, { 100,-4966 }, { 101,-4966 }, { 102,-4966 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+
+ { 0, 0 }, { 48, 78 }, { 49, 78 }, { 50, 78 }, { 51, 78 },
+ { 52, 78 }, { 53, 78 }, { 54, 78 }, { 55, 78 }, { 56, 78 },
+ { 57, 78 }, { 0, 0 }, { 97, 38 }, { 98, 38 }, { 99, 38 },
+ { 100, 38 }, { 101, 38 }, { 102, 38 }, { 65, 78 }, { 66, 78 },
+ { 67, 78 }, { 68, 78 }, { 69, 78 }, { 70, 78 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 13 }, { 0, 372 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 125,-4989 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+
+ { 97, 78 }, { 98, 78 }, { 99, 78 }, { 100, 78 }, { 101, 78 },
+ { 102, 78 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 13 }, { 0, 334 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 125,-5027 }, { 48, 114 },
+ { 49, 114 }, { 50, 114 }, { 51, 114 }, { 52, 114 }, { 53, 114 },
+ { 54, 114 }, { 55, 114 }, { 56, 114 }, { 57, 114 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 65, 114 }, { 66, 114 }, { 67, 114 }, { 68, 114 },
+
+ { 69, 114 }, { 70, 114 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 48,-1509 }, { 49,-1509 }, { 50,-1509 },
+ { 51,-1509 }, { 52,-1509 }, { 53,-1509 }, { 54,-1509 }, { 55,-1509 },
+ { 56,-1509 }, { 57,-1509 }, { 0, 0 }, { 97, 114 }, { 98, 114 },
+ { 99, 114 }, { 100, 114 }, { 101, 114 }, { 102, 114 }, { 65,-1509 },
+ { 66,-1509 }, { 67,-1509 }, { 68,-1509 }, { 69,-1509 }, { 70,-1509 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 13 },
+ { 0, 258 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 125,-5105 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 97,-1509 }, { 98,-1509 }, { 99,-1509 }, { 100,-1509 },
+ { 101,-1509 }, { 102,-1509 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 123,-1486 }, { 48,-3165 }, { 49,-3165 },
+ { 50,-3165 }, { 51,-3165 }, { 52,-3165 }, { 53,-3165 }, { 54,-3165 },
+
+ { 55,-3165 }, { 56,-3165 }, { 57,-3165 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 65,-3165 }, { 66,-3165 }, { 67,-3165 }, { 68,-3165 }, { 69,-3165 },
+ { 70,-3165 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 97,-3165 }, { 98,-3165 }, { 99,-3165 },
+ { 100,-3165 }, { 101,-3165 }, { 102,-3165 }, { 0, 0 }, { 0, 0 },
+
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 125,-5219 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+ { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 },
+
+ { 0, 0 }, { 0, 0 }, { 257, 51 }, { 1, 0 }, };
+
+static const struct yy_trans_info *yy_start_state_list[11] =
+ {
+ &yy_transition[1],
+ &yy_transition[3],
+ &yy_transition[261],
+ &yy_transition[519],
+ &yy_transition[777],
+ &yy_transition[1035],
+ &yy_transition[1293],
+ &yy_transition[1551],
+ &yy_transition[1809],
+ &yy_transition[2067],
+ &yy_transition[2325],
+
+ } ;
+
+extern int yy_flex_debug;
+int yy_flex_debug = 0;
+
+/* The intent behind this definition is that it'll catch
+ * any uses of REJECT which flex missed.
+ */
+#define REJECT reject_used_but_not_detected
+#define yymore() yymore_used_but_not_detected
+#define YY_MORE_ADJ 0
+#define YY_RESTORE_YY_MORE_OFFSET
+char *yytext;
+#line 1 "jsonpath_scan.l"
+#line 2 "jsonpath_scan.l"
+/*-------------------------------------------------------------------------
+ *
+ * jsonpath_scan.l
+ * Lexical parser for jsonpath datatype
+ *
+ * Splits jsonpath string into tokens represented as JsonPathString structs.
+ * Decodes unicode and hex escaped strings.
+ *
+ * Copyright (c) 2019-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/jsonpath_scan.l
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "mb/pg_wchar.h"
+#include "nodes/pg_list.h"
+
+static JsonPathString scanstring;
+
+/* Handles to the buffer that the lexer uses internally */
+static YY_BUFFER_STATE scanbufhandle;
+static char *scanbuf;
+static int scanbuflen;
+
+static void addstring(bool init, char *s, int l);
+static void addchar(bool init, char s);
+static enum yytokentype checkKeyword(void);
+static void parseUnicode(char *s, int l);
+static void parseHexChar(char *s);
+
+/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
+#undef fprintf
+#define fprintf(file, fmt, msg) fprintf_to_ereport(fmt, msg)
+
+static void
+fprintf_to_ereport(const char *fmt, const char *msg)
+{
+ ereport(ERROR, (errmsg_internal("%s", msg)));
+}
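
The #define above is the entire mechanism: flex's generated yy_fatal_error() prints its message with fprintf(stderr, "%s\n", msg) and then calls exit(), but with fprintf rewritten the message is handed to ereport(ERROR) instead, which raises an ordinary PostgreSQL error and never returns, so the exit() is never reached. A minimal standalone sketch of the same redirection, using invented names rather than anything from this patch:

#include <stdio.h>

/* Stand-in for ereport(ERROR, ...); in the backend this would not return. */
static void
report_scanner_error(const char *fmt, const char *msg)
{
	(void) fmt;					/* always "%s\n" in this scheme */
	fprintf(stderr, "scanner failed: %s\n", msg);
}

/* Shadow fprintf only for code that follows this point. */
#undef fprintf
#define fprintf(file, fmt, msg) report_scanner_error(fmt, msg)

static void
demo_fatal(const char *msg)
{
	/* Expands to report_scanner_error("%s\n", msg). */
	fprintf(stderr, "%s\n", msg);
}
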
+
+/* LCOV_EXCL_START */
+
+#line 2982 "jsonpath_scan.c"
+#define YY_NO_INPUT 1
+/*
+ * We use exclusive states for quoted and non-quoted strings,
+ * quoted variable names and C-style comments.
+ * Exclusive states:
+ * <xq> - quoted strings
+ * <xnq> - non-quoted strings
+ * <xvq> - quoted variable names
+ * <xc> - C-style comment
+ */
+
+/* "other" means anything that's not special, blank, or '\' or '"' */
+#line 2995 "jsonpath_scan.c"
+
+#define INITIAL 0
+#define xq 1
+#define xnq 2
+#define xvq 3
+#define xc 4
+
+#ifndef YY_NO_UNISTD_H
+/* Special case for "unistd.h", since it is non-ANSI. We include it way
+ * down here because we want the user's section 1 to have been scanned first.
+ * The user has a chance to override it with an option.
+ */
+#include <unistd.h>
+#endif
+
+#ifndef YY_EXTRA_TYPE
+#define YY_EXTRA_TYPE void *
+#endif
+
+static int yy_init_globals ( void );
+
+/* Accessor methods to globals.
+ These are made visible to non-reentrant scanners for convenience. */
+
+int yylex_destroy ( void );
+
+int yyget_debug ( void );
+
+void yyset_debug ( int debug_flag );
+
+YY_EXTRA_TYPE yyget_extra ( void );
+
+void yyset_extra ( YY_EXTRA_TYPE user_defined );
+
+FILE *yyget_in ( void );
+
+void yyset_in ( FILE * _in_str );
+
+FILE *yyget_out ( void );
+
+void yyset_out ( FILE * _out_str );
+
+ int yyget_leng ( void );
+
+char *yyget_text ( void );
+
+int yyget_lineno ( void );
+
+void yyset_lineno ( int _line_number );
+
+YYSTYPE * yyget_lval ( void );
+
+void yyset_lval ( YYSTYPE * yylval_param );
+
+/* Macros after this point can all be overridden by user definitions in
+ * section 1.
+ */
+
+#ifndef YY_SKIP_YYWRAP
+#ifdef __cplusplus
+extern "C" int yywrap ( void );
+#else
+extern int yywrap ( void );
+#endif
+#endif
+
+#ifndef YY_NO_UNPUT
+
+#endif
+
+#ifndef yytext_ptr
+static void yy_flex_strncpy ( char *, const char *, int );
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen ( const char * );
+#endif
+
+#ifndef YY_NO_INPUT
+#ifdef __cplusplus
+static int yyinput ( void );
+#else
+static int input ( void );
+#endif
+
+#endif
+
+/* Amount of stuff to slurp up with each read. */
+#ifndef YY_READ_BUF_SIZE
+#ifdef __ia64__
+/* On IA-64, the buffer size is 16k, not 8k */
+#define YY_READ_BUF_SIZE 16384
+#else
+#define YY_READ_BUF_SIZE 8192
+#endif /* __ia64__ */
+#endif
+
+/* Copy whatever the last rule matched to the standard output. */
+#ifndef ECHO
+/* This used to be an fputs(), but since the string might contain NUL's,
+ * we now use fwrite().
+ */
+#define ECHO do { if (fwrite( yytext, (size_t) yyleng, 1, yyout )) {} } while (0)
+#endif
+
+/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL,
+ * is returned in "result".
+ */
+#ifndef YY_INPUT
+#define YY_INPUT(buf,result,max_size) \
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
+ { \
+ int c = '*'; \
+ int n; \
+ for ( n = 0; n < max_size && \
+ (c = getc( yyin )) != EOF && c != '\n'; ++n ) \
+ buf[n] = (char) c; \
+ if ( c == '\n' ) \
+ buf[n++] = (char) c; \
+ if ( c == EOF && ferror( yyin ) ) \
+ YY_FATAL_ERROR( "input in flex scanner failed" ); \
+ result = n; \
+ } \
+ else \
+ { \
+ errno=0; \
+ while ( (result = (int) fread(buf, 1, (yy_size_t) max_size, yyin)) == 0 && ferror(yyin)) \
+ { \
+ if( errno != EINTR) \
+ { \
+ YY_FATAL_ERROR( "input in flex scanner failed" ); \
+ break; \
+ } \
+ errno=0; \
+ clearerr(yyin); \
+ } \
+ }\
+\
+
+#endif
+
+/* No semi-colon after return; correct usage is to write "yyterminate();" -
+ * we don't want an extra ';' after the "return" because that will cause
+ * some compilers to complain about unreachable statements.
+ */
+#ifndef yyterminate
+#define yyterminate() return YY_NULL
+#endif
+
+/* Number of entries by which start-condition stack grows. */
+#ifndef YY_START_STACK_INCR
+#define YY_START_STACK_INCR 25
+#endif
+
+/* Report a fatal error. */
+#ifndef YY_FATAL_ERROR
+#define YY_FATAL_ERROR(msg) yy_fatal_error( msg )
+#endif
+
+/* end tables serialization structures and prototypes */
+
+/* Default declaration of generated scanner - a define so the user can
+ * easily add parameters.
+ */
+#ifndef YY_DECL
+#define YY_DECL_IS_OURS 1
+
+extern int yylex \
+ (YYSTYPE * yylval_param );
+
+#define YY_DECL int yylex \
+ (YYSTYPE * yylval_param )
+#endif /* !YY_DECL */
+
+/* Code executed at the beginning of each rule, after yytext and yyleng
+ * have been set up.
+ */
+#ifndef YY_USER_ACTION
+#define YY_USER_ACTION
+#endif
+
+/* Code executed at the end of each rule. */
+#ifndef YY_BREAK
+#define YY_BREAK /*LINTED*/break;
+#endif
+
+#define YY_RULE_SETUP \
+ YY_USER_ACTION
+
+/** The main scanner function which does all the work.
+ */
+YY_DECL
+{
+ yy_state_type yy_current_state;
+ char *yy_cp, *yy_bp;
+ int yy_act;
+
+ YYSTYPE * yylval;
+
+ yylval = yylval_param;
+
+ if ( !(yy_init) )
+ {
+ (yy_init) = 1;
+
+#ifdef YY_USER_INIT
+ YY_USER_INIT;
+#endif
+
+ if ( ! (yy_start) )
+ (yy_start) = 1; /* first start state */
+
+ if ( ! yyin )
+ yyin = stdin;
+
+ if ( ! yyout )
+ yyout = stdout;
+
+ if ( ! YY_CURRENT_BUFFER ) {
+ yyensure_buffer_stack ();
+ YY_CURRENT_BUFFER_LVALUE =
+ yy_create_buffer( yyin, YY_BUF_SIZE );
+ }
+
+ yy_load_buffer_state( );
+ }
+
+ {
+#line 99 "jsonpath_scan.l"
+
+
+#line 3227 "jsonpath_scan.c"
+
+ while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */
+ {
+ yy_cp = (yy_c_buf_p);
+
+ /* Support of yytext. */
+ *yy_cp = (yy_hold_char);
+
+ /* yy_bp points to the position in yy_ch_buf of the start of
+ * the current run.
+ */
+ yy_bp = yy_cp;
+
+ yy_current_state = yy_start_state_list[(yy_start)];
+yy_match:
+ {
+ const struct yy_trans_info *yy_trans_info;
+
+ YY_CHAR yy_c;
+
+ for ( yy_c = YY_SC_TO_UI(*yy_cp);
+ (yy_trans_info = &yy_current_state[yy_c])->
+ yy_verify == yy_c;
+ yy_c = YY_SC_TO_UI(*++yy_cp) )
+ yy_current_state += yy_trans_info->yy_nxt;
+ }
+
+yy_find_action:
+ yy_act = yy_current_state[-1].yy_nxt;
+
+ YY_DO_BEFORE_ACTION;
+
+do_action: /* This label is used only to access EOF actions. */
+
+ switch ( yy_act )
+ { /* beginning of action switch */
+case 1:
+YY_RULE_SETUP
+#line 101 "jsonpath_scan.l"
+{
+ addstring(false, yytext, yyleng);
+ }
+ YY_BREAK
+case 2:
+/* rule 2 can match eol */
+YY_RULE_SETUP
+#line 105 "jsonpath_scan.l"
+{
+ yylval->str = scanstring;
+ BEGIN INITIAL;
+ return checkKeyword();
+ }
+ YY_BREAK
+case 3:
+YY_RULE_SETUP
+#line 111 "jsonpath_scan.l"
+{
+ yylval->str = scanstring;
+ BEGIN xc;
+ }
+ YY_BREAK
+case 4:
+YY_RULE_SETUP
+#line 116 "jsonpath_scan.l"
+{
+ yylval->str = scanstring;
+ yyless(0);
+ BEGIN INITIAL;
+ return checkKeyword();
+ }
+ YY_BREAK
+case YY_STATE_EOF(xnq):
+#line 123 "jsonpath_scan.l"
+{
+ yylval->str = scanstring;
+ BEGIN INITIAL;
+ return checkKeyword();
+ }
+ YY_BREAK
+case 5:
+YY_RULE_SETUP
+#line 129 "jsonpath_scan.l"
+{ addchar(false, '\b'); }
+ YY_BREAK
+case 6:
+YY_RULE_SETUP
+#line 131 "jsonpath_scan.l"
+{ addchar(false, '\f'); }
+ YY_BREAK
+case 7:
+YY_RULE_SETUP
+#line 133 "jsonpath_scan.l"
+{ addchar(false, '\n'); }
+ YY_BREAK
+case 8:
+YY_RULE_SETUP
+#line 135 "jsonpath_scan.l"
+{ addchar(false, '\r'); }
+ YY_BREAK
+case 9:
+YY_RULE_SETUP
+#line 137 "jsonpath_scan.l"
+{ addchar(false, '\t'); }
+ YY_BREAK
+case 10:
+YY_RULE_SETUP
+#line 139 "jsonpath_scan.l"
+{ addchar(false, '\v'); }
+ YY_BREAK
+case 11:
+YY_RULE_SETUP
+#line 141 "jsonpath_scan.l"
+{ parseUnicode(yytext, yyleng); }
+ YY_BREAK
+case 12:
+YY_RULE_SETUP
+#line 143 "jsonpath_scan.l"
+{ parseHexChar(yytext); }
+ YY_BREAK
+case 13:
+YY_RULE_SETUP
+#line 145 "jsonpath_scan.l"
+{ yyerror(NULL, "invalid unicode sequence"); }
+ YY_BREAK
+case 14:
+YY_RULE_SETUP
+#line 147 "jsonpath_scan.l"
+{ yyerror(NULL, "invalid hex character sequence"); }
+ YY_BREAK
+case 15:
+YY_RULE_SETUP
+#line 149 "jsonpath_scan.l"
+{
+ /* throw back the \\, and treat as unicode */
+ yyless(yyleng - 1);
+ parseUnicode(yytext, yyleng);
+ }
+ YY_BREAK
+case 16:
+YY_RULE_SETUP
+#line 155 "jsonpath_scan.l"
+{ addchar(false, yytext[1]); }
+ YY_BREAK
+case 17:
+YY_RULE_SETUP
+#line 157 "jsonpath_scan.l"
+{ yyerror(NULL, "unexpected end after backslash"); }
+ YY_BREAK
+case YY_STATE_EOF(xq):
+case YY_STATE_EOF(xvq):
+#line 159 "jsonpath_scan.l"
+{ yyerror(NULL, "unexpected end of quoted string"); }
+ YY_BREAK
+case 18:
+YY_RULE_SETUP
+#line 161 "jsonpath_scan.l"
+{
+ yylval->str = scanstring;
+ BEGIN INITIAL;
+ return STRING_P;
+ }
+ YY_BREAK
+case 19:
+YY_RULE_SETUP
+#line 167 "jsonpath_scan.l"
+{
+ yylval->str = scanstring;
+ BEGIN INITIAL;
+ return VARIABLE_P;
+ }
+ YY_BREAK
+case 20:
+/* rule 20 can match eol */
+YY_RULE_SETUP
+#line 173 "jsonpath_scan.l"
+{ addstring(false, yytext, yyleng); }
+ YY_BREAK
+case 21:
+YY_RULE_SETUP
+#line 175 "jsonpath_scan.l"
+{ BEGIN INITIAL; }
+ YY_BREAK
+case 22:
+/* rule 22 can match eol */
+YY_RULE_SETUP
+#line 177 "jsonpath_scan.l"
+{ }
+ YY_BREAK
+case 23:
+YY_RULE_SETUP
+#line 179 "jsonpath_scan.l"
+{ }
+ YY_BREAK
+case YY_STATE_EOF(xc):
+#line 181 "jsonpath_scan.l"
+{ yyerror(NULL, "unexpected end of comment"); }
+ YY_BREAK
+case 24:
+YY_RULE_SETUP
+#line 183 "jsonpath_scan.l"
+{ return AND_P; }
+ YY_BREAK
+case 25:
+YY_RULE_SETUP
+#line 185 "jsonpath_scan.l"
+{ return OR_P; }
+ YY_BREAK
+case 26:
+YY_RULE_SETUP
+#line 187 "jsonpath_scan.l"
+{ return NOT_P; }
+ YY_BREAK
+case 27:
+YY_RULE_SETUP
+#line 189 "jsonpath_scan.l"
+{ return ANY_P; }
+ YY_BREAK
+case 28:
+YY_RULE_SETUP
+#line 191 "jsonpath_scan.l"
+{ return LESS_P; }
+ YY_BREAK
+case 29:
+YY_RULE_SETUP
+#line 193 "jsonpath_scan.l"
+{ return LESSEQUAL_P; }
+ YY_BREAK
+case 30:
+YY_RULE_SETUP
+#line 195 "jsonpath_scan.l"
+{ return EQUAL_P; }
+ YY_BREAK
+case 31:
+YY_RULE_SETUP
+#line 197 "jsonpath_scan.l"
+{ return NOTEQUAL_P; }
+ YY_BREAK
+case 32:
+YY_RULE_SETUP
+#line 199 "jsonpath_scan.l"
+{ return NOTEQUAL_P; }
+ YY_BREAK
+case 33:
+YY_RULE_SETUP
+#line 201 "jsonpath_scan.l"
+{ return GREATEREQUAL_P; }
+ YY_BREAK
+case 34:
+YY_RULE_SETUP
+#line 203 "jsonpath_scan.l"
+{ return GREATER_P; }
+ YY_BREAK
+case 35:
+YY_RULE_SETUP
+#line 205 "jsonpath_scan.l"
+{
+ addstring(true, yytext + 1, yyleng - 1);
+ addchar(false, '\0');
+ yylval->str = scanstring;
+ return VARIABLE_P;
+ }
+ YY_BREAK
+case 36:
+YY_RULE_SETUP
+#line 212 "jsonpath_scan.l"
+{
+ addchar(true, '\0');
+ BEGIN xvq;
+ }
+ YY_BREAK
+case 37:
+YY_RULE_SETUP
+#line 217 "jsonpath_scan.l"
+{ return *yytext; }
+ YY_BREAK
+case 38:
+/* rule 38 can match eol */
+YY_RULE_SETUP
+#line 219 "jsonpath_scan.l"
+{ /* ignore */ }
+ YY_BREAK
+case 39:
+YY_RULE_SETUP
+#line 221 "jsonpath_scan.l"
+{
+ addchar(true, '\0');
+ BEGIN xc;
+ }
+ YY_BREAK
+case 40:
+YY_RULE_SETUP
+#line 226 "jsonpath_scan.l"
+{
+ addstring(true, yytext, yyleng);
+ addchar(false, '\0');
+ yylval->str = scanstring;
+ return NUMERIC_P;
+ }
+ YY_BREAK
+case 41:
+YY_RULE_SETUP
+#line 233 "jsonpath_scan.l"
+{
+ addstring(true, yytext, yyleng);
+ addchar(false, '\0');
+ yylval->str = scanstring;
+ return NUMERIC_P;
+ }
+ YY_BREAK
+case 42:
+YY_RULE_SETUP
+#line 240 "jsonpath_scan.l"
+{
+ addstring(true, yytext, yyleng);
+ addchar(false, '\0');
+ yylval->str = scanstring;
+ return INT_P;
+ }
+ YY_BREAK
+case 43:
+YY_RULE_SETUP
+#line 247 "jsonpath_scan.l"
+{ yyerror(NULL, "invalid numeric literal"); }
+ YY_BREAK
+case 44:
+YY_RULE_SETUP
+#line 248 "jsonpath_scan.l"
+{ yyerror(NULL, "trailing junk after numeric literal"); }
+ YY_BREAK
+case 45:
+YY_RULE_SETUP
+#line 249 "jsonpath_scan.l"
+{ yyerror(NULL, "trailing junk after numeric literal"); }
+ YY_BREAK
+case 46:
+YY_RULE_SETUP
+#line 250 "jsonpath_scan.l"
+{ yyerror(NULL, "trailing junk after numeric literal"); }
+ YY_BREAK
+case 47:
+YY_RULE_SETUP
+#line 252 "jsonpath_scan.l"
+{
+ addchar(true, '\0');
+ BEGIN xq;
+ }
+ YY_BREAK
+case 48:
+YY_RULE_SETUP
+#line 257 "jsonpath_scan.l"
+{
+ yyless(0);
+ addchar(true, '\0');
+ BEGIN xnq;
+ }
+ YY_BREAK
+case 49:
+YY_RULE_SETUP
+#line 263 "jsonpath_scan.l"
+{
+ addstring(true, yytext, yyleng);
+ BEGIN xnq;
+ }
+ YY_BREAK
+case YY_STATE_EOF(INITIAL):
+#line 268 "jsonpath_scan.l"
+{ yyterminate(); }
+ YY_BREAK
+case 50:
+YY_RULE_SETUP
+#line 270 "jsonpath_scan.l"
+YY_FATAL_ERROR( "flex scanner jammed" );
+ YY_BREAK
+#line 3601 "jsonpath_scan.c"
+
+ case YY_END_OF_BUFFER:
+ {
+ /* Amount of text matched not including the EOB char. */
+ int yy_amount_of_matched_text = (int) (yy_cp - (yytext_ptr)) - 1;
+
+ /* Undo the effects of YY_DO_BEFORE_ACTION. */
+ *yy_cp = (yy_hold_char);
+ YY_RESTORE_YY_MORE_OFFSET
+
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW )
+ {
+ /* We're scanning a new file or input source. It's
+ * possible that this happened because the user
+ * just pointed yyin at a new source and called
+ * yylex(). If so, then we have to assure
+ * consistency between YY_CURRENT_BUFFER and our
+ * globals. Here is the right place to do so, because
+ * this is the first action (other than possibly a
+ * back-up) that will match for the new input source.
+ */
+ (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
+ YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin;
+ YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL;
+ }
+
+ /* Note that here we test for yy_c_buf_p "<=" to the position
+ * of the first EOB in the buffer, since yy_c_buf_p will
+ * already have been incremented past the NUL character
+ * (since all states make transitions on EOB to the
+ * end-of-buffer state). Contrast this with the test
+ * in input().
+ */
+ if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
+ { /* This was really a NUL. */
+ yy_state_type yy_next_state;
+
+ (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text;
+
+ yy_current_state = yy_get_previous_state( );
+
+ /* Okay, we're now positioned to make the NUL
+ * transition. We couldn't have
+ * yy_get_previous_state() go ahead and do it
+ * for us because it doesn't know how to deal
+ * with the possibility of jamming (and we don't
+ * want to build jamming into it because then it
+ * will run more slowly).
+ */
+
+ yy_next_state = yy_try_NUL_trans( yy_current_state );
+
+ yy_bp = (yytext_ptr) + YY_MORE_ADJ;
+
+ if ( yy_next_state )
+ {
+ /* Consume the NUL. */
+ yy_cp = ++(yy_c_buf_p);
+ yy_current_state = yy_next_state;
+ goto yy_match;
+ }
+
+ else
+ {
+ yy_cp = (yy_c_buf_p);
+ goto yy_find_action;
+ }
+ }
+
+ else switch ( yy_get_next_buffer( ) )
+ {
+ case EOB_ACT_END_OF_FILE:
+ {
+ (yy_did_buffer_switch_on_eof) = 0;
+
+ if ( yywrap( ) )
+ {
+ /* Note: because we've taken care in
+ * yy_get_next_buffer() to have set up
+ * yytext, we can now set up
+ * yy_c_buf_p so that if some total
+ * hoser (like flex itself) wants to
+ * call the scanner after we return the
+ * YY_NULL, it'll still work - another
+ * YY_NULL will get returned.
+ */
+ (yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ;
+
+ yy_act = YY_STATE_EOF(YY_START);
+ goto do_action;
+ }
+
+ else
+ {
+ if ( ! (yy_did_buffer_switch_on_eof) )
+ YY_NEW_FILE;
+ }
+ break;
+ }
+
+ case EOB_ACT_CONTINUE_SCAN:
+ (yy_c_buf_p) =
+ (yytext_ptr) + yy_amount_of_matched_text;
+
+ yy_current_state = yy_get_previous_state( );
+
+ yy_cp = (yy_c_buf_p);
+ yy_bp = (yytext_ptr) + YY_MORE_ADJ;
+ goto yy_match;
+
+ case EOB_ACT_LAST_MATCH:
+ (yy_c_buf_p) =
+ &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)];
+
+ yy_current_state = yy_get_previous_state( );
+
+ yy_cp = (yy_c_buf_p);
+ yy_bp = (yytext_ptr) + YY_MORE_ADJ;
+ goto yy_find_action;
+ }
+ break;
+ }
+
+ default:
+ YY_FATAL_ERROR(
+ "fatal flex scanner internal error--no action found" );
+ } /* end of action switch */
+ } /* end of scanning one token */
+ } /* end of user's declarations */
+} /* end of yylex */
+
+/* yy_get_next_buffer - try to read in a new buffer
+ *
+ * Returns a code representing an action:
+ * EOB_ACT_LAST_MATCH -
+ * EOB_ACT_CONTINUE_SCAN - continue scanning from current position
+ * EOB_ACT_END_OF_FILE - end of file
+ */
+static int yy_get_next_buffer (void)
+{
+ char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
+ char *source = (yytext_ptr);
+ int number_to_move, i;
+ int ret_val;
+
+ if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] )
+ YY_FATAL_ERROR(
+ "fatal flex scanner internal error--end of buffer missed" );
+
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 )
+ { /* Don't try to fill the buffer, so this is an EOF. */
+ if ( (yy_c_buf_p) - (yytext_ptr) - YY_MORE_ADJ == 1 )
+ {
+ /* We matched a single character, the EOB, so
+ * treat this as a final EOF.
+ */
+ return EOB_ACT_END_OF_FILE;
+ }
+
+ else
+ {
+ /* We matched some text prior to the EOB, first
+ * process it.
+ */
+ return EOB_ACT_LAST_MATCH;
+ }
+ }
+
+ /* Try to read more data. */
+
+ /* First move last chars to start of buffer. */
+ number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr) - 1);
+
+ for ( i = 0; i < number_to_move; ++i )
+ *(dest++) = *(source++);
+
+ if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING )
+ /* don't do the read, it's not guaranteed to return an EOF,
+ * just force an EOF
+ */
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0;
+
+ else
+ {
+ int num_to_read =
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
+
+ while ( num_to_read <= 0 )
+ { /* Not enough room in the buffer - grow it. */
+
+ /* just a shorter name for the current buffer */
+ YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE;
+
+ int yy_c_buf_p_offset =
+ (int) ((yy_c_buf_p) - b->yy_ch_buf);
+
+ if ( b->yy_is_our_buffer )
+ {
+ int new_size = b->yy_buf_size * 2;
+
+ if ( new_size <= 0 )
+ b->yy_buf_size += b->yy_buf_size / 8;
+ else
+ b->yy_buf_size *= 2;
+
+ b->yy_ch_buf = (char *)
+ /* Include room in for 2 EOB chars. */
+ yyrealloc( (void *) b->yy_ch_buf,
+ (yy_size_t) (b->yy_buf_size + 2) );
+ }
+ else
+ /* Can't grow it, we don't own it. */
+ b->yy_ch_buf = NULL;
+
+ if ( ! b->yy_ch_buf )
+ YY_FATAL_ERROR(
+ "fatal error - scanner input buffer overflow" );
+
+ (yy_c_buf_p) = &b->yy_ch_buf[yy_c_buf_p_offset];
+
+ num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size -
+ number_to_move - 1;
+
+ }
+
+ if ( num_to_read > YY_READ_BUF_SIZE )
+ num_to_read = YY_READ_BUF_SIZE;
+
+ /* Read in more data. */
+ YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
+ (yy_n_chars), num_to_read );
+
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
+ }
+
+ if ( (yy_n_chars) == 0 )
+ {
+ if ( number_to_move == YY_MORE_ADJ )
+ {
+ ret_val = EOB_ACT_END_OF_FILE;
+ yyrestart( yyin );
+ }
+
+ else
+ {
+ ret_val = EOB_ACT_LAST_MATCH;
+ YY_CURRENT_BUFFER_LVALUE->yy_buffer_status =
+ YY_BUFFER_EOF_PENDING;
+ }
+ }
+
+ else
+ ret_val = EOB_ACT_CONTINUE_SCAN;
+
+ if (((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
+ /* Extend the array by 50%, plus the number we really need. */
+ int new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1);
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc(
+ (void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf, (yy_size_t) new_size );
+ if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" );
+ /* "- 2" to take care of EOB's */
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_size = (int) (new_size - 2);
+ }
+
+ (yy_n_chars) += number_to_move;
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR;
+ YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR;
+
+ (yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0];
+
+ return ret_val;
+}
+
+/* yy_get_previous_state - get the state just before the EOB char was reached */
+
+ static yy_state_type yy_get_previous_state (void)
+{
+ yy_state_type yy_current_state;
+ char *yy_cp;
+
+ yy_current_state = yy_start_state_list[(yy_start)];
+
+ for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp )
+ {
+ yy_current_state += yy_current_state[(*yy_cp ? YY_SC_TO_UI(*yy_cp) : 256)].yy_nxt;
+ }
+
+ return yy_current_state;
+}
+
+/* yy_try_NUL_trans - try to make a transition on the NUL character
+ *
+ * synopsis
+ * next_state = yy_try_NUL_trans( current_state );
+ */
+ static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state )
+{
+ int yy_is_jam;
+
+ int yy_c = 256;
+ const struct yy_trans_info *yy_trans_info;
+
+ yy_trans_info = &yy_current_state[(unsigned int) yy_c];
+ yy_current_state += yy_trans_info->yy_nxt;
+ yy_is_jam = (yy_trans_info->yy_verify != yy_c);
+
+ return yy_is_jam ? 0 : yy_current_state;
+}
+
+#ifndef YY_NO_UNPUT
+
+#endif
+
+#ifndef YY_NO_INPUT
+#ifdef __cplusplus
+ static int yyinput (void)
+#else
+ static int input (void)
+#endif
+
+{
+ int c;
+
+ *(yy_c_buf_p) = (yy_hold_char);
+
+ if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR )
+ {
+ /* yy_c_buf_p now points to the character we want to return.
+ * If this occurs *before* the EOB characters, then it's a
+ * valid NUL; if not, then we've hit the end of the buffer.
+ */
+ if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
+ /* This was really a NUL. */
+ *(yy_c_buf_p) = '\0';
+
+ else
+ { /* need more input */
+ int offset = (int) ((yy_c_buf_p) - (yytext_ptr));
+ ++(yy_c_buf_p);
+
+ switch ( yy_get_next_buffer( ) )
+ {
+ case EOB_ACT_LAST_MATCH:
+ /* This happens because yy_g_n_b()
+ * sees that we've accumulated a
+ * token and flags that we need to
+ * try matching the token before
+ * proceeding. But for input(),
+ * there's no matching to consider.
+ * So convert the EOB_ACT_LAST_MATCH
+ * to EOB_ACT_END_OF_FILE.
+ */
+
+ /* Reset buffer status. */
+ yyrestart( yyin );
+
+ /*FALLTHROUGH*/
+
+ case EOB_ACT_END_OF_FILE:
+ {
+ if ( yywrap( ) )
+ return 0;
+
+ if ( ! (yy_did_buffer_switch_on_eof) )
+ YY_NEW_FILE;
+#ifdef __cplusplus
+ return yyinput();
+#else
+ return input();
+#endif
+ }
+
+ case EOB_ACT_CONTINUE_SCAN:
+ (yy_c_buf_p) = (yytext_ptr) + offset;
+ break;
+ }
+ }
+ }
+
+ c = *(unsigned char *) (yy_c_buf_p); /* cast for 8-bit char's */
+ *(yy_c_buf_p) = '\0'; /* preserve yytext */
+ (yy_hold_char) = *++(yy_c_buf_p);
+
+ return c;
+}
+#endif /* ifndef YY_NO_INPUT */
+
+/** Immediately switch to a different input stream.
+ * @param input_file A readable stream.
+ *
+ * @note This function does not reset the start condition to @c INITIAL .
+ */
+ void yyrestart (FILE * input_file )
+{
+
+ if ( ! YY_CURRENT_BUFFER ){
+ yyensure_buffer_stack ();
+ YY_CURRENT_BUFFER_LVALUE =
+ yy_create_buffer( yyin, YY_BUF_SIZE );
+ }
+
+ yy_init_buffer( YY_CURRENT_BUFFER, input_file );
+ yy_load_buffer_state( );
+}
+
+/** Switch to a different input buffer.
+ * @param new_buffer The new input buffer.
+ *
+ */
+ void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer )
+{
+
+ /* TODO. We should be able to replace this entire function body
+ * with
+ * yypop_buffer_state();
+ * yypush_buffer_state(new_buffer);
+ */
+ yyensure_buffer_stack ();
+ if ( YY_CURRENT_BUFFER == new_buffer )
+ return;
+
+ if ( YY_CURRENT_BUFFER )
+ {
+ /* Flush out information for old buffer. */
+ *(yy_c_buf_p) = (yy_hold_char);
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
+ }
+
+ YY_CURRENT_BUFFER_LVALUE = new_buffer;
+ yy_load_buffer_state( );
+
+ /* We don't actually know whether we did this switch during
+ * EOF (yywrap()) processing, but the only time this flag
+ * is looked at is after yywrap() is called, so it's safe
+ * to go ahead and always set it.
+ */
+ (yy_did_buffer_switch_on_eof) = 1;
+}
+
+static void yy_load_buffer_state (void)
+{
+ (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
+ (yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
+ yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
+ (yy_hold_char) = *(yy_c_buf_p);
+}
+
+/** Allocate and initialize an input buffer state.
+ * @param file A readable stream.
+ * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
+ *
+ * @return the allocated buffer state.
+ */
+ YY_BUFFER_STATE yy_create_buffer (FILE * file, int size )
+{
+ YY_BUFFER_STATE b;
+
+ b = (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ) );
+ if ( ! b )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
+
+ b->yy_buf_size = size;
+
+ /* yy_ch_buf has to be 2 characters longer than the size given because
+ * we need to put in 2 end-of-buffer characters.
+ */
+ b->yy_ch_buf = (char *) yyalloc( (yy_size_t) (b->yy_buf_size + 2) );
+ if ( ! b->yy_ch_buf )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
+
+ b->yy_is_our_buffer = 1;
+
+ yy_init_buffer( b, file );
+
+ return b;
+}
+
+/** Destroy the buffer.
+ * @param b a buffer created with yy_create_buffer()
+ *
+ */
+ void yy_delete_buffer (YY_BUFFER_STATE b )
+{
+
+ if ( ! b )
+ return;
+
+ if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */
+ YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
+
+ if ( b->yy_is_our_buffer )
+ yyfree( (void *) b->yy_ch_buf );
+
+ yyfree( (void *) b );
+}
+
+/* Initializes or reinitializes a buffer.
+ * This function is sometimes called more than once on the same buffer,
+ * such as during a yyrestart() or at EOF.
+ */
+ static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file )
+
+{
+ int oerrno = errno;
+
+ yy_flush_buffer( b );
+
+ b->yy_input_file = file;
+ b->yy_fill_buffer = 1;
+
+ /* If b is the current buffer, then yy_init_buffer was _probably_
+ * called from yyrestart() or through yy_get_next_buffer.
+ * In that case, we don't want to reset the lineno or column.
+ */
+ if (b != YY_CURRENT_BUFFER){
+ b->yy_bs_lineno = 1;
+ b->yy_bs_column = 0;
+ }
+
+ b->yy_is_interactive = 0;
+
+ errno = oerrno;
+}
+
+/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
+ * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
+ *
+ */
+ void yy_flush_buffer (YY_BUFFER_STATE b )
+{
+ if ( ! b )
+ return;
+
+ b->yy_n_chars = 0;
+
+ /* We always need two end-of-buffer characters. The first causes
+ * a transition to the end-of-buffer state. The second causes
+ * a jam in that state.
+ */
+ b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
+ b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
+
+ b->yy_buf_pos = &b->yy_ch_buf[0];
+
+ b->yy_at_bol = 1;
+ b->yy_buffer_status = YY_BUFFER_NEW;
+
+ if ( b == YY_CURRENT_BUFFER )
+ yy_load_buffer_state( );
+}
+
+/** Pushes the new state onto the stack. The new state becomes
+ * the current state. This function will allocate the stack
+ * if necessary.
+ * @param new_buffer The new state.
+ *
+ */
+void yypush_buffer_state (YY_BUFFER_STATE new_buffer )
+{
+ if (new_buffer == NULL)
+ return;
+
+ yyensure_buffer_stack();
+
+ /* This block is copied from yy_switch_to_buffer. */
+ if ( YY_CURRENT_BUFFER )
+ {
+ /* Flush out information for old buffer. */
+ *(yy_c_buf_p) = (yy_hold_char);
+ YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
+ YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
+ }
+
+ /* Only push if top exists. Otherwise, replace top. */
+ if (YY_CURRENT_BUFFER)
+ (yy_buffer_stack_top)++;
+ YY_CURRENT_BUFFER_LVALUE = new_buffer;
+
+ /* copied from yy_switch_to_buffer. */
+ yy_load_buffer_state( );
+ (yy_did_buffer_switch_on_eof) = 1;
+}
+
+/** Removes and deletes the top of the stack, if present.
+ * The next element becomes the new top.
+ *
+ */
+void yypop_buffer_state (void)
+{
+ if (!YY_CURRENT_BUFFER)
+ return;
+
+ yy_delete_buffer(YY_CURRENT_BUFFER );
+ YY_CURRENT_BUFFER_LVALUE = NULL;
+ if ((yy_buffer_stack_top) > 0)
+ --(yy_buffer_stack_top);
+
+ if (YY_CURRENT_BUFFER) {
+ yy_load_buffer_state( );
+ (yy_did_buffer_switch_on_eof) = 1;
+ }
+}
+
+/* Allocates the stack if it does not exist.
+ * Guarantees space for at least one push.
+ */
+static void yyensure_buffer_stack (void)
+{
+ yy_size_t num_to_alloc;
+
+ if (!(yy_buffer_stack)) {
+
+ /* First allocation is just for 2 elements, since we don't know if this
+ * scanner will even need a stack. We use 2 instead of 1 to avoid an
+ * immediate realloc on the next call.
+ */
+ num_to_alloc = 1; /* After all that talk, this was set to 1 anyways... */
+ (yy_buffer_stack) = (struct yy_buffer_state**)yyalloc
+ (num_to_alloc * sizeof(struct yy_buffer_state*)
+ );
+ if ( ! (yy_buffer_stack) )
+ YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );
+
+ memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*));
+
+ (yy_buffer_stack_max) = num_to_alloc;
+ (yy_buffer_stack_top) = 0;
+ return;
+ }
+
+ if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){
+
+ /* Increase the buffer to prepare for a possible push. */
+ yy_size_t grow_size = 8 /* arbitrary grow size */;
+
+ num_to_alloc = (yy_buffer_stack_max) + grow_size;
+ (yy_buffer_stack) = (struct yy_buffer_state**)yyrealloc
+ ((yy_buffer_stack),
+ num_to_alloc * sizeof(struct yy_buffer_state*)
+ );
+ if ( ! (yy_buffer_stack) )
+ YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );
+
+ /* zero only the new slots.*/
+ memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*));
+ (yy_buffer_stack_max) = num_to_alloc;
+ }
+}
+
+/** Setup the input buffer state to scan directly from a user-specified character buffer.
+ * @param base the character buffer
+ * @param size the size in bytes of the character buffer
+ *
+ * @return the newly allocated buffer state object.
+ */
+YY_BUFFER_STATE yy_scan_buffer (char * base, yy_size_t size )
+{
+ YY_BUFFER_STATE b;
+
+ if ( size < 2 ||
+ base[size-2] != YY_END_OF_BUFFER_CHAR ||
+ base[size-1] != YY_END_OF_BUFFER_CHAR )
+ /* They forgot to leave room for the EOB's. */
+ return NULL;
+
+ b = (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ) );
+ if ( ! b )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" );
+
+ b->yy_buf_size = (int) (size - 2); /* "- 2" to take care of EOB's */
+ b->yy_buf_pos = b->yy_ch_buf = base;
+ b->yy_is_our_buffer = 0;
+ b->yy_input_file = NULL;
+ b->yy_n_chars = b->yy_buf_size;
+ b->yy_is_interactive = 0;
+ b->yy_at_bol = 1;
+ b->yy_fill_buffer = 0;
+ b->yy_buffer_status = YY_BUFFER_NEW;
+
+ yy_switch_to_buffer( b );
+
+ return b;
+}
+
+/** Setup the input buffer state to scan a string. The next call to yylex() will
+ * scan from a @e copy of @a str.
+ * @param yystr a NUL-terminated string to scan
+ *
+ * @return the newly allocated buffer state object.
+ * @note If you want to scan bytes that may contain NUL values, then use
+ * yy_scan_bytes() instead.
+ */
+YY_BUFFER_STATE yy_scan_string (const char * yystr )
+{
+
+ return yy_scan_bytes( yystr, (int) strlen(yystr) );
+}
+
+/** Setup the input buffer state to scan the given bytes. The next call to yylex() will
+ * scan from a @e copy of @a bytes.
+ * @param yybytes the byte buffer to scan
+ * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes.
+ *
+ * @return the newly allocated buffer state object.
+ */
+YY_BUFFER_STATE yy_scan_bytes (const char * yybytes, int _yybytes_len )
+{
+ YY_BUFFER_STATE b;
+ char *buf;
+ yy_size_t n;
+ int i;
+
+ /* Get memory for full buffer, including space for trailing EOB's. */
+ n = (yy_size_t) (_yybytes_len + 2);
+ buf = (char *) yyalloc( n );
+ if ( ! buf )
+ YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" );
+
+ for ( i = 0; i < _yybytes_len; ++i )
+ buf[i] = yybytes[i];
+
+ buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR;
+
+ b = yy_scan_buffer( buf, n );
+ if ( ! b )
+ YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" );
+
+ /* It's okay to grow etc. this buffer, and we should throw it
+ * away when we're done.
+ */
+ b->yy_is_our_buffer = 1;
+
+ return b;
+}
+
+#ifndef YY_EXIT_FAILURE
+#define YY_EXIT_FAILURE 2
+#endif
+
+static void yynoreturn yy_fatal_error (const char* msg )
+{
+ fprintf( stderr, "%s\n", msg );
+ exit( YY_EXIT_FAILURE );
+}
+
+/* Redefine yyless() so it works in section 3 code. */
+
+#undef yyless
+#define yyless(n) \
+ do \
+ { \
+ /* Undo effects of setting up yytext. */ \
+ int yyless_macro_arg = (n); \
+ YY_LESS_LINENO(yyless_macro_arg);\
+ yytext[yyleng] = (yy_hold_char); \
+ (yy_c_buf_p) = yytext + yyless_macro_arg; \
+ (yy_hold_char) = *(yy_c_buf_p); \
+ *(yy_c_buf_p) = '\0'; \
+ yyleng = yyless_macro_arg; \
+ } \
+ while ( 0 )
+
+/* Accessor methods (get/set functions) to struct members. */
+
+/** Get the current line number.
+ *
+ */
+int yyget_lineno (void)
+{
+
+ return yylineno;
+}
+
+/** Get the input stream.
+ *
+ */
+FILE *yyget_in (void)
+{
+ return yyin;
+}
+
+/** Get the output stream.
+ *
+ */
+FILE *yyget_out (void)
+{
+ return yyout;
+}
+
+/** Get the length of the current token.
+ *
+ */
+int yyget_leng (void)
+{
+ return yyleng;
+}
+
+/** Get the current token.
+ *
+ */
+
+char *yyget_text (void)
+{
+ return yytext;
+}
+
+/** Set the current line number.
+ * @param _line_number line number
+ *
+ */
+void yyset_lineno (int _line_number )
+{
+
+ yylineno = _line_number;
+}
+
+/** Set the input stream. This does not discard the current
+ * input buffer.
+ * @param _in_str A readable stream.
+ *
+ * @see yy_switch_to_buffer
+ */
+void yyset_in (FILE * _in_str )
+{
+ yyin = _in_str ;
+}
+
+void yyset_out (FILE * _out_str )
+{
+ yyout = _out_str ;
+}
+
+int yyget_debug (void)
+{
+ return yy_flex_debug;
+}
+
+void yyset_debug (int _bdebug )
+{
+ yy_flex_debug = _bdebug ;
+}
+
+static int yy_init_globals (void)
+{
+ /* Initialization is the same as for the non-reentrant scanner.
+ * This function is called from yylex_destroy(), so don't allocate here.
+ */
+
+ (yy_buffer_stack) = NULL;
+ (yy_buffer_stack_top) = 0;
+ (yy_buffer_stack_max) = 0;
+ (yy_c_buf_p) = NULL;
+ (yy_init) = 0;
+ (yy_start) = 0;
+
+/* Defined in main.c */
+#ifdef YY_STDINIT
+ yyin = stdin;
+ yyout = stdout;
+#else
+ yyin = NULL;
+ yyout = NULL;
+#endif
+
+ /* For future reference: Set errno on error, since we are called by
+ * yylex_init()
+ */
+ return 0;
+}
+
+/* yylex_destroy is for both reentrant and non-reentrant scanners. */
+int yylex_destroy (void)
+{
+
+ /* Pop the buffer stack, destroying each element. */
+ while(YY_CURRENT_BUFFER){
+ yy_delete_buffer( YY_CURRENT_BUFFER );
+ YY_CURRENT_BUFFER_LVALUE = NULL;
+ yypop_buffer_state();
+ }
+
+ /* Destroy the stack itself. */
+ yyfree((yy_buffer_stack) );
+ (yy_buffer_stack) = NULL;
+
+ /* Reset the globals. This is important in a non-reentrant scanner so the next time
+ * yylex() is called, initialization will occur. */
+ yy_init_globals( );
+
+ return 0;
+}
+
+/*
+ * Internal utility routines.
+ */
+
+#ifndef yytext_ptr
+static void yy_flex_strncpy (char* s1, const char * s2, int n )
+{
+
+ int i;
+ for ( i = 0; i < n; ++i )
+ s1[i] = s2[i];
+}
+#endif
+
+#ifdef YY_NEED_STRLEN
+static int yy_flex_strlen (const char * s )
+{
+ int n;
+ for ( n = 0; s[n]; ++n )
+ ;
+
+ return n;
+}
+#endif
+
+#define YYTABLES_NAME "yytables"
+
+#line 270 "jsonpath_scan.l"
+
+
+/* LCOV_EXCL_STOP */
+
+void
+jsonpath_yyerror(JsonPathParseResult **result, const char *message)
+{
+ if (*yytext == YY_END_OF_BUFFER_CHAR)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ /* translator: %s is typically "syntax error" */
+ errmsg("%s at end of jsonpath input", _(message))));
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ /* translator: first %s is typically "syntax error" */
+ errmsg("%s at or near \"%s\" of jsonpath input",
+ _(message), yytext)));
+ }
+}
+
+typedef struct JsonPathKeyword
+{
+ int16 len;
+ bool lowercase;
+ int val;
+ const char *keyword;
+} JsonPathKeyword;
+
+/*
+ * Array of keywords; it must be kept sorted by length and then in
+ * alphabetical order.
+ */
+static const JsonPathKeyword keywords[] = {
+ { 2, false, IS_P, "is"},
+ { 2, false, TO_P, "to"},
+ { 3, false, ABS_P, "abs"},
+ { 3, false, LAX_P, "lax"},
+ { 4, false, FLAG_P, "flag"},
+ { 4, false, LAST_P, "last"},
+ { 4, true, NULL_P, "null"},
+ { 4, false, SIZE_P, "size"},
+ { 4, true, TRUE_P, "true"},
+ { 4, false, TYPE_P, "type"},
+ { 4, false, WITH_P, "with"},
+ { 5, true, FALSE_P, "false"},
+ { 5, false, FLOOR_P, "floor"},
+ { 6, false, DOUBLE_P, "double"},
+ { 6, false, EXISTS_P, "exists"},
+ { 6, false, STARTS_P, "starts"},
+ { 6, false, STRICT_P, "strict"},
+ { 7, false, CEILING_P, "ceiling"},
+ { 7, false, UNKNOWN_P, "unknown"},
+ { 8, false, DATETIME_P, "datetime"},
+ { 8, false, KEYVALUE_P, "keyvalue"},
+ { 10,false, LIKE_REGEX_P, "like_regex"},
+};
+
+/* Check if current scanstring value is a keyword */
+static enum yytokentype
+checkKeyword()
+{
+ int res = IDENT_P;
+ int diff;
+ const JsonPathKeyword *StopLow = keywords,
+ *StopHigh = keywords + lengthof(keywords),
+ *StopMiddle;
+
+ if (scanstring.len > keywords[lengthof(keywords) - 1].len)
+ return res;
+
+ while (StopLow < StopHigh)
+ {
+ StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
+
+ if (StopMiddle->len == scanstring.len)
+ diff = pg_strncasecmp(StopMiddle->keyword, scanstring.val,
+ scanstring.len);
+ else
+ diff = StopMiddle->len - scanstring.len;
+
+ if (diff < 0)
+ StopLow = StopMiddle + 1;
+ else if (diff > 0)
+ StopHigh = StopMiddle;
+ else
+ {
+ if (StopMiddle->lowercase)
+ diff = strncmp(StopMiddle->keyword, scanstring.val,
+ scanstring.len);
+
+ if (diff == 0)
+ res = StopMiddle->val;
+
+ break;
+ }
+ }
+
+ return res;
+}
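
checkKeyword() leans on the ordering invariant of keywords[]: entries are sorted first by length and then alphabetically, so a single comparison value (the length difference, falling back to pg_strncasecmp() when lengths match) can steer the binary search, and the lowercase flag then forces an exact-case recheck for the JSON literals null, true and false. A small self-contained sketch of the same two-level comparison, with illustrative names and plain strncasecmp() standing in for pg_strncasecmp():

#include <string.h>
#include <strings.h>			/* strncasecmp() */

typedef struct DemoKeyword
{
	int			len;
	const char *word;
} DemoKeyword;

/* Must stay sorted by length, then alphabetically, like keywords[] above. */
static const DemoKeyword demo_keywords[] = {
	{2, "is"}, {2, "to"}, {3, "abs"}, {3, "lax"}, {4, "last"}, {4, "true"},
};

static int
demo_keyword_lookup(const char *val, int len)
{
	int			lo = 0;
	int			hi = (int) (sizeof(demo_keywords) / sizeof(demo_keywords[0]));

	while (lo < hi)
	{
		int			mid = lo + (hi - lo) / 2;
		int			diff = demo_keywords[mid].len - len;

		if (diff == 0)
			diff = strncasecmp(demo_keywords[mid].word, val, len);

		if (diff < 0)
			lo = mid + 1;
		else if (diff > 0)
			hi = mid;
		else
			return mid;			/* index of the keyword */
	}
	return -1;					/* not a keyword, i.e. an IDENT_P */
}
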
+
+/*
+ * Called before any actual parsing is done
+ */
+static void
+jsonpath_scanner_init(const char *str, int slen)
+{
+ if (slen <= 0)
+ slen = strlen(str);
+
+	/*
+	 * Reset flex state that might be left over from a previous scan
+	 * aborted by ereport().
+	 */
+ yy_init_globals();
+
+ /*
+ * Make a scan buffer with special termination needed by flex.
+ */
+
+ scanbuflen = slen;
+ scanbuf = palloc(slen + 2);
+ memcpy(scanbuf, str, slen);
+ scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
+ scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
+
+ BEGIN(INITIAL);
+}
+
+
+/*
+ * Called after parsing is done to clean up after jsonpath_scanner_init()
+ */
+static void
+jsonpath_scanner_finish(void)
+{
+ yy_delete_buffer(scanbufhandle);
+ pfree(scanbuf);
+}
+
+/*
+ * Resize scanstring so that a string of the given length can be appended.
+ * Reinitialize it if required.
+ */
+static void
+resizeString(bool init, int appendLen)
+{
+ if (init)
+ {
+ scanstring.total = Max(32, appendLen);
+ scanstring.val = (char *) palloc(scanstring.total);
+ scanstring.len = 0;
+ }
+ else
+ {
+ if (scanstring.len + appendLen >= scanstring.total)
+ {
+ while (scanstring.len + appendLen >= scanstring.total)
+ scanstring.total *= 2;
+ scanstring.val = repalloc(scanstring.val, scanstring.total);
+ }
+ }
+}
+
+/* Add set of bytes at "s" of length "l" to scanstring */
+static void
+addstring(bool init, char *s, int l)
+{
+ resizeString(init, l + 1);
+ memcpy(scanstring.val + scanstring.len, s, l);
+ scanstring.len += l;
+}
+
+/* Add single byte "c" to scanstring */
+static void
+addchar(bool init, char c)
+{
+ resizeString(init, 1);
+ scanstring.val[scanstring.len] = c;
+ if (c != '\0')
+ scanstring.len++;
+}
+
+/* Interface to jsonpath parser */
+JsonPathParseResult *
+parsejsonpath(const char *str, int len)
+{
+ JsonPathParseResult *parseresult;
+
+ jsonpath_scanner_init(str, len);
+
+ if (jsonpath_yyparse((void *) &parseresult) != 0)
+ jsonpath_yyerror(NULL, "bogus input"); /* shouldn't happen */
+
+ jsonpath_scanner_finish();
+
+ return parseresult;
+}
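
parsejsonpath() is the only entry point the rest of the backend needs: hand it the jsonpath source text and its length (or a non-positive length to have jsonpath_scanner_init() call strlen() itself) and it returns the parse tree. A hypothetical caller, shown only to illustrate the contract; the real caller is the jsonpath input code in jsonpath.c, and the error message below is invented:

static JsonPathParseResult *
parse_jsonpath_or_error(const char *pathtext)
{
	JsonPathParseResult *parsed;

	parsed = parsejsonpath(pathtext, 0);	/* 0: let the scanner strlen() it */

	if (parsed == NULL)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("could not parse jsonpath \"%s\"", pathtext)));

	return parsed;
}
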
+
+/* Turn hex character into integer */
+static int
+hexval(char c)
+{
+ if (c >= '0' && c <= '9')
+ return c - '0';
+ if (c >= 'a' && c <= 'f')
+ return c - 'a' + 0xA;
+ if (c >= 'A' && c <= 'F')
+ return c - 'A' + 0xA;
+ jsonpath_yyerror(NULL, "invalid hexadecimal digit");
+ return 0; /* not reached */
+}
+
+/* Add given unicode character to scanstring */
+static void
+addUnicodeChar(int ch)
+{
+ if (ch == 0)
+ {
+		/* We can't allow this, since our TEXT type doesn't allow NUL bytes */
+ ereport(ERROR,
+ (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
+ errmsg("unsupported Unicode escape sequence"),
+ errdetail("\\u0000 cannot be converted to text.")));
+ }
+ else
+ {
+ char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
+
+ pg_unicode_to_server(ch, (unsigned char *) cbuf);
+ addstring(false, cbuf, strlen(cbuf));
+ }
+}
+
+/* Add unicode character, processing any surrogate pairs */
+static void
+addUnicode(int ch, int *hi_surrogate)
+{
+ if (is_utf16_surrogate_first(ch))
+ {
+ if (*hi_surrogate != -1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s", "jsonpath"),
+ errdetail("Unicode high surrogate must not follow "
+ "a high surrogate.")));
+ *hi_surrogate = ch;
+ return;
+ }
+ else if (is_utf16_surrogate_second(ch))
+ {
+ if (*hi_surrogate == -1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s", "jsonpath"),
+ errdetail("Unicode low surrogate must follow a high "
+ "surrogate.")));
+ ch = surrogate_pair_to_codepoint(*hi_surrogate, ch);
+ *hi_surrogate = -1;
+ }
+ else if (*hi_surrogate != -1)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s", "jsonpath"),
+ errdetail("Unicode low surrogate must follow a high "
+ "surrogate.")));
+ }
+
+ addUnicodeChar(ch);
+}
+
+/*
+ * parseUnicode was adapted from json_lex_string() in
+ * src/backend/utils/adt/json.c
+ */
+static void
+parseUnicode(char *s, int l)
+{
+ int i = 2;
+ int hi_surrogate = -1;
+
+ for (i = 2; i < l; i += 2) /* skip '\u' */
+ {
+ int ch = 0;
+ int j;
+
+ if (s[i] == '{') /* parse '\u{XX...}' */
+ {
+ while (s[++i] != '}' && i < l)
+ ch = (ch << 4) | hexval(s[i]);
+ i++; /* skip '}' */
+ }
+ else /* parse '\uXXXX' */
+ {
+ for (j = 0; j < 4 && i < l; j++)
+ ch = (ch << 4) | hexval(s[i++]);
+ }
+
+ addUnicode(ch, &hi_surrogate);
+ }
+
+ if (hi_surrogate != -1)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s", "jsonpath"),
+ errdetail("Unicode low surrogate must follow a high "
+ "surrogate.")));
+ }
+}
+
+/* Parse sequence of hex-encoded characters */
+static void
+parseHexChar(char *s)
+{
+ int ch = (hexval(s[2]) << 4) |
+ hexval(s[3]);
+
+ addUnicodeChar(ch);
+}
+
+/*
+ * Interface functions to make flex use palloc() instead of malloc().
+ * It'd be better to make these static, but flex insists otherwise.
+ */
+
+void *
+jsonpath_yyalloc(yy_size_t bytes)
+{
+ return palloc(bytes);
+}
+
+void *
+jsonpath_yyrealloc(void *ptr, yy_size_t bytes)
+{
+ if (ptr)
+ return repalloc(ptr, bytes);
+ else
+ return palloc(bytes);
+}
+
+void
+jsonpath_yyfree(void *ptr)
+{
+ if (ptr)
+ pfree(ptr);
+}
+
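The scanner's addUnicode() above combines UTF-16 surrogate pairs via surrogate_pair_to_codepoint() before converting the code point to the server encoding. A minimal standalone sketch of that arithmetic (the helper names here are illustrative stand-ins, not the PostgreSQL functions):

#include <stdio.h>

/* Hypothetical stand-ins for is_utf16_surrogate_first/second(). */
static int is_high_surrogate(unsigned int u) { return u >= 0xD800 && u <= 0xDBFF; }
static int is_low_surrogate(unsigned int u)  { return u >= 0xDC00 && u <= 0xDFFF; }

/* Combine a surrogate pair into a Unicode code point (U+10000..U+10FFFF). */
static unsigned int combine_surrogates(unsigned int hi, unsigned int lo)
{
    return ((hi - 0xD800) << 10) + (lo - 0xDC00) + 0x10000;
}

int main(void)
{
    unsigned int hi = 0xD83D, lo = 0xDE00;      /* \uD83D\uDE00 encodes U+1F600 */

    if (is_high_surrogate(hi) && is_low_surrogate(lo))
        printf("U+%X\n", combine_surrogates(hi, lo));   /* prints U+1F600 */
    return 0;
}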
diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l
new file mode 100644
index 0000000..4351f6e
--- /dev/null
+++ b/src/backend/utils/adt/jsonpath_scan.l
@@ -0,0 +1,619 @@
+%{
+/*-------------------------------------------------------------------------
+ *
+ * jsonpath_scan.l
+ * Lexical parser for jsonpath datatype
+ *
+ * Splits jsonpath string into tokens represented as JsonPathString structs.
+ * Decodes unicode and hex escaped strings.
+ *
+ * Copyright (c) 2019-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/jsonpath_scan.l
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "mb/pg_wchar.h"
+#include "nodes/pg_list.h"
+
+static JsonPathString scanstring;
+
+/* Handles to the buffer that the lexer uses internally */
+static YY_BUFFER_STATE scanbufhandle;
+static char *scanbuf;
+static int scanbuflen;
+
+static void addstring(bool init, char *s, int l);
+static void addchar(bool init, char s);
+static enum yytokentype checkKeyword(void);
+static void parseUnicode(char *s, int l);
+static void parseHexChar(char *s);
+
+/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
+#undef fprintf
+#define fprintf(file, fmt, msg) fprintf_to_ereport(fmt, msg)
+
+static void
+fprintf_to_ereport(const char *fmt, const char *msg)
+{
+ ereport(ERROR, (errmsg_internal("%s", msg)));
+}
+
+/* LCOV_EXCL_START */
+
+%}
+
+%option 8bit
+%option never-interactive
+%option nodefault
+%option noinput
+%option nounput
+%option noyywrap
+%option warn
+%option prefix="jsonpath_yy"
+%option bison-bridge
+%option noyyalloc
+%option noyyrealloc
+%option noyyfree
+
+/*
+ * We use exclusive states for quoted and non-quoted strings,
+ * quoted variable names and C-style comments.
+ * Exclusive states:
+ * <xq> - quoted strings
+ * <xnq> - non-quoted strings
+ * <xvq> - quoted variable names
+ * <xc> - C-style comment
+ */
+
+%x xq
+%x xnq
+%x xvq
+%x xc
+
+special [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/]
+blank [ \t\n\r\f]
+/* "other" means anything that's not special, blank, or '\' or '"' */
+other [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\" \t\n\r\f]
+
+digit [0-9]
+integer (0|[1-9]{digit}*)
+decimal ({integer}\.{digit}*|\.{digit}+)
+real ({integer}|{decimal})[Ee][-+]?{digit}+
+realfail ({integer}|{decimal})[Ee][-+]
+
+integer_junk {integer}{other}
+decimal_junk {decimal}{other}
+real_junk {real}{other}
+
+hex_dig [0-9A-Fa-f]
+unicode \\u({hex_dig}{4}|\{{hex_dig}{1,6}\})
+unicodefail \\u({hex_dig}{0,3}|\{{hex_dig}{0,6})
+hex_char \\x{hex_dig}{2}
+hex_fail \\x{hex_dig}{0,1}
+
+%%
+
+<xnq>{other}+ {
+ addstring(false, yytext, yyleng);
+ }
+
+<xnq>{blank}+ {
+ yylval->str = scanstring;
+ BEGIN INITIAL;
+ return checkKeyword();
+ }
+
+<xnq>\/\* {
+ yylval->str = scanstring;
+ BEGIN xc;
+ }
+
+<xnq>({special}|\") {
+ yylval->str = scanstring;
+ yyless(0);
+ BEGIN INITIAL;
+ return checkKeyword();
+ }
+
+<xnq><<EOF>> {
+ yylval->str = scanstring;
+ BEGIN INITIAL;
+ return checkKeyword();
+ }
+
+<xnq,xq,xvq>\\b { addchar(false, '\b'); }
+
+<xnq,xq,xvq>\\f { addchar(false, '\f'); }
+
+<xnq,xq,xvq>\\n { addchar(false, '\n'); }
+
+<xnq,xq,xvq>\\r { addchar(false, '\r'); }
+
+<xnq,xq,xvq>\\t { addchar(false, '\t'); }
+
+<xnq,xq,xvq>\\v { addchar(false, '\v'); }
+
+<xnq,xq,xvq>{unicode}+ { parseUnicode(yytext, yyleng); }
+
+<xnq,xq,xvq>{hex_char} { parseHexChar(yytext); }
+
+<xnq,xq,xvq>{unicode}*{unicodefail} { yyerror(NULL, "invalid unicode sequence"); }
+
+<xnq,xq,xvq>{hex_fail} { yyerror(NULL, "invalid hex character sequence"); }
+
+<xnq,xq,xvq>{unicode}+\\ {
+ /* throw back the \\, and treat as unicode */
+ yyless(yyleng - 1);
+ parseUnicode(yytext, yyleng);
+ }
+
+<xnq,xq,xvq>\\. { addchar(false, yytext[1]); }
+
+<xnq,xq,xvq>\\ { yyerror(NULL, "unexpected end after backslash"); }
+
+<xq,xvq><<EOF>> { yyerror(NULL, "unexpected end of quoted string"); }
+
+<xq>\" {
+ yylval->str = scanstring;
+ BEGIN INITIAL;
+ return STRING_P;
+ }
+
+<xvq>\" {
+ yylval->str = scanstring;
+ BEGIN INITIAL;
+ return VARIABLE_P;
+ }
+
+<xq,xvq>[^\\\"]+ { addstring(false, yytext, yyleng); }
+
+<xc>\*\/ { BEGIN INITIAL; }
+
+<xc>[^\*]+ { }
+
+<xc>\* { }
+
+<xc><<EOF>> { yyerror(NULL, "unexpected end of comment"); }
+
+\&\& { return AND_P; }
+
+\|\| { return OR_P; }
+
+\! { return NOT_P; }
+
+\*\* { return ANY_P; }
+
+\< { return LESS_P; }
+
+\<\= { return LESSEQUAL_P; }
+
+\=\= { return EQUAL_P; }
+
+\<\> { return NOTEQUAL_P; }
+
+\!\= { return NOTEQUAL_P; }
+
+\>\= { return GREATEREQUAL_P; }
+
+\> { return GREATER_P; }
+
+\${other}+ {
+ addstring(true, yytext + 1, yyleng - 1);
+ addchar(false, '\0');
+ yylval->str = scanstring;
+ return VARIABLE_P;
+ }
+
+\$\" {
+ addchar(true, '\0');
+ BEGIN xvq;
+ }
+
+{special} { return *yytext; }
+
+{blank}+ { /* ignore */ }
+
+\/\* {
+ addchar(true, '\0');
+ BEGIN xc;
+ }
+
+{real} {
+ addstring(true, yytext, yyleng);
+ addchar(false, '\0');
+ yylval->str = scanstring;
+ return NUMERIC_P;
+ }
+
+{decimal} {
+ addstring(true, yytext, yyleng);
+ addchar(false, '\0');
+ yylval->str = scanstring;
+ return NUMERIC_P;
+ }
+
+{integer} {
+ addstring(true, yytext, yyleng);
+ addchar(false, '\0');
+ yylval->str = scanstring;
+ return INT_P;
+ }
+
+{realfail} { yyerror(NULL, "invalid numeric literal"); }
+{integer_junk} { yyerror(NULL, "trailing junk after numeric literal"); }
+{decimal_junk} { yyerror(NULL, "trailing junk after numeric literal"); }
+{real_junk} { yyerror(NULL, "trailing junk after numeric literal"); }
+
+\" {
+ addchar(true, '\0');
+ BEGIN xq;
+ }
+
+\\ {
+ yyless(0);
+ addchar(true, '\0');
+ BEGIN xnq;
+ }
+
+{other}+ {
+ addstring(true, yytext, yyleng);
+ BEGIN xnq;
+ }
+
+<<EOF>> { yyterminate(); }
+
+%%
+
+/* LCOV_EXCL_STOP */
+
+void
+jsonpath_yyerror(JsonPathParseResult **result, const char *message)
+{
+ if (*yytext == YY_END_OF_BUFFER_CHAR)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ /* translator: %s is typically "syntax error" */
+ errmsg("%s at end of jsonpath input", _(message))));
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ /* translator: first %s is typically "syntax error" */
+ errmsg("%s at or near \"%s\" of jsonpath input",
+ _(message), yytext)));
+ }
+}
+
+typedef struct JsonPathKeyword
+{
+ int16 len;
+ bool lowercase;
+ int val;
+ const char *keyword;
+} JsonPathKeyword;
+
+/*
+ * The keyword array must be kept sorted by length and then
+ * in alphabetical order.
+ */
+static const JsonPathKeyword keywords[] = {
+ { 2, false, IS_P, "is"},
+ { 2, false, TO_P, "to"},
+ { 3, false, ABS_P, "abs"},
+ { 3, false, LAX_P, "lax"},
+ { 4, false, FLAG_P, "flag"},
+ { 4, false, LAST_P, "last"},
+ { 4, true, NULL_P, "null"},
+ { 4, false, SIZE_P, "size"},
+ { 4, true, TRUE_P, "true"},
+ { 4, false, TYPE_P, "type"},
+ { 4, false, WITH_P, "with"},
+ { 5, true, FALSE_P, "false"},
+ { 5, false, FLOOR_P, "floor"},
+ { 6, false, DOUBLE_P, "double"},
+ { 6, false, EXISTS_P, "exists"},
+ { 6, false, STARTS_P, "starts"},
+ { 6, false, STRICT_P, "strict"},
+ { 7, false, CEILING_P, "ceiling"},
+ { 7, false, UNKNOWN_P, "unknown"},
+ { 8, false, DATETIME_P, "datetime"},
+ { 8, false, KEYVALUE_P, "keyvalue"},
+	{ 10, false, LIKE_REGEX_P, "like_regex"},
+};
+
+/* Check if current scanstring value is a keyword */
+static enum yytokentype
+checkKeyword()
+{
+ int res = IDENT_P;
+ int diff;
+ const JsonPathKeyword *StopLow = keywords,
+ *StopHigh = keywords + lengthof(keywords),
+ *StopMiddle;
+
+ if (scanstring.len > keywords[lengthof(keywords) - 1].len)
+ return res;
+
+ while (StopLow < StopHigh)
+ {
+ StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
+
+ if (StopMiddle->len == scanstring.len)
+ diff = pg_strncasecmp(StopMiddle->keyword, scanstring.val,
+ scanstring.len);
+ else
+ diff = StopMiddle->len - scanstring.len;
+
+ if (diff < 0)
+ StopLow = StopMiddle + 1;
+ else if (diff > 0)
+ StopHigh = StopMiddle;
+ else
+ {
+ if (StopMiddle->lowercase)
+ diff = strncmp(StopMiddle->keyword, scanstring.val,
+ scanstring.len);
+
+ if (diff == 0)
+ res = StopMiddle->val;
+
+ break;
+ }
+ }
+
+ return res;
+}
+
+/*
+ * Called before any actual parsing is done
+ */
+static void
+jsonpath_scanner_init(const char *str, int slen)
+{
+ if (slen <= 0)
+ slen = strlen(str);
+
+ /*
+ * Might be left over after ereport()
+ */
+ yy_init_globals();
+
+ /*
+ * Make a scan buffer with special termination needed by flex.
+ */
+
+ scanbuflen = slen;
+ scanbuf = palloc(slen + 2);
+ memcpy(scanbuf, str, slen);
+ scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
+ scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
+
+ BEGIN(INITIAL);
+}
+
+
+/*
+ * Called after parsing is done to clean up after jsonpath_scanner_init()
+ */
+static void
+jsonpath_scanner_finish(void)
+{
+ yy_delete_buffer(scanbufhandle);
+ pfree(scanbuf);
+}
+
+/*
+ * Resize scanstring so that a string of the given length can be appended.
+ * Reinitialize if required.
+ */
+static void
+resizeString(bool init, int appendLen)
+{
+ if (init)
+ {
+ scanstring.total = Max(32, appendLen);
+ scanstring.val = (char *) palloc(scanstring.total);
+ scanstring.len = 0;
+ }
+ else
+ {
+ if (scanstring.len + appendLen >= scanstring.total)
+ {
+ while (scanstring.len + appendLen >= scanstring.total)
+ scanstring.total *= 2;
+ scanstring.val = repalloc(scanstring.val, scanstring.total);
+ }
+ }
+}
+
+/* Add set of bytes at "s" of length "l" to scanstring */
+static void
+addstring(bool init, char *s, int l)
+{
+ resizeString(init, l + 1);
+ memcpy(scanstring.val + scanstring.len, s, l);
+ scanstring.len += l;
+}
+
+/* Add single byte "c" to scanstring */
+static void
+addchar(bool init, char c)
+{
+ resizeString(init, 1);
+ scanstring.val[scanstring.len] = c;
+ if (c != '\0')
+ scanstring.len++;
+}
+
+/* Interface to jsonpath parser */
+JsonPathParseResult *
+parsejsonpath(const char *str, int len)
+{
+ JsonPathParseResult *parseresult;
+
+ jsonpath_scanner_init(str, len);
+
+ if (jsonpath_yyparse((void *) &parseresult) != 0)
+ jsonpath_yyerror(NULL, "bogus input"); /* shouldn't happen */
+
+ jsonpath_scanner_finish();
+
+ return parseresult;
+}
+
+/* Turn hex character into integer */
+static int
+hexval(char c)
+{
+ if (c >= '0' && c <= '9')
+ return c - '0';
+ if (c >= 'a' && c <= 'f')
+ return c - 'a' + 0xA;
+ if (c >= 'A' && c <= 'F')
+ return c - 'A' + 0xA;
+ jsonpath_yyerror(NULL, "invalid hexadecimal digit");
+ return 0; /* not reached */
+}
+
+/* Add given unicode character to scanstring */
+static void
+addUnicodeChar(int ch)
+{
+ if (ch == 0)
+ {
+ /* We can't allow this, since our TEXT type doesn't */
+ ereport(ERROR,
+ (errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
+ errmsg("unsupported Unicode escape sequence"),
+ errdetail("\\u0000 cannot be converted to text.")));
+ }
+ else
+ {
+ char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
+
+ pg_unicode_to_server(ch, (unsigned char *) cbuf);
+ addstring(false, cbuf, strlen(cbuf));
+ }
+}
+
+/* Add unicode character, processing any surrogate pairs */
+static void
+addUnicode(int ch, int *hi_surrogate)
+{
+ if (is_utf16_surrogate_first(ch))
+ {
+ if (*hi_surrogate != -1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s", "jsonpath"),
+ errdetail("Unicode high surrogate must not follow "
+ "a high surrogate.")));
+ *hi_surrogate = ch;
+ return;
+ }
+ else if (is_utf16_surrogate_second(ch))
+ {
+ if (*hi_surrogate == -1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s", "jsonpath"),
+ errdetail("Unicode low surrogate must follow a high "
+ "surrogate.")));
+ ch = surrogate_pair_to_codepoint(*hi_surrogate, ch);
+ *hi_surrogate = -1;
+ }
+ else if (*hi_surrogate != -1)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s", "jsonpath"),
+ errdetail("Unicode low surrogate must follow a high "
+ "surrogate.")));
+ }
+
+ addUnicodeChar(ch);
+}
+
+/*
+ * parseUnicode was adapted from json_lex_string() in
+ * src/backend/utils/adt/json.c
+ */
+static void
+parseUnicode(char *s, int l)
+{
+ int i = 2;
+ int hi_surrogate = -1;
+
+ for (i = 2; i < l; i += 2) /* skip '\u' */
+ {
+ int ch = 0;
+ int j;
+
+ if (s[i] == '{') /* parse '\u{XX...}' */
+ {
+ while (s[++i] != '}' && i < l)
+ ch = (ch << 4) | hexval(s[i]);
+ i++; /* skip '}' */
+ }
+ else /* parse '\uXXXX' */
+ {
+ for (j = 0; j < 4 && i < l; j++)
+ ch = (ch << 4) | hexval(s[i++]);
+ }
+
+ addUnicode(ch, &hi_surrogate);
+ }
+
+ if (hi_surrogate != -1)
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s", "jsonpath"),
+ errdetail("Unicode low surrogate must follow a high "
+ "surrogate.")));
+ }
+}
+
+/* Parse sequence of hex-encoded characters */
+static void
+parseHexChar(char *s)
+{
+ int ch = (hexval(s[2]) << 4) |
+ hexval(s[3]);
+
+ addUnicodeChar(ch);
+}
+
+/*
+ * Interface functions to make flex use palloc() instead of malloc().
+ * It'd be better to make these static, but flex insists otherwise.
+ */
+
+void *
+jsonpath_yyalloc(yy_size_t bytes)
+{
+ return palloc(bytes);
+}
+
+void *
+jsonpath_yyrealloc(void *ptr, yy_size_t bytes)
+{
+ if (ptr)
+ return repalloc(ptr, bytes);
+ else
+ return palloc(bytes);
+}
+
+void
+jsonpath_yyfree(void *ptr)
+{
+ if (ptr)
+ pfree(ptr);
+}
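checkKeyword() above binary-searches keywords[] by comparing lengths first and spellings (case-insensitively) only on equal lengths, which is why the array must stay sorted by length and then alphabetically. A small sketch of a comparator enforcing that same ordering, assuming a simplified entry type rather than JsonPathKeyword (illustrative only, not part of the patch):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

/* Simplified stand-in for the JsonPathKeyword entries used above. */
typedef struct { int len; const char *word; } kw;

/* Order by length first, then case-insensitively by spelling. */
static int kw_cmp(const void *a, const void *b)
{
    const kw *ka = a, *kb = b;

    if (ka->len != kb->len)
        return ka->len - kb->len;
    return strcasecmp(ka->word, kb->word);
}

int main(void)
{
    kw  words[] = {
        {4, "type"}, {2, "is"}, {10, "like_regex"}, {3, "abs"}, {4, "last"}
    };
    int n = sizeof(words) / sizeof(words[0]);

    qsort(words, n, sizeof(kw), kw_cmp);
    for (int i = 0; i < n; i++)
        printf("%d %s\n", words[i].len, words[i].word);  /* is, abs, last, type, like_regex */
    return 0;
}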
diff --git a/src/backend/utils/adt/levenshtein.c b/src/backend/utils/adt/levenshtein.c
new file mode 100644
index 0000000..3026cc2
--- /dev/null
+++ b/src/backend/utils/adt/levenshtein.c
@@ -0,0 +1,401 @@
+/*-------------------------------------------------------------------------
+ *
+ * levenshtein.c
+ * Levenshtein distance implementation.
+ *
+ * Original author: Joe Conway <mail@joeconway.com>
+ *
+ * This file is included by varlena.c twice, to provide matching code for (1)
+ * Levenshtein distance with custom costings, and (2) Levenshtein distance with
+ * custom costings and a "max" value above which exact distances are not
+ * interesting. Before the inclusion, we rely on the presence of the inline
+ * function rest_of_char_same().
+ *
+ * Written based on a description of the algorithm by Michael Gilleland found
+ * at http://www.merriampark.com/ld.htm. Also looked at levenshtein.c in the
+ * PHP 4.0.6 distribution for inspiration. Configurable penalty costs
+ * extension was introduced by Volkan YAZICI <volkan.yazici@gmail.com>.
+ *
+ * Copyright (c) 2001-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/levenshtein.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#define MAX_LEVENSHTEIN_STRLEN 255
+
+/*
+ * Calculates Levenshtein distance metric between supplied strings, which are
+ * not necessarily null-terminated.
+ *
+ * source: source string, of length slen bytes.
+ * target: target string, of length tlen bytes.
+ * ins_c, del_c, sub_c: costs to charge for character insertion, deletion,
+ * and substitution respectively; (1, 1, 1) costs suffice for common
+ * cases, but your mileage may vary.
+ * max_d: if provided and >= 0, maximum distance we care about; see below.
+ * trusted: caller is trusted and need not obey MAX_LEVENSHTEIN_STRLEN.
+ *
+ * One way to compute Levenshtein distance is to incrementally construct
+ * an (m+1)x(n+1) matrix where cell (i, j) represents the minimum number
+ * of operations required to transform the first i characters of s into
+ * the first j characters of t. The last column of the final row is the
+ * answer.
+ *
+ * We use that algorithm here with some modification. In lieu of holding
+ * the entire array in memory at once, we'll just use two arrays of size
+ * m+1 for storing accumulated values. At each step one array represents
+ * the "previous" row and one is the "current" row of the notional large
+ * array.
+ *
+ * If max_d >= 0, we only need to provide an accurate answer when that answer
+ * is less than or equal to max_d. From any cell in the matrix, there is a
+ * theoretical "minimum residual distance" from that cell to the last column
+ * of the final row. This minimum residual distance is zero when the
+ * untransformed portions of the strings are of equal length (because we might
+ * get lucky and find all the remaining characters matching) and is otherwise
+ * based on the minimum number of insertions or deletions needed to make them
+ * equal length. The residual distance grows as we move toward the upper
+ * right or lower left corners of the matrix. When the max_d bound is
+ * usefully tight, we can use this property to avoid computing the entirety
+ * of each row; instead, we maintain a start_column and stop_column that
+ * identify the portion of the matrix close to the diagonal which can still
+ * affect the final answer.
+ */
+int
+#ifdef LEVENSHTEIN_LESS_EQUAL
+varstr_levenshtein_less_equal(const char *source, int slen,
+ const char *target, int tlen,
+ int ins_c, int del_c, int sub_c,
+ int max_d, bool trusted)
+#else
+varstr_levenshtein(const char *source, int slen,
+ const char *target, int tlen,
+ int ins_c, int del_c, int sub_c,
+ bool trusted)
+#endif
+{
+ int m,
+ n;
+ int *prev;
+ int *curr;
+ int *s_char_len = NULL;
+ int i,
+ j;
+ const char *y;
+
+ /*
+ * For varstr_levenshtein_less_equal, we have real variables called
+ * start_column and stop_column; otherwise it's just short-hand for 0 and
+ * m.
+ */
+#ifdef LEVENSHTEIN_LESS_EQUAL
+ int start_column,
+ stop_column;
+
+#undef START_COLUMN
+#undef STOP_COLUMN
+#define START_COLUMN start_column
+#define STOP_COLUMN stop_column
+#else
+#undef START_COLUMN
+#undef STOP_COLUMN
+#define START_COLUMN 0
+#define STOP_COLUMN m
+#endif
+
+ /* Convert string lengths (in bytes) to lengths in characters */
+ m = pg_mbstrlen_with_len(source, slen);
+ n = pg_mbstrlen_with_len(target, tlen);
+
+ /*
+	 * We can transform an empty s into t with n insertions, or a non-empty s
+	 * into an empty t with m deletions.
+ */
+ if (!m)
+ return n * ins_c;
+ if (!n)
+ return m * del_c;
+
+ /*
+ * For security concerns, restrict excessive CPU+RAM usage. (This
+ * implementation uses O(m) memory and has O(mn) complexity.) If
+ * "trusted" is true, caller is responsible for not making excessive
+ * requests, typically by using a small max_d along with strings that are
+ * bounded, though not necessarily to MAX_LEVENSHTEIN_STRLEN exactly.
+ */
+ if (!trusted &&
+ (m > MAX_LEVENSHTEIN_STRLEN ||
+ n > MAX_LEVENSHTEIN_STRLEN))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("levenshtein argument exceeds maximum length of %d characters",
+ MAX_LEVENSHTEIN_STRLEN)));
+
+#ifdef LEVENSHTEIN_LESS_EQUAL
+ /* Initialize start and stop columns. */
+ start_column = 0;
+ stop_column = m + 1;
+
+ /*
+ * If max_d >= 0, determine whether the bound is impossibly tight. If so,
+ * return max_d + 1 immediately. Otherwise, determine whether it's tight
+ * enough to limit the computation we must perform. If so, figure out
+ * initial stop column.
+ */
+ if (max_d >= 0)
+ {
+ int min_theo_d; /* Theoretical minimum distance. */
+ int max_theo_d; /* Theoretical maximum distance. */
+ int net_inserts = n - m;
+
+ min_theo_d = net_inserts < 0 ?
+ -net_inserts * del_c : net_inserts * ins_c;
+ if (min_theo_d > max_d)
+ return max_d + 1;
+ if (ins_c + del_c < sub_c)
+ sub_c = ins_c + del_c;
+ max_theo_d = min_theo_d + sub_c * Min(m, n);
+ if (max_d >= max_theo_d)
+ max_d = -1;
+ else if (ins_c + del_c > 0)
+ {
+ /*
+ * Figure out how much of the first row of the notional matrix we
+ * need to fill in. If the string is growing, the theoretical
+ * minimum distance already incorporates the cost of deleting the
+ * number of characters necessary to make the two strings equal in
+ * length. Each additional deletion forces another insertion, so
+ * the best-case total cost increases by ins_c + del_c. If the
+ * string is shrinking, the minimum theoretical cost assumes no
+ * excess deletions; that is, we're starting no further right than
+ * column n - m. If we do start further right, the best-case
+ * total cost increases by ins_c + del_c for each move right.
+ */
+ int slack_d = max_d - min_theo_d;
+ int best_column = net_inserts < 0 ? -net_inserts : 0;
+
+ stop_column = best_column + (slack_d / (ins_c + del_c)) + 1;
+ if (stop_column > m)
+ stop_column = m + 1;
+ }
+ }
+#endif
+
+ /*
+ * In order to avoid calling pg_mblen() repeatedly on each character in s,
+ * we cache all the lengths before starting the main loop -- but if all
+ * the characters in both strings are single byte, then we skip this and
+ * use a fast-path in the main loop. If only one string contains
+ * multi-byte characters, we still build the array, so that the fast-path
+ * needn't deal with the case where the array hasn't been initialized.
+ */
+ if (m != slen || n != tlen)
+ {
+ int i;
+ const char *cp = source;
+
+ s_char_len = (int *) palloc((m + 1) * sizeof(int));
+ for (i = 0; i < m; ++i)
+ {
+ s_char_len[i] = pg_mblen(cp);
+ cp += s_char_len[i];
+ }
+ s_char_len[i] = 0;
+ }
+
+ /* One more cell for initialization column and row. */
+ ++m;
+ ++n;
+
+ /* Previous and current rows of notional array. */
+ prev = (int *) palloc(2 * m * sizeof(int));
+ curr = prev + m;
+
+ /*
+ * To transform the first i characters of s into the first 0 characters of
+ * t, we must perform i deletions.
+ */
+ for (i = START_COLUMN; i < STOP_COLUMN; i++)
+ prev[i] = i * del_c;
+
+ /* Loop through rows of the notional array */
+ for (y = target, j = 1; j < n; j++)
+ {
+ int *temp;
+ const char *x = source;
+ int y_char_len = n != tlen + 1 ? pg_mblen(y) : 1;
+
+#ifdef LEVENSHTEIN_LESS_EQUAL
+
+ /*
+ * In the best case, values percolate down the diagonal unchanged, so
+ * we must increment stop_column unless it's already on the right end
+ * of the array. The inner loop will read prev[stop_column], so we
+ * have to initialize it even though it shouldn't affect the result.
+ */
+ if (stop_column < m)
+ {
+ prev[stop_column] = max_d + 1;
+ ++stop_column;
+ }
+
+ /*
+ * The main loop fills in curr, but curr[0] needs a special case: to
+ * transform the first 0 characters of s into the first j characters
+ * of t, we must perform j insertions. However, if start_column > 0,
+ * this special case does not apply.
+ */
+ if (start_column == 0)
+ {
+ curr[0] = j * ins_c;
+ i = 1;
+ }
+ else
+ i = start_column;
+#else
+ curr[0] = j * ins_c;
+ i = 1;
+#endif
+
+ /*
+ * This inner loop is critical to performance, so we include a
+ * fast-path to handle the (fairly common) case where no multibyte
+ * characters are in the mix. The fast-path is entitled to assume
+ * that if s_char_len is not initialized then BOTH strings contain
+ * only single-byte characters.
+ */
+ if (s_char_len != NULL)
+ {
+ for (; i < STOP_COLUMN; i++)
+ {
+ int ins;
+ int del;
+ int sub;
+ int x_char_len = s_char_len[i - 1];
+
+ /*
+ * Calculate costs for insertion, deletion, and substitution.
+ *
+ * When calculating cost for substitution, we compare the last
+ * character of each possibly-multibyte character first,
+ * because that's enough to rule out most mis-matches. If we
+ * get past that test, then we compare the lengths and the
+ * remaining bytes.
+ */
+ ins = prev[i] + ins_c;
+ del = curr[i - 1] + del_c;
+ if (x[x_char_len - 1] == y[y_char_len - 1]
+ && x_char_len == y_char_len &&
+ (x_char_len == 1 || rest_of_char_same(x, y, x_char_len)))
+ sub = prev[i - 1];
+ else
+ sub = prev[i - 1] + sub_c;
+
+ /* Take the one with minimum cost. */
+ curr[i] = Min(ins, del);
+ curr[i] = Min(curr[i], sub);
+
+ /* Point to next character. */
+ x += x_char_len;
+ }
+ }
+ else
+ {
+ for (; i < STOP_COLUMN; i++)
+ {
+ int ins;
+ int del;
+ int sub;
+
+ /* Calculate costs for insertion, deletion, and substitution. */
+ ins = prev[i] + ins_c;
+ del = curr[i - 1] + del_c;
+ sub = prev[i - 1] + ((*x == *y) ? 0 : sub_c);
+
+ /* Take the one with minimum cost. */
+ curr[i] = Min(ins, del);
+ curr[i] = Min(curr[i], sub);
+
+ /* Point to next character. */
+ x++;
+ }
+ }
+
+ /* Swap current row with previous row. */
+ temp = curr;
+ curr = prev;
+ prev = temp;
+
+ /* Point to next character. */
+ y += y_char_len;
+
+#ifdef LEVENSHTEIN_LESS_EQUAL
+
+ /*
+ * This chunk of code represents a significant performance hit if used
+ * in the case where there is no max_d bound. This is probably not
+ * because the max_d >= 0 test itself is expensive, but rather because
+ * the possibility of needing to execute this code prevents tight
+ * optimization of the loop as a whole.
+ */
+ if (max_d >= 0)
+ {
+ /*
+ * The "zero point" is the column of the current row where the
+ * remaining portions of the strings are of equal length. There
+ * are (n - 1) characters in the target string, of which j have
+ * been transformed. There are (m - 1) characters in the source
+ * string, so we want to find the value for zp where (n - 1) - j =
+ * (m - 1) - zp.
+ */
+ int zp = j - (n - m);
+
+ /* Check whether the stop column can slide left. */
+ while (stop_column > 0)
+ {
+ int ii = stop_column - 1;
+ int net_inserts = ii - zp;
+
+ if (prev[ii] + (net_inserts > 0 ? net_inserts * ins_c :
+ -net_inserts * del_c) <= max_d)
+ break;
+ stop_column--;
+ }
+
+ /* Check whether the start column can slide right. */
+ while (start_column < stop_column)
+ {
+ int net_inserts = start_column - zp;
+
+ if (prev[start_column] +
+ (net_inserts > 0 ? net_inserts * ins_c :
+ -net_inserts * del_c) <= max_d)
+ break;
+
+ /*
+ * We'll never again update these values, so we must make sure
+ * there's nothing here that could confuse any future
+ * iteration of the outer loop.
+ */
+ prev[start_column] = max_d + 1;
+ curr[start_column] = max_d + 1;
+ if (start_column != 0)
+ source += (s_char_len != NULL) ? s_char_len[start_column - 1] : 1;
+ start_column++;
+ }
+
+ /* If they cross, we're going to exceed the bound. */
+ if (start_column >= stop_column)
+ return max_d + 1;
+ }
+#endif
+ }
+
+ /*
+ * Because the final value was swapped from the previous row to the
+ * current row, that's where we'll find it.
+ */
+ return prev[m - 1];
+}
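As described above, only two rows of the notional (m+1) x (n+1) matrix are kept at any time. A byte-oriented sketch of that rolling-row scheme with unit costs, leaving out the multibyte handling, configurable costs, and max_d early exit of the real function:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c)))

/* Unit-cost Levenshtein distance using two rolling rows. */
static int lev(const char *s, const char *t)
{
    int  m = (int) strlen(s);
    int  n = (int) strlen(t);
    int *prev = malloc((m + 1) * sizeof(int));
    int *curr = malloc((m + 1) * sizeof(int));
    int  result;

    /* Row 0: turning the first i chars of s into "" takes i deletions. */
    for (int i = 0; i <= m; i++)
        prev[i] = i;

    for (int j = 1; j <= n; j++)
    {
        curr[0] = j;            /* "" -> first j chars of t: j insertions */
        for (int i = 1; i <= m; i++)
        {
            int sub = prev[i - 1] + (s[i - 1] == t[j - 1] ? 0 : 1);

            curr[i] = MIN3(prev[i] + 1, curr[i - 1] + 1, sub);
        }
        /* Swap rows: curr becomes prev for the next iteration. */
        int *tmp = prev; prev = curr; curr = tmp;
    }

    result = prev[m];           /* final value ended up in prev after the swap */
    free(prev);
    free(curr);
    return result;
}

int main(void)
{
    printf("%d\n", lev("kitten", "sitting"));   /* prints 3 */
    return 0;
}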
diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c
new file mode 100644
index 0000000..e02fc37
--- /dev/null
+++ b/src/backend/utils/adt/like.c
@@ -0,0 +1,455 @@
+/*-------------------------------------------------------------------------
+ *
+ * like.c
+ * like expression handling code.
+ *
+ * NOTES
+ * A big hack of the regexp.c code!! Contributed by
+ * Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/like.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+
+#include "catalog/pg_collation.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "utils/builtins.h"
+#include "utils/pg_locale.h"
+
+
+#define LIKE_TRUE 1
+#define LIKE_FALSE 0
+#define LIKE_ABORT (-1)
+
+
+static int SB_MatchText(const char *t, int tlen, const char *p, int plen,
+ pg_locale_t locale, bool locale_is_c);
+static text *SB_do_like_escape(text *, text *);
+
+static int MB_MatchText(const char *t, int tlen, const char *p, int plen,
+ pg_locale_t locale, bool locale_is_c);
+static text *MB_do_like_escape(text *, text *);
+
+static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
+ pg_locale_t locale, bool locale_is_c);
+
+static int SB_IMatchText(const char *t, int tlen, const char *p, int plen,
+ pg_locale_t locale, bool locale_is_c);
+
+static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
+static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
+
+/*--------------------
+ * Support routine for MatchText. Compares given multibyte streams
+ * as wide characters. If they match, returns 1 otherwise returns 0.
+ *--------------------
+ */
+static inline int
+wchareq(const char *p1, const char *p2)
+{
+ int p1_len;
+
+ /* Optimization: quickly compare the first byte. */
+ if (*p1 != *p2)
+ return 0;
+
+ p1_len = pg_mblen(p1);
+ if (pg_mblen(p2) != p1_len)
+ return 0;
+
+ /* They are the same length */
+ while (p1_len--)
+ {
+ if (*p1++ != *p2++)
+ return 0;
+ }
+ return 1;
+}
+
+/*
+ * Formerly we had a routine iwchareq() here that tried to do case-insensitive
+ * comparison of multibyte characters. It did not work at all, however,
+ * because it relied on tolower() which has a single-byte API ... and
+ * towlower() wouldn't be much better since we have no suitably cheap way
+ * of getting a single character transformed to the system's wchar_t format.
+ * So now, we just downcase the strings using lower() and apply regular LIKE
+ * comparison. This should be revisited when we install better locale support.
+ */
+
+/*
+ * We do handle case-insensitive matching for single-byte encodings using
+ * fold-on-the-fly processing, however.
+ */
+static char
+SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
+{
+ if (locale_is_c)
+ return pg_ascii_tolower(c);
+#ifdef HAVE_LOCALE_T
+ else if (locale)
+ return tolower_l(c, locale->info.lt);
+#endif
+ else
+ return pg_tolower(c);
+}
+
+
+#define NextByte(p, plen) ((p)++, (plen)--)
+
+/* Set up to compile like_match.c for multibyte characters */
+#define CHAREQ(p1, p2) wchareq((p1), (p2))
+#define NextChar(p, plen) \
+ do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
+#define CopyAdvChar(dst, src, srclen) \
+ do { int __l = pg_mblen(src); \
+ (srclen) -= __l; \
+ while (__l-- > 0) \
+ *(dst)++ = *(src)++; \
+ } while (0)
+
+#define MatchText MB_MatchText
+#define do_like_escape MB_do_like_escape
+
+#include "like_match.c"
+
+/* Set up to compile like_match.c for single-byte characters */
+#define CHAREQ(p1, p2) (*(p1) == *(p2))
+#define NextChar(p, plen) NextByte((p), (plen))
+#define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
+
+#define MatchText SB_MatchText
+#define do_like_escape SB_do_like_escape
+
+#include "like_match.c"
+
+/* Set up to compile like_match.c for single-byte case-insensitive matches */
+#define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
+#define NextChar(p, plen) NextByte((p), (plen))
+#define MatchText SB_IMatchText
+
+#include "like_match.c"
+
+/* Set up to compile like_match.c for UTF8 encoding, using fast NextChar */
+
+#define NextChar(p, plen) \
+ do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
+#define MatchText UTF8_MatchText
+
+#include "like_match.c"
+
+/* Generic for all cases not requiring inline case-folding */
+static inline int
+GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
+{
+ if (collation && !lc_ctype_is_c(collation))
+ {
+ pg_locale_t locale = pg_newlocale_from_collation(collation);
+
+ if (locale && !locale->deterministic)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("nondeterministic collations are not supported for LIKE")));
+ }
+
+ if (pg_database_encoding_max_length() == 1)
+ return SB_MatchText(s, slen, p, plen, 0, true);
+ else if (GetDatabaseEncoding() == PG_UTF8)
+ return UTF8_MatchText(s, slen, p, plen, 0, true);
+ else
+ return MB_MatchText(s, slen, p, plen, 0, true);
+}
+
+static inline int
+Generic_Text_IC_like(text *str, text *pat, Oid collation)
+{
+ char *s,
+ *p;
+ int slen,
+ plen;
+ pg_locale_t locale = 0;
+ bool locale_is_c = false;
+
+ if (!OidIsValid(collation))
+ {
+ /*
+ * This typically means that the parser could not resolve a conflict
+ * of implicit collations, so report it that way.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for ILIKE"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+ }
+
+ if (lc_ctype_is_c(collation))
+ locale_is_c = true;
+ else
+ locale = pg_newlocale_from_collation(collation);
+
+ if (locale && !locale->deterministic)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("nondeterministic collations are not supported for ILIKE")));
+
+ /*
+ * For efficiency reasons, in the single byte case we don't call lower()
+ * on the pattern and text, but instead call SB_lower_char on each
+ * character. In the multi-byte case we don't have much choice :-(. Also,
+ * ICU does not support single-character case folding, so we go the long
+ * way.
+ */
+
+ if (pg_database_encoding_max_length() > 1 || (locale && locale->provider == COLLPROVIDER_ICU))
+ {
+ pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
+ PointerGetDatum(pat)));
+ p = VARDATA_ANY(pat);
+ plen = VARSIZE_ANY_EXHDR(pat);
+ str = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
+ PointerGetDatum(str)));
+ s = VARDATA_ANY(str);
+ slen = VARSIZE_ANY_EXHDR(str);
+ if (GetDatabaseEncoding() == PG_UTF8)
+ return UTF8_MatchText(s, slen, p, plen, 0, true);
+ else
+ return MB_MatchText(s, slen, p, plen, 0, true);
+ }
+ else
+ {
+ p = VARDATA_ANY(pat);
+ plen = VARSIZE_ANY_EXHDR(pat);
+ s = VARDATA_ANY(str);
+ slen = VARSIZE_ANY_EXHDR(str);
+ return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
+ }
+}
+
+/*
+ * interface routines called by the function manager
+ */
+
+Datum
+namelike(PG_FUNCTION_ARGS)
+{
+ Name str = PG_GETARG_NAME(0);
+ text *pat = PG_GETARG_TEXT_PP(1);
+ bool result;
+ char *s,
+ *p;
+ int slen,
+ plen;
+
+ s = NameStr(*str);
+ slen = strlen(s);
+ p = VARDATA_ANY(pat);
+ plen = VARSIZE_ANY_EXHDR(pat);
+
+ result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+namenlike(PG_FUNCTION_ARGS)
+{
+ Name str = PG_GETARG_NAME(0);
+ text *pat = PG_GETARG_TEXT_PP(1);
+ bool result;
+ char *s,
+ *p;
+ int slen,
+ plen;
+
+ s = NameStr(*str);
+ slen = strlen(s);
+ p = VARDATA_ANY(pat);
+ plen = VARSIZE_ANY_EXHDR(pat);
+
+ result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+textlike(PG_FUNCTION_ARGS)
+{
+ text *str = PG_GETARG_TEXT_PP(0);
+ text *pat = PG_GETARG_TEXT_PP(1);
+ bool result;
+ char *s,
+ *p;
+ int slen,
+ plen;
+
+ s = VARDATA_ANY(str);
+ slen = VARSIZE_ANY_EXHDR(str);
+ p = VARDATA_ANY(pat);
+ plen = VARSIZE_ANY_EXHDR(pat);
+
+ result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+textnlike(PG_FUNCTION_ARGS)
+{
+ text *str = PG_GETARG_TEXT_PP(0);
+ text *pat = PG_GETARG_TEXT_PP(1);
+ bool result;
+ char *s,
+ *p;
+ int slen,
+ plen;
+
+ s = VARDATA_ANY(str);
+ slen = VARSIZE_ANY_EXHDR(str);
+ p = VARDATA_ANY(pat);
+ plen = VARSIZE_ANY_EXHDR(pat);
+
+ result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+bytealike(PG_FUNCTION_ARGS)
+{
+ bytea *str = PG_GETARG_BYTEA_PP(0);
+ bytea *pat = PG_GETARG_BYTEA_PP(1);
+ bool result;
+ char *s,
+ *p;
+ int slen,
+ plen;
+
+ s = VARDATA_ANY(str);
+ slen = VARSIZE_ANY_EXHDR(str);
+ p = VARDATA_ANY(pat);
+ plen = VARSIZE_ANY_EXHDR(pat);
+
+ result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+byteanlike(PG_FUNCTION_ARGS)
+{
+ bytea *str = PG_GETARG_BYTEA_PP(0);
+ bytea *pat = PG_GETARG_BYTEA_PP(1);
+ bool result;
+ char *s,
+ *p;
+ int slen,
+ plen;
+
+ s = VARDATA_ANY(str);
+ slen = VARSIZE_ANY_EXHDR(str);
+ p = VARDATA_ANY(pat);
+ plen = VARSIZE_ANY_EXHDR(pat);
+
+ result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
+
+ PG_RETURN_BOOL(result);
+}
+
+/*
+ * Case-insensitive versions
+ */
+
+Datum
+nameiclike(PG_FUNCTION_ARGS)
+{
+ Name str = PG_GETARG_NAME(0);
+ text *pat = PG_GETARG_TEXT_PP(1);
+ bool result;
+ text *strtext;
+
+ strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
+ NameGetDatum(str)));
+ result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+nameicnlike(PG_FUNCTION_ARGS)
+{
+ Name str = PG_GETARG_NAME(0);
+ text *pat = PG_GETARG_TEXT_PP(1);
+ bool result;
+ text *strtext;
+
+ strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
+ NameGetDatum(str)));
+ result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+texticlike(PG_FUNCTION_ARGS)
+{
+ text *str = PG_GETARG_TEXT_PP(0);
+ text *pat = PG_GETARG_TEXT_PP(1);
+ bool result;
+
+ result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+texticnlike(PG_FUNCTION_ARGS)
+{
+ text *str = PG_GETARG_TEXT_PP(0);
+ text *pat = PG_GETARG_TEXT_PP(1);
+ bool result;
+
+ result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
+
+ PG_RETURN_BOOL(result);
+}
+
+/*
+ * like_escape() --- given a pattern and an ESCAPE string,
+ * convert the pattern to use Postgres' standard backslash escape convention.
+ */
+Datum
+like_escape(PG_FUNCTION_ARGS)
+{
+ text *pat = PG_GETARG_TEXT_PP(0);
+ text *esc = PG_GETARG_TEXT_PP(1);
+ text *result;
+
+ if (pg_database_encoding_max_length() == 1)
+ result = SB_do_like_escape(pat, esc);
+ else
+ result = MB_do_like_escape(pat, esc);
+
+ PG_RETURN_TEXT_P(result);
+}
+
+/*
+ * like_escape_bytea() --- given a pattern and an ESCAPE string,
+ * convert the pattern to use Postgres' standard backslash escape convention.
+ */
+Datum
+like_escape_bytea(PG_FUNCTION_ARGS)
+{
+ bytea *pat = PG_GETARG_BYTEA_PP(0);
+ bytea *esc = PG_GETARG_BYTEA_PP(1);
+ bytea *result = SB_do_like_escape((text *) pat, (text *) esc);
+
+ PG_RETURN_BYTEA_P((bytea *) result);
+}
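The UTF8 fast-path NextChar defined in like.c advances to the next character by skipping continuation bytes, i.e. bytes matching (b & 0xC0) == 0x80. A standalone sketch of why that test is enough to step over whole UTF-8 characters (illustrative only):

#include <stdio.h>

/* Count UTF-8 characters by skipping continuation bytes (10xxxxxx). */
static int utf8_char_count(const char *s, int len)
{
    int count = 0;

    for (int i = 0; i < len; i++)
    {
        /* Only the first byte of each character fails this test. */
        if (((unsigned char) s[i] & 0xC0) != 0x80)
            count++;
    }
    return count;
}

int main(void)
{
    const char *s = "a\xC3\xA9\xE2\x82\xAC";    /* "a", U+00E9, U+20AC: 3 chars, 6 bytes */

    printf("%d\n", utf8_char_count(s, 6));      /* prints 3 */
    return 0;
}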
diff --git a/src/backend/utils/adt/like_match.c b/src/backend/utils/adt/like_match.c
new file mode 100644
index 0000000..e876560
--- /dev/null
+++ b/src/backend/utils/adt/like_match.c
@@ -0,0 +1,360 @@
+/*-------------------------------------------------------------------------
+ *
+ * like_match.c
+ * LIKE pattern matching internal code.
+ *
+ * This file is included by like.c four times, to provide matching code for
+ * (1) single-byte encodings, (2) UTF8, (3) other multi-byte encodings,
+ * and (4) case insensitive matches in single-byte encodings.
+ * (UTF8 is a special case because we can use a much more efficient version
+ * of NextChar than can be used for general multi-byte encodings.)
+ *
+ * Before the inclusion, we need to define the following macros:
+ *
+ * NextChar
+ * MatchText - name to give the matching function
+ * do_like_escape - name of the escape-conversion function, if wanted - needs CHAREQ and CopyAdvChar
+ * MATCH_LOWER - define for case (4) to specify case folding for 1-byte chars
+ *
+ * Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/like_match.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * Originally written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
+ * Rich $alz is now <rsalz@bbn.com>.
+ * Special thanks to Lars Mathiesen <thorinn@diku.dk> for the
+ * LIKE_ABORT code.
+ *
+ * This code was shamelessly stolen from the "pql" code by myself and
+ * slightly modified :)
+ *
+ * All references to the word "star" were replaced by "percent"
+ * All references to the word "wild" were replaced by "like"
+ *
+ * All the nice shell RE matching stuff was replaced by just "_" and "%"
+ *
+ * As I don't have a copy of the SQL standard handy I wasn't sure whether
+ * to leave in the '\' escape character handling.
+ *
+ * Keith Parks. <keith@mtcc.demon.co.uk>
+ *
+ * SQL lets you specify the escape character by saying
+ * LIKE <pattern> ESCAPE <escape character>. We are a small operation
+ * so we force you to use '\'. - ay 7/95
+ *
+ * Now we have the like_escape() function that converts patterns with
+ * any specified escape character (or none at all) to the internal
+ * default escape character, which is still '\'. - tgl 9/2000
+ *
+ * The code is rewritten to avoid requiring null-terminated strings,
+ * which in turn allows us to leave out some memcpy() operations.
+ * This code should be faster and take less memory, but no promises...
+ * - thomas 2000-08-06
+ */
+
+
+/*--------------------
+ * Match text and pattern, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT.
+ *
+ * LIKE_TRUE: they match
+ * LIKE_FALSE: they don't match
+ * LIKE_ABORT: not only don't they match, but the text is too short.
+ *
+ * If LIKE_ABORT is returned, then no suffix of the text can match the
+ * pattern either, so an upper-level % scan can stop scanning now.
+ *--------------------
+ */
+
+#ifdef MATCH_LOWER
+#define GETCHAR(t) MATCH_LOWER(t)
+#else
+#define GETCHAR(t) (t)
+#endif
+
+static int
+MatchText(const char *t, int tlen, const char *p, int plen,
+ pg_locale_t locale, bool locale_is_c)
+{
+ /* Fast path for match-everything pattern */
+ if (plen == 1 && *p == '%')
+ return LIKE_TRUE;
+
+ /* Since this function recurses, it could be driven to stack overflow */
+ check_stack_depth();
+
+ /*
+ * In this loop, we advance by char when matching wildcards (and thus on
+ * recursive entry to this function we are properly char-synced). On other
+ * occasions it is safe to advance by byte, as the text and pattern will
+ * be in lockstep. This allows us to perform all comparisons between the
+ * text and pattern on a byte by byte basis, even for multi-byte
+ * encodings.
+ */
+ while (tlen > 0 && plen > 0)
+ {
+ if (*p == '\\')
+ {
+ /* Next pattern byte must match literally, whatever it is */
+ NextByte(p, plen);
+ /* ... and there had better be one, per SQL standard */
+ if (plen <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
+ errmsg("LIKE pattern must not end with escape character")));
+ if (GETCHAR(*p) != GETCHAR(*t))
+ return LIKE_FALSE;
+ }
+ else if (*p == '%')
+ {
+ char firstpat;
+
+ /*
+ * % processing is essentially a search for a text position at
+ * which the remainder of the text matches the remainder of the
+ * pattern, using a recursive call to check each potential match.
+ *
+ * If there are wildcards immediately following the %, we can skip
+ * over them first, using the idea that any sequence of N _'s and
+ * one or more %'s is equivalent to N _'s and one % (ie, it will
+ * match any sequence of at least N text characters). In this way
+ * we will always run the recursive search loop using a pattern
+ * fragment that begins with a literal character-to-match, thereby
+ * not recursing more than we have to.
+ */
+ NextByte(p, plen);
+
+ while (plen > 0)
+ {
+ if (*p == '%')
+ NextByte(p, plen);
+ else if (*p == '_')
+ {
+ /* If not enough text left to match the pattern, ABORT */
+ if (tlen <= 0)
+ return LIKE_ABORT;
+ NextChar(t, tlen);
+ NextByte(p, plen);
+ }
+ else
+ break; /* Reached a non-wildcard pattern char */
+ }
+
+ /*
+ * If we're at end of pattern, match: we have a trailing % which
+ * matches any remaining text string.
+ */
+ if (plen <= 0)
+ return LIKE_TRUE;
+
+ /*
+ * Otherwise, scan for a text position at which we can match the
+ * rest of the pattern. The first remaining pattern char is known
+ * to be a regular or escaped literal character, so we can compare
+ * the first pattern byte to each text byte to avoid recursing
+ * more than we have to. This fact also guarantees that we don't
+ * have to consider a match to the zero-length substring at the
+ * end of the text.
+ */
+ if (*p == '\\')
+ {
+ if (plen < 2)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
+ errmsg("LIKE pattern must not end with escape character")));
+ firstpat = GETCHAR(p[1]);
+ }
+ else
+ firstpat = GETCHAR(*p);
+
+ while (tlen > 0)
+ {
+ if (GETCHAR(*t) == firstpat)
+ {
+ int matched = MatchText(t, tlen, p, plen,
+ locale, locale_is_c);
+
+ if (matched != LIKE_FALSE)
+ return matched; /* TRUE or ABORT */
+ }
+
+ NextChar(t, tlen);
+ }
+
+ /*
+ * End of text with no match, so no point in trying later places
+ * to start matching this pattern.
+ */
+ return LIKE_ABORT;
+ }
+ else if (*p == '_')
+ {
+ /* _ matches any single character, and we know there is one */
+ NextChar(t, tlen);
+ NextByte(p, plen);
+ continue;
+ }
+ else if (GETCHAR(*p) != GETCHAR(*t))
+ {
+ /* non-wildcard pattern char fails to match text char */
+ return LIKE_FALSE;
+ }
+
+ /*
+ * Pattern and text match, so advance.
+ *
+ * It is safe to use NextByte instead of NextChar here, even for
+ * multi-byte character sets, because we are not following immediately
+ * after a wildcard character. If we are in the middle of a multibyte
+ * character, we must already have matched at least one byte of the
+ * character from both text and pattern; so we cannot get out-of-sync
+ * on character boundaries. And we know that no backend-legal
+ * encoding allows ASCII characters such as '%' to appear as non-first
+ * bytes of characters, so we won't mistakenly detect a new wildcard.
+ */
+ NextByte(t, tlen);
+ NextByte(p, plen);
+ }
+
+ if (tlen > 0)
+ return LIKE_FALSE; /* end of pattern, but not of text */
+
+ /*
+ * End of text, but perhaps not of pattern. Match iff the remaining
+ * pattern can match a zero-length string, ie, it's zero or more %'s.
+ */
+ while (plen > 0 && *p == '%')
+ NextByte(p, plen);
+ if (plen <= 0)
+ return LIKE_TRUE;
+
+ /*
+ * End of text with no match, so no point in trying later places to start
+ * matching this pattern.
+ */
+ return LIKE_ABORT;
+} /* MatchText() */
+
+/*
+ * like_escape() --- given a pattern and an ESCAPE string,
+ * convert the pattern to use Postgres' standard backslash escape convention.
+ */
+#ifdef do_like_escape
+
+static text *
+do_like_escape(text *pat, text *esc)
+{
+ text *result;
+ char *p,
+ *e,
+ *r;
+ int plen,
+ elen;
+ bool afterescape;
+
+ p = VARDATA_ANY(pat);
+ plen = VARSIZE_ANY_EXHDR(pat);
+ e = VARDATA_ANY(esc);
+ elen = VARSIZE_ANY_EXHDR(esc);
+
+ /*
+ * Worst-case pattern growth is 2x --- unlikely, but it's hardly worth
+ * trying to calculate the size more accurately than that.
+ */
+ result = (text *) palloc(plen * 2 + VARHDRSZ);
+ r = VARDATA(result);
+
+ if (elen == 0)
+ {
+ /*
+ * No escape character is wanted. Double any backslashes in the
+ * pattern to make them act like ordinary characters.
+ */
+ while (plen > 0)
+ {
+ if (*p == '\\')
+ *r++ = '\\';
+ CopyAdvChar(r, p, plen);
+ }
+ }
+ else
+ {
+ /*
+ * The specified escape must be only a single character.
+ */
+ NextChar(e, elen);
+ if (elen != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
+ errmsg("invalid escape string"),
+ errhint("Escape string must be empty or one character.")));
+
+ e = VARDATA_ANY(esc);
+
+ /*
+ * If specified escape is '\', just copy the pattern as-is.
+ */
+ if (*e == '\\')
+ {
+ memcpy(result, pat, VARSIZE_ANY(pat));
+ return result;
+ }
+
+ /*
+ * Otherwise, convert occurrences of the specified escape character to
+ * '\', and double occurrences of '\' --- unless they immediately
+ * follow an escape character!
+ */
+ afterescape = false;
+ while (plen > 0)
+ {
+ if (CHAREQ(p, e) && !afterescape)
+ {
+ *r++ = '\\';
+ NextChar(p, plen);
+ afterescape = true;
+ }
+ else if (*p == '\\')
+ {
+ *r++ = '\\';
+ if (!afterescape)
+ *r++ = '\\';
+ NextChar(p, plen);
+ afterescape = false;
+ }
+ else
+ {
+ CopyAdvChar(r, p, plen);
+ afterescape = false;
+ }
+ }
+ }
+
+ SET_VARSIZE(result, r - ((char *) result));
+
+ return result;
+}
+#endif /* do_like_escape */
+
+#ifdef CHAREQ
+#undef CHAREQ
+#endif
+
+#undef NextChar
+#undef CopyAdvChar
+#undef MatchText
+
+#ifdef do_like_escape
+#undef do_like_escape
+#endif
+
+#undef GETCHAR
+
+#ifdef MATCH_LOWER
+#undef MATCH_LOWER
+
+#endif
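MatchText() above adds escape handling, multiple encodings, and the LIKE_ABORT early-exit to the basic wildcard recursion its comments describe. Reduced to NUL-terminated ASCII strings and a plain boolean result, that recursion looks roughly like this sketch (not the PostgreSQL implementation):

#include <stdio.h>

/* Minimal LIKE-style matcher: '%' matches any run, '_' matches one byte. */
static int like_match(const char *t, const char *p)
{
    while (*t && *p)
    {
        if (*p == '%')
        {
            p++;
            if (*p == '\0')
                return 1;               /* trailing '%' matches the rest */
            /* Try every remaining text position for the rest of the pattern. */
            for (; *t; t++)
                if (like_match(t, p))
                    return 1;
            return 0;
        }
        else if (*p == '_' || *p == *t)
        {
            t++;
            p++;
        }
        else
            return 0;
    }
    /* Out of text: any remaining pattern must be all '%'. */
    while (*p == '%')
        p++;
    return *t == '\0' && *p == '\0';
}

int main(void)
{
    printf("%d\n", like_match("abcdef", "abc%de_"));    /* prints 1 */
    printf("%d\n", like_match("abcdef", "abc%x"));      /* prints 0 */
    return 0;
}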
diff --git a/src/backend/utils/adt/like_support.c b/src/backend/utils/adt/like_support.c
new file mode 100644
index 0000000..2d3aaaa
--- /dev/null
+++ b/src/backend/utils/adt/like_support.c
@@ -0,0 +1,1800 @@
+/*-------------------------------------------------------------------------
+ *
+ * like_support.c
+ * Planner support functions for LIKE, regex, and related operators.
+ *
+ * These routines handle special optimization of operators that can be
+ * used with index scans even though they are not known to the executor's
+ * indexscan machinery. The key idea is that these operators allow us
+ * to derive approximate indexscan qual clauses, such that any tuples
+ * that pass the operator clause itself must also satisfy the simpler
+ * indexscan condition(s). Then we can use the indexscan machinery
+ * to avoid scanning as much of the table as we'd otherwise have to,
+ * while applying the original operator as a qpqual condition to ensure
+ * we deliver only the tuples we want. (In essence, we're using a regular
+ * index as if it were a lossy index.)
+ *
+ * An example of what we're doing is
+ * textfield LIKE 'abc%def'
+ *	  from which we can generate the index-scannable conditions
+ * textfield >= 'abc' AND textfield < 'abd'
+ * which allow efficient scanning of an index on textfield.
+ * (In reality, character set and collation issues make the transformation
+ * from LIKE to indexscan limits rather harder than one might think ...
+ * but that's the basic idea.)
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/like_support.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+
+#include "access/htup_details.h"
+#include "access/stratnum.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_opfamily.h"
+#include "catalog/pg_statistic.h"
+#include "catalog/pg_type.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/lsyscache.h"
+#include "utils/pg_locale.h"
+#include "utils/selfuncs.h"
+#include "utils/varlena.h"
+
+
+typedef enum
+{
+ Pattern_Type_Like,
+ Pattern_Type_Like_IC,
+ Pattern_Type_Regex,
+ Pattern_Type_Regex_IC,
+ Pattern_Type_Prefix
+} Pattern_Type;
+
+typedef enum
+{
+ Pattern_Prefix_None, Pattern_Prefix_Partial, Pattern_Prefix_Exact
+} Pattern_Prefix_Status;
+
+static Node *like_regex_support(Node *rawreq, Pattern_Type ptype);
+static List *match_pattern_prefix(Node *leftop,
+ Node *rightop,
+ Pattern_Type ptype,
+ Oid expr_coll,
+ Oid opfamily,
+ Oid indexcollation);
+static double patternsel_common(PlannerInfo *root,
+ Oid oprid,
+ Oid opfuncid,
+ List *args,
+ int varRelid,
+ Oid collation,
+ Pattern_Type ptype,
+ bool negate);
+static Pattern_Prefix_Status pattern_fixed_prefix(Const *patt,
+ Pattern_Type ptype,
+ Oid collation,
+ Const **prefix,
+ Selectivity *rest_selec);
+static Selectivity prefix_selectivity(PlannerInfo *root,
+ VariableStatData *vardata,
+ Oid eqopr, Oid ltopr, Oid geopr,
+ Oid collation,
+ Const *prefixcon);
+static Selectivity like_selectivity(const char *patt, int pattlen,
+ bool case_insensitive);
+static Selectivity regex_selectivity(const char *patt, int pattlen,
+ bool case_insensitive,
+ int fixed_prefix_len);
+static int pattern_char_isalpha(char c, bool is_multibyte,
+ pg_locale_t locale, bool locale_is_c);
+static Const *make_greater_string(const Const *str_const, FmgrInfo *ltproc,
+ Oid collation);
+static Datum string_to_datum(const char *str, Oid datatype);
+static Const *string_to_const(const char *str, Oid datatype);
+static Const *string_to_bytea_const(const char *str, size_t str_len);
+
+
+/*
+ * Planner support functions for LIKE, regex, and related operators
+ */
+Datum
+textlike_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+ PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like));
+}
+
+Datum
+texticlike_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+ PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Like_IC));
+}
+
+Datum
+textregexeq_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+ PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex));
+}
+
+Datum
+texticregexeq_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+ PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Regex_IC));
+}
+
+Datum
+text_starts_with_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+ PG_RETURN_POINTER(like_regex_support(rawreq, Pattern_Type_Prefix));
+}
+
+/* Common code for the above */
+static Node *
+like_regex_support(Node *rawreq, Pattern_Type ptype)
+{
+ Node *ret = NULL;
+
+ if (IsA(rawreq, SupportRequestSelectivity))
+ {
+ /*
+ * Make a selectivity estimate for a function call, just as we'd do if
+ * the call was via the corresponding operator.
+ */
+ SupportRequestSelectivity *req = (SupportRequestSelectivity *) rawreq;
+ Selectivity s1;
+
+ if (req->is_join)
+ {
+ /*
+ * For the moment we just punt. If patternjoinsel is ever
+ * improved to do better, this should be made to call it.
+ */
+ s1 = DEFAULT_MATCH_SEL;
+ }
+ else
+ {
+ /* Share code with operator restriction selectivity functions */
+ s1 = patternsel_common(req->root,
+ InvalidOid,
+ req->funcid,
+ req->args,
+ req->varRelid,
+ req->inputcollid,
+ ptype,
+ false);
+ }
+ req->selectivity = s1;
+ ret = (Node *) req;
+ }
+ else if (IsA(rawreq, SupportRequestIndexCondition))
+ {
+ /* Try to convert operator/function call to index conditions */
+ SupportRequestIndexCondition *req = (SupportRequestIndexCondition *) rawreq;
+
+ /*
+ * Currently we have no "reverse" match operators with the pattern on
+ * the left, so we only need consider cases with the indexkey on the
+ * left.
+ */
+ if (req->indexarg != 0)
+ return NULL;
+
+ if (is_opclause(req->node))
+ {
+ OpExpr *clause = (OpExpr *) req->node;
+
+ Assert(list_length(clause->args) == 2);
+ ret = (Node *)
+ match_pattern_prefix((Node *) linitial(clause->args),
+ (Node *) lsecond(clause->args),
+ ptype,
+ clause->inputcollid,
+ req->opfamily,
+ req->indexcollation);
+ }
+ else if (is_funcclause(req->node)) /* be paranoid */
+ {
+ FuncExpr *clause = (FuncExpr *) req->node;
+
+ Assert(list_length(clause->args) == 2);
+ ret = (Node *)
+ match_pattern_prefix((Node *) linitial(clause->args),
+ (Node *) lsecond(clause->args),
+ ptype,
+ clause->inputcollid,
+ req->opfamily,
+ req->indexcollation);
+ }
+ }
+
+ return ret;
+}
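like_regex_support() forwards index-condition requests to match_pattern_prefix(), which, as the file header explains, turns a fixed prefix such as 'abc' into range bounds like >= 'abc' AND < 'abd'. The real upper bound comes from make_greater_string(), which is collation- and encoding-aware; the sketch below shows only the core idea of bumping the last byte of an ASCII prefix, using a hypothetical helper name:

#include <stdio.h>
#include <string.h>

/*
 * Given an ASCII prefix, build the smallest string greater than every
 * string that starts with the prefix, by bumping its last byte.
 * (The real planner code retries earlier bytes and verifies sort order
 * under the index collation; this sketch ignores all of that.)
 */
static int make_upper_bound(const char *prefix, char *out, size_t outlen)
{
    size_t len = strlen(prefix);

    if (len == 0 || len + 1 > outlen || (unsigned char) prefix[len - 1] >= 0x7F)
        return 0;                       /* give up; caller keeps only ">= prefix" */

    memcpy(out, prefix, len + 1);
    out[len - 1]++;                     /* 'abc' -> 'abd' */
    return 1;
}

int main(void)
{
    char hi[16];

    if (make_upper_bound("abc", hi, sizeof(hi)))
        printf("col >= 'abc' AND col < '%s'\n", hi);    /* col < 'abd' */
    return 0;
}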
+
+/*
+ * match_pattern_prefix
+ * Try to generate an indexqual for a LIKE or regex operator.
+ */
+static List *
+match_pattern_prefix(Node *leftop,
+ Node *rightop,
+ Pattern_Type ptype,
+ Oid expr_coll,
+ Oid opfamily,
+ Oid indexcollation)
+{
+ List *result;
+ Const *patt;
+ Const *prefix;
+ Pattern_Prefix_Status pstatus;
+ Oid ldatatype;
+ Oid rdatatype;
+ Oid eqopr;
+ Oid ltopr;
+ Oid geopr;
+ Oid preopr = InvalidOid;
+ bool collation_aware;
+ Expr *expr;
+ FmgrInfo ltproc;
+ Const *greaterstr;
+
+ /*
+ * Can't do anything with a non-constant or NULL pattern argument.
+ *
+ * Note that since we restrict ourselves to cases with a hard constant on
+ * the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry
+ * about verifying that.
+ */
+ if (!IsA(rightop, Const) ||
+ ((Const *) rightop)->constisnull)
+ return NIL;
+ patt = (Const *) rightop;
+
+ /*
+ * Not supported if the expression collation is nondeterministic. The
+ * optimized equality or prefix tests use bytewise comparisons, which is
+ * not consistent with nondeterministic collations. The actual
+ * pattern-matching implementation functions will later error out that
+ * pattern-matching is not supported with nondeterministic collations. (We
+ * could also error out here, but by doing it later we get more precise
+ * error messages.) (It should be possible to support at least
+ * Pattern_Prefix_Exact, but no point as long as the actual
+ * pattern-matching implementations don't support it.)
+ *
+ * expr_coll is not set for a non-collation-aware data type such as bytea.
+ */
+ if (expr_coll && !get_collation_isdeterministic(expr_coll))
+ return NIL;
+
+ /*
+ * Try to extract a fixed prefix from the pattern.
+ */
+ pstatus = pattern_fixed_prefix(patt, ptype, expr_coll,
+ &prefix, NULL);
+
+ /* fail if no fixed prefix */
+ if (pstatus == Pattern_Prefix_None)
+ return NIL;
+
+ /*
+ * Identify the operators we want to use, based on the type of the
+ * left-hand argument. Usually these are just the type's regular
+ * comparison operators, but if we are considering one of the semi-legacy
+ * "pattern" opclasses, use the "pattern" operators instead. Those are
+ * not collation-sensitive but always use C collation, as we want. The
+ * selected operators also determine the needed type of the prefix
+ * constant.
+ */
+ ldatatype = exprType(leftop);
+ switch (ldatatype)
+ {
+ case TEXTOID:
+ if (opfamily == TEXT_PATTERN_BTREE_FAM_OID)
+ {
+ eqopr = TextEqualOperator;
+ ltopr = TextPatternLessOperator;
+ geopr = TextPatternGreaterEqualOperator;
+ collation_aware = false;
+ }
+ else if (opfamily == TEXT_SPGIST_FAM_OID)
+ {
+ eqopr = TextEqualOperator;
+ ltopr = TextPatternLessOperator;
+ geopr = TextPatternGreaterEqualOperator;
+ /* This opfamily has direct support for prefixing */
+ preopr = TextPrefixOperator;
+ collation_aware = false;
+ }
+ else
+ {
+ eqopr = TextEqualOperator;
+ ltopr = TextLessOperator;
+ geopr = TextGreaterEqualOperator;
+ collation_aware = true;
+ }
+ rdatatype = TEXTOID;
+ break;
+ case NAMEOID:
+
+ /*
+ * Note that here, we need the RHS type to be text, so that the
+ * comparison value isn't improperly truncated to NAMEDATALEN.
+ */
+ eqopr = NameEqualTextOperator;
+ ltopr = NameLessTextOperator;
+ geopr = NameGreaterEqualTextOperator;
+ collation_aware = true;
+ rdatatype = TEXTOID;
+ break;
+ case BPCHAROID:
+ if (opfamily == BPCHAR_PATTERN_BTREE_FAM_OID)
+ {
+ eqopr = BpcharEqualOperator;
+ ltopr = BpcharPatternLessOperator;
+ geopr = BpcharPatternGreaterEqualOperator;
+ collation_aware = false;
+ }
+ else
+ {
+ eqopr = BpcharEqualOperator;
+ ltopr = BpcharLessOperator;
+ geopr = BpcharGreaterEqualOperator;
+ collation_aware = true;
+ }
+ rdatatype = BPCHAROID;
+ break;
+ case BYTEAOID:
+ eqopr = ByteaEqualOperator;
+ ltopr = ByteaLessOperator;
+ geopr = ByteaGreaterEqualOperator;
+ collation_aware = false;
+ rdatatype = BYTEAOID;
+ break;
+ default:
+ /* Can't get here unless we're attached to the wrong operator */
+ return NIL;
+ }
+
+ /*
+ * If necessary, coerce the prefix constant to the right type. The given
+ * prefix constant is either text or bytea type, therefore the only case
+ * where we need to do anything is when converting text to bpchar. Those
+ * two types are binary-compatible, so relabeling the Const node is
+ * sufficient.
+ */
+ if (prefix->consttype != rdatatype)
+ {
+ Assert(prefix->consttype == TEXTOID &&
+ rdatatype == BPCHAROID);
+ prefix->consttype = rdatatype;
+ }
+
+ /*
+ * If we found an exact-match pattern, generate an "=" indexqual.
+ *
+ * Here and below, check to see whether the desired operator is actually
+ * supported by the index opclass, and fail quietly if not. This allows
+ * us to not be concerned with specific opclasses (except for the legacy
+ * "pattern" cases); any index that correctly implements the operators
+ * will work.
+ */
+ if (pstatus == Pattern_Prefix_Exact)
+ {
+ if (!op_in_opfamily(eqopr, opfamily))
+ return NIL;
+ expr = make_opclause(eqopr, BOOLOID, false,
+ (Expr *) leftop, (Expr *) prefix,
+ InvalidOid, indexcollation);
+ result = list_make1(expr);
+ return result;
+ }
+
+ /*
+ * Otherwise, we have a nonempty required prefix of the values. Some
+ * opclasses support prefix checks directly, otherwise we'll try to
+ * generate a range constraint.
+ */
+ if (OidIsValid(preopr) && op_in_opfamily(preopr, opfamily))
+ {
+ expr = make_opclause(preopr, BOOLOID, false,
+ (Expr *) leftop, (Expr *) prefix,
+ InvalidOid, indexcollation);
+ result = list_make1(expr);
+ return result;
+ }
+
+ /*
+ * Since we need a range constraint, it's only going to work reliably if
+ * the index is collation-insensitive or has "C" collation. Note that
+ * here we are looking at the index's collation, not the expression's
+ * collation -- this test is *not* dependent on the LIKE/regex operator's
+ * collation.
+ */
+ if (collation_aware &&
+ !lc_collate_is_c(indexcollation))
+ return NIL;
+
+ /*
+ * We can always say "x >= prefix".
+ */
+ if (!op_in_opfamily(geopr, opfamily))
+ return NIL;
+ expr = make_opclause(geopr, BOOLOID, false,
+ (Expr *) leftop, (Expr *) prefix,
+ InvalidOid, indexcollation);
+ result = list_make1(expr);
+
+ /*-------
+ * If we can create a string larger than the prefix, we can say
+ * "x < greaterstr". NB: we rely on make_greater_string() to generate
+ * a guaranteed-greater string, not just a probably-greater string.
+ * In general this is only guaranteed in C locale, so we'd better be
+ * using a C-locale index collation.
+ *-------
+ */
+ if (!op_in_opfamily(ltopr, opfamily))
+ return result;
+ fmgr_info(get_opcode(ltopr), &ltproc);
+ greaterstr = make_greater_string(prefix, &ltproc, indexcollation);
+ if (greaterstr)
+ {
+ expr = make_opclause(ltopr, BOOLOID, false,
+ (Expr *) leftop, (Expr *) greaterstr,
+ InvalidOid, indexcollation);
+ result = lappend(result, expr);
+ }
+
+ return result;
+}
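+
+/*
+ * As an illustration of the transformation above: for "col LIKE 'foo%'" on
+ * a text column whose index has "C" collation (or uses text_pattern_ops),
+ * the extracted fixed prefix is 'foo' and the generated index conditions
+ * are "col >= 'foo' AND col < 'fop'", with 'fop' produced by
+ * make_greater_string().  For "col LIKE 'foo'" (no wildcards at all), the
+ * single condition "col = 'foo'" is generated instead.
+ */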
+
+
+/*
+ * patternsel_common - generic code for pattern-match restriction selectivity.
+ *
+ * To support using this from either the operator or function paths, caller
+ * may pass either operator OID or underlying function OID; we look up the
+ * latter from the former if needed. (We could just have patternsel() call
+ * get_opcode(), but the work would be wasted if we don't have a need to
+ * compare a fixed prefix to the pg_statistic data.)
+ *
+ * Note that oprid and/or opfuncid should be for the positive-match operator
+ * even when negate is true.
+ */
+static double
+patternsel_common(PlannerInfo *root,
+ Oid oprid,
+ Oid opfuncid,
+ List *args,
+ int varRelid,
+ Oid collation,
+ Pattern_Type ptype,
+ bool negate)
+{
+ VariableStatData vardata;
+ Node *other;
+ bool varonleft;
+ Datum constval;
+ Oid consttype;
+ Oid vartype;
+ Oid rdatatype;
+ Oid eqopr;
+ Oid ltopr;
+ Oid geopr;
+ Pattern_Prefix_Status pstatus;
+ Const *patt;
+ Const *prefix = NULL;
+ Selectivity rest_selec = 0;
+ double nullfrac = 0.0;
+ double result;
+
+ /*
+ * Initialize result to the appropriate default estimate depending on
+ * whether it's a match or not-match operator.
+ */
+ if (negate)
+ result = 1.0 - DEFAULT_MATCH_SEL;
+ else
+ result = DEFAULT_MATCH_SEL;
+
+ /*
+ * If expression is not variable op constant, then punt and return the
+ * default estimate.
+ */
+ if (!get_restriction_variable(root, args, varRelid,
+ &vardata, &other, &varonleft))
+ return result;
+ if (!varonleft || !IsA(other, Const))
+ {
+ ReleaseVariableStats(vardata);
+ return result;
+ }
+
+ /*
+ * If the constant is NULL, assume operator is strict and return zero, ie,
+ * operator will never return TRUE. (It's zero even for a negator op.)
+ */
+ if (((Const *) other)->constisnull)
+ {
+ ReleaseVariableStats(vardata);
+ return 0.0;
+ }
+ constval = ((Const *) other)->constvalue;
+ consttype = ((Const *) other)->consttype;
+
+ /*
+ * The right-hand const is type text or bytea for all supported operators.
+ * We do not expect to see binary-compatible types here, since
+ * const-folding should have relabeled the const to exactly match the
+ * operator's declared type.
+ */
+ if (consttype != TEXTOID && consttype != BYTEAOID)
+ {
+ ReleaseVariableStats(vardata);
+ return result;
+ }
+
+ /*
+ * Similarly, the exposed type of the left-hand side should be one of
+ * those we know. (Do not look at vardata.atttype, which might be
+ * something binary-compatible but different.) We can use it to identify
+ * the comparison operators and the required type of the comparison
+ * constant, much as in match_pattern_prefix().
+ */
+ vartype = vardata.vartype;
+
+ switch (vartype)
+ {
+ case TEXTOID:
+ eqopr = TextEqualOperator;
+ ltopr = TextLessOperator;
+ geopr = TextGreaterEqualOperator;
+ rdatatype = TEXTOID;
+ break;
+ case NAMEOID:
+
+ /*
+ * Note that here, we need the RHS type to be text, so that the
+ * comparison value isn't improperly truncated to NAMEDATALEN.
+ */
+ eqopr = NameEqualTextOperator;
+ ltopr = NameLessTextOperator;
+ geopr = NameGreaterEqualTextOperator;
+ rdatatype = TEXTOID;
+ break;
+ case BPCHAROID:
+ eqopr = BpcharEqualOperator;
+ ltopr = BpcharLessOperator;
+ geopr = BpcharGreaterEqualOperator;
+ rdatatype = BPCHAROID;
+ break;
+ case BYTEAOID:
+ eqopr = ByteaEqualOperator;
+ ltopr = ByteaLessOperator;
+ geopr = ByteaGreaterEqualOperator;
+ rdatatype = BYTEAOID;
+ break;
+ default:
+ /* Can't get here unless we're attached to the wrong operator */
+ ReleaseVariableStats(vardata);
+ return result;
+ }
+
+ /*
+ * Grab the nullfrac for use below.
+ */
+ if (HeapTupleIsValid(vardata.statsTuple))
+ {
+ Form_pg_statistic stats;
+
+ stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
+ nullfrac = stats->stanullfrac;
+ }
+
+ /*
+ * Pull out any fixed prefix implied by the pattern, and estimate the
+ * fractional selectivity of the remainder of the pattern. Unlike many
+ * other selectivity estimators, we use the pattern operator's actual
+ * collation for this step. This is not because we expect the collation
+ * to make a big difference in the selectivity estimate (it seldom would),
+ * but because we want to be sure we cache compiled regexps under the
+ * right cache key, so that they can be re-used at runtime.
+ */
+ patt = (Const *) other;
+ pstatus = pattern_fixed_prefix(patt, ptype, collation,
+ &prefix, &rest_selec);
+
+ /*
+ * If necessary, coerce the prefix constant to the right type. The only
+ * case where we need to do anything is when converting text to bpchar.
+ * Those two types are binary-compatible, so relabeling the Const node is
+ * sufficient.
+ */
+ if (prefix && prefix->consttype != rdatatype)
+ {
+ Assert(prefix->consttype == TEXTOID &&
+ rdatatype == BPCHAROID);
+ prefix->consttype = rdatatype;
+ }
+
+ if (pstatus == Pattern_Prefix_Exact)
+ {
+ /*
+ * Pattern specifies an exact match, so estimate as for '='
+ */
+ result = var_eq_const(&vardata, eqopr, collation, prefix->constvalue,
+ false, true, false);
+ }
+ else
+ {
+ /*
+ * Not exact-match pattern. If we have a sufficiently large
+ * histogram, estimate selectivity for the histogram part of the
+ * population by counting matches in the histogram. If not, estimate
+ * selectivity of the fixed prefix and remainder of pattern
+ * separately, then combine the two to get an estimate of the
+ * selectivity for the part of the column population represented by
+ * the histogram. (For small histograms, we combine these
+ * approaches.)
+ *
+ * We then add up data for any most-common-values values; these are
+ * not in the histogram population, and we can get exact answers for
+ * them by applying the pattern operator, so there's no reason to
+ * approximate. (If the MCVs cover a significant part of the total
+ * population, this gives us a big leg up in accuracy.)
+ */
+ Selectivity selec;
+ int hist_size;
+ FmgrInfo opproc;
+ double mcv_selec,
+ sumcommon;
+
+ /* Try to use the histogram entries to get selectivity */
+ if (!OidIsValid(opfuncid))
+ opfuncid = get_opcode(oprid);
+ fmgr_info(opfuncid, &opproc);
+
+ selec = histogram_selectivity(&vardata, &opproc, collation,
+ constval, true,
+ 10, 1, &hist_size);
+
+ /* If not at least 100 entries, use the heuristic method */
+ if (hist_size < 100)
+ {
+ Selectivity heursel;
+ Selectivity prefixsel;
+
+ if (pstatus == Pattern_Prefix_Partial)
+ prefixsel = prefix_selectivity(root, &vardata,
+ eqopr, ltopr, geopr,
+ collation,
+ prefix);
+ else
+ prefixsel = 1.0;
+ heursel = prefixsel * rest_selec;
+
+ if (selec < 0) /* fewer than 10 histogram entries? */
+ selec = heursel;
+ else
+ {
+ /*
+ * For histogram sizes from 10 to 100, we combine the
+ * histogram and heuristic selectivities, putting increasingly
+ * more trust in the histogram for larger sizes.
+ */
+ double hist_weight = hist_size / 100.0;
+
+ selec = selec * hist_weight + heursel * (1.0 - hist_weight);
+ }
+ }
+
+ /* In any case, don't believe extremely small or large estimates. */
+ if (selec < 0.0001)
+ selec = 0.0001;
+ else if (selec > 0.9999)
+ selec = 0.9999;
+
+ /*
+ * If we have most-common-values info, add up the fractions of the MCV
+ * entries that satisfy MCV OP PATTERN. These fractions contribute
+ * directly to the result selectivity. Also add up the total fraction
+ * represented by MCV entries.
+ */
+ mcv_selec = mcv_selectivity(&vardata, &opproc, collation,
+ constval, true,
+ &sumcommon);
+
+ /*
+ * Now merge the results from the MCV and histogram calculations,
+ * realizing that the histogram covers only the non-null values that
+ * are not listed in MCV.
+ */
+ selec *= 1.0 - nullfrac - sumcommon;
+ selec += mcv_selec;
+ result = selec;
+ }
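+
+ /*
+ * To illustrate the merge above with made-up numbers: if nullfrac = 0.1,
+ * sumcommon = 0.3, the histogram-part estimate is 0.05 and mcv_selec =
+ * 0.12, then result = 0.05 * (1 - 0.1 - 0.3) + 0.12 = 0.15.
+ */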
+
+ /* now adjust if we wanted not-match rather than match */
+ if (negate)
+ result = 1.0 - result - nullfrac;
+
+ /* result should be in range, but make sure... */
+ CLAMP_PROBABILITY(result);
+
+ if (prefix)
+ {
+ pfree(DatumGetPointer(prefix->constvalue));
+ pfree(prefix);
+ }
+
+ ReleaseVariableStats(vardata);
+
+ return result;
+}
+
+/*
+ * Fix impedance mismatch between SQL-callable functions and patternsel_common
+ */
+static double
+patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Oid operator = PG_GETARG_OID(1);
+ List *args = (List *) PG_GETARG_POINTER(2);
+ int varRelid = PG_GETARG_INT32(3);
+ Oid collation = PG_GET_COLLATION();
+
+ /*
+ * If this is for a NOT LIKE or similar operator, get the corresponding
+ * positive-match operator and work with that.
+ */
+ if (negate)
+ {
+ operator = get_negator(operator);
+ if (!OidIsValid(operator))
+ elog(ERROR, "patternsel called for operator without a negator");
+ }
+
+ return patternsel_common(root,
+ operator,
+ InvalidOid,
+ args,
+ varRelid,
+ collation,
+ ptype,
+ negate);
+}
+
+/*
+ * regexeqsel - Selectivity of regular-expression pattern match.
+ */
+Datum
+regexeqsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, false));
+}
+
+/*
+ * icregexeqsel - Selectivity of case-insensitive regex match.
+ */
+Datum
+icregexeqsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, false));
+}
+
+/*
+ * likesel - Selectivity of LIKE pattern match.
+ */
+Datum
+likesel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, false));
+}
+
+/*
+ * prefixsel - selectivity of prefix operator
+ */
+Datum
+prefixsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
+/*
+ * iclikesel - Selectivity of ILIKE pattern match.
+ */
+Datum
+iclikesel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, false));
+}
+
+/*
+ * regexnesel - Selectivity of regular-expression pattern non-match.
+ */
+Datum
+regexnesel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex, true));
+}
+
+/*
+ * icregexnesel - Selectivity of case-insensitive regex non-match.
+ */
+Datum
+icregexnesel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Regex_IC, true));
+}
+
+/*
+ * nlikesel - Selectivity of LIKE pattern non-match.
+ */
+Datum
+nlikesel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like, true));
+}
+
+/*
+ * icnlikesel - Selectivity of ILIKE pattern non-match.
+ */
+Datum
+icnlikesel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternsel(fcinfo, Pattern_Type_Like_IC, true));
+}
+
+/*
+ * patternjoinsel - Generic code for pattern-match join selectivity.
+ */
+static double
+patternjoinsel(PG_FUNCTION_ARGS, Pattern_Type ptype, bool negate)
+{
+ /* For the moment we just punt. */
+ return negate ? (1.0 - DEFAULT_MATCH_SEL) : DEFAULT_MATCH_SEL;
+}
+
+/*
+ * regexeqjoinsel - Join selectivity of regular-expression pattern match.
+ */
+Datum
+regexeqjoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, false));
+}
+
+/*
+ * icregexeqjoinsel - Join selectivity of case-insensitive regex match.
+ */
+Datum
+icregexeqjoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, false));
+}
+
+/*
+ * likejoinsel - Join selectivity of LIKE pattern match.
+ */
+Datum
+likejoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, false));
+}
+
+/*
+ * prefixjoinsel - Join selectivity of prefix operator
+ */
+Datum
+prefixjoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Prefix, false));
+}
+
+/*
+ * iclikejoinsel - Join selectivity of ILIKE pattern match.
+ */
+Datum
+iclikejoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, false));
+}
+
+/*
+ * regexnejoinsel - Join selectivity of regex non-match.
+ */
+Datum
+regexnejoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex, true));
+}
+
+/*
+ * icregexnejoinsel - Join selectivity of case-insensitive regex non-match.
+ */
+Datum
+icregexnejoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Regex_IC, true));
+}
+
+/*
+ * nlikejoinsel - Join selectivity of LIKE pattern non-match.
+ */
+Datum
+nlikejoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like, true));
+}
+
+/*
+ * icnlikejoinsel - Join selectivity of ILIKE pattern non-match.
+ */
+Datum
+icnlikejoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(patternjoinsel(fcinfo, Pattern_Type_Like_IC, true));
+}
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Pattern analysis functions
+ *
+ * These routines support analysis of LIKE and regular-expression patterns
+ * by the planner/optimizer. It's important that they agree with the
+ * regular-expression code in backend/regex/ and the LIKE code in
+ * backend/utils/adt/like.c. Also, the computation of the fixed prefix
+ * must be conservative: if we report a string longer than the true fixed
+ * prefix, the query may produce actually wrong answers, rather than just
+ * getting a bad selectivity estimate!
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * Extract the fixed prefix, if any, for a pattern.
+ *
+ * *prefix is set to a palloc'd prefix string (in the form of a Const node),
+ * or to NULL if no fixed prefix exists for the pattern.
+ * If rest_selec is not NULL, *rest_selec is set to an estimate of the
+ * selectivity of the remainder of the pattern (without any fixed prefix).
+ * The prefix Const has the same type (TEXT or BYTEA) as the input pattern.
+ *
+ * The return value distinguishes no fixed prefix, a partial prefix,
+ * or an exact-match-only pattern.
+ */
+
+static Pattern_Prefix_Status
+like_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
+ Const **prefix_const, Selectivity *rest_selec)
+{
+ char *match;
+ char *patt;
+ int pattlen;
+ Oid typeid = patt_const->consttype;
+ int pos,
+ match_pos;
+ bool is_multibyte = (pg_database_encoding_max_length() > 1);
+ pg_locale_t locale = 0;
+ bool locale_is_c = false;
+
+ /* the right-hand const is type text or bytea */
+ Assert(typeid == BYTEAOID || typeid == TEXTOID);
+
+ if (case_insensitive)
+ {
+ if (typeid == BYTEAOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("case insensitive matching not supported on type bytea")));
+
+ if (!OidIsValid(collation))
+ {
+ /*
+ * This typically means that the parser could not resolve a
+ * conflict of implicit collations, so report it that way.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for ILIKE"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+ }
+
+ /* If case-insensitive, we need locale info */
+ if (lc_ctype_is_c(collation))
+ locale_is_c = true;
+ else
+ locale = pg_newlocale_from_collation(collation);
+ }
+
+ if (typeid != BYTEAOID)
+ {
+ patt = TextDatumGetCString(patt_const->constvalue);
+ pattlen = strlen(patt);
+ }
+ else
+ {
+ bytea *bstr = DatumGetByteaPP(patt_const->constvalue);
+
+ pattlen = VARSIZE_ANY_EXHDR(bstr);
+ patt = (char *) palloc(pattlen);
+ memcpy(patt, VARDATA_ANY(bstr), pattlen);
+ Assert((Pointer) bstr == DatumGetPointer(patt_const->constvalue));
+ }
+
+ match = palloc(pattlen + 1);
+ match_pos = 0;
+ for (pos = 0; pos < pattlen; pos++)
+ {
+ /* % and _ are wildcard characters in LIKE */
+ if (patt[pos] == '%' ||
+ patt[pos] == '_')
+ break;
+
+ /* Backslash escapes the next character */
+ if (patt[pos] == '\\')
+ {
+ pos++;
+ if (pos >= pattlen)
+ break;
+ }
+
+ /* Stop if case-varying character (it's sort of a wildcard) */
+ if (case_insensitive &&
+ pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
+ break;
+
+ match[match_pos++] = patt[pos];
+ }
+
+ match[match_pos] = '\0';
+
+ if (typeid != BYTEAOID)
+ *prefix_const = string_to_const(match, typeid);
+ else
+ *prefix_const = string_to_bytea_const(match, match_pos);
+
+ if (rest_selec != NULL)
+ *rest_selec = like_selectivity(&patt[pos], pattlen - pos,
+ case_insensitive);
+
+ pfree(patt);
+ pfree(match);
+
+ /* in LIKE, an empty pattern is an exact match! */
+ if (pos == pattlen)
+ return Pattern_Prefix_Exact; /* reached end of pattern, so exact */
+
+ if (match_pos > 0)
+ return Pattern_Prefix_Partial;
+
+ return Pattern_Prefix_None;
+}
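+
+/*
+ * For example, like_fixed_prefix() on the pattern 'abc\_d%ef' extracts the
+ * fixed prefix 'abc_d' (the backslash escapes the underscore) and returns
+ * Pattern_Prefix_Partial, while a wildcard-free pattern such as 'abc'
+ * yields the prefix 'abc' and Pattern_Prefix_Exact.
+ */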
+
+static Pattern_Prefix_Status
+regex_fixed_prefix(Const *patt_const, bool case_insensitive, Oid collation,
+ Const **prefix_const, Selectivity *rest_selec)
+{
+ Oid typeid = patt_const->consttype;
+ char *prefix;
+ bool exact;
+
+ /*
+ * Should be unnecessary, there are no bytea regex operators defined. As
+ * such, it should be noted that the rest of this function has *not* been
+ * made safe for binary (possibly NULL containing) strings.
+ */
+ if (typeid == BYTEAOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("regular-expression matching not supported on type bytea")));
+
+ /* Use the regexp machinery to extract the prefix, if any */
+ prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
+ case_insensitive, collation,
+ &exact);
+
+ if (prefix == NULL)
+ {
+ *prefix_const = NULL;
+
+ if (rest_selec != NULL)
+ {
+ char *patt = TextDatumGetCString(patt_const->constvalue);
+
+ *rest_selec = regex_selectivity(patt, strlen(patt),
+ case_insensitive,
+ 0);
+ pfree(patt);
+ }
+
+ return Pattern_Prefix_None;
+ }
+
+ *prefix_const = string_to_const(prefix, typeid);
+
+ if (rest_selec != NULL)
+ {
+ if (exact)
+ {
+ /* Exact match, so there's no additional selectivity */
+ *rest_selec = 1.0;
+ }
+ else
+ {
+ char *patt = TextDatumGetCString(patt_const->constvalue);
+
+ *rest_selec = regex_selectivity(patt, strlen(patt),
+ case_insensitive,
+ strlen(prefix));
+ pfree(patt);
+ }
+ }
+
+ pfree(prefix);
+
+ if (exact)
+ return Pattern_Prefix_Exact; /* pattern specifies exact match */
+ else
+ return Pattern_Prefix_Partial;
+}
+
+static Pattern_Prefix_Status
+pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
+ Const **prefix, Selectivity *rest_selec)
+{
+ Pattern_Prefix_Status result;
+
+ switch (ptype)
+ {
+ case Pattern_Type_Like:
+ result = like_fixed_prefix(patt, false, collation,
+ prefix, rest_selec);
+ break;
+ case Pattern_Type_Like_IC:
+ result = like_fixed_prefix(patt, true, collation,
+ prefix, rest_selec);
+ break;
+ case Pattern_Type_Regex:
+ result = regex_fixed_prefix(patt, false, collation,
+ prefix, rest_selec);
+ break;
+ case Pattern_Type_Regex_IC:
+ result = regex_fixed_prefix(patt, true, collation,
+ prefix, rest_selec);
+ break;
+ case Pattern_Type_Prefix:
+ /* Prefix type work is trivial. */
+ result = Pattern_Prefix_Partial;
+ *prefix = makeConst(patt->consttype,
+ patt->consttypmod,
+ patt->constcollid,
+ patt->constlen,
+ datumCopy(patt->constvalue,
+ patt->constbyval,
+ patt->constlen),
+ patt->constisnull,
+ patt->constbyval);
+ if (rest_selec != NULL)
+ *rest_selec = 1.0; /* all */
+ break;
+ default:
+ elog(ERROR, "unrecognized ptype: %d", (int) ptype);
+ result = Pattern_Prefix_None; /* keep compiler quiet */
+ break;
+ }
+ return result;
+}
+
+/*
+ * Estimate the selectivity of a fixed prefix for a pattern match.
+ *
+ * A fixed prefix "foo" is estimated as the selectivity of the expression
+ * "variable >= 'foo' AND variable < 'fop'".
+ *
+ * The selectivity estimate is with respect to the portion of the column
+ * population represented by the histogram --- the caller must fold this
+ * together with info about MCVs and NULLs.
+ *
+ * We use the given comparison operators and collation to do the estimation.
+ * The given variable and Const must be of the associated datatype(s).
+ *
+ * XXX Note: we make use of the upper bound to estimate operator selectivity
+ * even if the locale is such that we cannot rely on the upper-bound string.
+ * The selectivity only needs to be approximately right anyway, so it seems
+ * more useful to use the upper-bound code than not.
+ */
+static Selectivity
+prefix_selectivity(PlannerInfo *root, VariableStatData *vardata,
+ Oid eqopr, Oid ltopr, Oid geopr,
+ Oid collation,
+ Const *prefixcon)
+{
+ Selectivity prefixsel;
+ FmgrInfo opproc;
+ Const *greaterstrcon;
+ Selectivity eq_sel;
+
+ /* Estimate the selectivity of "x >= prefix" */
+ fmgr_info(get_opcode(geopr), &opproc);
+
+ prefixsel = ineq_histogram_selectivity(root, vardata,
+ geopr, &opproc, true, true,
+ collation,
+ prefixcon->constvalue,
+ prefixcon->consttype);
+
+ if (prefixsel < 0.0)
+ {
+ /* No histogram is present ... return a suitable default estimate */
+ return DEFAULT_MATCH_SEL;
+ }
+
+ /*
+ * If we can create a string larger than the prefix, say "x < greaterstr".
+ */
+ fmgr_info(get_opcode(ltopr), &opproc);
+ greaterstrcon = make_greater_string(prefixcon, &opproc, collation);
+ if (greaterstrcon)
+ {
+ Selectivity topsel;
+
+ topsel = ineq_histogram_selectivity(root, vardata,
+ ltopr, &opproc, false, false,
+ collation,
+ greaterstrcon->constvalue,
+ greaterstrcon->consttype);
+
+ /* ineq_histogram_selectivity worked before, it shouldn't fail now */
+ Assert(topsel >= 0.0);
+
+ /*
+ * Merge the two selectivities in the same way as for a range query
+ * (see clauselist_selectivity()). Note that we don't need to worry
+ * about double-exclusion of nulls, since ineq_histogram_selectivity
+ * doesn't count those anyway.
+ */
+ prefixsel = topsel + prefixsel - 1.0;
+ }
+
+ /*
+ * If the prefix is long then the two bounding values might be too close
+ * together for the histogram to distinguish them usefully, resulting in a
+ * zero estimate (plus or minus roundoff error). To avoid returning a
+ * ridiculously small estimate, compute the estimated selectivity for
+ * "variable = 'foo'", and clamp to that. (Obviously, the resultant
+ * estimate should be at least that.)
+ *
+ * We apply this even if we couldn't make a greater string. That case
+ * suggests that the prefix is near the maximum possible, and thus
+ * probably off the end of the histogram, and thus we probably got a very
+ * small estimate from the >= condition; so we still need to clamp.
+ */
+ eq_sel = var_eq_const(vardata, eqopr, collation, prefixcon->constvalue,
+ false, true, false);
+
+ prefixsel = Max(prefixsel, eq_sel);
+
+ return prefixsel;
+}
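+
+/*
+ * Numeric illustration of the range merge above (made-up histogram numbers):
+ * if the histogram gives P(x >= 'foo') = 0.55 and P(x < 'fop') = 0.50, the
+ * combined prefix selectivity is 0.55 + 0.50 - 1.0 = 0.05, which is then
+ * clamped to be at least the estimated selectivity of x = 'foo'.
+ */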
+
+
+/*
+ * Estimate the selectivity of a pattern of the specified type.
+ * Note that any fixed prefix of the pattern will have been removed already,
+ * so actually we may be looking at just a fragment of the pattern.
+ *
+ * For now, we use a very simplistic approach: fixed characters reduce the
+ * selectivity a good deal, character ranges reduce it a little,
+ * wildcards (such as % for LIKE or .* for regex) increase it.
+ */
+
+#define FIXED_CHAR_SEL 0.20 /* about 1/5 */
+#define CHAR_RANGE_SEL 0.25
+#define ANY_CHAR_SEL 0.9 /* not 1, since it won't match end-of-string */
+#define FULL_WILDCARD_SEL 5.0
+#define PARTIAL_WILDCARD_SEL 2.0
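+
+/*
+ * For instance, with these constants a LIKE pattern such as '%abc' (which
+ * has no fixed prefix) gets rest_selec = 0.2 * 0.2 * 0.2 = 0.008 from
+ * like_selectivity() below: the leading '%' is skipped, and each of the
+ * three fixed characters contributes a factor of FIXED_CHAR_SEL.
+ */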
+
+static Selectivity
+like_selectivity(const char *patt, int pattlen, bool case_insensitive)
+{
+ Selectivity sel = 1.0;
+ int pos;
+
+ /* Skip any leading wildcard; it's already factored into initial sel */
+ for (pos = 0; pos < pattlen; pos++)
+ {
+ if (patt[pos] != '%' && patt[pos] != '_')
+ break;
+ }
+
+ for (; pos < pattlen; pos++)
+ {
+ /* % and _ are wildcard characters in LIKE */
+ if (patt[pos] == '%')
+ sel *= FULL_WILDCARD_SEL;
+ else if (patt[pos] == '_')
+ sel *= ANY_CHAR_SEL;
+ else if (patt[pos] == '\\')
+ {
+ /* Backslash quotes the next character */
+ pos++;
+ if (pos >= pattlen)
+ break;
+ sel *= FIXED_CHAR_SEL;
+ }
+ else
+ sel *= FIXED_CHAR_SEL;
+ }
+ /* Could get sel > 1 if multiple wildcards */
+ if (sel > 1.0)
+ sel = 1.0;
+ return sel;
+}
+
+static Selectivity
+regex_selectivity_sub(const char *patt, int pattlen, bool case_insensitive)
+{
+ Selectivity sel = 1.0;
+ int paren_depth = 0;
+ int paren_pos = 0; /* dummy init to keep compiler quiet */
+ int pos;
+
+ /* since this function recurses, it could be driven to stack overflow */
+ check_stack_depth();
+
+ for (pos = 0; pos < pattlen; pos++)
+ {
+ if (patt[pos] == '(')
+ {
+ if (paren_depth == 0)
+ paren_pos = pos; /* remember start of parenthesized item */
+ paren_depth++;
+ }
+ else if (patt[pos] == ')' && paren_depth > 0)
+ {
+ paren_depth--;
+ if (paren_depth == 0)
+ sel *= regex_selectivity_sub(patt + (paren_pos + 1),
+ pos - (paren_pos + 1),
+ case_insensitive);
+ }
+ else if (patt[pos] == '|' && paren_depth == 0)
+ {
+ /*
+ * If unquoted | is present at paren level 0 in pattern, we have
+ * multiple alternatives; sum their probabilities.
+ */
+ sel += regex_selectivity_sub(patt + (pos + 1),
+ pattlen - (pos + 1),
+ case_insensitive);
+ break; /* rest of pattern is now processed */
+ }
+ else if (patt[pos] == '[')
+ {
+ bool negclass = false;
+
+ if (patt[++pos] == '^')
+ {
+ negclass = true;
+ pos++;
+ }
+ if (patt[pos] == ']') /* ']' at start of class is not special */
+ pos++;
+ while (pos < pattlen && patt[pos] != ']')
+ pos++;
+ if (paren_depth == 0)
+ sel *= (negclass ? (1.0 - CHAR_RANGE_SEL) : CHAR_RANGE_SEL);
+ }
+ else if (patt[pos] == '.')
+ {
+ if (paren_depth == 0)
+ sel *= ANY_CHAR_SEL;
+ }
+ else if (patt[pos] == '*' ||
+ patt[pos] == '?' ||
+ patt[pos] == '+')
+ {
+ /* Ought to be smarter about quantifiers... */
+ if (paren_depth == 0)
+ sel *= PARTIAL_WILDCARD_SEL;
+ }
+ else if (patt[pos] == '{')
+ {
+ while (pos < pattlen && patt[pos] != '}')
+ pos++;
+ if (paren_depth == 0)
+ sel *= PARTIAL_WILDCARD_SEL;
+ }
+ else if (patt[pos] == '\\')
+ {
+ /* backslash quotes the next character */
+ pos++;
+ if (pos >= pattlen)
+ break;
+ if (paren_depth == 0)
+ sel *= FIXED_CHAR_SEL;
+ }
+ else
+ {
+ if (paren_depth == 0)
+ sel *= FIXED_CHAR_SEL;
+ }
+ }
+ /* Could get sel > 1 if multiple wildcards */
+ if (sel > 1.0)
+ sel = 1.0;
+ return sel;
+}
+
+static Selectivity
+regex_selectivity(const char *patt, int pattlen, bool case_insensitive,
+ int fixed_prefix_len)
+{
+ Selectivity sel;
+
+ /* If patt doesn't end with $, consider it to have a trailing wildcard */
+ if (pattlen > 0 && patt[pattlen - 1] == '$' &&
+ (pattlen == 1 || patt[pattlen - 2] != '\\'))
+ {
+ /* has trailing $ */
+ sel = regex_selectivity_sub(patt, pattlen - 1, case_insensitive);
+ }
+ else
+ {
+ /* no trailing $ */
+ sel = regex_selectivity_sub(patt, pattlen, case_insensitive);
+ sel *= FULL_WILDCARD_SEL;
+ }
+
+ /*
+ * If there's a fixed prefix, discount its selectivity. We have to be
+ * careful here since a very long prefix could result in pow's result
+ * underflowing to zero (in which case "sel" probably has as well).
+ */
+ if (fixed_prefix_len > 0)
+ {
+ double prefixsel = pow(FIXED_CHAR_SEL, fixed_prefix_len);
+
+ if (prefixsel > 0.0)
+ sel /= prefixsel;
+ }
+
+ /* Make sure result stays in range */
+ CLAMP_PROBABILITY(sel);
+ return sel;
+}
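+
+/*
+ * As a rough example, regex_selectivity("x.*y$", 5, false, 0) runs
+ * regex_selectivity_sub() over "x.*y": 0.20 ('x') * 0.9 ('.') * 2.0 ('*')
+ * * 0.20 ('y') = 0.072; the trailing '$' means no extra FULL_WILDCARD_SEL
+ * factor is applied.
+ */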
+
+/*
+ * Check whether char is a letter (and, hence, subject to case-folding)
+ *
+ * In multibyte character sets or with ICU, we can't use isalpha, and it does
+ * not seem worth trying to convert to wchar_t to use iswalpha or u_isalpha.
+ * Instead, just assume any non-ASCII char is potentially case-varying, and
+ * hard-wire knowledge of which ASCII chars are letters.
+ */
+static int
+pattern_char_isalpha(char c, bool is_multibyte,
+ pg_locale_t locale, bool locale_is_c)
+{
+ if (locale_is_c)
+ return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+ else if (is_multibyte && IS_HIGHBIT_SET(c))
+ return true;
+ else if (locale && locale->provider == COLLPROVIDER_ICU)
+ return IS_HIGHBIT_SET(c) ||
+ (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+#ifdef HAVE_LOCALE_T
+ else if (locale && locale->provider == COLLPROVIDER_LIBC)
+ return isalpha_l((unsigned char) c, locale->info.lt);
+#endif
+ else
+ return isalpha((unsigned char) c);
+}
+
+
+/*
+ * For bytea, the increment function need only increment the current byte
+ * (there are no multibyte characters to worry about).
+ */
+static bool
+byte_increment(unsigned char *ptr, int len)
+{
+ if (*ptr >= 255)
+ return false;
+ (*ptr)++;
+ return true;
+}
+
+/*
+ * Try to generate a string greater than the given string or any
+ * string it is a prefix of. If successful, return a palloc'd string
+ * in the form of a Const node; else return NULL.
+ *
+ * The caller must provide the appropriate "less than" comparison function
+ * for testing the strings, along with the collation to use.
+ *
+ * The key requirement here is that given a prefix string, say "foo",
+ * we must be able to generate another string "fop" that is greater than
+ * all strings "foobar" starting with "foo". We can test that we have
+ * generated a string greater than the prefix string, but in non-C collations
+ * that is no bulletproof guarantee that some extension of the prefix does not
+ * sort after it; an example is that "foo " is less than "foo!", but it
+ * is not clear that a "dictionary" sort ordering will consider "foo!" less
+ * than "foo bar". CAUTION: Therefore, this function should be used only for
+ * estimation purposes when working in a non-C collation.
+ *
+ * To try to catch most cases where an extended string might otherwise sort
+ * before the result value, we determine which of the strings "Z", "z", "y",
+ * and "9" is seen as largest by the collation, and append that to the given
+ * prefix before trying to find a string that compares as larger.
+ *
+ * To search for a greater string, we repeatedly "increment" the rightmost
+ * character, using an encoding-specific character incrementer function.
+ * When it's no longer possible to increment the last character, we truncate
+ * off that character and start incrementing the next-to-rightmost.
+ * For example, if "z" were the last character in the sort order, then we
+ * could produce "foo" as a string greater than "fonz".
+ *
+ * This could be rather slow in the worst case, but in most cases we
+ * won't have to try more than one or two strings before succeeding.
+ *
+ * Note that it's important for the character incrementer not to be too anal
+ * about producing every possible character code, since in some cases the only
+ * way to get a larger string is to increment a previous character position.
+ * So we don't want to spend too much time trying every possible character
+ * code at the last position. A good rule of thumb is to be sure that we
+ * don't try more than 256*K values for a K-byte character (and definitely
+ * not 256^K, which is what an exhaustive search would approach).
+ */
+static Const *
+make_greater_string(const Const *str_const, FmgrInfo *ltproc, Oid collation)
+{
+ Oid datatype = str_const->consttype;
+ char *workstr;
+ int len;
+ Datum cmpstr;
+ char *cmptxt = NULL;
+ mbcharacter_incrementer charinc;
+
+ /*
+ * Get a modifiable copy of the prefix string in C-string format, and set
+ * up the string we will compare to as a Datum. In C locale this can just
+ * be the given prefix string, otherwise we need to add a suffix. Type
+ * BYTEA sorts bytewise so it never needs a suffix either.
+ */
+ if (datatype == BYTEAOID)
+ {
+ bytea *bstr = DatumGetByteaPP(str_const->constvalue);
+
+ len = VARSIZE_ANY_EXHDR(bstr);
+ workstr = (char *) palloc(len);
+ memcpy(workstr, VARDATA_ANY(bstr), len);
+ Assert((Pointer) bstr == DatumGetPointer(str_const->constvalue));
+ cmpstr = str_const->constvalue;
+ }
+ else
+ {
+ if (datatype == NAMEOID)
+ workstr = DatumGetCString(DirectFunctionCall1(nameout,
+ str_const->constvalue));
+ else
+ workstr = TextDatumGetCString(str_const->constvalue);
+ len = strlen(workstr);
+ if (lc_collate_is_c(collation) || len == 0)
+ cmpstr = str_const->constvalue;
+ else
+ {
+ /* If first time through, determine the suffix to use */
+ static char suffixchar = 0;
+ static Oid suffixcollation = 0;
+
+ if (!suffixchar || suffixcollation != collation)
+ {
+ char *best;
+
+ best = "Z";
+ if (varstr_cmp(best, 1, "z", 1, collation) < 0)
+ best = "z";
+ if (varstr_cmp(best, 1, "y", 1, collation) < 0)
+ best = "y";
+ if (varstr_cmp(best, 1, "9", 1, collation) < 0)
+ best = "9";
+ suffixchar = *best;
+ suffixcollation = collation;
+ }
+
+ /* And build the string to compare to */
+ if (datatype == NAMEOID)
+ {
+ cmptxt = palloc(len + 2);
+ memcpy(cmptxt, workstr, len);
+ cmptxt[len] = suffixchar;
+ cmptxt[len + 1] = '\0';
+ cmpstr = PointerGetDatum(cmptxt);
+ }
+ else
+ {
+ cmptxt = palloc(VARHDRSZ + len + 1);
+ SET_VARSIZE(cmptxt, VARHDRSZ + len + 1);
+ memcpy(VARDATA(cmptxt), workstr, len);
+ *(VARDATA(cmptxt) + len) = suffixchar;
+ cmpstr = PointerGetDatum(cmptxt);
+ }
+ }
+ }
+
+ /* Select appropriate character-incrementer function */
+ if (datatype == BYTEAOID)
+ charinc = byte_increment;
+ else
+ charinc = pg_database_encoding_character_incrementer();
+
+ /* And search ... */
+ while (len > 0)
+ {
+ int charlen;
+ unsigned char *lastchar;
+
+ /* Identify the last character --- for bytea, just the last byte */
+ if (datatype == BYTEAOID)
+ charlen = 1;
+ else
+ charlen = len - pg_mbcliplen(workstr, len, len - 1);
+ lastchar = (unsigned char *) (workstr + len - charlen);
+
+ /*
+ * Try to generate a larger string by incrementing the last character
+ * (for BYTEA, we treat each byte as a character).
+ *
+ * Note: the incrementer function is expected to return true if it's
+ * generated a valid-per-the-encoding new character, otherwise false.
+ * The contents of the character on false return are unspecified.
+ */
+ while (charinc(lastchar, charlen))
+ {
+ Const *workstr_const;
+
+ if (datatype == BYTEAOID)
+ workstr_const = string_to_bytea_const(workstr, len);
+ else
+ workstr_const = string_to_const(workstr, datatype);
+
+ if (DatumGetBool(FunctionCall2Coll(ltproc,
+ collation,
+ cmpstr,
+ workstr_const->constvalue)))
+ {
+ /* Successfully made a string larger than cmpstr */
+ if (cmptxt)
+ pfree(cmptxt);
+ pfree(workstr);
+ return workstr_const;
+ }
+
+ /* No good, release unusable value and try again */
+ pfree(DatumGetPointer(workstr_const->constvalue));
+ pfree(workstr_const);
+ }
+
+ /*
+ * No luck here, so truncate off the last character and try to
+ * increment the next one.
+ */
+ len -= charlen;
+ workstr[len] = '\0';
+ }
+
+ /* Failed... */
+ if (cmptxt)
+ pfree(cmptxt);
+ pfree(workstr);
+
+ return NULL;
+}
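+
+/*
+ * For instance, given the prefix 'foo' under the C collation, the first
+ * candidate tried is 'fop' (the last byte is incremented), which already
+ * compares greater than 'foo', so it is returned immediately.
+ */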
+
+/*
+ * Generate a Datum of the appropriate type from a C string.
+ * Note that all of the supported types are pass-by-ref, so the
+ * returned value should be pfree'd if no longer needed.
+ */
+static Datum
+string_to_datum(const char *str, Oid datatype)
+{
+ Assert(str != NULL);
+
+ /*
+ * We cheat a little by assuming that CStringGetTextDatum() will do for
+ * bpchar and varchar constants too...
+ */
+ if (datatype == NAMEOID)
+ return DirectFunctionCall1(namein, CStringGetDatum(str));
+ else if (datatype == BYTEAOID)
+ return DirectFunctionCall1(byteain, CStringGetDatum(str));
+ else
+ return CStringGetTextDatum(str);
+}
+
+/*
+ * Generate a Const node of the appropriate type from a C string.
+ */
+static Const *
+string_to_const(const char *str, Oid datatype)
+{
+ Datum conval = string_to_datum(str, datatype);
+ Oid collation;
+ int constlen;
+
+ /*
+ * We only need to support a few datatypes here, so hard-wire properties
+ * instead of incurring the expense of catalog lookups.
+ */
+ switch (datatype)
+ {
+ case TEXTOID:
+ case VARCHAROID:
+ case BPCHAROID:
+ collation = DEFAULT_COLLATION_OID;
+ constlen = -1;
+ break;
+
+ case NAMEOID:
+ collation = C_COLLATION_OID;
+ constlen = NAMEDATALEN;
+ break;
+
+ case BYTEAOID:
+ collation = InvalidOid;
+ constlen = -1;
+ break;
+
+ default:
+ elog(ERROR, "unexpected datatype in string_to_const: %u",
+ datatype);
+ return NULL;
+ }
+
+ return makeConst(datatype, -1, collation, constlen,
+ conval, false, false);
+}
+
+/*
+ * Generate a Const node of bytea type from a binary C string and a length.
+ */
+static Const *
+string_to_bytea_const(const char *str, size_t str_len)
+{
+ bytea *bstr = palloc(VARHDRSZ + str_len);
+ Datum conval;
+
+ memcpy(VARDATA(bstr), str, str_len);
+ SET_VARSIZE(bstr, VARHDRSZ + str_len);
+ conval = PointerGetDatum(bstr);
+
+ return makeConst(BYTEAOID, -1, InvalidOid, -1, conval, false, false);
+}
diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c
new file mode 100644
index 0000000..023a004
--- /dev/null
+++ b/src/backend/utils/adt/lockfuncs.c
@@ -0,0 +1,1069 @@
+/*-------------------------------------------------------------------------
+ *
+ * lockfuncs.c
+ * Functions for SQL access to various lock-manager capabilities.
+ *
+ * Copyright (c) 2002-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/lockfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "access/xact.h"
+#include "catalog/pg_type.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "storage/predicate_internals.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+
+
+/*
+ * This must match enum LockTagType! Also, be sure to document any changes
+ * in the docs for the pg_locks view and for wait event types.
+ */
+const char *const LockTagTypeNames[] = {
+ "relation",
+ "extend",
+ "frozenid",
+ "page",
+ "tuple",
+ "transactionid",
+ "virtualxid",
+ "spectoken",
+ "object",
+ "userlock",
+ "advisory"
+};
+
+StaticAssertDecl(lengthof(LockTagTypeNames) == (LOCKTAG_ADVISORY + 1),
+ "array length mismatch");
+
+/* This must match enum PredicateLockTargetType (predicate_internals.h) */
+static const char *const PredicateLockTagTypeNames[] = {
+ "relation",
+ "page",
+ "tuple"
+};
+
+StaticAssertDecl(lengthof(PredicateLockTagTypeNames) == (PREDLOCKTAG_TUPLE + 1),
+ "array length mismatch");
+
+/* Working status for pg_lock_status */
+typedef struct
+{
+ LockData *lockData; /* state data from lmgr */
+ int currIdx; /* current PROCLOCK index */
+ PredicateLockData *predLockData; /* state data for pred locks */
+ int predLockIdx; /* current index for pred lock */
+} PG_Lock_Status;
+
+/* Number of columns in pg_locks output */
+#define NUM_LOCK_STATUS_COLUMNS 16
+
+/*
+ * VXIDGetDatum - Construct a text representation of a VXID
+ *
+ * This is currently only used in pg_lock_status, so we put it here.
+ */
+static Datum
+VXIDGetDatum(BackendId bid, LocalTransactionId lxid)
+{
+ /*
+ * The representation is "<bid>/<lxid>", decimal and unsigned decimal
+ * respectively. Note that elog.c also knows how to format a vxid.
+ */
+ char vxidstr[32];
+
+ snprintf(vxidstr, sizeof(vxidstr), "%d/%u", bid, lxid);
+
+ return CStringGetTextDatum(vxidstr);
+}
+
+
+/*
+ * pg_lock_status - produce a view with one row per held or awaited lock mode
+ */
+Datum
+pg_lock_status(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ PG_Lock_Status *mystatus;
+ LockData *lockData;
+ PredicateLockData *predLockData;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ TupleDesc tupdesc;
+ MemoryContext oldcontext;
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ /*
+ * switch to memory context appropriate for multiple function calls
+ */
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ /* build tupdesc for result tuples */
+ /* this had better match function's declaration in pg_proc.h */
+ tupdesc = CreateTemplateTupleDesc(NUM_LOCK_STATUS_COLUMNS);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "locktype",
+ TEXTOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 2, "database",
+ OIDOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 3, "relation",
+ OIDOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 4, "page",
+ INT4OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 5, "tuple",
+ INT2OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 6, "virtualxid",
+ TEXTOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 7, "transactionid",
+ XIDOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 8, "classid",
+ OIDOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 9, "objid",
+ OIDOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 10, "objsubid",
+ INT2OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 11, "virtualtransaction",
+ TEXTOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 12, "pid",
+ INT4OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 13, "mode",
+ TEXTOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 14, "granted",
+ BOOLOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 15, "fastpath",
+ BOOLOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 16, "waitstart",
+ TIMESTAMPTZOID, -1, 0);
+
+ funcctx->tuple_desc = BlessTupleDesc(tupdesc);
+
+ /*
+ * Collect all the locking information that we will format and send
+ * out as a result set.
+ */
+ mystatus = (PG_Lock_Status *) palloc(sizeof(PG_Lock_Status));
+ funcctx->user_fctx = (void *) mystatus;
+
+ mystatus->lockData = GetLockStatusData();
+ mystatus->currIdx = 0;
+ mystatus->predLockData = GetPredicateLockStatusData();
+ mystatus->predLockIdx = 0;
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ mystatus = (PG_Lock_Status *) funcctx->user_fctx;
+ lockData = mystatus->lockData;
+
+ while (mystatus->currIdx < lockData->nelements)
+ {
+ bool granted;
+ LOCKMODE mode = 0;
+ const char *locktypename;
+ char tnbuf[32];
+ Datum values[NUM_LOCK_STATUS_COLUMNS];
+ bool nulls[NUM_LOCK_STATUS_COLUMNS];
+ HeapTuple tuple;
+ Datum result;
+ LockInstanceData *instance;
+
+ instance = &(lockData->locks[mystatus->currIdx]);
+
+ /*
+ * Look to see if there are any held lock modes in this PROCLOCK. If
+ * so, report, and destructively modify lockData so we don't report
+ * again.
+ */
+ granted = false;
+ if (instance->holdMask)
+ {
+ for (mode = 0; mode < MAX_LOCKMODES; mode++)
+ {
+ if (instance->holdMask & LOCKBIT_ON(mode))
+ {
+ granted = true;
+ instance->holdMask &= LOCKBIT_OFF(mode);
+ break;
+ }
+ }
+ }
+
+ /*
+ * If no (more) held modes to report, see if PROC is waiting for a
+ * lock on this lock.
+ */
+ if (!granted)
+ {
+ if (instance->waitLockMode != NoLock)
+ {
+ /* Yes, so report it with proper mode */
+ mode = instance->waitLockMode;
+
+ /*
+ * We are now done with this PROCLOCK, so advance pointer to
+ * continue with next one on next call.
+ */
+ mystatus->currIdx++;
+ }
+ else
+ {
+ /*
+ * Okay, we've displayed all the locks associated with this
+ * PROCLOCK, proceed to the next one.
+ */
+ mystatus->currIdx++;
+ continue;
+ }
+ }
+
+ /*
+ * Form tuple with appropriate data.
+ */
+ MemSet(values, 0, sizeof(values));
+ MemSet(nulls, false, sizeof(nulls));
+
+ if (instance->locktag.locktag_type <= LOCKTAG_LAST_TYPE)
+ locktypename = LockTagTypeNames[instance->locktag.locktag_type];
+ else
+ {
+ snprintf(tnbuf, sizeof(tnbuf), "unknown %d",
+ (int) instance->locktag.locktag_type);
+ locktypename = tnbuf;
+ }
+ values[0] = CStringGetTextDatum(locktypename);
+
+ switch ((LockTagType) instance->locktag.locktag_type)
+ {
+ case LOCKTAG_RELATION:
+ case LOCKTAG_RELATION_EXTEND:
+ values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1);
+ values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2);
+ nulls[3] = true;
+ nulls[4] = true;
+ nulls[5] = true;
+ nulls[6] = true;
+ nulls[7] = true;
+ nulls[8] = true;
+ nulls[9] = true;
+ break;
+ case LOCKTAG_DATABASE_FROZEN_IDS:
+ values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1);
+ nulls[2] = true;
+ nulls[3] = true;
+ nulls[4] = true;
+ nulls[5] = true;
+ nulls[6] = true;
+ nulls[7] = true;
+ nulls[8] = true;
+ nulls[9] = true;
+ break;
+ case LOCKTAG_PAGE:
+ values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1);
+ values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2);
+ values[3] = UInt32GetDatum(instance->locktag.locktag_field3);
+ nulls[4] = true;
+ nulls[5] = true;
+ nulls[6] = true;
+ nulls[7] = true;
+ nulls[8] = true;
+ nulls[9] = true;
+ break;
+ case LOCKTAG_TUPLE:
+ values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1);
+ values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2);
+ values[3] = UInt32GetDatum(instance->locktag.locktag_field3);
+ values[4] = UInt16GetDatum(instance->locktag.locktag_field4);
+ nulls[5] = true;
+ nulls[6] = true;
+ nulls[7] = true;
+ nulls[8] = true;
+ nulls[9] = true;
+ break;
+ case LOCKTAG_TRANSACTION:
+ values[6] =
+ TransactionIdGetDatum(instance->locktag.locktag_field1);
+ nulls[1] = true;
+ nulls[2] = true;
+ nulls[3] = true;
+ nulls[4] = true;
+ nulls[5] = true;
+ nulls[7] = true;
+ nulls[8] = true;
+ nulls[9] = true;
+ break;
+ case LOCKTAG_VIRTUALTRANSACTION:
+ values[5] = VXIDGetDatum(instance->locktag.locktag_field1,
+ instance->locktag.locktag_field2);
+ nulls[1] = true;
+ nulls[2] = true;
+ nulls[3] = true;
+ nulls[4] = true;
+ nulls[6] = true;
+ nulls[7] = true;
+ nulls[8] = true;
+ nulls[9] = true;
+ break;
+ case LOCKTAG_OBJECT:
+ case LOCKTAG_USERLOCK:
+ case LOCKTAG_ADVISORY:
+ default: /* treat unknown locktags like OBJECT */
+ values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1);
+ values[7] = ObjectIdGetDatum(instance->locktag.locktag_field2);
+ values[8] = ObjectIdGetDatum(instance->locktag.locktag_field3);
+ values[9] = Int16GetDatum(instance->locktag.locktag_field4);
+ nulls[2] = true;
+ nulls[3] = true;
+ nulls[4] = true;
+ nulls[5] = true;
+ nulls[6] = true;
+ break;
+ }
+
+ values[10] = VXIDGetDatum(instance->backend, instance->lxid);
+ if (instance->pid != 0)
+ values[11] = Int32GetDatum(instance->pid);
+ else
+ nulls[11] = true;
+ values[12] = CStringGetTextDatum(GetLockmodeName(instance->locktag.locktag_lockmethodid, mode));
+ values[13] = BoolGetDatum(granted);
+ values[14] = BoolGetDatum(instance->fastpath);
+ if (!granted && instance->waitStart != 0)
+ values[15] = TimestampTzGetDatum(instance->waitStart);
+ else
+ nulls[15] = true;
+
+ tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+ result = HeapTupleGetDatum(tuple);
+ SRF_RETURN_NEXT(funcctx, result);
+ }
+
+ /*
+ * Have returned all regular locks. Now start on the SIREAD predicate
+ * locks.
+ */
+ predLockData = mystatus->predLockData;
+ if (mystatus->predLockIdx < predLockData->nelements)
+ {
+ PredicateLockTargetType lockType;
+
+ PREDICATELOCKTARGETTAG *predTag = &(predLockData->locktags[mystatus->predLockIdx]);
+ SERIALIZABLEXACT *xact = &(predLockData->xacts[mystatus->predLockIdx]);
+ Datum values[NUM_LOCK_STATUS_COLUMNS];
+ bool nulls[NUM_LOCK_STATUS_COLUMNS];
+ HeapTuple tuple;
+ Datum result;
+
+ mystatus->predLockIdx++;
+
+ /*
+ * Form tuple with appropriate data.
+ */
+ MemSet(values, 0, sizeof(values));
+ MemSet(nulls, false, sizeof(nulls));
+
+ /* lock type */
+ lockType = GET_PREDICATELOCKTARGETTAG_TYPE(*predTag);
+
+ values[0] = CStringGetTextDatum(PredicateLockTagTypeNames[lockType]);
+
+ /* lock target */
+ values[1] = GET_PREDICATELOCKTARGETTAG_DB(*predTag);
+ values[2] = GET_PREDICATELOCKTARGETTAG_RELATION(*predTag);
+ if (lockType == PREDLOCKTAG_TUPLE)
+ values[4] = GET_PREDICATELOCKTARGETTAG_OFFSET(*predTag);
+ else
+ nulls[4] = true;
+ if ((lockType == PREDLOCKTAG_TUPLE) ||
+ (lockType == PREDLOCKTAG_PAGE))
+ values[3] = GET_PREDICATELOCKTARGETTAG_PAGE(*predTag);
+ else
+ nulls[3] = true;
+
+ /* these fields are targets for other types of locks */
+ nulls[5] = true; /* virtualxid */
+ nulls[6] = true; /* transactionid */
+ nulls[7] = true; /* classid */
+ nulls[8] = true; /* objid */
+ nulls[9] = true; /* objsubid */
+
+ /* lock holder */
+ values[10] = VXIDGetDatum(xact->vxid.backendId,
+ xact->vxid.localTransactionId);
+ if (xact->pid != 0)
+ values[11] = Int32GetDatum(xact->pid);
+ else
+ nulls[11] = true;
+
+ /*
+ * Lock mode. Currently all predicate locks are SIReadLocks, which are
+ * always held (never waiting) and have no fast path
+ */
+ values[12] = CStringGetTextDatum("SIReadLock");
+ values[13] = BoolGetDatum(true);
+ values[14] = BoolGetDatum(false);
+ nulls[15] = true;
+
+ tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+ result = HeapTupleGetDatum(tuple);
+ SRF_RETURN_NEXT(funcctx, result);
+ }
+
+ SRF_RETURN_DONE(funcctx);
+}
+
+
+/*
+ * pg_blocking_pids - produce an array of the PIDs blocking given PID
+ *
+ * The reported PIDs are those that hold a lock conflicting with blocked_pid's
+ * current request (hard block), or are requesting such a lock and are ahead
+ * of blocked_pid in the lock's wait queue (soft block).
+ *
+ * In parallel-query cases, we report all PIDs blocking any member of the
+ * given PID's lock group, and the reported PIDs are those of the blocking
+ * PIDs' lock group leaders. This allows callers to compare the result to
+ * lists of clients' pg_backend_pid() results even during a parallel query.
+ *
+ * Parallel query makes it possible for there to be duplicate PIDs in the
+ * result (either because multiple waiters are blocked by same PID, or
+ * because multiple blockers have same group leader PID). We do not bother
+ * to eliminate such duplicates from the result.
+ *
+ * We need not consider predicate locks here, since those don't block anything.
+ */
+Datum
+pg_blocking_pids(PG_FUNCTION_ARGS)
+{
+ int blocked_pid = PG_GETARG_INT32(0);
+ Datum *arrayelems;
+ int narrayelems;
+ BlockedProcsData *lockData; /* state data from lmgr */
+ int i,
+ j;
+
+ /* Collect a snapshot of lock manager state */
+ lockData = GetBlockerStatusData(blocked_pid);
+
+ /* We can't need more output entries than there are reported PROCLOCKs */
+ arrayelems = (Datum *) palloc(lockData->nlocks * sizeof(Datum));
+ narrayelems = 0;
+
+ /* For each blocked proc in the lock group ... */
+ for (i = 0; i < lockData->nprocs; i++)
+ {
+ BlockedProcData *bproc = &lockData->procs[i];
+ LockInstanceData *instances = &lockData->locks[bproc->first_lock];
+ int *preceding_waiters = &lockData->waiter_pids[bproc->first_waiter];
+ LockInstanceData *blocked_instance;
+ LockMethod lockMethodTable;
+ int conflictMask;
+
+ /*
+ * Locate the blocked proc's own entry in the LockInstanceData array.
+ * There should be exactly one matching entry.
+ */
+ blocked_instance = NULL;
+ for (j = 0; j < bproc->num_locks; j++)
+ {
+ LockInstanceData *instance = &(instances[j]);
+
+ if (instance->pid == bproc->pid)
+ {
+ Assert(blocked_instance == NULL);
+ blocked_instance = instance;
+ }
+ }
+ Assert(blocked_instance != NULL);
+
+ lockMethodTable = GetLockTagsMethodTable(&(blocked_instance->locktag));
+ conflictMask = lockMethodTable->conflictTab[blocked_instance->waitLockMode];
+
+ /* Now scan the PROCLOCK data for conflicting procs */
+ for (j = 0; j < bproc->num_locks; j++)
+ {
+ LockInstanceData *instance = &(instances[j]);
+
+ /* A proc never blocks itself, so ignore that entry */
+ if (instance == blocked_instance)
+ continue;
+ /* Members of same lock group never block each other, either */
+ if (instance->leaderPid == blocked_instance->leaderPid)
+ continue;
+
+ if (conflictMask & instance->holdMask)
+ {
+ /* hard block: blocked by lock already held by this entry */
+ }
+ else if (instance->waitLockMode != NoLock &&
+ (conflictMask & LOCKBIT_ON(instance->waitLockMode)))
+ {
+ /* conflict in lock requests; who's in front in wait queue? */
+ bool ahead = false;
+ int k;
+
+ for (k = 0; k < bproc->num_waiters; k++)
+ {
+ if (preceding_waiters[k] == instance->pid)
+ {
+ /* soft block: this entry is ahead of blocked proc */
+ ahead = true;
+ break;
+ }
+ }
+ if (!ahead)
+ continue; /* not blocked by this entry */
+ }
+ else
+ {
+ /* not blocked by this entry */
+ continue;
+ }
+
+ /* blocked by this entry, so emit a record */
+ arrayelems[narrayelems++] = Int32GetDatum(instance->leaderPid);
+ }
+ }
+
+ /* Assert we didn't overrun arrayelems[] */
+ Assert(narrayelems <= lockData->nlocks);
+
+ /* Construct array, using hardwired knowledge about int4 type */
+ PG_RETURN_ARRAYTYPE_P(construct_array(arrayelems, narrayelems,
+ INT4OID,
+ sizeof(int32), true, TYPALIGN_INT));
+}
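+
+/*
+ * Illustrative use from SQL (a sketch; the PIDs and results are of course
+ * session-dependent):
+ *
+ *     SELECT pid, pg_blocking_pids(pid)
+ *     FROM pg_stat_activity
+ *     WHERE wait_event_type = 'Lock';
+ *
+ * Each result is an int4[] of lock-group-leader PIDs blocking that backend,
+ * possibly containing duplicates as noted above.
+ */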
+
+
+/*
+ * pg_safe_snapshot_blocking_pids - produce an array of the PIDs blocking
+ * given PID from getting a safe snapshot
+ *
+ * XXX this does not consider parallel-query cases; not clear how big a
+ * problem that is in practice
+ */
+Datum
+pg_safe_snapshot_blocking_pids(PG_FUNCTION_ARGS)
+{
+ int blocked_pid = PG_GETARG_INT32(0);
+ int *blockers;
+ int num_blockers;
+ Datum *blocker_datums;
+
+ /* A buffer big enough for any possible blocker list without truncation */
+ blockers = (int *) palloc(MaxBackends * sizeof(int));
+
+ /* Collect a snapshot of processes waited for by GetSafeSnapshot */
+ num_blockers =
+ GetSafeSnapshotBlockingPids(blocked_pid, blockers, MaxBackends);
+
+ /* Convert int array to Datum array */
+ if (num_blockers > 0)
+ {
+ int i;
+
+ blocker_datums = (Datum *) palloc(num_blockers * sizeof(Datum));
+ for (i = 0; i < num_blockers; ++i)
+ blocker_datums[i] = Int32GetDatum(blockers[i]);
+ }
+ else
+ blocker_datums = NULL;
+
+ /* Construct array, using hardwired knowledge about int4 type */
+ PG_RETURN_ARRAYTYPE_P(construct_array(blocker_datums, num_blockers,
+ INT4OID,
+ sizeof(int32), true, TYPALIGN_INT));
+}
+
+
+/*
+ * pg_isolation_test_session_is_blocked - support function for isolationtester
+ *
+ * Check if specified PID is blocked by any of the PIDs listed in the second
+ * argument. Currently, this looks for blocking caused by waiting for
+ * heavyweight locks or safe snapshots. We ignore blockage caused by PIDs
+ * not directly under the isolationtester's control, e.g. autovacuum.
+ *
+ * This is an undocumented function intended for use by the isolation tester,
+ * and may change in future releases as required for testing purposes.
+ */
+Datum
+pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS)
+{
+ int blocked_pid = PG_GETARG_INT32(0);
+ ArrayType *interesting_pids_a = PG_GETARG_ARRAYTYPE_P(1);
+ ArrayType *blocking_pids_a;
+ int32 *interesting_pids;
+ int32 *blocking_pids;
+ int num_interesting_pids;
+ int num_blocking_pids;
+ int dummy;
+ int i,
+ j;
+
+ /* Validate the passed-in array */
+ Assert(ARR_ELEMTYPE(interesting_pids_a) == INT4OID);
+ if (array_contains_nulls(interesting_pids_a))
+ elog(ERROR, "array must not contain nulls");
+ interesting_pids = (int32 *) ARR_DATA_PTR(interesting_pids_a);
+ num_interesting_pids = ArrayGetNItems(ARR_NDIM(interesting_pids_a),
+ ARR_DIMS(interesting_pids_a));
+
+ /*
+ * Get the PIDs of all sessions blocking the given session's attempt to
+ * acquire heavyweight locks.
+ */
+ blocking_pids_a =
+ DatumGetArrayTypeP(DirectFunctionCall1(pg_blocking_pids, blocked_pid));
+
+ Assert(ARR_ELEMTYPE(blocking_pids_a) == INT4OID);
+ Assert(!array_contains_nulls(blocking_pids_a));
+ blocking_pids = (int32 *) ARR_DATA_PTR(blocking_pids_a);
+ num_blocking_pids = ArrayGetNItems(ARR_NDIM(blocking_pids_a),
+ ARR_DIMS(blocking_pids_a));
+
+ /*
+ * Check if any of these are in the list of interesting PIDs, that being
+ * the sessions that the isolation tester is running. We don't use
+ * "arrayoverlaps" here, because it would lead to cache lookups and one of
+ * our goals is to run quickly with debug_discard_caches > 0. We expect
+ * blocking_pids to be usually empty and otherwise a very small number in
+ * isolation tester cases, so make that the outer loop of a naive search
+ * for a match.
+ */
+ for (i = 0; i < num_blocking_pids; i++)
+ for (j = 0; j < num_interesting_pids; j++)
+ {
+ if (blocking_pids[i] == interesting_pids[j])
+ PG_RETURN_BOOL(true);
+ }
+
+ /*
+ * Check if blocked_pid is waiting for a safe snapshot. We could in
+ * theory check the resulting array of blocker PIDs against the
+ * interesting PIDs list, but since there is no danger of autovacuum
+ * blocking GetSafeSnapshot there seems to be no point in expending cycles
+ * on allocating a buffer and searching for overlap; so it's presently
+ * sufficient for the isolation tester's purposes to use a single element
+ * buffer and check if the number of safe snapshot blockers is non-zero.
+ */
+ if (GetSafeSnapshotBlockingPids(blocked_pid, &dummy, 1) > 0)
+ PG_RETURN_BOOL(true);
+
+ PG_RETURN_BOOL(false);
+}
+
+
+/*
+ * Functions for manipulating advisory locks
+ *
+ * We make use of the locktag fields as follows:
+ *
+ * field1: MyDatabaseId ... ensures locks are local to each database
+ * field2: first of 2 int4 keys, or high-order half of an int8 key
+ * field3: second of 2 int4 keys, or low-order half of an int8 key
+ * field4: 1 if using an int8 key, 2 if using 2 int4 keys
+ */
+#define SET_LOCKTAG_INT64(tag, key64) \
+ SET_LOCKTAG_ADVISORY(tag, \
+ MyDatabaseId, \
+ (uint32) ((key64) >> 32), \
+ (uint32) (key64), \
+ 1)
+#define SET_LOCKTAG_INT32(tag, key1, key2) \
+ SET_LOCKTAG_ADVISORY(tag, MyDatabaseId, key1, key2, 2)
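+
+/*
+ * For example (illustrative values), SET_LOCKTAG_INT64(tag, 0x0000000100000002)
+ * stores 0x00000001 in field2 (high half), 0x00000002 in field3 (low half)
+ * and 1 in field4, while SET_LOCKTAG_INT32(tag, 10, 20) stores 10, 20 and 2
+ * respectively.
+ */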
+
+/*
+ * pg_advisory_lock(int8) - acquire exclusive lock on an int8 key
+ */
+Datum
+pg_advisory_lock_int8(PG_FUNCTION_ARGS)
+{
+ int64 key = PG_GETARG_INT64(0);
+ LOCKTAG tag;
+
+ SET_LOCKTAG_INT64(tag, key);
+
+ (void) LockAcquire(&tag, ExclusiveLock, true, false);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * pg_advisory_xact_lock(int8) - acquire xact scoped
+ * exclusive lock on an int8 key
+ */
+Datum
+pg_advisory_xact_lock_int8(PG_FUNCTION_ARGS)
+{
+ int64 key = PG_GETARG_INT64(0);
+ LOCKTAG tag;
+
+ SET_LOCKTAG_INT64(tag, key);
+
+ (void) LockAcquire(&tag, ExclusiveLock, false, false);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * pg_advisory_lock_shared(int8) - acquire share lock on an int8 key
+ */
+Datum
+pg_advisory_lock_shared_int8(PG_FUNCTION_ARGS)
+{
+ int64 key = PG_GETARG_INT64(0);
+ LOCKTAG tag;
+
+ SET_LOCKTAG_INT64(tag, key);
+
+ (void) LockAcquire(&tag, ShareLock, true, false);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * pg_advisory_xact_lock_shared(int8) - acquire xact scoped
+ * share lock on an int8 key
+ */
+Datum
+pg_advisory_xact_lock_shared_int8(PG_FUNCTION_ARGS)
+{
+ int64 key = PG_GETARG_INT64(0);
+ LOCKTAG tag;
+
+ SET_LOCKTAG_INT64(tag, key);
+
+ (void) LockAcquire(&tag, ShareLock, false, false);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * pg_try_advisory_lock(int8) - acquire exclusive lock on an int8 key, no wait
+ *
+ * Returns true if successful, false if lock not available
+ */
+Datum
+pg_try_advisory_lock_int8(PG_FUNCTION_ARGS)
+{
+ int64 key = PG_GETARG_INT64(0);
+ LOCKTAG tag;
+ LockAcquireResult res;
+
+ SET_LOCKTAG_INT64(tag, key);
+
+ res = LockAcquire(&tag, ExclusiveLock, true, true);
+
+ PG_RETURN_BOOL(res != LOCKACQUIRE_NOT_AVAIL);
+}
+
+/*
+ * pg_try_advisory_xact_lock(int8) - acquire xact scoped
+ * exclusive lock on an int8 key, no wait
+ *
+ * Returns true if successful, false if lock not available
+ */
+Datum
+pg_try_advisory_xact_lock_int8(PG_FUNCTION_ARGS)
+{
+ int64 key = PG_GETARG_INT64(0);
+ LOCKTAG tag;
+ LockAcquireResult res;
+
+ SET_LOCKTAG_INT64(tag, key);
+
+ res = LockAcquire(&tag, ExclusiveLock, false, true);
+
+ PG_RETURN_BOOL(res != LOCKACQUIRE_NOT_AVAIL);
+}
+
+/*
+ * pg_try_advisory_lock_shared(int8) - acquire share lock on an int8 key, no wait
+ *
+ * Returns true if successful, false if lock not available
+ */
+Datum
+pg_try_advisory_lock_shared_int8(PG_FUNCTION_ARGS)
+{
+ int64 key = PG_GETARG_INT64(0);
+ LOCKTAG tag;
+ LockAcquireResult res;
+
+ SET_LOCKTAG_INT64(tag, key);
+
+ res = LockAcquire(&tag, ShareLock, true, true);
+
+ PG_RETURN_BOOL(res != LOCKACQUIRE_NOT_AVAIL);
+}
+
+/*
+ * pg_try_advisory_xact_lock_shared(int8) - acquire xact scoped
+ * share lock on an int8 key, no wait
+ *
+ * Returns true if successful, false if lock not available
+ */
+Datum
+pg_try_advisory_xact_lock_shared_int8(PG_FUNCTION_ARGS)
+{
+ int64 key = PG_GETARG_INT64(0);
+ LOCKTAG tag;
+ LockAcquireResult res;
+
+ SET_LOCKTAG_INT64(tag, key);
+
+ res = LockAcquire(&tag, ShareLock, false, true);
+
+ PG_RETURN_BOOL(res != LOCKACQUIRE_NOT_AVAIL);
+}
+
+/*
+ * pg_advisory_unlock(int8) - release exclusive lock on an int8 key
+ *
+ * Returns true if successful, false if lock was not held
+ */
+Datum
+pg_advisory_unlock_int8(PG_FUNCTION_ARGS)
+{
+ int64 key = PG_GETARG_INT64(0);
+ LOCKTAG tag;
+ bool res;
+
+ SET_LOCKTAG_INT64(tag, key);
+
+ res = LockRelease(&tag, ExclusiveLock, true);
+
+ PG_RETURN_BOOL(res);
+}
+
+/*
+ * pg_advisory_unlock_shared(int8) - release share lock on an int8 key
+ *
+ * Returns true if successful, false if lock was not held
+ */
+Datum
+pg_advisory_unlock_shared_int8(PG_FUNCTION_ARGS)
+{
+ int64 key = PG_GETARG_INT64(0);
+ LOCKTAG tag;
+ bool res;
+
+ SET_LOCKTAG_INT64(tag, key);
+
+ res = LockRelease(&tag, ShareLock, true);
+
+ PG_RETURN_BOOL(res);
+}
+
+/*
+ * pg_advisory_lock(int4, int4) - acquire exclusive lock on 2 int4 keys
+ */
+Datum
+pg_advisory_lock_int4(PG_FUNCTION_ARGS)
+{
+ int32 key1 = PG_GETARG_INT32(0);
+ int32 key2 = PG_GETARG_INT32(1);
+ LOCKTAG tag;
+
+ SET_LOCKTAG_INT32(tag, key1, key2);
+
+ (void) LockAcquire(&tag, ExclusiveLock, true, false);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * pg_advisory_xact_lock(int4, int4) - acquire xact scoped
+ * exclusive lock on 2 int4 keys
+ */
+Datum
+pg_advisory_xact_lock_int4(PG_FUNCTION_ARGS)
+{
+ int32 key1 = PG_GETARG_INT32(0);
+ int32 key2 = PG_GETARG_INT32(1);
+ LOCKTAG tag;
+
+ SET_LOCKTAG_INT32(tag, key1, key2);
+
+ (void) LockAcquire(&tag, ExclusiveLock, false, false);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * pg_advisory_lock_shared(int4, int4) - acquire share lock on 2 int4 keys
+ */
+Datum
+pg_advisory_lock_shared_int4(PG_FUNCTION_ARGS)
+{
+ int32 key1 = PG_GETARG_INT32(0);
+ int32 key2 = PG_GETARG_INT32(1);
+ LOCKTAG tag;
+
+ SET_LOCKTAG_INT32(tag, key1, key2);
+
+ (void) LockAcquire(&tag, ShareLock, true, false);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * pg_advisory_xact_lock_shared(int4, int4) - acquire xact scoped
+ * share lock on 2 int4 keys
+ */
+Datum
+pg_advisory_xact_lock_shared_int4(PG_FUNCTION_ARGS)
+{
+ int32 key1 = PG_GETARG_INT32(0);
+ int32 key2 = PG_GETARG_INT32(1);
+ LOCKTAG tag;
+
+ SET_LOCKTAG_INT32(tag, key1, key2);
+
+ (void) LockAcquire(&tag, ShareLock, false, false);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * pg_try_advisory_lock(int4, int4) - acquire exclusive lock on 2 int4 keys, no wait
+ *
+ * Returns true if successful, false if lock not available
+ */
+Datum
+pg_try_advisory_lock_int4(PG_FUNCTION_ARGS)
+{
+ int32 key1 = PG_GETARG_INT32(0);
+ int32 key2 = PG_GETARG_INT32(1);
+ LOCKTAG tag;
+ LockAcquireResult res;
+
+ SET_LOCKTAG_INT32(tag, key1, key2);
+
+ res = LockAcquire(&tag, ExclusiveLock, true, true);
+
+ PG_RETURN_BOOL(res != LOCKACQUIRE_NOT_AVAIL);
+}
+
+/*
+ * pg_try_advisory_xact_lock(int4, int4) - acquire xact scoped
+ * exclusive lock on 2 int4 keys, no wait
+ *
+ * Returns true if successful, false if lock not available
+ */
+Datum
+pg_try_advisory_xact_lock_int4(PG_FUNCTION_ARGS)
+{
+ int32 key1 = PG_GETARG_INT32(0);
+ int32 key2 = PG_GETARG_INT32(1);
+ LOCKTAG tag;
+ LockAcquireResult res;
+
+ SET_LOCKTAG_INT32(tag, key1, key2);
+
+ res = LockAcquire(&tag, ExclusiveLock, false, true);
+
+ PG_RETURN_BOOL(res != LOCKACQUIRE_NOT_AVAIL);
+}
+
+/*
+ * pg_try_advisory_lock_shared(int4, int4) - acquire share lock on 2 int4 keys, no wait
+ *
+ * Returns true if successful, false if lock not available
+ */
+Datum
+pg_try_advisory_lock_shared_int4(PG_FUNCTION_ARGS)
+{
+ int32 key1 = PG_GETARG_INT32(0);
+ int32 key2 = PG_GETARG_INT32(1);
+ LOCKTAG tag;
+ LockAcquireResult res;
+
+ SET_LOCKTAG_INT32(tag, key1, key2);
+
+ res = LockAcquire(&tag, ShareLock, true, true);
+
+ PG_RETURN_BOOL(res != LOCKACQUIRE_NOT_AVAIL);
+}
+
+/*
+ * pg_try_advisory_xact_lock_shared(int4, int4) - acquire xact scoped
+ * share lock on 2 int4 keys, no wait
+ *
+ * Returns true if successful, false if lock not available
+ */
+Datum
+pg_try_advisory_xact_lock_shared_int4(PG_FUNCTION_ARGS)
+{
+ int32 key1 = PG_GETARG_INT32(0);
+ int32 key2 = PG_GETARG_INT32(1);
+ LOCKTAG tag;
+ LockAcquireResult res;
+
+ SET_LOCKTAG_INT32(tag, key1, key2);
+
+ res = LockAcquire(&tag, ShareLock, false, true);
+
+ PG_RETURN_BOOL(res != LOCKACQUIRE_NOT_AVAIL);
+}
+
+/*
+ * pg_advisory_unlock(int4, int4) - release exclusive lock on 2 int4 keys
+ *
+ * Returns true if successful, false if lock was not held
+ */
+Datum
+pg_advisory_unlock_int4(PG_FUNCTION_ARGS)
+{
+ int32 key1 = PG_GETARG_INT32(0);
+ int32 key2 = PG_GETARG_INT32(1);
+ LOCKTAG tag;
+ bool res;
+
+ SET_LOCKTAG_INT32(tag, key1, key2);
+
+ res = LockRelease(&tag, ExclusiveLock, true);
+
+ PG_RETURN_BOOL(res);
+}
+
+/*
+ * pg_advisory_unlock_shared(int4, int4) - release share lock on 2 int4 keys
+ *
+ * Returns true if successful, false if lock was not held
+ */
+Datum
+pg_advisory_unlock_shared_int4(PG_FUNCTION_ARGS)
+{
+ int32 key1 = PG_GETARG_INT32(0);
+ int32 key2 = PG_GETARG_INT32(1);
+ LOCKTAG tag;
+ bool res;
+
+ SET_LOCKTAG_INT32(tag, key1, key2);
+
+ res = LockRelease(&tag, ShareLock, true);
+
+ PG_RETURN_BOOL(res);
+}
+
+/*
+ * pg_advisory_unlock_all() - release all advisory locks
+ */
+Datum
+pg_advisory_unlock_all(PG_FUNCTION_ARGS)
+{
+ LockReleaseSession(USER_LOCKMETHOD);
+
+ PG_RETURN_VOID();
+}
diff --git a/src/backend/utils/adt/mac.c b/src/backend/utils/adt/mac.c
new file mode 100644
index 0000000..ac7342c
--- /dev/null
+++ b/src/backend/utils/adt/mac.c
@@ -0,0 +1,531 @@
+/*-------------------------------------------------------------------------
+ *
+ * mac.c
+ * PostgreSQL type definitions for 6 byte, EUI-48, MAC addresses.
+ *
+ * Portions Copyright (c) 1998-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/mac.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "common/hashfn.h"
+#include "lib/hyperloglog.h"
+#include "libpq/pqformat.h"
+#include "port/pg_bswap.h"
+#include "utils/builtins.h"
+#include "utils/guc.h"
+#include "utils/inet.h"
+#include "utils/sortsupport.h"
+
+
+/*
+ * Utility macros used for sorting and comparing:
+ */
+
+#define hibits(addr) \
+ ((unsigned long)(((addr)->a<<16)|((addr)->b<<8)|((addr)->c)))
+
+#define lobits(addr) \
+ ((unsigned long)(((addr)->d<<16)|((addr)->e<<8)|((addr)->f)))
+
+/* sortsupport for macaddr */
+typedef struct
+{
+ int64 input_count; /* number of non-null values seen */
+ bool estimating; /* true if estimating cardinality */
+
+ hyperLogLogState abbr_card; /* cardinality estimator */
+} macaddr_sortsupport_state;
+
+static int macaddr_cmp_internal(macaddr *a1, macaddr *a2);
+static int macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup);
+static bool macaddr_abbrev_abort(int memtupcount, SortSupport ssup);
+static Datum macaddr_abbrev_convert(Datum original, SortSupport ssup);
+
+/*
+ * MAC address reader. Accepts several common notations.
+ */
+
+Datum
+macaddr_in(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+ macaddr *result;
+ int a,
+ b,
+ c,
+ d,
+ e,
+ f;
+ char junk[2];
+ int count;
+
+ /* %1s matches iff there is trailing non-whitespace garbage */
+
+ count = sscanf(str, "%x:%x:%x:%x:%x:%x%1s",
+ &a, &b, &c, &d, &e, &f, junk);
+ if (count != 6)
+ count = sscanf(str, "%x-%x-%x-%x-%x-%x%1s",
+ &a, &b, &c, &d, &e, &f, junk);
+ if (count != 6)
+ count = sscanf(str, "%2x%2x%2x:%2x%2x%2x%1s",
+ &a, &b, &c, &d, &e, &f, junk);
+ if (count != 6)
+ count = sscanf(str, "%2x%2x%2x-%2x%2x%2x%1s",
+ &a, &b, &c, &d, &e, &f, junk);
+ if (count != 6)
+ count = sscanf(str, "%2x%2x.%2x%2x.%2x%2x%1s",
+ &a, &b, &c, &d, &e, &f, junk);
+ if (count != 6)
+ count = sscanf(str, "%2x%2x-%2x%2x-%2x%2x%1s",
+ &a, &b, &c, &d, &e, &f, junk);
+ if (count != 6)
+ count = sscanf(str, "%2x%2x%2x%2x%2x%2x%1s",
+ &a, &b, &c, &d, &e, &f, junk);
+ if (count != 6)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"", "macaddr",
+ str)));
+
+ if ((a < 0) || (a > 255) || (b < 0) || (b > 255) ||
+ (c < 0) || (c > 255) || (d < 0) || (d > 255) ||
+ (e < 0) || (e > 255) || (f < 0) || (f > 255))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("invalid octet value in \"macaddr\" value: \"%s\"", str)));
+
+ result = (macaddr *) palloc(sizeof(macaddr));
+
+ result->a = a;
+ result->b = b;
+ result->c = c;
+ result->d = d;
+ result->e = e;
+ result->f = f;
+
+ PG_RETURN_MACADDR_P(result);
+}
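+
+/*
+ * As a quick reference, the sscanf patterns above accept inputs such as
+ * (illustrative examples) '08:00:2b:01:02:03', '08-00-2b-01-02-03',
+ * '08002b:010203', '08002b-010203', '0800.2b01.0203', '0800-2b01-0203'
+ * and '08002b010203'.
+ */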
+
+/*
+ * MAC address output function. Fixed format.
+ */
+
+Datum
+macaddr_out(PG_FUNCTION_ARGS)
+{
+ macaddr *addr = PG_GETARG_MACADDR_P(0);
+ char *result;
+
+ result = (char *) palloc(32);
+
+ snprintf(result, 32, "%02x:%02x:%02x:%02x:%02x:%02x",
+ addr->a, addr->b, addr->c, addr->d, addr->e, addr->f);
+
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * macaddr_recv - converts external binary format to macaddr
+ *
+ * The external representation is just the six bytes, MSB first.
+ */
+Datum
+macaddr_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ macaddr *addr;
+
+ addr = (macaddr *) palloc(sizeof(macaddr));
+
+ addr->a = pq_getmsgbyte(buf);
+ addr->b = pq_getmsgbyte(buf);
+ addr->c = pq_getmsgbyte(buf);
+ addr->d = pq_getmsgbyte(buf);
+ addr->e = pq_getmsgbyte(buf);
+ addr->f = pq_getmsgbyte(buf);
+
+ PG_RETURN_MACADDR_P(addr);
+}
+
+/*
+ * macaddr_send - converts macaddr to binary format
+ */
+Datum
+macaddr_send(PG_FUNCTION_ARGS)
+{
+ macaddr *addr = PG_GETARG_MACADDR_P(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendbyte(&buf, addr->a);
+ pq_sendbyte(&buf, addr->b);
+ pq_sendbyte(&buf, addr->c);
+ pq_sendbyte(&buf, addr->d);
+ pq_sendbyte(&buf, addr->e);
+ pq_sendbyte(&buf, addr->f);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+
+/*
+ * Comparison function for sorting:
+ */
+
+static int
+macaddr_cmp_internal(macaddr *a1, macaddr *a2)
+{
+ if (hibits(a1) < hibits(a2))
+ return -1;
+ else if (hibits(a1) > hibits(a2))
+ return 1;
+ else if (lobits(a1) < lobits(a2))
+ return -1;
+ else if (lobits(a1) > lobits(a2))
+ return 1;
+ else
+ return 0;
+}
+
+Datum
+macaddr_cmp(PG_FUNCTION_ARGS)
+{
+ macaddr *a1 = PG_GETARG_MACADDR_P(0);
+ macaddr *a2 = PG_GETARG_MACADDR_P(1);
+
+ PG_RETURN_INT32(macaddr_cmp_internal(a1, a2));
+}
+
+/*
+ * Boolean comparisons.
+ */
+
+Datum
+macaddr_lt(PG_FUNCTION_ARGS)
+{
+ macaddr *a1 = PG_GETARG_MACADDR_P(0);
+ macaddr *a2 = PG_GETARG_MACADDR_P(1);
+
+ PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) < 0);
+}
+
+Datum
+macaddr_le(PG_FUNCTION_ARGS)
+{
+ macaddr *a1 = PG_GETARG_MACADDR_P(0);
+ macaddr *a2 = PG_GETARG_MACADDR_P(1);
+
+ PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) <= 0);
+}
+
+Datum
+macaddr_eq(PG_FUNCTION_ARGS)
+{
+ macaddr *a1 = PG_GETARG_MACADDR_P(0);
+ macaddr *a2 = PG_GETARG_MACADDR_P(1);
+
+ PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) == 0);
+}
+
+Datum
+macaddr_ge(PG_FUNCTION_ARGS)
+{
+ macaddr *a1 = PG_GETARG_MACADDR_P(0);
+ macaddr *a2 = PG_GETARG_MACADDR_P(1);
+
+ PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) >= 0);
+}
+
+Datum
+macaddr_gt(PG_FUNCTION_ARGS)
+{
+ macaddr *a1 = PG_GETARG_MACADDR_P(0);
+ macaddr *a2 = PG_GETARG_MACADDR_P(1);
+
+ PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) > 0);
+}
+
+Datum
+macaddr_ne(PG_FUNCTION_ARGS)
+{
+ macaddr *a1 = PG_GETARG_MACADDR_P(0);
+ macaddr *a2 = PG_GETARG_MACADDR_P(1);
+
+ PG_RETURN_BOOL(macaddr_cmp_internal(a1, a2) != 0);
+}
+
+/*
+ * Support function for hash indexes on macaddr.
+ */
+Datum
+hashmacaddr(PG_FUNCTION_ARGS)
+{
+ macaddr *key = PG_GETARG_MACADDR_P(0);
+
+ return hash_any((unsigned char *) key, sizeof(macaddr));
+}
+
+Datum
+hashmacaddrextended(PG_FUNCTION_ARGS)
+{
+ macaddr *key = PG_GETARG_MACADDR_P(0);
+
+ return hash_any_extended((unsigned char *) key, sizeof(macaddr),
+ PG_GETARG_INT64(1));
+}
+
+/*
+ * Arithmetic functions: bitwise NOT, AND, OR.
+ */
+Datum
+macaddr_not(PG_FUNCTION_ARGS)
+{
+ macaddr *addr = PG_GETARG_MACADDR_P(0);
+ macaddr *result;
+
+ result = (macaddr *) palloc(sizeof(macaddr));
+ result->a = ~addr->a;
+ result->b = ~addr->b;
+ result->c = ~addr->c;
+ result->d = ~addr->d;
+ result->e = ~addr->e;
+ result->f = ~addr->f;
+ PG_RETURN_MACADDR_P(result);
+}
+
+Datum
+macaddr_and(PG_FUNCTION_ARGS)
+{
+ macaddr *addr1 = PG_GETARG_MACADDR_P(0);
+ macaddr *addr2 = PG_GETARG_MACADDR_P(1);
+ macaddr *result;
+
+ result = (macaddr *) palloc(sizeof(macaddr));
+ result->a = addr1->a & addr2->a;
+ result->b = addr1->b & addr2->b;
+ result->c = addr1->c & addr2->c;
+ result->d = addr1->d & addr2->d;
+ result->e = addr1->e & addr2->e;
+ result->f = addr1->f & addr2->f;
+ PG_RETURN_MACADDR_P(result);
+}
+
+Datum
+macaddr_or(PG_FUNCTION_ARGS)
+{
+ macaddr *addr1 = PG_GETARG_MACADDR_P(0);
+ macaddr *addr2 = PG_GETARG_MACADDR_P(1);
+ macaddr *result;
+
+ result = (macaddr *) palloc(sizeof(macaddr));
+ result->a = addr1->a | addr2->a;
+ result->b = addr1->b | addr2->b;
+ result->c = addr1->c | addr2->c;
+ result->d = addr1->d | addr2->d;
+ result->e = addr1->e | addr2->e;
+ result->f = addr1->f | addr2->f;
+ PG_RETURN_MACADDR_P(result);
+}
+
+/*
+ * Truncation function to allow comparing mac manufacturers.
+ * From suggestion by Alex Pilosov <alex@pilosoft.com>
+ */
+Datum
+macaddr_trunc(PG_FUNCTION_ARGS)
+{
+ macaddr *addr = PG_GETARG_MACADDR_P(0);
+ macaddr *result;
+
+ result = (macaddr *) palloc(sizeof(macaddr));
+
+ result->a = addr->a;
+ result->b = addr->b;
+ result->c = addr->c;
+ result->d = 0;
+ result->e = 0;
+ result->f = 0;
+
+ PG_RETURN_MACADDR_P(result);
+}
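+
+/*
+ * For example (illustrative value), truncation keeps only the first three
+ * octets (the OUI): '08:00:2b:01:02:03' becomes '08:00:2b:00:00:00'.
+ */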
+
+/*
+ * SortSupport strategy function. Populates a SortSupport struct with the
+ * information necessary to use comparison by abbreviated keys.
+ */
+Datum
+macaddr_sortsupport(PG_FUNCTION_ARGS)
+{
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+
+ ssup->comparator = macaddr_fast_cmp;
+ ssup->ssup_extra = NULL;
+
+ if (ssup->abbreviate)
+ {
+ macaddr_sortsupport_state *uss;
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+ uss = palloc(sizeof(macaddr_sortsupport_state));
+ uss->input_count = 0;
+ uss->estimating = true;
+ initHyperLogLog(&uss->abbr_card, 10);
+
+ ssup->ssup_extra = uss;
+
+ ssup->comparator = ssup_datum_unsigned_cmp;
+ ssup->abbrev_converter = macaddr_abbrev_convert;
+ ssup->abbrev_abort = macaddr_abbrev_abort;
+ ssup->abbrev_full_comparator = macaddr_fast_cmp;
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * SortSupport "traditional" comparison function. Pulls two MAC addresses from
+ * the heap and runs a standard comparison on them.
+ */
+static int
+macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup)
+{
+ macaddr *arg1 = DatumGetMacaddrP(x);
+ macaddr *arg2 = DatumGetMacaddrP(y);
+
+ return macaddr_cmp_internal(arg1, arg2);
+}
+
+/*
+ * Callback for estimating effectiveness of abbreviated key optimization.
+ *
+ * We pay no attention to the cardinality of the non-abbreviated data, because
+ * there is no equality fast-path within the authoritative macaddr comparator.
+ */
+static bool
+macaddr_abbrev_abort(int memtupcount, SortSupport ssup)
+{
+ macaddr_sortsupport_state *uss = ssup->ssup_extra;
+ double abbr_card;
+
+ if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
+ return false;
+
+ abbr_card = estimateHyperLogLog(&uss->abbr_card);
+
+ /*
+ * If we have >100k distinct values, then even if we were sorting many
+ * billion rows we'd likely still break even, and the penalty of undoing
+ * that many rows of abbrevs would probably not be worth it. At this point
+ * we stop counting because we know that we're now fully committed.
+ */
+ if (abbr_card > 100000.0)
+ {
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG,
+ "macaddr_abbrev: estimation ends at cardinality %f"
+ " after " INT64_FORMAT " values (%d rows)",
+ abbr_card, uss->input_count, memtupcount);
+#endif
+ uss->estimating = false;
+ return false;
+ }
+
+ /*
+	 * Target minimum cardinality is 1 per ~2k of non-null inputs. The 0.5-row
+	 * fudge factor allows us to abort earlier on genuinely pathological data
+ * where we've had exactly one abbreviated value in the first 2k
+ * (non-null) rows.
+ */
+ if (abbr_card < uss->input_count / 2000.0 + 0.5)
+ {
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG,
+ "macaddr_abbrev: aborting abbreviation at cardinality %f"
+ " below threshold %f after " INT64_FORMAT " values (%d rows)",
+ abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
+ memtupcount);
+#endif
+ return true;
+ }
+
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG,
+ "macaddr_abbrev: cardinality %f after " INT64_FORMAT
+ " values (%d rows)", abbr_card, uss->input_count, memtupcount);
+#endif
+
+ return false;
+}
+
+/*
+ * SortSupport conversion routine. Converts original macaddr representation
+ * to abbreviated key representation.
+ *
+ * Packs the bytes of a 6-byte MAC address into a Datum and treats it as an
+ * unsigned integer for purposes of comparison. On a 64-bit machine, there
+ * will be two zeroed bytes of padding. The integer is converted to native
+ * endianness to facilitate easy comparison.
+ */
+static Datum
+macaddr_abbrev_convert(Datum original, SortSupport ssup)
+{
+ macaddr_sortsupport_state *uss = ssup->ssup_extra;
+ macaddr *authoritative = DatumGetMacaddrP(original);
+ Datum res;
+
+ /*
+ * On a 64-bit machine, zero out the 8-byte datum and copy the 6 bytes of
+ * the MAC address in. There will be two bytes of zero padding on the end
+ * of the least significant bits.
+ */
+#if SIZEOF_DATUM == 8
+ memset(&res, 0, SIZEOF_DATUM);
+ memcpy(&res, authoritative, sizeof(macaddr));
+#else /* SIZEOF_DATUM != 8 */
+ memcpy(&res, authoritative, SIZEOF_DATUM);
+#endif
+ uss->input_count += 1;
+
+ /*
+ * Cardinality estimation. The estimate uses uint32, so on a 64-bit
+ * architecture, XOR the two 32-bit halves together to produce slightly
+ * more entropy. The two zeroed bytes won't have any practical impact on
+ * this operation.
+ */
+ if (uss->estimating)
+ {
+ uint32 tmp;
+
+#if SIZEOF_DATUM == 8
+ tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
+#else /* SIZEOF_DATUM != 8 */
+ tmp = (uint32) res;
+#endif
+
+ addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
+ }
+
+ /*
+ * Byteswap on little-endian machines.
+ *
+ * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer
+ * 3-way comparator) works correctly on all platforms. Without this, the
+ * comparator would have to call memcmp() with a pair of pointers to the
+ * first byte of each abbreviated key, which is slower.
+ */
+ res = DatumBigEndianToNative(res);
+
+ return res;
+}
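+
+/*
+ * Worked example (illustrative, 64-bit Datum): for '08:00:2b:01:02:03' the
+ * abbreviated key holds the bytes 08 00 2b 01 02 03 00 00, and after
+ * DatumBigEndianToNative() it sorts as the unsigned value 0x08002b0102030000.
+ */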
diff --git a/src/backend/utils/adt/mac8.c b/src/backend/utils/adt/mac8.c
new file mode 100644
index 0000000..24d219f
--- /dev/null
+++ b/src/backend/utils/adt/mac8.c
@@ -0,0 +1,577 @@
+/*-------------------------------------------------------------------------
+ *
+ * mac8.c
+ * PostgreSQL type definitions for 8 byte (EUI-64) MAC addresses.
+ *
+ * EUI-48 (6 byte) MAC addresses are accepted as input and are stored in
+ * EUI-64 format, with the 4th and 5th bytes set to FF and FE, respectively.
+ *
+ * Output is always in 8 byte (EUI-64) format.
+ *
+ * The following code is written with the assumption that the OUI field
+ * size is 24 bits.
+ *
+ * Portions Copyright (c) 1998-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/mac8.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "common/hashfn.h"
+#include "libpq/pqformat.h"
+#include "utils/builtins.h"
+#include "utils/inet.h"
+
+/*
+ * Utility macros used for sorting and comparing:
+ */
+#define hibits(addr) \
+ ((unsigned long)(((addr)->a<<24) | ((addr)->b<<16) | ((addr)->c<<8) | ((addr)->d)))
+
+#define lobits(addr) \
+ ((unsigned long)(((addr)->e<<24) | ((addr)->f<<16) | ((addr)->g<<8) | ((addr)->h)))
+
+static unsigned char hex2_to_uchar(const unsigned char *ptr, const unsigned char *str);
+
+static const signed char hexlookup[128] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
+/*
+ * hex2_to_uchar - convert 2 hex digits to a byte (unsigned char)
+ *
+ * This will ereport() if the end of the string is reached ('\0' found), or if
+ * either character is not a valid hex digit.
+ *
+ * ptr points to the digits to convert within the string; str is the entire
+ * string, used only for error reporting.
+ */
+static inline unsigned char
+hex2_to_uchar(const unsigned char *ptr, const unsigned char *str)
+{
+ unsigned char ret = 0;
+ signed char lookup;
+
+ /* Handle the first character */
+ if (*ptr > 127)
+ goto invalid_input;
+
+ lookup = hexlookup[*ptr];
+ if (lookup < 0)
+ goto invalid_input;
+
+ ret = lookup << 4;
+
+ /* Move to the second character */
+ ptr++;
+
+ if (*ptr > 127)
+ goto invalid_input;
+
+ lookup = hexlookup[*ptr];
+ if (lookup < 0)
+ goto invalid_input;
+
+ ret += lookup;
+
+ return ret;
+
+invalid_input:
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"", "macaddr8",
+ str)));
+
+ /* We do not actually reach here */
+ return 0;
+}
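+
+/*
+ * For example (illustrative call), when ptr points at "2b" this returns 0x2b;
+ * a pair such as "2g" reports the invalid-syntax error above instead.
+ */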
+
+/*
+ * MAC address (EUI-48 and EUI-64) reader. Accepts several common notations.
+ */
+Datum
+macaddr8_in(PG_FUNCTION_ARGS)
+{
+ const unsigned char *str = (unsigned char *) PG_GETARG_CSTRING(0);
+ const unsigned char *ptr = str;
+ macaddr8 *result;
+ unsigned char a = 0,
+ b = 0,
+ c = 0,
+ d = 0,
+ e = 0,
+ f = 0,
+ g = 0,
+ h = 0;
+ int count = 0;
+ unsigned char spacer = '\0';
+
+ /* skip leading spaces */
+ while (*ptr && isspace(*ptr))
+ ptr++;
+
+ /* digits must always come in pairs */
+ while (*ptr && *(ptr + 1))
+ {
+ /*
+ * Attempt to decode each byte, which must be 2 hex digits in a row.
+ * If either digit is not hex, hex2_to_uchar will throw ereport() for
+ * us. Either 6 or 8 byte MAC addresses are supported.
+ */
+
+ /* Attempt to collect a byte */
+ count++;
+
+ switch (count)
+ {
+ case 1:
+ a = hex2_to_uchar(ptr, str);
+ break;
+ case 2:
+ b = hex2_to_uchar(ptr, str);
+ break;
+ case 3:
+ c = hex2_to_uchar(ptr, str);
+ break;
+ case 4:
+ d = hex2_to_uchar(ptr, str);
+ break;
+ case 5:
+ e = hex2_to_uchar(ptr, str);
+ break;
+ case 6:
+ f = hex2_to_uchar(ptr, str);
+ break;
+ case 7:
+ g = hex2_to_uchar(ptr, str);
+ break;
+ case 8:
+ h = hex2_to_uchar(ptr, str);
+ break;
+ default:
+ /* must be trailing garbage... */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"", "macaddr8",
+ str)));
+ }
+
+ /* Move forward to where the next byte should be */
+ ptr += 2;
+
+		/* Check for a spacer: ':', '-' and '.' are valid, anything else is not */
+ if (*ptr == ':' || *ptr == '-' || *ptr == '.')
+ {
+ /* remember the spacer used, if it changes then it isn't valid */
+ if (spacer == '\0')
+ spacer = *ptr;
+
+ /* Have to use the same spacer throughout */
+ else if (spacer != *ptr)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"", "macaddr8",
+ str)));
+
+ /* move past the spacer */
+ ptr++;
+ }
+
+		/* allow trailing whitespace once we have 6 or 8 bytes */
+ if (count == 6 || count == 8)
+ {
+ if (isspace(*ptr))
+ {
+ while (*++ptr && isspace(*ptr));
+
+ /* If we found a space and then non-space, it's invalid */
+ if (*ptr)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"", "macaddr8",
+ str)));
+ }
+ }
+ }
+
+ /* Convert a 6 byte MAC address to macaddr8 */
+ if (count == 6)
+ {
+ h = f;
+ g = e;
+ f = d;
+
+ d = 0xFF;
+ e = 0xFE;
+ }
+ else if (count != 8)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"", "macaddr8",
+ str)));
+
+ result = (macaddr8 *) palloc0(sizeof(macaddr8));
+
+ result->a = a;
+ result->b = b;
+ result->c = c;
+ result->d = d;
+ result->e = e;
+ result->f = f;
+ result->g = g;
+ result->h = h;
+
+ PG_RETURN_MACADDR8_P(result);
+}
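+
+/*
+ * For example (illustrative inputs), the 6-byte value '08:00:2b:01:02:03' is
+ * stored as 08:00:2b:ff:fe:01:02:03, while an 8-byte input such as
+ * '08:00:2b:01:02:03:04:05' is stored as given.
+ */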
+
+/*
+ * MAC8 address (EUI-64) output function. Fixed format.
+ */
+Datum
+macaddr8_out(PG_FUNCTION_ARGS)
+{
+ macaddr8 *addr = PG_GETARG_MACADDR8_P(0);
+ char *result;
+
+ result = (char *) palloc(32);
+
+ snprintf(result, 32, "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x",
+ addr->a, addr->b, addr->c, addr->d,
+ addr->e, addr->f, addr->g, addr->h);
+
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * macaddr8_recv - converts external binary format (EUI-48 or EUI-64) to macaddr8
+ *
+ * The external representation is six or eight bytes, MSB first; a 6-byte
+ * (EUI-48) value is converted to EUI-64 form as in macaddr8_in.
+ */
+Datum
+macaddr8_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ macaddr8 *addr;
+
+ addr = (macaddr8 *) palloc0(sizeof(macaddr8));
+
+ addr->a = pq_getmsgbyte(buf);
+ addr->b = pq_getmsgbyte(buf);
+ addr->c = pq_getmsgbyte(buf);
+
+ if (buf->len == 6)
+ {
+ addr->d = 0xFF;
+ addr->e = 0xFE;
+ }
+ else
+ {
+ addr->d = pq_getmsgbyte(buf);
+ addr->e = pq_getmsgbyte(buf);
+ }
+
+ addr->f = pq_getmsgbyte(buf);
+ addr->g = pq_getmsgbyte(buf);
+ addr->h = pq_getmsgbyte(buf);
+
+ PG_RETURN_MACADDR8_P(addr);
+}
+
+/*
+ * macaddr8_send - converts macaddr8 (EUI-64) to binary format
+ */
+Datum
+macaddr8_send(PG_FUNCTION_ARGS)
+{
+ macaddr8 *addr = PG_GETARG_MACADDR8_P(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendbyte(&buf, addr->a);
+ pq_sendbyte(&buf, addr->b);
+ pq_sendbyte(&buf, addr->c);
+ pq_sendbyte(&buf, addr->d);
+ pq_sendbyte(&buf, addr->e);
+ pq_sendbyte(&buf, addr->f);
+ pq_sendbyte(&buf, addr->g);
+ pq_sendbyte(&buf, addr->h);
+
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+
+/*
+ * macaddr8_cmp_internal - comparison function for sorting:
+ */
+static int32
+macaddr8_cmp_internal(macaddr8 *a1, macaddr8 *a2)
+{
+ if (hibits(a1) < hibits(a2))
+ return -1;
+ else if (hibits(a1) > hibits(a2))
+ return 1;
+ else if (lobits(a1) < lobits(a2))
+ return -1;
+ else if (lobits(a1) > lobits(a2))
+ return 1;
+ else
+ return 0;
+}
+
+Datum
+macaddr8_cmp(PG_FUNCTION_ARGS)
+{
+ macaddr8 *a1 = PG_GETARG_MACADDR8_P(0);
+ macaddr8 *a2 = PG_GETARG_MACADDR8_P(1);
+
+ PG_RETURN_INT32(macaddr8_cmp_internal(a1, a2));
+}
+
+/*
+ * Boolean comparison functions.
+ */
+
+Datum
+macaddr8_lt(PG_FUNCTION_ARGS)
+{
+ macaddr8 *a1 = PG_GETARG_MACADDR8_P(0);
+ macaddr8 *a2 = PG_GETARG_MACADDR8_P(1);
+
+ PG_RETURN_BOOL(macaddr8_cmp_internal(a1, a2) < 0);
+}
+
+Datum
+macaddr8_le(PG_FUNCTION_ARGS)
+{
+ macaddr8 *a1 = PG_GETARG_MACADDR8_P(0);
+ macaddr8 *a2 = PG_GETARG_MACADDR8_P(1);
+
+ PG_RETURN_BOOL(macaddr8_cmp_internal(a1, a2) <= 0);
+}
+
+Datum
+macaddr8_eq(PG_FUNCTION_ARGS)
+{
+ macaddr8 *a1 = PG_GETARG_MACADDR8_P(0);
+ macaddr8 *a2 = PG_GETARG_MACADDR8_P(1);
+
+ PG_RETURN_BOOL(macaddr8_cmp_internal(a1, a2) == 0);
+}
+
+Datum
+macaddr8_ge(PG_FUNCTION_ARGS)
+{
+ macaddr8 *a1 = PG_GETARG_MACADDR8_P(0);
+ macaddr8 *a2 = PG_GETARG_MACADDR8_P(1);
+
+ PG_RETURN_BOOL(macaddr8_cmp_internal(a1, a2) >= 0);
+}
+
+Datum
+macaddr8_gt(PG_FUNCTION_ARGS)
+{
+ macaddr8 *a1 = PG_GETARG_MACADDR8_P(0);
+ macaddr8 *a2 = PG_GETARG_MACADDR8_P(1);
+
+ PG_RETURN_BOOL(macaddr8_cmp_internal(a1, a2) > 0);
+}
+
+Datum
+macaddr8_ne(PG_FUNCTION_ARGS)
+{
+ macaddr8 *a1 = PG_GETARG_MACADDR8_P(0);
+ macaddr8 *a2 = PG_GETARG_MACADDR8_P(1);
+
+ PG_RETURN_BOOL(macaddr8_cmp_internal(a1, a2) != 0);
+}
+
+/*
+ * Support function for hash indexes on macaddr8.
+ */
+Datum
+hashmacaddr8(PG_FUNCTION_ARGS)
+{
+ macaddr8 *key = PG_GETARG_MACADDR8_P(0);
+
+ return hash_any((unsigned char *) key, sizeof(macaddr8));
+}
+
+Datum
+hashmacaddr8extended(PG_FUNCTION_ARGS)
+{
+ macaddr8 *key = PG_GETARG_MACADDR8_P(0);
+
+ return hash_any_extended((unsigned char *) key, sizeof(macaddr8),
+ PG_GETARG_INT64(1));
+}
+
+/*
+ * Arithmetic functions: bitwise NOT, AND, OR.
+ */
+Datum
+macaddr8_not(PG_FUNCTION_ARGS)
+{
+ macaddr8 *addr = PG_GETARG_MACADDR8_P(0);
+ macaddr8 *result;
+
+ result = (macaddr8 *) palloc0(sizeof(macaddr8));
+ result->a = ~addr->a;
+ result->b = ~addr->b;
+ result->c = ~addr->c;
+ result->d = ~addr->d;
+ result->e = ~addr->e;
+ result->f = ~addr->f;
+ result->g = ~addr->g;
+ result->h = ~addr->h;
+
+ PG_RETURN_MACADDR8_P(result);
+}
+
+Datum
+macaddr8_and(PG_FUNCTION_ARGS)
+{
+ macaddr8 *addr1 = PG_GETARG_MACADDR8_P(0);
+ macaddr8 *addr2 = PG_GETARG_MACADDR8_P(1);
+ macaddr8 *result;
+
+ result = (macaddr8 *) palloc0(sizeof(macaddr8));
+ result->a = addr1->a & addr2->a;
+ result->b = addr1->b & addr2->b;
+ result->c = addr1->c & addr2->c;
+ result->d = addr1->d & addr2->d;
+ result->e = addr1->e & addr2->e;
+ result->f = addr1->f & addr2->f;
+ result->g = addr1->g & addr2->g;
+ result->h = addr1->h & addr2->h;
+
+ PG_RETURN_MACADDR8_P(result);
+}
+
+Datum
+macaddr8_or(PG_FUNCTION_ARGS)
+{
+ macaddr8 *addr1 = PG_GETARG_MACADDR8_P(0);
+ macaddr8 *addr2 = PG_GETARG_MACADDR8_P(1);
+ macaddr8 *result;
+
+ result = (macaddr8 *) palloc0(sizeof(macaddr8));
+ result->a = addr1->a | addr2->a;
+ result->b = addr1->b | addr2->b;
+ result->c = addr1->c | addr2->c;
+ result->d = addr1->d | addr2->d;
+ result->e = addr1->e | addr2->e;
+ result->f = addr1->f | addr2->f;
+ result->g = addr1->g | addr2->g;
+ result->h = addr1->h | addr2->h;
+
+ PG_RETURN_MACADDR8_P(result);
+}
+
+/*
+ * Truncation function to allow comparing macaddr8 manufacturers.
+ */
+Datum
+macaddr8_trunc(PG_FUNCTION_ARGS)
+{
+ macaddr8 *addr = PG_GETARG_MACADDR8_P(0);
+ macaddr8 *result;
+
+ result = (macaddr8 *) palloc0(sizeof(macaddr8));
+
+ result->a = addr->a;
+ result->b = addr->b;
+ result->c = addr->c;
+ result->d = 0;
+ result->e = 0;
+ result->f = 0;
+ result->g = 0;
+ result->h = 0;
+
+ PG_RETURN_MACADDR8_P(result);
+}
+
+/*
+ * Set 7th bit for modified EUI-64 as used in IPv6.
+ */
+Datum
+macaddr8_set7bit(PG_FUNCTION_ARGS)
+{
+ macaddr8 *addr = PG_GETARG_MACADDR8_P(0);
+ macaddr8 *result;
+
+ result = (macaddr8 *) palloc0(sizeof(macaddr8));
+
+ result->a = addr->a | 0x02;
+ result->b = addr->b;
+ result->c = addr->c;
+ result->d = addr->d;
+ result->e = addr->e;
+ result->f = addr->f;
+ result->g = addr->g;
+ result->h = addr->h;
+
+ PG_RETURN_MACADDR8_P(result);
+}
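+
+/*
+ * For example (illustrative value), 08:00:2b:ff:fe:01:02:03 becomes
+ * 0a:00:2b:ff:fe:01:02:03: the universal/local bit (0x02) of the first octet
+ * is set, as modified EUI-64 IPv6 interface identifiers require.
+ */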
+
+/*----------------------------------------------------------
+ * Conversion operators.
+ *---------------------------------------------------------*/
+
+Datum
+macaddrtomacaddr8(PG_FUNCTION_ARGS)
+{
+ macaddr *addr6 = PG_GETARG_MACADDR_P(0);
+ macaddr8 *result;
+
+ result = (macaddr8 *) palloc0(sizeof(macaddr8));
+
+ result->a = addr6->a;
+ result->b = addr6->b;
+ result->c = addr6->c;
+ result->d = 0xFF;
+ result->e = 0xFE;
+ result->f = addr6->d;
+ result->g = addr6->e;
+ result->h = addr6->f;
+
+
+ PG_RETURN_MACADDR8_P(result);
+}
+
+Datum
+macaddr8tomacaddr(PG_FUNCTION_ARGS)
+{
+ macaddr8 *addr = PG_GETARG_MACADDR8_P(0);
+ macaddr *result;
+
+ result = (macaddr *) palloc0(sizeof(macaddr));
+
+ if ((addr->d != 0xFF) || (addr->e != 0xFE))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("macaddr8 data out of range to convert to macaddr"),
+ errhint("Only addresses that have FF and FE as values in the "
+ "4th and 5th bytes from the left, for example "
+ "xx:xx:xx:ff:fe:xx:xx:xx, are eligible to be converted "
+ "from macaddr8 to macaddr.")));
+
+ result->a = addr->a;
+ result->b = addr->b;
+ result->c = addr->c;
+ result->d = addr->f;
+ result->e = addr->g;
+ result->f = addr->h;
+
+ PG_RETURN_MACADDR_P(result);
+}
diff --git a/src/backend/utils/adt/mcxtfuncs.c b/src/backend/utils/adt/mcxtfuncs.c
new file mode 100644
index 0000000..04b7aa2
--- /dev/null
+++ b/src/backend/utils/adt/mcxtfuncs.c
@@ -0,0 +1,195 @@
+/*-------------------------------------------------------------------------
+ *
+ * mcxtfuncs.c
+ * Functions to show backend memory context.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/mcxtfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "mb/pg_wchar.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
+#include "utils/builtins.h"
+
+/* ----------
+ * Maximum number of bytes shown for a MemoryContext identifier.
+ * ----------
+ */
+#define MEMORY_CONTEXT_IDENT_DISPLAY_SIZE 1024
+
+/*
+ * PutMemoryContextsStatsTupleStore
+ * One recursion level for pg_get_backend_memory_contexts.
+ */
+static void
+PutMemoryContextsStatsTupleStore(Tuplestorestate *tupstore,
+ TupleDesc tupdesc, MemoryContext context,
+ const char *parent, int level)
+{
+#define PG_GET_BACKEND_MEMORY_CONTEXTS_COLS 9
+
+ Datum values[PG_GET_BACKEND_MEMORY_CONTEXTS_COLS];
+ bool nulls[PG_GET_BACKEND_MEMORY_CONTEXTS_COLS];
+ MemoryContextCounters stat;
+ MemoryContext child;
+ const char *name;
+ const char *ident;
+
+ AssertArg(MemoryContextIsValid(context));
+
+ name = context->name;
+ ident = context->ident;
+
+ /*
+ * To be consistent with logging output, we label dynahash contexts with
+ * just the hash table name as with MemoryContextStatsPrint().
+ */
+ if (ident && strcmp(name, "dynahash") == 0)
+ {
+ name = ident;
+ ident = NULL;
+ }
+
+ /* Examine the context itself */
+ memset(&stat, 0, sizeof(stat));
+ (*context->methods->stats) (context, NULL, (void *) &level, &stat, true);
+
+ memset(values, 0, sizeof(values));
+ memset(nulls, 0, sizeof(nulls));
+
+ if (name)
+ values[0] = CStringGetTextDatum(name);
+ else
+ nulls[0] = true;
+
+ if (ident)
+ {
+ int idlen = strlen(ident);
+ char clipped_ident[MEMORY_CONTEXT_IDENT_DISPLAY_SIZE];
+
+ /*
+		 * Some identifiers, such as SQL query strings, can be very long;
+		 * truncate oversized identifiers.
+ */
+ if (idlen >= MEMORY_CONTEXT_IDENT_DISPLAY_SIZE)
+ idlen = pg_mbcliplen(ident, idlen, MEMORY_CONTEXT_IDENT_DISPLAY_SIZE - 1);
+
+ memcpy(clipped_ident, ident, idlen);
+ clipped_ident[idlen] = '\0';
+ values[1] = CStringGetTextDatum(clipped_ident);
+ }
+ else
+ nulls[1] = true;
+
+ if (parent)
+ values[2] = CStringGetTextDatum(parent);
+ else
+ nulls[2] = true;
+
+ values[3] = Int32GetDatum(level);
+ values[4] = Int64GetDatum(stat.totalspace);
+ values[5] = Int64GetDatum(stat.nblocks);
+ values[6] = Int64GetDatum(stat.freespace);
+ values[7] = Int64GetDatum(stat.freechunks);
+ values[8] = Int64GetDatum(stat.totalspace - stat.freespace);
+ tuplestore_putvalues(tupstore, tupdesc, values, nulls);
+
+ for (child = context->firstchild; child != NULL; child = child->nextchild)
+ {
+ PutMemoryContextsStatsTupleStore(tupstore, tupdesc,
+ child, name, level + 1);
+ }
+}
+
+/*
+ * pg_get_backend_memory_contexts
+ * SQL SRF showing backend memory context.
+ */
+Datum
+pg_get_backend_memory_contexts(PG_FUNCTION_ARGS)
+{
+ ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+
+ InitMaterializedSRF(fcinfo, 0);
+ PutMemoryContextsStatsTupleStore(rsinfo->setResult, rsinfo->setDesc,
+ TopMemoryContext, NULL, 0);
+
+ return (Datum) 0;
+}
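+
+/*
+ * This function backs the pg_backend_memory_contexts view; an illustrative
+ * query over it is:
+ *
+ *     SELECT name, parent, total_bytes
+ *     FROM pg_backend_memory_contexts
+ *     ORDER BY total_bytes DESC;
+ */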
+
+/*
+ * pg_log_backend_memory_contexts
+ * Signal a backend or an auxiliary process to log its memory contexts.
+ *
+ * By default, only superusers are allowed to signal to log the memory
+ * contexts, because allowing any user to issue this request at an unbounded
+ * rate would cause lots of log messages, which can lead to denial of
+ * service. Additional roles can be permitted with GRANT.
+ *
+ * On receipt of this signal, a backend or an auxiliary process sets the flag
+ * in the signal handler, which causes the next CHECK_FOR_INTERRUPTS()
+ * or process-specific interrupt handler to log the memory contexts.
+ */
+Datum
+pg_log_backend_memory_contexts(PG_FUNCTION_ARGS)
+{
+ int pid = PG_GETARG_INT32(0);
+ PGPROC *proc;
+ BackendId backendId = InvalidBackendId;
+
+ proc = BackendPidGetProc(pid);
+
+ /*
+ * See if the process with given pid is a backend or an auxiliary process.
+ *
+ * If the given process is a backend, use its backend id in
+ * SendProcSignal() later to speed up the operation. Otherwise, don't do
+ * that because auxiliary processes (except the startup process) don't
+ * have a valid backend id.
+ */
+ if (proc != NULL)
+ backendId = proc->backendId;
+ else
+ proc = AuxiliaryPidGetProc(pid);
+
+ /*
+ * BackendPidGetProc() and AuxiliaryPidGetProc() return NULL if the pid
+ * isn't valid; but by the time we reach kill(), a process for which we
+ * get a valid proc here might have terminated on its own. There's no way
+ * to acquire a lock on an arbitrary process to prevent that. But since
+	 * this mechanism is usually used to debug a backend or an auxiliary
+	 * process that is running and consuming lots of memory, it is not a
+	 * problem if that process ends on its own before its memory contexts
+	 * get logged.
+ */
+ if (proc == NULL)
+ {
+ /*
+ * This is just a warning so a loop-through-resultset will not abort
+ * if one backend terminated on its own during the run.
+ */
+ ereport(WARNING,
+ (errmsg("PID %d is not a PostgreSQL server process", pid)));
+ PG_RETURN_BOOL(false);
+ }
+
+ if (SendProcSignal(pid, PROCSIG_LOG_MEMORY_CONTEXT, backendId) < 0)
+ {
+ /* Again, just a warning to allow loops */
+ ereport(WARNING,
+ (errmsg("could not send signal to process %d: %m", pid)));
+ PG_RETURN_BOOL(false);
+ }
+
+ PG_RETURN_BOOL(true);
+}
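+
+/*
+ * Illustrative use (by a superuser, or a role granted EXECUTE on it):
+ *
+ *     SELECT pg_log_backend_memory_contexts(pg_backend_pid());
+ *
+ * asks the current backend to dump its own memory context statistics to the
+ * server log at its next interrupt check.
+ */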
diff --git a/src/backend/utils/adt/misc.c b/src/backend/utils/adt/misc.c
new file mode 100644
index 0000000..6d8fa92
--- /dev/null
+++ b/src/backend/utils/adt/misc.c
@@ -0,0 +1,950 @@
+/*-------------------------------------------------------------------------
+ *
+ * misc.c
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/misc.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <math.h>
+#include <unistd.h>
+
+#include "access/sysattr.h"
+#include "access/table.h"
+#include "catalog/catalog.h"
+#include "catalog/pg_tablespace.h"
+#include "catalog/pg_type.h"
+#include "catalog/system_fk_info.h"
+#include "commands/dbcommands.h"
+#include "commands/tablespace.h"
+#include "common/keywords.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "parser/scansup.h"
+#include "pgstat.h"
+#include "postmaster/syslogger.h"
+#include "rewrite/rewriteHandler.h"
+#include "storage/fd.h"
+#include "storage/latch.h"
+#include "tcop/tcopprot.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/lsyscache.h"
+#include "utils/ruleutils.h"
+#include "utils/timestamp.h"
+
+/*
+ * Common subroutine for num_nulls() and num_nonnulls().
+ * Returns true if successful, false if function should return NULL.
+ * If successful, total argument count and number of nulls are
+ * returned into *nargs and *nulls.
+ */
+static bool
+count_nulls(FunctionCallInfo fcinfo,
+ int32 *nargs, int32 *nulls)
+{
+ int32 count = 0;
+ int i;
+
+ /* Did we get a VARIADIC array argument, or separate arguments? */
+ if (get_fn_expr_variadic(fcinfo->flinfo))
+ {
+ ArrayType *arr;
+ int ndims,
+ nitems,
+ *dims;
+ bits8 *bitmap;
+
+ Assert(PG_NARGS() == 1);
+
+ /*
+ * If we get a null as VARIADIC array argument, we can't say anything
+ * useful about the number of elements, so return NULL. This behavior
+ * is consistent with other variadic functions - see concat_internal.
+ */
+ if (PG_ARGISNULL(0))
+ return false;
+
+ /*
+ * Non-null argument had better be an array. We assume that any call
+ * context that could let get_fn_expr_variadic return true will have
+ * checked that a VARIADIC-labeled parameter actually is an array. So
+ * it should be okay to just Assert that it's an array rather than
+ * doing a full-fledged error check.
+ */
+ Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, 0))));
+
+ /* OK, safe to fetch the array value */
+ arr = PG_GETARG_ARRAYTYPE_P(0);
+
+ /* Count the array elements */
+ ndims = ARR_NDIM(arr);
+ dims = ARR_DIMS(arr);
+ nitems = ArrayGetNItems(ndims, dims);
+
+ /* Count those that are NULL */
+ bitmap = ARR_NULLBITMAP(arr);
+ if (bitmap)
+ {
+ int bitmask = 1;
+
+ for (i = 0; i < nitems; i++)
+ {
+ if ((*bitmap & bitmask) == 0)
+ count++;
+
+ bitmask <<= 1;
+ if (bitmask == 0x100)
+ {
+ bitmap++;
+ bitmask = 1;
+ }
+ }
+ }
+
+ *nargs = nitems;
+ *nulls = count;
+ }
+ else
+ {
+ /* Separate arguments, so just count 'em */
+ for (i = 0; i < PG_NARGS(); i++)
+ {
+ if (PG_ARGISNULL(i))
+ count++;
+ }
+
+ *nargs = PG_NARGS();
+ *nulls = count;
+ }
+
+ return true;
+}
+
+/*
+ * num_nulls()
+ * Count the number of NULL arguments
+ */
+Datum
+pg_num_nulls(PG_FUNCTION_ARGS)
+{
+ int32 nargs,
+ nulls;
+
+ if (!count_nulls(fcinfo, &nargs, &nulls))
+ PG_RETURN_NULL();
+
+ PG_RETURN_INT32(nulls);
+}
+
+/*
+ * num_nonnulls()
+ * Count the number of non-NULL arguments
+ */
+Datum
+pg_num_nonnulls(PG_FUNCTION_ARGS)
+{
+ int32 nargs,
+ nulls;
+
+ if (!count_nulls(fcinfo, &nargs, &nulls))
+ PG_RETURN_NULL();
+
+ PG_RETURN_INT32(nargs - nulls);
+}
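+
+/*
+ * For example (illustrative calls), num_nulls(1, NULL, 2) returns 1 and
+ * num_nonnulls(1, NULL, 2) returns 2; num_nulls(VARIADIC ARRAY[1, NULL])
+ * counts over the array's elements and also returns 1.
+ */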
+
+
+/*
+ * current_database()
+ * Expose the current database to the user
+ */
+Datum
+current_database(PG_FUNCTION_ARGS)
+{
+ Name db;
+
+ db = (Name) palloc(NAMEDATALEN);
+
+ namestrcpy(db, get_database_name(MyDatabaseId));
+ PG_RETURN_NAME(db);
+}
+
+
+/*
+ * current_query()
+ * Expose the current query to the user (useful in stored procedures)
+ * We might want to use ActivePortal->sourceText someday.
+ */
+Datum
+current_query(PG_FUNCTION_ARGS)
+{
+ /* there is no easy way to access the more concise 'query_string' */
+ if (debug_query_string)
+ PG_RETURN_TEXT_P(cstring_to_text(debug_query_string));
+ else
+ PG_RETURN_NULL();
+}
+
+/* Function to find out which databases make use of a tablespace */
+
+Datum
+pg_tablespace_databases(PG_FUNCTION_ARGS)
+{
+ Oid tablespaceOid = PG_GETARG_OID(0);
+ ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+ char *location;
+ DIR *dirdesc;
+ struct dirent *de;
+
+ InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC);
+
+ if (tablespaceOid == GLOBALTABLESPACE_OID)
+ {
+ ereport(WARNING,
+ (errmsg("global tablespace never has databases")));
+ /* return empty tuplestore */
+ return (Datum) 0;
+ }
+
+ if (tablespaceOid == DEFAULTTABLESPACE_OID)
+ location = psprintf("base");
+ else
+ location = psprintf("pg_tblspc/%u/%s", tablespaceOid,
+ TABLESPACE_VERSION_DIRECTORY);
+
+ dirdesc = AllocateDir(location);
+
+ if (!dirdesc)
+ {
+ /* the only expected error is ENOENT */
+ if (errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not open directory \"%s\": %m",
+ location)));
+ ereport(WARNING,
+ (errmsg("%u is not a tablespace OID", tablespaceOid)));
+ /* return empty tuplestore */
+ return (Datum) 0;
+ }
+
+ while ((de = ReadDir(dirdesc, location)) != NULL)
+ {
+ Oid datOid = atooid(de->d_name);
+ char *subdir;
+ bool isempty;
+ Datum values[1];
+ bool nulls[1];
+
+ /* this test skips . and .., but is awfully weak */
+ if (!datOid)
+ continue;
+
+ /* if database subdir is empty, don't report tablespace as used */
+
+ subdir = psprintf("%s/%s", location, de->d_name);
+ isempty = directory_is_empty(subdir);
+ pfree(subdir);
+
+ if (isempty)
+ continue; /* indeed, nothing in it */
+
+ values[0] = ObjectIdGetDatum(datOid);
+ nulls[0] = false;
+
+ tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc,
+ values, nulls);
+ }
+
+ FreeDir(dirdesc);
+ return (Datum) 0;
+}
+
+
+/*
+ * pg_tablespace_location - get location for a tablespace
+ */
+Datum
+pg_tablespace_location(PG_FUNCTION_ARGS)
+{
+ Oid tablespaceOid = PG_GETARG_OID(0);
+ char sourcepath[MAXPGPATH];
+ char targetpath[MAXPGPATH];
+ int rllen;
+#ifndef WIN32
+ struct stat st;
+#endif
+
+ /*
+ * It's useful to apply this function to pg_class.reltablespace, wherein
+ * zero means "the database's default tablespace". So, rather than
+ * throwing an error for zero, we choose to assume that's what is meant.
+ */
+ if (tablespaceOid == InvalidOid)
+ tablespaceOid = MyDatabaseTableSpace;
+
+ /*
+ * Return empty string for the cluster's default tablespaces
+ */
+ if (tablespaceOid == DEFAULTTABLESPACE_OID ||
+ tablespaceOid == GLOBALTABLESPACE_OID)
+ PG_RETURN_TEXT_P(cstring_to_text(""));
+
+#if defined(HAVE_READLINK) || defined(WIN32)
+
+ /*
+ * Find the location of the tablespace by reading the symbolic link that
+ * is in pg_tblspc/<oid>.
+ */
+ snprintf(sourcepath, sizeof(sourcepath), "pg_tblspc/%u", tablespaceOid);
+
+ /*
+ * Before reading the link, check if the source path is a link or a
+ * junction point. Note that a directory is possible for a tablespace
+ * created with allow_in_place_tablespaces enabled. If a directory is
+ * found, a relative path to the data directory is returned.
+ */
+#ifdef WIN32
+ if (!pgwin32_is_junction(sourcepath))
+ PG_RETURN_TEXT_P(cstring_to_text(sourcepath));
+#else
+ if (lstat(sourcepath, &st) < 0)
+ {
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not stat file \"%s\": %m",
+ sourcepath)));
+ }
+
+ if (!S_ISLNK(st.st_mode))
+ PG_RETURN_TEXT_P(cstring_to_text(sourcepath));
+#endif
+
+ /*
+	 * In the presence of a link or a junction point, return the path it
+	 * points to.
+ */
+ rllen = readlink(sourcepath, targetpath, sizeof(targetpath));
+ if (rllen < 0)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read symbolic link \"%s\": %m",
+ sourcepath)));
+ if (rllen >= sizeof(targetpath))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("symbolic link \"%s\" target is too long",
+ sourcepath)));
+ targetpath[rllen] = '\0';
+
+ PG_RETURN_TEXT_P(cstring_to_text(targetpath));
+#else
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("tablespaces are not supported on this platform")));
+ PG_RETURN_NULL();
+#endif
+}
+
+/*
+ * pg_sleep - delay for N seconds
+ */
+Datum
+pg_sleep(PG_FUNCTION_ARGS)
+{
+ float8 secs = PG_GETARG_FLOAT8(0);
+ float8 endtime;
+
+ /*
+ * We sleep using WaitLatch, to ensure that we'll wake up promptly if an
+ * important signal (such as SIGALRM or SIGINT) arrives. Because
+ * WaitLatch's upper limit of delay is INT_MAX milliseconds, and the user
+ * might ask for more than that, we sleep for at most 10 minutes and then
+ * loop.
+ *
+ * By computing the intended stop time initially, we avoid accumulation of
+ * extra delay across multiple sleeps. This also ensures we won't delay
+ * less than the specified time when WaitLatch is terminated early by a
+ * non-query-canceling signal such as SIGHUP.
+ */
+#define GetNowFloat() ((float8) GetCurrentTimestamp() / 1000000.0)
+
+ endtime = GetNowFloat() + secs;
+
+ for (;;)
+ {
+ float8 delay;
+ long delay_ms;
+
+ CHECK_FOR_INTERRUPTS();
+
+ delay = endtime - GetNowFloat();
+ if (delay >= 600.0)
+ delay_ms = 600000;
+ else if (delay > 0.0)
+ delay_ms = (long) ceil(delay * 1000.0);
+ else
+ break;
+
+ (void) WaitLatch(MyLatch,
+ WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
+ delay_ms,
+ WAIT_EVENT_PG_SLEEP);
+ ResetLatch(MyLatch);
+ }
+
+ PG_RETURN_VOID();
+}
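+
+/*
+ * For example (illustrative call), SELECT pg_sleep(1.5) waits about 1.5
+ * seconds; the CHECK_FOR_INTERRUPTS() in the loop above lets a query cancel
+ * end the wait early.
+ */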
+
+/* Function to return the list of grammar keywords */
+Datum
+pg_get_keywords(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ MemoryContext oldcontext;
+ TupleDesc tupdesc;
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ tupdesc = CreateTemplateTupleDesc(5);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "word",
+ TEXTOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 2, "catcode",
+ CHAROID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 3, "barelabel",
+ BOOLOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 4, "catdesc",
+ TEXTOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 5, "baredesc",
+ TEXTOID, -1, 0);
+
+ funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+
+ if (funcctx->call_cntr < ScanKeywords.num_keywords)
+ {
+ char *values[5];
+ HeapTuple tuple;
+
+ /* cast-away-const is ugly but alternatives aren't much better */
+ values[0] = unconstify(char *,
+ GetScanKeyword(funcctx->call_cntr,
+ &ScanKeywords));
+
+ switch (ScanKeywordCategories[funcctx->call_cntr])
+ {
+ case UNRESERVED_KEYWORD:
+ values[1] = "U";
+ values[3] = _("unreserved");
+ break;
+ case COL_NAME_KEYWORD:
+ values[1] = "C";
+ values[3] = _("unreserved (cannot be function or type name)");
+ break;
+ case TYPE_FUNC_NAME_KEYWORD:
+ values[1] = "T";
+ values[3] = _("reserved (can be function or type name)");
+ break;
+ case RESERVED_KEYWORD:
+ values[1] = "R";
+ values[3] = _("reserved");
+ break;
+ default: /* shouldn't be possible */
+ values[1] = NULL;
+ values[3] = NULL;
+ break;
+ }
+
+ if (ScanKeywordBareLabel[funcctx->call_cntr])
+ {
+ values[2] = "true";
+ values[4] = _("can be bare label");
+ }
+ else
+ {
+ values[2] = "false";
+ values[4] = _("requires AS");
+ }
+
+ tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+
+ SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+ }
+
+ SRF_RETURN_DONE(funcctx);
+}
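+
+/*
+ * A usage sketch: pg_get_keywords() is a set-returning SQL function, so the
+ * keyword list can be filtered like any table, e.g.
+ *
+ *     SELECT word FROM pg_get_keywords() WHERE catcode = 'R';
+ *
+ * which lists the fully reserved keywords.
+ */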
+
+
+/* Function to return the list of catalog foreign key relationships */
+Datum
+pg_get_catalog_foreign_keys(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ FmgrInfo *arrayinp;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ MemoryContext oldcontext;
+ TupleDesc tupdesc;
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ tupdesc = CreateTemplateTupleDesc(6);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "fktable",
+ REGCLASSOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 2, "fkcols",
+ TEXTARRAYOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 3, "pktable",
+ REGCLASSOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 4, "pkcols",
+ TEXTARRAYOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 5, "is_array",
+ BOOLOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 6, "is_opt",
+ BOOLOID, -1, 0);
+
+ funcctx->tuple_desc = BlessTupleDesc(tupdesc);
+
+ /*
+ * We use array_in to convert the C strings in sys_fk_relationships[]
+ * to text arrays. But we cannot use DirectFunctionCallN to call
+ * array_in, and it wouldn't be very efficient if we could. Fill an
+ * FmgrInfo to use for the call.
+ */
+ arrayinp = (FmgrInfo *) palloc(sizeof(FmgrInfo));
+ fmgr_info(F_ARRAY_IN, arrayinp);
+ funcctx->user_fctx = arrayinp;
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ arrayinp = (FmgrInfo *) funcctx->user_fctx;
+
+ if (funcctx->call_cntr < lengthof(sys_fk_relationships))
+ {
+ const SysFKRelationship *fkrel = &sys_fk_relationships[funcctx->call_cntr];
+ Datum values[6];
+ bool nulls[6];
+ HeapTuple tuple;
+
+ memset(nulls, false, sizeof(nulls));
+
+ values[0] = ObjectIdGetDatum(fkrel->fk_table);
+ values[1] = FunctionCall3(arrayinp,
+ CStringGetDatum(fkrel->fk_columns),
+ ObjectIdGetDatum(TEXTOID),
+ Int32GetDatum(-1));
+ values[2] = ObjectIdGetDatum(fkrel->pk_table);
+ values[3] = FunctionCall3(arrayinp,
+ CStringGetDatum(fkrel->pk_columns),
+ ObjectIdGetDatum(TEXTOID),
+ Int32GetDatum(-1));
+ values[4] = BoolGetDatum(fkrel->is_array);
+ values[5] = BoolGetDatum(fkrel->is_opt);
+
+ tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+
+ SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+ }
+
+ SRF_RETURN_DONE(funcctx);
+}
+
+
+/*
+ * Return the type of the argument.
+ */
+Datum
+pg_typeof(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_OID(get_fn_expr_argtype(fcinfo->flinfo, 0));
+}
+
+
+/*
+ * Implementation of the COLLATE FOR expression; returns the collation
+ * of the argument.
+ */
+Datum
+pg_collation_for(PG_FUNCTION_ARGS)
+{
+ Oid typeid;
+ Oid collid;
+
+ typeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
+ if (!typeid)
+ PG_RETURN_NULL();
+ if (!type_is_collatable(typeid) && typeid != UNKNOWNOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("collations are not supported by type %s",
+ format_type_be(typeid))));
+
+ collid = PG_GET_COLLATION();
+ if (!collid)
+ PG_RETURN_NULL();
+ PG_RETURN_TEXT_P(cstring_to_text(generate_collation_name(collid)));
+}
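+
+/*
+ * A usage sketch: this backs the SQL "COLLATION FOR" expression.  Assuming
+ * the "de_DE" collation exists, a call might look like
+ *
+ *     SELECT collation for ('foo' COLLATE "de_DE");    -- returns de_DE
+ *
+ * An argument of a non-collatable type raises an error instead.
+ */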
+
+
+/*
+ * pg_relation_is_updatable - determine which update events the specified
+ * relation supports.
+ *
+ * This relies on relation_is_updatable() in rewriteHandler.c, which see
+ * for additional information.
+ */
+Datum
+pg_relation_is_updatable(PG_FUNCTION_ARGS)
+{
+ Oid reloid = PG_GETARG_OID(0);
+ bool include_triggers = PG_GETARG_BOOL(1);
+
+ PG_RETURN_INT32(relation_is_updatable(reloid, NIL, include_triggers, NULL));
+}
+
+/*
+ * pg_column_is_updatable - determine whether a column is updatable
+ *
+ * This function encapsulates the decision about just what
+ * information_schema.columns.is_updatable actually means. It's not clear
+ * whether deletability of the column's relation should be required, so
+ * we want that decision in C code where we could change it without initdb.
+ */
+Datum
+pg_column_is_updatable(PG_FUNCTION_ARGS)
+{
+ Oid reloid = PG_GETARG_OID(0);
+ AttrNumber attnum = PG_GETARG_INT16(1);
+ AttrNumber col = attnum - FirstLowInvalidHeapAttributeNumber;
+ bool include_triggers = PG_GETARG_BOOL(2);
+ int events;
+
+ /* System columns are never updatable */
+ if (attnum <= 0)
+ PG_RETURN_BOOL(false);
+
+ events = relation_is_updatable(reloid, NIL, include_triggers,
+ bms_make_singleton(col));
+
+ /* We require both updatability and deletability of the relation */
+#define REQ_EVENTS ((1 << CMD_UPDATE) | (1 << CMD_DELETE))
+
+ PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS);
+}
+
+
+/*
+ * Is character a valid identifier start?
+ * Must match scan.l's {ident_start} character class.
+ */
+static bool
+is_ident_start(unsigned char c)
+{
+ /* Underscores and ASCII letters are OK */
+ if (c == '_')
+ return true;
+ if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
+ return true;
+ /* Any high-bit-set character is OK (might be part of a multibyte char) */
+ if (IS_HIGHBIT_SET(c))
+ return true;
+ return false;
+}
+
+/*
+ * Is character a valid identifier continuation?
+ * Must match scan.l's {ident_cont} character class.
+ */
+static bool
+is_ident_cont(unsigned char c)
+{
+ /* Can be digit or dollar sign ... */
+ if ((c >= '0' && c <= '9') || c == '$')
+ return true;
+ /* ... or an identifier start character */
+ return is_ident_start(c);
+}
+
+/*
+ * parse_ident - parse a SQL qualified identifier into separate identifiers.
+ * When strict mode is active (second parameter), any characters after
+ * the last identifier are disallowed.
+ */
+Datum
+parse_ident(PG_FUNCTION_ARGS)
+{
+ text *qualname = PG_GETARG_TEXT_PP(0);
+ bool strict = PG_GETARG_BOOL(1);
+ char *qualname_str = text_to_cstring(qualname);
+ ArrayBuildState *astate = NULL;
+ char *nextp;
+ bool after_dot = false;
+
+ /*
+ * The code below scribbles on qualname_str in some cases, so we should
+ * reconvert qualname if we need to show the original string in error
+ * messages.
+ */
+ nextp = qualname_str;
+
+ /* skip leading whitespace */
+ while (scanner_isspace(*nextp))
+ nextp++;
+
+ for (;;)
+ {
+ char *curname;
+ bool missing_ident = true;
+
+ if (*nextp == '"')
+ {
+ char *endp;
+
+ curname = nextp + 1;
+ for (;;)
+ {
+ endp = strchr(nextp + 1, '"');
+ if (endp == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname)),
+ errdetail("String has unclosed double quotes.")));
+ if (endp[1] != '"')
+ break;
+ memmove(endp, endp + 1, strlen(endp));
+ nextp = endp;
+ }
+ nextp = endp + 1;
+ *endp = '\0';
+
+ if (endp - curname == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname)),
+ errdetail("Quoted identifier must not be empty.")));
+
+ astate = accumArrayResult(astate, CStringGetTextDatum(curname),
+ false, TEXTOID, CurrentMemoryContext);
+ missing_ident = false;
+ }
+ else if (is_ident_start((unsigned char) *nextp))
+ {
+ char *downname;
+ int len;
+ text *part;
+
+ curname = nextp++;
+ while (is_ident_cont((unsigned char) *nextp))
+ nextp++;
+
+ len = nextp - curname;
+
+ /*
+ * We don't implicitly truncate identifiers. This is useful for
+ * allowing the user to check for specific parts of the identifier
+ * being too long. It's easy enough for the user to get the
+ * truncated names by casting our output to name[].
+ */
+ downname = downcase_identifier(curname, len, false, false);
+ part = cstring_to_text_with_len(downname, len);
+ astate = accumArrayResult(astate, PointerGetDatum(part), false,
+ TEXTOID, CurrentMemoryContext);
+ missing_ident = false;
+ }
+
+ if (missing_ident)
+ {
+ /* Different error messages based on where we failed. */
+ if (*nextp == '.')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname)),
+ errdetail("No valid identifier before \".\".")));
+ else if (after_dot)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname)),
+ errdetail("No valid identifier after \".\".")));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname))));
+ }
+
+ while (scanner_isspace(*nextp))
+ nextp++;
+
+ if (*nextp == '.')
+ {
+ after_dot = true;
+ nextp++;
+ while (scanner_isspace(*nextp))
+ nextp++;
+ }
+ else if (*nextp == '\0')
+ {
+ break;
+ }
+ else
+ {
+ if (strict)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("string is not a valid identifier: \"%s\"",
+ text_to_cstring(qualname))));
+ break;
+ }
+ }
+
+ PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
+}
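+
+/*
+ * A usage sketch: unquoted parts are downcased while quoted parts are kept
+ * verbatim, e.g.
+ *
+ *     SELECT parse_ident('"SomeSchema".someTable');
+ *         -- returns {SomeSchema,sometable}
+ */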
+
+/*
+ * pg_current_logfile
+ *
+ * Report current log file used by log collector by scanning current_logfiles.
+ */
+Datum
+pg_current_logfile(PG_FUNCTION_ARGS)
+{
+ FILE *fd;
+ char lbuffer[MAXPGPATH];
+ char *logfmt;
+
+ /* The log format parameter is optional */
+ if (PG_NARGS() == 0 || PG_ARGISNULL(0))
+ logfmt = NULL;
+ else
+ {
+ logfmt = text_to_cstring(PG_GETARG_TEXT_PP(0));
+
+ if (strcmp(logfmt, "stderr") != 0 &&
+ strcmp(logfmt, "csvlog") != 0 &&
+ strcmp(logfmt, "jsonlog") != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("log format \"%s\" is not supported", logfmt),
+ errhint("The supported log formats are \"stderr\", \"csvlog\", and \"jsonlog\".")));
+ }
+
+ fd = AllocateFile(LOG_METAINFO_DATAFILE, "r");
+ if (fd == NULL)
+ {
+ if (errno != ENOENT)
+ ereport(ERROR,
+ (errcode_for_file_access(),
+ errmsg("could not read file \"%s\": %m",
+ LOG_METAINFO_DATAFILE)));
+ PG_RETURN_NULL();
+ }
+
+#ifdef WIN32
+ /* syslogger.c writes CRLF line endings on Windows */
+ _setmode(_fileno(fd), _O_TEXT);
+#endif
+
+ /*
+ * Read the file to gather current log filename(s) registered by the
+ * syslogger.
+ */
+ while (fgets(lbuffer, sizeof(lbuffer), fd) != NULL)
+ {
+ char *log_format;
+ char *log_filepath;
+ char *nlpos;
+
+ /* Extract log format and log file path from the line. */
+ log_format = lbuffer;
+ log_filepath = strchr(lbuffer, ' ');
+ if (log_filepath == NULL)
+ {
+ /* Uh oh. No space found, so file content is corrupted. */
+ elog(ERROR,
+ "missing space character in \"%s\"", LOG_METAINFO_DATAFILE);
+ break;
+ }
+
+ *log_filepath = '\0';
+ log_filepath++;
+ nlpos = strchr(log_filepath, '\n');
+ if (nlpos == NULL)
+ {
+ /* Uh oh. No newline found, so file content is corrupted. */
+ elog(ERROR,
+ "missing newline character in \"%s\"", LOG_METAINFO_DATAFILE);
+ break;
+ }
+ *nlpos = '\0';
+
+ if (logfmt == NULL || strcmp(logfmt, log_format) == 0)
+ {
+ FreeFile(fd);
+ PG_RETURN_TEXT_P(cstring_to_text(log_filepath));
+ }
+ }
+
+ /* Close the current log filename file. */
+ FreeFile(fd);
+
+ PG_RETURN_NULL();
+}
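+
+/*
+ * A usage sketch: with the logging collector running, e.g.
+ *
+ *     SELECT pg_current_logfile('csvlog');
+ *
+ * returns the path of the current CSV-format log file, or NULL if no log
+ * file of that format is currently in use.
+ */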
+
+/*
+ * Report current log file used by log collector (1 argument version)
+ *
+ * note: this wrapper is necessary to pass the sanity check in opr_sanity,
+ * which checks that all built-in functions that share the implementing C
+ * function take the same number of arguments
+ */
+Datum
+pg_current_logfile_1arg(PG_FUNCTION_ARGS)
+{
+ return pg_current_logfile(fcinfo);
+}
+
+/*
+ * SQL wrapper around RelationGetReplicaIndex().
+ */
+Datum
+pg_get_replica_identity_index(PG_FUNCTION_ARGS)
+{
+ Oid reloid = PG_GETARG_OID(0);
+ Oid idxoid;
+ Relation rel;
+
+ rel = table_open(reloid, AccessShareLock);
+ idxoid = RelationGetReplicaIndex(rel);
+ table_close(rel, AccessShareLock);
+
+ if (OidIsValid(idxoid))
+ PG_RETURN_OID(idxoid);
+ else
+ PG_RETURN_NULL();
+}
diff --git a/src/backend/utils/adt/multirangetypes.c b/src/backend/utils/adt/multirangetypes.c
new file mode 100644
index 0000000..da5c7d0
--- /dev/null
+++ b/src/backend/utils/adt/multirangetypes.c
@@ -0,0 +1,2920 @@
+/*-------------------------------------------------------------------------
+ *
+ * multirangetypes.c
+ * I/O functions, operators, and support functions for multirange types.
+ *
+ * The stored (serialized) format of a multirange value is:
+ *
+ * 12 bytes: MultirangeType struct including varlena header, multirange
+ * type's OID and the number of ranges in the multirange.
+ * 4 * (rangesCount - 1) bytes: 32-bit items pointing to each range
+ *                              in the multirange, starting from
+ *                              the second one.
+ * 1 * rangesCount bytes: 8-bit flags for each range in the multirange
+ * The rest of the multirange consists of the range bound values pointed
+ * to by the multirange items.
+ *
+ * The majority of items contain the lengths of the corresponding range
+ * bound values; thanks to that, items are typically small numbers, which
+ * makes multiranges compression-friendly.  Every
+ * MULTIRANGE_ITEM_OFFSET_STRIDE'th item instead contains the offset of the
+ * corresponding range bound values, which allows fast lookups for a
+ * particular range index.  Offsets are counted starting from the end of the
+ * flags, aligned to the bound type.
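+ *
+ * As an illustration (byte counts past the header depend on the bound type
+ * and its alignment), a multirange holding three ranges is laid out roughly
+ * as:
+ *
+ *     header (12 bytes) | item[0], item[1] (4 bytes each) | 3 flag bytes |
+ *     padding to bound alignment | bound values of ranges 0, 1 and 2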
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/multirangetypes.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/tupmacs.h"
+#include "common/hashfn.h"
+#include "funcapi.h"
+#include "lib/stringinfo.h"
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "port/pg_bitutils.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/rangetypes.h"
+#include "utils/multirangetypes.h"
+#include "utils/array.h"
+#include "utils/memutils.h"
+
+/* fn_extra cache entry for one of the multirange I/O functions */
+typedef struct MultirangeIOData
+{
+ TypeCacheEntry *typcache; /* multirange type's typcache entry */
+ FmgrInfo typioproc; /* range type's I/O proc */
+ Oid typioparam; /* range type's I/O parameter */
+} MultirangeIOData;
+
+typedef enum
+{
+ MULTIRANGE_BEFORE_RANGE,
+ MULTIRANGE_IN_RANGE,
+ MULTIRANGE_IN_RANGE_ESCAPED,
+ MULTIRANGE_IN_RANGE_QUOTED,
+ MULTIRANGE_IN_RANGE_QUOTED_ESCAPED,
+ MULTIRANGE_AFTER_RANGE,
+ MULTIRANGE_FINISHED,
+} MultirangeParseState;
+
+/*
+ * Macros for accessing past MultirangeType parts of multirange: items, flags
+ * and boundaries.
+ */
+#define MultirangeGetItemsPtr(mr) ((uint32 *) ((Pointer) (mr) + \
+ sizeof(MultirangeType)))
+#define MultirangeGetFlagsPtr(mr) ((uint8 *) ((Pointer) (mr) + \
+ sizeof(MultirangeType) + ((mr)->rangeCount - 1) * sizeof(uint32)))
+#define MultirangeGetBoundariesPtr(mr, align) ((Pointer) (mr) + \
+ att_align_nominal(sizeof(MultirangeType) + \
+ ((mr)->rangeCount - 1) * sizeof(uint32) + \
+ (mr)->rangeCount * sizeof(uint8), (align)))
+
+#define MULTIRANGE_ITEM_OFF_BIT 0x80000000
+#define MULTIRANGE_ITEM_GET_OFFLEN(item) ((item) & 0x7FFFFFFF)
+#define MULTIRANGE_ITEM_HAS_OFF(item) ((item) & MULTIRANGE_ITEM_OFF_BIT)
+#define MULTIRANGE_ITEM_OFFSET_STRIDE 4
+
+typedef int (*multirange_bsearch_comparison) (TypeCacheEntry *typcache,
+ RangeBound *lower,
+ RangeBound *upper,
+ void *key,
+ bool *match);
+
+static MultirangeIOData *get_multirange_io_data(FunctionCallInfo fcinfo,
+ Oid mltrngtypid,
+ IOFuncSelector func);
+static int32 multirange_canonicalize(TypeCacheEntry *rangetyp,
+ int32 input_range_count,
+ RangeType **ranges);
+
+/*
+ *----------------------------------------------------------
+ * I/O FUNCTIONS
+ *----------------------------------------------------------
+ */
+
+/*
+ * Converts string to multirange.
+ *
+ * We expect curly brackets to bound the list, with zero or more ranges
+ * separated by commas. We accept whitespace anywhere: before/after our
+ * brackets and around the commas. Ranges can be the empty literal or some
+ * stuff inside parens/brackets. Mostly we delegate parsing the individual
+ * range contents to range_in, but we have to detect quoting and
+ * backslash-escaping which can happen for range bounds. Backslashes can
+ * escape something inside or outside a quoted string, and a quoted string
+ * can escape quote marks with either backslashes or double double-quotes.
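+ *
+ * For example, these are all valid int4multirange literals (illustrative):
+ *     '{}'                  -- the empty multirange
+ *     '{empty}'             -- also empty; empty ranges are dropped
+ *     '{[1,3), [5,7)}'      -- two ranges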
+ */
+Datum
+multirange_in(PG_FUNCTION_ARGS)
+{
+ char *input_str = PG_GETARG_CSTRING(0);
+ Oid mltrngtypoid = PG_GETARG_OID(1);
+ Oid typmod = PG_GETARG_INT32(2);
+ TypeCacheEntry *rangetyp;
+ int32 ranges_seen = 0;
+ int32 range_count = 0;
+ int32 range_capacity = 8;
+ RangeType *range;
+ RangeType **ranges = palloc(range_capacity * sizeof(RangeType *));
+ MultirangeIOData *cache;
+ MultirangeType *ret;
+ MultirangeParseState parse_state;
+ const char *ptr = input_str;
+ const char *range_str_begin = NULL;
+ int32 range_str_len;
+ char *range_str;
+
+ cache = get_multirange_io_data(fcinfo, mltrngtypoid, IOFunc_input);
+ rangetyp = cache->typcache->rngtype;
+
+ /* consume whitespace */
+ while (*ptr != '\0' && isspace((unsigned char) *ptr))
+ ptr++;
+
+ if (*ptr == '{')
+ ptr++;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed multirange literal: \"%s\"",
+ input_str),
+ errdetail("Missing left brace.")));
+
+ /* consume ranges */
+ parse_state = MULTIRANGE_BEFORE_RANGE;
+ for (; parse_state != MULTIRANGE_FINISHED; ptr++)
+ {
+ char ch = *ptr;
+
+ if (ch == '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed multirange literal: \"%s\"",
+ input_str),
+ errdetail("Unexpected end of input.")));
+
+ /* skip whitespace */
+ if (isspace((unsigned char) ch))
+ continue;
+
+ switch (parse_state)
+ {
+ case MULTIRANGE_BEFORE_RANGE:
+ if (ch == '[' || ch == '(')
+ {
+ range_str_begin = ptr;
+ parse_state = MULTIRANGE_IN_RANGE;
+ }
+ else if (ch == '}' && ranges_seen == 0)
+ parse_state = MULTIRANGE_FINISHED;
+ else if (pg_strncasecmp(ptr, RANGE_EMPTY_LITERAL,
+ strlen(RANGE_EMPTY_LITERAL)) == 0)
+ {
+ ranges_seen++;
+ /* nothing to do with an empty range */
+ ptr += strlen(RANGE_EMPTY_LITERAL) - 1;
+ parse_state = MULTIRANGE_AFTER_RANGE;
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed multirange literal: \"%s\"",
+ input_str),
+ errdetail("Expected range start.")));
+ break;
+ case MULTIRANGE_IN_RANGE:
+ if (ch == ']' || ch == ')')
+ {
+ range_str_len = ptr - range_str_begin + 1;
+ range_str = pnstrdup(range_str_begin, range_str_len);
+ if (range_capacity == range_count)
+ {
+ range_capacity *= 2;
+ ranges = (RangeType **)
+ repalloc(ranges, range_capacity * sizeof(RangeType *));
+ }
+ ranges_seen++;
+ range = DatumGetRangeTypeP(InputFunctionCall(&cache->typioproc,
+ range_str,
+ cache->typioparam,
+ typmod));
+ if (!RangeIsEmpty(range))
+ ranges[range_count++] = range;
+ parse_state = MULTIRANGE_AFTER_RANGE;
+ }
+ else
+ {
+ if (ch == '"')
+ parse_state = MULTIRANGE_IN_RANGE_QUOTED;
+ else if (ch == '\\')
+ parse_state = MULTIRANGE_IN_RANGE_ESCAPED;
+
+ /*
+ * We will include this character into range_str once we
+ * find the end of the range value.
+ */
+ }
+ break;
+ case MULTIRANGE_IN_RANGE_ESCAPED:
+
+ /*
+ * We will include this character into range_str once we find
+ * the end of the range value.
+ */
+ parse_state = MULTIRANGE_IN_RANGE;
+ break;
+ case MULTIRANGE_IN_RANGE_QUOTED:
+ if (ch == '"')
+ if (*(ptr + 1) == '"')
+ {
+						/* two quote marks mean an escaped quote mark */
+ ptr++;
+ }
+ else
+ parse_state = MULTIRANGE_IN_RANGE;
+ else if (ch == '\\')
+ parse_state = MULTIRANGE_IN_RANGE_QUOTED_ESCAPED;
+
+ /*
+ * We will include this character into range_str once we find
+ * the end of the range value.
+ */
+ break;
+ case MULTIRANGE_AFTER_RANGE:
+ if (ch == ',')
+ parse_state = MULTIRANGE_BEFORE_RANGE;
+ else if (ch == '}')
+ parse_state = MULTIRANGE_FINISHED;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed multirange literal: \"%s\"",
+ input_str),
+ errdetail("Expected comma or end of multirange.")));
+ break;
+ case MULTIRANGE_IN_RANGE_QUOTED_ESCAPED:
+
+ /*
+ * We will include this character into range_str once we find
+ * the end of the range value.
+ */
+ parse_state = MULTIRANGE_IN_RANGE_QUOTED;
+ break;
+ default:
+ elog(ERROR, "unknown parse state: %d", parse_state);
+ }
+ }
+
+ /* consume whitespace */
+ while (*ptr != '\0' && isspace((unsigned char) *ptr))
+ ptr++;
+
+ if (*ptr != '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed multirange literal: \"%s\"",
+ input_str),
+ errdetail("Junk after closing right brace.")));
+
+ ret = make_multirange(mltrngtypoid, rangetyp, range_count, ranges);
+ PG_RETURN_MULTIRANGE_P(ret);
+}
+
+Datum
+multirange_out(PG_FUNCTION_ARGS)
+{
+ MultirangeType *multirange = PG_GETARG_MULTIRANGE_P(0);
+ Oid mltrngtypoid = MultirangeTypeGetOid(multirange);
+ MultirangeIOData *cache;
+ StringInfoData buf;
+ RangeType *range;
+ char *rangeStr;
+ int32 range_count;
+ int32 i;
+ RangeType **ranges;
+
+ cache = get_multirange_io_data(fcinfo, mltrngtypoid, IOFunc_output);
+
+ initStringInfo(&buf);
+
+ appendStringInfoChar(&buf, '{');
+
+ multirange_deserialize(cache->typcache->rngtype, multirange, &range_count, &ranges);
+ for (i = 0; i < range_count; i++)
+ {
+ if (i > 0)
+ appendStringInfoChar(&buf, ',');
+ range = ranges[i];
+ rangeStr = OutputFunctionCall(&cache->typioproc, RangeTypePGetDatum(range));
+ appendStringInfoString(&buf, rangeStr);
+ }
+
+ appendStringInfoChar(&buf, '}');
+
+ PG_RETURN_CSTRING(buf.data);
+}
+
+/*
+ * Binary representation: first an int32 count of ranges, followed by the
+ * ranges in their native binary representation.
+ */
+Datum
+multirange_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ Oid mltrngtypoid = PG_GETARG_OID(1);
+ int32 typmod = PG_GETARG_INT32(2);
+ MultirangeIOData *cache;
+ uint32 range_count;
+ RangeType **ranges;
+ MultirangeType *ret;
+ StringInfoData tmpbuf;
+
+ cache = get_multirange_io_data(fcinfo, mltrngtypoid, IOFunc_receive);
+
+ range_count = pq_getmsgint(buf, 4);
+ ranges = palloc(range_count * sizeof(RangeType *));
+
+ initStringInfo(&tmpbuf);
+ for (int i = 0; i < range_count; i++)
+ {
+ uint32 range_len = pq_getmsgint(buf, 4);
+ const char *range_data = pq_getmsgbytes(buf, range_len);
+
+ resetStringInfo(&tmpbuf);
+ appendBinaryStringInfo(&tmpbuf, range_data, range_len);
+
+ ranges[i] = DatumGetRangeTypeP(ReceiveFunctionCall(&cache->typioproc,
+ &tmpbuf,
+ cache->typioparam,
+ typmod));
+ }
+ pfree(tmpbuf.data);
+
+ pq_getmsgend(buf);
+
+ ret = make_multirange(mltrngtypoid, cache->typcache->rngtype,
+ range_count, ranges);
+ PG_RETURN_MULTIRANGE_P(ret);
+}
+
+Datum
+multirange_send(PG_FUNCTION_ARGS)
+{
+ MultirangeType *multirange = PG_GETARG_MULTIRANGE_P(0);
+ Oid mltrngtypoid = MultirangeTypeGetOid(multirange);
+ StringInfo buf = makeStringInfo();
+ RangeType **ranges;
+ int32 range_count;
+ MultirangeIOData *cache;
+
+ cache = get_multirange_io_data(fcinfo, mltrngtypoid, IOFunc_send);
+
+ /* construct output */
+ pq_begintypsend(buf);
+
+ pq_sendint32(buf, multirange->rangeCount);
+
+ multirange_deserialize(cache->typcache->rngtype, multirange, &range_count, &ranges);
+ for (int i = 0; i < range_count; i++)
+ {
+ Datum range;
+
+ range = RangeTypePGetDatum(ranges[i]);
+ range = PointerGetDatum(SendFunctionCall(&cache->typioproc, range));
+
+ pq_sendint32(buf, VARSIZE(range) - VARHDRSZ);
+ pq_sendbytes(buf, VARDATA(range), VARSIZE(range) - VARHDRSZ);
+ }
+
+ PG_RETURN_BYTEA_P(pq_endtypsend(buf));
+}
+
+/*
+ * get_multirange_io_data: get cached information needed for multirange type I/O
+ *
+ * The multirange I/O functions need a bit more cached info than other multirange
+ * functions, so they store a MultirangeIOData struct in fn_extra, not just a
+ * pointer to a type cache entry.
+ */
+static MultirangeIOData *
+get_multirange_io_data(FunctionCallInfo fcinfo, Oid mltrngtypid, IOFuncSelector func)
+{
+ MultirangeIOData *cache = (MultirangeIOData *) fcinfo->flinfo->fn_extra;
+
+ if (cache == NULL || cache->typcache->type_id != mltrngtypid)
+ {
+ Oid typiofunc;
+ int16 typlen;
+ bool typbyval;
+ char typalign;
+ char typdelim;
+
+ cache = (MultirangeIOData *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(MultirangeIOData));
+ cache->typcache = lookup_type_cache(mltrngtypid, TYPECACHE_MULTIRANGE_INFO);
+ if (cache->typcache->rngtype == NULL)
+ elog(ERROR, "type %u is not a multirange type", mltrngtypid);
+
+ /* get_type_io_data does more than we need, but is convenient */
+ get_type_io_data(cache->typcache->rngtype->type_id,
+ func,
+ &typlen,
+ &typbyval,
+ &typalign,
+ &typdelim,
+ &cache->typioparam,
+ &typiofunc);
+
+ if (!OidIsValid(typiofunc))
+ {
+ /* this could only happen for receive or send */
+ if (func == IOFunc_receive)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("no binary input function available for type %s",
+ format_type_be(cache->typcache->rngtype->type_id))));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("no binary output function available for type %s",
+ format_type_be(cache->typcache->rngtype->type_id))));
+ }
+ fmgr_info_cxt(typiofunc, &cache->typioproc,
+ fcinfo->flinfo->fn_mcxt);
+
+ fcinfo->flinfo->fn_extra = (void *) cache;
+ }
+
+ return cache;
+}
+
+/*
+ * Converts a list of arbitrary ranges into a list that is sorted and merged.
+ * Changes the contents of `ranges`.
+ *
+ * Returns the number of slots actually used, which may be less than
+ * input_range_count but never more.
+ *
+ * We assume that no input ranges are null, but empties are okay.
+ */
+static int32
+multirange_canonicalize(TypeCacheEntry *rangetyp, int32 input_range_count,
+ RangeType **ranges)
+{
+ RangeType *lastRange = NULL;
+ RangeType *currentRange;
+ int32 i;
+ int32 output_range_count = 0;
+
+ /* Sort the ranges so we can find the ones that overlap/meet. */
+ qsort_arg(ranges, input_range_count, sizeof(RangeType *), range_compare,
+ rangetyp);
+
+ /* Now merge where possible: */
+ for (i = 0; i < input_range_count; i++)
+ {
+ currentRange = ranges[i];
+ if (RangeIsEmpty(currentRange))
+ continue;
+
+ if (lastRange == NULL)
+ {
+ ranges[output_range_count++] = lastRange = currentRange;
+ continue;
+ }
+
+ /*
+ * range_adjacent_internal gives true if *either* A meets B or B meets
+		 * A, which is not quite what we want, but we rely on the sorting
+ * above to rule out B meets A ever happening.
+ */
+ if (range_adjacent_internal(rangetyp, lastRange, currentRange))
+ {
+ /* The two ranges touch (without overlap), so merge them: */
+ ranges[output_range_count - 1] = lastRange =
+ range_union_internal(rangetyp, lastRange, currentRange, false);
+ }
+ else if (range_before_internal(rangetyp, lastRange, currentRange))
+ {
+ /* There's a gap, so make a new entry: */
+ lastRange = ranges[output_range_count] = currentRange;
+ output_range_count++;
+ }
+ else
+ {
+ /* They must overlap, so merge them: */
+ ranges[output_range_count - 1] = lastRange =
+ range_union_internal(rangetyp, lastRange, currentRange, true);
+ }
+ }
+
+ return output_range_count;
+}
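+
+/*
+ * As an illustration (hypothetical int4 ranges): the input
+ * {[2,5), [1,3), [6,7), [7,8)} sorts to {[1,3), [2,5), [6,7), [7,8)} and
+ * then merges to {[1,5), [6,8)}, since [1,3) overlaps [2,5) and [6,7) is
+ * adjacent to [7,8).
+ */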
+
+/*
+ *----------------------------------------------------------
+ * SUPPORT FUNCTIONS
+ *
+ * These functions aren't in pg_proc, but are useful for
+ * defining new generic multirange functions in C.
+ *----------------------------------------------------------
+ */
+
+/*
+ * multirange_get_typcache: get cached information about a multirange type
+ *
+ * This is for use by multirange-related functions that follow the convention
+ * of using the fn_extra field as a pointer to the type cache entry for
+ * the multirange type. Functions that need to cache more information than
+ * that must fend for themselves.
+ */
+TypeCacheEntry *
+multirange_get_typcache(FunctionCallInfo fcinfo, Oid mltrngtypid)
+{
+ TypeCacheEntry *typcache = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+
+ if (typcache == NULL ||
+ typcache->type_id != mltrngtypid)
+ {
+ typcache = lookup_type_cache(mltrngtypid, TYPECACHE_MULTIRANGE_INFO);
+ if (typcache->rngtype == NULL)
+ elog(ERROR, "type %u is not a multirange type", mltrngtypid);
+ fcinfo->flinfo->fn_extra = (void *) typcache;
+ }
+
+ return typcache;
+}
+
+
+/*
+ * Estimate size occupied by serialized multirange.
+ */
+static Size
+multirange_size_estimate(TypeCacheEntry *rangetyp, int32 range_count,
+ RangeType **ranges)
+{
+ char elemalign = rangetyp->rngelemtype->typalign;
+ Size size;
+ int32 i;
+
+ /*
+ * Count space for MultirangeType struct, items and flags.
+ */
+ size = att_align_nominal(sizeof(MultirangeType) +
+ Max(range_count - 1, 0) * sizeof(uint32) +
+ range_count * sizeof(uint8), elemalign);
+
+ /* Count space for range bounds */
+ for (i = 0; i < range_count; i++)
+ size += att_align_nominal(VARSIZE(ranges[i]) -
+ sizeof(RangeType) -
+ sizeof(char), elemalign);
+
+ return size;
+}
+
+/*
+ * Write multirange data into pre-allocated space.
+ */
+static void
+write_multirange_data(MultirangeType *multirange, TypeCacheEntry *rangetyp,
+ int32 range_count, RangeType **ranges)
+{
+ uint32 *items;
+ uint32 prev_offset = 0;
+ uint8 *flags;
+ int32 i;
+ Pointer begin,
+ ptr;
+ char elemalign = rangetyp->rngelemtype->typalign;
+
+ items = MultirangeGetItemsPtr(multirange);
+ flags = MultirangeGetFlagsPtr(multirange);
+ ptr = begin = MultirangeGetBoundariesPtr(multirange, elemalign);
+ for (i = 0; i < range_count; i++)
+ {
+ uint32 len;
+
+ if (i > 0)
+ {
+ /*
+			 * Every range, except the first one, has an item.  Every
+			 * MULTIRANGE_ITEM_OFFSET_STRIDE'th item contains an offset;
+			 * the others contain lengths.
+ */
+ items[i - 1] = ptr - begin;
+ if ((i % MULTIRANGE_ITEM_OFFSET_STRIDE) != 0)
+ items[i - 1] -= prev_offset;
+ else
+ items[i - 1] |= MULTIRANGE_ITEM_OFF_BIT;
+ prev_offset = ptr - begin;
+ }
+ flags[i] = *((Pointer) ranges[i] + VARSIZE(ranges[i]) - sizeof(char));
+ len = VARSIZE(ranges[i]) - sizeof(RangeType) - sizeof(char);
+ memcpy(ptr, (Pointer) (ranges[i] + 1), len);
+ ptr += att_align_nominal(len, elemalign);
+ }
+}
+
+
+/*
+ * This serializes the multirange from a list of non-null ranges. It also
+ * sorts the ranges and merges any that touch. The ranges should already be
+ * detoasted, and there should be no NULLs. This should be used by most
+ * callers.
+ *
+ * Note that we may change the `ranges` parameter (the pointers, but not
+ * any already-existing RangeType contents).
+ */
+MultirangeType *
+make_multirange(Oid mltrngtypoid, TypeCacheEntry *rangetyp, int32 range_count,
+ RangeType **ranges)
+{
+ MultirangeType *multirange;
+ Size size;
+
+ /* Sort and merge input ranges. */
+ range_count = multirange_canonicalize(rangetyp, range_count, ranges);
+
+ /* Note: zero-fill is required here, just as in heap tuples */
+ size = multirange_size_estimate(rangetyp, range_count, ranges);
+ multirange = palloc0(size);
+ SET_VARSIZE(multirange, size);
+
+ /* Now fill in the datum */
+ multirange->multirangetypid = mltrngtypoid;
+ multirange->rangeCount = range_count;
+
+ write_multirange_data(multirange, rangetyp, range_count, ranges);
+
+ return multirange;
+}
+
+/*
+ * Get offset of bounds values of the i'th range in the multirange.
+ */
+static uint32
+multirange_get_bounds_offset(const MultirangeType *multirange, int32 i)
+{
+ uint32 *items = MultirangeGetItemsPtr(multirange);
+ uint32 offset = 0;
+
+	/*
+	 * Sum the item lengths until we reach an item that stores an offset.
+	 */
+ while (i > 0)
+ {
+ offset += MULTIRANGE_ITEM_GET_OFFLEN(items[i - 1]);
+ if (MULTIRANGE_ITEM_HAS_OFF(items[i - 1]))
+ break;
+ i--;
+ }
+ return offset;
+}
+
+/*
+ * Fetch the i'th range from the multirange.
+ */
+RangeType *
+multirange_get_range(TypeCacheEntry *rangetyp,
+ const MultirangeType *multirange, int i)
+{
+ uint32 offset;
+ uint8 flags;
+ Pointer begin,
+ ptr;
+ int16 typlen = rangetyp->rngelemtype->typlen;
+ char typalign = rangetyp->rngelemtype->typalign;
+ uint32 len;
+ RangeType *range;
+
+ Assert(i < multirange->rangeCount);
+
+ offset = multirange_get_bounds_offset(multirange, i);
+ flags = MultirangeGetFlagsPtr(multirange)[i];
+ ptr = begin = MultirangeGetBoundariesPtr(multirange, typalign) + offset;
+
+ /*
+	 * Calculate the size of the bound values.  In principle, we could get
+	 * the offset of the next range's bound values and compute the size from
+	 * that.  But range
+ * bound values are aligned, so we have to walk the values to get the
+ * exact size.
+ */
+ if (RANGE_HAS_LBOUND(flags))
+ ptr = (Pointer) att_addlength_pointer(ptr, typlen, ptr);
+ if (RANGE_HAS_UBOUND(flags))
+ {
+ ptr = (Pointer) att_align_pointer(ptr, typalign, typlen, ptr);
+ ptr = (Pointer) att_addlength_pointer(ptr, typlen, ptr);
+ }
+ len = (ptr - begin) + sizeof(RangeType) + sizeof(uint8);
+
+ range = palloc0(len);
+ SET_VARSIZE(range, len);
+ range->rangetypid = rangetyp->type_id;
+
+ memcpy(range + 1, begin, ptr - begin);
+ *((uint8 *) (range + 1) + (ptr - begin)) = flags;
+
+ return range;
+}
+
+/*
+ * Fetch bounds from the i'th range of the multirange. This is the shortcut for
+ * doing the same thing as multirange_get_range() + range_deserialize(), but
+ * performing fewer operations.
+ */
+void
+multirange_get_bounds(TypeCacheEntry *rangetyp,
+ const MultirangeType *multirange,
+ uint32 i, RangeBound *lower, RangeBound *upper)
+{
+ uint32 offset;
+ uint8 flags;
+ Pointer ptr;
+ int16 typlen = rangetyp->rngelemtype->typlen;
+ char typalign = rangetyp->rngelemtype->typalign;
+ bool typbyval = rangetyp->rngelemtype->typbyval;
+ Datum lbound;
+ Datum ubound;
+
+ Assert(i < multirange->rangeCount);
+
+ offset = multirange_get_bounds_offset(multirange, i);
+ flags = MultirangeGetFlagsPtr(multirange)[i];
+ ptr = MultirangeGetBoundariesPtr(multirange, typalign) + offset;
+
+ /* multirange can't contain empty ranges */
+ Assert((flags & RANGE_EMPTY) == 0);
+
+ /* fetch lower bound, if any */
+ if (RANGE_HAS_LBOUND(flags))
+ {
+ /* att_align_pointer cannot be necessary here */
+ lbound = fetch_att(ptr, typbyval, typlen);
+ ptr = (Pointer) att_addlength_pointer(ptr, typlen, ptr);
+ }
+ else
+ lbound = (Datum) 0;
+
+ /* fetch upper bound, if any */
+ if (RANGE_HAS_UBOUND(flags))
+ {
+ ptr = (Pointer) att_align_pointer(ptr, typalign, typlen, ptr);
+ ubound = fetch_att(ptr, typbyval, typlen);
+ /* no need for att_addlength_pointer */
+ }
+ else
+ ubound = (Datum) 0;
+
+ /* emit results */
+ lower->val = lbound;
+ lower->infinite = (flags & RANGE_LB_INF) != 0;
+ lower->inclusive = (flags & RANGE_LB_INC) != 0;
+ lower->lower = true;
+
+ upper->val = ubound;
+ upper->infinite = (flags & RANGE_UB_INF) != 0;
+ upper->inclusive = (flags & RANGE_UB_INC) != 0;
+ upper->lower = false;
+}
+
+/*
+ * Construct union range from the multirange.
+ */
+RangeType *
+multirange_get_union_range(TypeCacheEntry *rangetyp,
+ const MultirangeType *mr)
+{
+ RangeBound lower,
+ upper,
+ tmp;
+
+ if (MultirangeIsEmpty(mr))
+ return make_empty_range(rangetyp);
+
+ multirange_get_bounds(rangetyp, mr, 0, &lower, &tmp);
+ multirange_get_bounds(rangetyp, mr, mr->rangeCount - 1, &tmp, &upper);
+
+ return make_range(rangetyp, &lower, &upper, false);
+}
+
+
+/*
+ * multirange_deserialize: deconstruct a multirange value
+ *
+ * NB: the given multirange object must be fully detoasted; it cannot have a
+ * short varlena header.
+ */
+void
+multirange_deserialize(TypeCacheEntry *rangetyp,
+ const MultirangeType *multirange, int32 *range_count,
+ RangeType ***ranges)
+{
+ *range_count = multirange->rangeCount;
+
+	/* Convert each packed range into a full RangeType */
+ if (*range_count > 0)
+ {
+ int i;
+
+ *ranges = palloc(*range_count * sizeof(RangeType *));
+ for (i = 0; i < *range_count; i++)
+ (*ranges)[i] = multirange_get_range(rangetyp, multirange, i);
+ }
+ else
+ {
+ *ranges = NULL;
+ }
+}
+
+MultirangeType *
+make_empty_multirange(Oid mltrngtypoid, TypeCacheEntry *rangetyp)
+{
+ return make_multirange(mltrngtypoid, rangetyp, 0, NULL);
+}
+
+/*
+ * Similar to range_overlaps_internal(), but takes range bounds instead of
+ * ranges as arguments.
+ */
+static bool
+range_bounds_overlaps(TypeCacheEntry *typcache,
+ RangeBound *lower1, RangeBound *upper1,
+ RangeBound *lower2, RangeBound *upper2)
+{
+ if (range_cmp_bounds(typcache, lower1, lower2) >= 0 &&
+ range_cmp_bounds(typcache, lower1, upper2) <= 0)
+ return true;
+
+ if (range_cmp_bounds(typcache, lower2, lower1) >= 0 &&
+ range_cmp_bounds(typcache, lower2, upper1) <= 0)
+ return true;
+
+ return false;
+}
+
+/*
+ * Similar to range_contains_internal(), but takes range bounds instead of
+ * ranges as arguments.
+ */
+static bool
+range_bounds_contains(TypeCacheEntry *typcache,
+ RangeBound *lower1, RangeBound *upper1,
+ RangeBound *lower2, RangeBound *upper2)
+{
+ if (range_cmp_bounds(typcache, lower1, lower2) <= 0 &&
+ range_cmp_bounds(typcache, upper1, upper2) >= 0)
+ return true;
+
+ return false;
+}
+
+/*
+ * Check whether the given key matches any range in the multirange, using
+ * binary search.  If no candidate range is found, that counts as a mismatch.
+ * Even when a candidate range is found, the comparison function can still
+ * report it as either a match or a mismatch.  For instance, if we search for
+ * containment, we can find a range that overlaps the key range but does not
+ * contain it, and that counts as a mismatch.
+ */
+static bool
+multirange_bsearch_match(TypeCacheEntry *typcache, const MultirangeType *mr,
+ void *key, multirange_bsearch_comparison cmp_func)
+{
+ uint32 l,
+ u,
+ idx;
+ int comparison;
+ bool match = false;
+
+ l = 0;
+ u = mr->rangeCount;
+ while (l < u)
+ {
+ RangeBound lower,
+ upper;
+
+ idx = (l + u) / 2;
+ multirange_get_bounds(typcache, mr, idx, &lower, &upper);
+ comparison = (*cmp_func) (typcache, &lower, &upper, key, &match);
+
+ if (comparison < 0)
+ u = idx;
+ else if (comparison > 0)
+ l = idx + 1;
+ else
+ return match;
+ }
+
+ return false;
+}
+
+/*
+ *----------------------------------------------------------
+ * GENERIC FUNCTIONS
+ *----------------------------------------------------------
+ */
+
+/*
+ * Construct multirange value from zero or more ranges. Since this is a
+ * variadic function we get passed an array. The array must contain ranges
+ * that match our return value, and there must be no NULLs.
+ */
+Datum
+multirange_constructor2(PG_FUNCTION_ARGS)
+{
+ Oid mltrngtypid = get_fn_expr_rettype(fcinfo->flinfo);
+ Oid rngtypid;
+ TypeCacheEntry *typcache;
+ TypeCacheEntry *rangetyp;
+ ArrayType *rangeArray;
+ int range_count;
+ Datum *elements;
+ bool *nulls;
+ RangeType **ranges;
+ int dims;
+ int i;
+
+ typcache = multirange_get_typcache(fcinfo, mltrngtypid);
+ rangetyp = typcache->rngtype;
+
+ /*
+ * A no-arg invocation should call multirange_constructor0 instead, but
+	 * returning an empty multirange is what that does.
+ */
+
+ if (PG_NARGS() == 0)
+ PG_RETURN_MULTIRANGE_P(make_multirange(mltrngtypid, rangetyp, 0, NULL));
+
+ /*
+ * This check should be guaranteed by our signature, but let's do it just
+ * in case.
+ */
+
+ if (PG_ARGISNULL(0))
+ elog(ERROR,
+ "multirange values cannot contain null members");
+
+ rangeArray = PG_GETARG_ARRAYTYPE_P(0);
+
+ dims = ARR_NDIM(rangeArray);
+ if (dims > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_CARDINALITY_VIOLATION),
+ errmsg("multiranges cannot be constructed from multidimensional arrays")));
+
+ rngtypid = ARR_ELEMTYPE(rangeArray);
+ if (rngtypid != rangetyp->type_id)
+ elog(ERROR, "type %u does not match constructor type", rngtypid);
+
+ /*
+ * Be careful: we can still be called with zero ranges, like this:
+	 * `int4multirange(variadic '{}'::int4range[])`
+ */
+ if (dims == 0)
+ {
+ range_count = 0;
+ ranges = NULL;
+ }
+ else
+ {
+ deconstruct_array(rangeArray, rngtypid, rangetyp->typlen, rangetyp->typbyval,
+ rangetyp->typalign, &elements, &nulls, &range_count);
+
+ ranges = palloc0(range_count * sizeof(RangeType *));
+ for (i = 0; i < range_count; i++)
+ {
+ if (nulls[i])
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("multirange values cannot contain null members")));
+
+ /* make_multirange will do its own copy */
+ ranges[i] = DatumGetRangeTypeP(elements[i]);
+ }
+ }
+
+ PG_RETURN_MULTIRANGE_P(make_multirange(mltrngtypid, rangetyp, range_count, ranges));
+}
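+
+/*
+ * A usage sketch: this function backs the variadic multirange constructors,
+ * e.g.
+ *
+ *     SELECT int4multirange(int4range(1, 3), int4range(5, 7));
+ *         -- {[1,3),[5,7)}
+ */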
+
+/*
+ * Construct multirange value from a single range. It'd be nice if we could
+ * just use multirange_constructor2 for this case, but we need a non-variadic
+ * single-arg function to let us define a CAST from a range to its multirange.
+ */
+Datum
+multirange_constructor1(PG_FUNCTION_ARGS)
+{
+ Oid mltrngtypid = get_fn_expr_rettype(fcinfo->flinfo);
+ Oid rngtypid;
+ TypeCacheEntry *typcache;
+ TypeCacheEntry *rangetyp;
+ RangeType *range;
+
+ typcache = multirange_get_typcache(fcinfo, mltrngtypid);
+ rangetyp = typcache->rngtype;
+
+ /*
+ * This check should be guaranteed by our signature, but let's do it just
+ * in case.
+ */
+
+ if (PG_ARGISNULL(0))
+ elog(ERROR,
+ "multirange values cannot contain null members");
+
+ range = PG_GETARG_RANGE_P(0);
+
+ /* Make sure the range type matches. */
+ rngtypid = RangeTypeGetOid(range);
+ if (rngtypid != rangetyp->type_id)
+ elog(ERROR, "type %u does not match constructor type", rngtypid);
+
+ PG_RETURN_MULTIRANGE_P(make_multirange(mltrngtypid, rangetyp, 1, &range));
+}
+
+/*
+ * Constructor just like multirange_constructor1, but opr_sanity gets angry
+ * if the same internal function handles multiple functions with different arg
+ * counts.
+ */
+Datum
+multirange_constructor0(PG_FUNCTION_ARGS)
+{
+ Oid mltrngtypid;
+ TypeCacheEntry *typcache;
+ TypeCacheEntry *rangetyp;
+
+ /* This should always be called without arguments */
+ if (PG_NARGS() != 0)
+ elog(ERROR,
+ "niladic multirange constructor must not receive arguments");
+
+ mltrngtypid = get_fn_expr_rettype(fcinfo->flinfo);
+ typcache = multirange_get_typcache(fcinfo, mltrngtypid);
+ rangetyp = typcache->rngtype;
+
+ PG_RETURN_MULTIRANGE_P(make_multirange(mltrngtypid, rangetyp, 0, NULL));
+}
+
+
+/* multirange, multirange -> multirange type functions */
+
+/* multirange union */
+Datum
+multirange_union(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0);
+ MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1);
+ TypeCacheEntry *typcache;
+ int32 range_count1;
+ int32 range_count2;
+ int32 range_count3;
+ RangeType **ranges1;
+ RangeType **ranges2;
+ RangeType **ranges3;
+
+ if (MultirangeIsEmpty(mr1))
+ PG_RETURN_MULTIRANGE_P(mr2);
+ if (MultirangeIsEmpty(mr2))
+ PG_RETURN_MULTIRANGE_P(mr1);
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1));
+
+ multirange_deserialize(typcache->rngtype, mr1, &range_count1, &ranges1);
+ multirange_deserialize(typcache->rngtype, mr2, &range_count2, &ranges2);
+
+ range_count3 = range_count1 + range_count2;
+ ranges3 = palloc0(range_count3 * sizeof(RangeType *));
+ memcpy(ranges3, ranges1, range_count1 * sizeof(RangeType *));
+ memcpy(ranges3 + range_count1, ranges2, range_count2 * sizeof(RangeType *));
+ PG_RETURN_MULTIRANGE_P(make_multirange(typcache->type_id, typcache->rngtype,
+ range_count3, ranges3));
+}
+
+/* multirange minus */
+Datum
+multirange_minus(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0);
+ MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1);
+ Oid mltrngtypoid = MultirangeTypeGetOid(mr1);
+ TypeCacheEntry *typcache;
+ TypeCacheEntry *rangetyp;
+ int32 range_count1;
+ int32 range_count2;
+ RangeType **ranges1;
+ RangeType **ranges2;
+
+ typcache = multirange_get_typcache(fcinfo, mltrngtypoid);
+ rangetyp = typcache->rngtype;
+
+ if (MultirangeIsEmpty(mr1) || MultirangeIsEmpty(mr2))
+ PG_RETURN_MULTIRANGE_P(mr1);
+
+ multirange_deserialize(typcache->rngtype, mr1, &range_count1, &ranges1);
+ multirange_deserialize(typcache->rngtype, mr2, &range_count2, &ranges2);
+
+ PG_RETURN_MULTIRANGE_P(multirange_minus_internal(mltrngtypoid,
+ rangetyp,
+ range_count1,
+ ranges1,
+ range_count2,
+ ranges2));
+}
+
+MultirangeType *
+multirange_minus_internal(Oid mltrngtypoid, TypeCacheEntry *rangetyp,
+ int32 range_count1, RangeType **ranges1,
+ int32 range_count2, RangeType **ranges2)
+{
+ RangeType *r1;
+ RangeType *r2;
+ RangeType **ranges3;
+ int32 range_count3;
+ int32 i1;
+ int32 i2;
+
+ /*
+	 * Worst case: every range in ranges2 makes a different cut to some range
+	 * in ranges1.
+ */
+ ranges3 = palloc0((range_count1 + range_count2) * sizeof(RangeType *));
+ range_count3 = 0;
+
+ /*
+ * For each range in mr1, keep subtracting until it's gone or the ranges
+ * in mr2 have passed it. After a subtraction we assign what's left back
+ * to r1. The parallel progress through mr1 and mr2 is similar to
+ * multirange_overlaps_multirange_internal.
+ */
+ r2 = ranges2[0];
+ for (i1 = 0, i2 = 0; i1 < range_count1; i1++)
+ {
+ r1 = ranges1[i1];
+
+ /* Discard r2s while r2 << r1 */
+ while (r2 != NULL && range_before_internal(rangetyp, r2, r1))
+ {
+ r2 = ++i2 >= range_count2 ? NULL : ranges2[i2];
+ }
+
+ while (r2 != NULL)
+ {
+ if (range_split_internal(rangetyp, r1, r2, &ranges3[range_count3], &r1))
+ {
+ /*
+ * If r2 takes a bite out of the middle of r1, we need two
+ * outputs
+ */
+ range_count3++;
+ r2 = ++i2 >= range_count2 ? NULL : ranges2[i2];
+ }
+ else if (range_overlaps_internal(rangetyp, r1, r2))
+ {
+ /*
+ * If r2 overlaps r1, replace r1 with r1 - r2.
+ */
+ r1 = range_minus_internal(rangetyp, r1, r2);
+
+ /*
+ * If r2 goes past r1, then we need to stay with it, in case
+ * it hits future r1s. Otherwise we need to keep r1, in case
+ * future r2s hit it. Since we already subtracted, there's no
+ * point in using the overright/overleft calls.
+ */
+ if (RangeIsEmpty(r1) || range_before_internal(rangetyp, r1, r2))
+ break;
+ else
+ r2 = ++i2 >= range_count2 ? NULL : ranges2[i2];
+ }
+ else
+ {
+ /*
+ * This and all future r2s are past r1, so keep them. Also
+ * assign whatever is left of r1 to the result.
+ */
+ break;
+ }
+ }
+
+ /*
+ * Nothing else can remove anything from r1, so keep it. Even if r1 is
+ * empty here, make_multirange will remove it.
+ */
+ ranges3[range_count3++] = r1;
+ }
+
+ return make_multirange(mltrngtypoid, rangetyp, range_count3, ranges3);
+}
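+
+/*
+ * As an illustration (hypothetical int4 values):
+ *
+ *     {[1,10)} - {[3,5), [7,8)}  =>  {[1,3), [5,7), [8,10)}
+ *
+ * The first subtraction splits [1,10) in two; the remainder [5,10) is
+ * assigned back to r1 and then cut again by [7,8).
+ */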
+
+/* multirange intersection */
+Datum
+multirange_intersect(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0);
+ MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1);
+ Oid mltrngtypoid = MultirangeTypeGetOid(mr1);
+ TypeCacheEntry *typcache;
+ TypeCacheEntry *rangetyp;
+ int32 range_count1;
+ int32 range_count2;
+ RangeType **ranges1;
+ RangeType **ranges2;
+
+ typcache = multirange_get_typcache(fcinfo, mltrngtypoid);
+ rangetyp = typcache->rngtype;
+
+ if (MultirangeIsEmpty(mr1) || MultirangeIsEmpty(mr2))
+ PG_RETURN_MULTIRANGE_P(make_empty_multirange(mltrngtypoid, rangetyp));
+
+ multirange_deserialize(rangetyp, mr1, &range_count1, &ranges1);
+ multirange_deserialize(rangetyp, mr2, &range_count2, &ranges2);
+
+ PG_RETURN_MULTIRANGE_P(multirange_intersect_internal(mltrngtypoid,
+ rangetyp,
+ range_count1,
+ ranges1,
+ range_count2,
+ ranges2));
+}
+
+MultirangeType *
+multirange_intersect_internal(Oid mltrngtypoid, TypeCacheEntry *rangetyp,
+ int32 range_count1, RangeType **ranges1,
+ int32 range_count2, RangeType **ranges2)
+{
+ RangeType *r1;
+ RangeType *r2;
+ RangeType **ranges3;
+ int32 range_count3;
+ int32 i1;
+ int32 i2;
+
+ if (range_count1 == 0 || range_count2 == 0)
+ return make_multirange(mltrngtypoid, rangetyp, 0, NULL);
+
+ /*-----------------------------------------------
+ * Worst case is a stitching pattern like this:
+ *
+ * mr1: --- --- --- ---
+ * mr2: --- --- ---
+ * mr3: - - - - - -
+ *
+ * That seems to be range_count1 + range_count2 - 1,
+ * but one extra won't hurt.
+ *-----------------------------------------------
+ */
+ ranges3 = palloc0((range_count1 + range_count2) * sizeof(RangeType *));
+ range_count3 = 0;
+
+ /*
+ * For each range in mr1, keep intersecting until the ranges in mr2 have
+ * passed it. The parallel progress through mr1 and mr2 is similar to
+	 * multirange_minus_internal, but we don't have to assign back
+ * to r1.
+ */
+ r2 = ranges2[0];
+ for (i1 = 0, i2 = 0; i1 < range_count1; i1++)
+ {
+ r1 = ranges1[i1];
+
+ /* Discard r2s while r2 << r1 */
+ while (r2 != NULL && range_before_internal(rangetyp, r2, r1))
+ {
+ r2 = ++i2 >= range_count2 ? NULL : ranges2[i2];
+ }
+
+ while (r2 != NULL)
+ {
+ if (range_overlaps_internal(rangetyp, r1, r2))
+ {
+ /* Keep the overlapping part */
+ ranges3[range_count3++] = range_intersect_internal(rangetyp, r1, r2);
+
+ /* If we "used up" all of r2, go to the next one... */
+ if (range_overleft_internal(rangetyp, r2, r1))
+ r2 = ++i2 >= range_count2 ? NULL : ranges2[i2];
+
+ /* ...otherwise go to the next r1 */
+ else
+ break;
+ }
+ else
+ /* We're past r1, so move to the next one */
+ break;
+ }
+
+ /* If we're out of r2s, there can be no more intersections */
+ if (r2 == NULL)
+ break;
+ }
+
+ return make_multirange(mltrngtypoid, rangetyp, range_count3, ranges3);
+}
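+
+/*
+ * As an illustration (hypothetical int4 values):
+ *
+ *     {[1,5), [7,10)} * {[3,8)}  =>  {[3,5), [7,8)}
+ *
+ * Each overlapping pair of ranges contributes its intersection to the
+ * result.
+ */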
+
+/*
+ * range_agg_transfn: combine adjacent/overlapping ranges.
+ *
+ * All we do here is gather the input ranges into an array
+ * so that the finalfn can sort and combine them.
+ */
+Datum
+range_agg_transfn(PG_FUNCTION_ARGS)
+{
+ MemoryContext aggContext;
+ Oid rngtypoid;
+ ArrayBuildState *state;
+
+ if (!AggCheckCallContext(fcinfo, &aggContext))
+ elog(ERROR, "range_agg_transfn called in non-aggregate context");
+
+ rngtypoid = get_fn_expr_argtype(fcinfo->flinfo, 1);
+ if (!type_is_range(rngtypoid))
+ elog(ERROR, "range_agg must be called with a range");
+
+ if (PG_ARGISNULL(0))
+ state = initArrayResult(rngtypoid, aggContext, false);
+ else
+ state = (ArrayBuildState *) PG_GETARG_POINTER(0);
+
+ /* skip NULLs */
+ if (!PG_ARGISNULL(1))
+ accumArrayResult(state, PG_GETARG_DATUM(1), false, rngtypoid, aggContext);
+
+ PG_RETURN_POINTER(state);
+}
+
+/*
+ * range_agg_finalfn: use our internal array to merge touching ranges.
+ *
+ * Shared by range_agg_finalfn(anyrange) and
+ * multirange_agg_finalfn(anymultirange).
+ */
+Datum
+range_agg_finalfn(PG_FUNCTION_ARGS)
+{
+ MemoryContext aggContext;
+ Oid mltrngtypoid;
+ TypeCacheEntry *typcache;
+ ArrayBuildState *state;
+ int32 range_count;
+ RangeType **ranges;
+ int i;
+
+ if (!AggCheckCallContext(fcinfo, &aggContext))
+ elog(ERROR, "range_agg_finalfn called in non-aggregate context");
+
+ state = PG_ARGISNULL(0) ? NULL : (ArrayBuildState *) PG_GETARG_POINTER(0);
+ if (state == NULL)
+ /* This shouldn't be possible, but just in case.... */
+ PG_RETURN_NULL();
+
+ /* Also return NULL if we had zero inputs, like other aggregates */
+ range_count = state->nelems;
+ if (range_count == 0)
+ PG_RETURN_NULL();
+
+ mltrngtypoid = get_fn_expr_rettype(fcinfo->flinfo);
+ typcache = multirange_get_typcache(fcinfo, mltrngtypoid);
+
+ ranges = palloc0(range_count * sizeof(RangeType *));
+ for (i = 0; i < range_count; i++)
+ ranges[i] = DatumGetRangeTypeP(state->dvalues[i]);
+
+ PG_RETURN_MULTIRANGE_P(make_multirange(mltrngtypoid, typcache->rngtype, range_count, ranges));
+}
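+
+/*
+ * A usage sketch: range_agg merges overlapping or adjacent inputs into a
+ * multirange, e.g.
+ *
+ *     SELECT range_agg(r)
+ *     FROM (VALUES (int4range(1, 3)), (int4range(2, 5))) AS t(r);
+ *         -- {[1,5)}
+ */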
+
+/*
+ * multirange_agg_transfn: combine adjacent/overlapping multiranges.
+ *
+ * All we do here is gather the input multiranges' ranges into an array so
+ * that the finalfn can sort and combine them.
+ */
+Datum
+multirange_agg_transfn(PG_FUNCTION_ARGS)
+{
+ MemoryContext aggContext;
+ Oid mltrngtypoid;
+ TypeCacheEntry *typcache;
+ TypeCacheEntry *rngtypcache;
+ ArrayBuildState *state;
+
+ if (!AggCheckCallContext(fcinfo, &aggContext))
+ elog(ERROR, "multirange_agg_transfn called in non-aggregate context");
+
+ mltrngtypoid = get_fn_expr_argtype(fcinfo->flinfo, 1);
+ if (!type_is_multirange(mltrngtypoid))
+ elog(ERROR, "range_agg must be called with a multirange");
+
+ typcache = multirange_get_typcache(fcinfo, mltrngtypoid);
+ rngtypcache = typcache->rngtype;
+
+ if (PG_ARGISNULL(0))
+ state = initArrayResult(rngtypcache->type_id, aggContext, false);
+ else
+ state = (ArrayBuildState *) PG_GETARG_POINTER(0);
+
+ /* skip NULLs */
+ if (!PG_ARGISNULL(1))
+ {
+ MultirangeType *current;
+ int32 range_count;
+ RangeType **ranges;
+
+ current = PG_GETARG_MULTIRANGE_P(1);
+ multirange_deserialize(rngtypcache, current, &range_count, &ranges);
+ if (range_count == 0)
+ {
+ /*
+ * Add an empty range so we get an empty result (not a null
+ * result).
+ */
+ accumArrayResult(state,
+ RangeTypePGetDatum(make_empty_range(rngtypcache)),
+ false, rngtypcache->type_id, aggContext);
+ }
+ else
+ {
+ for (int32 i = 0; i < range_count; i++)
+ accumArrayResult(state, RangeTypePGetDatum(ranges[i]), false, rngtypcache->type_id, aggContext);
+ }
+ }
+
+ PG_RETURN_POINTER(state);
+}
+
+Datum
+multirange_intersect_agg_transfn(PG_FUNCTION_ARGS)
+{
+ MemoryContext aggContext;
+ Oid mltrngtypoid;
+ TypeCacheEntry *typcache;
+ MultirangeType *result;
+ MultirangeType *current;
+ int32 range_count1;
+ int32 range_count2;
+ RangeType **ranges1;
+ RangeType **ranges2;
+
+ if (!AggCheckCallContext(fcinfo, &aggContext))
+ elog(ERROR, "multirange_intersect_agg_transfn called in non-aggregate context");
+
+ mltrngtypoid = get_fn_expr_argtype(fcinfo->flinfo, 1);
+ if (!type_is_multirange(mltrngtypoid))
+ elog(ERROR, "range_intersect_agg must be called with a multirange");
+
+ typcache = multirange_get_typcache(fcinfo, mltrngtypoid);
+
+ /* strictness ensures these are non-null */
+ result = PG_GETARG_MULTIRANGE_P(0);
+ current = PG_GETARG_MULTIRANGE_P(1);
+
+ multirange_deserialize(typcache->rngtype, result, &range_count1, &ranges1);
+ multirange_deserialize(typcache->rngtype, current, &range_count2, &ranges2);
+
+ result = multirange_intersect_internal(mltrngtypoid,
+ typcache->rngtype,
+ range_count1,
+ ranges1,
+ range_count2,
+ ranges2);
+	PG_RETURN_MULTIRANGE_P(result);
+}
+
+
+/* multirange -> element type functions */
+
+/* extract lower bound value */
+Datum
+multirange_lower(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0);
+ TypeCacheEntry *typcache;
+ RangeBound lower;
+ RangeBound upper;
+
+ if (MultirangeIsEmpty(mr))
+ PG_RETURN_NULL();
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+
+ multirange_get_bounds(typcache->rngtype, mr, 0,
+ &lower, &upper);
+
+ if (!lower.infinite)
+ PG_RETURN_DATUM(lower.val);
+ else
+ PG_RETURN_NULL();
+}
+
+/* extract upper bound value */
+Datum
+multirange_upper(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0);
+ TypeCacheEntry *typcache;
+ RangeBound lower;
+ RangeBound upper;
+
+ if (MultirangeIsEmpty(mr))
+ PG_RETURN_NULL();
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+
+ multirange_get_bounds(typcache->rngtype, mr, mr->rangeCount - 1,
+ &lower, &upper);
+
+ if (!upper.infinite)
+ PG_RETURN_DATUM(upper.val);
+ else
+ PG_RETURN_NULL();
+}
+
+
+/* multirange -> bool functions */
+
+/* is multirange empty? */
+Datum
+multirange_empty(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0);
+
+ PG_RETURN_BOOL(MultirangeIsEmpty(mr));
+}
+
+/* is lower bound inclusive? */
+Datum
+multirange_lower_inc(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0);
+ TypeCacheEntry *typcache;
+ RangeBound lower;
+ RangeBound upper;
+
+ if (MultirangeIsEmpty(mr))
+ PG_RETURN_BOOL(false);
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+ multirange_get_bounds(typcache->rngtype, mr, 0,
+ &lower, &upper);
+
+ PG_RETURN_BOOL(lower.inclusive);
+}
+
+/* is upper bound inclusive? */
+Datum
+multirange_upper_inc(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0);
+ TypeCacheEntry *typcache;
+ RangeBound lower;
+ RangeBound upper;
+
+ if (MultirangeIsEmpty(mr))
+ PG_RETURN_BOOL(false);
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+ multirange_get_bounds(typcache->rngtype, mr, mr->rangeCount - 1,
+ &lower, &upper);
+
+ PG_RETURN_BOOL(upper.inclusive);
+}
+
+/* is lower bound infinite? */
+Datum
+multirange_lower_inf(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0);
+ TypeCacheEntry *typcache;
+ RangeBound lower;
+ RangeBound upper;
+
+ if (MultirangeIsEmpty(mr))
+ PG_RETURN_BOOL(false);
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+ multirange_get_bounds(typcache->rngtype, mr, 0,
+ &lower, &upper);
+
+ PG_RETURN_BOOL(lower.infinite);
+}
+
+/* is upper bound infinite? */
+Datum
+multirange_upper_inf(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0);
+ TypeCacheEntry *typcache;
+ RangeBound lower;
+ RangeBound upper;
+
+ if (MultirangeIsEmpty(mr))
+ PG_RETURN_BOOL(false);
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+ multirange_get_bounds(typcache->rngtype, mr, mr->rangeCount - 1,
+ &lower, &upper);
+
+ PG_RETURN_BOOL(upper.infinite);
+}
+
+
+
+/* multirange, element -> bool functions */
+
+/* contains? */
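+/*
+ * e.g. '{[1,3),[5,7)}'::int4multirange @> 2 is true, while @> 4 is false
+ * (illustrative).
+ */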
+Datum
+multirange_contains_elem(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0);
+ Datum val = PG_GETARG_DATUM(1);
+ TypeCacheEntry *typcache;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+
+ PG_RETURN_BOOL(multirange_contains_elem_internal(typcache->rngtype, mr, val));
+}
+
+/* contained by? */
+Datum
+elem_contained_by_multirange(PG_FUNCTION_ARGS)
+{
+ Datum val = PG_GETARG_DATUM(0);
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+
+ PG_RETURN_BOOL(multirange_contains_elem_internal(typcache->rngtype, mr, val));
+}
+
+/*
+ * Comparison function for multirange_bsearch_match(), checking whether any
+ * range of a multirange contains the given key element.  Returns -1 if the
+ * probed range lies entirely above the key, 1 if it lies entirely below it,
+ * and 0 (setting *match) when the key falls within the range's bounds.
+ */
+static int
+multirange_elem_bsearch_comparison(TypeCacheEntry *typcache,
+ RangeBound *lower, RangeBound *upper,
+ void *key, bool *match)
+{
+ Datum val = *((Datum *) key);
+ int cmp;
+
+ if (!lower->infinite)
+ {
+ cmp = DatumGetInt32(FunctionCall2Coll(&typcache->rng_cmp_proc_finfo,
+ typcache->rng_collation,
+ lower->val, val));
+ if (cmp > 0 || (cmp == 0 && !lower->inclusive))
+ return -1;
+ }
+
+ if (!upper->infinite)
+ {
+ cmp = DatumGetInt32(FunctionCall2Coll(&typcache->rng_cmp_proc_finfo,
+ typcache->rng_collation,
+ upper->val, val));
+ if (cmp < 0 || (cmp == 0 && !upper->inclusive))
+ return 1;
+ }
+
+ *match = true;
+ return 0;
+}
+
+/*
+ * Test whether multirange mr contains a specific element value.
+ */
+bool
+multirange_contains_elem_internal(TypeCacheEntry *rangetyp,
+ const MultirangeType *mr, Datum val)
+{
+ if (MultirangeIsEmpty(mr))
+ return false;
+
+ return multirange_bsearch_match(rangetyp, mr, &val,
+ multirange_elem_bsearch_comparison);
+}
+
+/* multirange, range -> bool functions */
+
+/* contains? */
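+/*
+ * e.g. '{[1,5),[7,10)}'::int4multirange @> '[2,4)'::int4range is true, but
+ * @> '[4,8)'::int4range is false since that range spans the gap
+ * (illustrative).
+ */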
+Datum
+multirange_contains_range(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0);
+ RangeType *r = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+
+ PG_RETURN_BOOL(multirange_contains_range_internal(typcache->rngtype, mr, r));
+}
+
+Datum
+range_contains_multirange(PG_FUNCTION_ARGS)
+{
+ RangeType *r = PG_GETARG_RANGE_P(0);
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+
+ PG_RETURN_BOOL(range_contains_multirange_internal(typcache->rngtype, r, mr));
+}
+
+/* contained by? */
+Datum
+range_contained_by_multirange(PG_FUNCTION_ARGS)
+{
+ RangeType *r = PG_GETARG_RANGE_P(0);
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+
+ PG_RETURN_BOOL(multirange_contains_range_internal(typcache->rngtype, mr, r));
+}
+
+Datum
+multirange_contained_by_range(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0);
+ RangeType *r = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+
+ PG_RETURN_BOOL(range_contains_multirange_internal(typcache->rngtype, r, mr));
+}
+
+/*
+ * Comparison function for multirange_bsearch_match(), checking whether any
+ * range of a multirange contains the given key range.
+ */
+static int
+multirange_range_contains_bsearch_comparison(TypeCacheEntry *typcache,
+ RangeBound *lower, RangeBound *upper,
+ void *key, bool *match)
+{
+ RangeBound *keyLower = (RangeBound *) key;
+ RangeBound *keyUpper = (RangeBound *) key + 1;
+
+ /* Check if key range is strictly in the left or in the right */
+ if (range_cmp_bounds(typcache, keyUpper, lower) < 0)
+ return -1;
+ if (range_cmp_bounds(typcache, keyLower, upper) > 0)
+ return 1;
+
+ /*
+	 * At this point we have found an overlapping range.  We still have to
+	 * check whether it actually contains the key range, but either way the
+	 * search stops here, because a multirange holds only non-overlapping
+	 * ranges.
+ */
+ *match = range_bounds_contains(typcache, lower, upper, keyLower, keyUpper);
+
+ return 0;
+}
+
+/*
+ * Test whether multirange mr contains a specific range r.
+ */
+bool
+multirange_contains_range_internal(TypeCacheEntry *rangetyp,
+ const MultirangeType *mr,
+ const RangeType *r)
+{
+ RangeBound bounds[2];
+ bool empty;
+
+ /*
+ * Every multirange contains an infinite number of empty ranges, even an
+ * empty one.
+ */
+ if (RangeIsEmpty(r))
+ return true;
+
+ if (MultirangeIsEmpty(mr))
+ return false;
+
+ range_deserialize(rangetyp, r, &bounds[0], &bounds[1], &empty);
+ Assert(!empty);
+
+ return multirange_bsearch_match(rangetyp, mr, bounds,
+ multirange_range_contains_bsearch_comparison);
+}
+
+/*
+ * Test whether range r contains a multirange mr.
+ */
+bool
+range_contains_multirange_internal(TypeCacheEntry *rangetyp,
+ const RangeType *r,
+ const MultirangeType *mr)
+{
+ RangeBound lower1,
+ upper1,
+ lower2,
+ upper2,
+ tmp;
+ bool empty;
+
+ /*
+ * Every range contains an infinite number of empty multiranges, even an
+ * empty one.
+ */
+ if (MultirangeIsEmpty(mr))
+ return true;
+
+ if (RangeIsEmpty(r))
+ return false;
+
+ /* Range contains multirange iff it contains its union range. */
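+	/*
+	 * e.g. '[1,10)'::int4range @> '{[2,3),[5,6)}'::int4multirange reduces to
+	 * checking that [1,10) covers the overall bounds 2 and 6 (illustrative).
+	 */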
+ range_deserialize(rangetyp, r, &lower1, &upper1, &empty);
+ Assert(!empty);
+ multirange_get_bounds(rangetyp, mr, 0, &lower2, &tmp);
+ multirange_get_bounds(rangetyp, mr, mr->rangeCount - 1, &tmp, &upper2);
+
+ return range_bounds_contains(rangetyp, &lower1, &upper1, &lower2, &upper2);
+}
+
+
+/* multirange, multirange -> bool functions */
+
+/* equality (internal version) */
+bool
+multirange_eq_internal(TypeCacheEntry *rangetyp,
+ const MultirangeType *mr1,
+ const MultirangeType *mr2)
+{
+ int32 range_count_1;
+ int32 range_count_2;
+ int32 i;
+ RangeBound lower1,
+ upper1,
+ lower2,
+ upper2;
+
+ /* Different types should be prevented by ANYMULTIRANGE matching rules */
+ if (MultirangeTypeGetOid(mr1) != MultirangeTypeGetOid(mr2))
+ elog(ERROR, "multirange types do not match");
+
+ range_count_1 = mr1->rangeCount;
+ range_count_2 = mr2->rangeCount;
+
+ if (range_count_1 != range_count_2)
+ return false;
+
+ for (i = 0; i < range_count_1; i++)
+ {
+ multirange_get_bounds(rangetyp, mr1, i, &lower1, &upper1);
+ multirange_get_bounds(rangetyp, mr2, i, &lower2, &upper2);
+
+ if (range_cmp_bounds(rangetyp, &lower1, &lower2) != 0 ||
+ range_cmp_bounds(rangetyp, &upper1, &upper2) != 0)
+ return false;
+ }
+
+ return true;
+}
+
+/* equality */
+Datum
+multirange_eq(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0);
+ MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1));
+
+ PG_RETURN_BOOL(multirange_eq_internal(typcache->rngtype, mr1, mr2));
+}
+
+/* inequality (internal version) */
+bool
+multirange_ne_internal(TypeCacheEntry *rangetyp,
+ const MultirangeType *mr1,
+ const MultirangeType *mr2)
+{
+ return (!multirange_eq_internal(rangetyp, mr1, mr2));
+}
+
+/* inequality */
+Datum
+multirange_ne(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0);
+ MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1));
+
+ PG_RETURN_BOOL(multirange_ne_internal(typcache->rngtype, mr1, mr2));
+}
+
+/* overlaps? */
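+/*
+ * e.g. '[2,5)'::int4range && '{[1,3),[7,9)}'::int4multirange is true
+ * (illustrative).
+ */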
+Datum
+range_overlaps_multirange(PG_FUNCTION_ARGS)
+{
+ RangeType *r = PG_GETARG_RANGE_P(0);
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+
+ PG_RETURN_BOOL(range_overlaps_multirange_internal(typcache->rngtype, r, mr));
+}
+
+Datum
+multirange_overlaps_range(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0);
+ RangeType *r = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+
+ PG_RETURN_BOOL(range_overlaps_multirange_internal(typcache->rngtype, r, mr));
+}
+
+Datum
+multirange_overlaps_multirange(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0);
+ MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1));
+
+ PG_RETURN_BOOL(multirange_overlaps_multirange_internal(typcache->rngtype, mr1, mr2));
+}
+
+/*
+ * Comparison function for multirange_bsearch_match(), checking whether any
+ * range of a multirange overlaps the given key range.
+ */
+static int
+multirange_range_overlaps_bsearch_comparison(TypeCacheEntry *typcache,
+ RangeBound *lower, RangeBound *upper,
+ void *key, bool *match)
+{
+ RangeBound *keyLower = (RangeBound *) key;
+ RangeBound *keyUpper = (RangeBound *) key + 1;
+
+ if (range_cmp_bounds(typcache, keyUpper, lower) < 0)
+ return -1;
+ if (range_cmp_bounds(typcache, keyLower, upper) > 0)
+ return 1;
+
+ *match = true;
+ return 0;
+}
+
+bool
+range_overlaps_multirange_internal(TypeCacheEntry *rangetyp,
+ const RangeType *r,
+ const MultirangeType *mr)
+{
+ RangeBound bounds[2];
+ bool empty;
+
+ /*
+ * Empties never overlap, even with empties. (This seems strange since
+ * they *do* contain each other, but we want to follow how ranges work.)
+ */
+ if (RangeIsEmpty(r) || MultirangeIsEmpty(mr))
+ return false;
+
+ range_deserialize(rangetyp, r, &bounds[0], &bounds[1], &empty);
+ Assert(!empty);
+
+ return multirange_bsearch_match(rangetyp, mr, bounds,
+ multirange_range_overlaps_bsearch_comparison);
+}
+
+bool
+multirange_overlaps_multirange_internal(TypeCacheEntry *rangetyp,
+ const MultirangeType *mr1,
+ const MultirangeType *mr2)
+{
+ int32 range_count1;
+ int32 range_count2;
+ int32 i1;
+ int32 i2;
+ RangeBound lower1,
+ upper1,
+ lower2,
+ upper2;
+
+ /*
+ * Empties never overlap, even with empties. (This seems strange since
+ * they *do* contain each other, but we want to follow how ranges work.)
+ */
+ if (MultirangeIsEmpty(mr1) || MultirangeIsEmpty(mr2))
+ return false;
+
+ range_count1 = mr1->rangeCount;
+ range_count2 = mr2->rangeCount;
+
+ /*
+	 * Every range in mr1 gets a chance to overlap with the ranges in mr2,
+	 * but we can use their ordering to avoid O(n^2).  The walk is similar
+	 * to range_overlaps_multirange, with mr1's ranges playing the role of
+	 * the multirange and mr2's ranges that of the probe; there, if we don't
+	 * find an overlap with r we're done, whereas here, if we don't find an
+	 * overlap for the current r2 we simply try the next r2.
+ */
+ i1 = 0;
+ multirange_get_bounds(rangetyp, mr1, i1, &lower1, &upper1);
+	for (i2 = 0; i2 < range_count2; i2++)
+ {
+ multirange_get_bounds(rangetyp, mr2, i2, &lower2, &upper2);
+
+ /* Discard r1s while r1 << r2 */
+ while (range_cmp_bounds(rangetyp, &upper1, &lower2) < 0)
+ {
+ if (++i1 >= range_count1)
+ return false;
+ multirange_get_bounds(rangetyp, mr1, i1, &lower1, &upper1);
+ }
+
+ /*
+ * If r1 && r2, we're done, otherwise we failed to find an overlap for
+ * r2, so go to the next one.
+ */
+ if (range_bounds_overlaps(rangetyp, &lower1, &upper1, &lower2, &upper2))
+ return true;
+ }
+
+ /* We looked through all of mr2 without finding an overlap */
+ return false;
+}
+
+/* does not extend to right of? */
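+/*
+ * e.g. '[1,3)'::int4range &< '{[2,5)}'::int4multirange is true, since the
+ * range ends at or before the multirange does (illustrative).
+ */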
+bool
+range_overleft_multirange_internal(TypeCacheEntry *rangetyp,
+ const RangeType *r,
+ const MultirangeType *mr)
+{
+ RangeBound lower1,
+ upper1,
+ lower2,
+ upper2;
+ bool empty;
+
+	if (RangeIsEmpty(r) || MultirangeIsEmpty(mr))
+		return false;
+
+	range_deserialize(rangetyp, r, &lower1, &upper1, &empty);
+	Assert(!empty);
+	multirange_get_bounds(rangetyp, mr, mr->rangeCount - 1,
+						  &lower2, &upper2);
+
+	return (range_cmp_bounds(rangetyp, &upper1, &upper2) <= 0);
+}
+
+Datum
+range_overleft_multirange(PG_FUNCTION_ARGS)
+{
+ RangeType *r = PG_GETARG_RANGE_P(0);
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+
+ PG_RETURN_BOOL(range_overleft_multirange_internal(typcache->rngtype, r, mr));
+}
+
+Datum
+multirange_overleft_range(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0);
+ RangeType *r = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+ RangeBound lower1,
+ upper1,
+ lower2,
+ upper2;
+ bool empty;
+
+ if (MultirangeIsEmpty(mr) || RangeIsEmpty(r))
+ PG_RETURN_BOOL(false);
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+
+ multirange_get_bounds(typcache->rngtype, mr, mr->rangeCount - 1,
+ &lower1, &upper1);
+ range_deserialize(typcache->rngtype, r, &lower2, &upper2, &empty);
+ Assert(!empty);
+
+ PG_RETURN_BOOL(range_cmp_bounds(typcache->rngtype, &upper1, &upper2) <= 0);
+}
+
+Datum
+multirange_overleft_multirange(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0);
+ MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1);
+ TypeCacheEntry *typcache;
+ RangeBound lower1,
+ upper1,
+ lower2,
+ upper2;
+
+ if (MultirangeIsEmpty(mr1) || MultirangeIsEmpty(mr2))
+ PG_RETURN_BOOL(false);
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1));
+
+ multirange_get_bounds(typcache->rngtype, mr1, mr1->rangeCount - 1,
+ &lower1, &upper1);
+ multirange_get_bounds(typcache->rngtype, mr2, mr2->rangeCount - 1,
+ &lower2, &upper2);
+
+ PG_RETURN_BOOL(range_cmp_bounds(typcache->rngtype, &upper1, &upper2) <= 0);
+}
+
+/* does not extend to left of? */
+bool
+range_overright_multirange_internal(TypeCacheEntry *rangetyp,
+ const RangeType *r,
+ const MultirangeType *mr)
+{
+ RangeBound lower1,
+ upper1,
+ lower2,
+ upper2;
+ bool empty;
+
+ if (RangeIsEmpty(r) || MultirangeIsEmpty(mr))
+		return false;
+
+ range_deserialize(rangetyp, r, &lower1, &upper1, &empty);
+ Assert(!empty);
+ multirange_get_bounds(rangetyp, mr, 0, &lower2, &upper2);
+
+ return (range_cmp_bounds(rangetyp, &lower1, &lower2) >= 0);
+}
+
+Datum
+range_overright_multirange(PG_FUNCTION_ARGS)
+{
+ RangeType *r = PG_GETARG_RANGE_P(0);
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+
+ PG_RETURN_BOOL(range_overright_multirange_internal(typcache->rngtype, r, mr));
+}
+
+Datum
+multirange_overright_range(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0);
+ RangeType *r = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+ RangeBound lower1,
+ upper1,
+ lower2,
+ upper2;
+ bool empty;
+
+ if (MultirangeIsEmpty(mr) || RangeIsEmpty(r))
+ PG_RETURN_BOOL(false);
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+
+ multirange_get_bounds(typcache->rngtype, mr, 0, &lower1, &upper1);
+ range_deserialize(typcache->rngtype, r, &lower2, &upper2, &empty);
+ Assert(!empty);
+
+ PG_RETURN_BOOL(range_cmp_bounds(typcache->rngtype, &lower1, &lower2) >= 0);
+}
+
+Datum
+multirange_overright_multirange(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0);
+ MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1);
+ TypeCacheEntry *typcache;
+ RangeBound lower1,
+ upper1,
+ lower2,
+ upper2;
+
+ if (MultirangeIsEmpty(mr1) || MultirangeIsEmpty(mr2))
+ PG_RETURN_BOOL(false);
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1));
+
+ multirange_get_bounds(typcache->rngtype, mr1, 0, &lower1, &upper1);
+ multirange_get_bounds(typcache->rngtype, mr2, 0, &lower2, &upper2);
+
+ PG_RETURN_BOOL(range_cmp_bounds(typcache->rngtype, &lower1, &lower2) >= 0);
+}
+
+/* contains? */
+Datum
+multirange_contains_multirange(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0);
+ MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1));
+
+ PG_RETURN_BOOL(multirange_contains_multirange_internal(typcache->rngtype, mr1, mr2));
+}
+
+/* contained by? */
+Datum
+multirange_contained_by_multirange(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0);
+ MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1));
+
+ PG_RETURN_BOOL(multirange_contains_multirange_internal(typcache->rngtype, mr2, mr1));
+}
+
+/*
+ * Test whether multirange mr1 contains every range from another multirange mr2.
+ */
+bool
+multirange_contains_multirange_internal(TypeCacheEntry *rangetyp,
+ const MultirangeType *mr1,
+ const MultirangeType *mr2)
+{
+ int32 range_count1 = mr1->rangeCount;
+ int32 range_count2 = mr2->rangeCount;
+ int i1,
+ i2;
+ RangeBound lower1,
+ upper1,
+ lower2,
+ upper2;
+
+	/*
+	 * We follow the same logic for empties as ranges:
+	 *  - an empty multirange contains an empty range/multirange.
+	 *  - an empty multirange can't contain any other range/multirange.
+	 *  - an empty multirange is contained by any other range/multirange.
+	 */
+
+ if (range_count2 == 0)
+ return true;
+ if (range_count1 == 0)
+ return false;
+
+ /*
+ * Every range in mr2 must be contained by some range in mr1. To avoid
+ * O(n^2) we walk through both ranges in tandem.
+ */
+ i1 = 0;
+ multirange_get_bounds(rangetyp, mr1, i1, &lower1, &upper1);
+ for (i2 = 0; i2 < range_count2; i2++)
+ {
+ multirange_get_bounds(rangetyp, mr2, i2, &lower2, &upper2);
+
+ /* Discard r1s while r1 << r2 */
+ while (range_cmp_bounds(rangetyp, &upper1, &lower2) < 0)
+ {
+ if (++i1 >= range_count1)
+ return false;
+ multirange_get_bounds(rangetyp, mr1, i1, &lower1, &upper1);
+ }
+
+ /*
+ * If r1 @> r2, go to the next r2, otherwise return false (since every
+ * r1[n] and r1[n+1] must have a gap). Note this will give weird
+ * answers if you don't canonicalize, e.g. with a custom
+ * int2multirange {[1,1], [2,2]} there is a "gap". But that is
+ * consistent with other range operators, e.g. '[1,1]'::int2range -|-
+ * '[2,2]'::int2range is false.
+ */
+ if (!range_bounds_contains(rangetyp, &lower1, &upper1,
+ &lower2, &upper2))
+ return false;
+ }
+
+ /* All ranges in mr2 are satisfied */
+ return true;
+}
+
+/* strictly left of? */
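+/*
+ * e.g. '[1,3)'::int4range << '{[4,6)}'::int4multirange is true
+ * (illustrative).
+ */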
+Datum
+range_before_multirange(PG_FUNCTION_ARGS)
+{
+ RangeType *r = PG_GETARG_RANGE_P(0);
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+
+ PG_RETURN_BOOL(range_before_multirange_internal(typcache->rngtype, r, mr));
+}
+
+Datum
+multirange_before_range(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0);
+ RangeType *r = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+
+ PG_RETURN_BOOL(range_after_multirange_internal(typcache->rngtype, r, mr));
+}
+
+Datum
+multirange_before_multirange(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0);
+ MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1));
+
+ PG_RETURN_BOOL(multirange_before_multirange_internal(typcache->rngtype, mr1, mr2));
+}
+
+/* strictly right of? */
+Datum
+range_after_multirange(PG_FUNCTION_ARGS)
+{
+ RangeType *r = PG_GETARG_RANGE_P(0);
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+
+ PG_RETURN_BOOL(range_after_multirange_internal(typcache->rngtype, r, mr));
+}
+
+Datum
+multirange_after_range(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0);
+ RangeType *r = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+
+ PG_RETURN_BOOL(range_before_multirange_internal(typcache->rngtype, r, mr));
+}
+
+Datum
+multirange_after_multirange(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0);
+ MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1));
+
+ PG_RETURN_BOOL(multirange_before_multirange_internal(typcache->rngtype, mr2, mr1));
+}
+
+/* strictly left of? (internal version) */
+bool
+range_before_multirange_internal(TypeCacheEntry *rangetyp,
+ const RangeType *r,
+ const MultirangeType *mr)
+{
+ RangeBound lower1,
+ upper1,
+ lower2,
+ upper2;
+ bool empty;
+
+ if (RangeIsEmpty(r) || MultirangeIsEmpty(mr))
+ return false;
+
+ range_deserialize(rangetyp, r, &lower1, &upper1, &empty);
+ Assert(!empty);
+
+ multirange_get_bounds(rangetyp, mr, 0, &lower2, &upper2);
+
+ return (range_cmp_bounds(rangetyp, &upper1, &lower2) < 0);
+}
+
+bool
+multirange_before_multirange_internal(TypeCacheEntry *rangetyp,
+ const MultirangeType *mr1,
+ const MultirangeType *mr2)
+{
+ RangeBound lower1,
+ upper1,
+ lower2,
+ upper2;
+
+ if (MultirangeIsEmpty(mr1) || MultirangeIsEmpty(mr2))
+ return false;
+
+ multirange_get_bounds(rangetyp, mr1, mr1->rangeCount - 1,
+ &lower1, &upper1);
+ multirange_get_bounds(rangetyp, mr2, 0,
+ &lower2, &upper2);
+
+ return (range_cmp_bounds(rangetyp, &upper1, &lower2) < 0);
+}
+
+/* strictly right of? (internal version) */
+bool
+range_after_multirange_internal(TypeCacheEntry *rangetyp,
+ const RangeType *r,
+ const MultirangeType *mr)
+{
+ RangeBound lower1,
+ upper1,
+ lower2,
+ upper2;
+ bool empty;
+ int32 range_count;
+
+ if (RangeIsEmpty(r) || MultirangeIsEmpty(mr))
+ return false;
+
+ range_deserialize(rangetyp, r, &lower1, &upper1, &empty);
+ Assert(!empty);
+
+ range_count = mr->rangeCount;
+ multirange_get_bounds(rangetyp, mr, range_count - 1,
+ &lower2, &upper2);
+
+ return (range_cmp_bounds(rangetyp, &lower1, &upper2) > 0);
+}
+
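+/*
+ * Test whether range r and multirange mr are adjacent: true when r ends
+ * exactly where mr's first range begins, or mr's last range ends exactly
+ * where r begins (per bounds_adjacent), e.g. '[1,3)'::int4range -|-
+ * '{[3,5)}'::int4multirange (illustrative).
+ */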
+bool
+range_adjacent_multirange_internal(TypeCacheEntry *rangetyp,
+ const RangeType *r,
+ const MultirangeType *mr)
+{
+ RangeBound lower1,
+ upper1,
+ lower2,
+ upper2;
+ bool empty;
+ int32 range_count;
+
+ if (RangeIsEmpty(r) || MultirangeIsEmpty(mr))
+ return false;
+
+ range_deserialize(rangetyp, r, &lower1, &upper1, &empty);
+ Assert(!empty);
+
+ range_count = mr->rangeCount;
+ multirange_get_bounds(rangetyp, mr, 0,
+ &lower2, &upper2);
+
+ if (bounds_adjacent(rangetyp, upper1, lower2))
+ return true;
+
+ if (range_count > 1)
+ multirange_get_bounds(rangetyp, mr, range_count - 1,
+ &lower2, &upper2);
+
+ if (bounds_adjacent(rangetyp, upper2, lower1))
+ return true;
+
+ return false;
+}
+
+/* adjacent to? */
+Datum
+range_adjacent_multirange(PG_FUNCTION_ARGS)
+{
+ RangeType *r = PG_GETARG_RANGE_P(0);
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+
+ PG_RETURN_BOOL(range_adjacent_multirange_internal(typcache->rngtype, r, mr));
+}
+
+Datum
+multirange_adjacent_range(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0);
+ RangeType *r = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ if (RangeIsEmpty(r) || MultirangeIsEmpty(mr))
+		PG_RETURN_BOOL(false);
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+
+ PG_RETURN_BOOL(range_adjacent_multirange_internal(typcache->rngtype, r, mr));
+}
+
+Datum
+multirange_adjacent_multirange(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0);
+ MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1);
+ TypeCacheEntry *typcache;
+ int32 range_count1;
+ int32 range_count2;
+ RangeBound lower1,
+ upper1,
+ lower2,
+ upper2;
+
+ if (MultirangeIsEmpty(mr1) || MultirangeIsEmpty(mr2))
+		PG_RETURN_BOOL(false);
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1));
+
+ range_count1 = mr1->rangeCount;
+ range_count2 = mr2->rangeCount;
+ multirange_get_bounds(typcache->rngtype, mr1, range_count1 - 1,
+ &lower1, &upper1);
+ multirange_get_bounds(typcache->rngtype, mr2, 0,
+ &lower2, &upper2);
+ if (bounds_adjacent(typcache->rngtype, upper1, lower2))
+ PG_RETURN_BOOL(true);
+
+ if (range_count1 > 1)
+ multirange_get_bounds(typcache->rngtype, mr1, 0,
+ &lower1, &upper1);
+ if (range_count2 > 1)
+ multirange_get_bounds(typcache->rngtype, mr2, range_count2 - 1,
+ &lower2, &upper2);
+ if (bounds_adjacent(typcache->rngtype, upper2, lower1))
+ PG_RETURN_BOOL(true);
+ PG_RETURN_BOOL(false);
+}
+
+/* Btree support */
+
+/* btree comparator */
+Datum
+multirange_cmp(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr1 = PG_GETARG_MULTIRANGE_P(0);
+ MultirangeType *mr2 = PG_GETARG_MULTIRANGE_P(1);
+ int32 range_count_1;
+ int32 range_count_2;
+ int32 range_count_max;
+ int32 i;
+ TypeCacheEntry *typcache;
+ int cmp = 0; /* If both are empty we'll use this. */
+
+ /* Different types should be prevented by ANYMULTIRANGE matching rules */
+ if (MultirangeTypeGetOid(mr1) != MultirangeTypeGetOid(mr2))
+ elog(ERROR, "multirange types do not match");
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr1));
+
+ range_count_1 = mr1->rangeCount;
+ range_count_2 = mr2->rangeCount;
+
+ /* Loop over source data */
+ range_count_max = Max(range_count_1, range_count_2);
+ for (i = 0; i < range_count_max; i++)
+ {
+ RangeBound lower1,
+ upper1,
+ lower2,
+ upper2;
+
+ /*
+ * If one multirange is shorter, it's as if it had empty ranges at the
+ * end to extend its length. An empty range compares earlier than any
+ * other range, so the shorter multirange comes before the longer.
+ * This is the same behavior as in other types, e.g. in strings 'aaa'
+ * < 'aaaaaa'.
+ */
+ if (i >= range_count_1)
+ {
+ cmp = -1;
+ break;
+ }
+ if (i >= range_count_2)
+ {
+ cmp = 1;
+ break;
+ }
+
+ multirange_get_bounds(typcache->rngtype, mr1, i, &lower1, &upper1);
+ multirange_get_bounds(typcache->rngtype, mr2, i, &lower2, &upper2);
+
+ cmp = range_cmp_bounds(typcache->rngtype, &lower1, &lower2);
+ if (cmp == 0)
+ cmp = range_cmp_bounds(typcache->rngtype, &upper1, &upper2);
+ if (cmp != 0)
+ break;
+ }
+
+ PG_FREE_IF_COPY(mr1, 0);
+ PG_FREE_IF_COPY(mr2, 1);
+
+ PG_RETURN_INT32(cmp);
+}
+
+/* inequality operators using the multirange_cmp function */
+Datum
+multirange_lt(PG_FUNCTION_ARGS)
+{
+ int cmp = multirange_cmp(fcinfo);
+
+ PG_RETURN_BOOL(cmp < 0);
+}
+
+Datum
+multirange_le(PG_FUNCTION_ARGS)
+{
+ int cmp = multirange_cmp(fcinfo);
+
+ PG_RETURN_BOOL(cmp <= 0);
+}
+
+Datum
+multirange_ge(PG_FUNCTION_ARGS)
+{
+ int cmp = multirange_cmp(fcinfo);
+
+ PG_RETURN_BOOL(cmp >= 0);
+}
+
+Datum
+multirange_gt(PG_FUNCTION_ARGS)
+{
+ int cmp = multirange_cmp(fcinfo);
+
+ PG_RETURN_BOOL(cmp > 0);
+}
+
+/* multirange -> range functions */
+
+/* Find the smallest range that includes everything in the multirange */
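+/*
+ * e.g. range_merge('{[1,3),[7,10)}'::int4multirange) gives '[1,10)'
+ * (illustrative).
+ */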
+Datum
+range_merge_from_multirange(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0);
+ Oid mltrngtypoid = MultirangeTypeGetOid(mr);
+ TypeCacheEntry *typcache;
+ RangeType *result;
+
+ typcache = multirange_get_typcache(fcinfo, mltrngtypoid);
+
+ if (MultirangeIsEmpty(mr))
+ {
+ result = make_empty_range(typcache->rngtype);
+ }
+ else if (mr->rangeCount == 1)
+ {
+ result = multirange_get_range(typcache->rngtype, mr, 0);
+ }
+ else
+ {
+ RangeBound firstLower,
+ firstUpper,
+ lastLower,
+ lastUpper;
+
+ multirange_get_bounds(typcache->rngtype, mr, 0,
+ &firstLower, &firstUpper);
+ multirange_get_bounds(typcache->rngtype, mr, mr->rangeCount - 1,
+ &lastLower, &lastUpper);
+
+ result = make_range(typcache->rngtype, &firstLower, &lastUpper, false);
+ }
+
+ PG_RETURN_RANGE_P(result);
+}
+
+/* Turn multirange into a set of ranges */
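+/*
+ * e.g. unnest('{[1,3),[5,7)}'::int4multirange) returns the two rows [1,3)
+ * and [5,7) (illustrative).
+ */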
+Datum
+multirange_unnest(PG_FUNCTION_ARGS)
+{
+ typedef struct
+ {
+ MultirangeType *mr;
+ TypeCacheEntry *typcache;
+ int index;
+ } multirange_unnest_fctx;
+
+ FuncCallContext *funcctx;
+ multirange_unnest_fctx *fctx;
+ MemoryContext oldcontext;
+
+ /* stuff done only on the first call of the function */
+ if (SRF_IS_FIRSTCALL())
+ {
+ MultirangeType *mr;
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ /*
+ * switch to memory context appropriate for multiple function calls
+ */
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ /*
+ * Get the multirange value and detoast if needed. We can't do this
+ * earlier because if we have to detoast, we want the detoasted copy
+ * to be in multi_call_memory_ctx, so it will go away when we're done
+ * and not before. (If no detoast happens, we assume the originally
+ * passed multirange will stick around till then.)
+ */
+ mr = PG_GETARG_MULTIRANGE_P(0);
+
+ /* allocate memory for user context */
+ fctx = (multirange_unnest_fctx *) palloc(sizeof(multirange_unnest_fctx));
+
+ /* initialize state */
+ fctx->mr = mr;
+ fctx->index = 0;
+ fctx->typcache = lookup_type_cache(MultirangeTypeGetOid(mr),
+ TYPECACHE_MULTIRANGE_INFO);
+
+ funcctx->user_fctx = fctx;
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ /* stuff done on every call of the function */
+ funcctx = SRF_PERCALL_SETUP();
+ fctx = funcctx->user_fctx;
+
+ if (fctx->index < fctx->mr->rangeCount)
+ {
+ RangeType *range;
+
+ range = multirange_get_range(fctx->typcache->rngtype,
+ fctx->mr,
+ fctx->index);
+ fctx->index++;
+
+ SRF_RETURN_NEXT(funcctx, RangeTypePGetDatum(range));
+ }
+ else
+ {
+ /* do when there is no more left */
+ SRF_RETURN_DONE(funcctx);
+ }
+}
+
+/* Hash support */
+
+/* hash a multirange value */
+Datum
+hash_multirange(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0);
+ uint32 result = 1;
+ TypeCacheEntry *typcache,
+ *scache;
+ int32 range_count,
+ i;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+ scache = typcache->rngtype->rngelemtype;
+ if (!OidIsValid(scache->hash_proc_finfo.fn_oid))
+ {
+ scache = lookup_type_cache(scache->type_id,
+ TYPECACHE_HASH_PROC_FINFO);
+ if (!OidIsValid(scache->hash_proc_finfo.fn_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify a hash function for type %s",
+ format_type_be(scache->type_id))));
+ }
+
+ range_count = mr->rangeCount;
+ for (i = 0; i < range_count; i++)
+ {
+ RangeBound lower,
+ upper;
+ uint8 flags = MultirangeGetFlagsPtr(mr)[i];
+ uint32 lower_hash;
+ uint32 upper_hash;
+ uint32 range_hash;
+
+ multirange_get_bounds(typcache->rngtype, mr, i, &lower, &upper);
+
+ if (RANGE_HAS_LBOUND(flags))
+ lower_hash = DatumGetUInt32(FunctionCall1Coll(&scache->hash_proc_finfo,
+ typcache->rngtype->rng_collation,
+ lower.val));
+ else
+ lower_hash = 0;
+
+ if (RANGE_HAS_UBOUND(flags))
+ upper_hash = DatumGetUInt32(FunctionCall1Coll(&scache->hash_proc_finfo,
+ typcache->rngtype->rng_collation,
+ upper.val));
+ else
+ upper_hash = 0;
+
+ /* Merge hashes of flags and bounds */
+ range_hash = hash_uint32((uint32) flags);
+ range_hash ^= lower_hash;
+ range_hash = pg_rotate_left32(range_hash, 1);
+ range_hash ^= upper_hash;
+
+ /*
+ * Use the same approach as hash_array to combine the individual
+ * elements' hash values:
+ */
+ result = (result << 5) - result + range_hash;
+ }
+
+ PG_FREE_IF_COPY(mr, 0);
+
+ PG_RETURN_UINT32(result);
+}
+
+/*
+ * Returns a 64-bit hash of a multirange value, mixing in a caller-supplied
+ * seed.  Otherwise similar to hash_multirange.
+ */
+Datum
+hash_multirange_extended(PG_FUNCTION_ARGS)
+{
+ MultirangeType *mr = PG_GETARG_MULTIRANGE_P(0);
+ Datum seed = PG_GETARG_DATUM(1);
+ uint64 result = 1;
+ TypeCacheEntry *typcache,
+ *scache;
+ int32 range_count,
+ i;
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+ scache = typcache->rngtype->rngelemtype;
+ if (!OidIsValid(scache->hash_extended_proc_finfo.fn_oid))
+ {
+ scache = lookup_type_cache(scache->type_id,
+ TYPECACHE_HASH_EXTENDED_PROC_FINFO);
+ if (!OidIsValid(scache->hash_extended_proc_finfo.fn_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify a hash function for type %s",
+ format_type_be(scache->type_id))));
+ }
+
+ range_count = mr->rangeCount;
+ for (i = 0; i < range_count; i++)
+ {
+ RangeBound lower,
+ upper;
+ uint8 flags = MultirangeGetFlagsPtr(mr)[i];
+ uint64 lower_hash;
+ uint64 upper_hash;
+ uint64 range_hash;
+
+ multirange_get_bounds(typcache->rngtype, mr, i, &lower, &upper);
+
+ if (RANGE_HAS_LBOUND(flags))
+ lower_hash = DatumGetUInt64(FunctionCall2Coll(&scache->hash_extended_proc_finfo,
+ typcache->rngtype->rng_collation,
+ lower.val,
+ seed));
+ else
+ lower_hash = 0;
+
+ if (RANGE_HAS_UBOUND(flags))
+ upper_hash = DatumGetUInt64(FunctionCall2Coll(&scache->hash_extended_proc_finfo,
+ typcache->rngtype->rng_collation,
+ upper.val,
+ seed));
+ else
+ upper_hash = 0;
+
+ /* Merge hashes of flags and bounds */
+ range_hash = DatumGetUInt64(hash_uint32_extended((uint32) flags,
+ DatumGetInt64(seed)));
+ range_hash ^= lower_hash;
+ range_hash = ROTATE_HIGH_AND_LOW_32BITS(range_hash);
+ range_hash ^= upper_hash;
+
+ /*
+ * Use the same approach as hash_array to combine the individual
+ * elements' hash values:
+ */
+ result = (result << 5) - result + range_hash;
+ }
+
+ PG_FREE_IF_COPY(mr, 0);
+
+ PG_RETURN_UINT64(result);
+}
diff --git a/src/backend/utils/adt/multirangetypes_selfuncs.c b/src/backend/utils/adt/multirangetypes_selfuncs.c
new file mode 100644
index 0000000..3af91b8
--- /dev/null
+++ b/src/backend/utils/adt/multirangetypes_selfuncs.c
@@ -0,0 +1,1336 @@
+/*-------------------------------------------------------------------------
+ *
+ * multirangetypes_selfuncs.c
+ * Functions for selectivity estimation of multirange operators
+ *
+ * Estimates are based on histograms of lower and upper bounds, and the
+ * fraction of empty multiranges.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/multirangetypes_selfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+
+#include "access/htup_details.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_statistic.h"
+#include "catalog/pg_type.h"
+#include "utils/float.h"
+#include "utils/fmgrprotos.h"
+#include "utils/lsyscache.h"
+#include "utils/rangetypes.h"
+#include "utils/multirangetypes.h"
+#include "utils/selfuncs.h"
+#include "utils/typcache.h"
+
+static double calc_multirangesel(TypeCacheEntry *typcache,
+ VariableStatData *vardata,
+ const MultirangeType *constval, Oid operator);
+static double default_multirange_selectivity(Oid operator);
+static double calc_hist_selectivity(TypeCacheEntry *typcache,
+ VariableStatData *vardata,
+ const MultirangeType *constval,
+ Oid operator);
+static double calc_hist_selectivity_scalar(TypeCacheEntry *typcache,
+ const RangeBound *constbound,
+ const RangeBound *hist,
+ int hist_nvalues, bool equal);
+static int rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value,
+ const RangeBound *hist, int hist_length, bool equal);
+static float8 get_position(TypeCacheEntry *typcache, const RangeBound *value,
+ const RangeBound *hist1, const RangeBound *hist2);
+static float8 get_len_position(double value, double hist1, double hist2);
+static float8 get_distance(TypeCacheEntry *typcache, const RangeBound *bound1,
+ const RangeBound *bound2);
+static int length_hist_bsearch(Datum *length_hist_values,
+ int length_hist_nvalues, double value,
+ bool equal);
+static double calc_length_hist_frac(Datum *length_hist_values,
+ int length_hist_nvalues, double length1,
+ double length2, bool equal);
+static double calc_hist_selectivity_contained(TypeCacheEntry *typcache,
+ const RangeBound *lower,
+ RangeBound *upper,
+ const RangeBound *hist_lower,
+ int hist_nvalues,
+ Datum *length_hist_values,
+ int length_hist_nvalues);
+static double calc_hist_selectivity_contains(TypeCacheEntry *typcache,
+ const RangeBound *lower,
+ const RangeBound *upper,
+ const RangeBound *hist_lower,
+ int hist_nvalues,
+ Datum *length_hist_values,
+ int length_hist_nvalues);
+
+/*
+ * Returns a default selectivity estimate for given operator, when we don't
+ * have statistics or cannot use them for some reason.
+ */
+static double
+default_multirange_selectivity(Oid operator)
+{
+ switch (operator)
+ {
+ case OID_MULTIRANGE_OVERLAPS_MULTIRANGE_OP:
+ case OID_MULTIRANGE_OVERLAPS_RANGE_OP:
+ case OID_RANGE_OVERLAPS_MULTIRANGE_OP:
+ return 0.01;
+
+ case OID_RANGE_CONTAINS_MULTIRANGE_OP:
+ case OID_RANGE_MULTIRANGE_CONTAINED_OP:
+ case OID_MULTIRANGE_CONTAINS_RANGE_OP:
+ case OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP:
+ case OID_MULTIRANGE_RANGE_CONTAINED_OP:
+ case OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP:
+ return 0.005;
+
+ case OID_MULTIRANGE_CONTAINS_ELEM_OP:
+ case OID_MULTIRANGE_ELEM_CONTAINED_OP:
+
+ /*
+ * "multirange @> elem" is more or less identical to a scalar
+ * inequality "A >= b AND A <= c".
+ */
+ return DEFAULT_MULTIRANGE_INEQ_SEL;
+
+ case OID_MULTIRANGE_LESS_OP:
+ case OID_MULTIRANGE_LESS_EQUAL_OP:
+ case OID_MULTIRANGE_GREATER_OP:
+ case OID_MULTIRANGE_GREATER_EQUAL_OP:
+ case OID_MULTIRANGE_LEFT_RANGE_OP:
+ case OID_MULTIRANGE_LEFT_MULTIRANGE_OP:
+ case OID_RANGE_LEFT_MULTIRANGE_OP:
+ case OID_MULTIRANGE_RIGHT_RANGE_OP:
+ case OID_MULTIRANGE_RIGHT_MULTIRANGE_OP:
+ case OID_RANGE_RIGHT_MULTIRANGE_OP:
+ case OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP:
+ case OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
+ case OID_MULTIRANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
+ case OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP:
+ case OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
+ case OID_MULTIRANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
+ /* these are similar to regular scalar inequalities */
+ return DEFAULT_INEQ_SEL;
+
+ default:
+
+ /*
+ * all multirange operators should be handled above, but just in
+ * case
+ */
+ return 0.01;
+ }
+}
+
+/*
+ * multirangesel -- restriction selectivity for multirange operators
+ */
+Datum
+multirangesel(PG_FUNCTION_ARGS)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Oid operator = PG_GETARG_OID(1);
+ List *args = (List *) PG_GETARG_POINTER(2);
+ int varRelid = PG_GETARG_INT32(3);
+ VariableStatData vardata;
+ Node *other;
+ bool varonleft;
+ Selectivity selec;
+ TypeCacheEntry *typcache = NULL;
+ MultirangeType *constmultirange = NULL;
+ RangeType *constrange = NULL;
+
+ /*
+ * If expression is not (variable op something) or (something op
+ * variable), then punt and return a default estimate.
+ */
+ if (!get_restriction_variable(root, args, varRelid,
+ &vardata, &other, &varonleft))
+ PG_RETURN_FLOAT8(default_multirange_selectivity(operator));
+
+ /*
+ * Can't do anything useful if the something is not a constant, either.
+ */
+ if (!IsA(other, Const))
+ {
+ ReleaseVariableStats(vardata);
+ PG_RETURN_FLOAT8(default_multirange_selectivity(operator));
+ }
+
+ /*
+ * All the multirange operators are strict, so we can cope with a NULL
+ * constant right away.
+ */
+ if (((Const *) other)->constisnull)
+ {
+ ReleaseVariableStats(vardata);
+ PG_RETURN_FLOAT8(0.0);
+ }
+
+ /*
+ * If var is on the right, commute the operator, so that we can assume the
+ * var is on the left in what follows.
+ */
+ if (!varonleft)
+ {
+ /* we have other Op var, commute to make var Op other */
+ operator = get_commutator(operator);
+ if (!operator)
+ {
+ /* Use default selectivity (should we raise an error instead?) */
+ ReleaseVariableStats(vardata);
+ PG_RETURN_FLOAT8(default_multirange_selectivity(operator));
+ }
+ }
+
+ /*
+ * OK, there's a Var and a Const we're dealing with here. We need the
+	 * Const to be of the same multirange type as the column, else we can't do
+ * anything useful. (Such cases will likely fail at runtime, but here we'd
+ * rather just return a default estimate.)
+ *
+ * If the operator is "multirange @> element", the constant should be of
+ * the element type of the multirange column. Convert it to a multirange
+ * that includes only that single point, so that we don't need special
+ * handling for that in what follows.
+ */
+ if (operator == OID_MULTIRANGE_CONTAINS_ELEM_OP)
+ {
+ typcache = multirange_get_typcache(fcinfo, vardata.vartype);
+
+ if (((Const *) other)->consttype == typcache->rngtype->rngelemtype->type_id)
+ {
+ RangeBound lower,
+ upper;
+
+ lower.inclusive = true;
+ lower.val = ((Const *) other)->constvalue;
+ lower.infinite = false;
+ lower.lower = true;
+ upper.inclusive = true;
+ upper.val = ((Const *) other)->constvalue;
+ upper.infinite = false;
+ upper.lower = false;
+ constrange = range_serialize(typcache->rngtype, &lower, &upper, false);
+ constmultirange = make_multirange(typcache->type_id, typcache->rngtype,
+ 1, &constrange);
+ }
+ }
+ else if (operator == OID_RANGE_MULTIRANGE_CONTAINED_OP ||
+ operator == OID_MULTIRANGE_CONTAINS_RANGE_OP ||
+ operator == OID_MULTIRANGE_OVERLAPS_RANGE_OP ||
+ operator == OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP ||
+ operator == OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP ||
+ operator == OID_MULTIRANGE_LEFT_RANGE_OP ||
+ operator == OID_MULTIRANGE_RIGHT_RANGE_OP)
+ {
+ /*
+ * Promote a range in "multirange OP range" just like we do an element
+ * in "multirange OP element".
+ */
+ typcache = multirange_get_typcache(fcinfo, vardata.vartype);
+ if (((Const *) other)->consttype == typcache->rngtype->type_id)
+ {
+ constrange = DatumGetRangeTypeP(((Const *) other)->constvalue);
+ constmultirange = make_multirange(typcache->type_id, typcache->rngtype,
+ 1, &constrange);
+ }
+ }
+ else if (operator == OID_RANGE_OVERLAPS_MULTIRANGE_OP ||
+ operator == OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP ||
+ operator == OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP ||
+ operator == OID_RANGE_LEFT_MULTIRANGE_OP ||
+ operator == OID_RANGE_RIGHT_MULTIRANGE_OP ||
+ operator == OID_RANGE_CONTAINS_MULTIRANGE_OP ||
+ operator == OID_MULTIRANGE_ELEM_CONTAINED_OP ||
+ operator == OID_MULTIRANGE_RANGE_CONTAINED_OP)
+ {
+ /*
+ * Here, the Var is the elem/range, not the multirange. For now we
+ * just punt and return the default estimate. In future we could
+ * disassemble the multirange constant to do something more
+ * intelligent.
+ */
+ }
+ else if (((Const *) other)->consttype == vardata.vartype)
+ {
+ /* Both sides are the same multirange type */
+ typcache = multirange_get_typcache(fcinfo, vardata.vartype);
+
+ constmultirange = DatumGetMultirangeTypeP(((Const *) other)->constvalue);
+ }
+
+ /*
+ * If we got a valid constant on one side of the operator, proceed to
+ * estimate using statistics. Otherwise punt and return a default constant
+ * estimate. Note that calc_multirangesel need not handle
+ * OID_MULTIRANGE_*_CONTAINED_OP.
+ */
+ if (constmultirange)
+ selec = calc_multirangesel(typcache, &vardata, constmultirange, operator);
+ else
+ selec = default_multirange_selectivity(operator);
+
+ ReleaseVariableStats(vardata);
+
+ CLAMP_PROBABILITY(selec);
+
+ PG_RETURN_FLOAT8((float8) selec);
+}
+
+static double
+calc_multirangesel(TypeCacheEntry *typcache, VariableStatData *vardata,
+ const MultirangeType *constval, Oid operator)
+{
+ double hist_selec;
+ double selec;
+ float4 empty_frac,
+ null_frac;
+
+ /*
+ * First look up the fraction of NULLs and empty multiranges from
+ * pg_statistic.
+ */
+ if (HeapTupleIsValid(vardata->statsTuple))
+ {
+ Form_pg_statistic stats;
+ AttStatsSlot sslot;
+
+ stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
+ null_frac = stats->stanullfrac;
+
+ /* Try to get fraction of empty multiranges */
+ if (get_attstatsslot(&sslot, vardata->statsTuple,
+ STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
+ InvalidOid,
+ ATTSTATSSLOT_NUMBERS))
+ {
+ if (sslot.nnumbers != 1)
+ elog(ERROR, "invalid empty fraction statistic"); /* shouldn't happen */
+ empty_frac = sslot.numbers[0];
+ free_attstatsslot(&sslot);
+ }
+ else
+ {
+ /* No empty fraction statistic. Assume no empty ranges. */
+ empty_frac = 0.0;
+ }
+ }
+ else
+ {
+ /*
+ * No stats are available. Follow through the calculations below
+ * anyway, assuming no NULLs and no empty multiranges. This still
+ * allows us to give a better-than-nothing estimate based on whether
+ * the constant is an empty multirange or not.
+ */
+ null_frac = 0.0;
+ empty_frac = 0.0;
+ }
+
+ if (MultirangeIsEmpty(constval))
+ {
+ /*
+ * An empty multirange matches all multiranges, all empty multiranges,
+ * or nothing, depending on the operator
+ */
+ switch (operator)
+ {
+ /* these return false if either argument is empty */
+ case OID_MULTIRANGE_OVERLAPS_RANGE_OP:
+ case OID_MULTIRANGE_OVERLAPS_MULTIRANGE_OP:
+ case OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP:
+ case OID_MULTIRANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
+ case OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP:
+ case OID_MULTIRANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
+ case OID_MULTIRANGE_LEFT_RANGE_OP:
+ case OID_MULTIRANGE_LEFT_MULTIRANGE_OP:
+ case OID_MULTIRANGE_RIGHT_RANGE_OP:
+ case OID_MULTIRANGE_RIGHT_MULTIRANGE_OP:
+ /* nothing is less than an empty multirange */
+ case OID_MULTIRANGE_LESS_OP:
+ selec = 0.0;
+ break;
+
+ /*
+ * only empty multiranges can be contained by an empty
+ * multirange
+ */
+ case OID_RANGE_MULTIRANGE_CONTAINED_OP:
+ case OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP:
+				/* only empty multiranges are <= an empty multirange */
+ case OID_MULTIRANGE_LESS_EQUAL_OP:
+ selec = empty_frac;
+ break;
+
+ /* everything contains an empty multirange */
+ case OID_MULTIRANGE_CONTAINS_RANGE_OP:
+ case OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP:
+ /* everything is >= an empty multirange */
+ case OID_MULTIRANGE_GREATER_EQUAL_OP:
+ selec = 1.0;
+ break;
+
+ /* all non-empty multiranges are > an empty multirange */
+ case OID_MULTIRANGE_GREATER_OP:
+ selec = 1.0 - empty_frac;
+ break;
+
+ /* an element cannot be empty */
+ case OID_MULTIRANGE_CONTAINS_ELEM_OP:
+
+ /* filtered out by multirangesel() */
+ case OID_RANGE_OVERLAPS_MULTIRANGE_OP:
+ case OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
+ case OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
+ case OID_RANGE_LEFT_MULTIRANGE_OP:
+ case OID_RANGE_RIGHT_MULTIRANGE_OP:
+ case OID_RANGE_CONTAINS_MULTIRANGE_OP:
+ case OID_MULTIRANGE_ELEM_CONTAINED_OP:
+ case OID_MULTIRANGE_RANGE_CONTAINED_OP:
+
+ default:
+ elog(ERROR, "unexpected operator %u", operator);
+ selec = 0.0; /* keep compiler quiet */
+ break;
+ }
+ }
+ else
+ {
+ /*
+ * Calculate selectivity using bound histograms. If that fails for
+	 * some reason, e.g. no histogram in pg_statistic, use the default
+ * constant estimate for the fraction of non-empty values. This is
+ * still somewhat better than just returning the default estimate,
+ * because this still takes into account the fraction of empty and
+ * NULL tuples, if we had statistics for them.
+ */
+ hist_selec = calc_hist_selectivity(typcache, vardata, constval,
+ operator);
+ if (hist_selec < 0.0)
+ hist_selec = default_multirange_selectivity(operator);
+
+ /*
+ * Now merge the results for the empty multiranges and histogram
+ * calculations, realizing that the histogram covers only the
+ * non-null, non-empty values.
+ */
+ if (operator == OID_RANGE_MULTIRANGE_CONTAINED_OP ||
+ operator == OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP)
+ {
+ /* empty is contained by anything non-empty */
+ selec = (1.0 - empty_frac) * hist_selec + empty_frac;
+ }
+ else
+ {
+ /* with any other operator, empty Op non-empty matches nothing */
+ selec = (1.0 - empty_frac) * hist_selec;
+ }
+ }
+
+ /* all multirange operators are strict */
+ selec *= (1.0 - null_frac);
+
+ /* result should be in range, but make sure... */
+ CLAMP_PROBABILITY(selec);
+
+ return selec;
+}
+
+/*
+ * Calculate multirange operator selectivity using histograms of multirange bounds.
+ *
+ * This estimate is for the portion of values that are not empty and not
+ * NULL.
+ */
+static double
+calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata,
+ const MultirangeType *constval, Oid operator)
+{
+ TypeCacheEntry *rng_typcache = typcache->rngtype;
+ AttStatsSlot hslot;
+ AttStatsSlot lslot;
+ int nhist;
+ RangeBound *hist_lower;
+ RangeBound *hist_upper;
+ int i;
+ RangeBound const_lower;
+ RangeBound const_upper;
+ RangeBound tmp;
+ double hist_selec;
+
+ /* Can't use the histogram with insecure multirange support functions */
+ if (!statistic_proc_security_check(vardata,
+ rng_typcache->rng_cmp_proc_finfo.fn_oid))
+ return -1;
+ if (OidIsValid(rng_typcache->rng_subdiff_finfo.fn_oid) &&
+ !statistic_proc_security_check(vardata,
+ rng_typcache->rng_subdiff_finfo.fn_oid))
+ return -1;
+
+ /* Try to get histogram of ranges */
+ if (!(HeapTupleIsValid(vardata->statsTuple) &&
+ get_attstatsslot(&hslot, vardata->statsTuple,
+ STATISTIC_KIND_BOUNDS_HISTOGRAM, InvalidOid,
+ ATTSTATSSLOT_VALUES)))
+ return -1.0;
+
+ /* check that it's a histogram, not just a dummy entry */
+ if (hslot.nvalues < 2)
+ {
+ free_attstatsslot(&hslot);
+ return -1.0;
+ }
+
+ /*
+ * Convert histogram of ranges into histograms of its lower and upper
+ * bounds.
+ */
+ nhist = hslot.nvalues;
+ hist_lower = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
+ hist_upper = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
+ for (i = 0; i < nhist; i++)
+ {
+ bool empty;
+
+ range_deserialize(rng_typcache, DatumGetRangeTypeP(hslot.values[i]),
+ &hist_lower[i], &hist_upper[i], &empty);
+ /* The histogram should not contain any empty ranges */
+ if (empty)
+ elog(ERROR, "bounds histogram contains an empty range");
+ }
+
+ /* @> and @< also need a histogram of range lengths */
+ if (operator == OID_MULTIRANGE_CONTAINS_RANGE_OP ||
+ operator == OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP ||
+ operator == OID_MULTIRANGE_RANGE_CONTAINED_OP ||
+ operator == OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP)
+ {
+ if (!(HeapTupleIsValid(vardata->statsTuple) &&
+ get_attstatsslot(&lslot, vardata->statsTuple,
+ STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
+ InvalidOid,
+ ATTSTATSSLOT_VALUES)))
+ {
+ free_attstatsslot(&hslot);
+ return -1.0;
+ }
+
+ /* check that it's a histogram, not just a dummy entry */
+ if (lslot.nvalues < 2)
+ {
+ free_attstatsslot(&lslot);
+ free_attstatsslot(&hslot);
+ return -1.0;
+ }
+ }
+ else
+ memset(&lslot, 0, sizeof(lslot));
+
+ /* Extract the bounds of the constant value. */
+ Assert(constval->rangeCount > 0);
+ multirange_get_bounds(rng_typcache, constval, 0,
+ &const_lower, &tmp);
+ multirange_get_bounds(rng_typcache, constval, constval->rangeCount - 1,
+ &tmp, &const_upper);
+
+ /*
+ * Calculate selectivity comparing the lower or upper bound of the
+ * constant with the histogram of lower or upper bounds.
+ */
+ switch (operator)
+ {
+ case OID_MULTIRANGE_LESS_OP:
+
+ /*
+ * The regular b-tree comparison operators (<, <=, >, >=) compare
+ * the lower bounds first, and the upper bounds for values with
+ * equal lower bounds. Estimate that by comparing the lower bounds
+ * only. This gives a fairly accurate estimate assuming there
+ * aren't many rows with a lower bound equal to the constant's
+ * lower bound.
+ */
+ hist_selec =
+ calc_hist_selectivity_scalar(rng_typcache, &const_lower,
+ hist_lower, nhist, false);
+ break;
+
+ case OID_MULTIRANGE_LESS_EQUAL_OP:
+ hist_selec =
+ calc_hist_selectivity_scalar(rng_typcache, &const_lower,
+ hist_lower, nhist, true);
+ break;
+
+ case OID_MULTIRANGE_GREATER_OP:
+ hist_selec =
+ 1 - calc_hist_selectivity_scalar(rng_typcache, &const_lower,
+ hist_lower, nhist, false);
+ break;
+
+ case OID_MULTIRANGE_GREATER_EQUAL_OP:
+ hist_selec =
+ 1 - calc_hist_selectivity_scalar(rng_typcache, &const_lower,
+ hist_lower, nhist, true);
+ break;
+
+ case OID_MULTIRANGE_LEFT_RANGE_OP:
+ case OID_MULTIRANGE_LEFT_MULTIRANGE_OP:
+ /* var << const when upper(var) < lower(const) */
+ hist_selec =
+ calc_hist_selectivity_scalar(rng_typcache, &const_lower,
+ hist_upper, nhist, false);
+ break;
+
+ case OID_MULTIRANGE_RIGHT_RANGE_OP:
+ case OID_MULTIRANGE_RIGHT_MULTIRANGE_OP:
+ /* var >> const when lower(var) > upper(const) */
+ hist_selec =
+ 1 - calc_hist_selectivity_scalar(rng_typcache, &const_upper,
+ hist_lower, nhist, true);
+ break;
+
+ case OID_MULTIRANGE_OVERLAPS_RIGHT_RANGE_OP:
+ case OID_MULTIRANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
+ /* compare lower bounds */
+ hist_selec =
+ 1 - calc_hist_selectivity_scalar(rng_typcache, &const_lower,
+ hist_lower, nhist, false);
+ break;
+
+ case OID_MULTIRANGE_OVERLAPS_LEFT_RANGE_OP:
+ case OID_MULTIRANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
+ /* compare upper bounds */
+ hist_selec =
+ calc_hist_selectivity_scalar(rng_typcache, &const_upper,
+ hist_upper, nhist, true);
+ break;
+
+ case OID_MULTIRANGE_OVERLAPS_RANGE_OP:
+ case OID_MULTIRANGE_OVERLAPS_MULTIRANGE_OP:
+ case OID_MULTIRANGE_CONTAINS_ELEM_OP:
+
+ /*
+ * A && B <=> NOT (A << B OR A >> B).
+ *
+ * Since A << B and A >> B are mutually exclusive events we can
+ * sum their probabilities to find probability of (A << B OR A >>
+ * B).
+ *
+ * "multirange @> elem" is equivalent to "multirange &&
+ * {[elem,elem]}". The caller already constructed the singular
+ * range from the element constant, so just treat it the same as
+ * &&.
+ */
+ hist_selec =
+ calc_hist_selectivity_scalar(rng_typcache,
+ &const_lower, hist_upper,
+ nhist, false);
+ hist_selec +=
+ (1.0 - calc_hist_selectivity_scalar(rng_typcache,
+ &const_upper, hist_lower,
+ nhist, true));
+ hist_selec = 1.0 - hist_selec;
+ break;
+
+ case OID_MULTIRANGE_CONTAINS_RANGE_OP:
+ case OID_MULTIRANGE_CONTAINS_MULTIRANGE_OP:
+ hist_selec =
+ calc_hist_selectivity_contains(rng_typcache, &const_lower,
+ &const_upper, hist_lower, nhist,
+ lslot.values, lslot.nvalues);
+ break;
+
+ case OID_MULTIRANGE_MULTIRANGE_CONTAINED_OP:
+ case OID_RANGE_MULTIRANGE_CONTAINED_OP:
+ if (const_lower.infinite)
+ {
+ /*
+ * Lower bound no longer matters. Just estimate the fraction
+ * with an upper bound <= const upper bound
+ */
+ hist_selec =
+ calc_hist_selectivity_scalar(rng_typcache, &const_upper,
+ hist_upper, nhist, true);
+ }
+ else if (const_upper.infinite)
+ {
+ hist_selec =
+ 1.0 - calc_hist_selectivity_scalar(rng_typcache, &const_lower,
+ hist_lower, nhist, false);
+ }
+ else
+ {
+ hist_selec =
+ calc_hist_selectivity_contained(rng_typcache, &const_lower,
+ &const_upper, hist_lower, nhist,
+ lslot.values, lslot.nvalues);
+ }
+ break;
+
+ /* filtered out by multirangesel() */
+ case OID_RANGE_OVERLAPS_MULTIRANGE_OP:
+ case OID_RANGE_OVERLAPS_LEFT_MULTIRANGE_OP:
+ case OID_RANGE_OVERLAPS_RIGHT_MULTIRANGE_OP:
+ case OID_RANGE_LEFT_MULTIRANGE_OP:
+ case OID_RANGE_RIGHT_MULTIRANGE_OP:
+ case OID_RANGE_CONTAINS_MULTIRANGE_OP:
+ case OID_MULTIRANGE_ELEM_CONTAINED_OP:
+ case OID_MULTIRANGE_RANGE_CONTAINED_OP:
+
+ default:
+ elog(ERROR, "unknown multirange operator %u", operator);
+ hist_selec = -1.0; /* keep compiler quiet */
+ break;
+ }
+
+ free_attstatsslot(&lslot);
+ free_attstatsslot(&hslot);
+
+ return hist_selec;
+}
+
+
+/*
+ * Look up the fraction of values less than (or equal, if 'equal' argument
+ * is true) a given const in a histogram of range bounds.
+ */
+static double
+calc_hist_selectivity_scalar(TypeCacheEntry *typcache, const RangeBound *constbound,
+ const RangeBound *hist, int hist_nvalues, bool equal)
+{
+ Selectivity selec;
+ int index;
+
+ /*
+ * Find the histogram bin the given constant falls into. Estimate
+ * selectivity as the number of preceding whole bins.
+ */
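+	/*
+	 * For instance, with 5 histogram bounds (4 bins), a constant falling
+	 * halfway through the third bin yields (2 + 0.5) / 4 = 0.625
+	 * (illustrative).
+	 */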
+ index = rbound_bsearch(typcache, constbound, hist, hist_nvalues, equal);
+ selec = (Selectivity) (Max(index, 0)) / (Selectivity) (hist_nvalues - 1);
+
+ /* Adjust using linear interpolation within the bin */
+ if (index >= 0 && index < hist_nvalues - 1)
+ selec += get_position(typcache, constbound, &hist[index],
+ &hist[index + 1]) / (Selectivity) (hist_nvalues - 1);
+
+ return selec;
+}
+
+/*
+ * Binary search on an array of range bounds. Returns the greatest index of a
+ * range bound in the array which is less than (or less than or equal to, when
+ * the "equal" flag is set) the given range bound. If all range bounds in the
+ * array are greater than or equal to (respectively, greater than) the given
+ * range bound, returns -1.
+ *
+ * This function is used in scalar operator selectivity estimation. It is also
+ * used to find the histogram bin in which to stop interpolating the portion
+ * of bounds that are less than or equal to the given bound.
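+ *
+ * Illustrative example (added; bounds shown as plain numbers): for a
+ * histogram {1, 5, 10, 20} and value 7 with equal = false, the result is
+ * index 1 (the bound 5); for value 0 the result is -1.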
+ */
+static int
+rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist,
+ int hist_length, bool equal)
+{
+ int lower = -1,
+ upper = hist_length - 1,
+ cmp,
+ middle;
+
+ while (lower < upper)
+ {
+ middle = (lower + upper + 1) / 2;
+ cmp = range_cmp_bounds(typcache, &hist[middle], value);
+
+ if (cmp < 0 || (equal && cmp == 0))
+ lower = middle;
+ else
+ upper = middle - 1;
+ }
+ return lower;
+}
+
+
+/*
+ * Binary search on length histogram. Returns greatest index of range length in
+ * histogram which is less than (less than or equal) the given length value. If
+ * all lengths in the histogram are greater than (greater than or equal) the
+ * given length, returns -1.
+ */
+static int
+length_hist_bsearch(Datum *length_hist_values, int length_hist_nvalues,
+ double value, bool equal)
+{
+ int lower = -1,
+ upper = length_hist_nvalues - 1,
+ middle;
+
+ while (lower < upper)
+ {
+ double middleval;
+
+ middle = (lower + upper + 1) / 2;
+
+ middleval = DatumGetFloat8(length_hist_values[middle]);
+ if (middleval < value || (equal && middleval <= value))
+ lower = middle;
+ else
+ upper = middle - 1;
+ }
+ return lower;
+}
+
+/*
+ * Get relative position of value in histogram bin in [0,1] range.
+ */
+static float8
+get_position(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist1,
+ const RangeBound *hist2)
+{
+ bool has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
+ float8 position;
+
+ if (!hist1->infinite && !hist2->infinite)
+ {
+ float8 bin_width;
+
+ /*
+ * Both bounds are finite. Assuming the subtype's comparison function
+ * works sanely, the value must be finite, too, because it lies
+ * somewhere between the bounds. If it doesn't, arbitrarily return
+ * 0.5.
+ */
+ if (value->infinite)
+ return 0.5;
+
+ /* Can't interpolate without subdiff function */
+ if (!has_subdiff)
+ return 0.5;
+
+ /* Calculate relative position using subdiff function. */
+ bin_width = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
+ typcache->rng_collation,
+ hist2->val,
+ hist1->val));
+ if (isnan(bin_width) || bin_width <= 0.0)
+ return 0.5; /* punt for NaN or zero-width bin */
+
+ position = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
+ typcache->rng_collation,
+ value->val,
+ hist1->val))
+ / bin_width;
+
+ if (isnan(position))
+ return 0.5; /* punt for NaN from subdiff, Inf/Inf, etc */
+
+ /* Relative position must be in [0,1] range */
+ position = Max(position, 0.0);
+ position = Min(position, 1.0);
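+ /*
+ * Illustrative example (added): for a bin spanning 10 to 20 in subdiff
+ * units, a value at 15 gives position = (15 - 10) / (20 - 10) = 0.5.
+ */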
+ return position;
+ }
+ else if (hist1->infinite && !hist2->infinite)
+ {
+ /*
+ * Lower bin boundary is -infinite, upper is finite. If the value is
+ * -infinite, return 0.0 to indicate it's equal to the lower bound.
+ * Otherwise return 1.0 to indicate it's infinitely far from the lower
+ * bound.
+ */
+ return ((value->infinite && value->lower) ? 0.0 : 1.0);
+ }
+ else if (!hist1->infinite && hist2->infinite)
+ {
+ /* same as above, but in reverse */
+ return ((value->infinite && !value->lower) ? 1.0 : 0.0);
+ }
+ else
+ {
+ /*
+ * If both bin boundaries are infinite, they should be equal to each
+ * other, and the value should also be infinite and equal to both
+ * bounds. (But don't Assert that, to avoid crashing if a user creates
+ * a datatype with a broken comparison function).
+ *
+ * Assume the value to lie in the middle of the infinite bounds.
+ */
+ return 0.5;
+ }
+}
+
+
+/*
+ * Get relative position of value in a length histogram bin in [0,1] range.
+ */
+static double
+get_len_position(double value, double hist1, double hist2)
+{
+ if (!isinf(hist1) && !isinf(hist2))
+ {
+ /*
+ * Both bounds are finite. The value should be finite too, because it
+ * lies somewhere between the bounds. If it doesn't, just return
+ * something.
+ */
+ if (isinf(value))
+ return 0.5;
+
+ return 1.0 - (hist2 - value) / (hist2 - hist1);
+ }
+ else if (isinf(hist1) && !isinf(hist2))
+ {
+ /*
+ * Lower bin boundary is -infinite, upper is finite. Return 1.0 to
+ * indicate the value is infinitely far from the lower bound.
+ */
+ return 1.0;
+ }
+ else if (!isinf(hist1) && isinf(hist2))
+ {
+ /* same as above, but in reverse */
+ return 0.0;
+ }
+ else
+ {
+ /*
+ * If both bin boundaries are infinite, they should be equal to each
+ * other, and the value should also be infinite and equal to both
+ * bounds. (But don't Assert that, to avoid crashing unnecessarily if
+ * the caller messes up)
+ *
+ * Assume the value to lie in the middle of the infinite bounds.
+ */
+ return 0.5;
+ }
+}
+
+/*
+ * Measure distance between two range bounds.
+ */
+static float8
+get_distance(TypeCacheEntry *typcache, const RangeBound *bound1, const RangeBound *bound2)
+{
+ bool has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
+
+ if (!bound1->infinite && !bound2->infinite)
+ {
+ /*
+ * Neither bound is infinite, use subdiff function or return default
+ * value of 1.0 if no subdiff is available.
+ */
+ if (has_subdiff)
+ {
+ float8 res;
+
+ res = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
+ typcache->rng_collation,
+ bound2->val,
+ bound1->val));
+ /* Reject possible NaN result, also negative result */
+ if (isnan(res) || res < 0.0)
+ return 1.0;
+ else
+ return res;
+ }
+ else
+ return 1.0;
+ }
+ else if (bound1->infinite && bound2->infinite)
+ {
+ /* Both bounds are infinite */
+ if (bound1->lower == bound2->lower)
+ return 0.0;
+ else
+ return get_float8_infinity();
+ }
+ else
+ {
+ /* One bound is infinite, the other is not */
+ return get_float8_infinity();
+ }
+}
+
+/*
+ * Calculate the average of function P(x), in the interval [length1, length2],
+ * where P(x) is the fraction of tuples with length < x (or length <= x if
+ * 'equal' is true).
+ */
+static double
+calc_length_hist_frac(Datum *length_hist_values, int length_hist_nvalues,
+ double length1, double length2, bool equal)
+{
+ double frac;
+ double A,
+ B,
+ PA,
+ PB;
+ double pos;
+ int i;
+ double area;
+
+ Assert(length2 >= length1);
+
+ if (length2 < 0.0)
+ return 0.0; /* shouldn't happen, but doesn't hurt to check */
+
+ /* All lengths in the table are <= infinite. */
+ if (isinf(length2) && equal)
+ return 1.0;
+
+ /*----------
+ * The average of a function between A and B can be calculated by the
+ * formula:
+ *
+ * B
+ * 1 /
+ * ------- | P(x)dx
+ * B - A /
+ * A
+ *
+ * The geometrical interpretation of the integral is the area under the
+ * graph of P(x). P(x) is defined by the length histogram. We calculate
+ * the area in a piecewise fashion, iterating through the length histogram
+ * bins. Each bin is a trapezoid:
+ *
+ * P(x2)
+ * /|
+ * / |
+ * P(x1)/ |
+ * | |
+ * | |
+ * ---+---+--
+ * x1 x2
+ *
+ * where x1 and x2 are the boundaries of the current histogram bin, and P(x1)
+ * and P(x2) are the cumulative fractions of tuples at those boundaries.
+ *
+ * The area of each trapezoid is 1/2 * (P(x2) + P(x1)) * (x2 - x1)
+ *
+ * The first bin contains the lower bound passed by the caller, so we
+ * use linear interpolation between the previous and next histogram bin
+ * boundary to calculate P(x1). Likewise for the last bin: we use linear
+ * interpolation to calculate P(x2). For the bins in between, x1 and x2
+ * lie on histogram bin boundaries, so P(x1) and P(x2) are simply:
+ * P(x1) = (bin index) / (number of bins)
+ * P(x2) = (bin index + 1) / (number of bins)
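+ *
+ * Worked example (added for illustration): a bin spanning lengths x1 = 2 to
+ * x2 = 4 with P(x1) = 0.3 and P(x2) = 0.5 contributes
+ * 0.5 * (0.3 + 0.5) * (4 - 2) = 0.8 to the area.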
+ */
+
+ /* First bin, the one that contains lower bound */
+ i = length_hist_bsearch(length_hist_values, length_hist_nvalues, length1, equal);
+ if (i >= length_hist_nvalues - 1)
+ return 1.0;
+
+ if (i < 0)
+ {
+ i = 0;
+ pos = 0.0;
+ }
+ else
+ {
+ /* interpolate length1's position in the bin */
+ pos = get_len_position(length1,
+ DatumGetFloat8(length_hist_values[i]),
+ DatumGetFloat8(length_hist_values[i + 1]));
+ }
+ PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1);
+ B = length1;
+
+ /*
+ * In the degenerate case that length1 == length2, simply return
+ * P(length1). This is not merely an optimization: if length1 == length2,
+ * we'd divide by zero later on.
+ */
+ if (length2 == length1)
+ return PB;
+
+ /*
+ * Loop through all the bins, until we hit the last bin, the one that
+ * contains the upper bound. (if lower and upper bounds are in the same
+ * bin, this falls out immediately)
+ */
+ area = 0.0;
+ for (; i < length_hist_nvalues - 1; i++)
+ {
+ double bin_upper = DatumGetFloat8(length_hist_values[i + 1]);
+
+ /* check if we've reached the last bin */
+ if (!(bin_upper < length2 || (equal && bin_upper <= length2)))
+ break;
+
+ /* the upper bound of previous bin is the lower bound of this bin */
+ A = B;
+ PA = PB;
+
+ B = bin_upper;
+ PB = (double) (i + 1) / (double) (length_hist_nvalues - 1);
+
+ /*
+ * Add the area of this trapezoid to the total. The point of the
+ * if-check is to avoid NaN, in the corner case that PA == PB == 0,
+ * and B - A == Inf. The area of a zero-height trapezoid (PA == PB ==
+ * 0) is zero, regardless of the width (B - A).
+ */
+ if (PA > 0 || PB > 0)
+ area += 0.5 * (PB + PA) * (B - A);
+ }
+
+ /* Last bin */
+ A = B;
+ PA = PB;
+
+ B = length2; /* last bin ends at the query upper bound */
+ if (i >= length_hist_nvalues - 1)
+ pos = 0.0;
+ else
+ {
+ if (DatumGetFloat8(length_hist_values[i]) == DatumGetFloat8(length_hist_values[i + 1]))
+ pos = 0.0;
+ else
+ pos = get_len_position(length2,
+ DatumGetFloat8(length_hist_values[i]),
+ DatumGetFloat8(length_hist_values[i + 1]));
+ }
+ PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1);
+
+ if (PA > 0 || PB > 0)
+ area += 0.5 * (PB + PA) * (B - A);
+
+ /*
+ * Ok, we have calculated the area, ie. the integral. Divide by width to
+ * get the requested average.
+ *
+ * Avoid NaN arising from infinite / infinite. This happens at least if
+ * length2 is infinite. It's not clear what the correct value would be in
+ * that case, so 0.5 seems as good as any value.
+ */
+ if (isinf(area) && isinf(length2))
+ frac = 0.5;
+ else
+ frac = area / (length2 - length1);
+
+ return frac;
+}
+
+/*
+ * Calculate selectivity of "var <@ const" operator, ie. estimate the fraction
+ * of multiranges that fall within the constant lower and upper bounds. This uses
+ * the histograms of range lower bounds and range lengths, on the assumption
+ * that the range lengths are independent of the lower bounds.
+ *
+ * The caller has already checked that constant lower and upper bounds are
+ * finite.
+ */
+static double
+calc_hist_selectivity_contained(TypeCacheEntry *typcache,
+ const RangeBound *lower, RangeBound *upper,
+ const RangeBound *hist_lower, int hist_nvalues,
+ Datum *length_hist_values, int length_hist_nvalues)
+{
+ int i,
+ upper_index;
+ float8 prev_dist;
+ double bin_width;
+ double upper_bin_width;
+ double sum_frac;
+
+ /*
+ * Begin by finding the bin containing the upper bound, in the lower bound
+ * histogram. Any range with a lower bound > constant upper bound can't
+ * match, ie. there are no matches in bins greater than upper_index.
+ */
+ upper->inclusive = !upper->inclusive;
+ upper->lower = true;
+ upper_index = rbound_bsearch(typcache, upper, hist_lower, hist_nvalues,
+ false);
+
+ /*
+ * If the upper bound value is below the histogram's lower limit, there
+ * are no matches.
+ */
+ if (upper_index < 0)
+ return 0.0;
+
+ /*
+ * If the upper bound value is at or beyond the histogram's upper limit,
+ * start our loop at the last actual bin, as though the upper bound were
+ * within that bin; get_position will clamp its result to 1.0 anyway.
+ * (This corresponds to assuming that the data population above the
+ * histogram's upper limit is empty, exactly like what we just assumed for
+ * the lower limit.)
+ */
+ upper_index = Min(upper_index, hist_nvalues - 2);
+
+ /*
+ * Calculate upper_bin_width, ie. the fraction of the (upper_index,
+ * upper_index + 1) bin which is greater than upper bound of query range
+ * using linear interpolation of subdiff function.
+ */
+ upper_bin_width = get_position(typcache, upper,
+ &hist_lower[upper_index],
+ &hist_lower[upper_index + 1]);
+
+ /*
+ * In the loop, dist and prev_dist are the distance of the "current" bin's
+ * lower and upper bounds from the constant upper bound.
+ *
+ * bin_width represents the width of the current bin. Normally it is 1.0,
+ * meaning a full width bin, but can be less in the corner cases: start
+ * and end of the loop. We start with bin_width = upper_bin_width, because
+ * we begin at the bin containing the upper bound.
+ */
+ prev_dist = 0.0;
+ bin_width = upper_bin_width;
+
+ sum_frac = 0.0;
+ for (i = upper_index; i >= 0; i--)
+ {
+ double dist;
+ double length_hist_frac;
+ bool final_bin = false;
+
+ /*
+ * dist -- distance from upper bound of query range to lower bound of
+ * the current bin in the lower bound histogram. Or to the lower bound
+ * of the constant range, if this is the final bin, containing the
+ * constant lower bound.
+ */
+ if (range_cmp_bounds(typcache, &hist_lower[i], lower) < 0)
+ {
+ dist = get_distance(typcache, lower, upper);
+
+ /*
+ * Subtract from bin_width the portion of this bin that we want to
+ * ignore.
+ */
+ bin_width -= get_position(typcache, lower, &hist_lower[i],
+ &hist_lower[i + 1]);
+ if (bin_width < 0.0)
+ bin_width = 0.0;
+ final_bin = true;
+ }
+ else
+ dist = get_distance(typcache, &hist_lower[i], upper);
+
+ /*
+ * Estimate the fraction of tuples in this bin that are narrow enough
+ * to not exceed the distance to the upper bound of the query range.
+ */
+ length_hist_frac = calc_length_hist_frac(length_hist_values,
+ length_hist_nvalues,
+ prev_dist, dist, true);
+
+ /*
+ * Add the fraction of tuples in this bin, with a suitable length, to
+ * the total.
+ */
+ sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1);
+
+ if (final_bin)
+ break;
+
+ bin_width = 1.0;
+ prev_dist = dist;
+ }
+
+ return sum_frac;
+}
+
+/*
+ * Calculate selectivity of "var @> const" operator, ie. estimate the fraction
+ * of multiranges that contain the constant lower and upper bounds. This uses
+ * the histograms of range lower bounds and range lengths, on the assumption
+ * that the range lengths are independent of the lower bounds.
+ */
+static double
+calc_hist_selectivity_contains(TypeCacheEntry *typcache,
+ const RangeBound *lower, const RangeBound *upper,
+ const RangeBound *hist_lower, int hist_nvalues,
+ Datum *length_hist_values, int length_hist_nvalues)
+{
+ int i,
+ lower_index;
+ double bin_width,
+ lower_bin_width;
+ double sum_frac;
+ float8 prev_dist;
+
+ /* Find the bin containing the lower bound of query range. */
+ lower_index = rbound_bsearch(typcache, lower, hist_lower, hist_nvalues,
+ true);
+
+ /*
+ * If the lower bound value is below the histogram's lower limit, there
+ * are no matches.
+ */
+ if (lower_index < 0)
+ return 0.0;
+
+ /*
+ * If the lower bound value is at or beyond the histogram's upper limit,
+ * start our loop at the last actual bin, as though the upper bound were
+ * within that bin; get_position will clamp its result to 1.0 anyway.
+ * (This corresponds to assuming that the data population above the
+ * histogram's upper limit is empty, exactly like what we just assumed for
+ * the lower limit.)
+ */
+ lower_index = Min(lower_index, hist_nvalues - 2);
+
+ /*
+ * Calculate lower_bin_width, ie. the fraction of the (lower_index,
+ * lower_index + 1) bin which is greater than lower bound of query range
+ * using linear interpolation of subdiff function.
+ */
+ lower_bin_width = get_position(typcache, lower, &hist_lower[lower_index],
+ &hist_lower[lower_index + 1]);
+
+ /*
+ * Loop through all the lower bound bins, smaller than the query lower
+ * bound. In the loop, dist and prev_dist are the distance of the
+ * "current" bin's lower and upper bounds from the constant upper bound.
+ * We begin from query lower bound, and walk backwards, so the first bin's
+ * upper bound is the query lower bound, and its distance to the query
+ * upper bound is the length of the query range.
+ *
+ * bin_width represents the width of the current bin. Normally it is 1.0,
+ * meaning a full width bin, except for the first bin, which is only
+ * counted up to the constant lower bound.
+ */
+ prev_dist = get_distance(typcache, lower, upper);
+ sum_frac = 0.0;
+ bin_width = lower_bin_width;
+ for (i = lower_index; i >= 0; i--)
+ {
+ float8 dist;
+ double length_hist_frac;
+
+ /*
+ * dist -- distance from upper bound of query range to current value
+ * of lower bound histogram or lower bound of query range (if we've
+ * reached it).
+ */
+ dist = get_distance(typcache, &hist_lower[i], upper);
+
+ /*
+ * Get average fraction of length histogram which covers intervals
+ * longer than (or equal to) distance to upper bound of query range.
+ */
+ length_hist_frac =
+ 1.0 - calc_length_hist_frac(length_hist_values,
+ length_hist_nvalues,
+ prev_dist, dist, false);
+
+ sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1);
+
+ bin_width = 1.0;
+ prev_dist = dist;
+ }
+
+ return sum_frac;
+}
diff --git a/src/backend/utils/adt/name.c b/src/backend/utils/adt/name.c
new file mode 100644
index 0000000..e8bba36
--- /dev/null
+++ b/src/backend/utils/adt/name.c
@@ -0,0 +1,359 @@
+/*-------------------------------------------------------------------------
+ *
+ * name.c
+ * Functions for the built-in type "name".
+ *
+ * name replaces char16 and is carefully implemented so that it
+ * is a string of physical length NAMEDATALEN.
+ * DO NOT use hard-coded constants anywhere;
+ * always use NAMEDATALEN as the symbolic constant! - jolly 8/21/95
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/name.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/namespace.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_type.h"
+#include "libpq/pqformat.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/varlena.h"
+
+
+/*****************************************************************************
+ * USER I/O ROUTINES (none) *
+ *****************************************************************************/
+
+
+/*
+ * namein - converts "..." to internal representation
+ *
+ * Note:
+ * [Old] Currently if strlen(s) < NAMEDATALEN, the extra chars are nulls
+ * Now, always NULL terminated
+ */
+Datum
+namein(PG_FUNCTION_ARGS)
+{
+ char *s = PG_GETARG_CSTRING(0);
+ Name result;
+ int len;
+
+ len = strlen(s);
+
+ /* Truncate oversize input */
+ if (len >= NAMEDATALEN)
+ len = pg_mbcliplen(s, len, NAMEDATALEN - 1);
+
+ /* We use palloc0 here to ensure result is zero-padded */
+ result = (Name) palloc0(NAMEDATALEN);
+ memcpy(NameStr(*result), s, len);
+
+ PG_RETURN_NAME(result);
+}
+
+/*
+ * nameout - converts internal representation to "..."
+ */
+Datum
+nameout(PG_FUNCTION_ARGS)
+{
+ Name s = PG_GETARG_NAME(0);
+
+ PG_RETURN_CSTRING(pstrdup(NameStr(*s)));
+}
+
+/*
+ * namerecv - converts external binary format to name
+ */
+Datum
+namerecv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ Name result;
+ char *str;
+ int nbytes;
+
+ str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
+ if (nbytes >= NAMEDATALEN)
+ ereport(ERROR,
+ (errcode(ERRCODE_NAME_TOO_LONG),
+ errmsg("identifier too long"),
+ errdetail("Identifier must be less than %d characters.",
+ NAMEDATALEN)));
+ result = (NameData *) palloc0(NAMEDATALEN);
+ memcpy(result, str, nbytes);
+ pfree(str);
+ PG_RETURN_NAME(result);
+}
+
+/*
+ * namesend - converts name to binary format
+ */
+Datum
+namesend(PG_FUNCTION_ARGS)
+{
+ Name s = PG_GETARG_NAME(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendtext(&buf, NameStr(*s), strlen(NameStr(*s)));
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+
+/*****************************************************************************
+ * COMPARISON/SORTING ROUTINES *
+ *****************************************************************************/
+
+/*
+ * nameeq - returns 1 iff arguments are equal
+ * namene - returns 1 iff arguments are not equal
+ * namelt - returns 1 iff a < b
+ * namele - returns 1 iff a <= b
+ * namegt - returns 1 iff a > b
+ * namege - returns 1 iff a >= b
+ *
+ * Note that the use of strncmp with NAMEDATALEN limit is mostly historical;
+ * strcmp would do as well, because we do not allow NAME values that don't
+ * have a '\0' terminator. Whatever might be past the terminator is not
+ * considered relevant to comparisons.
+ */
+static int
+namecmp(Name arg1, Name arg2, Oid collid)
+{
+ /* Fast path for common case used in system catalogs */
+ if (collid == C_COLLATION_OID)
+ return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN);
+
+ /* Else rely on the varstr infrastructure */
+ return varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)),
+ NameStr(*arg2), strlen(NameStr(*arg2)),
+ collid);
+}
+
+Datum
+nameeq(PG_FUNCTION_ARGS)
+{
+ Name arg1 = PG_GETARG_NAME(0);
+ Name arg2 = PG_GETARG_NAME(1);
+
+ PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) == 0);
+}
+
+Datum
+namene(PG_FUNCTION_ARGS)
+{
+ Name arg1 = PG_GETARG_NAME(0);
+ Name arg2 = PG_GETARG_NAME(1);
+
+ PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) != 0);
+}
+
+Datum
+namelt(PG_FUNCTION_ARGS)
+{
+ Name arg1 = PG_GETARG_NAME(0);
+ Name arg2 = PG_GETARG_NAME(1);
+
+ PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) < 0);
+}
+
+Datum
+namele(PG_FUNCTION_ARGS)
+{
+ Name arg1 = PG_GETARG_NAME(0);
+ Name arg2 = PG_GETARG_NAME(1);
+
+ PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
+}
+
+Datum
+namegt(PG_FUNCTION_ARGS)
+{
+ Name arg1 = PG_GETARG_NAME(0);
+ Name arg2 = PG_GETARG_NAME(1);
+
+ PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) > 0);
+}
+
+Datum
+namege(PG_FUNCTION_ARGS)
+{
+ Name arg1 = PG_GETARG_NAME(0);
+ Name arg2 = PG_GETARG_NAME(1);
+
+ PG_RETURN_BOOL(namecmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
+}
+
+Datum
+btnamecmp(PG_FUNCTION_ARGS)
+{
+ Name arg1 = PG_GETARG_NAME(0);
+ Name arg2 = PG_GETARG_NAME(1);
+
+ PG_RETURN_INT32(namecmp(arg1, arg2, PG_GET_COLLATION()));
+}
+
+Datum
+btnamesortsupport(PG_FUNCTION_ARGS)
+{
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+ Oid collid = ssup->ssup_collation;
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+ /* Use generic string SortSupport */
+ varstr_sortsupport(ssup, NAMEOID, collid);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ PG_RETURN_VOID();
+}
+
+
+/*****************************************************************************
+ * MISCELLANEOUS PUBLIC ROUTINES *
+ *****************************************************************************/
+
+void
+namestrcpy(Name name, const char *str)
+{
+ /* NB: We need to zero-pad the destination. */
+ strncpy(NameStr(*name), str, NAMEDATALEN);
+ NameStr(*name)[NAMEDATALEN - 1] = '\0';
+}
+
+/*
+ * Compare a NAME to a C string
+ *
+ * Assumes C collation always; be careful when using this for
+ * anything but equality checks!
+ */
+int
+namestrcmp(Name name, const char *str)
+{
+ if (!name && !str)
+ return 0;
+ if (!name)
+ return -1; /* NULL < anything */
+ if (!str)
+ return 1; /* NULL < anything */
+ return strncmp(NameStr(*name), str, NAMEDATALEN);
+}
+
+
+/*
+ * SQL-functions CURRENT_USER, SESSION_USER
+ */
+Datum
+current_user(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall1(namein, CStringGetDatum(GetUserNameFromId(GetUserId(), false))));
+}
+
+Datum
+session_user(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall1(namein, CStringGetDatum(GetUserNameFromId(GetSessionUserId(), false))));
+}
+
+
+/*
+ * SQL-functions CURRENT_SCHEMA, CURRENT_SCHEMAS
+ */
+Datum
+current_schema(PG_FUNCTION_ARGS)
+{
+ List *search_path = fetch_search_path(false);
+ char *nspname;
+
+ if (search_path == NIL)
+ PG_RETURN_NULL();
+ nspname = get_namespace_name(linitial_oid(search_path));
+ list_free(search_path);
+ if (!nspname)
+ PG_RETURN_NULL(); /* recently-deleted namespace? */
+ PG_RETURN_DATUM(DirectFunctionCall1(namein, CStringGetDatum(nspname)));
+}
+
+Datum
+current_schemas(PG_FUNCTION_ARGS)
+{
+ List *search_path = fetch_search_path(PG_GETARG_BOOL(0));
+ ListCell *l;
+ Datum *names;
+ int i;
+ ArrayType *array;
+
+ names = (Datum *) palloc(list_length(search_path) * sizeof(Datum));
+ i = 0;
+ foreach(l, search_path)
+ {
+ char *nspname;
+
+ nspname = get_namespace_name(lfirst_oid(l));
+ if (nspname) /* watch out for deleted namespace */
+ {
+ names[i] = DirectFunctionCall1(namein, CStringGetDatum(nspname));
+ i++;
+ }
+ }
+ list_free(search_path);
+
+ array = construct_array(names, i,
+ NAMEOID,
+ NAMEDATALEN, /* sizeof(Name) */
+ false, /* Name is not by-val */
+ TYPALIGN_CHAR); /* alignment of Name */
+
+ PG_RETURN_POINTER(array);
+}
+
+/*
+ * SQL-function nameconcatoid(name, oid) returns name
+ *
+ * This is used in the information_schema to produce specific_name columns,
+ * which are supposed to be unique per schema. We achieve that (in an ugly
+ * way) by appending the object's OID. The result is the same as
+ * ($1::text || '_' || $2::text)::name
+ * except that, if it would not fit in NAMEDATALEN, we make it do so by
+ * truncating the name input (not the oid).
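+ *
+ * For example (illustrative): nameconcatoid('foo', 12345) yields 'foo_12345';
+ * a longer name input would first be clipped so that the result still fits
+ * in NAMEDATALEN - 1 bytes.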
+ */
+Datum
+nameconcatoid(PG_FUNCTION_ARGS)
+{
+ Name nam = PG_GETARG_NAME(0);
+ Oid oid = PG_GETARG_OID(1);
+ Name result;
+ char suffix[20];
+ int suflen;
+ int namlen;
+
+ suflen = snprintf(suffix, sizeof(suffix), "_%u", oid);
+ namlen = strlen(NameStr(*nam));
+
+ /* Truncate oversize input by truncating name part, not suffix */
+ if (namlen + suflen >= NAMEDATALEN)
+ namlen = pg_mbcliplen(NameStr(*nam), namlen, NAMEDATALEN - 1 - suflen);
+
+ /* We use palloc0 here to ensure result is zero-padded */
+ result = (Name) palloc0(NAMEDATALEN);
+ memcpy(NameStr(*result), NameStr(*nam), namlen);
+ memcpy(NameStr(*result) + namlen, suffix, suflen);
+
+ PG_RETURN_NAME(result);
+}
diff --git a/src/backend/utils/adt/network.c b/src/backend/utils/adt/network.c
new file mode 100644
index 0000000..ae11de0
--- /dev/null
+++ b/src/backend/utils/adt/network.c
@@ -0,0 +1,2114 @@
+/*
+ * PostgreSQL type definitions for the INET and CIDR types.
+ *
+ * src/backend/utils/adt/network.c
+ *
+ * Jon Postel RIP 16 Oct 1998
+ */
+
+#include "postgres.h"
+
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#include "access/stratnum.h"
+#include "catalog/pg_opfamily.h"
+#include "catalog/pg_type.h"
+#include "common/hashfn.h"
+#include "common/ip.h"
+#include "lib/hyperloglog.h"
+#include "libpq/libpq-be.h"
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/guc.h"
+#include "utils/inet.h"
+#include "utils/lsyscache.h"
+#include "utils/sortsupport.h"
+
+
+/*
+ * An IPv4 netmask size is a value in the range of 0 - 32, which is
+ * represented with 6 bits in inet/cidr abbreviated keys where possible.
+ *
+ * An IPv4 inet/cidr abbreviated key can use up to 25 bits for subnet
+ * component.
+ */
+#define ABBREV_BITS_INET4_NETMASK_SIZE 6
+#define ABBREV_BITS_INET4_SUBNET 25
+
+/* sortsupport for inet/cidr */
+typedef struct
+{
+ int64 input_count; /* number of non-null values seen */
+ bool estimating; /* true if estimating cardinality */
+
+ hyperLogLogState abbr_card; /* cardinality estimator */
+} network_sortsupport_state;
+
+static int32 network_cmp_internal(inet *a1, inet *a2);
+static int network_fast_cmp(Datum x, Datum y, SortSupport ssup);
+static bool network_abbrev_abort(int memtupcount, SortSupport ssup);
+static Datum network_abbrev_convert(Datum original, SortSupport ssup);
+static List *match_network_function(Node *leftop,
+ Node *rightop,
+ int indexarg,
+ Oid funcid,
+ Oid opfamily);
+static List *match_network_subset(Node *leftop,
+ Node *rightop,
+ bool is_eq,
+ Oid opfamily);
+static bool addressOK(unsigned char *a, int bits, int family);
+static inet *internal_inetpl(inet *ip, int64 addend);
+
+
+/*
+ * Common INET/CIDR input routine
+ */
+static inet *
+network_in(char *src, bool is_cidr)
+{
+ int bits;
+ inet *dst;
+
+ dst = (inet *) palloc0(sizeof(inet));
+
+ /*
+ * First, check to see if this is an IPv6 or IPv4 address. IPv6 addresses
+ * will have a : somewhere in them (several, in fact) so if there is one
+ * present, assume it's V6, otherwise assume it's V4.
+ */
+
+ if (strchr(src, ':') != NULL)
+ ip_family(dst) = PGSQL_AF_INET6;
+ else
+ ip_family(dst) = PGSQL_AF_INET;
+
+ bits = pg_inet_net_pton(ip_family(dst), src, ip_addr(dst),
+ is_cidr ? ip_addrsize(dst) : -1);
+ if ((bits < 0) || (bits > ip_maxbits(dst)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ /* translator: first %s is inet or cidr */
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ is_cidr ? "cidr" : "inet", src)));
+
+ /*
+ * Error check: CIDR values must not have any bits set beyond the masklen.
+ */
+ if (is_cidr)
+ {
+ if (!addressOK(ip_addr(dst), bits, ip_family(dst)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid cidr value: \"%s\"", src),
+ errdetail("Value has bits set to right of mask.")));
+ }
+
+ ip_bits(dst) = bits;
+ SET_INET_VARSIZE(dst);
+
+ return dst;
+}
+
+Datum
+inet_in(PG_FUNCTION_ARGS)
+{
+ char *src = PG_GETARG_CSTRING(0);
+
+ PG_RETURN_INET_P(network_in(src, false));
+}
+
+Datum
+cidr_in(PG_FUNCTION_ARGS)
+{
+ char *src = PG_GETARG_CSTRING(0);
+
+ PG_RETURN_INET_P(network_in(src, true));
+}
+
+
+/*
+ * Common INET/CIDR output routine
+ */
+static char *
+network_out(inet *src, bool is_cidr)
+{
+ char tmp[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255/128")];
+ char *dst;
+ int len;
+
+ dst = pg_inet_net_ntop(ip_family(src), ip_addr(src), ip_bits(src),
+ tmp, sizeof(tmp));
+ if (dst == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("could not format inet value: %m")));
+
+ /* For CIDR, add /n if not present */
+ if (is_cidr && strchr(tmp, '/') == NULL)
+ {
+ len = strlen(tmp);
+ snprintf(tmp + len, sizeof(tmp) - len, "/%u", ip_bits(src));
+ }
+
+ return pstrdup(tmp);
+}
+
+Datum
+inet_out(PG_FUNCTION_ARGS)
+{
+ inet *src = PG_GETARG_INET_PP(0);
+
+ PG_RETURN_CSTRING(network_out(src, false));
+}
+
+Datum
+cidr_out(PG_FUNCTION_ARGS)
+{
+ inet *src = PG_GETARG_INET_PP(0);
+
+ PG_RETURN_CSTRING(network_out(src, true));
+}
+
+
+/*
+ * network_recv - converts external binary format to inet
+ *
+ * The external representation is (one byte apiece for)
+ * family, bits, is_cidr, address length, address in network byte order.
+ *
+ * Presence of is_cidr is largely for historical reasons, though it might
+ * allow some code-sharing on the client side. We send it correctly on
+ * output, but ignore the value on input.
+ */
+static inet *
+network_recv(StringInfo buf, bool is_cidr)
+{
+ inet *addr;
+ char *addrptr;
+ int bits;
+ int nb,
+ i;
+
+ /* make sure any unused bits in a CIDR value are zeroed */
+ addr = (inet *) palloc0(sizeof(inet));
+
+ ip_family(addr) = pq_getmsgbyte(buf);
+ if (ip_family(addr) != PGSQL_AF_INET &&
+ ip_family(addr) != PGSQL_AF_INET6)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ /* translator: %s is inet or cidr */
+ errmsg("invalid address family in external \"%s\" value",
+ is_cidr ? "cidr" : "inet")));
+ bits = pq_getmsgbyte(buf);
+ if (bits < 0 || bits > ip_maxbits(addr))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ /* translator: %s is inet or cidr */
+ errmsg("invalid bits in external \"%s\" value",
+ is_cidr ? "cidr" : "inet")));
+ ip_bits(addr) = bits;
+ i = pq_getmsgbyte(buf); /* ignore is_cidr */
+ nb = pq_getmsgbyte(buf);
+ if (nb != ip_addrsize(addr))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ /* translator: %s is inet or cidr */
+ errmsg("invalid length in external \"%s\" value",
+ is_cidr ? "cidr" : "inet")));
+
+ addrptr = (char *) ip_addr(addr);
+ for (i = 0; i < nb; i++)
+ addrptr[i] = pq_getmsgbyte(buf);
+
+ /*
+ * Error check: CIDR values must not have any bits set beyond the masklen.
+ */
+ if (is_cidr)
+ {
+ if (!addressOK(ip_addr(addr), bits, ip_family(addr)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("invalid external \"cidr\" value"),
+ errdetail("Value has bits set to right of mask.")));
+ }
+
+ SET_INET_VARSIZE(addr);
+
+ return addr;
+}
+
+Datum
+inet_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+
+ PG_RETURN_INET_P(network_recv(buf, false));
+}
+
+Datum
+cidr_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+
+ PG_RETURN_INET_P(network_recv(buf, true));
+}
+
+
+/*
+ * network_send - converts inet to binary format
+ */
+static bytea *
+network_send(inet *addr, bool is_cidr)
+{
+ StringInfoData buf;
+ char *addrptr;
+ int nb,
+ i;
+
+ pq_begintypsend(&buf);
+ pq_sendbyte(&buf, ip_family(addr));
+ pq_sendbyte(&buf, ip_bits(addr));
+ pq_sendbyte(&buf, is_cidr);
+ nb = ip_addrsize(addr);
+ if (nb < 0)
+ nb = 0;
+ pq_sendbyte(&buf, nb);
+ addrptr = (char *) ip_addr(addr);
+ for (i = 0; i < nb; i++)
+ pq_sendbyte(&buf, addrptr[i]);
+ return pq_endtypsend(&buf);
+}
+
+Datum
+inet_send(PG_FUNCTION_ARGS)
+{
+ inet *addr = PG_GETARG_INET_PP(0);
+
+ PG_RETURN_BYTEA_P(network_send(addr, false));
+}
+
+Datum
+cidr_send(PG_FUNCTION_ARGS)
+{
+ inet *addr = PG_GETARG_INET_PP(0);
+
+ PG_RETURN_BYTEA_P(network_send(addr, true));
+}
+
+
+Datum
+inet_to_cidr(PG_FUNCTION_ARGS)
+{
+ inet *src = PG_GETARG_INET_PP(0);
+ int bits;
+
+ bits = ip_bits(src);
+
+ /* safety check */
+ if ((bits < 0) || (bits > ip_maxbits(src)))
+ elog(ERROR, "invalid inet bit length: %d", bits);
+
+ PG_RETURN_INET_P(cidr_set_masklen_internal(src, bits));
+}
+
+Datum
+inet_set_masklen(PG_FUNCTION_ARGS)
+{
+ inet *src = PG_GETARG_INET_PP(0);
+ int bits = PG_GETARG_INT32(1);
+ inet *dst;
+
+ if (bits == -1)
+ bits = ip_maxbits(src);
+
+ if ((bits < 0) || (bits > ip_maxbits(src)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid mask length: %d", bits)));
+
+ /* clone the original data */
+ dst = (inet *) palloc(VARSIZE_ANY(src));
+ memcpy(dst, src, VARSIZE_ANY(src));
+
+ ip_bits(dst) = bits;
+
+ PG_RETURN_INET_P(dst);
+}
+
+Datum
+cidr_set_masklen(PG_FUNCTION_ARGS)
+{
+ inet *src = PG_GETARG_INET_PP(0);
+ int bits = PG_GETARG_INT32(1);
+
+ if (bits == -1)
+ bits = ip_maxbits(src);
+
+ if ((bits < 0) || (bits > ip_maxbits(src)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid mask length: %d", bits)));
+
+ PG_RETURN_INET_P(cidr_set_masklen_internal(src, bits));
+}
+
+/*
+ * Copy src and set mask length to 'bits' (which must be valid for the family)
+ */
+inet *
+cidr_set_masklen_internal(const inet *src, int bits)
+{
+ inet *dst = (inet *) palloc0(sizeof(inet));
+
+ ip_family(dst) = ip_family(src);
+ ip_bits(dst) = bits;
+
+ if (bits > 0)
+ {
+ Assert(bits <= ip_maxbits(dst));
+
+ /* Clone appropriate bytes of the address, leaving the rest 0 */
+ memcpy(ip_addr(dst), ip_addr(src), (bits + 7) / 8);
+
+ /* Clear any unwanted bits in the last partial byte */
+ if (bits % 8)
+ ip_addr(dst)[bits / 8] &= ~(0xFF >> (bits % 8));
+ }
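+
+ /*
+ * Worked example (added note): for bits = 20 the memcpy above copies
+ * (20 + 7) / 8 = 3 bytes, and the last partial byte (index 2) is masked
+ * with ~(0xFF >> 4) = 0xF0.
+ */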
+
+ /* Set varlena header correctly */
+ SET_INET_VARSIZE(dst);
+
+ return dst;
+}
+
+/*
+ * Basic comparison function for sorting and inet/cidr comparisons.
+ *
+ * Comparison is first on the common bits of the network part, then on
+ * the length of the network part, and then on the whole unmasked address.
+ * The effect is that the network part is the major sort key, and for
+ * equal network parts we sort on the host part. Note this is only sane
+ * for CIDR if address bits to the right of the mask are guaranteed zero;
+ * otherwise logically-equal CIDRs might compare as different.
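+ *
+ * For example (illustrative), the resulting sort order is
+ * 10.0.0.0/24 < 10.0.0.0/25 < 10.0.0.1/25 < 10.0.1.0/24,
+ * and every IPv4 value sorts before any IPv6 value.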
+ */
+
+static int32
+network_cmp_internal(inet *a1, inet *a2)
+{
+ if (ip_family(a1) == ip_family(a2))
+ {
+ int order;
+
+ order = bitncmp(ip_addr(a1), ip_addr(a2),
+ Min(ip_bits(a1), ip_bits(a2)));
+ if (order != 0)
+ return order;
+ order = ((int) ip_bits(a1)) - ((int) ip_bits(a2));
+ if (order != 0)
+ return order;
+ return bitncmp(ip_addr(a1), ip_addr(a2), ip_maxbits(a1));
+ }
+
+ return ip_family(a1) - ip_family(a2);
+}
+
+Datum
+network_cmp(PG_FUNCTION_ARGS)
+{
+ inet *a1 = PG_GETARG_INET_PP(0);
+ inet *a2 = PG_GETARG_INET_PP(1);
+
+ PG_RETURN_INT32(network_cmp_internal(a1, a2));
+}
+
+/*
+ * SortSupport strategy routine
+ */
+Datum
+network_sortsupport(PG_FUNCTION_ARGS)
+{
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+
+ ssup->comparator = network_fast_cmp;
+ ssup->ssup_extra = NULL;
+
+ if (ssup->abbreviate)
+ {
+ network_sortsupport_state *uss;
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+ uss = palloc(sizeof(network_sortsupport_state));
+ uss->input_count = 0;
+ uss->estimating = true;
+ initHyperLogLog(&uss->abbr_card, 10);
+
+ ssup->ssup_extra = uss;
+
+ ssup->comparator = ssup_datum_unsigned_cmp;
+ ssup->abbrev_converter = network_abbrev_convert;
+ ssup->abbrev_abort = network_abbrev_abort;
+ ssup->abbrev_full_comparator = network_fast_cmp;
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * SortSupport comparison func
+ */
+static int
+network_fast_cmp(Datum x, Datum y, SortSupport ssup)
+{
+ inet *arg1 = DatumGetInetPP(x);
+ inet *arg2 = DatumGetInetPP(y);
+
+ return network_cmp_internal(arg1, arg2);
+}
+
+/*
+ * Callback for estimating effectiveness of abbreviated key optimization.
+ *
+ * We pay no attention to the cardinality of the non-abbreviated data, because
+ * there is no equality fast-path within the authoritative inet comparator.
+ */
+static bool
+network_abbrev_abort(int memtupcount, SortSupport ssup)
+{
+ network_sortsupport_state *uss = ssup->ssup_extra;
+ double abbr_card;
+
+ if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
+ return false;
+
+ abbr_card = estimateHyperLogLog(&uss->abbr_card);
+
+ /*
+ * If we have >100k distinct values, then even if we were sorting many
+ * billion rows we'd likely still break even, and the penalty of undoing
+ * that many rows of abbrevs would probably not be worth it. At this point
+ * we stop counting because we know that we're now fully committed.
+ */
+ if (abbr_card > 100000.0)
+ {
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG,
+ "network_abbrev: estimation ends at cardinality %f"
+ " after " INT64_FORMAT " values (%d rows)",
+ abbr_card, uss->input_count, memtupcount);
+#endif
+ uss->estimating = false;
+ return false;
+ }
+
+ /*
+ * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
+ * fudge factor allows us to abort earlier on genuinely pathological data
+ * where we've had exactly one abbreviated value in the first 2k
+ * (non-null) rows.
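+ *
+ * For example (added note): after 10000 non-null inputs, the estimated
+ * cardinality must be at least 10000 / 2000.0 + 0.5 = 5.5 for abbreviation
+ * to continue.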
+ */
+ if (abbr_card < uss->input_count / 2000.0 + 0.5)
+ {
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG,
+ "network_abbrev: aborting abbreviation at cardinality %f"
+ " below threshold %f after " INT64_FORMAT " values (%d rows)",
+ abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
+ memtupcount);
+#endif
+ return true;
+ }
+
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG,
+ "network_abbrev: cardinality %f after " INT64_FORMAT
+ " values (%d rows)", abbr_card, uss->input_count, memtupcount);
+#endif
+
+ return false;
+}
+
+/*
+ * SortSupport conversion routine. Converts original inet/cidr representation
+ * to abbreviated key representation that works with simple 3-way unsigned int
+ * comparisons. The network_cmp_internal() rules for sorting inet/cidr datums
+ * are followed by abbreviated comparisons by an encoding scheme that
+ * conditions keys through careful use of padding.
+ *
+ * Some background: inet values have three major components (take for example
+ * the address 1.2.3.4/24):
+ *
+ * * A network, or netmasked bits (1.2.3.0).
+ * * A netmask size (/24).
+ * * A subnet, or bits outside of the netmask (0.0.0.4).
+ *
+ * cidr values are the same except that they have only the first two components --
+ * all their subnet bits *must* be zero (1.2.3.0/24).
+ *
+ * IPv4 and IPv6 are identical in this makeup, with the difference being that
+ * IPv4 addresses have a maximum of 32 bits compared to IPv6's 64 bits, so in
+ * IPv6 each part may be larger.
+ *
+ * inet/cidr types compare using these sorting rules. If inequality is detected
+ * at any step, comparison is finished. If any rule is a tie, the algorithm
+ * drops through to the next to break it:
+ *
+ * 1. IPv4 always appears before IPv6.
+ * 2. Network bits are compared.
+ * 3. Netmask size is compared.
+ * 4. All bits are compared (having made it here, we know that both
+ * netmasked bits and netmask size are equal, so we're in effect only
+ * comparing subnet bits).
+ *
+ * When generating abbreviated keys for SortSupport, we pack as much as we can
+ * into a datum while ensuring that when comparing those keys as integers,
+ * these rules will be respected. Exact contents depend on IP family and datum
+ * size.
+ *
+ * IPv4
+ * ----
+ *
+ * 4 byte datums:
+ *
+ * Start with 1 bit for the IP family (IPv4 or IPv6; this bit is present in
+ * every case below) followed by all but 1 of the netmasked bits.
+ *
+ * +----------+---------------------+
+ * | 1 bit IP | 31 bits network | (1 bit network
+ * | family | (truncated) | omitted)
+ * +----------+---------------------+
+ *
+ * 8 byte datums:
+ *
+ * We have space to store all netmasked bits, followed by the netmask size,
+ * followed by 25 bits of the subnet (25 bits is usually more than enough in
+ * practice). cidr datums always have all-zero subnet bits.
+ *
+ * +----------+-----------------------+--------------+--------------------+
+ * | 1 bit IP | 32 bits network | 6 bits | 25 bits subnet |
+ * | family | (full) | network size | (truncated) |
+ * +----------+-----------------------+--------------+--------------------+
+ *
+ * IPv6
+ * ----
+ *
+ * 4 byte datums:
+ *
+ * +----------+---------------------+
+ * | 1 bit IP | 31 bits network | (up to 97 bits
+ * | family | (truncated) | network omitted)
+ * +----------+---------------------+
+ *
+ * 8 byte datums:
+ *
+ * +----------+---------------------------------+
+ * | 1 bit IP | 63 bits network | (up to 65 bits
+ * | family | (truncated) | network omitted)
+ * +----------+---------------------------------+
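+ *
+ * Worked example (added for illustration, 8 byte datums): the IPv4 value
+ * 1.2.3.4/24 produces
+ * network = 0x01020300, shifted left by 31 bits
+ * netmask_size = 24, shifted left by 25 bits
+ * subnet = 4
+ * and these are OR'ed together with the family bit left at zero.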
+ */
+static Datum
+network_abbrev_convert(Datum original, SortSupport ssup)
+{
+ network_sortsupport_state *uss = ssup->ssup_extra;
+ inet *authoritative = DatumGetInetPP(original);
+ Datum res,
+ ipaddr_datum,
+ subnet_bitmask,
+ network;
+ int subnet_size;
+
+ Assert(ip_family(authoritative) == PGSQL_AF_INET ||
+ ip_family(authoritative) == PGSQL_AF_INET6);
+
+ /*
+ * Get an unsigned integer representation of the IP address by taking its
+ * first 4 or 8 bytes. Always take all 4 bytes of an IPv4 address. Take
+ * the first 8 bytes of an IPv6 address with an 8 byte datum and 4 bytes
+ * otherwise.
+ *
+ * We're consuming an array of unsigned char, so byteswap on little endian
+ * systems (an inet's ipaddr field stores the most significant byte
+ * first).
+ */
+ if (ip_family(authoritative) == PGSQL_AF_INET)
+ {
+ uint32 ipaddr_datum32;
+
+ memcpy(&ipaddr_datum32, ip_addr(authoritative), sizeof(uint32));
+
+ /* Must byteswap on little-endian machines */
+#ifndef WORDS_BIGENDIAN
+ ipaddr_datum = pg_bswap32(ipaddr_datum32);
+#else
+ ipaddr_datum = ipaddr_datum32;
+#endif
+
+ /* Initialize result without setting ipfamily bit */
+ res = (Datum) 0;
+ }
+ else
+ {
+ memcpy(&ipaddr_datum, ip_addr(authoritative), sizeof(Datum));
+
+ /* Must byteswap on little-endian machines */
+ ipaddr_datum = DatumBigEndianToNative(ipaddr_datum);
+
+ /* Initialize result with ipfamily (most significant) bit set */
+ res = ((Datum) 1) << (SIZEOF_DATUM * BITS_PER_BYTE - 1);
+ }
+
+ /*
+ * ipaddr_datum must be "split": high order bits go in "network" component
+ * of abbreviated key (often with zeroed bits at the end due to masking),
+ * while low order bits go in "subnet" component when there is space for
+ * one. This is often accomplished by generating a temp datum subnet
+ * bitmask, which we may reuse later when generating the subnet bits
+ * themselves. (Note that subnet bits are only used with IPv4 datums on
+ * platforms where datum is 8 bytes.)
+ *
+ * The number of bits in subnet is used to generate a datum subnet
+ * bitmask. For example, with a /24 IPv4 datum there are 8 subnet bits
+ * (since 32 - 24 is 8), so the final subnet bitmask is B'1111 1111'. We
+ * need explicit handling for cases where the ipaddr bits cannot all fit
+ * in a datum, though (otherwise we'd incorrectly mask the network
+ * component with IPv6 values).
+ */
+ subnet_size = ip_maxbits(authoritative) - ip_bits(authoritative);
+ Assert(subnet_size >= 0);
+ /* subnet size must work with prefix ipaddr cases */
+ subnet_size %= SIZEOF_DATUM * BITS_PER_BYTE;
+ if (ip_bits(authoritative) == 0)
+ {
+ /* Fit as many ipaddr bits as possible into subnet */
+ subnet_bitmask = ((Datum) 0) - 1;
+ network = 0;
+ }
+ else if (ip_bits(authoritative) < SIZEOF_DATUM * BITS_PER_BYTE)
+ {
+ /* Split ipaddr bits between network and subnet */
+ subnet_bitmask = (((Datum) 1) << subnet_size) - 1;
+ network = ipaddr_datum & ~subnet_bitmask;
+ }
+ else
+ {
+ /* Fit as many ipaddr bits as possible into network */
+ subnet_bitmask = 0;
+ network = ipaddr_datum;
+ }
+
+#if SIZEOF_DATUM == 8
+ if (ip_family(authoritative) == PGSQL_AF_INET)
+ {
+ /*
+ * IPv4 with 8 byte datums: keep all 32 netmasked bits, netmask size,
+ * and most significant 25 subnet bits
+ */
+ Datum netmask_size = (Datum) ip_bits(authoritative);
+ Datum subnet;
+
+ /*
+ * Shift left 31 bits: 6 bits netmask size + 25 subnet bits.
+ *
+ * We don't make any distinction between network bits that are zero
+ * due to masking and "true"/non-masked zero bits. An abbreviated
+ * comparison that is resolved by comparing a non-masked and non-zero
+ * bit to a masked/zeroed bit is effectively resolved based on
+ * ip_bits(), even though the comparison won't reach the netmask_size
+ * bits.
+ */
+ network <<= (ABBREV_BITS_INET4_NETMASK_SIZE +
+ ABBREV_BITS_INET4_SUBNET);
+
+ /* Shift size to make room for subnet bits at the end */
+ netmask_size <<= ABBREV_BITS_INET4_SUBNET;
+
+ /* Extract subnet bits without shifting them */
+ subnet = ipaddr_datum & subnet_bitmask;
+
+ /*
+ * If we have more than 25 subnet bits, we can't fit everything. Shift
+ * subnet down to avoid clobbering bits that are only supposed to be
+ * used for netmask_size.
+ *
+ * Discarding the least significant subnet bits like this is correct
+ * because abbreviated comparisons that are resolved at the subnet
+ * level must have had equal netmask_size/ip_bits() values in order to
+ * get that far.
+ */
+ if (subnet_size > ABBREV_BITS_INET4_SUBNET)
+ subnet >>= subnet_size - ABBREV_BITS_INET4_SUBNET;
+
+ /*
+ * Assemble the final abbreviated key without clobbering the ipfamily
+ * bit that must remain a zero.
+ */
+ res |= network | netmask_size | subnet;
+ }
+ else
+#endif
+ {
+ /*
+ * 4 byte datums, or IPv6 with 8 byte datums: Use as many of the
+ * netmasked bits as will fit in final abbreviated key. Avoid
+ * clobbering the ipfamily bit that was set earlier.
+ */
+ res |= network >> 1;
+ }
+
+ uss->input_count += 1;
+
+ /* Hash abbreviated key */
+ if (uss->estimating)
+ {
+ uint32 tmp;
+
+#if SIZEOF_DATUM == 8
+ tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
+#else /* SIZEOF_DATUM != 8 */
+ tmp = (uint32) res;
+#endif
+
+ addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
+ }
+
+ return res;
+}
+
+/*
+ * Boolean ordering tests.
+ */
+Datum
+network_lt(PG_FUNCTION_ARGS)
+{
+ inet *a1 = PG_GETARG_INET_PP(0);
+ inet *a2 = PG_GETARG_INET_PP(1);
+
+ PG_RETURN_BOOL(network_cmp_internal(a1, a2) < 0);
+}
+
+Datum
+network_le(PG_FUNCTION_ARGS)
+{
+ inet *a1 = PG_GETARG_INET_PP(0);
+ inet *a2 = PG_GETARG_INET_PP(1);
+
+ PG_RETURN_BOOL(network_cmp_internal(a1, a2) <= 0);
+}
+
+Datum
+network_eq(PG_FUNCTION_ARGS)
+{
+ inet *a1 = PG_GETARG_INET_PP(0);
+ inet *a2 = PG_GETARG_INET_PP(1);
+
+ PG_RETURN_BOOL(network_cmp_internal(a1, a2) == 0);
+}
+
+Datum
+network_ge(PG_FUNCTION_ARGS)
+{
+ inet *a1 = PG_GETARG_INET_PP(0);
+ inet *a2 = PG_GETARG_INET_PP(1);
+
+ PG_RETURN_BOOL(network_cmp_internal(a1, a2) >= 0);
+}
+
+Datum
+network_gt(PG_FUNCTION_ARGS)
+{
+ inet *a1 = PG_GETARG_INET_PP(0);
+ inet *a2 = PG_GETARG_INET_PP(1);
+
+ PG_RETURN_BOOL(network_cmp_internal(a1, a2) > 0);
+}
+
+Datum
+network_ne(PG_FUNCTION_ARGS)
+{
+ inet *a1 = PG_GETARG_INET_PP(0);
+ inet *a2 = PG_GETARG_INET_PP(1);
+
+ PG_RETURN_BOOL(network_cmp_internal(a1, a2) != 0);
+}
+
+/*
+ * MIN/MAX support functions.
+ */
+Datum
+network_smaller(PG_FUNCTION_ARGS)
+{
+ inet *a1 = PG_GETARG_INET_PP(0);
+ inet *a2 = PG_GETARG_INET_PP(1);
+
+ if (network_cmp_internal(a1, a2) < 0)
+ PG_RETURN_INET_P(a1);
+ else
+ PG_RETURN_INET_P(a2);
+}
+
+Datum
+network_larger(PG_FUNCTION_ARGS)
+{
+ inet *a1 = PG_GETARG_INET_PP(0);
+ inet *a2 = PG_GETARG_INET_PP(1);
+
+ if (network_cmp_internal(a1, a2) > 0)
+ PG_RETURN_INET_P(a1);
+ else
+ PG_RETURN_INET_P(a2);
+}
+
+/*
+ * Support function for hash indexes on inet/cidr.
+ */
+Datum
+hashinet(PG_FUNCTION_ARGS)
+{
+ inet *addr = PG_GETARG_INET_PP(0);
+ int addrsize = ip_addrsize(addr);
+
+ /* XXX this assumes there are no pad bytes in the data structure */
+ return hash_any((unsigned char *) VARDATA_ANY(addr), addrsize + 2);
+}
+
+Datum
+hashinetextended(PG_FUNCTION_ARGS)
+{
+ inet *addr = PG_GETARG_INET_PP(0);
+ int addrsize = ip_addrsize(addr);
+
+ return hash_any_extended((unsigned char *) VARDATA_ANY(addr), addrsize + 2,
+ PG_GETARG_INT64(1));
+}
+
+/*
+ * Boolean network-inclusion tests.
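+ *
+ * For example (illustrative), 192.168.1.5/32 <<= 192.168.1.0/24 is true:
+ * masklen 32 >= 24 and the first 24 address bits match.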
+ */
+Datum
+network_sub(PG_FUNCTION_ARGS)
+{
+ inet *a1 = PG_GETARG_INET_PP(0);
+ inet *a2 = PG_GETARG_INET_PP(1);
+
+ if (ip_family(a1) == ip_family(a2))
+ {
+ PG_RETURN_BOOL(ip_bits(a1) > ip_bits(a2) &&
+ bitncmp(ip_addr(a1), ip_addr(a2), ip_bits(a2)) == 0);
+ }
+
+ PG_RETURN_BOOL(false);
+}
+
+Datum
+network_subeq(PG_FUNCTION_ARGS)
+{
+ inet *a1 = PG_GETARG_INET_PP(0);
+ inet *a2 = PG_GETARG_INET_PP(1);
+
+ if (ip_family(a1) == ip_family(a2))
+ {
+ PG_RETURN_BOOL(ip_bits(a1) >= ip_bits(a2) &&
+ bitncmp(ip_addr(a1), ip_addr(a2), ip_bits(a2)) == 0);
+ }
+
+ PG_RETURN_BOOL(false);
+}
+
+Datum
+network_sup(PG_FUNCTION_ARGS)
+{
+ inet *a1 = PG_GETARG_INET_PP(0);
+ inet *a2 = PG_GETARG_INET_PP(1);
+
+ if (ip_family(a1) == ip_family(a2))
+ {
+ PG_RETURN_BOOL(ip_bits(a1) < ip_bits(a2) &&
+ bitncmp(ip_addr(a1), ip_addr(a2), ip_bits(a1)) == 0);
+ }
+
+ PG_RETURN_BOOL(false);
+}
+
+Datum
+network_supeq(PG_FUNCTION_ARGS)
+{
+ inet *a1 = PG_GETARG_INET_PP(0);
+ inet *a2 = PG_GETARG_INET_PP(1);
+
+ if (ip_family(a1) == ip_family(a2))
+ {
+ PG_RETURN_BOOL(ip_bits(a1) <= ip_bits(a2) &&
+ bitncmp(ip_addr(a1), ip_addr(a2), ip_bits(a1)) == 0);
+ }
+
+ PG_RETURN_BOOL(false);
+}
+
+Datum
+network_overlap(PG_FUNCTION_ARGS)
+{
+ inet *a1 = PG_GETARG_INET_PP(0);
+ inet *a2 = PG_GETARG_INET_PP(1);
+
+ if (ip_family(a1) == ip_family(a2))
+ {
+ PG_RETURN_BOOL(bitncmp(ip_addr(a1), ip_addr(a2),
+ Min(ip_bits(a1), ip_bits(a2))) == 0);
+ }
+
+ PG_RETURN_BOOL(false);
+}
+
+/*
+ * Planner support function for network subset/superset operators
+ */
+Datum
+network_subset_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+ Node *ret = NULL;
+
+ if (IsA(rawreq, SupportRequestIndexCondition))
+ {
+ /* Try to convert operator/function call to index conditions */
+ SupportRequestIndexCondition *req = (SupportRequestIndexCondition *) rawreq;
+
+ if (is_opclause(req->node))
+ {
+ OpExpr *clause = (OpExpr *) req->node;
+
+ Assert(list_length(clause->args) == 2);
+ ret = (Node *)
+ match_network_function((Node *) linitial(clause->args),
+ (Node *) lsecond(clause->args),
+ req->indexarg,
+ req->funcid,
+ req->opfamily);
+ }
+ else if (is_funcclause(req->node)) /* be paranoid */
+ {
+ FuncExpr *clause = (FuncExpr *) req->node;
+
+ Assert(list_length(clause->args) == 2);
+ ret = (Node *)
+ match_network_function((Node *) linitial(clause->args),
+ (Node *) lsecond(clause->args),
+ req->indexarg,
+ req->funcid,
+ req->opfamily);
+ }
+ }
+
+ PG_RETURN_POINTER(ret);
+}
+
+/*
+ * match_network_function
+ * Try to generate an indexqual for a network subset/superset function.
+ *
+ * This layer is just concerned with identifying the function and swapping
+ * the arguments if necessary.
+ */
+static List *
+match_network_function(Node *leftop,
+ Node *rightop,
+ int indexarg,
+ Oid funcid,
+ Oid opfamily)
+{
+ switch (funcid)
+ {
+ case F_NETWORK_SUB:
+ /* indexkey must be on the left */
+ if (indexarg != 0)
+ return NIL;
+ return match_network_subset(leftop, rightop, false, opfamily);
+
+ case F_NETWORK_SUBEQ:
+ /* indexkey must be on the left */
+ if (indexarg != 0)
+ return NIL;
+ return match_network_subset(leftop, rightop, true, opfamily);
+
+ case F_NETWORK_SUP:
+ /* indexkey must be on the right */
+ if (indexarg != 1)
+ return NIL;
+ return match_network_subset(rightop, leftop, false, opfamily);
+
+ case F_NETWORK_SUPEQ:
+ /* indexkey must be on the right */
+ if (indexarg != 1)
+ return NIL;
+ return match_network_subset(rightop, leftop, true, opfamily);
+
+ default:
+
+ /*
+ * We'd only get here if somebody attached this support function
+ * to an unexpected function. Maybe we should complain, but for
+ * now, do nothing.
+ */
+ return NIL;
+ }
+}
+
+/*
+ * match_network_subset
+ * Try to generate an indexqual for a network subset function.
+ */
+static List *
+match_network_subset(Node *leftop,
+ Node *rightop,
+ bool is_eq,
+ Oid opfamily)
+{
+ List *result;
+ Datum rightopval;
+ Oid datatype = INETOID;
+ Oid opr1oid;
+ Oid opr2oid;
+ Datum opr1right;
+ Datum opr2right;
+ Expr *expr;
+
+ /*
+ * Can't do anything with a non-constant or NULL comparison value.
+ *
+ * Note that since we restrict ourselves to cases with a hard constant on
+ * the RHS, it's a-fortiori a pseudoconstant, and we don't need to worry
+ * about verifying that.
+ */
+ if (!IsA(rightop, Const) ||
+ ((Const *) rightop)->constisnull)
+ return NIL;
+ rightopval = ((Const *) rightop)->constvalue;
+
+ /*
+ * Must check that index's opfamily supports the operators we will want to
+ * apply.
+ *
+ * We insist on the opfamily being the specific one we expect, else we'd
+ * do the wrong thing if someone were to make a reverse-sort opfamily with
+ * the same operators.
+ */
+ if (opfamily != NETWORK_BTREE_FAM_OID)
+ return NIL;
+
+ /*
+ * create clause "key >= network_scan_first( rightopval )", or ">" if the
+ * operator disallows equality.
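+ *
+ * For example (illustrative), "key <<= '10.1.0.0/16'" becomes the pair of
+ * quals "key >= '10.1.0.0'" and "key <= '10.1.255.255'".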
+ *
+ * Note: seeing that this function supports only fixed values for opfamily
+ * and datatype, we could just hard-wire the operator OIDs instead of
+ * looking them up. But for now it seems better to be general.
+ */
+ if (is_eq)
+ {
+ opr1oid = get_opfamily_member(opfamily, datatype, datatype,
+ BTGreaterEqualStrategyNumber);
+ if (opr1oid == InvalidOid)
+ elog(ERROR, "no >= operator for opfamily %u", opfamily);
+ }
+ else
+ {
+ opr1oid = get_opfamily_member(opfamily, datatype, datatype,
+ BTGreaterStrategyNumber);
+ if (opr1oid == InvalidOid)
+ elog(ERROR, "no > operator for opfamily %u", opfamily);
+ }
+
+ opr1right = network_scan_first(rightopval);
+
+ expr = make_opclause(opr1oid, BOOLOID, false,
+ (Expr *) leftop,
+ (Expr *) makeConst(datatype, -1,
+ InvalidOid, /* not collatable */
+ -1, opr1right,
+ false, false),
+ InvalidOid, InvalidOid);
+ result = list_make1(expr);
+
+ /* create clause "key <= network_scan_last( rightopval )" */
+
+ opr2oid = get_opfamily_member(opfamily, datatype, datatype,
+ BTLessEqualStrategyNumber);
+ if (opr2oid == InvalidOid)
+ elog(ERROR, "no <= operator for opfamily %u", opfamily);
+
+ opr2right = network_scan_last(rightopval);
+
+ expr = make_opclause(opr2oid, BOOLOID, false,
+ (Expr *) leftop,
+ (Expr *) makeConst(datatype, -1,
+ InvalidOid, /* not collatable */
+ -1, opr2right,
+ false, false),
+ InvalidOid, InvalidOid);
+ result = lappend(result, expr);
+
+ return result;
+}
+
+
+/*
+ * Extract data from a network datatype.
+ */
+Datum
+network_host(PG_FUNCTION_ARGS)
+{
+ inet *ip = PG_GETARG_INET_PP(0);
+ char *ptr;
+ char tmp[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255/128")];
+
+ /* force display of max bits, regardless of masklen... */
+ if (pg_inet_net_ntop(ip_family(ip), ip_addr(ip), ip_maxbits(ip),
+ tmp, sizeof(tmp)) == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("could not format inet value: %m")));
+
+ /* Suppress /n if present (shouldn't happen now) */
+ if ((ptr = strchr(tmp, '/')) != NULL)
+ *ptr = '\0';
+
+ PG_RETURN_TEXT_P(cstring_to_text(tmp));
+}
+
+/*
+ * network_show implements the inet and cidr casts to text. This is not
+ * quite the same behavior as network_out, hence we can't drop it in favor
+ * of CoerceViaIO.
+ */
+Datum
+network_show(PG_FUNCTION_ARGS)
+{
+ inet *ip = PG_GETARG_INET_PP(0);
+ int len;
+ char tmp[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255/128")];
+
+ if (pg_inet_net_ntop(ip_family(ip), ip_addr(ip), ip_maxbits(ip),
+ tmp, sizeof(tmp)) == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("could not format inet value: %m")));
+
+ /* Add /n if not present (which it won't be) */
+ if (strchr(tmp, '/') == NULL)
+ {
+ len = strlen(tmp);
+ snprintf(tmp + len, sizeof(tmp) - len, "/%u", ip_bits(ip));
+ }
+
+ PG_RETURN_TEXT_P(cstring_to_text(tmp));
+}
+
+Datum
+inet_abbrev(PG_FUNCTION_ARGS)
+{
+ inet *ip = PG_GETARG_INET_PP(0);
+ char *dst;
+ char tmp[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255/128")];
+
+ dst = pg_inet_net_ntop(ip_family(ip), ip_addr(ip),
+ ip_bits(ip), tmp, sizeof(tmp));
+
+ if (dst == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("could not format inet value: %m")));
+
+ PG_RETURN_TEXT_P(cstring_to_text(tmp));
+}
+
+Datum
+cidr_abbrev(PG_FUNCTION_ARGS)
+{
+ inet *ip = PG_GETARG_INET_PP(0);
+ char *dst;
+ char tmp[sizeof("xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255/128")];
+
+ dst = pg_inet_cidr_ntop(ip_family(ip), ip_addr(ip),
+ ip_bits(ip), tmp, sizeof(tmp));
+
+ if (dst == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("could not format cidr value: %m")));
+
+ PG_RETURN_TEXT_P(cstring_to_text(tmp));
+}
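+
+/*
+ * For example, the display functions above should differ as follows for a
+ * value like inet '192.168.1.5/24':
+ *		host()		-> '192.168.1.5'		(address only, mask suppressed)
+ *		text cast	-> '192.168.1.5/24'		(network_show: mask always shown)
+ *		abbrev()	-> '192.168.1.5/24'		(inet_abbrev: no shortening for inet)
+ * while abbrev() of cidr '10.1.0.0/16' should yield '10.1/16', since
+ * cidr_abbrev may drop trailing zero octets.
+ */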
+
+Datum
+network_masklen(PG_FUNCTION_ARGS)
+{
+ inet *ip = PG_GETARG_INET_PP(0);
+
+ PG_RETURN_INT32(ip_bits(ip));
+}
+
+Datum
+network_family(PG_FUNCTION_ARGS)
+{
+ inet *ip = PG_GETARG_INET_PP(0);
+
+ switch (ip_family(ip))
+ {
+ case PGSQL_AF_INET:
+ PG_RETURN_INT32(4);
+ break;
+ case PGSQL_AF_INET6:
+ PG_RETURN_INT32(6);
+ break;
+ default:
+ PG_RETURN_INT32(0);
+ break;
+ }
+}
+
+Datum
+network_broadcast(PG_FUNCTION_ARGS)
+{
+ inet *ip = PG_GETARG_INET_PP(0);
+ inet *dst;
+ int byte;
+ int bits;
+ int maxbytes;
+ unsigned char mask;
+ unsigned char *a,
+ *b;
+
+ /* make sure any unused bits are zeroed */
+ dst = (inet *) palloc0(sizeof(inet));
+
+ maxbytes = ip_addrsize(ip);
+ bits = ip_bits(ip);
+ a = ip_addr(ip);
+ b = ip_addr(dst);
+
+ for (byte = 0; byte < maxbytes; byte++)
+ {
+ if (bits >= 8)
+ {
+ mask = 0x00;
+ bits -= 8;
+ }
+ else if (bits == 0)
+ mask = 0xff;
+ else
+ {
+ mask = 0xff >> bits;
+ bits = 0;
+ }
+
+ b[byte] = a[byte] | mask;
+ }
+
+ ip_family(dst) = ip_family(ip);
+ ip_bits(dst) = ip_bits(ip);
+ SET_INET_VARSIZE(dst);
+
+ PG_RETURN_INET_P(dst);
+}
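+
+/*
+ * For instance, broadcast('192.168.16.10/20') sets the low 12 host bits:
+ * the first two octets are OR'ed with 0x00, the third (with 4 network bits
+ * remaining) with 0xff >> 4 = 0x0f, and the last with 0xff, giving
+ * '192.168.31.255/20'.
+ */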
+
+Datum
+network_network(PG_FUNCTION_ARGS)
+{
+ inet *ip = PG_GETARG_INET_PP(0);
+ inet *dst;
+ int byte;
+ int bits;
+ unsigned char mask;
+ unsigned char *a,
+ *b;
+
+ /* make sure any unused bits are zeroed */
+ dst = (inet *) palloc0(sizeof(inet));
+
+ bits = ip_bits(ip);
+ a = ip_addr(ip);
+ b = ip_addr(dst);
+
+ byte = 0;
+
+ while (bits)
+ {
+ if (bits >= 8)
+ {
+ mask = 0xff;
+ bits -= 8;
+ }
+ else
+ {
+ mask = 0xff << (8 - bits);
+ bits = 0;
+ }
+
+ b[byte] = a[byte] & mask;
+ byte++;
+ }
+
+ ip_family(dst) = ip_family(ip);
+ ip_bits(dst) = ip_bits(ip);
+ SET_INET_VARSIZE(dst);
+
+ PG_RETURN_INET_P(dst);
+}
+
+Datum
+network_netmask(PG_FUNCTION_ARGS)
+{
+ inet *ip = PG_GETARG_INET_PP(0);
+ inet *dst;
+ int byte;
+ int bits;
+ unsigned char mask;
+ unsigned char *b;
+
+ /* make sure any unused bits are zeroed */
+ dst = (inet *) palloc0(sizeof(inet));
+
+ bits = ip_bits(ip);
+ b = ip_addr(dst);
+
+ byte = 0;
+
+ while (bits)
+ {
+ if (bits >= 8)
+ {
+ mask = 0xff;
+ bits -= 8;
+ }
+ else
+ {
+ mask = 0xff << (8 - bits);
+ bits = 0;
+ }
+
+ b[byte] = mask;
+ byte++;
+ }
+
+ ip_family(dst) = ip_family(ip);
+ ip_bits(dst) = ip_maxbits(ip);
+ SET_INET_VARSIZE(dst);
+
+ PG_RETURN_INET_P(dst);
+}
+
+Datum
+network_hostmask(PG_FUNCTION_ARGS)
+{
+ inet *ip = PG_GETARG_INET_PP(0);
+ inet *dst;
+ int byte;
+ int bits;
+ int maxbytes;
+ unsigned char mask;
+ unsigned char *b;
+
+ /* make sure any unused bits are zeroed */
+ dst = (inet *) palloc0(sizeof(inet));
+
+ maxbytes = ip_addrsize(ip);
+ bits = ip_maxbits(ip) - ip_bits(ip);
+ b = ip_addr(dst);
+
+ byte = maxbytes - 1;
+
+ while (bits)
+ {
+ if (bits >= 8)
+ {
+ mask = 0xff;
+ bits -= 8;
+ }
+ else
+ {
+ mask = 0xff >> (8 - bits);
+ bits = 0;
+ }
+
+ b[byte] = mask;
+ byte--;
+ }
+
+ ip_family(dst) = ip_family(ip);
+ ip_bits(dst) = ip_maxbits(ip);
+ SET_INET_VARSIZE(dst);
+
+ PG_RETURN_INET_P(dst);
+}
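+
+/*
+ * Taken together, for inet '192.168.1.10/24' the functions above should
+ * produce:
+ *		network()	-> '192.168.1.0/24'
+ *		broadcast()	-> '192.168.1.255/24'
+ *		netmask()	-> '255.255.255.0'
+ *		hostmask()	-> '0.0.0.255'
+ */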
+
+/*
+ * Returns true if the addresses are from the same family, or false. Used to
+ * check that we can create a network which contains both of the networks.
+ */
+Datum
+inet_same_family(PG_FUNCTION_ARGS)
+{
+ inet *a1 = PG_GETARG_INET_PP(0);
+ inet *a2 = PG_GETARG_INET_PP(1);
+
+ PG_RETURN_BOOL(ip_family(a1) == ip_family(a2));
+}
+
+/*
+ * Returns the smallest CIDR which contains both of the inputs.
+ */
+Datum
+inet_merge(PG_FUNCTION_ARGS)
+{
+ inet *a1 = PG_GETARG_INET_PP(0),
+ *a2 = PG_GETARG_INET_PP(1);
+ int commonbits;
+
+ if (ip_family(a1) != ip_family(a2))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot merge addresses from different families")));
+
+ commonbits = bitncommon(ip_addr(a1), ip_addr(a2),
+ Min(ip_bits(a1), ip_bits(a2)));
+
+ PG_RETURN_INET_P(cidr_set_masklen_internal(a1, commonbits));
+}
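+
+/*
+ * Example: inet_merge('192.168.1.5/24', '192.168.2.5/24') compares at most
+ * Min(24, 24) = 24 bits, finds 22 leading bits in common, and so returns
+ * '192.168.0.0/22'.
+ */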
+
+/*
+ * Convert a value of a network datatype to an approximate scalar value.
+ * This is used for estimating selectivities of inequality operators
+ * involving network types.
+ *
+ * On failure (e.g., unsupported typid), set *failure to true;
+ * otherwise, that variable is not changed.
+ */
+double
+convert_network_to_scalar(Datum value, Oid typid, bool *failure)
+{
+ switch (typid)
+ {
+ case INETOID:
+ case CIDROID:
+ {
+ inet *ip = DatumGetInetPP(value);
+ int len;
+ double res;
+ int i;
+
+ /*
+ * Note that we don't use the full address for IPv6.
+ */
+ if (ip_family(ip) == PGSQL_AF_INET)
+ len = 4;
+ else
+ len = 5;
+
+ res = ip_family(ip);
+ for (i = 0; i < len; i++)
+ {
+ res *= 256;
+ res += ip_addr(ip)[i];
+ }
+ return res;
+ }
+ case MACADDROID:
+ {
+ macaddr *mac = DatumGetMacaddrP(value);
+ double res;
+
+ res = (mac->a << 16) | (mac->b << 8) | (mac->c);
+ res *= 256 * 256 * 256;
+ res += (mac->d << 16) | (mac->e << 8) | (mac->f);
+ return res;
+ }
+ case MACADDR8OID:
+ {
+ macaddr8 *mac = DatumGetMacaddr8P(value);
+ double res;
+
+ res = (mac->a << 24) | (mac->b << 16) | (mac->c << 8) | (mac->d);
+ res *= ((double) 256) * 256 * 256 * 256;
+ res += (mac->e << 24) | (mac->f << 16) | (mac->g << 8) | (mac->h);
+ return res;
+ }
+ }
+
+ *failure = true;
+ return 0;
+}
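+
+/*
+ * The scalar produced above is just the address bytes read as a base-256
+ * number, prefixed by the family code.  For IPv4 '10.0.0.1', for example,
+ * the result is family * 256^4 + 10*256^3 + 0*256^2 + 0*256 + 1; only the
+ * first five bytes of an IPv6 address contribute, an approximation that is
+ * adequate for selectivity estimation.
+ */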
+
+/*
+ * int
+ * bitncmp(l, r, n)
+ * compare bit masks l and r, for n bits.
+ * return:
+ * <0, >0, or 0 in the libc tradition.
+ * note:
+ * network byte order assumed. this means 192.5.5.240/28 has
+ * 0x11110000 in its fourth octet.
+ * author:
+ * Paul Vixie (ISC), June 1996
+ */
+int
+bitncmp(const unsigned char *l, const unsigned char *r, int n)
+{
+ unsigned int lb,
+ rb;
+ int x,
+ b;
+
+ b = n / 8;
+ x = memcmp(l, r, b);
+ if (x || (n % 8) == 0)
+ return x;
+
+ lb = l[b];
+ rb = r[b];
+ for (b = n % 8; b > 0; b--)
+ {
+ if (IS_HIGHBIT_SET(lb) != IS_HIGHBIT_SET(rb))
+ {
+ if (IS_HIGHBIT_SET(lb))
+ return 1;
+ return -1;
+ }
+ lb <<= 1;
+ rb <<= 1;
+ }
+ return 0;
+}
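+
+/*
+ * Example: bitncmp applied to 192.5.5.240 and 192.5.5.0 for n = 28 bits
+ * first memcmp()s the three whole bytes (equal), then walks the top four
+ * bits of the fourth octet; 0xf0 vs 0x00 differ at the first of those
+ * bits, so the result is positive.
+ */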
+
+/*
+ * bitncommon: compare bit masks l and r, for up to n bits.
+ *
+ * Returns the number of leading bits that match (0 to n).
+ */
+int
+bitncommon(const unsigned char *l, const unsigned char *r, int n)
+{
+ int byte,
+ nbits;
+
+ /* number of bits to examine in last byte */
+ nbits = n % 8;
+
+ /* check whole bytes */
+ for (byte = 0; byte < n / 8; byte++)
+ {
+ if (l[byte] != r[byte])
+ {
+ /* at least one bit in the last byte is not common */
+ nbits = 7;
+ break;
+ }
+ }
+
+ /* check bits in last partial byte */
+ if (nbits != 0)
+ {
+ /* calculate diff of first non-matching bytes */
+ unsigned int diff = l[byte] ^ r[byte];
+
+ /* compare the bits from the most to the least */
+ while ((diff >> (8 - nbits)) != 0)
+ nbits--;
+ }
+
+ return (8 * byte) + nbits;
+}
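+
+/*
+ * Example: bitncommon on 192.168.1.0 and 192.168.2.0 with n = 24 finds the
+ * first two octets equal, the third unequal, and then counts matching bits
+ * within that octet (0x01 vs 0x02 share their top six bits), returning 22.
+ */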
+
+
+/*
+ * Verify a CIDR address is OK (doesn't have bits set past the masklen)
+ */
+static bool
+addressOK(unsigned char *a, int bits, int family)
+{
+ int byte;
+ int nbits;
+ int maxbits;
+ int maxbytes;
+ unsigned char mask;
+
+ if (family == PGSQL_AF_INET)
+ {
+ maxbits = 32;
+ maxbytes = 4;
+ }
+ else
+ {
+ maxbits = 128;
+ maxbytes = 16;
+ }
+ Assert(bits <= maxbits);
+
+ if (bits == maxbits)
+ return true;
+
+ byte = bits / 8;
+
+ nbits = bits % 8;
+ mask = 0xff;
+	if (nbits != 0)
+ mask >>= nbits;
+
+ while (byte < maxbytes)
+ {
+ if ((a[byte] & mask) != 0)
+ return false;
+ mask = 0xff;
+ byte++;
+ }
+
+ return true;
+}
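+
+/*
+ * For example, addressOK accepts 192.168.1.0 with bits = 24 but rejects
+ * 192.168.1.5 with bits = 24, since the last octet has host bits set.
+ */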
+
+
+/*
+ * These functions are used by planner to generate indexscan limits
+ * for clauses a << b and a <<= b
+ */
+
+/* return the minimal value for an IP on a given network */
+Datum
+network_scan_first(Datum in)
+{
+ return DirectFunctionCall1(network_network, in);
+}
+
+/*
+ * return "last" IP on a given network. It's the broadcast address,
+ * however, masklen has to be set to its max bits, since
+ * 192.168.0.255/24 is considered less than 192.168.0.255/32
+ *
+ * inet_set_masklen() hacked to max out the masklength to 128 for IPv6
+ * and 32 for IPv4 when given '-1' as argument.
+ */
+Datum
+network_scan_last(Datum in)
+{
+ return DirectFunctionCall2(inet_set_masklen,
+ DirectFunctionCall1(network_broadcast, in),
+ Int32GetDatum(-1));
+}
+
+
+/*
+ * IP address that the client is connecting from (NULL if Unix socket)
+ */
+Datum
+inet_client_addr(PG_FUNCTION_ARGS)
+{
+ Port *port = MyProcPort;
+ char remote_host[NI_MAXHOST];
+ int ret;
+
+ if (port == NULL)
+ PG_RETURN_NULL();
+
+ switch (port->raddr.addr.ss_family)
+ {
+ case AF_INET:
+#ifdef HAVE_IPV6
+ case AF_INET6:
+#endif
+ break;
+ default:
+ PG_RETURN_NULL();
+ }
+
+ remote_host[0] = '\0';
+
+ ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
+ remote_host, sizeof(remote_host),
+ NULL, 0,
+ NI_NUMERICHOST | NI_NUMERICSERV);
+ if (ret != 0)
+ PG_RETURN_NULL();
+
+ clean_ipv6_addr(port->raddr.addr.ss_family, remote_host);
+
+ PG_RETURN_INET_P(network_in(remote_host, false));
+}
+
+
+/*
+ * port that the client is connecting from (NULL if Unix socket)
+ */
+Datum
+inet_client_port(PG_FUNCTION_ARGS)
+{
+ Port *port = MyProcPort;
+ char remote_port[NI_MAXSERV];
+ int ret;
+
+ if (port == NULL)
+ PG_RETURN_NULL();
+
+ switch (port->raddr.addr.ss_family)
+ {
+ case AF_INET:
+#ifdef HAVE_IPV6
+ case AF_INET6:
+#endif
+ break;
+ default:
+ PG_RETURN_NULL();
+ }
+
+ remote_port[0] = '\0';
+
+ ret = pg_getnameinfo_all(&port->raddr.addr, port->raddr.salen,
+ NULL, 0,
+ remote_port, sizeof(remote_port),
+ NI_NUMERICHOST | NI_NUMERICSERV);
+ if (ret != 0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_DATUM(DirectFunctionCall1(int4in, CStringGetDatum(remote_port)));
+}
+
+
+/*
+ * IP address that the server accepted the connection on (NULL if Unix socket)
+ */
+Datum
+inet_server_addr(PG_FUNCTION_ARGS)
+{
+ Port *port = MyProcPort;
+ char local_host[NI_MAXHOST];
+ int ret;
+
+ if (port == NULL)
+ PG_RETURN_NULL();
+
+ switch (port->laddr.addr.ss_family)
+ {
+ case AF_INET:
+#ifdef HAVE_IPV6
+ case AF_INET6:
+#endif
+ break;
+ default:
+ PG_RETURN_NULL();
+ }
+
+ local_host[0] = '\0';
+
+ ret = pg_getnameinfo_all(&port->laddr.addr, port->laddr.salen,
+ local_host, sizeof(local_host),
+ NULL, 0,
+ NI_NUMERICHOST | NI_NUMERICSERV);
+ if (ret != 0)
+ PG_RETURN_NULL();
+
+ clean_ipv6_addr(port->laddr.addr.ss_family, local_host);
+
+ PG_RETURN_INET_P(network_in(local_host, false));
+}
+
+
+/*
+ * port that the server accepted the connection on (NULL if Unix socket)
+ */
+Datum
+inet_server_port(PG_FUNCTION_ARGS)
+{
+ Port *port = MyProcPort;
+ char local_port[NI_MAXSERV];
+ int ret;
+
+ if (port == NULL)
+ PG_RETURN_NULL();
+
+ switch (port->laddr.addr.ss_family)
+ {
+ case AF_INET:
+#ifdef HAVE_IPV6
+ case AF_INET6:
+#endif
+ break;
+ default:
+ PG_RETURN_NULL();
+ }
+
+ local_port[0] = '\0';
+
+ ret = pg_getnameinfo_all(&port->laddr.addr, port->laddr.salen,
+ NULL, 0,
+ local_port, sizeof(local_port),
+ NI_NUMERICHOST | NI_NUMERICSERV);
+ if (ret != 0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_DATUM(DirectFunctionCall1(int4in, CStringGetDatum(local_port)));
+}
+
+
+Datum
+inetnot(PG_FUNCTION_ARGS)
+{
+ inet *ip = PG_GETARG_INET_PP(0);
+ inet *dst;
+
+ dst = (inet *) palloc0(sizeof(inet));
+
+ {
+ int nb = ip_addrsize(ip);
+ unsigned char *pip = ip_addr(ip);
+ unsigned char *pdst = ip_addr(dst);
+
+ while (--nb >= 0)
+ pdst[nb] = ~pip[nb];
+ }
+ ip_bits(dst) = ip_bits(ip);
+
+ ip_family(dst) = ip_family(ip);
+ SET_INET_VARSIZE(dst);
+
+ PG_RETURN_INET_P(dst);
+}
+
+
+Datum
+inetand(PG_FUNCTION_ARGS)
+{
+ inet *ip = PG_GETARG_INET_PP(0);
+ inet *ip2 = PG_GETARG_INET_PP(1);
+ inet *dst;
+
+ dst = (inet *) palloc0(sizeof(inet));
+
+ if (ip_family(ip) != ip_family(ip2))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot AND inet values of different sizes")));
+ else
+ {
+ int nb = ip_addrsize(ip);
+ unsigned char *pip = ip_addr(ip);
+ unsigned char *pip2 = ip_addr(ip2);
+ unsigned char *pdst = ip_addr(dst);
+
+ while (--nb >= 0)
+ pdst[nb] = pip[nb] & pip2[nb];
+ }
+ ip_bits(dst) = Max(ip_bits(ip), ip_bits(ip2));
+
+ ip_family(dst) = ip_family(ip);
+ SET_INET_VARSIZE(dst);
+
+ PG_RETURN_INET_P(dst);
+}
+
+
+Datum
+inetor(PG_FUNCTION_ARGS)
+{
+ inet *ip = PG_GETARG_INET_PP(0);
+ inet *ip2 = PG_GETARG_INET_PP(1);
+ inet *dst;
+
+ dst = (inet *) palloc0(sizeof(inet));
+
+ if (ip_family(ip) != ip_family(ip2))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot OR inet values of different sizes")));
+ else
+ {
+ int nb = ip_addrsize(ip);
+ unsigned char *pip = ip_addr(ip);
+ unsigned char *pip2 = ip_addr(ip2);
+ unsigned char *pdst = ip_addr(dst);
+
+ while (--nb >= 0)
+ pdst[nb] = pip[nb] | pip2[nb];
+ }
+ ip_bits(dst) = Max(ip_bits(ip), ip_bits(ip2));
+
+ ip_family(dst) = ip_family(ip);
+ SET_INET_VARSIZE(dst);
+
+ PG_RETURN_INET_P(dst);
+}
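+
+/*
+ * Examples of the bitwise operators implemented above:
+ *		~ inet '192.168.1.6'						-> '63.87.254.249'
+ *		inet '192.168.1.6' & inet '0.0.0.255'		-> '0.0.0.6'
+ *		inet '192.168.1.6' | inet '0.0.0.255'		-> '192.168.1.255'
+ * The result's masklen is copied from the input for NOT, and is the larger
+ * of the two inputs' masklens for AND and OR.
+ */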
+
+
+static inet *
+internal_inetpl(inet *ip, int64 addend)
+{
+ inet *dst;
+
+ dst = (inet *) palloc0(sizeof(inet));
+
+ {
+ int nb = ip_addrsize(ip);
+ unsigned char *pip = ip_addr(ip);
+ unsigned char *pdst = ip_addr(dst);
+ int carry = 0;
+
+ while (--nb >= 0)
+ {
+ carry = pip[nb] + (int) (addend & 0xFF) + carry;
+ pdst[nb] = (unsigned char) (carry & 0xFF);
+ carry >>= 8;
+
+ /*
+ * We have to be careful about right-shifting addend because
+ * right-shift isn't portable for negative values, while simply
+ * dividing by 256 doesn't work (the standard rounding is in the
+ * wrong direction, besides which there may be machines out there
+ * that round the wrong way). So, explicitly clear the low-order
+ * byte to remove any doubt about the correct result of the
+ * division, and then divide rather than shift.
+ */
+ addend &= ~((int64) 0xFF);
+ addend /= 0x100;
+ }
+
+ /*
+ * At this point we should have addend and carry both zero if original
+ * addend was >= 0, or addend -1 and carry 1 if original addend was <
+ * 0. Anything else means overflow.
+ */
+ if (!((addend == 0 && carry == 0) ||
+ (addend == -1 && carry == 1)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("result is out of range")));
+ }
+
+ ip_bits(dst) = ip_bits(ip);
+ ip_family(dst) = ip_family(ip);
+ SET_INET_VARSIZE(dst);
+
+ return dst;
+}
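+
+/*
+ * Example of the byte-wise addition above: '192.168.1.250' + 10 carries out
+ * of the last octet (250 + 10 = 260), leaving 4 there and adding 1 to the
+ * next octet, for '192.168.2.4'.
+ */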
+
+
+Datum
+inetpl(PG_FUNCTION_ARGS)
+{
+ inet *ip = PG_GETARG_INET_PP(0);
+ int64 addend = PG_GETARG_INT64(1);
+
+ PG_RETURN_INET_P(internal_inetpl(ip, addend));
+}
+
+
+Datum
+inetmi_int8(PG_FUNCTION_ARGS)
+{
+ inet *ip = PG_GETARG_INET_PP(0);
+ int64 addend = PG_GETARG_INT64(1);
+
+ PG_RETURN_INET_P(internal_inetpl(ip, -addend));
+}
+
+
+Datum
+inetmi(PG_FUNCTION_ARGS)
+{
+ inet *ip = PG_GETARG_INET_PP(0);
+ inet *ip2 = PG_GETARG_INET_PP(1);
+ int64 res = 0;
+
+ if (ip_family(ip) != ip_family(ip2))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("cannot subtract inet values of different sizes")));
+ else
+ {
+ /*
+ * We form the difference using the traditional complement, increment,
+ * and add rule, with the increment part being handled by starting the
+ * carry off at 1. If you don't think integer arithmetic is done in
+ * two's complement, too bad.
+ */
+ int nb = ip_addrsize(ip);
+ int byte = 0;
+ unsigned char *pip = ip_addr(ip);
+ unsigned char *pip2 = ip_addr(ip2);
+ int carry = 1;
+
+ while (--nb >= 0)
+ {
+ int lobyte;
+
+ carry = pip[nb] + (~pip2[nb] & 0xFF) + carry;
+ lobyte = carry & 0xFF;
+ if (byte < sizeof(int64))
+ {
+ res |= ((int64) lobyte) << (byte * 8);
+ }
+ else
+ {
+ /*
+ * Input wider than int64: check for overflow. All bytes to
+ * the left of what will fit should be 0 or 0xFF, depending on
+ * sign of the now-complete result.
+ */
+ if ((res < 0) ? (lobyte != 0xFF) : (lobyte != 0))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("result is out of range")));
+ }
+ carry >>= 8;
+ byte++;
+ }
+
+ /*
+ * If input is narrower than int64, overflow is not possible, but we
+ * have to do proper sign extension.
+ */
+ if (carry == 0 && byte < sizeof(int64))
+ res |= ((uint64) (int64) -1) << (byte * 8);
+ }
+
+ PG_RETURN_INT64(res);
+}
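+
+/*
+ * For example, inet '192.168.1.43' - inet '192.168.1.19' yields 24; with
+ * IPv6 inputs the (up to 128-bit) difference must still fit in an int64,
+ * or the overflow error above is raised.
+ */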
+
+
+/*
+ * clean_ipv6_addr --- remove any '%zone' part from an IPv6 address string
+ *
+ * XXX This should go away someday!
+ *
+ * This is a kluge needed because we don't yet support zones in stored inet
+ * values. Since the result of getnameinfo() might include a zone spec,
+ * call this to remove it anywhere we want to feed getnameinfo's output to
+ * network_in. Beats failing entirely.
+ *
+ * An alternative approach would be to let network_in ignore %-parts for
+ * itself, but that would mean we'd silently drop zone specs in user input,
+ * which seems not such a good idea.
+ */
+void
+clean_ipv6_addr(int addr_family, char *addr)
+{
+#ifdef HAVE_IPV6
+ if (addr_family == AF_INET6)
+ {
+ char *pct = strchr(addr, '%');
+
+ if (pct)
+ *pct = '\0';
+ }
+#endif
+}
diff --git a/src/backend/utils/adt/network_gist.c b/src/backend/utils/adt/network_gist.c
new file mode 100644
index 0000000..95d16df
--- /dev/null
+++ b/src/backend/utils/adt/network_gist.c
@@ -0,0 +1,809 @@
+/*-------------------------------------------------------------------------
+ *
+ * network_gist.c
+ * GiST support for network types.
+ *
+ * The key thing to understand about this code is the definition of the
+ * "union" of a set of INET/CIDR values. It works like this:
+ * 1. If the values are not all of the same IP address family, the "union"
+ * is a dummy value with family number zero, minbits zero, commonbits zero,
+ * address all zeroes. Otherwise:
+ * 2. The union has the common IP address family number.
+ * 3. The union's minbits value is the smallest netmask length ("ip_bits")
+ * of all the input values.
+ * 4. Let C be the number of leading address bits that are in common among
+ * all the input values (C ranges from 0 to ip_maxbits for the family).
+ * 5. The union's commonbits value is C.
+ * 6. The union's address value is the same as the common prefix for its
+ * first C bits, and is zeroes to the right of that. The physical width
+ * of the address value is ip_maxbits for the address family.
+ *
+ * In a leaf index entry (representing a single key), commonbits is equal to
+ * ip_maxbits for the address family, minbits is the same as the represented
+ * value's ip_bits, and the address is equal to the represented address.
+ * Although it may appear that we're wasting a byte by storing the union
+ * format and not just the represented INET/CIDR value in leaf keys, the
+ * extra byte is actually "free" because of alignment considerations.
+ *
+ * Note that this design tracks minbits and commonbits independently; in any
+ * given union value, either might be smaller than the other. This does not
+ * help us much when descending the tree, because of the way inet comparison
+ * is defined: at non-leaf nodes we can't compare more than minbits bits
+ * even if we know them. However, it greatly improves the quality of split
+ * decisions. Preliminary testing suggests that searches are as much as
+ * twice as fast as for a simpler design in which a single field doubles as
+ * the common prefix length and the minimum ip_bits value.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/network_gist.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <sys/socket.h>
+
+#include "access/gist.h"
+#include "access/stratnum.h"
+#include "utils/builtins.h"
+#include "utils/inet.h"
+
+/*
+ * Operator strategy numbers used in the GiST inet_ops opclass
+ */
+#define INETSTRAT_OVERLAPS RTOverlapStrategyNumber
+#define INETSTRAT_EQ RTEqualStrategyNumber
+#define INETSTRAT_NE RTNotEqualStrategyNumber
+#define INETSTRAT_LT RTLessStrategyNumber
+#define INETSTRAT_LE RTLessEqualStrategyNumber
+#define INETSTRAT_GT RTGreaterStrategyNumber
+#define INETSTRAT_GE RTGreaterEqualStrategyNumber
+#define INETSTRAT_SUB RTSubStrategyNumber
+#define INETSTRAT_SUBEQ RTSubEqualStrategyNumber
+#define INETSTRAT_SUP RTSuperStrategyNumber
+#define INETSTRAT_SUPEQ RTSuperEqualStrategyNumber
+
+
+/*
+ * Representation of a GiST INET/CIDR index key. This is not identical to
+ * INET/CIDR because we need to keep track of the length of the common address
+ * prefix as well as the minimum netmask length. However, as long as it
+ * follows varlena header rules, the core GiST code won't know the difference.
+ * For simplicity we always use 1-byte-header varlena format.
+ */
+typedef struct GistInetKey
+{
+ uint8 va_header; /* varlena header --- don't touch directly */
+ unsigned char family; /* PGSQL_AF_INET, PGSQL_AF_INET6, or zero */
+ unsigned char minbits; /* minimum number of bits in netmask */
+ unsigned char commonbits; /* number of common prefix bits in addresses */
+ unsigned char ipaddr[16]; /* up to 128 bits of common address */
+} GistInetKey;
+
+#define DatumGetInetKeyP(X) ((GistInetKey *) DatumGetPointer(X))
+#define InetKeyPGetDatum(X) PointerGetDatum(X)
+
+/*
+ * Access macros; not really exciting, but we use these for notational
+ * consistency with access to INET/CIDR values. Note that family-zero values
+ * are stored with 4 bytes of address, not 16.
+ */
+#define gk_ip_family(gkptr) ((gkptr)->family)
+#define gk_ip_minbits(gkptr) ((gkptr)->minbits)
+#define gk_ip_commonbits(gkptr) ((gkptr)->commonbits)
+#define gk_ip_addr(gkptr) ((gkptr)->ipaddr)
+#define ip_family_maxbits(fam) ((fam) == PGSQL_AF_INET6 ? 128 : 32)
+
+/* These require that the family field has been set: */
+#define gk_ip_addrsize(gkptr) \
+ (gk_ip_family(gkptr) == PGSQL_AF_INET6 ? 16 : 4)
+#define gk_ip_maxbits(gkptr) \
+ ip_family_maxbits(gk_ip_family(gkptr))
+#define SET_GK_VARSIZE(dst) \
+ SET_VARSIZE_SHORT(dst, offsetof(GistInetKey, ipaddr) + gk_ip_addrsize(dst))
+
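+/*
+ * As a concrete illustration of the union rules described at the top of
+ * this file: the union of 192.168.1.0/24 and 192.168.2.0/25 has
+ * family = PGSQL_AF_INET, minbits = Min(24, 25) = 24, commonbits = 22
+ * (the third octets 0x01 and 0x02 agree only in their top six bits), and
+ * address 192.168.0.0 (the 22 common bits, zero-filled to the right).
+ */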
+
+/*
+ * The GiST query consistency check
+ */
+Datum
+inet_gist_consistent(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *ent = (GISTENTRY *) PG_GETARG_POINTER(0);
+ inet *query = PG_GETARG_INET_PP(1);
+ StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
+
+ /* Oid subtype = PG_GETARG_OID(3); */
+ bool *recheck = (bool *) PG_GETARG_POINTER(4);
+ GistInetKey *key = DatumGetInetKeyP(ent->key);
+ int minbits,
+ order;
+
+ /* All operators served by this function are exact. */
+ *recheck = false;
+
+ /*
+ * Check 0: different families
+ *
+ * If key represents multiple address families, its children could match
+ * anything. This can only happen on an inner index page.
+ */
+ if (gk_ip_family(key) == 0)
+ {
+ Assert(!GIST_LEAF(ent));
+ PG_RETURN_BOOL(true);
+ }
+
+ /*
+ * Check 1: different families
+ *
+ * Matching families do not help any of the strategies.
+ */
+ if (gk_ip_family(key) != ip_family(query))
+ {
+ switch (strategy)
+ {
+ case INETSTRAT_LT:
+ case INETSTRAT_LE:
+ if (gk_ip_family(key) < ip_family(query))
+ PG_RETURN_BOOL(true);
+ break;
+
+ case INETSTRAT_GE:
+ case INETSTRAT_GT:
+ if (gk_ip_family(key) > ip_family(query))
+ PG_RETURN_BOOL(true);
+ break;
+
+ case INETSTRAT_NE:
+ PG_RETURN_BOOL(true);
+ }
+ /* For all other cases, we can be sure there is no match */
+ PG_RETURN_BOOL(false);
+ }
+
+ /*
+ * Check 2: network bit count
+ *
+ * Network bit count (ip_bits) helps to check leaves for sub network and
+ * sup network operators. At non-leaf nodes, we know every child value
+ * has ip_bits >= gk_ip_minbits(key), so we can avoid descending in some
+ * cases too.
+ */
+ switch (strategy)
+ {
+ case INETSTRAT_SUB:
+ if (GIST_LEAF(ent) && gk_ip_minbits(key) <= ip_bits(query))
+ PG_RETURN_BOOL(false);
+ break;
+
+ case INETSTRAT_SUBEQ:
+ if (GIST_LEAF(ent) && gk_ip_minbits(key) < ip_bits(query))
+ PG_RETURN_BOOL(false);
+ break;
+
+ case INETSTRAT_SUPEQ:
+ case INETSTRAT_EQ:
+ if (gk_ip_minbits(key) > ip_bits(query))
+ PG_RETURN_BOOL(false);
+ break;
+
+ case INETSTRAT_SUP:
+ if (gk_ip_minbits(key) >= ip_bits(query))
+ PG_RETURN_BOOL(false);
+ break;
+ }
+
+ /*
+ * Check 3: common network bits
+ *
+ * Compare available common prefix bits to the query, but not beyond
+ * either the query's netmask or the minimum netmask among the represented
+ * values. If these bits don't match the query, we have our answer (and
+ * may or may not need to descend, depending on the operator). If they do
+ * match, and we are not at a leaf, we descend in all cases.
+ *
+ * Note this is the final check for operators that only consider the
+ * network part of the address.
+ */
+ minbits = Min(gk_ip_commonbits(key), gk_ip_minbits(key));
+ minbits = Min(minbits, ip_bits(query));
+
+ order = bitncmp(gk_ip_addr(key), ip_addr(query), minbits);
+
+ switch (strategy)
+ {
+ case INETSTRAT_SUB:
+ case INETSTRAT_SUBEQ:
+ case INETSTRAT_OVERLAPS:
+ case INETSTRAT_SUPEQ:
+ case INETSTRAT_SUP:
+ PG_RETURN_BOOL(order == 0);
+
+ case INETSTRAT_LT:
+ case INETSTRAT_LE:
+ if (order > 0)
+ PG_RETURN_BOOL(false);
+ if (order < 0 || !GIST_LEAF(ent))
+ PG_RETURN_BOOL(true);
+ break;
+
+ case INETSTRAT_EQ:
+ if (order != 0)
+ PG_RETURN_BOOL(false);
+ if (!GIST_LEAF(ent))
+ PG_RETURN_BOOL(true);
+ break;
+
+ case INETSTRAT_GE:
+ case INETSTRAT_GT:
+ if (order < 0)
+ PG_RETURN_BOOL(false);
+ if (order > 0 || !GIST_LEAF(ent))
+ PG_RETURN_BOOL(true);
+ break;
+
+ case INETSTRAT_NE:
+ if (order != 0 || !GIST_LEAF(ent))
+ PG_RETURN_BOOL(true);
+ break;
+ }
+
+ /*
+ * Remaining checks are only for leaves and basic comparison strategies.
+ * See network_cmp_internal() in network.c for the implementation we need
+ * to match. Note that in a leaf key, commonbits should equal the address
+ * length, so we compared the whole network parts above.
+ */
+ Assert(GIST_LEAF(ent));
+
+ /*
+ * Check 4: network bit count
+ *
+ * Next step is to compare netmask widths.
+ */
+ switch (strategy)
+ {
+ case INETSTRAT_LT:
+ case INETSTRAT_LE:
+ if (gk_ip_minbits(key) < ip_bits(query))
+ PG_RETURN_BOOL(true);
+ if (gk_ip_minbits(key) > ip_bits(query))
+ PG_RETURN_BOOL(false);
+ break;
+
+ case INETSTRAT_EQ:
+ if (gk_ip_minbits(key) != ip_bits(query))
+ PG_RETURN_BOOL(false);
+ break;
+
+ case INETSTRAT_GE:
+ case INETSTRAT_GT:
+ if (gk_ip_minbits(key) > ip_bits(query))
+ PG_RETURN_BOOL(true);
+ if (gk_ip_minbits(key) < ip_bits(query))
+ PG_RETURN_BOOL(false);
+ break;
+
+ case INETSTRAT_NE:
+ if (gk_ip_minbits(key) != ip_bits(query))
+ PG_RETURN_BOOL(true);
+ break;
+ }
+
+ /*
+ * Check 5: whole address
+ *
+ * Netmask bit counts are the same, so check all the address bits.
+ */
+ order = bitncmp(gk_ip_addr(key), ip_addr(query), gk_ip_maxbits(key));
+
+ switch (strategy)
+ {
+ case INETSTRAT_LT:
+ PG_RETURN_BOOL(order < 0);
+
+ case INETSTRAT_LE:
+ PG_RETURN_BOOL(order <= 0);
+
+ case INETSTRAT_EQ:
+ PG_RETURN_BOOL(order == 0);
+
+ case INETSTRAT_GE:
+ PG_RETURN_BOOL(order >= 0);
+
+ case INETSTRAT_GT:
+ PG_RETURN_BOOL(order > 0);
+
+ case INETSTRAT_NE:
+ PG_RETURN_BOOL(order != 0);
+ }
+
+	elog(ERROR, "unknown strategy: %d", strategy);
+ PG_RETURN_BOOL(false); /* keep compiler quiet */
+}
+
+/*
+ * Calculate parameters of the union of some GistInetKeys.
+ *
+ * Examine the keys in elements m..n inclusive of the GISTENTRY array,
+ * and compute these output parameters:
+ * *minfamily_p = minimum IP address family number
+ * *maxfamily_p = maximum IP address family number
+ * *minbits_p = minimum netmask width
+ * *commonbits_p = number of leading bits in common among the addresses
+ *
+ * minbits and commonbits are forced to zero if there's more than one
+ * address family.
+ */
+static void
+calc_inet_union_params(GISTENTRY *ent,
+ int m, int n,
+ int *minfamily_p,
+ int *maxfamily_p,
+ int *minbits_p,
+ int *commonbits_p)
+{
+ int minfamily,
+ maxfamily,
+ minbits,
+ commonbits;
+ unsigned char *addr;
+ GistInetKey *tmp;
+ int i;
+
+ /* Must be at least one key. */
+ Assert(m <= n);
+
+ /* Initialize variables using the first key. */
+ tmp = DatumGetInetKeyP(ent[m].key);
+ minfamily = maxfamily = gk_ip_family(tmp);
+ minbits = gk_ip_minbits(tmp);
+ commonbits = gk_ip_commonbits(tmp);
+ addr = gk_ip_addr(tmp);
+
+ /* Scan remaining keys. */
+ for (i = m + 1; i <= n; i++)
+ {
+ tmp = DatumGetInetKeyP(ent[i].key);
+
+ /* Determine range of family numbers */
+ if (minfamily > gk_ip_family(tmp))
+ minfamily = gk_ip_family(tmp);
+ if (maxfamily < gk_ip_family(tmp))
+ maxfamily = gk_ip_family(tmp);
+
+ /* Find minimum minbits */
+ if (minbits > gk_ip_minbits(tmp))
+ minbits = gk_ip_minbits(tmp);
+
+ /* Find minimum number of bits in common */
+ if (commonbits > gk_ip_commonbits(tmp))
+ commonbits = gk_ip_commonbits(tmp);
+ if (commonbits > 0)
+ commonbits = bitncommon(addr, gk_ip_addr(tmp), commonbits);
+ }
+
+ /* Force minbits/commonbits to zero if more than one family. */
+ if (minfamily != maxfamily)
+ minbits = commonbits = 0;
+
+ *minfamily_p = minfamily;
+ *maxfamily_p = maxfamily;
+ *minbits_p = minbits;
+ *commonbits_p = commonbits;
+}
+
+/*
+ * Same as above, but the GISTENTRY elements to examine are those with
+ * indices listed in the offsets[] array.
+ */
+static void
+calc_inet_union_params_indexed(GISTENTRY *ent,
+ OffsetNumber *offsets, int noffsets,
+ int *minfamily_p,
+ int *maxfamily_p,
+ int *minbits_p,
+ int *commonbits_p)
+{
+ int minfamily,
+ maxfamily,
+ minbits,
+ commonbits;
+ unsigned char *addr;
+ GistInetKey *tmp;
+ int i;
+
+ /* Must be at least one key. */
+ Assert(noffsets > 0);
+
+ /* Initialize variables using the first key. */
+ tmp = DatumGetInetKeyP(ent[offsets[0]].key);
+ minfamily = maxfamily = gk_ip_family(tmp);
+ minbits = gk_ip_minbits(tmp);
+ commonbits = gk_ip_commonbits(tmp);
+ addr = gk_ip_addr(tmp);
+
+ /* Scan remaining keys. */
+ for (i = 1; i < noffsets; i++)
+ {
+ tmp = DatumGetInetKeyP(ent[offsets[i]].key);
+
+ /* Determine range of family numbers */
+ if (minfamily > gk_ip_family(tmp))
+ minfamily = gk_ip_family(tmp);
+ if (maxfamily < gk_ip_family(tmp))
+ maxfamily = gk_ip_family(tmp);
+
+ /* Find minimum minbits */
+ if (minbits > gk_ip_minbits(tmp))
+ minbits = gk_ip_minbits(tmp);
+
+ /* Find minimum number of bits in common */
+ if (commonbits > gk_ip_commonbits(tmp))
+ commonbits = gk_ip_commonbits(tmp);
+ if (commonbits > 0)
+ commonbits = bitncommon(addr, gk_ip_addr(tmp), commonbits);
+ }
+
+ /* Force minbits/commonbits to zero if more than one family. */
+ if (minfamily != maxfamily)
+ minbits = commonbits = 0;
+
+ *minfamily_p = minfamily;
+ *maxfamily_p = maxfamily;
+ *minbits_p = minbits;
+ *commonbits_p = commonbits;
+}
+
+/*
+ * Construct a GistInetKey representing a union value.
+ *
+ * Inputs are the family/minbits/commonbits values to use, plus a pointer to
+ * the address field of one of the union inputs. (Since we're going to copy
+ * just the bits-in-common, it doesn't matter which one.)
+ */
+static GistInetKey *
+build_inet_union_key(int family, int minbits, int commonbits,
+ unsigned char *addr)
+{
+ GistInetKey *result;
+
+ /* Make sure any unused bits are zeroed. */
+ result = (GistInetKey *) palloc0(sizeof(GistInetKey));
+
+ gk_ip_family(result) = family;
+ gk_ip_minbits(result) = minbits;
+ gk_ip_commonbits(result) = commonbits;
+
+ /* Clone appropriate bytes of the address. */
+ if (commonbits > 0)
+ memcpy(gk_ip_addr(result), addr, (commonbits + 7) / 8);
+
+ /* Clean any unwanted bits in the last partial byte. */
+ if (commonbits % 8 != 0)
+ gk_ip_addr(result)[commonbits / 8] &= ~(0xFF >> (commonbits % 8));
+
+ /* Set varlena header correctly. */
+ SET_GK_VARSIZE(result);
+
+ return result;
+}
+
+
+/*
+ * The GiST union function
+ *
+ * See comments at head of file for the definition of the union.
+ */
+Datum
+inet_gist_union(PG_FUNCTION_ARGS)
+{
+ GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+ GISTENTRY *ent = entryvec->vector;
+ int minfamily,
+ maxfamily,
+ minbits,
+ commonbits;
+ unsigned char *addr;
+ GistInetKey *tmp,
+ *result;
+
+ /* Determine parameters of the union. */
+ calc_inet_union_params(ent, 0, entryvec->n - 1,
+ &minfamily, &maxfamily,
+ &minbits, &commonbits);
+
+ /* If more than one family, emit family number zero. */
+ if (minfamily != maxfamily)
+ minfamily = 0;
+
+ /* Initialize address using the first key. */
+ tmp = DatumGetInetKeyP(ent[0].key);
+ addr = gk_ip_addr(tmp);
+
+ /* Construct the union value. */
+ result = build_inet_union_key(minfamily, minbits, commonbits, addr);
+
+ PG_RETURN_POINTER(result);
+}
+
+/*
+ * The GiST compress function
+ *
+ * Convert an inet value to GistInetKey.
+ */
+Datum
+inet_gist_compress(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+ GISTENTRY *retval;
+
+ if (entry->leafkey)
+ {
+ retval = palloc(sizeof(GISTENTRY));
+ if (DatumGetPointer(entry->key) != NULL)
+ {
+ inet *in = DatumGetInetPP(entry->key);
+ GistInetKey *r;
+
+ r = (GistInetKey *) palloc0(sizeof(GistInetKey));
+
+ gk_ip_family(r) = ip_family(in);
+ gk_ip_minbits(r) = ip_bits(in);
+ gk_ip_commonbits(r) = gk_ip_maxbits(r);
+ memcpy(gk_ip_addr(r), ip_addr(in), gk_ip_addrsize(r));
+ SET_GK_VARSIZE(r);
+
+ gistentryinit(*retval, PointerGetDatum(r),
+ entry->rel, entry->page,
+ entry->offset, false);
+ }
+ else
+ {
+ gistentryinit(*retval, (Datum) 0,
+ entry->rel, entry->page,
+ entry->offset, false);
+ }
+ }
+ else
+ retval = entry;
+ PG_RETURN_POINTER(retval);
+}
+
+/*
+ * We do not need a decompress function, because the other GiST inet
+ * support functions work with the GistInetKey representation.
+ */
+
+/*
+ * The GiST fetch function
+ *
+ * Reconstruct the original inet datum from a GistInetKey.
+ */
+Datum
+inet_gist_fetch(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+ GistInetKey *key = DatumGetInetKeyP(entry->key);
+ GISTENTRY *retval;
+ inet *dst;
+
+ dst = (inet *) palloc0(sizeof(inet));
+
+ ip_family(dst) = gk_ip_family(key);
+ ip_bits(dst) = gk_ip_minbits(key);
+ memcpy(ip_addr(dst), gk_ip_addr(key), ip_addrsize(dst));
+ SET_INET_VARSIZE(dst);
+
+ retval = palloc(sizeof(GISTENTRY));
+ gistentryinit(*retval, InetPGetDatum(dst), entry->rel, entry->page,
+ entry->offset, false);
+
+ PG_RETURN_POINTER(retval);
+}
+
+/*
+ * The GiST page split penalty function
+ *
+ * Charge a large penalty if address family doesn't match, or a somewhat
+ * smaller one if the new value would degrade the union's minbits
+ * (minimum netmask width). Otherwise, penalty is inverse of the
+ * new number of common address bits.
+ */
+Datum
+inet_gist_penalty(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *origent = (GISTENTRY *) PG_GETARG_POINTER(0);
+ GISTENTRY *newent = (GISTENTRY *) PG_GETARG_POINTER(1);
+ float *penalty = (float *) PG_GETARG_POINTER(2);
+ GistInetKey *orig = DatumGetInetKeyP(origent->key),
+ *new = DatumGetInetKeyP(newent->key);
+ int commonbits;
+
+ if (gk_ip_family(orig) == gk_ip_family(new))
+ {
+ if (gk_ip_minbits(orig) <= gk_ip_minbits(new))
+ {
+ commonbits = bitncommon(gk_ip_addr(orig), gk_ip_addr(new),
+ Min(gk_ip_commonbits(orig),
+ gk_ip_commonbits(new)));
+ if (commonbits > 0)
+ *penalty = 1.0f / commonbits;
+ else
+ *penalty = 2;
+ }
+ else
+ *penalty = 3;
+ }
+ else
+ *penalty = 4;
+
+ PG_RETURN_POINTER(penalty);
+}
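+
+/*
+ * For instance, inserting a key that shares 20 leading address bits with
+ * the subtree union (and does not reduce its minbits) costs 1/20 = 0.05,
+ * while a key of a different address family costs the maximum penalty of 4.
+ */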
+
+/*
+ * The GiST PickSplit method
+ *
+ * There are two ways to split. First one is to split by address families,
+ * if there are multiple families appearing in the input.
+ *
+ * The second and more common way is to split by addresses. To achieve this,
+ * determine the number of leading bits shared by all the keys, then split on
+ * the next bit. (We don't currently consider the netmask widths while doing
+ * this; should we?) If we fail to get a nontrivial split that way, split
+ * 50-50.
+ */
+Datum
+inet_gist_picksplit(PG_FUNCTION_ARGS)
+{
+ GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+ GIST_SPLITVEC *splitvec = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+ GISTENTRY *ent = entryvec->vector;
+ int minfamily,
+ maxfamily,
+ minbits,
+ commonbits;
+ unsigned char *addr;
+ GistInetKey *tmp,
+ *left_union,
+ *right_union;
+ int maxoff,
+ nbytes;
+ OffsetNumber i,
+ *left,
+ *right;
+
+ maxoff = entryvec->n - 1;
+ nbytes = (maxoff + 1) * sizeof(OffsetNumber);
+
+ left = (OffsetNumber *) palloc(nbytes);
+ right = (OffsetNumber *) palloc(nbytes);
+
+ splitvec->spl_left = left;
+ splitvec->spl_right = right;
+
+ splitvec->spl_nleft = 0;
+ splitvec->spl_nright = 0;
+
+ /* Determine parameters of the union of all the inputs. */
+ calc_inet_union_params(ent, FirstOffsetNumber, maxoff,
+ &minfamily, &maxfamily,
+ &minbits, &commonbits);
+
+ if (minfamily != maxfamily)
+ {
+ /* Multiple families, so split by family. */
+ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ /*
+ * If there's more than 2 families, all but maxfamily go into the
+ * left union. This could only happen if the inputs include some
+ * IPv4, some IPv6, and some already-multiple-family unions.
+ */
+ tmp = DatumGetInetKeyP(ent[i].key);
+ if (gk_ip_family(tmp) != maxfamily)
+ left[splitvec->spl_nleft++] = i;
+ else
+ right[splitvec->spl_nright++] = i;
+ }
+ }
+ else
+ {
+ /*
+ * Split on the next bit after the common bits. If that yields a
+ * trivial split, try the next bit position to the right. Repeat till
+ * success; or if we run out of bits, do an arbitrary 50-50 split.
+ */
+ int maxbits = ip_family_maxbits(minfamily);
+
+ while (commonbits < maxbits)
+ {
+ /* Split using the commonbits'th bit position. */
+ int bitbyte = commonbits / 8;
+ int bitmask = 0x80 >> (commonbits % 8);
+
+ splitvec->spl_nleft = splitvec->spl_nright = 0;
+
+ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ tmp = DatumGetInetKeyP(ent[i].key);
+ addr = gk_ip_addr(tmp);
+ if ((addr[bitbyte] & bitmask) == 0)
+ left[splitvec->spl_nleft++] = i;
+ else
+ right[splitvec->spl_nright++] = i;
+ }
+
+ if (splitvec->spl_nleft > 0 && splitvec->spl_nright > 0)
+ break; /* success */
+ commonbits++;
+ }
+
+ if (commonbits >= maxbits)
+ {
+ /* Failed ... do a 50-50 split. */
+ splitvec->spl_nleft = splitvec->spl_nright = 0;
+
+ for (i = FirstOffsetNumber; i <= maxoff / 2; i = OffsetNumberNext(i))
+ {
+ left[splitvec->spl_nleft++] = i;
+ }
+ for (; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ right[splitvec->spl_nright++] = i;
+ }
+ }
+ }
+
+ /*
+ * Compute the union value for each side from scratch. In most cases we
+ * could approximate the union values with what we already know, but this
+ * ensures that each side has minbits and commonbits set as high as
+ * possible.
+ */
+ calc_inet_union_params_indexed(ent, left, splitvec->spl_nleft,
+ &minfamily, &maxfamily,
+ &minbits, &commonbits);
+ if (minfamily != maxfamily)
+ minfamily = 0;
+ tmp = DatumGetInetKeyP(ent[left[0]].key);
+ addr = gk_ip_addr(tmp);
+ left_union = build_inet_union_key(minfamily, minbits, commonbits, addr);
+ splitvec->spl_ldatum = PointerGetDatum(left_union);
+
+ calc_inet_union_params_indexed(ent, right, splitvec->spl_nright,
+ &minfamily, &maxfamily,
+ &minbits, &commonbits);
+ if (minfamily != maxfamily)
+ minfamily = 0;
+ tmp = DatumGetInetKeyP(ent[right[0]].key);
+ addr = gk_ip_addr(tmp);
+ right_union = build_inet_union_key(minfamily, minbits, commonbits, addr);
+ splitvec->spl_rdatum = PointerGetDatum(right_union);
+
+ PG_RETURN_POINTER(splitvec);
+}
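+
+/*
+ * For example, if every input key shares exactly 22 leading address bits,
+ * the loop above first tries to split on bit position 22 (0x80 >> 6 within
+ * the third octet): keys with that bit clear go left, the rest go right.
+ * Only if every later bit position also gives a one-sided split does the
+ * 50-50 fallback apply.
+ */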
+
+/*
+ * The GiST equality function
+ */
+Datum
+inet_gist_same(PG_FUNCTION_ARGS)
+{
+ GistInetKey *left = DatumGetInetKeyP(PG_GETARG_DATUM(0));
+ GistInetKey *right = DatumGetInetKeyP(PG_GETARG_DATUM(1));
+ bool *result = (bool *) PG_GETARG_POINTER(2);
+
+ *result = (gk_ip_family(left) == gk_ip_family(right) &&
+ gk_ip_minbits(left) == gk_ip_minbits(right) &&
+ gk_ip_commonbits(left) == gk_ip_commonbits(right) &&
+ memcmp(gk_ip_addr(left), gk_ip_addr(right),
+ gk_ip_addrsize(left)) == 0);
+
+ PG_RETURN_POINTER(result);
+}
diff --git a/src/backend/utils/adt/network_selfuncs.c b/src/backend/utils/adt/network_selfuncs.c
new file mode 100644
index 0000000..4919637
--- /dev/null
+++ b/src/backend/utils/adt/network_selfuncs.c
@@ -0,0 +1,972 @@
+/*-------------------------------------------------------------------------
+ *
+ * network_selfuncs.c
+ * Functions for selectivity estimation of inet/cidr operators
+ *
+ * This module provides estimators for the subnet inclusion and overlap
+ * operators. Estimates are based on null fraction, most common values,
+ * and histogram of inet/cidr columns.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/network_selfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+
+#include "access/htup_details.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_statistic.h"
+#include "utils/builtins.h"
+#include "utils/inet.h"
+#include "utils/lsyscache.h"
+#include "utils/selfuncs.h"
+
+
+/* Default selectivity for the inet overlap operator */
+#define DEFAULT_OVERLAP_SEL 0.01
+
+/* Default selectivity for the various inclusion operators */
+#define DEFAULT_INCLUSION_SEL 0.005
+
+/* Default selectivity for specified operator */
+#define DEFAULT_SEL(operator) \
+ ((operator) == OID_INET_OVERLAP_OP ? \
+ DEFAULT_OVERLAP_SEL : DEFAULT_INCLUSION_SEL)
+
+/* Maximum number of items to consider in join selectivity calculations */
+#define MAX_CONSIDERED_ELEMS 1024
+
+static Selectivity networkjoinsel_inner(Oid operator,
+ VariableStatData *vardata1, VariableStatData *vardata2);
+static Selectivity networkjoinsel_semi(Oid operator,
+ VariableStatData *vardata1, VariableStatData *vardata2);
+static Selectivity mcv_population(float4 *mcv_numbers, int mcv_nvalues);
+static Selectivity inet_hist_value_sel(Datum *values, int nvalues,
+ Datum constvalue, int opr_codenum);
+static Selectivity inet_mcv_join_sel(Datum *mcv1_values,
+ float4 *mcv1_numbers, int mcv1_nvalues, Datum *mcv2_values,
+ float4 *mcv2_numbers, int mcv2_nvalues, Oid operator);
+static Selectivity inet_mcv_hist_sel(Datum *mcv_values, float4 *mcv_numbers,
+ int mcv_nvalues, Datum *hist_values, int hist_nvalues,
+ int opr_codenum);
+static Selectivity inet_hist_inclusion_join_sel(Datum *hist1_values,
+ int hist1_nvalues,
+ Datum *hist2_values, int hist2_nvalues,
+ int opr_codenum);
+static Selectivity inet_semi_join_sel(Datum lhs_value,
+ bool mcv_exists, Datum *mcv_values, int mcv_nvalues,
+ bool hist_exists, Datum *hist_values, int hist_nvalues,
+ double hist_weight,
+ FmgrInfo *proc, int opr_codenum);
+static int inet_opr_codenum(Oid operator);
+static int inet_inclusion_cmp(inet *left, inet *right, int opr_codenum);
+static int inet_masklen_inclusion_cmp(inet *left, inet *right,
+ int opr_codenum);
+static int inet_hist_match_divider(inet *boundary, inet *query,
+ int opr_codenum);
+
+/*
+ * Selectivity estimation for the subnet inclusion/overlap operators
+ */
+Datum
+networksel(PG_FUNCTION_ARGS)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Oid operator = PG_GETARG_OID(1);
+ List *args = (List *) PG_GETARG_POINTER(2);
+ int varRelid = PG_GETARG_INT32(3);
+ VariableStatData vardata;
+ Node *other;
+ bool varonleft;
+ Selectivity selec,
+ mcv_selec,
+ non_mcv_selec;
+ Datum constvalue;
+ Form_pg_statistic stats;
+ AttStatsSlot hslot;
+ double sumcommon,
+ nullfrac;
+ FmgrInfo proc;
+
+ /*
+ * If expression is not (variable op something) or (something op
+ * variable), then punt and return a default estimate.
+ */
+ if (!get_restriction_variable(root, args, varRelid,
+ &vardata, &other, &varonleft))
+ PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
+
+ /*
+ * Can't do anything useful if the something is not a constant, either.
+ */
+ if (!IsA(other, Const))
+ {
+ ReleaseVariableStats(vardata);
+ PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
+ }
+
+ /* All of the operators handled here are strict. */
+ if (((Const *) other)->constisnull)
+ {
+ ReleaseVariableStats(vardata);
+ PG_RETURN_FLOAT8(0.0);
+ }
+ constvalue = ((Const *) other)->constvalue;
+
+ /* Otherwise, we need stats in order to produce a non-default estimate. */
+ if (!HeapTupleIsValid(vardata.statsTuple))
+ {
+ ReleaseVariableStats(vardata);
+ PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
+ }
+
+ stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
+ nullfrac = stats->stanullfrac;
+
+ /*
+ * If we have most-common-values info, add up the fractions of the MCV
+ * entries that satisfy MCV OP CONST. These fractions contribute directly
+ * to the result selectivity. Also add up the total fraction represented
+ * by MCV entries.
+ */
+ fmgr_info(get_opcode(operator), &proc);
+ mcv_selec = mcv_selectivity(&vardata, &proc, InvalidOid,
+ constvalue, varonleft,
+ &sumcommon);
+
+ /*
+ * If we have a histogram, use it to estimate the proportion of the
+ * non-MCV population that satisfies the clause. If we don't, apply the
+ * default selectivity to that population.
+ */
+ if (get_attstatsslot(&hslot, vardata.statsTuple,
+ STATISTIC_KIND_HISTOGRAM, InvalidOid,
+ ATTSTATSSLOT_VALUES))
+ {
+ int opr_codenum = inet_opr_codenum(operator);
+
+ /* Commute if needed, so we can consider histogram to be on the left */
+ if (!varonleft)
+ opr_codenum = -opr_codenum;
+ non_mcv_selec = inet_hist_value_sel(hslot.values, hslot.nvalues,
+ constvalue, opr_codenum);
+
+ free_attstatsslot(&hslot);
+ }
+ else
+ non_mcv_selec = DEFAULT_SEL(operator);
+
+ /* Combine selectivities for MCV and non-MCV populations */
+ selec = mcv_selec + (1.0 - nullfrac - sumcommon) * non_mcv_selec;
+
+ /* Result should be in range, but make sure... */
+ CLAMP_PROBABILITY(selec);
+
+ ReleaseVariableStats(vardata);
+
+ PG_RETURN_FLOAT8(selec);
+}
+
+/*
+ * Join selectivity estimation for the subnet inclusion/overlap operators
+ *
+ * This function has the same structure as eqjoinsel() in selfuncs.c.
+ *
+ * Throughout networkjoinsel and its subroutines, we have a performance issue
+ * in that the amount of work to be done is O(N^2) in the length of the MCV
+ * and histogram arrays. To keep the runtime from getting out of hand when
+ * large statistics targets have been set, we arbitrarily limit the number of
+ * values considered to 1024 (MAX_CONSIDERED_ELEMS). For the MCV arrays, this
+ * is easy: just consider at most the first N elements. (Since the MCVs are
+ * sorted by decreasing frequency, this correctly gets us the first N MCVs.)
+ * For the histogram arrays, we decimate; that is, consider only every k'th
+ * element, where k is chosen so that no more than MAX_CONSIDERED_ELEMS
+ * elements are considered. This should still give us a good random sample of
+ * the non-MCV population. Decimation is done on-the-fly in the loops that
+ * iterate over the histogram arrays.
+ */
+Datum
+networkjoinsel(PG_FUNCTION_ARGS)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Oid operator = PG_GETARG_OID(1);
+ List *args = (List *) PG_GETARG_POINTER(2);
+#ifdef NOT_USED
+ JoinType jointype = (JoinType) PG_GETARG_INT16(3);
+#endif
+ SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
+ double selec;
+ VariableStatData vardata1;
+ VariableStatData vardata2;
+ bool join_is_reversed;
+
+ get_join_variables(root, args, sjinfo,
+ &vardata1, &vardata2, &join_is_reversed);
+
+ switch (sjinfo->jointype)
+ {
+ case JOIN_INNER:
+ case JOIN_LEFT:
+ case JOIN_FULL:
+
+ /*
+ * Selectivity for left/full join is not exactly the same as inner
+ * join, but we neglect the difference, as eqjoinsel does.
+ */
+ selec = networkjoinsel_inner(operator, &vardata1, &vardata2);
+ break;
+ case JOIN_SEMI:
+ case JOIN_ANTI:
+ /* Here, it's important that we pass the outer var on the left. */
+ if (!join_is_reversed)
+ selec = networkjoinsel_semi(operator, &vardata1, &vardata2);
+ else
+ selec = networkjoinsel_semi(get_commutator(operator),
+ &vardata2, &vardata1);
+ break;
+ default:
+ /* other values not expected here */
+ elog(ERROR, "unrecognized join type: %d",
+ (int) sjinfo->jointype);
+ selec = 0; /* keep compiler quiet */
+ break;
+ }
+
+ ReleaseVariableStats(vardata1);
+ ReleaseVariableStats(vardata2);
+
+ CLAMP_PROBABILITY(selec);
+
+ PG_RETURN_FLOAT8((float8) selec);
+}
+
+/*
+ * Inner join selectivity estimation for subnet inclusion/overlap operators
+ *
+ * Calculates MCV vs MCV, MCV vs histogram and histogram vs histogram
+ * selectivity for join using the subnet inclusion operators. Unlike the
+ * join selectivity function for the equality operator, eqjoinsel_inner(),
+ * one to one matching of the values is not enough. Network inclusion
+ * operators are likely to match many to many, so we must check all pairs.
+ * (Note: it might be possible to exploit understanding of the histogram's
+ * btree ordering to reduce the work needed, but we don't currently try.)
+ * Also, MCV vs histogram selectivity is not neglected as in eqjoinsel_inner().
+ */
+static Selectivity
+networkjoinsel_inner(Oid operator,
+ VariableStatData *vardata1, VariableStatData *vardata2)
+{
+ Form_pg_statistic stats;
+ double nullfrac1 = 0.0,
+ nullfrac2 = 0.0;
+ Selectivity selec = 0.0,
+ sumcommon1 = 0.0,
+ sumcommon2 = 0.0;
+ bool mcv1_exists = false,
+ mcv2_exists = false,
+ hist1_exists = false,
+ hist2_exists = false;
+ int opr_codenum;
+ int mcv1_length = 0,
+ mcv2_length = 0;
+ AttStatsSlot mcv1_slot;
+ AttStatsSlot mcv2_slot;
+ AttStatsSlot hist1_slot;
+ AttStatsSlot hist2_slot;
+
+ if (HeapTupleIsValid(vardata1->statsTuple))
+ {
+ stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
+ nullfrac1 = stats->stanullfrac;
+
+ mcv1_exists = get_attstatsslot(&mcv1_slot, vardata1->statsTuple,
+ STATISTIC_KIND_MCV, InvalidOid,
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
+ hist1_exists = get_attstatsslot(&hist1_slot, vardata1->statsTuple,
+ STATISTIC_KIND_HISTOGRAM, InvalidOid,
+ ATTSTATSSLOT_VALUES);
+ /* Arbitrarily limit number of MCVs considered */
+ mcv1_length = Min(mcv1_slot.nvalues, MAX_CONSIDERED_ELEMS);
+ if (mcv1_exists)
+ sumcommon1 = mcv_population(mcv1_slot.numbers, mcv1_length);
+ }
+ else
+ {
+ memset(&mcv1_slot, 0, sizeof(mcv1_slot));
+ memset(&hist1_slot, 0, sizeof(hist1_slot));
+ }
+
+ if (HeapTupleIsValid(vardata2->statsTuple))
+ {
+ stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
+ nullfrac2 = stats->stanullfrac;
+
+ mcv2_exists = get_attstatsslot(&mcv2_slot, vardata2->statsTuple,
+ STATISTIC_KIND_MCV, InvalidOid,
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
+ hist2_exists = get_attstatsslot(&hist2_slot, vardata2->statsTuple,
+ STATISTIC_KIND_HISTOGRAM, InvalidOid,
+ ATTSTATSSLOT_VALUES);
+ /* Arbitrarily limit number of MCVs considered */
+ mcv2_length = Min(mcv2_slot.nvalues, MAX_CONSIDERED_ELEMS);
+ if (mcv2_exists)
+ sumcommon2 = mcv_population(mcv2_slot.numbers, mcv2_length);
+ }
+ else
+ {
+ memset(&mcv2_slot, 0, sizeof(mcv2_slot));
+ memset(&hist2_slot, 0, sizeof(hist2_slot));
+ }
+
+ opr_codenum = inet_opr_codenum(operator);
+
+ /*
+ * Calculate selectivity for MCV vs MCV matches.
+ */
+ if (mcv1_exists && mcv2_exists)
+ selec += inet_mcv_join_sel(mcv1_slot.values, mcv1_slot.numbers,
+ mcv1_length,
+ mcv2_slot.values, mcv2_slot.numbers,
+ mcv2_length,
+ operator);
+
+ /*
+ * Add in selectivities for MCV vs histogram matches, scaling according to
+ * the fractions of the populations represented by the histograms. Note
+ * that the second case needs to commute the operator.
+ */
+ if (mcv1_exists && hist2_exists)
+ selec += (1.0 - nullfrac2 - sumcommon2) *
+ inet_mcv_hist_sel(mcv1_slot.values, mcv1_slot.numbers, mcv1_length,
+ hist2_slot.values, hist2_slot.nvalues,
+ opr_codenum);
+ if (mcv2_exists && hist1_exists)
+ selec += (1.0 - nullfrac1 - sumcommon1) *
+ inet_mcv_hist_sel(mcv2_slot.values, mcv2_slot.numbers, mcv2_length,
+ hist1_slot.values, hist1_slot.nvalues,
+ -opr_codenum);
+
+ /*
+ * Add in selectivity for histogram vs histogram matches, again scaling
+ * appropriately.
+ */
+ if (hist1_exists && hist2_exists)
+ selec += (1.0 - nullfrac1 - sumcommon1) *
+ (1.0 - nullfrac2 - sumcommon2) *
+ inet_hist_inclusion_join_sel(hist1_slot.values, hist1_slot.nvalues,
+ hist2_slot.values, hist2_slot.nvalues,
+ opr_codenum);
+
+ /*
+ * If useful statistics are not available then use the default estimate.
+ * We can apply null fractions if known, though.
+ */
+ if ((!mcv1_exists && !hist1_exists) || (!mcv2_exists && !hist2_exists))
+ selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);
+
+ /* Release stats. */
+ free_attstatsslot(&mcv1_slot);
+ free_attstatsslot(&mcv2_slot);
+ free_attstatsslot(&hist1_slot);
+ free_attstatsslot(&hist2_slot);
+
+ return selec;
+}
+
+/*
+ * Semi join selectivity estimation for subnet inclusion/overlap operators
+ *
+ * Calculates MCV vs MCV, MCV vs histogram, histogram vs MCV, and histogram vs
+ * histogram selectivity for semi/anti join cases.
+ */
+static Selectivity
+networkjoinsel_semi(Oid operator,
+ VariableStatData *vardata1, VariableStatData *vardata2)
+{
+ Form_pg_statistic stats;
+ Selectivity selec = 0.0,
+ sumcommon1 = 0.0,
+ sumcommon2 = 0.0;
+ double nullfrac1 = 0.0,
+ nullfrac2 = 0.0,
+ hist2_weight = 0.0;
+ bool mcv1_exists = false,
+ mcv2_exists = false,
+ hist1_exists = false,
+ hist2_exists = false;
+ int opr_codenum;
+ FmgrInfo proc;
+ int i,
+ mcv1_length = 0,
+ mcv2_length = 0;
+ AttStatsSlot mcv1_slot;
+ AttStatsSlot mcv2_slot;
+ AttStatsSlot hist1_slot;
+ AttStatsSlot hist2_slot;
+
+ if (HeapTupleIsValid(vardata1->statsTuple))
+ {
+ stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
+ nullfrac1 = stats->stanullfrac;
+
+ mcv1_exists = get_attstatsslot(&mcv1_slot, vardata1->statsTuple,
+ STATISTIC_KIND_MCV, InvalidOid,
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
+ hist1_exists = get_attstatsslot(&hist1_slot, vardata1->statsTuple,
+ STATISTIC_KIND_HISTOGRAM, InvalidOid,
+ ATTSTATSSLOT_VALUES);
+ /* Arbitrarily limit number of MCVs considered */
+ mcv1_length = Min(mcv1_slot.nvalues, MAX_CONSIDERED_ELEMS);
+ if (mcv1_exists)
+ sumcommon1 = mcv_population(mcv1_slot.numbers, mcv1_length);
+ }
+ else
+ {
+ memset(&mcv1_slot, 0, sizeof(mcv1_slot));
+ memset(&hist1_slot, 0, sizeof(hist1_slot));
+ }
+
+ if (HeapTupleIsValid(vardata2->statsTuple))
+ {
+ stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
+ nullfrac2 = stats->stanullfrac;
+
+ mcv2_exists = get_attstatsslot(&mcv2_slot, vardata2->statsTuple,
+ STATISTIC_KIND_MCV, InvalidOid,
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
+ hist2_exists = get_attstatsslot(&hist2_slot, vardata2->statsTuple,
+ STATISTIC_KIND_HISTOGRAM, InvalidOid,
+ ATTSTATSSLOT_VALUES);
+ /* Arbitrarily limit number of MCVs considered */
+ mcv2_length = Min(mcv2_slot.nvalues, MAX_CONSIDERED_ELEMS);
+ if (mcv2_exists)
+ sumcommon2 = mcv_population(mcv2_slot.numbers, mcv2_length);
+ }
+ else
+ {
+ memset(&mcv2_slot, 0, sizeof(mcv2_slot));
+ memset(&hist2_slot, 0, sizeof(hist2_slot));
+ }
+
+ opr_codenum = inet_opr_codenum(operator);
+ fmgr_info(get_opcode(operator), &proc);
+
+ /* Estimate number of input rows represented by RHS histogram. */
+ if (hist2_exists && vardata2->rel)
+ hist2_weight = (1.0 - nullfrac2 - sumcommon2) * vardata2->rel->rows;
+
+ /*
+ * Consider each element of the LHS MCV list, matching it to whatever RHS
+ * stats we have. Scale according to the known frequency of the MCV.
+ */
+ if (mcv1_exists && (mcv2_exists || hist2_exists))
+ {
+ for (i = 0; i < mcv1_length; i++)
+ {
+ selec += mcv1_slot.numbers[i] *
+ inet_semi_join_sel(mcv1_slot.values[i],
+ mcv2_exists, mcv2_slot.values, mcv2_length,
+ hist2_exists,
+ hist2_slot.values, hist2_slot.nvalues,
+ hist2_weight,
+ &proc, opr_codenum);
+ }
+ }
+
+ /*
+ * Consider each element of the LHS histogram, except for the first and
+ * last elements, which we exclude on the grounds that they're outliers
+ * and thus not very representative. Scale on the assumption that each
+ * such histogram element represents an equal share of the LHS histogram
+ * population (which is a bit bogus, because the members of its bucket may
+ * not all act the same with respect to the join clause, but it's hard to
+ * do better).
+ *
+ * If there are too many histogram elements, decimate to limit runtime.
+ */
+ if (hist1_exists && hist1_slot.nvalues > 2 && (mcv2_exists || hist2_exists))
+ {
+ double hist_selec_sum = 0.0;
+ int k,
+ n;
+
+ k = (hist1_slot.nvalues - 3) / MAX_CONSIDERED_ELEMS + 1;
+
+ n = 0;
+ for (i = 1; i < hist1_slot.nvalues - 1; i += k)
+ {
+ hist_selec_sum +=
+ inet_semi_join_sel(hist1_slot.values[i],
+ mcv2_exists, mcv2_slot.values, mcv2_length,
+ hist2_exists,
+ hist2_slot.values, hist2_slot.nvalues,
+ hist2_weight,
+ &proc, opr_codenum);
+ n++;
+ }
+
+ selec += (1.0 - nullfrac1 - sumcommon1) * hist_selec_sum / n;
+ }
+
+ /*
+ * If useful statistics are not available then use the default estimate.
+ * We can apply null fractions if known, though.
+ */
+ if ((!mcv1_exists && !hist1_exists) || (!mcv2_exists && !hist2_exists))
+ selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);
+
+ /* Release stats. */
+ free_attstatsslot(&mcv1_slot);
+ free_attstatsslot(&mcv2_slot);
+ free_attstatsslot(&hist1_slot);
+ free_attstatsslot(&hist2_slot);
+
+ return selec;
+}
+
+/*
+ * Compute the fraction of a relation's population that is represented
+ * by the MCV list.
+ */
+static Selectivity
+mcv_population(float4 *mcv_numbers, int mcv_nvalues)
+{
+ Selectivity sumcommon = 0.0;
+ int i;
+
+ for (i = 0; i < mcv_nvalues; i++)
+ {
+ sumcommon += mcv_numbers[i];
+ }
+
+ return sumcommon;
+}
+
+/*
+ * Inet histogram vs single value selectivity estimation
+ *
+ * Estimate the fraction of the histogram population that satisfies
+ * "value OPR CONST". (The result needs to be scaled to reflect the
+ * proportion of the total population represented by the histogram.)
+ *
+ * The histogram is originally for the inet btree comparison operators.
+ * Only the common bits of the network part and the length of the network part
+ * (masklen) are interesting for the subnet inclusion operators. Fortunately,
+ * btree comparison treats the network part as the major sort key. However,
+ * the length of the network part is not really significant in the
+ * histogram, which leads to big mistakes for data sets with uneven
+ * masklen distribution. To reduce this problem, comparisons with the left
+ * and the right sides of the buckets are used together.
+ *
+ * Histogram bucket matches are calculated in two forms. If the constant
+ * matches both bucket endpoints, the bucket is considered fully matched.
+ * The second form is to match the bucket partially; we recognize this when
+ * the constant matches just one endpoint, or the two endpoints fall on
+ * opposite sides of the constant. (Note that when the constant matches an
+ * interior histogram element, it gets credit for partial matches to the
+ * buckets on both sides, while a match to a histogram endpoint gets credit
+ * for only one partial match. This is desirable.)
+ *
+ * The divider for a partial bucket match is the number of decisive bits
+ * that are not common between the addresses. It is used as a power of two,
+ * since that is the natural scale for IP network inclusion. This partial
+ * bucket match divider calculation is an empirical formula and is subject
+ * to change with further experimentation.
+ *
+ * For a partial match, we try to calculate dividers for both of the
+ * boundaries. If the address family of a boundary value does not match the
+ * constant or comparison of the length of the network parts is not correct
+ * for the operator, the divider for that boundary will not be taken into
+ * account. If both of the dividers are valid, the greater one will be used
+ * to minimize the mistake in buckets that have disparate masklens. This
+ * calculation is unfair when dividers can be calculated for both of the
+ * boundaries but they are far from each other; but this is not a common
+ * situation, as the boundaries are expected to share most of their
+ * significant bits. The error would be greater if we used the minimum
+ * instead of the maximum, and we don't know a sensible way to combine
+ * them.
+ *
+ * For partial match in buckets that have different address families on the
+ * left and right sides, only the boundary with the same address family is
+ * taken into consideration. This can cause more mistakes for these buckets
+ * if the masklens of their boundaries are also disparate. But this can only
+ * happen in one bucket, since only two address families exist. It seems a
+ * better option than not considering these buckets at all.
+ */
+static Selectivity
+inet_hist_value_sel(Datum *values, int nvalues, Datum constvalue,
+ int opr_codenum)
+{
+ Selectivity match = 0.0;
+ inet *query,
+ *left,
+ *right;
+ int i,
+ k,
+ n;
+ int left_order,
+ right_order,
+ left_divider,
+ right_divider;
+
+ /* guard against zero-divide below */
+ if (nvalues <= 1)
+ return 0.0;
+
+ /* if there are too many histogram elements, decimate to limit runtime */
+ k = (nvalues - 2) / MAX_CONSIDERED_ELEMS + 1;
+
+ query = DatumGetInetPP(constvalue);
+
+ /* "left" is the left boundary value of the current bucket ... */
+ left = DatumGetInetPP(values[0]);
+ left_order = inet_inclusion_cmp(left, query, opr_codenum);
+
+ n = 0;
+ for (i = k; i < nvalues; i += k)
+ {
+ /* ... and "right" is the right boundary value */
+ right = DatumGetInetPP(values[i]);
+ right_order = inet_inclusion_cmp(right, query, opr_codenum);
+
+ if (left_order == 0 && right_order == 0)
+ {
+ /* The whole bucket matches, since both endpoints do. */
+ match += 1.0;
+ }
+ else if ((left_order <= 0 && right_order >= 0) ||
+ (left_order >= 0 && right_order <= 0))
+ {
+ /* Partial bucket match. */
+ left_divider = inet_hist_match_divider(left, query, opr_codenum);
+ right_divider = inet_hist_match_divider(right, query, opr_codenum);
+
+ if (left_divider >= 0 || right_divider >= 0)
+ match += 1.0 / pow(2.0, Max(left_divider, right_divider));
+ }
+
+ /* Shift the variables. */
+ left = right;
+ left_order = right_order;
+
+ /* Count the number of buckets considered. */
+ n++;
+ }
+
+ return match / n;
+}
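To make the 1/2^divider weighting above concrete, here is a minimal standalone sketch (not part of the patch, and independent of PostgreSQL headers): a partially matched bucket whose boundary agrees with the constant on all decisive bits counts as a whole bucket, and each additional differing bit halves its contribution.

    #include <math.h>
    #include <stdio.h>

    int
    main(void)
    {
        int         divider;

        /* weight credited to a partially matched histogram bucket */
        for (divider = 0; divider <= 8; divider++)
            printf("divider %d -> bucket weight %.4f\n",
                   divider, 1.0 / pow(2.0, divider));
        return 0;
    }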
+
+/*
+ * Inet MCV vs MCV join selectivity estimation
+ *
+ * We simply add up the fractions of the populations that satisfy the clause.
+ * The result is exact and does not need to be scaled further.
+ */
+static Selectivity
+inet_mcv_join_sel(Datum *mcv1_values, float4 *mcv1_numbers, int mcv1_nvalues,
+ Datum *mcv2_values, float4 *mcv2_numbers, int mcv2_nvalues,
+ Oid operator)
+{
+ Selectivity selec = 0.0;
+ FmgrInfo proc;
+ int i,
+ j;
+
+ fmgr_info(get_opcode(operator), &proc);
+
+ for (i = 0; i < mcv1_nvalues; i++)
+ {
+ for (j = 0; j < mcv2_nvalues; j++)
+ if (DatumGetBool(FunctionCall2(&proc,
+ mcv1_values[i],
+ mcv2_values[j])))
+ selec += mcv1_numbers[i] * mcv2_numbers[j];
+ }
+ return selec;
+}
+
+/*
+ * Inet MCV vs histogram join selectivity estimation
+ *
+ * For each MCV on the lefthand side, estimate the fraction of the righthand's
+ * histogram population that satisfies the join clause, and add those up,
+ * scaling by the MCV's frequency. The result still needs to be scaled
+ * according to the fraction of the righthand's population represented by
+ * the histogram.
+ */
+static Selectivity
+inet_mcv_hist_sel(Datum *mcv_values, float4 *mcv_numbers, int mcv_nvalues,
+ Datum *hist_values, int hist_nvalues,
+ int opr_codenum)
+{
+ Selectivity selec = 0.0;
+ int i;
+
+ /*
+ * We'll call inet_hist_value_sel with the histogram on the left, so we
+ * must commute the operator.
+ */
+ opr_codenum = -opr_codenum;
+
+ for (i = 0; i < mcv_nvalues; i++)
+ {
+ selec += mcv_numbers[i] *
+ inet_hist_value_sel(hist_values, hist_nvalues, mcv_values[i],
+ opr_codenum);
+ }
+ return selec;
+}
+
+/*
+ * Inet histogram vs histogram join selectivity estimation
+ *
+ * Here, we take all values listed in the second histogram (except for the
+ * first and last elements, which are excluded on the grounds of possibly
+ * not being very representative) and treat them as a uniform sample of
+ * the non-MCV population for that relation. For each one, we apply
+ * inet_hist_value_sel to see what fraction of the first histogram
+ * it matches.
+ *
+ * We could alternatively do this the other way around using the operator's
+ * commutator. XXX would it be worthwhile to do it both ways and take the
+ * average? That would at least avoid non-commutative estimation results.
+ */
+static Selectivity
+inet_hist_inclusion_join_sel(Datum *hist1_values, int hist1_nvalues,
+ Datum *hist2_values, int hist2_nvalues,
+ int opr_codenum)
+{
+ double match = 0.0;
+ int i,
+ k,
+ n;
+
+ if (hist2_nvalues <= 2)
+ return 0.0; /* no interior histogram elements */
+
+ /* if there are too many histogram elements, decimate to limit runtime */
+ k = (hist2_nvalues - 3) / MAX_CONSIDERED_ELEMS + 1;
+
+ n = 0;
+ for (i = 1; i < hist2_nvalues - 1; i += k)
+ {
+ match += inet_hist_value_sel(hist1_values, hist1_nvalues,
+ hist2_values[i], opr_codenum);
+ n++;
+ }
+
+ return match / n;
+}
+
+/*
+ * Inet semi join selectivity estimation for one value
+ *
+ * The function calculates the probability that there is at least one row
+ * in the RHS table that satisfies the "lhs_value op column" condition.
+ * It is used in semi join estimation to check a sample from the left hand
+ * side table.
+ *
+ * The MCV and histogram from the right hand side table should be provided as
+ * arguments with the lhs_value from the left hand side table for the join.
+ * hist_weight is the total number of rows represented by the histogram.
+ * For example, if the table has 1000 rows, and 10% of the rows are in the MCV
+ * list, and another 10% are NULLs, hist_weight would be 800.
+ *
+ * First, the lhs_value will be matched to the most common values. If it
+ * matches any of them, 1.0 will be returned, because then there is surely
+ * a match.
+ *
+ * Otherwise, the histogram will be used to estimate the number of rows in
+ * the second table that match the condition. If the estimate is greater
+ * than 1.0, 1.0 will be returned, because it means there is a greater chance
+ * that the lhs_value will match more than one row in the table. If it is
+ * between 0.0 and 1.0, it will be returned as the probability.
+ */
+static Selectivity
+inet_semi_join_sel(Datum lhs_value,
+ bool mcv_exists, Datum *mcv_values, int mcv_nvalues,
+ bool hist_exists, Datum *hist_values, int hist_nvalues,
+ double hist_weight,
+ FmgrInfo *proc, int opr_codenum)
+{
+ if (mcv_exists)
+ {
+ int i;
+
+ for (i = 0; i < mcv_nvalues; i++)
+ {
+ if (DatumGetBool(FunctionCall2(proc,
+ lhs_value,
+ mcv_values[i])))
+ return 1.0;
+ }
+ }
+
+ if (hist_exists && hist_weight > 0)
+ {
+ Selectivity hist_selec;
+
+ /* Commute operator, since we're passing lhs_value on the right */
+ hist_selec = inet_hist_value_sel(hist_values, hist_nvalues,
+ lhs_value, -opr_codenum);
+
+ if (hist_selec > 0)
+ return Min(1.0, hist_weight * hist_selec);
+ }
+
+ return 0.0;
+}
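As an illustration with made-up numbers, continuing the example in the comment above: with hist_weight = 800, a histogram selectivity of 0.005 gives 800 * 0.005 = 4.0, which is clamped to 1.0 (the LHS value almost surely has a match), whereas a selectivity of 0.0005 gives 0.4, which is returned as the probability of a match.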
+
+/*
+ * Assign useful code numbers for the subnet inclusion/overlap operators
+ *
+ * Only inet_masklen_inclusion_cmp() and inet_hist_match_divider() depend
+ * on the exact codes assigned here; but many other places in this file
+ * know that they can negate a code to obtain the code for the commutator
+ * operator.
+ */
+static int
+inet_opr_codenum(Oid operator)
+{
+ switch (operator)
+ {
+ case OID_INET_SUP_OP:
+ return -2;
+ case OID_INET_SUPEQ_OP:
+ return -1;
+ case OID_INET_OVERLAP_OP:
+ return 0;
+ case OID_INET_SUBEQ_OP:
+ return 1;
+ case OID_INET_SUB_OP:
+ return 2;
+ default:
+ elog(ERROR, "unrecognized operator %u for inet selectivity",
+ operator);
+ }
+ return 0; /* unreached, but keep compiler quiet */
+}
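For example, the code for <<= (1) negates to -1, the code for >>=, which is its commutator; likewise << (2) and >> (-2) negate into each other, and && (0) is its own commutator. This is the property relied on when callers above pass -opr_codenum to evaluate a clause with its operands swapped.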
+
+/*
+ * Comparison function for the subnet inclusion/overlap operators
+ *
+ * If the comparison is okay for the specified inclusion operator, the return
+ * value will be 0. Otherwise the return value will be less than or greater
+ * than 0 as appropriate for the operator.
+ *
+ * Comparison is compatible with the basic comparison function for the inet
+ * type. See network_cmp_internal() in network.c for the original. Basic
+ * comparison operators are implemented with the network_cmp_internal()
+ * function. It is possible to implement the subnet inclusion operators with
+ * this function.
+ *
+ * Comparison is first on the common bits of the network part, then on the
+ * length of the network part (masklen) as in the network_cmp_internal()
+ * function. Only the first part is in this function. The second part is
+ * separated to another function for reusability. The difference between the
+ * second part and the original network_cmp_internal() is that the inclusion
+ * operator is considered while comparing the lengths of the network parts.
+ * See the inet_masklen_inclusion_cmp() function below.
+ */
+static int
+inet_inclusion_cmp(inet *left, inet *right, int opr_codenum)
+{
+ if (ip_family(left) == ip_family(right))
+ {
+ int order;
+
+ order = bitncmp(ip_addr(left), ip_addr(right),
+ Min(ip_bits(left), ip_bits(right)));
+ if (order != 0)
+ return order;
+
+ return inet_masklen_inclusion_cmp(left, right, opr_codenum);
+ }
+
+ return ip_family(left) - ip_family(right);
+}
+
+/*
+ * Masklen comparison function for the subnet inclusion/overlap operators
+ *
+ * Compares the lengths of the network parts of the inputs. If the comparison
+ * is okay for the specified inclusion operator, the return value will be 0.
+ * Otherwise the return value will be less than or greater than 0 as
+ * appropriate for the operator.
+ */
+static int
+inet_masklen_inclusion_cmp(inet *left, inet *right, int opr_codenum)
+{
+ int order;
+
+ order = (int) ip_bits(left) - (int) ip_bits(right);
+
+ /*
+ * Return 0 if the operator would accept this combination of masklens.
+ * Note that opr_codenum zero (overlaps) will accept all cases.
+ */
+ if ((order > 0 && opr_codenum >= 0) ||
+ (order == 0 && opr_codenum >= -1 && opr_codenum <= 1) ||
+ (order < 0 && opr_codenum <= 0))
+ return 0;
+
+ /*
+ * Otherwise, return a negative value for sup/supeq (notionally, the RHS
+ * needs to have a larger masklen than it has, which would make it sort
+ * later), or a positive value for sub/subeq (vice versa).
+ */
+ return opr_codenum;
+}
+
+/*
+ * Inet histogram partial match divider calculation
+ *
+ * First the families and the lengths of the network parts are compared using
+ * the subnet inclusion operator. If those are acceptable for the operator,
+ * the divider will be calculated using the masklens and the common bits of
+ * the addresses. -1 will be returned if it cannot be calculated.
+ *
+ * See commentary for inet_hist_value_sel() for some rationale for this.
+ */
+static int
+inet_hist_match_divider(inet *boundary, inet *query, int opr_codenum)
+{
+ if (ip_family(boundary) == ip_family(query) &&
+ inet_masklen_inclusion_cmp(boundary, query, opr_codenum) == 0)
+ {
+ int min_bits,
+ decisive_bits;
+
+ min_bits = Min(ip_bits(boundary), ip_bits(query));
+
+ /*
+ * Set decisive_bits to the masklen of the one that should contain the
+ * other according to the operator.
+ */
+ if (opr_codenum < 0)
+ decisive_bits = ip_bits(boundary);
+ else if (opr_codenum > 0)
+ decisive_bits = ip_bits(query);
+ else
+ decisive_bits = min_bits;
+
+ /*
+ * Now return the number of non-common decisive bits. (This will be
+ * zero if the boundary and query in fact match, else positive.)
+ */
+ if (min_bits > 0)
+ return decisive_bits - bitncommon(ip_addr(boundary),
+ ip_addr(query),
+ min_bits);
+ return decisive_bits;
+ }
+
+ return -1;
+}
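As a worked example of the divider (hypothetical values; bitncommon_bytes() below is a simplified stand-in for bitncommon(), written only to keep the sketch self-contained): take a query constant of 10.0.0.0/8 with the <<= operator, so decisive_bits is the query's masklen, 8. A boundary of 10.1.0.0/16 shares all 8 decisive bits (divider 0, full bucket weight), 11.0.0.0/16 shares 7 of them (divider 1, weight 1/2), and 192.0.0.0/16 shares none (divider 8, weight 1/256).

    #include <stdio.h>

    /* simplified stand-in for bitncommon(): number of leading bits in common */
    static int
    bitncommon_bytes(const unsigned char *a, const unsigned char *b, int n)
    {
        int         i;

        for (i = 0; i < n; i++)
        {
            int         bit = 7 - (i % 8);

            if (((a[i / 8] >> bit) & 1) != ((b[i / 8] >> bit) & 1))
                break;
        }
        return i;
    }

    int
    main(void)
    {
        unsigned char query[4] = {10, 0, 0, 0};   /* 10.0.0.0/8 constant */
        unsigned char b1[4] = {10, 1, 0, 0};      /* 10.1.0.0/16 boundary */
        unsigned char b2[4] = {11, 0, 0, 0};      /* 11.0.0.0/16 boundary */
        unsigned char b3[4] = {192, 0, 0, 0};     /* 192.0.0.0/16 boundary */
        int         decisive_bits = 8;            /* masklen of the query for "<<=" */

        printf("10.1.0.0/16:  divider %d\n", decisive_bits - bitncommon_bytes(b1, query, 8));
        printf("11.0.0.0/16:  divider %d\n", decisive_bits - bitncommon_bytes(b2, query, 8));
        printf("192.0.0.0/16: divider %d\n", decisive_bits - bitncommon_bytes(b3, query, 8));
        return 0;
    }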
diff --git a/src/backend/utils/adt/network_spgist.c b/src/backend/utils/adt/network_spgist.c
new file mode 100644
index 0000000..3832156
--- /dev/null
+++ b/src/backend/utils/adt/network_spgist.c
@@ -0,0 +1,711 @@
+/*-------------------------------------------------------------------------
+ *
+ * network_spgist.c
+ * SP-GiST support for network types.
+ *
+ * We split inet index entries first by address family (IPv4 or IPv6).
+ * If the entries below a given inner tuple are all of the same family,
+ * we identify their common prefix and split by the next bit of the address,
+ * and by whether their masklens exceed the length of the common prefix.
+ *
+ * An inner tuple that has both IPv4 and IPv6 children has a null prefix
+ * and exactly two nodes, the first being for IPv4 and the second for IPv6.
+ *
+ * Otherwise, the prefix is a CIDR value representing the common prefix,
+ * and there are exactly four nodes. Node numbers 0 and 1 are for addresses
+ * with the same masklen as the prefix, while node numbers 2 and 3 are for
+ * addresses with larger masklen. (We do not allow a tuple to contain
+ * entries with masklen smaller than its prefix's.) Node numbers 0 and 1
+ * are distinguished by the next bit of the address after the common prefix,
+ * and likewise for node numbers 2 and 3. If there are no more bits in
+ * the address family, everything goes into node 0 (which will probably
+ * lead to creating an allTheSame tuple).
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/network_spgist.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <sys/socket.h>
+
+#include "access/spgist.h"
+#include "catalog/pg_type.h"
+#include "utils/builtins.h"
+#include "utils/inet.h"
+
+
+static int inet_spg_node_number(const inet *val, int commonbits);
+static int inet_spg_consistent_bitmap(const inet *prefix, int nkeys,
+ ScanKey scankeys, bool leaf);
+
+/*
+ * The SP-GiST configuration function
+ */
+Datum
+inet_spg_config(PG_FUNCTION_ARGS)
+{
+ /* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */
+ spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1);
+
+ cfg->prefixType = CIDROID;
+ cfg->labelType = VOIDOID;
+ cfg->canReturnData = true;
+ cfg->longValuesOK = false;
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * The SP-GiST choose function
+ */
+Datum
+inet_spg_choose(PG_FUNCTION_ARGS)
+{
+ spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0);
+ spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1);
+ inet *val = DatumGetInetPP(in->datum),
+ *prefix;
+ int commonbits;
+
+ /*
+ * If we're looking at a tuple that splits by address family, choose the
+ * appropriate subnode.
+ */
+ if (!in->hasPrefix)
+ {
+ /* allTheSame isn't possible for such a tuple */
+ Assert(!in->allTheSame);
+ Assert(in->nNodes == 2);
+
+ out->resultType = spgMatchNode;
+ out->result.matchNode.nodeN = (ip_family(val) == PGSQL_AF_INET) ? 0 : 1;
+ out->result.matchNode.restDatum = InetPGetDatum(val);
+
+ PG_RETURN_VOID();
+ }
+
+ /* Else it must split by prefix */
+ Assert(in->nNodes == 4 || in->allTheSame);
+
+ prefix = DatumGetInetPP(in->prefixDatum);
+ commonbits = ip_bits(prefix);
+
+ /*
+ * We cannot put addresses from different families under the same inner
+ * node, so we have to split if the new value's family is different.
+ */
+ if (ip_family(val) != ip_family(prefix))
+ {
+ /* Set up 2-node tuple */
+ out->resultType = spgSplitTuple;
+ out->result.splitTuple.prefixHasPrefix = false;
+ out->result.splitTuple.prefixNNodes = 2;
+ out->result.splitTuple.prefixNodeLabels = NULL;
+
+ /* Identify which node the existing data goes into */
+ out->result.splitTuple.childNodeN =
+ (ip_family(prefix) == PGSQL_AF_INET) ? 0 : 1;
+
+ out->result.splitTuple.postfixHasPrefix = true;
+ out->result.splitTuple.postfixPrefixDatum = InetPGetDatum(prefix);
+
+ PG_RETURN_VOID();
+ }
+
+ /*
+ * If the new value does not match the existing prefix, we have to split.
+ */
+ if (ip_bits(val) < commonbits ||
+ bitncmp(ip_addr(prefix), ip_addr(val), commonbits) != 0)
+ {
+ /* Determine new prefix length for the split tuple */
+ commonbits = bitncommon(ip_addr(prefix), ip_addr(val),
+ Min(ip_bits(val), commonbits));
+
+ /* Set up 4-node tuple */
+ out->resultType = spgSplitTuple;
+ out->result.splitTuple.prefixHasPrefix = true;
+ out->result.splitTuple.prefixPrefixDatum =
+ InetPGetDatum(cidr_set_masklen_internal(val, commonbits));
+ out->result.splitTuple.prefixNNodes = 4;
+ out->result.splitTuple.prefixNodeLabels = NULL;
+
+ /* Identify which node the existing data goes into */
+ out->result.splitTuple.childNodeN =
+ inet_spg_node_number(prefix, commonbits);
+
+ out->result.splitTuple.postfixHasPrefix = true;
+ out->result.splitTuple.postfixPrefixDatum = InetPGetDatum(prefix);
+
+ PG_RETURN_VOID();
+ }
+
+ /*
+ * All OK, choose the node to descend into. (If this tuple is marked
+ * allTheSame, the core code will ignore our choice of nodeN; but we need
+ * not account for that case explicitly here.)
+ */
+ out->resultType = spgMatchNode;
+ out->result.matchNode.nodeN = inet_spg_node_number(val, commonbits);
+ out->result.matchNode.restDatum = InetPGetDatum(val);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * The SP-GiST picksplit function
+ */
+Datum
+inet_spg_picksplit(PG_FUNCTION_ARGS)
+{
+ spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0);
+ spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1);
+ inet *prefix,
+ *tmp;
+ int i,
+ commonbits;
+ bool differentFamilies = false;
+
+ /* Initialize the prefix with the first item */
+ prefix = DatumGetInetPP(in->datums[0]);
+ commonbits = ip_bits(prefix);
+
+ /* Examine remaining items to discover minimum common prefix length */
+ for (i = 1; i < in->nTuples; i++)
+ {
+ tmp = DatumGetInetPP(in->datums[i]);
+
+ if (ip_family(tmp) != ip_family(prefix))
+ {
+ differentFamilies = true;
+ break;
+ }
+
+ if (ip_bits(tmp) < commonbits)
+ commonbits = ip_bits(tmp);
+ commonbits = bitncommon(ip_addr(prefix), ip_addr(tmp), commonbits);
+ if (commonbits == 0)
+ break;
+ }
+
+ /* Don't need labels; allocate output arrays */
+ out->nodeLabels = NULL;
+ out->mapTuplesToNodes = (int *) palloc(sizeof(int) * in->nTuples);
+ out->leafTupleDatums = (Datum *) palloc(sizeof(Datum) * in->nTuples);
+
+ if (differentFamilies)
+ {
+ /* Set up 2-node tuple */
+ out->hasPrefix = false;
+ out->nNodes = 2;
+
+ for (i = 0; i < in->nTuples; i++)
+ {
+ tmp = DatumGetInetPP(in->datums[i]);
+ out->mapTuplesToNodes[i] =
+ (ip_family(tmp) == PGSQL_AF_INET) ? 0 : 1;
+ out->leafTupleDatums[i] = InetPGetDatum(tmp);
+ }
+ }
+ else
+ {
+ /* Set up 4-node tuple */
+ out->hasPrefix = true;
+ out->prefixDatum =
+ InetPGetDatum(cidr_set_masklen_internal(prefix, commonbits));
+ out->nNodes = 4;
+
+ for (i = 0; i < in->nTuples; i++)
+ {
+ tmp = DatumGetInetPP(in->datums[i]);
+ out->mapTuplesToNodes[i] = inet_spg_node_number(tmp, commonbits);
+ out->leafTupleDatums[i] = InetPGetDatum(tmp);
+ }
+ }
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * The SP-GiST query consistency check for inner tuples
+ */
+Datum
+inet_spg_inner_consistent(PG_FUNCTION_ARGS)
+{
+ spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0);
+ spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1);
+ int i;
+ int which;
+
+ if (!in->hasPrefix)
+ {
+ Assert(!in->allTheSame);
+ Assert(in->nNodes == 2);
+
+ /* Identify which child nodes need to be visited */
+ which = 1 | (1 << 1);
+
+ for (i = 0; i < in->nkeys; i++)
+ {
+ StrategyNumber strategy = in->scankeys[i].sk_strategy;
+ inet *argument = DatumGetInetPP(in->scankeys[i].sk_argument);
+
+ switch (strategy)
+ {
+ case RTLessStrategyNumber:
+ case RTLessEqualStrategyNumber:
+ if (ip_family(argument) == PGSQL_AF_INET)
+ which &= 1;
+ break;
+
+ case RTGreaterEqualStrategyNumber:
+ case RTGreaterStrategyNumber:
+ if (ip_family(argument) == PGSQL_AF_INET6)
+ which &= (1 << 1);
+ break;
+
+ case RTNotEqualStrategyNumber:
+ break;
+
+ default:
+ /* all other ops can only match addrs of same family */
+ if (ip_family(argument) == PGSQL_AF_INET)
+ which &= 1;
+ else
+ which &= (1 << 1);
+ break;
+ }
+ }
+ }
+ else if (!in->allTheSame)
+ {
+ Assert(in->nNodes == 4);
+
+ /* Identify which child nodes need to be visited */
+ which = inet_spg_consistent_bitmap(DatumGetInetPP(in->prefixDatum),
+ in->nkeys, in->scankeys, false);
+ }
+ else
+ {
+ /* Must visit all nodes; we assume there are fewer than 32 of them */
+ which = ~0;
+ }
+
+ out->nNodes = 0;
+
+ if (which)
+ {
+ out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
+
+ for (i = 0; i < in->nNodes; i++)
+ {
+ if (which & (1 << i))
+ {
+ out->nodeNumbers[out->nNodes] = i;
+ out->nNodes++;
+ }
+ }
+ }
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * The SP-GiST query consistency check for leaf tuples
+ */
+Datum
+inet_spg_leaf_consistent(PG_FUNCTION_ARGS)
+{
+ spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0);
+ spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1);
+ inet *leaf = DatumGetInetPP(in->leafDatum);
+
+ /* All tests are exact. */
+ out->recheck = false;
+
+ /* Leaf is what it is... */
+ out->leafValue = InetPGetDatum(leaf);
+
+ /* Use common code to apply the tests. */
+ PG_RETURN_BOOL(inet_spg_consistent_bitmap(leaf, in->nkeys, in->scankeys,
+ true));
+}
+
+/*
+ * Calculate node number (within a 4-node, single-family inner index tuple)
+ *
+ * The value must have the same family as the node's prefix, and
+ * commonbits is the mask length of the prefix. We use even or odd
+ * nodes according to the next address bit after the commonbits,
+ * and low or high nodes according to whether the value's mask length
+ * is larger than commonbits.
+ */
+static int
+inet_spg_node_number(const inet *val, int commonbits)
+{
+ int nodeN = 0;
+
+ if (commonbits < ip_maxbits(val) &&
+ ip_addr(val)[commonbits / 8] & (1 << (7 - commonbits % 8)))
+ nodeN |= 1;
+ if (commonbits < ip_bits(val))
+ nodeN |= 2;
+
+ return nodeN;
+}
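For illustration (values chosen arbitrarily): under a prefix of 192.168.0.0/16, commonbits is 16, so the "next address bit" is the high bit of the third octet. 192.168.0.0/16 then lands in node 0 (next bit 0, masklen equal to the prefix's), a value such as 192.168.200.0/16 in node 1 (next bit 1, masklen equal), 192.168.64.0/18 in node 2 (next bit 0, larger masklen), and 192.168.128.0/24 in node 3 (next bit 1, larger masklen).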
+
+/*
+ * Calculate bitmap of node numbers that are consistent with the query
+ *
+ * This can be used either at a 4-way inner tuple, or at a leaf tuple.
+ * In the latter case, we should return a boolean result (0 or 1)
+ * not a bitmap.
+ *
+ * This definition is pretty odd, but the inner and leaf consistency checks
+ * are mostly common and it seems best to keep them in one function.
+ */
+static int
+inet_spg_consistent_bitmap(const inet *prefix, int nkeys, ScanKey scankeys,
+ bool leaf)
+{
+ int bitmap;
+ int commonbits,
+ i;
+
+ /* Initialize result to allow visiting all children */
+ if (leaf)
+ bitmap = 1;
+ else
+ bitmap = 1 | (1 << 1) | (1 << 2) | (1 << 3);
+
+ commonbits = ip_bits(prefix);
+
+ for (i = 0; i < nkeys; i++)
+ {
+ inet *argument = DatumGetInetPP(scankeys[i].sk_argument);
+ StrategyNumber strategy = scankeys[i].sk_strategy;
+ int order;
+
+ /*
+ * Check 0: different families
+ *
+ * Matching families do not help any of the strategies.
+ */
+ if (ip_family(argument) != ip_family(prefix))
+ {
+ switch (strategy)
+ {
+ case RTLessStrategyNumber:
+ case RTLessEqualStrategyNumber:
+ if (ip_family(argument) < ip_family(prefix))
+ bitmap = 0;
+ break;
+
+ case RTGreaterEqualStrategyNumber:
+ case RTGreaterStrategyNumber:
+ if (ip_family(argument) > ip_family(prefix))
+ bitmap = 0;
+ break;
+
+ case RTNotEqualStrategyNumber:
+ break;
+
+ default:
+ /* For all other cases, we can be sure there is no match */
+ bitmap = 0;
+ break;
+ }
+
+ if (!bitmap)
+ break;
+
+ /* Other checks make no sense with different families. */
+ continue;
+ }
+
+ /*
+ * Check 1: network bit count
+ *
+ * Network bit count (ip_bits) helps to check leaves for sub network
+ * and sup network operators. At non-leaf nodes, we know every child
+ * value has greater ip_bits, so we can avoid descending in some cases
+ * too.
+ *
+ * This check is less expensive than checking the address bits, so we
+ * do it first. For the basic comparison strategies, however, it has to
+ * be done afterwards, because ip_bits only affects their results when
+ * the common network bits are the same.
+ */
+ switch (strategy)
+ {
+ case RTSubStrategyNumber:
+ if (commonbits <= ip_bits(argument))
+ bitmap &= (1 << 2) | (1 << 3);
+ break;
+
+ case RTSubEqualStrategyNumber:
+ if (commonbits < ip_bits(argument))
+ bitmap &= (1 << 2) | (1 << 3);
+ break;
+
+ case RTSuperStrategyNumber:
+ if (commonbits == ip_bits(argument) - 1)
+ bitmap &= 1 | (1 << 1);
+ else if (commonbits >= ip_bits(argument))
+ bitmap = 0;
+ break;
+
+ case RTSuperEqualStrategyNumber:
+ if (commonbits == ip_bits(argument))
+ bitmap &= 1 | (1 << 1);
+ else if (commonbits > ip_bits(argument))
+ bitmap = 0;
+ break;
+
+ case RTEqualStrategyNumber:
+ if (commonbits < ip_bits(argument))
+ bitmap &= (1 << 2) | (1 << 3);
+ else if (commonbits == ip_bits(argument))
+ bitmap &= 1 | (1 << 1);
+ else
+ bitmap = 0;
+ break;
+ }
+
+ if (!bitmap)
+ break;
+
+ /*
+ * Check 2: common network bits
+ *
+ * Compare available common prefix bits to the query, but not beyond
+ * either the query's netmask or the minimum netmask among the
+ * represented values. If these bits don't match the query, we can
+ * eliminate some cases.
+ */
+ order = bitncmp(ip_addr(prefix), ip_addr(argument),
+ Min(commonbits, ip_bits(argument)));
+
+ if (order != 0)
+ {
+ switch (strategy)
+ {
+ case RTLessStrategyNumber:
+ case RTLessEqualStrategyNumber:
+ if (order > 0)
+ bitmap = 0;
+ break;
+
+ case RTGreaterEqualStrategyNumber:
+ case RTGreaterStrategyNumber:
+ if (order < 0)
+ bitmap = 0;
+ break;
+
+ case RTNotEqualStrategyNumber:
+ break;
+
+ default:
+ /* For all other cases, we can be sure there is no match */
+ bitmap = 0;
+ break;
+ }
+
+ if (!bitmap)
+ break;
+
+ /*
+ * Remaining checks make no sense when common bits don't match.
+ */
+ continue;
+ }
+
+ /*
+ * Check 3: next network bit
+ *
+ * We can filter out branch 2 or 3 using the next network bit of the
+ * argument, if it is available.
+ *
+ * This check matters for the performance of the search. The results
+ * would be correct without it.
+ */
+ if (bitmap & ((1 << 2) | (1 << 3)) &&
+ commonbits < ip_bits(argument))
+ {
+ int nextbit;
+
+ nextbit = ip_addr(argument)[commonbits / 8] &
+ (1 << (7 - commonbits % 8));
+
+ switch (strategy)
+ {
+ case RTLessStrategyNumber:
+ case RTLessEqualStrategyNumber:
+ if (!nextbit)
+ bitmap &= 1 | (1 << 1) | (1 << 2);
+ break;
+
+ case RTGreaterEqualStrategyNumber:
+ case RTGreaterStrategyNumber:
+ if (nextbit)
+ bitmap &= 1 | (1 << 1) | (1 << 3);
+ break;
+
+ case RTNotEqualStrategyNumber:
+ break;
+
+ default:
+ if (!nextbit)
+ bitmap &= 1 | (1 << 1) | (1 << 2);
+ else
+ bitmap &= 1 | (1 << 1) | (1 << 3);
+ break;
+ }
+
+ if (!bitmap)
+ break;
+ }
+
+ /*
+ * Remaining checks are only for the basic comparison strategies. This
+ * test relies on the strategy number ordering defined in stratnum.h.
+ */
+ if (strategy < RTEqualStrategyNumber ||
+ strategy > RTGreaterEqualStrategyNumber)
+ continue;
+
+ /*
+ * Check 4: network bit count
+ *
+ * At this point, we know that the common network bits of the prefix
+ * and the argument are the same, so we can go forward and check the
+ * ip_bits.
+ */
+ switch (strategy)
+ {
+ case RTLessStrategyNumber:
+ case RTLessEqualStrategyNumber:
+ if (commonbits == ip_bits(argument))
+ bitmap &= 1 | (1 << 1);
+ else if (commonbits > ip_bits(argument))
+ bitmap = 0;
+ break;
+
+ case RTGreaterEqualStrategyNumber:
+ case RTGreaterStrategyNumber:
+ if (commonbits < ip_bits(argument))
+ bitmap &= (1 << 2) | (1 << 3);
+ break;
+ }
+
+ if (!bitmap)
+ break;
+
+ /* Remaining checks don't make sense with different ip_bits. */
+ if (commonbits != ip_bits(argument))
+ continue;
+
+ /*
+ * Check 5: next host bit
+ *
+ * We can filter out branch 0 or 1 using the next host bit of the
+ * argument, if it is available.
+ *
+ * This check matters for the performance of the search. The results
+ * would be correct without it. There is no point in running it for
+ * leaves, as we have to check the whole address in the next step.
+ */
+ if (!leaf && bitmap & (1 | (1 << 1)) &&
+ commonbits < ip_maxbits(argument))
+ {
+ int nextbit;
+
+ nextbit = ip_addr(argument)[commonbits / 8] &
+ (1 << (7 - commonbits % 8));
+
+ switch (strategy)
+ {
+ case RTLessStrategyNumber:
+ case RTLessEqualStrategyNumber:
+ if (!nextbit)
+ bitmap &= 1 | (1 << 2) | (1 << 3);
+ break;
+
+ case RTGreaterEqualStrategyNumber:
+ case RTGreaterStrategyNumber:
+ if (nextbit)
+ bitmap &= (1 << 1) | (1 << 2) | (1 << 3);
+ break;
+
+ case RTNotEqualStrategyNumber:
+ break;
+
+ default:
+ if (!nextbit)
+ bitmap &= 1 | (1 << 2) | (1 << 3);
+ else
+ bitmap &= (1 << 1) | (1 << 2) | (1 << 3);
+ break;
+ }
+
+ if (!bitmap)
+ break;
+ }
+
+ /*
+ * Check 6: whole address
+ *
+ * This is the last check for correctness of the basic comparison
+ * strategies. It's only appropriate at leaf entries.
+ */
+ if (leaf)
+ {
+ /* Redo ordering comparison using all address bits */
+ order = bitncmp(ip_addr(prefix), ip_addr(argument),
+ ip_maxbits(prefix));
+
+ switch (strategy)
+ {
+ case RTLessStrategyNumber:
+ if (order >= 0)
+ bitmap = 0;
+ break;
+
+ case RTLessEqualStrategyNumber:
+ if (order > 0)
+ bitmap = 0;
+ break;
+
+ case RTEqualStrategyNumber:
+ if (order != 0)
+ bitmap = 0;
+ break;
+
+ case RTGreaterEqualStrategyNumber:
+ if (order < 0)
+ bitmap = 0;
+ break;
+
+ case RTGreaterStrategyNumber:
+ if (order <= 0)
+ bitmap = 0;
+ break;
+
+ case RTNotEqualStrategyNumber:
+ if (order == 0)
+ bitmap = 0;
+ break;
+ }
+
+ if (!bitmap)
+ break;
+ }
+ }
+
+ return bitmap;
+}
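To illustrate the bitmap semantics with hypothetical inputs: at a 4-node inner tuple whose prefix is 10.0.0.0/8, a query "col << 10.0.0.0/8" (RTSubStrategyNumber) starts with bitmap 0b1111; check 1 sees that commonbits (8) <= ip_bits of the argument (8) and narrows it to 0b1100, so only nodes 2 and 3 (entries with masklen greater than 8) are visited, which is exactly the set that can contain strict subnets of 10.0.0.0/8. At a leaf, the same logic runs with a one-bit bitmap plus the final whole-address check, yielding a plain boolean.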
diff --git a/src/backend/utils/adt/numeric.c b/src/backend/utils/adt/numeric.c
new file mode 100644
index 0000000..a1c9d69
--- /dev/null
+++ b/src/backend/utils/adt/numeric.c
@@ -0,0 +1,11484 @@
+/*-------------------------------------------------------------------------
+ *
+ * numeric.c
+ * An exact numeric data type for the Postgres database system
+ *
+ * Original coding 1998, Jan Wieck. Heavily revised 2003, Tom Lane.
+ *
+ * Many of the algorithmic ideas are borrowed from David M. Smith's "FM"
+ * multiple-precision math library, most recently published as Algorithm
+ * 786: Multiple-Precision Complex Arithmetic and Functions, ACM
+ * Transactions on Mathematical Software, Vol. 24, No. 4, December 1998,
+ * pages 359-367.
+ *
+ * Copyright (c) 1998-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/numeric.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <ctype.h>
+#include <float.h>
+#include <limits.h>
+#include <math.h>
+
+#include "catalog/pg_type.h"
+#include "common/hashfn.h"
+#include "common/int.h"
+#include "funcapi.h"
+#include "lib/hyperloglog.h"
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/float.h"
+#include "utils/guc.h"
+#include "utils/numeric.h"
+#include "utils/pg_lsn.h"
+#include "utils/sortsupport.h"
+
+/* ----------
+ * Uncomment the following to enable compilation of dump_numeric()
+ * and dump_var() and to get a dump of any result produced by make_result().
+ * ----------
+#define NUMERIC_DEBUG
+ */
+
+
+/* ----------
+ * Local data types
+ *
+ * Numeric values are represented in a base-NBASE floating point format.
+ * Each "digit" ranges from 0 to NBASE-1. The type NumericDigit is signed
+ * and wide enough to store a digit. We assume that NBASE*NBASE can fit in
+ * an int. Although the purely calculational routines could handle any even
+ * NBASE that's less than sqrt(INT_MAX), in practice we are only interested
+ * in NBASE a power of ten, so that I/O conversions and decimal rounding
+ * are easy. Also, it's actually more efficient if NBASE is rather less than
+ * sqrt(INT_MAX), so that there is "headroom" for mul_var and div_var_fast to
+ * postpone processing carries.
+ *
+ * Values of NBASE other than 10000 are considered of historical interest only
+ * and are no longer supported in any sense; no mechanism exists for the client
+ * to discover the base, so every client supporting binary mode expects the
+ * base-10000 format. If you plan to change this, also note the numeric
+ * abbreviation code, which assumes NBASE=10000.
+ * ----------
+ */
+
+#if 0
+#define NBASE 10
+#define HALF_NBASE 5
+#define DEC_DIGITS 1 /* decimal digits per NBASE digit */
+#define MUL_GUARD_DIGITS 4 /* these are measured in NBASE digits */
+#define DIV_GUARD_DIGITS 8
+
+typedef signed char NumericDigit;
+#endif
+
+#if 0
+#define NBASE 100
+#define HALF_NBASE 50
+#define DEC_DIGITS 2 /* decimal digits per NBASE digit */
+#define MUL_GUARD_DIGITS 3 /* these are measured in NBASE digits */
+#define DIV_GUARD_DIGITS 6
+
+typedef signed char NumericDigit;
+#endif
+
+#if 1
+#define NBASE 10000
+#define HALF_NBASE 5000
+#define DEC_DIGITS 4 /* decimal digits per NBASE digit */
+#define MUL_GUARD_DIGITS 2 /* these are measured in NBASE digits */
+#define DIV_GUARD_DIGITS 4
+
+typedef int16 NumericDigit;
+#endif
+
+/*
+ * The Numeric type as stored on disk.
+ *
+ * If the high bits of the first word of a NumericChoice (n_header, or
+ * n_short.n_header, or n_long.n_sign_dscale) are NUMERIC_SHORT, then the
+ * numeric follows the NumericShort format; if they are NUMERIC_POS or
+ * NUMERIC_NEG, it follows the NumericLong format. If they are NUMERIC_SPECIAL,
+ * the value is a NaN or Infinity. We currently always store SPECIAL values
+ * using just two bytes (i.e. only n_header), but previous releases used only
+ * the NumericLong format, so we might find 4-byte NaNs (though not infinities)
+ * on disk if a database has been migrated using pg_upgrade. In either case,
+ * the low-order bits of a special value's header are reserved and currently
+ * should always be set to zero.
+ *
+ * In the NumericShort format, the remaining 14 bits of the header word
+ * (n_short.n_header) are allocated as follows: 1 for sign (positive or
+ * negative), 6 for display scale, and 7 for weight. In practice, most
+ * commonly-encountered values can be represented this way.
+ *
+ * In the NumericLong format, the remaining 14 bits of the header word
+ * (n_long.n_sign_dscale) represent the display scale; and the weight is
+ * stored separately in n_weight.
+ *
+ * NOTE: by convention, values in the packed form have been stripped of
+ * all leading and trailing zero digits (where a "digit" is of base NBASE).
+ * In particular, if the value is zero, there will be no digits at all!
+ * The weight is arbitrary in that case, but we normally set it to zero.
+ */
+
+struct NumericShort
+{
+ uint16 n_header; /* Sign + display scale + weight */
+ NumericDigit n_data[FLEXIBLE_ARRAY_MEMBER]; /* Digits */
+};
+
+struct NumericLong
+{
+ uint16 n_sign_dscale; /* Sign + display scale */
+ int16 n_weight; /* Weight of 1st digit */
+ NumericDigit n_data[FLEXIBLE_ARRAY_MEMBER]; /* Digits */
+};
+
+union NumericChoice
+{
+ uint16 n_header; /* Header word */
+ struct NumericLong n_long; /* Long form (4-byte header) */
+ struct NumericShort n_short; /* Short form (2-byte header) */
+};
+
+struct NumericData
+{
+ int32 vl_len_; /* varlena header (do not touch directly!) */
+ union NumericChoice choice; /* choice of format */
+};
+
+
+/*
+ * Interpretation of high bits.
+ */
+
+#define NUMERIC_SIGN_MASK 0xC000
+#define NUMERIC_POS 0x0000
+#define NUMERIC_NEG 0x4000
+#define NUMERIC_SHORT 0x8000
+#define NUMERIC_SPECIAL 0xC000
+
+#define NUMERIC_FLAGBITS(n) ((n)->choice.n_header & NUMERIC_SIGN_MASK)
+#define NUMERIC_IS_SHORT(n) (NUMERIC_FLAGBITS(n) == NUMERIC_SHORT)
+#define NUMERIC_IS_SPECIAL(n) (NUMERIC_FLAGBITS(n) == NUMERIC_SPECIAL)
+
+#define NUMERIC_HDRSZ (VARHDRSZ + sizeof(uint16) + sizeof(int16))
+#define NUMERIC_HDRSZ_SHORT (VARHDRSZ + sizeof(uint16))
+
+/*
+ * If the flag bits are NUMERIC_SHORT or NUMERIC_SPECIAL, we want the short
+ * header; otherwise, we want the long one. Instead of testing against each
+ * value, we can just look at the high bit, for a slight efficiency gain.
+ */
+#define NUMERIC_HEADER_IS_SHORT(n) (((n)->choice.n_header & 0x8000) != 0)
+#define NUMERIC_HEADER_SIZE(n) \
+ (VARHDRSZ + sizeof(uint16) + \
+ (NUMERIC_HEADER_IS_SHORT(n) ? 0 : sizeof(int16)))
+
+/*
+ * Definitions for special values (NaN, positive infinity, negative infinity).
+ *
+ * The two bits after the NUMERIC_SPECIAL bits are 00 for NaN, 01 for positive
+ * infinity, 11 for negative infinity. (This makes the sign bit match where
+ * it is in a short-format value, though we make no use of that at present.)
+ * We could mask off the remaining bits before testing the active bits, but
+ * currently those bits must be zeroes, so masking would just add cycles.
+ */
+#define NUMERIC_EXT_SIGN_MASK 0xF000 /* high bits plus NaN/Inf flag bits */
+#define NUMERIC_NAN 0xC000
+#define NUMERIC_PINF 0xD000
+#define NUMERIC_NINF 0xF000
+#define NUMERIC_INF_SIGN_MASK 0x2000
+
+#define NUMERIC_EXT_FLAGBITS(n) ((n)->choice.n_header & NUMERIC_EXT_SIGN_MASK)
+#define NUMERIC_IS_NAN(n) ((n)->choice.n_header == NUMERIC_NAN)
+#define NUMERIC_IS_PINF(n) ((n)->choice.n_header == NUMERIC_PINF)
+#define NUMERIC_IS_NINF(n) ((n)->choice.n_header == NUMERIC_NINF)
+#define NUMERIC_IS_INF(n) \
+ (((n)->choice.n_header & ~NUMERIC_INF_SIGN_MASK) == NUMERIC_PINF)
+
+/*
+ * Short format definitions.
+ */
+
+#define NUMERIC_SHORT_SIGN_MASK 0x2000
+#define NUMERIC_SHORT_DSCALE_MASK 0x1F80
+#define NUMERIC_SHORT_DSCALE_SHIFT 7
+#define NUMERIC_SHORT_DSCALE_MAX \
+ (NUMERIC_SHORT_DSCALE_MASK >> NUMERIC_SHORT_DSCALE_SHIFT)
+#define NUMERIC_SHORT_WEIGHT_SIGN_MASK 0x0040
+#define NUMERIC_SHORT_WEIGHT_MASK 0x003F
+#define NUMERIC_SHORT_WEIGHT_MAX NUMERIC_SHORT_WEIGHT_MASK
+#define NUMERIC_SHORT_WEIGHT_MIN (-(NUMERIC_SHORT_WEIGHT_MASK+1))
+
+/*
+ * Extract sign, display scale, weight. These macros extract field values
+ * suitable for the NumericVar format from the Numeric (on-disk) format.
+ *
+ * Note that we don't trouble to ensure that dscale and weight read as zero
+ * for an infinity; however, that doesn't matter since we never convert
+ * "special" numerics to NumericVar form. Only the constants defined below
+ * (const_nan, etc) ever represent a non-finite value as a NumericVar.
+ */
+
+#define NUMERIC_DSCALE_MASK 0x3FFF
+#define NUMERIC_DSCALE_MAX NUMERIC_DSCALE_MASK
+
+#define NUMERIC_SIGN(n) \
+ (NUMERIC_IS_SHORT(n) ? \
+ (((n)->choice.n_short.n_header & NUMERIC_SHORT_SIGN_MASK) ? \
+ NUMERIC_NEG : NUMERIC_POS) : \
+ (NUMERIC_IS_SPECIAL(n) ? \
+ NUMERIC_EXT_FLAGBITS(n) : NUMERIC_FLAGBITS(n)))
+#define NUMERIC_DSCALE(n) (NUMERIC_HEADER_IS_SHORT((n)) ? \
+ ((n)->choice.n_short.n_header & NUMERIC_SHORT_DSCALE_MASK) \
+ >> NUMERIC_SHORT_DSCALE_SHIFT \
+ : ((n)->choice.n_long.n_sign_dscale & NUMERIC_DSCALE_MASK))
+#define NUMERIC_WEIGHT(n) (NUMERIC_HEADER_IS_SHORT((n)) ? \
+ (((n)->choice.n_short.n_header & NUMERIC_SHORT_WEIGHT_SIGN_MASK ? \
+ ~NUMERIC_SHORT_WEIGHT_MASK : 0) \
+ | ((n)->choice.n_short.n_header & NUMERIC_SHORT_WEIGHT_MASK)) \
+ : ((n)->choice.n_long.n_weight))
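As a worked example of the short format (illustrative value only): a header word of 0x8180 has flag bits NUMERIC_SHORT, the sign bit (0x2000) clear, dscale (0x8180 & 0x1F80) >> 7 = 3, and weight (0x8180 & 0x003F) = 0 with the weight sign bit clear. That is the header a positive value with three decimal fraction digits and one base-NBASE digit before the decimal point, such as 1.234, would carry.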
+
+/* ----------
+ * NumericVar is the format we use for arithmetic. The digit-array part
+ * is the same as the NumericData storage format, but the header is more
+ * complex.
+ *
+ * The value represented by a NumericVar is determined by the sign, weight,
+ * ndigits, and digits[] array. If it is a "special" value (NaN or Inf)
+ * then only the sign field matters; ndigits should be zero, and the weight
+ * and dscale fields are ignored.
+ *
+ * Note: the first digit of a NumericVar's value is assumed to be multiplied
+ * by NBASE ** weight. Another way to say it is that there are weight+1
+ * digits before the decimal point. It is possible to have weight < 0.
+ *
+ * buf points at the physical start of the palloc'd digit buffer for the
+ * NumericVar. digits points at the first digit in actual use (the one
+ * with the specified weight). We normally leave an unused digit or two
+ * (preset to zeroes) between buf and digits, so that there is room to store
+ * a carry out of the top digit without reallocating space. We just need to
+ * decrement digits (and increment weight) to make room for the carry digit.
+ * (There is no such extra space in a numeric value stored in the database,
+ * only in a NumericVar in memory.)
+ *
+ * If buf is NULL then the digit buffer isn't actually palloc'd and should
+ * not be freed --- see the constants below for an example.
+ *
+ * dscale, or display scale, is the nominal precision expressed as number
+ * of digits after the decimal point (it must always be >= 0 at present).
+ * dscale may be more than the number of physically stored fractional digits,
+ * implying that we have suppressed storage of significant trailing zeroes.
+ * It should never be less than the number of stored digits, since that would
+ * imply hiding digits that are present. NOTE that dscale is always expressed
+ * in *decimal* digits, and so it may correspond to a fractional number of
+ * base-NBASE digits --- divide by DEC_DIGITS to convert to NBASE digits.
+ *
+ * rscale, or result scale, is the target precision for a computation.
+ * Like dscale it is expressed as number of *decimal* digits after the decimal
+ * point, and is always >= 0 at present.
+ * Note that rscale is not stored in variables --- it's figured on-the-fly
+ * from the dscales of the inputs.
+ *
+ * While we consistently use "weight" to refer to the base-NBASE weight of
+ * a numeric value, it is convenient in some scale-related calculations to
+ * make use of the base-10 weight (ie, the approximate log10 of the value).
+ * To avoid confusion, such a decimal-units weight is called a "dweight".
+ *
+ * NB: All the variable-level functions are written in a style that makes it
+ * possible to give one and the same variable as argument and destination.
+ * This is feasible because the digit buffer is separate from the variable.
+ * ----------
+ */
+typedef struct NumericVar
+{
+ int ndigits; /* # of digits in digits[] - can be 0! */
+ int weight; /* weight of first digit */
+ int sign; /* NUMERIC_POS, _NEG, _NAN, _PINF, or _NINF */
+ int dscale; /* display scale */
+ NumericDigit *buf; /* start of palloc'd space for digits[] */
+ NumericDigit *digits; /* base-NBASE digits */
+} NumericVar;
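For example, with the standard NBASE = 10000, the value 12345.678 is held in a NumericVar as ndigits = 3, weight = 1, sign = NUMERIC_POS, dscale = 3, and digits = {1, 2345, 6780}, since 1 * 10000^1 + 2345 * 10000^0 + 6780 * 10000^-1 = 12345.678.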
+
+
+/* ----------
+ * Data for generate_series
+ * ----------
+ */
+typedef struct
+{
+ NumericVar current;
+ NumericVar stop;
+ NumericVar step;
+} generate_series_numeric_fctx;
+
+
+/* ----------
+ * Sort support.
+ * ----------
+ */
+typedef struct
+{
+ void *buf; /* buffer for short varlenas */
+ int64 input_count; /* number of non-null values seen */
+ bool estimating; /* true if estimating cardinality */
+
+ hyperLogLogState abbr_card; /* cardinality estimator */
+} NumericSortSupport;
+
+
+/* ----------
+ * Fast sum accumulator.
+ *
+ * NumericSumAccum is used to implement SUM(), and other standard aggregates
+ * that track the sum of input values. It uses 32-bit integers to store the
+ * digits, instead of the normal 16-bit integers (with NBASE=10000). This
+ * way, we can safely accumulate up to NBASE - 1 values without propagating
+ * carry, before risking overflow of any of the digits. 'num_uncarried'
+ * tracks how many values have been accumulated without propagating carry.
+ *
+ * Positive and negative values are accumulated separately, in 'pos_digits'
+ * and 'neg_digits'. This is simpler and faster than deciding whether to add
+ * or subtract from the current value, for each new value (see sub_var() for
+ * the logic we avoid by doing this). Both buffers are of the same size, and
+ * have the same weight and scale. In accum_sum_final(), the positive and
+ * negative sums are added together to produce the final result.
+ *
+ * When a new value has a larger ndigits or weight than the accumulator
+ * currently does, the accumulator is enlarged to accommodate the new value.
+ * We normally have one zero digit reserved for carry propagation, and that
+ * is indicated by the 'have_carry_space' flag. When accum_sum_carry() uses
+ * up the reserved digit, it clears the 'have_carry_space' flag. The next
+ * call to accum_sum_add() will enlarge the buffer, to make room for the
+ * extra digit, and set the flag again.
+ *
+ * To initialize a new accumulator, simply reset all fields to zeros.
+ *
+ * The accumulator does not handle NaNs.
+ * ----------
+ */
+typedef struct NumericSumAccum
+{
+ int ndigits;
+ int weight;
+ int dscale;
+ int num_uncarried;
+ bool have_carry_space;
+ int32 *pos_digits;
+ int32 *neg_digits;
+} NumericSumAccum;
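To see why deferred carrying is safe (arithmetic only, following the comment above): after a carry pass every 32-bit digit is below NBASE = 10000, and each subsequent input adds at most NBASE - 1 = 9999 to a digit, so accumulating up to 9999 further values keeps every digit below 10000 * 10000 = 100,000,000, comfortably within the int32 range of about 2.1 billion.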
+
+
+/*
+ * We define our own macros for packing and unpacking abbreviated-key
+ * representations for numeric values in order to avoid depending on
+ * USE_FLOAT8_BYVAL. The type of abbreviation we use is based only on
+ * the size of a datum, not the argument-passing convention for float8.
+ *
+ * The range of abbreviations for finite values is from +PG_INT64/32_MAX
+ * to -PG_INT64/32_MAX. NaN has the abbreviation PG_INT64/32_MIN, and we
+ * define the sort ordering to make that work out properly (see further
+ * comments below). PINF and NINF share the abbreviations of the largest
+ * and smallest finite abbreviation classes.
+ */
+#define NUMERIC_ABBREV_BITS (SIZEOF_DATUM * BITS_PER_BYTE)
+#if SIZEOF_DATUM == 8
+#define NumericAbbrevGetDatum(X) ((Datum) (X))
+#define DatumGetNumericAbbrev(X) ((int64) (X))
+#define NUMERIC_ABBREV_NAN NumericAbbrevGetDatum(PG_INT64_MIN)
+#define NUMERIC_ABBREV_PINF NumericAbbrevGetDatum(-PG_INT64_MAX)
+#define NUMERIC_ABBREV_NINF NumericAbbrevGetDatum(PG_INT64_MAX)
+#else
+#define NumericAbbrevGetDatum(X) ((Datum) (X))
+#define DatumGetNumericAbbrev(X) ((int32) (X))
+#define NUMERIC_ABBREV_NAN NumericAbbrevGetDatum(PG_INT32_MIN)
+#define NUMERIC_ABBREV_PINF NumericAbbrevGetDatum(-PG_INT32_MAX)
+#define NUMERIC_ABBREV_NINF NumericAbbrevGetDatum(PG_INT32_MAX)
+#endif
+
+
+/* ----------
+ * Some preinitialized constants
+ * ----------
+ */
+static const NumericDigit const_zero_data[1] = {0};
+static const NumericVar const_zero =
+{0, 0, NUMERIC_POS, 0, NULL, (NumericDigit *) const_zero_data};
+
+static const NumericDigit const_one_data[1] = {1};
+static const NumericVar const_one =
+{1, 0, NUMERIC_POS, 0, NULL, (NumericDigit *) const_one_data};
+
+static const NumericVar const_minus_one =
+{1, 0, NUMERIC_NEG, 0, NULL, (NumericDigit *) const_one_data};
+
+static const NumericDigit const_two_data[1] = {2};
+static const NumericVar const_two =
+{1, 0, NUMERIC_POS, 0, NULL, (NumericDigit *) const_two_data};
+
+#if DEC_DIGITS == 4
+static const NumericDigit const_zero_point_nine_data[1] = {9000};
+#elif DEC_DIGITS == 2
+static const NumericDigit const_zero_point_nine_data[1] = {90};
+#elif DEC_DIGITS == 1
+static const NumericDigit const_zero_point_nine_data[1] = {9};
+#endif
+static const NumericVar const_zero_point_nine =
+{1, -1, NUMERIC_POS, 1, NULL, (NumericDigit *) const_zero_point_nine_data};
+
+#if DEC_DIGITS == 4
+static const NumericDigit const_one_point_one_data[2] = {1, 1000};
+#elif DEC_DIGITS == 2
+static const NumericDigit const_one_point_one_data[2] = {1, 10};
+#elif DEC_DIGITS == 1
+static const NumericDigit const_one_point_one_data[2] = {1, 1};
+#endif
+static const NumericVar const_one_point_one =
+{2, 0, NUMERIC_POS, 1, NULL, (NumericDigit *) const_one_point_one_data};
+
+static const NumericVar const_nan =
+{0, 0, NUMERIC_NAN, 0, NULL, NULL};
+
+static const NumericVar const_pinf =
+{0, 0, NUMERIC_PINF, 0, NULL, NULL};
+
+static const NumericVar const_ninf =
+{0, 0, NUMERIC_NINF, 0, NULL, NULL};
+
+#if DEC_DIGITS == 4
+static const int round_powers[4] = {0, 1000, 100, 10};
+#endif
+
+
+/* ----------
+ * Local functions
+ * ----------
+ */
+
+#ifdef NUMERIC_DEBUG
+static void dump_numeric(const char *str, Numeric num);
+static void dump_var(const char *str, NumericVar *var);
+#else
+#define dump_numeric(s,n)
+#define dump_var(s,v)
+#endif
+
+#define digitbuf_alloc(ndigits) \
+ ((NumericDigit *) palloc((ndigits) * sizeof(NumericDigit)))
+#define digitbuf_free(buf) \
+ do { \
+ if ((buf) != NULL) \
+ pfree(buf); \
+ } while (0)
+
+#define init_var(v) memset(v, 0, sizeof(NumericVar))
+
+#define NUMERIC_DIGITS(num) (NUMERIC_HEADER_IS_SHORT(num) ? \
+ (num)->choice.n_short.n_data : (num)->choice.n_long.n_data)
+#define NUMERIC_NDIGITS(num) \
+ ((VARSIZE(num) - NUMERIC_HEADER_SIZE(num)) / sizeof(NumericDigit))
+#define NUMERIC_CAN_BE_SHORT(scale,weight) \
+ ((scale) <= NUMERIC_SHORT_DSCALE_MAX && \
+ (weight) <= NUMERIC_SHORT_WEIGHT_MAX && \
+ (weight) >= NUMERIC_SHORT_WEIGHT_MIN)
+
+static void alloc_var(NumericVar *var, int ndigits);
+static void free_var(NumericVar *var);
+static void zero_var(NumericVar *var);
+
+static const char *set_var_from_str(const char *str, const char *cp,
+ NumericVar *dest);
+static void set_var_from_num(Numeric value, NumericVar *dest);
+static void init_var_from_num(Numeric num, NumericVar *dest);
+static void set_var_from_var(const NumericVar *value, NumericVar *dest);
+static char *get_str_from_var(const NumericVar *var);
+static char *get_str_from_var_sci(const NumericVar *var, int rscale);
+
+static void numericvar_serialize(StringInfo buf, const NumericVar *var);
+static void numericvar_deserialize(StringInfo buf, NumericVar *var);
+
+static Numeric duplicate_numeric(Numeric num);
+static Numeric make_result(const NumericVar *var);
+static Numeric make_result_opt_error(const NumericVar *var, bool *error);
+
+static void apply_typmod(NumericVar *var, int32 typmod);
+static void apply_typmod_special(Numeric num, int32 typmod);
+
+static bool numericvar_to_int32(const NumericVar *var, int32 *result);
+static bool numericvar_to_int64(const NumericVar *var, int64 *result);
+static void int64_to_numericvar(int64 val, NumericVar *var);
+static bool numericvar_to_uint64(const NumericVar *var, uint64 *result);
+#ifdef HAVE_INT128
+static bool numericvar_to_int128(const NumericVar *var, int128 *result);
+static void int128_to_numericvar(int128 val, NumericVar *var);
+#endif
+static double numericvar_to_double_no_overflow(const NumericVar *var);
+
+static Datum numeric_abbrev_convert(Datum original_datum, SortSupport ssup);
+static bool numeric_abbrev_abort(int memtupcount, SortSupport ssup);
+static int numeric_fast_cmp(Datum x, Datum y, SortSupport ssup);
+static int numeric_cmp_abbrev(Datum x, Datum y, SortSupport ssup);
+
+static Datum numeric_abbrev_convert_var(const NumericVar *var,
+ NumericSortSupport *nss);
+
+static int cmp_numerics(Numeric num1, Numeric num2);
+static int cmp_var(const NumericVar *var1, const NumericVar *var2);
+static int cmp_var_common(const NumericDigit *var1digits, int var1ndigits,
+ int var1weight, int var1sign,
+ const NumericDigit *var2digits, int var2ndigits,
+ int var2weight, int var2sign);
+static void add_var(const NumericVar *var1, const NumericVar *var2,
+ NumericVar *result);
+static void sub_var(const NumericVar *var1, const NumericVar *var2,
+ NumericVar *result);
+static void mul_var(const NumericVar *var1, const NumericVar *var2,
+ NumericVar *result,
+ int rscale);
+static void div_var(const NumericVar *var1, const NumericVar *var2,
+ NumericVar *result,
+ int rscale, bool round);
+static void div_var_fast(const NumericVar *var1, const NumericVar *var2,
+ NumericVar *result, int rscale, bool round);
+static void div_var_int(const NumericVar *var, int ival, int ival_weight,
+ NumericVar *result, int rscale, bool round);
+static int select_div_scale(const NumericVar *var1, const NumericVar *var2);
+static void mod_var(const NumericVar *var1, const NumericVar *var2,
+ NumericVar *result);
+static void div_mod_var(const NumericVar *var1, const NumericVar *var2,
+ NumericVar *quot, NumericVar *rem);
+static void ceil_var(const NumericVar *var, NumericVar *result);
+static void floor_var(const NumericVar *var, NumericVar *result);
+
+static void gcd_var(const NumericVar *var1, const NumericVar *var2,
+ NumericVar *result);
+static void sqrt_var(const NumericVar *arg, NumericVar *result, int rscale);
+static void exp_var(const NumericVar *arg, NumericVar *result, int rscale);
+static int estimate_ln_dweight(const NumericVar *var);
+static void ln_var(const NumericVar *arg, NumericVar *result, int rscale);
+static void log_var(const NumericVar *base, const NumericVar *num,
+ NumericVar *result);
+static void power_var(const NumericVar *base, const NumericVar *exp,
+ NumericVar *result);
+static void power_var_int(const NumericVar *base, int exp, NumericVar *result,
+ int rscale);
+static void power_ten_int(int exp, NumericVar *result);
+
+static int cmp_abs(const NumericVar *var1, const NumericVar *var2);
+static int cmp_abs_common(const NumericDigit *var1digits, int var1ndigits,
+ int var1weight,
+ const NumericDigit *var2digits, int var2ndigits,
+ int var2weight);
+static void add_abs(const NumericVar *var1, const NumericVar *var2,
+ NumericVar *result);
+static void sub_abs(const NumericVar *var1, const NumericVar *var2,
+ NumericVar *result);
+static void round_var(NumericVar *var, int rscale);
+static void trunc_var(NumericVar *var, int rscale);
+static void strip_var(NumericVar *var);
+static void compute_bucket(Numeric operand, Numeric bound1, Numeric bound2,
+ const NumericVar *count_var, bool reversed_bounds,
+ NumericVar *result_var);
+
+static void accum_sum_add(NumericSumAccum *accum, const NumericVar *var1);
+static void accum_sum_rescale(NumericSumAccum *accum, const NumericVar *val);
+static void accum_sum_carry(NumericSumAccum *accum);
+static void accum_sum_reset(NumericSumAccum *accum);
+static void accum_sum_final(NumericSumAccum *accum, NumericVar *result);
+static void accum_sum_copy(NumericSumAccum *dst, NumericSumAccum *src);
+static void accum_sum_combine(NumericSumAccum *accum, NumericSumAccum *accum2);
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Input-, output- and rounding-functions
+ *
+ * ----------------------------------------------------------------------
+ */
+
+
+/*
+ * numeric_in() -
+ *
+ * Input function for numeric data type
+ */
+Datum
+numeric_in(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 typmod = PG_GETARG_INT32(2);
+ Numeric res;
+ const char *cp;
+
+ /* Skip leading spaces */
+ cp = str;
+ while (*cp)
+ {
+ if (!isspace((unsigned char) *cp))
+ break;
+ cp++;
+ }
+
+ /*
+ * Check for NaN and infinities. We recognize the same strings allowed by
+ * float8in().
+ */
+ if (pg_strncasecmp(cp, "NaN", 3) == 0)
+ {
+ res = make_result(&const_nan);
+ cp += 3;
+ }
+ else if (pg_strncasecmp(cp, "Infinity", 8) == 0)
+ {
+ res = make_result(&const_pinf);
+ cp += 8;
+ }
+ else if (pg_strncasecmp(cp, "+Infinity", 9) == 0)
+ {
+ res = make_result(&const_pinf);
+ cp += 9;
+ }
+ else if (pg_strncasecmp(cp, "-Infinity", 9) == 0)
+ {
+ res = make_result(&const_ninf);
+ cp += 9;
+ }
+ else if (pg_strncasecmp(cp, "inf", 3) == 0)
+ {
+ res = make_result(&const_pinf);
+ cp += 3;
+ }
+ else if (pg_strncasecmp(cp, "+inf", 4) == 0)
+ {
+ res = make_result(&const_pinf);
+ cp += 4;
+ }
+ else if (pg_strncasecmp(cp, "-inf", 4) == 0)
+ {
+ res = make_result(&const_ninf);
+ cp += 4;
+ }
+ else
+ {
+ /*
+ * Use set_var_from_str() to parse a normal numeric value
+ */
+ NumericVar value;
+
+ init_var(&value);
+
+ cp = set_var_from_str(str, cp, &value);
+
+ /*
+ * We duplicate a few lines of code here because we would like to
+ * throw any trailing-junk syntax error before any semantic error
+ * resulting from apply_typmod. We can't easily fold the two cases
+ * together because we mustn't apply apply_typmod to a NaN/Inf.
+ */
+ while (*cp)
+ {
+ if (!isspace((unsigned char) *cp))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "numeric", str)));
+ cp++;
+ }
+
+ apply_typmod(&value, typmod);
+
+ res = make_result(&value);
+ free_var(&value);
+
+ PG_RETURN_NUMERIC(res);
+ }
+
+ /* Should be nothing left but spaces */
+ while (*cp)
+ {
+ if (!isspace((unsigned char) *cp))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "numeric", str)));
+ cp++;
+ }
+
+ /* As above, throw any typmod error after finishing syntax check */
+ apply_typmod_special(res, typmod);
+
+ PG_RETURN_NUMERIC(res);
+}
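+
+/*
+ * A rough worked example of the behavior above (illustrative values only):
+ * with a typmod corresponding to numeric(5,2), the input "  123.456 " is
+ * accepted, the surrounding whitespace is skipped, and apply_typmod() rounds
+ * the value to 123.46.  The special spellings "NaN", "Infinity", "+inf",
+ * "-inf", etc. are matched case-insensitively and bypass apply_typmod(),
+ * being checked by apply_typmod_special() instead.
+ */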
+
+
+/*
+ * numeric_out() -
+ *
+ * Output function for numeric data type
+ */
+Datum
+numeric_out(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+ NumericVar x;
+ char *str;
+
+ /*
+ * Handle NaN and infinities
+ */
+ if (NUMERIC_IS_SPECIAL(num))
+ {
+ if (NUMERIC_IS_PINF(num))
+ PG_RETURN_CSTRING(pstrdup("Infinity"));
+ else if (NUMERIC_IS_NINF(num))
+ PG_RETURN_CSTRING(pstrdup("-Infinity"));
+ else
+ PG_RETURN_CSTRING(pstrdup("NaN"));
+ }
+
+ /*
+ * Get the number in the variable format.
+ */
+ init_var_from_num(num, &x);
+
+ str = get_str_from_var(&x);
+
+ PG_RETURN_CSTRING(str);
+}
+
+/*
+ * numeric_is_nan() -
+ *
+ * Is Numeric value a NaN?
+ */
+bool
+numeric_is_nan(Numeric num)
+{
+ return NUMERIC_IS_NAN(num);
+}
+
+/*
+ * numeric_is_inf() -
+ *
+ * Is Numeric value an infinity?
+ */
+bool
+numeric_is_inf(Numeric num)
+{
+ return NUMERIC_IS_INF(num);
+}
+
+/*
+ * numeric_is_integral() -
+ *
+ * Is Numeric value integral?
+ */
+static bool
+numeric_is_integral(Numeric num)
+{
+ NumericVar arg;
+
+ /* Reject NaN, but infinities are considered integral */
+ if (NUMERIC_IS_SPECIAL(num))
+ {
+ if (NUMERIC_IS_NAN(num))
+ return false;
+ return true;
+ }
+
+ /* Integral if there are no digits to the right of the decimal point */
+ init_var_from_num(num, &arg);
+
+ return (arg.ndigits == 0 || arg.ndigits <= arg.weight + 1);
+}
+
+/*
+ * make_numeric_typmod() -
+ *
+ * Pack numeric precision and scale values into a typmod. The upper 16 bits
+ * are used for the precision (though actually not all these bits are needed,
+ * since the maximum allowed precision is 1000). The lower 16 bits are for
+ * the scale, but since the scale is constrained to the range [-1000, 1000],
+ * we use just the lower 11 of those 16 bits, and leave the remaining 5 bits
+ * unset, for possible future use.
+ *
+ * For purely historical reasons VARHDRSZ is then added to the result, thus
+ * the unused space in the upper 16 bits is not all as freely available as it
+ * might seem. (We can't let the result overflow to a negative int32, as
+ * other parts of the system would interpret that as not-a-valid-typmod.)
+ */
+static inline int32
+make_numeric_typmod(int precision, int scale)
+{
+ return ((precision << 16) | (scale & 0x7ff)) + VARHDRSZ;
+}
+
+/*
+ * Because of the offset, valid numeric typmods are at least VARHDRSZ
+ */
+static inline bool
+is_valid_numeric_typmod(int32 typmod)
+{
+ return typmod >= (int32) VARHDRSZ;
+}
+
+/*
+ * numeric_typmod_precision() -
+ *
+ * Extract the precision from a numeric typmod --- see make_numeric_typmod().
+ */
+static inline int
+numeric_typmod_precision(int32 typmod)
+{
+ return ((typmod - VARHDRSZ) >> 16) & 0xffff;
+}
+
+/*
+ * numeric_typmod_scale() -
+ *
+ * Extract the scale from a numeric typmod --- see make_numeric_typmod().
+ *
+ * Note that the scale may be negative, so we must do sign extension when
+ * unpacking it. We do this using the bit hack (x^1024)-1024, which sign
+ * extends an 11-bit two's complement number x.
+ */
+static inline int
+numeric_typmod_scale(int32 typmod)
+{
+ return (((typmod - VARHDRSZ) & 0x7ff) ^ 1024) - 1024;
+}
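+
+/*
+ * Worked example of the packing above (illustrative figures, assuming the
+ * usual VARHDRSZ of 4): make_numeric_typmod(10, 2) for numeric(10,2) yields
+ * ((10 << 16) | 2) + 4 = 655366.  Unpacking gives precision
+ * ((655366 - 4) >> 16) & 0xffff = 10 and scale
+ * (((655366 - 4) & 0x7ff) ^ 1024) - 1024 = 2.  For a negative scale such as
+ * -2 the stored 11-bit field is 0x7fe, and the same sign-extension hack
+ * recovers (0x7fe ^ 1024) - 1024 = -2.
+ */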
+
+/*
+ * numeric_maximum_size() -
+ *
+ * Maximum size of a numeric with given typmod, or -1 if unlimited/unknown.
+ */
+int32
+numeric_maximum_size(int32 typmod)
+{
+ int precision;
+ int numeric_digits;
+
+ if (!is_valid_numeric_typmod(typmod))
+ return -1;
+
+ /* precision (ie, max # of digits) is in upper bits of typmod */
+ precision = numeric_typmod_precision(typmod);
+
+ /*
+ * This formula computes the maximum number of NumericDigits we could need
+ * in order to store the specified number of decimal digits. Because the
+ * weight is stored as a number of NumericDigits rather than a number of
+ * decimal digits, it's possible that the first NumericDigit will contain
+ * only a single decimal digit. Thus, the first two decimal digits can
+ * require two NumericDigits to store, but it isn't until we reach
+ * DEC_DIGITS + 2 decimal digits that we potentially need a third
+ * NumericDigit.
+ */
+ numeric_digits = (precision + 2 * (DEC_DIGITS - 1)) / DEC_DIGITS;
+
+ /*
+ * In most cases, the size of a numeric will be smaller than the value
+ * computed below, because the varlena header will typically get toasted
+ * down to a single byte before being stored on disk, and it may also be
+ * possible to use a short numeric header. But our job here is to compute
+ * the worst case.
+ */
+ return NUMERIC_HDRSZ + (numeric_digits * sizeof(NumericDigit));
+}
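+
+/*
+ * Illustrative example of the computation above, assuming DEC_DIGITS = 4 and
+ * two-byte NumericDigits: for numeric(7,s), numeric_digits =
+ * (7 + 2 * 3) / 4 = 3, so the worst-case size reported is
+ * NUMERIC_HDRSZ + 3 * sizeof(NumericDigit) bytes.
+ */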
+
+/*
+ * numeric_out_sci() -
+ *
+ * Output function for numeric data type in scientific notation.
+ */
+char *
+numeric_out_sci(Numeric num, int scale)
+{
+ NumericVar x;
+ char *str;
+
+ /*
+ * Handle NaN and infinities
+ */
+ if (NUMERIC_IS_SPECIAL(num))
+ {
+ if (NUMERIC_IS_PINF(num))
+ return pstrdup("Infinity");
+ else if (NUMERIC_IS_NINF(num))
+ return pstrdup("-Infinity");
+ else
+ return pstrdup("NaN");
+ }
+
+ init_var_from_num(num, &x);
+
+ str = get_str_from_var_sci(&x, scale);
+
+ return str;
+}
+
+/*
+ * numeric_normalize() -
+ *
+ * Output function for numeric data type, suppressing insignificant trailing
+ * zeroes and then any trailing decimal point. The intent of this is to
+ * produce strings that are equal if and only if the input numeric values
+ * compare equal.
+ */
+char *
+numeric_normalize(Numeric num)
+{
+ NumericVar x;
+ char *str;
+ int last;
+
+ /*
+ * Handle NaN and infinities
+ */
+ if (NUMERIC_IS_SPECIAL(num))
+ {
+ if (NUMERIC_IS_PINF(num))
+ return pstrdup("Infinity");
+ else if (NUMERIC_IS_NINF(num))
+ return pstrdup("-Infinity");
+ else
+ return pstrdup("NaN");
+ }
+
+ init_var_from_num(num, &x);
+
+ str = get_str_from_var(&x);
+
+ /* If there's no decimal point, there's certainly nothing to remove. */
+ if (strchr(str, '.') != NULL)
+ {
+ /*
+ * Back up over trailing fractional zeroes. Since there is a decimal
+ * point, this loop will terminate safely.
+ */
+ last = strlen(str) - 1;
+ while (str[last] == '0')
+ last--;
+
+ /* We want to get rid of the decimal point too, if it's now last. */
+ if (str[last] == '.')
+ last--;
+
+ /* Delete whatever we backed up over. */
+ str[last + 1] = '\0';
+ }
+
+ return str;
+}
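+
+/*
+ * For illustration: under the rules above, 12.300 normalizes to "12.3" and
+ * 15.000 normalizes to "15", so two numerics that compare equal produce the
+ * same string even when their display scales differ.
+ */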
+
+/*
+ * numeric_recv - converts external binary format to numeric
+ *
+ * External format is a sequence of int16's:
+ * ndigits, weight, sign, dscale, NumericDigits.
+ */
+Datum
+numeric_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 typmod = PG_GETARG_INT32(2);
+ NumericVar value;
+ Numeric res;
+ int len,
+ i;
+
+ init_var(&value);
+
+ len = (uint16) pq_getmsgint(buf, sizeof(uint16));
+
+ alloc_var(&value, len);
+
+ value.weight = (int16) pq_getmsgint(buf, sizeof(int16));
+ /* we allow any int16 for weight --- OK? */
+
+ value.sign = (uint16) pq_getmsgint(buf, sizeof(uint16));
+ if (!(value.sign == NUMERIC_POS ||
+ value.sign == NUMERIC_NEG ||
+ value.sign == NUMERIC_NAN ||
+ value.sign == NUMERIC_PINF ||
+ value.sign == NUMERIC_NINF))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("invalid sign in external \"numeric\" value")));
+
+ value.dscale = (uint16) pq_getmsgint(buf, sizeof(uint16));
+ if ((value.dscale & NUMERIC_DSCALE_MASK) != value.dscale)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("invalid scale in external \"numeric\" value")));
+
+ for (i = 0; i < len; i++)
+ {
+ NumericDigit d = pq_getmsgint(buf, sizeof(NumericDigit));
+
+ if (d < 0 || d >= NBASE)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("invalid digit in external \"numeric\" value")));
+ value.digits[i] = d;
+ }
+
+ /*
+ * If the given dscale would hide any digits, truncate those digits away.
+ * We could alternatively throw an error, but that would take a bunch of
+ * extra code (about as much as trunc_var involves), and it might cause
+ * client compatibility issues. Be careful not to apply trunc_var to
+ * special values, as it could do the wrong thing; we don't need it
+ * anyway, since make_result will ignore all but the sign field.
+ *
+ * After doing that, be sure to check the typmod restriction.
+ */
+ if (value.sign == NUMERIC_POS ||
+ value.sign == NUMERIC_NEG)
+ {
+ trunc_var(&value, value.dscale);
+
+ apply_typmod(&value, typmod);
+
+ res = make_result(&value);
+ }
+ else
+ {
+ /* apply_typmod_special wants us to make the Numeric first */
+ res = make_result(&value);
+
+ apply_typmod_special(res, typmod);
+ }
+
+ free_var(&value);
+
+ PG_RETURN_NUMERIC(res);
+}
+
+/*
+ * numeric_send - converts numeric to binary format
+ */
+Datum
+numeric_send(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+ NumericVar x;
+ StringInfoData buf;
+ int i;
+
+ init_var_from_num(num, &x);
+
+ pq_begintypsend(&buf);
+
+ pq_sendint16(&buf, x.ndigits);
+ pq_sendint16(&buf, x.weight);
+ pq_sendint16(&buf, x.sign);
+ pq_sendint16(&buf, x.dscale);
+ for (i = 0; i < x.ndigits; i++)
+ pq_sendint16(&buf, x.digits[i]);
+
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
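+
+/*
+ * A sketch of the wire format used by numeric_recv()/numeric_send()
+ * (illustrative, assuming the usual NBASE of 10000): the value 12.34 is
+ * transmitted as the int16 sequence
+ *     ndigits = 2, weight = 0, sign = NUMERIC_POS, dscale = 2
+ * followed by the digits 12 and 3400, i.e. 12 * 10000^0 + 3400 * 10000^(-1).
+ */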
+
+
+/*
+ * numeric_support()
+ *
+ * Planner support function for the numeric() length coercion function.
+ *
+ * Flatten calls that solely represent increases in allowable precision.
+ * Scale changes mutate every datum, so they are unoptimizable. Some values,
+ * e.g. 1E-1001, can only fit into an unconstrained numeric, so a change from
+ * an unconstrained numeric to any constrained numeric is also unoptimizable.
+ */
+Datum
+numeric_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+ Node *ret = NULL;
+
+ if (IsA(rawreq, SupportRequestSimplify))
+ {
+ SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq;
+ FuncExpr *expr = req->fcall;
+ Node *typmod;
+
+ Assert(list_length(expr->args) >= 2);
+
+ typmod = (Node *) lsecond(expr->args);
+
+ if (IsA(typmod, Const) && !((Const *) typmod)->constisnull)
+ {
+ Node *source = (Node *) linitial(expr->args);
+ int32 old_typmod = exprTypmod(source);
+ int32 new_typmod = DatumGetInt32(((Const *) typmod)->constvalue);
+ int32 old_scale = numeric_typmod_scale(old_typmod);
+ int32 new_scale = numeric_typmod_scale(new_typmod);
+ int32 old_precision = numeric_typmod_precision(old_typmod);
+ int32 new_precision = numeric_typmod_precision(new_typmod);
+
+ /*
+ * If new_typmod is invalid, the destination is unconstrained;
+ * that's always OK. If old_typmod is valid, the source is
+ * constrained, and we're OK if the scale is unchanged and the
+ * precision is not decreasing. See further notes in function
+ * header comment.
+ */
+ if (!is_valid_numeric_typmod(new_typmod) ||
+ (is_valid_numeric_typmod(old_typmod) &&
+ new_scale == old_scale && new_precision >= old_precision))
+ ret = relabel_to_typmod(source, new_typmod);
+ }
+ }
+
+ PG_RETURN_POINTER(ret);
+}
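+
+/*
+ * Concretely, as an illustrative example of the rule above: a cast from
+ * numeric(10,2) to numeric(12,2) only widens the allowed precision, so it is
+ * flattened into a no-op relabeling; a cast to numeric(12,3) changes the
+ * scale and therefore still requires a run-time call to numeric().
+ */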
+
+/*
+ * numeric() -
+ *
+ * This is a special function called by the Postgres database system
+ * before a value is stored in a tuple's attribute. The precision and
+ * scale of the attribute have to be applied to the value.
+ */
+Datum
+numeric (PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+ int32 typmod = PG_GETARG_INT32(1);
+ Numeric new;
+ int precision;
+ int scale;
+ int ddigits;
+ int maxdigits;
+ int dscale;
+ NumericVar var;
+
+ /*
+ * Handle NaN and infinities: if apply_typmod_special doesn't complain,
+ * just return a copy of the input.
+ */
+ if (NUMERIC_IS_SPECIAL(num))
+ {
+ apply_typmod_special(num, typmod);
+ PG_RETURN_NUMERIC(duplicate_numeric(num));
+ }
+
+ /*
+ * If the value isn't a valid type modifier, simply return a copy of the
+ * input value
+ */
+ if (!is_valid_numeric_typmod(typmod))
+ PG_RETURN_NUMERIC(duplicate_numeric(num));
+
+ /*
+ * Get the precision and scale out of the typmod value
+ */
+ precision = numeric_typmod_precision(typmod);
+ scale = numeric_typmod_scale(typmod);
+ maxdigits = precision - scale;
+
+ /* The target display scale is non-negative */
+ dscale = Max(scale, 0);
+
+ /*
+ * If the number is certainly in bounds and due to the target scale no
+ * rounding could be necessary, just make a copy of the input and modify
+ * its scale fields, unless the larger scale forces us to abandon the
+ * short representation. (Note we assume the existing dscale is
+ * honest...)
+ */
+ ddigits = (NUMERIC_WEIGHT(num) + 1) * DEC_DIGITS;
+ if (ddigits <= maxdigits && scale >= NUMERIC_DSCALE(num)
+ && (NUMERIC_CAN_BE_SHORT(dscale, NUMERIC_WEIGHT(num))
+ || !NUMERIC_IS_SHORT(num)))
+ {
+ new = duplicate_numeric(num);
+ if (NUMERIC_IS_SHORT(num))
+ new->choice.n_short.n_header =
+ (num->choice.n_short.n_header & ~NUMERIC_SHORT_DSCALE_MASK)
+ | (dscale << NUMERIC_SHORT_DSCALE_SHIFT);
+ else
+ new->choice.n_long.n_sign_dscale = NUMERIC_SIGN(new) |
+ ((uint16) dscale & NUMERIC_DSCALE_MASK);
+ PG_RETURN_NUMERIC(new);
+ }
+
+ /*
+ * We really need to fiddle with things - unpack the number into a
+ * variable and let apply_typmod() do it.
+ */
+ init_var(&var);
+
+ set_var_from_num(num, &var);
+ apply_typmod(&var, typmod);
+ new = make_result(&var);
+
+ free_var(&var);
+
+ PG_RETURN_NUMERIC(new);
+}
+
+Datum
+numerictypmodin(PG_FUNCTION_ARGS)
+{
+ ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
+ int32 *tl;
+ int n;
+ int32 typmod;
+
+ tl = ArrayGetIntegerTypmods(ta, &n);
+
+ if (n == 2)
+ {
+ if (tl[0] < 1 || tl[0] > NUMERIC_MAX_PRECISION)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("NUMERIC precision %d must be between 1 and %d",
+ tl[0], NUMERIC_MAX_PRECISION)));
+ if (tl[1] < NUMERIC_MIN_SCALE || tl[1] > NUMERIC_MAX_SCALE)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("NUMERIC scale %d must be between %d and %d",
+ tl[1], NUMERIC_MIN_SCALE, NUMERIC_MAX_SCALE)));
+ typmod = make_numeric_typmod(tl[0], tl[1]);
+ }
+ else if (n == 1)
+ {
+ if (tl[0] < 1 || tl[0] > NUMERIC_MAX_PRECISION)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("NUMERIC precision %d must be between 1 and %d",
+ tl[0], NUMERIC_MAX_PRECISION)));
+ /* scale defaults to zero */
+ typmod = make_numeric_typmod(tl[0], 0);
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid NUMERIC type modifier")));
+ typmod = 0; /* keep compiler quiet */
+ }
+
+ PG_RETURN_INT32(typmod);
+}
+
+Datum
+numerictypmodout(PG_FUNCTION_ARGS)
+{
+ int32 typmod = PG_GETARG_INT32(0);
+ char *res = (char *) palloc(64);
+
+ if (is_valid_numeric_typmod(typmod))
+ snprintf(res, 64, "(%d,%d)",
+ numeric_typmod_precision(typmod),
+ numeric_typmod_scale(typmod));
+ else
+ *res = '\0';
+
+ PG_RETURN_CSTRING(res);
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Sign manipulation, rounding and the like
+ *
+ * ----------------------------------------------------------------------
+ */
+
+Datum
+numeric_abs(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+ Numeric res;
+
+ /*
+ * Do it the easy way directly on the packed format
+ */
+ res = duplicate_numeric(num);
+
+ if (NUMERIC_IS_SHORT(num))
+ res->choice.n_short.n_header =
+ num->choice.n_short.n_header & ~NUMERIC_SHORT_SIGN_MASK;
+ else if (NUMERIC_IS_SPECIAL(num))
+ {
+ /* This changes -Inf to Inf, and doesn't affect NaN */
+ res->choice.n_short.n_header =
+ num->choice.n_short.n_header & ~NUMERIC_INF_SIGN_MASK;
+ }
+ else
+ res->choice.n_long.n_sign_dscale = NUMERIC_POS | NUMERIC_DSCALE(num);
+
+ PG_RETURN_NUMERIC(res);
+}
+
+
+Datum
+numeric_uminus(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+ Numeric res;
+
+ /*
+ * Do it the easy way directly on the packed format
+ */
+ res = duplicate_numeric(num);
+
+ if (NUMERIC_IS_SPECIAL(num))
+ {
+ /* Flip the sign, if it's Inf or -Inf */
+ if (!NUMERIC_IS_NAN(num))
+ res->choice.n_short.n_header =
+ num->choice.n_short.n_header ^ NUMERIC_INF_SIGN_MASK;
+ }
+
+ /*
+	 * The packed format is always fully stripped of leading and trailing zero
+	 * digits, so once we've eliminated specials, we can identify a zero by the
+	 * fact that there are no digits at all. Do nothing to a zero.
+ */
+ else if (NUMERIC_NDIGITS(num) != 0)
+ {
+ /* Else, flip the sign */
+ if (NUMERIC_IS_SHORT(num))
+ res->choice.n_short.n_header =
+ num->choice.n_short.n_header ^ NUMERIC_SHORT_SIGN_MASK;
+ else if (NUMERIC_SIGN(num) == NUMERIC_POS)
+ res->choice.n_long.n_sign_dscale =
+ NUMERIC_NEG | NUMERIC_DSCALE(num);
+ else
+ res->choice.n_long.n_sign_dscale =
+ NUMERIC_POS | NUMERIC_DSCALE(num);
+ }
+
+ PG_RETURN_NUMERIC(res);
+}
+
+
+Datum
+numeric_uplus(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+
+ PG_RETURN_NUMERIC(duplicate_numeric(num));
+}
+
+
+/*
+ * numeric_sign_internal() -
+ *
+ * Returns -1 if the argument is less than 0, 0 if the argument is equal
+ * to 0, and 1 if the argument is greater than zero. Caller must have
+ * taken care of the NaN case, but we can handle infinities here.
+ */
+static int
+numeric_sign_internal(Numeric num)
+{
+ if (NUMERIC_IS_SPECIAL(num))
+ {
+ Assert(!NUMERIC_IS_NAN(num));
+ /* Must be Inf or -Inf */
+ if (NUMERIC_IS_PINF(num))
+ return 1;
+ else
+ return -1;
+ }
+
+ /*
+	 * The packed format is always fully stripped of leading and trailing zero
+	 * digits, so once we've eliminated specials, we can identify a zero by the
+	 * fact that there are no digits at all.
+ */
+ else if (NUMERIC_NDIGITS(num) == 0)
+ return 0;
+ else if (NUMERIC_SIGN(num) == NUMERIC_NEG)
+ return -1;
+ else
+ return 1;
+}
+
+/*
+ * numeric_sign() -
+ *
+ * returns -1 if the argument is less than 0, 0 if the argument is equal
+ * to 0, and 1 if the argument is greater than zero.
+ */
+Datum
+numeric_sign(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+
+ /*
+ * Handle NaN (infinities can be handled normally)
+ */
+ if (NUMERIC_IS_NAN(num))
+ PG_RETURN_NUMERIC(make_result(&const_nan));
+
+ switch (numeric_sign_internal(num))
+ {
+ case 0:
+ PG_RETURN_NUMERIC(make_result(&const_zero));
+ case 1:
+ PG_RETURN_NUMERIC(make_result(&const_one));
+ case -1:
+ PG_RETURN_NUMERIC(make_result(&const_minus_one));
+ }
+
+ Assert(false);
+ return (Datum) 0;
+}
+
+
+/*
+ * numeric_round() -
+ *
+ * Round a value to have 'scale' digits after the decimal point.
+ * We allow negative 'scale', implying rounding before the decimal
+ * point --- Oracle interprets rounding that way.
+ */
+Datum
+numeric_round(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+ int32 scale = PG_GETARG_INT32(1);
+ Numeric res;
+ NumericVar arg;
+
+ /*
+ * Handle NaN and infinities
+ */
+ if (NUMERIC_IS_SPECIAL(num))
+ PG_RETURN_NUMERIC(duplicate_numeric(num));
+
+ /*
+ * Limit the scale value to avoid possible overflow in calculations
+ */
+ scale = Max(scale, -NUMERIC_MAX_RESULT_SCALE);
+ scale = Min(scale, NUMERIC_MAX_RESULT_SCALE);
+
+ /*
+ * Unpack the argument and round it at the proper digit position
+ */
+ init_var(&arg);
+ set_var_from_num(num, &arg);
+
+ round_var(&arg, scale);
+
+ /* We don't allow negative output dscale */
+ if (scale < 0)
+ arg.dscale = 0;
+
+ /*
+ * Return the rounded result
+ */
+ res = make_result(&arg);
+
+ free_var(&arg);
+ PG_RETURN_NUMERIC(res);
+}
+
+
+/*
+ * numeric_trunc() -
+ *
+ * Truncate a value to have 'scale' digits after the decimal point.
+ * We allow negative 'scale', implying a truncation before the decimal
+ * point --- Oracle interprets truncation that way.
+ */
+Datum
+numeric_trunc(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+ int32 scale = PG_GETARG_INT32(1);
+ Numeric res;
+ NumericVar arg;
+
+ /*
+ * Handle NaN and infinities
+ */
+ if (NUMERIC_IS_SPECIAL(num))
+ PG_RETURN_NUMERIC(duplicate_numeric(num));
+
+ /*
+ * Limit the scale value to avoid possible overflow in calculations
+ */
+ scale = Max(scale, -NUMERIC_MAX_RESULT_SCALE);
+ scale = Min(scale, NUMERIC_MAX_RESULT_SCALE);
+
+ /*
+ * Unpack the argument and truncate it at the proper digit position
+ */
+ init_var(&arg);
+ set_var_from_num(num, &arg);
+
+ trunc_var(&arg, scale);
+
+ /* We don't allow negative output dscale */
+ if (scale < 0)
+ arg.dscale = 0;
+
+ /*
+ * Return the truncated result
+ */
+ res = make_result(&arg);
+
+ free_var(&arg);
+ PG_RETURN_NUMERIC(res);
+}
+
+
+/*
+ * numeric_ceil() -
+ *
+ * Return the smallest integer greater than or equal to the argument
+ */
+Datum
+numeric_ceil(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+ Numeric res;
+ NumericVar result;
+
+ /*
+ * Handle NaN and infinities
+ */
+ if (NUMERIC_IS_SPECIAL(num))
+ PG_RETURN_NUMERIC(duplicate_numeric(num));
+
+ init_var_from_num(num, &result);
+ ceil_var(&result, &result);
+
+ res = make_result(&result);
+ free_var(&result);
+
+ PG_RETURN_NUMERIC(res);
+}
+
+
+/*
+ * numeric_floor() -
+ *
+ * Return the largest integer equal to or less than the argument
+ */
+Datum
+numeric_floor(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+ Numeric res;
+ NumericVar result;
+
+ /*
+ * Handle NaN and infinities
+ */
+ if (NUMERIC_IS_SPECIAL(num))
+ PG_RETURN_NUMERIC(duplicate_numeric(num));
+
+ init_var_from_num(num, &result);
+ floor_var(&result, &result);
+
+ res = make_result(&result);
+ free_var(&result);
+
+ PG_RETURN_NUMERIC(res);
+}
+
+
+/*
+ * generate_series_numeric() -
+ *
+ * Generate a series of numeric values.
+ */
+Datum
+generate_series_numeric(PG_FUNCTION_ARGS)
+{
+ return generate_series_step_numeric(fcinfo);
+}
+
+Datum
+generate_series_step_numeric(PG_FUNCTION_ARGS)
+{
+ generate_series_numeric_fctx *fctx;
+ FuncCallContext *funcctx;
+ MemoryContext oldcontext;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ Numeric start_num = PG_GETARG_NUMERIC(0);
+ Numeric stop_num = PG_GETARG_NUMERIC(1);
+ NumericVar steploc = const_one;
+
+ /* Reject NaN and infinities in start and stop values */
+ if (NUMERIC_IS_SPECIAL(start_num))
+ {
+ if (NUMERIC_IS_NAN(start_num))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("start value cannot be NaN")));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("start value cannot be infinity")));
+ }
+ if (NUMERIC_IS_SPECIAL(stop_num))
+ {
+ if (NUMERIC_IS_NAN(stop_num))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("stop value cannot be NaN")));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("stop value cannot be infinity")));
+ }
+
+ /* see if we were given an explicit step size */
+ if (PG_NARGS() == 3)
+ {
+ Numeric step_num = PG_GETARG_NUMERIC(2);
+
+ if (NUMERIC_IS_SPECIAL(step_num))
+ {
+ if (NUMERIC_IS_NAN(step_num))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("step size cannot be NaN")));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("step size cannot be infinity")));
+ }
+
+ init_var_from_num(step_num, &steploc);
+
+ if (cmp_var(&steploc, &const_zero) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("step size cannot equal zero")));
+ }
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ /*
+ * Switch to memory context appropriate for multiple function calls.
+ */
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ /* allocate memory for user context */
+ fctx = (generate_series_numeric_fctx *)
+ palloc(sizeof(generate_series_numeric_fctx));
+
+ /*
+ * Use fctx to keep state from call to call. Seed current with the
+ * original start value. We must copy the start_num and stop_num
+ * values rather than pointing to them, since we may have detoasted
+ * them in the per-call context.
+ */
+ init_var(&fctx->current);
+ init_var(&fctx->stop);
+ init_var(&fctx->step);
+
+ set_var_from_num(start_num, &fctx->current);
+ set_var_from_num(stop_num, &fctx->stop);
+ set_var_from_var(&steploc, &fctx->step);
+
+ funcctx->user_fctx = fctx;
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ /* stuff done on every call of the function */
+ funcctx = SRF_PERCALL_SETUP();
+
+ /*
+ * Get the saved state and use current state as the result of this
+ * iteration.
+ */
+ fctx = funcctx->user_fctx;
+
+ if ((fctx->step.sign == NUMERIC_POS &&
+ cmp_var(&fctx->current, &fctx->stop) <= 0) ||
+ (fctx->step.sign == NUMERIC_NEG &&
+ cmp_var(&fctx->current, &fctx->stop) >= 0))
+ {
+ Numeric result = make_result(&fctx->current);
+
+ /* switch to memory context appropriate for iteration calculation */
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ /* increment current in preparation for next iteration */
+ add_var(&fctx->current, &fctx->step, &fctx->current);
+ MemoryContextSwitchTo(oldcontext);
+
+ /* do when there is more left to send */
+ SRF_RETURN_NEXT(funcctx, NumericGetDatum(result));
+ }
+ else
+ /* do when there is no more left */
+ SRF_RETURN_DONE(funcctx);
+}
+
+
+/*
+ * Implements the numeric version of the width_bucket() function
+ * defined by SQL2003. See also width_bucket_float8().
+ *
+ * 'bound1' and 'bound2' are the lower and upper bounds of the
+ * histogram's range, respectively. 'count' is the number of buckets
+ * in the histogram. width_bucket() returns an integer indicating the
+ * bucket number that 'operand' belongs to in an equiwidth histogram
+ * with the specified characteristics. An operand smaller than the
+ * lower bound is assigned to bucket 0. An operand greater than the
+ * upper bound is assigned to an additional bucket (with number
+ * count+1). We don't allow "NaN" for any of the numeric arguments.
+ */
+Datum
+width_bucket_numeric(PG_FUNCTION_ARGS)
+{
+ Numeric operand = PG_GETARG_NUMERIC(0);
+ Numeric bound1 = PG_GETARG_NUMERIC(1);
+ Numeric bound2 = PG_GETARG_NUMERIC(2);
+ int32 count = PG_GETARG_INT32(3);
+ NumericVar count_var;
+ NumericVar result_var;
+ int32 result;
+
+ if (count <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
+ errmsg("count must be greater than zero")));
+
+ if (NUMERIC_IS_SPECIAL(operand) ||
+ NUMERIC_IS_SPECIAL(bound1) ||
+ NUMERIC_IS_SPECIAL(bound2))
+ {
+ if (NUMERIC_IS_NAN(operand) ||
+ NUMERIC_IS_NAN(bound1) ||
+ NUMERIC_IS_NAN(bound2))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
+ errmsg("operand, lower bound, and upper bound cannot be NaN")));
+ /* We allow "operand" to be infinite; cmp_numerics will cope */
+ if (NUMERIC_IS_INF(bound1) || NUMERIC_IS_INF(bound2))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
+ errmsg("lower and upper bounds must be finite")));
+ }
+
+ init_var(&result_var);
+ init_var(&count_var);
+
+ /* Convert 'count' to a numeric, for ease of use later */
+ int64_to_numericvar((int64) count, &count_var);
+
+ switch (cmp_numerics(bound1, bound2))
+ {
+ case 0:
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_WIDTH_BUCKET_FUNCTION),
+ errmsg("lower bound cannot equal upper bound")));
+ break;
+
+ /* bound1 < bound2 */
+ case -1:
+ if (cmp_numerics(operand, bound1) < 0)
+ set_var_from_var(&const_zero, &result_var);
+ else if (cmp_numerics(operand, bound2) >= 0)
+ add_var(&count_var, &const_one, &result_var);
+ else
+ compute_bucket(operand, bound1, bound2, &count_var, false,
+ &result_var);
+ break;
+
+ /* bound1 > bound2 */
+ case 1:
+ if (cmp_numerics(operand, bound1) > 0)
+ set_var_from_var(&const_zero, &result_var);
+ else if (cmp_numerics(operand, bound2) <= 0)
+ add_var(&count_var, &const_one, &result_var);
+ else
+ compute_bucket(operand, bound1, bound2, &count_var, true,
+ &result_var);
+ break;
+ }
+
+ /* if result exceeds the range of a legal int4, we ereport here */
+ if (!numericvar_to_int32(&result_var, &result))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+
+ free_var(&count_var);
+ free_var(&result_var);
+
+ PG_RETURN_INT32(result);
+}
+
+/*
+ * If 'operand' is not outside the bucket range, determine the correct
+ * bucket for it to go in. The calculations performed by this function
+ * are derived directly from the SQL2003 spec. Note however that we
+ * multiply by count before dividing, to avoid unnecessary roundoff error.
+ */
+static void
+compute_bucket(Numeric operand, Numeric bound1, Numeric bound2,
+ const NumericVar *count_var, bool reversed_bounds,
+ NumericVar *result_var)
+{
+ NumericVar bound1_var;
+ NumericVar bound2_var;
+ NumericVar operand_var;
+
+ init_var_from_num(bound1, &bound1_var);
+ init_var_from_num(bound2, &bound2_var);
+ init_var_from_num(operand, &operand_var);
+
+ if (!reversed_bounds)
+ {
+ sub_var(&operand_var, &bound1_var, &operand_var);
+ sub_var(&bound2_var, &bound1_var, &bound2_var);
+ }
+ else
+ {
+ sub_var(&bound1_var, &operand_var, &operand_var);
+ sub_var(&bound1_var, &bound2_var, &bound2_var);
+ }
+
+ mul_var(&operand_var, count_var, &operand_var,
+ operand_var.dscale + count_var->dscale);
+ div_var(&operand_var, &bound2_var, result_var,
+ select_div_scale(&operand_var, &bound2_var), true);
+ add_var(result_var, &const_one, result_var);
+ floor_var(result_var, result_var);
+
+ free_var(&bound1_var);
+ free_var(&bound2_var);
+ free_var(&operand_var);
+}
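+
+/*
+ * Worked example of the computation above (illustrative numbers): with
+ * operand = 5.5, bound1 = 0, bound2 = 10 and count = 4, we compute
+ * (5.5 - 0) * 4 / (10 - 0) = 2.2, add one and take the floor, assigning the
+ * operand to bucket 3 of the equiwidth histogram.
+ */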
+
+/* ----------------------------------------------------------------------
+ *
+ * Comparison functions
+ *
+ * Note: btree indexes need these routines not to leak memory; therefore,
+ * be careful to free working copies of toasted datums. Most places don't
+ * need to be so careful.
+ *
+ * Sort support:
+ *
+ * We implement the sortsupport strategy routine in order to get the benefit of
+ * abbreviation. The ordinary numeric comparison can be quite slow as a result
+ * of palloc/pfree cycles (due to detoasting packed values for alignment);
+ * while this could be worked on itself, the abbreviation strategy gives more
+ * speedup in many common cases.
+ *
+ * Two different representations are used for the abbreviated form, one in
+ * int32 and one in int64, whichever fits into a by-value Datum. In both cases
+ * the representation is negated relative to the original value, because we use
+ * the largest negative value for NaN, which sorts higher than other values. We
+ * convert the absolute value of the numeric to a 31-bit or 63-bit positive
+ * value, and then negate it if the original number was positive.
+ *
+ * We abort the abbreviation process if the abbreviation cardinality is below
+ * 0.01% of the row count (1 per 10k non-null rows). The actual break-even
+ * point is somewhat below that, perhaps 1 per 30k (at 1 per 100k there's a
+ * very small penalty), but we don't want to build up too many abbreviated
+ * values before first testing for abort, so we take the slightly pessimistic
+ * number. We make no attempt to estimate the cardinality of the real values,
+ * since it plays no part in the cost model here (if the abbreviation is equal,
+ * the cost of comparing equal and unequal underlying values is comparable).
+ * We discontinue even checking for abort (saving us the hashing overhead) if
+ * the estimated cardinality gets to 100k; that would be enough to support many
+ * billions of rows while doing no worse than breaking even.
+ *
+ * ----------------------------------------------------------------------
+ */
+
+/*
+ * Sort support strategy routine.
+ */
+Datum
+numeric_sortsupport(PG_FUNCTION_ARGS)
+{
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+
+ ssup->comparator = numeric_fast_cmp;
+
+ if (ssup->abbreviate)
+ {
+ NumericSortSupport *nss;
+ MemoryContext oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+ nss = palloc(sizeof(NumericSortSupport));
+
+ /*
+ * palloc a buffer for handling unaligned packed values in addition to
+ * the support struct
+ */
+ nss->buf = palloc(VARATT_SHORT_MAX + VARHDRSZ + 1);
+
+ nss->input_count = 0;
+ nss->estimating = true;
+ initHyperLogLog(&nss->abbr_card, 10);
+
+ ssup->ssup_extra = nss;
+
+ ssup->abbrev_full_comparator = ssup->comparator;
+ ssup->comparator = numeric_cmp_abbrev;
+ ssup->abbrev_converter = numeric_abbrev_convert;
+ ssup->abbrev_abort = numeric_abbrev_abort;
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Abbreviate a numeric datum, handling NaNs and detoasting
+ * (must not leak memory!)
+ */
+static Datum
+numeric_abbrev_convert(Datum original_datum, SortSupport ssup)
+{
+ NumericSortSupport *nss = ssup->ssup_extra;
+ void *original_varatt = PG_DETOAST_DATUM_PACKED(original_datum);
+ Numeric value;
+ Datum result;
+
+ nss->input_count += 1;
+
+ /*
+ * This is to handle packed datums without needing a palloc/pfree cycle;
+ * we keep and reuse a buffer large enough to handle any short datum.
+ */
+ if (VARATT_IS_SHORT(original_varatt))
+ {
+ void *buf = nss->buf;
+ Size sz = VARSIZE_SHORT(original_varatt) - VARHDRSZ_SHORT;
+
+ Assert(sz <= VARATT_SHORT_MAX - VARHDRSZ_SHORT);
+
+ SET_VARSIZE(buf, VARHDRSZ + sz);
+ memcpy(VARDATA(buf), VARDATA_SHORT(original_varatt), sz);
+
+ value = (Numeric) buf;
+ }
+ else
+ value = (Numeric) original_varatt;
+
+ if (NUMERIC_IS_SPECIAL(value))
+ {
+ if (NUMERIC_IS_PINF(value))
+ result = NUMERIC_ABBREV_PINF;
+ else if (NUMERIC_IS_NINF(value))
+ result = NUMERIC_ABBREV_NINF;
+ else
+ result = NUMERIC_ABBREV_NAN;
+ }
+ else
+ {
+ NumericVar var;
+
+ init_var_from_num(value, &var);
+
+ result = numeric_abbrev_convert_var(&var, nss);
+ }
+
+ /* should happen only for external/compressed toasts */
+ if ((Pointer) original_varatt != DatumGetPointer(original_datum))
+ pfree(original_varatt);
+
+ return result;
+}
+
+/*
+ * Consider whether to abort abbreviation.
+ *
+ * We pay no attention to the cardinality of the non-abbreviated data. There is
+ * no reason to do so: unlike text, we have no fast check for equal values, so
+ * we pay the full overhead whenever the abbreviations are equal regardless of
+ * whether the underlying values are also equal.
+ */
+static bool
+numeric_abbrev_abort(int memtupcount, SortSupport ssup)
+{
+ NumericSortSupport *nss = ssup->ssup_extra;
+ double abbr_card;
+
+ if (memtupcount < 10000 || nss->input_count < 10000 || !nss->estimating)
+ return false;
+
+ abbr_card = estimateHyperLogLog(&nss->abbr_card);
+
+ /*
+ * If we have >100k distinct values, then even if we were sorting many
+ * billion rows we'd likely still break even, and the penalty of undoing
+ * that many rows of abbrevs would probably not be worth it. Stop even
+ * counting at that point.
+ */
+ if (abbr_card > 100000.0)
+ {
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG,
+ "numeric_abbrev: estimation ends at cardinality %f"
+ " after " INT64_FORMAT " values (%d rows)",
+ abbr_card, nss->input_count, memtupcount);
+#endif
+ nss->estimating = false;
+ return false;
+ }
+
+ /*
+ * Target minimum cardinality is 1 per ~10k of non-null inputs. (The
+ * break even point is somewhere between one per 100k rows, where
+ * abbreviation has a very slight penalty, and 1 per 10k where it wins by
+ * a measurable percentage.) We use the relatively pessimistic 10k
+ * threshold, and add a 0.5 row fudge factor, because it allows us to
+ * abort earlier on genuinely pathological data where we've had exactly
+ * one abbreviated value in the first 10k (non-null) rows.
+ */
+ if (abbr_card < nss->input_count / 10000.0 + 0.5)
+ {
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG,
+ "numeric_abbrev: aborting abbreviation at cardinality %f"
+ " below threshold %f after " INT64_FORMAT " values (%d rows)",
+ abbr_card, nss->input_count / 10000.0 + 0.5,
+ nss->input_count, memtupcount);
+#endif
+ return true;
+ }
+
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG,
+ "numeric_abbrev: cardinality %f"
+ " after " INT64_FORMAT " values (%d rows)",
+ abbr_card, nss->input_count, memtupcount);
+#endif
+
+ return false;
+}
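+
+/*
+ * To illustrate the threshold above with made-up numbers: after 20000
+ * non-null inputs the minimum acceptable abbreviated cardinality is
+ * 20000 / 10000.0 + 0.5 = 2.5, so an estimate of roughly two or fewer
+ * distinct abbreviated values triggers an abort; once the estimate exceeds
+ * 100000 we stop checking altogether.
+ */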
+
+/*
+ * Non-fmgr interface to the comparison routine to allow sortsupport to elide
+ * the fmgr call. The saving here is small given how slow numeric comparisons
+ * are, but it is a required part of the sort support API when abbreviations
+ * are performed.
+ *
+ * Two palloc/pfree cycles could be saved here by using persistent buffers for
+ * aligning short-varlena inputs, but this has not so far been considered to
+ * be worth the effort.
+ */
+static int
+numeric_fast_cmp(Datum x, Datum y, SortSupport ssup)
+{
+ Numeric nx = DatumGetNumeric(x);
+ Numeric ny = DatumGetNumeric(y);
+ int result;
+
+ result = cmp_numerics(nx, ny);
+
+ if ((Pointer) nx != DatumGetPointer(x))
+ pfree(nx);
+ if ((Pointer) ny != DatumGetPointer(y))
+ pfree(ny);
+
+ return result;
+}
+
+/*
+ * Compare abbreviations of values. (Abbreviations may be equal where the true
+ * values differ, but if the abbreviations differ, they must reflect the
+ * ordering of the true values.)
+ */
+static int
+numeric_cmp_abbrev(Datum x, Datum y, SortSupport ssup)
+{
+ /*
+ * NOTE WELL: this is intentionally backwards, because the abbreviation is
+ * negated relative to the original value, to handle NaN/infinity cases.
+ */
+ if (DatumGetNumericAbbrev(x) < DatumGetNumericAbbrev(y))
+ return 1;
+ if (DatumGetNumericAbbrev(x) > DatumGetNumericAbbrev(y))
+ return -1;
+ return 0;
+}
+
+/*
+ * Abbreviate a NumericVar according to the available bit size.
+ *
+ * The 31-bit value is constructed as:
+ *
+ * 0 + 7bits digit weight + 24 bits digit value
+ *
+ * where the digit weight is in single decimal digits, not digit words, and
+ * stored in excess-44 representation[1]. The 24-bit digit value is the 7 most
+ * significant decimal digits of the value converted to binary. Values whose
+ * weights would fall outside the representable range are rounded off to zero
+ * (which is also used to represent actual zeros) or to 0x7FFFFFFF (which
+ * otherwise cannot occur). Abbreviation therefore fails to gain any advantage
+ * where values are outside the range 10^-44 to 10^83, which is not considered
+ * to be a serious limitation, or when values are of the same magnitude and
+ * equal in the first 7 decimal digits, which is considered to be an
+ * unavoidable limitation given the available bits. (Stealing three more bits
+ * to compare another digit would narrow the range of representable weights by
+ * a factor of 8, which starts to look like a real limiting factor.)
+ *
+ * (The value 44 for the excess is essentially arbitrary)
+ *
+ * The 63-bit value is constructed as:
+ *
+ * 0 + 7bits weight + 4 x 14-bit packed digit words
+ *
+ * The weight in this case is again stored in excess-44, but this time it is
+ * the original weight in digit words (i.e. powers of 10000). The first four
+ * digit words of the value (if present; trailing zeros are assumed as needed)
+ * are packed into 14 bits each to form the rest of the value. Again,
+ * out-of-range values are rounded off to 0 or 0x7FFFFFFFFFFFFFFF. The
+ * representable range in this case is 10^-176 to 10^332, which is considered
+ * to be good enough for all practical purposes, and comparison of 4 words
+ * means that at least 13 decimal digits are compared, which is considered to
+ * be a reasonable compromise between effectiveness and efficiency in computing
+ * the abbreviation.
+ *
+ * (The value 44 for the excess is even more arbitrary here, it was chosen just
+ * to match the value used in the 31-bit case)
+ *
+ * [1] - Excess-k representation means that the value is offset by adding 'k'
+ * and then treated as unsigned, so the smallest representable value is stored
+ * with all bits zero. This allows simple comparisons to work on the composite
+ * value.
+ */
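+
+/*
+ * A rough worked example of the 63-bit encoding (illustrative only, assuming
+ * the usual NBASE of 10000): the value 12345.678 has digit words
+ * {1, 2345, 6780} and weight 1, so its abbreviation is
+ * ((1 + 44) << 56) | (1 << 42) | (2345 << 28) | (6780 << 14); since the
+ * value is positive, the result is then negated, per the convention
+ * described above.
+ */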
+
+#if NUMERIC_ABBREV_BITS == 64
+
+static Datum
+numeric_abbrev_convert_var(const NumericVar *var, NumericSortSupport *nss)
+{
+ int ndigits = var->ndigits;
+ int weight = var->weight;
+ int64 result;
+
+ if (ndigits == 0 || weight < -44)
+ {
+ result = 0;
+ }
+ else if (weight > 83)
+ {
+ result = PG_INT64_MAX;
+ }
+ else
+ {
+ result = ((int64) (weight + 44) << 56);
+
+ switch (ndigits)
+ {
+ default:
+ result |= ((int64) var->digits[3]);
+ /* FALLTHROUGH */
+ case 3:
+ result |= ((int64) var->digits[2]) << 14;
+ /* FALLTHROUGH */
+ case 2:
+ result |= ((int64) var->digits[1]) << 28;
+ /* FALLTHROUGH */
+ case 1:
+ result |= ((int64) var->digits[0]) << 42;
+ break;
+ }
+ }
+
+ /* the abbrev is negated relative to the original */
+ if (var->sign == NUMERIC_POS)
+ result = -result;
+
+ if (nss->estimating)
+ {
+ uint32 tmp = ((uint32) result
+ ^ (uint32) ((uint64) result >> 32));
+
+ addHyperLogLog(&nss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
+ }
+
+ return NumericAbbrevGetDatum(result);
+}
+
+#endif /* NUMERIC_ABBREV_BITS == 64 */
+
+#if NUMERIC_ABBREV_BITS == 32
+
+static Datum
+numeric_abbrev_convert_var(const NumericVar *var, NumericSortSupport *nss)
+{
+ int ndigits = var->ndigits;
+ int weight = var->weight;
+ int32 result;
+
+ if (ndigits == 0 || weight < -11)
+ {
+ result = 0;
+ }
+ else if (weight > 20)
+ {
+ result = PG_INT32_MAX;
+ }
+ else
+ {
+ NumericDigit nxt1 = (ndigits > 1) ? var->digits[1] : 0;
+
+ weight = (weight + 11) * 4;
+
+ result = var->digits[0];
+
+ /*
+ * "result" now has 1 to 4 nonzero decimal digits. We pack in more
+ * digits to make 7 in total (largest we can fit in 24 bits)
+ */
+
+ if (result > 999)
+ {
+ /* already have 4 digits, add 3 more */
+ result = (result * 1000) + (nxt1 / 10);
+ weight += 3;
+ }
+ else if (result > 99)
+ {
+ /* already have 3 digits, add 4 more */
+ result = (result * 10000) + nxt1;
+ weight += 2;
+ }
+ else if (result > 9)
+ {
+ NumericDigit nxt2 = (ndigits > 2) ? var->digits[2] : 0;
+
+ /* already have 2 digits, add 5 more */
+ result = (result * 100000) + (nxt1 * 10) + (nxt2 / 1000);
+ weight += 1;
+ }
+ else
+ {
+ NumericDigit nxt2 = (ndigits > 2) ? var->digits[2] : 0;
+
+ /* already have 1 digit, add 6 more */
+ result = (result * 1000000) + (nxt1 * 100) + (nxt2 / 100);
+ }
+
+ result = result | (weight << 24);
+ }
+
+ /* the abbrev is negated relative to the original */
+ if (var->sign == NUMERIC_POS)
+ result = -result;
+
+ if (nss->estimating)
+ {
+ uint32 tmp = (uint32) result;
+
+ addHyperLogLog(&nss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
+ }
+
+ return NumericAbbrevGetDatum(result);
+}
+
+#endif /* NUMERIC_ABBREV_BITS == 32 */
+
+/*
+ * Ordinary (non-sortsupport) comparisons follow.
+ */
+
+Datum
+numeric_cmp(PG_FUNCTION_ARGS)
+{
+ Numeric num1 = PG_GETARG_NUMERIC(0);
+ Numeric num2 = PG_GETARG_NUMERIC(1);
+ int result;
+
+ result = cmp_numerics(num1, num2);
+
+ PG_FREE_IF_COPY(num1, 0);
+ PG_FREE_IF_COPY(num2, 1);
+
+ PG_RETURN_INT32(result);
+}
+
+
+Datum
+numeric_eq(PG_FUNCTION_ARGS)
+{
+ Numeric num1 = PG_GETARG_NUMERIC(0);
+ Numeric num2 = PG_GETARG_NUMERIC(1);
+ bool result;
+
+ result = cmp_numerics(num1, num2) == 0;
+
+ PG_FREE_IF_COPY(num1, 0);
+ PG_FREE_IF_COPY(num2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+numeric_ne(PG_FUNCTION_ARGS)
+{
+ Numeric num1 = PG_GETARG_NUMERIC(0);
+ Numeric num2 = PG_GETARG_NUMERIC(1);
+ bool result;
+
+ result = cmp_numerics(num1, num2) != 0;
+
+ PG_FREE_IF_COPY(num1, 0);
+ PG_FREE_IF_COPY(num2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+numeric_gt(PG_FUNCTION_ARGS)
+{
+ Numeric num1 = PG_GETARG_NUMERIC(0);
+ Numeric num2 = PG_GETARG_NUMERIC(1);
+ bool result;
+
+ result = cmp_numerics(num1, num2) > 0;
+
+ PG_FREE_IF_COPY(num1, 0);
+ PG_FREE_IF_COPY(num2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+numeric_ge(PG_FUNCTION_ARGS)
+{
+ Numeric num1 = PG_GETARG_NUMERIC(0);
+ Numeric num2 = PG_GETARG_NUMERIC(1);
+ bool result;
+
+ result = cmp_numerics(num1, num2) >= 0;
+
+ PG_FREE_IF_COPY(num1, 0);
+ PG_FREE_IF_COPY(num2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+numeric_lt(PG_FUNCTION_ARGS)
+{
+ Numeric num1 = PG_GETARG_NUMERIC(0);
+ Numeric num2 = PG_GETARG_NUMERIC(1);
+ bool result;
+
+ result = cmp_numerics(num1, num2) < 0;
+
+ PG_FREE_IF_COPY(num1, 0);
+ PG_FREE_IF_COPY(num2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+numeric_le(PG_FUNCTION_ARGS)
+{
+ Numeric num1 = PG_GETARG_NUMERIC(0);
+ Numeric num2 = PG_GETARG_NUMERIC(1);
+ bool result;
+
+ result = cmp_numerics(num1, num2) <= 0;
+
+ PG_FREE_IF_COPY(num1, 0);
+ PG_FREE_IF_COPY(num2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+static int
+cmp_numerics(Numeric num1, Numeric num2)
+{
+ int result;
+
+ /*
+ * We consider all NANs to be equal and larger than any non-NAN (including
+ * Infinity). This is somewhat arbitrary; the important thing is to have
+ * a consistent sort order.
+ */
+ if (NUMERIC_IS_SPECIAL(num1))
+ {
+ if (NUMERIC_IS_NAN(num1))
+ {
+ if (NUMERIC_IS_NAN(num2))
+ result = 0; /* NAN = NAN */
+ else
+ result = 1; /* NAN > non-NAN */
+ }
+ else if (NUMERIC_IS_PINF(num1))
+ {
+ if (NUMERIC_IS_NAN(num2))
+ result = -1; /* PINF < NAN */
+ else if (NUMERIC_IS_PINF(num2))
+ result = 0; /* PINF = PINF */
+ else
+ result = 1; /* PINF > anything else */
+ }
+ else /* num1 must be NINF */
+ {
+ if (NUMERIC_IS_NINF(num2))
+ result = 0; /* NINF = NINF */
+ else
+ result = -1; /* NINF < anything else */
+ }
+ }
+ else if (NUMERIC_IS_SPECIAL(num2))
+ {
+ if (NUMERIC_IS_NINF(num2))
+ result = 1; /* normal > NINF */
+ else
+ result = -1; /* normal < NAN or PINF */
+ }
+ else
+ {
+ result = cmp_var_common(NUMERIC_DIGITS(num1), NUMERIC_NDIGITS(num1),
+ NUMERIC_WEIGHT(num1), NUMERIC_SIGN(num1),
+ NUMERIC_DIGITS(num2), NUMERIC_NDIGITS(num2),
+ NUMERIC_WEIGHT(num2), NUMERIC_SIGN(num2));
+ }
+
+ return result;
+}
+
+/*
+ * in_range support function for numeric.
+ */
+Datum
+in_range_numeric_numeric(PG_FUNCTION_ARGS)
+{
+ Numeric val = PG_GETARG_NUMERIC(0);
+ Numeric base = PG_GETARG_NUMERIC(1);
+ Numeric offset = PG_GETARG_NUMERIC(2);
+ bool sub = PG_GETARG_BOOL(3);
+ bool less = PG_GETARG_BOOL(4);
+ bool result;
+
+ /*
+ * Reject negative (including -Inf) or NaN offset. Negative is per spec,
+ * and NaN is because appropriate semantics for that seem non-obvious.
+ */
+ if (NUMERIC_IS_NAN(offset) ||
+ NUMERIC_IS_NINF(offset) ||
+ NUMERIC_SIGN(offset) == NUMERIC_NEG)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE),
+ errmsg("invalid preceding or following size in window function")));
+
+ /*
+ * Deal with cases where val and/or base is NaN, following the rule that
+ * NaN sorts after non-NaN (cf cmp_numerics). The offset cannot affect
+ * the conclusion.
+ */
+ if (NUMERIC_IS_NAN(val))
+ {
+ if (NUMERIC_IS_NAN(base))
+ result = true; /* NAN = NAN */
+ else
+ result = !less; /* NAN > non-NAN */
+ }
+ else if (NUMERIC_IS_NAN(base))
+ {
+ result = less; /* non-NAN < NAN */
+ }
+
+ /*
+ * Deal with infinite offset (necessarily +Inf, at this point).
+ */
+ else if (NUMERIC_IS_SPECIAL(offset))
+ {
+ Assert(NUMERIC_IS_PINF(offset));
+ if (sub ? NUMERIC_IS_PINF(base) : NUMERIC_IS_NINF(base))
+ {
+ /*
+ * base +/- offset would produce NaN, so return true for any val
+ * (see in_range_float8_float8() for reasoning).
+ */
+ result = true;
+ }
+ else if (sub)
+ {
+ /* base - offset must be -inf */
+ if (less)
+ result = NUMERIC_IS_NINF(val); /* only -inf is <= sum */
+ else
+ result = true; /* any val is >= sum */
+ }
+ else
+ {
+ /* base + offset must be +inf */
+ if (less)
+ result = true; /* any val is <= sum */
+ else
+ result = NUMERIC_IS_PINF(val); /* only +inf is >= sum */
+ }
+ }
+
+ /*
+ * Deal with cases where val and/or base is infinite. The offset, being
+ * now known finite, cannot affect the conclusion.
+ */
+ else if (NUMERIC_IS_SPECIAL(val))
+ {
+ if (NUMERIC_IS_PINF(val))
+ {
+ if (NUMERIC_IS_PINF(base))
+ result = true; /* PINF = PINF */
+ else
+ result = !less; /* PINF > any other non-NAN */
+ }
+ else /* val must be NINF */
+ {
+ if (NUMERIC_IS_NINF(base))
+ result = true; /* NINF = NINF */
+ else
+ result = less; /* NINF < anything else */
+ }
+ }
+ else if (NUMERIC_IS_SPECIAL(base))
+ {
+ if (NUMERIC_IS_NINF(base))
+ result = !less; /* normal > NINF */
+ else
+ result = less; /* normal < PINF */
+ }
+ else
+ {
+ /*
+ * Otherwise go ahead and compute base +/- offset. While it's
+ * possible for this to overflow the numeric format, it's unlikely
+ * enough that we don't take measures to prevent it.
+ */
+ NumericVar valv;
+ NumericVar basev;
+ NumericVar offsetv;
+ NumericVar sum;
+
+ init_var_from_num(val, &valv);
+ init_var_from_num(base, &basev);
+ init_var_from_num(offset, &offsetv);
+ init_var(&sum);
+
+ if (sub)
+ sub_var(&basev, &offsetv, &sum);
+ else
+ add_var(&basev, &offsetv, &sum);
+
+ if (less)
+ result = (cmp_var(&valv, &sum) <= 0);
+ else
+ result = (cmp_var(&valv, &sum) >= 0);
+
+ free_var(&sum);
+ }
+
+ PG_FREE_IF_COPY(val, 0);
+ PG_FREE_IF_COPY(base, 1);
+ PG_FREE_IF_COPY(offset, 2);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+hash_numeric(PG_FUNCTION_ARGS)
+{
+ Numeric key = PG_GETARG_NUMERIC(0);
+ Datum digit_hash;
+ Datum result;
+ int weight;
+ int start_offset;
+ int end_offset;
+ int i;
+ int hash_len;
+ NumericDigit *digits;
+
+ /* If it's NaN or infinity, don't try to hash the rest of the fields */
+ if (NUMERIC_IS_SPECIAL(key))
+ PG_RETURN_UINT32(0);
+
+ weight = NUMERIC_WEIGHT(key);
+ start_offset = 0;
+ end_offset = 0;
+
+ /*
+ * Omit any leading or trailing zeros from the input to the hash. The
+ * numeric implementation *should* guarantee that leading and trailing
+ * zeros are suppressed, but we're paranoid. Note that we measure the
+ * starting and ending offsets in units of NumericDigits, not bytes.
+ */
+ digits = NUMERIC_DIGITS(key);
+ for (i = 0; i < NUMERIC_NDIGITS(key); i++)
+ {
+ if (digits[i] != (NumericDigit) 0)
+ break;
+
+ start_offset++;
+
+ /*
+ * The weight is effectively the # of digits before the decimal point,
+ * so decrement it for each leading zero we skip.
+ */
+ weight--;
+ }
+
+ /*
+ * If there are no non-zero digits, then the value of the number is zero,
+ * regardless of any other fields.
+ */
+ if (NUMERIC_NDIGITS(key) == start_offset)
+ PG_RETURN_UINT32(-1);
+
+ for (i = NUMERIC_NDIGITS(key) - 1; i >= 0; i--)
+ {
+ if (digits[i] != (NumericDigit) 0)
+ break;
+
+ end_offset++;
+ }
+
+ /* If we get here, there should be at least one non-zero digit */
+ Assert(start_offset + end_offset < NUMERIC_NDIGITS(key));
+
+ /*
+ * Note that we don't hash on the Numeric's scale, since two numerics can
+ * compare equal but have different scales. We also don't hash on the
+ * sign, although we could: since a sign difference implies inequality,
+ * this shouldn't affect correctness.
+ */
+ hash_len = NUMERIC_NDIGITS(key) - start_offset - end_offset;
+ digit_hash = hash_any((unsigned char *) (NUMERIC_DIGITS(key) + start_offset),
+ hash_len * sizeof(NumericDigit));
+
+ /* Mix in the weight, via XOR */
+ result = digit_hash ^ weight;
+
+ PG_RETURN_DATUM(result);
+}
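+
+/*
+ * For illustration: because trailing zero digits are ignored and the display
+ * scale is not hashed, the values 12.3 and 12.30 (which compare equal) hash
+ * to the same result, as is required since equal values must hash
+ * identically.
+ */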
+
+/*
+ * Returns a 64-bit hash of the value, computed using the given seed.
+ * Otherwise, similar to hash_numeric.
+ */
+Datum
+hash_numeric_extended(PG_FUNCTION_ARGS)
+{
+ Numeric key = PG_GETARG_NUMERIC(0);
+ uint64 seed = PG_GETARG_INT64(1);
+ Datum digit_hash;
+ Datum result;
+ int weight;
+ int start_offset;
+ int end_offset;
+ int i;
+ int hash_len;
+ NumericDigit *digits;
+
+ /* If it's NaN or infinity, don't try to hash the rest of the fields */
+ if (NUMERIC_IS_SPECIAL(key))
+ PG_RETURN_UINT64(seed);
+
+ weight = NUMERIC_WEIGHT(key);
+ start_offset = 0;
+ end_offset = 0;
+
+ digits = NUMERIC_DIGITS(key);
+ for (i = 0; i < NUMERIC_NDIGITS(key); i++)
+ {
+ if (digits[i] != (NumericDigit) 0)
+ break;
+
+ start_offset++;
+
+ weight--;
+ }
+
+ if (NUMERIC_NDIGITS(key) == start_offset)
+ PG_RETURN_UINT64(seed - 1);
+
+ for (i = NUMERIC_NDIGITS(key) - 1; i >= 0; i--)
+ {
+ if (digits[i] != (NumericDigit) 0)
+ break;
+
+ end_offset++;
+ }
+
+ Assert(start_offset + end_offset < NUMERIC_NDIGITS(key));
+
+ hash_len = NUMERIC_NDIGITS(key) - start_offset - end_offset;
+ digit_hash = hash_any_extended((unsigned char *) (NUMERIC_DIGITS(key)
+ + start_offset),
+ hash_len * sizeof(NumericDigit),
+ seed);
+
+ result = UInt64GetDatum(DatumGetUInt64(digit_hash) ^ weight);
+
+ PG_RETURN_DATUM(result);
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Basic arithmetic functions
+ *
+ * ----------------------------------------------------------------------
+ */
+
+
+/*
+ * numeric_add() -
+ *
+ * Add two numerics
+ */
+Datum
+numeric_add(PG_FUNCTION_ARGS)
+{
+ Numeric num1 = PG_GETARG_NUMERIC(0);
+ Numeric num2 = PG_GETARG_NUMERIC(1);
+ Numeric res;
+
+ res = numeric_add_opt_error(num1, num2, NULL);
+
+ PG_RETURN_NUMERIC(res);
+}
+
+/*
+ * numeric_add_opt_error() -
+ *
+ * Internal version of numeric_add(). If a "*have_error" flag is provided,
+ * it is set to true on error and NULL is returned. This is helpful when the
+ * caller needs to handle errors itself.
+ */
+Numeric
+numeric_add_opt_error(Numeric num1, Numeric num2, bool *have_error)
+{
+ NumericVar arg1;
+ NumericVar arg2;
+ NumericVar result;
+ Numeric res;
+
+ /*
+ * Handle NaN and infinities
+ */
+ if (NUMERIC_IS_SPECIAL(num1) || NUMERIC_IS_SPECIAL(num2))
+ {
+ if (NUMERIC_IS_NAN(num1) || NUMERIC_IS_NAN(num2))
+ return make_result(&const_nan);
+ if (NUMERIC_IS_PINF(num1))
+ {
+ if (NUMERIC_IS_NINF(num2))
+ return make_result(&const_nan); /* Inf + -Inf */
+ else
+ return make_result(&const_pinf);
+ }
+ if (NUMERIC_IS_NINF(num1))
+ {
+ if (NUMERIC_IS_PINF(num2))
+ return make_result(&const_nan); /* -Inf + Inf */
+ else
+ return make_result(&const_ninf);
+ }
+ /* by here, num1 must be finite, so num2 is not */
+ if (NUMERIC_IS_PINF(num2))
+ return make_result(&const_pinf);
+ Assert(NUMERIC_IS_NINF(num2));
+ return make_result(&const_ninf);
+ }
+
+ /*
+ * Unpack the values, let add_var() compute the result and return it.
+ */
+ init_var_from_num(num1, &arg1);
+ init_var_from_num(num2, &arg2);
+
+ init_var(&result);
+ add_var(&arg1, &arg2, &result);
+
+ res = make_result_opt_error(&result, have_error);
+
+ free_var(&result);
+
+ return res;
+}
+
+
+/*
+ * numeric_sub() -
+ *
+ * Subtract one numeric from another
+ */
+Datum
+numeric_sub(PG_FUNCTION_ARGS)
+{
+ Numeric num1 = PG_GETARG_NUMERIC(0);
+ Numeric num2 = PG_GETARG_NUMERIC(1);
+ Numeric res;
+
+ res = numeric_sub_opt_error(num1, num2, NULL);
+
+ PG_RETURN_NUMERIC(res);
+}
+
+
+/*
+ * numeric_sub_opt_error() -
+ *
+ * Internal version of numeric_sub(). If the "have_error" pointer is
+ * provided, "*have_error" is set to true and NULL is returned on error.
+ * This is helpful when the caller needs to handle errors itself.
+ */
+Numeric
+numeric_sub_opt_error(Numeric num1, Numeric num2, bool *have_error)
+{
+ NumericVar arg1;
+ NumericVar arg2;
+ NumericVar result;
+ Numeric res;
+
+ /*
+ * Handle NaN and infinities
+ */
+ if (NUMERIC_IS_SPECIAL(num1) || NUMERIC_IS_SPECIAL(num2))
+ {
+ if (NUMERIC_IS_NAN(num1) || NUMERIC_IS_NAN(num2))
+ return make_result(&const_nan);
+ if (NUMERIC_IS_PINF(num1))
+ {
+ if (NUMERIC_IS_PINF(num2))
+ return make_result(&const_nan); /* Inf - Inf */
+ else
+ return make_result(&const_pinf);
+ }
+ if (NUMERIC_IS_NINF(num1))
+ {
+ if (NUMERIC_IS_NINF(num2))
+ return make_result(&const_nan); /* -Inf - -Inf */
+ else
+ return make_result(&const_ninf);
+ }
+ /* by here, num1 must be finite, so num2 is not */
+ if (NUMERIC_IS_PINF(num2))
+ return make_result(&const_ninf);
+ Assert(NUMERIC_IS_NINF(num2));
+ return make_result(&const_pinf);
+ }
+
+ /*
+ * Unpack the values, let sub_var() compute the result and return it.
+ */
+ init_var_from_num(num1, &arg1);
+ init_var_from_num(num2, &arg2);
+
+ init_var(&result);
+ sub_var(&arg1, &arg2, &result);
+
+ res = make_result_opt_error(&result, have_error);
+
+ free_var(&result);
+
+ return res;
+}
+
+
+/*
+ * numeric_mul() -
+ *
+ * Calculate the product of two numerics
+ */
+Datum
+numeric_mul(PG_FUNCTION_ARGS)
+{
+ Numeric num1 = PG_GETARG_NUMERIC(0);
+ Numeric num2 = PG_GETARG_NUMERIC(1);
+ Numeric res;
+
+ res = numeric_mul_opt_error(num1, num2, NULL);
+
+ PG_RETURN_NUMERIC(res);
+}
+
+
+/*
+ * numeric_mul_opt_error() -
+ *
+ * Internal version of numeric_mul(). If the "have_error" pointer is
+ * provided, "*have_error" is set to true and NULL is returned on error.
+ * This is helpful when the caller needs to handle errors itself.
+ */
+Numeric
+numeric_mul_opt_error(Numeric num1, Numeric num2, bool *have_error)
+{
+ NumericVar arg1;
+ NumericVar arg2;
+ NumericVar result;
+ Numeric res;
+
+ /*
+ * Handle NaN and infinities
+ */
+ if (NUMERIC_IS_SPECIAL(num1) || NUMERIC_IS_SPECIAL(num2))
+ {
+ if (NUMERIC_IS_NAN(num1) || NUMERIC_IS_NAN(num2))
+ return make_result(&const_nan);
+ if (NUMERIC_IS_PINF(num1))
+ {
+ switch (numeric_sign_internal(num2))
+ {
+ case 0:
+ return make_result(&const_nan); /* Inf * 0 */
+ case 1:
+ return make_result(&const_pinf);
+ case -1:
+ return make_result(&const_ninf);
+ }
+ Assert(false);
+ }
+ if (NUMERIC_IS_NINF(num1))
+ {
+ switch (numeric_sign_internal(num2))
+ {
+ case 0:
+ return make_result(&const_nan); /* -Inf * 0 */
+ case 1:
+ return make_result(&const_ninf);
+ case -1:
+ return make_result(&const_pinf);
+ }
+ Assert(false);
+ }
+ /* by here, num1 must be finite, so num2 is not */
+ if (NUMERIC_IS_PINF(num2))
+ {
+ switch (numeric_sign_internal(num1))
+ {
+ case 0:
+ return make_result(&const_nan); /* 0 * Inf */
+ case 1:
+ return make_result(&const_pinf);
+ case -1:
+ return make_result(&const_ninf);
+ }
+ Assert(false);
+ }
+ Assert(NUMERIC_IS_NINF(num2));
+ switch (numeric_sign_internal(num1))
+ {
+ case 0:
+ return make_result(&const_nan); /* 0 * -Inf */
+ case 1:
+ return make_result(&const_ninf);
+ case -1:
+ return make_result(&const_pinf);
+ }
+ Assert(false);
+ }
+
+ /*
+ * Unpack the values, let mul_var() compute the result and return it.
+ * Unlike add_var() and sub_var(), mul_var() will round its result. In the
+ * case of numeric_mul(), which is invoked for the * operator on numerics,
+ * we request exact representation for the product (rscale = sum(dscale of
+ * arg1, dscale of arg2)). If the exact result has more digits after the
+ * decimal point than can be stored in a numeric, we round it. Rounding
+ * after computing the exact result ensures that the final result is
+ * correctly rounded (rounding in mul_var() using a truncated product
+ * would not guarantee this).
+ */
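+ /*
+ * For example, 0.12 (dscale 2) times 0.004 (dscale 3) is computed at
+ * rscale 5, giving the exact product 0.00048.
+ */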
+ init_var_from_num(num1, &arg1);
+ init_var_from_num(num2, &arg2);
+
+ init_var(&result);
+ mul_var(&arg1, &arg2, &result, arg1.dscale + arg2.dscale);
+
+ if (result.dscale > NUMERIC_DSCALE_MAX)
+ round_var(&result, NUMERIC_DSCALE_MAX);
+
+ res = make_result_opt_error(&result, have_error);
+
+ free_var(&result);
+
+ return res;
+}
+
+
+/*
+ * numeric_div() -
+ *
+ * Divide one numeric into another
+ */
+Datum
+numeric_div(PG_FUNCTION_ARGS)
+{
+ Numeric num1 = PG_GETARG_NUMERIC(0);
+ Numeric num2 = PG_GETARG_NUMERIC(1);
+ Numeric res;
+
+ res = numeric_div_opt_error(num1, num2, NULL);
+
+ PG_RETURN_NUMERIC(res);
+}
+
+
+/*
+ * numeric_div_opt_error() -
+ *
+ * Internal version of numeric_div(). If the "have_error" pointer is
+ * provided, "*have_error" is set to true and NULL is returned on error.
+ * This is helpful when the caller needs to handle errors itself.
+ */
+Numeric
+numeric_div_opt_error(Numeric num1, Numeric num2, bool *have_error)
+{
+ NumericVar arg1;
+ NumericVar arg2;
+ NumericVar result;
+ Numeric res;
+ int rscale;
+
+ if (have_error)
+ *have_error = false;
+
+ /*
+ * Handle NaN and infinities
+ */
+ if (NUMERIC_IS_SPECIAL(num1) || NUMERIC_IS_SPECIAL(num2))
+ {
+ if (NUMERIC_IS_NAN(num1) || NUMERIC_IS_NAN(num2))
+ return make_result(&const_nan);
+ if (NUMERIC_IS_PINF(num1))
+ {
+ if (NUMERIC_IS_SPECIAL(num2))
+ return make_result(&const_nan); /* Inf / [-]Inf */
+ switch (numeric_sign_internal(num2))
+ {
+ case 0:
+ if (have_error)
+ {
+ *have_error = true;
+ return NULL;
+ }
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+ break;
+ case 1:
+ return make_result(&const_pinf);
+ case -1:
+ return make_result(&const_ninf);
+ }
+ Assert(false);
+ }
+ if (NUMERIC_IS_NINF(num1))
+ {
+ if (NUMERIC_IS_SPECIAL(num2))
+ return make_result(&const_nan); /* -Inf / [-]Inf */
+ switch (numeric_sign_internal(num2))
+ {
+ case 0:
+ if (have_error)
+ {
+ *have_error = true;
+ return NULL;
+ }
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+ break;
+ case 1:
+ return make_result(&const_ninf);
+ case -1:
+ return make_result(&const_pinf);
+ }
+ Assert(false);
+ }
+ /* by here, num1 must be finite, so num2 is not */
+
+ /*
+ * POSIX would have us return zero or minus zero if num1 is zero, and
+ * otherwise throw an underflow error. But the numeric type doesn't
+ * really do underflow, so let's just return zero.
+ */
+ return make_result(&const_zero);
+ }
+
+ /*
+ * Unpack the arguments
+ */
+ init_var_from_num(num1, &arg1);
+ init_var_from_num(num2, &arg2);
+
+ init_var(&result);
+
+ /*
+ * Select scale for division result
+ */
+ rscale = select_div_scale(&arg1, &arg2);
+
+ /*
+ * If "have_error" is provided, check for division by zero here
+ */
+ if (have_error && (arg2.ndigits == 0 || arg2.digits[0] == 0))
+ {
+ *have_error = true;
+ return NULL;
+ }
+
+ /*
+ * Do the divide and return the result
+ */
+ div_var(&arg1, &arg2, &result, rscale, true);
+
+ res = make_result_opt_error(&result, have_error);
+
+ free_var(&result);
+
+ return res;
+}
+
+
+/*
+ * numeric_div_trunc() -
+ *
+ * Divide one numeric into another, truncating the result to an integer
+ */
+Datum
+numeric_div_trunc(PG_FUNCTION_ARGS)
+{
+ Numeric num1 = PG_GETARG_NUMERIC(0);
+ Numeric num2 = PG_GETARG_NUMERIC(1);
+ NumericVar arg1;
+ NumericVar arg2;
+ NumericVar result;
+ Numeric res;
+
+ /*
+ * Handle NaN and infinities
+ */
+ if (NUMERIC_IS_SPECIAL(num1) || NUMERIC_IS_SPECIAL(num2))
+ {
+ if (NUMERIC_IS_NAN(num1) || NUMERIC_IS_NAN(num2))
+ PG_RETURN_NUMERIC(make_result(&const_nan));
+ if (NUMERIC_IS_PINF(num1))
+ {
+ if (NUMERIC_IS_SPECIAL(num2))
+ PG_RETURN_NUMERIC(make_result(&const_nan)); /* Inf / [-]Inf */
+ switch (numeric_sign_internal(num2))
+ {
+ case 0:
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+ break;
+ case 1:
+ PG_RETURN_NUMERIC(make_result(&const_pinf));
+ case -1:
+ PG_RETURN_NUMERIC(make_result(&const_ninf));
+ }
+ Assert(false);
+ }
+ if (NUMERIC_IS_NINF(num1))
+ {
+ if (NUMERIC_IS_SPECIAL(num2))
+ PG_RETURN_NUMERIC(make_result(&const_nan)); /* -Inf / [-]Inf */
+ switch (numeric_sign_internal(num2))
+ {
+ case 0:
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+ break;
+ case 1:
+ PG_RETURN_NUMERIC(make_result(&const_ninf));
+ case -1:
+ PG_RETURN_NUMERIC(make_result(&const_pinf));
+ }
+ Assert(false);
+ }
+ /* by here, num1 must be finite, so num2 is not */
+
+ /*
+ * POSIX would have us return zero or minus zero if num1 is zero, and
+ * otherwise throw an underflow error. But the numeric type doesn't
+ * really do underflow, so let's just return zero.
+ */
+ PG_RETURN_NUMERIC(make_result(&const_zero));
+ }
+
+ /*
+ * Unpack the arguments
+ */
+ init_var_from_num(num1, &arg1);
+ init_var_from_num(num2, &arg2);
+
+ init_var(&result);
+
+ /*
+ * Do the divide and return the result
+ */
+ div_var(&arg1, &arg2, &result, 0, false);
+
+ res = make_result(&result);
+
+ free_var(&result);
+
+ PG_RETURN_NUMERIC(res);
+}
+
+
+/*
+ * numeric_mod() -
+ *
+ * Calculate the modulo of two numerics
+ */
+Datum
+numeric_mod(PG_FUNCTION_ARGS)
+{
+ Numeric num1 = PG_GETARG_NUMERIC(0);
+ Numeric num2 = PG_GETARG_NUMERIC(1);
+ Numeric res;
+
+ res = numeric_mod_opt_error(num1, num2, NULL);
+
+ PG_RETURN_NUMERIC(res);
+}
+
+
+/*
+ * numeric_mod_opt_error() -
+ *
+ * Internal version of numeric_mod(). If the "have_error" pointer is
+ * provided, "*have_error" is set to true and NULL is returned on error.
+ * This is helpful when the caller needs to handle errors itself.
+ */
+Numeric
+numeric_mod_opt_error(Numeric num1, Numeric num2, bool *have_error)
+{
+ Numeric res;
+ NumericVar arg1;
+ NumericVar arg2;
+ NumericVar result;
+
+ if (have_error)
+ *have_error = false;
+
+ /*
+ * Handle NaN and infinities. We follow POSIX fmod() on this, except that
+ * POSIX treats x-is-infinite and y-is-zero identically, raising EDOM and
+ * returning NaN. We choose to throw error only for y-is-zero.
+ */
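+ /*
+ * Thus, for example, infinity % 3 yields NaN, while 5 % 0 still raises a
+ * division-by-zero error.
+ */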
+ if (NUMERIC_IS_SPECIAL(num1) || NUMERIC_IS_SPECIAL(num2))
+ {
+ if (NUMERIC_IS_NAN(num1) || NUMERIC_IS_NAN(num2))
+ return make_result(&const_nan);
+ if (NUMERIC_IS_INF(num1))
+ {
+ if (numeric_sign_internal(num2) == 0)
+ {
+ if (have_error)
+ {
+ *have_error = true;
+ return NULL;
+ }
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+ }
+ /* Inf % any nonzero = NaN */
+ return make_result(&const_nan);
+ }
+ /* num2 must be [-]Inf; result is num1 regardless of sign of num2 */
+ return duplicate_numeric(num1);
+ }
+
+ init_var_from_num(num1, &arg1);
+ init_var_from_num(num2, &arg2);
+
+ init_var(&result);
+
+ /*
+ * If "have_error" is provided, check for division by zero here
+ */
+ if (have_error && (arg2.ndigits == 0 || arg2.digits[0] == 0))
+ {
+ *have_error = true;
+ return NULL;
+ }
+
+ mod_var(&arg1, &arg2, &result);
+
+ res = make_result_opt_error(&result, NULL);
+
+ free_var(&result);
+
+ return res;
+}
+
+
+/*
+ * numeric_inc() -
+ *
+ * Increment a number by one
+ */
+Datum
+numeric_inc(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+ NumericVar arg;
+ Numeric res;
+
+ /*
+ * Handle NaN and infinities
+ */
+ if (NUMERIC_IS_SPECIAL(num))
+ PG_RETURN_NUMERIC(duplicate_numeric(num));
+
+ /*
+ * Compute the result and return it
+ */
+ init_var_from_num(num, &arg);
+
+ add_var(&arg, &const_one, &arg);
+
+ res = make_result(&arg);
+
+ free_var(&arg);
+
+ PG_RETURN_NUMERIC(res);
+}
+
+
+/*
+ * numeric_smaller() -
+ *
+ * Return the smaller of two numbers
+ */
+Datum
+numeric_smaller(PG_FUNCTION_ARGS)
+{
+ Numeric num1 = PG_GETARG_NUMERIC(0);
+ Numeric num2 = PG_GETARG_NUMERIC(1);
+
+ /*
+ * Use cmp_numerics so that this will agree with the comparison operators,
+ * particularly as regards comparisons involving NaN.
+ */
+ if (cmp_numerics(num1, num2) < 0)
+ PG_RETURN_NUMERIC(num1);
+ else
+ PG_RETURN_NUMERIC(num2);
+}
+
+
+/*
+ * numeric_larger() -
+ *
+ * Return the larger of two numbers
+ */
+Datum
+numeric_larger(PG_FUNCTION_ARGS)
+{
+ Numeric num1 = PG_GETARG_NUMERIC(0);
+ Numeric num2 = PG_GETARG_NUMERIC(1);
+
+ /*
+ * Use cmp_numerics so that this will agree with the comparison operators,
+ * particularly as regards comparisons involving NaN.
+ */
+ if (cmp_numerics(num1, num2) > 0)
+ PG_RETURN_NUMERIC(num1);
+ else
+ PG_RETURN_NUMERIC(num2);
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Advanced math functions
+ *
+ * ----------------------------------------------------------------------
+ */
+
+/*
+ * numeric_gcd() -
+ *
+ * Calculate the greatest common divisor of two numerics
+ */
+Datum
+numeric_gcd(PG_FUNCTION_ARGS)
+{
+ Numeric num1 = PG_GETARG_NUMERIC(0);
+ Numeric num2 = PG_GETARG_NUMERIC(1);
+ NumericVar arg1;
+ NumericVar arg2;
+ NumericVar result;
+ Numeric res;
+
+ /*
+ * Handle NaN and infinities: we consider the result to be NaN in all such
+ * cases.
+ */
+ if (NUMERIC_IS_SPECIAL(num1) || NUMERIC_IS_SPECIAL(num2))
+ PG_RETURN_NUMERIC(make_result(&const_nan));
+
+ /*
+ * Unpack the arguments
+ */
+ init_var_from_num(num1, &arg1);
+ init_var_from_num(num2, &arg2);
+
+ init_var(&result);
+
+ /*
+ * Find the GCD and return the result
+ */
+ gcd_var(&arg1, &arg2, &result);
+
+ res = make_result(&result);
+
+ free_var(&result);
+
+ PG_RETURN_NUMERIC(res);
+}
+
+
+/*
+ * numeric_lcm() -
+ *
+ * Calculate the least common multiple of two numerics
+ */
+Datum
+numeric_lcm(PG_FUNCTION_ARGS)
+{
+ Numeric num1 = PG_GETARG_NUMERIC(0);
+ Numeric num2 = PG_GETARG_NUMERIC(1);
+ NumericVar arg1;
+ NumericVar arg2;
+ NumericVar result;
+ Numeric res;
+
+ /*
+ * Handle NaN and infinities: we consider the result to be NaN in all such
+ * cases.
+ */
+ if (NUMERIC_IS_SPECIAL(num1) || NUMERIC_IS_SPECIAL(num2))
+ PG_RETURN_NUMERIC(make_result(&const_nan));
+
+ /*
+ * Unpack the arguments
+ */
+ init_var_from_num(num1, &arg1);
+ init_var_from_num(num2, &arg2);
+
+ init_var(&result);
+
+ /*
+ * Compute the result using lcm(x, y) = abs(x / gcd(x, y) * y), returning
+ * zero if either input is zero.
+ *
+ * Note that the division is guaranteed to be exact, returning an integer
+ * result, so the LCM is an integral multiple of both x and y. A display
+ * scale of Min(x.dscale, y.dscale) would be sufficient to represent it,
+ * but as with other numeric functions, we choose to return a result whose
+ * display scale is no smaller than either input.
+ */
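+ /*
+ * For example, lcm(12.0, 18): gcd is 6, 12 / 6 = 2, and 2 * 18 = 36; the
+ * result is then displayed as 36.0, taking the larger input dscale.
+ */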
+ if (arg1.ndigits == 0 || arg2.ndigits == 0)
+ set_var_from_var(&const_zero, &result);
+ else
+ {
+ gcd_var(&arg1, &arg2, &result);
+ div_var(&arg1, &result, &result, 0, false);
+ mul_var(&arg2, &result, &result, arg2.dscale);
+ result.sign = NUMERIC_POS;
+ }
+
+ result.dscale = Max(arg1.dscale, arg2.dscale);
+
+ res = make_result(&result);
+
+ free_var(&result);
+
+ PG_RETURN_NUMERIC(res);
+}
+
+
+/*
+ * numeric_fac()
+ *
+ * Compute factorial
+ */
+Datum
+numeric_fac(PG_FUNCTION_ARGS)
+{
+ int64 num = PG_GETARG_INT64(0);
+ Numeric res;
+ NumericVar fact;
+ NumericVar result;
+
+ if (num < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("factorial of a negative number is undefined")));
+ if (num <= 1)
+ {
+ res = make_result(&const_one);
+ PG_RETURN_NUMERIC(res);
+ }
+ /* Fail immediately if the result would overflow */
+ if (num > 32177)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value overflows numeric format")));
+
+ init_var(&fact);
+ init_var(&result);
+
+ int64_to_numericvar(num, &result);
+
+ for (num = num - 1; num > 1; num--)
+ {
+ /* this loop can take awhile, so allow it to be interrupted */
+ CHECK_FOR_INTERRUPTS();
+
+ int64_to_numericvar(num, &fact);
+
+ mul_var(&result, &fact, &result, 0);
+ }
+
+ res = make_result(&result);
+
+ free_var(&fact);
+ free_var(&result);
+
+ PG_RETURN_NUMERIC(res);
+}
+
+
+/*
+ * numeric_sqrt() -
+ *
+ * Compute the square root of a numeric.
+ */
+Datum
+numeric_sqrt(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+ Numeric res;
+ NumericVar arg;
+ NumericVar result;
+ int sweight;
+ int rscale;
+
+ /*
+ * Handle NaN and infinities
+ */
+ if (NUMERIC_IS_SPECIAL(num))
+ {
+ /* error should match that in sqrt_var() */
+ if (NUMERIC_IS_NINF(num))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_POWER_FUNCTION),
+ errmsg("cannot take square root of a negative number")));
+ /* For NAN or PINF, just duplicate the input */
+ PG_RETURN_NUMERIC(duplicate_numeric(num));
+ }
+
+ /*
+ * Unpack the argument and determine the result scale. We choose a scale
+ * to give at least NUMERIC_MIN_SIG_DIGITS significant digits; but in any
+ * case not less than the input's dscale.
+ */
+ init_var_from_num(num, &arg);
+
+ init_var(&result);
+
+ /* Assume the input was normalized, so arg.weight is accurate */
+ sweight = (arg.weight + 1) * DEC_DIGITS / 2 - 1;
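+ /*
+ * For example, with DEC_DIGITS = 4, an argument near 10^6 has weight 1,
+ * giving sweight = 3, roughly the decimal weight of its square root 10^3.
+ */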
+
+ rscale = NUMERIC_MIN_SIG_DIGITS - sweight;
+ rscale = Max(rscale, arg.dscale);
+ rscale = Max(rscale, NUMERIC_MIN_DISPLAY_SCALE);
+ rscale = Min(rscale, NUMERIC_MAX_DISPLAY_SCALE);
+
+ /*
+ * Let sqrt_var() do the calculation and return the result.
+ */
+ sqrt_var(&arg, &result, rscale);
+
+ res = make_result(&result);
+
+ free_var(&result);
+
+ PG_RETURN_NUMERIC(res);
+}
+
+
+/*
+ * numeric_exp() -
+ *
+ * Raise e to the power of x
+ */
+Datum
+numeric_exp(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+ Numeric res;
+ NumericVar arg;
+ NumericVar result;
+ int rscale;
+ double val;
+
+ /*
+ * Handle NaN and infinities
+ */
+ if (NUMERIC_IS_SPECIAL(num))
+ {
+ /* Per POSIX, exp(-Inf) is zero */
+ if (NUMERIC_IS_NINF(num))
+ PG_RETURN_NUMERIC(make_result(&const_zero));
+ /* For NAN or PINF, just duplicate the input */
+ PG_RETURN_NUMERIC(duplicate_numeric(num));
+ }
+
+ /*
+ * Unpack the argument and determine the result scale. We choose a scale
+ * to give at least NUMERIC_MIN_SIG_DIGITS significant digits; but in any
+ * case not less than the input's dscale.
+ */
+ init_var_from_num(num, &arg);
+
+ init_var(&result);
+
+ /* convert input to float8, ignoring overflow */
+ val = numericvar_to_double_no_overflow(&arg);
+
+ /*
+ * log10(result) = num * log10(e), so this is approximately the decimal
+ * weight of the result:
+ */
+ val *= 0.434294481903252;
+
+ /* limit to something that won't cause integer overflow */
+ val = Max(val, -NUMERIC_MAX_RESULT_SCALE);
+ val = Min(val, NUMERIC_MAX_RESULT_SCALE);
+
+ rscale = NUMERIC_MIN_SIG_DIGITS - (int) val;
+ rscale = Max(rscale, arg.dscale);
+ rscale = Max(rscale, NUMERIC_MIN_DISPLAY_SCALE);
+ rscale = Min(rscale, NUMERIC_MAX_DISPLAY_SCALE);
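+ /*
+ * For example, for exp(20), val is about 8.7; with NUMERIC_MIN_SIG_DIGITS
+ * being 16, rscale becomes 8, which together with the nine digits before
+ * the decimal point still yields at least 16 significant digits.
+ */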
+
+ /*
+ * Let exp_var() do the calculation and return the result.
+ */
+ exp_var(&arg, &result, rscale);
+
+ res = make_result(&result);
+
+ free_var(&result);
+
+ PG_RETURN_NUMERIC(res);
+}
+
+
+/*
+ * numeric_ln() -
+ *
+ * Compute the natural logarithm of x
+ */
+Datum
+numeric_ln(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+ Numeric res;
+ NumericVar arg;
+ NumericVar result;
+ int ln_dweight;
+ int rscale;
+
+ /*
+ * Handle NaN and infinities
+ */
+ if (NUMERIC_IS_SPECIAL(num))
+ {
+ if (NUMERIC_IS_NINF(num))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_LOG),
+ errmsg("cannot take logarithm of a negative number")));
+ /* For NAN or PINF, just duplicate the input */
+ PG_RETURN_NUMERIC(duplicate_numeric(num));
+ }
+
+ init_var_from_num(num, &arg);
+ init_var(&result);
+
+ /* Estimated dweight of logarithm */
+ ln_dweight = estimate_ln_dweight(&arg);
+
+ rscale = NUMERIC_MIN_SIG_DIGITS - ln_dweight;
+ rscale = Max(rscale, arg.dscale);
+ rscale = Max(rscale, NUMERIC_MIN_DISPLAY_SCALE);
+ rscale = Min(rscale, NUMERIC_MAX_DISPLAY_SCALE);
+
+ ln_var(&arg, &result, rscale);
+
+ res = make_result(&result);
+
+ free_var(&result);
+
+ PG_RETURN_NUMERIC(res);
+}
+
+
+/*
+ * numeric_log() -
+ *
+ * Compute the logarithm of x in a given base
+ */
+Datum
+numeric_log(PG_FUNCTION_ARGS)
+{
+ Numeric num1 = PG_GETARG_NUMERIC(0);
+ Numeric num2 = PG_GETARG_NUMERIC(1);
+ Numeric res;
+ NumericVar arg1;
+ NumericVar arg2;
+ NumericVar result;
+
+ /*
+ * Handle NaN and infinities
+ */
+ if (NUMERIC_IS_SPECIAL(num1) || NUMERIC_IS_SPECIAL(num2))
+ {
+ int sign1,
+ sign2;
+
+ if (NUMERIC_IS_NAN(num1) || NUMERIC_IS_NAN(num2))
+ PG_RETURN_NUMERIC(make_result(&const_nan));
+ /* fail on negative inputs including -Inf, as log_var would */
+ sign1 = numeric_sign_internal(num1);
+ sign2 = numeric_sign_internal(num2);
+ if (sign1 < 0 || sign2 < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_LOG),
+ errmsg("cannot take logarithm of a negative number")));
+ /* fail on zero inputs, as log_var would */
+ if (sign1 == 0 || sign2 == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_LOG),
+ errmsg("cannot take logarithm of zero")));
+ if (NUMERIC_IS_PINF(num1))
+ {
+ /* log(Inf, Inf) reduces to Inf/Inf, so it's NaN */
+ if (NUMERIC_IS_PINF(num2))
+ PG_RETURN_NUMERIC(make_result(&const_nan));
+ /* log(Inf, finite-positive) is zero (we don't throw underflow) */
+ PG_RETURN_NUMERIC(make_result(&const_zero));
+ }
+ Assert(NUMERIC_IS_PINF(num2));
+ /* log(finite-positive, Inf) is Inf */
+ PG_RETURN_NUMERIC(make_result(&const_pinf));
+ }
+
+ /*
+ * Initialize things
+ */
+ init_var_from_num(num1, &arg1);
+ init_var_from_num(num2, &arg2);
+ init_var(&result);
+
+ /*
+ * Call log_var() to compute and return the result; note it handles scale
+ * selection itself.
+ */
+ log_var(&arg1, &arg2, &result);
+
+ res = make_result(&result);
+
+ free_var(&result);
+
+ PG_RETURN_NUMERIC(res);
+}
+
+
+/*
+ * numeric_power() -
+ *
+ * Raise x to the power of y
+ */
+Datum
+numeric_power(PG_FUNCTION_ARGS)
+{
+ Numeric num1 = PG_GETARG_NUMERIC(0);
+ Numeric num2 = PG_GETARG_NUMERIC(1);
+ Numeric res;
+ NumericVar arg1;
+ NumericVar arg2;
+ NumericVar result;
+ int sign1,
+ sign2;
+
+ /*
+ * Handle NaN and infinities
+ */
+ if (NUMERIC_IS_SPECIAL(num1) || NUMERIC_IS_SPECIAL(num2))
+ {
+ /*
+ * We follow the POSIX spec for pow(3), which says that NaN ^ 0 = 1,
+ * and 1 ^ NaN = 1, while all other cases with NaN inputs yield NaN
+ * (with no error).
+ */
+ if (NUMERIC_IS_NAN(num1))
+ {
+ if (!NUMERIC_IS_SPECIAL(num2))
+ {
+ init_var_from_num(num2, &arg2);
+ if (cmp_var(&arg2, &const_zero) == 0)
+ PG_RETURN_NUMERIC(make_result(&const_one));
+ }
+ PG_RETURN_NUMERIC(make_result(&const_nan));
+ }
+ if (NUMERIC_IS_NAN(num2))
+ {
+ if (!NUMERIC_IS_SPECIAL(num1))
+ {
+ init_var_from_num(num1, &arg1);
+ if (cmp_var(&arg1, &const_one) == 0)
+ PG_RETURN_NUMERIC(make_result(&const_one));
+ }
+ PG_RETURN_NUMERIC(make_result(&const_nan));
+ }
+ /* At least one input is infinite, but error rules still apply */
+ sign1 = numeric_sign_internal(num1);
+ sign2 = numeric_sign_internal(num2);
+ if (sign1 == 0 && sign2 < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_POWER_FUNCTION),
+ errmsg("zero raised to a negative power is undefined")));
+ if (sign1 < 0 && !numeric_is_integral(num2))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_POWER_FUNCTION),
+ errmsg("a negative number raised to a non-integer power yields a complex result")));
+
+ /*
+ * POSIX gives this series of rules for pow(3) with infinite inputs:
+ *
+ * For any value of y, if x is +1, 1.0 shall be returned.
+ */
+ if (!NUMERIC_IS_SPECIAL(num1))
+ {
+ init_var_from_num(num1, &arg1);
+ if (cmp_var(&arg1, &const_one) == 0)
+ PG_RETURN_NUMERIC(make_result(&const_one));
+ }
+
+ /*
+ * For any value of x, if y is [-]0, 1.0 shall be returned.
+ */
+ if (sign2 == 0)
+ PG_RETURN_NUMERIC(make_result(&const_one));
+
+ /*
+ * For any odd integer value of y > 0, if x is [-]0, [-]0 shall be
+ * returned. For y > 0 and not an odd integer, if x is [-]0, +0 shall
+ * be returned. (Since we don't deal in minus zero, we need not
+ * distinguish these two cases.)
+ */
+ if (sign1 == 0 && sign2 > 0)
+ PG_RETURN_NUMERIC(make_result(&const_zero));
+
+ /*
+ * If x is -1, and y is [-]Inf, 1.0 shall be returned.
+ *
+ * For |x| < 1, if y is -Inf, +Inf shall be returned.
+ *
+ * For |x| > 1, if y is -Inf, +0 shall be returned.
+ *
+ * For |x| < 1, if y is +Inf, +0 shall be returned.
+ *
+ * For |x| > 1, if y is +Inf, +Inf shall be returned.
+ */
+ if (NUMERIC_IS_INF(num2))
+ {
+ bool abs_x_gt_one;
+
+ if (NUMERIC_IS_SPECIAL(num1))
+ abs_x_gt_one = true; /* x is either Inf or -Inf */
+ else
+ {
+ init_var_from_num(num1, &arg1);
+ if (cmp_var(&arg1, &const_minus_one) == 0)
+ PG_RETURN_NUMERIC(make_result(&const_one));
+ arg1.sign = NUMERIC_POS; /* now arg1 = abs(x) */
+ abs_x_gt_one = (cmp_var(&arg1, &const_one) > 0);
+ }
+ if (abs_x_gt_one == (sign2 > 0))
+ PG_RETURN_NUMERIC(make_result(&const_pinf));
+ else
+ PG_RETURN_NUMERIC(make_result(&const_zero));
+ }
+
+ /*
+ * For y < 0, if x is +Inf, +0 shall be returned.
+ *
+ * For y > 0, if x is +Inf, +Inf shall be returned.
+ */
+ if (NUMERIC_IS_PINF(num1))
+ {
+ if (sign2 > 0)
+ PG_RETURN_NUMERIC(make_result(&const_pinf));
+ else
+ PG_RETURN_NUMERIC(make_result(&const_zero));
+ }
+
+ Assert(NUMERIC_IS_NINF(num1));
+
+ /*
+ * For y an odd integer < 0, if x is -Inf, -0 shall be returned. For
+ * y < 0 and not an odd integer, if x is -Inf, +0 shall be returned.
+ * (Again, we need not distinguish these two cases.)
+ */
+ if (sign2 < 0)
+ PG_RETURN_NUMERIC(make_result(&const_zero));
+
+ /*
+ * For y an odd integer > 0, if x is -Inf, -Inf shall be returned. For
+ * y > 0 and not an odd integer, if x is -Inf, +Inf shall be returned.
+ */
+ init_var_from_num(num2, &arg2);
+ if (arg2.ndigits > 0 && arg2.ndigits == arg2.weight + 1 &&
+ (arg2.digits[arg2.ndigits - 1] & 1))
+ PG_RETURN_NUMERIC(make_result(&const_ninf));
+ else
+ PG_RETURN_NUMERIC(make_result(&const_pinf));
+ }
+
+ /*
+ * The SQL spec requires that we emit a particular SQLSTATE error code for
+ * certain error conditions. Specifically, we don't return a
+ * divide-by-zero error code for 0 ^ -1. Raising a negative number to a
+ * non-integer power must produce the same error code, but that case is
+ * handled in power_var().
+ */
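+ /*
+ * For example, 0 ^ (-1) reports SQLSTATE 2201F (invalid argument for
+ * power function) rather than 22012 (division by zero).
+ */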
+ sign1 = numeric_sign_internal(num1);
+ sign2 = numeric_sign_internal(num2);
+
+ if (sign1 == 0 && sign2 < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_POWER_FUNCTION),
+ errmsg("zero raised to a negative power is undefined")));
+
+ /*
+ * Initialize things
+ */
+ init_var(&result);
+ init_var_from_num(num1, &arg1);
+ init_var_from_num(num2, &arg2);
+
+ /*
+ * Call power_var() to compute and return the result; note it handles
+ * scale selection itself.
+ */
+ power_var(&arg1, &arg2, &result);
+
+ res = make_result(&result);
+
+ free_var(&result);
+
+ PG_RETURN_NUMERIC(res);
+}
+
+/*
+ * numeric_scale() -
+ *
+ * Returns the scale, i.e. the count of decimal digits in the fractional part
+ */
+Datum
+numeric_scale(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+
+ if (NUMERIC_IS_SPECIAL(num))
+ PG_RETURN_NULL();
+
+ PG_RETURN_INT32(NUMERIC_DSCALE(num));
+}
+
+/*
+ * Calculate minimum scale for value.
+ */
+static int
+get_min_scale(NumericVar *var)
+{
+ int min_scale;
+ int last_digit_pos;
+
+ /*
+ * Ordinarily, the input value will be "stripped" so that the last
+ * NumericDigit is nonzero. But we don't want to get into an infinite
+ * loop if it isn't, so explicitly find the last nonzero digit.
+ */
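+ /*
+ * For example, with DEC_DIGITS = 4, the value 12.3400 is stored as digits
+ * [12, 3400] with weight 0; the last nonzero digit sits at position 1,
+ * giving (1 - 0) * 4 = 4, and its two trailing decimal zeroes then reduce
+ * the minimum scale to 2.
+ */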
+ last_digit_pos = var->ndigits - 1;
+ while (last_digit_pos >= 0 &&
+ var->digits[last_digit_pos] == 0)
+ last_digit_pos--;
+
+ if (last_digit_pos >= 0)
+ {
+ /* compute min_scale assuming that the last NumericDigit has no trailing zeroes */
+ min_scale = (last_digit_pos - var->weight) * DEC_DIGITS;
+
+ /*
+ * We could get a negative result if there are no digits after the
+ * decimal point. In this case the min_scale must be zero.
+ */
+ if (min_scale > 0)
+ {
+ /*
+ * Reduce min_scale if trailing digit(s) in last NumericDigit are
+ * zero.
+ */
+ NumericDigit last_digit = var->digits[last_digit_pos];
+
+ while (last_digit % 10 == 0)
+ {
+ min_scale--;
+ last_digit /= 10;
+ }
+ }
+ else
+ min_scale = 0;
+ }
+ else
+ min_scale = 0; /* result if input is zero */
+
+ return min_scale;
+}
+
+/*
+ * Returns minimum scale required to represent supplied value without loss.
+ */
+Datum
+numeric_min_scale(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+ NumericVar arg;
+ int min_scale;
+
+ if (NUMERIC_IS_SPECIAL(num))
+ PG_RETURN_NULL();
+
+ init_var_from_num(num, &arg);
+ min_scale = get_min_scale(&arg);
+ free_var(&arg);
+
+ PG_RETURN_INT32(min_scale);
+}
+
+/*
+ * Reduce scale of numeric value to represent supplied value without loss.
+ */
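+/*
+ * For example, trim_scale(8.4100) returns 8.41.
+ */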
+Datum
+numeric_trim_scale(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+ Numeric res;
+ NumericVar result;
+
+ if (NUMERIC_IS_SPECIAL(num))
+ PG_RETURN_NUMERIC(duplicate_numeric(num));
+
+ init_var_from_num(num, &result);
+ result.dscale = get_min_scale(&result);
+ res = make_result(&result);
+ free_var(&result);
+
+ PG_RETURN_NUMERIC(res);
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Type conversion functions
+ *
+ * ----------------------------------------------------------------------
+ */
+
+Numeric
+int64_to_numeric(int64 val)
+{
+ Numeric res;
+ NumericVar result;
+
+ init_var(&result);
+
+ int64_to_numericvar(val, &result);
+
+ res = make_result(&result);
+
+ free_var(&result);
+
+ return res;
+}
+
+/*
+ * Convert val1/(10**log10val2) to numeric. This is much faster than normal
+ * numeric division.
+ */
+Numeric
+int64_div_fast_to_numeric(int64 val1, int log10val2)
+{
+ Numeric res;
+ NumericVar result;
+ int rscale;
+ int w;
+ int m;
+
+ init_var(&result);
+
+ /* result scale */
+ rscale = log10val2 < 0 ? 0 : log10val2;
+
+ /* how much to decrease the weight by */
+ w = log10val2 / DEC_DIGITS;
+ /* how much is left to divide by */
+ m = log10val2 % DEC_DIGITS;
+ if (m < 0)
+ {
+ m += DEC_DIGITS;
+ w--;
+ }
+
+ /*
+ * If there is anything left to divide by (10^m with 0 < m < DEC_DIGITS),
+ * multiply the dividend by 10^(DEC_DIGITS - m), and shift the weight by
+ * one more.
+ */
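+ /*
+ * For example, with DEC_DIGITS = 4, dividing 123456 by 10^3 gives m = 3:
+ * the dividend is first scaled to 1234560 and w becomes 1, and dropping
+ * the weight by one NBASE digit then yields 123.456 at rscale 3.
+ */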
+ if (m > 0)
+ {
+#if DEC_DIGITS == 4
+ static const int pow10[] = {1, 10, 100, 1000};
+#elif DEC_DIGITS == 2
+ static const int pow10[] = {1, 10};
+#elif DEC_DIGITS == 1
+ static const int pow10[] = {1};
+#else
+#error unsupported NBASE
+#endif
+ int64 factor = pow10[DEC_DIGITS - m];
+ int64 new_val1;
+
+ StaticAssertStmt(lengthof(pow10) == DEC_DIGITS, "mismatch with DEC_DIGITS");
+
+ if (unlikely(pg_mul_s64_overflow(val1, factor, &new_val1)))
+ {
+#ifdef HAVE_INT128
+ /* do the multiplication using 128-bit integers */
+ int128 tmp;
+
+ tmp = (int128) val1 * (int128) factor;
+
+ int128_to_numericvar(tmp, &result);
+#else
+ /* do the multiplication using numerics */
+ NumericVar tmp;
+
+ init_var(&tmp);
+
+ int64_to_numericvar(val1, &result);
+ int64_to_numericvar(factor, &tmp);
+ mul_var(&result, &tmp, &result, 0);
+
+ free_var(&tmp);
+#endif
+ }
+ else
+ int64_to_numericvar(new_val1, &result);
+
+ w++;
+ }
+ else
+ int64_to_numericvar(val1, &result);
+
+ result.weight -= w;
+ result.dscale = rscale;
+
+ res = make_result(&result);
+
+ free_var(&result);
+
+ return res;
+}
+
+Datum
+int4_numeric(PG_FUNCTION_ARGS)
+{
+ int32 val = PG_GETARG_INT32(0);
+
+ PG_RETURN_NUMERIC(int64_to_numeric(val));
+}
+
+int32
+numeric_int4_opt_error(Numeric num, bool *have_error)
+{
+ NumericVar x;
+ int32 result;
+
+ if (have_error)
+ *have_error = false;
+
+ if (NUMERIC_IS_SPECIAL(num))
+ {
+ if (have_error)
+ {
+ *have_error = true;
+ return 0;
+ }
+ else
+ {
+ if (NUMERIC_IS_NAN(num))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot convert NaN to %s", "integer")));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot convert infinity to %s", "integer")));
+ }
+ }
+
+ /* Convert to variable format, then convert to int4 */
+ init_var_from_num(num, &x);
+
+ if (!numericvar_to_int32(&x, &result))
+ {
+ if (have_error)
+ {
+ *have_error = true;
+ return 0;
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+ }
+ }
+
+ return result;
+}
+
+Datum
+numeric_int4(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+
+ PG_RETURN_INT32(numeric_int4_opt_error(num, NULL));
+}
+
+/*
+ * Given a NumericVar, convert it to an int32. If the value exceeds the
+ * range of an int32, false is returned; otherwise true is returned.
+ * The input NumericVar is *not* free'd.
+ */
+static bool
+numericvar_to_int32(const NumericVar *var, int32 *result)
+{
+ int64 val;
+
+ if (!numericvar_to_int64(var, &val))
+ return false;
+
+ if (unlikely(val < PG_INT32_MIN) || unlikely(val > PG_INT32_MAX))
+ return false;
+
+ /* Down-convert to int4 */
+ *result = (int32) val;
+
+ return true;
+}
+
+Datum
+int8_numeric(PG_FUNCTION_ARGS)
+{
+ int64 val = PG_GETARG_INT64(0);
+
+ PG_RETURN_NUMERIC(int64_to_numeric(val));
+}
+
+
+Datum
+numeric_int8(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+ NumericVar x;
+ int64 result;
+
+ if (NUMERIC_IS_SPECIAL(num))
+ {
+ if (NUMERIC_IS_NAN(num))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot convert NaN to %s", "bigint")));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot convert infinity to %s", "bigint")));
+ }
+
+ /* Convert to variable format and thence to int8 */
+ init_var_from_num(num, &x);
+
+ if (!numericvar_to_int64(&x, &result))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+
+ PG_RETURN_INT64(result);
+}
+
+
+Datum
+int2_numeric(PG_FUNCTION_ARGS)
+{
+ int16 val = PG_GETARG_INT16(0);
+
+ PG_RETURN_NUMERIC(int64_to_numeric(val));
+}
+
+
+Datum
+numeric_int2(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+ NumericVar x;
+ int64 val;
+ int16 result;
+
+ if (NUMERIC_IS_SPECIAL(num))
+ {
+ if (NUMERIC_IS_NAN(num))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot convert NaN to %s", "smallint")));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot convert infinity to %s", "smallint")));
+ }
+
+ /* Convert to variable format and thence to int8 */
+ init_var_from_num(num, &x);
+
+ if (!numericvar_to_int64(&x, &val))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("smallint out of range")));
+
+ if (unlikely(val < PG_INT16_MIN) || unlikely(val > PG_INT16_MAX))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("smallint out of range")));
+
+ /* Down-convert to int2 */
+ result = (int16) val;
+
+ PG_RETURN_INT16(result);
+}
+
+
+Datum
+float8_numeric(PG_FUNCTION_ARGS)
+{
+ float8 val = PG_GETARG_FLOAT8(0);
+ Numeric res;
+ NumericVar result;
+ char buf[DBL_DIG + 100];
+
+ if (isnan(val))
+ PG_RETURN_NUMERIC(make_result(&const_nan));
+
+ if (isinf(val))
+ {
+ if (val < 0)
+ PG_RETURN_NUMERIC(make_result(&const_ninf));
+ else
+ PG_RETURN_NUMERIC(make_result(&const_pinf));
+ }
+
+ snprintf(buf, sizeof(buf), "%.*g", DBL_DIG, val);
+
+ init_var(&result);
+
+ /* Assume we need not worry about leading/trailing spaces */
+ (void) set_var_from_str(buf, buf, &result);
+
+ res = make_result(&result);
+
+ free_var(&result);
+
+ PG_RETURN_NUMERIC(res);
+}
+
+
+Datum
+numeric_float8(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+ char *tmp;
+ Datum result;
+
+ if (NUMERIC_IS_SPECIAL(num))
+ {
+ if (NUMERIC_IS_PINF(num))
+ PG_RETURN_FLOAT8(get_float8_infinity());
+ else if (NUMERIC_IS_NINF(num))
+ PG_RETURN_FLOAT8(-get_float8_infinity());
+ else
+ PG_RETURN_FLOAT8(get_float8_nan());
+ }
+
+ tmp = DatumGetCString(DirectFunctionCall1(numeric_out,
+ NumericGetDatum(num)));
+
+ result = DirectFunctionCall1(float8in, CStringGetDatum(tmp));
+
+ pfree(tmp);
+
+ PG_RETURN_DATUM(result);
+}
+
+
+/*
+ * Convert numeric to float8; if out of range, return +/- HUGE_VAL
+ *
+ * (internal helper function, not directly callable from SQL)
+ */
+Datum
+numeric_float8_no_overflow(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+ double val;
+
+ if (NUMERIC_IS_SPECIAL(num))
+ {
+ if (NUMERIC_IS_PINF(num))
+ val = HUGE_VAL;
+ else if (NUMERIC_IS_NINF(num))
+ val = -HUGE_VAL;
+ else
+ val = get_float8_nan();
+ }
+ else
+ {
+ NumericVar x;
+
+ init_var_from_num(num, &x);
+ val = numericvar_to_double_no_overflow(&x);
+ }
+
+ PG_RETURN_FLOAT8(val);
+}
+
+Datum
+float4_numeric(PG_FUNCTION_ARGS)
+{
+ float4 val = PG_GETARG_FLOAT4(0);
+ Numeric res;
+ NumericVar result;
+ char buf[FLT_DIG + 100];
+
+ if (isnan(val))
+ PG_RETURN_NUMERIC(make_result(&const_nan));
+
+ if (isinf(val))
+ {
+ if (val < 0)
+ PG_RETURN_NUMERIC(make_result(&const_ninf));
+ else
+ PG_RETURN_NUMERIC(make_result(&const_pinf));
+ }
+
+ snprintf(buf, sizeof(buf), "%.*g", FLT_DIG, val);
+
+ init_var(&result);
+
+ /* Assume we need not worry about leading/trailing spaces */
+ (void) set_var_from_str(buf, buf, &result);
+
+ res = make_result(&result);
+
+ free_var(&result);
+
+ PG_RETURN_NUMERIC(res);
+}
+
+
+Datum
+numeric_float4(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+ char *tmp;
+ Datum result;
+
+ if (NUMERIC_IS_SPECIAL(num))
+ {
+ if (NUMERIC_IS_PINF(num))
+ PG_RETURN_FLOAT4(get_float4_infinity());
+ else if (NUMERIC_IS_NINF(num))
+ PG_RETURN_FLOAT4(-get_float4_infinity());
+ else
+ PG_RETURN_FLOAT4(get_float4_nan());
+ }
+
+ tmp = DatumGetCString(DirectFunctionCall1(numeric_out,
+ NumericGetDatum(num)));
+
+ result = DirectFunctionCall1(float4in, CStringGetDatum(tmp));
+
+ pfree(tmp);
+
+ PG_RETURN_DATUM(result);
+}
+
+
+Datum
+numeric_pg_lsn(PG_FUNCTION_ARGS)
+{
+ Numeric num = PG_GETARG_NUMERIC(0);
+ NumericVar x;
+ XLogRecPtr result;
+
+ if (NUMERIC_IS_SPECIAL(num))
+ {
+ if (NUMERIC_IS_NAN(num))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot convert NaN to %s", "pg_lsn")));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot convert infinity to %s", "pg_lsn")));
+ }
+
+ /* Convert to variable format and thence to pg_lsn */
+ init_var_from_num(num, &x);
+
+ if (!numericvar_to_uint64(&x, (uint64 *) &result))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("pg_lsn out of range")));
+
+ PG_RETURN_LSN(result);
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Aggregate functions
+ *
+ * The transition datatype for all these aggregates is declared as INTERNAL.
+ * Actually, it's a pointer to a NumericAggState allocated in the aggregate
+ * context. The digit buffers for the NumericVars will be there too.
+ *
+ * On platforms which support 128-bit integers some aggregates instead use a
+ * 128-bit integer based transition datatype to speed up calculations.
+ *
+ * ----------------------------------------------------------------------
+ */
+
+typedef struct NumericAggState
+{
+ bool calcSumX2; /* if true, calculate sumX2 */
+ MemoryContext agg_context; /* context we're calculating in */
+ int64 N; /* count of processed numbers */
+ NumericSumAccum sumX; /* sum of processed numbers */
+ NumericSumAccum sumX2; /* sum of squares of processed numbers */
+ int maxScale; /* maximum scale seen so far */
+ int64 maxScaleCount; /* number of values seen with maximum scale */
+ /* These counts are *not* included in N! Use NA_TOTAL_COUNT() as needed */
+ int64 NaNcount; /* count of NaN values */
+ int64 pInfcount; /* count of +Inf values */
+ int64 nInfcount; /* count of -Inf values */
+} NumericAggState;
+
+#define NA_TOTAL_COUNT(na) \
+ ((na)->N + (na)->NaNcount + (na)->pInfcount + (na)->nInfcount)
+
+/*
+ * Prepare state data for a numeric aggregate function that needs to compute
+ * sum, count and optionally sum of squares of the input.
+ */
+static NumericAggState *
+makeNumericAggState(FunctionCallInfo fcinfo, bool calcSumX2)
+{
+ NumericAggState *state;
+ MemoryContext agg_context;
+ MemoryContext old_context;
+
+ if (!AggCheckCallContext(fcinfo, &agg_context))
+ elog(ERROR, "aggregate function called in non-aggregate context");
+
+ old_context = MemoryContextSwitchTo(agg_context);
+
+ state = (NumericAggState *) palloc0(sizeof(NumericAggState));
+ state->calcSumX2 = calcSumX2;
+ state->agg_context = agg_context;
+
+ MemoryContextSwitchTo(old_context);
+
+ return state;
+}
+
+/*
+ * Like makeNumericAggState(), but allocate the state in the current memory
+ * context.
+ */
+static NumericAggState *
+makeNumericAggStateCurrentContext(bool calcSumX2)
+{
+ NumericAggState *state;
+
+ state = (NumericAggState *) palloc0(sizeof(NumericAggState));
+ state->calcSumX2 = calcSumX2;
+ state->agg_context = CurrentMemoryContext;
+
+ return state;
+}
+
+/*
+ * Accumulate a new input value for numeric aggregate functions.
+ */
+static void
+do_numeric_accum(NumericAggState *state, Numeric newval)
+{
+ NumericVar X;
+ NumericVar X2;
+ MemoryContext old_context;
+
+ /* Count NaN/infinity inputs separately from all else */
+ if (NUMERIC_IS_SPECIAL(newval))
+ {
+ if (NUMERIC_IS_PINF(newval))
+ state->pInfcount++;
+ else if (NUMERIC_IS_NINF(newval))
+ state->nInfcount++;
+ else
+ state->NaNcount++;
+ return;
+ }
+
+ /* load processed number in short-lived context */
+ init_var_from_num(newval, &X);
+
+ /*
+ * Track the highest input dscale that we've seen, to support inverse
+ * transitions (see do_numeric_discard).
+ */
+ if (X.dscale > state->maxScale)
+ {
+ state->maxScale = X.dscale;
+ state->maxScaleCount = 1;
+ }
+ else if (X.dscale == state->maxScale)
+ state->maxScaleCount++;
+
+ /* if we need X^2, calculate that in short-lived context */
+ if (state->calcSumX2)
+ {
+ init_var(&X2);
+ mul_var(&X, &X, &X2, X.dscale * 2);
+ }
+
+ /* The rest of this needs to work in the aggregate context */
+ old_context = MemoryContextSwitchTo(state->agg_context);
+
+ state->N++;
+
+ /* Accumulate sums */
+ accum_sum_add(&(state->sumX), &X);
+
+ if (state->calcSumX2)
+ accum_sum_add(&(state->sumX2), &X2);
+
+ MemoryContextSwitchTo(old_context);
+}
+
+/*
+ * Attempt to remove an input value from the aggregated state.
+ *
+ * If the value cannot be removed then the function will return false; the
+ * possible reasons for failing are described below.
+ *
+ * If we aggregate the values 1.01 and 2 then the result will be 3.01.
+ * If we are then asked to un-aggregate the 1.01 then we must fail as we
+ * won't be able to tell what the new aggregated value's dscale should be.
+ * We don't want to return 2.00 (dscale = 2), since the sum's dscale would
+ * have been zero if we'd really aggregated only 2.
+ *
+ * Note: alternatively, we could count the number of inputs with each possible
+ * dscale (up to some sane limit). Not yet clear if it's worth the trouble.
+ */
+static bool
+do_numeric_discard(NumericAggState *state, Numeric newval)
+{
+ NumericVar X;
+ NumericVar X2;
+ MemoryContext old_context;
+
+ /* Count NaN/infinity inputs separately from all else */
+ if (NUMERIC_IS_SPECIAL(newval))
+ {
+ if (NUMERIC_IS_PINF(newval))
+ state->pInfcount--;
+ else if (NUMERIC_IS_NINF(newval))
+ state->nInfcount--;
+ else
+ state->NaNcount--;
+ return true;
+ }
+
+ /* load processed number in short-lived context */
+ init_var_from_num(newval, &X);
+
+ /*
+ * state->sumX's dscale is the maximum dscale of any of the inputs.
+ * Removing the last input with that dscale would require us to recompute
+ * the maximum dscale of the *remaining* inputs, which we cannot do unless
+ * no more non-NaN inputs remain at all. So we report a failure instead,
+ * and force the aggregation to be redone from scratch.
+ */
+ if (X.dscale == state->maxScale)
+ {
+ if (state->maxScaleCount > 1 || state->maxScale == 0)
+ {
+ /*
+ * Some remaining inputs have same dscale, or dscale hasn't gotten
+ * above zero anyway
+ */
+ state->maxScaleCount--;
+ }
+ else if (state->N == 1)
+ {
+ /* No remaining non-NaN inputs at all, so reset maxScale */
+ state->maxScale = 0;
+ state->maxScaleCount = 0;
+ }
+ else
+ {
+ /* Correct new maxScale is uncertain, must fail */
+ return false;
+ }
+ }
+
+ /* if we need X^2, calculate that in short-lived context */
+ if (state->calcSumX2)
+ {
+ init_var(&X2);
+ mul_var(&X, &X, &X2, X.dscale * 2);
+ }
+
+ /* The rest of this needs to work in the aggregate context */
+ old_context = MemoryContextSwitchTo(state->agg_context);
+
+ if (state->N-- > 1)
+ {
+ /* Negate X, to subtract it from the sum */
+ X.sign = (X.sign == NUMERIC_POS ? NUMERIC_NEG : NUMERIC_POS);
+ accum_sum_add(&(state->sumX), &X);
+
+ if (state->calcSumX2)
+ {
+ /* Negate X^2. X^2 is always positive */
+ X2.sign = NUMERIC_NEG;
+ accum_sum_add(&(state->sumX2), &X2);
+ }
+ }
+ else
+ {
+ /* Zero the sums */
+ Assert(state->N == 0);
+
+ accum_sum_reset(&state->sumX);
+ if (state->calcSumX2)
+ accum_sum_reset(&state->sumX2);
+ }
+
+ MemoryContextSwitchTo(old_context);
+
+ return true;
+}
+
+/*
+ * Generic transition function for numeric aggregates that require sumX2.
+ */
+Datum
+numeric_accum(PG_FUNCTION_ARGS)
+{
+ NumericAggState *state;
+
+ state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+
+ /* Create the state data on the first call */
+ if (state == NULL)
+ state = makeNumericAggState(fcinfo, true);
+
+ if (!PG_ARGISNULL(1))
+ do_numeric_accum(state, PG_GETARG_NUMERIC(1));
+
+ PG_RETURN_POINTER(state);
+}
+
+/*
+ * Generic combine function for numeric aggregates which require sumX2
+ */
+Datum
+numeric_combine(PG_FUNCTION_ARGS)
+{
+ NumericAggState *state1;
+ NumericAggState *state2;
+ MemoryContext agg_context;
+ MemoryContext old_context;
+
+ if (!AggCheckCallContext(fcinfo, &agg_context))
+ elog(ERROR, "aggregate function called in non-aggregate context");
+
+ state1 = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+ state2 = PG_ARGISNULL(1) ? NULL : (NumericAggState *) PG_GETARG_POINTER(1);
+
+ if (state2 == NULL)
+ PG_RETURN_POINTER(state1);
+
+ /* manually copy all fields from state2 to state1 */
+ if (state1 == NULL)
+ {
+ old_context = MemoryContextSwitchTo(agg_context);
+
+ state1 = makeNumericAggStateCurrentContext(true);
+ state1->N = state2->N;
+ state1->NaNcount = state2->NaNcount;
+ state1->pInfcount = state2->pInfcount;
+ state1->nInfcount = state2->nInfcount;
+ state1->maxScale = state2->maxScale;
+ state1->maxScaleCount = state2->maxScaleCount;
+
+ accum_sum_copy(&state1->sumX, &state2->sumX);
+ accum_sum_copy(&state1->sumX2, &state2->sumX2);
+
+ MemoryContextSwitchTo(old_context);
+
+ PG_RETURN_POINTER(state1);
+ }
+
+ state1->N += state2->N;
+ state1->NaNcount += state2->NaNcount;
+ state1->pInfcount += state2->pInfcount;
+ state1->nInfcount += state2->nInfcount;
+
+ if (state2->N > 0)
+ {
+ /*
+ * These are currently only needed for moving aggregates, but let's do
+ * the right thing anyway...
+ */
+ if (state2->maxScale > state1->maxScale)
+ {
+ state1->maxScale = state2->maxScale;
+ state1->maxScaleCount = state2->maxScaleCount;
+ }
+ else if (state2->maxScale == state1->maxScale)
+ state1->maxScaleCount += state2->maxScaleCount;
+
+ /* The rest of this needs to work in the aggregate context */
+ old_context = MemoryContextSwitchTo(agg_context);
+
+ /* Accumulate sums */
+ accum_sum_combine(&state1->sumX, &state2->sumX);
+ accum_sum_combine(&state1->sumX2, &state2->sumX2);
+
+ MemoryContextSwitchTo(old_context);
+ }
+ PG_RETURN_POINTER(state1);
+}
+
+/*
+ * Generic transition function for numeric aggregates that don't require sumX2.
+ */
+Datum
+numeric_avg_accum(PG_FUNCTION_ARGS)
+{
+ NumericAggState *state;
+
+ state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+
+ /* Create the state data on the first call */
+ if (state == NULL)
+ state = makeNumericAggState(fcinfo, false);
+
+ if (!PG_ARGISNULL(1))
+ do_numeric_accum(state, PG_GETARG_NUMERIC(1));
+
+ PG_RETURN_POINTER(state);
+}
+
+/*
+ * Combine function for numeric aggregates which don't require sumX2
+ */
+Datum
+numeric_avg_combine(PG_FUNCTION_ARGS)
+{
+ NumericAggState *state1;
+ NumericAggState *state2;
+ MemoryContext agg_context;
+ MemoryContext old_context;
+
+ if (!AggCheckCallContext(fcinfo, &agg_context))
+ elog(ERROR, "aggregate function called in non-aggregate context");
+
+ state1 = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+ state2 = PG_ARGISNULL(1) ? NULL : (NumericAggState *) PG_GETARG_POINTER(1);
+
+ if (state2 == NULL)
+ PG_RETURN_POINTER(state1);
+
+ /* manually copy all fields from state2 to state1 */
+ if (state1 == NULL)
+ {
+ old_context = MemoryContextSwitchTo(agg_context);
+
+ state1 = makeNumericAggStateCurrentContext(false);
+ state1->N = state2->N;
+ state1->NaNcount = state2->NaNcount;
+ state1->pInfcount = state2->pInfcount;
+ state1->nInfcount = state2->nInfcount;
+ state1->maxScale = state2->maxScale;
+ state1->maxScaleCount = state2->maxScaleCount;
+
+ accum_sum_copy(&state1->sumX, &state2->sumX);
+
+ MemoryContextSwitchTo(old_context);
+
+ PG_RETURN_POINTER(state1);
+ }
+
+ state1->N += state2->N;
+ state1->NaNcount += state2->NaNcount;
+ state1->pInfcount += state2->pInfcount;
+ state1->nInfcount += state2->nInfcount;
+
+ if (state2->N > 0)
+ {
+ /*
+ * These are currently only needed for moving aggregates, but let's do
+ * the right thing anyway...
+ */
+ if (state2->maxScale > state1->maxScale)
+ {
+ state1->maxScale = state2->maxScale;
+ state1->maxScaleCount = state2->maxScaleCount;
+ }
+ else if (state2->maxScale == state1->maxScale)
+ state1->maxScaleCount += state2->maxScaleCount;
+
+ /* The rest of this needs to work in the aggregate context */
+ old_context = MemoryContextSwitchTo(agg_context);
+
+ /* Accumulate sums */
+ accum_sum_combine(&state1->sumX, &state2->sumX);
+
+ MemoryContextSwitchTo(old_context);
+ }
+ PG_RETURN_POINTER(state1);
+}
+
+/*
+ * numeric_avg_serialize
+ * Serialize NumericAggState for numeric aggregates that don't require
+ * sumX2.
+ */
+Datum
+numeric_avg_serialize(PG_FUNCTION_ARGS)
+{
+ NumericAggState *state;
+ StringInfoData buf;
+ bytea *result;
+ NumericVar tmp_var;
+
+ /* Ensure we disallow calling when not in aggregate context */
+ if (!AggCheckCallContext(fcinfo, NULL))
+ elog(ERROR, "aggregate function called in non-aggregate context");
+
+ state = (NumericAggState *) PG_GETARG_POINTER(0);
+
+ init_var(&tmp_var);
+
+ pq_begintypsend(&buf);
+
+ /* N */
+ pq_sendint64(&buf, state->N);
+
+ /* sumX */
+ accum_sum_final(&state->sumX, &tmp_var);
+ numericvar_serialize(&buf, &tmp_var);
+
+ /* maxScale */
+ pq_sendint32(&buf, state->maxScale);
+
+ /* maxScaleCount */
+ pq_sendint64(&buf, state->maxScaleCount);
+
+ /* NaNcount */
+ pq_sendint64(&buf, state->NaNcount);
+
+ /* pInfcount */
+ pq_sendint64(&buf, state->pInfcount);
+
+ /* nInfcount */
+ pq_sendint64(&buf, state->nInfcount);
+
+ result = pq_endtypsend(&buf);
+
+ free_var(&tmp_var);
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+/*
+ * numeric_avg_deserialize
+ * Deserialize bytea into NumericAggState for numeric aggregates that
+ * don't require sumX2.
+ */
+Datum
+numeric_avg_deserialize(PG_FUNCTION_ARGS)
+{
+ bytea *sstate;
+ NumericAggState *result;
+ StringInfoData buf;
+ NumericVar tmp_var;
+
+ if (!AggCheckCallContext(fcinfo, NULL))
+ elog(ERROR, "aggregate function called in non-aggregate context");
+
+ sstate = PG_GETARG_BYTEA_PP(0);
+
+ init_var(&tmp_var);
+
+ /*
+ * Copy the bytea into a StringInfo so that we can "receive" it using the
+ * standard recv-function infrastructure.
+ */
+ initStringInfo(&buf);
+ appendBinaryStringInfo(&buf,
+ VARDATA_ANY(sstate), VARSIZE_ANY_EXHDR(sstate));
+
+ result = makeNumericAggStateCurrentContext(false);
+
+ /* N */
+ result->N = pq_getmsgint64(&buf);
+
+ /* sumX */
+ numericvar_deserialize(&buf, &tmp_var);
+ accum_sum_add(&(result->sumX), &tmp_var);
+
+ /* maxScale */
+ result->maxScale = pq_getmsgint(&buf, 4);
+
+ /* maxScaleCount */
+ result->maxScaleCount = pq_getmsgint64(&buf);
+
+ /* NaNcount */
+ result->NaNcount = pq_getmsgint64(&buf);
+
+ /* pInfcount */
+ result->pInfcount = pq_getmsgint64(&buf);
+
+ /* nInfcount */
+ result->nInfcount = pq_getmsgint64(&buf);
+
+ pq_getmsgend(&buf);
+ pfree(buf.data);
+
+ free_var(&tmp_var);
+
+ PG_RETURN_POINTER(result);
+}
+
+/*
+ * numeric_serialize
+ * Serialization function for NumericAggState for numeric aggregates that
+ * require sumX2.
+ */
+Datum
+numeric_serialize(PG_FUNCTION_ARGS)
+{
+ NumericAggState *state;
+ StringInfoData buf;
+ bytea *result;
+ NumericVar tmp_var;
+
+ /* Ensure we disallow calling when not in aggregate context */
+ if (!AggCheckCallContext(fcinfo, NULL))
+ elog(ERROR, "aggregate function called in non-aggregate context");
+
+ state = (NumericAggState *) PG_GETARG_POINTER(0);
+
+ init_var(&tmp_var);
+
+ pq_begintypsend(&buf);
+
+ /* N */
+ pq_sendint64(&buf, state->N);
+
+ /* sumX */
+ accum_sum_final(&state->sumX, &tmp_var);
+ numericvar_serialize(&buf, &tmp_var);
+
+ /* sumX2 */
+ accum_sum_final(&state->sumX2, &tmp_var);
+ numericvar_serialize(&buf, &tmp_var);
+
+ /* maxScale */
+ pq_sendint32(&buf, state->maxScale);
+
+ /* maxScaleCount */
+ pq_sendint64(&buf, state->maxScaleCount);
+
+ /* NaNcount */
+ pq_sendint64(&buf, state->NaNcount);
+
+ /* pInfcount */
+ pq_sendint64(&buf, state->pInfcount);
+
+ /* nInfcount */
+ pq_sendint64(&buf, state->nInfcount);
+
+ result = pq_endtypsend(&buf);
+
+ free_var(&tmp_var);
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+/*
+ * numeric_deserialize
+ * Deserialization function for NumericAggState for numeric aggregates that
+ * require sumX2.
+ */
+Datum
+numeric_deserialize(PG_FUNCTION_ARGS)
+{
+ bytea *sstate;
+ NumericAggState *result;
+ StringInfoData buf;
+ NumericVar tmp_var;
+
+ if (!AggCheckCallContext(fcinfo, NULL))
+ elog(ERROR, "aggregate function called in non-aggregate context");
+
+ sstate = PG_GETARG_BYTEA_PP(0);
+
+ init_var(&tmp_var);
+
+ /*
+ * Copy the bytea into a StringInfo so that we can "receive" it using the
+ * standard recv-function infrastructure.
+ */
+ initStringInfo(&buf);
+ appendBinaryStringInfo(&buf,
+ VARDATA_ANY(sstate), VARSIZE_ANY_EXHDR(sstate));
+
+ result = makeNumericAggStateCurrentContext(false);
+
+ /* N */
+ result->N = pq_getmsgint64(&buf);
+
+ /* sumX */
+ numericvar_deserialize(&buf, &tmp_var);
+ accum_sum_add(&(result->sumX), &tmp_var);
+
+ /* sumX2 */
+ numericvar_deserialize(&buf, &tmp_var);
+ accum_sum_add(&(result->sumX2), &tmp_var);
+
+ /* maxScale */
+ result->maxScale = pq_getmsgint(&buf, 4);
+
+ /* maxScaleCount */
+ result->maxScaleCount = pq_getmsgint64(&buf);
+
+ /* NaNcount */
+ result->NaNcount = pq_getmsgint64(&buf);
+
+ /* pInfcount */
+ result->pInfcount = pq_getmsgint64(&buf);
+
+ /* nInfcount */
+ result->nInfcount = pq_getmsgint64(&buf);
+
+ pq_getmsgend(&buf);
+ pfree(buf.data);
+
+ free_var(&tmp_var);
+
+ PG_RETURN_POINTER(result);
+}
+
+/*
+ * Generic inverse transition function for numeric aggregates
+ * (with or without requirement for X^2).
+ */
+Datum
+numeric_accum_inv(PG_FUNCTION_ARGS)
+{
+ NumericAggState *state;
+
+ state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+
+ /* Should not get here with no state */
+ if (state == NULL)
+ elog(ERROR, "numeric_accum_inv called with NULL state");
+
+ if (!PG_ARGISNULL(1))
+ {
+ /* If we fail to perform the inverse transition, return NULL */
+ if (!do_numeric_discard(state, PG_GETARG_NUMERIC(1)))
+ PG_RETURN_NULL();
+ }
+
+ PG_RETURN_POINTER(state);
+}
+
+
+/*
+ * Integer data types in general use Numeric accumulators to share code
+ * and avoid risk of overflow.
+ *
+ * However for performance reasons optimized special-purpose accumulator
+ * routines are used when possible.
+ *
+ * On platforms with 128-bit integer support, the 128-bit routines will be
+ * used when sum(X) or sum(X*X) fit into 128-bit.
+ *
+ * For 16- and 32-bit inputs, N and sum(X) fit into 64 bits, so the 64-bit
+ * accumulators are used for SUM and AVG of these data types.
+ */
+
+#ifdef HAVE_INT128
+typedef struct Int128AggState
+{
+ bool calcSumX2; /* if true, calculate sumX2 */
+ int64 N; /* count of processed numbers */
+ int128 sumX; /* sum of processed numbers */
+ int128 sumX2; /* sum of squares of processed numbers */
+} Int128AggState;
+
+/*
+ * Prepare state data for a 128-bit aggregate function that needs to compute
+ * sum, count and optionally sum of squares of the input.
+ */
+static Int128AggState *
+makeInt128AggState(FunctionCallInfo fcinfo, bool calcSumX2)
+{
+ Int128AggState *state;
+ MemoryContext agg_context;
+ MemoryContext old_context;
+
+ if (!AggCheckCallContext(fcinfo, &agg_context))
+ elog(ERROR, "aggregate function called in non-aggregate context");
+
+ old_context = MemoryContextSwitchTo(agg_context);
+
+ state = (Int128AggState *) palloc0(sizeof(Int128AggState));
+ state->calcSumX2 = calcSumX2;
+
+ MemoryContextSwitchTo(old_context);
+
+ return state;
+}
+
+/*
+ * Like makeInt128AggState(), but allocate the state in the current memory
+ * context.
+ */
+static Int128AggState *
+makeInt128AggStateCurrentContext(bool calcSumX2)
+{
+ Int128AggState *state;
+
+ state = (Int128AggState *) palloc0(sizeof(Int128AggState));
+ state->calcSumX2 = calcSumX2;
+
+ return state;
+}
+
+/*
+ * Accumulate a new input value for 128-bit aggregate functions.
+ */
+static void
+do_int128_accum(Int128AggState *state, int128 newval)
+{
+ if (state->calcSumX2)
+ state->sumX2 += newval * newval;
+
+ state->sumX += newval;
+ state->N++;
+}
+
+/*
+ * Remove an input value from the aggregated state.
+ */
+static void
+do_int128_discard(Int128AggState *state, int128 newval)
+{
+ if (state->calcSumX2)
+ state->sumX2 -= newval * newval;
+
+ state->sumX -= newval;
+ state->N--;
+}
+
+typedef Int128AggState PolyNumAggState;
+#define makePolyNumAggState makeInt128AggState
+#define makePolyNumAggStateCurrentContext makeInt128AggStateCurrentContext
+#else
+typedef NumericAggState PolyNumAggState;
+#define makePolyNumAggState makeNumericAggState
+#define makePolyNumAggStateCurrentContext makeNumericAggStateCurrentContext
+#endif
+
+Datum
+int2_accum(PG_FUNCTION_ARGS)
+{
+ PolyNumAggState *state;
+
+ state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0);
+
+ /* Create the state data on the first call */
+ if (state == NULL)
+ state = makePolyNumAggState(fcinfo, true);
+
+ if (!PG_ARGISNULL(1))
+ {
+#ifdef HAVE_INT128
+ do_int128_accum(state, (int128) PG_GETARG_INT16(1));
+#else
+ do_numeric_accum(state, int64_to_numeric(PG_GETARG_INT16(1)));
+#endif
+ }
+
+ PG_RETURN_POINTER(state);
+}
+
+Datum
+int4_accum(PG_FUNCTION_ARGS)
+{
+ PolyNumAggState *state;
+
+ state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0);
+
+ /* Create the state data on the first call */
+ if (state == NULL)
+ state = makePolyNumAggState(fcinfo, true);
+
+ if (!PG_ARGISNULL(1))
+ {
+#ifdef HAVE_INT128
+ do_int128_accum(state, (int128) PG_GETARG_INT32(1));
+#else
+ do_numeric_accum(state, int64_to_numeric(PG_GETARG_INT32(1)));
+#endif
+ }
+
+ PG_RETURN_POINTER(state);
+}
+
+Datum
+int8_accum(PG_FUNCTION_ARGS)
+{
+ NumericAggState *state;
+
+ state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+
+ /* Create the state data on the first call */
+ if (state == NULL)
+ state = makeNumericAggState(fcinfo, true);
+
+ if (!PG_ARGISNULL(1))
+ do_numeric_accum(state, int64_to_numeric(PG_GETARG_INT64(1)));
+
+ PG_RETURN_POINTER(state);
+}
+
+/*
+ * Combine function for numeric aggregates which require sumX2
+ */
+Datum
+numeric_poly_combine(PG_FUNCTION_ARGS)
+{
+ PolyNumAggState *state1;
+ PolyNumAggState *state2;
+ MemoryContext agg_context;
+ MemoryContext old_context;
+
+ if (!AggCheckCallContext(fcinfo, &agg_context))
+ elog(ERROR, "aggregate function called in non-aggregate context");
+
+ state1 = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0);
+ state2 = PG_ARGISNULL(1) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(1);
+
+ if (state2 == NULL)
+ PG_RETURN_POINTER(state1);
+
+ /* manually copy all fields from state2 to state1 */
+ if (state1 == NULL)
+ {
+ old_context = MemoryContextSwitchTo(agg_context);
+
+ state1 = makePolyNumAggState(fcinfo, true);
+ state1->N = state2->N;
+
+#ifdef HAVE_INT128
+ state1->sumX = state2->sumX;
+ state1->sumX2 = state2->sumX2;
+#else
+ accum_sum_copy(&state1->sumX, &state2->sumX);
+ accum_sum_copy(&state1->sumX2, &state2->sumX2);
+#endif
+
+ MemoryContextSwitchTo(old_context);
+
+ PG_RETURN_POINTER(state1);
+ }
+
+ if (state2->N > 0)
+ {
+ state1->N += state2->N;
+
+#ifdef HAVE_INT128
+ state1->sumX += state2->sumX;
+ state1->sumX2 += state2->sumX2;
+#else
+ /* The rest of this needs to work in the aggregate context */
+ old_context = MemoryContextSwitchTo(agg_context);
+
+ /* Accumulate sums */
+ accum_sum_combine(&state1->sumX, &state2->sumX);
+ accum_sum_combine(&state1->sumX2, &state2->sumX2);
+
+ MemoryContextSwitchTo(old_context);
+#endif
+
+ }
+ PG_RETURN_POINTER(state1);
+}
+
+/*
+ * numeric_poly_serialize
+ * Serialize PolyNumAggState into bytea for aggregate functions which
+ * require sumX2.
+ */
+Datum
+numeric_poly_serialize(PG_FUNCTION_ARGS)
+{
+ PolyNumAggState *state;
+ StringInfoData buf;
+ bytea *result;
+ NumericVar tmp_var;
+
+ /* Ensure we disallow calling when not in aggregate context */
+ if (!AggCheckCallContext(fcinfo, NULL))
+ elog(ERROR, "aggregate function called in non-aggregate context");
+
+ state = (PolyNumAggState *) PG_GETARG_POINTER(0);
+
+ /*
+ * If the platform supports int128 then sumX and sumX2 will be of a 128-bit
+ * integer type. Here we'll convert that into a numeric type so that the
+ * combine state is in the same format for both int128 enabled machines
+ * and machines which don't support that type. The logic here is that one
+ * day we might like to send these over to another server for further
+ * processing and we want a standard format to work with.
+ */
+
+ init_var(&tmp_var);
+
+ pq_begintypsend(&buf);
+
+ /* N */
+ pq_sendint64(&buf, state->N);
+
+ /* sumX */
+#ifdef HAVE_INT128
+ int128_to_numericvar(state->sumX, &tmp_var);
+#else
+ accum_sum_final(&state->sumX, &tmp_var);
+#endif
+ numericvar_serialize(&buf, &tmp_var);
+
+ /* sumX2 */
+#ifdef HAVE_INT128
+ int128_to_numericvar(state->sumX2, &tmp_var);
+#else
+ accum_sum_final(&state->sumX2, &tmp_var);
+#endif
+ numericvar_serialize(&buf, &tmp_var);
+
+ result = pq_endtypsend(&buf);
+
+ free_var(&tmp_var);
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+/*
+ * numeric_poly_deserialize
+ * Deserialize PolyNumAggState from bytea for aggregate functions which
+ * require sumX2.
+ */
+Datum
+numeric_poly_deserialize(PG_FUNCTION_ARGS)
+{
+ bytea *sstate;
+ PolyNumAggState *result;
+ StringInfoData buf;
+ NumericVar tmp_var;
+
+ if (!AggCheckCallContext(fcinfo, NULL))
+ elog(ERROR, "aggregate function called in non-aggregate context");
+
+ sstate = PG_GETARG_BYTEA_PP(0);
+
+ init_var(&tmp_var);
+
+ /*
+ * Copy the bytea into a StringInfo so that we can "receive" it using the
+ * standard recv-function infrastructure.
+ */
+ initStringInfo(&buf);
+ appendBinaryStringInfo(&buf,
+ VARDATA_ANY(sstate), VARSIZE_ANY_EXHDR(sstate));
+
+ result = makePolyNumAggStateCurrentContext(false);
+
+ /* N */
+ result->N = pq_getmsgint64(&buf);
+
+ /* sumX */
+ numericvar_deserialize(&buf, &tmp_var);
+#ifdef HAVE_INT128
+ numericvar_to_int128(&tmp_var, &result->sumX);
+#else
+ accum_sum_add(&result->sumX, &tmp_var);
+#endif
+
+ /* sumX2 */
+ numericvar_deserialize(&buf, &tmp_var);
+#ifdef HAVE_INT128
+ numericvar_to_int128(&tmp_var, &result->sumX2);
+#else
+ accum_sum_add(&result->sumX2, &tmp_var);
+#endif
+
+ pq_getmsgend(&buf);
+ pfree(buf.data);
+
+ free_var(&tmp_var);
+
+ PG_RETURN_POINTER(result);
+}
+
+/*
+ * Transition function for int8 input when we don't need sumX2.
+ */
+Datum
+int8_avg_accum(PG_FUNCTION_ARGS)
+{
+ PolyNumAggState *state;
+
+ state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0);
+
+ /* Create the state data on the first call */
+ if (state == NULL)
+ state = makePolyNumAggState(fcinfo, false);
+
+ if (!PG_ARGISNULL(1))
+ {
+#ifdef HAVE_INT128
+ do_int128_accum(state, (int128) PG_GETARG_INT64(1));
+#else
+ do_numeric_accum(state, int64_to_numeric(PG_GETARG_INT64(1)));
+#endif
+ }
+
+ PG_RETURN_POINTER(state);
+}
+
+/*
+ * Combine function for PolyNumAggState for aggregates which don't require
+ * sumX2
+ */
+Datum
+int8_avg_combine(PG_FUNCTION_ARGS)
+{
+ PolyNumAggState *state1;
+ PolyNumAggState *state2;
+ MemoryContext agg_context;
+ MemoryContext old_context;
+
+ if (!AggCheckCallContext(fcinfo, &agg_context))
+ elog(ERROR, "aggregate function called in non-aggregate context");
+
+ state1 = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0);
+ state2 = PG_ARGISNULL(1) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(1);
+
+ if (state2 == NULL)
+ PG_RETURN_POINTER(state1);
+
+ /* manually copy all fields from state2 to state1 */
+ if (state1 == NULL)
+ {
+ old_context = MemoryContextSwitchTo(agg_context);
+
+ state1 = makePolyNumAggState(fcinfo, false);
+ state1->N = state2->N;
+
+#ifdef HAVE_INT128
+ state1->sumX = state2->sumX;
+#else
+ accum_sum_copy(&state1->sumX, &state2->sumX);
+#endif
+ MemoryContextSwitchTo(old_context);
+
+ PG_RETURN_POINTER(state1);
+ }
+
+ if (state2->N > 0)
+ {
+ state1->N += state2->N;
+
+#ifdef HAVE_INT128
+ state1->sumX += state2->sumX;
+#else
+ /* The rest of this needs to work in the aggregate context */
+ old_context = MemoryContextSwitchTo(agg_context);
+
+ /* Accumulate sums */
+ accum_sum_combine(&state1->sumX, &state2->sumX);
+
+ MemoryContextSwitchTo(old_context);
+#endif
+
+ }
+ PG_RETURN_POINTER(state1);
+}
+
+/*
+ * int8_avg_serialize
+ * Serialize PolyNumAggState into bytea using the standard
+ * recv-function infrastructure.
+ */
+Datum
+int8_avg_serialize(PG_FUNCTION_ARGS)
+{
+ PolyNumAggState *state;
+ StringInfoData buf;
+ bytea *result;
+ NumericVar tmp_var;
+
+ /* Ensure we disallow calling when not in aggregate context */
+ if (!AggCheckCallContext(fcinfo, NULL))
+ elog(ERROR, "aggregate function called in non-aggregate context");
+
+ state = (PolyNumAggState *) PG_GETARG_POINTER(0);
+
+ /*
+ * If the platform supports int128 then sumX will be a 128-bit integer type.
+ * Here we'll convert that into a numeric type so that the combine state
+ * is in the same format for both int128 enabled machines and machines
+ * which don't support that type. The logic here is that one day we might
+ * like to send these over to another server for further processing and we
+ * want a standard format to work with.
+ */
+
+ init_var(&tmp_var);
+
+ pq_begintypsend(&buf);
+
+ /* N */
+ pq_sendint64(&buf, state->N);
+
+ /* sumX */
+#ifdef HAVE_INT128
+ int128_to_numericvar(state->sumX, &tmp_var);
+#else
+ accum_sum_final(&state->sumX, &tmp_var);
+#endif
+ numericvar_serialize(&buf, &tmp_var);
+
+ result = pq_endtypsend(&buf);
+
+ free_var(&tmp_var);
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+/*
+ * int8_avg_deserialize
+ * Deserialize bytea back into PolyNumAggState.
+ */
+Datum
+int8_avg_deserialize(PG_FUNCTION_ARGS)
+{
+ bytea *sstate;
+ PolyNumAggState *result;
+ StringInfoData buf;
+ NumericVar tmp_var;
+
+ if (!AggCheckCallContext(fcinfo, NULL))
+ elog(ERROR, "aggregate function called in non-aggregate context");
+
+ sstate = PG_GETARG_BYTEA_PP(0);
+
+ init_var(&tmp_var);
+
+ /*
+ * Copy the bytea into a StringInfo so that we can "receive" it using the
+ * standard recv-function infrastructure.
+ */
+ initStringInfo(&buf);
+ appendBinaryStringInfo(&buf,
+ VARDATA_ANY(sstate), VARSIZE_ANY_EXHDR(sstate));
+
+ result = makePolyNumAggStateCurrentContext(false);
+
+ /* N */
+ result->N = pq_getmsgint64(&buf);
+
+ /* sumX */
+ numericvar_deserialize(&buf, &tmp_var);
+#ifdef HAVE_INT128
+ numericvar_to_int128(&tmp_var, &result->sumX);
+#else
+ accum_sum_add(&result->sumX, &tmp_var);
+#endif
+
+ pq_getmsgend(&buf);
+ pfree(buf.data);
+
+ free_var(&tmp_var);
+
+ PG_RETURN_POINTER(result);
+}
+
+/*
+ * Inverse transition functions to go with the above.
+ */
+
+Datum
+int2_accum_inv(PG_FUNCTION_ARGS)
+{
+ PolyNumAggState *state;
+
+ state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0);
+
+ /* Should not get here with no state */
+ if (state == NULL)
+ elog(ERROR, "int2_accum_inv called with NULL state");
+
+ if (!PG_ARGISNULL(1))
+ {
+#ifdef HAVE_INT128
+ do_int128_discard(state, (int128) PG_GETARG_INT16(1));
+#else
+ /* Should never fail, all inputs have dscale 0 */
+ if (!do_numeric_discard(state, int64_to_numeric(PG_GETARG_INT16(1))))
+ elog(ERROR, "do_numeric_discard failed unexpectedly");
+#endif
+ }
+
+ PG_RETURN_POINTER(state);
+}
+
+Datum
+int4_accum_inv(PG_FUNCTION_ARGS)
+{
+ PolyNumAggState *state;
+
+ state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0);
+
+ /* Should not get here with no state */
+ if (state == NULL)
+ elog(ERROR, "int4_accum_inv called with NULL state");
+
+ if (!PG_ARGISNULL(1))
+ {
+#ifdef HAVE_INT128
+ do_int128_discard(state, (int128) PG_GETARG_INT32(1));
+#else
+ /* Should never fail, all inputs have dscale 0 */
+ if (!do_numeric_discard(state, int64_to_numeric(PG_GETARG_INT32(1))))
+ elog(ERROR, "do_numeric_discard failed unexpectedly");
+#endif
+ }
+
+ PG_RETURN_POINTER(state);
+}
+
+Datum
+int8_accum_inv(PG_FUNCTION_ARGS)
+{
+ NumericAggState *state;
+
+ state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+
+ /* Should not get here with no state */
+ if (state == NULL)
+ elog(ERROR, "int8_accum_inv called with NULL state");
+
+ if (!PG_ARGISNULL(1))
+ {
+ /* Should never fail, all inputs have dscale 0 */
+ if (!do_numeric_discard(state, int64_to_numeric(PG_GETARG_INT64(1))))
+ elog(ERROR, "do_numeric_discard failed unexpectedly");
+ }
+
+ PG_RETURN_POINTER(state);
+}
+
+Datum
+int8_avg_accum_inv(PG_FUNCTION_ARGS)
+{
+ PolyNumAggState *state;
+
+ state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0);
+
+ /* Should not get here with no state */
+ if (state == NULL)
+ elog(ERROR, "int8_avg_accum_inv called with NULL state");
+
+ if (!PG_ARGISNULL(1))
+ {
+#ifdef HAVE_INT128
+ do_int128_discard(state, (int128) PG_GETARG_INT64(1));
+#else
+ /* Should never fail, all inputs have dscale 0 */
+ if (!do_numeric_discard(state, int64_to_numeric(PG_GETARG_INT64(1))))
+ elog(ERROR, "do_numeric_discard failed unexpectedly");
+#endif
+ }
+
+ PG_RETURN_POINTER(state);
+}
+
+Datum
+numeric_poly_sum(PG_FUNCTION_ARGS)
+{
+#ifdef HAVE_INT128
+ PolyNumAggState *state;
+ Numeric res;
+ NumericVar result;
+
+ state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0);
+
+ /* If there were no non-null inputs, return NULL */
+ if (state == NULL || state->N == 0)
+ PG_RETURN_NULL();
+
+ init_var(&result);
+
+ int128_to_numericvar(state->sumX, &result);
+
+ res = make_result(&result);
+
+ free_var(&result);
+
+ PG_RETURN_NUMERIC(res);
+#else
+ return numeric_sum(fcinfo);
+#endif
+}
+
+Datum
+numeric_poly_avg(PG_FUNCTION_ARGS)
+{
+#ifdef HAVE_INT128
+ PolyNumAggState *state;
+ NumericVar result;
+ Datum countd,
+ sumd;
+
+ state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0);
+
+ /* If there were no non-null inputs, return NULL */
+ if (state == NULL || state->N == 0)
+ PG_RETURN_NULL();
+
+ init_var(&result);
+
+ int128_to_numericvar(state->sumX, &result);
+
+ countd = NumericGetDatum(int64_to_numeric(state->N));
+ sumd = NumericGetDatum(make_result(&result));
+
+ free_var(&result);
+
+ PG_RETURN_DATUM(DirectFunctionCall2(numeric_div, sumd, countd));
+#else
+ return numeric_avg(fcinfo);
+#endif
+}
+
+Datum
+numeric_avg(PG_FUNCTION_ARGS)
+{
+ NumericAggState *state;
+ Datum N_datum;
+ Datum sumX_datum;
+ NumericVar sumX_var;
+
+ state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+
+ /* If there were no non-null inputs, return NULL */
+ if (state == NULL || NA_TOTAL_COUNT(state) == 0)
+ PG_RETURN_NULL();
+
+ if (state->NaNcount > 0) /* there was at least one NaN input */
+ PG_RETURN_NUMERIC(make_result(&const_nan));
+
+ /* adding plus and minus infinities gives NaN */
+ if (state->pInfcount > 0 && state->nInfcount > 0)
+ PG_RETURN_NUMERIC(make_result(&const_nan));
+ if (state->pInfcount > 0)
+ PG_RETURN_NUMERIC(make_result(&const_pinf));
+ if (state->nInfcount > 0)
+ PG_RETURN_NUMERIC(make_result(&const_ninf));
+
+ N_datum = NumericGetDatum(int64_to_numeric(state->N));
+
+ init_var(&sumX_var);
+ accum_sum_final(&state->sumX, &sumX_var);
+ sumX_datum = NumericGetDatum(make_result(&sumX_var));
+ free_var(&sumX_var);
+
+ PG_RETURN_DATUM(DirectFunctionCall2(numeric_div, sumX_datum, N_datum));
+}
+
+Datum
+numeric_sum(PG_FUNCTION_ARGS)
+{
+ NumericAggState *state;
+ NumericVar sumX_var;
+ Numeric result;
+
+ state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+
+ /* If there were no non-null inputs, return NULL */
+ if (state == NULL || NA_TOTAL_COUNT(state) == 0)
+ PG_RETURN_NULL();
+
+ if (state->NaNcount > 0) /* there was at least one NaN input */
+ PG_RETURN_NUMERIC(make_result(&const_nan));
+
+ /* adding plus and minus infinities gives NaN */
+ if (state->pInfcount > 0 && state->nInfcount > 0)
+ PG_RETURN_NUMERIC(make_result(&const_nan));
+ if (state->pInfcount > 0)
+ PG_RETURN_NUMERIC(make_result(&const_pinf));
+ if (state->nInfcount > 0)
+ PG_RETURN_NUMERIC(make_result(&const_ninf));
+
+ init_var(&sumX_var);
+ accum_sum_final(&state->sumX, &sumX_var);
+ result = make_result(&sumX_var);
+ free_var(&sumX_var);
+
+ PG_RETURN_NUMERIC(result);
+}
+
+/*
+ * Workhorse routine for the standard deviation and variance
+ * aggregates. 'state' is the aggregate's transition state.
+ * 'variance' specifies whether we should calculate the
+ * variance or the standard deviation. 'sample' indicates whether the
+ * caller is interested in the sample or the population
+ * variance/stddev.
+ *
+ * If the requested statistic is undefined for the input,
+ * *is_null is set to true and NULL is returned.
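+ *
+ * The calculation below uses the usual shortcut formula
+ *     numerator   = N * sum(X^2) - sum(X)^2
+ *     denominator = N * (N - 1)    (sample)  or  N * N  (population)
+ * dividing the two and, for stddev, taking the square root of the quotient.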
+ */
+static Numeric
+numeric_stddev_internal(NumericAggState *state,
+ bool variance, bool sample,
+ bool *is_null)
+{
+ Numeric res;
+ NumericVar vN,
+ vsumX,
+ vsumX2,
+ vNminus1;
+ int64 totCount;
+ int rscale;
+
+ /*
+ * Sample stddev and variance are undefined when N <= 1; population stddev
+ * is undefined when N == 0. Return NULL in either case (note that NaNs
+ * and infinities count as normal inputs for this purpose).
+ */
+ if (state == NULL || (totCount = NA_TOTAL_COUNT(state)) == 0)
+ {
+ *is_null = true;
+ return NULL;
+ }
+
+ if (sample && totCount <= 1)
+ {
+ *is_null = true;
+ return NULL;
+ }
+
+ *is_null = false;
+
+ /*
+ * Deal with NaN and infinity cases. By analogy to the behavior of the
+ * float8 functions, any infinity input produces NaN output.
+ */
+ if (state->NaNcount > 0 || state->pInfcount > 0 || state->nInfcount > 0)
+ return make_result(&const_nan);
+
+ /* OK, normal calculation applies */
+ init_var(&vN);
+ init_var(&vsumX);
+ init_var(&vsumX2);
+
+ int64_to_numericvar(state->N, &vN);
+ accum_sum_final(&(state->sumX), &vsumX);
+ accum_sum_final(&(state->sumX2), &vsumX2);
+
+ init_var(&vNminus1);
+ sub_var(&vN, &const_one, &vNminus1);
+
+ /* compute rscale for mul_var calls */
+ rscale = vsumX.dscale * 2;
+
+ mul_var(&vsumX, &vsumX, &vsumX, rscale); /* vsumX = sumX * sumX */
+ mul_var(&vN, &vsumX2, &vsumX2, rscale); /* vsumX2 = N * sumX2 */
+ sub_var(&vsumX2, &vsumX, &vsumX2); /* N * sumX2 - sumX * sumX */
+
+ if (cmp_var(&vsumX2, &const_zero) <= 0)
+ {
+ /* Watch out for roundoff error producing a negative numerator */
+ res = make_result(&const_zero);
+ }
+ else
+ {
+ if (sample)
+ mul_var(&vN, &vNminus1, &vNminus1, 0); /* N * (N - 1) */
+ else
+ mul_var(&vN, &vN, &vNminus1, 0); /* N * N */
+ rscale = select_div_scale(&vsumX2, &vNminus1);
+ div_var(&vsumX2, &vNminus1, &vsumX, rscale, true); /* variance */
+ if (!variance)
+ sqrt_var(&vsumX, &vsumX, rscale); /* stddev */
+
+ res = make_result(&vsumX);
+ }
+
+ free_var(&vNminus1);
+ free_var(&vsumX);
+ free_var(&vsumX2);
+
+ return res;
+}
+
+Datum
+numeric_var_samp(PG_FUNCTION_ARGS)
+{
+ NumericAggState *state;
+ Numeric res;
+ bool is_null;
+
+ state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+
+ res = numeric_stddev_internal(state, true, true, &is_null);
+
+ if (is_null)
+ PG_RETURN_NULL();
+ else
+ PG_RETURN_NUMERIC(res);
+}
+
+Datum
+numeric_stddev_samp(PG_FUNCTION_ARGS)
+{
+ NumericAggState *state;
+ Numeric res;
+ bool is_null;
+
+ state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+
+ res = numeric_stddev_internal(state, false, true, &is_null);
+
+ if (is_null)
+ PG_RETURN_NULL();
+ else
+ PG_RETURN_NUMERIC(res);
+}
+
+Datum
+numeric_var_pop(PG_FUNCTION_ARGS)
+{
+ NumericAggState *state;
+ Numeric res;
+ bool is_null;
+
+ state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+
+ res = numeric_stddev_internal(state, true, false, &is_null);
+
+ if (is_null)
+ PG_RETURN_NULL();
+ else
+ PG_RETURN_NUMERIC(res);
+}
+
+Datum
+numeric_stddev_pop(PG_FUNCTION_ARGS)
+{
+ NumericAggState *state;
+ Numeric res;
+ bool is_null;
+
+ state = PG_ARGISNULL(0) ? NULL : (NumericAggState *) PG_GETARG_POINTER(0);
+
+ res = numeric_stddev_internal(state, false, false, &is_null);
+
+ if (is_null)
+ PG_RETURN_NULL();
+ else
+ PG_RETURN_NUMERIC(res);
+}
+
+#ifdef HAVE_INT128
+static Numeric
+numeric_poly_stddev_internal(Int128AggState *state,
+ bool variance, bool sample,
+ bool *is_null)
+{
+ NumericAggState numstate;
+ Numeric res;
+
+ /* Initialize an empty agg state */
+ memset(&numstate, 0, sizeof(NumericAggState));
+
+ if (state)
+ {
+ NumericVar tmp_var;
+
+ numstate.N = state->N;
+
+ init_var(&tmp_var);
+
+ int128_to_numericvar(state->sumX, &tmp_var);
+ accum_sum_add(&numstate.sumX, &tmp_var);
+
+ int128_to_numericvar(state->sumX2, &tmp_var);
+ accum_sum_add(&numstate.sumX2, &tmp_var);
+
+ free_var(&tmp_var);
+ }
+
+ res = numeric_stddev_internal(&numstate, variance, sample, is_null);
+
+ if (numstate.sumX.ndigits > 0)
+ {
+ pfree(numstate.sumX.pos_digits);
+ pfree(numstate.sumX.neg_digits);
+ }
+ if (numstate.sumX2.ndigits > 0)
+ {
+ pfree(numstate.sumX2.pos_digits);
+ pfree(numstate.sumX2.neg_digits);
+ }
+
+ return res;
+}
+#endif
+
+Datum
+numeric_poly_var_samp(PG_FUNCTION_ARGS)
+{
+#ifdef HAVE_INT128
+ PolyNumAggState *state;
+ Numeric res;
+ bool is_null;
+
+ state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0);
+
+ res = numeric_poly_stddev_internal(state, true, true, &is_null);
+
+ if (is_null)
+ PG_RETURN_NULL();
+ else
+ PG_RETURN_NUMERIC(res);
+#else
+ return numeric_var_samp(fcinfo);
+#endif
+}
+
+Datum
+numeric_poly_stddev_samp(PG_FUNCTION_ARGS)
+{
+#ifdef HAVE_INT128
+ PolyNumAggState *state;
+ Numeric res;
+ bool is_null;
+
+ state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0);
+
+ res = numeric_poly_stddev_internal(state, false, true, &is_null);
+
+ if (is_null)
+ PG_RETURN_NULL();
+ else
+ PG_RETURN_NUMERIC(res);
+#else
+ return numeric_stddev_samp(fcinfo);
+#endif
+}
+
+Datum
+numeric_poly_var_pop(PG_FUNCTION_ARGS)
+{
+#ifdef HAVE_INT128
+ PolyNumAggState *state;
+ Numeric res;
+ bool is_null;
+
+ state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0);
+
+ res = numeric_poly_stddev_internal(state, true, false, &is_null);
+
+ if (is_null)
+ PG_RETURN_NULL();
+ else
+ PG_RETURN_NUMERIC(res);
+#else
+ return numeric_var_pop(fcinfo);
+#endif
+}
+
+Datum
+numeric_poly_stddev_pop(PG_FUNCTION_ARGS)
+{
+#ifdef HAVE_INT128
+ PolyNumAggState *state;
+ Numeric res;
+ bool is_null;
+
+ state = PG_ARGISNULL(0) ? NULL : (PolyNumAggState *) PG_GETARG_POINTER(0);
+
+ res = numeric_poly_stddev_internal(state, false, false, &is_null);
+
+ if (is_null)
+ PG_RETURN_NULL();
+ else
+ PG_RETURN_NUMERIC(res);
+#else
+ return numeric_stddev_pop(fcinfo);
+#endif
+}
+
+/*
+ * SUM transition functions for integer datatypes.
+ *
+ * To avoid overflow, we use accumulators wider than the input datatype.
+ * A Numeric accumulator is needed for int8 input; for int4 and int2
+ * inputs, we use int8 accumulators, which should be sufficient for practical
+ * purposes. (The latter two therefore don't really belong in this file,
+ * but we keep them here anyway.)
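+ *
+ * (As a rough bound on "practical purposes": even if every int4 input were
+ * INT_MAX, an int8 accumulator could absorb about 2^32 of them before
+ * overflowing.)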
+ *
+ * Because SQL defines the SUM() of no values to be NULL, not zero,
+ * the initial condition of the transition data value needs to be NULL. This
+ * means we can't rely on ExecAgg to automatically insert the first non-null
+ * data value into the transition data: it doesn't know how to do the type
+ * conversion. The upshot is that these routines have to be marked non-strict
+ * and handle substitution of the first non-null input themselves.
+ *
+ * Note: these functions are used only in plain aggregation mode.
+ * In moving-aggregate mode, we use intX_avg_accum and intX_avg_accum_inv.
+ */
+
+Datum
+int2_sum(PG_FUNCTION_ARGS)
+{
+ int64 newval;
+
+ if (PG_ARGISNULL(0))
+ {
+ /* No non-null input seen so far... */
+ if (PG_ARGISNULL(1))
+ PG_RETURN_NULL(); /* still no non-null */
+ /* This is the first non-null input. */
+ newval = (int64) PG_GETARG_INT16(1);
+ PG_RETURN_INT64(newval);
+ }
+
+ /*
+ * If we're invoked as an aggregate, we can cheat and modify our first
+ * parameter in-place to avoid palloc overhead. If not, we need to return
+ * the new value of the transition variable. (If int8 is pass-by-value,
+ * then of course this is useless as well as incorrect, so just ifdef it
+ * out.)
+ */
+#ifndef USE_FLOAT8_BYVAL /* controls int8 too */
+ if (AggCheckCallContext(fcinfo, NULL))
+ {
+ int64 *oldsum = (int64 *) PG_GETARG_POINTER(0);
+
+ /* Leave the running sum unchanged if the new input is null */
+ if (!PG_ARGISNULL(1))
+ *oldsum = *oldsum + (int64) PG_GETARG_INT16(1);
+
+ PG_RETURN_POINTER(oldsum);
+ }
+ else
+#endif
+ {
+ int64 oldsum = PG_GETARG_INT64(0);
+
+ /* Leave sum unchanged if new input is null. */
+ if (PG_ARGISNULL(1))
+ PG_RETURN_INT64(oldsum);
+
+ /* OK to do the addition. */
+ newval = oldsum + (int64) PG_GETARG_INT16(1);
+
+ PG_RETURN_INT64(newval);
+ }
+}
+
+Datum
+int4_sum(PG_FUNCTION_ARGS)
+{
+ int64 newval;
+
+ if (PG_ARGISNULL(0))
+ {
+ /* No non-null input seen so far... */
+ if (PG_ARGISNULL(1))
+ PG_RETURN_NULL(); /* still no non-null */
+ /* This is the first non-null input. */
+ newval = (int64) PG_GETARG_INT32(1);
+ PG_RETURN_INT64(newval);
+ }
+
+ /*
+ * If we're invoked as an aggregate, we can cheat and modify our first
+ * parameter in-place to avoid palloc overhead. If not, we need to return
+ * the new value of the transition variable. (If int8 is pass-by-value,
+ * then of course this is useless as well as incorrect, so just ifdef it
+ * out.)
+ */
+#ifndef USE_FLOAT8_BYVAL /* controls int8 too */
+ if (AggCheckCallContext(fcinfo, NULL))
+ {
+ int64 *oldsum = (int64 *) PG_GETARG_POINTER(0);
+
+ /* Leave the running sum unchanged if the new input is null */
+ if (!PG_ARGISNULL(1))
+ *oldsum = *oldsum + (int64) PG_GETARG_INT32(1);
+
+ PG_RETURN_POINTER(oldsum);
+ }
+ else
+#endif
+ {
+ int64 oldsum = PG_GETARG_INT64(0);
+
+ /* Leave sum unchanged if new input is null. */
+ if (PG_ARGISNULL(1))
+ PG_RETURN_INT64(oldsum);
+
+ /* OK to do the addition. */
+ newval = oldsum + (int64) PG_GETARG_INT32(1);
+
+ PG_RETURN_INT64(newval);
+ }
+}
+
+/*
+ * Note: this function is obsolete; it's no longer used for SUM(int8).
+ */
+Datum
+int8_sum(PG_FUNCTION_ARGS)
+{
+ Numeric oldsum;
+
+ if (PG_ARGISNULL(0))
+ {
+ /* No non-null input seen so far... */
+ if (PG_ARGISNULL(1))
+ PG_RETURN_NULL(); /* still no non-null */
+ /* This is the first non-null input. */
+ PG_RETURN_NUMERIC(int64_to_numeric(PG_GETARG_INT64(1)));
+ }
+
+ /*
+ * Note that we cannot special-case the aggregate case here, as we do for
+ * int2_sum and int4_sum: numeric is of variable size, so we cannot modify
+ * our first parameter in-place.
+ */
+
+ oldsum = PG_GETARG_NUMERIC(0);
+
+ /* Leave sum unchanged if new input is null. */
+ if (PG_ARGISNULL(1))
+ PG_RETURN_NUMERIC(oldsum);
+
+ /* OK to do the addition. */
+ PG_RETURN_DATUM(DirectFunctionCall2(numeric_add,
+ NumericGetDatum(oldsum),
+ NumericGetDatum(int64_to_numeric(PG_GETARG_INT64(1)))));
+}
+
+
+/*
+ * Routines for avg(int2) and avg(int4). The transition datatype
+ * is a two-element int8 array, holding count and sum.
+ *
+ * These functions are also used for sum(int2) and sum(int4) when
+ * operating in moving-aggregate mode, since for correct inverse transitions
+ * we need to count the inputs.
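+ *
+ * For example, after accumulating the int4 values 1, 2 and 3 the transition
+ * state holds {count = 3, sum = 6}; int8_avg() then computes 6 / 3 as a
+ * numeric, and int2int4_sum() simply returns the int8 value 6.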
+ */
+
+typedef struct Int8TransTypeData
+{
+ int64 count;
+ int64 sum;
+} Int8TransTypeData;
+
+Datum
+int2_avg_accum(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray;
+ int16 newval = PG_GETARG_INT16(1);
+ Int8TransTypeData *transdata;
+
+ /*
+ * If we're invoked as an aggregate, we can cheat and modify our first
+ * parameter in-place to reduce palloc overhead. Otherwise we need to make
+ * a copy of it before scribbling on it.
+ */
+ if (AggCheckCallContext(fcinfo, NULL))
+ transarray = PG_GETARG_ARRAYTYPE_P(0);
+ else
+ transarray = PG_GETARG_ARRAYTYPE_P_COPY(0);
+
+ if (ARR_HASNULL(transarray) ||
+ ARR_SIZE(transarray) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData))
+ elog(ERROR, "expected 2-element int8 array");
+
+ transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray);
+ transdata->count++;
+ transdata->sum += newval;
+
+ PG_RETURN_ARRAYTYPE_P(transarray);
+}
+
+Datum
+int4_avg_accum(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray;
+ int32 newval = PG_GETARG_INT32(1);
+ Int8TransTypeData *transdata;
+
+ /*
+ * If we're invoked as an aggregate, we can cheat and modify our first
+ * parameter in-place to reduce palloc overhead. Otherwise we need to make
+ * a copy of it before scribbling on it.
+ */
+ if (AggCheckCallContext(fcinfo, NULL))
+ transarray = PG_GETARG_ARRAYTYPE_P(0);
+ else
+ transarray = PG_GETARG_ARRAYTYPE_P_COPY(0);
+
+ if (ARR_HASNULL(transarray) ||
+ ARR_SIZE(transarray) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData))
+ elog(ERROR, "expected 2-element int8 array");
+
+ transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray);
+ transdata->count++;
+ transdata->sum += newval;
+
+ PG_RETURN_ARRAYTYPE_P(transarray);
+}
+
+Datum
+int4_avg_combine(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray1;
+ ArrayType *transarray2;
+ Int8TransTypeData *state1;
+ Int8TransTypeData *state2;
+
+ if (!AggCheckCallContext(fcinfo, NULL))
+ elog(ERROR, "aggregate function called in non-aggregate context");
+
+ transarray1 = PG_GETARG_ARRAYTYPE_P(0);
+ transarray2 = PG_GETARG_ARRAYTYPE_P(1);
+
+ if (ARR_HASNULL(transarray1) ||
+ ARR_SIZE(transarray1) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData))
+ elog(ERROR, "expected 2-element int8 array");
+
+ if (ARR_HASNULL(transarray2) ||
+ ARR_SIZE(transarray2) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData))
+ elog(ERROR, "expected 2-element int8 array");
+
+ state1 = (Int8TransTypeData *) ARR_DATA_PTR(transarray1);
+ state2 = (Int8TransTypeData *) ARR_DATA_PTR(transarray2);
+
+ state1->count += state2->count;
+ state1->sum += state2->sum;
+
+ PG_RETURN_ARRAYTYPE_P(transarray1);
+}
+
+Datum
+int2_avg_accum_inv(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray;
+ int16 newval = PG_GETARG_INT16(1);
+ Int8TransTypeData *transdata;
+
+ /*
+ * If we're invoked as an aggregate, we can cheat and modify our first
+ * parameter in-place to reduce palloc overhead. Otherwise we need to make
+ * a copy of it before scribbling on it.
+ */
+ if (AggCheckCallContext(fcinfo, NULL))
+ transarray = PG_GETARG_ARRAYTYPE_P(0);
+ else
+ transarray = PG_GETARG_ARRAYTYPE_P_COPY(0);
+
+ if (ARR_HASNULL(transarray) ||
+ ARR_SIZE(transarray) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData))
+ elog(ERROR, "expected 2-element int8 array");
+
+ transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray);
+ transdata->count--;
+ transdata->sum -= newval;
+
+ PG_RETURN_ARRAYTYPE_P(transarray);
+}
+
+Datum
+int4_avg_accum_inv(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray;
+ int32 newval = PG_GETARG_INT32(1);
+ Int8TransTypeData *transdata;
+
+ /*
+ * If we're invoked as an aggregate, we can cheat and modify our first
+ * parameter in-place to reduce palloc overhead. Otherwise we need to make
+ * a copy of it before scribbling on it.
+ */
+ if (AggCheckCallContext(fcinfo, NULL))
+ transarray = PG_GETARG_ARRAYTYPE_P(0);
+ else
+ transarray = PG_GETARG_ARRAYTYPE_P_COPY(0);
+
+ if (ARR_HASNULL(transarray) ||
+ ARR_SIZE(transarray) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData))
+ elog(ERROR, "expected 2-element int8 array");
+
+ transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray);
+ transdata->count--;
+ transdata->sum -= newval;
+
+ PG_RETURN_ARRAYTYPE_P(transarray);
+}
+
+Datum
+int8_avg(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ Int8TransTypeData *transdata;
+ Datum countd,
+ sumd;
+
+ if (ARR_HASNULL(transarray) ||
+ ARR_SIZE(transarray) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData))
+ elog(ERROR, "expected 2-element int8 array");
+ transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray);
+
+ /* SQL defines AVG of no values to be NULL */
+ if (transdata->count == 0)
+ PG_RETURN_NULL();
+
+ countd = NumericGetDatum(int64_to_numeric(transdata->count));
+ sumd = NumericGetDatum(int64_to_numeric(transdata->sum));
+
+ PG_RETURN_DATUM(DirectFunctionCall2(numeric_div, sumd, countd));
+}
+
+/*
+ * SUM(int2) and SUM(int4) both return int8, so we can use this
+ * final function for both.
+ */
+Datum
+int2int4_sum(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ Int8TransTypeData *transdata;
+
+ if (ARR_HASNULL(transarray) ||
+ ARR_SIZE(transarray) != ARR_OVERHEAD_NONULLS(1) + sizeof(Int8TransTypeData))
+ elog(ERROR, "expected 2-element int8 array");
+ transdata = (Int8TransTypeData *) ARR_DATA_PTR(transarray);
+
+ /* SQL defines SUM of no values to be NULL */
+ if (transdata->count == 0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_DATUM(Int64GetDatumFast(transdata->sum));
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Debug support
+ *
+ * ----------------------------------------------------------------------
+ */
+
+#ifdef NUMERIC_DEBUG
+
+/*
+ * dump_numeric() - Dump a value in the db storage format for debugging
+ */
+static void
+dump_numeric(const char *str, Numeric num)
+{
+ NumericDigit *digits = NUMERIC_DIGITS(num);
+ int ndigits;
+ int i;
+
+ ndigits = NUMERIC_NDIGITS(num);
+
+ printf("%s: NUMERIC w=%d d=%d ", str,
+ NUMERIC_WEIGHT(num), NUMERIC_DSCALE(num));
+ switch (NUMERIC_SIGN(num))
+ {
+ case NUMERIC_POS:
+ printf("POS");
+ break;
+ case NUMERIC_NEG:
+ printf("NEG");
+ break;
+ case NUMERIC_NAN:
+ printf("NaN");
+ break;
+ case NUMERIC_PINF:
+ printf("Infinity");
+ break;
+ case NUMERIC_NINF:
+ printf("-Infinity");
+ break;
+ default:
+ printf("SIGN=0x%x", NUMERIC_SIGN(num));
+ break;
+ }
+
+ for (i = 0; i < ndigits; i++)
+ printf(" %0*d", DEC_DIGITS, digits[i]);
+ printf("\n");
+}
+
+
+/*
+ * dump_var() - Dump a value in the variable format for debugging
+ */
+static void
+dump_var(const char *str, NumericVar *var)
+{
+ int i;
+
+ printf("%s: VAR w=%d d=%d ", str, var->weight, var->dscale);
+ switch (var->sign)
+ {
+ case NUMERIC_POS:
+ printf("POS");
+ break;
+ case NUMERIC_NEG:
+ printf("NEG");
+ break;
+ case NUMERIC_NAN:
+ printf("NaN");
+ break;
+ case NUMERIC_PINF:
+ printf("Infinity");
+ break;
+ case NUMERIC_NINF:
+ printf("-Infinity");
+ break;
+ default:
+ printf("SIGN=0x%x", var->sign);
+ break;
+ }
+
+ for (i = 0; i < var->ndigits; i++)
+ printf(" %0*d", DEC_DIGITS, var->digits[i]);
+
+ printf("\n");
+}
+#endif /* NUMERIC_DEBUG */
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Local functions follow
+ *
+ * In general, these do not support "special" (NaN or infinity) inputs;
+ * callers should handle those possibilities first.
+ * (There are one or two exceptions, noted in their header comments.)
+ *
+ * ----------------------------------------------------------------------
+ */
+
+
+/*
+ * alloc_var() -
+ *
+ * Allocate a digit buffer of ndigits digits (plus a spare digit for rounding)
+ */
+static void
+alloc_var(NumericVar *var, int ndigits)
+{
+ digitbuf_free(var->buf);
+ var->buf = digitbuf_alloc(ndigits + 1);
+ var->buf[0] = 0; /* spare digit for rounding */
+ var->digits = var->buf + 1;
+ var->ndigits = ndigits;
+}
+
+
+/*
+ * free_var() -
+ *
+ * Return the digit buffer of a variable to the free pool
+ */
+static void
+free_var(NumericVar *var)
+{
+ digitbuf_free(var->buf);
+ var->buf = NULL;
+ var->digits = NULL;
+ var->sign = NUMERIC_NAN;
+}
+
+
+/*
+ * zero_var() -
+ *
+ * Set a variable to ZERO.
+ * Note: its dscale is not touched.
+ */
+static void
+zero_var(NumericVar *var)
+{
+ digitbuf_free(var->buf);
+ var->buf = NULL;
+ var->digits = NULL;
+ var->ndigits = 0;
+ var->weight = 0; /* by convention; doesn't really matter */
+ var->sign = NUMERIC_POS; /* anything but NAN... */
+}
+
+
+/*
+ * set_var_from_str()
+ *
+ * Parse a string and put the number into a variable
+ *
+ * This function does not handle leading or trailing spaces. It returns
+ * the end+1 position parsed, so that the caller can check for trailing
+ * spaces/garbage if deemed necessary.
+ *
+ * cp is the place to actually start parsing; str is what to use in error
+ * reports. (Typically cp would be the same except advanced over spaces.)
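+ *
+ * As a worked example (with the default DEC_DIGITS = 4, NBASE = 10000), the
+ * input "12.345" is stored as sign = NUMERIC_POS, weight = 0, dscale = 3 and
+ * digits = {12, 3450}, i.e. 12 * NBASE^0 + 3450 * NBASE^-1.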
+ */
+static const char *
+set_var_from_str(const char *str, const char *cp, NumericVar *dest)
+{
+ bool have_dp = false;
+ int i;
+ unsigned char *decdigits;
+ int sign = NUMERIC_POS;
+ int dweight = -1;
+ int ddigits;
+ int dscale = 0;
+ int weight;
+ int ndigits;
+ int offset;
+ NumericDigit *digits;
+
+ /*
+ * We first parse the string to extract decimal digits and determine the
+ * correct decimal weight. Then convert to NBASE representation.
+ */
+ switch (*cp)
+ {
+ case '+':
+ sign = NUMERIC_POS;
+ cp++;
+ break;
+
+ case '-':
+ sign = NUMERIC_NEG;
+ cp++;
+ break;
+ }
+
+ if (*cp == '.')
+ {
+ have_dp = true;
+ cp++;
+ }
+
+ if (!isdigit((unsigned char) *cp))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "numeric", str)));
+
+ decdigits = (unsigned char *) palloc(strlen(cp) + DEC_DIGITS * 2);
+
+ /* leading padding for digit alignment later */
+ memset(decdigits, 0, DEC_DIGITS);
+ i = DEC_DIGITS;
+
+ while (*cp)
+ {
+ if (isdigit((unsigned char) *cp))
+ {
+ decdigits[i++] = *cp++ - '0';
+ if (!have_dp)
+ dweight++;
+ else
+ dscale++;
+ }
+ else if (*cp == '.')
+ {
+ if (have_dp)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "numeric", str)));
+ have_dp = true;
+ cp++;
+ }
+ else
+ break;
+ }
+
+ ddigits = i - DEC_DIGITS;
+ /* trailing padding for digit alignment later */
+ memset(decdigits + i, 0, DEC_DIGITS - 1);
+
+ /* Handle exponent, if any */
+ if (*cp == 'e' || *cp == 'E')
+ {
+ long exponent;
+ char *endptr;
+
+ cp++;
+ exponent = strtol(cp, &endptr, 10);
+ if (endptr == cp)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "numeric", str)));
+ cp = endptr;
+
+ /*
+ * At this point, dweight and dscale can't be more than about
+ * INT_MAX/2 due to the MaxAllocSize limit on string length, so
+ * constraining the exponent similarly should be enough to prevent
+ * integer overflow in this function. If the value is too large to
+ * fit in storage format, make_result() will complain about it later;
+ * for consistency use the same ereport errcode/text as make_result().
+ */
+ if (exponent >= INT_MAX / 2 || exponent <= -(INT_MAX / 2))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value overflows numeric format")));
+ dweight += (int) exponent;
+ dscale -= (int) exponent;
+ if (dscale < 0)
+ dscale = 0;
+ }
+
+ /*
+ * Okay, convert pure-decimal representation to base NBASE. First we need
+ * to determine the converted weight and ndigits. offset is the number of
+ * decimal zeroes to insert before the first given digit to have a
+ * correctly aligned first NBASE digit.
+ */
+ if (dweight >= 0)
+ weight = (dweight + 1 + DEC_DIGITS - 1) / DEC_DIGITS - 1;
+ else
+ weight = -((-dweight - 1) / DEC_DIGITS + 1);
+ offset = (weight + 1) * DEC_DIGITS - (dweight + 1);
+ ndigits = (ddigits + offset + DEC_DIGITS - 1) / DEC_DIGITS;
+
+ alloc_var(dest, ndigits);
+ dest->sign = sign;
+ dest->weight = weight;
+ dest->dscale = dscale;
+
+ i = DEC_DIGITS - offset;
+ digits = dest->digits;
+
+ while (ndigits-- > 0)
+ {
+#if DEC_DIGITS == 4
+ *digits++ = ((decdigits[i] * 10 + decdigits[i + 1]) * 10 +
+ decdigits[i + 2]) * 10 + decdigits[i + 3];
+#elif DEC_DIGITS == 2
+ *digits++ = decdigits[i] * 10 + decdigits[i + 1];
+#elif DEC_DIGITS == 1
+ *digits++ = decdigits[i];
+#else
+#error unsupported NBASE
+#endif
+ i += DEC_DIGITS;
+ }
+
+ pfree(decdigits);
+
+ /* Strip any leading/trailing zeroes, and normalize weight if zero */
+ strip_var(dest);
+
+ /* Return end+1 position for caller */
+ return cp;
+}
+
+
+/*
+ * set_var_from_num() -
+ *
+ * Convert the packed db format into a variable
+ */
+static void
+set_var_from_num(Numeric num, NumericVar *dest)
+{
+ int ndigits;
+
+ ndigits = NUMERIC_NDIGITS(num);
+
+ alloc_var(dest, ndigits);
+
+ dest->weight = NUMERIC_WEIGHT(num);
+ dest->sign = NUMERIC_SIGN(num);
+ dest->dscale = NUMERIC_DSCALE(num);
+
+ memcpy(dest->digits, NUMERIC_DIGITS(num), ndigits * sizeof(NumericDigit));
+}
+
+
+/*
+ * init_var_from_num() -
+ *
+ * Initialize a variable from packed db format. The digits array is not
+ * copied, which saves some cycles when the resulting var is not modified.
+ * Also, there's no need to call free_var(), as long as you don't assign any
+ * other value to it (with set_var_* functions, or by using the var as the
+ * destination of a function like add_var())
+ *
+ * CAUTION: Do not modify the digits buffer of a var initialized with this
+ * function, e.g. by calling round_var() or trunc_var(), as the changes will
+ * propagate to the original Numeric! It's OK to use it as the destination
+ * argument of one of the calculational functions, though.
+ */
+static void
+init_var_from_num(Numeric num, NumericVar *dest)
+{
+ dest->ndigits = NUMERIC_NDIGITS(num);
+ dest->weight = NUMERIC_WEIGHT(num);
+ dest->sign = NUMERIC_SIGN(num);
+ dest->dscale = NUMERIC_DSCALE(num);
+ dest->digits = NUMERIC_DIGITS(num);
+ dest->buf = NULL; /* digits array is not palloc'd */
+}
+
+
+/*
+ * set_var_from_var() -
+ *
+ * Copy one variable into another
+ */
+static void
+set_var_from_var(const NumericVar *value, NumericVar *dest)
+{
+ NumericDigit *newbuf;
+
+ newbuf = digitbuf_alloc(value->ndigits + 1);
+ newbuf[0] = 0; /* spare digit for rounding */
+ if (value->ndigits > 0) /* else value->digits might be null */
+ memcpy(newbuf + 1, value->digits,
+ value->ndigits * sizeof(NumericDigit));
+
+ digitbuf_free(dest->buf);
+
+ memmove(dest, value, sizeof(NumericVar));
+ dest->buf = newbuf;
+ dest->digits = newbuf + 1;
+}
+
+
+/*
+ * get_str_from_var() -
+ *
+ * Convert a var to text representation (guts of numeric_out).
+ * The var is displayed to the number of digits indicated by its dscale.
+ * Returns a palloc'd string.
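+ *
+ * For example (with DEC_DIGITS = 4), a var with digits = {12, 3450},
+ * weight = 0 and dscale = 3 is printed as "12.345"; with dscale = 5 the same
+ * digits would be printed as "12.34500".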
+ */
+static char *
+get_str_from_var(const NumericVar *var)
+{
+ int dscale;
+ char *str;
+ char *cp;
+ char *endcp;
+ int i;
+ int d;
+ NumericDigit dig;
+
+#if DEC_DIGITS > 1
+ NumericDigit d1;
+#endif
+
+ dscale = var->dscale;
+
+ /*
+ * Allocate space for the result.
+ *
+ * i is set to the # of decimal digits before decimal point. dscale is the
+ * # of decimal digits we will print after decimal point. We may generate
+ * as many as DEC_DIGITS-1 excess digits at the end, and in addition we
+ * need room for the sign, decimal point, and null terminator.
+ */
+ i = (var->weight + 1) * DEC_DIGITS;
+ if (i <= 0)
+ i = 1;
+
+ str = palloc(i + dscale + DEC_DIGITS + 2);
+ cp = str;
+
+ /*
+ * Output a dash for negative values
+ */
+ if (var->sign == NUMERIC_NEG)
+ *cp++ = '-';
+
+ /*
+ * Output all digits before the decimal point
+ */
+ if (var->weight < 0)
+ {
+ d = var->weight + 1;
+ *cp++ = '0';
+ }
+ else
+ {
+ for (d = 0; d <= var->weight; d++)
+ {
+ dig = (d < var->ndigits) ? var->digits[d] : 0;
+ /* In the first digit, suppress extra leading decimal zeroes */
+#if DEC_DIGITS == 4
+ {
+ bool putit = (d > 0);
+
+ d1 = dig / 1000;
+ dig -= d1 * 1000;
+ putit |= (d1 > 0);
+ if (putit)
+ *cp++ = d1 + '0';
+ d1 = dig / 100;
+ dig -= d1 * 100;
+ putit |= (d1 > 0);
+ if (putit)
+ *cp++ = d1 + '0';
+ d1 = dig / 10;
+ dig -= d1 * 10;
+ putit |= (d1 > 0);
+ if (putit)
+ *cp++ = d1 + '0';
+ *cp++ = dig + '0';
+ }
+#elif DEC_DIGITS == 2
+ d1 = dig / 10;
+ dig -= d1 * 10;
+ if (d1 > 0 || d > 0)
+ *cp++ = d1 + '0';
+ *cp++ = dig + '0';
+#elif DEC_DIGITS == 1
+ *cp++ = dig + '0';
+#else
+#error unsupported NBASE
+#endif
+ }
+ }
+
+ /*
+ * If requested, output a decimal point and all the digits that follow it.
+ * We initially put out a multiple of DEC_DIGITS digits, then truncate if
+ * needed.
+ */
+ if (dscale > 0)
+ {
+ *cp++ = '.';
+ endcp = cp + dscale;
+ for (i = 0; i < dscale; d++, i += DEC_DIGITS)
+ {
+ dig = (d >= 0 && d < var->ndigits) ? var->digits[d] : 0;
+#if DEC_DIGITS == 4
+ d1 = dig / 1000;
+ dig -= d1 * 1000;
+ *cp++ = d1 + '0';
+ d1 = dig / 100;
+ dig -= d1 * 100;
+ *cp++ = d1 + '0';
+ d1 = dig / 10;
+ dig -= d1 * 10;
+ *cp++ = d1 + '0';
+ *cp++ = dig + '0';
+#elif DEC_DIGITS == 2
+ d1 = dig / 10;
+ dig -= d1 * 10;
+ *cp++ = d1 + '0';
+ *cp++ = dig + '0';
+#elif DEC_DIGITS == 1
+ *cp++ = dig + '0';
+#else
+#error unsupported NBASE
+#endif
+ }
+ cp = endcp;
+ }
+
+ /*
+ * terminate the string and return it
+ */
+ *cp = '\0';
+ return str;
+}
+
+/*
+ * get_str_from_var_sci() -
+ *
+ * Convert a var to a normalised scientific notation text representation.
+ * This function does the heavy lifting for numeric_out_sci().
+ *
+ * This notation has the general form a * 10^b, where a is known as the
+ * "significand" and b is known as the "exponent".
+ *
+ * Because we can't do superscript in ASCII (and because we want to copy
+ * printf's behaviour) we display the exponent using E notation, with a
+ * minimum of two exponent digits.
+ *
+ * For example, the value 1234 could be output as 1.2e+03.
+ *
+ * We assume that the exponent can fit into an int32.
+ *
+ * rscale is the number of decimal digits desired after the decimal point in
+ * the output; negative values are treated as meaning zero.
+ *
+ * Returns a palloc'd string.
+ */
+static char *
+get_str_from_var_sci(const NumericVar *var, int rscale)
+{
+ int32 exponent;
+ NumericVar tmp_var;
+ size_t len;
+ char *str;
+ char *sig_out;
+
+ if (rscale < 0)
+ rscale = 0;
+
+ /*
+ * Determine the exponent of this number in normalised form.
+ *
+ * This is the exponent required to represent the number with only one
+ * significant digit before the decimal place.
+ */
+ if (var->ndigits > 0)
+ {
+ exponent = (var->weight + 1) * DEC_DIGITS;
+
+ /*
+ * Compensate for leading decimal zeroes in the first numeric digit by
+ * decrementing the exponent.
+ */
+ exponent -= DEC_DIGITS - (int) log10(var->digits[0]);
+ }
+ else
+ {
+ /*
+ * If var has no digits, then it must be zero.
+ *
+ * Zero doesn't technically have a meaningful exponent in normalised
+ * notation, but we just display the exponent as zero for consistency
+ * of output.
+ */
+ exponent = 0;
+ }
+
+ /*
+ * Divide var by 10^exponent to get the significand, rounding to rscale
+ * decimal digits in the process.
+ */
+ init_var(&tmp_var);
+
+ power_ten_int(exponent, &tmp_var);
+ div_var(var, &tmp_var, &tmp_var, rscale, true);
+ sig_out = get_str_from_var(&tmp_var);
+
+ free_var(&tmp_var);
+
+ /*
+ * Allocate space for the result.
+ *
+ * In addition to the significand, we need room for the exponent
+ * decoration ("e"), the sign of the exponent, up to 10 digits for the
+ * exponent itself, and of course the null terminator.
+ */
+ len = strlen(sig_out) + 13;
+ str = palloc(len);
+ snprintf(str, len, "%se%+03d", sig_out, exponent);
+
+ pfree(sig_out);
+
+ return str;
+}
+
+
+/*
+ * numericvar_serialize - serialize NumericVar to binary format
+ *
+ * At the variable level, no checks are performed on the weight or dscale,
+ * allowing us to pass around intermediate values with higher precision than
+ * the numeric type supports. Note: this is incompatible with
+ * numeric_send/recv(), which use 16-bit integers for these fields.
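+ *
+ * Concretely, the wire format written here is four int32 fields (ndigits,
+ * weight, sign, dscale) followed by ndigits int16 base-NBASE digits.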
+ */
+static void
+numericvar_serialize(StringInfo buf, const NumericVar *var)
+{
+ int i;
+
+ pq_sendint32(buf, var->ndigits);
+ pq_sendint32(buf, var->weight);
+ pq_sendint32(buf, var->sign);
+ pq_sendint32(buf, var->dscale);
+ for (i = 0; i < var->ndigits; i++)
+ pq_sendint16(buf, var->digits[i]);
+}
+
+/*
+ * numericvar_deserialize - deserialize binary format to NumericVar
+ */
+static void
+numericvar_deserialize(StringInfo buf, NumericVar *var)
+{
+ int len,
+ i;
+
+ len = pq_getmsgint(buf, sizeof(int32));
+
+ alloc_var(var, len); /* sets var->ndigits */
+
+ var->weight = pq_getmsgint(buf, sizeof(int32));
+ var->sign = pq_getmsgint(buf, sizeof(int32));
+ var->dscale = pq_getmsgint(buf, sizeof(int32));
+ for (i = 0; i < len; i++)
+ var->digits[i] = pq_getmsgint(buf, sizeof(int16));
+}
+
+
+/*
+ * duplicate_numeric() - copy a packed-format Numeric
+ *
+ * This will handle NaN and Infinity cases.
+ */
+static Numeric
+duplicate_numeric(Numeric num)
+{
+ Numeric res;
+
+ res = (Numeric) palloc(VARSIZE(num));
+ memcpy(res, num, VARSIZE(num));
+ return res;
+}
+
+/*
+ * make_result_opt_error() -
+ *
+ * Create the packed db numeric format in palloc()'d memory from
+ * a variable. This will handle NaN and Infinity cases.
+ *
+ * If "have_error" isn't NULL, on overflow *have_error is set to true and
+ * NULL is returned. This is helpful when the caller needs to handle errors.
+ */
+static Numeric
+make_result_opt_error(const NumericVar *var, bool *have_error)
+{
+ Numeric result;
+ NumericDigit *digits = var->digits;
+ int weight = var->weight;
+ int sign = var->sign;
+ int n;
+ Size len;
+
+ if (have_error)
+ *have_error = false;
+
+ if ((sign & NUMERIC_SIGN_MASK) == NUMERIC_SPECIAL)
+ {
+ /*
+ * Verify valid special value. This could be just an Assert, perhaps,
+ * but it seems worthwhile to expend a few cycles to ensure that we
+ * never write any nonzero reserved bits to disk.
+ */
+ if (!(sign == NUMERIC_NAN ||
+ sign == NUMERIC_PINF ||
+ sign == NUMERIC_NINF))
+ elog(ERROR, "invalid numeric sign value 0x%x", sign);
+
+ result = (Numeric) palloc(NUMERIC_HDRSZ_SHORT);
+
+ SET_VARSIZE(result, NUMERIC_HDRSZ_SHORT);
+ result->choice.n_header = sign;
+ /* the header word is all we need */
+
+ dump_numeric("make_result()", result);
+ return result;
+ }
+
+ n = var->ndigits;
+
+ /* truncate leading zeroes */
+ while (n > 0 && *digits == 0)
+ {
+ digits++;
+ weight--;
+ n--;
+ }
+ /* truncate trailing zeroes */
+ while (n > 0 && digits[n - 1] == 0)
+ n--;
+
+ /* If zero result, force to weight=0 and positive sign */
+ if (n == 0)
+ {
+ weight = 0;
+ sign = NUMERIC_POS;
+ }
+
+ /* Build the result */
+ if (NUMERIC_CAN_BE_SHORT(var->dscale, weight))
+ {
+ len = NUMERIC_HDRSZ_SHORT + n * sizeof(NumericDigit);
+ result = (Numeric) palloc(len);
+ SET_VARSIZE(result, len);
+ result->choice.n_short.n_header =
+ (sign == NUMERIC_NEG ? (NUMERIC_SHORT | NUMERIC_SHORT_SIGN_MASK)
+ : NUMERIC_SHORT)
+ | (var->dscale << NUMERIC_SHORT_DSCALE_SHIFT)
+ | (weight < 0 ? NUMERIC_SHORT_WEIGHT_SIGN_MASK : 0)
+ | (weight & NUMERIC_SHORT_WEIGHT_MASK);
+ }
+ else
+ {
+ len = NUMERIC_HDRSZ + n * sizeof(NumericDigit);
+ result = (Numeric) palloc(len);
+ SET_VARSIZE(result, len);
+ result->choice.n_long.n_sign_dscale =
+ sign | (var->dscale & NUMERIC_DSCALE_MASK);
+ result->choice.n_long.n_weight = weight;
+ }
+
+ Assert(NUMERIC_NDIGITS(result) == n);
+ if (n > 0)
+ memcpy(NUMERIC_DIGITS(result), digits, n * sizeof(NumericDigit));
+
+ /* Check for overflow of int16 fields */
+ if (NUMERIC_WEIGHT(result) != weight ||
+ NUMERIC_DSCALE(result) != var->dscale)
+ {
+ if (have_error)
+ {
+ *have_error = true;
+ return NULL;
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value overflows numeric format")));
+ }
+ }
+
+ dump_numeric("make_result()", result);
+ return result;
+}
+
+
+/*
+ * make_result() -
+ *
+ * An interface to make_result_opt_error() without the "have_error" argument.
+ */
+static Numeric
+make_result(const NumericVar *var)
+{
+ return make_result_opt_error(var, NULL);
+}
+
+
+/*
+ * apply_typmod() -
+ *
+ * Do bounds checking and rounding according to the specified typmod.
+ * Note that this is only applied to normal finite values.
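+ *
+ * For example, with the typmod of numeric(5,2) the value is first rounded to
+ * scale 2 and may then keep at most 5 - 2 = 3 digits before the decimal
+ * point: 999.994 rounds to 999.99 and is accepted, while 999.995 rounds to
+ * 1000.00 and raises "numeric field overflow".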
+ */
+static void
+apply_typmod(NumericVar *var, int32 typmod)
+{
+ int precision;
+ int scale;
+ int maxdigits;
+ int ddigits;
+ int i;
+
+ /* Do nothing if we have an invalid typmod */
+ if (!is_valid_numeric_typmod(typmod))
+ return;
+
+ precision = numeric_typmod_precision(typmod);
+ scale = numeric_typmod_scale(typmod);
+ maxdigits = precision - scale;
+
+ /* Round to target scale (and set var->dscale) */
+ round_var(var, scale);
+
+ /* but don't allow var->dscale to be negative */
+ if (var->dscale < 0)
+ var->dscale = 0;
+
+ /*
+ * Check for overflow - note we can't do this before rounding, because
+ * rounding could raise the weight. Also note that the var's weight could
+ * be inflated by leading zeroes, which will be stripped before storage
+ * but perhaps might not have been yet. In any case, we must recognize a
+ * true zero, whose weight doesn't mean anything.
+ */
+ ddigits = (var->weight + 1) * DEC_DIGITS;
+ if (ddigits > maxdigits)
+ {
+ /* Determine true weight; and check for all-zero result */
+ for (i = 0; i < var->ndigits; i++)
+ {
+ NumericDigit dig = var->digits[i];
+
+ if (dig)
+ {
+ /* Adjust for any high-order decimal zero digits */
+#if DEC_DIGITS == 4
+ if (dig < 10)
+ ddigits -= 3;
+ else if (dig < 100)
+ ddigits -= 2;
+ else if (dig < 1000)
+ ddigits -= 1;
+#elif DEC_DIGITS == 2
+ if (dig < 10)
+ ddigits -= 1;
+#elif DEC_DIGITS == 1
+ /* no adjustment */
+#else
+#error unsupported NBASE
+#endif
+ if (ddigits > maxdigits)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("numeric field overflow"),
+ errdetail("A field with precision %d, scale %d must round to an absolute value less than %s%d.",
+ precision, scale,
+ /* Display 10^0 as 1 */
+ maxdigits ? "10^" : "",
+ maxdigits ? maxdigits : 1
+ )));
+ break;
+ }
+ ddigits -= DEC_DIGITS;
+ }
+ }
+}
+
+/*
+ * apply_typmod_special() -
+ *
+ * Do bounds checking according to the specified typmod, for an Inf or NaN.
+ * For convenience of most callers, the value is presented in packed form.
+ */
+static void
+apply_typmod_special(Numeric num, int32 typmod)
+{
+ int precision;
+ int scale;
+
+ Assert(NUMERIC_IS_SPECIAL(num)); /* caller error if not */
+
+ /*
+ * NaN is allowed regardless of the typmod; that's rather dubious perhaps,
+ * but it's a longstanding behavior. Inf is rejected if we have any
+ * typmod restriction, since an infinity shouldn't be claimed to fit in
+ * any finite number of digits.
+ */
+ if (NUMERIC_IS_NAN(num))
+ return;
+
+ /* Do nothing if we have a default typmod (-1) */
+ if (!is_valid_numeric_typmod(typmod))
+ return;
+
+ precision = numeric_typmod_precision(typmod);
+ scale = numeric_typmod_scale(typmod);
+
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("numeric field overflow"),
+ errdetail("A field with precision %d, scale %d cannot hold an infinite value.",
+ precision, scale)));
+}
+
+
+/*
+ * Convert numeric to int8, rounding if needed.
+ *
+ * On overflow, return false (no error is raised); return true on success.
+ */
+static bool
+numericvar_to_int64(const NumericVar *var, int64 *result)
+{
+ NumericDigit *digits;
+ int ndigits;
+ int weight;
+ int i;
+ int64 val;
+ bool neg;
+ NumericVar rounded;
+
+ /* Round to nearest integer */
+ init_var(&rounded);
+ set_var_from_var(var, &rounded);
+ round_var(&rounded, 0);
+
+ /* Check for zero input */
+ strip_var(&rounded);
+ ndigits = rounded.ndigits;
+ if (ndigits == 0)
+ {
+ *result = 0;
+ free_var(&rounded);
+ return true;
+ }
+
+ /*
+ * For input like 10000000000, we must treat stripped digits as real. So
+ * the loop assumes there are weight+1 digits before the decimal point.
+ */
+ weight = rounded.weight;
+ Assert(weight >= 0 && ndigits <= weight + 1);
+
+ /*
+ * Construct the result. To avoid issues with converting a value
+ * corresponding to INT64_MIN (which can't be represented as a positive 64
+ * bit two's complement integer), accumulate value as a negative number.
+ */
+ digits = rounded.digits;
+ neg = (rounded.sign == NUMERIC_NEG);
+ val = -digits[0];
+ for (i = 1; i <= weight; i++)
+ {
+ if (unlikely(pg_mul_s64_overflow(val, NBASE, &val)))
+ {
+ free_var(&rounded);
+ return false;
+ }
+
+ if (i < ndigits)
+ {
+ if (unlikely(pg_sub_s64_overflow(val, digits[i], &val)))
+ {
+ free_var(&rounded);
+ return false;
+ }
+ }
+ }
+
+ free_var(&rounded);
+
+ if (!neg)
+ {
+ if (unlikely(val == PG_INT64_MIN))
+ return false;
+ val = -val;
+ }
+ *result = val;
+
+ return true;
+}
+
+/*
+ * Convert int8 value to numeric.
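+ *
+ * For example (with DEC_DIGITS = 4), the value 1234567 becomes
+ * digits = {123, 4567} with weight = 1, i.e. 123 * NBASE^1 + 4567 * NBASE^0.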
+ */
+static void
+int64_to_numericvar(int64 val, NumericVar *var)
+{
+ uint64 uval,
+ newuval;
+ NumericDigit *ptr;
+ int ndigits;
+
+ /* int64 can require at most 19 decimal digits; add one for safety */
+ alloc_var(var, 20 / DEC_DIGITS);
+ if (val < 0)
+ {
+ var->sign = NUMERIC_NEG;
+ uval = -val;
+ }
+ else
+ {
+ var->sign = NUMERIC_POS;
+ uval = val;
+ }
+ var->dscale = 0;
+ if (val == 0)
+ {
+ var->ndigits = 0;
+ var->weight = 0;
+ return;
+ }
+ ptr = var->digits + var->ndigits;
+ ndigits = 0;
+ do
+ {
+ ptr--;
+ ndigits++;
+ newuval = uval / NBASE;
+ *ptr = uval - newuval * NBASE;
+ uval = newuval;
+ } while (uval);
+ var->digits = ptr;
+ var->ndigits = ndigits;
+ var->weight = ndigits - 1;
+}
+
+/*
+ * Convert numeric to uint64, rounding if needed.
+ *
+ * If overflow, return false (no error is raised). Return true if okay.
+ */
+static bool
+numericvar_to_uint64(const NumericVar *var, uint64 *result)
+{
+ NumericDigit *digits;
+ int ndigits;
+ int weight;
+ int i;
+ uint64 val;
+ NumericVar rounded;
+
+ /* Round to nearest integer */
+ init_var(&rounded);
+ set_var_from_var(var, &rounded);
+ round_var(&rounded, 0);
+
+ /* Check for zero input */
+ strip_var(&rounded);
+ ndigits = rounded.ndigits;
+ if (ndigits == 0)
+ {
+ *result = 0;
+ free_var(&rounded);
+ return true;
+ }
+
+ /* Check for negative input */
+ if (rounded.sign == NUMERIC_NEG)
+ {
+ free_var(&rounded);
+ return false;
+ }
+
+ /*
+ * For input like 10000000000, we must treat stripped digits as real. So
+ * the loop assumes there are weight+1 digits before the decimal point.
+ */
+ weight = rounded.weight;
+ Assert(weight >= 0 && ndigits <= weight + 1);
+
+ /* Construct the result */
+ digits = rounded.digits;
+ val = digits[0];
+ for (i = 1; i <= weight; i++)
+ {
+ if (unlikely(pg_mul_u64_overflow(val, NBASE, &val)))
+ {
+ free_var(&rounded);
+ return false;
+ }
+
+ if (i < ndigits)
+ {
+ if (unlikely(pg_add_u64_overflow(val, digits[i], &val)))
+ {
+ free_var(&rounded);
+ return false;
+ }
+ }
+ }
+
+ free_var(&rounded);
+
+ *result = val;
+
+ return true;
+}
+
+#ifdef HAVE_INT128
+/*
+ * Convert numeric to int128, rounding if needed.
+ *
+ * If overflow, return false (no error is raised). Return true if okay.
+ */
+static bool
+numericvar_to_int128(const NumericVar *var, int128 *result)
+{
+ NumericDigit *digits;
+ int ndigits;
+ int weight;
+ int i;
+ int128 val,
+ oldval;
+ bool neg;
+ NumericVar rounded;
+
+ /* Round to nearest integer */
+ init_var(&rounded);
+ set_var_from_var(var, &rounded);
+ round_var(&rounded, 0);
+
+ /* Check for zero input */
+ strip_var(&rounded);
+ ndigits = rounded.ndigits;
+ if (ndigits == 0)
+ {
+ *result = 0;
+ free_var(&rounded);
+ return true;
+ }
+
+ /*
+ * For input like 10000000000, we must treat stripped digits as real. So
+ * the loop assumes there are weight+1 digits before the decimal point.
+ */
+ weight = rounded.weight;
+ Assert(weight >= 0 && ndigits <= weight + 1);
+
+ /* Construct the result */
+ digits = rounded.digits;
+ neg = (rounded.sign == NUMERIC_NEG);
+ val = digits[0];
+ for (i = 1; i <= weight; i++)
+ {
+ oldval = val;
+ val *= NBASE;
+ if (i < ndigits)
+ val += digits[i];
+
+ /*
+ * The overflow check is a bit tricky because we want to accept
+ * INT128_MIN, which will overflow the positive accumulator. We can
+ * detect this case easily though because INT128_MIN is the only
+ * nonzero value for which -val == val (on a two's complement machine,
+ * anyway).
+ */
+ if ((val / NBASE) != oldval) /* possible overflow? */
+ {
+ if (!neg || (-val) != val || val == 0 || oldval < 0)
+ {
+ free_var(&rounded);
+ return false;
+ }
+ }
+ }
+
+ free_var(&rounded);
+
+ *result = neg ? -val : val;
+ return true;
+}
+
+/*
+ * Convert 128 bit integer to numeric.
+ */
+static void
+int128_to_numericvar(int128 val, NumericVar *var)
+{
+ uint128 uval,
+ newuval;
+ NumericDigit *ptr;
+ int ndigits;
+
+ /* int128 can require at most 39 decimal digits; add one for safety */
+ alloc_var(var, 40 / DEC_DIGITS);
+ if (val < 0)
+ {
+ var->sign = NUMERIC_NEG;
+ uval = -val;
+ }
+ else
+ {
+ var->sign = NUMERIC_POS;
+ uval = val;
+ }
+ var->dscale = 0;
+ if (val == 0)
+ {
+ var->ndigits = 0;
+ var->weight = 0;
+ return;
+ }
+ ptr = var->digits + var->ndigits;
+ ndigits = 0;
+ do
+ {
+ ptr--;
+ ndigits++;
+ newuval = uval / NBASE;
+ *ptr = uval - newuval * NBASE;
+ uval = newuval;
+ } while (uval);
+ var->digits = ptr;
+ var->ndigits = ndigits;
+ var->weight = ndigits - 1;
+}
+#endif
+
+/*
+ * Convert a NumericVar to float8; if out of range, return +/- HUGE_VAL
+ */
+static double
+numericvar_to_double_no_overflow(const NumericVar *var)
+{
+ char *tmp;
+ double val;
+ char *endptr;
+
+ tmp = get_str_from_var(var);
+
+ /* unlike float8in, we ignore ERANGE from strtod */
+ val = strtod(tmp, &endptr);
+ if (*endptr != '\0')
+ {
+ /* shouldn't happen ... */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "double precision", tmp)));
+ }
+
+ pfree(tmp);
+
+ return val;
+}
+
+
+/*
+ * cmp_var() -
+ *
+ * Compare two values on variable level. We assume zeroes have been
+ * truncated to no digits.
+ */
+static int
+cmp_var(const NumericVar *var1, const NumericVar *var2)
+{
+ return cmp_var_common(var1->digits, var1->ndigits,
+ var1->weight, var1->sign,
+ var2->digits, var2->ndigits,
+ var2->weight, var2->sign);
+}
+
+/*
+ * cmp_var_common() -
+ *
+ * Main routine of cmp_var(). This function can be used by both
+ * NumericVar and Numeric.
+ */
+static int
+cmp_var_common(const NumericDigit *var1digits, int var1ndigits,
+ int var1weight, int var1sign,
+ const NumericDigit *var2digits, int var2ndigits,
+ int var2weight, int var2sign)
+{
+ if (var1ndigits == 0)
+ {
+ if (var2ndigits == 0)
+ return 0;
+ if (var2sign == NUMERIC_NEG)
+ return 1;
+ return -1;
+ }
+ if (var2ndigits == 0)
+ {
+ if (var1sign == NUMERIC_POS)
+ return 1;
+ return -1;
+ }
+
+ if (var1sign == NUMERIC_POS)
+ {
+ if (var2sign == NUMERIC_NEG)
+ return 1;
+ return cmp_abs_common(var1digits, var1ndigits, var1weight,
+ var2digits, var2ndigits, var2weight);
+ }
+
+ if (var2sign == NUMERIC_POS)
+ return -1;
+
+ return cmp_abs_common(var2digits, var2ndigits, var2weight,
+ var1digits, var1ndigits, var1weight);
+}
+
+
+/*
+ * add_var() -
+ *
+ * Full version of add functionality on variable level (handling signs).
+ * The result may safely point to one of the operands.
+ */
+static void
+add_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result)
+{
+ /*
+ * Decide what to do based on the signs of the two variables
+ */
+ if (var1->sign == NUMERIC_POS)
+ {
+ if (var2->sign == NUMERIC_POS)
+ {
+ /* ----------
+ * Both are positive
+ * result = +(ABS(var1) + ABS(var2))
+ * ----------
+ */
+ add_abs(var1, var2, result);
+ result->sign = NUMERIC_POS;
+ }
+ else
+ {
+ /* ----------
+ * var1 is positive, var2 is negative
+ * Must compare absolute values
+ * ----------
+ */
+ switch (cmp_abs(var1, var2))
+ {
+ case 0:
+ /* ----------
+ * ABS(var1) == ABS(var2)
+ * result = ZERO
+ * ----------
+ */
+ zero_var(result);
+ result->dscale = Max(var1->dscale, var2->dscale);
+ break;
+
+ case 1:
+ /* ----------
+ * ABS(var1) > ABS(var2)
+ * result = +(ABS(var1) - ABS(var2))
+ * ----------
+ */
+ sub_abs(var1, var2, result);
+ result->sign = NUMERIC_POS;
+ break;
+
+ case -1:
+ /* ----------
+ * ABS(var1) < ABS(var2)
+ * result = -(ABS(var2) - ABS(var1))
+ * ----------
+ */
+ sub_abs(var2, var1, result);
+ result->sign = NUMERIC_NEG;
+ break;
+ }
+ }
+ }
+ else
+ {
+ if (var2->sign == NUMERIC_POS)
+ {
+ /* ----------
+ * var1 is negative, var2 is positive
+ * Must compare absolute values
+ * ----------
+ */
+ switch (cmp_abs(var1, var2))
+ {
+ case 0:
+ /* ----------
+ * ABS(var1) == ABS(var2)
+ * result = ZERO
+ * ----------
+ */
+ zero_var(result);
+ result->dscale = Max(var1->dscale, var2->dscale);
+ break;
+
+ case 1:
+ /* ----------
+ * ABS(var1) > ABS(var2)
+ * result = -(ABS(var1) - ABS(var2))
+ * ----------
+ */
+ sub_abs(var1, var2, result);
+ result->sign = NUMERIC_NEG;
+ break;
+
+ case -1:
+ /* ----------
+ * ABS(var1) < ABS(var2)
+ * result = +(ABS(var2) - ABS(var1))
+ * ----------
+ */
+ sub_abs(var2, var1, result);
+ result->sign = NUMERIC_POS;
+ break;
+ }
+ }
+ else
+ {
+ /* ----------
+ * Both are negative
+ * result = -(ABS(var1) + ABS(var2))
+ * ----------
+ */
+ add_abs(var1, var2, result);
+ result->sign = NUMERIC_NEG;
+ }
+ }
+}
+
+
+/*
+ * sub_var() -
+ *
+ * Full version of sub functionality on variable level (handling signs).
+ * The result may safely point to one of the operands.
+ */
+static void
+sub_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result)
+{
+ /*
+ * Decide what to do based on the signs of the two variables
+ */
+ if (var1->sign == NUMERIC_POS)
+ {
+ if (var2->sign == NUMERIC_NEG)
+ {
+ /* ----------
+ * var1 is positive, var2 is negative
+ * result = +(ABS(var1) + ABS(var2))
+ * ----------
+ */
+ add_abs(var1, var2, result);
+ result->sign = NUMERIC_POS;
+ }
+ else
+ {
+ /* ----------
+ * Both are positive
+ * Must compare absolute values
+ * ----------
+ */
+ switch (cmp_abs(var1, var2))
+ {
+ case 0:
+ /* ----------
+ * ABS(var1) == ABS(var2)
+ * result = ZERO
+ * ----------
+ */
+ zero_var(result);
+ result->dscale = Max(var1->dscale, var2->dscale);
+ break;
+
+ case 1:
+ /* ----------
+ * ABS(var1) > ABS(var2)
+ * result = +(ABS(var1) - ABS(var2))
+ * ----------
+ */
+ sub_abs(var1, var2, result);
+ result->sign = NUMERIC_POS;
+ break;
+
+ case -1:
+ /* ----------
+ * ABS(var1) < ABS(var2)
+ * result = -(ABS(var2) - ABS(var1))
+ * ----------
+ */
+ sub_abs(var2, var1, result);
+ result->sign = NUMERIC_NEG;
+ break;
+ }
+ }
+ }
+ else
+ {
+ if (var2->sign == NUMERIC_NEG)
+ {
+ /* ----------
+ * Both are negative
+ * Must compare absolute values
+ * ----------
+ */
+ switch (cmp_abs(var1, var2))
+ {
+ case 0:
+ /* ----------
+ * ABS(var1) == ABS(var2)
+ * result = ZERO
+ * ----------
+ */
+ zero_var(result);
+ result->dscale = Max(var1->dscale, var2->dscale);
+ break;
+
+ case 1:
+ /* ----------
+ * ABS(var1) > ABS(var2)
+ * result = -(ABS(var1) - ABS(var2))
+ * ----------
+ */
+ sub_abs(var1, var2, result);
+ result->sign = NUMERIC_NEG;
+ break;
+
+ case -1:
+ /* ----------
+ * ABS(var1) < ABS(var2)
+ * result = +(ABS(var2) - ABS(var1))
+ * ----------
+ */
+ sub_abs(var2, var1, result);
+ result->sign = NUMERIC_POS;
+ break;
+ }
+ }
+ else
+ {
+ /* ----------
+ * var1 is negative, var2 is positive
+ * result = -(ABS(var1) + ABS(var2))
+ * ----------
+ */
+ add_abs(var1, var2, result);
+ result->sign = NUMERIC_NEG;
+ }
+ }
+}
+
+
+/*
+ * mul_var() -
+ *
+ * Multiplication on variable level. Product of var1 * var2 is stored
+ * in result. Result is rounded to no more than rscale fractional digits.
+ */
+static void
+mul_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result,
+ int rscale)
+{
+ int res_ndigits;
+ int res_sign;
+ int res_weight;
+ int maxdigits;
+ int *dig;
+ int carry;
+ int maxdig;
+ int newdig;
+ int var1ndigits;
+ int var2ndigits;
+ NumericDigit *var1digits;
+ NumericDigit *var2digits;
+ NumericDigit *res_digits;
+ int i,
+ i1,
+ i2;
+
+ /*
+ * Arrange for var1 to be the shorter of the two numbers. This improves
+ * performance because the inner multiplication loop is much simpler than
+ * the outer loop, so it's better to have a smaller number of iterations
+ * of the outer loop. This also reduces the number of times that the
+ * accumulator array needs to be normalized.
+ */
+ if (var1->ndigits > var2->ndigits)
+ {
+ const NumericVar *tmp = var1;
+
+ var1 = var2;
+ var2 = tmp;
+ }
+
+ /* copy these values into local vars for speed in inner loop */
+ var1ndigits = var1->ndigits;
+ var2ndigits = var2->ndigits;
+ var1digits = var1->digits;
+ var2digits = var2->digits;
+
+ if (var1ndigits == 0 || var2ndigits == 0)
+ {
+ /* one or both inputs is zero; so is result */
+ zero_var(result);
+ result->dscale = rscale;
+ return;
+ }
+
+ /* Determine result sign and (maximum possible) weight */
+ if (var1->sign == var2->sign)
+ res_sign = NUMERIC_POS;
+ else
+ res_sign = NUMERIC_NEG;
+ res_weight = var1->weight + var2->weight + 2;
+
+ /*
+ * Determine the number of result digits to compute. If the exact result
+ * would have more than rscale fractional digits, truncate the computation
+ * with MUL_GUARD_DIGITS guard digits, i.e., ignore input digits that
+ * would only contribute to the right of that. (This will give the exact
+ * rounded-to-rscale answer unless carries out of the ignored positions
+ * would have propagated through more than MUL_GUARD_DIGITS digits.)
+ *
+ * Note: an exact computation could not produce more than var1ndigits +
+ * var2ndigits digits, but we allocate one extra output digit in case
+ * rscale-driven rounding produces a carry out of the highest exact digit.
+ */
+ res_ndigits = var1ndigits + var2ndigits + 1;
+ maxdigits = res_weight + 1 + (rscale + DEC_DIGITS - 1) / DEC_DIGITS +
+ MUL_GUARD_DIGITS;
+ res_ndigits = Min(res_ndigits, maxdigits);
+
+ if (res_ndigits < 3)
+ {
+ /* All input digits will be ignored; so result is zero */
+ zero_var(result);
+ result->dscale = rscale;
+ return;
+ }
+
+ /*
+ * We do the arithmetic in an array "dig[]" of signed int's. Since
+ * INT_MAX is noticeably larger than NBASE*NBASE, this gives us headroom
+ * to avoid normalizing carries immediately.
+ *
+ * maxdig tracks the maximum possible value of any dig[] entry; when this
+ * threatens to exceed INT_MAX, we take the time to propagate carries.
+ * Furthermore, we need to ensure that overflow doesn't occur during the
+ * carry propagation passes either. The carry values could be as much as
+ * INT_MAX/NBASE, so really we must normalize when digits threaten to
+ * exceed INT_MAX - INT_MAX/NBASE.
+ *
+ * To avoid overflow in maxdig itself, it actually represents the max
+ * possible value divided by NBASE-1, ie, at the top of the loop it is
+ * known that no dig[] entry exceeds maxdig * (NBASE-1).
+ */
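+ /*
+ * Illustrative numbers, assuming NBASE = 10000 and 32-bit int: the
+ * normalization threshold above works out to about 214748, and each
+ * nonzero var1 digit adds at most NBASE - 1 = 9999 to maxdig, so in the
+ * worst case a carry-propagation pass is needed only about once every 21
+ * iterations of the outer loop.
+ */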
+ dig = (int *) palloc0(res_ndigits * sizeof(int));
+ maxdig = 0;
+
+ /*
+ * The least significant digits of var1 should be ignored if they don't
+ * contribute directly to the first res_ndigits digits of the result that
+ * we are computing.
+ *
+ * Digit i1 of var1 and digit i2 of var2 are multiplied and added to digit
+ * i1+i2+2 of the accumulator array, so we need only consider digits of
+ * var1 for which i1 <= res_ndigits - 3.
+ */
+ for (i1 = Min(var1ndigits - 1, res_ndigits - 3); i1 >= 0; i1--)
+ {
+ NumericDigit var1digit = var1digits[i1];
+
+ if (var1digit == 0)
+ continue;
+
+ /* Time to normalize? */
+ maxdig += var1digit;
+ if (maxdig > (INT_MAX - INT_MAX / NBASE) / (NBASE - 1))
+ {
+ /* Yes, do it */
+ carry = 0;
+ for (i = res_ndigits - 1; i >= 0; i--)
+ {
+ newdig = dig[i] + carry;
+ if (newdig >= NBASE)
+ {
+ carry = newdig / NBASE;
+ newdig -= carry * NBASE;
+ }
+ else
+ carry = 0;
+ dig[i] = newdig;
+ }
+ Assert(carry == 0);
+ /* Reset maxdig to indicate new worst-case */
+ maxdig = 1 + var1digit;
+ }
+
+ /*
+ * Add the appropriate multiple of var2 into the accumulator.
+ *
+ * As above, digits of var2 can be ignored if they don't contribute,
+ * so we only include digits for which i1+i2+2 < res_ndigits.
+ *
+ * This inner loop is the performance bottleneck for multiplication,
+ * so we want to keep it simple enough so that it can be
+ * auto-vectorized. Accordingly, process the digits left-to-right
+ * even though schoolbook multiplication would suggest right-to-left.
+ * Since we aren't propagating carries in this loop, the order does
+ * not matter.
+ */
+ {
+ int i2limit = Min(var2ndigits, res_ndigits - i1 - 2);
+ int *dig_i1_2 = &dig[i1 + 2];
+
+ for (i2 = 0; i2 < i2limit; i2++)
+ dig_i1_2[i2] += var1digit * var2digits[i2];
+ }
+ }
+
+ /*
+ * Now we do a final carry propagation pass to normalize the result, which
+ * we combine with storing the result digits into the output. Note that
+ * this is still done at full precision w/guard digits.
+ */
+ alloc_var(result, res_ndigits);
+ res_digits = result->digits;
+ carry = 0;
+ for (i = res_ndigits - 1; i >= 0; i--)
+ {
+ newdig = dig[i] + carry;
+ if (newdig >= NBASE)
+ {
+ carry = newdig / NBASE;
+ newdig -= carry * NBASE;
+ }
+ else
+ carry = 0;
+ res_digits[i] = newdig;
+ }
+ Assert(carry == 0);
+
+ pfree(dig);
+
+ /*
+ * Finally, round the result to the requested precision.
+ */
+ result->weight = res_weight;
+ result->sign = res_sign;
+
+ /* Round to target rscale (and set result->dscale) */
+ round_var(result, rscale);
+
+ /* Strip leading and trailing zeroes */
+ strip_var(result);
+}
+
+
+/*
+ * div_var() -
+ *
+ * Division on variable level. Quotient of var1 / var2 is stored in result.
+ * The quotient is figured to exactly rscale fractional digits.
+ * If round is true, it is rounded at the rscale'th digit; if false, it
+ * is truncated (towards zero) at that digit.
+ */
+static void
+div_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result,
+ int rscale, bool round)
+{
+ int div_ndigits;
+ int res_ndigits;
+ int res_sign;
+ int res_weight;
+ int carry;
+ int borrow;
+ int divisor1;
+ int divisor2;
+ NumericDigit *dividend;
+ NumericDigit *divisor;
+ NumericDigit *res_digits;
+ int i;
+ int j;
+
+ /* copy these values into local vars for speed in inner loop */
+ int var1ndigits = var1->ndigits;
+ int var2ndigits = var2->ndigits;
+
+ /*
+ * First of all division by zero check; we must not be handed an
+ * unnormalized divisor.
+ */
+ if (var2ndigits == 0 || var2->digits[0] == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+
+ /*
+ * If the divisor has just one or two digits, delegate to div_var_int(),
+ * which uses fast short division.
+ */
+ if (var2ndigits <= 2)
+ {
+ int idivisor;
+ int idivisor_weight;
+
+ idivisor = var2->digits[0];
+ idivisor_weight = var2->weight;
+ if (var2ndigits == 2)
+ {
+ idivisor = idivisor * NBASE + var2->digits[1];
+ idivisor_weight--;
+ }
+ if (var2->sign == NUMERIC_NEG)
+ idivisor = -idivisor;
+
+ div_var_int(var1, idivisor, idivisor_weight, result, rscale, round);
+ return;
+ }
+
+ /*
+ * Otherwise, perform full long division.
+ */
+
+ /* Result zero check */
+ if (var1ndigits == 0)
+ {
+ zero_var(result);
+ result->dscale = rscale;
+ return;
+ }
+
+ /*
+ * Determine the result sign, weight and number of digits to calculate.
+ * The weight figured here is correct if the emitted quotient has no
+ * leading zero digits; otherwise strip_var() will fix things up.
+ */
+ if (var1->sign == var2->sign)
+ res_sign = NUMERIC_POS;
+ else
+ res_sign = NUMERIC_NEG;
+ res_weight = var1->weight - var2->weight;
+ /* The number of accurate result digits we need to produce: */
+ res_ndigits = res_weight + 1 + (rscale + DEC_DIGITS - 1) / DEC_DIGITS;
+ /* ... but always at least 1 */
+ res_ndigits = Max(res_ndigits, 1);
+ /* If rounding needed, figure one more digit to ensure correct result */
+ if (round)
+ res_ndigits++;
+
+ /*
+ * The working dividend normally requires res_ndigits + var2ndigits
+ * digits, but make it at least var1ndigits so we can load all of var1
+ * into it. (There will be an additional digit dividend[0] in the
+ * dividend space, but for consistency with Knuth's notation we don't
+ * count that in div_ndigits.)
+ */
+ div_ndigits = res_ndigits + var2ndigits;
+ div_ndigits = Max(div_ndigits, var1ndigits);
+
+ /*
+ * We need a workspace with room for the working dividend (div_ndigits+1
+ * digits) plus room for the possibly-normalized divisor (var2ndigits
+ * digits). It is convenient also to have a zero at divisor[0] with the
+ * actual divisor data in divisor[1 .. var2ndigits]. Transferring the
+ * digits into the workspace also allows us to realloc the result (which
+ * might be the same as either input var) before we begin the main loop.
+ * Note that we use palloc0 to ensure that divisor[0], dividend[0], and
+ * any additional dividend positions beyond var1ndigits, start out 0.
+ */
+ dividend = (NumericDigit *)
+ palloc0((div_ndigits + var2ndigits + 2) * sizeof(NumericDigit));
+ divisor = dividend + (div_ndigits + 1);
+ memcpy(dividend + 1, var1->digits, var1ndigits * sizeof(NumericDigit));
+ memcpy(divisor + 1, var2->digits, var2ndigits * sizeof(NumericDigit));
+
+ /*
+ * Now we can realloc the result to hold the generated quotient digits.
+ */
+ alloc_var(result, res_ndigits);
+ res_digits = result->digits;
+
+ /*
+ * The full multiple-place algorithm is taken from Knuth volume 2,
+ * Algorithm 4.3.1D.
+ *
+ * We need the first divisor digit to be >= NBASE/2. If it isn't, make it
+ * so by scaling up both the divisor and dividend by the factor "d". (The
+ * reason for allocating dividend[0] above is to leave room for possible
+ * carry here.)
+ */
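+ /*
+ * For example (illustrative, NBASE = 10000): if divisor[1] = 1234, then
+ * d = NBASE / (1234 + 1) = 8 and the scaled first divisor digit becomes
+ * 1234 * 8 = 9872 (plus any carry), which is >= HALF_NBASE. The quotient
+ * is unchanged because dividend and divisor are scaled by the same d.
+ */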
+ if (divisor[1] < HALF_NBASE)
+ {
+ int d = NBASE / (divisor[1] + 1);
+
+ carry = 0;
+ for (i = var2ndigits; i > 0; i--)
+ {
+ carry += divisor[i] * d;
+ divisor[i] = carry % NBASE;
+ carry = carry / NBASE;
+ }
+ Assert(carry == 0);
+ carry = 0;
+ /* at this point only var1ndigits of dividend can be nonzero */
+ for (i = var1ndigits; i >= 0; i--)
+ {
+ carry += dividend[i] * d;
+ dividend[i] = carry % NBASE;
+ carry = carry / NBASE;
+ }
+ Assert(carry == 0);
+ Assert(divisor[1] >= HALF_NBASE);
+ }
+ /* First 2 divisor digits are used repeatedly in main loop */
+ divisor1 = divisor[1];
+ divisor2 = divisor[2];
+
+ /*
+ * Begin the main loop. Each iteration of this loop produces the j'th
+ * quotient digit by dividing dividend[j .. j + var2ndigits] by the
+ * divisor; this is essentially the same as the common manual procedure
+ * for long division.
+ */
+ for (j = 0; j < res_ndigits; j++)
+ {
+ /* Estimate quotient digit from the first two dividend digits */
+ int next2digits = dividend[j] * NBASE + dividend[j + 1];
+ int qhat;
+
+ /*
+ * If next2digits is 0, then the quotient digit must be 0 and there's no
+ * need to adjust the working dividend. It's worth testing here to
+ * fall out ASAP when processing trailing zeroes in a dividend.
+ */
+ if (next2digits == 0)
+ {
+ res_digits[j] = 0;
+ continue;
+ }
+
+ if (dividend[j] == divisor1)
+ qhat = NBASE - 1;
+ else
+ qhat = next2digits / divisor1;
+
+ /*
+ * Adjust quotient digit if it's too large. Knuth proves that after
+ * this step, the quotient digit will be either correct or just one
+ * too large. (Note: it's OK to use dividend[j+2] here because we
+ * know the divisor length is at least 2.)
+ */
+ while (divisor2 * qhat >
+ (next2digits - qhat * divisor1) * NBASE + dividend[j + 2])
+ qhat--;
+
+ /* As above, need do nothing more when quotient digit is 0 */
+ if (qhat > 0)
+ {
+ NumericDigit *dividend_j = &dividend[j];
+
+ /*
+ * Multiply the divisor by qhat, and subtract that from the
+ * working dividend. The multiplication and subtraction are
+ * folded together here, noting that qhat <= NBASE (since it might
+ * be one too large), and so the intermediate result "tmp_result"
+ * is in the range [-NBASE^2, NBASE - 1], and "borrow" is in the
+ * range [0, NBASE].
+ */
+ borrow = 0;
+ for (i = var2ndigits; i >= 0; i--)
+ {
+ int tmp_result;
+
+ tmp_result = dividend_j[i] - borrow - divisor[i] * qhat;
+ borrow = (NBASE - 1 - tmp_result) / NBASE;
+ dividend_j[i] = tmp_result + borrow * NBASE;
+ }
+
+ /*
+ * If we got a borrow out of the top dividend digit, then indeed
+ * qhat was one too large. Fix it, and add back the divisor to
+ * correct the working dividend. (Knuth proves that this will
+ * occur only about 3/NBASE of the time; hence, it's a good idea
+ * to test this code with small NBASE to be sure this section gets
+ * exercised.)
+ */
+ if (borrow)
+ {
+ qhat--;
+ carry = 0;
+ for (i = var2ndigits; i >= 0; i--)
+ {
+ carry += dividend_j[i] + divisor[i];
+ if (carry >= NBASE)
+ {
+ dividend_j[i] = carry - NBASE;
+ carry = 1;
+ }
+ else
+ {
+ dividend_j[i] = carry;
+ carry = 0;
+ }
+ }
+ /* A carry should occur here to cancel the borrow above */
+ Assert(carry == 1);
+ }
+ }
+
+ /* And we're done with this quotient digit */
+ res_digits[j] = qhat;
+ }
+
+ pfree(dividend);
+
+ /*
+ * Finally, round or truncate the result to the requested precision.
+ */
+ result->weight = res_weight;
+ result->sign = res_sign;
+
+ /* Round or truncate to target rscale (and set result->dscale) */
+ if (round)
+ round_var(result, rscale);
+ else
+ trunc_var(result, rscale);
+
+ /* Strip leading and trailing zeroes */
+ strip_var(result);
+}
+
+
+/*
+ * div_var_fast() -
+ *
+ * This has the same API as div_var, but is implemented using the division
+ * algorithm from the "FM" library, rather than Knuth's schoolbook-division
+ * approach. This is significantly faster but can produce inaccurate
+ * results, because it sometimes has to propagate rounding to the left,
+ * and so we can never be entirely sure that we know the requested digits
+ * exactly. We compute DIV_GUARD_DIGITS extra digits, but there is
+ * no certainty that that's enough. We use this only in the transcendental
+ * function calculation routines, where everything is approximate anyway.
+ *
+ * Although we provide a "round" argument for consistency with div_var,
+ * it is unwise to use this function with round=false. In truncation mode
+ * it is possible to get a result with no significant digits, for example
+ * with rscale=0 we might compute 0.99999... and truncate that to 0 when
+ * the correct answer is 1.
+ */
+static void
+div_var_fast(const NumericVar *var1, const NumericVar *var2,
+ NumericVar *result, int rscale, bool round)
+{
+ int div_ndigits;
+ int load_ndigits;
+ int res_sign;
+ int res_weight;
+ int *div;
+ int qdigit;
+ int carry;
+ int maxdiv;
+ int newdig;
+ NumericDigit *res_digits;
+ double fdividend,
+ fdivisor,
+ fdivisorinverse,
+ fquotient;
+ int qi;
+ int i;
+
+ /* copy these values into local vars for speed in inner loop */
+ int var1ndigits = var1->ndigits;
+ int var2ndigits = var2->ndigits;
+ NumericDigit *var1digits = var1->digits;
+ NumericDigit *var2digits = var2->digits;
+
+ /*
+ * First of all division by zero check; we must not be handed an
+ * unnormalized divisor.
+ */
+ if (var2ndigits == 0 || var2digits[0] == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+
+ /*
+ * If the divisor has just one or two digits, delegate to div_var_int(),
+ * which uses fast short division.
+ */
+ if (var2ndigits <= 2)
+ {
+ int idivisor;
+ int idivisor_weight;
+
+ idivisor = var2->digits[0];
+ idivisor_weight = var2->weight;
+ if (var2ndigits == 2)
+ {
+ idivisor = idivisor * NBASE + var2->digits[1];
+ idivisor_weight--;
+ }
+ if (var2->sign == NUMERIC_NEG)
+ idivisor = -idivisor;
+
+ div_var_int(var1, idivisor, idivisor_weight, result, rscale, round);
+ return;
+ }
+
+ /*
+ * Otherwise, perform full long division.
+ */
+
+ /* Result zero check */
+ if (var1ndigits == 0)
+ {
+ zero_var(result);
+ result->dscale = rscale;
+ return;
+ }
+
+ /*
+ * Determine the result sign, weight and number of digits to calculate
+ */
+ if (var1->sign == var2->sign)
+ res_sign = NUMERIC_POS;
+ else
+ res_sign = NUMERIC_NEG;
+ res_weight = var1->weight - var2->weight + 1;
+ /* The number of accurate result digits we need to produce: */
+ div_ndigits = res_weight + 1 + (rscale + DEC_DIGITS - 1) / DEC_DIGITS;
+ /* Add guard digits for roundoff error */
+ div_ndigits += DIV_GUARD_DIGITS;
+ if (div_ndigits < DIV_GUARD_DIGITS)
+ div_ndigits = DIV_GUARD_DIGITS;
+
+ /*
+ * We do the arithmetic in an array "div[]" of signed int's. Since
+ * INT_MAX is noticeably larger than NBASE*NBASE, this gives us headroom
+ * to avoid normalizing carries immediately.
+ *
+ * We start with div[] containing one zero digit followed by the
+ * dividend's digits (plus appended zeroes to reach the desired precision
+ * including guard digits). Each step of the main loop computes an
+ * (approximate) quotient digit and stores it into div[], removing one
+ * position of dividend space. A final pass of carry propagation takes
+ * care of any mistaken quotient digits.
+ *
+ * Note that div[] doesn't necessarily contain all of the digits from the
+ * dividend --- the desired precision plus guard digits might be less than
+ * the dividend's precision. This happens, for example, in the square
+ * root algorithm, where we typically divide a 2N-digit number by an
+ * N-digit number, and only require a result with N digits of precision.
+ */
+ div = (int *) palloc0((div_ndigits + 1) * sizeof(int));
+ load_ndigits = Min(div_ndigits, var1ndigits);
+ for (i = 0; i < load_ndigits; i++)
+ div[i + 1] = var1digits[i];
+
+ /*
+ * We estimate each quotient digit using floating-point arithmetic, taking
+ * the first four digits of the (current) dividend and divisor. This must
+ * be float to avoid overflow. The quotient digits will generally be off
+ * by no more than one from the exact answer.
+ */
+ fdivisor = (double) var2digits[0];
+ for (i = 1; i < 4; i++)
+ {
+ fdivisor *= NBASE;
+ if (i < var2ndigits)
+ fdivisor += (double) var2digits[i];
+ }
+ fdivisorinverse = 1.0 / fdivisor;
+
+ /*
+ * maxdiv tracks the maximum possible absolute value of any div[] entry;
+ * when this threatens to exceed INT_MAX, we take the time to propagate
+ * carries. Furthermore, we need to ensure that overflow doesn't occur
+ * during the carry propagation passes either. The carry values may have
+ * an absolute value as high as INT_MAX/NBASE + 1, so really we must
+ * normalize when digits threaten to exceed INT_MAX - INT_MAX/NBASE - 1.
+ *
+ * To avoid overflow in maxdiv itself, it represents the max absolute
+ * value divided by NBASE-1, ie, at the top of the loop it is known that
+ * no div[] entry has an absolute value exceeding maxdiv * (NBASE-1).
+ *
+ * Actually, though, that holds good only for div[] entries after div[qi];
+ * the adjustment done at the bottom of the loop may cause div[qi + 1] to
+ * exceed the maxdiv limit, so that div[qi] in the next iteration is
+ * beyond the limit. This does not cause problems, as explained below.
+ */
+ maxdiv = 1;
+
+ /*
+ * Outer loop computes next quotient digit, which will go into div[qi]
+ */
+ for (qi = 0; qi < div_ndigits; qi++)
+ {
+ /* Approximate the current dividend value */
+ fdividend = (double) div[qi];
+ for (i = 1; i < 4; i++)
+ {
+ fdividend *= NBASE;
+ if (qi + i <= div_ndigits)
+ fdividend += (double) div[qi + i];
+ }
+ /* Compute the (approximate) quotient digit */
+ fquotient = fdividend * fdivisorinverse;
+ qdigit = (fquotient >= 0.0) ? ((int) fquotient) :
+ (((int) fquotient) - 1); /* truncate towards -infinity */
+
+ if (qdigit != 0)
+ {
+ /* Do we need to normalize now? */
+ maxdiv += Abs(qdigit);
+ if (maxdiv > (INT_MAX - INT_MAX / NBASE - 1) / (NBASE - 1))
+ {
+ /*
+ * Yes, do it. Note that if var2ndigits is much smaller than
+ * div_ndigits, we can save a significant amount of effort
+ * here by noting that we only need to normalise those div[]
+ * entries that prior iterations have touched by subtracting
+ * multiples of the divisor.
+ */
+ carry = 0;
+ for (i = Min(qi + var2ndigits - 2, div_ndigits); i > qi; i--)
+ {
+ newdig = div[i] + carry;
+ if (newdig < 0)
+ {
+ carry = -((-newdig - 1) / NBASE) - 1;
+ newdig -= carry * NBASE;
+ }
+ else if (newdig >= NBASE)
+ {
+ carry = newdig / NBASE;
+ newdig -= carry * NBASE;
+ }
+ else
+ carry = 0;
+ div[i] = newdig;
+ }
+ newdig = div[qi] + carry;
+ div[qi] = newdig;
+
+ /*
+ * All the div[] digits except possibly div[qi] are now in the
+ * range 0..NBASE-1. We do not need to consider div[qi] in
+ * the maxdiv value anymore, so we can reset maxdiv to 1.
+ */
+ maxdiv = 1;
+
+ /*
+ * Recompute the quotient digit since new info may have
+ * propagated into the top four dividend digits
+ */
+ fdividend = (double) div[qi];
+ for (i = 1; i < 4; i++)
+ {
+ fdividend *= NBASE;
+ if (qi + i <= div_ndigits)
+ fdividend += (double) div[qi + i];
+ }
+ /* Compute the (approximate) quotient digit */
+ fquotient = fdividend * fdivisorinverse;
+ qdigit = (fquotient >= 0.0) ? ((int) fquotient) :
+ (((int) fquotient) - 1); /* truncate towards -infinity */
+ maxdiv += Abs(qdigit);
+ }
+
+ /*
+ * Subtract off the appropriate multiple of the divisor.
+ *
+ * The digits beyond div[qi] cannot overflow, because we know they
+ * will fall within the maxdiv limit. As for div[qi] itself, note
+ * that qdigit is approximately trunc(div[qi] / var2digits[0]),
+ * which would make the new value simply div[qi] mod var2digits[0].
+ * The lower-order terms in qdigit can change this result by not
+ * more than about twice INT_MAX/NBASE, so overflow is impossible.
+ *
+ * This inner loop is the performance bottleneck for division, so
+ * code it in the same way as the inner loop of mul_var() so that
+ * it can be auto-vectorized. We cast qdigit to NumericDigit
+ * before multiplying to allow the compiler to generate more
+ * efficient code (using 16-bit multiplication), which is safe
+ * since we know that the quotient digit is off by at most one, so
+ * there is no overflow risk.
+ */
+ if (qdigit != 0)
+ {
+ int istop = Min(var2ndigits, div_ndigits - qi + 1);
+ int *div_qi = &div[qi];
+
+ for (i = 0; i < istop; i++)
+ div_qi[i] -= ((NumericDigit) qdigit) * var2digits[i];
+ }
+ }
+
+ /*
+ * The dividend digit we are about to replace might still be nonzero.
+ * Fold it into the next digit position.
+ *
+ * There is no risk of overflow here, although proving that requires
+ * some care. Much as with the argument for div[qi] not overflowing,
+ * if we consider the first two terms in the numerator and denominator
+ * of qdigit, we can see that the final value of div[qi + 1] will be
+ * approximately a remainder mod (var2digits[0]*NBASE + var2digits[1]).
+ * Accounting for the lower-order terms is a bit complicated but ends
+ * up adding not much more than INT_MAX/NBASE to the possible range.
+ * Thus, div[qi + 1] cannot overflow here, and in its role as div[qi]
+ * in the next loop iteration, it can't be large enough to cause
+ * overflow in the carry propagation step (if any), either.
+ *
+ * But having said that: div[qi] can be more than INT_MAX/NBASE, as
+ * noted above, which means that the product div[qi] * NBASE *can*
+ * overflow. When that happens, adding it to div[qi + 1] will always
+ * cause a canceling overflow so that the end result is correct. We
+ * could avoid the intermediate overflow by doing the multiplication
+ * and addition in int64 arithmetic, but so far there appears no need.
+ */
+ div[qi + 1] += div[qi] * NBASE;
+
+ div[qi] = qdigit;
+ }
+
+ /*
+ * Approximate and store the last quotient digit (div[div_ndigits])
+ */
+ fdividend = (double) div[qi];
+ for (i = 1; i < 4; i++)
+ fdividend *= NBASE;
+ fquotient = fdividend * fdivisorinverse;
+ qdigit = (fquotient >= 0.0) ? ((int) fquotient) :
+ (((int) fquotient) - 1); /* truncate towards -infinity */
+ div[qi] = qdigit;
+
+ /*
+ * Because the quotient digits might be off by one, some of them might be
+ * -1 or NBASE at this point. The represented value is correct in a
+ * mathematical sense, but it doesn't look right. We do a final carry
+ * propagation pass to normalize the digits, which we combine with storing
+ * the result digits into the output. Note that this is still done at
+ * full precision w/guard digits.
+ */
+ alloc_var(result, div_ndigits + 1);
+ res_digits = result->digits;
+ carry = 0;
+ for (i = div_ndigits; i >= 0; i--)
+ {
+ newdig = div[i] + carry;
+ if (newdig < 0)
+ {
+ carry = -((-newdig - 1) / NBASE) - 1;
+ newdig -= carry * NBASE;
+ }
+ else if (newdig >= NBASE)
+ {
+ carry = newdig / NBASE;
+ newdig -= carry * NBASE;
+ }
+ else
+ carry = 0;
+ res_digits[i] = newdig;
+ }
+ Assert(carry == 0);
+
+ pfree(div);
+
+ /*
+ * Finally, round the result to the requested precision.
+ */
+ result->weight = res_weight;
+ result->sign = res_sign;
+
+ /* Round to target rscale (and set result->dscale) */
+ if (round)
+ round_var(result, rscale);
+ else
+ trunc_var(result, rscale);
+
+ /* Strip leading and trailing zeroes */
+ strip_var(result);
+}
+
+
+/*
+ * div_var_int() -
+ *
+ * Divide a numeric variable by a 32-bit integer with the specified weight.
+ * The quotient var / (ival * NBASE^ival_weight) is stored in result.
+ */
+static void
+div_var_int(const NumericVar *var, int ival, int ival_weight,
+ NumericVar *result, int rscale, bool round)
+{
+ NumericDigit *var_digits = var->digits;
+ int var_ndigits = var->ndigits;
+ int res_sign;
+ int res_weight;
+ int res_ndigits;
+ NumericDigit *res_buf;
+ NumericDigit *res_digits;
+ uint32 divisor;
+ int i;
+
+ /* Guard against division by zero */
+ if (ival == 0)
+ ereport(ERROR,
+ errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero"));
+
+ /* Result zero check */
+ if (var_ndigits == 0)
+ {
+ zero_var(result);
+ result->dscale = rscale;
+ return;
+ }
+
+ /*
+ * Determine the result sign, weight and number of digits to calculate.
+ * The weight figured here is correct if the emitted quotient has no
+ * leading zero digits; otherwise strip_var() will fix things up.
+ */
+ if (var->sign == NUMERIC_POS)
+ res_sign = ival > 0 ? NUMERIC_POS : NUMERIC_NEG;
+ else
+ res_sign = ival > 0 ? NUMERIC_NEG : NUMERIC_POS;
+ res_weight = var->weight - ival_weight;
+ /* The number of accurate result digits we need to produce: */
+ res_ndigits = res_weight + 1 + (rscale + DEC_DIGITS - 1) / DEC_DIGITS;
+ /* ... but always at least 1 */
+ res_ndigits = Max(res_ndigits, 1);
+ /* If rounding needed, figure one more digit to ensure correct result */
+ if (round)
+ res_ndigits++;
+
+ res_buf = digitbuf_alloc(res_ndigits + 1);
+ res_buf[0] = 0; /* spare digit for later rounding */
+ res_digits = res_buf + 1;
+
+ /*
+ * Now compute the quotient digits. This is the short division algorithm
+ * described in Knuth volume 2, section 4.3.1 exercise 16, except that we
+ * allow the divisor to exceed the internal base.
+ *
+ * In this algorithm, the carry from one digit to the next is at most
+ * divisor - 1. Therefore, while processing the next digit, carry may
+ * become as large as divisor * NBASE - 1, and so it requires a 64-bit
+ * integer if this exceeds UINT_MAX.
+ */
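+ /*
+ * Concretely, with NBASE = 10000: divisors up to UINT_MAX/NBASE = 429496
+ * take the 32-bit branch below, while e.g. dividing by 1000000 requires
+ * the 64-bit carry.
+ */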
+ divisor = Abs(ival);
+
+ if (divisor <= UINT_MAX / NBASE)
+ {
+ /* carry cannot overflow 32 bits */
+ uint32 carry = 0;
+
+ for (i = 0; i < res_ndigits; i++)
+ {
+ carry = carry * NBASE + (i < var_ndigits ? var_digits[i] : 0);
+ res_digits[i] = (NumericDigit) (carry / divisor);
+ carry = carry % divisor;
+ }
+ }
+ else
+ {
+ /* carry may exceed 32 bits */
+ uint64 carry = 0;
+
+ for (i = 0; i < res_ndigits; i++)
+ {
+ carry = carry * NBASE + (i < var_ndigits ? var_digits[i] : 0);
+ res_digits[i] = (NumericDigit) (carry / divisor);
+ carry = carry % divisor;
+ }
+ }
+
+ /* Store the quotient in result */
+ digitbuf_free(result->buf);
+ result->ndigits = res_ndigits;
+ result->buf = res_buf;
+ result->digits = res_digits;
+ result->weight = res_weight;
+ result->sign = res_sign;
+
+ /* Round or truncate to target rscale (and set result->dscale) */
+ if (round)
+ round_var(result, rscale);
+ else
+ trunc_var(result, rscale);
+
+ /* Strip leading/trailing zeroes */
+ strip_var(result);
+}
+
+
+/*
+ * Default scale selection for division
+ *
+ * Returns the appropriate result scale for the division result.
+ */
+static int
+select_div_scale(const NumericVar *var1, const NumericVar *var2)
+{
+ int weight1,
+ weight2,
+ qweight,
+ i;
+ NumericDigit firstdigit1,
+ firstdigit2;
+ int rscale;
+
+ /*
+ * The result scale of a division isn't specified in any SQL standard. For
+ * PostgreSQL we select a result scale that will give at least
+ * NUMERIC_MIN_SIG_DIGITS significant digits, so that numeric gives a
+ * result no less accurate than float8; but use a scale not less than
+ * either input's display scale.
+ */
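+ /*
+ * Worked example (illustrative, with NUMERIC_MIN_SIG_DIGITS = 16 and
+ * DEC_DIGITS = 4): for 1 / 3, both weights are 0 and firstdigit1 = 1 <=
+ * firstdigit2 = 3, so qweight = -1 and rscale = 16 + 4 = 20, giving
+ * 0.33333333333333333333.
+ */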
+
+ /* Get the actual (normalized) weight and first digit of each input */
+
+ weight1 = 0; /* values to use if var1 is zero */
+ firstdigit1 = 0;
+ for (i = 0; i < var1->ndigits; i++)
+ {
+ firstdigit1 = var1->digits[i];
+ if (firstdigit1 != 0)
+ {
+ weight1 = var1->weight - i;
+ break;
+ }
+ }
+
+ weight2 = 0; /* values to use if var2 is zero */
+ firstdigit2 = 0;
+ for (i = 0; i < var2->ndigits; i++)
+ {
+ firstdigit2 = var2->digits[i];
+ if (firstdigit2 != 0)
+ {
+ weight2 = var2->weight - i;
+ break;
+ }
+ }
+
+ /*
+ * Estimate weight of quotient. If the two first digits are equal, we
+ * can't be sure, but assume that var1 is less than var2.
+ */
+ qweight = weight1 - weight2;
+ if (firstdigit1 <= firstdigit2)
+ qweight--;
+
+ /* Select result scale */
+ rscale = NUMERIC_MIN_SIG_DIGITS - qweight * DEC_DIGITS;
+ rscale = Max(rscale, var1->dscale);
+ rscale = Max(rscale, var2->dscale);
+ rscale = Max(rscale, NUMERIC_MIN_DISPLAY_SCALE);
+ rscale = Min(rscale, NUMERIC_MAX_DISPLAY_SCALE);
+
+ return rscale;
+}
+
+
+/*
+ * mod_var() -
+ *
+ * Calculate the modulo of two numerics at variable level
+ */
+static void
+mod_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result)
+{
+ NumericVar tmp;
+
+ init_var(&tmp);
+
+ /* ----------
+ * We do this using the equation
+ * mod(x,y) = x - trunc(x/y)*y
+ * div_var can be persuaded to give us trunc(x/y) directly.
+ * ----------
+ */
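+ /*
+ * Illustration: mod(27.4, 10) computes trunc(27.4/10) = 2, then
+ * 27.4 - 2*10 = 7.4. The result takes its sign from var1.
+ */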
+ div_var(var1, var2, &tmp, 0, false);
+
+ mul_var(var2, &tmp, &tmp, var2->dscale);
+
+ sub_var(var1, &tmp, result);
+
+ free_var(&tmp);
+}
+
+
+/*
+ * div_mod_var() -
+ *
+ * Calculate the truncated integer quotient and numeric remainder of two
+ * numeric variables. The remainder is precise to var2's dscale.
+ */
+static void
+div_mod_var(const NumericVar *var1, const NumericVar *var2,
+ NumericVar *quot, NumericVar *rem)
+{
+ NumericVar q;
+ NumericVar r;
+
+ init_var(&q);
+ init_var(&r);
+
+ /*
+ * Use div_var_fast() to get an initial estimate for the integer quotient.
+ * This might be inaccurate (per the warning in div_var_fast's comments),
+ * but we can correct it below.
+ */
+ div_var_fast(var1, var2, &q, 0, false);
+
+ /* Compute initial estimate of remainder using the quotient estimate. */
+ mul_var(var2, &q, &r, var2->dscale);
+ sub_var(var1, &r, &r);
+
+ /*
+ * Adjust the results if necessary --- the remainder should have the same
+ * sign as var1, and its absolute value should be less than the absolute
+ * value of var2.
+ */
+ while (r.ndigits != 0 && r.sign != var1->sign)
+ {
+ /* The absolute value of the quotient is too large */
+ if (var1->sign == var2->sign)
+ {
+ sub_var(&q, &const_one, &q);
+ add_var(&r, var2, &r);
+ }
+ else
+ {
+ add_var(&q, &const_one, &q);
+ sub_var(&r, var2, &r);
+ }
+ }
+
+ while (cmp_abs(&r, var2) >= 0)
+ {
+ /* The absolute value of the quotient is too small */
+ if (var1->sign == var2->sign)
+ {
+ add_var(&q, &const_one, &q);
+ sub_var(&r, var2, &r);
+ }
+ else
+ {
+ sub_var(&q, &const_one, &q);
+ add_var(&r, var2, &r);
+ }
+ }
+
+ set_var_from_var(&q, quot);
+ set_var_from_var(&r, rem);
+
+ free_var(&q);
+ free_var(&r);
+}
+
+
+/*
+ * ceil_var() -
+ *
+ * Return the smallest integer greater than or equal to the argument
+ * on variable level
+ */
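+/* Illustration: ceil(4.2) = 5, ceil(-4.2) = -4, ceil(7) = 7. */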
+static void
+ceil_var(const NumericVar *var, NumericVar *result)
+{
+ NumericVar tmp;
+
+ init_var(&tmp);
+ set_var_from_var(var, &tmp);
+
+ trunc_var(&tmp, 0);
+
+ if (var->sign == NUMERIC_POS && cmp_var(var, &tmp) != 0)
+ add_var(&tmp, &const_one, &tmp);
+
+ set_var_from_var(&tmp, result);
+ free_var(&tmp);
+}
+
+
+/*
+ * floor_var() -
+ *
+ * Return the largest integer equal to or less than the argument
+ * on variable level
+ */
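+/* Illustration: floor(4.8) = 4, floor(-4.8) = -5, floor(7) = 7. */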
+static void
+floor_var(const NumericVar *var, NumericVar *result)
+{
+ NumericVar tmp;
+
+ init_var(&tmp);
+ set_var_from_var(var, &tmp);
+
+ trunc_var(&tmp, 0);
+
+ if (var->sign == NUMERIC_NEG && cmp_var(var, &tmp) != 0)
+ sub_var(&tmp, &const_one, &tmp);
+
+ set_var_from_var(&tmp, result);
+ free_var(&tmp);
+}
+
+
+/*
+ * gcd_var() -
+ *
+ * Calculate the greatest common divisor of two numerics at variable level
+ */
+static void
+gcd_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result)
+{
+ int res_dscale;
+ int cmp;
+ NumericVar tmp_arg;
+ NumericVar mod;
+
+ res_dscale = Max(var1->dscale, var2->dscale);
+
+ /*
+ * Arrange for var1 to be the number with the greater absolute value.
+ *
+ * This would happen automatically in the loop below, but doing it here
+ * avoids an expensive modulo operation.
+ */
+ cmp = cmp_abs(var1, var2);
+ if (cmp < 0)
+ {
+ const NumericVar *tmp = var1;
+
+ var1 = var2;
+ var2 = tmp;
+ }
+
+ /*
+ * Also avoid taking the modulo if the inputs have the same absolute
+ * value, or if the smaller input is zero.
+ */
+ if (cmp == 0 || var2->ndigits == 0)
+ {
+ set_var_from_var(var1, result);
+ result->sign = NUMERIC_POS;
+ result->dscale = res_dscale;
+ return;
+ }
+
+ init_var(&tmp_arg);
+ init_var(&mod);
+
+ /* Use the Euclidean algorithm to find the GCD */
+ set_var_from_var(var1, &tmp_arg);
+ set_var_from_var(var2, result);
+
+ for (;;)
+ {
+ /* this loop can take a while, so allow it to be interrupted */
+ CHECK_FOR_INTERRUPTS();
+
+ mod_var(&tmp_arg, result, &mod);
+ if (mod.ndigits == 0)
+ break;
+ set_var_from_var(result, &tmp_arg);
+ set_var_from_var(&mod, result);
+ }
+ result->sign = NUMERIC_POS;
+ result->dscale = res_dscale;
+
+ free_var(&tmp_arg);
+ free_var(&mod);
+}
+
+
+/*
+ * sqrt_var() -
+ *
+ * Compute the square root of x using the Karatsuba Square Root algorithm.
+ * NOTE: we allow rscale < 0 here, implying rounding before the decimal
+ * point.
+ */
+static void
+sqrt_var(const NumericVar *arg, NumericVar *result, int rscale)
+{
+ int stat;
+ int res_weight;
+ int res_ndigits;
+ int src_ndigits;
+ int step;
+ int ndigits[32];
+ int blen;
+ int64 arg_int64;
+ int src_idx;
+ int64 s_int64;
+ int64 r_int64;
+ NumericVar s_var;
+ NumericVar r_var;
+ NumericVar a0_var;
+ NumericVar a1_var;
+ NumericVar q_var;
+ NumericVar u_var;
+
+ stat = cmp_var(arg, &const_zero);
+ if (stat == 0)
+ {
+ zero_var(result);
+ result->dscale = rscale;
+ return;
+ }
+
+ /*
+ * SQL2003 defines sqrt() in terms of power, so we need to emit the right
+ * SQLSTATE error code if the operand is negative.
+ */
+ if (stat < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_POWER_FUNCTION),
+ errmsg("cannot take square root of a negative number")));
+
+ init_var(&s_var);
+ init_var(&r_var);
+ init_var(&a0_var);
+ init_var(&a1_var);
+ init_var(&q_var);
+ init_var(&u_var);
+
+ /*
+ * The result weight is half the input weight, rounded towards minus
+ * infinity --- res_weight = floor(arg->weight / 2).
+ */
+ if (arg->weight >= 0)
+ res_weight = arg->weight / 2;
+ else
+ res_weight = -((-arg->weight - 1) / 2 + 1);
+
+ /*
+ * Number of NBASE digits to compute. To ensure correct rounding, compute
+ * at least 1 extra decimal digit. We explicitly allow rscale to be
+ * negative here, but must always compute at least 1 NBASE digit. Thus
+ * res_ndigits = res_weight + 1 + ceil((rscale + 1) / DEC_DIGITS) or 1.
+ */
+ if (rscale + 1 >= 0)
+ res_ndigits = res_weight + 1 + (rscale + DEC_DIGITS) / DEC_DIGITS;
+ else
+ res_ndigits = res_weight + 1 - (-rscale - 1) / DEC_DIGITS;
+ res_ndigits = Max(res_ndigits, 1);
+
+ /*
+ * Number of source NBASE digits logically required to produce a result
+ * with this precision --- every digit before the decimal point, plus 2
+ * for each result digit after the decimal point (or minus 2 for each
+ * result digit we round before the decimal point).
+ */
+ src_ndigits = arg->weight + 1 + (res_ndigits - res_weight - 1) * 2;
+ src_ndigits = Max(src_ndigits, 1);
+
+ /* ----------
+ * From this point on, we treat the input and the result as integers and
+ * compute the integer square root and remainder using the Karatsuba
+ * Square Root algorithm, which may be written recursively as follows:
+ *
+ * SqrtRem(n = a3*b^3 + a2*b^2 + a1*b + a0):
+ * [ for some base b, and coefficients a0,a1,a2,a3 chosen so that
+ * 0 <= a0,a1,a2 < b and a3 >= b/4 ]
+ * Let (s,r) = SqrtRem(a3*b + a2)
+ * Let (q,u) = DivRem(r*b + a1, 2*s)
+ * Let s = s*b + q
+ * Let r = u*b + a0 - q^2
+ * If r < 0 Then
+ * Let r = r + s
+ * Let s = s - 1
+ * Let r = r + s
+ * Return (s,r)
+ *
+ * See "Karatsuba Square Root", Paul Zimmermann, INRIA Research Report
+ * RR-3805, November 1999. At the time of writing this was available
+ * on the net at <https://hal.inria.fr/inria-00072854>.
+ *
+ * The way to read the assumption "n = a3*b^3 + a2*b^2 + a1*b + a0" is
+ * "choose a base b such that n requires at least four base-b digits to
+ * express; then those digits are a3,a2,a1,a0, with a3 possibly larger
+ * than b". For optimal performance, b should have approximately a
+ * quarter the number of digits in the input, so that the outer square
+ * root computes roughly twice as many digits as the inner one. For
+ * simplicity, we choose b = NBASE^blen, an integer power of NBASE.
+ *
+ * We implement the algorithm iteratively rather than recursively, to
+ * allow the working variables to be reused. With this approach, each
+ * digit of the input is read precisely once --- src_idx tracks the number
+ * of input digits used so far.
+ *
+ * The array ndigits[] holds the number of NBASE digits of the input that
+ * will have been used at the end of each iteration, which roughly doubles
+ * each time. Note that the array elements are stored in reverse order,
+ * so if the final iteration requires src_ndigits = 37 input digits, the
+ * array will contain [37,19,11,7,5,3], and we would start by computing
+ * the square root of the 3 most significant NBASE digits.
+ *
+ * In each iteration, we choose blen to be the largest integer for which
+ * the input number has a3 >= b/4, when written in the form above. In
+ * general, this means blen = src_ndigits / 4 (truncated), but if
+ * src_ndigits is a multiple of 4, that might lead to the coefficient a3
+ * being less than b/4 (if the first input digit is less than NBASE/4), in
+ * which case we choose blen = src_ndigits / 4 - 1. The number of digits
+ * in the inner square root is then src_ndigits - 2*blen. So, for
+ * example, if we have src_ndigits = 26 initially, the array ndigits[]
+ * will be either [26,14,8,4] or [26,14,8,6,4], depending on the size of
+ * the first input digit.
+ *
+ * Additionally, we can put an upper bound on the number of steps required
+ * as follows --- suppose that the number of source digits is an n-bit
+ * number in the range [2^(n-1), 2^n-1], then blen will be in the range
+ * [2^(n-3)-1, 2^(n-2)-1] and the number of digits in the inner square
+ * root will be in the range [2^(n-2), 2^(n-1)+1]. In the next step, blen
+ * will be in the range [2^(n-4)-1, 2^(n-3)] and the number of digits in
+ * the next inner square root will be in the range [2^(n-3), 2^(n-2)+1].
+ * This pattern repeats, and in the worst case the array ndigits[] will
+ * contain [2^n-1, 2^(n-1)+1, 2^(n-2)+1, ... 9, 5, 3], and the computation
+ * will require n steps. Therefore, since all digit array sizes are
+ * signed 32-bit integers, the number of steps required is guaranteed to
+ * be less than 32.
+ * ----------
+ */
+ step = 0;
+ while ((ndigits[step] = src_ndigits) > 4)
+ {
+ /* Choose b so that a3 >= b/4, as described above */
+ blen = src_ndigits / 4;
+ if (blen * 4 == src_ndigits && arg->digits[0] < NBASE / 4)
+ blen--;
+
+ /* Number of digits in the next step (inner square root) */
+ src_ndigits -= 2 * blen;
+ step++;
+ }
+
+ /*
+ * First iteration (innermost square root and remainder):
+ *
+ * Here src_ndigits <= 4, and the input fits in an int64. Its square root
+ * has at most 9 decimal digits, so estimate it using double precision
+ * arithmetic, which will in fact almost certainly return the correct
+ * result with no further correction required.
+ */
+ arg_int64 = arg->digits[0];
+ for (src_idx = 1; src_idx < src_ndigits; src_idx++)
+ {
+ arg_int64 *= NBASE;
+ if (src_idx < arg->ndigits)
+ arg_int64 += arg->digits[src_idx];
+ }
+
+ s_int64 = (int64) sqrt((double) arg_int64);
+ r_int64 = arg_int64 - s_int64 * s_int64;
+
+ /*
+ * Use Newton's method to correct the result, if necessary.
+ *
+ * This uses integer division with truncation to compute the truncated
+ * integer square root by iterating using the formula x -> (x + n/x) / 2.
+ * This is known to converge to isqrt(n), unless n+1 is a perfect square.
+ * If n+1 is a perfect square, the sequence will oscillate between the two
+ * values isqrt(n) and isqrt(n)+1, so we can be assured of convergence by
+ * checking the remainder.
+ */
+ while (r_int64 < 0 || r_int64 > 2 * s_int64)
+ {
+ s_int64 = (s_int64 + arg_int64 / s_int64) / 2;
+ r_int64 = arg_int64 - s_int64 * s_int64;
+ }
+
+ /*
+ * Iterations with src_ndigits <= 8:
+ *
+ * The next 1 or 2 iterations compute larger (outer) square roots with
+ * src_ndigits <= 8, so the result still fits in an int64 (even though the
+ * input no longer does) and we can continue to compute using int64
+ * variables to avoid more expensive numeric computations.
+ *
+ * It is fairly easy to see that there is no risk of the intermediate
+ * values below overflowing 64-bit integers. In the worst case, the
+ * previous iteration will have computed a 3-digit square root (of a
+ * 6-digit input less than NBASE^6 / 4), so at the start of this
+ * iteration, s will be less than NBASE^3 / 2 = 10^12 / 2, and r will be
+ * less than 10^12. In this case, blen will be 1, so numer will be less
+ * than 10^17, and denom will be less than 10^12 (and hence u will also be
+ * less than 10^12). Finally, since q^2 = u*b + a0 - r, we can also be
+ * sure that q^2 < 10^17. Therefore all these quantities fit comfortably
+ * in 64-bit integers.
+ */
+ step--;
+ while (step >= 0 && (src_ndigits = ndigits[step]) <= 8)
+ {
+ int b;
+ int a0;
+ int a1;
+ int i;
+ int64 numer;
+ int64 denom;
+ int64 q;
+ int64 u;
+
+ blen = (src_ndigits - src_idx) / 2;
+
+ /* Extract a1 and a0, and compute b */
+ a0 = 0;
+ a1 = 0;
+ b = 1;
+
+ for (i = 0; i < blen; i++, src_idx++)
+ {
+ b *= NBASE;
+ a1 *= NBASE;
+ if (src_idx < arg->ndigits)
+ a1 += arg->digits[src_idx];
+ }
+
+ for (i = 0; i < blen; i++, src_idx++)
+ {
+ a0 *= NBASE;
+ if (src_idx < arg->ndigits)
+ a0 += arg->digits[src_idx];
+ }
+
+ /* Compute (q,u) = DivRem(r*b + a1, 2*s) */
+ numer = r_int64 * b + a1;
+ denom = 2 * s_int64;
+ q = numer / denom;
+ u = numer - q * denom;
+
+ /* Compute s = s*b + q and r = u*b + a0 - q^2 */
+ s_int64 = s_int64 * b + q;
+ r_int64 = u * b + a0 - q * q;
+
+ if (r_int64 < 0)
+ {
+ /* s is too large by 1; set r += s, s--, r += s */
+ r_int64 += s_int64;
+ s_int64--;
+ r_int64 += s_int64;
+ }
+
+ Assert(src_idx == src_ndigits); /* All input digits consumed */
+ step--;
+ }
+
+ /*
+ * On platforms with 128-bit integer support, we can further delay the
+ * need to use numeric variables.
+ */
+#ifdef HAVE_INT128
+ if (step >= 0)
+ {
+ int128 s_int128;
+ int128 r_int128;
+
+ s_int128 = s_int64;
+ r_int128 = r_int64;
+
+ /*
+ * Iterations with src_ndigits <= 16:
+ *
+ * The result fits in an int128 (even though the input doesn't) so we
+ * use int128 variables to avoid more expensive numeric computations.
+ */
+ while (step >= 0 && (src_ndigits = ndigits[step]) <= 16)
+ {
+ int64 b;
+ int64 a0;
+ int64 a1;
+ int64 i;
+ int128 numer;
+ int128 denom;
+ int128 q;
+ int128 u;
+
+ blen = (src_ndigits - src_idx) / 2;
+
+ /* Extract a1 and a0, and compute b */
+ a0 = 0;
+ a1 = 0;
+ b = 1;
+
+ for (i = 0; i < blen; i++, src_idx++)
+ {
+ b *= NBASE;
+ a1 *= NBASE;
+ if (src_idx < arg->ndigits)
+ a1 += arg->digits[src_idx];
+ }
+
+ for (i = 0; i < blen; i++, src_idx++)
+ {
+ a0 *= NBASE;
+ if (src_idx < arg->ndigits)
+ a0 += arg->digits[src_idx];
+ }
+
+ /* Compute (q,u) = DivRem(r*b + a1, 2*s) */
+ numer = r_int128 * b + a1;
+ denom = 2 * s_int128;
+ q = numer / denom;
+ u = numer - q * denom;
+
+ /* Compute s = s*b + q and r = u*b + a0 - q^2 */
+ s_int128 = s_int128 * b + q;
+ r_int128 = u * b + a0 - q * q;
+
+ if (r_int128 < 0)
+ {
+ /* s is too large by 1; set r += s, s--, r += s */
+ r_int128 += s_int128;
+ s_int128--;
+ r_int128 += s_int128;
+ }
+
+ Assert(src_idx == src_ndigits); /* All input digits consumed */
+ step--;
+ }
+
+ /*
+ * All remaining iterations require numeric variables. Convert the
+ * integer values to NumericVar and continue. Note that in the final
+ * iteration we don't need the remainder, so we can save a few cycles
+ * there by not fully computing it.
+ */
+ int128_to_numericvar(s_int128, &s_var);
+ if (step >= 0)
+ int128_to_numericvar(r_int128, &r_var);
+ }
+ else
+ {
+ int64_to_numericvar(s_int64, &s_var);
+ /* step < 0, so we certainly don't need r */
+ }
+#else /* !HAVE_INT128 */
+ int64_to_numericvar(s_int64, &s_var);
+ if (step >= 0)
+ int64_to_numericvar(r_int64, &r_var);
+#endif /* HAVE_INT128 */
+
+ /*
+ * The remaining iterations with src_ndigits > 8 (or 16, if have int128)
+ * use numeric variables.
+ */
+ while (step >= 0)
+ {
+ int tmp_len;
+
+ src_ndigits = ndigits[step];
+ blen = (src_ndigits - src_idx) / 2;
+
+ /* Extract a1 and a0 */
+ if (src_idx < arg->ndigits)
+ {
+ tmp_len = Min(blen, arg->ndigits - src_idx);
+ alloc_var(&a1_var, tmp_len);
+ memcpy(a1_var.digits, arg->digits + src_idx,
+ tmp_len * sizeof(NumericDigit));
+ a1_var.weight = blen - 1;
+ a1_var.sign = NUMERIC_POS;
+ a1_var.dscale = 0;
+ strip_var(&a1_var);
+ }
+ else
+ {
+ zero_var(&a1_var);
+ a1_var.dscale = 0;
+ }
+ src_idx += blen;
+
+ if (src_idx < arg->ndigits)
+ {
+ tmp_len = Min(blen, arg->ndigits - src_idx);
+ alloc_var(&a0_var, tmp_len);
+ memcpy(a0_var.digits, arg->digits + src_idx,
+ tmp_len * sizeof(NumericDigit));
+ a0_var.weight = blen - 1;
+ a0_var.sign = NUMERIC_POS;
+ a0_var.dscale = 0;
+ strip_var(&a0_var);
+ }
+ else
+ {
+ zero_var(&a0_var);
+ a0_var.dscale = 0;
+ }
+ src_idx += blen;
+
+ /* Compute (q,u) = DivRem(r*b + a1, 2*s) */
+ set_var_from_var(&r_var, &q_var);
+ q_var.weight += blen;
+ add_var(&q_var, &a1_var, &q_var);
+ add_var(&s_var, &s_var, &u_var);
+ div_mod_var(&q_var, &u_var, &q_var, &u_var);
+
+ /* Compute s = s*b + q */
+ s_var.weight += blen;
+ add_var(&s_var, &q_var, &s_var);
+
+ /*
+ * Compute r = u*b + a0 - q^2.
+ *
+ * In the final iteration, we don't actually need r; we just need to
+ * know whether it is negative, so that we know whether to adjust s.
+ * So instead of the final subtraction we can just compare.
+ */
+ u_var.weight += blen;
+ add_var(&u_var, &a0_var, &u_var);
+ mul_var(&q_var, &q_var, &q_var, 0);
+
+ if (step > 0)
+ {
+ /* Need r for later iterations */
+ sub_var(&u_var, &q_var, &r_var);
+ if (r_var.sign == NUMERIC_NEG)
+ {
+ /* s is too large by 1; set r += s, s--, r += s */
+ add_var(&r_var, &s_var, &r_var);
+ sub_var(&s_var, &const_one, &s_var);
+ add_var(&r_var, &s_var, &r_var);
+ }
+ }
+ else
+ {
+ /* Don't need r anymore, except to test if s is too large by 1 */
+ if (cmp_var(&u_var, &q_var) < 0)
+ sub_var(&s_var, &const_one, &s_var);
+ }
+
+ Assert(src_idx == src_ndigits); /* All input digits consumed */
+ step--;
+ }
+
+ /*
+ * Construct the final result, rounding it to the requested precision.
+ */
+ set_var_from_var(&s_var, result);
+ result->weight = res_weight;
+ result->sign = NUMERIC_POS;
+
+ /* Round to target rscale (and set result->dscale) */
+ round_var(result, rscale);
+
+ /* Strip leading and trailing zeroes */
+ strip_var(result);
+
+ free_var(&s_var);
+ free_var(&r_var);
+ free_var(&a0_var);
+ free_var(&a1_var);
+ free_var(&q_var);
+ free_var(&u_var);
+}
+
+
+/*
+ * exp_var() -
+ *
+ * Raise e to the power of x, computed to rscale fractional digits
+ */
+static void
+exp_var(const NumericVar *arg, NumericVar *result, int rscale)
+{
+ NumericVar x;
+ NumericVar elem;
+ int ni;
+ double val;
+ int dweight;
+ int ndiv2;
+ int sig_digits;
+ int local_rscale;
+
+ init_var(&x);
+ init_var(&elem);
+
+ set_var_from_var(arg, &x);
+
+ /*
+ * Estimate the dweight of the result using floating point arithmetic, so
+ * that we can choose an appropriate local rscale for the calculation.
+ */
+ val = numericvar_to_double_no_overflow(&x);
+
+ /* Guard against overflow/underflow */
+ /* If you change this limit, see also power_var()'s limit */
+ if (Abs(val) >= NUMERIC_MAX_RESULT_SCALE * 3)
+ {
+ if (val > 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value overflows numeric format")));
+ zero_var(result);
+ result->dscale = rscale;
+ return;
+ }
+
+ /* decimal weight = log10(e^x) = x * log10(e) */
+ dweight = (int) (val * 0.434294481903252);
+
+ /*
+ * Reduce x to the range -0.01 <= x <= 0.01 (approximately) by dividing by
+ * 2^ndiv2, to improve the convergence rate of the Taylor series.
+ *
+ * Note that the overflow check above ensures that Abs(x) < 6000, which
+ * means that ndiv2 <= 20 here.
+ */
+ if (Abs(val) > 0.01)
+ {
+ ndiv2 = 1;
+ val /= 2;
+
+ while (Abs(val) > 0.01)
+ {
+ ndiv2++;
+ val /= 2;
+ }
+
+ local_rscale = x.dscale + ndiv2;
+ div_var_int(&x, 1 << ndiv2, 0, &x, local_rscale, true);
+ }
+ else
+ ndiv2 = 0;
+
+ /*
+ * Set the scale for the Taylor series expansion. The final result has
+ * (dweight + rscale + 1) significant digits. In addition, we have to
+ * raise the Taylor series result to the power 2^ndiv2, which introduces
+ * an error of up to around log10(2^ndiv2) digits, so work with this many
+ * extra digits of precision (plus a few more for good measure).
+ */
+ sig_digits = 1 + dweight + rscale + (int) (ndiv2 * 0.301029995663981);
+ sig_digits = Max(sig_digits, 0) + 8;
+
+ local_rscale = sig_digits - 1;
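+
+	/*
+	 * For example, computing exp(40) to rscale = 20 gives dweight = 17 and
+	 * ndiv2 = 12 (40 / 2^12 is about 0.0098), so sig_digits = 49 and the
+	 * series below runs with local_rscale = 48.
+	 */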
+
+ /*
+ * Use the Taylor series
+ *
+ * exp(x) = 1 + x + x^2/2! + x^3/3! + ...
+ *
+ * Given the limited range of x, this should converge reasonably quickly.
+ * We run the series until the terms fall below the local_rscale limit.
+ */
+ add_var(&const_one, &x, result);
+
+ mul_var(&x, &x, &elem, local_rscale);
+ ni = 2;
+ div_var_int(&elem, ni, 0, &elem, local_rscale, true);
+
+ while (elem.ndigits != 0)
+ {
+ add_var(result, &elem, result);
+
+ mul_var(&elem, &x, &elem, local_rscale);
+ ni++;
+ div_var_int(&elem, ni, 0, &elem, local_rscale, true);
+ }
+
+ /*
+ * Compensate for the argument range reduction. Since the weight of the
+ * result doubles with each multiplication, we can reduce the local rscale
+ * as we proceed.
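+	 *
+	 * (This relies on exp(x) = (exp(x / 2^ndiv2))^(2^ndiv2), i.e. we square
+	 * the intermediate result ndiv2 times.)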
+ */
+ while (ndiv2-- > 0)
+ {
+ local_rscale = sig_digits - result->weight * 2 * DEC_DIGITS;
+ local_rscale = Max(local_rscale, NUMERIC_MIN_DISPLAY_SCALE);
+ mul_var(result, result, result, local_rscale);
+ }
+
+ /* Round to requested rscale */
+ round_var(result, rscale);
+
+ free_var(&x);
+ free_var(&elem);
+}
+
+
+/*
+ * Estimate the dweight of the most significant decimal digit of the natural
+ * logarithm of a number.
+ *
+ * Essentially, we're approximating log10(abs(ln(var))). This is used to
+ * determine the appropriate rscale when computing natural logarithms.
+ *
+ * Note: many callers call this before range-checking the input. Therefore,
+ * we must be robust against values that are invalid to apply ln() to.
+ * We don't wish to throw an error here, so just return zero in such cases.
+ */
+static int
+estimate_ln_dweight(const NumericVar *var)
+{
+ int ln_dweight;
+
+ /* Caller should fail on ln(negative), but for the moment return zero */
+ if (var->sign != NUMERIC_POS)
+ return 0;
+
+ if (cmp_var(var, &const_zero_point_nine) >= 0 &&
+ cmp_var(var, &const_one_point_one) <= 0)
+ {
+ /*
+ * 0.9 <= var <= 1.1
+ *
+ * ln(var) has a negative weight (possibly very large). To get a
+ * reasonably accurate result, estimate it using ln(1+x) ~= x.
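+		 *
+		 * For example, var = 1.00000001 gives x = 1e-8, so the estimated
+		 * weight is -8, matching ln(1.00000001) ~= 1e-8.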
+ */
+ NumericVar x;
+
+ init_var(&x);
+ sub_var(var, &const_one, &x);
+
+ if (x.ndigits > 0)
+ {
+ /* Use weight of most significant decimal digit of x */
+ ln_dweight = x.weight * DEC_DIGITS + (int) log10(x.digits[0]);
+ }
+ else
+ {
+ /* x = 0. Since ln(1) = 0 exactly, we don't need extra digits */
+ ln_dweight = 0;
+ }
+
+ free_var(&x);
+ }
+ else
+ {
+ /*
+ * Estimate the logarithm using the first couple of digits from the
+ * input number. This will give an accurate result whenever the input
+ * is not too close to 1.
+ */
+ if (var->ndigits > 0)
+ {
+ int digits;
+ int dweight;
+ double ln_var;
+
+ digits = var->digits[0];
+ dweight = var->weight * DEC_DIGITS;
+
+ if (var->ndigits > 1)
+ {
+ digits = digits * NBASE + var->digits[1];
+ dweight -= DEC_DIGITS;
+ }
+
+ /*----------
+ * We have var ~= digits * 10^dweight
+ * so ln(var) ~= ln(digits) + dweight * ln(10)
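+			 *
+			 * For example, var = 1234.5678 gives digits = 12345678 and
+			 * dweight = -4, so ln(var) ~= ln(12345678) - 4 * ln(10) ~= 7.1,
+			 * and ln_dweight = 0.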
+ *----------
+ */
+ ln_var = log((double) digits) + dweight * 2.302585092994046;
+ ln_dweight = (int) log10(Abs(ln_var));
+ }
+ else
+ {
+ /* Caller should fail on ln(0), but for the moment return zero */
+ ln_dweight = 0;
+ }
+ }
+
+ return ln_dweight;
+}
+
+
+/*
+ * ln_var() -
+ *
+ * Compute the natural log of x
+ */
+static void
+ln_var(const NumericVar *arg, NumericVar *result, int rscale)
+{
+ NumericVar x;
+ NumericVar xx;
+ int ni;
+ NumericVar elem;
+ NumericVar fact;
+ int nsqrt;
+ int local_rscale;
+ int cmp;
+
+ cmp = cmp_var(arg, &const_zero);
+ if (cmp == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_LOG),
+ errmsg("cannot take logarithm of zero")));
+ else if (cmp < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_LOG),
+ errmsg("cannot take logarithm of a negative number")));
+
+ init_var(&x);
+ init_var(&xx);
+ init_var(&elem);
+ init_var(&fact);
+
+ set_var_from_var(arg, &x);
+ set_var_from_var(&const_two, &fact);
+
+ /*
+ * Reduce input into range 0.9 < x < 1.1 with repeated sqrt() operations.
+ *
+ * The final logarithm will have up to around rscale+6 significant digits.
+ * Each sqrt() will roughly halve the weight of x, so adjust the local
+ * rscale as we work so that we keep this many significant digits at each
+ * step (plus a few more for good measure).
+ *
+ * Note that we allow local_rscale < 0 during this input reduction
+ * process, which implies rounding before the decimal point. sqrt_var()
+ * explicitly supports this, and it significantly reduces the work
+ * required to reduce very large inputs to the required range. Once the
+ * input reduction is complete, x.weight will be 0 and its display scale
+ * will be non-negative again.
+ */
+ nsqrt = 0;
+ while (cmp_var(&x, &const_zero_point_nine) <= 0)
+ {
+ local_rscale = rscale - x.weight * DEC_DIGITS / 2 + 8;
+ sqrt_var(&x, &x, local_rscale);
+ mul_var(&fact, &const_two, &fact, 0);
+ nsqrt++;
+ }
+ while (cmp_var(&x, &const_one_point_one) >= 0)
+ {
+ local_rscale = rscale - x.weight * DEC_DIGITS / 2 + 8;
+ sqrt_var(&x, &x, local_rscale);
+ mul_var(&fact, &const_two, &fact, 0);
+ nsqrt++;
+ }
+
+ /*
+ * We use the Taylor series for 0.5 * ln((1+z)/(1-z)),
+ *
+ * z + z^3/3 + z^5/5 + ...
+ *
+ * where z = (x-1)/(x+1) is in the range (approximately) -0.053 .. 0.048
+ * due to the above range-reduction of x.
+ *
+ * The convergence of this is not as fast as one would like, but is
+ * tolerable given that z is small.
+ *
+ * The Taylor series result will be multiplied by 2^(nsqrt+1), which has a
+ * decimal weight of (nsqrt+1) * log10(2), so work with this many extra
+ * digits of precision (plus a few more for good measure).
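+	 *
+	 * (Multiplying by fact = 2^(nsqrt+1) at the end undoes the range
+	 * reduction: each sqrt() halves ln(x), and the series itself yields
+	 * 0.5 * ln(x), hence the extra factor of two.)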
+ */
+ local_rscale = rscale + (int) ((nsqrt + 1) * 0.301029995663981) + 8;
+
+ sub_var(&x, &const_one, result);
+ add_var(&x, &const_one, &elem);
+ div_var_fast(result, &elem, result, local_rscale, true);
+ set_var_from_var(result, &xx);
+ mul_var(result, result, &x, local_rscale);
+
+ ni = 1;
+
+ for (;;)
+ {
+ ni += 2;
+ mul_var(&xx, &x, &xx, local_rscale);
+ div_var_int(&xx, ni, 0, &elem, local_rscale, true);
+
+ if (elem.ndigits == 0)
+ break;
+
+ add_var(result, &elem, result);
+
+ if (elem.weight < (result->weight - local_rscale * 2 / DEC_DIGITS))
+ break;
+ }
+
+ /* Compensate for argument range reduction, round to requested rscale */
+ mul_var(result, &fact, result, rscale);
+
+ free_var(&x);
+ free_var(&xx);
+ free_var(&elem);
+ free_var(&fact);
+}
+
+
+/*
+ * log_var() -
+ *
+ * Compute the logarithm of num in a given base.
+ *
+ * Note: this routine chooses dscale of the result.
+ */
+static void
+log_var(const NumericVar *base, const NumericVar *num, NumericVar *result)
+{
+ NumericVar ln_base;
+ NumericVar ln_num;
+ int ln_base_dweight;
+ int ln_num_dweight;
+ int result_dweight;
+ int rscale;
+ int ln_base_rscale;
+ int ln_num_rscale;
+
+ init_var(&ln_base);
+ init_var(&ln_num);
+
+ /* Estimated dweights of ln(base), ln(num) and the final result */
+ ln_base_dweight = estimate_ln_dweight(base);
+ ln_num_dweight = estimate_ln_dweight(num);
+ result_dweight = ln_num_dweight - ln_base_dweight;
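+
+	/*
+	 * For example, for log(2.0, 1000000.0), ln(base) ~= 0.69 and
+	 * ln(num) ~= 13.8, so the estimated dweights are 0 and 1 and
+	 * result_dweight is 1 (the true result is about 19.93).
+	 */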
+
+ /*
+ * Select the scale of the result so that it will have at least
+ * NUMERIC_MIN_SIG_DIGITS significant digits and is not less than either
+ * input's display scale.
+ */
+ rscale = NUMERIC_MIN_SIG_DIGITS - result_dweight;
+ rscale = Max(rscale, base->dscale);
+ rscale = Max(rscale, num->dscale);
+ rscale = Max(rscale, NUMERIC_MIN_DISPLAY_SCALE);
+ rscale = Min(rscale, NUMERIC_MAX_DISPLAY_SCALE);
+
+ /*
+ * Set the scales for ln(base) and ln(num) so that they each have more
+ * significant digits than the final result.
+ */
+ ln_base_rscale = rscale + result_dweight - ln_base_dweight + 8;
+ ln_base_rscale = Max(ln_base_rscale, NUMERIC_MIN_DISPLAY_SCALE);
+
+ ln_num_rscale = rscale + result_dweight - ln_num_dweight + 8;
+ ln_num_rscale = Max(ln_num_rscale, NUMERIC_MIN_DISPLAY_SCALE);
+
+ /* Form natural logarithms */
+ ln_var(base, &ln_base, ln_base_rscale);
+ ln_var(num, &ln_num, ln_num_rscale);
+
+ /* Divide and round to the required scale */
+ div_var_fast(&ln_num, &ln_base, result, rscale, true);
+
+ free_var(&ln_num);
+ free_var(&ln_base);
+}
+
+
+/*
+ * power_var() -
+ *
+ * Raise base to the power of exp
+ *
+ * Note: this routine chooses dscale of the result.
+ */
+static void
+power_var(const NumericVar *base, const NumericVar *exp, NumericVar *result)
+{
+ int res_sign;
+ NumericVar abs_base;
+ NumericVar ln_base;
+ NumericVar ln_num;
+ int ln_dweight;
+ int rscale;
+ int sig_digits;
+ int local_rscale;
+ double val;
+
+ /* If exp can be represented as an integer, use power_var_int */
+ if (exp->ndigits == 0 || exp->ndigits <= exp->weight + 1)
+ {
+ /* exact integer, but does it fit in int? */
+ int64 expval64;
+
+ if (numericvar_to_int64(exp, &expval64))
+ {
+ if (expval64 >= PG_INT32_MIN && expval64 <= PG_INT32_MAX)
+ {
+ /* Okay, select rscale */
+ rscale = NUMERIC_MIN_SIG_DIGITS;
+ rscale = Max(rscale, base->dscale);
+ rscale = Max(rscale, NUMERIC_MIN_DISPLAY_SCALE);
+ rscale = Min(rscale, NUMERIC_MAX_DISPLAY_SCALE);
+
+ power_var_int(base, (int) expval64, result, rscale);
+ return;
+ }
+ }
+ }
+
+ /*
+ * This avoids log(0) for cases of 0 raised to a non-integer. 0 ^ 0 is
+ * handled by power_var_int().
+ */
+ if (cmp_var(base, &const_zero) == 0)
+ {
+ set_var_from_var(&const_zero, result);
+		result->dscale = NUMERIC_MAX_DISPLAY_SCALE; /* no need to round */
+ return;
+ }
+
+ init_var(&abs_base);
+ init_var(&ln_base);
+ init_var(&ln_num);
+
+ /*
+ * If base is negative, insist that exp be an integer. The result is then
+ * positive if exp is even and negative if exp is odd.
+ */
+ if (base->sign == NUMERIC_NEG)
+ {
+ /*
+ * Check that exp is an integer. This error code is defined by the
+ * SQL standard, and matches other errors in numeric_power().
+ */
+ if (exp->ndigits > 0 && exp->ndigits > exp->weight + 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_POWER_FUNCTION),
+ errmsg("a negative number raised to a non-integer power yields a complex result")));
+
+ /* Test if exp is odd or even */
+ if (exp->ndigits > 0 && exp->ndigits == exp->weight + 1 &&
+ (exp->digits[exp->ndigits - 1] & 1))
+ res_sign = NUMERIC_NEG;
+ else
+ res_sign = NUMERIC_POS;
+
+ /* Then work with abs(base) below */
+ set_var_from_var(base, &abs_base);
+ abs_base.sign = NUMERIC_POS;
+ base = &abs_base;
+ }
+ else
+ res_sign = NUMERIC_POS;
+
+ /*----------
+ * Decide on the scale for the ln() calculation. For this we need an
+ * estimate of the weight of the result, which we obtain by doing an
+ * initial low-precision calculation of exp * ln(base).
+ *
+ * We want result = e ^ (exp * ln(base))
+ * so result dweight = log10(result) = exp * ln(base) * log10(e)
+ *
+ * We also perform a crude overflow test here so that we can exit early if
+ * the full-precision result is sure to overflow, and to guard against
+ * integer overflow when determining the scale for the real calculation.
+ * exp_var() supports inputs up to NUMERIC_MAX_RESULT_SCALE * 3, so the
+ * result will overflow if exp * ln(base) >= NUMERIC_MAX_RESULT_SCALE * 3.
+ * Since the values here are only approximations, we apply a small fuzz
+ * factor to this overflow test and let exp_var() determine the exact
+ * overflow threshold so that it is consistent for all inputs.
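+	 *
+	 * For example, for 1.5 ^ 100, exp * ln(base) is about 40.5, giving an
+	 * estimated result dweight of 40.5 * log10(e), i.e. about 17.6; the true
+	 * result is roughly 4e17.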
+ *----------
+ */
+ ln_dweight = estimate_ln_dweight(base);
+
+ /*
+ * Set the scale for the low-precision calculation, computing ln(base) to
+ * around 8 significant digits. Note that ln_dweight may be as small as
+ * -SHRT_MAX, so the scale may exceed NUMERIC_MAX_DISPLAY_SCALE here.
+ */
+ local_rscale = 8 - ln_dweight;
+ local_rscale = Max(local_rscale, NUMERIC_MIN_DISPLAY_SCALE);
+
+ ln_var(base, &ln_base, local_rscale);
+
+ mul_var(&ln_base, exp, &ln_num, local_rscale);
+
+ val = numericvar_to_double_no_overflow(&ln_num);
+
+ /* initial overflow/underflow test with fuzz factor */
+ if (Abs(val) > NUMERIC_MAX_RESULT_SCALE * 3.01)
+ {
+ if (val > 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value overflows numeric format")));
+ zero_var(result);
+ result->dscale = NUMERIC_MAX_DISPLAY_SCALE;
+ return;
+ }
+
+ val *= 0.434294481903252; /* approximate decimal result weight */
+
+ /* choose the result scale */
+ rscale = NUMERIC_MIN_SIG_DIGITS - (int) val;
+ rscale = Max(rscale, base->dscale);
+ rscale = Max(rscale, exp->dscale);
+ rscale = Max(rscale, NUMERIC_MIN_DISPLAY_SCALE);
+ rscale = Min(rscale, NUMERIC_MAX_DISPLAY_SCALE);
+
+ /* significant digits required in the result */
+ sig_digits = rscale + (int) val;
+ sig_digits = Max(sig_digits, 0);
+
+ /* set the scale for the real exp * ln(base) calculation */
+ local_rscale = sig_digits - ln_dweight + 8;
+ local_rscale = Max(local_rscale, NUMERIC_MIN_DISPLAY_SCALE);
+
+ /* and do the real calculation */
+
+ ln_var(base, &ln_base, local_rscale);
+
+ mul_var(&ln_base, exp, &ln_num, local_rscale);
+
+ exp_var(&ln_num, result, rscale);
+
+ if (res_sign == NUMERIC_NEG && result->ndigits > 0)
+ result->sign = NUMERIC_NEG;
+
+ free_var(&ln_num);
+ free_var(&ln_base);
+ free_var(&abs_base);
+}
+
+/*
+ * power_var_int() -
+ *
+ * Raise base to the power of exp, where exp is an integer.
+ */
+static void
+power_var_int(const NumericVar *base, int exp, NumericVar *result, int rscale)
+{
+ double f;
+ int p;
+ int i;
+ int sig_digits;
+ unsigned int mask;
+ bool neg;
+ NumericVar base_prod;
+ int local_rscale;
+
+ /* Handle some common special cases, as well as corner cases */
+ switch (exp)
+ {
+ case 0:
+
+ /*
+ * While 0 ^ 0 can be either 1 or indeterminate (error), we treat
+ * it as 1 because most programming languages do this. SQL:2003
+ * also requires a return value of 1.
+ * https://en.wikipedia.org/wiki/Exponentiation#Zero_to_the_zero_power
+ */
+ set_var_from_var(&const_one, result);
+ result->dscale = rscale; /* no need to round */
+ return;
+ case 1:
+ set_var_from_var(base, result);
+ round_var(result, rscale);
+ return;
+ case -1:
+ div_var(&const_one, base, result, rscale, true);
+ return;
+ case 2:
+ mul_var(base, base, result, rscale);
+ return;
+ default:
+ break;
+ }
+
+ /* Handle the special case where the base is zero */
+ if (base->ndigits == 0)
+ {
+ if (exp < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+ zero_var(result);
+ result->dscale = rscale;
+ return;
+ }
+
+ /*
+ * The general case repeatedly multiplies base according to the bit
+ * pattern of exp.
+ *
+ * First we need to estimate the weight of the result so that we know how
+ * many significant digits are needed.
+ */
+ f = base->digits[0];
+ p = base->weight * DEC_DIGITS;
+
+ for (i = 1; i < base->ndigits && i * DEC_DIGITS < 16; i++)
+ {
+ f = f * NBASE + base->digits[i];
+ p -= DEC_DIGITS;
+ }
+
+ /*----------
+ * We have base ~= f * 10^p
+ * so log10(result) = log10(base^exp) ~= exp * (log10(f) + p)
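+	 *
+	 * For example, base = 12.3456 gives f = 123456 and p = -4, so for exp = 8
+	 * this estimates log10(result) as 8 * 1.09, i.e. about 8.7 (12.3456^8 is
+	 * roughly 5.4e8).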
+ *----------
+ */
+ f = exp * (log10(f) + p);
+
+ /*
+ * Apply crude overflow/underflow tests so we can exit early if the result
+ * certainly will overflow/underflow.
+ */
+ if (f > 3 * SHRT_MAX * DEC_DIGITS)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value overflows numeric format")));
+ if (f + 1 < -rscale || f + 1 < -NUMERIC_MAX_DISPLAY_SCALE)
+ {
+ zero_var(result);
+ result->dscale = rscale;
+ return;
+ }
+
+ /*
+ * Approximate number of significant digits in the result. Note that the
+ * underflow test above means that this is necessarily >= 0.
+ */
+ sig_digits = 1 + rscale + (int) f;
+
+ /*
+ * The multiplications to produce the result may introduce an error of up
+ * to around log10(abs(exp)) digits, so work with this many extra digits
+ * of precision (plus a few more for good measure).
+ */
+ sig_digits += (int) log(fabs((double) exp)) + 8;
+
+ /*
+ * Now we can proceed with the multiplications.
+ */
+ neg = (exp < 0);
+ mask = Abs(exp);
+
+ init_var(&base_prod);
+ set_var_from_var(base, &base_prod);
+
+ if (mask & 1)
+ set_var_from_var(base, result);
+ else
+ set_var_from_var(&const_one, result);
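+
+	/*
+	 * For example, for exp = 13 (binary 1101), result starts out as base;
+	 * the loop below squares base_prod to base^2, base^4 and base^8 and
+	 * multiplies result by base^4 and base^8, giving base^13.
+	 */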
+
+ while ((mask >>= 1) > 0)
+ {
+ /*
+ * Do the multiplications using rscales large enough to hold the
+ * results to the required number of significant digits, but don't
+ * waste time by exceeding the scales of the numbers themselves.
+ */
+ local_rscale = sig_digits - 2 * base_prod.weight * DEC_DIGITS;
+ local_rscale = Min(local_rscale, 2 * base_prod.dscale);
+ local_rscale = Max(local_rscale, NUMERIC_MIN_DISPLAY_SCALE);
+
+ mul_var(&base_prod, &base_prod, &base_prod, local_rscale);
+
+ if (mask & 1)
+ {
+ local_rscale = sig_digits -
+ (base_prod.weight + result->weight) * DEC_DIGITS;
+ local_rscale = Min(local_rscale,
+ base_prod.dscale + result->dscale);
+ local_rscale = Max(local_rscale, NUMERIC_MIN_DISPLAY_SCALE);
+
+ mul_var(&base_prod, result, result, local_rscale);
+ }
+
+ /*
+ * When abs(base) > 1, the number of digits to the left of the decimal
+ * point in base_prod doubles at each iteration, so if exp is large we
+ * could easily spend large amounts of time and memory space doing the
+ * multiplications. But once the weight exceeds what will fit in
+ * int16, the final result is guaranteed to overflow (or underflow, if
+ * exp < 0), so we can give up before wasting too many cycles.
+ */
+ if (base_prod.weight > SHRT_MAX || result->weight > SHRT_MAX)
+ {
+ /* overflow, unless neg, in which case result should be 0 */
+ if (!neg)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value overflows numeric format")));
+ zero_var(result);
+ neg = false;
+ break;
+ }
+ }
+
+ free_var(&base_prod);
+
+ /* Compensate for input sign, and round to requested rscale */
+ if (neg)
+ div_var_fast(&const_one, result, result, rscale, true);
+ else
+ round_var(result, rscale);
+}
+
+/*
+ * power_ten_int() -
+ *
+ * Raise ten to the power of exp, where exp is an integer. Note that unlike
+ * power_var_int(), this does no overflow/underflow checking or rounding.
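+ *
+ * For example, exp = -5 produces dscale = 5, weight = -2 and a single
+ * digit of 1000, i.e. 1000 * NBASE^(-2) = 0.00001 (assuming NBASE = 10000).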
+ */
+static void
+power_ten_int(int exp, NumericVar *result)
+{
+ /* Construct the result directly, starting from 10^0 = 1 */
+ set_var_from_var(&const_one, result);
+
+ /* Scale needed to represent the result exactly */
+ result->dscale = exp < 0 ? -exp : 0;
+
+ /* Base-NBASE weight of result and remaining exponent */
+ if (exp >= 0)
+ result->weight = exp / DEC_DIGITS;
+ else
+ result->weight = (exp + 1) / DEC_DIGITS - 1;
+
+ exp -= result->weight * DEC_DIGITS;
+
+ /* Final adjustment of the result's single NBASE digit */
+ while (exp-- > 0)
+ result->digits[0] *= 10;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Following are the lowest-level functions, which operate on the
+ * unsigned (absolute) values of the variables
+ *
+ * ----------------------------------------------------------------------
+ */
+
+
+/* ----------
+ * cmp_abs() -
+ *
+ * Compare the absolute values of var1 and var2
+ * Returns: -1 for ABS(var1) < ABS(var2)
+ * 0 for ABS(var1) == ABS(var2)
+ * 1 for ABS(var1) > ABS(var2)
+ * ----------
+ */
+static int
+cmp_abs(const NumericVar *var1, const NumericVar *var2)
+{
+ return cmp_abs_common(var1->digits, var1->ndigits, var1->weight,
+ var2->digits, var2->ndigits, var2->weight);
+}
+
+/* ----------
+ * cmp_abs_common() -
+ *
+ * Main routine of cmp_abs(). This function can be used by both
+ * NumericVar and Numeric.
+ * ----------
+ */
+static int
+cmp_abs_common(const NumericDigit *var1digits, int var1ndigits, int var1weight,
+ const NumericDigit *var2digits, int var2ndigits, int var2weight)
+{
+ int i1 = 0;
+ int i2 = 0;
+
+ /* Check any digits before the first common digit */
+
+ while (var1weight > var2weight && i1 < var1ndigits)
+ {
+ if (var1digits[i1++] != 0)
+ return 1;
+ var1weight--;
+ }
+ while (var2weight > var1weight && i2 < var2ndigits)
+ {
+ if (var2digits[i2++] != 0)
+ return -1;
+ var2weight--;
+ }
+
+	/* At this point, either var1weight == var2weight or we've run out of digits */
+
+ if (var1weight == var2weight)
+ {
+ while (i1 < var1ndigits && i2 < var2ndigits)
+ {
+ int stat = var1digits[i1++] - var2digits[i2++];
+
+ if (stat)
+ {
+ if (stat > 0)
+ return 1;
+ return -1;
+ }
+ }
+ }
+
+ /*
+ * At this point, we've run out of digits on one side or the other; so any
+ * remaining nonzero digits imply that side is larger
+ */
+ while (i1 < var1ndigits)
+ {
+ if (var1digits[i1++] != 0)
+ return 1;
+ }
+ while (i2 < var2ndigits)
+ {
+ if (var2digits[i2++] != 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+
+/*
+ * add_abs() -
+ *
+ * Add the absolute values of two variables into result.
+ * result might point to one of the operands without danger.
+ */
+static void
+add_abs(const NumericVar *var1, const NumericVar *var2, NumericVar *result)
+{
+ NumericDigit *res_buf;
+ NumericDigit *res_digits;
+ int res_ndigits;
+ int res_weight;
+ int res_rscale,
+ rscale1,
+ rscale2;
+ int res_dscale;
+ int i,
+ i1,
+ i2;
+ int carry = 0;
+
+ /* copy these values into local vars for speed in inner loop */
+ int var1ndigits = var1->ndigits;
+ int var2ndigits = var2->ndigits;
+ NumericDigit *var1digits = var1->digits;
+ NumericDigit *var2digits = var2->digits;
+
+ res_weight = Max(var1->weight, var2->weight) + 1;
+
+ res_dscale = Max(var1->dscale, var2->dscale);
+
+ /* Note: here we are figuring rscale in base-NBASE digits */
+ rscale1 = var1->ndigits - var1->weight - 1;
+ rscale2 = var2->ndigits - var2->weight - 1;
+ res_rscale = Max(rscale1, rscale2);
+
+ res_ndigits = res_rscale + res_weight + 1;
+ if (res_ndigits <= 0)
+ res_ndigits = 1;
+
+ res_buf = digitbuf_alloc(res_ndigits + 1);
+ res_buf[0] = 0; /* spare digit for later rounding */
+ res_digits = res_buf + 1;
+
+ i1 = res_rscale + var1->weight + 1;
+ i2 = res_rscale + var2->weight + 1;
+ for (i = res_ndigits - 1; i >= 0; i--)
+ {
+ i1--;
+ i2--;
+ if (i1 >= 0 && i1 < var1ndigits)
+ carry += var1digits[i1];
+ if (i2 >= 0 && i2 < var2ndigits)
+ carry += var2digits[i2];
+
+ if (carry >= NBASE)
+ {
+ res_digits[i] = carry - NBASE;
+ carry = 1;
+ }
+ else
+ {
+ res_digits[i] = carry;
+ carry = 0;
+ }
+ }
+
+ Assert(carry == 0); /* else we failed to allow for carry out */
+
+ digitbuf_free(result->buf);
+ result->ndigits = res_ndigits;
+ result->buf = res_buf;
+ result->digits = res_digits;
+ result->weight = res_weight;
+ result->dscale = res_dscale;
+
+ /* Remove leading/trailing zeroes */
+ strip_var(result);
+}
+
+
+/*
+ * sub_abs()
+ *
+ * Subtract the absolute value of var2 from the absolute value of var1
+ * and store in result. result might point to one of the operands
+ * without danger.
+ *
+ * ABS(var1) MUST BE GREATER OR EQUAL ABS(var2) !!!
+ */
+static void
+sub_abs(const NumericVar *var1, const NumericVar *var2, NumericVar *result)
+{
+ NumericDigit *res_buf;
+ NumericDigit *res_digits;
+ int res_ndigits;
+ int res_weight;
+ int res_rscale,
+ rscale1,
+ rscale2;
+ int res_dscale;
+ int i,
+ i1,
+ i2;
+ int borrow = 0;
+
+ /* copy these values into local vars for speed in inner loop */
+ int var1ndigits = var1->ndigits;
+ int var2ndigits = var2->ndigits;
+ NumericDigit *var1digits = var1->digits;
+ NumericDigit *var2digits = var2->digits;
+
+ res_weight = var1->weight;
+
+ res_dscale = Max(var1->dscale, var2->dscale);
+
+ /* Note: here we are figuring rscale in base-NBASE digits */
+ rscale1 = var1->ndigits - var1->weight - 1;
+ rscale2 = var2->ndigits - var2->weight - 1;
+ res_rscale = Max(rscale1, rscale2);
+
+ res_ndigits = res_rscale + res_weight + 1;
+ if (res_ndigits <= 0)
+ res_ndigits = 1;
+
+ res_buf = digitbuf_alloc(res_ndigits + 1);
+ res_buf[0] = 0; /* spare digit for later rounding */
+ res_digits = res_buf + 1;
+
+ i1 = res_rscale + var1->weight + 1;
+ i2 = res_rscale + var2->weight + 1;
+ for (i = res_ndigits - 1; i >= 0; i--)
+ {
+ i1--;
+ i2--;
+ if (i1 >= 0 && i1 < var1ndigits)
+ borrow += var1digits[i1];
+ if (i2 >= 0 && i2 < var2ndigits)
+ borrow -= var2digits[i2];
+
+ if (borrow < 0)
+ {
+ res_digits[i] = borrow + NBASE;
+ borrow = -1;
+ }
+ else
+ {
+ res_digits[i] = borrow;
+ borrow = 0;
+ }
+ }
+
+ Assert(borrow == 0); /* else caller gave us var1 < var2 */
+
+ digitbuf_free(result->buf);
+ result->ndigits = res_ndigits;
+ result->buf = res_buf;
+ result->digits = res_digits;
+ result->weight = res_weight;
+ result->dscale = res_dscale;
+
+ /* Remove leading/trailing zeroes */
+ strip_var(result);
+}
+
+/*
+ * round_var
+ *
+ * Round the value of a variable to no more than rscale decimal digits
+ * after the decimal point. NOTE: we allow rscale < 0 here, implying
+ * rounding before the decimal point.
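+ *
+ * For example, 123.4567 rounded to rscale = 2 yields 123.46, while 1234
+ * rounded to rscale = -2 yields 1200.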
+ */
+static void
+round_var(NumericVar *var, int rscale)
+{
+ NumericDigit *digits = var->digits;
+ int di;
+ int ndigits;
+ int carry;
+
+ var->dscale = rscale;
+
+ /* decimal digits wanted */
+ di = (var->weight + 1) * DEC_DIGITS + rscale;
+
+ /*
+ * If di = 0, the value loses all digits, but could round up to 1 if its
+ * first extra digit is >= 5. If di < 0 the result must be 0.
+ */
+ if (di < 0)
+ {
+ var->ndigits = 0;
+ var->weight = 0;
+ var->sign = NUMERIC_POS;
+ }
+ else
+ {
+ /* NBASE digits wanted */
+ ndigits = (di + DEC_DIGITS - 1) / DEC_DIGITS;
+
+ /* 0, or number of decimal digits to keep in last NBASE digit */
+ di %= DEC_DIGITS;
+
+ if (ndigits < var->ndigits ||
+ (ndigits == var->ndigits && di > 0))
+ {
+ var->ndigits = ndigits;
+
+#if DEC_DIGITS == 1
+ /* di must be zero */
+ carry = (digits[ndigits] >= HALF_NBASE) ? 1 : 0;
+#else
+ if (di == 0)
+ carry = (digits[ndigits] >= HALF_NBASE) ? 1 : 0;
+ else
+ {
+ /* Must round within last NBASE digit */
+ int extra,
+ pow10;
+
+#if DEC_DIGITS == 4
+ pow10 = round_powers[di];
+#elif DEC_DIGITS == 2
+ pow10 = 10;
+#else
+#error unsupported NBASE
+#endif
+ extra = digits[--ndigits] % pow10;
+ digits[ndigits] -= extra;
+ carry = 0;
+ if (extra >= pow10 / 2)
+ {
+ pow10 += digits[ndigits];
+ if (pow10 >= NBASE)
+ {
+ pow10 -= NBASE;
+ carry = 1;
+ }
+ digits[ndigits] = pow10;
+ }
+ }
+#endif
+
+ /* Propagate carry if needed */
+ while (carry)
+ {
+ carry += digits[--ndigits];
+ if (carry >= NBASE)
+ {
+ digits[ndigits] = carry - NBASE;
+ carry = 1;
+ }
+ else
+ {
+ digits[ndigits] = carry;
+ carry = 0;
+ }
+ }
+
+ if (ndigits < 0)
+ {
+ Assert(ndigits == -1); /* better not have added > 1 digit */
+ Assert(var->digits > var->buf);
+ var->digits--;
+ var->ndigits++;
+ var->weight++;
+ }
+ }
+ }
+}
+
+/*
+ * trunc_var
+ *
+ * Truncate (towards zero) the value of a variable at rscale decimal digits
+ * after the decimal point. NOTE: we allow rscale < 0 here, implying
+ * truncation before the decimal point.
+ */
+static void
+trunc_var(NumericVar *var, int rscale)
+{
+ int di;
+ int ndigits;
+
+ var->dscale = rscale;
+
+ /* decimal digits wanted */
+ di = (var->weight + 1) * DEC_DIGITS + rscale;
+
+ /*
+ * If di <= 0, the value loses all digits.
+ */
+ if (di <= 0)
+ {
+ var->ndigits = 0;
+ var->weight = 0;
+ var->sign = NUMERIC_POS;
+ }
+ else
+ {
+ /* NBASE digits wanted */
+ ndigits = (di + DEC_DIGITS - 1) / DEC_DIGITS;
+
+ if (ndigits <= var->ndigits)
+ {
+ var->ndigits = ndigits;
+
+#if DEC_DIGITS == 1
+ /* no within-digit stuff to worry about */
+#else
+ /* 0, or number of decimal digits to keep in last NBASE digit */
+ di %= DEC_DIGITS;
+
+ if (di > 0)
+ {
+ /* Must truncate within last NBASE digit */
+ NumericDigit *digits = var->digits;
+ int extra,
+ pow10;
+
+#if DEC_DIGITS == 4
+ pow10 = round_powers[di];
+#elif DEC_DIGITS == 2
+ pow10 = 10;
+#else
+#error unsupported NBASE
+#endif
+ extra = digits[--ndigits] % pow10;
+ digits[ndigits] -= extra;
+ }
+#endif
+ }
+ }
+}
+
+/*
+ * strip_var
+ *
+ * Strip any leading and trailing zeroes from a numeric variable
+ */
+static void
+strip_var(NumericVar *var)
+{
+ NumericDigit *digits = var->digits;
+ int ndigits = var->ndigits;
+
+ /* Strip leading zeroes */
+ while (ndigits > 0 && *digits == 0)
+ {
+ digits++;
+ var->weight--;
+ ndigits--;
+ }
+
+ /* Strip trailing zeroes */
+ while (ndigits > 0 && digits[ndigits - 1] == 0)
+ ndigits--;
+
+ /* If it's zero, normalize the sign and weight */
+ if (ndigits == 0)
+ {
+ var->sign = NUMERIC_POS;
+ var->weight = 0;
+ }
+
+ var->digits = digits;
+ var->ndigits = ndigits;
+}
+
+
+/* ----------------------------------------------------------------------
+ *
+ * Fast sum accumulator functions
+ *
+ * ----------------------------------------------------------------------
+ */
+
+/*
+ * Reset the accumulator's value to zero. The buffers to hold the digits
+ * are not freed.
+ */
+static void
+accum_sum_reset(NumericSumAccum *accum)
+{
+ int i;
+
+ accum->dscale = 0;
+ for (i = 0; i < accum->ndigits; i++)
+ {
+ accum->pos_digits[i] = 0;
+ accum->neg_digits[i] = 0;
+ }
+}
+
+/*
+ * Accumulate a new value.
+ */
+static void
+accum_sum_add(NumericSumAccum *accum, const NumericVar *val)
+{
+ int32 *accum_digits;
+ int i,
+ val_i;
+ int val_ndigits;
+ NumericDigit *val_digits;
+
+ /*
+ * If we have accumulated too many values since the last carry
+ * propagation, do it now, to avoid overflowing. (We could allow more
+ * than NBASE - 1, if we reserved two extra digits, rather than one, for
+	 * carry propagation. But even with NBASE - 1, this needs to be done so
+	 * seldom that the performance difference is negligible.)
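+	 *
+	 * (Each addend digit is below NBASE, so after NBASE - 1 uncarried
+	 * additions a digit of the sum stays below NBASE^2, which for
+	 * NBASE = 10000 is 10^8, well within the range of an int32.)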
+ */
+ if (accum->num_uncarried == NBASE - 1)
+ accum_sum_carry(accum);
+
+ /*
+ * Adjust the weight or scale of the old value, so that it can accommodate
+ * the new value.
+ */
+ accum_sum_rescale(accum, val);
+
+	/* Pick the digit array matching the value's sign */
+ if (val->sign == NUMERIC_POS)
+ accum_digits = accum->pos_digits;
+ else
+ accum_digits = accum->neg_digits;
+
+ /* copy these values into local vars for speed in loop */
+ val_ndigits = val->ndigits;
+ val_digits = val->digits;
+
+ i = accum->weight - val->weight;
+ for (val_i = 0; val_i < val_ndigits; val_i++)
+ {
+ accum_digits[i] += (int32) val_digits[val_i];
+ i++;
+ }
+
+ accum->num_uncarried++;
+}
+
+/*
+ * Propagate carries.
+ */
+static void
+accum_sum_carry(NumericSumAccum *accum)
+{
+ int i;
+ int ndigits;
+ int32 *dig;
+ int32 carry;
+ int32 newdig = 0;
+
+ /*
+ * If no new values have been added since last carry propagation, nothing
+ * to do.
+ */
+ if (accum->num_uncarried == 0)
+ return;
+
+ /*
+ * We maintain that the weight of the accumulator is always one larger
+ * than needed to hold the current value, before carrying, to make sure
+ * there is enough space for the possible extra digit when carry is
+ * propagated. We cannot expand the buffer here, unless we require
+ * callers of accum_sum_final() to switch to the right memory context.
+ */
+ Assert(accum->pos_digits[0] == 0 && accum->neg_digits[0] == 0);
+
+ ndigits = accum->ndigits;
+
+ /* Propagate carry in the positive sum */
+ dig = accum->pos_digits;
+ carry = 0;
+ for (i = ndigits - 1; i >= 0; i--)
+ {
+ newdig = dig[i] + carry;
+ if (newdig >= NBASE)
+ {
+ carry = newdig / NBASE;
+ newdig -= carry * NBASE;
+ }
+ else
+ carry = 0;
+ dig[i] = newdig;
+ }
+ /* Did we use up the digit reserved for carry propagation? */
+ if (newdig > 0)
+ accum->have_carry_space = false;
+
+ /* And the same for the negative sum */
+ dig = accum->neg_digits;
+ carry = 0;
+ for (i = ndigits - 1; i >= 0; i--)
+ {
+ newdig = dig[i] + carry;
+ if (newdig >= NBASE)
+ {
+ carry = newdig / NBASE;
+ newdig -= carry * NBASE;
+ }
+ else
+ carry = 0;
+ dig[i] = newdig;
+ }
+ if (newdig > 0)
+ accum->have_carry_space = false;
+
+ accum->num_uncarried = 0;
+}
+
+/*
+ * Re-scale accumulator to accommodate new value.
+ *
+ * If the new value has more digits than the current digit buffers in the
+ * accumulator, enlarge the buffers.
+ */
+static void
+accum_sum_rescale(NumericSumAccum *accum, const NumericVar *val)
+{
+ int old_weight = accum->weight;
+ int old_ndigits = accum->ndigits;
+ int accum_ndigits;
+ int accum_weight;
+ int accum_rscale;
+ int val_rscale;
+
+ accum_weight = old_weight;
+ accum_ndigits = old_ndigits;
+
+ /*
+ * Does the new value have a larger weight? If so, enlarge the buffers,
+ * and shift the existing value to the new weight, by adding leading
+ * zeros.
+ *
+ * We enforce that the accumulator always has a weight one larger than
+ * needed for the inputs, so that we have space for an extra digit at the
+ * final carry-propagation phase, if necessary.
+ */
+ if (val->weight >= accum_weight)
+ {
+ accum_weight = val->weight + 1;
+ accum_ndigits = accum_ndigits + (accum_weight - old_weight);
+ }
+
+ /*
+ * Even though the new value is small, we might've used up the space
+ * reserved for the carry digit in the last call to accum_sum_carry(). If
+ * so, enlarge to make room for another one.
+ */
+ else if (!accum->have_carry_space)
+ {
+ accum_weight++;
+ accum_ndigits++;
+ }
+
+ /* Is the new value wider on the right side? */
+ accum_rscale = accum_ndigits - accum_weight - 1;
+ val_rscale = val->ndigits - val->weight - 1;
+ if (val_rscale > accum_rscale)
+ accum_ndigits = accum_ndigits + (val_rscale - accum_rscale);
+
+ if (accum_ndigits != old_ndigits ||
+ accum_weight != old_weight)
+ {
+ int32 *new_pos_digits;
+ int32 *new_neg_digits;
+ int weightdiff;
+
+ weightdiff = accum_weight - old_weight;
+
+ new_pos_digits = palloc0(accum_ndigits * sizeof(int32));
+ new_neg_digits = palloc0(accum_ndigits * sizeof(int32));
+
+ if (accum->pos_digits)
+ {
+ memcpy(&new_pos_digits[weightdiff], accum->pos_digits,
+ old_ndigits * sizeof(int32));
+ pfree(accum->pos_digits);
+
+ memcpy(&new_neg_digits[weightdiff], accum->neg_digits,
+ old_ndigits * sizeof(int32));
+ pfree(accum->neg_digits);
+ }
+
+ accum->pos_digits = new_pos_digits;
+ accum->neg_digits = new_neg_digits;
+
+ accum->weight = accum_weight;
+ accum->ndigits = accum_ndigits;
+
+ Assert(accum->pos_digits[0] == 0 && accum->neg_digits[0] == 0);
+ accum->have_carry_space = true;
+ }
+
+ if (val->dscale > accum->dscale)
+ accum->dscale = val->dscale;
+}
+
+/*
+ * Return the current value of the accumulator. This performs final carry
+ * propagation, and adds together the positive and negative sums.
+ *
+ * Unlike all the other routines, the caller is not required to switch to
+ * the memory context that holds the accumulator.
+ */
+static void
+accum_sum_final(NumericSumAccum *accum, NumericVar *result)
+{
+ int i;
+ NumericVar pos_var;
+ NumericVar neg_var;
+
+ if (accum->ndigits == 0)
+ {
+ set_var_from_var(&const_zero, result);
+ return;
+ }
+
+ /* Perform final carry */
+ accum_sum_carry(accum);
+
+ /* Create NumericVars representing the positive and negative sums */
+ init_var(&pos_var);
+ init_var(&neg_var);
+
+ pos_var.ndigits = neg_var.ndigits = accum->ndigits;
+ pos_var.weight = neg_var.weight = accum->weight;
+ pos_var.dscale = neg_var.dscale = accum->dscale;
+ pos_var.sign = NUMERIC_POS;
+ neg_var.sign = NUMERIC_NEG;
+
+ pos_var.buf = pos_var.digits = digitbuf_alloc(accum->ndigits);
+ neg_var.buf = neg_var.digits = digitbuf_alloc(accum->ndigits);
+
+ for (i = 0; i < accum->ndigits; i++)
+ {
+ Assert(accum->pos_digits[i] < NBASE);
+ pos_var.digits[i] = (int16) accum->pos_digits[i];
+
+ Assert(accum->neg_digits[i] < NBASE);
+ neg_var.digits[i] = (int16) accum->neg_digits[i];
+ }
+
+ /* And add them together */
+ add_var(&pos_var, &neg_var, result);
+
+ /* Remove leading/trailing zeroes */
+ strip_var(result);
+}
+
+/*
+ * Copy an accumulator's state.
+ *
+ * 'dst' is assumed to be uninitialized beforehand. No attempt is made at
+ * freeing old values.
+ */
+static void
+accum_sum_copy(NumericSumAccum *dst, NumericSumAccum *src)
+{
+ dst->pos_digits = palloc(src->ndigits * sizeof(int32));
+ dst->neg_digits = palloc(src->ndigits * sizeof(int32));
+
+ memcpy(dst->pos_digits, src->pos_digits, src->ndigits * sizeof(int32));
+ memcpy(dst->neg_digits, src->neg_digits, src->ndigits * sizeof(int32));
+ dst->num_uncarried = src->num_uncarried;
+ dst->ndigits = src->ndigits;
+ dst->weight = src->weight;
+ dst->dscale = src->dscale;
+}
+
+/*
+ * Add the current value of 'accum2' into 'accum'.
+ */
+static void
+accum_sum_combine(NumericSumAccum *accum, NumericSumAccum *accum2)
+{
+ NumericVar tmp_var;
+
+ init_var(&tmp_var);
+
+ accum_sum_final(accum2, &tmp_var);
+ accum_sum_add(accum, &tmp_var);
+
+ free_var(&tmp_var);
+}
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
new file mode 100644
index 0000000..cc3f95d
--- /dev/null
+++ b/src/backend/utils/adt/numutils.c
@@ -0,0 +1,604 @@
+/*-------------------------------------------------------------------------
+ *
+ * numutils.c
+ * utility functions for I/O of built-in numeric types.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/numutils.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+#include <limits.h>
+#include <ctype.h>
+
+#include "common/int.h"
+#include "utils/builtins.h"
+#include "port/pg_bitutils.h"
+
+/*
+ * A table of all two-digit numbers. This is used to speed up decimal digit
+ * generation by copying pairs of digits into the final output.
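+ *
+ * For example, the two characters for the value 47 are found at
+ * DIGIT_TABLE + 47 * 2.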
+ */
+static const char DIGIT_TABLE[200] =
+"00" "01" "02" "03" "04" "05" "06" "07" "08" "09"
+"10" "11" "12" "13" "14" "15" "16" "17" "18" "19"
+"20" "21" "22" "23" "24" "25" "26" "27" "28" "29"
+"30" "31" "32" "33" "34" "35" "36" "37" "38" "39"
+"40" "41" "42" "43" "44" "45" "46" "47" "48" "49"
+"50" "51" "52" "53" "54" "55" "56" "57" "58" "59"
+"60" "61" "62" "63" "64" "65" "66" "67" "68" "69"
+"70" "71" "72" "73" "74" "75" "76" "77" "78" "79"
+"80" "81" "82" "83" "84" "85" "86" "87" "88" "89"
+"90" "91" "92" "93" "94" "95" "96" "97" "98" "99";
+
+/*
+ * Adapted from http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
+ */
+static inline int
+decimalLength32(const uint32 v)
+{
+ int t;
+ static const uint32 PowersOfTen[] = {
+ 1, 10, 100,
+ 1000, 10000, 100000,
+ 1000000, 10000000, 100000000,
+ 1000000000
+ };
+
+ /*
+ * Compute base-10 logarithm by dividing the base-2 logarithm by a
+ * good-enough approximation of the base-2 logarithm of 10
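+	 *
+	 * (1233 / 4096 is approximately log10(2).  For example, v = 1000000 uses
+	 * 20 bits, so t = 20 * 1233 / 4096 = 6, and since v >= PowersOfTen[6]
+	 * the result is 7.)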
+ */
+ t = (pg_leftmost_one_pos32(v) + 1) * 1233 / 4096;
+ return t + (v >= PowersOfTen[t]);
+}
+
+static inline int
+decimalLength64(const uint64 v)
+{
+ int t;
+ static const uint64 PowersOfTen[] = {
+ UINT64CONST(1), UINT64CONST(10),
+ UINT64CONST(100), UINT64CONST(1000),
+ UINT64CONST(10000), UINT64CONST(100000),
+ UINT64CONST(1000000), UINT64CONST(10000000),
+ UINT64CONST(100000000), UINT64CONST(1000000000),
+ UINT64CONST(10000000000), UINT64CONST(100000000000),
+ UINT64CONST(1000000000000), UINT64CONST(10000000000000),
+ UINT64CONST(100000000000000), UINT64CONST(1000000000000000),
+ UINT64CONST(10000000000000000), UINT64CONST(100000000000000000),
+ UINT64CONST(1000000000000000000), UINT64CONST(10000000000000000000)
+ };
+
+ /*
+ * Compute base-10 logarithm by dividing the base-2 logarithm by a
+ * good-enough approximation of the base-2 logarithm of 10
+ */
+ t = (pg_leftmost_one_pos64(v) + 1) * 1233 / 4096;
+ return t + (v >= PowersOfTen[t]);
+}
+
+/*
+ * Convert input string to a signed 16 bit integer.
+ *
+ * Allows any number of leading or trailing whitespace characters. Will throw
+ * ereport() upon bad input format or overflow.
+ *
+ * NB: Accumulate input as a negative number, to deal with two's complement
+ * representation of the most negative number, which can't be represented as a
+ * positive number.
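+ *
+ * For example, "-32768" accumulates as -3, -32, -327, -3276, -32768, which
+ * is representable in int16, whereas +32768 would overflow before negation.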
+ */
+int16
+pg_strtoint16(const char *s)
+{
+ const char *ptr = s;
+ int16 tmp = 0;
+ bool neg = false;
+
+ /* skip leading spaces */
+ while (likely(*ptr) && isspace((unsigned char) *ptr))
+ ptr++;
+
+ /* handle sign */
+ if (*ptr == '-')
+ {
+ ptr++;
+ neg = true;
+ }
+ else if (*ptr == '+')
+ ptr++;
+
+ /* require at least one digit */
+ if (unlikely(!isdigit((unsigned char) *ptr)))
+ goto invalid_syntax;
+
+ /* process digits */
+ while (*ptr && isdigit((unsigned char) *ptr))
+ {
+ int8 digit = (*ptr++ - '0');
+
+ if (unlikely(pg_mul_s16_overflow(tmp, 10, &tmp)) ||
+ unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+ goto out_of_range;
+ }
+
+ /* allow trailing whitespace, but not other trailing chars */
+ while (*ptr != '\0' && isspace((unsigned char) *ptr))
+ ptr++;
+
+ if (unlikely(*ptr != '\0'))
+ goto invalid_syntax;
+
+ if (!neg)
+ {
+ /* could fail if input is most negative number */
+ if (unlikely(tmp == PG_INT16_MIN))
+ goto out_of_range;
+ tmp = -tmp;
+ }
+
+ return tmp;
+
+out_of_range:
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value \"%s\" is out of range for type %s",
+ s, "smallint")));
+
+invalid_syntax:
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "smallint", s)));
+
+ return 0; /* keep compiler quiet */
+}
+
+/*
+ * Convert input string to a signed 32 bit integer.
+ *
+ * Allows any number of leading or trailing whitespace characters. Will throw
+ * ereport() upon bad input format or overflow.
+ *
+ * NB: Accumulate input as a negative number, to deal with two's complement
+ * representation of the most negative number, which can't be represented as a
+ * positive number.
+ */
+int32
+pg_strtoint32(const char *s)
+{
+ const char *ptr = s;
+ int32 tmp = 0;
+ bool neg = false;
+
+ /* skip leading spaces */
+ while (likely(*ptr) && isspace((unsigned char) *ptr))
+ ptr++;
+
+ /* handle sign */
+ if (*ptr == '-')
+ {
+ ptr++;
+ neg = true;
+ }
+ else if (*ptr == '+')
+ ptr++;
+
+ /* require at least one digit */
+ if (unlikely(!isdigit((unsigned char) *ptr)))
+ goto invalid_syntax;
+
+ /* process digits */
+ while (*ptr && isdigit((unsigned char) *ptr))
+ {
+ int8 digit = (*ptr++ - '0');
+
+ if (unlikely(pg_mul_s32_overflow(tmp, 10, &tmp)) ||
+ unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+ goto out_of_range;
+ }
+
+ /* allow trailing whitespace, but not other trailing chars */
+ while (*ptr != '\0' && isspace((unsigned char) *ptr))
+ ptr++;
+
+ if (unlikely(*ptr != '\0'))
+ goto invalid_syntax;
+
+ if (!neg)
+ {
+ /* could fail if input is most negative number */
+ if (unlikely(tmp == PG_INT32_MIN))
+ goto out_of_range;
+ tmp = -tmp;
+ }
+
+ return tmp;
+
+out_of_range:
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value \"%s\" is out of range for type %s",
+ s, "integer")));
+
+invalid_syntax:
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "integer", s)));
+
+ return 0; /* keep compiler quiet */
+}
+
+/*
+ * Convert input string to a signed 64 bit integer.
+ *
+ * Allows any number of leading or trailing whitespace characters. Will throw
+ * ereport() upon bad input format or overflow.
+ *
+ * NB: Accumulate input as a negative number, to deal with two's complement
+ * representation of the most negative number, which can't be represented as a
+ * positive number.
+ */
+int64
+pg_strtoint64(const char *s)
+{
+ const char *ptr = s;
+ int64 tmp = 0;
+ bool neg = false;
+
+ /*
+ * Do our own scan, rather than relying on sscanf which might be broken
+ * for long long.
+ *
+ * As INT64_MIN can't be stored as a positive 64 bit integer, accumulate
+ * value as a negative number.
+ */
+
+ /* skip leading spaces */
+ while (*ptr && isspace((unsigned char) *ptr))
+ ptr++;
+
+ /* handle sign */
+ if (*ptr == '-')
+ {
+ ptr++;
+ neg = true;
+ }
+ else if (*ptr == '+')
+ ptr++;
+
+ /* require at least one digit */
+ if (unlikely(!isdigit((unsigned char) *ptr)))
+ goto invalid_syntax;
+
+ /* process digits */
+ while (*ptr && isdigit((unsigned char) *ptr))
+ {
+ int8 digit = (*ptr++ - '0');
+
+ if (unlikely(pg_mul_s64_overflow(tmp, 10, &tmp)) ||
+ unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+ goto out_of_range;
+ }
+
+ /* allow trailing whitespace, but not other trailing chars */
+ while (*ptr != '\0' && isspace((unsigned char) *ptr))
+ ptr++;
+
+ if (unlikely(*ptr != '\0'))
+ goto invalid_syntax;
+
+ if (!neg)
+ {
+ /* could fail if input is most negative number */
+ if (unlikely(tmp == PG_INT64_MIN))
+ goto out_of_range;
+ tmp = -tmp;
+ }
+
+ return tmp;
+
+out_of_range:
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value \"%s\" is out of range for type %s",
+ s, "bigint")));
+
+invalid_syntax:
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "bigint", s)));
+
+ return 0; /* keep compiler quiet */
+}
+
+/*
+ * pg_itoa: converts a signed 16-bit integer to its string representation
+ * and returns strlen(a).
+ *
+ * Caller must ensure that 'a' points to enough memory to hold the result
+ * (at least 7 bytes, counting a leading sign and trailing NUL).
+ *
+ * It doesn't seem worth implementing this separately, so we just call pg_ltoa().
+ */
+int
+pg_itoa(int16 i, char *a)
+{
+ return pg_ltoa((int32) i, a);
+}
+
+/*
+ * pg_ultoa_n: converts an unsigned 32-bit integer to its string representation,
+ * not NUL-terminated, and returns the length of that string representation
+ *
+ * Caller must ensure that 'a' points to enough memory to hold the result (at
+ * least 10 bytes)
+ */
+int
+pg_ultoa_n(uint32 value, char *a)
+{
+ int olength,
+ i = 0;
+
+ /* Degenerate case */
+ if (value == 0)
+ {
+ *a = '0';
+ return 1;
+ }
+
+ olength = decimalLength32(value);
+
+ /* Compute the result string. */
+ while (value >= 10000)
+ {
+ const uint32 c = value - 10000 * (value / 10000);
+ const uint32 c0 = (c % 100) << 1;
+ const uint32 c1 = (c / 100) << 1;
+
+ char *pos = a + olength - i;
+
+ value /= 10000;
+
+ memcpy(pos - 2, DIGIT_TABLE + c0, 2);
+ memcpy(pos - 4, DIGIT_TABLE + c1, 2);
+ i += 4;
+ }
+ if (value >= 100)
+ {
+ const uint32 c = (value % 100) << 1;
+
+ char *pos = a + olength - i;
+
+ value /= 100;
+
+ memcpy(pos - 2, DIGIT_TABLE + c, 2);
+ i += 2;
+ }
+ if (value >= 10)
+ {
+ const uint32 c = value << 1;
+
+ char *pos = a + olength - i;
+
+ memcpy(pos - 2, DIGIT_TABLE + c, 2);
+ }
+ else
+ {
+ *a = (char) ('0' + value);
+ }
+
+ return olength;
+}
+
+/*
+ * pg_ltoa: converts a signed 32-bit integer to its string representation and
+ * returns strlen(a).
+ *
+ * It is the caller's responsibility to ensure that a is at least 12 bytes long,
+ * which is enough room to hold a minus sign, a maximally long int32, and the
+ * terminating NUL.
+ */
+int
+pg_ltoa(int32 value, char *a)
+{
+ uint32 uvalue = (uint32) value;
+ int len = 0;
+
+ if (value < 0)
+ {
+ uvalue = (uint32) 0 - uvalue;
+ a[len++] = '-';
+ }
+ len += pg_ultoa_n(uvalue, a + len);
+ a[len] = '\0';
+ return len;
+}
+
+/*
+ * pg_ulltoa_n: converts an unsigned 64-bit integer to its string
+ * representation, not NUL-terminated, and returns the length of that string
+ * representation.
+ *
+ * Caller must ensure that 'a' points to at least MAXINT8LEN bytes.
+ */
+int
+pg_ulltoa_n(uint64 value, char *a)
+{
+ int olength,
+ i = 0;
+ uint32 value2;
+
+ /* Degenerate case */
+ if (value == 0)
+ {
+ *a = '0';
+ return 1;
+ }
+
+ olength = decimalLength64(value);
+
+ /* Compute the result string. */
+ while (value >= 100000000)
+ {
+ const uint64 q = value / 100000000;
+ uint32 value2 = (uint32) (value - 100000000 * q);
+
+ const uint32 c = value2 % 10000;
+ const uint32 d = value2 / 10000;
+ const uint32 c0 = (c % 100) << 1;
+ const uint32 c1 = (c / 100) << 1;
+ const uint32 d0 = (d % 100) << 1;
+ const uint32 d1 = (d / 100) << 1;
+
+ char *pos = a + olength - i;
+
+ value = q;
+
+ memcpy(pos - 2, DIGIT_TABLE + c0, 2);
+ memcpy(pos - 4, DIGIT_TABLE + c1, 2);
+ memcpy(pos - 6, DIGIT_TABLE + d0, 2);
+ memcpy(pos - 8, DIGIT_TABLE + d1, 2);
+ i += 8;
+ }
+
+ /* Switch to 32-bit for speed */
+ value2 = (uint32) value;
+
+ if (value2 >= 10000)
+ {
+ const uint32 c = value2 - 10000 * (value2 / 10000);
+ const uint32 c0 = (c % 100) << 1;
+ const uint32 c1 = (c / 100) << 1;
+
+ char *pos = a + olength - i;
+
+ value2 /= 10000;
+
+ memcpy(pos - 2, DIGIT_TABLE + c0, 2);
+ memcpy(pos - 4, DIGIT_TABLE + c1, 2);
+ i += 4;
+ }
+ if (value2 >= 100)
+ {
+ const uint32 c = (value2 % 100) << 1;
+ char *pos = a + olength - i;
+
+ value2 /= 100;
+
+ memcpy(pos - 2, DIGIT_TABLE + c, 2);
+ i += 2;
+ }
+ if (value2 >= 10)
+ {
+ const uint32 c = value2 << 1;
+ char *pos = a + olength - i;
+
+ memcpy(pos - 2, DIGIT_TABLE + c, 2);
+ }
+ else
+ *a = (char) ('0' + value2);
+
+ return olength;
+}
+
+/*
+ * pg_lltoa: converts a signed 64-bit integer to its string representation and
+ * returns strlen(a).
+ *
+ * Caller must ensure that 'a' points to enough memory to hold the result
+ * (at least MAXINT8LEN + 1 bytes, counting a leading sign and trailing NUL).
+ */
+int
+pg_lltoa(int64 value, char *a)
+{
+ uint64 uvalue = value;
+ int len = 0;
+
+ if (value < 0)
+ {
+ uvalue = (uint64) 0 - uvalue;
+ a[len++] = '-';
+ }
+
+ len += pg_ulltoa_n(uvalue, a + len);
+ a[len] = '\0';
+ return len;
+}
+
+
+/*
+ * pg_ultostr_zeropad
+ * Converts 'value' into a decimal string representation stored at 'str'.
+ * 'minwidth' specifies the minimum width of the result; any extra space
+ * is filled up by prefixing the number with zeros.
+ *
+ * Returns the ending address of the string result (the last character written
+ * plus 1). Note that no NUL terminator is written.
+ *
+ * The intended use-case for this function is to build strings that contain
+ * multiple individual numbers, for example:
+ *
+ * str = pg_ultostr_zeropad(str, hours, 2);
+ * *str++ = ':';
+ * str = pg_ultostr_zeropad(str, mins, 2);
+ * *str++ = ':';
+ * str = pg_ultostr_zeropad(str, secs, 2);
+ * *str = '\0';
+ *
+ * Note: Caller must ensure that 'str' points to enough memory to hold the
+ * result.
+ */
+char *
+pg_ultostr_zeropad(char *str, uint32 value, int32 minwidth)
+{
+ int len;
+
+ Assert(minwidth > 0);
+
+ if (value < 100 && minwidth == 2) /* Short cut for common case */
+ {
+ memcpy(str, DIGIT_TABLE + value * 2, 2);
+ return str + 2;
+ }
+
+ len = pg_ultoa_n(value, str);
+ if (len >= minwidth)
+ return str + len;
+
+ memmove(str + minwidth - len, str, len);
+ memset(str, '0', minwidth - len);
+ return str + minwidth;
+}
+
+/*
+ * pg_ultostr
+ * Converts 'value' into a decimal string representation stored at 'str'.
+ *
+ * Returns the ending address of the string result (the last character written
+ * plus 1). Note that no NUL terminator is written.
+ *
+ * The intended use-case for this function is to build strings that contain
+ * multiple individual numbers, for example:
+ *
+ * str = pg_ultostr(str, a);
+ * *str++ = ' ';
+ * str = pg_ultostr(str, b);
+ * *str = '\0';
+ *
+ * Note: Caller must ensure that 'str' points to enough memory to hold the
+ * result.
+ */
+char *
+pg_ultostr(char *str, uint32 value)
+{
+ int len = pg_ultoa_n(value, str);
+
+ return str + len;
+}
diff --git a/src/backend/utils/adt/oid.c b/src/backend/utils/adt/oid.c
new file mode 100644
index 0000000..7de31d7
--- /dev/null
+++ b/src/backend/utils/adt/oid.c
@@ -0,0 +1,468 @@
+/*-------------------------------------------------------------------------
+ *
+ * oid.c
+ * Functions for the built-in type Oid ... also oidvector.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/oid.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+#include <limits.h>
+
+#include "catalog/pg_type.h"
+#include "libpq/pqformat.h"
+#include "nodes/value.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+
+
+#define OidVectorSize(n) (offsetof(oidvector, values) + (n) * sizeof(Oid))
+
+
+/*****************************************************************************
+ * USER I/O ROUTINES *
+ *****************************************************************************/
+
+static Oid
+oidin_subr(const char *s, char **endloc)
+{
+ unsigned long cvt;
+ char *endptr;
+ Oid result;
+
+ if (*s == '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "oid", s)));
+
+ errno = 0;
+ cvt = strtoul(s, &endptr, 10);
+
+ /*
+ * strtoul() normally only sets ERANGE. On some systems it also may set
+	 * EINVAL, which simply means it couldn't parse the input string. The
+	 * second "if" below handles that case, keeping the behavior consistent
+	 * across platforms.
+ */
+ if (errno && errno != ERANGE && errno != EINVAL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "oid", s)));
+
+ if (endptr == s && *s != '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "oid", s)));
+
+ if (errno == ERANGE)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value \"%s\" is out of range for type %s",
+ s, "oid")));
+
+ if (endloc)
+ {
+ /* caller wants to deal with rest of string */
+ *endloc = endptr;
+ }
+ else
+ {
+ /* allow only whitespace after number */
+ while (*endptr && isspace((unsigned char) *endptr))
+ endptr++;
+ if (*endptr)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "oid", s)));
+ }
+
+ result = (Oid) cvt;
+
+ /*
+ * Cope with possibility that unsigned long is wider than Oid, in which
+ * case strtoul will not raise an error for some values that are out of
+ * the range of Oid.
+ *
+ * For backwards compatibility, we want to accept inputs that are given
+ * with a minus sign, so allow the input value if it matches after either
+ * signed or unsigned extension to long.
+ *
+ * To ensure consistent results on 32-bit and 64-bit platforms, make sure
+ * the error message is the same as if strtoul() had returned ERANGE.
+ */
+#if OID_MAX != ULONG_MAX
+ if (cvt != (unsigned long) result &&
+ cvt != (unsigned long) ((int) result))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("value \"%s\" is out of range for type %s",
+ s, "oid")));
+#endif
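+
+ /*
+ * For example, on a platform where unsigned long is 64 bits wide, the
+ * historical input "-1" gives cvt = ULONG_MAX; the cast yields result =
+ * 4294967295, and the signed-extension test above accepts it, matching
+ * the behavior of 32-bit platforms.
+ */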
+
+ return result;
+}
+
+Datum
+oidin(PG_FUNCTION_ARGS)
+{
+ char *s = PG_GETARG_CSTRING(0);
+ Oid result;
+
+ result = oidin_subr(s, NULL);
+ PG_RETURN_OID(result);
+}
+
+Datum
+oidout(PG_FUNCTION_ARGS)
+{
+ Oid o = PG_GETARG_OID(0);
+ char *result = (char *) palloc(12);
+
+ snprintf(result, 12, "%u", o);
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * oidrecv - converts external binary format to oid
+ */
+Datum
+oidrecv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+
+ PG_RETURN_OID((Oid) pq_getmsgint(buf, sizeof(Oid)));
+}
+
+/*
+ * oidsend - converts oid to binary format
+ */
+Datum
+oidsend(PG_FUNCTION_ARGS)
+{
+ Oid arg1 = PG_GETARG_OID(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendint32(&buf, arg1);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/*
+ * construct oidvector given a raw array of Oids
+ *
+ * If oids is NULL then caller must fill values[] afterward
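+ *
+ * For example, given Oid oids[2] = {16384, 16385}, buildoidvector(oids, 2)
+ * returns a one-dimensional, zero-based oidvector of length 2 holding those
+ * two OIDs.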
+ */
+oidvector *
+buildoidvector(const Oid *oids, int n)
+{
+ oidvector *result;
+
+ result = (oidvector *) palloc0(OidVectorSize(n));
+
+ if (n > 0 && oids)
+ memcpy(result->values, oids, n * sizeof(Oid));
+
+ /*
+ * Attach standard array header. For historical reasons, we set the index
+ * lower bound to 0 not 1.
+ */
+ SET_VARSIZE(result, OidVectorSize(n));
+ result->ndim = 1;
+ result->dataoffset = 0; /* never any nulls */
+ result->elemtype = OIDOID;
+ result->dim1 = n;
+ result->lbound1 = 0;
+
+ return result;
+}
+
+/*
+ * oidvectorin - converts "num num ..." to internal form
+ */
+Datum
+oidvectorin(PG_FUNCTION_ARGS)
+{
+ char *oidString = PG_GETARG_CSTRING(0);
+ oidvector *result;
+ int nalloc;
+ int n;
+
+ nalloc = 32; /* arbitrary initial size guess */
+ result = (oidvector *) palloc0(OidVectorSize(nalloc));
+
+ for (n = 0;; n++)
+ {
+ while (*oidString && isspace((unsigned char) *oidString))
+ oidString++;
+ if (*oidString == '\0')
+ break;
+
+ if (n >= nalloc)
+ {
+ nalloc *= 2;
+ result = (oidvector *) repalloc(result, OidVectorSize(nalloc));
+ }
+
+ result->values[n] = oidin_subr(oidString, &oidString);
+ }
+
+ SET_VARSIZE(result, OidVectorSize(n));
+ result->ndim = 1;
+ result->dataoffset = 0; /* never any nulls */
+ result->elemtype = OIDOID;
+ result->dim1 = n;
+ result->lbound1 = 0;
+
+ PG_RETURN_POINTER(result);
+}
+
+/*
+ * oidvectorout - converts internal form to "num num ..."
+ */
+Datum
+oidvectorout(PG_FUNCTION_ARGS)
+{
+ oidvector *oidArray = (oidvector *) PG_GETARG_POINTER(0);
+ int num,
+ nnums = oidArray->dim1;
+ char *rp;
+ char *result;
+
+ /* allow 12 bytes per element: up to 10 digits, a separating ' ', and slack */
+ rp = result = (char *) palloc(nnums * 12 + 1);
+ for (num = 0; num < nnums; num++)
+ {
+ if (num != 0)
+ *rp++ = ' ';
+ sprintf(rp, "%u", oidArray->values[num]);
+ while (*++rp != '\0')
+ ;
+ }
+ *rp = '\0';
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * oidvectorrecv - converts external binary format to oidvector
+ */
+Datum
+oidvectorrecv(PG_FUNCTION_ARGS)
+{
+ LOCAL_FCINFO(locfcinfo, 3);
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ oidvector *result;
+
+ /*
+ * Normally one would call array_recv() using DirectFunctionCall3, but
+ * that does not work since array_recv wants to cache some data using
+ * fcinfo->flinfo->fn_extra. So we need to pass it our own flinfo
+ * parameter.
+ */
+ InitFunctionCallInfoData(*locfcinfo, fcinfo->flinfo, 3,
+ InvalidOid, NULL, NULL);
+
+ locfcinfo->args[0].value = PointerGetDatum(buf);
+ locfcinfo->args[0].isnull = false;
+ locfcinfo->args[1].value = ObjectIdGetDatum(OIDOID);
+ locfcinfo->args[1].isnull = false;
+ locfcinfo->args[2].value = Int32GetDatum(-1);
+ locfcinfo->args[2].isnull = false;
+
+ result = (oidvector *) DatumGetPointer(array_recv(locfcinfo));
+
+ Assert(!locfcinfo->isnull);
+
+ /* sanity checks: oidvector must be 1-D, 0-based, no nulls */
+ if (ARR_NDIM(result) != 1 ||
+ ARR_HASNULL(result) ||
+ ARR_ELEMTYPE(result) != OIDOID ||
+ ARR_LBOUND(result)[0] != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("invalid oidvector data")));
+
+ PG_RETURN_POINTER(result);
+}
+
+/*
+ * oidvectorsend - converts oidvector to binary format
+ */
+Datum
+oidvectorsend(PG_FUNCTION_ARGS)
+{
+ return array_send(fcinfo);
+}
+
+/*
+ * oidparse - get OID from ICONST/FCONST node
+ */
+Oid
+oidparse(Node *node)
+{
+ switch (nodeTag(node))
+ {
+ case T_Integer:
+ return intVal(node);
+ case T_Float:
+
+ /*
+ * Values too large for int4 will be represented as Float
+ * constants by the lexer. Accept these if they are valid OID
+ * strings.
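+ *
+ * For example, 3000000000 exceeds the int4 range and arrives here as a
+ * Float node, yet it is a perfectly valid OID.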
+ */
+ return oidin_subr(castNode(Float, node)->fval, NULL);
+ default:
+ elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node));
+ }
+ return InvalidOid; /* keep compiler quiet */
+}
+
+/* qsort comparison function for Oids */
+int
+oid_cmp(const void *p1, const void *p2)
+{
+ Oid v1 = *((const Oid *) p1);
+ Oid v2 = *((const Oid *) p2);
+
+ if (v1 < v2)
+ return -1;
+ if (v1 > v2)
+ return 1;
+ return 0;
+}
+
+
+/*****************************************************************************
+ * PUBLIC ROUTINES *
+ *****************************************************************************/
+
+Datum
+oideq(PG_FUNCTION_ARGS)
+{
+ Oid arg1 = PG_GETARG_OID(0);
+ Oid arg2 = PG_GETARG_OID(1);
+
+ PG_RETURN_BOOL(arg1 == arg2);
+}
+
+Datum
+oidne(PG_FUNCTION_ARGS)
+{
+ Oid arg1 = PG_GETARG_OID(0);
+ Oid arg2 = PG_GETARG_OID(1);
+
+ PG_RETURN_BOOL(arg1 != arg2);
+}
+
+Datum
+oidlt(PG_FUNCTION_ARGS)
+{
+ Oid arg1 = PG_GETARG_OID(0);
+ Oid arg2 = PG_GETARG_OID(1);
+
+ PG_RETURN_BOOL(arg1 < arg2);
+}
+
+Datum
+oidle(PG_FUNCTION_ARGS)
+{
+ Oid arg1 = PG_GETARG_OID(0);
+ Oid arg2 = PG_GETARG_OID(1);
+
+ PG_RETURN_BOOL(arg1 <= arg2);
+}
+
+Datum
+oidge(PG_FUNCTION_ARGS)
+{
+ Oid arg1 = PG_GETARG_OID(0);
+ Oid arg2 = PG_GETARG_OID(1);
+
+ PG_RETURN_BOOL(arg1 >= arg2);
+}
+
+Datum
+oidgt(PG_FUNCTION_ARGS)
+{
+ Oid arg1 = PG_GETARG_OID(0);
+ Oid arg2 = PG_GETARG_OID(1);
+
+ PG_RETURN_BOOL(arg1 > arg2);
+}
+
+Datum
+oidlarger(PG_FUNCTION_ARGS)
+{
+ Oid arg1 = PG_GETARG_OID(0);
+ Oid arg2 = PG_GETARG_OID(1);
+
+ PG_RETURN_OID((arg1 > arg2) ? arg1 : arg2);
+}
+
+Datum
+oidsmaller(PG_FUNCTION_ARGS)
+{
+ Oid arg1 = PG_GETARG_OID(0);
+ Oid arg2 = PG_GETARG_OID(1);
+
+ PG_RETURN_OID((arg1 < arg2) ? arg1 : arg2);
+}
+
+Datum
+oidvectoreq(PG_FUNCTION_ARGS)
+{
+ int32 cmp = DatumGetInt32(btoidvectorcmp(fcinfo));
+
+ PG_RETURN_BOOL(cmp == 0);
+}
+
+Datum
+oidvectorne(PG_FUNCTION_ARGS)
+{
+ int32 cmp = DatumGetInt32(btoidvectorcmp(fcinfo));
+
+ PG_RETURN_BOOL(cmp != 0);
+}
+
+Datum
+oidvectorlt(PG_FUNCTION_ARGS)
+{
+ int32 cmp = DatumGetInt32(btoidvectorcmp(fcinfo));
+
+ PG_RETURN_BOOL(cmp < 0);
+}
+
+Datum
+oidvectorle(PG_FUNCTION_ARGS)
+{
+ int32 cmp = DatumGetInt32(btoidvectorcmp(fcinfo));
+
+ PG_RETURN_BOOL(cmp <= 0);
+}
+
+Datum
+oidvectorge(PG_FUNCTION_ARGS)
+{
+ int32 cmp = DatumGetInt32(btoidvectorcmp(fcinfo));
+
+ PG_RETURN_BOOL(cmp >= 0);
+}
+
+Datum
+oidvectorgt(PG_FUNCTION_ARGS)
+{
+ int32 cmp = DatumGetInt32(btoidvectorcmp(fcinfo));
+
+ PG_RETURN_BOOL(cmp > 0);
+}
diff --git a/src/backend/utils/adt/oracle_compat.c b/src/backend/utils/adt/oracle_compat.c
new file mode 100644
index 0000000..6a5ce1c
--- /dev/null
+++ b/src/backend/utils/adt/oracle_compat.c
@@ -0,0 +1,1156 @@
+/*-------------------------------------------------------------------------
+ * oracle_compat.c
+ * Oracle compatible functions.
+ *
+ * Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ * Author: Edmund Mergl <E.Mergl@bawue.de>
+ * Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org>
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/oracle_compat.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "common/int.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "utils/builtins.h"
+#include "utils/formatting.h"
+#include "utils/memutils.h"
+
+
+static text *dotrim(const char *string, int stringlen,
+ const char *set, int setlen,
+ bool doltrim, bool dortrim);
+static bytea *dobyteatrim(bytea *string, bytea *set,
+ bool doltrim, bool dortrim);
+
+
+/********************************************************************
+ *
+ * lower
+ *
+ * Syntax:
+ *
+ * text lower(text string)
+ *
+ * Purpose:
+ *
+ * Returns string, with all letters forced to lowercase.
+ *
+ ********************************************************************/
+
+Datum
+lower(PG_FUNCTION_ARGS)
+{
+ text *in_string = PG_GETARG_TEXT_PP(0);
+ char *out_string;
+ text *result;
+
+ out_string = str_tolower(VARDATA_ANY(in_string),
+ VARSIZE_ANY_EXHDR(in_string),
+ PG_GET_COLLATION());
+ result = cstring_to_text(out_string);
+ pfree(out_string);
+
+ PG_RETURN_TEXT_P(result);
+}
+
+
+/********************************************************************
+ *
+ * upper
+ *
+ * Syntax:
+ *
+ * text upper(text string)
+ *
+ * Purpose:
+ *
+ * Returns string, with all letters forced to uppercase.
+ *
+ ********************************************************************/
+
+Datum
+upper(PG_FUNCTION_ARGS)
+{
+ text *in_string = PG_GETARG_TEXT_PP(0);
+ char *out_string;
+ text *result;
+
+ out_string = str_toupper(VARDATA_ANY(in_string),
+ VARSIZE_ANY_EXHDR(in_string),
+ PG_GET_COLLATION());
+ result = cstring_to_text(out_string);
+ pfree(out_string);
+
+ PG_RETURN_TEXT_P(result);
+}
+
+
+/********************************************************************
+ *
+ * initcap
+ *
+ * Syntax:
+ *
+ * text initcap(text string)
+ *
+ * Purpose:
+ *
+ * Returns string, with first letter of each word in uppercase, all
+ * other letters in lowercase. A word is defined as a sequence of
+ * alphanumeric characters, delimited by non-alphanumeric
+ * characters.
+ *
+ ********************************************************************/
+
+Datum
+initcap(PG_FUNCTION_ARGS)
+{
+ text *in_string = PG_GETARG_TEXT_PP(0);
+ char *out_string;
+ text *result;
+
+ out_string = str_initcap(VARDATA_ANY(in_string),
+ VARSIZE_ANY_EXHDR(in_string),
+ PG_GET_COLLATION());
+ result = cstring_to_text(out_string);
+ pfree(out_string);
+
+ PG_RETURN_TEXT_P(result);
+}
+
+
+/********************************************************************
+ *
+ * lpad
+ *
+ * Syntax:
+ *
+ * text lpad(text string1, int4 len, text string2)
+ *
+ * Purpose:
+ *
+ * Returns string1, left-padded to length len with the sequence of
+ * characters in string2. If len is less than the length of string1,
+ * instead truncate (on the right) to len.
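+ *
+ * For example, lpad('hi', 5, 'xy') returns 'xyxhi', while lpad('hi', 1, 'xy')
+ * returns 'h'.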
+ *
+ ********************************************************************/
+
+Datum
+lpad(PG_FUNCTION_ARGS)
+{
+ text *string1 = PG_GETARG_TEXT_PP(0);
+ int32 len = PG_GETARG_INT32(1);
+ text *string2 = PG_GETARG_TEXT_PP(2);
+ text *ret;
+ char *ptr1,
+ *ptr2,
+ *ptr2start,
+ *ptr2end,
+ *ptr_ret;
+ int m,
+ s1len,
+ s2len;
+ int bytelen;
+
+ /* Negative len is silently taken as zero */
+ if (len < 0)
+ len = 0;
+
+ s1len = VARSIZE_ANY_EXHDR(string1);
+ if (s1len < 0)
+ s1len = 0; /* shouldn't happen */
+
+ s2len = VARSIZE_ANY_EXHDR(string2);
+ if (s2len < 0)
+ s2len = 0; /* shouldn't happen */
+
+ s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
+
+ if (s1len > len)
+ s1len = len; /* truncate string1 to len chars */
+
+ if (s2len <= 0)
+ len = s1len; /* nothing to pad with, so don't pad */
+
+ /* compute worst-case output length */
+ if (unlikely(pg_mul_s32_overflow(pg_database_encoding_max_length(), len,
+ &bytelen)) ||
+ unlikely(pg_add_s32_overflow(bytelen, VARHDRSZ, &bytelen)) ||
+ unlikely(!AllocSizeIsValid(bytelen)))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("requested length too large")));
+
+ ret = (text *) palloc(bytelen);
+
+ m = len - s1len;
+
+ ptr2 = ptr2start = VARDATA_ANY(string2);
+ ptr2end = ptr2 + s2len;
+ ptr_ret = VARDATA(ret);
+
+ while (m--)
+ {
+ int mlen = pg_mblen(ptr2);
+
+ memcpy(ptr_ret, ptr2, mlen);
+ ptr_ret += mlen;
+ ptr2 += mlen;
+ if (ptr2 == ptr2end) /* wrap around at end of s2 */
+ ptr2 = ptr2start;
+ }
+
+ ptr1 = VARDATA_ANY(string1);
+
+ while (s1len--)
+ {
+ int mlen = pg_mblen(ptr1);
+
+ memcpy(ptr_ret, ptr1, mlen);
+ ptr_ret += mlen;
+ ptr1 += mlen;
+ }
+
+ SET_VARSIZE(ret, ptr_ret - (char *) ret);
+
+ PG_RETURN_TEXT_P(ret);
+}
+
+
+/********************************************************************
+ *
+ * rpad
+ *
+ * Syntax:
+ *
+ * text rpad(text string1, int4 len, text string2)
+ *
+ * Purpose:
+ *
+ * Returns string1, right-padded to length len with the sequence of
+ * characters in string2. If len is less than the length of string1,
+ * instead truncate (on the right) to len.
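+ *
+ * For example, rpad('hi', 5, 'xy') returns 'hixyx', while rpad('hi', 1, 'xy')
+ * returns 'h'.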
+ *
+ ********************************************************************/
+
+Datum
+rpad(PG_FUNCTION_ARGS)
+{
+ text *string1 = PG_GETARG_TEXT_PP(0);
+ int32 len = PG_GETARG_INT32(1);
+ text *string2 = PG_GETARG_TEXT_PP(2);
+ text *ret;
+ char *ptr1,
+ *ptr2,
+ *ptr2start,
+ *ptr2end,
+ *ptr_ret;
+ int m,
+ s1len,
+ s2len;
+ int bytelen;
+
+ /* Negative len is silently taken as zero */
+ if (len < 0)
+ len = 0;
+
+ s1len = VARSIZE_ANY_EXHDR(string1);
+ if (s1len < 0)
+ s1len = 0; /* shouldn't happen */
+
+ s2len = VARSIZE_ANY_EXHDR(string2);
+ if (s2len < 0)
+ s2len = 0; /* shouldn't happen */
+
+ s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len);
+
+ if (s1len > len)
+ s1len = len; /* truncate string1 to len chars */
+
+ if (s2len <= 0)
+ len = s1len; /* nothing to pad with, so don't pad */
+
+ /* compute worst-case output length */
+ if (unlikely(pg_mul_s32_overflow(pg_database_encoding_max_length(), len,
+ &bytelen)) ||
+ unlikely(pg_add_s32_overflow(bytelen, VARHDRSZ, &bytelen)) ||
+ unlikely(!AllocSizeIsValid(bytelen)))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("requested length too large")));
+
+ ret = (text *) palloc(bytelen);
+
+ m = len - s1len;
+
+ ptr1 = VARDATA_ANY(string1);
+ ptr_ret = VARDATA(ret);
+
+ while (s1len--)
+ {
+ int mlen = pg_mblen(ptr1);
+
+ memcpy(ptr_ret, ptr1, mlen);
+ ptr_ret += mlen;
+ ptr1 += mlen;
+ }
+
+ ptr2 = ptr2start = VARDATA_ANY(string2);
+ ptr2end = ptr2 + s2len;
+
+ while (m--)
+ {
+ int mlen = pg_mblen(ptr2);
+
+ memcpy(ptr_ret, ptr2, mlen);
+ ptr_ret += mlen;
+ ptr2 += mlen;
+ if (ptr2 == ptr2end) /* wrap around at end of s2 */
+ ptr2 = ptr2start;
+ }
+
+ SET_VARSIZE(ret, ptr_ret - (char *) ret);
+
+ PG_RETURN_TEXT_P(ret);
+}
+
+
+/********************************************************************
+ *
+ * btrim
+ *
+ * Syntax:
+ *
+ * text btrim(text string, text set)
+ *
+ * Purpose:
+ *
+ * Returns string with characters removed from the front and back
+ * up to the first character not in set.
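+ *
+ * For example, btrim('xyxtrimyyx', 'xyz') returns 'trim'.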
+ *
+ ********************************************************************/
+
+Datum
+btrim(PG_FUNCTION_ARGS)
+{
+ text *string = PG_GETARG_TEXT_PP(0);
+ text *set = PG_GETARG_TEXT_PP(1);
+ text *ret;
+
+ ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
+ VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
+ true, true);
+
+ PG_RETURN_TEXT_P(ret);
+}
+
+/********************************************************************
+ *
+ * btrim1 --- btrim with set fixed as ' '
+ *
+ ********************************************************************/
+
+Datum
+btrim1(PG_FUNCTION_ARGS)
+{
+ text *string = PG_GETARG_TEXT_PP(0);
+ text *ret;
+
+ ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
+ " ", 1,
+ true, true);
+
+ PG_RETURN_TEXT_P(ret);
+}
+
+/*
+ * Common implementation for btrim, ltrim, rtrim
+ */
+static text *
+dotrim(const char *string, int stringlen,
+ const char *set, int setlen,
+ bool doltrim, bool dortrim)
+{
+ int i;
+
+ /* Nothing to do if either string or set is empty */
+ if (stringlen > 0 && setlen > 0)
+ {
+ if (pg_database_encoding_max_length() > 1)
+ {
+ /*
+ * In the multibyte-encoding case, build arrays of pointers to
+ * character starts, so that we can avoid inefficient checks in
+ * the inner loops.
+ */
+ const char **stringchars;
+ const char **setchars;
+ int *stringmblen;
+ int *setmblen;
+ int stringnchars;
+ int setnchars;
+ int resultndx;
+ int resultnchars;
+ const char *p;
+ int len;
+ int mblen;
+ const char *str_pos;
+ int str_len;
+
+ stringchars = (const char **) palloc(stringlen * sizeof(char *));
+ stringmblen = (int *) palloc(stringlen * sizeof(int));
+ stringnchars = 0;
+ p = string;
+ len = stringlen;
+ while (len > 0)
+ {
+ stringchars[stringnchars] = p;
+ stringmblen[stringnchars] = mblen = pg_mblen(p);
+ stringnchars++;
+ p += mblen;
+ len -= mblen;
+ }
+
+ setchars = (const char **) palloc(setlen * sizeof(char *));
+ setmblen = (int *) palloc(setlen * sizeof(int));
+ setnchars = 0;
+ p = set;
+ len = setlen;
+ while (len > 0)
+ {
+ setchars[setnchars] = p;
+ setmblen[setnchars] = mblen = pg_mblen(p);
+ setnchars++;
+ p += mblen;
+ len -= mblen;
+ }
+
+ resultndx = 0; /* index in stringchars[] */
+ resultnchars = stringnchars;
+
+ if (doltrim)
+ {
+ while (resultnchars > 0)
+ {
+ str_pos = stringchars[resultndx];
+ str_len = stringmblen[resultndx];
+ for (i = 0; i < setnchars; i++)
+ {
+ if (str_len == setmblen[i] &&
+ memcmp(str_pos, setchars[i], str_len) == 0)
+ break;
+ }
+ if (i >= setnchars)
+ break; /* no match here */
+ string += str_len;
+ stringlen -= str_len;
+ resultndx++;
+ resultnchars--;
+ }
+ }
+
+ if (dortrim)
+ {
+ while (resultnchars > 0)
+ {
+ str_pos = stringchars[resultndx + resultnchars - 1];
+ str_len = stringmblen[resultndx + resultnchars - 1];
+ for (i = 0; i < setnchars; i++)
+ {
+ if (str_len == setmblen[i] &&
+ memcmp(str_pos, setchars[i], str_len) == 0)
+ break;
+ }
+ if (i >= setnchars)
+ break; /* no match here */
+ stringlen -= str_len;
+ resultnchars--;
+ }
+ }
+
+ pfree(stringchars);
+ pfree(stringmblen);
+ pfree(setchars);
+ pfree(setmblen);
+ }
+ else
+ {
+ /*
+ * In the single-byte-encoding case, we don't need such overhead.
+ */
+ if (doltrim)
+ {
+ while (stringlen > 0)
+ {
+ char str_ch = *string;
+
+ for (i = 0; i < setlen; i++)
+ {
+ if (str_ch == set[i])
+ break;
+ }
+ if (i >= setlen)
+ break; /* no match here */
+ string++;
+ stringlen--;
+ }
+ }
+
+ if (dortrim)
+ {
+ while (stringlen > 0)
+ {
+ char str_ch = string[stringlen - 1];
+
+ for (i = 0; i < setlen; i++)
+ {
+ if (str_ch == set[i])
+ break;
+ }
+ if (i >= setlen)
+ break; /* no match here */
+ stringlen--;
+ }
+ }
+ }
+ }
+
+ /* Return selected portion of string */
+ return cstring_to_text_with_len(string, stringlen);
+}
+
+/*
+ * Common implementation for bytea versions of btrim, ltrim, rtrim
+ */
+bytea *
+dobyteatrim(bytea *string, bytea *set, bool doltrim, bool dortrim)
+{
+ bytea *ret;
+ char *ptr,
+ *end,
+ *ptr2,
+ *ptr2start,
+ *end2;
+ int m,
+ stringlen,
+ setlen;
+
+ stringlen = VARSIZE_ANY_EXHDR(string);
+ setlen = VARSIZE_ANY_EXHDR(set);
+
+ if (stringlen <= 0 || setlen <= 0)
+ return string;
+
+ m = stringlen;
+ ptr = VARDATA_ANY(string);
+ end = ptr + stringlen - 1;
+ ptr2start = VARDATA_ANY(set);
+ end2 = ptr2start + setlen - 1;
+
+ if (doltrim)
+ {
+ while (m > 0)
+ {
+ ptr2 = ptr2start;
+ while (ptr2 <= end2)
+ {
+ if (*ptr == *ptr2)
+ break;
+ ++ptr2;
+ }
+ if (ptr2 > end2)
+ break;
+ ptr++;
+ m--;
+ }
+ }
+
+ if (dortrim)
+ {
+ while (m > 0)
+ {
+ ptr2 = ptr2start;
+ while (ptr2 <= end2)
+ {
+ if (*end == *ptr2)
+ break;
+ ++ptr2;
+ }
+ if (ptr2 > end2)
+ break;
+ end--;
+ m--;
+ }
+ }
+
+ ret = (bytea *) palloc(VARHDRSZ + m);
+ SET_VARSIZE(ret, VARHDRSZ + m);
+ memcpy(VARDATA(ret), ptr, m);
+ return ret;
+}
+
+/********************************************************************
+ *
+ * byteatrim
+ *
+ * Syntax:
+ *
+ * bytea byteatrim(bytea string, bytea set)
+ *
+ * Purpose:
+ *
+ * Returns string with characters removed from the front and back
+ * up to the first character not in set.
+ *
+ * Cloned from btrim and modified as required.
+ ********************************************************************/
+
+Datum
+byteatrim(PG_FUNCTION_ARGS)
+{
+ bytea *string = PG_GETARG_BYTEA_PP(0);
+ bytea *set = PG_GETARG_BYTEA_PP(1);
+ bytea *ret;
+
+ ret = dobyteatrim(string, set, true, true);
+
+ PG_RETURN_BYTEA_P(ret);
+}
+
+/********************************************************************
+ *
+ * bytealtrim
+ *
+ * Syntax:
+ *
+ * bytea bytealtrim(bytea string, bytea set)
+ *
+ * Purpose:
+ *
+ * Returns string with initial characters removed up to the first
+ * character not in set.
+ *
+ ********************************************************************/
+
+Datum
+bytealtrim(PG_FUNCTION_ARGS)
+{
+ bytea *string = PG_GETARG_BYTEA_PP(0);
+ bytea *set = PG_GETARG_BYTEA_PP(1);
+ bytea *ret;
+
+ ret = dobyteatrim(string, set, true, false);
+
+ PG_RETURN_BYTEA_P(ret);
+}
+
+/********************************************************************
+ *
+ * byteartrim
+ *
+ * Syntax:
+ *
+ * bytea byteartrim(bytea string, bytea set)
+ *
+ * Purpose:
+ *
+ * Returns string with final characters removed after the last
+ * character not in set.
+ *
+ ********************************************************************/
+
+Datum
+byteartrim(PG_FUNCTION_ARGS)
+{
+ bytea *string = PG_GETARG_BYTEA_PP(0);
+ bytea *set = PG_GETARG_BYTEA_PP(1);
+ bytea *ret;
+
+ ret = dobyteatrim(string, set, false, true);
+
+ PG_RETURN_BYTEA_P(ret);
+}
+
+/********************************************************************
+ *
+ * ltrim
+ *
+ * Syntax:
+ *
+ * text ltrim(text string, text set)
+ *
+ * Purpose:
+ *
+ * Returns string with initial characters removed up to the first
+ * character not in set.
+ *
+ ********************************************************************/
+
+Datum
+ltrim(PG_FUNCTION_ARGS)
+{
+ text *string = PG_GETARG_TEXT_PP(0);
+ text *set = PG_GETARG_TEXT_PP(1);
+ text *ret;
+
+ ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
+ VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
+ true, false);
+
+ PG_RETURN_TEXT_P(ret);
+}
+
+/********************************************************************
+ *
+ * ltrim1 --- ltrim with set fixed as ' '
+ *
+ ********************************************************************/
+
+Datum
+ltrim1(PG_FUNCTION_ARGS)
+{
+ text *string = PG_GETARG_TEXT_PP(0);
+ text *ret;
+
+ ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
+ " ", 1,
+ true, false);
+
+ PG_RETURN_TEXT_P(ret);
+}
+
+/********************************************************************
+ *
+ * rtrim
+ *
+ * Syntax:
+ *
+ * text rtrim(text string, text set)
+ *
+ * Purpose:
+ *
+ * Returns string with final characters removed after the last
+ * character not in set.
+ *
+ ********************************************************************/
+
+Datum
+rtrim(PG_FUNCTION_ARGS)
+{
+ text *string = PG_GETARG_TEXT_PP(0);
+ text *set = PG_GETARG_TEXT_PP(1);
+ text *ret;
+
+ ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
+ VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set),
+ false, true);
+
+ PG_RETURN_TEXT_P(ret);
+}
+
+/********************************************************************
+ *
+ * rtrim1 --- rtrim with set fixed as ' '
+ *
+ ********************************************************************/
+
+Datum
+rtrim1(PG_FUNCTION_ARGS)
+{
+ text *string = PG_GETARG_TEXT_PP(0);
+ text *ret;
+
+ ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string),
+ " ", 1,
+ false, true);
+
+ PG_RETURN_TEXT_P(ret);
+}
+
+
+/********************************************************************
+ *
+ * translate
+ *
+ * Syntax:
+ *
+ * text translate(text string, text from, text to)
+ *
+ * Purpose:
+ *
+ * Returns string after replacing all occurrences of characters in from
+ * with the corresponding character in to. If from is longer than to,
+ * occurrences of the extra characters in from are deleted.
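+ *
+ * For example, translate('12345', '143', 'ax') returns 'a2x5': '1' becomes
+ * 'a', '4' becomes 'x', and '3' is deleted because to has no third character.
+ *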
+ * Improved by Edwin Ramirez <ramirez@doc.mssm.edu>.
+ *
+ ********************************************************************/
+
+Datum
+translate(PG_FUNCTION_ARGS)
+{
+ text *string = PG_GETARG_TEXT_PP(0);
+ text *from = PG_GETARG_TEXT_PP(1);
+ text *to = PG_GETARG_TEXT_PP(2);
+ text *result;
+ char *from_ptr,
+ *to_ptr,
+ *to_end;
+ char *source,
+ *target;
+ int m,
+ fromlen,
+ tolen,
+ retlen,
+ i;
+ int bytelen;
+ int len;
+ int source_len;
+ int from_index;
+
+ m = VARSIZE_ANY_EXHDR(string);
+ if (m <= 0)
+ PG_RETURN_TEXT_P(string);
+ source = VARDATA_ANY(string);
+
+ fromlen = VARSIZE_ANY_EXHDR(from);
+ from_ptr = VARDATA_ANY(from);
+ tolen = VARSIZE_ANY_EXHDR(to);
+ to_ptr = VARDATA_ANY(to);
+ to_end = to_ptr + tolen;
+
+ /*
+ * The worst-case expansion is to substitute a max-length character for a
+ * single-byte character at each position of the string.
+ */
+ if (unlikely(pg_mul_s32_overflow(pg_database_encoding_max_length(), m,
+ &bytelen)) ||
+ unlikely(pg_add_s32_overflow(bytelen, VARHDRSZ, &bytelen)) ||
+ unlikely(!AllocSizeIsValid(bytelen)))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("requested length too large")));
+
+ result = (text *) palloc(bytelen);
+
+ target = VARDATA(result);
+ retlen = 0;
+
+ while (m > 0)
+ {
+ source_len = pg_mblen(source);
+ from_index = 0;
+
+ for (i = 0; i < fromlen; i += len)
+ {
+ len = pg_mblen(&from_ptr[i]);
+ if (len == source_len &&
+ memcmp(source, &from_ptr[i], len) == 0)
+ break;
+
+ from_index++;
+ }
+ if (i < fromlen)
+ {
+ /* substitute, or delete if no corresponding "to" character */
+ char *p = to_ptr;
+
+ for (i = 0; i < from_index; i++)
+ {
+ if (p >= to_end)
+ break;
+ p += pg_mblen(p);
+ }
+ if (p < to_end)
+ {
+ len = pg_mblen(p);
+ memcpy(target, p, len);
+ target += len;
+ retlen += len;
+ }
+ }
+ else
+ {
+ /* no match, so copy */
+ memcpy(target, source, source_len);
+ target += source_len;
+ retlen += source_len;
+ }
+
+ source += source_len;
+ m -= source_len;
+ }
+
+ SET_VARSIZE(result, retlen + VARHDRSZ);
+
+ /*
+ * The function result is probably much bigger than needed, if we're using
+ * a multibyte encoding, but it's not worth reallocating it; the result
+ * probably won't live long anyway.
+ */
+
+ PG_RETURN_TEXT_P(result);
+}
+
+/********************************************************************
+ *
+ * ascii
+ *
+ * Syntax:
+ *
+ * int ascii(text string)
+ *
+ * Purpose:
+ *
+ * Returns the decimal representation of the first character from
+ * string.
+ * If the string is empty we return 0.
+ * If the database encoding is UTF8, we return the Unicode codepoint.
+ * If the database encoding is any other multi-byte encoding, we
+ * return the value of the first byte if it is an ASCII character
+ * (range 1..127), or raise an error.
+ * For all other encodings we return the value of the first byte,
+ * (range 1..255).
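+ *
+ * For example, ascii('x') returns 120; in a UTF8 database, a string whose
+ * first character is U+20AC (the euro sign) yields 8364.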
+ *
+ ********************************************************************/
+
+Datum
+ascii(PG_FUNCTION_ARGS)
+{
+ text *string = PG_GETARG_TEXT_PP(0);
+ int encoding = GetDatabaseEncoding();
+ unsigned char *data;
+
+ if (VARSIZE_ANY_EXHDR(string) <= 0)
+ PG_RETURN_INT32(0);
+
+ data = (unsigned char *) VARDATA_ANY(string);
+
+ if (encoding == PG_UTF8 && *data > 127)
+ {
+ /* return the code point for Unicode */
+
+ int result = 0,
+ tbytes = 0,
+ i;
+
+ if (*data >= 0xF0)
+ {
+ result = *data & 0x07;
+ tbytes = 3;
+ }
+ else if (*data >= 0xE0)
+ {
+ result = *data & 0x0F;
+ tbytes = 2;
+ }
+ else
+ {
+ Assert(*data > 0xC0);
+ result = *data & 0x1f;
+ tbytes = 1;
+ }
+
+ Assert(tbytes > 0);
+
+ for (i = 1; i <= tbytes; i++)
+ {
+ Assert((data[i] & 0xC0) == 0x80);
+ result = (result << 6) + (data[i] & 0x3f);
+ }
+
+ PG_RETURN_INT32(result);
+ }
+ else
+ {
+ if (pg_encoding_max_length(encoding) > 1 && *data > 127)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("requested character too large")));
+
+
+ PG_RETURN_INT32((int32) *data);
+ }
+}
+
+/********************************************************************
+ *
+ * chr
+ *
+ * Syntax:
+ *
+ * text chr(int val)
+ *
+ * Purpose:
+ *
+ * Returns the character corresponding to the code value val.
+ *
+ * For UTF8 we treat the argument as a Unicode code point.
+ * For other multi-byte encodings we raise an error for arguments
+ * outside the strict ASCII range (1..127).
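+ *
+ * For example, chr(65) returns 'A'; in a UTF8 database, chr(937) returns
+ * the two-byte sequence encoding U+03A9 (Greek capital omega).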
+ *
+ * It's important that we don't ever return a value that is not valid
+ * in the database encoding, so that this doesn't become a way for
+ * invalid data to enter the database.
+ *
+ ********************************************************************/
+
+Datum
+chr (PG_FUNCTION_ARGS)
+{
+ int32 arg = PG_GETARG_INT32(0);
+ uint32 cvalue;
+ text *result;
+ int encoding = GetDatabaseEncoding();
+
+ /*
+ * Error out on arguments that make no sense or that we can't validly
+ * represent in the encoding.
+ */
+ if (arg < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("character number must be positive")));
+ else if (arg == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("null character not permitted")));
+
+ cvalue = arg;
+
+ if (encoding == PG_UTF8 && cvalue > 127)
+ {
+ /* for Unicode we treat the argument as a code point */
+ int bytes;
+ unsigned char *wch;
+
+ /*
+ * We only allow valid Unicode code points; per RFC3629 that stops at
+ * U+10FFFF, even though 4-byte UTF8 sequences can hold values up to
+ * U+1FFFFF.
+ */
+ if (cvalue > 0x0010ffff)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("requested character too large for encoding: %u",
+ cvalue)));
+
+ if (cvalue > 0xffff)
+ bytes = 4;
+ else if (cvalue > 0x07ff)
+ bytes = 3;
+ else
+ bytes = 2;
+
+ result = (text *) palloc(VARHDRSZ + bytes);
+ SET_VARSIZE(result, VARHDRSZ + bytes);
+ wch = (unsigned char *) VARDATA(result);
+
+ if (bytes == 2)
+ {
+ wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F);
+ wch[1] = 0x80 | (cvalue & 0x3F);
+ }
+ else if (bytes == 3)
+ {
+ wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F);
+ wch[1] = 0x80 | ((cvalue >> 6) & 0x3F);
+ wch[2] = 0x80 | (cvalue & 0x3F);
+ }
+ else
+ {
+ wch[0] = 0xF0 | ((cvalue >> 18) & 0x07);
+ wch[1] = 0x80 | ((cvalue >> 12) & 0x3F);
+ wch[2] = 0x80 | ((cvalue >> 6) & 0x3F);
+ wch[3] = 0x80 | (cvalue & 0x3F);
+ }
+
+ /*
+ * The preceding range check isn't sufficient, because UTF8 excludes
+ * Unicode "surrogate pair" codes. Make sure what we created is valid
+ * UTF8.
+ */
+ if (!pg_utf8_islegal(wch, bytes))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("requested character not valid for encoding: %u",
+ cvalue)));
+ }
+ else
+ {
+ bool is_mb;
+
+ is_mb = pg_encoding_max_length(encoding) > 1;
+
+ if ((is_mb && (cvalue > 127)) || (!is_mb && (cvalue > 255)))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("requested character too large for encoding: %u",
+ cvalue)));
+
+ result = (text *) palloc(VARHDRSZ + 1);
+ SET_VARSIZE(result, VARHDRSZ + 1);
+ *VARDATA(result) = (char) cvalue;
+ }
+
+ PG_RETURN_TEXT_P(result);
+}
+
+/********************************************************************
+ *
+ * repeat
+ *
+ * Syntax:
+ *
+ * text repeat(text string, int val)
+ *
+ * Purpose:
+ *
+ * Repeat string val times.
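+ *
+ * For example, repeat('Pg', 4) returns 'PgPgPgPg'.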
+ *
+ ********************************************************************/
+
+Datum
+repeat(PG_FUNCTION_ARGS)
+{
+ text *string = PG_GETARG_TEXT_PP(0);
+ int32 count = PG_GETARG_INT32(1);
+ text *result;
+ int slen,
+ tlen;
+ int i;
+ char *cp,
+ *sp;
+
+ if (count < 0)
+ count = 0;
+
+ slen = VARSIZE_ANY_EXHDR(string);
+
+ if (unlikely(pg_mul_s32_overflow(count, slen, &tlen)) ||
+ unlikely(pg_add_s32_overflow(tlen, VARHDRSZ, &tlen)) ||
+ unlikely(!AllocSizeIsValid(tlen)))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("requested length too large")));
+
+ result = (text *) palloc(tlen);
+
+ SET_VARSIZE(result, tlen);
+ cp = VARDATA(result);
+ sp = VARDATA_ANY(string);
+ for (i = 0; i < count; i++)
+ {
+ memcpy(cp, sp, slen);
+ cp += slen;
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ PG_RETURN_TEXT_P(result);
+}
diff --git a/src/backend/utils/adt/orderedsetaggs.c b/src/backend/utils/adt/orderedsetaggs.c
new file mode 100644
index 0000000..6d4f6b7
--- /dev/null
+++ b/src/backend/utils/adt/orderedsetaggs.c
@@ -0,0 +1,1431 @@
+/*-------------------------------------------------------------------------
+ *
+ * orderedsetaggs.c
+ * Ordered-set aggregate functions.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/orderedsetaggs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+
+#include "catalog/pg_aggregate.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_type.h"
+#include "executor/executor.h"
+#include "miscadmin.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/optimizer.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/timestamp.h"
+#include "utils/tuplesort.h"
+
+
+/*
+ * Generic support for ordered-set aggregates
+ *
+ * The state for an ordered-set aggregate is divided into a per-group struct
+ * (which is the internal-type transition state datum returned to nodeAgg.c)
+ * and a per-query struct, which contains data and sub-objects that we can
+ * create just once per query because they will not change across groups.
+ * The per-query struct and subsidiary data live in the executor's per-query
+ * memory context, and go away implicitly at ExecutorEnd().
+ *
+ * These structs are set up during the first call of the transition function.
+ * Because we allow nodeAgg.c to merge ordered-set aggregates (but not
+ * hypothetical aggregates) with identical inputs and transition functions,
+ * this info must not depend on the particular aggregate (ie, particular
+ * final-function), nor on the direct argument(s) of the aggregate.
+ */
+
+typedef struct OSAPerQueryState
+{
+ /* Representative Aggref for this aggregate: */
+ Aggref *aggref;
+ /* Memory context containing this struct and other per-query data: */
+ MemoryContext qcontext;
+ /* Context for expression evaluation */
+ ExprContext *econtext;
+ /* Do we expect multiple final-function calls within one group? */
+ bool rescan_needed;
+
+ /* These fields are used only when accumulating tuples: */
+
+ /* Tuple descriptor for tuples inserted into sortstate: */
+ TupleDesc tupdesc;
+ /* Tuple slot we can use for inserting/extracting tuples: */
+ TupleTableSlot *tupslot;
+ /* Per-sort-column sorting information */
+ int numSortCols;
+ AttrNumber *sortColIdx;
+ Oid *sortOperators;
+ Oid *eqOperators;
+ Oid *sortCollations;
+ bool *sortNullsFirsts;
+ /* Equality operator call info, created only if needed: */
+ ExprState *compareTuple;
+
+ /* These fields are used only when accumulating datums: */
+
+ /* Info about datatype of datums being sorted: */
+ Oid sortColType;
+ int16 typLen;
+ bool typByVal;
+ char typAlign;
+ /* Info about sort ordering: */
+ Oid sortOperator;
+ Oid eqOperator;
+ Oid sortCollation;
+ bool sortNullsFirst;
+ /* Equality operator call info, created only if needed: */
+ FmgrInfo equalfn;
+} OSAPerQueryState;
+
+typedef struct OSAPerGroupState
+{
+ /* Link to the per-query state for this aggregate: */
+ OSAPerQueryState *qstate;
+ /* Memory context containing per-group data: */
+ MemoryContext gcontext;
+ /* Sort object we're accumulating data in: */
+ Tuplesortstate *sortstate;
+ /* Number of normal rows inserted into sortstate: */
+ int64 number_of_rows;
+ /* Have we already done tuplesort_performsort? */
+ bool sort_done;
+} OSAPerGroupState;
+
+static void ordered_set_shutdown(Datum arg);
+
+
+/*
+ * Set up working state for an ordered-set aggregate
+ */
+static OSAPerGroupState *
+ordered_set_startup(FunctionCallInfo fcinfo, bool use_tuples)
+{
+ OSAPerGroupState *osastate;
+ OSAPerQueryState *qstate;
+ MemoryContext gcontext;
+ MemoryContext oldcontext;
+ int tuplesortopt;
+
+ /*
+ * Check we're called as aggregate (and not a window function), and get
+ * the Agg node's group-lifespan context (which might change from group to
+ * group, so we shouldn't cache it in the per-query state).
+ */
+ if (AggCheckCallContext(fcinfo, &gcontext) != AGG_CONTEXT_AGGREGATE)
+ elog(ERROR, "ordered-set aggregate called in non-aggregate context");
+
+ /*
+ * We keep a link to the per-query state in fn_extra; if it's not there,
+ * create it, and do the per-query setup we need.
+ */
+ qstate = (OSAPerQueryState *) fcinfo->flinfo->fn_extra;
+ if (qstate == NULL)
+ {
+ Aggref *aggref;
+ MemoryContext qcontext;
+ List *sortlist;
+ int numSortCols;
+
+ /* Get the Aggref so we can examine aggregate's arguments */
+ aggref = AggGetAggref(fcinfo);
+ if (!aggref)
+ elog(ERROR, "ordered-set aggregate called in non-aggregate context");
+ if (!AGGKIND_IS_ORDERED_SET(aggref->aggkind))
+ elog(ERROR, "ordered-set aggregate support function called for non-ordered-set aggregate");
+
+ /*
+ * Prepare per-query structures in the fn_mcxt, which we assume is the
+ * executor's per-query context; in any case it's the right place to
+ * keep anything found via fn_extra.
+ */
+ qcontext = fcinfo->flinfo->fn_mcxt;
+ oldcontext = MemoryContextSwitchTo(qcontext);
+
+ qstate = (OSAPerQueryState *) palloc0(sizeof(OSAPerQueryState));
+ qstate->aggref = aggref;
+ qstate->qcontext = qcontext;
+
+ /* We need to support rescans if the trans state is shared */
+ qstate->rescan_needed = AggStateIsShared(fcinfo);
+
+ /* Extract the sort information */
+ sortlist = aggref->aggorder;
+ numSortCols = list_length(sortlist);
+
+ if (use_tuples)
+ {
+ bool ishypothetical = (aggref->aggkind == AGGKIND_HYPOTHETICAL);
+ ListCell *lc;
+ int i;
+
+ if (ishypothetical)
+ numSortCols++; /* make space for flag column */
+ qstate->numSortCols = numSortCols;
+ qstate->sortColIdx = (AttrNumber *) palloc(numSortCols * sizeof(AttrNumber));
+ qstate->sortOperators = (Oid *) palloc(numSortCols * sizeof(Oid));
+ qstate->eqOperators = (Oid *) palloc(numSortCols * sizeof(Oid));
+ qstate->sortCollations = (Oid *) palloc(numSortCols * sizeof(Oid));
+ qstate->sortNullsFirsts = (bool *) palloc(numSortCols * sizeof(bool));
+
+ i = 0;
+ foreach(lc, sortlist)
+ {
+ SortGroupClause *sortcl = (SortGroupClause *) lfirst(lc);
+ TargetEntry *tle = get_sortgroupclause_tle(sortcl,
+ aggref->args);
+
+ /* the parser should have made sure of this */
+ Assert(OidIsValid(sortcl->sortop));
+
+ qstate->sortColIdx[i] = tle->resno;
+ qstate->sortOperators[i] = sortcl->sortop;
+ qstate->eqOperators[i] = sortcl->eqop;
+ qstate->sortCollations[i] = exprCollation((Node *) tle->expr);
+ qstate->sortNullsFirsts[i] = sortcl->nulls_first;
+ i++;
+ }
+
+ if (ishypothetical)
+ {
+ /* Add an integer flag column as the last sort column */
+ qstate->sortColIdx[i] = list_length(aggref->args) + 1;
+ qstate->sortOperators[i] = Int4LessOperator;
+ qstate->eqOperators[i] = Int4EqualOperator;
+ qstate->sortCollations[i] = InvalidOid;
+ qstate->sortNullsFirsts[i] = false;
+ i++;
+ }
+
+ Assert(i == numSortCols);
+
+ /*
+ * Get a tupledesc corresponding to the aggregated inputs
+ * (including sort expressions) of the agg.
+ */
+ qstate->tupdesc = ExecTypeFromTL(aggref->args);
+
+ /* If we need a flag column, hack the tupledesc to include that */
+ if (ishypothetical)
+ {
+ TupleDesc newdesc;
+ int natts = qstate->tupdesc->natts;
+
+ newdesc = CreateTemplateTupleDesc(natts + 1);
+ for (i = 1; i <= natts; i++)
+ TupleDescCopyEntry(newdesc, i, qstate->tupdesc, i);
+
+ TupleDescInitEntry(newdesc,
+ (AttrNumber) ++natts,
+ "flag",
+ INT4OID,
+ -1,
+ 0);
+
+ FreeTupleDesc(qstate->tupdesc);
+ qstate->tupdesc = newdesc;
+ }
+
+ /* Create slot we'll use to store/retrieve rows */
+ qstate->tupslot = MakeSingleTupleTableSlot(qstate->tupdesc,
+ &TTSOpsMinimalTuple);
+ }
+ else
+ {
+ /* Sort single datums */
+ SortGroupClause *sortcl;
+ TargetEntry *tle;
+
+ if (numSortCols != 1 || aggref->aggkind == AGGKIND_HYPOTHETICAL)
+ elog(ERROR, "ordered-set aggregate support function does not support multiple aggregated columns");
+
+ sortcl = (SortGroupClause *) linitial(sortlist);
+ tle = get_sortgroupclause_tle(sortcl, aggref->args);
+
+ /* the parser should have made sure of this */
+ Assert(OidIsValid(sortcl->sortop));
+
+ /* Save sort ordering info */
+ qstate->sortColType = exprType((Node *) tle->expr);
+ qstate->sortOperator = sortcl->sortop;
+ qstate->eqOperator = sortcl->eqop;
+ qstate->sortCollation = exprCollation((Node *) tle->expr);
+ qstate->sortNullsFirst = sortcl->nulls_first;
+
+ /* Save datatype info */
+ get_typlenbyvalalign(qstate->sortColType,
+ &qstate->typLen,
+ &qstate->typByVal,
+ &qstate->typAlign);
+ }
+
+ fcinfo->flinfo->fn_extra = (void *) qstate;
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ /* Now build the stuff we need in group-lifespan context */
+ oldcontext = MemoryContextSwitchTo(gcontext);
+
+ osastate = (OSAPerGroupState *) palloc(sizeof(OSAPerGroupState));
+ osastate->qstate = qstate;
+ osastate->gcontext = gcontext;
+
+ tuplesortopt = TUPLESORT_NONE;
+
+ if (qstate->rescan_needed)
+ tuplesortopt |= TUPLESORT_RANDOMACCESS;
+
+ /*
+ * Initialize tuplesort object.
+ */
+ if (use_tuples)
+ osastate->sortstate = tuplesort_begin_heap(qstate->tupdesc,
+ qstate->numSortCols,
+ qstate->sortColIdx,
+ qstate->sortOperators,
+ qstate->sortCollations,
+ qstate->sortNullsFirsts,
+ work_mem,
+ NULL,
+ tuplesortopt);
+ else
+ osastate->sortstate = tuplesort_begin_datum(qstate->sortColType,
+ qstate->sortOperator,
+ qstate->sortCollation,
+ qstate->sortNullsFirst,
+ work_mem,
+ NULL,
+ tuplesortopt);
+
+ osastate->number_of_rows = 0;
+ osastate->sort_done = false;
+
+ /* Now register a shutdown callback to clean things up at end of group */
+ AggRegisterCallback(fcinfo,
+ ordered_set_shutdown,
+ PointerGetDatum(osastate));
+
+ MemoryContextSwitchTo(oldcontext);
+
+ return osastate;
+}
+
+/*
+ * Clean up when evaluation of an ordered-set aggregate is complete.
+ *
+ * We don't need to bother freeing objects in the per-group memory context,
+ * since that will get reset anyway by nodeAgg.c; nor should we free anything
+ * in the per-query context, which will get cleared (if this was the last
+ * group) by ExecutorEnd. But we must take care to release any potential
+ * non-memory resources.
+ *
+ * In the case where we're not expecting multiple finalfn calls, we could
+ * arguably rely on the finalfn to clean up; but it's easier and more testable
+ * if we just do it the same way in either case.
+ */
+static void
+ordered_set_shutdown(Datum arg)
+{
+ OSAPerGroupState *osastate = (OSAPerGroupState *) DatumGetPointer(arg);
+
+ /* Tuplesort object might have temp files. */
+ if (osastate->sortstate)
+ tuplesort_end(osastate->sortstate);
+ osastate->sortstate = NULL;
+ /* The tupleslot probably can't be holding a pin, but let's be safe. */
+ if (osastate->qstate->tupslot)
+ ExecClearTuple(osastate->qstate->tupslot);
+}
+
+
+/*
+ * Generic transition function for ordered-set aggregates
+ * with a single input column in which we want to suppress nulls
+ */
+Datum
+ordered_set_transition(PG_FUNCTION_ARGS)
+{
+ OSAPerGroupState *osastate;
+
+ /* If first call, create the transition state workspace */
+ if (PG_ARGISNULL(0))
+ osastate = ordered_set_startup(fcinfo, false);
+ else
+ osastate = (OSAPerGroupState *) PG_GETARG_POINTER(0);
+
+ /* Load the datum into the tuplesort object, but only if it's not null */
+ if (!PG_ARGISNULL(1))
+ {
+ tuplesort_putdatum(osastate->sortstate, PG_GETARG_DATUM(1), false);
+ osastate->number_of_rows++;
+ }
+
+ PG_RETURN_POINTER(osastate);
+}
+
+/*
+ * Generic transition function for ordered-set aggregates
+ * with (potentially) multiple aggregated input columns
+ */
+Datum
+ordered_set_transition_multi(PG_FUNCTION_ARGS)
+{
+ OSAPerGroupState *osastate;
+ TupleTableSlot *slot;
+ int nargs;
+ int i;
+
+ /* If first call, create the transition state workspace */
+ if (PG_ARGISNULL(0))
+ osastate = ordered_set_startup(fcinfo, true);
+ else
+ osastate = (OSAPerGroupState *) PG_GETARG_POINTER(0);
+
+ /* Form a tuple from all the other inputs besides the transition value */
+ slot = osastate->qstate->tupslot;
+ ExecClearTuple(slot);
+ nargs = PG_NARGS() - 1;
+ for (i = 0; i < nargs; i++)
+ {
+ slot->tts_values[i] = PG_GETARG_DATUM(i + 1);
+ slot->tts_isnull[i] = PG_ARGISNULL(i + 1);
+ }
+ if (osastate->qstate->aggref->aggkind == AGGKIND_HYPOTHETICAL)
+ {
+ /* Add a zero flag value to mark this row as a normal input row */
+ slot->tts_values[i] = Int32GetDatum(0);
+ slot->tts_isnull[i] = false;
+ i++;
+ }
+ Assert(i == slot->tts_tupleDescriptor->natts);
+ ExecStoreVirtualTuple(slot);
+
+ /* Load the row into the tuplesort object */
+ tuplesort_puttupleslot(osastate->sortstate, slot);
+ osastate->number_of_rows++;
+
+ PG_RETURN_POINTER(osastate);
+}
+
+
+/*
+ * percentile_disc(float8) within group(anyelement) - discrete percentile
+ */
+Datum
+percentile_disc_final(PG_FUNCTION_ARGS)
+{
+ OSAPerGroupState *osastate;
+ double percentile;
+ Datum val;
+ bool isnull;
+ int64 rownum;
+
+ Assert(AggCheckCallContext(fcinfo, NULL) == AGG_CONTEXT_AGGREGATE);
+
+ /* Get and check the percentile argument */
+ if (PG_ARGISNULL(1))
+ PG_RETURN_NULL();
+
+ percentile = PG_GETARG_FLOAT8(1);
+
+ if (percentile < 0 || percentile > 1 || isnan(percentile))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("percentile value %g is not between 0 and 1",
+ percentile)));
+
+ /* If there were no regular rows, the result is NULL */
+ if (PG_ARGISNULL(0))
+ PG_RETURN_NULL();
+
+ osastate = (OSAPerGroupState *) PG_GETARG_POINTER(0);
+
+ /* number_of_rows could be zero if we only saw NULL input values */
+ if (osastate->number_of_rows == 0)
+ PG_RETURN_NULL();
+
+ /* Finish the sort, or rescan if we already did */
+ if (!osastate->sort_done)
+ {
+ tuplesort_performsort(osastate->sortstate);
+ osastate->sort_done = true;
+ }
+ else
+ tuplesort_rescan(osastate->sortstate);
+
+ /*----------
+ * We need the smallest K such that (K/N) >= percentile.
+ * N>0, therefore K >= N*percentile, therefore K = ceil(N*percentile).
+ * So we skip K-1 rows (if K>0) and return the next row fetched.
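+ * For example, with N = 10 and percentile = 0.25, K = ceil(2.5) = 3, so we
+ * skip 2 rows and return the third-smallest value.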
+ *----------
+ */
+ rownum = (int64) ceil(percentile * osastate->number_of_rows);
+ Assert(rownum <= osastate->number_of_rows);
+
+ if (rownum > 1)
+ {
+ if (!tuplesort_skiptuples(osastate->sortstate, rownum - 1, true))
+ elog(ERROR, "missing row in percentile_disc");
+ }
+
+ if (!tuplesort_getdatum(osastate->sortstate, true, &val, &isnull, NULL))
+ elog(ERROR, "missing row in percentile_disc");
+
+ /* We shouldn't have stored any nulls, but do the right thing anyway */
+ if (isnull)
+ PG_RETURN_NULL();
+ else
+ PG_RETURN_DATUM(val);
+}
+
+
+/*
+ * For percentile_cont, we need a way to interpolate between consecutive
+ * values. Use a helper function for that, so that we can share the rest
+ * of the code between types.
+ */
+typedef Datum (*LerpFunc) (Datum lo, Datum hi, double pct);
+
+static Datum
+float8_lerp(Datum lo, Datum hi, double pct)
+{
+ double loval = DatumGetFloat8(lo);
+ double hival = DatumGetFloat8(hi);
+
+ return Float8GetDatum(loval + (pct * (hival - loval)));
+}
+
+static Datum
+interval_lerp(Datum lo, Datum hi, double pct)
+{
+ Datum diff_result = DirectFunctionCall2(interval_mi, hi, lo);
+ Datum mul_result = DirectFunctionCall2(interval_mul,
+ diff_result,
+ Float8GetDatumFast(pct));
+
+ return DirectFunctionCall2(interval_pl, mul_result, lo);
+}
+
+/*
+ * Continuous percentile
+ */
+static Datum
+percentile_cont_final_common(FunctionCallInfo fcinfo,
+ Oid expect_type,
+ LerpFunc lerpfunc)
+{
+ OSAPerGroupState *osastate;
+ double percentile;
+ int64 first_row = 0;
+ int64 second_row = 0;
+ Datum val;
+ Datum first_val;
+ Datum second_val;
+ double proportion;
+ bool isnull;
+
+ Assert(AggCheckCallContext(fcinfo, NULL) == AGG_CONTEXT_AGGREGATE);
+
+ /* Get and check the percentile argument */
+ if (PG_ARGISNULL(1))
+ PG_RETURN_NULL();
+
+ percentile = PG_GETARG_FLOAT8(1);
+
+ if (percentile < 0 || percentile > 1 || isnan(percentile))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("percentile value %g is not between 0 and 1",
+ percentile)));
+
+ /* If there were no regular rows, the result is NULL */
+ if (PG_ARGISNULL(0))
+ PG_RETURN_NULL();
+
+ osastate = (OSAPerGroupState *) PG_GETARG_POINTER(0);
+
+ /* number_of_rows could be zero if we only saw NULL input values */
+ if (osastate->number_of_rows == 0)
+ PG_RETURN_NULL();
+
+ Assert(expect_type == osastate->qstate->sortColType);
+
+ /* Finish the sort, or rescan if we already did */
+ if (!osastate->sort_done)
+ {
+ tuplesort_performsort(osastate->sortstate);
+ osastate->sort_done = true;
+ }
+ else
+ tuplesort_rescan(osastate->sortstate);
+
+ first_row = floor(percentile * (osastate->number_of_rows - 1));
+ second_row = ceil(percentile * (osastate->number_of_rows - 1));
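+
+ /*
+ * For example, with 5 rows and percentile = 0.3 the target position is
+ * 0.3 * 4 = 1.2, so first_row = 1 and second_row = 2: we interpolate 20%
+ * of the way from the second-smallest value to the third-smallest.
+ */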
+
+ Assert(first_row < osastate->number_of_rows);
+
+ if (!tuplesort_skiptuples(osastate->sortstate, first_row, true))
+ elog(ERROR, "missing row in percentile_cont");
+
+ if (!tuplesort_getdatum(osastate->sortstate, true, &first_val, &isnull, NULL))
+ elog(ERROR, "missing row in percentile_cont");
+ if (isnull)
+ PG_RETURN_NULL();
+
+ if (first_row == second_row)
+ {
+ val = first_val;
+ }
+ else
+ {
+ if (!tuplesort_getdatum(osastate->sortstate, true, &second_val, &isnull, NULL))
+ elog(ERROR, "missing row in percentile_cont");
+
+ if (isnull)
+ PG_RETURN_NULL();
+
+ proportion = (percentile * (osastate->number_of_rows - 1)) - first_row;
+ val = lerpfunc(first_val, second_val, proportion);
+ }
+
+ PG_RETURN_DATUM(val);
+}
+
+/*
+ * percentile_cont(float8) within group (float8) - continuous percentile
+ */
+Datum
+percentile_cont_float8_final(PG_FUNCTION_ARGS)
+{
+ return percentile_cont_final_common(fcinfo, FLOAT8OID, float8_lerp);
+}
+
+/*
+ * percentile_cont(float8) within group (interval) - continuous percentile
+ */
+Datum
+percentile_cont_interval_final(PG_FUNCTION_ARGS)
+{
+ return percentile_cont_final_common(fcinfo, INTERVALOID, interval_lerp);
+}
+
+
+/*
+ * Support code for handling arrays of percentiles
+ *
+ * Note: in each pct_info entry, second_row should be equal to or
+ * exactly one more than first_row.
+ */
+struct pct_info
+{
+ int64 first_row; /* first row to sample */
+ int64 second_row; /* possible second row to sample */
+ double proportion; /* interpolation fraction */
+ int idx; /* index of this item in original array */
+};
+
+/*
+ * Sort comparator to sort pct_infos by first_row then second_row
+ */
+static int
+pct_info_cmp(const void *pa, const void *pb)
+{
+ const struct pct_info *a = (const struct pct_info *) pa;
+ const struct pct_info *b = (const struct pct_info *) pb;
+
+ if (a->first_row != b->first_row)
+ return (a->first_row < b->first_row) ? -1 : 1;
+ if (a->second_row != b->second_row)
+ return (a->second_row < b->second_row) ? -1 : 1;
+ return 0;
+}
+
+/*
+ * Construct array showing which rows to sample for percentiles.
+ */
+static struct pct_info *
+setup_pct_info(int num_percentiles,
+ Datum *percentiles_datum,
+ bool *percentiles_null,
+ int64 rowcount,
+ bool continuous)
+{
+ struct pct_info *pct_info;
+ int i;
+
+ pct_info = (struct pct_info *) palloc(num_percentiles * sizeof(struct pct_info));
+
+ for (i = 0; i < num_percentiles; i++)
+ {
+ pct_info[i].idx = i;
+
+ if (percentiles_null[i])
+ {
+ /* dummy entry for any NULL in array */
+ pct_info[i].first_row = 0;
+ pct_info[i].second_row = 0;
+ pct_info[i].proportion = 0;
+ }
+ else
+ {
+ double p = DatumGetFloat8(percentiles_datum[i]);
+
+ if (p < 0 || p > 1 || isnan(p))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("percentile value %g is not between 0 and 1",
+ p)));
+
+ if (continuous)
+ {
+ pct_info[i].first_row = 1 + floor(p * (rowcount - 1));
+ pct_info[i].second_row = 1 + ceil(p * (rowcount - 1));
+ pct_info[i].proportion = (p * (rowcount - 1)) - floor(p * (rowcount - 1));
+ }
+ else
+ {
+ /*----------
+ * We need the smallest K such that (K/N) >= percentile.
+ * N>0, therefore K >= N*percentile, therefore
+ * K = ceil(N*percentile); but not less than 1.
+ *----------
+ */
+ int64 row = (int64) ceil(p * rowcount);
+
+ row = Max(1, row);
+ pct_info[i].first_row = row;
+ pct_info[i].second_row = row;
+ pct_info[i].proportion = 0;
+ }
+ }
+ }
+
+ /*
+ * The parameter array wasn't necessarily in sorted order, but we need to
+ * visit the rows in order, so sort by first_row/second_row.
+ */
+ qsort(pct_info, num_percentiles, sizeof(struct pct_info), pct_info_cmp);
+
+ return pct_info;
+}
+
+/*
+ * percentile_disc(float8[]) within group (anyelement) - discrete percentiles
+ */
+Datum
+percentile_disc_multi_final(PG_FUNCTION_ARGS)
+{
+ OSAPerGroupState *osastate;
+ ArrayType *param;
+ Datum *percentiles_datum;
+ bool *percentiles_null;
+ int num_percentiles;
+ struct pct_info *pct_info;
+ Datum *result_datum;
+ bool *result_isnull;
+ int64 rownum = 0;
+ Datum val = (Datum) 0;
+ bool isnull = true;
+ int i;
+
+ Assert(AggCheckCallContext(fcinfo, NULL) == AGG_CONTEXT_AGGREGATE);
+
+ /* If there were no regular rows, the result is NULL */
+ if (PG_ARGISNULL(0))
+ PG_RETURN_NULL();
+
+ osastate = (OSAPerGroupState *) PG_GETARG_POINTER(0);
+
+ /* number_of_rows could be zero if we only saw NULL input values */
+ if (osastate->number_of_rows == 0)
+ PG_RETURN_NULL();
+
+ /* Deconstruct the percentile-array input */
+ if (PG_ARGISNULL(1))
+ PG_RETURN_NULL();
+ param = PG_GETARG_ARRAYTYPE_P(1);
+
+ deconstruct_array(param, FLOAT8OID,
+ /* hard-wired info on type float8 */
+ sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE,
+ &percentiles_datum,
+ &percentiles_null,
+ &num_percentiles);
+
+ if (num_percentiles == 0)
+ PG_RETURN_POINTER(construct_empty_array(osastate->qstate->sortColType));
+
+ pct_info = setup_pct_info(num_percentiles,
+ percentiles_datum,
+ percentiles_null,
+ osastate->number_of_rows,
+ false);
+
+ result_datum = (Datum *) palloc(num_percentiles * sizeof(Datum));
+ result_isnull = (bool *) palloc(num_percentiles * sizeof(bool));
+
+ /*
+ * Start by dealing with any nulls in the param array - those are sorted
+ * to the front on row=0, so set the corresponding result indexes to null
+ */
+ for (i = 0; i < num_percentiles; i++)
+ {
+ int idx = pct_info[i].idx;
+
+ if (pct_info[i].first_row > 0)
+ break;
+
+ result_datum[idx] = (Datum) 0;
+ result_isnull[idx] = true;
+ }
+
+ /*
+ * If there's anything left after doing the nulls, then grind the input
+ * and extract the needed values
+ */
+ if (i < num_percentiles)
+ {
+ /* Finish the sort, or rescan if we already did */
+ if (!osastate->sort_done)
+ {
+ tuplesort_performsort(osastate->sortstate);
+ osastate->sort_done = true;
+ }
+ else
+ tuplesort_rescan(osastate->sortstate);
+
+ for (; i < num_percentiles; i++)
+ {
+ int64 target_row = pct_info[i].first_row;
+ int idx = pct_info[i].idx;
+
+ /* Advance to target row, if not already there */
+ if (target_row > rownum)
+ {
+ if (!tuplesort_skiptuples(osastate->sortstate, target_row - rownum - 1, true))
+ elog(ERROR, "missing row in percentile_disc");
+
+ if (!tuplesort_getdatum(osastate->sortstate, true, &val, &isnull, NULL))
+ elog(ERROR, "missing row in percentile_disc");
+
+ rownum = target_row;
+ }
+
+ result_datum[idx] = val;
+ result_isnull[idx] = isnull;
+ }
+ }
+
+ /* We make the output array the same shape as the input */
+ PG_RETURN_POINTER(construct_md_array(result_datum, result_isnull,
+ ARR_NDIM(param),
+ ARR_DIMS(param),
+ ARR_LBOUND(param),
+ osastate->qstate->sortColType,
+ osastate->qstate->typLen,
+ osastate->qstate->typByVal,
+ osastate->qstate->typAlign));
+}
+
+/*
+ * percentile_cont(float8[]) within group () - continuous percentiles
+ */
+static Datum
+percentile_cont_multi_final_common(FunctionCallInfo fcinfo,
+ Oid expect_type,
+ int16 typLen, bool typByVal, char typAlign,
+ LerpFunc lerpfunc)
+{
+ OSAPerGroupState *osastate;
+ ArrayType *param;
+ Datum *percentiles_datum;
+ bool *percentiles_null;
+ int num_percentiles;
+ struct pct_info *pct_info;
+ Datum *result_datum;
+ bool *result_isnull;
+ int64 rownum = 0;
+ Datum first_val = (Datum) 0;
+ Datum second_val = (Datum) 0;
+ bool isnull;
+ int i;
+
+ Assert(AggCheckCallContext(fcinfo, NULL) == AGG_CONTEXT_AGGREGATE);
+
+ /* If there were no regular rows, the result is NULL */
+ if (PG_ARGISNULL(0))
+ PG_RETURN_NULL();
+
+ osastate = (OSAPerGroupState *) PG_GETARG_POINTER(0);
+
+ /* number_of_rows could be zero if we only saw NULL input values */
+ if (osastate->number_of_rows == 0)
+ PG_RETURN_NULL();
+
+ Assert(expect_type == osastate->qstate->sortColType);
+
+ /* Deconstruct the percentile-array input */
+ if (PG_ARGISNULL(1))
+ PG_RETURN_NULL();
+ param = PG_GETARG_ARRAYTYPE_P(1);
+
+ deconstruct_array(param, FLOAT8OID,
+ /* hard-wired info on type float8 */
+ sizeof(float8), FLOAT8PASSBYVAL, TYPALIGN_DOUBLE,
+ &percentiles_datum,
+ &percentiles_null,
+ &num_percentiles);
+
+ if (num_percentiles == 0)
+ PG_RETURN_POINTER(construct_empty_array(osastate->qstate->sortColType));
+
+ pct_info = setup_pct_info(num_percentiles,
+ percentiles_datum,
+ percentiles_null,
+ osastate->number_of_rows,
+ true);
+
+ result_datum = (Datum *) palloc(num_percentiles * sizeof(Datum));
+ result_isnull = (bool *) palloc(num_percentiles * sizeof(bool));
+
+ /*
+ * Start by dealing with any nulls in the param array - those are sorted
+ * to the front on row=0, so set the corresponding result indexes to null
+ */
+ for (i = 0; i < num_percentiles; i++)
+ {
+ int idx = pct_info[i].idx;
+
+ if (pct_info[i].first_row > 0)
+ break;
+
+ result_datum[idx] = (Datum) 0;
+ result_isnull[idx] = true;
+ }
+
+ /*
+ * If there's anything left after doing the nulls, then grind the input
+ * and extract the needed values
+ */
+ if (i < num_percentiles)
+ {
+ /* Finish the sort, or rescan if we already did */
+ if (!osastate->sort_done)
+ {
+ tuplesort_performsort(osastate->sortstate);
+ osastate->sort_done = true;
+ }
+ else
+ tuplesort_rescan(osastate->sortstate);
+
+ for (; i < num_percentiles; i++)
+ {
+ int64 first_row = pct_info[i].first_row;
+ int64 second_row = pct_info[i].second_row;
+ int idx = pct_info[i].idx;
+
+ /*
+ * Advance to first_row, if not already there. Note that we might
+ * already have rownum beyond first_row, in which case first_val
+ * is already correct. (This occurs when interpolating between
+ * the same two input rows as for the previous percentile.)
+ */
+ if (first_row > rownum)
+ {
+ if (!tuplesort_skiptuples(osastate->sortstate, first_row - rownum - 1, true))
+ elog(ERROR, "missing row in percentile_cont");
+
+ if (!tuplesort_getdatum(osastate->sortstate, true, &first_val,
+ &isnull, NULL) || isnull)
+ elog(ERROR, "missing row in percentile_cont");
+
+ rownum = first_row;
+ /* Always advance second_val to be latest input value */
+ second_val = first_val;
+ }
+ else if (first_row == rownum)
+ {
+ /*
+ * We are already at the desired row, so we must previously
+ * have read its value into second_val (and perhaps first_val
+ * as well, but this assignment is harmless in that case).
+ */
+ first_val = second_val;
+ }
+
+ /* Fetch second_row if needed */
+ if (second_row > rownum)
+ {
+ if (!tuplesort_getdatum(osastate->sortstate, true, &second_val,
+ &isnull, NULL) || isnull)
+ elog(ERROR, "missing row in percentile_cont");
+ rownum++;
+ }
+ /* We should now certainly be on second_row exactly */
+ Assert(second_row == rownum);
+
+ /* Compute appropriate result */
+ if (second_row > first_row)
+ result_datum[idx] = lerpfunc(first_val, second_val,
+ pct_info[i].proportion);
+ else
+ result_datum[idx] = first_val;
+
+ result_isnull[idx] = false;
+ }
+ }
+
+ /* We make the output array the same shape as the input */
+ PG_RETURN_POINTER(construct_md_array(result_datum, result_isnull,
+ ARR_NDIM(param),
+ ARR_DIMS(param), ARR_LBOUND(param),
+ expect_type,
+ typLen,
+ typByVal,
+ typAlign));
+}
+
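For context on the interpolation above: for percentile_cont, setup_pct_info() (not shown in this hunk) derives first_row, second_row and the proportion from the zero-based target position p * (N - 1), and the lerpfunc blends the two bracketing values. Assuming the float8 lerp is the usual lo + (hi - lo) * fraction, a self-contained sketch of the whole computation over a sorted array looks like this:

    #include <math.h>
    #include <stdio.h>

    static double
    lerp_sketch(double lo, double hi, double frac)
    {
        return lo + (hi - lo) * frac;
    }

    /* continuous percentile p over n sorted values (n > 0, 0 <= p <= 1) */
    static double
    percentile_cont_sketch(const double *sorted, int n, double p)
    {
        double pos = p * (n - 1);       /* zero-based target position */
        int    lo = (int) floor(pos);
        int    hi = (int) ceil(pos);

        if (lo == hi)
            return sorted[lo];          /* landed exactly on a row */
        return lerp_sketch(sorted[lo], sorted[hi], pos - lo);
    }

    int
    main(void)
    {
        double vals[] = {10.0, 20.0, 30.0, 40.0};

        /* position 0.25 * 3 = 0.75, so 10 + (20 - 10) * 0.75 = 17.5 */
        printf("%g\n", percentile_cont_sketch(vals, 4, 0.25));
        return 0;
    }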
+/*
+ * percentile_cont(float8[]) within group (float8) - continuous percentiles
+ */
+Datum
+percentile_cont_float8_multi_final(PG_FUNCTION_ARGS)
+{
+ return percentile_cont_multi_final_common(fcinfo,
+ FLOAT8OID,
+ /* hard-wired info on type float8 */
+ sizeof(float8),
+ FLOAT8PASSBYVAL,
+ TYPALIGN_DOUBLE,
+ float8_lerp);
+}
+
+/*
+ * percentile_cont(float8[]) within group (interval) - continuous percentiles
+ */
+Datum
+percentile_cont_interval_multi_final(PG_FUNCTION_ARGS)
+{
+ return percentile_cont_multi_final_common(fcinfo,
+ INTERVALOID,
+ /* hard-wired info on type interval */
+ 16, false, TYPALIGN_DOUBLE,
+ interval_lerp);
+}
+
+
+/*
+ * mode() within group (anyelement) - most common value
+ */
+Datum
+mode_final(PG_FUNCTION_ARGS)
+{
+ OSAPerGroupState *osastate;
+ Datum val;
+ bool isnull;
+ Datum mode_val = (Datum) 0;
+ int64 mode_freq = 0;
+ Datum last_val = (Datum) 0;
+ int64 last_val_freq = 0;
+ bool last_val_is_mode = false;
+ FmgrInfo *equalfn;
+ Datum abbrev_val = (Datum) 0;
+ Datum last_abbrev_val = (Datum) 0;
+ bool shouldfree;
+
+ Assert(AggCheckCallContext(fcinfo, NULL) == AGG_CONTEXT_AGGREGATE);
+
+ /* If there were no regular rows, the result is NULL */
+ if (PG_ARGISNULL(0))
+ PG_RETURN_NULL();
+
+ osastate = (OSAPerGroupState *) PG_GETARG_POINTER(0);
+
+ /* number_of_rows could be zero if we only saw NULL input values */
+ if (osastate->number_of_rows == 0)
+ PG_RETURN_NULL();
+
+ /* Look up the equality function for the datatype, if we didn't already */
+ equalfn = &(osastate->qstate->equalfn);
+ if (!OidIsValid(equalfn->fn_oid))
+ fmgr_info_cxt(get_opcode(osastate->qstate->eqOperator), equalfn,
+ osastate->qstate->qcontext);
+
+ shouldfree = !(osastate->qstate->typByVal);
+
+ /* Finish the sort, or rescan if we already did */
+ if (!osastate->sort_done)
+ {
+ tuplesort_performsort(osastate->sortstate);
+ osastate->sort_done = true;
+ }
+ else
+ tuplesort_rescan(osastate->sortstate);
+
+ /* Scan tuples and count frequencies */
+ while (tuplesort_getdatum(osastate->sortstate, true, &val, &isnull, &abbrev_val))
+ {
+ /* we don't expect any nulls, but ignore them if found */
+ if (isnull)
+ continue;
+
+ if (last_val_freq == 0)
+ {
+ /* first nonnull value - it's the mode for now */
+ mode_val = last_val = val;
+ mode_freq = last_val_freq = 1;
+ last_val_is_mode = true;
+ last_abbrev_val = abbrev_val;
+ }
+ else if (abbrev_val == last_abbrev_val &&
+ DatumGetBool(FunctionCall2Coll(equalfn, PG_GET_COLLATION(), val, last_val)))
+ {
+ /* value equal to previous value, count it */
+ if (last_val_is_mode)
+ mode_freq++; /* needn't maintain last_val_freq */
+ else if (++last_val_freq > mode_freq)
+ {
+ /* last_val becomes new mode */
+ if (shouldfree)
+ pfree(DatumGetPointer(mode_val));
+ mode_val = last_val;
+ mode_freq = last_val_freq;
+ last_val_is_mode = true;
+ }
+ if (shouldfree)
+ pfree(DatumGetPointer(val));
+ }
+ else
+ {
+ /* val should replace last_val */
+ if (shouldfree && !last_val_is_mode)
+ pfree(DatumGetPointer(last_val));
+ last_val = val;
+ /* avoid equality function calls by reusing abbreviated keys */
+ last_abbrev_val = abbrev_val;
+ last_val_freq = 1;
+ last_val_is_mode = false;
+ }
+
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ if (shouldfree && !last_val_is_mode)
+ pfree(DatumGetPointer(last_val));
+
+ if (mode_freq)
+ PG_RETURN_DATUM(mode_val);
+ else
+ PG_RETURN_NULL();
+}
+
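Because mode_final() reads the values back in sorted order, equal values are adjacent and the mode can be found by tracking run lengths, keeping the first value seen when runs tie. Stripped of the Datum handling, abbreviated keys and pfree bookkeeping, the underlying idea is just this (a sketch, not the function above):

    #include <stdio.h>

    /* mode of a sorted, non-empty array; on ties the first value wins */
    static int
    mode_of_sorted(const int *vals, int n)
    {
        int mode_val = vals[0];
        int mode_freq = 1;
        int run_val = vals[0];
        int run_freq = 1;

        for (int i = 1; i < n; i++)
        {
            if (vals[i] == run_val)
                run_freq++;             /* extend the current run */
            else
            {
                run_val = vals[i];      /* start a new run */
                run_freq = 1;
            }
            if (run_freq > mode_freq)
            {
                mode_val = run_val;
                mode_freq = run_freq;
            }
        }
        return mode_val;
    }

    int
    main(void)
    {
        int vals[] = {1, 2, 2, 2, 3, 3};

        printf("%d\n", mode_of_sorted(vals, 6));    /* prints 2 */
        return 0;
    }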
+
+/*
+ * Common code to sanity-check args for hypothetical-set functions. No need
+ * for friendly errors; these can only happen if someone's messing up the
+ * aggregate definitions. The checks are needed for security, however.
+ */
+static void
+hypothetical_check_argtypes(FunctionCallInfo fcinfo, int nargs,
+ TupleDesc tupdesc)
+{
+ int i;
+
+ /* check that we have an int4 flag column */
+ if (!tupdesc ||
+ (nargs + 1) != tupdesc->natts ||
+ TupleDescAttr(tupdesc, nargs)->atttypid != INT4OID)
+ elog(ERROR, "type mismatch in hypothetical-set function");
+
+ /* check that direct args match in type with aggregated args */
+ for (i = 0; i < nargs; i++)
+ {
+ Form_pg_attribute attr = TupleDescAttr(tupdesc, i);
+
+ if (get_fn_expr_argtype(fcinfo->flinfo, i + 1) != attr->atttypid)
+ elog(ERROR, "type mismatch in hypothetical-set function");
+ }
+}
+
+/*
+ * compute rank of hypothetical row
+ *
+ * flag should be -1 to sort the hypothetical row ahead of its peers, or +1
+ * to sort it behind them.
+ * The total number of regular rows is returned into *number_of_rows.
+ */
+static int64
+hypothetical_rank_common(FunctionCallInfo fcinfo, int flag,
+ int64 *number_of_rows)
+{
+ int nargs = PG_NARGS() - 1;
+ int64 rank = 1;
+ OSAPerGroupState *osastate;
+ TupleTableSlot *slot;
+ int i;
+
+ Assert(AggCheckCallContext(fcinfo, NULL) == AGG_CONTEXT_AGGREGATE);
+
+ /* If there were no regular rows, the rank is always 1 */
+ if (PG_ARGISNULL(0))
+ {
+ *number_of_rows = 0;
+ return 1;
+ }
+
+ osastate = (OSAPerGroupState *) PG_GETARG_POINTER(0);
+ *number_of_rows = osastate->number_of_rows;
+
+ /* Adjust nargs to be the number of direct (or aggregated) args */
+ if (nargs % 2 != 0)
+ elog(ERROR, "wrong number of arguments in hypothetical-set function");
+ nargs /= 2;
+
+ hypothetical_check_argtypes(fcinfo, nargs, osastate->qstate->tupdesc);
+
+ /* because we need a hypothetical row, we can't share transition state */
+ Assert(!osastate->sort_done);
+
+ /* insert the hypothetical row into the sort */
+ slot = osastate->qstate->tupslot;
+ ExecClearTuple(slot);
+ for (i = 0; i < nargs; i++)
+ {
+ slot->tts_values[i] = PG_GETARG_DATUM(i + 1);
+ slot->tts_isnull[i] = PG_ARGISNULL(i + 1);
+ }
+ slot->tts_values[i] = Int32GetDatum(flag);
+ slot->tts_isnull[i] = false;
+ ExecStoreVirtualTuple(slot);
+
+ tuplesort_puttupleslot(osastate->sortstate, slot);
+
+ /* finish the sort */
+ tuplesort_performsort(osastate->sortstate);
+ osastate->sort_done = true;
+
+ /* iterate till we find the hypothetical row */
+ while (tuplesort_gettupleslot(osastate->sortstate, true, true, slot, NULL))
+ {
+ bool isnull;
+ Datum d = slot_getattr(slot, nargs + 1, &isnull);
+
+ if (!isnull && DatumGetInt32(d) != 0)
+ break;
+
+ rank++;
+
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ ExecClearTuple(slot);
+
+ return rank;
+}
+
+
+/*
+ * rank() - rank of hypothetical row
+ */
+Datum
+hypothetical_rank_final(PG_FUNCTION_ARGS)
+{
+ int64 rank;
+ int64 rowcount;
+
+ rank = hypothetical_rank_common(fcinfo, -1, &rowcount);
+
+ PG_RETURN_INT64(rank);
+}
+
+/*
+ * percent_rank() - percentile rank of hypothetical row
+ */
+Datum
+hypothetical_percent_rank_final(PG_FUNCTION_ARGS)
+{
+ int64 rank;
+ int64 rowcount;
+ double result_val;
+
+ rank = hypothetical_rank_common(fcinfo, -1, &rowcount);
+
+ if (rowcount == 0)
+ PG_RETURN_FLOAT8(0);
+
+ result_val = (double) (rank - 1) / (double) (rowcount);
+
+ PG_RETURN_FLOAT8(result_val);
+}
+
+/*
+ * cume_dist() - cumulative distribution of hypothetical row
+ */
+Datum
+hypothetical_cume_dist_final(PG_FUNCTION_ARGS)
+{
+ int64 rank;
+ int64 rowcount;
+ double result_val;
+
+ rank = hypothetical_rank_common(fcinfo, 1, &rowcount);
+
+ result_val = (double) (rank) / (double) (rowcount + 1);
+
+ PG_RETURN_FLOAT8(result_val);
+}
+
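hypothetical_percent_rank_final() and hypothetical_cume_dist_final() above differ from plain rank() only in the arithmetic applied to the value returned by hypothetical_rank_common() (and in the tie-breaking flag: -1 for percent_rank, +1 for cume_dist). With, say, 4 regular rows and a hypothetical rank of 3, the formulas shown in the code work out as follows:

    #include <stdio.h>

    int
    main(void)
    {
        long long rank = 3;
        long long rowcount = 4;

        /* percent_rank: (rank - 1) / rowcount  -> 0.5 */
        printf("percent_rank = %g\n", (double) (rank - 1) / (double) rowcount);

        /* cume_dist: rank / (rowcount + 1)     -> 0.6 */
        printf("cume_dist    = %g\n", (double) rank / (double) (rowcount + 1));
        return 0;
    }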
+/*
+ * dense_rank() - rank of hypothetical row without gaps in ranking
+ */
+Datum
+hypothetical_dense_rank_final(PG_FUNCTION_ARGS)
+{
+ ExprContext *econtext;
+ ExprState *compareTuple;
+ int nargs = PG_NARGS() - 1;
+ int64 rank = 1;
+ int64 duplicate_count = 0;
+ OSAPerGroupState *osastate;
+ int numDistinctCols;
+ Datum abbrevVal = (Datum) 0;
+ Datum abbrevOld = (Datum) 0;
+ TupleTableSlot *slot;
+ TupleTableSlot *extraslot;
+ TupleTableSlot *slot2;
+ int i;
+
+ Assert(AggCheckCallContext(fcinfo, NULL) == AGG_CONTEXT_AGGREGATE);
+
+ /* If there were no regular rows, the rank is always 1 */
+ if (PG_ARGISNULL(0))
+ PG_RETURN_INT64(rank);
+
+ osastate = (OSAPerGroupState *) PG_GETARG_POINTER(0);
+ econtext = osastate->qstate->econtext;
+ if (!econtext)
+ {
+ MemoryContext oldcontext;
+
+ /* Make sure we create the econtext under the correct parent context. */
+ oldcontext = MemoryContextSwitchTo(osastate->qstate->qcontext);
+ osastate->qstate->econtext = CreateStandaloneExprContext();
+ econtext = osastate->qstate->econtext;
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ /* Adjust nargs to be the number of direct (or aggregated) args */
+ if (nargs % 2 != 0)
+ elog(ERROR, "wrong number of arguments in hypothetical-set function");
+ nargs /= 2;
+
+ hypothetical_check_argtypes(fcinfo, nargs, osastate->qstate->tupdesc);
+
+ /*
+ * When comparing tuples, we can omit the flag column since we will only
+ * compare rows with flag == 0.
+ */
+ numDistinctCols = osastate->qstate->numSortCols - 1;
+
+ /* Build tuple comparator, if we didn't already */
+ compareTuple = osastate->qstate->compareTuple;
+ if (compareTuple == NULL)
+ {
+ AttrNumber *sortColIdx = osastate->qstate->sortColIdx;
+ MemoryContext oldContext;
+
+ oldContext = MemoryContextSwitchTo(osastate->qstate->qcontext);
+ compareTuple = execTuplesMatchPrepare(osastate->qstate->tupdesc,
+ numDistinctCols,
+ sortColIdx,
+ osastate->qstate->eqOperators,
+ osastate->qstate->sortCollations,
+ NULL);
+ MemoryContextSwitchTo(oldContext);
+ osastate->qstate->compareTuple = compareTuple;
+ }
+
+ /* because we need a hypothetical row, we can't share transition state */
+ Assert(!osastate->sort_done);
+
+ /* insert the hypothetical row into the sort */
+ slot = osastate->qstate->tupslot;
+ ExecClearTuple(slot);
+ for (i = 0; i < nargs; i++)
+ {
+ slot->tts_values[i] = PG_GETARG_DATUM(i + 1);
+ slot->tts_isnull[i] = PG_ARGISNULL(i + 1);
+ }
+ slot->tts_values[i] = Int32GetDatum(-1);
+ slot->tts_isnull[i] = false;
+ ExecStoreVirtualTuple(slot);
+
+ tuplesort_puttupleslot(osastate->sortstate, slot);
+
+ /* finish the sort */
+ tuplesort_performsort(osastate->sortstate);
+ osastate->sort_done = true;
+
+ /*
+ * We alternate fetching into tupslot and extraslot so that we have the
+ * previous row available for comparisons. This is accomplished by
+ * swapping the slot pointer variables after each row.
+ */
+ extraslot = MakeSingleTupleTableSlot(osastate->qstate->tupdesc,
+ &TTSOpsMinimalTuple);
+ slot2 = extraslot;
+
+ /* iterate till we find the hypothetical row */
+ while (tuplesort_gettupleslot(osastate->sortstate, true, true, slot,
+ &abbrevVal))
+ {
+ bool isnull;
+ Datum d = slot_getattr(slot, nargs + 1, &isnull);
+ TupleTableSlot *tmpslot;
+
+ if (!isnull && DatumGetInt32(d) != 0)
+ break;
+
+ /* count non-distinct tuples */
+ econtext->ecxt_outertuple = slot;
+ econtext->ecxt_innertuple = slot2;
+
+ if (!TupIsNull(slot2) &&
+ abbrevVal == abbrevOld &&
+ ExecQualAndReset(compareTuple, econtext))
+ duplicate_count++;
+
+ tmpslot = slot2;
+ slot2 = slot;
+ slot = tmpslot;
+ /* avoid ExecQual() calls by reusing abbreviated keys */
+ abbrevOld = abbrevVal;
+
+ rank++;
+
+ CHECK_FOR_INTERRUPTS();
+ }
+
+ ExecClearTuple(slot);
+ ExecClearTuple(slot2);
+
+ ExecDropSingleTupleTableSlot(extraslot);
+
+ rank = rank - duplicate_count;
+
+ PG_RETURN_INT64(rank);
+}
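hypothetical_dense_rank_final() above arrives at the dense rank by counting all rows that sort before the hypothetical one and then subtracting the duplicates among them. The same arithmetic over a plain sorted array (the real code compares whole tuples with ExecQual and abbreviated keys rather than scalars) can be sketched as:

    #include <stdio.h>

    /* dense rank a probe value would get within a sorted array */
    static long long
    dense_rank_sketch(const int *sorted, int n, int probe)
    {
        long long rank = 1;
        long long duplicates = 0;

        for (int i = 0; i < n && sorted[i] < probe; i++)
        {
            if (i > 0 && sorted[i] == sorted[i - 1])
                duplicates++;           /* non-distinct preceding row */
            rank++;
        }
        return rank - duplicates;
    }

    int
    main(void)
    {
        int vals[] = {10, 10, 20, 30};

        printf("%lld\n", dense_rank_sketch(vals, 4, 30));   /* prints 3 */
        return 0;
    }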
diff --git a/src/backend/utils/adt/partitionfuncs.c b/src/backend/utils/adt/partitionfuncs.c
new file mode 100644
index 0000000..0243bc0
--- /dev/null
+++ b/src/backend/utils/adt/partitionfuncs.c
@@ -0,0 +1,249 @@
+/*-------------------------------------------------------------------------
+ *
+ * partitionfuncs.c
+ * Functions for accessing partition-related metadata
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/partitionfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "catalog/partition.h"
+#include "catalog/pg_class.h"
+#include "catalog/pg_inherits.h"
+#include "catalog/pg_type.h"
+#include "funcapi.h"
+#include "utils/fmgrprotos.h"
+#include "utils/lsyscache.h"
+#include "utils/syscache.h"
+
+/*
+ * Checks if a given relation can be part of a partition tree. Returns
+ * false if the relation cannot be processed, in which case it is up to
+ * the caller to decide what to do, by either raising an error or doing
+ * something else.
+ */
+static bool
+check_rel_can_be_partition(Oid relid)
+{
+ char relkind;
+ bool relispartition;
+
+ /* Check if relation exists */
+ if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(relid)))
+ return false;
+
+ relkind = get_rel_relkind(relid);
+ relispartition = get_rel_relispartition(relid);
+
+ /* Only allow relation types that can appear in partition trees. */
+ if (!relispartition && !RELKIND_HAS_PARTITIONS(relkind))
+ return false;
+
+ return true;
+}
+
+/*
+ * pg_partition_tree
+ *
+ * Produce a view with one row per member of a partition tree, beginning
+ * from the top-most parent given by the caller. This gives information
+ * about each partition, its immediate partitioned parent, whether it is
+ * a leaf partition, and its level in the hierarchy.
+ */
+Datum
+pg_partition_tree(PG_FUNCTION_ARGS)
+{
+#define PG_PARTITION_TREE_COLS 4
+ Oid rootrelid = PG_GETARG_OID(0);
+ FuncCallContext *funcctx;
+ List *partitions;
+
+ /* stuff done only on the first call of the function */
+ if (SRF_IS_FIRSTCALL())
+ {
+ MemoryContext oldcxt;
+ TupleDesc tupdesc;
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ if (!check_rel_can_be_partition(rootrelid))
+ SRF_RETURN_DONE(funcctx);
+
+ /* switch to memory context appropriate for multiple function calls */
+ oldcxt = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ /*
+ * Find all members of inheritance set. We only need AccessShareLock
+ * on the children for the partition information lookup.
+ */
+ partitions = find_all_inheritors(rootrelid, AccessShareLock, NULL);
+
+ tupdesc = CreateTemplateTupleDesc(PG_PARTITION_TREE_COLS);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "relid",
+ REGCLASSOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 2, "parentid",
+ REGCLASSOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 3, "isleaf",
+ BOOLOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 4, "level",
+ INT4OID, -1, 0);
+
+ funcctx->tuple_desc = BlessTupleDesc(tupdesc);
+
+ /* The only state we need is the partition list */
+ funcctx->user_fctx = (void *) partitions;
+
+ MemoryContextSwitchTo(oldcxt);
+ }
+
+ /* stuff done on every call of the function */
+ funcctx = SRF_PERCALL_SETUP();
+ partitions = (List *) funcctx->user_fctx;
+
+ if (funcctx->call_cntr < list_length(partitions))
+ {
+ Datum result;
+ Datum values[PG_PARTITION_TREE_COLS];
+ bool nulls[PG_PARTITION_TREE_COLS];
+ HeapTuple tuple;
+ Oid parentid = InvalidOid;
+ Oid relid = list_nth_oid(partitions, funcctx->call_cntr);
+ char relkind = get_rel_relkind(relid);
+ int level = 0;
+ List *ancestors = get_partition_ancestors(relid);
+ ListCell *lc;
+
+ /*
+ * Form tuple with appropriate data.
+ */
+ MemSet(nulls, 0, sizeof(nulls));
+ MemSet(values, 0, sizeof(values));
+
+ /* relid */
+ values[0] = ObjectIdGetDatum(relid);
+
+ /* parentid */
+ if (ancestors != NIL)
+ parentid = linitial_oid(ancestors);
+ if (OidIsValid(parentid))
+ values[1] = ObjectIdGetDatum(parentid);
+ else
+ nulls[1] = true;
+
+ /* isleaf */
+ values[2] = BoolGetDatum(!RELKIND_HAS_PARTITIONS(relkind));
+
+ /* level */
+ if (relid != rootrelid)
+ {
+ foreach(lc, ancestors)
+ {
+ level++;
+ if (lfirst_oid(lc) == rootrelid)
+ break;
+ }
+ }
+ values[3] = Int32GetDatum(level);
+
+ tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+ result = HeapTupleGetDatum(tuple);
+ SRF_RETURN_NEXT(funcctx, result);
+ }
+
+ /* done when there are no more elements left */
+ SRF_RETURN_DONE(funcctx);
+}
+
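pg_partition_tree() above (and pg_partition_ancestors() further down) follow the standard value-per-call SRF protocol from funcapi.h: build per-query state under multi_call_memory_ctx on the first call, then emit one row per call until done. A minimal, hypothetical extension function using the same protocol (three_ints_sketch is not in the tree; assumed SQL declaration RETURNS SETOF integer) would look like:

    #include "postgres.h"
    #include "funcapi.h"

    PG_FUNCTION_INFO_V1(three_ints_sketch);

    Datum
    three_ints_sketch(PG_FUNCTION_ARGS)
    {
        FuncCallContext *funcctx;

        if (SRF_IS_FIRSTCALL())
        {
            funcctx = SRF_FIRSTCALL_INIT();
            /*
             * Long-lived per-query state would be built here, after switching
             * to funcctx->multi_call_memory_ctx (as pg_partition_tree does
             * for its partition list); this sketch needs none.
             */
        }

        funcctx = SRF_PERCALL_SETUP();

        /* return 1, 2, 3 on successive calls, then signal completion */
        if (funcctx->call_cntr < 3)
            SRF_RETURN_NEXT(funcctx, Int32GetDatum((int32) funcctx->call_cntr + 1));

        SRF_RETURN_DONE(funcctx);
    }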
+/*
+ * pg_partition_root
+ *
+ * Returns the top-most parent of the partition tree to which a given
+ * relation belongs, or NULL if it's not (or cannot be) part of any
+ * partition tree.
+ */
+Datum
+pg_partition_root(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ Oid rootrelid;
+ List *ancestors;
+
+ if (!check_rel_can_be_partition(relid))
+ PG_RETURN_NULL();
+
+ /* fetch the list of ancestors */
+ ancestors = get_partition_ancestors(relid);
+
+ /*
+ * If the input relation is already the top-most parent, just return
+ * itself.
+ */
+ if (ancestors == NIL)
+ PG_RETURN_OID(relid);
+
+ rootrelid = llast_oid(ancestors);
+ list_free(ancestors);
+
+ /*
+ * "rootrelid" must contain a valid OID, given that the input relation is
+ * a valid partition tree member as checked above.
+ */
+ Assert(OidIsValid(rootrelid));
+ PG_RETURN_OID(rootrelid);
+}
+
+/*
+ * pg_partition_ancestors
+ *
+ * Produces a view with one row per ancestor of the given partition,
+ * including the input relation itself.
+ */
+Datum
+pg_partition_ancestors(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ FuncCallContext *funcctx;
+ List *ancestors;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ MemoryContext oldcxt;
+
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ if (!check_rel_can_be_partition(relid))
+ SRF_RETURN_DONE(funcctx);
+
+ oldcxt = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ ancestors = get_partition_ancestors(relid);
+ ancestors = lcons_oid(relid, ancestors);
+
+ /* The only state we need is the ancestors list */
+ funcctx->user_fctx = (void *) ancestors;
+
+ MemoryContextSwitchTo(oldcxt);
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ ancestors = (List *) funcctx->user_fctx;
+
+ if (funcctx->call_cntr < list_length(ancestors))
+ {
+ Oid relid = list_nth_oid(ancestors, funcctx->call_cntr);
+
+ SRF_RETURN_NEXT(funcctx, ObjectIdGetDatum(relid));
+ }
+
+ SRF_RETURN_DONE(funcctx);
+}
diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c
new file mode 100644
index 0000000..3c8fbe6
--- /dev/null
+++ b/src/backend/utils/adt/pg_locale.c
@@ -0,0 +1,2187 @@
+/*-----------------------------------------------------------------------
+ *
+ * PostgreSQL locale utilities
+ *
+ * Portions Copyright (c) 2002-2022, PostgreSQL Global Development Group
+ *
+ * src/backend/utils/adt/pg_locale.c
+ *
+ *-----------------------------------------------------------------------
+ */
+
+/*----------
+ * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
+ * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
+ * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
+ * toupper(), etc. are always in the same fixed locale.
+ *
+ * LC_MESSAGES is settable at run time and will take effect
+ * immediately.
+ *
+ * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are also
+ * settable at run-time. However, we don't actually set those locale
+ * categories permanently. This would have bizarre effects like no
+ * longer accepting standard floating-point literals in some locales.
+ * Instead, we only set these locale categories briefly when needed,
+ * cache the required information obtained from localeconv() or
+ * strftime(), and then set the locale categories back to "C".
+ * The cached information is only used by the formatting functions
+ * (to_char, etc.) and the money type. For the user, this should all be
+ * transparent.
+ *
+ * !!! NOW HEAR THIS !!!
+ *
+ * We've been bitten repeatedly by this bug, so let's try to keep it in
+ * mind in future: on some platforms, the locale functions return pointers
+ * to static data that will be overwritten by any later locale function.
+ * Thus, for example, the obvious-looking sequence
+ * save = setlocale(category, NULL);
+ * if (!setlocale(category, value))
+ * fail = true;
+ * setlocale(category, save);
+ * DOES NOT WORK RELIABLY: on some platforms the second setlocale() call
+ * will change the memory save is pointing at. To do this sort of thing
+ * safely, you *must* pstrdup what setlocale returns the first time.
+ *
+ * The POSIX locale standard is available here:
+ *
+ * http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
+ *----------
+ */
+
+
+#include "postgres.h"
+
+#include <time.h>
+
+#include "access/htup_details.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_control.h"
+#include "mb/pg_wchar.h"
+#include "utils/builtins.h"
+#include "utils/formatting.h"
+#include "utils/hsearch.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/pg_locale.h"
+#include "utils/syscache.h"
+
+#ifdef USE_ICU
+#include <unicode/ucnv.h>
+#endif
+
+#ifdef __GLIBC__
+#include <gnu/libc-version.h>
+#endif
+
+#ifdef WIN32
+#include <shlwapi.h>
+#endif
+
+#define MAX_L10N_DATA 80
+
+
+/* GUC settings */
+char *locale_messages;
+char *locale_monetary;
+char *locale_numeric;
+char *locale_time;
+
+/*
+ * lc_time localization cache.
+ *
+ * We use only the first 7 or 12 entries of these arrays. The last array
+ * element is left as NULL for the convenience of outside code that wants
+ * to sequentially scan these arrays.
+ */
+char *localized_abbrev_days[7 + 1];
+char *localized_full_days[7 + 1];
+char *localized_abbrev_months[12 + 1];
+char *localized_full_months[12 + 1];
+
+/* is the database's LC_CTYPE the C locale? */
+bool database_ctype_is_c = false;
+
+/* indicates whether locale information cache is valid */
+static bool CurrentLocaleConvValid = false;
+static bool CurrentLCTimeValid = false;
+
+/* Cache for collation-related knowledge */
+
+typedef struct
+{
+ Oid collid; /* hash key: pg_collation OID */
+ bool collate_is_c; /* is collation's LC_COLLATE C? */
+ bool ctype_is_c; /* is collation's LC_CTYPE C? */
+ bool flags_valid; /* true if above flags are valid */
+ pg_locale_t locale; /* locale_t struct, or 0 if not valid */
+} collation_cache_entry;
+
+static HTAB *collation_cache = NULL;
+
+
+#if defined(WIN32) && defined(LC_MESSAGES)
+static char *IsoLocaleName(const char *); /* MSVC specific */
+#endif
+
+#ifdef USE_ICU
+static void icu_set_collation_attributes(UCollator *collator, const char *loc);
+#endif
+
+/*
+ * pg_perm_setlocale
+ *
+ * This wraps the libc function setlocale(), with two additions. First, when
+ * changing LC_CTYPE, update gettext's encoding for the current message
+ * domain. GNU gettext automatically tracks LC_CTYPE on most platforms, but
+ * not on Windows. Second, if the operation is successful, the corresponding
+ * LC_XXX environment variable is set to match. By setting the environment
+ * variable, we ensure that any subsequent use of setlocale(..., "") will
+ * preserve the settings made through this routine. Of course, LC_ALL must
+ * also be unset to fully ensure that, but that has to be done elsewhere after
+ * all the individual LC_XXX variables have been set correctly. (Thank you
+ * Perl for making this kluge necessary.)
+ */
+char *
+pg_perm_setlocale(int category, const char *locale)
+{
+ char *result;
+ const char *envvar;
+
+#ifndef WIN32
+ result = setlocale(category, locale);
+#else
+
+ /*
+ * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
+ * the given value is good and set it in the environment variables. We
+ * must ignore attempts to set to "", which means "keep using the old
+ * environment value".
+ */
+#ifdef LC_MESSAGES
+ if (category == LC_MESSAGES)
+ {
+ result = (char *) locale;
+ if (locale == NULL || locale[0] == '\0')
+ return result;
+ }
+ else
+#endif
+ result = setlocale(category, locale);
+#endif /* WIN32 */
+
+ if (result == NULL)
+ return result; /* fall out immediately on failure */
+
+ /*
+ * Use the right encoding in translated messages. Under ENABLE_NLS, let
+ * pg_bind_textdomain_codeset() figure it out. Under !ENABLE_NLS, message
+ * format strings are ASCII, but database-encoding strings may enter the
+ * message via %s. This makes the overall message encoding equal to the
+ * database encoding.
+ */
+ if (category == LC_CTYPE)
+ {
+ static char save_lc_ctype[LOCALE_NAME_BUFLEN];
+
+ /* copy setlocale() return value before callee invokes it again */
+ strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype));
+ result = save_lc_ctype;
+
+#ifdef ENABLE_NLS
+ SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
+#else
+ SetMessageEncoding(GetDatabaseEncoding());
+#endif
+ }
+
+ switch (category)
+ {
+ case LC_COLLATE:
+ envvar = "LC_COLLATE";
+ break;
+ case LC_CTYPE:
+ envvar = "LC_CTYPE";
+ break;
+#ifdef LC_MESSAGES
+ case LC_MESSAGES:
+ envvar = "LC_MESSAGES";
+#ifdef WIN32
+ result = IsoLocaleName(locale);
+ if (result == NULL)
+ result = (char *) locale;
+ elog(DEBUG3, "IsoLocaleName() executed; locale: \"%s\"", result);
+#endif /* WIN32 */
+ break;
+#endif /* LC_MESSAGES */
+ case LC_MONETARY:
+ envvar = "LC_MONETARY";
+ break;
+ case LC_NUMERIC:
+ envvar = "LC_NUMERIC";
+ break;
+ case LC_TIME:
+ envvar = "LC_TIME";
+ break;
+ default:
+ elog(FATAL, "unrecognized LC category: %d", category);
+ return NULL; /* keep compiler quiet */
+ }
+
+ if (setenv(envvar, result, 1) != 0)
+ return NULL;
+
+ return result;
+}
+
+
+/*
+ * Is the locale name valid for the locale category?
+ *
+ * If successful, and canonname isn't NULL, a palloc'd copy of the locale's
+ * canonical name is stored there. This is especially useful for figuring out
+ * what locale name "" means (ie, the server environment value). (Actually,
+ * it seems that on most implementations that's the only thing it's good for;
+ * we could wish that setlocale gave back a canonically spelled version of
+ * the locale name, but typically it doesn't.)
+ */
+bool
+check_locale(int category, const char *locale, char **canonname)
+{
+ char *save;
+ char *res;
+
+ if (canonname)
+ *canonname = NULL; /* in case of failure */
+
+ save = setlocale(category, NULL);
+ if (!save)
+ return false; /* won't happen, we hope */
+
+ /* save may be pointing at a modifiable scratch variable, see above. */
+ save = pstrdup(save);
+
+ /* set the locale with setlocale, to see if it accepts it. */
+ res = setlocale(category, locale);
+
+ /* save canonical name if requested. */
+ if (res && canonname)
+ *canonname = pstrdup(res);
+
+ /* restore old value. */
+ if (!setlocale(category, save))
+ elog(WARNING, "failed to restore old locale \"%s\"", save);
+ pfree(save);
+
+ return (res != NULL);
+}
+
+
+/*
+ * GUC check/assign hooks
+ *
+ * For most locale categories, the assign hook doesn't actually set the locale
+ * permanently, just reset flags so that the next use will cache the
+ * appropriate values. (See explanation at the top of this file.)
+ *
+ * Note: we accept value = "" as selecting the postmaster's environment
+ * value, whatever it was (so long as the environment setting is legal).
+ * This will have been locked down by an earlier call to pg_perm_setlocale.
+ */
+bool
+check_locale_monetary(char **newval, void **extra, GucSource source)
+{
+ return check_locale(LC_MONETARY, *newval, NULL);
+}
+
+void
+assign_locale_monetary(const char *newval, void *extra)
+{
+ CurrentLocaleConvValid = false;
+}
+
+bool
+check_locale_numeric(char **newval, void **extra, GucSource source)
+{
+ return check_locale(LC_NUMERIC, *newval, NULL);
+}
+
+void
+assign_locale_numeric(const char *newval, void *extra)
+{
+ CurrentLocaleConvValid = false;
+}
+
+bool
+check_locale_time(char **newval, void **extra, GucSource source)
+{
+ return check_locale(LC_TIME, *newval, NULL);
+}
+
+void
+assign_locale_time(const char *newval, void *extra)
+{
+ CurrentLCTimeValid = false;
+}
+
+/*
+ * We allow LC_MESSAGES to actually be set globally.
+ *
+ * Note: we normally disallow value = "" because it wouldn't have consistent
+ * semantics (it'd effectively just use the previous value). However, this
+ * is the value passed for PGC_S_DEFAULT, so don't complain in that case,
+ * not even if the attempted setting fails due to invalid environment value.
+ * The idea there is just to accept the environment setting *if possible*
+ * during startup, until we can read the proper value from postgresql.conf.
+ */
+bool
+check_locale_messages(char **newval, void **extra, GucSource source)
+{
+ if (**newval == '\0')
+ {
+ if (source == PGC_S_DEFAULT)
+ return true;
+ else
+ return false;
+ }
+
+ /*
+ * LC_MESSAGES category does not exist everywhere, but accept it anyway
+ *
+ * On Windows, we can't even check the value, so accept blindly
+ */
+#if defined(LC_MESSAGES) && !defined(WIN32)
+ return check_locale(LC_MESSAGES, *newval, NULL);
+#else
+ return true;
+#endif
+}
+
+void
+assign_locale_messages(const char *newval, void *extra)
+{
+ /*
+ * LC_MESSAGES category does not exist everywhere, but accept it anyway.
+ * We ignore failure, as per comment above.
+ */
+#ifdef LC_MESSAGES
+ (void) pg_perm_setlocale(LC_MESSAGES, newval);
+#endif
+}
+
+
+/*
+ * Frees the malloced content of a struct lconv. (But not the struct
+ * itself.) It's important that this not throw elog(ERROR).
+ */
+static void
+free_struct_lconv(struct lconv *s)
+{
+ if (s->decimal_point)
+ free(s->decimal_point);
+ if (s->thousands_sep)
+ free(s->thousands_sep);
+ if (s->grouping)
+ free(s->grouping);
+ if (s->int_curr_symbol)
+ free(s->int_curr_symbol);
+ if (s->currency_symbol)
+ free(s->currency_symbol);
+ if (s->mon_decimal_point)
+ free(s->mon_decimal_point);
+ if (s->mon_thousands_sep)
+ free(s->mon_thousands_sep);
+ if (s->mon_grouping)
+ free(s->mon_grouping);
+ if (s->positive_sign)
+ free(s->positive_sign);
+ if (s->negative_sign)
+ free(s->negative_sign);
+}
+
+/*
+ * Check that all fields of a struct lconv (or at least, the ones we care
+ * about) are non-NULL. The field list must match free_struct_lconv().
+ */
+static bool
+struct_lconv_is_valid(struct lconv *s)
+{
+ if (s->decimal_point == NULL)
+ return false;
+ if (s->thousands_sep == NULL)
+ return false;
+ if (s->grouping == NULL)
+ return false;
+ if (s->int_curr_symbol == NULL)
+ return false;
+ if (s->currency_symbol == NULL)
+ return false;
+ if (s->mon_decimal_point == NULL)
+ return false;
+ if (s->mon_thousands_sep == NULL)
+ return false;
+ if (s->mon_grouping == NULL)
+ return false;
+ if (s->positive_sign == NULL)
+ return false;
+ if (s->negative_sign == NULL)
+ return false;
+ return true;
+}
+
+
+/*
+ * Convert the strdup'd string at *str from the specified encoding to the
+ * database encoding.
+ */
+static void
+db_encoding_convert(int encoding, char **str)
+{
+ char *pstr;
+ char *mstr;
+
+ /* convert the string to the database encoding */
+ pstr = pg_any_to_server(*str, strlen(*str), encoding);
+ if (pstr == *str)
+ return; /* no conversion happened */
+
+ /* need it malloc'd not palloc'd */
+ mstr = strdup(pstr);
+ if (mstr == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of memory")));
+
+ /* replace old string */
+ free(*str);
+ *str = mstr;
+
+ pfree(pstr);
+}
+
+
+/*
+ * Return the POSIX lconv struct (contains number/money formatting
+ * information) with locale information for all categories.
+ */
+struct lconv *
+PGLC_localeconv(void)
+{
+ static struct lconv CurrentLocaleConv;
+ static bool CurrentLocaleConvAllocated = false;
+ struct lconv *extlconv;
+ struct lconv worklconv;
+ char *save_lc_monetary;
+ char *save_lc_numeric;
+#ifdef WIN32
+ char *save_lc_ctype;
+#endif
+
+ /* Did we do it already? */
+ if (CurrentLocaleConvValid)
+ return &CurrentLocaleConv;
+
+ /* Free any already-allocated storage */
+ if (CurrentLocaleConvAllocated)
+ {
+ free_struct_lconv(&CurrentLocaleConv);
+ CurrentLocaleConvAllocated = false;
+ }
+
+ /*
+ * This is tricky because we really don't want to risk throwing error
+ * while the locale is set to other than our usual settings. Therefore,
+ * the process is: collect the usual settings, set locale to special
+ * setting, copy relevant data into worklconv using strdup(), restore
+ * normal settings, convert data to desired encoding, and finally stash
+ * the collected data in CurrentLocaleConv. This makes it safe if we
+ * throw an error during encoding conversion or run out of memory anywhere
+ * in the process. All data pointed to by struct lconv members is
+ * allocated with strdup, to avoid premature elog(ERROR) and to allow
+ * using a single cleanup routine.
+ */
+ memset(&worklconv, 0, sizeof(worklconv));
+
+ /* Save prevailing values of monetary and numeric locales */
+ save_lc_monetary = setlocale(LC_MONETARY, NULL);
+ if (!save_lc_monetary)
+ elog(ERROR, "setlocale(NULL) failed");
+ save_lc_monetary = pstrdup(save_lc_monetary);
+
+ save_lc_numeric = setlocale(LC_NUMERIC, NULL);
+ if (!save_lc_numeric)
+ elog(ERROR, "setlocale(NULL) failed");
+ save_lc_numeric = pstrdup(save_lc_numeric);
+
+#ifdef WIN32
+
+ /*
+ * The POSIX standard explicitly says that it is undefined what happens if
+ * LC_MONETARY or LC_NUMERIC imply an encoding (codeset) different from
+ * that implied by LC_CTYPE. In practice, all Unix-ish platforms seem to
+ * believe that localeconv() should return strings that are encoded in the
+ * codeset implied by the LC_MONETARY or LC_NUMERIC locale name. Hence,
+ * once we have successfully collected the localeconv() results, we will
+ * convert them from that codeset to the desired server encoding.
+ *
+ * Windows, of course, resolutely does things its own way; on that
+ * platform LC_CTYPE has to match LC_MONETARY/LC_NUMERIC to get sane
+ * results. Hence, we must temporarily set that category as well.
+ */
+
+ /* Save prevailing value of ctype locale */
+ save_lc_ctype = setlocale(LC_CTYPE, NULL);
+ if (!save_lc_ctype)
+ elog(ERROR, "setlocale(NULL) failed");
+ save_lc_ctype = pstrdup(save_lc_ctype);
+
+ /* Here begins the critical section where we must not throw error */
+
+ /* use numeric to set the ctype */
+ setlocale(LC_CTYPE, locale_numeric);
+#endif
+
+ /* Get formatting information for numeric */
+ setlocale(LC_NUMERIC, locale_numeric);
+ extlconv = localeconv();
+
+ /* Must copy data now in case setlocale() overwrites it */
+ worklconv.decimal_point = strdup(extlconv->decimal_point);
+ worklconv.thousands_sep = strdup(extlconv->thousands_sep);
+ worklconv.grouping = strdup(extlconv->grouping);
+
+#ifdef WIN32
+ /* use monetary to set the ctype */
+ setlocale(LC_CTYPE, locale_monetary);
+#endif
+
+ /* Get formatting information for monetary */
+ setlocale(LC_MONETARY, locale_monetary);
+ extlconv = localeconv();
+
+ /* Must copy data now in case setlocale() overwrites it */
+ worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
+ worklconv.currency_symbol = strdup(extlconv->currency_symbol);
+ worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
+ worklconv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
+ worklconv.mon_grouping = strdup(extlconv->mon_grouping);
+ worklconv.positive_sign = strdup(extlconv->positive_sign);
+ worklconv.negative_sign = strdup(extlconv->negative_sign);
+ /* Copy scalar fields as well */
+ worklconv.int_frac_digits = extlconv->int_frac_digits;
+ worklconv.frac_digits = extlconv->frac_digits;
+ worklconv.p_cs_precedes = extlconv->p_cs_precedes;
+ worklconv.p_sep_by_space = extlconv->p_sep_by_space;
+ worklconv.n_cs_precedes = extlconv->n_cs_precedes;
+ worklconv.n_sep_by_space = extlconv->n_sep_by_space;
+ worklconv.p_sign_posn = extlconv->p_sign_posn;
+ worklconv.n_sign_posn = extlconv->n_sign_posn;
+
+ /*
+ * Restore the prevailing locale settings; failure to do so is fatal.
+ * Possibly we could limp along with nondefault LC_MONETARY or LC_NUMERIC,
+ * but proceeding with the wrong value of LC_CTYPE would certainly be bad
+ * news; and considering that the prevailing LC_MONETARY and LC_NUMERIC
+ * are almost certainly "C", there's really no reason that restoring those
+ * should fail.
+ */
+#ifdef WIN32
+ if (!setlocale(LC_CTYPE, save_lc_ctype))
+ elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
+#endif
+ if (!setlocale(LC_MONETARY, save_lc_monetary))
+ elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary);
+ if (!setlocale(LC_NUMERIC, save_lc_numeric))
+ elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric);
+
+ /*
+ * At this point we've done our best to clean up, and can call functions
+ * that might possibly throw errors with a clean conscience. But let's
+ * make sure we don't leak any already-strdup'd fields in worklconv.
+ */
+ PG_TRY();
+ {
+ int encoding;
+
+ /* Release the pstrdup'd locale names */
+ pfree(save_lc_monetary);
+ pfree(save_lc_numeric);
+#ifdef WIN32
+ pfree(save_lc_ctype);
+#endif
+
+ /* If any of the preceding strdup calls failed, complain now. */
+ if (!struct_lconv_is_valid(&worklconv))
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of memory")));
+
+ /*
+ * Now we must perform encoding conversion from whatever's associated
+ * with the locales into the database encoding. If we can't identify
+ * the encoding implied by LC_NUMERIC or LC_MONETARY (ie we get -1),
+ * use PG_SQL_ASCII, which will result in just validating that the
+ * strings are OK in the database encoding.
+ */
+ encoding = pg_get_encoding_from_locale(locale_numeric, true);
+ if (encoding < 0)
+ encoding = PG_SQL_ASCII;
+
+ db_encoding_convert(encoding, &worklconv.decimal_point);
+ db_encoding_convert(encoding, &worklconv.thousands_sep);
+ /* grouping is not text and does not require conversion */
+
+ encoding = pg_get_encoding_from_locale(locale_monetary, true);
+ if (encoding < 0)
+ encoding = PG_SQL_ASCII;
+
+ db_encoding_convert(encoding, &worklconv.int_curr_symbol);
+ db_encoding_convert(encoding, &worklconv.currency_symbol);
+ db_encoding_convert(encoding, &worklconv.mon_decimal_point);
+ db_encoding_convert(encoding, &worklconv.mon_thousands_sep);
+ /* mon_grouping is not text and does not require conversion */
+ db_encoding_convert(encoding, &worklconv.positive_sign);
+ db_encoding_convert(encoding, &worklconv.negative_sign);
+ }
+ PG_CATCH();
+ {
+ free_struct_lconv(&worklconv);
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+
+ /*
+ * Everything is good, so save the results.
+ */
+ CurrentLocaleConv = worklconv;
+ CurrentLocaleConvAllocated = true;
+ CurrentLocaleConvValid = true;
+ return &CurrentLocaleConv;
+}
+
+#ifdef WIN32
+/*
+ * On Windows, strftime() returns its output in encoding CP_ACP (the default
+ * operating system codepage for the computer), which is likely different
+ * from SERVER_ENCODING. This is especially important on Japanese versions
+ * of Windows, which use SJIS, an encoding we don't support as a server
+ * encoding.
+ *
+ * So, instead of using strftime(), use wcsftime() to return the value in
+ * wide characters (internally UTF16) and then convert to UTF8, which we
+ * know how to handle directly.
+ *
+ * Note that this only affects the calls to strftime() in this file, which are
+ * used to get the locale-aware strings. Other parts of the backend use
+ * pg_strftime(), which isn't locale-aware and does not need to be replaced.
+ */
+static size_t
+strftime_win32(char *dst, size_t dstlen,
+ const char *format, const struct tm *tm)
+{
+ size_t len;
+ wchar_t wformat[8]; /* formats used below need 3 chars */
+ wchar_t wbuf[MAX_L10N_DATA];
+
+ /*
+ * Get a wchar_t version of the format string. We only actually use
+ * plain-ASCII formats in this file, so we can say that they're UTF8.
+ */
+ len = MultiByteToWideChar(CP_UTF8, 0, format, -1,
+ wformat, lengthof(wformat));
+ if (len == 0)
+ elog(ERROR, "could not convert format string from UTF-8: error code %lu",
+ GetLastError());
+
+ len = wcsftime(wbuf, MAX_L10N_DATA, wformat, tm);
+ if (len == 0)
+ {
+ /*
+ * wcsftime failed, possibly because the result would not fit in
+ * MAX_L10N_DATA. Return 0 with the contents of dst unspecified.
+ */
+ return 0;
+ }
+
+ len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen - 1,
+ NULL, NULL);
+ if (len == 0)
+ elog(ERROR, "could not convert string to UTF-8: error code %lu",
+ GetLastError());
+
+ dst[len] = '\0';
+
+ return len;
+}
+
+/* redefine strftime() */
+#define strftime(a,b,c,d) strftime_win32(a,b,c,d)
+#endif /* WIN32 */
+
+/*
+ * Subroutine for cache_locale_time().
+ * Convert the given string from encoding "encoding" to the database
+ * encoding, and store the result at *dst, replacing any previous value.
+ */
+static void
+cache_single_string(char **dst, const char *src, int encoding)
+{
+ char *ptr;
+ char *olddst;
+
+ /* Convert the string to the database encoding, or validate it's OK */
+ ptr = pg_any_to_server(src, strlen(src), encoding);
+
+ /* Store the string in long-lived storage, replacing any previous value */
+ olddst = *dst;
+ *dst = MemoryContextStrdup(TopMemoryContext, ptr);
+ if (olddst)
+ pfree(olddst);
+
+ /* Might as well clean up any palloc'd conversion result, too */
+ if (ptr != src)
+ pfree(ptr);
+}
+
+/*
+ * Update the lc_time localization cache variables if needed.
+ */
+void
+cache_locale_time(void)
+{
+ char buf[(2 * 7 + 2 * 12) * MAX_L10N_DATA];
+ char *bufptr;
+ time_t timenow;
+ struct tm *timeinfo;
+ bool strftimefail = false;
+ int encoding;
+ int i;
+ char *save_lc_time;
+#ifdef WIN32
+ char *save_lc_ctype;
+#endif
+
+ /* did we do this already? */
+ if (CurrentLCTimeValid)
+ return;
+
+ elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
+
+ /*
+ * As in PGLC_localeconv(), it's critical that we not throw error while
+ * libc's locale settings have nondefault values. Hence, we just call
+ * strftime() within the critical section, and then convert and save its
+ * results afterwards.
+ */
+
+ /* Save prevailing value of time locale */
+ save_lc_time = setlocale(LC_TIME, NULL);
+ if (!save_lc_time)
+ elog(ERROR, "setlocale(NULL) failed");
+ save_lc_time = pstrdup(save_lc_time);
+
+#ifdef WIN32
+
+ /*
+ * On Windows, it appears that wcsftime() internally uses LC_CTYPE, so we
+ * must set it here. This code looks the same as what PGLC_localeconv()
+ * does, but the underlying reason is different: this does NOT determine
+ * the encoding we'll get back from strftime_win32().
+ */
+
+ /* Save prevailing value of ctype locale */
+ save_lc_ctype = setlocale(LC_CTYPE, NULL);
+ if (!save_lc_ctype)
+ elog(ERROR, "setlocale(NULL) failed");
+ save_lc_ctype = pstrdup(save_lc_ctype);
+
+ /* use lc_time to set the ctype */
+ setlocale(LC_CTYPE, locale_time);
+#endif
+
+ setlocale(LC_TIME, locale_time);
+
+ /* We use times close to current time as data for strftime(). */
+ timenow = time(NULL);
+ timeinfo = localtime(&timenow);
+
+ /* Store the strftime results in MAX_L10N_DATA-sized portions of buf[] */
+ bufptr = buf;
+
+ /*
+ * MAX_L10N_DATA is sufficient buffer space for every known locale, and
+ * POSIX defines no strftime() errors. (Buffer space exhaustion is not an
+ * error.) An implementation might report errors (e.g. ENOMEM) by
+ * returning 0 (or, less plausibly, a negative value) and setting errno.
+ * Report errno just in case the implementation did that, but clear it in
+ * advance of the calls so we don't emit a stale, unrelated errno.
+ */
+ errno = 0;
+
+ /* localized days */
+ for (i = 0; i < 7; i++)
+ {
+ timeinfo->tm_wday = i;
+ if (strftime(bufptr, MAX_L10N_DATA, "%a", timeinfo) <= 0)
+ strftimefail = true;
+ bufptr += MAX_L10N_DATA;
+ if (strftime(bufptr, MAX_L10N_DATA, "%A", timeinfo) <= 0)
+ strftimefail = true;
+ bufptr += MAX_L10N_DATA;
+ }
+
+ /* localized months */
+ for (i = 0; i < 12; i++)
+ {
+ timeinfo->tm_mon = i;
+ timeinfo->tm_mday = 1; /* make sure we don't have invalid date */
+ if (strftime(bufptr, MAX_L10N_DATA, "%b", timeinfo) <= 0)
+ strftimefail = true;
+ bufptr += MAX_L10N_DATA;
+ if (strftime(bufptr, MAX_L10N_DATA, "%B", timeinfo) <= 0)
+ strftimefail = true;
+ bufptr += MAX_L10N_DATA;
+ }
+
+ /*
+ * Restore the prevailing locale settings; as in PGLC_localeconv(),
+ * failure to do so is fatal.
+ */
+#ifdef WIN32
+ if (!setlocale(LC_CTYPE, save_lc_ctype))
+ elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
+#endif
+ if (!setlocale(LC_TIME, save_lc_time))
+ elog(FATAL, "failed to restore LC_TIME to \"%s\"", save_lc_time);
+
+ /*
+ * At this point we've done our best to clean up, and can throw errors, or
+ * call functions that might throw errors, with a clean conscience.
+ */
+ if (strftimefail)
+ elog(ERROR, "strftime() failed: %m");
+
+ /* Release the pstrdup'd locale names */
+ pfree(save_lc_time);
+#ifdef WIN32
+ pfree(save_lc_ctype);
+#endif
+
+#ifndef WIN32
+
+ /*
+ * As in PGLC_localeconv(), we must convert strftime()'s output from the
+ * encoding implied by LC_TIME to the database encoding. If we can't
+ * identify the LC_TIME encoding, just perform encoding validation.
+ */
+ encoding = pg_get_encoding_from_locale(locale_time, true);
+ if (encoding < 0)
+ encoding = PG_SQL_ASCII;
+
+#else
+
+ /*
+ * On Windows, strftime_win32() always returns UTF8 data, so convert from
+ * that if necessary.
+ */
+ encoding = PG_UTF8;
+
+#endif /* WIN32 */
+
+ bufptr = buf;
+
+ /* localized days */
+ for (i = 0; i < 7; i++)
+ {
+ cache_single_string(&localized_abbrev_days[i], bufptr, encoding);
+ bufptr += MAX_L10N_DATA;
+ cache_single_string(&localized_full_days[i], bufptr, encoding);
+ bufptr += MAX_L10N_DATA;
+ }
+ localized_abbrev_days[7] = NULL;
+ localized_full_days[7] = NULL;
+
+ /* localized months */
+ for (i = 0; i < 12; i++)
+ {
+ cache_single_string(&localized_abbrev_months[i], bufptr, encoding);
+ bufptr += MAX_L10N_DATA;
+ cache_single_string(&localized_full_months[i], bufptr, encoding);
+ bufptr += MAX_L10N_DATA;
+ }
+ localized_abbrev_months[12] = NULL;
+ localized_full_months[12] = NULL;
+
+ CurrentLCTimeValid = true;
+}
+
+
+#if defined(WIN32) && defined(LC_MESSAGES)
+/*
+ * Convert a Windows setlocale() argument to a Unix-style one.
+ *
+ * Regardless of platform, we install message catalogs under a Unix-style
+ * LL[_CC][.ENCODING][@VARIANT] naming convention. Only LC_MESSAGES settings
+ * following that style will elicit localized interface strings.
+ *
+ * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
+ * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
+ * case-insensitive. setlocale() returns the fully-qualified form; for
+ * example, setlocale("thaI") returns "Thai_Thailand.874". Internally,
+ * setlocale() and _create_locale() select a "locale identifier"[1] and store
+ * it in an undocumented _locale_t field. From that LCID, we can retrieve the
+ * ISO 639 language and the ISO 3166 country. Character encoding does not
+ * matter, because the server and client encodings govern that.
+ *
+ * Windows Vista introduced the "locale name" concept[2], closely following
+ * RFC 4646. Locale identifiers are now deprecated. Starting with Visual
+ * Studio 2012, setlocale() accepts locale names in addition to the strings it
+ * accepted historically. It does not standardize them; setlocale("Th-tH")
+ * returns "Th-tH". setlocale(category, "") still returns a traditional
+ * string. Furthermore, msvcr110.dll changed the undocumented _locale_t
+ * content to carry locale names instead of locale identifiers.
+ *
+ * Visual Studio 2015 should still be able to do the same as Visual Studio
+ * 2012, but the declaration of locale_name is missing in _locale_t, which
+ * would make this code fail to compile; hence we instead fall back to
+ * enumerating all system locales with EnumSystemLocalesEx() to find the
+ * required locale name. If the input argument is already in Unix style, we
+ * can get the ISO locale name directly by calling GetLocaleInfoEx() with
+ * LCType LOCALE_SNAME.
+ *
+ * MinGW headers declare _create_locale(), but msvcrt.dll lacks that symbol in
+ * releases before Windows 8. IsoLocaleName() always fails in a MinGW-built
+ * postgres.exe, so only Unix-style values of the lc_messages GUC can elicit
+ * localized messages. In particular, every lc_messages setting that initdb
+ * can select automatically will yield only C-locale messages. XXX This could
+ * be fixed by running the fully-qualified locale name through a lookup table.
+ *
+ * This function returns a pointer to a static buffer bearing the converted
+ * name or NULL if conversion fails.
+ *
+ * [1] https://docs.microsoft.com/en-us/windows/win32/intl/locale-identifiers
+ * [2] https://docs.microsoft.com/en-us/windows/win32/intl/locale-names
+ */
+
+#if _MSC_VER >= 1900
+/*
+ * Callback function for EnumSystemLocalesEx() in get_iso_localename().
+ *
+ * This function enumerates all system locales, searching for one that matches
+ * an input with the format: <Language>[_<Country>], e.g.
+ * English[_United States]
+ *
+ * The input is a three wchar_t array as an LPARAM. The first element is the
+ * locale_name we want to match, the second element is an allocated buffer
+ * where the Unix-style locale is copied if a match is found, and the third
+ * element is the search status, 1 if a match was found, 0 otherwise.
+ */
+static BOOL CALLBACK
+search_locale_enum(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
+{
+ wchar_t test_locale[LOCALE_NAME_MAX_LENGTH];
+ wchar_t **argv;
+
+ (void) (dwFlags);
+
+ argv = (wchar_t **) lparam;
+ *argv[2] = (wchar_t) 0;
+
+ memset(test_locale, 0, sizeof(test_locale));
+
+ /* Get the name of the <Language> in English */
+ if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHLANGUAGENAME,
+ test_locale, LOCALE_NAME_MAX_LENGTH))
+ {
+ /*
+ * If the enumerated locale does not have a hyphen ("en") OR the
+ * lc_message input does not have an underscore ("English"), we only
+ * need to compare the <Language> tags.
+ */
+ if (wcsrchr(pStr, '-') == NULL || wcsrchr(argv[0], '_') == NULL)
+ {
+ if (_wcsicmp(argv[0], test_locale) == 0)
+ {
+ wcscpy(argv[1], pStr);
+ *argv[2] = (wchar_t) 1;
+ return FALSE;
+ }
+ }
+
+ /*
+ * We have to compare a full <Language>_<Country> tag, so we append
+ * the underscore and name of the country/region in English, e.g.
+ * "English_United States".
+ */
+ else
+ {
+ size_t len;
+
+ wcscat(test_locale, L"_");
+ len = wcslen(test_locale);
+ if (GetLocaleInfoEx(pStr, LOCALE_SENGLISHCOUNTRYNAME,
+ test_locale + len,
+ LOCALE_NAME_MAX_LENGTH - len))
+ {
+ if (_wcsicmp(argv[0], test_locale) == 0)
+ {
+ wcscpy(argv[1], pStr);
+ *argv[2] = (wchar_t) 1;
+ return FALSE;
+ }
+ }
+ }
+ }
+
+ return TRUE;
+}
+
+/*
+ * This function converts a Windows locale name to an ISO formatted version
+ * for Visual Studio 2015 or greater.
+ *
+ * Returns NULL, if no valid conversion was found.
+ */
+static char *
+get_iso_localename(const char *winlocname)
+{
+ wchar_t wc_locale_name[LOCALE_NAME_MAX_LENGTH];
+ wchar_t buffer[LOCALE_NAME_MAX_LENGTH];
+ static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
+ char *period;
+ int len;
+ int ret_val;
+
+ /*
+ * Valid locales have the following syntax:
+ * <Language>[_<Country>[.<CodePage>]]
+ *
+ * GetLocaleInfoEx can only take locale name without code-page and for the
+ * purpose of this API the code-page doesn't matter.
+ */
+ period = strchr(winlocname, '.');
+ if (period != NULL)
+ len = period - winlocname;
+ else
+ len = pg_mbstrlen(winlocname);
+
+ memset(wc_locale_name, 0, sizeof(wc_locale_name));
+ memset(buffer, 0, sizeof(buffer));
+ MultiByteToWideChar(CP_ACP, 0, winlocname, len, wc_locale_name,
+ LOCALE_NAME_MAX_LENGTH);
+
+ /*
+ * If the lc_messages is already a Unix-style string, we have a direct
+ * match with LOCALE_SNAME, e.g. en-US, en_US.
+ */
+ ret_val = GetLocaleInfoEx(wc_locale_name, LOCALE_SNAME, (LPWSTR) &buffer,
+ LOCALE_NAME_MAX_LENGTH);
+ if (!ret_val)
+ {
+ /*
+ * Search for a locale in the system that matches language and country
+ * name.
+ */
+ wchar_t *argv[3];
+
+ argv[0] = wc_locale_name;
+ argv[1] = buffer;
+ argv[2] = (wchar_t *) &ret_val;
+ EnumSystemLocalesEx(search_locale_enum, LOCALE_WINDOWS, (LPARAM) argv,
+ NULL);
+ }
+
+ if (ret_val)
+ {
+ size_t rc;
+ char *hyphen;
+
+ /* Locale names use only ASCII, any conversion locale suffices. */
+ rc = wchar2char(iso_lc_messages, buffer, sizeof(iso_lc_messages), NULL);
+ if (rc == -1 || rc == sizeof(iso_lc_messages))
+ return NULL;
+
+ /*
+ * Simply replace the hyphen with an underscore. See comments in
+ * IsoLocaleName.
+ */
+ hyphen = strchr(iso_lc_messages, '-');
+ if (hyphen)
+ *hyphen = '_';
+ return iso_lc_messages;
+ }
+
+ return NULL;
+}
+#endif /* _MSC_VER >= 1900 */
+
+static char *
+IsoLocaleName(const char *winlocname)
+{
+#if defined(_MSC_VER)
+ static char iso_lc_messages[LOCALE_NAME_MAX_LENGTH];
+
+ if (pg_strcasecmp("c", winlocname) == 0 ||
+ pg_strcasecmp("posix", winlocname) == 0)
+ {
+ strcpy(iso_lc_messages, "C");
+ return iso_lc_messages;
+ }
+ else
+ {
+#if (_MSC_VER >= 1900) /* Visual Studio 2015 or later */
+ return get_iso_localename(winlocname);
+#else
+ _locale_t loct;
+
+ loct = _create_locale(LC_CTYPE, winlocname);
+ if (loct != NULL)
+ {
+ size_t rc;
+ char *hyphen;
+
+ /* Locale names use only ASCII, any conversion locale suffices. */
+ rc = wchar2char(iso_lc_messages, loct->locinfo->locale_name[LC_CTYPE],
+ sizeof(iso_lc_messages), NULL);
+ _free_locale(loct);
+ if (rc == -1 || rc == sizeof(iso_lc_messages))
+ return NULL;
+
+ /*
+ * Since the message catalogs sit on a case-insensitive
+ * filesystem, we need not standardize letter case here. So long
+ * as we do not ship message catalogs for which it would matter,
+ * we also need not translate the script/variant portion, e.g.
+ * uz-Cyrl-UZ to uz_UZ@cyrillic. Simply replace the hyphen with
+ * an underscore.
+ *
+ * Note that the locale name can be less-specific than the value
+ * we would derive under earlier Visual Studio releases. For
+ * example, French_France.1252 yields just "fr". This does not
+ * affect any of the country-specific message catalogs available
+ * as of this writing (pt_BR, zh_CN, zh_TW).
+ */
+ hyphen = strchr(iso_lc_messages, '-');
+ if (hyphen)
+ *hyphen = '_';
+ return iso_lc_messages;
+ }
+#endif /* Visual Studio 2015 or later */
+ }
+#endif /* defined(_MSC_VER) */
+ return NULL; /* Not supported on this version of msvc/mingw */
+}
+#endif /* WIN32 && LC_MESSAGES */
+
+
+/*
+ * Detect aging strxfrm() implementations that, in a subset of locales, write
+ * past the specified buffer length. Affected users must update OS packages
+ * before using PostgreSQL 9.5 or later.
+ *
+ * Assume that the bug can come and go from one postmaster startup to another
+ * due to physical replication among diverse machines. Assume that the bug's
+ * presence will not change during the life of a particular postmaster. Given
+ * those assumptions, call this no less than once per postmaster startup per
+ * LC_COLLATE setting used. No known-affected system offers strxfrm_l(), so
+ * there is no need to consider pg_collation locales.
+ */
+void
+check_strxfrm_bug(void)
+{
+ char buf[32];
+ const int canary = 0x7F;
+ bool ok = true;
+
+ /*
+ * Given a two-byte ASCII string and length limit 7, 8 or 9, Solaris 10
+ * 05/08 returns 18 and modifies 10 bytes. It respects limits above or
+ * below that range.
+ *
+ * The bug is present in Solaris 8 as well; it is absent in Solaris 10
+ * 01/13 and Solaris 11.2. Affected locales include is_IS.ISO8859-1,
+ * en_US.UTF-8, en_US.ISO8859-1, and ru_RU.KOI8-R. Unaffected locales
+ * include de_DE.UTF-8, de_DE.ISO8859-1, zh_TW.UTF-8, and C.
+ */
+ buf[7] = canary;
+ (void) strxfrm(buf, "ab", 7);
+ if (buf[7] != canary)
+ ok = false;
+
+ /*
+ * illumos bug #1594 was present in the source tree from 2010-10-11 to
+ * 2012-02-01. Given an ASCII string of any length and length limit 1,
+ * affected systems ignore the length limit and modify a number of bytes
+ * one less than the return value. The problem inputs for this bug do not
+ * overlap those for the Solaris bug, hence a distinct test.
+ *
+ * Affected systems include smartos-20110926T021612Z. Affected locales
+ * include en_US.ISO8859-1 and en_US.UTF-8. Unaffected locales include C.
+ */
+ buf[1] = canary;
+ (void) strxfrm(buf, "a", 1);
+ if (buf[1] != canary)
+ ok = false;
+
+ if (!ok)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYSTEM_ERROR),
+ errmsg_internal("strxfrm(), in locale \"%s\", writes past the specified array length",
+ setlocale(LC_COLLATE, NULL)),
+ errhint("Apply system library package updates.")));
+}
+
+
+/*
+ * Cache mechanism for collation information.
+ *
+ * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
+ * (or POSIX), so we can optimize a few code paths in various places.
+ * For the built-in C and POSIX collations, we can know that without even
+ * doing a cache lookup, but we want to support aliases for C/POSIX too.
+ * For the "default" collation, there are separate static cache variables,
+ * since consulting the pg_collation catalog doesn't tell us what we need.
+ *
+ * Also, if a pg_locale_t has been requested for a collation, we cache that
+ * for the life of a backend.
+ *
+ * Note that some code relies on the flags not reporting false negatives
+ * (that is, saying it's not C when it is). For example, char2wchar()
+ * could fail if the locale is C, so str_tolower() shouldn't call it
+ * in that case.
+ *
+ * Note that we currently lack any way to flush the cache. Since we don't
+ * support ALTER COLLATION, this is OK. The worst case is that someone
+ * drops a collation, and a useless cache entry hangs around in existing
+ * backends.
+ */
+
+static collation_cache_entry *
+lookup_collation_cache(Oid collation, bool set_flags)
+{
+ collation_cache_entry *cache_entry;
+ bool found;
+
+ Assert(OidIsValid(collation));
+ Assert(collation != DEFAULT_COLLATION_OID);
+
+ if (collation_cache == NULL)
+ {
+ /* First time through, initialize the hash table */
+ HASHCTL ctl;
+
+ ctl.keysize = sizeof(Oid);
+ ctl.entrysize = sizeof(collation_cache_entry);
+ collation_cache = hash_create("Collation cache", 100, &ctl,
+ HASH_ELEM | HASH_BLOBS);
+ }
+
+ cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
+ if (!found)
+ {
+ /*
+ * Make sure cache entry is marked invalid, in case we fail before
+ * setting things.
+ */
+ cache_entry->flags_valid = false;
+ cache_entry->locale = 0;
+ }
+
+ if (set_flags && !cache_entry->flags_valid)
+ {
+ /* Attempt to set the flags */
+ HeapTuple tp;
+ Form_pg_collation collform;
+
+ tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
+ if (!HeapTupleIsValid(tp))
+ elog(ERROR, "cache lookup failed for collation %u", collation);
+ collform = (Form_pg_collation) GETSTRUCT(tp);
+
+ if (collform->collprovider == COLLPROVIDER_LIBC)
+ {
+ Datum datum;
+ bool isnull;
+ const char *collcollate;
+ const char *collctype;
+
+ datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collcollate, &isnull);
+ Assert(!isnull);
+ collcollate = TextDatumGetCString(datum);
+ datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collctype, &isnull);
+ Assert(!isnull);
+ collctype = TextDatumGetCString(datum);
+
+ cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
+ (strcmp(collcollate, "POSIX") == 0));
+ cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
+ (strcmp(collctype, "POSIX") == 0));
+ }
+ else
+ {
+ cache_entry->collate_is_c = false;
+ cache_entry->ctype_is_c = false;
+ }
+
+ cache_entry->flags_valid = true;
+
+ ReleaseSysCache(tp);
+ }
+
+ return cache_entry;
+}
+
+
+/*
+ * Detect whether collation's LC_COLLATE property is C
+ */
+bool
+lc_collate_is_c(Oid collation)
+{
+ /*
+ * If we're asked about "collation 0", return false, so that the code will
+ * go into the non-C path and report that the collation is bogus.
+ */
+ if (!OidIsValid(collation))
+ return false;
+
+ /*
+ * If we're asked about the default collation, we have to inquire of the C
+ * library. Cache the result so we only have to compute it once.
+ */
+ if (collation == DEFAULT_COLLATION_OID)
+ {
+ static int result = -1;
+ char *localeptr;
+
+ if (default_locale.provider == COLLPROVIDER_ICU)
+ return false;
+
+ if (result >= 0)
+ return (bool) result;
+ localeptr = setlocale(LC_COLLATE, NULL);
+ if (!localeptr)
+ elog(ERROR, "invalid LC_COLLATE setting");
+
+ if (strcmp(localeptr, "C") == 0)
+ result = true;
+ else if (strcmp(localeptr, "POSIX") == 0)
+ result = true;
+ else
+ result = false;
+ return (bool) result;
+ }
+
+ /*
+ * If we're asked about the built-in C/POSIX collations, we know that.
+ */
+ if (collation == C_COLLATION_OID ||
+ collation == POSIX_COLLATION_OID)
+ return true;
+
+ /*
+ * Otherwise, we have to consult pg_collation, but we cache that.
+ */
+ return (lookup_collation_cache(collation, true))->collate_is_c;
+}
+
+/*
+ * Detect whether collation's LC_CTYPE property is C
+ */
+bool
+lc_ctype_is_c(Oid collation)
+{
+ /*
+ * If we're asked about "collation 0", return false, so that the code will
+ * go into the non-C path and report that the collation is bogus.
+ */
+ if (!OidIsValid(collation))
+ return false;
+
+ /*
+ * If we're asked about the default collation, we have to inquire of the C
+ * library. Cache the result so we only have to compute it once.
+ */
+ if (collation == DEFAULT_COLLATION_OID)
+ {
+ static int result = -1;
+ char *localeptr;
+
+ if (default_locale.provider == COLLPROVIDER_ICU)
+ return false;
+
+ if (result >= 0)
+ return (bool) result;
+ localeptr = setlocale(LC_CTYPE, NULL);
+ if (!localeptr)
+ elog(ERROR, "invalid LC_CTYPE setting");
+
+ if (strcmp(localeptr, "C") == 0)
+ result = true;
+ else if (strcmp(localeptr, "POSIX") == 0)
+ result = true;
+ else
+ result = false;
+ return (bool) result;
+ }
+
+ /*
+ * If we're asked about the built-in C/POSIX collations, we know that.
+ */
+ if (collation == C_COLLATION_OID ||
+ collation == POSIX_COLLATION_OID)
+ return true;
+
+ /*
+ * Otherwise, we have to consult pg_collation, but we cache that.
+ */
+ return (lookup_collation_cache(collation, true))->ctype_is_c;
+}
+
+struct pg_locale_struct default_locale;
+
+void
+make_icu_collator(const char *iculocstr,
+ struct pg_locale_struct *resultp)
+{
+#ifdef USE_ICU
+ UCollator *collator;
+ UErrorCode status;
+
+ status = U_ZERO_ERROR;
+ collator = ucol_open(iculocstr, &status);
+ if (U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("could not open collator for locale \"%s\": %s",
+ iculocstr, u_errorName(status))));
+
+ if (U_ICU_VERSION_MAJOR_NUM < 54)
+ icu_set_collation_attributes(collator, iculocstr);
+
+ /* We will leak this string if the caller errors later :-( */
+ resultp->info.icu.locale = MemoryContextStrdup(TopMemoryContext, iculocstr);
+ resultp->info.icu.ucol = collator;
+#else /* not USE_ICU */
+ /* could get here if a collation was created by a build with ICU */
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("ICU is not supported in this build")));
+#endif /* not USE_ICU */
+}
+
+
+/* simple subroutine for reporting errors from newlocale() */
+#ifdef HAVE_LOCALE_T
+static void
+report_newlocale_failure(const char *localename)
+{
+ int save_errno;
+
+ /*
+ * Windows doesn't provide any useful error indication from
+ * _create_locale(), and BSD-derived platforms don't seem to feel they
+ * need to set errno either (even though POSIX is pretty clear that
+ * newlocale should do so). So, if errno hasn't been set, assume ENOENT
+ * is what to report.
+ */
+ if (errno == 0)
+ errno = ENOENT;
+
+ /*
+ * ENOENT means "no such locale", not "no such file", so clarify that
+ * errno with an errdetail message.
+ */
+ save_errno = errno; /* auxiliary funcs might change errno */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("could not create locale \"%s\": %m",
+ localename),
+ (save_errno == ENOENT ?
+ errdetail("The operating system could not find any locale data for the locale name \"%s\".",
+ localename) : 0)));
+}
+#endif /* HAVE_LOCALE_T */
+
+
+/*
+ * Create a locale_t from a collation OID. Results are cached for the
+ * lifetime of the backend. Thus, do not free the result with freelocale().
+ *
+ * As a special optimization, the default/database collation returns 0.
+ * Callers should then revert to the non-locale_t-enabled code path.
+ * Also, callers should avoid calling this before going down a C/POSIX
+ * fastpath, because such a fastpath should work even on platforms without
+ * locale_t support in the C library.
+ *
+ * For simplicity, we always generate COLLATE + CTYPE even though we
+ * might only need one of them. Since this is called only once per session,
+ * it shouldn't cost much.
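+ *
+ * Callers typically test lc_collate_is_c() / lc_ctype_is_c() first and only
+ * come here for the non-C case, e.g. (a common pattern, not a requirement):
+ *
+ *     if (!lc_collate_is_c(collid))
+ *         mylocale = pg_newlocale_from_collation(collid);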
+ */
+pg_locale_t
+pg_newlocale_from_collation(Oid collid)
+{
+ collation_cache_entry *cache_entry;
+
+ /* Callers must pass a valid OID */
+ Assert(OidIsValid(collid));
+
+ if (collid == DEFAULT_COLLATION_OID)
+ {
+ if (default_locale.provider == COLLPROVIDER_ICU)
+ return &default_locale;
+ else
+ return (pg_locale_t) 0;
+ }
+
+ cache_entry = lookup_collation_cache(collid, false);
+
+ if (cache_entry->locale == 0)
+ {
+ /* We haven't computed this yet in this session, so do it */
+ HeapTuple tp;
+ Form_pg_collation collform;
+ struct pg_locale_struct result;
+ pg_locale_t resultp;
+ Datum datum;
+ bool isnull;
+
+ tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
+ if (!HeapTupleIsValid(tp))
+ elog(ERROR, "cache lookup failed for collation %u", collid);
+ collform = (Form_pg_collation) GETSTRUCT(tp);
+
+ /* We'll fill in the result struct locally before allocating memory */
+ memset(&result, 0, sizeof(result));
+ result.provider = collform->collprovider;
+ result.deterministic = collform->collisdeterministic;
+
+ if (collform->collprovider == COLLPROVIDER_LIBC)
+ {
+#ifdef HAVE_LOCALE_T
+ const char *collcollate;
+ const char *collctype pg_attribute_unused();
+ locale_t loc;
+
+ datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collcollate, &isnull);
+ Assert(!isnull);
+ collcollate = TextDatumGetCString(datum);
+ datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collctype, &isnull);
+ Assert(!isnull);
+ collctype = TextDatumGetCString(datum);
+
+ if (strcmp(collcollate, collctype) == 0)
+ {
+ /* Normal case where they're the same */
+ errno = 0;
+#ifndef WIN32
+ loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
+ NULL);
+#else
+ loc = _create_locale(LC_ALL, collcollate);
+#endif
+ if (!loc)
+ report_newlocale_failure(collcollate);
+ }
+ else
+ {
+#ifndef WIN32
+ /* We need two newlocale() steps */
+ locale_t loc1;
+
+ errno = 0;
+ loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
+ if (!loc1)
+ report_newlocale_failure(collcollate);
+ errno = 0;
+ loc = newlocale(LC_CTYPE_MASK, collctype, loc1);
+ if (!loc)
+ report_newlocale_failure(collctype);
+#else
+
+ /*
+ * XXX The _create_locale() API doesn't appear to support
+ * this. Could perhaps be worked around by changing
+ * pg_locale_t to contain two separate fields.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("collations with different collate and ctype values are not supported on this platform")));
+#endif
+ }
+
+ result.info.lt = loc;
+#else /* not HAVE_LOCALE_T */
+ /* platform that doesn't support locale_t */
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("collation provider LIBC is not supported on this platform")));
+#endif /* not HAVE_LOCALE_T */
+ }
+ else if (collform->collprovider == COLLPROVIDER_ICU)
+ {
+ const char *iculocstr;
+
+ datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_colliculocale, &isnull);
+ Assert(!isnull);
+ iculocstr = TextDatumGetCString(datum);
+ make_icu_collator(iculocstr, &result);
+ }
+
+ datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
+ &isnull);
+ if (!isnull)
+ {
+ char *actual_versionstr;
+ char *collversionstr;
+
+ collversionstr = TextDatumGetCString(datum);
+
+ datum = SysCacheGetAttr(COLLOID, tp, collform->collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colliculocale : Anum_pg_collation_collcollate, &isnull);
+ Assert(!isnull);
+
+ actual_versionstr = get_collation_actual_version(collform->collprovider,
+ TextDatumGetCString(datum));
+ if (!actual_versionstr)
+ {
+ /*
+ * This could happen when specifying a version in CREATE
+ * COLLATION but the provider does not support versioning, or
+ * manually creating a mess in the catalogs.
+ */
+ ereport(ERROR,
+ (errmsg("collation \"%s\" has no actual version, but a version was recorded",
+ NameStr(collform->collname))));
+ }
+
+ if (strcmp(actual_versionstr, collversionstr) != 0)
+ ereport(WARNING,
+ (errmsg("collation \"%s\" has version mismatch",
+ NameStr(collform->collname)),
+ errdetail("The collation in the database was created using version %s, "
+ "but the operating system provides version %s.",
+ collversionstr, actual_versionstr),
+ errhint("Rebuild all objects affected by this collation and run "
+ "ALTER COLLATION %s REFRESH VERSION, "
+ "or build PostgreSQL with the right library version.",
+ quote_qualified_identifier(get_namespace_name(collform->collnamespace),
+ NameStr(collform->collname)))));
+ }
+
+ ReleaseSysCache(tp);
+
+ /* We'll keep the pg_locale_t structures in TopMemoryContext */
+ resultp = MemoryContextAlloc(TopMemoryContext, sizeof(*resultp));
+ *resultp = result;
+
+ cache_entry->locale = resultp;
+ }
+
+ return cache_entry->locale;
+}
+
+/*
+ * Get provider-specific collation version string for the given collation from
+ * the operating system/library.
+ */
+char *
+get_collation_actual_version(char collprovider, const char *collcollate)
+{
+ char *collversion = NULL;
+
+#ifdef USE_ICU
+ if (collprovider == COLLPROVIDER_ICU)
+ {
+ UCollator *collator;
+ UErrorCode status;
+ UVersionInfo versioninfo;
+ char buf[U_MAX_VERSION_STRING_LENGTH];
+
+ status = U_ZERO_ERROR;
+ collator = ucol_open(collcollate, &status);
+ if (U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("could not open collator for locale \"%s\": %s",
+ collcollate, u_errorName(status))));
+ ucol_getVersion(collator, versioninfo);
+ ucol_close(collator);
+
+ u_versionToString(versioninfo, buf);
+ collversion = pstrdup(buf);
+ }
+ else
+#endif
+ if (collprovider == COLLPROVIDER_LIBC &&
+ pg_strcasecmp("C", collcollate) != 0 &&
+ pg_strncasecmp("C.", collcollate, 2) != 0 &&
+ pg_strcasecmp("POSIX", collcollate) != 0)
+ {
+#if defined(__GLIBC__)
+ /* Use the glibc version because we don't have anything better. */
+ collversion = pstrdup(gnu_get_libc_version());
+#elif defined(LC_VERSION_MASK)
+ locale_t loc;
+
+ /* Look up FreeBSD collation version. */
+ loc = newlocale(LC_COLLATE_MASK, collcollate, NULL);
+ if (loc)
+ {
+ collversion =
+ pstrdup(querylocale(LC_COLLATE_MASK | LC_VERSION_MASK, loc));
+ freelocale(loc);
+ }
+ else
+ ereport(ERROR,
+ (errmsg("could not load locale \"%s\"", collcollate)));
+#elif defined(WIN32) && _WIN32_WINNT >= 0x0600
+ /*
+ * If we are targeting Windows Vista and above, we can ask for a name
+ * given a collation name (earlier versions required a location code
+ * that we don't have).
+ */
+ NLSVERSIONINFOEX version = {sizeof(NLSVERSIONINFOEX)};
+ WCHAR wide_collcollate[LOCALE_NAME_MAX_LENGTH];
+
+ MultiByteToWideChar(CP_ACP, 0, collcollate, -1, wide_collcollate,
+ LOCALE_NAME_MAX_LENGTH);
+ if (!GetNLSVersionEx(COMPARE_STRING, wide_collcollate, &version))
+ {
+ /*
+ * GetNLSVersionEx() wants a language tag such as "en-US", not a
+ * locale name like "English_United States.1252". Until those
+ * values can be prevented from entering the system, or 100%
+ * reliably converted to the more useful tag format, tolerate the
+ * resulting error and report that we have no version data.
+ */
+ if (GetLastError() == ERROR_INVALID_PARAMETER)
+ return NULL;
+
+ ereport(ERROR,
+ (errmsg("could not get collation version for locale \"%s\": error code %lu",
+ collcollate,
+ GetLastError())));
+ }
+ collversion = psprintf("%d.%d,%d.%d",
+ (version.dwNLSVersion >> 8) & 0xFFFF,
+ version.dwNLSVersion & 0xFF,
+ (version.dwDefinedVersion >> 8) & 0xFFFF,
+ version.dwDefinedVersion & 0xFF);
+#endif
+ }
+
+ return collversion;
+}
+
+
+#ifdef USE_ICU
+/*
+ * Converter object for converting between ICU's UChar strings and C strings
+ * in database encoding. Since the database encoding doesn't change, we only
+ * need one of these per session.
+ */
+static UConverter *icu_converter = NULL;
+
+static void
+init_icu_converter(void)
+{
+ const char *icu_encoding_name;
+ UErrorCode status;
+ UConverter *conv;
+
+ if (icu_converter)
+ return; /* already done */
+
+ icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
+ if (!icu_encoding_name)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("encoding \"%s\" not supported by ICU",
+ pg_encoding_to_char(GetDatabaseEncoding()))));
+
+ status = U_ZERO_ERROR;
+ conv = ucnv_open(icu_encoding_name, &status);
+ if (U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("could not open ICU converter for encoding \"%s\": %s",
+ icu_encoding_name, u_errorName(status))));
+
+ icu_converter = conv;
+}
+
+/*
+ * Convert a string in the database encoding into a string of UChars.
+ *
+ * The source string at buff is of length nbytes
+ * (it needn't be nul-terminated)
+ *
+ * *buff_uchar receives a pointer to the palloc'd result string, and
+ * the function's result is the number of UChars generated.
+ *
+ * The result string is nul-terminated, though most callers rely on the
+ * result length instead.
+ */
+int32_t
+icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
+{
+ UErrorCode status;
+ int32_t len_uchar;
+
+ init_icu_converter();
+
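+ /*
+ * Standard ICU preflighting: the first ucnv_toUChars() call passes a NULL
+ * destination to learn the required length (ICU reports
+ * U_BUFFER_OVERFLOW_ERROR in that case), and the second call performs the
+ * conversion into the palloc'd buffer.
+ */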
+ status = U_ZERO_ERROR;
+ len_uchar = ucnv_toUChars(icu_converter, NULL, 0,
+ buff, nbytes, &status);
+ if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
+ ereport(ERROR,
+ (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
+
+ *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
+
+ status = U_ZERO_ERROR;
+ len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar + 1,
+ buff, nbytes, &status);
+ if (U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
+
+ return len_uchar;
+}
+
+/*
+ * Convert a string of UChars into the database encoding.
+ *
+ * The source string at buff_uchar is of length len_uchar
+ * (it needn't be nul-terminated)
+ *
+ * *result receives a pointer to the palloc'd result string, and the
+ * function's result is the number of bytes generated (not counting nul).
+ *
+ * The result string is nul-terminated.
+ */
+int32_t
+icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
+{
+ UErrorCode status;
+ int32_t len_result;
+
+ init_icu_converter();
+
+ status = U_ZERO_ERROR;
+ len_result = ucnv_fromUChars(icu_converter, NULL, 0,
+ buff_uchar, len_uchar, &status);
+ if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
+ ereport(ERROR,
+ (errmsg("%s failed: %s", "ucnv_fromUChars",
+ u_errorName(status))));
+
+ *result = palloc(len_result + 1);
+
+ status = U_ZERO_ERROR;
+ len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
+ buff_uchar, len_uchar, &status);
+ if (U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("%s failed: %s", "ucnv_fromUChars",
+ u_errorName(status))));
+
+ return len_result;
+}
+
+/*
+ * Parse collation attributes and apply them to the open collator. This takes
+ * a string like "und@colStrength=primary;colCaseLevel=yes" and parses and
+ * applies the key-value arguments.
+ *
+ * Starting with ICU version 54, the attributes are processed automatically by
+ * ucol_open(), so this is only necessary for emulating this behavior on older
+ * versions.
+ */
+pg_attribute_unused()
+static void
+icu_set_collation_attributes(UCollator *collator, const char *loc)
+{
+ char *str = asc_tolower(loc, strlen(loc));
+
+ str = strchr(str, '@');
+ if (!str)
+ return;
+ str++;
+
+ for (char *token = strtok(str, ";"); token; token = strtok(NULL, ";"))
+ {
+ char *e = strchr(token, '=');
+
+ if (e)
+ {
+ char *name;
+ char *value;
+ UColAttribute uattr;
+ UColAttributeValue uvalue;
+ UErrorCode status;
+
+ status = U_ZERO_ERROR;
+
+ *e = '\0';
+ name = token;
+ value = e + 1;
+
+ /*
+ * See attribute name and value lists in ICU i18n/coll.cpp
+ */
+ if (strcmp(name, "colstrength") == 0)
+ uattr = UCOL_STRENGTH;
+ else if (strcmp(name, "colbackwards") == 0)
+ uattr = UCOL_FRENCH_COLLATION;
+ else if (strcmp(name, "colcaselevel") == 0)
+ uattr = UCOL_CASE_LEVEL;
+ else if (strcmp(name, "colcasefirst") == 0)
+ uattr = UCOL_CASE_FIRST;
+ else if (strcmp(name, "colalternate") == 0)
+ uattr = UCOL_ALTERNATE_HANDLING;
+ else if (strcmp(name, "colnormalization") == 0)
+ uattr = UCOL_NORMALIZATION_MODE;
+ else if (strcmp(name, "colnumeric") == 0)
+ uattr = UCOL_NUMERIC_COLLATION;
+ else
+ /* ignore if unknown */
+ continue;
+
+ if (strcmp(value, "primary") == 0)
+ uvalue = UCOL_PRIMARY;
+ else if (strcmp(value, "secondary") == 0)
+ uvalue = UCOL_SECONDARY;
+ else if (strcmp(value, "tertiary") == 0)
+ uvalue = UCOL_TERTIARY;
+ else if (strcmp(value, "quaternary") == 0)
+ uvalue = UCOL_QUATERNARY;
+ else if (strcmp(value, "identical") == 0)
+ uvalue = UCOL_IDENTICAL;
+ else if (strcmp(value, "no") == 0)
+ uvalue = UCOL_OFF;
+ else if (strcmp(value, "yes") == 0)
+ uvalue = UCOL_ON;
+ else if (strcmp(value, "shifted") == 0)
+ uvalue = UCOL_SHIFTED;
+ else if (strcmp(value, "non-ignorable") == 0)
+ uvalue = UCOL_NON_IGNORABLE;
+ else if (strcmp(value, "lower") == 0)
+ uvalue = UCOL_LOWER_FIRST;
+ else if (strcmp(value, "upper") == 0)
+ uvalue = UCOL_UPPER_FIRST;
+ else
+ status = U_ILLEGAL_ARGUMENT_ERROR;
+
+ if (status == U_ZERO_ERROR)
+ ucol_setAttribute(collator, uattr, uvalue, &status);
+
+ /*
+ * Pretend the error came from ucol_open(), for consistent error
+ * message across ICU versions.
+ */
+ if (U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("could not open collator for locale \"%s\": %s",
+ loc, u_errorName(status))));
+ }
+ }
+}
+
+#endif /* USE_ICU */
+
+/*
+ * Check if the given locale ID is valid, and ereport(ERROR) if it isn't.
+ */
+void
+check_icu_locale(const char *icu_locale)
+{
+#ifdef USE_ICU
+ UCollator *collator;
+ UErrorCode status;
+
+ status = U_ZERO_ERROR;
+ collator = ucol_open(icu_locale, &status);
+ if (U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("could not open collator for locale \"%s\": %s",
+ icu_locale, u_errorName(status))));
+
+ if (U_ICU_VERSION_MAJOR_NUM < 54)
+ icu_set_collation_attributes(collator, icu_locale);
+ ucol_close(collator);
+#else
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("ICU is not supported in this build")));
+#endif
+}
+
+/*
+ * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
+ * Therefore we keep them here rather than with the mbutils code.
+ */
+
+/*
+ * wchar2char --- convert wide characters to multibyte format
+ *
+ * This has the same API as the standard wcstombs_l() function; in particular,
+ * tolen is the maximum number of bytes to store at *to, and *from must be
+ * zero-terminated. The output will be zero-terminated iff there is room.
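+ *
+ * As with wcstombs(), the return value is the number of bytes written to
+ * *to (not counting any terminating zero), or (size_t) -1 on conversion
+ * failure.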
+ */
+size_t
+wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
+{
+ size_t result;
+
+ Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+
+ if (tolen == 0)
+ return 0;
+
+#ifdef WIN32
+
+ /*
+ * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
+ * for some reason mbstowcs and wcstombs won't do this for us, so we use
+ * MultiByteToWideChar().
+ */
+ if (GetDatabaseEncoding() == PG_UTF8)
+ {
+ result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
+ NULL, NULL);
+ /* A zero return is failure */
+ if (result <= 0)
+ result = -1;
+ else
+ {
+ Assert(result <= tolen);
+ /* Microsoft counts the zero terminator in the result */
+ result--;
+ }
+ }
+ else
+#endif /* WIN32 */
+ if (locale == (pg_locale_t) 0)
+ {
+ /* Use wcstombs directly for the default locale */
+ result = wcstombs(to, from, tolen);
+ }
+ else
+ {
+#ifdef HAVE_LOCALE_T
+#ifdef HAVE_WCSTOMBS_L
+ /* Use wcstombs_l for nondefault locales */
+ result = wcstombs_l(to, from, tolen, locale->info.lt);
+#else /* !HAVE_WCSTOMBS_L */
+ /* We have to temporarily set the locale as current ... ugh */
+ locale_t save_locale = uselocale(locale->info.lt);
+
+ result = wcstombs(to, from, tolen);
+
+ uselocale(save_locale);
+#endif /* HAVE_WCSTOMBS_L */
+#else /* !HAVE_LOCALE_T */
+ /* Can't have locale != 0 without HAVE_LOCALE_T */
+ elog(ERROR, "wcstombs_l is not available");
+ result = 0; /* keep compiler quiet */
+#endif /* HAVE_LOCALE_T */
+ }
+
+ return result;
+}
+
+/*
+ * char2wchar --- convert multibyte characters to wide characters
+ *
+ * This has almost the API of mbstowcs_l(), except that *from need not be
+ * null-terminated; instead, the number of input bytes is specified as
+ * fromlen. Also, we ereport() rather than returning -1 for invalid
+ * input encoding. tolen is the maximum number of wchar_t's to store at *to.
+ * The output will be zero-terminated iff there is room.
+ */
+size_t
+char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
+ pg_locale_t locale)
+{
+ size_t result;
+
+ Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
+
+ if (tolen == 0)
+ return 0;
+
+#ifdef WIN32
+ /* See WIN32 "Unicode" comment above */
+ if (GetDatabaseEncoding() == PG_UTF8)
+ {
+ /* Win32 API does not work for zero-length input */
+ if (fromlen == 0)
+ result = 0;
+ else
+ {
+ result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
+ /* A zero return is failure */
+ if (result == 0)
+ result = -1;
+ }
+
+ if (result != -1)
+ {
+ Assert(result < tolen);
+ /* Append trailing null wchar (MultiByteToWideChar() does not) */
+ to[result] = 0;
+ }
+ }
+ else
+#endif /* WIN32 */
+ {
+ /* mbstowcs requires ending '\0' */
+ char *str = pnstrdup(from, fromlen);
+
+ if (locale == (pg_locale_t) 0)
+ {
+ /* Use mbstowcs directly for the default locale */
+ result = mbstowcs(to, str, tolen);
+ }
+ else
+ {
+#ifdef HAVE_LOCALE_T
+#ifdef HAVE_MBSTOWCS_L
+ /* Use mbstowcs_l for nondefault locales */
+ result = mbstowcs_l(to, str, tolen, locale->info.lt);
+#else /* !HAVE_MBSTOWCS_L */
+ /* We have to temporarily set the locale as current ... ugh */
+ locale_t save_locale = uselocale(locale->info.lt);
+
+ result = mbstowcs(to, str, tolen);
+
+ uselocale(save_locale);
+#endif /* HAVE_MBSTOWCS_L */
+#else /* !HAVE_LOCALE_T */
+ /* Can't have locale != 0 without HAVE_LOCALE_T */
+ elog(ERROR, "mbstowcs_l is not available");
+ result = 0; /* keep compiler quiet */
+#endif /* HAVE_LOCALE_T */
+ }
+
+ pfree(str);
+ }
+
+ if (result == -1)
+ {
+ /*
+ * Invalid multibyte character encountered. We try to give a useful
+ * error message by letting pg_verifymbstr check the string. But it's
+ * possible that the string is OK to us, and not OK to mbstowcs ---
+ * this suggests that the LC_CTYPE locale is different from the
+ * database encoding. Give a generic error message if pg_verifymbstr
+ * can't find anything wrong.
+ */
+ pg_verifymbstr(from, fromlen, false); /* might not return */
+ /* but if it does ... */
+ ereport(ERROR,
+ (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
+ errmsg("invalid multibyte character for locale"),
+ errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
+ }
+
+ return result;
+}
diff --git a/src/backend/utils/adt/pg_lsn.c b/src/backend/utils/adt/pg_lsn.c
new file mode 100644
index 0000000..4540878
--- /dev/null
+++ b/src/backend/utils/adt/pg_lsn.c
@@ -0,0 +1,313 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_lsn.c
+ * Operations for the pg_lsn datatype.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/pg_lsn.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "funcapi.h"
+#include "libpq/pqformat.h"
+#include "utils/builtins.h"
+#include "utils/numeric.h"
+#include "utils/pg_lsn.h"
+
+#define MAXPG_LSNLEN 17
+#define MAXPG_LSNCOMPONENT 8
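+
+/*
+ * The text form of an LSN is two hexadecimal numbers of up to
+ * MAXPG_LSNCOMPONENT digits each, separated by a slash; for example,
+ * "16/B374D848" represents ((uint64) 0x16 << 32) + 0xB374D848.
+ */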
+
+/*----------------------------------------------------------
+ * Formatting and conversion routines.
+ *---------------------------------------------------------*/
+
+XLogRecPtr
+pg_lsn_in_internal(const char *str, bool *have_error)
+{
+ int len1,
+ len2;
+ uint32 id,
+ off;
+ XLogRecPtr result;
+
+ Assert(have_error != NULL);
+ *have_error = false;
+
+ /* Sanity check input format. */
+ len1 = strspn(str, "0123456789abcdefABCDEF");
+ if (len1 < 1 || len1 > MAXPG_LSNCOMPONENT || str[len1] != '/')
+ {
+ *have_error = true;
+ return InvalidXLogRecPtr;
+ }
+ len2 = strspn(str + len1 + 1, "0123456789abcdefABCDEF");
+ if (len2 < 1 || len2 > MAXPG_LSNCOMPONENT || str[len1 + 1 + len2] != '\0')
+ {
+ *have_error = true;
+ return InvalidXLogRecPtr;
+ }
+
+ /* Decode result. */
+ id = (uint32) strtoul(str, NULL, 16);
+ off = (uint32) strtoul(str + len1 + 1, NULL, 16);
+ result = ((uint64) id << 32) | off;
+
+ return result;
+}
+
+Datum
+pg_lsn_in(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+ XLogRecPtr result;
+ bool have_error = false;
+
+ result = pg_lsn_in_internal(str, &have_error);
+ if (have_error)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "pg_lsn", str)));
+
+ PG_RETURN_LSN(result);
+}
+
+Datum
+pg_lsn_out(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr lsn = PG_GETARG_LSN(0);
+ char buf[MAXPG_LSNLEN + 1];
+ char *result;
+
+ snprintf(buf, sizeof buf, "%X/%X", LSN_FORMAT_ARGS(lsn));
+ result = pstrdup(buf);
+ PG_RETURN_CSTRING(result);
+}
+
+Datum
+pg_lsn_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ XLogRecPtr result;
+
+ result = pq_getmsgint64(buf);
+ PG_RETURN_LSN(result);
+}
+
+Datum
+pg_lsn_send(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr lsn = PG_GETARG_LSN(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendint64(&buf, lsn);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+
+/*----------------------------------------------------------
+ * Operators for PostgreSQL LSNs
+ *---------------------------------------------------------*/
+
+Datum
+pg_lsn_eq(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr lsn1 = PG_GETARG_LSN(0);
+ XLogRecPtr lsn2 = PG_GETARG_LSN(1);
+
+ PG_RETURN_BOOL(lsn1 == lsn2);
+}
+
+Datum
+pg_lsn_ne(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr lsn1 = PG_GETARG_LSN(0);
+ XLogRecPtr lsn2 = PG_GETARG_LSN(1);
+
+ PG_RETURN_BOOL(lsn1 != lsn2);
+}
+
+Datum
+pg_lsn_lt(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr lsn1 = PG_GETARG_LSN(0);
+ XLogRecPtr lsn2 = PG_GETARG_LSN(1);
+
+ PG_RETURN_BOOL(lsn1 < lsn2);
+}
+
+Datum
+pg_lsn_gt(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr lsn1 = PG_GETARG_LSN(0);
+ XLogRecPtr lsn2 = PG_GETARG_LSN(1);
+
+ PG_RETURN_BOOL(lsn1 > lsn2);
+}
+
+Datum
+pg_lsn_le(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr lsn1 = PG_GETARG_LSN(0);
+ XLogRecPtr lsn2 = PG_GETARG_LSN(1);
+
+ PG_RETURN_BOOL(lsn1 <= lsn2);
+}
+
+Datum
+pg_lsn_ge(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr lsn1 = PG_GETARG_LSN(0);
+ XLogRecPtr lsn2 = PG_GETARG_LSN(1);
+
+ PG_RETURN_BOOL(lsn1 >= lsn2);
+}
+
+Datum
+pg_lsn_larger(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr lsn1 = PG_GETARG_LSN(0);
+ XLogRecPtr lsn2 = PG_GETARG_LSN(1);
+
+ PG_RETURN_LSN((lsn1 > lsn2) ? lsn1 : lsn2);
+}
+
+Datum
+pg_lsn_smaller(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr lsn1 = PG_GETARG_LSN(0);
+ XLogRecPtr lsn2 = PG_GETARG_LSN(1);
+
+ PG_RETURN_LSN((lsn1 < lsn2) ? lsn1 : lsn2);
+}
+
+/* btree index opclass support */
+Datum
+pg_lsn_cmp(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr a = PG_GETARG_LSN(0);
+ XLogRecPtr b = PG_GETARG_LSN(1);
+
+ if (a > b)
+ PG_RETURN_INT32(1);
+ else if (a == b)
+ PG_RETURN_INT32(0);
+ else
+ PG_RETURN_INT32(-1);
+}
+
+/* hash index opclass support */
+Datum
+pg_lsn_hash(PG_FUNCTION_ARGS)
+{
+ /* We can use hashint8 directly */
+ return hashint8(fcinfo);
+}
+
+Datum
+pg_lsn_hash_extended(PG_FUNCTION_ARGS)
+{
+ return hashint8extended(fcinfo);
+}
+
+
+/*----------------------------------------------------------
+ * Arithmetic operators on PostgreSQL LSNs.
+ *---------------------------------------------------------*/
+
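+/*
+ * Subtract one LSN from another, giving the difference as a numeric number
+ * of bytes; for example (sample values), '0/2000100'::pg_lsn minus
+ * '0/2000000'::pg_lsn is 256.
+ */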
+Datum
+pg_lsn_mi(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr lsn1 = PG_GETARG_LSN(0);
+ XLogRecPtr lsn2 = PG_GETARG_LSN(1);
+ char buf[256];
+ Datum result;
+
+ /* Output could be as large as plus or minus 2^63 - 1. */
+ if (lsn1 < lsn2)
+ snprintf(buf, sizeof buf, "-" UINT64_FORMAT, lsn2 - lsn1);
+ else
+ snprintf(buf, sizeof buf, UINT64_FORMAT, lsn1 - lsn2);
+
+ /* Convert to numeric. */
+ result = DirectFunctionCall3(numeric_in,
+ CStringGetDatum(buf),
+ ObjectIdGetDatum(0),
+ Int32GetDatum(-1));
+
+ return result;
+}
+
+/*
+ * Add the number of bytes to pg_lsn, giving a new pg_lsn.
+ * Must handle both positive and negative numbers of bytes.
+ */
+Datum
+pg_lsn_pli(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr lsn = PG_GETARG_LSN(0);
+ Numeric nbytes = PG_GETARG_NUMERIC(1);
+ Datum num;
+ Datum res;
+ char buf[32];
+
+ if (numeric_is_nan(nbytes))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot add NaN to pg_lsn")));
+
+ /* Convert to numeric */
+ snprintf(buf, sizeof(buf), UINT64_FORMAT, lsn);
+ num = DirectFunctionCall3(numeric_in,
+ CStringGetDatum(buf),
+ ObjectIdGetDatum(0),
+ Int32GetDatum(-1));
+
+ /* Add two numerics */
+ res = DirectFunctionCall2(numeric_add,
+ NumericGetDatum(num),
+ NumericGetDatum(nbytes));
+
+ /* Convert to pg_lsn */
+ return DirectFunctionCall1(numeric_pg_lsn, res);
+}
+
+/*
+ * Subtract the number of bytes from pg_lsn, giving a new pg_lsn.
+ * Must handle both positive and negative numbers of bytes.
+ */
+Datum
+pg_lsn_mii(PG_FUNCTION_ARGS)
+{
+ XLogRecPtr lsn = PG_GETARG_LSN(0);
+ Numeric nbytes = PG_GETARG_NUMERIC(1);
+ Datum num;
+ Datum res;
+ char buf[32];
+
+ if (numeric_is_nan(nbytes))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot subtract NaN from pg_lsn")));
+
+ /* Convert to numeric */
+ snprintf(buf, sizeof(buf), UINT64_FORMAT, lsn);
+ num = DirectFunctionCall3(numeric_in,
+ CStringGetDatum(buf),
+ ObjectIdGetDatum(0),
+ Int32GetDatum(-1));
+
+ /* Subtract two numerics */
+ res = DirectFunctionCall2(numeric_sub,
+ NumericGetDatum(num),
+ NumericGetDatum(nbytes));
+
+ /* Convert to pg_lsn */
+ return DirectFunctionCall1(numeric_pg_lsn, res);
+}
diff --git a/src/backend/utils/adt/pg_upgrade_support.c b/src/backend/utils/adt/pg_upgrade_support.c
new file mode 100644
index 0000000..67b9675
--- /dev/null
+++ b/src/backend/utils/adt/pg_upgrade_support.c
@@ -0,0 +1,265 @@
+/*
+ * pg_upgrade_support.c
+ *
+ * server-side functions to set backend global variables
+ * to control oid and relfilenode assignment, and do other special
+ * hacks needed for pg_upgrade.
+ *
+ * Copyright (c) 2010-2022, PostgreSQL Global Development Group
+ * src/backend/utils/adt/pg_upgrade_support.c
+ */
+
+#include "postgres.h"
+
+#include "catalog/binary_upgrade.h"
+#include "catalog/heap.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_type.h"
+#include "commands/extension.h"
+#include "miscadmin.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+
+
+#define CHECK_IS_BINARY_UPGRADE \
+do { \
+ if (!IsBinaryUpgrade) \
+ ereport(ERROR, \
+ (errcode(ERRCODE_CANT_CHANGE_RUNTIME_PARAM), \
+ errmsg("function can only be called when server is in binary upgrade mode"))); \
+} while (0)
+
+Datum
+binary_upgrade_set_next_pg_tablespace_oid(PG_FUNCTION_ARGS)
+{
+ Oid tbspoid = PG_GETARG_OID(0);
+
+ CHECK_IS_BINARY_UPGRADE;
+ binary_upgrade_next_pg_tablespace_oid = tbspoid;
+
+ PG_RETURN_VOID();
+}
+
+Datum
+binary_upgrade_set_next_pg_type_oid(PG_FUNCTION_ARGS)
+{
+ Oid typoid = PG_GETARG_OID(0);
+
+ CHECK_IS_BINARY_UPGRADE;
+ binary_upgrade_next_pg_type_oid = typoid;
+
+ PG_RETURN_VOID();
+}
+
+Datum
+binary_upgrade_set_next_array_pg_type_oid(PG_FUNCTION_ARGS)
+{
+ Oid typoid = PG_GETARG_OID(0);
+
+ CHECK_IS_BINARY_UPGRADE;
+ binary_upgrade_next_array_pg_type_oid = typoid;
+
+ PG_RETURN_VOID();
+}
+
+Datum
+binary_upgrade_set_next_multirange_pg_type_oid(PG_FUNCTION_ARGS)
+{
+ Oid typoid = PG_GETARG_OID(0);
+
+ CHECK_IS_BINARY_UPGRADE;
+ binary_upgrade_next_mrng_pg_type_oid = typoid;
+
+ PG_RETURN_VOID();
+}
+
+Datum
+binary_upgrade_set_next_multirange_array_pg_type_oid(PG_FUNCTION_ARGS)
+{
+ Oid typoid = PG_GETARG_OID(0);
+
+ CHECK_IS_BINARY_UPGRADE;
+ binary_upgrade_next_mrng_array_pg_type_oid = typoid;
+
+ PG_RETURN_VOID();
+}
+
+Datum
+binary_upgrade_set_next_heap_pg_class_oid(PG_FUNCTION_ARGS)
+{
+ Oid reloid = PG_GETARG_OID(0);
+
+ CHECK_IS_BINARY_UPGRADE;
+ binary_upgrade_next_heap_pg_class_oid = reloid;
+
+ PG_RETURN_VOID();
+}
+
+Datum
+binary_upgrade_set_next_heap_relfilenode(PG_FUNCTION_ARGS)
+{
+ Oid nodeoid = PG_GETARG_OID(0);
+
+ CHECK_IS_BINARY_UPGRADE;
+ binary_upgrade_next_heap_pg_class_relfilenode = nodeoid;
+
+ PG_RETURN_VOID();
+}
+
+Datum
+binary_upgrade_set_next_index_pg_class_oid(PG_FUNCTION_ARGS)
+{
+ Oid reloid = PG_GETARG_OID(0);
+
+ CHECK_IS_BINARY_UPGRADE;
+ binary_upgrade_next_index_pg_class_oid = reloid;
+
+ PG_RETURN_VOID();
+}
+
+Datum
+binary_upgrade_set_next_index_relfilenode(PG_FUNCTION_ARGS)
+{
+ Oid nodeoid = PG_GETARG_OID(0);
+
+ CHECK_IS_BINARY_UPGRADE;
+ binary_upgrade_next_index_pg_class_relfilenode = nodeoid;
+
+ PG_RETURN_VOID();
+}
+
+Datum
+binary_upgrade_set_next_toast_pg_class_oid(PG_FUNCTION_ARGS)
+{
+ Oid reloid = PG_GETARG_OID(0);
+
+ CHECK_IS_BINARY_UPGRADE;
+ binary_upgrade_next_toast_pg_class_oid = reloid;
+
+ PG_RETURN_VOID();
+}
+
+Datum
+binary_upgrade_set_next_toast_relfilenode(PG_FUNCTION_ARGS)
+{
+ Oid nodeoid = PG_GETARG_OID(0);
+
+ CHECK_IS_BINARY_UPGRADE;
+ binary_upgrade_next_toast_pg_class_relfilenode = nodeoid;
+
+ PG_RETURN_VOID();
+}
+
+Datum
+binary_upgrade_set_next_pg_enum_oid(PG_FUNCTION_ARGS)
+{
+ Oid enumoid = PG_GETARG_OID(0);
+
+ CHECK_IS_BINARY_UPGRADE;
+ binary_upgrade_next_pg_enum_oid = enumoid;
+
+ PG_RETURN_VOID();
+}
+
+Datum
+binary_upgrade_set_next_pg_authid_oid(PG_FUNCTION_ARGS)
+{
+ Oid authoid = PG_GETARG_OID(0);
+
+ CHECK_IS_BINARY_UPGRADE;
+ binary_upgrade_next_pg_authid_oid = authoid;
+ PG_RETURN_VOID();
+}
+
+Datum
+binary_upgrade_create_empty_extension(PG_FUNCTION_ARGS)
+{
+ text *extName;
+ text *schemaName;
+ bool relocatable;
+ text *extVersion;
+ Datum extConfig;
+ Datum extCondition;
+ List *requiredExtensions;
+
+ CHECK_IS_BINARY_UPGRADE;
+
+ /* We must check these things before dereferencing the arguments */
+ if (PG_ARGISNULL(0) ||
+ PG_ARGISNULL(1) ||
+ PG_ARGISNULL(2) ||
+ PG_ARGISNULL(3))
+ elog(ERROR, "null argument to binary_upgrade_create_empty_extension is not allowed");
+
+ extName = PG_GETARG_TEXT_PP(0);
+ schemaName = PG_GETARG_TEXT_PP(1);
+ relocatable = PG_GETARG_BOOL(2);
+ extVersion = PG_GETARG_TEXT_PP(3);
+
+ if (PG_ARGISNULL(4))
+ extConfig = PointerGetDatum(NULL);
+ else
+ extConfig = PG_GETARG_DATUM(4);
+
+ if (PG_ARGISNULL(5))
+ extCondition = PointerGetDatum(NULL);
+ else
+ extCondition = PG_GETARG_DATUM(5);
+
+ requiredExtensions = NIL;
+ if (!PG_ARGISNULL(6))
+ {
+ ArrayType *textArray = PG_GETARG_ARRAYTYPE_P(6);
+ Datum *textDatums;
+ int ndatums;
+ int i;
+
+ deconstruct_array(textArray,
+ TEXTOID, -1, false, TYPALIGN_INT,
+ &textDatums, NULL, &ndatums);
+ for (i = 0; i < ndatums; i++)
+ {
+ char *extName = TextDatumGetCString(textDatums[i]);
+ Oid extOid = get_extension_oid(extName, false);
+
+ requiredExtensions = lappend_oid(requiredExtensions, extOid);
+ }
+ }
+
+ InsertExtensionTuple(text_to_cstring(extName),
+ GetUserId(),
+ get_namespace_oid(text_to_cstring(schemaName), false),
+ relocatable,
+ text_to_cstring(extVersion),
+ extConfig,
+ extCondition,
+ requiredExtensions);
+
+ PG_RETURN_VOID();
+}
+
+Datum
+binary_upgrade_set_record_init_privs(PG_FUNCTION_ARGS)
+{
+ bool record_init_privs = PG_GETARG_BOOL(0);
+
+ CHECK_IS_BINARY_UPGRADE;
+ binary_upgrade_record_init_privs = record_init_privs;
+
+ PG_RETURN_VOID();
+}
+
+Datum
+binary_upgrade_set_missing_value(PG_FUNCTION_ARGS)
+{
+ Oid table_id = PG_GETARG_OID(0);
+ text *attname = PG_GETARG_TEXT_P(1);
+ text *value = PG_GETARG_TEXT_P(2);
+ char *cattname = text_to_cstring(attname);
+ char *cvalue = text_to_cstring(value);
+
+ CHECK_IS_BINARY_UPGRADE;
+ SetAttrMissing(table_id, cattname, cvalue);
+
+ PG_RETURN_VOID();
+}
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
new file mode 100644
index 0000000..6ef7ead
--- /dev/null
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -0,0 +1,2422 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgstatfuncs.c
+ * Functions for accessing various forms of statistics data
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/pgstatfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "access/xlog.h"
+#include "access/xlogprefetcher.h"
+#include "catalog/catalog.h"
+#include "catalog/pg_authid.h"
+#include "catalog/pg_type.h"
+#include "common/ip.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "pgstat.h"
+#include "postmaster/bgworker_internals.h"
+#include "postmaster/postmaster.h"
+#include "storage/proc.h"
+#include "storage/procarray.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/inet.h"
+#include "utils/timestamp.h"
+
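+/*
+ * Read a shared uint32 exactly once (through a volatile lvalue), so the
+ * compiler cannot re-read a value that another process may be updating
+ * concurrently.
+ */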
+#define UINT32_ACCESS_ONCE(var) ((uint32)(*((volatile uint32 *)&(var))))
+
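+/*
+ * Does the current user have the privileges of pg_read_all_stats, or of the
+ * given role?  Used to decide whether detailed statistics may be shown.
+ */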
+#define HAS_PGSTAT_PERMISSIONS(role) (has_privs_of_role(GetUserId(), ROLE_PG_READ_ALL_STATS) || has_privs_of_role(GetUserId(), role))
+
+Datum
+pg_stat_get_numscans(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->numscans);
+
+ PG_RETURN_INT64(result);
+}
+
+
+Datum
+pg_stat_get_tuples_returned(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->tuples_returned);
+
+ PG_RETURN_INT64(result);
+}
+
+
+Datum
+pg_stat_get_tuples_fetched(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->tuples_fetched);
+
+ PG_RETURN_INT64(result);
+}
+
+
+Datum
+pg_stat_get_tuples_inserted(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->tuples_inserted);
+
+ PG_RETURN_INT64(result);
+}
+
+
+Datum
+pg_stat_get_tuples_updated(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->tuples_updated);
+
+ PG_RETURN_INT64(result);
+}
+
+
+Datum
+pg_stat_get_tuples_deleted(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->tuples_deleted);
+
+ PG_RETURN_INT64(result);
+}
+
+
+Datum
+pg_stat_get_tuples_hot_updated(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->tuples_hot_updated);
+
+ PG_RETURN_INT64(result);
+}
+
+
+Datum
+pg_stat_get_live_tuples(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->n_live_tuples);
+
+ PG_RETURN_INT64(result);
+}
+
+
+Datum
+pg_stat_get_dead_tuples(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->n_dead_tuples);
+
+ PG_RETURN_INT64(result);
+}
+
+
+Datum
+pg_stat_get_mod_since_analyze(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->changes_since_analyze);
+
+ PG_RETURN_INT64(result);
+}
+
+
+Datum
+pg_stat_get_ins_since_vacuum(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->inserts_since_vacuum);
+
+ PG_RETURN_INT64(result);
+}
+
+
+Datum
+pg_stat_get_blocks_fetched(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->blocks_fetched);
+
+ PG_RETURN_INT64(result);
+}
+
+
+Datum
+pg_stat_get_blocks_hit(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->blocks_hit);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_last_vacuum_time(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ TimestampTz result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = tabentry->vacuum_timestamp;
+
+ if (result == 0)
+ PG_RETURN_NULL();
+ else
+ PG_RETURN_TIMESTAMPTZ(result);
+}
+
+Datum
+pg_stat_get_last_autovacuum_time(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ TimestampTz result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = tabentry->autovac_vacuum_timestamp;
+
+ if (result == 0)
+ PG_RETURN_NULL();
+ else
+ PG_RETURN_TIMESTAMPTZ(result);
+}
+
+Datum
+pg_stat_get_last_analyze_time(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ TimestampTz result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = tabentry->analyze_timestamp;
+
+ if (result == 0)
+ PG_RETURN_NULL();
+ else
+ PG_RETURN_TIMESTAMPTZ(result);
+}
+
+Datum
+pg_stat_get_last_autoanalyze_time(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ TimestampTz result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = tabentry->autovac_analyze_timestamp;
+
+ if (result == 0)
+ PG_RETURN_NULL();
+ else
+ PG_RETURN_TIMESTAMPTZ(result);
+}
+
+Datum
+pg_stat_get_vacuum_count(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->vacuum_count);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_autovacuum_count(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->autovac_vacuum_count);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_analyze_count(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->analyze_count);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_autoanalyze_count(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatTabEntry *tabentry;
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->autovac_analyze_count);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_function_calls(PG_FUNCTION_ARGS)
+{
+ Oid funcid = PG_GETARG_OID(0);
+ PgStat_StatFuncEntry *funcentry;
+
+ if ((funcentry = pgstat_fetch_stat_funcentry(funcid)) == NULL)
+ PG_RETURN_NULL();
+ PG_RETURN_INT64(funcentry->f_numcalls);
+}
+
+Datum
+pg_stat_get_function_total_time(PG_FUNCTION_ARGS)
+{
+ Oid funcid = PG_GETARG_OID(0);
+ PgStat_StatFuncEntry *funcentry;
+
+ if ((funcentry = pgstat_fetch_stat_funcentry(funcid)) == NULL)
+ PG_RETURN_NULL();
+ /* convert counter from microsec to millisec for display */
+ PG_RETURN_FLOAT8(((double) funcentry->f_total_time) / 1000.0);
+}
+
+Datum
+pg_stat_get_function_self_time(PG_FUNCTION_ARGS)
+{
+ Oid funcid = PG_GETARG_OID(0);
+ PgStat_StatFuncEntry *funcentry;
+
+ if ((funcentry = pgstat_fetch_stat_funcentry(funcid)) == NULL)
+ PG_RETURN_NULL();
+ /* convert counter from microsec to millisec for display */
+ PG_RETURN_FLOAT8(((double) funcentry->f_self_time) / 1000.0);
+}
+
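+/*
+ * Return the set of backend ID numbers (1 .. number of active backends),
+ * for use with the per-backend pg_stat_get_backend_*() functions.
+ */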
+Datum
+pg_stat_get_backend_idset(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ int *fctx;
+ int32 result;
+
+ /* stuff done only on the first call of the function */
+ if (SRF_IS_FIRSTCALL())
+ {
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ fctx = MemoryContextAlloc(funcctx->multi_call_memory_ctx,
+ 2 * sizeof(int));
+ funcctx->user_fctx = fctx;
+
+ fctx[0] = 0;
+ fctx[1] = pgstat_fetch_stat_numbackends();
+ }
+
+ /* stuff done on every call of the function */
+ funcctx = SRF_PERCALL_SETUP();
+ fctx = funcctx->user_fctx;
+
+ fctx[0] += 1;
+ result = fctx[0];
+
+ if (result <= fctx[1])
+ {
+ /* do when there is more left to send */
+ SRF_RETURN_NEXT(funcctx, Int32GetDatum(result));
+ }
+ else
+ {
+ /* do when there is no more left */
+ SRF_RETURN_DONE(funcctx);
+ }
+}
+
+/*
+ * Returns command progress information for the named command.
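+ *
+ * The pg_stat_progress_* system views (pg_stat_progress_vacuum, for
+ * example) are defined over this function, passing the command name as a
+ * constant.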
+ */
+Datum
+pg_stat_get_progress_info(PG_FUNCTION_ARGS)
+{
+#define PG_STAT_GET_PROGRESS_COLS PGSTAT_NUM_PROGRESS_PARAM + 3
+ int num_backends = pgstat_fetch_stat_numbackends();
+ int curr_backend;
+ char *cmd = text_to_cstring(PG_GETARG_TEXT_PP(0));
+ ProgressCommandType cmdtype;
+ ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+
+ /* Translate command name into command type code. */
+ if (pg_strcasecmp(cmd, "VACUUM") == 0)
+ cmdtype = PROGRESS_COMMAND_VACUUM;
+ else if (pg_strcasecmp(cmd, "ANALYZE") == 0)
+ cmdtype = PROGRESS_COMMAND_ANALYZE;
+ else if (pg_strcasecmp(cmd, "CLUSTER") == 0)
+ cmdtype = PROGRESS_COMMAND_CLUSTER;
+ else if (pg_strcasecmp(cmd, "CREATE INDEX") == 0)
+ cmdtype = PROGRESS_COMMAND_CREATE_INDEX;
+ else if (pg_strcasecmp(cmd, "BASEBACKUP") == 0)
+ cmdtype = PROGRESS_COMMAND_BASEBACKUP;
+ else if (pg_strcasecmp(cmd, "COPY") == 0)
+ cmdtype = PROGRESS_COMMAND_COPY;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid command name: \"%s\"", cmd)));
+
+ InitMaterializedSRF(fcinfo, 0);
+
+ /* 1-based index */
+ for (curr_backend = 1; curr_backend <= num_backends; curr_backend++)
+ {
+ LocalPgBackendStatus *local_beentry;
+ PgBackendStatus *beentry;
+ Datum values[PG_STAT_GET_PROGRESS_COLS];
+ bool nulls[PG_STAT_GET_PROGRESS_COLS];
+ int i;
+
+ MemSet(values, 0, sizeof(values));
+ MemSet(nulls, 0, sizeof(nulls));
+
+ local_beentry = pgstat_fetch_stat_local_beentry(curr_backend);
+
+ if (!local_beentry)
+ continue;
+
+ beentry = &local_beentry->backendStatus;
+
+ /*
+ * Report values for only those backends which are running the given
+ * command.
+ */
+ if (!beentry || beentry->st_progress_command != cmdtype)
+ continue;
+
+ /* Value available to all callers */
+ values[0] = Int32GetDatum(beentry->st_procpid);
+ values[1] = ObjectIdGetDatum(beentry->st_databaseid);
+
+ /* show rest of the values including relid only to role members */
+ if (HAS_PGSTAT_PERMISSIONS(beentry->st_userid))
+ {
+ values[2] = ObjectIdGetDatum(beentry->st_progress_command_target);
+ for (i = 0; i < PGSTAT_NUM_PROGRESS_PARAM; i++)
+ values[i + 3] = Int64GetDatum(beentry->st_progress_param[i]);
+ }
+ else
+ {
+ nulls[2] = true;
+ for (i = 0; i < PGSTAT_NUM_PROGRESS_PARAM; i++)
+ nulls[i + 3] = true;
+ }
+
+ tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
+ }
+
+ return (Datum) 0;
+}
+
+/*
+ * Returns activity of PG backends.
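+ *
+ * This underlies the pg_stat_activity view.  A NULL pid argument returns
+ * one row per backend; a non-NULL pid restricts the result to that backend.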
+ */
+Datum
+pg_stat_get_activity(PG_FUNCTION_ARGS)
+{
+#define PG_STAT_GET_ACTIVITY_COLS 30
+ int num_backends = pgstat_fetch_stat_numbackends();
+ int curr_backend;
+ int pid = PG_ARGISNULL(0) ? -1 : PG_GETARG_INT32(0);
+ ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+
+ InitMaterializedSRF(fcinfo, 0);
+
+ /* 1-based index */
+ for (curr_backend = 1; curr_backend <= num_backends; curr_backend++)
+ {
+ /* for each row */
+ Datum values[PG_STAT_GET_ACTIVITY_COLS];
+ bool nulls[PG_STAT_GET_ACTIVITY_COLS];
+ LocalPgBackendStatus *local_beentry;
+ PgBackendStatus *beentry;
+ PGPROC *proc;
+ const char *wait_event_type = NULL;
+ const char *wait_event = NULL;
+
+ MemSet(values, 0, sizeof(values));
+ MemSet(nulls, 0, sizeof(nulls));
+
+ /* Get the next one in the list */
+ local_beentry = pgstat_fetch_stat_local_beentry(curr_backend);
+ if (!local_beentry)
+ {
+ int i;
+
+ /* Ignore missing entries if looking for specific PID */
+ if (pid != -1)
+ continue;
+
+ for (i = 0; i < lengthof(nulls); i++)
+ nulls[i] = true;
+
+ nulls[5] = false;
+ values[5] = CStringGetTextDatum("<backend information not available>");
+
+ tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
+ continue;
+ }
+
+ beentry = &local_beentry->backendStatus;
+
+ /* If looking for specific PID, ignore all the others */
+ if (pid != -1 && beentry->st_procpid != pid)
+ continue;
+
+ /* Values available to all callers */
+ if (beentry->st_databaseid != InvalidOid)
+ values[0] = ObjectIdGetDatum(beentry->st_databaseid);
+ else
+ nulls[0] = true;
+
+ values[1] = Int32GetDatum(beentry->st_procpid);
+
+ if (beentry->st_userid != InvalidOid)
+ values[2] = ObjectIdGetDatum(beentry->st_userid);
+ else
+ nulls[2] = true;
+
+ if (beentry->st_appname)
+ values[3] = CStringGetTextDatum(beentry->st_appname);
+ else
+ nulls[3] = true;
+
+ if (TransactionIdIsValid(local_beentry->backend_xid))
+ values[15] = TransactionIdGetDatum(local_beentry->backend_xid);
+ else
+ nulls[15] = true;
+
+ if (TransactionIdIsValid(local_beentry->backend_xmin))
+ values[16] = TransactionIdGetDatum(local_beentry->backend_xmin);
+ else
+ nulls[16] = true;
+
+ /* Values only available to role member or pg_read_all_stats */
+ if (HAS_PGSTAT_PERMISSIONS(beentry->st_userid))
+ {
+ SockAddr zero_clientaddr;
+ char *clipped_activity;
+
+ switch (beentry->st_state)
+ {
+ case STATE_IDLE:
+ values[4] = CStringGetTextDatum("idle");
+ break;
+ case STATE_RUNNING:
+ values[4] = CStringGetTextDatum("active");
+ break;
+ case STATE_IDLEINTRANSACTION:
+ values[4] = CStringGetTextDatum("idle in transaction");
+ break;
+ case STATE_FASTPATH:
+ values[4] = CStringGetTextDatum("fastpath function call");
+ break;
+ case STATE_IDLEINTRANSACTION_ABORTED:
+ values[4] = CStringGetTextDatum("idle in transaction (aborted)");
+ break;
+ case STATE_DISABLED:
+ values[4] = CStringGetTextDatum("disabled");
+ break;
+ case STATE_UNDEFINED:
+ nulls[4] = true;
+ break;
+ }
+
+ clipped_activity = pgstat_clip_activity(beentry->st_activity_raw);
+ values[5] = CStringGetTextDatum(clipped_activity);
+ pfree(clipped_activity);
+
+ /* leader_pid */
+ nulls[28] = true;
+
+ proc = BackendPidGetProc(beentry->st_procpid);
+
+ if (proc == NULL && (beentry->st_backendType != B_BACKEND))
+ {
+ /*
+ * For an auxiliary process, retrieve process info from
+ * AuxiliaryProcs stored in shared-memory.
+ */
+ proc = AuxiliaryPidGetProc(beentry->st_procpid);
+ }
+
+ /*
+ * If a PGPROC entry was retrieved, display wait events and lock
+ * group leader information if any. To avoid extra overhead, no
+ * extra lock is being held, so there is no guarantee of
+ * consistency across multiple rows.
+ */
+ if (proc != NULL)
+ {
+ uint32 raw_wait_event;
+ PGPROC *leader;
+
+ raw_wait_event = UINT32_ACCESS_ONCE(proc->wait_event_info);
+ wait_event_type = pgstat_get_wait_event_type(raw_wait_event);
+ wait_event = pgstat_get_wait_event(raw_wait_event);
+
+ leader = proc->lockGroupLeader;
+
+ /*
+ * Show the leader only for active parallel workers. This
+ * leaves the field as NULL for the leader of a parallel
+ * group.
+ */
+ if (leader && leader->pid != beentry->st_procpid)
+ {
+ values[28] = Int32GetDatum(leader->pid);
+ nulls[28] = false;
+ }
+ }
+
+ if (wait_event_type)
+ values[6] = CStringGetTextDatum(wait_event_type);
+ else
+ nulls[6] = true;
+
+ if (wait_event)
+ values[7] = CStringGetTextDatum(wait_event);
+ else
+ nulls[7] = true;
+
+ /*
+ * Don't expose transaction time for walsenders; it confuses
+ * monitoring, particularly because we don't keep the time up-to-
+ * date.
+ */
+ if (beentry->st_xact_start_timestamp != 0 &&
+ beentry->st_backendType != B_WAL_SENDER)
+ values[8] = TimestampTzGetDatum(beentry->st_xact_start_timestamp);
+ else
+ nulls[8] = true;
+
+ if (beentry->st_activity_start_timestamp != 0)
+ values[9] = TimestampTzGetDatum(beentry->st_activity_start_timestamp);
+ else
+ nulls[9] = true;
+
+ if (beentry->st_proc_start_timestamp != 0)
+ values[10] = TimestampTzGetDatum(beentry->st_proc_start_timestamp);
+ else
+ nulls[10] = true;
+
+ if (beentry->st_state_start_timestamp != 0)
+ values[11] = TimestampTzGetDatum(beentry->st_state_start_timestamp);
+ else
+ nulls[11] = true;
+
+ /* A zeroed client addr means we don't know */
+ memset(&zero_clientaddr, 0, sizeof(zero_clientaddr));
+ if (memcmp(&(beentry->st_clientaddr), &zero_clientaddr,
+ sizeof(zero_clientaddr)) == 0)
+ {
+ nulls[12] = true;
+ nulls[13] = true;
+ nulls[14] = true;
+ }
+ else
+ {
+ if (beentry->st_clientaddr.addr.ss_family == AF_INET
+#ifdef HAVE_IPV6
+ || beentry->st_clientaddr.addr.ss_family == AF_INET6
+#endif
+ )
+ {
+ char remote_host[NI_MAXHOST];
+ char remote_port[NI_MAXSERV];
+ int ret;
+
+ remote_host[0] = '\0';
+ remote_port[0] = '\0';
+ ret = pg_getnameinfo_all(&beentry->st_clientaddr.addr,
+ beentry->st_clientaddr.salen,
+ remote_host, sizeof(remote_host),
+ remote_port, sizeof(remote_port),
+ NI_NUMERICHOST | NI_NUMERICSERV);
+ if (ret == 0)
+ {
+ clean_ipv6_addr(beentry->st_clientaddr.addr.ss_family, remote_host);
+ values[12] = DirectFunctionCall1(inet_in,
+ CStringGetDatum(remote_host));
+ if (beentry->st_clienthostname &&
+ beentry->st_clienthostname[0])
+ values[13] = CStringGetTextDatum(beentry->st_clienthostname);
+ else
+ nulls[13] = true;
+ values[14] = Int32GetDatum(atoi(remote_port));
+ }
+ else
+ {
+ nulls[12] = true;
+ nulls[13] = true;
+ nulls[14] = true;
+ }
+ }
+ else if (beentry->st_clientaddr.addr.ss_family == AF_UNIX)
+ {
+ /*
+ * Unix sockets always report NULL for host and -1 for
+ * port, so they can be distinguished from connections we
+ * have no permission to view and from error cases.
+ */
+ nulls[12] = true;
+ nulls[13] = true;
+ values[14] = Int32GetDatum(-1);
+ }
+ else
+ {
+ /* Unknown address type, should never happen */
+ nulls[12] = true;
+ nulls[13] = true;
+ nulls[14] = true;
+ }
+ }
+ /* Add backend type */
+ if (beentry->st_backendType == B_BG_WORKER)
+ {
+ const char *bgw_type;
+
+ bgw_type = GetBackgroundWorkerTypeByPid(beentry->st_procpid);
+ if (bgw_type)
+ values[17] = CStringGetTextDatum(bgw_type);
+ else
+ nulls[17] = true;
+ }
+ else
+ values[17] =
+ CStringGetTextDatum(GetBackendTypeDesc(beentry->st_backendType));
+
+ /* SSL information */
+ if (beentry->st_ssl)
+ {
+ values[18] = BoolGetDatum(true); /* ssl */
+ values[19] = CStringGetTextDatum(beentry->st_sslstatus->ssl_version);
+ values[20] = CStringGetTextDatum(beentry->st_sslstatus->ssl_cipher);
+ values[21] = Int32GetDatum(beentry->st_sslstatus->ssl_bits);
+
+ if (beentry->st_sslstatus->ssl_client_dn[0])
+ values[22] = CStringGetTextDatum(beentry->st_sslstatus->ssl_client_dn);
+ else
+ nulls[22] = true;
+
+ if (beentry->st_sslstatus->ssl_client_serial[0])
+ values[23] = DirectFunctionCall3(numeric_in,
+ CStringGetDatum(beentry->st_sslstatus->ssl_client_serial),
+ ObjectIdGetDatum(InvalidOid),
+ Int32GetDatum(-1));
+ else
+ nulls[23] = true;
+
+ if (beentry->st_sslstatus->ssl_issuer_dn[0])
+ values[24] = CStringGetTextDatum(beentry->st_sslstatus->ssl_issuer_dn);
+ else
+ nulls[24] = true;
+ }
+ else
+ {
+ values[18] = BoolGetDatum(false); /* ssl */
+ nulls[19] = nulls[20] = nulls[21] = nulls[22] = nulls[23] = nulls[24] = true;
+ }
+
+ /* GSSAPI information */
+ if (beentry->st_gss)
+ {
+ values[25] = BoolGetDatum(beentry->st_gssstatus->gss_auth); /* gss_auth */
+ values[26] = CStringGetTextDatum(beentry->st_gssstatus->gss_princ);
+ values[27] = BoolGetDatum(beentry->st_gssstatus->gss_enc); /* GSS Encryption in use */
+ }
+ else
+ {
+ values[25] = BoolGetDatum(false); /* gss_auth */
+ nulls[26] = true; /* No GSS principal */
+ values[27] = BoolGetDatum(false); /* GSS Encryption not in
+ * use */
+ }
+ if (beentry->st_query_id == 0)
+ nulls[29] = true;
+ else
+ values[29] = UInt64GetDatum(beentry->st_query_id);
+ }
+ else
+ {
+ /* No permissions to view data about this session */
+ values[5] = CStringGetTextDatum("<insufficient privilege>");
+ nulls[4] = true;
+ nulls[6] = true;
+ nulls[7] = true;
+ nulls[8] = true;
+ nulls[9] = true;
+ nulls[10] = true;
+ nulls[11] = true;
+ nulls[12] = true;
+ nulls[13] = true;
+ nulls[14] = true;
+ nulls[17] = true;
+ nulls[18] = true;
+ nulls[19] = true;
+ nulls[20] = true;
+ nulls[21] = true;
+ nulls[22] = true;
+ nulls[23] = true;
+ nulls[24] = true;
+ nulls[25] = true;
+ nulls[26] = true;
+ nulls[27] = true;
+ nulls[28] = true;
+ nulls[29] = true;
+ }
+
+ tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
+
+ /* If only a single backend was requested, and we found it, break. */
+ if (pid != -1)
+ break;
+ }
+
+ return (Datum) 0;
+}
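+
+/*
+ * Illustrative usage (a sketch, not part of the function above): the
+ * pg_stat_activity system view is defined on top of this function, but it
+ * can also be called directly.  Passing NULL reports every backend, while
+ * passing a PID restricts the result to that backend:
+ *
+ *   SELECT * FROM pg_stat_get_activity(NULL);    -- all backends
+ *   SELECT * FROM pg_stat_get_activity(12345);   -- 12345 is a made-up PID
+ */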
+
+
+Datum
+pg_backend_pid(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_INT32(MyProcPid);
+}
+
+
+Datum
+pg_stat_get_backend_pid(PG_FUNCTION_ARGS)
+{
+ int32 beid = PG_GETARG_INT32(0);
+ PgBackendStatus *beentry;
+
+ if ((beentry = pgstat_fetch_stat_beentry(beid)) == NULL)
+ PG_RETURN_NULL();
+
+ PG_RETURN_INT32(beentry->st_procpid);
+}
+
+
+Datum
+pg_stat_get_backend_dbid(PG_FUNCTION_ARGS)
+{
+ int32 beid = PG_GETARG_INT32(0);
+ PgBackendStatus *beentry;
+
+ if ((beentry = pgstat_fetch_stat_beentry(beid)) == NULL)
+ PG_RETURN_NULL();
+
+ PG_RETURN_OID(beentry->st_databaseid);
+}
+
+
+Datum
+pg_stat_get_backend_userid(PG_FUNCTION_ARGS)
+{
+ int32 beid = PG_GETARG_INT32(0);
+ PgBackendStatus *beentry;
+
+ if ((beentry = pgstat_fetch_stat_beentry(beid)) == NULL)
+ PG_RETURN_NULL();
+
+ PG_RETURN_OID(beentry->st_userid);
+}
+
+
+Datum
+pg_stat_get_backend_activity(PG_FUNCTION_ARGS)
+{
+ int32 beid = PG_GETARG_INT32(0);
+ PgBackendStatus *beentry;
+ const char *activity;
+ char *clipped_activity;
+ text *ret;
+
+ if ((beentry = pgstat_fetch_stat_beentry(beid)) == NULL)
+ activity = "<backend information not available>";
+ else if (!HAS_PGSTAT_PERMISSIONS(beentry->st_userid))
+ activity = "<insufficient privilege>";
+ else if (*(beentry->st_activity_raw) == '\0')
+ activity = "<command string not enabled>";
+ else
+ activity = beentry->st_activity_raw;
+
+ clipped_activity = pgstat_clip_activity(activity);
+ ret = cstring_to_text(clipped_activity);
+ pfree(clipped_activity);
+
+ PG_RETURN_TEXT_P(ret);
+}
+
+Datum
+pg_stat_get_backend_wait_event_type(PG_FUNCTION_ARGS)
+{
+ int32 beid = PG_GETARG_INT32(0);
+ PgBackendStatus *beentry;
+ PGPROC *proc;
+ const char *wait_event_type = NULL;
+
+ if ((beentry = pgstat_fetch_stat_beentry(beid)) == NULL)
+ wait_event_type = "<backend information not available>";
+ else if (!HAS_PGSTAT_PERMISSIONS(beentry->st_userid))
+ wait_event_type = "<insufficient privilege>";
+ else if ((proc = BackendPidGetProc(beentry->st_procpid)) != NULL)
+ wait_event_type = pgstat_get_wait_event_type(proc->wait_event_info);
+
+ if (!wait_event_type)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(cstring_to_text(wait_event_type));
+}
+
+Datum
+pg_stat_get_backend_wait_event(PG_FUNCTION_ARGS)
+{
+ int32 beid = PG_GETARG_INT32(0);
+ PgBackendStatus *beentry;
+ PGPROC *proc;
+ const char *wait_event = NULL;
+
+ if ((beentry = pgstat_fetch_stat_beentry(beid)) == NULL)
+ wait_event = "<backend information not available>";
+ else if (!HAS_PGSTAT_PERMISSIONS(beentry->st_userid))
+ wait_event = "<insufficient privilege>";
+ else if ((proc = BackendPidGetProc(beentry->st_procpid)) != NULL)
+ wait_event = pgstat_get_wait_event(proc->wait_event_info);
+
+ if (!wait_event)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(cstring_to_text(wait_event));
+}
+
+
+Datum
+pg_stat_get_backend_activity_start(PG_FUNCTION_ARGS)
+{
+ int32 beid = PG_GETARG_INT32(0);
+ TimestampTz result;
+ PgBackendStatus *beentry;
+
+ if ((beentry = pgstat_fetch_stat_beentry(beid)) == NULL)
+ PG_RETURN_NULL();
+
+ else if (!HAS_PGSTAT_PERMISSIONS(beentry->st_userid))
+ PG_RETURN_NULL();
+
+ result = beentry->st_activity_start_timestamp;
+
+ /*
+ * No time recorded for start of current query -- this is the case if the
+ * user hasn't enabled query-level stats collection.
+ */
+ if (result == 0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TIMESTAMPTZ(result);
+}
+
+
+Datum
+pg_stat_get_backend_xact_start(PG_FUNCTION_ARGS)
+{
+ int32 beid = PG_GETARG_INT32(0);
+ TimestampTz result;
+ PgBackendStatus *beentry;
+
+ if ((beentry = pgstat_fetch_stat_beentry(beid)) == NULL)
+ PG_RETURN_NULL();
+
+ else if (!HAS_PGSTAT_PERMISSIONS(beentry->st_userid))
+ PG_RETURN_NULL();
+
+ result = beentry->st_xact_start_timestamp;
+
+ if (result == 0) /* not in a transaction */
+ PG_RETURN_NULL();
+
+ PG_RETURN_TIMESTAMPTZ(result);
+}
+
+
+Datum
+pg_stat_get_backend_start(PG_FUNCTION_ARGS)
+{
+ int32 beid = PG_GETARG_INT32(0);
+ TimestampTz result;
+ PgBackendStatus *beentry;
+
+ if ((beentry = pgstat_fetch_stat_beentry(beid)) == NULL)
+ PG_RETURN_NULL();
+
+ else if (!HAS_PGSTAT_PERMISSIONS(beentry->st_userid))
+ PG_RETURN_NULL();
+
+ result = beentry->st_proc_start_timestamp;
+
+ if (result == 0) /* probably can't happen? */
+ PG_RETURN_NULL();
+
+ PG_RETURN_TIMESTAMPTZ(result);
+}
+
+
+Datum
+pg_stat_get_backend_client_addr(PG_FUNCTION_ARGS)
+{
+ int32 beid = PG_GETARG_INT32(0);
+ PgBackendStatus *beentry;
+ SockAddr zero_clientaddr;
+ char remote_host[NI_MAXHOST];
+ int ret;
+
+ if ((beentry = pgstat_fetch_stat_beentry(beid)) == NULL)
+ PG_RETURN_NULL();
+
+ else if (!HAS_PGSTAT_PERMISSIONS(beentry->st_userid))
+ PG_RETURN_NULL();
+
+ /* A zeroed client addr means we don't know */
+ memset(&zero_clientaddr, 0, sizeof(zero_clientaddr));
+ if (memcmp(&(beentry->st_clientaddr), &zero_clientaddr,
+ sizeof(zero_clientaddr)) == 0)
+ PG_RETURN_NULL();
+
+ switch (beentry->st_clientaddr.addr.ss_family)
+ {
+ case AF_INET:
+#ifdef HAVE_IPV6
+ case AF_INET6:
+#endif
+ break;
+ default:
+ PG_RETURN_NULL();
+ }
+
+ remote_host[0] = '\0';
+ ret = pg_getnameinfo_all(&beentry->st_clientaddr.addr,
+ beentry->st_clientaddr.salen,
+ remote_host, sizeof(remote_host),
+ NULL, 0,
+ NI_NUMERICHOST | NI_NUMERICSERV);
+ if (ret != 0)
+ PG_RETURN_NULL();
+
+ clean_ipv6_addr(beentry->st_clientaddr.addr.ss_family, remote_host);
+
+ PG_RETURN_INET_P(DirectFunctionCall1(inet_in,
+ CStringGetDatum(remote_host)));
+}
+
+Datum
+pg_stat_get_backend_client_port(PG_FUNCTION_ARGS)
+{
+ int32 beid = PG_GETARG_INT32(0);
+ PgBackendStatus *beentry;
+ SockAddr zero_clientaddr;
+ char remote_port[NI_MAXSERV];
+ int ret;
+
+ if ((beentry = pgstat_fetch_stat_beentry(beid)) == NULL)
+ PG_RETURN_NULL();
+
+ else if (!HAS_PGSTAT_PERMISSIONS(beentry->st_userid))
+ PG_RETURN_NULL();
+
+ /* A zeroed client addr means we don't know */
+ memset(&zero_clientaddr, 0, sizeof(zero_clientaddr));
+ if (memcmp(&(beentry->st_clientaddr), &zero_clientaddr,
+ sizeof(zero_clientaddr)) == 0)
+ PG_RETURN_NULL();
+
+ switch (beentry->st_clientaddr.addr.ss_family)
+ {
+ case AF_INET:
+#ifdef HAVE_IPV6
+ case AF_INET6:
+#endif
+ break;
+ case AF_UNIX:
+ PG_RETURN_INT32(-1);
+ default:
+ PG_RETURN_NULL();
+ }
+
+ remote_port[0] = '\0';
+ ret = pg_getnameinfo_all(&beentry->st_clientaddr.addr,
+ beentry->st_clientaddr.salen,
+ NULL, 0,
+ remote_port, sizeof(remote_port),
+ NI_NUMERICHOST | NI_NUMERICSERV);
+ if (ret != 0)
+ PG_RETURN_NULL();
+
+ PG_RETURN_DATUM(DirectFunctionCall1(int4in,
+ CStringGetDatum(remote_port)));
+}
+
+
+Datum
+pg_stat_get_db_numbackends(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int32 result;
+ int tot_backends = pgstat_fetch_stat_numbackends();
+ int beid;
+
+ result = 0;
+ for (beid = 1; beid <= tot_backends; beid++)
+ {
+ PgBackendStatus *beentry = pgstat_fetch_stat_beentry(beid);
+
+ if (beentry && beentry->st_databaseid == dbid)
+ result++;
+ }
+
+ PG_RETURN_INT32(result);
+}
+
+
+Datum
+pg_stat_get_db_xact_commit(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_xact_commit);
+
+ PG_RETURN_INT64(result);
+}
+
+
+Datum
+pg_stat_get_db_xact_rollback(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_xact_rollback);
+
+ PG_RETURN_INT64(result);
+}
+
+
+Datum
+pg_stat_get_db_blocks_fetched(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_blocks_fetched);
+
+ PG_RETURN_INT64(result);
+}
+
+
+Datum
+pg_stat_get_db_blocks_hit(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_blocks_hit);
+
+ PG_RETURN_INT64(result);
+}
+
+
+Datum
+pg_stat_get_db_tuples_returned(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_tuples_returned);
+
+ PG_RETURN_INT64(result);
+}
+
+
+Datum
+pg_stat_get_db_tuples_fetched(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_tuples_fetched);
+
+ PG_RETURN_INT64(result);
+}
+
+
+Datum
+pg_stat_get_db_tuples_inserted(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_tuples_inserted);
+
+ PG_RETURN_INT64(result);
+}
+
+
+Datum
+pg_stat_get_db_tuples_updated(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_tuples_updated);
+
+ PG_RETURN_INT64(result);
+}
+
+
+Datum
+pg_stat_get_db_tuples_deleted(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_tuples_deleted);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_stat_reset_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ TimestampTz result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = dbentry->stat_reset_timestamp;
+
+ if (result == 0)
+ PG_RETURN_NULL();
+ else
+ PG_RETURN_TIMESTAMPTZ(result);
+}
+
+Datum
+pg_stat_get_db_temp_files(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = dbentry->n_temp_files;
+
+ PG_RETURN_INT64(result);
+}
+
+
+Datum
+pg_stat_get_db_temp_bytes(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = dbentry->n_temp_bytes;
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_conflict_tablespace(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_conflict_tablespace);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_conflict_lock(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_conflict_lock);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_conflict_snapshot(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_conflict_snapshot);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_conflict_bufferpin(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_conflict_bufferpin);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_conflict_startup_deadlock(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_conflict_startup_deadlock);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_conflict_all(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_conflict_tablespace +
+ dbentry->n_conflict_lock +
+ dbentry->n_conflict_snapshot +
+ dbentry->n_conflict_bufferpin +
+ dbentry->n_conflict_startup_deadlock);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_deadlocks(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_deadlocks);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_checksum_failures(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_StatDBEntry *dbentry;
+
+ if (!DataChecksumsEnabled())
+ PG_RETURN_NULL();
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (dbentry->n_checksum_failures);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_checksum_last_failure(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ TimestampTz result;
+ PgStat_StatDBEntry *dbentry;
+
+ if (!DataChecksumsEnabled())
+ PG_RETURN_NULL();
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = dbentry->last_checksum_failure;
+
+ if (result == 0)
+ PG_RETURN_NULL();
+ else
+ PG_RETURN_TIMESTAMPTZ(result);
+}
+
+Datum
+pg_stat_get_db_blk_read_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = ((double) dbentry->n_block_read_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_blk_write_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ result = 0;
+ else
+ result = ((double) dbentry->n_block_write_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_session_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->total_session_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_active_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->total_active_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_idle_in_transaction_time(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ double result = 0.0;
+ PgStat_StatDBEntry *dbentry;
+
+ /* convert counter from microsec to millisec for display */
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = ((double) dbentry->total_idle_in_xact_time) / 1000.0;
+
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+pg_stat_get_db_sessions(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_sessions_abandoned(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions_abandoned);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_sessions_fatal(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions_fatal);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_db_sessions_killed(PG_FUNCTION_ARGS)
+{
+ Oid dbid = PG_GETARG_OID(0);
+ int64 result = 0;
+ PgStat_StatDBEntry *dbentry;
+
+ if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) != NULL)
+ result = (int64) (dbentry->n_sessions_killed);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->timed_checkpoints);
+}
+
+Datum
+pg_stat_get_bgwriter_requested_checkpoints(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->requested_checkpoints);
+}
+
+Datum
+pg_stat_get_bgwriter_buf_written_checkpoints(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->buf_written_checkpoints);
+}
+
+Datum
+pg_stat_get_bgwriter_buf_written_clean(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_INT64(pgstat_fetch_stat_bgwriter()->buf_written_clean);
+}
+
+Datum
+pg_stat_get_bgwriter_maxwritten_clean(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_INT64(pgstat_fetch_stat_bgwriter()->maxwritten_clean);
+}
+
+Datum
+pg_stat_get_checkpoint_write_time(PG_FUNCTION_ARGS)
+{
+ /* time is already in msec, just convert to double for presentation */
+ PG_RETURN_FLOAT8((double)
+ pgstat_fetch_stat_checkpointer()->checkpoint_write_time);
+}
+
+Datum
+pg_stat_get_checkpoint_sync_time(PG_FUNCTION_ARGS)
+{
+ /* time is already in msec, just convert to double for presentation */
+ PG_RETURN_FLOAT8((double)
+ pgstat_fetch_stat_checkpointer()->checkpoint_sync_time);
+}
+
+Datum
+pg_stat_get_bgwriter_stat_reset_time(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_TIMESTAMPTZ(pgstat_fetch_stat_bgwriter()->stat_reset_timestamp);
+}
+
+Datum
+pg_stat_get_buf_written_backend(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->buf_written_backend);
+}
+
+Datum
+pg_stat_get_buf_fsync_backend(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_INT64(pgstat_fetch_stat_checkpointer()->buf_fsync_backend);
+}
+
+Datum
+pg_stat_get_buf_alloc(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_INT64(pgstat_fetch_stat_bgwriter()->buf_alloc);
+}
+
+/*
+ * Returns statistics of WAL activity
+ */
+Datum
+pg_stat_get_wal(PG_FUNCTION_ARGS)
+{
+#define PG_STAT_GET_WAL_COLS 9
+ TupleDesc tupdesc;
+ Datum values[PG_STAT_GET_WAL_COLS];
+ bool nulls[PG_STAT_GET_WAL_COLS];
+ char buf[256];
+ PgStat_WalStats *wal_stats;
+
+ /* Initialise values and NULL flags arrays */
+ MemSet(values, 0, sizeof(values));
+ MemSet(nulls, 0, sizeof(nulls));
+
+ /* Initialise attributes information in the tuple descriptor */
+ tupdesc = CreateTemplateTupleDesc(PG_STAT_GET_WAL_COLS);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "wal_records",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 2, "wal_fpi",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 3, "wal_bytes",
+ NUMERICOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 4, "wal_buffers_full",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 5, "wal_write",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 6, "wal_sync",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 7, "wal_write_time",
+ FLOAT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 8, "wal_sync_time",
+ FLOAT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 9, "stats_reset",
+ TIMESTAMPTZOID, -1, 0);
+
+ BlessTupleDesc(tupdesc);
+
+ /* Get statistics about WAL activity */
+ wal_stats = pgstat_fetch_stat_wal();
+
+ /* Fill values and NULLs */
+ values[0] = Int64GetDatum(wal_stats->wal_records);
+ values[1] = Int64GetDatum(wal_stats->wal_fpi);
+
+ /* Convert to numeric. */
+ snprintf(buf, sizeof buf, UINT64_FORMAT, wal_stats->wal_bytes);
+ values[2] = DirectFunctionCall3(numeric_in,
+ CStringGetDatum(buf),
+ ObjectIdGetDatum(0),
+ Int32GetDatum(-1));
+
+ values[3] = Int64GetDatum(wal_stats->wal_buffers_full);
+ values[4] = Int64GetDatum(wal_stats->wal_write);
+ values[5] = Int64GetDatum(wal_stats->wal_sync);
+
+ /* Convert counters from microsec to millisec for display */
+ values[6] = Float8GetDatum(((double) wal_stats->wal_write_time) / 1000.0);
+ values[7] = Float8GetDatum(((double) wal_stats->wal_sync_time) / 1000.0);
+
+ values[8] = TimestampTzGetDatum(wal_stats->stat_reset_timestamp);
+
+ /* Returns the record as Datum */
+ PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
+}
+
+/*
+ * Returns statistics of SLRU caches.
+ */
+Datum
+pg_stat_get_slru(PG_FUNCTION_ARGS)
+{
+#define PG_STAT_GET_SLRU_COLS 9
+ ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+ int i;
+ PgStat_SLRUStats *stats;
+
+ InitMaterializedSRF(fcinfo, 0);
+
+ /* request SLRU stats from the cumulative stats system */
+ stats = pgstat_fetch_slru();
+
+ for (i = 0;; i++)
+ {
+ /* for each row */
+ Datum values[PG_STAT_GET_SLRU_COLS];
+ bool nulls[PG_STAT_GET_SLRU_COLS];
+ PgStat_SLRUStats stat;
+ const char *name;
+
+ name = pgstat_get_slru_name(i);
+
+ if (!name)
+ break;
+
+ stat = stats[i];
+ MemSet(values, 0, sizeof(values));
+ MemSet(nulls, 0, sizeof(nulls));
+
+ values[0] = PointerGetDatum(cstring_to_text(name));
+ values[1] = Int64GetDatum(stat.blocks_zeroed);
+ values[2] = Int64GetDatum(stat.blocks_hit);
+ values[3] = Int64GetDatum(stat.blocks_read);
+ values[4] = Int64GetDatum(stat.blocks_written);
+ values[5] = Int64GetDatum(stat.blocks_exists);
+ values[6] = Int64GetDatum(stat.flush);
+ values[7] = Int64GetDatum(stat.truncate);
+ values[8] = TimestampTzGetDatum(stat.stat_reset_timestamp);
+
+ tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
+ }
+
+ return (Datum) 0;
+}
+
+Datum
+pg_stat_get_xact_numscans(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_TableStatus *tabentry;
+
+ if ((tabentry = find_tabstat_entry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->t_counts.t_numscans);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_xact_tuples_returned(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_TableStatus *tabentry;
+
+ if ((tabentry = find_tabstat_entry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->t_counts.t_tuples_returned);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_xact_tuples_fetched(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_TableStatus *tabentry;
+
+ if ((tabentry = find_tabstat_entry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->t_counts.t_tuples_fetched);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_xact_tuples_inserted(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_TableStatus *tabentry;
+ PgStat_TableXactStatus *trans;
+
+ if ((tabentry = find_tabstat_entry(relid)) == NULL)
+ result = 0;
+ else
+ {
+ result = tabentry->t_counts.t_tuples_inserted;
+ /* live subtransactions' counts aren't in t_tuples_inserted yet */
+ for (trans = tabentry->trans; trans != NULL; trans = trans->upper)
+ result += trans->tuples_inserted;
+ }
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_xact_tuples_updated(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_TableStatus *tabentry;
+ PgStat_TableXactStatus *trans;
+
+ if ((tabentry = find_tabstat_entry(relid)) == NULL)
+ result = 0;
+ else
+ {
+ result = tabentry->t_counts.t_tuples_updated;
+ /* live subtransactions' counts aren't in t_tuples_updated yet */
+ for (trans = tabentry->trans; trans != NULL; trans = trans->upper)
+ result += trans->tuples_updated;
+ }
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_xact_tuples_deleted(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_TableStatus *tabentry;
+ PgStat_TableXactStatus *trans;
+
+ if ((tabentry = find_tabstat_entry(relid)) == NULL)
+ result = 0;
+ else
+ {
+ result = tabentry->t_counts.t_tuples_deleted;
+ /* live subtransactions' counts aren't in t_tuples_deleted yet */
+ for (trans = tabentry->trans; trans != NULL; trans = trans->upper)
+ result += trans->tuples_deleted;
+ }
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_xact_tuples_hot_updated(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_TableStatus *tabentry;
+
+ if ((tabentry = find_tabstat_entry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->t_counts.t_tuples_hot_updated);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_xact_blocks_fetched(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_TableStatus *tabentry;
+
+ if ((tabentry = find_tabstat_entry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->t_counts.t_blocks_fetched);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_xact_blocks_hit(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 result;
+ PgStat_TableStatus *tabentry;
+
+ if ((tabentry = find_tabstat_entry(relid)) == NULL)
+ result = 0;
+ else
+ result = (int64) (tabentry->t_counts.t_blocks_hit);
+
+ PG_RETURN_INT64(result);
+}
+
+Datum
+pg_stat_get_xact_function_calls(PG_FUNCTION_ARGS)
+{
+ Oid funcid = PG_GETARG_OID(0);
+ PgStat_BackendFunctionEntry *funcentry;
+
+ if ((funcentry = find_funcstat_entry(funcid)) == NULL)
+ PG_RETURN_NULL();
+ PG_RETURN_INT64(funcentry->f_counts.f_numcalls);
+}
+
+Datum
+pg_stat_get_xact_function_total_time(PG_FUNCTION_ARGS)
+{
+ Oid funcid = PG_GETARG_OID(0);
+ PgStat_BackendFunctionEntry *funcentry;
+
+ if ((funcentry = find_funcstat_entry(funcid)) == NULL)
+ PG_RETURN_NULL();
+ PG_RETURN_FLOAT8(INSTR_TIME_GET_MILLISEC(funcentry->f_counts.f_total_time));
+}
+
+Datum
+pg_stat_get_xact_function_self_time(PG_FUNCTION_ARGS)
+{
+ Oid funcid = PG_GETARG_OID(0);
+ PgStat_BackendFunctionEntry *funcentry;
+
+ if ((funcentry = find_funcstat_entry(funcid)) == NULL)
+ PG_RETURN_NULL();
+ PG_RETURN_FLOAT8(INSTR_TIME_GET_MILLISEC(funcentry->f_counts.f_self_time));
+}
+
+
+/* Get the timestamp of the current statistics snapshot */
+Datum
+pg_stat_get_snapshot_timestamp(PG_FUNCTION_ARGS)
+{
+ bool have_snapshot;
+ TimestampTz ts;
+
+ ts = pgstat_get_stat_snapshot_timestamp(&have_snapshot);
+
+ if (!have_snapshot)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TIMESTAMPTZ(ts);
+}
+
+/* Discard the active statistics snapshot */
+Datum
+pg_stat_clear_snapshot(PG_FUNCTION_ARGS)
+{
+ pgstat_clear_snapshot();
+
+ PG_RETURN_VOID();
+}
+
+
+/* Force statistics to be reported at the next occasion */
+Datum
+pg_stat_force_next_flush(PG_FUNCTION_ARGS)
+{
+ pgstat_force_next_flush();
+
+ PG_RETURN_VOID();
+}
+
+
+/* Reset all counters for the current database */
+Datum
+pg_stat_reset(PG_FUNCTION_ARGS)
+{
+ pgstat_reset_counters();
+
+ PG_RETURN_VOID();
+}
+
+/* Reset some shared cluster-wide counters */
+Datum
+pg_stat_reset_shared(PG_FUNCTION_ARGS)
+{
+ char *target = text_to_cstring(PG_GETARG_TEXT_PP(0));
+
+ if (strcmp(target, "archiver") == 0)
+ pgstat_reset_of_kind(PGSTAT_KIND_ARCHIVER);
+ else if (strcmp(target, "bgwriter") == 0)
+ {
+ /*
+ * Historically the checkpointer was part of the bgwriter, so continue
+ * to reset both for now.
+ */
+ pgstat_reset_of_kind(PGSTAT_KIND_BGWRITER);
+ pgstat_reset_of_kind(PGSTAT_KIND_CHECKPOINTER);
+ }
+ else if (strcmp(target, "recovery_prefetch") == 0)
+ XLogPrefetchResetStats();
+ else if (strcmp(target, "wal") == 0)
+ pgstat_reset_of_kind(PGSTAT_KIND_WAL);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unrecognized reset target: \"%s\"", target),
+ errhint("Target must be \"archiver\", \"bgwriter\", \"recovery_prefetch\", or \"wal\".")));
+
+ PG_RETURN_VOID();
+}
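+
+/*
+ * Illustrative usage (a sketch): the valid reset targets are exactly the
+ * strings checked above, for example:
+ *
+ *   SELECT pg_stat_reset_shared('bgwriter');
+ *   SELECT pg_stat_reset_shared('wal');
+ */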
+
+/*
+ * Reset statistics for a single object, which may belong to the current
+ * database or be shared across all databases in the cluster.
+ */
+Datum
+pg_stat_reset_single_table_counters(PG_FUNCTION_ARGS)
+{
+ Oid taboid = PG_GETARG_OID(0);
+ Oid dboid = (IsSharedRelation(taboid) ? InvalidOid : MyDatabaseId);
+
+ pgstat_reset(PGSTAT_KIND_RELATION, dboid, taboid);
+
+ PG_RETURN_VOID();
+}
+
+Datum
+pg_stat_reset_single_function_counters(PG_FUNCTION_ARGS)
+{
+ Oid funcoid = PG_GETARG_OID(0);
+
+ pgstat_reset(PGSTAT_KIND_FUNCTION, MyDatabaseId, funcoid);
+
+ PG_RETURN_VOID();
+}
+
+/* Reset SLRU counters (a specific one or all of them). */
+Datum
+pg_stat_reset_slru(PG_FUNCTION_ARGS)
+{
+ char *target = NULL;
+
+ if (PG_ARGISNULL(0))
+ pgstat_reset_of_kind(PGSTAT_KIND_SLRU);
+ else
+ {
+ target = text_to_cstring(PG_GETARG_TEXT_PP(0));
+ pgstat_reset_slru(target);
+ }
+
+ PG_RETURN_VOID();
+}
+
+/* Reset replication slots stats (a specific one or all of them). */
+Datum
+pg_stat_reset_replication_slot(PG_FUNCTION_ARGS)
+{
+ char *target = NULL;
+
+ if (PG_ARGISNULL(0))
+ pgstat_reset_of_kind(PGSTAT_KIND_REPLSLOT);
+ else
+ {
+ target = text_to_cstring(PG_GETARG_TEXT_PP(0));
+ pgstat_reset_replslot(target);
+ }
+
+ PG_RETURN_VOID();
+}
+
+/* Reset subscription stats (a specific one or all of them) */
+Datum
+pg_stat_reset_subscription_stats(PG_FUNCTION_ARGS)
+{
+ Oid subid;
+
+ if (PG_ARGISNULL(0))
+ {
+ /* Clear all subscription stats */
+ pgstat_reset_of_kind(PGSTAT_KIND_SUBSCRIPTION);
+ }
+ else
+ {
+ subid = PG_GETARG_OID(0);
+
+ if (!OidIsValid(subid))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid subscription OID %u", subid)));
+ pgstat_reset(PGSTAT_KIND_SUBSCRIPTION, InvalidOid, subid);
+ }
+
+ PG_RETURN_VOID();
+}
+
+Datum
+pg_stat_get_archiver(PG_FUNCTION_ARGS)
+{
+ TupleDesc tupdesc;
+ Datum values[7];
+ bool nulls[7];
+ PgStat_ArchiverStats *archiver_stats;
+
+ /* Initialise values and NULL flags arrays */
+ MemSet(values, 0, sizeof(values));
+ MemSet(nulls, 0, sizeof(nulls));
+
+ /* Initialise attributes information in the tuple descriptor */
+ tupdesc = CreateTemplateTupleDesc(7);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "archived_count",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 2, "last_archived_wal",
+ TEXTOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 3, "last_archived_time",
+ TIMESTAMPTZOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 4, "failed_count",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 5, "last_failed_wal",
+ TEXTOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 6, "last_failed_time",
+ TIMESTAMPTZOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 7, "stats_reset",
+ TIMESTAMPTZOID, -1, 0);
+
+ BlessTupleDesc(tupdesc);
+
+ /* Get statistics about the archiver process */
+ archiver_stats = pgstat_fetch_stat_archiver();
+
+ /* Fill values and NULLs */
+ values[0] = Int64GetDatum(archiver_stats->archived_count);
+ if (*(archiver_stats->last_archived_wal) == '\0')
+ nulls[1] = true;
+ else
+ values[1] = CStringGetTextDatum(archiver_stats->last_archived_wal);
+
+ if (archiver_stats->last_archived_timestamp == 0)
+ nulls[2] = true;
+ else
+ values[2] = TimestampTzGetDatum(archiver_stats->last_archived_timestamp);
+
+ values[3] = Int64GetDatum(archiver_stats->failed_count);
+ if (*(archiver_stats->last_failed_wal) == '\0')
+ nulls[4] = true;
+ else
+ values[4] = CStringGetTextDatum(archiver_stats->last_failed_wal);
+
+ if (archiver_stats->last_failed_timestamp == 0)
+ nulls[5] = true;
+ else
+ values[5] = TimestampTzGetDatum(archiver_stats->last_failed_timestamp);
+
+ if (archiver_stats->stat_reset_timestamp == 0)
+ nulls[6] = true;
+ else
+ values[6] = TimestampTzGetDatum(archiver_stats->stat_reset_timestamp);
+
+ /* Returns the record as Datum */
+ PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
+}
+
+/*
+ * Get the statistics for the given replication slot. If no statistics are
+ * available for the slot, return all-zero stats.
+ */
+Datum
+pg_stat_get_replication_slot(PG_FUNCTION_ARGS)
+{
+#define PG_STAT_GET_REPLICATION_SLOT_COLS 10
+ text *slotname_text = PG_GETARG_TEXT_P(0);
+ NameData slotname;
+ TupleDesc tupdesc;
+ Datum values[PG_STAT_GET_REPLICATION_SLOT_COLS];
+ bool nulls[PG_STAT_GET_REPLICATION_SLOT_COLS];
+ PgStat_StatReplSlotEntry *slotent;
+ PgStat_StatReplSlotEntry allzero;
+
+ /* Initialise values and NULL flags arrays */
+ MemSet(values, 0, sizeof(values));
+ MemSet(nulls, 0, sizeof(nulls));
+
+ /* Initialise attributes information in the tuple descriptor */
+ tupdesc = CreateTemplateTupleDesc(PG_STAT_GET_REPLICATION_SLOT_COLS);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "slot_name",
+ TEXTOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 2, "spill_txns",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 3, "spill_count",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 4, "spill_bytes",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 5, "stream_txns",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 6, "stream_count",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 7, "stream_bytes",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 8, "total_txns",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 9, "total_bytes",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 10, "stats_reset",
+ TIMESTAMPTZOID, -1, 0);
+ BlessTupleDesc(tupdesc);
+
+ namestrcpy(&slotname, text_to_cstring(slotname_text));
+ slotent = pgstat_fetch_replslot(slotname);
+ if (!slotent)
+ {
+ /*
+ * If the slot is not found, report all-zero stats. This is possible if
+ * the slot-creation message was lost.
+ */
+ memset(&allzero, 0, sizeof(PgStat_StatReplSlotEntry));
+ slotent = &allzero;
+ }
+
+ values[0] = CStringGetTextDatum(NameStr(slotname));
+ values[1] = Int64GetDatum(slotent->spill_txns);
+ values[2] = Int64GetDatum(slotent->spill_count);
+ values[3] = Int64GetDatum(slotent->spill_bytes);
+ values[4] = Int64GetDatum(slotent->stream_txns);
+ values[5] = Int64GetDatum(slotent->stream_count);
+ values[6] = Int64GetDatum(slotent->stream_bytes);
+ values[7] = Int64GetDatum(slotent->total_txns);
+ values[8] = Int64GetDatum(slotent->total_bytes);
+
+ if (slotent->stat_reset_timestamp == 0)
+ nulls[9] = true;
+ else
+ values[9] = TimestampTzGetDatum(slotent->stat_reset_timestamp);
+
+ /* Returns the record as Datum */
+ PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
+}
+
+/*
+ * Get the statistics for the given subscription. If no statistics are
+ * available for the subscription, return all-zero stats.
+ */
+Datum
+pg_stat_get_subscription_stats(PG_FUNCTION_ARGS)
+{
+#define PG_STAT_GET_SUBSCRIPTION_STATS_COLS 4
+ Oid subid = PG_GETARG_OID(0);
+ TupleDesc tupdesc;
+ Datum values[PG_STAT_GET_SUBSCRIPTION_STATS_COLS];
+ bool nulls[PG_STAT_GET_SUBSCRIPTION_STATS_COLS];
+ PgStat_StatSubEntry *subentry;
+ PgStat_StatSubEntry allzero;
+
+ /* Get subscription stats */
+ subentry = pgstat_fetch_stat_subscription(subid);
+
+ /* Initialise attributes information in the tuple descriptor */
+ tupdesc = CreateTemplateTupleDesc(PG_STAT_GET_SUBSCRIPTION_STATS_COLS);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "subid",
+ OIDOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 2, "apply_error_count",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 3, "sync_error_count",
+ INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 4, "stats_reset",
+ TIMESTAMPTZOID, -1, 0);
+ BlessTupleDesc(tupdesc);
+
+ /* Initialise values and NULL flags arrays */
+ MemSet(values, 0, sizeof(values));
+ MemSet(nulls, 0, sizeof(nulls));
+
+ if (!subentry)
+ {
+ /* If the subscription is not found, initialise its stats */
+ memset(&allzero, 0, sizeof(PgStat_StatSubEntry));
+ subentry = &allzero;
+ }
+
+ /* subid */
+ values[0] = ObjectIdGetDatum(subid);
+
+ /* apply_error_count */
+ values[1] = Int64GetDatum(subentry->apply_error_count);
+
+ /* sync_error_count */
+ values[2] = Int64GetDatum(subentry->sync_error_count);
+
+ /* stats_reset */
+ if (subentry->stat_reset_timestamp == 0)
+ nulls[3] = true;
+ else
+ values[3] = TimestampTzGetDatum(subentry->stat_reset_timestamp);
+
+ /* Returns the record as Datum */
+ PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
+}
+
+/*
+ * Checks for the presence of stats for an object with the provided kind,
+ * database OID, and object OID.
+ *
+ * This is useful for tests, but not really anything else. Therefore not
+ * documented.
+ */
+Datum
+pg_stat_have_stats(PG_FUNCTION_ARGS)
+{
+ char *stats_type = text_to_cstring(PG_GETARG_TEXT_P(0));
+ Oid dboid = PG_GETARG_OID(1);
+ Oid objoid = PG_GETARG_OID(2);
+ PgStat_Kind kind = pgstat_get_kind_from_str(stats_type);
+
+ PG_RETURN_BOOL(pgstat_have_entry(kind, dboid, objoid));
+}
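+
+/*
+ * Illustrative usage (a sketch): callable from SQL with one of the kind
+ * names understood by pgstat_get_kind_from_str(); 'database' is assumed
+ * to be such a name, and <dboid> stands in for a real database OID:
+ *
+ *   SELECT pg_stat_have_stats('database', <dboid>, 0);
+ */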
diff --git a/src/backend/utils/adt/pseudotypes.c b/src/backend/utils/adt/pseudotypes.c
new file mode 100644
index 0000000..c820250
--- /dev/null
+++ b/src/backend/utils/adt/pseudotypes.c
@@ -0,0 +1,391 @@
+/*-------------------------------------------------------------------------
+ *
+ * pseudotypes.c
+ * Functions for the system pseudo-types.
+ *
+ * A pseudo-type isn't really a type and never has any operations, but
+ * we do need to supply input and output functions to satisfy the links
+ * in the pseudo-type's entry in pg_type. In most cases the functions
+ * just throw an error if invoked. (XXX the error messages here cover
+ * the most common case, but might be confusing in some contexts. Can
+ * we do better?)
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/pseudotypes.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "libpq/pqformat.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/rangetypes.h"
+#include "utils/multirangetypes.h"
+
+
+/*
+ * These macros generate input and output functions for a pseudo-type that
+ * will reject all input and output attempts. (But for some types, only
+ * the input function need be dummy.)
+ */
+#define PSEUDOTYPE_DUMMY_INPUT_FUNC(typname) \
+Datum \
+typname##_in(PG_FUNCTION_ARGS) \
+{ \
+ ereport(ERROR, \
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
+ errmsg("cannot accept a value of type %s", #typname))); \
+\
+ PG_RETURN_VOID(); /* keep compiler quiet */ \
+} \
+\
+extern int no_such_variable
+
+#define PSEUDOTYPE_DUMMY_IO_FUNCS(typname) \
+PSEUDOTYPE_DUMMY_INPUT_FUNC(typname); \
+\
+Datum \
+typname##_out(PG_FUNCTION_ARGS) \
+{ \
+ ereport(ERROR, \
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
+ errmsg("cannot display a value of type %s", #typname))); \
+\
+ PG_RETURN_VOID(); /* keep compiler quiet */ \
+} \
+\
+extern int no_such_variable
+
+/*
+ * Likewise for binary send/receive functions. We don't bother with these
+ * at all for many pseudotypes, but some have them. (By convention, if
+ * a type has a send function it should have a receive function, even if
+ * that's only dummy.)
+ */
+#define PSEUDOTYPE_DUMMY_RECEIVE_FUNC(typname) \
+Datum \
+typname##_recv(PG_FUNCTION_ARGS) \
+{ \
+ ereport(ERROR, \
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
+ errmsg("cannot accept a value of type %s", #typname))); \
+\
+ PG_RETURN_VOID(); /* keep compiler quiet */ \
+} \
+\
+extern int no_such_variable
+
+#define PSEUDOTYPE_DUMMY_BINARY_IO_FUNCS(typname) \
+PSEUDOTYPE_DUMMY_RECEIVE_FUNC(typname); \
+\
+Datum \
+typname##_send(PG_FUNCTION_ARGS) \
+{ \
+ ereport(ERROR, \
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
+ errmsg("cannot display a value of type %s", #typname))); \
+\
+ PG_RETURN_VOID(); /* keep compiler quiet */ \
+} \
+\
+extern int no_such_variable
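+
+/*
+ * As a sketch of what these macros produce, PSEUDOTYPE_DUMMY_INPUT_FUNC(trigger)
+ * expands to roughly:
+ *
+ *   Datum
+ *   trigger_in(PG_FUNCTION_ARGS)
+ *   {
+ *       ereport(ERROR,
+ *               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ *                errmsg("cannot accept a value of type %s", "trigger")));
+ *       PG_RETURN_VOID();
+ *   }
+ *
+ * The trailing "extern int no_such_variable" declaration only serves to
+ * absorb the semicolon written after each macro invocation.
+ */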
+
+
+/*
+ * cstring
+ *
+ * cstring is marked as a pseudo-type because we don't want people using it
+ * in tables. But it's really a perfectly functional type, so provide
+ * a full set of working I/O functions for it. Among other things, this
+ * allows manual invocation of datatype I/O functions, along the lines of
+ * "SELECT foo_in('blah')" or "SELECT foo_out(some-foo-value)".
+ */
+Datum
+cstring_in(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+
+ PG_RETURN_CSTRING(pstrdup(str));
+}
+
+Datum
+cstring_out(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+
+ PG_RETURN_CSTRING(pstrdup(str));
+}
+
+Datum
+cstring_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ char *str;
+ int nbytes;
+
+ str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
+ PG_RETURN_CSTRING(str);
+}
+
+Datum
+cstring_send(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendtext(&buf, str, strlen(str));
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/*
+ * anyarray
+ *
+ * We need to allow output of anyarray so that, e.g., pg_statistic columns
+ * can be printed. Input has to be disallowed, however.
+ *
+ * XXX anyarray_recv could actually be made to work, since the incoming
+ * array data would contain the element type OID. It seems unlikely that
+ * it'd be sufficiently type-safe, though.
+ */
+PSEUDOTYPE_DUMMY_INPUT_FUNC(anyarray);
+PSEUDOTYPE_DUMMY_RECEIVE_FUNC(anyarray);
+
+Datum
+anyarray_out(PG_FUNCTION_ARGS)
+{
+ return array_out(fcinfo);
+}
+
+Datum
+anyarray_send(PG_FUNCTION_ARGS)
+{
+ return array_send(fcinfo);
+}
+
+/*
+ * anycompatiblearray
+ *
+ * We may as well allow output, since we do for anyarray.
+ */
+PSEUDOTYPE_DUMMY_INPUT_FUNC(anycompatiblearray);
+PSEUDOTYPE_DUMMY_RECEIVE_FUNC(anycompatiblearray);
+
+Datum
+anycompatiblearray_out(PG_FUNCTION_ARGS)
+{
+ return array_out(fcinfo);
+}
+
+Datum
+anycompatiblearray_send(PG_FUNCTION_ARGS)
+{
+ return array_send(fcinfo);
+}
+
+/*
+ * anyenum
+ *
+ * We may as well allow output, since enum_out will in fact work.
+ */
+PSEUDOTYPE_DUMMY_INPUT_FUNC(anyenum);
+
+Datum
+anyenum_out(PG_FUNCTION_ARGS)
+{
+ return enum_out(fcinfo);
+}
+
+/*
+ * anyrange
+ *
+ * We may as well allow output, since range_out will in fact work.
+ */
+PSEUDOTYPE_DUMMY_INPUT_FUNC(anyrange);
+
+Datum
+anyrange_out(PG_FUNCTION_ARGS)
+{
+ return range_out(fcinfo);
+}
+
+/*
+ * anycompatiblerange
+ *
+ * We may as well allow output, since range_out will in fact work.
+ */
+PSEUDOTYPE_DUMMY_INPUT_FUNC(anycompatiblerange);
+
+Datum
+anycompatiblerange_out(PG_FUNCTION_ARGS)
+{
+ return range_out(fcinfo);
+}
+
+/*
+ * anycompatiblemultirange
+ *
+ * We may as well allow output, since multirange_out will in fact work.
+ */
+PSEUDOTYPE_DUMMY_INPUT_FUNC(anycompatiblemultirange);
+
+Datum
+anycompatiblemultirange_out(PG_FUNCTION_ARGS)
+{
+ return multirange_out(fcinfo);
+}
+
+/*
+ * anymultirange_in - input routine for pseudo-type ANYMULTIRANGE.
+ */
+Datum
+anymultirange_in(PG_FUNCTION_ARGS)
+{
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot accept a value of type %s", "anymultirange")));
+
+ PG_RETURN_VOID(); /* keep compiler quiet */
+}
+
+/*
+ * anymultirange_out - output routine for pseudo-type ANYMULTIRANGE.
+ *
+ * We may as well allow this, since multirange_out will in fact work.
+ */
+Datum
+anymultirange_out(PG_FUNCTION_ARGS)
+{
+ return multirange_out(fcinfo);
+}
+
+/*
+ * void
+ *
+ * We support void_in so that PL functions can return VOID without any
+ * special hack in the PL handler. Whatever value the PL thinks it's
+ * returning will just be ignored. Conversely, void_out and void_send
+ * are needed so that "SELECT function_returning_void(...)" works.
+ */
+Datum
+void_in(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_VOID(); /* you were expecting something different? */
+}
+
+Datum
+void_out(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_CSTRING(pstrdup(""));
+}
+
+Datum
+void_recv(PG_FUNCTION_ARGS)
+{
+ /*
+ * Note that since we consume no bytes, an attempt to send anything but an
+ * empty string will result in an "invalid message format" error.
+ */
+ PG_RETURN_VOID();
+}
+
+Datum
+void_send(PG_FUNCTION_ARGS)
+{
+ StringInfoData buf;
+
+ /* send an empty string */
+ pq_begintypsend(&buf);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/*
+ * shell
+ *
+ * shell_in and shell_out are entered in pg_type for "shell" types
+ * (those not yet filled in). They should be unreachable, but we
+ * set them up just in case some code path tries to do I/O without
+ * having checked pg_type.typisdefined anywhere along the way.
+ */
+Datum
+shell_in(PG_FUNCTION_ARGS)
+{
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot accept a value of a shell type")));
+
+ PG_RETURN_VOID(); /* keep compiler quiet */
+}
+
+Datum
+shell_out(PG_FUNCTION_ARGS)
+{
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot display a value of a shell type")));
+
+ PG_RETURN_VOID(); /* keep compiler quiet */
+}
+
+
+/*
+ * pg_node_tree
+ *
+ * pg_node_tree isn't really a pseudotype --- it's real enough to be a table
+ * column --- but it presently has no operations of its own, and disallows
+ * input too, so its I/O functions seem to fit here as much as anywhere.
+ *
+ * We must disallow input of pg_node_tree values because the SQL functions
+ * that operate on the type are not secure against malformed input.
+ * We do want to allow output, though.
+ */
+PSEUDOTYPE_DUMMY_INPUT_FUNC(pg_node_tree);
+PSEUDOTYPE_DUMMY_RECEIVE_FUNC(pg_node_tree);
+
+Datum
+pg_node_tree_out(PG_FUNCTION_ARGS)
+{
+ return textout(fcinfo);
+}
+
+Datum
+pg_node_tree_send(PG_FUNCTION_ARGS)
+{
+ return textsend(fcinfo);
+}
+
+/*
+ * pg_ddl_command
+ *
+ * Like pg_node_tree, pg_ddl_command isn't really a pseudotype; it's here
+ * for the same reasons as that one.
+ *
+ * We don't have any good way to output this type directly, so punt
+ * for output as well as input.
+ */
+PSEUDOTYPE_DUMMY_IO_FUNCS(pg_ddl_command);
+PSEUDOTYPE_DUMMY_BINARY_IO_FUNCS(pg_ddl_command);
+
+
+/*
+ * Dummy I/O functions for various other pseudotypes.
+ */
+PSEUDOTYPE_DUMMY_IO_FUNCS(any);
+PSEUDOTYPE_DUMMY_IO_FUNCS(trigger);
+PSEUDOTYPE_DUMMY_IO_FUNCS(event_trigger);
+PSEUDOTYPE_DUMMY_IO_FUNCS(language_handler);
+PSEUDOTYPE_DUMMY_IO_FUNCS(fdw_handler);
+PSEUDOTYPE_DUMMY_IO_FUNCS(table_am_handler);
+PSEUDOTYPE_DUMMY_IO_FUNCS(index_am_handler);
+PSEUDOTYPE_DUMMY_IO_FUNCS(tsm_handler);
+PSEUDOTYPE_DUMMY_IO_FUNCS(internal);
+PSEUDOTYPE_DUMMY_IO_FUNCS(anyelement);
+PSEUDOTYPE_DUMMY_IO_FUNCS(anynonarray);
+PSEUDOTYPE_DUMMY_IO_FUNCS(anycompatible);
+PSEUDOTYPE_DUMMY_IO_FUNCS(anycompatiblenonarray);
diff --git a/src/backend/utils/adt/quote.c b/src/backend/utils/adt/quote.c
new file mode 100644
index 0000000..0a46674
--- /dev/null
+++ b/src/backend/utils/adt/quote.c
@@ -0,0 +1,131 @@
+/*-------------------------------------------------------------------------
+ *
+ * quote.c
+ * Functions for quoting identifiers and literals
+ *
+ * Portions Copyright (c) 2000-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/quote.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "utils/builtins.h"
+
+
+/*
+ * quote_ident -
+ * returns a properly quoted identifier
+ */
+Datum
+quote_ident(PG_FUNCTION_ARGS)
+{
+ text *t = PG_GETARG_TEXT_PP(0);
+ const char *qstr;
+ char *str;
+
+ str = text_to_cstring(t);
+ qstr = quote_identifier(str);
+ PG_RETURN_TEXT_P(cstring_to_text(qstr));
+}
+
+/*
+ * quote_literal_internal -
+ * helper function for quote_literal and quote_literal_cstr
+ *
+ * NOTE: think not to make this function's behavior change with
+ * standard_conforming_strings. We don't know where the result
+ * literal will be used, and so we must generate a result that
+ * will work with either setting. Take a look at what dblink
+ * uses this for before thinking you know better.
+ */
+static size_t
+quote_literal_internal(char *dst, const char *src, size_t len)
+{
+ const char *s;
+ char *savedst = dst;
+
+ for (s = src; s < src + len; s++)
+ {
+ if (*s == '\\')
+ {
+ *dst++ = ESCAPE_STRING_SYNTAX;
+ break;
+ }
+ }
+
+ *dst++ = '\'';
+ while (len-- > 0)
+ {
+ if (SQL_STR_DOUBLE(*src, true))
+ *dst++ = *src;
+ *dst++ = *src++;
+ }
+ *dst++ = '\'';
+
+ return dst - savedst;
+}
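+
+/*
+ * Worked example (illustrative): for the input O'Reilly the result is
+ * 'O''Reilly', since embedded quotes are doubled.  For an input containing
+ * a backslash, such as C:\temp, the result is E'C:\\temp': spotting a
+ * backslash makes the function prepend ESCAPE_STRING_SYNTAX, and the
+ * backslash itself is doubled.
+ */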
+
+/*
+ * quote_literal -
+ * returns a properly quoted literal
+ */
+Datum
+quote_literal(PG_FUNCTION_ARGS)
+{
+ text *t = PG_GETARG_TEXT_PP(0);
+ text *result;
+ char *cp1;
+ char *cp2;
+ int len;
+
+ len = VARSIZE_ANY_EXHDR(t);
+ /* We make a worst-case result area; wasting a little space is OK */
+ result = (text *) palloc(len * 2 + 3 + VARHDRSZ);
+
+ cp1 = VARDATA_ANY(t);
+ cp2 = VARDATA(result);
+
+ SET_VARSIZE(result, VARHDRSZ + quote_literal_internal(cp2, cp1, len));
+
+ PG_RETURN_TEXT_P(result);
+}
+
+/*
+ * quote_literal_cstr -
+ * returns a properly quoted literal
+ */
+char *
+quote_literal_cstr(const char *rawstr)
+{
+ char *result;
+ int len;
+ int newlen;
+
+ len = strlen(rawstr);
+ /* We make a worst-case result area; wasting a little space is OK */
+ result = palloc(len * 2 + 3 + 1);
+
+ newlen = quote_literal_internal(result, rawstr, len);
+ result[newlen] = '\0';
+
+ return result;
+}
+
+/*
+ * quote_nullable -
+ * Returns a properly quoted literal, with null values returned
+ * as the text string 'NULL'.
+ */
+Datum
+quote_nullable(PG_FUNCTION_ARGS)
+{
+ if (PG_ARGISNULL(0))
+ PG_RETURN_TEXT_P(cstring_to_text("NULL"));
+ else
+ PG_RETURN_DATUM(DirectFunctionCall1(quote_literal,
+ PG_GETARG_DATUM(0)));
+}
diff --git a/src/backend/utils/adt/rangetypes.c b/src/backend/utils/adt/rangetypes.c
new file mode 100644
index 0000000..db980c2
--- /dev/null
+++ b/src/backend/utils/adt/rangetypes.c
@@ -0,0 +1,2622 @@
+/*-------------------------------------------------------------------------
+ *
+ * rangetypes.c
+ * I/O functions, operators, and support functions for range types.
+ *
+ * The stored (serialized) format of a range value is:
+ *
+ * 4 bytes: varlena header
+ * 4 bytes: range type's OID
+ * Lower boundary value, if any, aligned according to subtype's typalign
+ * Upper boundary value, if any, aligned according to subtype's typalign
+ * 1 byte for flags
+ *
+ * This representation is chosen to avoid needing any padding before the
+ * lower boundary value, even when it requires double alignment. We can
+ * expect that the varlena header is presented to us on a suitably aligned
+ * boundary (possibly after detoasting), and then the lower boundary is too.
+ * Note that this means we can't work with a packed (short varlena header)
+ * value; we must detoast it first.
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/rangetypes.c
+ *
+ *-------------------------------------------------------------------------
+ */
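
For orientation, the declaration behind this layout (in src/include/utils/rangetypes.h) is roughly the following; the bound values and the trailing flags byte live in the trailing palloc'd space rather than as named fields:

    typedef struct
    {
        int32   vl_len_;      /* varlena header (do not touch directly!) */
        Oid     rangetypid;   /* OID of the range type itself */
        /* Lower bound value, upper bound value (each optional, suitably
         * aligned), then one flags byte, follow in the remaining space. */
    } RangeType;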
+#include "postgres.h"
+
+#include "access/tupmacs.h"
+#include "common/hashfn.h"
+#include "lib/stringinfo.h"
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "port/pg_bitutils.h"
+#include "utils/builtins.h"
+#include "utils/date.h"
+#include "utils/lsyscache.h"
+#include "utils/rangetypes.h"
+#include "utils/timestamp.h"
+
+
+/* fn_extra cache entry for one of the range I/O functions */
+typedef struct RangeIOData
+{
+ TypeCacheEntry *typcache; /* range type's typcache entry */
+ FmgrInfo typioproc; /* element type's I/O function */
+ Oid typioparam; /* element type's I/O parameter */
+} RangeIOData;
+
+
+static RangeIOData *get_range_io_data(FunctionCallInfo fcinfo, Oid rngtypid,
+ IOFuncSelector func);
+static char range_parse_flags(const char *flags_str);
+static void range_parse(const char *input_str, char *flags, char **lbound_str,
+ char **ubound_str);
+static const char *range_parse_bound(const char *string, const char *ptr,
+ char **bound_str, bool *infinite);
+static char *range_deparse(char flags, const char *lbound_str,
+ const char *ubound_str);
+static char *range_bound_escape(const char *value);
+static Size datum_compute_size(Size sz, Datum datum, bool typbyval,
+ char typalign, int16 typlen, char typstorage);
+static Pointer datum_write(Pointer ptr, Datum datum, bool typbyval,
+ char typalign, int16 typlen, char typstorage);
+
+
+/*
+ *----------------------------------------------------------
+ * I/O FUNCTIONS
+ *----------------------------------------------------------
+ */
+
+Datum
+range_in(PG_FUNCTION_ARGS)
+{
+ char *input_str = PG_GETARG_CSTRING(0);
+ Oid rngtypoid = PG_GETARG_OID(1);
+ int32 typmod = PG_GETARG_INT32(2);
+ RangeType *range;
+ RangeIOData *cache;
+ char flags;
+ char *lbound_str;
+ char *ubound_str;
+ RangeBound lower;
+ RangeBound upper;
+
+ check_stack_depth(); /* recurses when subtype is a range type */
+
+ cache = get_range_io_data(fcinfo, rngtypoid, IOFunc_input);
+
+ /* parse */
+ range_parse(input_str, &flags, &lbound_str, &ubound_str);
+
+ /* call element type's input function */
+ if (RANGE_HAS_LBOUND(flags))
+ lower.val = InputFunctionCall(&cache->typioproc, lbound_str,
+ cache->typioparam, typmod);
+ if (RANGE_HAS_UBOUND(flags))
+ upper.val = InputFunctionCall(&cache->typioproc, ubound_str,
+ cache->typioparam, typmod);
+
+ lower.infinite = (flags & RANGE_LB_INF) != 0;
+ lower.inclusive = (flags & RANGE_LB_INC) != 0;
+ lower.lower = true;
+ upper.infinite = (flags & RANGE_UB_INF) != 0;
+ upper.inclusive = (flags & RANGE_UB_INC) != 0;
+ upper.lower = false;
+
+ /* serialize and canonicalize */
+ range = make_range(cache->typcache, &lower, &upper, flags & RANGE_EMPTY);
+
+ PG_RETURN_RANGE_P(range);
+}
+
+Datum
+range_out(PG_FUNCTION_ARGS)
+{
+ RangeType *range = PG_GETARG_RANGE_P(0);
+ char *output_str;
+ RangeIOData *cache;
+ char flags;
+ char *lbound_str = NULL;
+ char *ubound_str = NULL;
+ RangeBound lower;
+ RangeBound upper;
+ bool empty;
+
+ check_stack_depth(); /* recurses when subtype is a range type */
+
+ cache = get_range_io_data(fcinfo, RangeTypeGetOid(range), IOFunc_output);
+
+ /* deserialize */
+ range_deserialize(cache->typcache, range, &lower, &upper, &empty);
+ flags = range_get_flags(range);
+
+ /* call element type's output function */
+ if (RANGE_HAS_LBOUND(flags))
+ lbound_str = OutputFunctionCall(&cache->typioproc, lower.val);
+ if (RANGE_HAS_UBOUND(flags))
+ ubound_str = OutputFunctionCall(&cache->typioproc, upper.val);
+
+ /* construct result string */
+ output_str = range_deparse(flags, lbound_str, ubound_str);
+
+ PG_RETURN_CSTRING(output_str);
+}
+
+/*
+ * Binary representation: The first byte is the flags, then the lower bound
+ * (if present), then the upper bound (if present). Each bound is represented
+ * by a 4-byte length header and the binary representation of that bound (as
+ * returned by a call to the send function for the subtype).
+ */
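
A client-side model of that framing (illustrative only: plain C, hypothetical helper names, lengths written in network byte order as pq_sendint32 does, and the bound payloads assumed to be whatever the subtype's send function emitted):

    #include <arpa/inet.h>   /* htonl */
    #include <stdint.h>
    #include <string.h>

    /* Append one bound: 4-byte big-endian length, then the subtype's image. */
    static size_t
    append_bound(uint8_t *out, const uint8_t *payload, uint32_t payload_len)
    {
        uint32_t net_len = htonl(payload_len);

        memcpy(out, &net_len, 4);
        memcpy(out + 4, payload, payload_len);
        return 4 + payload_len;
    }

    /* Whole-range image: flags byte, then lower bound, then upper bound. */
    static size_t
    build_range_image(uint8_t *out, uint8_t flags,
                      const uint8_t *lower, uint32_t lower_len, /* NULL if absent */
                      const uint8_t *upper, uint32_t upper_len) /* NULL if absent */
    {
        size_t  n = 0;

        out[n++] = flags;               /* RANGE_EMPTY / *_INC / *_INF bits */
        if (lower != NULL)
            n += append_bound(out + n, lower, lower_len);
        if (upper != NULL)
            n += append_bound(out + n, upper, upper_len);
        return n;
    }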
+
+Datum
+range_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ Oid rngtypoid = PG_GETARG_OID(1);
+ int32 typmod = PG_GETARG_INT32(2);
+ RangeType *range;
+ RangeIOData *cache;
+ char flags;
+ RangeBound lower;
+ RangeBound upper;
+
+ check_stack_depth(); /* recurses when subtype is a range type */
+
+ cache = get_range_io_data(fcinfo, rngtypoid, IOFunc_receive);
+
+ /* receive the flags... */
+ flags = (unsigned char) pq_getmsgbyte(buf);
+
+ /*
+ * Mask out any unsupported flags, particularly RANGE_xB_NULL which would
+ * confuse following tests. Note that range_serialize will take care of
+ * cleaning up any inconsistencies in the remaining flags.
+ */
+ flags &= (RANGE_EMPTY |
+ RANGE_LB_INC |
+ RANGE_LB_INF |
+ RANGE_UB_INC |
+ RANGE_UB_INF);
+
+ /* receive the bounds ... */
+ if (RANGE_HAS_LBOUND(flags))
+ {
+ uint32 bound_len = pq_getmsgint(buf, 4);
+ const char *bound_data = pq_getmsgbytes(buf, bound_len);
+ StringInfoData bound_buf;
+
+ initStringInfo(&bound_buf);
+ appendBinaryStringInfo(&bound_buf, bound_data, bound_len);
+
+ lower.val = ReceiveFunctionCall(&cache->typioproc,
+ &bound_buf,
+ cache->typioparam,
+ typmod);
+ pfree(bound_buf.data);
+ }
+ else
+ lower.val = (Datum) 0;
+
+ if (RANGE_HAS_UBOUND(flags))
+ {
+ uint32 bound_len = pq_getmsgint(buf, 4);
+ const char *bound_data = pq_getmsgbytes(buf, bound_len);
+ StringInfoData bound_buf;
+
+ initStringInfo(&bound_buf);
+ appendBinaryStringInfo(&bound_buf, bound_data, bound_len);
+
+ upper.val = ReceiveFunctionCall(&cache->typioproc,
+ &bound_buf,
+ cache->typioparam,
+ typmod);
+ pfree(bound_buf.data);
+ }
+ else
+ upper.val = (Datum) 0;
+
+ pq_getmsgend(buf);
+
+ /* finish constructing RangeBound representation */
+ lower.infinite = (flags & RANGE_LB_INF) != 0;
+ lower.inclusive = (flags & RANGE_LB_INC) != 0;
+ lower.lower = true;
+ upper.infinite = (flags & RANGE_UB_INF) != 0;
+ upper.inclusive = (flags & RANGE_UB_INC) != 0;
+ upper.lower = false;
+
+ /* serialize and canonicalize */
+ range = make_range(cache->typcache, &lower, &upper, flags & RANGE_EMPTY);
+
+ PG_RETURN_RANGE_P(range);
+}
+
+Datum
+range_send(PG_FUNCTION_ARGS)
+{
+ RangeType *range = PG_GETARG_RANGE_P(0);
+ StringInfo buf = makeStringInfo();
+ RangeIOData *cache;
+ char flags;
+ RangeBound lower;
+ RangeBound upper;
+ bool empty;
+
+ check_stack_depth(); /* recurses when subtype is a range type */
+
+ cache = get_range_io_data(fcinfo, RangeTypeGetOid(range), IOFunc_send);
+
+ /* deserialize */
+ range_deserialize(cache->typcache, range, &lower, &upper, &empty);
+ flags = range_get_flags(range);
+
+ /* construct output */
+ pq_begintypsend(buf);
+
+ pq_sendbyte(buf, flags);
+
+ if (RANGE_HAS_LBOUND(flags))
+ {
+ Datum bound = PointerGetDatum(SendFunctionCall(&cache->typioproc,
+ lower.val));
+ uint32 bound_len = VARSIZE(bound) - VARHDRSZ;
+ char *bound_data = VARDATA(bound);
+
+ pq_sendint32(buf, bound_len);
+ pq_sendbytes(buf, bound_data, bound_len);
+ }
+
+ if (RANGE_HAS_UBOUND(flags))
+ {
+ Datum bound = PointerGetDatum(SendFunctionCall(&cache->typioproc,
+ upper.val));
+ uint32 bound_len = VARSIZE(bound) - VARHDRSZ;
+ char *bound_data = VARDATA(bound);
+
+ pq_sendint32(buf, bound_len);
+ pq_sendbytes(buf, bound_data, bound_len);
+ }
+
+ PG_RETURN_BYTEA_P(pq_endtypsend(buf));
+}
+
+/*
+ * get_range_io_data: get cached information needed for range type I/O
+ *
+ * The range I/O functions need a bit more cached info than other range
+ * functions, so they store a RangeIOData struct in fn_extra, not just a
+ * pointer to a type cache entry.
+ */
+static RangeIOData *
+get_range_io_data(FunctionCallInfo fcinfo, Oid rngtypid, IOFuncSelector func)
+{
+ RangeIOData *cache = (RangeIOData *) fcinfo->flinfo->fn_extra;
+
+ if (cache == NULL || cache->typcache->type_id != rngtypid)
+ {
+ int16 typlen;
+ bool typbyval;
+ char typalign;
+ char typdelim;
+ Oid typiofunc;
+
+ cache = (RangeIOData *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(RangeIOData));
+ cache->typcache = lookup_type_cache(rngtypid, TYPECACHE_RANGE_INFO);
+ if (cache->typcache->rngelemtype == NULL)
+ elog(ERROR, "type %u is not a range type", rngtypid);
+
+ /* get_type_io_data does more than we need, but is convenient */
+ get_type_io_data(cache->typcache->rngelemtype->type_id,
+ func,
+ &typlen,
+ &typbyval,
+ &typalign,
+ &typdelim,
+ &cache->typioparam,
+ &typiofunc);
+
+ if (!OidIsValid(typiofunc))
+ {
+ /* this could only happen for receive or send */
+ if (func == IOFunc_receive)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("no binary input function available for type %s",
+ format_type_be(cache->typcache->rngelemtype->type_id))));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("no binary output function available for type %s",
+ format_type_be(cache->typcache->rngelemtype->type_id))));
+ }
+ fmgr_info_cxt(typiofunc, &cache->typioproc,
+ fcinfo->flinfo->fn_mcxt);
+
+ fcinfo->flinfo->fn_extra = (void *) cache;
+ }
+
+ return cache;
+}
+
+
+/*
+ *----------------------------------------------------------
+ * GENERIC FUNCTIONS
+ *----------------------------------------------------------
+ */
+
+/* Construct standard-form range value from two arguments */
+Datum
+range_constructor2(PG_FUNCTION_ARGS)
+{
+ Datum arg1 = PG_GETARG_DATUM(0);
+ Datum arg2 = PG_GETARG_DATUM(1);
+ Oid rngtypid = get_fn_expr_rettype(fcinfo->flinfo);
+ RangeType *range;
+ TypeCacheEntry *typcache;
+ RangeBound lower;
+ RangeBound upper;
+
+ typcache = range_get_typcache(fcinfo, rngtypid);
+
+ lower.val = PG_ARGISNULL(0) ? (Datum) 0 : arg1;
+ lower.infinite = PG_ARGISNULL(0);
+ lower.inclusive = true;
+ lower.lower = true;
+
+ upper.val = PG_ARGISNULL(1) ? (Datum) 0 : arg2;
+ upper.infinite = PG_ARGISNULL(1);
+ upper.inclusive = false;
+ upper.lower = false;
+
+ range = make_range(typcache, &lower, &upper, false);
+
+ PG_RETURN_RANGE_P(range);
+}
+
+/* Construct general range value from three arguments */
+Datum
+range_constructor3(PG_FUNCTION_ARGS)
+{
+ Datum arg1 = PG_GETARG_DATUM(0);
+ Datum arg2 = PG_GETARG_DATUM(1);
+ Oid rngtypid = get_fn_expr_rettype(fcinfo->flinfo);
+ RangeType *range;
+ TypeCacheEntry *typcache;
+ RangeBound lower;
+ RangeBound upper;
+ char flags;
+
+ typcache = range_get_typcache(fcinfo, rngtypid);
+
+ if (PG_ARGISNULL(2))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_EXCEPTION),
+ errmsg("range constructor flags argument must not be null")));
+
+ flags = range_parse_flags(text_to_cstring(PG_GETARG_TEXT_PP(2)));
+
+ lower.val = PG_ARGISNULL(0) ? (Datum) 0 : arg1;
+ lower.infinite = PG_ARGISNULL(0);
+ lower.inclusive = (flags & RANGE_LB_INC) != 0;
+ lower.lower = true;
+
+ upper.val = PG_ARGISNULL(1) ? (Datum) 0 : arg2;
+ upper.infinite = PG_ARGISNULL(1);
+ upper.inclusive = (flags & RANGE_UB_INC) != 0;
+ upper.lower = false;
+
+ range = make_range(typcache, &lower, &upper, false);
+
+ PG_RETURN_RANGE_P(range);
+}
+
+
+/* range -> subtype functions */
+
+/* extract lower bound value */
+Datum
+range_lower(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ TypeCacheEntry *typcache;
+ RangeBound lower;
+ RangeBound upper;
+ bool empty;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
+
+ range_deserialize(typcache, r1, &lower, &upper, &empty);
+
+ /* Return NULL if there's no finite lower bound */
+ if (empty || lower.infinite)
+ PG_RETURN_NULL();
+
+ PG_RETURN_DATUM(lower.val);
+}
+
+/* extract upper bound value */
+Datum
+range_upper(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ TypeCacheEntry *typcache;
+ RangeBound lower;
+ RangeBound upper;
+ bool empty;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
+
+ range_deserialize(typcache, r1, &lower, &upper, &empty);
+
+ /* Return NULL if there's no finite upper bound */
+ if (empty || upper.infinite)
+ PG_RETURN_NULL();
+
+ PG_RETURN_DATUM(upper.val);
+}
+
+
+/* range -> bool functions */
+
+/* is range empty? */
+Datum
+range_empty(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ char flags = range_get_flags(r1);
+
+ PG_RETURN_BOOL(flags & RANGE_EMPTY);
+}
+
+/* is lower bound inclusive? */
+Datum
+range_lower_inc(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ char flags = range_get_flags(r1);
+
+ PG_RETURN_BOOL(flags & RANGE_LB_INC);
+}
+
+/* is upper bound inclusive? */
+Datum
+range_upper_inc(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ char flags = range_get_flags(r1);
+
+ PG_RETURN_BOOL(flags & RANGE_UB_INC);
+}
+
+/* is lower bound infinite? */
+Datum
+range_lower_inf(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ char flags = range_get_flags(r1);
+
+ PG_RETURN_BOOL(flags & RANGE_LB_INF);
+}
+
+/* is upper bound infinite? */
+Datum
+range_upper_inf(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ char flags = range_get_flags(r1);
+
+ PG_RETURN_BOOL(flags & RANGE_UB_INF);
+}
+
+
+/* range, element -> bool functions */
+
+/* contains? */
+Datum
+range_contains_elem(PG_FUNCTION_ARGS)
+{
+ RangeType *r = PG_GETARG_RANGE_P(0);
+ Datum val = PG_GETARG_DATUM(1);
+ TypeCacheEntry *typcache;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r));
+
+ PG_RETURN_BOOL(range_contains_elem_internal(typcache, r, val));
+}
+
+/* contained by? */
+Datum
+elem_contained_by_range(PG_FUNCTION_ARGS)
+{
+ Datum val = PG_GETARG_DATUM(0);
+ RangeType *r = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r));
+
+ PG_RETURN_BOOL(range_contains_elem_internal(typcache, r, val));
+}
+
+
+/* range, range -> bool functions */
+
+/* equality (internal version) */
+bool
+range_eq_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
+{
+ RangeBound lower1,
+ lower2;
+ RangeBound upper1,
+ upper2;
+ bool empty1,
+ empty2;
+
+ /* Different types should be prevented by ANYRANGE matching rules */
+ if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2))
+ elog(ERROR, "range types do not match");
+
+ range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
+ range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
+
+ if (empty1 && empty2)
+ return true;
+ if (empty1 != empty2)
+ return false;
+
+ if (range_cmp_bounds(typcache, &lower1, &lower2) != 0)
+ return false;
+
+ if (range_cmp_bounds(typcache, &upper1, &upper2) != 0)
+ return false;
+
+ return true;
+}
+
+/* equality */
+Datum
+range_eq(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ RangeType *r2 = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
+
+ PG_RETURN_BOOL(range_eq_internal(typcache, r1, r2));
+}
+
+/* inequality (internal version) */
+bool
+range_ne_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
+{
+ return (!range_eq_internal(typcache, r1, r2));
+}
+
+/* inequality */
+Datum
+range_ne(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ RangeType *r2 = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
+
+ PG_RETURN_BOOL(range_ne_internal(typcache, r1, r2));
+}
+
+/* contains? */
+Datum
+range_contains(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ RangeType *r2 = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
+
+ PG_RETURN_BOOL(range_contains_internal(typcache, r1, r2));
+}
+
+/* contained by? */
+Datum
+range_contained_by(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ RangeType *r2 = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
+
+ PG_RETURN_BOOL(range_contained_by_internal(typcache, r1, r2));
+}
+
+/* strictly left of? (internal version) */
+bool
+range_before_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
+{
+ RangeBound lower1,
+ lower2;
+ RangeBound upper1,
+ upper2;
+ bool empty1,
+ empty2;
+
+ /* Different types should be prevented by ANYRANGE matching rules */
+ if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2))
+ elog(ERROR, "range types do not match");
+
+ range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
+ range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
+
+ /* An empty range is neither before nor after any other range */
+ if (empty1 || empty2)
+ return false;
+
+ return (range_cmp_bounds(typcache, &upper1, &lower2) < 0);
+}
+
+/* strictly left of? */
+Datum
+range_before(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ RangeType *r2 = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
+
+ PG_RETURN_BOOL(range_before_internal(typcache, r1, r2));
+}
+
+/* strictly right of? (internal version) */
+bool
+range_after_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
+{
+ RangeBound lower1,
+ lower2;
+ RangeBound upper1,
+ upper2;
+ bool empty1,
+ empty2;
+
+ /* Different types should be prevented by ANYRANGE matching rules */
+ if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2))
+ elog(ERROR, "range types do not match");
+
+ range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
+ range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
+
+ /* An empty range is neither before nor after any other range */
+ if (empty1 || empty2)
+ return false;
+
+ return (range_cmp_bounds(typcache, &lower1, &upper2) > 0);
+}
+
+/* strictly right of? */
+Datum
+range_after(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ RangeType *r2 = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
+
+ PG_RETURN_BOOL(range_after_internal(typcache, r1, r2));
+}
+
+/*
+ * Check if two bounds A and B are "adjacent", where A is an upper bound and B
+ * is a lower bound. For the bounds to be adjacent, each subtype value must
+ * satisfy strictly one of the bounds: there are no values which satisfy both
+ * bounds (i.e. less than A and greater than B); and there are no values which
+ * satisfy neither bound (i.e. greater than A and less than B).
+ *
+ * For discrete ranges, we rely on the canonicalization function to see if A..B
+ * normalizes to empty. (If there is no canonicalization function, it's
+ * impossible for such a range to normalize to empty, so we needn't bother to
+ * try.)
+ *
+ * If A == B, the ranges are adjacent only if the bounds have different
+ * inclusive flags (i.e., exactly one of the ranges includes the common
+ * boundary point).
+ *
+ * And if A > B then the ranges are not adjacent in this order.
+ */
+bool
+bounds_adjacent(TypeCacheEntry *typcache, RangeBound boundA, RangeBound boundB)
+{
+ int cmp;
+
+ Assert(!boundA.lower && boundB.lower);
+
+ cmp = range_cmp_bound_values(typcache, &boundA, &boundB);
+ if (cmp < 0)
+ {
+ RangeType *r;
+
+ /*
+ * Bounds do not overlap; see if there are points in between.
+ */
+
+ /* in a continuous subtype, there are assumed to be points between */
+ if (!OidIsValid(typcache->rng_canonical_finfo.fn_oid))
+ return false;
+
+ /*
+ * The bounds are of a discrete range type; so make a range A..B and
+ * see if it's empty.
+ */
+
+ /* flip the inclusion flags */
+ boundA.inclusive = !boundA.inclusive;
+ boundB.inclusive = !boundB.inclusive;
+ /* change upper/lower labels to avoid Assert failures */
+ boundA.lower = true;
+ boundB.lower = false;
+ r = make_range(typcache, &boundA, &boundB, false);
+ return RangeIsEmpty(r);
+ }
+ else if (cmp == 0)
+ return boundA.inclusive != boundB.inclusive;
+ else
+ return false; /* bounds overlap */
+}
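
A worked example helps here. For int4ranges, [1,3] and [4,6) are adjacent: A is 3 (inclusive), B is 4 (inclusive), A < B, and the flipped range (3,4) canonicalizes to empty. The standalone model below applies the same rule to integer bounds (a sketch with illustrative names; it hard-codes integer canonicalization instead of calling a canonical function, and ignores infinite bounds):

    #include <stdbool.h>
    #include <stdio.h>

    /*
     * Model of bounds_adjacent() for an integer subtype.  'a' is an upper
     * bound, 'b' a lower bound; *_inc says whether the bound is inclusive.
     */
    static bool
    int_bounds_adjacent(int a, bool a_inc, int b, bool b_inc)
    {
        if (a < b)
        {
            /*
             * Flip the inclusive flags and canonicalize the gap range to
             * half-open [lo, hi) form; adjacent iff the gap is empty.
             */
            int lo = a_inc ? a + 1 : a;     /* flipped lower bound */
            int hi = b_inc ? b : b + 1;     /* flipped upper bound, half-open */

            return lo >= hi;
        }
        if (a == b)
            return a_inc != b_inc;          /* exactly one side includes the point */
        return false;                       /* a > b: bounds overlap */
    }

    int
    main(void)
    {
        printf("%d\n", int_bounds_adjacent(3, true, 4, true));   /* 1: ..,3] and [4,.. */
        printf("%d\n", int_bounds_adjacent(3, false, 4, true));  /* 0: 3 in neither    */
        printf("%d\n", int_bounds_adjacent(4, false, 4, true));  /* 1: ..,4) and [4,.. */
        printf("%d\n", int_bounds_adjacent(4, true, 4, true));   /* 0: both contain 4  */
        return 0;
    }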
+
+/* adjacent to (but not overlapping)? (internal version) */
+bool
+range_adjacent_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
+{
+ RangeBound lower1,
+ lower2;
+ RangeBound upper1,
+ upper2;
+ bool empty1,
+ empty2;
+
+ /* Different types should be prevented by ANYRANGE matching rules */
+ if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2))
+ elog(ERROR, "range types do not match");
+
+ range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
+ range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
+
+ /* An empty range is not adjacent to any other range */
+ if (empty1 || empty2)
+ return false;
+
+ /*
+ * Given two ranges A..B and C..D, the ranges are adjacent if and only if
+ * B is adjacent to C, or D is adjacent to A.
+ */
+ return (bounds_adjacent(typcache, upper1, lower2) ||
+ bounds_adjacent(typcache, upper2, lower1));
+}
+
+/* adjacent to (but not overlapping)? */
+Datum
+range_adjacent(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ RangeType *r2 = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
+
+ PG_RETURN_BOOL(range_adjacent_internal(typcache, r1, r2));
+}
+
+/* overlaps? (internal version) */
+bool
+range_overlaps_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
+{
+ RangeBound lower1,
+ lower2;
+ RangeBound upper1,
+ upper2;
+ bool empty1,
+ empty2;
+
+ /* Different types should be prevented by ANYRANGE matching rules */
+ if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2))
+ elog(ERROR, "range types do not match");
+
+ range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
+ range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
+
+ /* An empty range does not overlap any other range */
+ if (empty1 || empty2)
+ return false;
+
+ if (range_cmp_bounds(typcache, &lower1, &lower2) >= 0 &&
+ range_cmp_bounds(typcache, &lower1, &upper2) <= 0)
+ return true;
+
+ if (range_cmp_bounds(typcache, &lower2, &lower1) >= 0 &&
+ range_cmp_bounds(typcache, &lower2, &upper1) <= 0)
+ return true;
+
+ return false;
+}
+
+/* overlaps? */
+Datum
+range_overlaps(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ RangeType *r2 = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
+
+ PG_RETURN_BOOL(range_overlaps_internal(typcache, r1, r2));
+}
+
+/* does not extend to right of? (internal version) */
+bool
+range_overleft_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
+{
+ RangeBound lower1,
+ lower2;
+ RangeBound upper1,
+ upper2;
+ bool empty1,
+ empty2;
+
+ /* Different types should be prevented by ANYRANGE matching rules */
+ if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2))
+ elog(ERROR, "range types do not match");
+
+ range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
+ range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
+
+ /* An empty range is neither before nor after any other range */
+ if (empty1 || empty2)
+ return false;
+
+ if (range_cmp_bounds(typcache, &upper1, &upper2) <= 0)
+ return true;
+
+ return false;
+}
+
+/* does not extend to right of? */
+Datum
+range_overleft(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ RangeType *r2 = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
+
+ PG_RETURN_BOOL(range_overleft_internal(typcache, r1, r2));
+}
+
+/* does not extend to left of? (internal version) */
+bool
+range_overright_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
+{
+ RangeBound lower1,
+ lower2;
+ RangeBound upper1,
+ upper2;
+ bool empty1,
+ empty2;
+
+ /* Different types should be prevented by ANYRANGE matching rules */
+ if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2))
+ elog(ERROR, "range types do not match");
+
+ range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
+ range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
+
+ /* An empty range is neither before nor after any other range */
+ if (empty1 || empty2)
+ return false;
+
+ if (range_cmp_bounds(typcache, &lower1, &lower2) >= 0)
+ return true;
+
+ return false;
+}
+
+/* does not extend to left of? */
+Datum
+range_overright(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ RangeType *r2 = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
+
+ PG_RETURN_BOOL(range_overright_internal(typcache, r1, r2));
+}
+
+
+/* range, range -> range functions */
+
+/* set difference */
+Datum
+range_minus(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ RangeType *r2 = PG_GETARG_RANGE_P(1);
+ RangeType *ret;
+ TypeCacheEntry *typcache;
+
+ /* Different types should be prevented by ANYRANGE matching rules */
+ if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2))
+ elog(ERROR, "range types do not match");
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
+
+ ret = range_minus_internal(typcache, r1, r2);
+ if (ret)
+ PG_RETURN_RANGE_P(ret);
+ else
+ PG_RETURN_NULL();
+}
+
+RangeType *
+range_minus_internal(TypeCacheEntry *typcache, RangeType *r1, RangeType *r2)
+{
+ RangeBound lower1,
+ lower2;
+ RangeBound upper1,
+ upper2;
+ bool empty1,
+ empty2;
+ int cmp_l1l2,
+ cmp_l1u2,
+ cmp_u1l2,
+ cmp_u1u2;
+
+ range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
+ range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
+
+ /* if either is empty, r1 is the correct answer */
+ if (empty1 || empty2)
+ return r1;
+
+ cmp_l1l2 = range_cmp_bounds(typcache, &lower1, &lower2);
+ cmp_l1u2 = range_cmp_bounds(typcache, &lower1, &upper2);
+ cmp_u1l2 = range_cmp_bounds(typcache, &upper1, &lower2);
+ cmp_u1u2 = range_cmp_bounds(typcache, &upper1, &upper2);
+
+ if (cmp_l1l2 < 0 && cmp_u1u2 > 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_EXCEPTION),
+ errmsg("result of range difference would not be contiguous")));
+
+ if (cmp_l1u2 > 0 || cmp_u1l2 < 0)
+ return r1;
+
+ if (cmp_l1l2 >= 0 && cmp_u1u2 <= 0)
+ return make_empty_range(typcache);
+
+ if (cmp_l1l2 <= 0 && cmp_u1l2 >= 0 && cmp_u1u2 <= 0)
+ {
+ lower2.inclusive = !lower2.inclusive;
+ lower2.lower = false; /* it will become the upper bound */
+ return make_range(typcache, &lower1, &lower2, false);
+ }
+
+ if (cmp_l1l2 >= 0 && cmp_u1u2 >= 0 && cmp_l1u2 <= 0)
+ {
+ upper2.inclusive = !upper2.inclusive;
+ upper2.lower = true; /* it will become the lower bound */
+ return make_range(typcache, &upper2, &upper1, false);
+ }
+
+ elog(ERROR, "unexpected case in range_minus");
+ return NULL;
+}
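
To make the cases concrete: '[1,10)'::int4range - '[5,15)'::int4range yields [1,5); '[1,10)'::int4range - '[1,15)'::int4range yields empty; '[1,10)'::int4range - '[12,15)'::int4range returns the first range unchanged; and '[1,10)'::int4range - '[3,5)'::int4range raises the "result of range difference would not be contiguous" error, since removing the middle would leave two pieces.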
+
+/*
+ * Set union. If strict is true, it is an error if the two input ranges
+ * are neither adjacent nor overlapping.
+ */
+RangeType *
+range_union_internal(TypeCacheEntry *typcache, RangeType *r1, RangeType *r2,
+ bool strict)
+{
+ RangeBound lower1,
+ lower2;
+ RangeBound upper1,
+ upper2;
+ bool empty1,
+ empty2;
+ RangeBound *result_lower;
+ RangeBound *result_upper;
+
+ /* Different types should be prevented by ANYRANGE matching rules */
+ if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2))
+ elog(ERROR, "range types do not match");
+
+ range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
+ range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
+
+ /* if either is empty, the other is the correct answer */
+ if (empty1)
+ return r2;
+ if (empty2)
+ return r1;
+
+ if (strict &&
+ !DatumGetBool(range_overlaps_internal(typcache, r1, r2)) &&
+ !DatumGetBool(range_adjacent_internal(typcache, r1, r2)))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_EXCEPTION),
+ errmsg("result of range union would not be contiguous")));
+
+ if (range_cmp_bounds(typcache, &lower1, &lower2) < 0)
+ result_lower = &lower1;
+ else
+ result_lower = &lower2;
+
+ if (range_cmp_bounds(typcache, &upper1, &upper2) > 0)
+ result_upper = &upper1;
+ else
+ result_upper = &upper2;
+
+ return make_range(typcache, result_lower, result_upper, false);
+}
+
+Datum
+range_union(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ RangeType *r2 = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
+
+ PG_RETURN_RANGE_P(range_union_internal(typcache, r1, r2, true));
+}
+
+/*
+ * range merge: like set union, except also allow and account for non-adjacent
+ * input ranges.
+ */
+Datum
+range_merge(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ RangeType *r2 = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
+
+ PG_RETURN_RANGE_P(range_union_internal(typcache, r1, r2, false));
+}
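
For example, range_merge('[1,2)'::int4range, '[5,6)'::int4range) returns [1,6), spanning the gap, whereas the same inputs given to range_union (the + operator) raise "result of range union would not be contiguous".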
+
+/* set intersection */
+Datum
+range_intersect(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ RangeType *r2 = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+
+ /* Different types should be prevented by ANYRANGE matching rules */
+ if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2))
+ elog(ERROR, "range types do not match");
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
+
+ PG_RETURN_RANGE_P(range_intersect_internal(typcache, r1, r2));
+}
+
+RangeType *
+range_intersect_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
+{
+ RangeBound lower1,
+ lower2;
+ RangeBound upper1,
+ upper2;
+ bool empty1,
+ empty2;
+ RangeBound *result_lower;
+ RangeBound *result_upper;
+
+ range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
+ range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
+
+ if (empty1 || empty2 || !range_overlaps_internal(typcache, r1, r2))
+ return make_empty_range(typcache);
+
+ if (range_cmp_bounds(typcache, &lower1, &lower2) >= 0)
+ result_lower = &lower1;
+ else
+ result_lower = &lower2;
+
+ if (range_cmp_bounds(typcache, &upper1, &upper2) <= 0)
+ result_upper = &upper1;
+ else
+ result_upper = &upper2;
+
+ return make_range(typcache, result_lower, result_upper, false);
+}
+
+/* range, range -> range, range functions */
+
+/*
+ * range_split_internal - if r2 intersects the middle of r1, leaving non-empty
+ * ranges on both sides, then return true and set output1 and output2 to the
+ * results of r1 - r2 (in order). Otherwise return false and don't set output1
+ * or output2. Neither input range should be empty.
+ */
+bool
+range_split_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2,
+ RangeType **output1, RangeType **output2)
+{
+ RangeBound lower1,
+ lower2;
+ RangeBound upper1,
+ upper2;
+ bool empty1,
+ empty2;
+
+ range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
+ range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
+
+ if (range_cmp_bounds(typcache, &lower1, &lower2) < 0 &&
+ range_cmp_bounds(typcache, &upper1, &upper2) > 0)
+ {
+ /*
+ * Need to invert inclusive/exclusive for the lower2 and upper2
+ * points. They can't be infinite though. We're allowed to overwrite
+ * these RangeBounds since they only exist locally.
+ */
+ lower2.inclusive = !lower2.inclusive;
+ lower2.lower = false;
+ upper2.inclusive = !upper2.inclusive;
+ upper2.lower = true;
+
+ *output1 = make_range(typcache, &lower1, &lower2, false);
+ *output2 = make_range(typcache, &upper2, &upper1, false);
+ return true;
+ }
+
+ return false;
+}
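
For instance, with r1 = [1,10) and r2 = [3,5) the function returns true and sets the outputs to [1,3) and [5,10); with r2 = [3,20) it returns false, since nothing of r1 remains to the right of r2.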
+
+/* range -> range aggregate functions */
+
+Datum
+range_intersect_agg_transfn(PG_FUNCTION_ARGS)
+{
+ MemoryContext aggContext;
+ Oid rngtypoid;
+ TypeCacheEntry *typcache;
+ RangeType *result;
+ RangeType *current;
+
+ if (!AggCheckCallContext(fcinfo, &aggContext))
+ elog(ERROR, "range_intersect_agg_transfn called in non-aggregate context");
+
+ rngtypoid = get_fn_expr_argtype(fcinfo->flinfo, 1);
+ if (!type_is_range(rngtypoid))
+ elog(ERROR, "range_intersect_agg must be called with a range");
+
+ typcache = range_get_typcache(fcinfo, rngtypoid);
+
+ /* strictness ensures these are non-null */
+ result = PG_GETARG_RANGE_P(0);
+ current = PG_GETARG_RANGE_P(1);
+
+ result = range_intersect_internal(typcache, result, current);
+ PG_RETURN_RANGE_P(result);
+}
+
+
+/* Btree support */
+
+/* btree comparator */
+Datum
+range_cmp(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ RangeType *r2 = PG_GETARG_RANGE_P(1);
+ TypeCacheEntry *typcache;
+ RangeBound lower1,
+ lower2;
+ RangeBound upper1,
+ upper2;
+ bool empty1,
+ empty2;
+ int cmp;
+
+ check_stack_depth(); /* recurses when subtype is a range type */
+
+ /* Different types should be prevented by ANYRANGE matching rules */
+ if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2))
+ elog(ERROR, "range types do not match");
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
+
+ range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
+ range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
+
+ /* For b-tree use, empty ranges sort before all else */
+ if (empty1 && empty2)
+ cmp = 0;
+ else if (empty1)
+ cmp = -1;
+ else if (empty2)
+ cmp = 1;
+ else
+ {
+ cmp = range_cmp_bounds(typcache, &lower1, &lower2);
+ if (cmp == 0)
+ cmp = range_cmp_bounds(typcache, &upper1, &upper2);
+ }
+
+ PG_FREE_IF_COPY(r1, 0);
+ PG_FREE_IF_COPY(r2, 1);
+
+ PG_RETURN_INT32(cmp);
+}
+
+/* inequality operators using the range_cmp function */
+Datum
+range_lt(PG_FUNCTION_ARGS)
+{
+ int cmp = range_cmp(fcinfo);
+
+ PG_RETURN_BOOL(cmp < 0);
+}
+
+Datum
+range_le(PG_FUNCTION_ARGS)
+{
+ int cmp = range_cmp(fcinfo);
+
+ PG_RETURN_BOOL(cmp <= 0);
+}
+
+Datum
+range_ge(PG_FUNCTION_ARGS)
+{
+ int cmp = range_cmp(fcinfo);
+
+ PG_RETURN_BOOL(cmp >= 0);
+}
+
+Datum
+range_gt(PG_FUNCTION_ARGS)
+{
+ int cmp = range_cmp(fcinfo);
+
+ PG_RETURN_BOOL(cmp > 0);
+}
+
+/* Hash support */
+
+/* hash a range value */
+Datum
+hash_range(PG_FUNCTION_ARGS)
+{
+ RangeType *r = PG_GETARG_RANGE_P(0);
+ uint32 result;
+ TypeCacheEntry *typcache;
+ TypeCacheEntry *scache;
+ RangeBound lower;
+ RangeBound upper;
+ bool empty;
+ char flags;
+ uint32 lower_hash;
+ uint32 upper_hash;
+
+ check_stack_depth(); /* recurses when subtype is a range type */
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r));
+
+ /* deserialize */
+ range_deserialize(typcache, r, &lower, &upper, &empty);
+ flags = range_get_flags(r);
+
+ /*
+ * Look up the element type's hash function, if not done already.
+ */
+ scache = typcache->rngelemtype;
+ if (!OidIsValid(scache->hash_proc_finfo.fn_oid))
+ {
+ scache = lookup_type_cache(scache->type_id, TYPECACHE_HASH_PROC_FINFO);
+ if (!OidIsValid(scache->hash_proc_finfo.fn_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify a hash function for type %s",
+ format_type_be(scache->type_id))));
+ }
+
+ /*
+ * Apply the hash function to each bound.
+ */
+ if (RANGE_HAS_LBOUND(flags))
+ lower_hash = DatumGetUInt32(FunctionCall1Coll(&scache->hash_proc_finfo,
+ typcache->rng_collation,
+ lower.val));
+ else
+ lower_hash = 0;
+
+ if (RANGE_HAS_UBOUND(flags))
+ upper_hash = DatumGetUInt32(FunctionCall1Coll(&scache->hash_proc_finfo,
+ typcache->rng_collation,
+ upper.val));
+ else
+ upper_hash = 0;
+
+ /* Merge hashes of flags and bounds */
+ result = hash_uint32((uint32) flags);
+ result ^= lower_hash;
+ result = pg_rotate_left32(result, 1);
+ result ^= upper_hash;
+
+ PG_RETURN_INT32(result);
+}
+
+/*
+ * Returns a 64-bit hash of the range value, computed with a seed.
+ * Otherwise, similar to hash_range.
+ */
+Datum
+hash_range_extended(PG_FUNCTION_ARGS)
+{
+ RangeType *r = PG_GETARG_RANGE_P(0);
+ Datum seed = PG_GETARG_DATUM(1);
+ uint64 result;
+ TypeCacheEntry *typcache;
+ TypeCacheEntry *scache;
+ RangeBound lower;
+ RangeBound upper;
+ bool empty;
+ char flags;
+ uint64 lower_hash;
+ uint64 upper_hash;
+
+ check_stack_depth();
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r));
+
+ range_deserialize(typcache, r, &lower, &upper, &empty);
+ flags = range_get_flags(r);
+
+ scache = typcache->rngelemtype;
+ if (!OidIsValid(scache->hash_extended_proc_finfo.fn_oid))
+ {
+ scache = lookup_type_cache(scache->type_id,
+ TYPECACHE_HASH_EXTENDED_PROC_FINFO);
+ if (!OidIsValid(scache->hash_extended_proc_finfo.fn_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify a hash function for type %s",
+ format_type_be(scache->type_id))));
+ }
+
+ if (RANGE_HAS_LBOUND(flags))
+ lower_hash = DatumGetUInt64(FunctionCall2Coll(&scache->hash_extended_proc_finfo,
+ typcache->rng_collation,
+ lower.val,
+ seed));
+ else
+ lower_hash = 0;
+
+ if (RANGE_HAS_UBOUND(flags))
+ upper_hash = DatumGetUInt64(FunctionCall2Coll(&scache->hash_extended_proc_finfo,
+ typcache->rng_collation,
+ upper.val,
+ seed));
+ else
+ upper_hash = 0;
+
+ /* Merge hashes of flags and bounds */
+ result = DatumGetUInt64(hash_uint32_extended((uint32) flags,
+ DatumGetInt64(seed)));
+ result ^= lower_hash;
+ result = ROTATE_HIGH_AND_LOW_32BITS(result);
+ result ^= upper_hash;
+
+ PG_RETURN_UINT64(result);
+}
+
+/*
+ *----------------------------------------------------------
+ * CANONICAL FUNCTIONS
+ *
+ * Functions for specific built-in range types.
+ *----------------------------------------------------------
+ */
+
+Datum
+int4range_canonical(PG_FUNCTION_ARGS)
+{
+ RangeType *r = PG_GETARG_RANGE_P(0);
+ TypeCacheEntry *typcache;
+ RangeBound lower;
+ RangeBound upper;
+ bool empty;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r));
+
+ range_deserialize(typcache, r, &lower, &upper, &empty);
+
+ if (empty)
+ PG_RETURN_RANGE_P(r);
+
+ if (!lower.infinite && !lower.inclusive)
+ {
+ lower.val = DirectFunctionCall2(int4pl, lower.val, Int32GetDatum(1));
+ lower.inclusive = true;
+ }
+
+ if (!upper.infinite && upper.inclusive)
+ {
+ upper.val = DirectFunctionCall2(int4pl, upper.val, Int32GetDatum(1));
+ upper.inclusive = false;
+ }
+
+ PG_RETURN_RANGE_P(range_serialize(typcache, &lower, &upper, false));
+}
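
The effect is that every non-empty int4range is displayed in '[)' form; int4range(1, 4, '(]'), for instance, comes out as [2,5). A standalone model of the same adjustment (a sketch with illustrative names; the real code goes through int4pl so that integer overflow is reported):

    #include <stdbool.h>
    #include <stdio.h>

    /* Canonicalize finite integer bounds to the [lo, hi) convention. */
    static void
    int_range_canonicalize(int *lo, bool *lo_inc, int *hi, bool *hi_inc)
    {
        if (!*lo_inc)
        {
            *lo += 1;           /* (n  becomes  [n+1 */
            *lo_inc = true;
        }
        if (*hi_inc)
        {
            *hi += 1;           /* n]  becomes  n+1) */
            *hi_inc = false;
        }
    }

    int
    main(void)
    {
        int  lo = 1, hi = 4;
        bool lo_inc = false, hi_inc = true;   /* (1,4] */

        int_range_canonicalize(&lo, &lo_inc, &hi, &hi_inc);
        printf("[%d,%d)\n", lo, hi);          /* prints [2,5) */
        return 0;
    }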
+
+Datum
+int8range_canonical(PG_FUNCTION_ARGS)
+{
+ RangeType *r = PG_GETARG_RANGE_P(0);
+ TypeCacheEntry *typcache;
+ RangeBound lower;
+ RangeBound upper;
+ bool empty;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r));
+
+ range_deserialize(typcache, r, &lower, &upper, &empty);
+
+ if (empty)
+ PG_RETURN_RANGE_P(r);
+
+ if (!lower.infinite && !lower.inclusive)
+ {
+ lower.val = DirectFunctionCall2(int8pl, lower.val, Int64GetDatum(1));
+ lower.inclusive = true;
+ }
+
+ if (!upper.infinite && upper.inclusive)
+ {
+ upper.val = DirectFunctionCall2(int8pl, upper.val, Int64GetDatum(1));
+ upper.inclusive = false;
+ }
+
+ PG_RETURN_RANGE_P(range_serialize(typcache, &lower, &upper, false));
+}
+
+Datum
+daterange_canonical(PG_FUNCTION_ARGS)
+{
+ RangeType *r = PG_GETARG_RANGE_P(0);
+ TypeCacheEntry *typcache;
+ RangeBound lower;
+ RangeBound upper;
+ bool empty;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r));
+
+ range_deserialize(typcache, r, &lower, &upper, &empty);
+
+ if (empty)
+ PG_RETURN_RANGE_P(r);
+
+ if (!lower.infinite && !DATE_NOT_FINITE(DatumGetDateADT(lower.val)) &&
+ !lower.inclusive)
+ {
+ lower.val = DirectFunctionCall2(date_pli, lower.val, Int32GetDatum(1));
+ lower.inclusive = true;
+ }
+
+ if (!upper.infinite && !DATE_NOT_FINITE(DatumGetDateADT(upper.val)) &&
+ upper.inclusive)
+ {
+ upper.val = DirectFunctionCall2(date_pli, upper.val, Int32GetDatum(1));
+ upper.inclusive = false;
+ }
+
+ PG_RETURN_RANGE_P(range_serialize(typcache, &lower, &upper, false));
+}
+
+/*
+ *----------------------------------------------------------
+ * SUBTYPE_DIFF FUNCTIONS
+ *
+ * Functions for specific built-in range types.
+ *
+ * Note that subtype_diff does return the difference, not the absolute value
+ * of the difference, and it must take care to avoid overflow.
+ * (numrange_subdiff is at some risk there ...)
+ *----------------------------------------------------------
+ */
+
+Datum
+int4range_subdiff(PG_FUNCTION_ARGS)
+{
+ int32 v1 = PG_GETARG_INT32(0);
+ int32 v2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_FLOAT8((float8) v1 - (float8) v2);
+}
+
+Datum
+int8range_subdiff(PG_FUNCTION_ARGS)
+{
+ int64 v1 = PG_GETARG_INT64(0);
+ int64 v2 = PG_GETARG_INT64(1);
+
+ PG_RETURN_FLOAT8((float8) v1 - (float8) v2);
+}
+
+Datum
+numrange_subdiff(PG_FUNCTION_ARGS)
+{
+ Datum v1 = PG_GETARG_DATUM(0);
+ Datum v2 = PG_GETARG_DATUM(1);
+ Datum numresult;
+ float8 floatresult;
+
+ numresult = DirectFunctionCall2(numeric_sub, v1, v2);
+
+ floatresult = DatumGetFloat8(DirectFunctionCall1(numeric_float8,
+ numresult));
+
+ PG_RETURN_FLOAT8(floatresult);
+}
+
+Datum
+daterange_subdiff(PG_FUNCTION_ARGS)
+{
+ int32 v1 = PG_GETARG_INT32(0);
+ int32 v2 = PG_GETARG_INT32(1);
+
+ PG_RETURN_FLOAT8((float8) v1 - (float8) v2);
+}
+
+Datum
+tsrange_subdiff(PG_FUNCTION_ARGS)
+{
+ Timestamp v1 = PG_GETARG_TIMESTAMP(0);
+ Timestamp v2 = PG_GETARG_TIMESTAMP(1);
+ float8 result;
+
+ result = ((float8) v1 - (float8) v2) / USECS_PER_SEC;
+ PG_RETURN_FLOAT8(result);
+}
+
+Datum
+tstzrange_subdiff(PG_FUNCTION_ARGS)
+{
+ Timestamp v1 = PG_GETARG_TIMESTAMP(0);
+ Timestamp v2 = PG_GETARG_TIMESTAMP(1);
+ float8 result;
+
+ result = ((float8) v1 - (float8) v2) / USECS_PER_SEC;
+ PG_RETURN_FLOAT8(result);
+}
+
+/*
+ *----------------------------------------------------------
+ * SUPPORT FUNCTIONS
+ *
+ * These functions aren't in pg_proc, but are useful for
+ * defining new generic range functions in C.
+ *----------------------------------------------------------
+ */
+
+/*
+ * range_get_typcache: get cached information about a range type
+ *
+ * This is for use by range-related functions that follow the convention
+ * of using the fn_extra field as a pointer to the type cache entry for
+ * the range type. Functions that need to cache more information than
+ * that must fend for themselves.
+ */
+TypeCacheEntry *
+range_get_typcache(FunctionCallInfo fcinfo, Oid rngtypid)
+{
+ TypeCacheEntry *typcache = (TypeCacheEntry *) fcinfo->flinfo->fn_extra;
+
+ if (typcache == NULL ||
+ typcache->type_id != rngtypid)
+ {
+ typcache = lookup_type_cache(rngtypid, TYPECACHE_RANGE_INFO);
+ if (typcache->rngelemtype == NULL)
+ elog(ERROR, "type %u is not a range type", rngtypid);
+ fcinfo->flinfo->fn_extra = (void *) typcache;
+ }
+
+ return typcache;
+}
+
+/*
+ * range_serialize: construct a range value from bounds and empty-flag
+ *
+ * This does not force canonicalization of the range value. In most cases,
+ * the only external callers should be canonicalization functions. Note that
+ * we perform some datatype-independent canonicalization checks anyway.
+ */
+RangeType *
+range_serialize(TypeCacheEntry *typcache, RangeBound *lower, RangeBound *upper,
+ bool empty)
+{
+ RangeType *range;
+ int cmp;
+ Size msize;
+ Pointer ptr;
+ int16 typlen;
+ bool typbyval;
+ char typalign;
+ char typstorage;
+ char flags = 0;
+
+ /*
+ * Verify range is not invalid on its face, and construct flags value,
+ * preventing any non-canonical combinations such as infinite+inclusive.
+ */
+ Assert(lower->lower);
+ Assert(!upper->lower);
+
+ if (empty)
+ flags |= RANGE_EMPTY;
+ else
+ {
+ cmp = range_cmp_bound_values(typcache, lower, upper);
+
+ /* error check: if lower bound value is above upper, it's wrong */
+ if (cmp > 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_EXCEPTION),
+ errmsg("range lower bound must be less than or equal to range upper bound")));
+
+ /* if bounds are equal, and not both inclusive, range is empty */
+ if (cmp == 0 && !(lower->inclusive && upper->inclusive))
+ flags |= RANGE_EMPTY;
+ else
+ {
+ /* infinite boundaries are never inclusive */
+ if (lower->infinite)
+ flags |= RANGE_LB_INF;
+ else if (lower->inclusive)
+ flags |= RANGE_LB_INC;
+ if (upper->infinite)
+ flags |= RANGE_UB_INF;
+ else if (upper->inclusive)
+ flags |= RANGE_UB_INC;
+ }
+ }
+
+ /* Fetch information about range's element type */
+ typlen = typcache->rngelemtype->typlen;
+ typbyval = typcache->rngelemtype->typbyval;
+ typalign = typcache->rngelemtype->typalign;
+ typstorage = typcache->rngelemtype->typstorage;
+
+ /* Count space for varlena header and range type's OID */
+ msize = sizeof(RangeType);
+ Assert(msize == MAXALIGN(msize));
+
+ /* Count space for bounds */
+ if (RANGE_HAS_LBOUND(flags))
+ {
+ /*
+ * Make sure item to be inserted is not toasted. It is essential that
+ * we not insert an out-of-line toast value pointer into a range
+ * object, for the same reasons that arrays and records can't contain
+ * them. It would work to store a compressed-in-line value, but we
+ * prefer to decompress and then let compression be applied to the
+ * whole range object if necessary. But, unlike arrays, we do allow
+ * short-header varlena objects to stay as-is.
+ */
+ if (typlen == -1)
+ lower->val = PointerGetDatum(PG_DETOAST_DATUM_PACKED(lower->val));
+
+ msize = datum_compute_size(msize, lower->val, typbyval, typalign,
+ typlen, typstorage);
+ }
+
+ if (RANGE_HAS_UBOUND(flags))
+ {
+ /* Make sure item to be inserted is not toasted */
+ if (typlen == -1)
+ upper->val = PointerGetDatum(PG_DETOAST_DATUM_PACKED(upper->val));
+
+ msize = datum_compute_size(msize, upper->val, typbyval, typalign,
+ typlen, typstorage);
+ }
+
+ /* Add space for flag byte */
+ msize += sizeof(char);
+
+ /* Note: zero-fill is required here, just as in heap tuples */
+ range = (RangeType *) palloc0(msize);
+ SET_VARSIZE(range, msize);
+
+ /* Now fill in the datum */
+ range->rangetypid = typcache->type_id;
+
+ ptr = (char *) (range + 1);
+
+ if (RANGE_HAS_LBOUND(flags))
+ {
+ Assert(lower->lower);
+ ptr = datum_write(ptr, lower->val, typbyval, typalign, typlen,
+ typstorage);
+ }
+
+ if (RANGE_HAS_UBOUND(flags))
+ {
+ Assert(!upper->lower);
+ ptr = datum_write(ptr, upper->val, typbyval, typalign, typlen,
+ typstorage);
+ }
+
+ *((char *) ptr) = flags;
+
+ return range;
+}
+
+/*
+ * range_deserialize: deconstruct a range value
+ *
+ * NB: the given range object must be fully detoasted; it cannot have a
+ * short varlena header.
+ *
+ * Note that if the element type is pass-by-reference, the datums in the
+ * RangeBound structs will be pointers into the given range object.
+ */
+void
+range_deserialize(TypeCacheEntry *typcache, const RangeType *range,
+ RangeBound *lower, RangeBound *upper, bool *empty)
+{
+ char flags;
+ int16 typlen;
+ bool typbyval;
+ char typalign;
+ Pointer ptr;
+ Datum lbound;
+ Datum ubound;
+
+ /* assert caller passed the right typcache entry */
+ Assert(RangeTypeGetOid(range) == typcache->type_id);
+
+ /* fetch the flag byte from datum's last byte */
+ flags = *((const char *) range + VARSIZE(range) - 1);
+
+ /* fetch information about range's element type */
+ typlen = typcache->rngelemtype->typlen;
+ typbyval = typcache->rngelemtype->typbyval;
+ typalign = typcache->rngelemtype->typalign;
+
+ /* initialize data pointer just after the range OID */
+ ptr = (Pointer) (range + 1);
+
+ /* fetch lower bound, if any */
+ if (RANGE_HAS_LBOUND(flags))
+ {
+ /* att_align_pointer cannot be necessary here */
+ lbound = fetch_att(ptr, typbyval, typlen);
+ ptr = (Pointer) att_addlength_pointer(ptr, typlen, ptr);
+ }
+ else
+ lbound = (Datum) 0;
+
+ /* fetch upper bound, if any */
+ if (RANGE_HAS_UBOUND(flags))
+ {
+ ptr = (Pointer) att_align_pointer(ptr, typalign, typlen, ptr);
+ ubound = fetch_att(ptr, typbyval, typlen);
+ /* no need for att_addlength_pointer */
+ }
+ else
+ ubound = (Datum) 0;
+
+ /* emit results */
+
+ *empty = (flags & RANGE_EMPTY) != 0;
+
+ lower->val = lbound;
+ lower->infinite = (flags & RANGE_LB_INF) != 0;
+ lower->inclusive = (flags & RANGE_LB_INC) != 0;
+ lower->lower = true;
+
+ upper->val = ubound;
+ upper->infinite = (flags & RANGE_UB_INF) != 0;
+ upper->inclusive = (flags & RANGE_UB_INC) != 0;
+ upper->lower = false;
+}
+
+/*
+ * range_get_flags: just get the flags from a RangeType value.
+ *
+ * This is frequently useful in places that only need the flags and not
+ * the full results of range_deserialize.
+ */
+char
+range_get_flags(const RangeType *range)
+{
+ /* fetch the flag byte from datum's last byte */
+ return *((char *) range + VARSIZE(range) - 1);
+}
+
+/*
+ * range_set_contain_empty: set the RANGE_CONTAIN_EMPTY bit in the value.
+ *
+ * This is only needed in GiST operations, so we don't include a provision
+ * for setting it in range_serialize; rather, this function must be applied
+ * afterwards.
+ */
+void
+range_set_contain_empty(RangeType *range)
+{
+ char *flagsp;
+
+ /* flag byte is datum's last byte */
+ flagsp = (char *) range + VARSIZE(range) - 1;
+
+ *flagsp |= RANGE_CONTAIN_EMPTY;
+}
+
+/*
+ * This both serializes and canonicalizes (if applicable) the range.
+ * This should be used by most callers.
+ */
+RangeType *
+make_range(TypeCacheEntry *typcache, RangeBound *lower, RangeBound *upper,
+ bool empty)
+{
+ RangeType *range;
+
+ range = range_serialize(typcache, lower, upper, empty);
+
+ /* no need to call canonical on empty ranges ... */
+ if (OidIsValid(typcache->rng_canonical_finfo.fn_oid) &&
+ !RangeIsEmpty(range))
+ range = DatumGetRangeTypeP(FunctionCall1(&typcache->rng_canonical_finfo,
+ RangeTypePGetDatum(range)));
+
+ return range;
+}
+
+/*
+ * Compare two range boundary points, returning <0, 0, or >0 according to
+ * whether b1 is less than, equal to, or greater than b2.
+ *
+ * The boundaries can be any combination of upper and lower; so it's useful
+ * for a variety of operators.
+ *
+ * The simple case is when b1 and b2 are both finite and inclusive, in which
+ * case the result is just a comparison of the values held in b1 and b2.
+ *
+ * If a bound is exclusive, then we need to know whether it's a lower bound,
+ * in which case we treat the boundary point as "just greater than" the held
+ * value; or an upper bound, in which case we treat the boundary point as
+ * "just less than" the held value.
+ *
+ * If a bound is infinite, it represents minus infinity (less than every other
+ * point) if it's a lower bound; or plus infinity (greater than every other
+ * point) if it's an upper bound.
+ *
+ * There is only one case where two boundaries compare equal but are not
+ * identical: when both bounds are inclusive and hold the same finite value,
+ * but one is an upper bound and the other a lower bound.
+ */
+int
+range_cmp_bounds(TypeCacheEntry *typcache, const RangeBound *b1, const RangeBound *b2)
+{
+ int32 result;
+
+ /*
+ * First, handle cases involving infinity, which don't require invoking
+ * the comparison proc.
+ */
+ if (b1->infinite && b2->infinite)
+ {
+ /*
+ * Both are infinity, so they are equal unless one is lower and the
+ * other not.
+ */
+ if (b1->lower == b2->lower)
+ return 0;
+ else
+ return b1->lower ? -1 : 1;
+ }
+ else if (b1->infinite)
+ return b1->lower ? -1 : 1;
+ else if (b2->infinite)
+ return b2->lower ? 1 : -1;
+
+ /*
+ * Both boundaries are finite, so compare the held values.
+ */
+ result = DatumGetInt32(FunctionCall2Coll(&typcache->rng_cmp_proc_finfo,
+ typcache->rng_collation,
+ b1->val, b2->val));
+
+ /*
+ * If the comparison is anything other than equal, we're done. If they
+ * compare equal though, we still have to consider whether the boundaries
+ * are inclusive or exclusive.
+ */
+ if (result == 0)
+ {
+ if (!b1->inclusive && !b2->inclusive)
+ {
+ /* both are exclusive */
+ if (b1->lower == b2->lower)
+ return 0;
+ else
+ return b1->lower ? 1 : -1;
+ }
+ else if (!b1->inclusive)
+ return b1->lower ? 1 : -1;
+ else if (!b2->inclusive)
+ return b2->lower ? -1 : 1;
+ else
+ {
+ /*
+ * Both are inclusive and the values held are equal, so they are
+ * equal regardless of whether they are upper or lower boundaries,
+ * or a mix.
+ */
+ return 0;
+ }
+ }
+
+ return result;
+}
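
For finite bounds this ordering can be pictured by mapping each bound to 2*value plus a nudge: 0 for an inclusive bound, +1 for an exclusive lower bound ("just greater than"), -1 for an exclusive upper bound ("just less than"). The sketch below uses that encoding for integer bounds only (a model of the finite-bound rules above with illustrative names; the infinite cases, which the real function handles first, are not covered):

    #include <stdbool.h>
    #include <stdio.h>

    /* Encode a finite integer bound as a totally ordered key. */
    static long
    bound_key(int val, bool inclusive, bool is_lower)
    {
        long key = 2L * val;

        if (!inclusive)
            key += is_lower ? 1 : -1;   /* just above / just below the value */
        return key;
    }

    int
    main(void)
    {
        /* (3,...  starts after  ...,3] : exclusive lower 3 > inclusive upper 3 */
        printf("%d\n", bound_key(3, false, true) > bound_key(3, true, false));   /* 1 */

        /* [3,...  and  ...,3]  compare equal although one is lower, one upper */
        printf("%d\n", bound_key(3, true, true) == bound_key(3, true, false));   /* 1 */

        return 0;
    }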
+
+/*
+ * Compare two range boundary point values, returning <0, 0, or >0 according
+ * to whether b1 is less than, equal to, or greater than b2.
+ *
+ * This is similar to but simpler than range_cmp_bounds(). We just compare
+ * the values held in b1 and b2, ignoring inclusive/exclusive flags. The
+ * lower/upper flags only matter for infinities, where they tell us if the
+ * infinity is plus or minus.
+ */
+int
+range_cmp_bound_values(TypeCacheEntry *typcache, const RangeBound *b1,
+ const RangeBound *b2)
+{
+ /*
+ * First, handle cases involving infinity, which don't require invoking
+ * the comparison proc.
+ */
+ if (b1->infinite && b2->infinite)
+ {
+ /*
+ * Both are infinity, so they are equal unless one is lower and the
+ * other not.
+ */
+ if (b1->lower == b2->lower)
+ return 0;
+ else
+ return b1->lower ? -1 : 1;
+ }
+ else if (b1->infinite)
+ return b1->lower ? -1 : 1;
+ else if (b2->infinite)
+ return b2->lower ? 1 : -1;
+
+ /*
+ * Both boundaries are finite, so compare the held values.
+ */
+ return DatumGetInt32(FunctionCall2Coll(&typcache->rng_cmp_proc_finfo,
+ typcache->rng_collation,
+ b1->val, b2->val));
+}
+
+/*
+ * qsort callback for sorting ranges.
+ *
+ * Two empty ranges compare equal; an empty range sorts to the left of any
+ * non-empty range. Two non-empty ranges are sorted by lower bound first
+ * and by upper bound next.
+ */
+int
+range_compare(const void *key1, const void *key2, void *arg)
+{
+ RangeType *r1 = *(RangeType **) key1;
+ RangeType *r2 = *(RangeType **) key2;
+ TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
+ RangeBound lower1;
+ RangeBound upper1;
+ RangeBound lower2;
+ RangeBound upper2;
+ bool empty1;
+ bool empty2;
+ int cmp;
+
+ range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
+ range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
+
+ if (empty1 && empty2)
+ cmp = 0;
+ else if (empty1)
+ cmp = -1;
+ else if (empty2)
+ cmp = 1;
+ else
+ {
+ cmp = range_cmp_bounds(typcache, &lower1, &lower2);
+ if (cmp == 0)
+ cmp = range_cmp_bounds(typcache, &upper1, &upper2);
+ }
+
+ return cmp;
+}
+
+/*
+ * Build an empty range value of the type indicated by the typcache entry.
+ */
+RangeType *
+make_empty_range(TypeCacheEntry *typcache)
+{
+ RangeBound lower;
+ RangeBound upper;
+
+ lower.val = (Datum) 0;
+ lower.infinite = false;
+ lower.inclusive = false;
+ lower.lower = true;
+
+ upper.val = (Datum) 0;
+ upper.infinite = false;
+ upper.inclusive = false;
+ upper.lower = false;
+
+ return make_range(typcache, &lower, &upper, true);
+}
+
+
+/*
+ *----------------------------------------------------------
+ * STATIC FUNCTIONS
+ *----------------------------------------------------------
+ */
+
+/*
+ * Given a string representing the flags for the range type, return the flags
+ * represented as a char.
+ */
+static char
+range_parse_flags(const char *flags_str)
+{
+ char flags = 0;
+
+ if (flags_str[0] == '\0' ||
+ flags_str[1] == '\0' ||
+ flags_str[2] != '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("invalid range bound flags"),
+ errhint("Valid values are \"[]\", \"[)\", \"(]\", and \"()\".")));
+
+ switch (flags_str[0])
+ {
+ case '[':
+ flags |= RANGE_LB_INC;
+ break;
+ case '(':
+ break;
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("invalid range bound flags"),
+ errhint("Valid values are \"[]\", \"[)\", \"(]\", and \"()\".")));
+ }
+
+ switch (flags_str[1])
+ {
+ case ']':
+ flags |= RANGE_UB_INC;
+ break;
+ case ')':
+ break;
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("invalid range bound flags"),
+ errhint("Valid values are \"[]\", \"[)\", \"(]\", and \"()\".")));
+ }
+
+ return flags;
+}
+
+/*
+ * Parse range input.
+ *
+ * Input parameters:
+ * string: input string to be parsed
+ * Output parameters:
+ * *flags: receives flags bitmask
+ * *lbound_str: receives palloc'd lower bound string, or NULL if none
+ * *ubound_str: receives palloc'd upper bound string, or NULL if none
+ *
+ * This is modeled somewhat after record_in in rowtypes.c.
+ * The input syntax is:
+ * <range> := EMPTY
+ * | <lb-inc> <string>, <string> <ub-inc>
+ * <lb-inc> := '[' | '('
+ * <ub-inc> := ']' | ')'
+ *
+ * Whitespace before or after <range> is ignored. Whitespace within a <string>
+ * is taken literally and becomes part of the input string for that bound.
+ *
+ * A <string> of length zero is taken as "infinite" (i.e. no bound), unless it
+ * is surrounded by double-quotes, in which case it is the literal empty
+ * string.
+ *
+ * Within a <string>, special characters (such as comma, parenthesis, or
+ * brackets) can be enclosed in double-quotes or escaped with backslash. Within
+ * double-quotes, a double-quote can be escaped with double-quote or backslash.
+ */
+static void
+range_parse(const char *string, char *flags, char **lbound_str,
+ char **ubound_str)
+{
+ const char *ptr = string;
+ bool infinite;
+
+ *flags = 0;
+
+ /* consume whitespace */
+ while (*ptr != '\0' && isspace((unsigned char) *ptr))
+ ptr++;
+
+ /* check for empty range */
+ if (pg_strncasecmp(ptr, RANGE_EMPTY_LITERAL,
+ strlen(RANGE_EMPTY_LITERAL)) == 0)
+ {
+ *flags = RANGE_EMPTY;
+ *lbound_str = NULL;
+ *ubound_str = NULL;
+
+ ptr += strlen(RANGE_EMPTY_LITERAL);
+
+ /* the rest should be whitespace */
+ while (*ptr != '\0' && isspace((unsigned char) *ptr))
+ ptr++;
+
+ /* should have consumed everything */
+ if (*ptr != '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed range literal: \"%s\"",
+ string),
+ errdetail("Junk after \"empty\" key word.")));
+
+ return;
+ }
+
+ if (*ptr == '[')
+ {
+ *flags |= RANGE_LB_INC;
+ ptr++;
+ }
+ else if (*ptr == '(')
+ ptr++;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed range literal: \"%s\"",
+ string),
+ errdetail("Missing left parenthesis or bracket.")));
+
+ ptr = range_parse_bound(string, ptr, lbound_str, &infinite);
+ if (infinite)
+ *flags |= RANGE_LB_INF;
+
+ if (*ptr == ',')
+ ptr++;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed range literal: \"%s\"",
+ string),
+ errdetail("Missing comma after lower bound.")));
+
+ ptr = range_parse_bound(string, ptr, ubound_str, &infinite);
+ if (infinite)
+ *flags |= RANGE_UB_INF;
+
+ if (*ptr == ']')
+ {
+ *flags |= RANGE_UB_INC;
+ ptr++;
+ }
+ else if (*ptr == ')')
+ ptr++;
+ else /* must be a comma */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed range literal: \"%s\"",
+ string),
+ errdetail("Too many commas.")));
+
+ /* consume whitespace */
+ while (*ptr != '\0' && isspace((unsigned char) *ptr))
+ ptr++;
+
+ if (*ptr != '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed range literal: \"%s\"",
+ string),
+ errdetail("Junk after right parenthesis or bracket.")));
+}
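+
+/*
+ * For illustration (bound strings are returned without type conversion):
+ * parsing "[1,10)" sets *flags = RANGE_LB_INC with *lbound_str = "1" and
+ * *ubound_str = "10"; " empty " sets *flags = RANGE_EMPTY with both bound
+ * strings NULL; "(,5]" sets *flags = RANGE_LB_INF | RANGE_UB_INC with
+ * *lbound_str = NULL and *ubound_str = "5".
+ */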
+
+/*
+ * Helper for range_parse: parse and de-quote one bound string.
+ *
+ * We scan until finding comma, right parenthesis, or right bracket.
+ *
+ * Input parameters:
+ * string: entire input string (used only for error reports)
+ * ptr: where to start parsing bound
+ * Output parameters:
+ * *bound_str: receives palloc'd bound string, or NULL if none
+ * *infinite: set true if no bound, else false
+ *
+ * The return value is the scan ptr, advanced past the bound string.
+ */
+static const char *
+range_parse_bound(const char *string, const char *ptr,
+ char **bound_str, bool *infinite)
+{
+ StringInfoData buf;
+
+ /* Check for missing bound: completely empty input means no bound (infinite) */
+ if (*ptr == ',' || *ptr == ')' || *ptr == ']')
+ {
+ *bound_str = NULL;
+ *infinite = true;
+ }
+ else
+ {
+ /* Extract string for this bound */
+ bool inquote = false;
+
+ initStringInfo(&buf);
+ while (inquote || !(*ptr == ',' || *ptr == ')' || *ptr == ']'))
+ {
+ char ch = *ptr++;
+
+ if (ch == '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed range literal: \"%s\"",
+ string),
+ errdetail("Unexpected end of input.")));
+ if (ch == '\\')
+ {
+ if (*ptr == '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed range literal: \"%s\"",
+ string),
+ errdetail("Unexpected end of input.")));
+ appendStringInfoChar(&buf, *ptr++);
+ }
+ else if (ch == '"')
+ {
+ if (!inquote)
+ inquote = true;
+ else if (*ptr == '"')
+ {
+ /* doubled quote within quote sequence */
+ appendStringInfoChar(&buf, *ptr++);
+ }
+ else
+ inquote = false;
+ }
+ else
+ appendStringInfoChar(&buf, ch);
+ }
+
+ *bound_str = buf.data;
+ *infinite = false;
+ }
+
+ return ptr;
+}
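+
+/*
+ * For illustration: with ptr pointing at the text  "1 2",3)  the function
+ * returns a pointer to the comma and sets *bound_str = "1 2" (quotes
+ * removed); with ptr pointing at  ,3)  it sets *bound_str = NULL and
+ * *infinite = true without consuming the comma.
+ */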
+
+/*
+ * Convert a deserialized range value to text form
+ *
+ * Inputs are the flags byte, and the two bound values already converted to
+ * text (but not yet quoted). If no bound value, pass NULL.
+ *
+ * Result is a palloc'd string
+ */
+static char *
+range_deparse(char flags, const char *lbound_str, const char *ubound_str)
+{
+ StringInfoData buf;
+
+ if (flags & RANGE_EMPTY)
+ return pstrdup(RANGE_EMPTY_LITERAL);
+
+ initStringInfo(&buf);
+
+ appendStringInfoChar(&buf, (flags & RANGE_LB_INC) ? '[' : '(');
+
+ if (RANGE_HAS_LBOUND(flags))
+ appendStringInfoString(&buf, range_bound_escape(lbound_str));
+
+ appendStringInfoChar(&buf, ',');
+
+ if (RANGE_HAS_UBOUND(flags))
+ appendStringInfoString(&buf, range_bound_escape(ubound_str));
+
+ appendStringInfoChar(&buf, (flags & RANGE_UB_INC) ? ']' : ')');
+
+ return buf.data;
+}
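+
+/*
+ * For illustration: with flags = RANGE_LB_INC and bound strings "1" and
+ * "10" the result is "[1,10)"; with flags = RANGE_LB_INF | RANGE_UB_INC
+ * and upper bound string "5" the result is "(,5]"; RANGE_EMPTY yields
+ * "empty".
+ */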
+
+/*
+ * Helper for range_deparse: quote a bound value as needed
+ *
+ * Result is a palloc'd string
+ */
+static char *
+range_bound_escape(const char *value)
+{
+ bool nq;
+ const char *ptr;
+ StringInfoData buf;
+
+ initStringInfo(&buf);
+
+ /* Detect whether we need double quotes for this value */
+ nq = (value[0] == '\0'); /* force quotes for empty string */
+ for (ptr = value; *ptr; ptr++)
+ {
+ char ch = *ptr;
+
+ if (ch == '"' || ch == '\\' ||
+ ch == '(' || ch == ')' ||
+ ch == '[' || ch == ']' ||
+ ch == ',' ||
+ isspace((unsigned char) ch))
+ {
+ nq = true;
+ break;
+ }
+ }
+
+ /* And emit the string */
+ if (nq)
+ appendStringInfoChar(&buf, '"');
+ for (ptr = value; *ptr; ptr++)
+ {
+ char ch = *ptr;
+
+ if (ch == '"' || ch == '\\')
+ appendStringInfoChar(&buf, ch);
+ appendStringInfoChar(&buf, ch);
+ }
+ if (nq)
+ appendStringInfoChar(&buf, '"');
+
+ return buf.data;
+}
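+
+/*
+ * For illustration: the value abc is emitted unchanged, while a value such
+ * as a,b is wrapped in double quotes ("a,b"); embedded double quotes and
+ * backslashes are doubled, and an empty string is emitted as "" so it is
+ * not mistaken for a missing bound.
+ */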
+
+/*
+ * Test whether range r1 contains range r2.
+ *
+ * Caller has already checked that they are the same range type, and looked up
+ * the necessary typcache entry.
+ */
+bool
+range_contains_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
+{
+ RangeBound lower1;
+ RangeBound upper1;
+ bool empty1;
+ RangeBound lower2;
+ RangeBound upper2;
+ bool empty2;
+
+ /* Different types should be prevented by ANYRANGE matching rules */
+ if (RangeTypeGetOid(r1) != RangeTypeGetOid(r2))
+ elog(ERROR, "range types do not match");
+
+ range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
+ range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
+
+ /* If either range is empty, the answer is easy */
+ if (empty2)
+ return true;
+ else if (empty1)
+ return false;
+
+ /* Else we must have lower1 <= lower2 and upper1 >= upper2 */
+ if (range_cmp_bounds(typcache, &lower1, &lower2) > 0)
+ return false;
+ if (range_cmp_bounds(typcache, &upper1, &upper2) < 0)
+ return false;
+
+ return true;
+}
+
+bool
+range_contained_by_internal(TypeCacheEntry *typcache, const RangeType *r1, const RangeType *r2)
+{
+ return range_contains_internal(typcache, r2, r1);
+}
+
+/*
+ * Test whether range r contains a specific element value.
+ */
+bool
+range_contains_elem_internal(TypeCacheEntry *typcache, const RangeType *r, Datum val)
+{
+ RangeBound lower;
+ RangeBound upper;
+ bool empty;
+ int32 cmp;
+
+ range_deserialize(typcache, r, &lower, &upper, &empty);
+
+ if (empty)
+ return false;
+
+ if (!lower.infinite)
+ {
+ cmp = DatumGetInt32(FunctionCall2Coll(&typcache->rng_cmp_proc_finfo,
+ typcache->rng_collation,
+ lower.val, val));
+ if (cmp > 0)
+ return false;
+ if (cmp == 0 && !lower.inclusive)
+ return false;
+ }
+
+ if (!upper.infinite)
+ {
+ cmp = DatumGetInt32(FunctionCall2Coll(&typcache->rng_cmp_proc_finfo,
+ typcache->rng_collation,
+ upper.val, val));
+ if (cmp < 0)
+ return false;
+ if (cmp == 0 && !upper.inclusive)
+ return false;
+ }
+
+ return true;
+}
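+
+/*
+ * For illustration (assuming int4range): for the range [5,10), the values
+ * 5 through 9 are contained, while 4 and 10 are not (the upper bound is
+ * exclusive); nothing is contained in an empty range.
+ */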
+
+
+/*
+ * datum_compute_size() and datum_write() are used to insert the bound
+ * values into a range object. They are modeled after heaptuple.c's
+ * heap_compute_data_size() and heap_fill_tuple(), but we need not handle
+ * null values here. TYPE_IS_PACKABLE must test the same conditions as
+ * heaptuple.c's ATT_IS_PACKABLE macro. See the comments there for more
+ * details.
+ */
+
+/* Does datatype allow packing into the 1-byte-header varlena format? */
+#define TYPE_IS_PACKABLE(typlen, typstorage) \
+ ((typlen) == -1 && (typstorage) != TYPSTORAGE_PLAIN)
+
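+/*
+ * For illustration: a text bound whose 4-byte-header form is 20 bytes
+ * (4-byte header plus 16 data bytes) is packable, so datum_compute_size
+ * counts VARATT_CONVERTED_SHORT_SIZE = 17 bytes for it (1-byte header
+ * plus the same 16 data bytes) and adds no alignment padding.
+ */
+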
+/*
+ * Increment data_length by the space needed by the datum, including any
+ * preceding alignment padding.
+ */
+static Size
+datum_compute_size(Size data_length, Datum val, bool typbyval, char typalign,
+ int16 typlen, char typstorage)
+{
+ if (TYPE_IS_PACKABLE(typlen, typstorage) &&
+ VARATT_CAN_MAKE_SHORT(DatumGetPointer(val)))
+ {
+ /*
+ * we're anticipating converting to a short varlena header, so adjust
+ * length and don't count any alignment
+ */
+ data_length += VARATT_CONVERTED_SHORT_SIZE(DatumGetPointer(val));
+ }
+ else
+ {
+ data_length = att_align_datum(data_length, typalign, typlen, val);
+ data_length = att_addlength_datum(data_length, typlen, val);
+ }
+
+ return data_length;
+}
+
+/*
+ * Write the given datum beginning at ptr (after advancing to correct
+ * alignment, if needed). Return the pointer incremented by space used.
+ */
+static Pointer
+datum_write(Pointer ptr, Datum datum, bool typbyval, char typalign,
+ int16 typlen, char typstorage)
+{
+ Size data_length;
+
+ if (typbyval)
+ {
+ /* pass-by-value */
+ ptr = (char *) att_align_nominal(ptr, typalign);
+ store_att_byval(ptr, datum, typlen);
+ data_length = typlen;
+ }
+ else if (typlen == -1)
+ {
+ /* varlena */
+ Pointer val = DatumGetPointer(datum);
+
+ if (VARATT_IS_EXTERNAL(val))
+ {
+ /*
+ * Throw error, because we must never put a toast pointer inside a
+ * range object. Caller should have detoasted it.
+ */
+ elog(ERROR, "cannot store a toast pointer inside a range");
+ data_length = 0; /* keep compiler quiet */
+ }
+ else if (VARATT_IS_SHORT(val))
+ {
+ /* no alignment for short varlenas */
+ data_length = VARSIZE_SHORT(val);
+ memcpy(ptr, val, data_length);
+ }
+ else if (TYPE_IS_PACKABLE(typlen, typstorage) &&
+ VARATT_CAN_MAKE_SHORT(val))
+ {
+ /* convert to short varlena -- no alignment */
+ data_length = VARATT_CONVERTED_SHORT_SIZE(val);
+ SET_VARSIZE_SHORT(ptr, data_length);
+ memcpy(ptr + 1, VARDATA(val), data_length - 1);
+ }
+ else
+ {
+ /* full 4-byte header varlena */
+ ptr = (char *) att_align_nominal(ptr, typalign);
+ data_length = VARSIZE(val);
+ memcpy(ptr, val, data_length);
+ }
+ }
+ else if (typlen == -2)
+ {
+ /* cstring ... never needs alignment */
+ Assert(typalign == TYPALIGN_CHAR);
+ data_length = strlen(DatumGetCString(datum)) + 1;
+ memcpy(ptr, DatumGetPointer(datum), data_length);
+ }
+ else
+ {
+ /* fixed-length pass-by-reference */
+ ptr = (char *) att_align_nominal(ptr, typalign);
+ Assert(typlen > 0);
+ data_length = typlen;
+ memcpy(ptr, DatumGetPointer(datum), data_length);
+ }
+
+ ptr += data_length;
+
+ return ptr;
+}
diff --git a/src/backend/utils/adt/rangetypes_gist.c b/src/backend/utils/adt/rangetypes_gist.c
new file mode 100644
index 0000000..fbf39db
--- /dev/null
+++ b/src/backend/utils/adt/rangetypes_gist.c
@@ -0,0 +1,1798 @@
+/*-------------------------------------------------------------------------
+ *
+ * rangetypes_gist.c
+ * GiST support for range types.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/rangetypes_gist.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/stratnum.h"
+#include "utils/datum.h"
+#include "utils/float.h"
+#include "utils/fmgrprotos.h"
+#include "utils/multirangetypes.h"
+#include "utils/rangetypes.h"
+
+/*
+ * Range class properties used to segregate different classes of ranges in
+ * GiST. Each unique combination of properties is a class. CLS_EMPTY cannot
+ * be combined with anything else.
+ */
+#define CLS_NORMAL 0 /* Ordinary finite range (no bits set) */
+#define CLS_LOWER_INF 1 /* Lower bound is infinity */
+#define CLS_UPPER_INF 2 /* Upper bound is infinity */
+#define CLS_CONTAIN_EMPTY 4 /* Contains underlying empty ranges */
+#define CLS_EMPTY 8 /* Special class for empty ranges */
+
+#define CLS_COUNT 9 /* # of classes; includes all combinations of
+ * properties. CLS_EMPTY doesn't combine with
+ * anything else, so it's only 2^3 + 1. */
+
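+/*
+ * For illustration: a finite range maps to class 0 (CLS_NORMAL); a range
+ * with both bounds infinite that also contains empty ranges maps to class
+ * CLS_LOWER_INF | CLS_UPPER_INF | CLS_CONTAIN_EMPTY = 7; an empty range
+ * always maps to class CLS_EMPTY = 8, giving the 9 classes counted above.
+ */
+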
+/*
+ * Minimum accepted ratio of split for items of the same class. If the items
+ * are of different classes, we will separate along those lines regardless of
+ * the ratio.
+ */
+#define LIMIT_RATIO 0.3
+
+/* Constants for fixed penalty values */
+#define INFINITE_BOUND_PENALTY 2.0
+#define CONTAIN_EMPTY_PENALTY 1.0
+#define DEFAULT_SUBTYPE_DIFF_PENALTY 1.0
+
+/*
+ * Per-item data for range_gist_single_sorting_split.
+ */
+typedef struct
+{
+ int index;
+ RangeBound bound;
+} SingleBoundSortItem;
+
+/* place on left or right side of split? */
+typedef enum
+{
+ SPLIT_LEFT = 0, /* makes initialization to SPLIT_LEFT easier */
+ SPLIT_RIGHT
+} SplitLR;
+
+/*
+ * Context for range_gist_consider_split.
+ */
+typedef struct
+{
+ TypeCacheEntry *typcache; /* typcache for range type */
+ bool has_subtype_diff; /* does it have subtype_diff? */
+ int entries_count; /* total number of entries being split */
+
+ /* Information about currently selected split follows */
+
+ bool first; /* true if no split was selected yet */
+
+ RangeBound *left_upper; /* upper bound of left interval */
+ RangeBound *right_lower; /* lower bound of right interval */
+
+ float4 ratio; /* split ratio */
+ float4 overlap; /* overlap between left and right predicate */
+ int common_left; /* # common entries destined for each side */
+ int common_right;
+} ConsiderSplitContext;
+
+/*
+ * Bounds extracted from a non-empty range, for use in
+ * range_gist_double_sorting_split.
+ */
+typedef struct
+{
+ RangeBound lower;
+ RangeBound upper;
+} NonEmptyRange;
+
+/*
+ * Represents information about an entry that can be placed in either group
+ * without affecting overlap over selected axis ("common entry").
+ */
+typedef struct
+{
+ /* Index of entry in the initial array */
+ int index;
+ /* Delta between closeness of range to each of the two groups */
+ double delta;
+} CommonEntry;
+
+/* Helper macros to place an entry in the left or right group during split */
+/* Note direct access to variables v, typcache, left_range, right_range */
+#define PLACE_LEFT(range, off) \
+ do { \
+ if (v->spl_nleft > 0) \
+ left_range = range_super_union(typcache, left_range, range); \
+ else \
+ left_range = (range); \
+ v->spl_left[v->spl_nleft++] = (off); \
+ } while(0)
+
+#define PLACE_RIGHT(range, off) \
+ do { \
+ if (v->spl_nright > 0) \
+ right_range = range_super_union(typcache, right_range, range); \
+ else \
+ right_range = (range); \
+ v->spl_right[v->spl_nright++] = (off); \
+ } while(0)
+
+/* Copy a RangeType datum (hardwires typbyval and typlen for ranges...) */
+#define rangeCopy(r) \
+ ((RangeType *) DatumGetPointer(datumCopy(PointerGetDatum(r), \
+ false, -1)))
+
+static RangeType *range_super_union(TypeCacheEntry *typcache, RangeType *r1,
+ RangeType *r2);
+static bool range_gist_consistent_int_range(TypeCacheEntry *typcache,
+ StrategyNumber strategy,
+ const RangeType *key,
+ const RangeType *query);
+static bool range_gist_consistent_int_multirange(TypeCacheEntry *typcache,
+ StrategyNumber strategy,
+ const RangeType *key,
+ const MultirangeType *query);
+static bool range_gist_consistent_int_element(TypeCacheEntry *typcache,
+ StrategyNumber strategy,
+ const RangeType *key,
+ Datum query);
+static bool range_gist_consistent_leaf_range(TypeCacheEntry *typcache,
+ StrategyNumber strategy,
+ const RangeType *key,
+ const RangeType *query);
+static bool range_gist_consistent_leaf_multirange(TypeCacheEntry *typcache,
+ StrategyNumber strategy,
+ const RangeType *key,
+ const MultirangeType *query);
+static bool range_gist_consistent_leaf_element(TypeCacheEntry *typcache,
+ StrategyNumber strategy,
+ const RangeType *key,
+ Datum query);
+static void range_gist_fallback_split(TypeCacheEntry *typcache,
+ GistEntryVector *entryvec,
+ GIST_SPLITVEC *v);
+static void range_gist_class_split(TypeCacheEntry *typcache,
+ GistEntryVector *entryvec,
+ GIST_SPLITVEC *v,
+ SplitLR *classes_groups);
+static void range_gist_single_sorting_split(TypeCacheEntry *typcache,
+ GistEntryVector *entryvec,
+ GIST_SPLITVEC *v,
+ bool use_upper_bound);
+static void range_gist_double_sorting_split(TypeCacheEntry *typcache,
+ GistEntryVector *entryvec,
+ GIST_SPLITVEC *v);
+static void range_gist_consider_split(ConsiderSplitContext *context,
+ RangeBound *right_lower, int min_left_count,
+ RangeBound *left_upper, int max_left_count);
+static int get_gist_range_class(RangeType *range);
+static int single_bound_cmp(const void *a, const void *b, void *arg);
+static int interval_cmp_lower(const void *a, const void *b, void *arg);
+static int interval_cmp_upper(const void *a, const void *b, void *arg);
+static int common_entry_cmp(const void *i1, const void *i2);
+static float8 call_subtype_diff(TypeCacheEntry *typcache,
+ Datum val1, Datum val2);
+
+
+/* GiST query consistency check */
+Datum
+range_gist_consistent(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+ Datum query = PG_GETARG_DATUM(1);
+ StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
+ bool result;
+ Oid subtype = PG_GETARG_OID(3);
+ bool *recheck = (bool *) PG_GETARG_POINTER(4);
+ RangeType *key = DatumGetRangeTypeP(entry->key);
+ TypeCacheEntry *typcache;
+
+ /* All operators served by this function are exact */
+ *recheck = false;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(key));
+
+ /*
+ * Perform consistent checking using function corresponding to key type
+ * (leaf or internal) and query subtype (range, multirange, or element).
+ * Note that invalid subtype means that query type matches key type
+ * (range).
+ */
+ if (GIST_LEAF(entry))
+ {
+ if (!OidIsValid(subtype) || subtype == ANYRANGEOID)
+ result = range_gist_consistent_leaf_range(typcache, strategy, key,
+ DatumGetRangeTypeP(query));
+ else if (subtype == ANYMULTIRANGEOID)
+ result = range_gist_consistent_leaf_multirange(typcache, strategy, key,
+ DatumGetMultirangeTypeP(query));
+ else
+ result = range_gist_consistent_leaf_element(typcache, strategy,
+ key, query);
+ }
+ else
+ {
+ if (!OidIsValid(subtype) || subtype == ANYRANGEOID)
+ result = range_gist_consistent_int_range(typcache, strategy, key,
+ DatumGetRangeTypeP(query));
+ else if (subtype == ANYMULTIRANGEOID)
+ result = range_gist_consistent_int_multirange(typcache, strategy, key,
+ DatumGetMultirangeTypeP(query));
+ else
+ result = range_gist_consistent_int_element(typcache, strategy,
+ key, query);
+ }
+ PG_RETURN_BOOL(result);
+}
+
+/*
+ * GiST compress method for multiranges: multirange is approximated as union
+ * range with no gaps.
+ */
+Datum
+multirange_gist_compress(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+
+ if (entry->leafkey)
+ {
+ MultirangeType *mr = DatumGetMultirangeTypeP(entry->key);
+ RangeType *r;
+ TypeCacheEntry *typcache;
+ GISTENTRY *retval = palloc(sizeof(GISTENTRY));
+
+ typcache = multirange_get_typcache(fcinfo, MultirangeTypeGetOid(mr));
+ r = multirange_get_union_range(typcache->rngtype, mr);
+
+ gistentryinit(*retval, RangeTypePGetDatum(r),
+ entry->rel, entry->page, entry->offset, false);
+
+ PG_RETURN_POINTER(retval);
+ }
+
+ PG_RETURN_POINTER(entry);
+}
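+
+/*
+ * For illustration (assuming int4multirange): the multirange
+ * {[1,3), [5,8)} is compressed to the single union range [1,8); the gap
+ * [3,5) is lost, which is why multirange_gist_consistent below must set
+ * the recheck flag.
+ */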
+
+/* GiST query consistency check for multiranges */
+Datum
+multirange_gist_consistent(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+ Datum query = PG_GETARG_DATUM(1);
+ StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
+ bool result;
+ Oid subtype = PG_GETARG_OID(3);
+ bool *recheck = (bool *) PG_GETARG_POINTER(4);
+ RangeType *key = DatumGetRangeTypeP(entry->key);
+ TypeCacheEntry *typcache;
+
+ /*
+ * All operators served by this function are inexact, because a
+ * multirange is approximated by a single union range with no gaps.
+ */
+ *recheck = true;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(key));
+
+ /*
+ * Perform consistent checking using function corresponding to key type
+ * (leaf or internal) and query subtype (range, multirange, or element).
+ * Note that invalid subtype means that query type matches key type
+ * (multirange).
+ */
+ if (GIST_LEAF(entry))
+ {
+ if (!OidIsValid(subtype) || subtype == ANYMULTIRANGEOID)
+ result = range_gist_consistent_leaf_multirange(typcache, strategy, key,
+ DatumGetMultirangeTypeP(query));
+ else if (subtype == ANYRANGEOID)
+ result = range_gist_consistent_leaf_range(typcache, strategy, key,
+ DatumGetRangeTypeP(query));
+ else
+ result = range_gist_consistent_leaf_element(typcache, strategy,
+ key, query);
+ }
+ else
+ {
+ if (!OidIsValid(subtype) || subtype == ANYMULTIRANGEOID)
+ result = range_gist_consistent_int_multirange(typcache, strategy, key,
+ DatumGetMultirangeTypeP(query));
+ else if (subtype == ANYRANGEOID)
+ result = range_gist_consistent_int_range(typcache, strategy, key,
+ DatumGetRangeTypeP(query));
+ else
+ result = range_gist_consistent_int_element(typcache, strategy,
+ key, query);
+ }
+ PG_RETURN_BOOL(result);
+}
+
+/* form union range */
+Datum
+range_gist_union(PG_FUNCTION_ARGS)
+{
+ GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+ GISTENTRY *ent = entryvec->vector;
+ RangeType *result_range;
+ TypeCacheEntry *typcache;
+ int i;
+
+ result_range = DatumGetRangeTypeP(ent[0].key);
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(result_range));
+
+ for (i = 1; i < entryvec->n; i++)
+ {
+ result_range = range_super_union(typcache, result_range,
+ DatumGetRangeTypeP(ent[i].key));
+ }
+
+ PG_RETURN_RANGE_P(result_range);
+}
+
+/*
+ * We store ranges as ranges in GiST indexes, so we do not need
+ * compress, decompress, or fetch functions. Note this implies a limit
+ * on the size of range values that can be indexed.
+ */
+
+/*
+ * GiST page split penalty function.
+ *
+ * The penalty function has the following goals (in order from most to least
+ * important):
+ * - Keep normal ranges separate
+ * - Avoid broadening the class of the original predicate
+ * - Avoid broadening (as determined by subtype_diff) the original predicate
+ * - Favor adding ranges to narrower original predicates
+ */
+Datum
+range_gist_penalty(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0);
+ GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
+ float *penalty = (float *) PG_GETARG_POINTER(2);
+ RangeType *orig = DatumGetRangeTypeP(origentry->key);
+ RangeType *new = DatumGetRangeTypeP(newentry->key);
+ TypeCacheEntry *typcache;
+ bool has_subtype_diff;
+ RangeBound orig_lower,
+ new_lower,
+ orig_upper,
+ new_upper;
+ bool orig_empty,
+ new_empty;
+
+ if (RangeTypeGetOid(orig) != RangeTypeGetOid(new))
+ elog(ERROR, "range types do not match");
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(orig));
+
+ has_subtype_diff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
+
+ range_deserialize(typcache, orig, &orig_lower, &orig_upper, &orig_empty);
+ range_deserialize(typcache, new, &new_lower, &new_upper, &new_empty);
+
+ /*
+ * Distinct branches for handling distinct classes of ranges. Note that
+ * penalty values only need to be commensurate within the same class of
+ * new range.
+ */
+ if (new_empty)
+ {
+ /* Handle insertion of empty range */
+ if (orig_empty)
+ {
+ /*
+ * The best case is inserting it into an empty original range:
+ * there is no broadening of the original range, and the original
+ * range is as narrow as possible.
+ */
+ *penalty = 0.0;
+ }
+ else if (RangeIsOrContainsEmpty(orig))
+ {
+ /*
+ * The second case is to insert empty range into range which
+ * contains at least one underlying empty range. There is still
+ * no broadening of original range, but original range is not as
+ * narrow as possible.
+ */
+ *penalty = CONTAIN_EMPTY_PENALTY;
+ }
+ else if (orig_lower.infinite && orig_upper.infinite)
+ {
+ /*
+ * Original range requires broadening. (-inf, +inf) is the farthest
+ * from a normal range in this case.
+ */
+ *penalty = 2 * CONTAIN_EMPTY_PENALTY;
+ }
+ else if (orig_lower.infinite || orig_upper.infinite)
+ {
+ /*
+ * (-inf, x) or (x, +inf) original ranges are closer to normal
+ * ranges, so it's worse to mix them with empty ranges.
+ */
+ *penalty = 3 * CONTAIN_EMPTY_PENALTY;
+ }
+ else
+ {
+ /*
+ * The least preferred case is broadening of normal range.
+ */
+ *penalty = 4 * CONTAIN_EMPTY_PENALTY;
+ }
+ }
+ else if (new_lower.infinite && new_upper.infinite)
+ {
+ /* Handle insertion of (-inf, +inf) range */
+ if (orig_lower.infinite && orig_upper.infinite)
+ {
+ /*
+ * Best case is inserting to (-inf, +inf) original range.
+ */
+ *penalty = 0.0;
+ }
+ else if (orig_lower.infinite || orig_upper.infinite)
+ {
+ /*
+ * When original range is (-inf, x) or (x, +inf) it requires
+ * broadening of original range (extension of one bound to
+ * infinity).
+ */
+ *penalty = INFINITE_BOUND_PENALTY;
+ }
+ else
+ {
+ /*
+ * Insertion to normal original range is least preferred.
+ */
+ *penalty = 2 * INFINITE_BOUND_PENALTY;
+ }
+
+ if (RangeIsOrContainsEmpty(orig))
+ {
+ /*
+ * Original range is narrower when it doesn't contain empty
+ * ranges. Add additional penalty otherwise.
+ */
+ *penalty += CONTAIN_EMPTY_PENALTY;
+ }
+ }
+ else if (new_lower.infinite)
+ {
+ /* Handle insertion of (-inf, x) range */
+ if (!orig_empty && orig_lower.infinite)
+ {
+ if (orig_upper.infinite)
+ {
+ /*
+ * (-inf, +inf) range won't be extended by insertion of (-inf,
+ * x) range. It's a less desirable case than insertion to
+ * (-inf, y) original range without extension, because in that
+ * case original range is narrower. But we can't express that
+ * in single float value.
+ */
+ *penalty = 0.0;
+ }
+ else
+ {
+ if (range_cmp_bounds(typcache, &new_upper, &orig_upper) > 0)
+ {
+ /*
+ * Get extension of original range using subtype_diff. Use
+ * constant if subtype_diff unavailable.
+ */
+ if (has_subtype_diff)
+ *penalty = call_subtype_diff(typcache,
+ new_upper.val,
+ orig_upper.val);
+ else
+ *penalty = DEFAULT_SUBTYPE_DIFF_PENALTY;
+ }
+ else
+ {
+ /* No extension of original range */
+ *penalty = 0.0;
+ }
+ }
+ }
+ else
+ {
+ /*
+ * If lower bound of original range is not -inf, then extension of
+ * it is infinity.
+ */
+ *penalty = get_float4_infinity();
+ }
+ }
+ else if (new_upper.infinite)
+ {
+ /* Handle insertion of (x, +inf) range */
+ if (!orig_empty && orig_upper.infinite)
+ {
+ if (orig_lower.infinite)
+ {
+ /*
+ * (-inf, +inf) range won't be extended by insertion of (x,
+ * +inf) range. It's a less desirable case than insertion to
+ * (y, +inf) original range without extension, because in that
+ * case original range is narrower. But we can't express that
+ * in single float value.
+ */
+ *penalty = 0.0;
+ }
+ else
+ {
+ if (range_cmp_bounds(typcache, &new_lower, &orig_lower) < 0)
+ {
+ /*
+ * Get extension of original range using subtype_diff. Use
+ * constant if subtype_diff unavailable.
+ */
+ if (has_subtype_diff)
+ *penalty = call_subtype_diff(typcache,
+ orig_lower.val,
+ new_lower.val);
+ else
+ *penalty = DEFAULT_SUBTYPE_DIFF_PENALTY;
+ }
+ else
+ {
+ /* No extension of original range */
+ *penalty = 0.0;
+ }
+ }
+ }
+ else
+ {
+ /*
+ * If upper bound of original range is not +inf, then extension of
+ * it is infinity.
+ */
+ *penalty = get_float4_infinity();
+ }
+ }
+ else
+ {
+ /* Handle insertion of normal (non-empty, non-infinite) range */
+ if (orig_empty || orig_lower.infinite || orig_upper.infinite)
+ {
+ /*
+ * Avoid mixing normal ranges with infinite and empty ranges.
+ */
+ *penalty = get_float4_infinity();
+ }
+ else
+ {
+ /*
+ * Calculate extension of original range by calling subtype_diff.
+ * Use constant if subtype_diff unavailable.
+ */
+ float8 diff = 0.0;
+
+ if (range_cmp_bounds(typcache, &new_lower, &orig_lower) < 0)
+ {
+ if (has_subtype_diff)
+ diff += call_subtype_diff(typcache,
+ orig_lower.val,
+ new_lower.val);
+ else
+ diff += DEFAULT_SUBTYPE_DIFF_PENALTY;
+ }
+ if (range_cmp_bounds(typcache, &new_upper, &orig_upper) > 0)
+ {
+ if (has_subtype_diff)
+ diff += call_subtype_diff(typcache,
+ new_upper.val,
+ orig_upper.val);
+ else
+ diff += DEFAULT_SUBTYPE_DIFF_PENALTY;
+ }
+ *penalty = diff;
+ }
+ }
+
+ PG_RETURN_POINTER(penalty);
+}
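+
+/*
+ * For illustration: inserting an empty range costs 0.0 if the original
+ * predicate is itself empty, CONTAIN_EMPTY_PENALTY (1.0) if it merely
+ * contains empty ranges, and up to 4 * CONTAIN_EMPTY_PENALTY if a normal
+ * range would have to be broadened; conversely, inserting a normal range
+ * into an infinite or empty predicate is penalized with infinity so the
+ * classes stay separated.
+ */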
+
+/*
+ * The GiST PickSplit method for ranges
+ *
+ * Primarily, we try to segregate ranges of different classes. If splitting
+ * ranges of the same class, use the appropriate split method for that class.
+ */
+Datum
+range_gist_picksplit(PG_FUNCTION_ARGS)
+{
+ GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+ GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+ TypeCacheEntry *typcache;
+ OffsetNumber i;
+ RangeType *pred_left;
+ int nbytes;
+ OffsetNumber maxoff;
+ int count_in_classes[CLS_COUNT];
+ int j;
+ int non_empty_classes_count = 0;
+ int biggest_class = -1;
+ int biggest_class_count = 0;
+ int total_count;
+
+ /* use first item to look up range type's info */
+ pred_left = DatumGetRangeTypeP(entryvec->vector[FirstOffsetNumber].key);
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(pred_left));
+
+ maxoff = entryvec->n - 1;
+ nbytes = (maxoff + 1) * sizeof(OffsetNumber);
+ v->spl_left = (OffsetNumber *) palloc(nbytes);
+ v->spl_right = (OffsetNumber *) palloc(nbytes);
+
+ /*
+ * Get count distribution of range classes.
+ */
+ memset(count_in_classes, 0, sizeof(count_in_classes));
+ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key);
+
+ count_in_classes[get_gist_range_class(range)]++;
+ }
+
+ /*
+ * Count non-empty classes and find biggest class.
+ */
+ total_count = maxoff;
+ for (j = 0; j < CLS_COUNT; j++)
+ {
+ if (count_in_classes[j] > 0)
+ {
+ if (count_in_classes[j] > biggest_class_count)
+ {
+ biggest_class_count = count_in_classes[j];
+ biggest_class = j;
+ }
+ non_empty_classes_count++;
+ }
+ }
+
+ Assert(non_empty_classes_count > 0);
+
+ if (non_empty_classes_count == 1)
+ {
+ /* One non-empty class, so split inside class */
+ if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_NORMAL)
+ {
+ /* double sorting split for normal ranges */
+ range_gist_double_sorting_split(typcache, entryvec, v);
+ }
+ else if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_LOWER_INF)
+ {
+ /* upper bound sorting split for (-inf, x) ranges */
+ range_gist_single_sorting_split(typcache, entryvec, v, true);
+ }
+ else if ((biggest_class & ~CLS_CONTAIN_EMPTY) == CLS_UPPER_INF)
+ {
+ /* lower bound sorting split for (x, +inf) ranges */
+ range_gist_single_sorting_split(typcache, entryvec, v, false);
+ }
+ else
+ {
+ /* trivial split for all (-inf, +inf) or all empty ranges */
+ range_gist_fallback_split(typcache, entryvec, v);
+ }
+ }
+ else
+ {
+ /*
+ * Class based split.
+ *
+ * To which side of the split should each class go? Initialize them
+ * all to go to the left side.
+ */
+ SplitLR classes_groups[CLS_COUNT];
+
+ memset(classes_groups, 0, sizeof(classes_groups));
+
+ if (count_in_classes[CLS_NORMAL] > 0)
+ {
+ /* separate normal ranges if any */
+ classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
+ }
+ else
+ {
+ /*----------
+ * Try to split classes in one of two ways:
+ * 1) containing infinities - not containing infinities
+ * 2) containing empty - not containing empty
+ *
+ * Select the way which balances the ranges between left and right
+ * the best. If split in these ways is not possible, there are at
+ * most 3 classes, so just separate biggest class.
+ *----------
+ */
+ int infCount,
+ nonInfCount;
+ int emptyCount,
+ nonEmptyCount;
+
+ nonInfCount =
+ count_in_classes[CLS_NORMAL] +
+ count_in_classes[CLS_CONTAIN_EMPTY] +
+ count_in_classes[CLS_EMPTY];
+ infCount = total_count - nonInfCount;
+
+ nonEmptyCount =
+ count_in_classes[CLS_NORMAL] +
+ count_in_classes[CLS_LOWER_INF] +
+ count_in_classes[CLS_UPPER_INF] +
+ count_in_classes[CLS_LOWER_INF | CLS_UPPER_INF];
+ emptyCount = total_count - nonEmptyCount;
+
+ if (infCount > 0 && nonInfCount > 0 &&
+ (Abs(infCount - nonInfCount) <=
+ Abs(emptyCount - nonEmptyCount)))
+ {
+ classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
+ classes_groups[CLS_CONTAIN_EMPTY] = SPLIT_RIGHT;
+ classes_groups[CLS_EMPTY] = SPLIT_RIGHT;
+ }
+ else if (emptyCount > 0 && nonEmptyCount > 0)
+ {
+ classes_groups[CLS_NORMAL] = SPLIT_RIGHT;
+ classes_groups[CLS_LOWER_INF] = SPLIT_RIGHT;
+ classes_groups[CLS_UPPER_INF] = SPLIT_RIGHT;
+ classes_groups[CLS_LOWER_INF | CLS_UPPER_INF] = SPLIT_RIGHT;
+ }
+ else
+ {
+ /*
+ * Either total_count == emptyCount or total_count ==
+ * infCount.
+ */
+ classes_groups[biggest_class] = SPLIT_RIGHT;
+ }
+ }
+
+ range_gist_class_split(typcache, entryvec, v, classes_groups);
+ }
+
+ PG_RETURN_POINTER(v);
+}
+
+/* equality comparator for GiST */
+Datum
+range_gist_same(PG_FUNCTION_ARGS)
+{
+ RangeType *r1 = PG_GETARG_RANGE_P(0);
+ RangeType *r2 = PG_GETARG_RANGE_P(1);
+ bool *result = (bool *) PG_GETARG_POINTER(2);
+
+ /*
+ * range_eq will ignore the RANGE_CONTAIN_EMPTY flag, so we have to check
+ * that for ourselves. More generally, if the entries have been properly
+ * normalized, then unequal flags bytes must mean unequal ranges ... so
+ * let's just test all the flag bits at once.
+ */
+ if (range_get_flags(r1) != range_get_flags(r2))
+ *result = false;
+ else
+ {
+ TypeCacheEntry *typcache;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(r1));
+
+ *result = range_eq_internal(typcache, r1, r2);
+ }
+
+ PG_RETURN_POINTER(result);
+}
+
+/*
+ *----------------------------------------------------------
+ * STATIC FUNCTIONS
+ *----------------------------------------------------------
+ */
+
+/*
+ * Return the smallest range that contains r1 and r2
+ *
+ * This differs from regular range_union in two critical ways:
+ * 1. It won't throw an error for non-adjacent r1 and r2, but just absorb
+ * the intervening values into the result range.
+ * 2. We track whether any empty range has been union'd into the result,
+ * so that contained_by searches can be indexed. Note that this means
+ * that *all* unions formed within the GiST index must go through here.
+ */
+static RangeType *
+range_super_union(TypeCacheEntry *typcache, RangeType *r1, RangeType *r2)
+{
+ RangeType *result;
+ RangeBound lower1,
+ lower2;
+ RangeBound upper1,
+ upper2;
+ bool empty1,
+ empty2;
+ char flags1,
+ flags2;
+ RangeBound *result_lower;
+ RangeBound *result_upper;
+
+ range_deserialize(typcache, r1, &lower1, &upper1, &empty1);
+ range_deserialize(typcache, r2, &lower2, &upper2, &empty2);
+ flags1 = range_get_flags(r1);
+ flags2 = range_get_flags(r2);
+
+ if (empty1)
+ {
+ /* We can return r2 as-is if it already is or contains empty */
+ if (flags2 & (RANGE_EMPTY | RANGE_CONTAIN_EMPTY))
+ return r2;
+ /* Else we'd better copy it (modify-in-place isn't safe) */
+ r2 = rangeCopy(r2);
+ range_set_contain_empty(r2);
+ return r2;
+ }
+ if (empty2)
+ {
+ /* We can return r1 as-is if it already is or contains empty */
+ if (flags1 & (RANGE_EMPTY | RANGE_CONTAIN_EMPTY))
+ return r1;
+ /* Else we'd better copy it (modify-in-place isn't safe) */
+ r1 = rangeCopy(r1);
+ range_set_contain_empty(r1);
+ return r1;
+ }
+
+ if (range_cmp_bounds(typcache, &lower1, &lower2) <= 0)
+ result_lower = &lower1;
+ else
+ result_lower = &lower2;
+
+ if (range_cmp_bounds(typcache, &upper1, &upper2) >= 0)
+ result_upper = &upper1;
+ else
+ result_upper = &upper2;
+
+ /* optimization to avoid constructing a new range */
+ if (result_lower == &lower1 && result_upper == &upper1 &&
+ ((flags1 & RANGE_CONTAIN_EMPTY) || !(flags2 & RANGE_CONTAIN_EMPTY)))
+ return r1;
+ if (result_lower == &lower2 && result_upper == &upper2 &&
+ ((flags2 & RANGE_CONTAIN_EMPTY) || !(flags1 & RANGE_CONTAIN_EMPTY)))
+ return r2;
+
+ result = make_range(typcache, result_lower, result_upper, false);
+
+ if ((flags1 & RANGE_CONTAIN_EMPTY) || (flags2 & RANGE_CONTAIN_EMPTY))
+ range_set_contain_empty(result);
+
+ return result;
+}
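+
+/*
+ * For illustration (assuming int4range): the super union of [1,3) and
+ * [5,8) is [1,8), silently absorbing the gap [3,5), whereas regular
+ * range_union would reject this non-contiguous pair; if either input is
+ * or contains an empty range, the result is flagged so that contained-by
+ * searches can still find empty ranges.
+ */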
+
+static bool
+multirange_union_range_equal(TypeCacheEntry *typcache,
+ const RangeType *r,
+ const MultirangeType *mr)
+{
+ RangeBound lower1,
+ upper1,
+ lower2,
+ upper2,
+ tmp;
+ bool empty;
+
+ if (RangeIsEmpty(r) || MultirangeIsEmpty(mr))
+ return (RangeIsEmpty(r) && MultirangeIsEmpty(mr));
+
+ range_deserialize(typcache, r, &lower1, &upper1, &empty);
+ Assert(!empty);
+ multirange_get_bounds(typcache, mr, 0, &lower2, &tmp);
+ multirange_get_bounds(typcache, mr, mr->rangeCount - 1, &tmp, &upper2);
+
+ return (range_cmp_bounds(typcache, &lower1, &lower2) == 0 &&
+ range_cmp_bounds(typcache, &upper1, &upper2) == 0);
+}
+
+/*
+ * GiST consistent test on an index internal page with range query
+ */
+static bool
+range_gist_consistent_int_range(TypeCacheEntry *typcache,
+ StrategyNumber strategy,
+ const RangeType *key,
+ const RangeType *query)
+{
+ switch (strategy)
+ {
+ case RANGESTRAT_BEFORE:
+ if (RangeIsEmpty(key) || RangeIsEmpty(query))
+ return false;
+ return (!range_overright_internal(typcache, key, query));
+ case RANGESTRAT_OVERLEFT:
+ if (RangeIsEmpty(key) || RangeIsEmpty(query))
+ return false;
+ return (!range_after_internal(typcache, key, query));
+ case RANGESTRAT_OVERLAPS:
+ return range_overlaps_internal(typcache, key, query);
+ case RANGESTRAT_OVERRIGHT:
+ if (RangeIsEmpty(key) || RangeIsEmpty(query))
+ return false;
+ return (!range_before_internal(typcache, key, query));
+ case RANGESTRAT_AFTER:
+ if (RangeIsEmpty(key) || RangeIsEmpty(query))
+ return false;
+ return (!range_overleft_internal(typcache, key, query));
+ case RANGESTRAT_ADJACENT:
+ if (RangeIsEmpty(key) || RangeIsEmpty(query))
+ return false;
+ if (range_adjacent_internal(typcache, key, query))
+ return true;
+ return range_overlaps_internal(typcache, key, query);
+ case RANGESTRAT_CONTAINS:
+ return range_contains_internal(typcache, key, query);
+ case RANGESTRAT_CONTAINED_BY:
+
+ /*
+ * Empty ranges are contained by anything, so if key is or
+ * contains any empty ranges, we must descend into it. Otherwise,
+ * descend only if key overlaps the query.
+ */
+ if (RangeIsOrContainsEmpty(key))
+ return true;
+ return range_overlaps_internal(typcache, key, query);
+ case RANGESTRAT_EQ:
+
+ /*
+ * If query is empty, descend only if the key is or contains any
+ * empty ranges. Otherwise, descend if key contains query.
+ */
+ if (RangeIsEmpty(query))
+ return RangeIsOrContainsEmpty(key);
+ return range_contains_internal(typcache, key, query);
+ default:
+ elog(ERROR, "unrecognized range strategy: %d", strategy);
+ return false; /* keep compiler quiet */
+ }
+}
+
+/*
+ * GiST consistent test on an index internal page with multirange query
+ */
+static bool
+range_gist_consistent_int_multirange(TypeCacheEntry *typcache,
+ StrategyNumber strategy,
+ const RangeType *key,
+ const MultirangeType *query)
+{
+ switch (strategy)
+ {
+ case RANGESTRAT_BEFORE:
+ if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
+ return false;
+ return (!range_overright_multirange_internal(typcache, key, query));
+ case RANGESTRAT_OVERLEFT:
+ if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
+ return false;
+ return (!range_after_multirange_internal(typcache, key, query));
+ case RANGESTRAT_OVERLAPS:
+ return range_overlaps_multirange_internal(typcache, key, query);
+ case RANGESTRAT_OVERRIGHT:
+ if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
+ return false;
+ return (!range_before_multirange_internal(typcache, key, query));
+ case RANGESTRAT_AFTER:
+ if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
+ return false;
+ return (!range_overleft_multirange_internal(typcache, key, query));
+ case RANGESTRAT_ADJACENT:
+ if (RangeIsEmpty(key) || MultirangeIsEmpty(query))
+ return false;
+ if (range_adjacent_multirange_internal(typcache, key, query))
+ return true;
+ return range_overlaps_multirange_internal(typcache, key, query);
+ case RANGESTRAT_CONTAINS:
+ return range_contains_multirange_internal(typcache, key, query);
+ case RANGESTRAT_CONTAINED_BY:
+
+ /*
+ * Empty ranges are contained by anything, so if key is or
+ * contains any empty ranges, we must descend into it. Otherwise,
+ * descend only if key overlaps the query.
+ */
+ if (RangeIsOrContainsEmpty(key))
+ return true;
+ return range_overlaps_multirange_internal(typcache, key, query);
+ case RANGESTRAT_EQ:
+
+ /*
+ * If query is empty, descend only if the key is or contains any
+ * empty ranges. Otherwise, descend if key contains query.
+ */
+ if (MultirangeIsEmpty(query))
+ return RangeIsOrContainsEmpty(key);
+ return range_contains_multirange_internal(typcache, key, query);
+ default:
+ elog(ERROR, "unrecognized range strategy: %d", strategy);
+ return false; /* keep compiler quiet */
+ }
+}
+
+/*
+ * GiST consistent test on an index internal page with element query
+ */
+static bool
+range_gist_consistent_int_element(TypeCacheEntry *typcache,
+ StrategyNumber strategy,
+ const RangeType *key,
+ Datum query)
+{
+ switch (strategy)
+ {
+ case RANGESTRAT_CONTAINS_ELEM:
+ return range_contains_elem_internal(typcache, key, query);
+ default:
+ elog(ERROR, "unrecognized range strategy: %d", strategy);
+ return false; /* keep compiler quiet */
+ }
+}
+
+/*
+ * GiST consistent test on an index leaf page with range query
+ */
+static bool
+range_gist_consistent_leaf_range(TypeCacheEntry *typcache,
+ StrategyNumber strategy,
+ const RangeType *key,
+ const RangeType *query)
+{
+ switch (strategy)
+ {
+ case RANGESTRAT_BEFORE:
+ return range_before_internal(typcache, key, query);
+ case RANGESTRAT_OVERLEFT:
+ return range_overleft_internal(typcache, key, query);
+ case RANGESTRAT_OVERLAPS:
+ return range_overlaps_internal(typcache, key, query);
+ case RANGESTRAT_OVERRIGHT:
+ return range_overright_internal(typcache, key, query);
+ case RANGESTRAT_AFTER:
+ return range_after_internal(typcache, key, query);
+ case RANGESTRAT_ADJACENT:
+ return range_adjacent_internal(typcache, key, query);
+ case RANGESTRAT_CONTAINS:
+ return range_contains_internal(typcache, key, query);
+ case RANGESTRAT_CONTAINED_BY:
+ return range_contained_by_internal(typcache, key, query);
+ case RANGESTRAT_EQ:
+ return range_eq_internal(typcache, key, query);
+ default:
+ elog(ERROR, "unrecognized range strategy: %d", strategy);
+ return false; /* keep compiler quiet */
+ }
+}
+
+/*
+ * GiST consistent test on an index leaf page with multirange query
+ */
+static bool
+range_gist_consistent_leaf_multirange(TypeCacheEntry *typcache,
+ StrategyNumber strategy,
+ const RangeType *key,
+ const MultirangeType *query)
+{
+ switch (strategy)
+ {
+ case RANGESTRAT_BEFORE:
+ return range_before_multirange_internal(typcache, key, query);
+ case RANGESTRAT_OVERLEFT:
+ return range_overleft_multirange_internal(typcache, key, query);
+ case RANGESTRAT_OVERLAPS:
+ return range_overlaps_multirange_internal(typcache, key, query);
+ case RANGESTRAT_OVERRIGHT:
+ return range_overright_multirange_internal(typcache, key, query);
+ case RANGESTRAT_AFTER:
+ return range_after_multirange_internal(typcache, key, query);
+ case RANGESTRAT_ADJACENT:
+ return range_adjacent_multirange_internal(typcache, key, query);
+ case RANGESTRAT_CONTAINS:
+ return range_contains_multirange_internal(typcache, key, query);
+ case RANGESTRAT_CONTAINED_BY:
+ return multirange_contains_range_internal(typcache, query, key);
+ case RANGESTRAT_EQ:
+ return multirange_union_range_equal(typcache, key, query);
+ default:
+ elog(ERROR, "unrecognized range strategy: %d", strategy);
+ return false; /* keep compiler quiet */
+ }
+}
+
+/*
+ * GiST consistent test on an index leaf page with element query
+ */
+static bool
+range_gist_consistent_leaf_element(TypeCacheEntry *typcache,
+ StrategyNumber strategy,
+ const RangeType *key,
+ Datum query)
+{
+ switch (strategy)
+ {
+ case RANGESTRAT_CONTAINS_ELEM:
+ return range_contains_elem_internal(typcache, key, query);
+ default:
+ elog(ERROR, "unrecognized range strategy: %d", strategy);
+ return false; /* keep compiler quiet */
+ }
+}
+
+/*
+ * Trivial split: half of entries will be placed on one page
+ * and the other half on the other page.
+ */
+static void
+range_gist_fallback_split(TypeCacheEntry *typcache,
+ GistEntryVector *entryvec,
+ GIST_SPLITVEC *v)
+{
+ RangeType *left_range = NULL;
+ RangeType *right_range = NULL;
+ OffsetNumber i,
+ maxoff,
+ split_idx;
+
+ maxoff = entryvec->n - 1;
+ /* Split entries before this to left page, after to right: */
+ split_idx = (maxoff - FirstOffsetNumber) / 2 + FirstOffsetNumber;
+
+ v->spl_nleft = 0;
+ v->spl_nright = 0;
+ for (i = FirstOffsetNumber; i <= maxoff; i++)
+ {
+ RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key);
+
+ if (i < split_idx)
+ PLACE_LEFT(range, i);
+ else
+ PLACE_RIGHT(range, i);
+ }
+
+ v->spl_ldatum = RangeTypePGetDatum(left_range);
+ v->spl_rdatum = RangeTypePGetDatum(right_range);
+}
+
+/*
+ * Split based on classes of ranges.
+ *
+ * See get_gist_range_class for class definitions.
+ * classes_groups is an array of length CLS_COUNT indicating the side of the
+ * split to which each class should go.
+ */
+static void
+range_gist_class_split(TypeCacheEntry *typcache,
+ GistEntryVector *entryvec,
+ GIST_SPLITVEC *v,
+ SplitLR *classes_groups)
+{
+ RangeType *left_range = NULL;
+ RangeType *right_range = NULL;
+ OffsetNumber i,
+ maxoff;
+
+ maxoff = entryvec->n - 1;
+
+ v->spl_nleft = 0;
+ v->spl_nright = 0;
+ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key);
+ int class;
+
+ /* Get class of range */
+ class = get_gist_range_class(range);
+
+ /* Place range to appropriate page */
+ if (classes_groups[class] == SPLIT_LEFT)
+ PLACE_LEFT(range, i);
+ else
+ {
+ Assert(classes_groups[class] == SPLIT_RIGHT);
+ PLACE_RIGHT(range, i);
+ }
+ }
+
+ v->spl_ldatum = RangeTypePGetDatum(left_range);
+ v->spl_rdatum = RangeTypePGetDatum(right_range);
+}
+
+/*
+ * Sorting-based split. The first half of the entries in sort order are
+ * placed on one page and the second half on the other page. The
+ * use_upper_bound parameter indicates whether to sort by the upper or the
+ * lower bound.
+ */
+static void
+range_gist_single_sorting_split(TypeCacheEntry *typcache,
+ GistEntryVector *entryvec,
+ GIST_SPLITVEC *v,
+ bool use_upper_bound)
+{
+ SingleBoundSortItem *sortItems;
+ RangeType *left_range = NULL;
+ RangeType *right_range = NULL;
+ OffsetNumber i,
+ maxoff,
+ split_idx;
+
+ maxoff = entryvec->n - 1;
+
+ sortItems = (SingleBoundSortItem *)
+ palloc(maxoff * sizeof(SingleBoundSortItem));
+
+ /*
+ * Prepare auxiliary array and sort the values.
+ */
+ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key);
+ RangeBound bound2;
+ bool empty;
+
+ sortItems[i - 1].index = i;
+ /* Put appropriate bound into array */
+ if (use_upper_bound)
+ range_deserialize(typcache, range, &bound2,
+ &sortItems[i - 1].bound, &empty);
+ else
+ range_deserialize(typcache, range, &sortItems[i - 1].bound,
+ &bound2, &empty);
+ Assert(!empty);
+ }
+
+ qsort_arg(sortItems, maxoff, sizeof(SingleBoundSortItem),
+ single_bound_cmp, typcache);
+
+ split_idx = maxoff / 2;
+
+ v->spl_nleft = 0;
+ v->spl_nright = 0;
+
+ for (i = 0; i < maxoff; i++)
+ {
+ int idx = sortItems[i].index;
+ RangeType *range = DatumGetRangeTypeP(entryvec->vector[idx].key);
+
+ if (i < split_idx)
+ PLACE_LEFT(range, idx);
+ else
+ PLACE_RIGHT(range, idx);
+ }
+
+ v->spl_ldatum = RangeTypePGetDatum(left_range);
+ v->spl_rdatum = RangeTypePGetDatum(right_range);
+}
+
+/*
+ * Double sorting split algorithm.
+ *
+ * The algorithm considers dividing the ranges into two groups. The first
+ * (left) group keeps the overall (minimum) lower bound; the second (right)
+ * group keeps the overall (maximum) upper bound. The challenge is to find
+ * the upper bound of the left group and the lower bound of the right group
+ * so that the overlap between the groups is minimal and the distribution
+ * ratio is acceptable. For each candidate lower bound of the right group,
+ * the algorithm finds the minimal upper bound of the left group; for each
+ * candidate upper bound of the left group, it finds the maximal lower
+ * bound of the right group. For each such pair, range_gist_consider_split
+ * considers replacing the currently selected split with the new one.
+ *
+ * After that, all the entries are divided into three groups:
+ * 1) Entries which should be placed to the left group
+ * 2) Entries which should be placed to the right group
+ * 3) "Common entries" which can be placed to either group without affecting
+ * amount of overlap.
+ *
+ * The common ranges are distributed according to the difference between
+ * the distance from the common range's lower bound to the right group's
+ * lower bound and the distance from its upper bound to the left group's
+ * upper bound.
+ *
+ * For details see:
+ * "A new double sorting-based node splitting algorithm for R-tree",
+ * A. Korotkov
+ * http://syrcose.ispras.ru/2011/files/SYRCoSE2011_Proceedings.pdf#page=36
+ */
+static void
+range_gist_double_sorting_split(TypeCacheEntry *typcache,
+ GistEntryVector *entryvec,
+ GIST_SPLITVEC *v)
+{
+ ConsiderSplitContext context;
+ OffsetNumber i,
+ maxoff;
+ RangeType *range,
+ *left_range = NULL,
+ *right_range = NULL;
+ int common_entries_count;
+ NonEmptyRange *by_lower,
+ *by_upper;
+ CommonEntry *common_entries;
+ int nentries,
+ i1,
+ i2;
+ RangeBound *right_lower,
+ *left_upper;
+
+ memset(&context, 0, sizeof(ConsiderSplitContext));
+ context.typcache = typcache;
+ context.has_subtype_diff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
+
+ maxoff = entryvec->n - 1;
+ nentries = context.entries_count = maxoff - FirstOffsetNumber + 1;
+ context.first = true;
+
+ /* Allocate arrays for sorted range bounds */
+ by_lower = (NonEmptyRange *) palloc(nentries * sizeof(NonEmptyRange));
+ by_upper = (NonEmptyRange *) palloc(nentries * sizeof(NonEmptyRange));
+
+ /* Fill arrays of bounds */
+ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ RangeType *range = DatumGetRangeTypeP(entryvec->vector[i].key);
+ bool empty;
+
+ range_deserialize(typcache, range,
+ &by_lower[i - FirstOffsetNumber].lower,
+ &by_lower[i - FirstOffsetNumber].upper,
+ &empty);
+ Assert(!empty);
+ }
+
+ /*
+ * Make two arrays of range bounds: one sorted by lower bound and another
+ * sorted by upper bound.
+ */
+ memcpy(by_upper, by_lower, nentries * sizeof(NonEmptyRange));
+ qsort_arg(by_lower, nentries, sizeof(NonEmptyRange),
+ interval_cmp_lower, typcache);
+ qsort_arg(by_upper, nentries, sizeof(NonEmptyRange),
+ interval_cmp_upper, typcache);
+
+ /*----------
+ * The goal is to form a left and right range, so that every entry
+ * range is contained by either left or right interval (or both).
+ *
+ * For example, with the ranges (0,1), (1,3), (2,3), (2,4):
+ *
+ * 0 1 2 3 4
+ * +-+
+ * +---+
+ * +-+
+ * +---+
+ *
+ * The left and right ranges are of the form (0,a) and (b,4).
+ * We first consider splits where b is the lower bound of an entry.
+ * We iterate through all entries, and for each b, calculate the
+ * smallest possible a. Then we consider splits where a is the
+ * upper bound of an entry, and for each a, calculate the greatest
+ * possible b.
+ *
+ * In the above example, the first loop would consider splits:
+ * b=0: (0,1)-(0,4)
+ * b=1: (0,1)-(1,4)
+ * b=2: (0,3)-(2,4)
+ *
+ * And the second loop:
+ * a=1: (0,1)-(1,4)
+ * a=3: (0,3)-(2,4)
+ * a=4: (0,4)-(2,4)
+ *----------
+ */
+
+ /*
+ * Iterate over lower bound of right group, finding smallest possible
+ * upper bound of left group.
+ */
+ i1 = 0;
+ i2 = 0;
+ right_lower = &by_lower[i1].lower;
+ left_upper = &by_upper[i2].lower;
+ while (true)
+ {
+ /*
+ * Find next lower bound of right group.
+ */
+ while (i1 < nentries &&
+ range_cmp_bounds(typcache, right_lower,
+ &by_lower[i1].lower) == 0)
+ {
+ if (range_cmp_bounds(typcache, &by_lower[i1].upper,
+ left_upper) > 0)
+ left_upper = &by_lower[i1].upper;
+ i1++;
+ }
+ if (i1 >= nentries)
+ break;
+ right_lower = &by_lower[i1].lower;
+
+ /*
+ * Count the ranges that must be placed in the left group regardless.
+ */
+ while (i2 < nentries &&
+ range_cmp_bounds(typcache, &by_upper[i2].upper,
+ left_upper) <= 0)
+ i2++;
+
+ /*
+ * Consider found split to see if it's better than what we had.
+ */
+ range_gist_consider_split(&context, right_lower, i1, left_upper, i2);
+ }
+
+ /*
+ * Iterate over upper bound of left group finding greatest possible lower
+ * bound of right group.
+ */
+ i1 = nentries - 1;
+ i2 = nentries - 1;
+ right_lower = &by_lower[i1].upper;
+ left_upper = &by_upper[i2].upper;
+ while (true)
+ {
+ /*
+ * Find next upper bound of left group.
+ */
+ while (i2 >= 0 &&
+ range_cmp_bounds(typcache, left_upper,
+ &by_upper[i2].upper) == 0)
+ {
+ if (range_cmp_bounds(typcache, &by_upper[i2].lower,
+ right_lower) < 0)
+ right_lower = &by_upper[i2].lower;
+ i2--;
+ }
+ if (i2 < 0)
+ break;
+ left_upper = &by_upper[i2].upper;
+
+ /*
+ * Count the intervals that must be placed in the right group
+ * regardless.
+ */
+ while (i1 >= 0 &&
+ range_cmp_bounds(typcache, &by_lower[i1].lower,
+ right_lower) >= 0)
+ i1--;
+
+ /*
+ * Consider found split to see if it's better than what we had.
+ */
+ range_gist_consider_split(&context, right_lower, i1 + 1,
+ left_upper, i2 + 1);
+ }
+
+ /*
+ * If we failed to find any acceptable splits, use trivial split.
+ */
+ if (context.first)
+ {
+ range_gist_fallback_split(typcache, entryvec, v);
+ return;
+ }
+
+ /*
+ * OK, we have now selected the bounds of the groups. Next we have to
+ * distribute the entries themselves. First we place the entries that can
+ * be placed unambiguously, collecting the "common entries" into an array.
+ */
+
+ /* Allocate vectors for results */
+ v->spl_left = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
+ v->spl_right = (OffsetNumber *) palloc(nentries * sizeof(OffsetNumber));
+ v->spl_nleft = 0;
+ v->spl_nright = 0;
+
+ /*
+ * Allocate an array for "common entries" - entries which can be placed to
+ * either group without affecting overlap along selected axis.
+ */
+ common_entries_count = 0;
+ common_entries = (CommonEntry *) palloc(nentries * sizeof(CommonEntry));
+
+ /*
+ * Distribute entries which can be distributed unambiguously, and collect
+ * common entries.
+ */
+ for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+ {
+ RangeBound lower,
+ upper;
+ bool empty;
+
+ /*
+ * Get upper and lower bounds along selected axis.
+ */
+ range = DatumGetRangeTypeP(entryvec->vector[i].key);
+
+ range_deserialize(typcache, range, &lower, &upper, &empty);
+
+ if (range_cmp_bounds(typcache, &upper, context.left_upper) <= 0)
+ {
+ /* Fits in the left group */
+ if (range_cmp_bounds(typcache, &lower, context.right_lower) >= 0)
+ {
+ /* Fits also in the right group, so "common entry" */
+ common_entries[common_entries_count].index = i;
+ if (context.has_subtype_diff)
+ {
+ /*
+ * delta = (lower - context.right_lower) -
+ * (context.left_upper - upper)
+ */
+ common_entries[common_entries_count].delta =
+ call_subtype_diff(typcache,
+ lower.val,
+ context.right_lower->val) -
+ call_subtype_diff(typcache,
+ context.left_upper->val,
+ upper.val);
+ }
+ else
+ {
+ /* Without subtype_diff, take all deltas as zero */
+ common_entries[common_entries_count].delta = 0;
+ }
+ common_entries_count++;
+ }
+ else
+ {
+ /* Doesn't fit to the right group, so join to the left group */
+ PLACE_LEFT(range, i);
+ }
+ }
+ else
+ {
+ /*
+ * Each entry should fit on either left or right group. Since this
+ * entry didn't fit in the left group, it better fit in the right
+ * group.
+ */
+ Assert(range_cmp_bounds(typcache, &lower,
+ context.right_lower) >= 0);
+ PLACE_RIGHT(range, i);
+ }
+ }
+
+ /*
+ * Distribute "common entries", if any.
+ */
+ if (common_entries_count > 0)
+ {
+ /*
+ * Sort "common entries" by calculated deltas in order to distribute
+ * the most ambiguous entries first.
+ */
+ qsort(common_entries, common_entries_count, sizeof(CommonEntry),
+ common_entry_cmp);
+
+ /*
+ * Distribute "common entries" between groups according to sorting.
+ */
+ for (i = 0; i < common_entries_count; i++)
+ {
+ int idx = common_entries[i].index;
+
+ range = DatumGetRangeTypeP(entryvec->vector[idx].key);
+
+ /*
+ * Check if we have to place this entry in either group to achieve
+ * LIMIT_RATIO.
+ */
+ if (i < context.common_left)
+ PLACE_LEFT(range, idx);
+ else
+ PLACE_RIGHT(range, idx);
+ }
+ }
+
+ v->spl_ldatum = PointerGetDatum(left_range);
+ v->spl_rdatum = PointerGetDatum(right_range);
+}
+
+/*
+ * Consider replacement of currently selected split with a better one
+ * during range_gist_double_sorting_split.
+ */
+static void
+range_gist_consider_split(ConsiderSplitContext *context,
+ RangeBound *right_lower, int min_left_count,
+ RangeBound *left_upper, int max_left_count)
+{
+ int left_count,
+ right_count;
+ float4 ratio,
+ overlap;
+
+ /*
+ * Calculate entries distribution ratio assuming most uniform distribution
+ * of common entries.
+ */
+ if (min_left_count >= (context->entries_count + 1) / 2)
+ left_count = min_left_count;
+ else if (max_left_count <= context->entries_count / 2)
+ left_count = max_left_count;
+ else
+ left_count = context->entries_count / 2;
+ right_count = context->entries_count - left_count;
+
+ /*
+ * Ratio of split: quotient between size of smaller group and total
+ * entries count. This is necessarily 0.5 or less; if it's less than
+ * LIMIT_RATIO then we will never accept the new split.
+ */
+ ratio = ((float4) Min(left_count, right_count)) /
+ ((float4) context->entries_count);
+
+ if (ratio > LIMIT_RATIO)
+ {
+ bool selectthis = false;
+
+ /*
+ * The ratio is acceptable, so compare current split with previously
+ * selected one. We search for minimal overlap (allowing negative
+ * values) and minimal ratio secondarily. If subtype_diff is
+ * available, it's used for overlap measure. Without subtype_diff we
+ * use number of "common entries" as an overlap measure.
+ */
+ if (context->has_subtype_diff)
+ overlap = call_subtype_diff(context->typcache,
+ left_upper->val,
+ right_lower->val);
+ else
+ overlap = max_left_count - min_left_count;
+
+ /* If there is no previous selection, select this split */
+ if (context->first)
+ selectthis = true;
+ else
+ {
+ /*
+ * Choose the new split if it has a smaller overlap, or same
+ * overlap but better ratio.
+ */
+ if (overlap < context->overlap ||
+ (overlap == context->overlap && ratio > context->ratio))
+ selectthis = true;
+ }
+
+ if (selectthis)
+ {
+ /* save information about selected split */
+ context->first = false;
+ context->ratio = ratio;
+ context->overlap = overlap;
+ context->right_lower = right_lower;
+ context->left_upper = left_upper;
+ context->common_left = max_left_count - left_count;
+ context->common_right = left_count - min_left_count;
+ }
+ }
+}
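
A minimal standalone sketch of the clamping and ratio check above, with made-up
counts; the 0.3 threshold below is only a stand-in for the LIMIT_RATIO constant
defined elsewhere in this file, and none of this is part of the patch itself.

    #include <stdio.h>

    #define SKETCH_LIMIT_RATIO 0.3   /* stand-in for the file's LIMIT_RATIO */

    int
    main(void)
    {
        int     entries_count = 10;
        int     min_left_count = 2;  /* entries that can only go left */
        int     max_left_count = 7;  /* entries that could still go left */
        int     left_count;
        double  ratio;

        /* same clamping as above: aim for an even split, stay within bounds */
        if (min_left_count >= (entries_count + 1) / 2)
            left_count = min_left_count;
        else if (max_left_count <= entries_count / 2)
            left_count = max_left_count;
        else
            left_count = entries_count / 2;

        ratio = (double) (left_count < entries_count - left_count ?
                          left_count : entries_count - left_count) /
                (double) entries_count;

        /* prints: left_count = 5, ratio = 0.50, acceptable = yes */
        printf("left_count = %d, ratio = %.2f, acceptable = %s\n",
               left_count, ratio,
               ratio > SKETCH_LIMIT_RATIO ? "yes" : "no");
        return 0;
    }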
+
+/*
+ * Find class number for range.
+ *
+ * The class number is a valid combination of the properties of the
+ * range. Note: the highest possible number is 8, because CLS_EMPTY
+ * can't be combined with anything else.
+ */
+static int
+get_gist_range_class(RangeType *range)
+{
+ int classNumber;
+ char flags;
+
+ flags = range_get_flags(range);
+ if (flags & RANGE_EMPTY)
+ {
+ classNumber = CLS_EMPTY;
+ }
+ else
+ {
+ classNumber = 0;
+ if (flags & RANGE_LB_INF)
+ classNumber |= CLS_LOWER_INF;
+ if (flags & RANGE_UB_INF)
+ classNumber |= CLS_UPPER_INF;
+ if (flags & RANGE_CONTAIN_EMPTY)
+ classNumber |= CLS_CONTAIN_EMPTY;
+ }
+ return classNumber;
+}
+
+/*
+ * Comparison function for range_gist_single_sorting_split.
+ */
+static int
+single_bound_cmp(const void *a, const void *b, void *arg)
+{
+ SingleBoundSortItem *i1 = (SingleBoundSortItem *) a;
+ SingleBoundSortItem *i2 = (SingleBoundSortItem *) b;
+ TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
+
+ return range_cmp_bounds(typcache, &i1->bound, &i2->bound);
+}
+
+/*
+ * Compare NonEmptyRanges by lower bound.
+ */
+static int
+interval_cmp_lower(const void *a, const void *b, void *arg)
+{
+ NonEmptyRange *i1 = (NonEmptyRange *) a;
+ NonEmptyRange *i2 = (NonEmptyRange *) b;
+ TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
+
+ return range_cmp_bounds(typcache, &i1->lower, &i2->lower);
+}
+
+/*
+ * Compare NonEmptyRanges by upper bound.
+ */
+static int
+interval_cmp_upper(const void *a, const void *b, void *arg)
+{
+ NonEmptyRange *i1 = (NonEmptyRange *) a;
+ NonEmptyRange *i2 = (NonEmptyRange *) b;
+ TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
+
+ return range_cmp_bounds(typcache, &i1->upper, &i2->upper);
+}
+
+/*
+ * Compare CommonEntrys by their deltas.
+ */
+static int
+common_entry_cmp(const void *i1, const void *i2)
+{
+ double delta1 = ((CommonEntry *) i1)->delta;
+ double delta2 = ((CommonEntry *) i2)->delta;
+
+ if (delta1 < delta2)
+ return -1;
+ else if (delta1 > delta2)
+ return 1;
+ else
+ return 0;
+}
+
+/*
+ * Convenience function to invoke type-specific subtype_diff function.
+ * Caller must have already checked that there is one for the range type.
+ */
+static float8
+call_subtype_diff(TypeCacheEntry *typcache, Datum val1, Datum val2)
+{
+ float8 value;
+
+ value = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
+ typcache->rng_collation,
+ val1, val2));
+ /* Cope with buggy subtype_diff function by returning zero */
+ if (value >= 0.0)
+ return value;
+ return 0.0;
+}
diff --git a/src/backend/utils/adt/rangetypes_selfuncs.c b/src/backend/utils/adt/rangetypes_selfuncs.c
new file mode 100644
index 0000000..c2795f4
--- /dev/null
+++ b/src/backend/utils/adt/rangetypes_selfuncs.c
@@ -0,0 +1,1223 @@
+/*-------------------------------------------------------------------------
+ *
+ * rangetypes_selfuncs.c
+ * Functions for selectivity estimation of range operators
+ *
+ * Estimates are based on histograms of lower and upper bounds, and the
+ * fraction of empty ranges.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/rangetypes_selfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+
+#include "access/htup_details.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_statistic.h"
+#include "catalog/pg_type.h"
+#include "utils/float.h"
+#include "utils/fmgrprotos.h"
+#include "utils/lsyscache.h"
+#include "utils/rangetypes.h"
+#include "utils/selfuncs.h"
+#include "utils/typcache.h"
+
+static double calc_rangesel(TypeCacheEntry *typcache, VariableStatData *vardata,
+ const RangeType *constval, Oid operator);
+static double default_range_selectivity(Oid operator);
+static double calc_hist_selectivity(TypeCacheEntry *typcache,
+ VariableStatData *vardata, const RangeType *constval,
+ Oid operator);
+static double calc_hist_selectivity_scalar(TypeCacheEntry *typcache,
+ const RangeBound *constbound,
+ const RangeBound *hist, int hist_nvalues,
+ bool equal);
+static int rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value,
+ const RangeBound *hist, int hist_length, bool equal);
+static float8 get_position(TypeCacheEntry *typcache, const RangeBound *value,
+ const RangeBound *hist1, const RangeBound *hist2);
+static float8 get_len_position(double value, double hist1, double hist2);
+static float8 get_distance(TypeCacheEntry *typcache, const RangeBound *bound1,
+ const RangeBound *bound2);
+static int length_hist_bsearch(Datum *length_hist_values,
+ int length_hist_nvalues, double value, bool equal);
+static double calc_length_hist_frac(Datum *length_hist_values,
+ int length_hist_nvalues, double length1, double length2, bool equal);
+static double calc_hist_selectivity_contained(TypeCacheEntry *typcache,
+ const RangeBound *lower, RangeBound *upper,
+ const RangeBound *hist_lower, int hist_nvalues,
+ Datum *length_hist_values, int length_hist_nvalues);
+static double calc_hist_selectivity_contains(TypeCacheEntry *typcache,
+ const RangeBound *lower, const RangeBound *upper,
+ const RangeBound *hist_lower, int hist_nvalues,
+ Datum *length_hist_values, int length_hist_nvalues);
+
+/*
+ * Returns a default selectivity estimate for given operator, when we don't
+ * have statistics or cannot use them for some reason.
+ */
+static double
+default_range_selectivity(Oid operator)
+{
+ switch (operator)
+ {
+ case OID_RANGE_OVERLAP_OP:
+ return 0.01;
+
+ case OID_RANGE_CONTAINS_OP:
+ case OID_RANGE_CONTAINED_OP:
+ return 0.005;
+
+ case OID_RANGE_CONTAINS_ELEM_OP:
+ case OID_RANGE_ELEM_CONTAINED_OP:
+
+ /*
+ * "range @> elem" is more or less identical to a scalar
+ * inequality "A >= b AND A <= c".
+ */
+ return DEFAULT_RANGE_INEQ_SEL;
+
+ case OID_RANGE_LESS_OP:
+ case OID_RANGE_LESS_EQUAL_OP:
+ case OID_RANGE_GREATER_OP:
+ case OID_RANGE_GREATER_EQUAL_OP:
+ case OID_RANGE_LEFT_OP:
+ case OID_RANGE_RIGHT_OP:
+ case OID_RANGE_OVERLAPS_LEFT_OP:
+ case OID_RANGE_OVERLAPS_RIGHT_OP:
+ /* these are similar to regular scalar inequalities */
+ return DEFAULT_INEQ_SEL;
+
+ default:
+ /* all range operators should be handled above, but just in case */
+ return 0.01;
+ }
+}
+
+/*
+ * rangesel -- restriction selectivity for range operators
+ */
+Datum
+rangesel(PG_FUNCTION_ARGS)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Oid operator = PG_GETARG_OID(1);
+ List *args = (List *) PG_GETARG_POINTER(2);
+ int varRelid = PG_GETARG_INT32(3);
+ VariableStatData vardata;
+ Node *other;
+ bool varonleft;
+ Selectivity selec;
+ TypeCacheEntry *typcache = NULL;
+ RangeType *constrange = NULL;
+
+ /*
+ * If expression is not (variable op something) or (something op
+ * variable), then punt and return a default estimate.
+ */
+ if (!get_restriction_variable(root, args, varRelid,
+ &vardata, &other, &varonleft))
+ PG_RETURN_FLOAT8(default_range_selectivity(operator));
+
+ /*
+ * Can't do anything useful if the something is not a constant, either.
+ */
+ if (!IsA(other, Const))
+ {
+ ReleaseVariableStats(vardata);
+ PG_RETURN_FLOAT8(default_range_selectivity(operator));
+ }
+
+ /*
+ * All the range operators are strict, so we can cope with a NULL constant
+ * right away.
+ */
+ if (((Const *) other)->constisnull)
+ {
+ ReleaseVariableStats(vardata);
+ PG_RETURN_FLOAT8(0.0);
+ }
+
+ /*
+ * If var is on the right, commute the operator, so that we can assume the
+ * var is on the left in what follows.
+ */
+ if (!varonleft)
+ {
+ /* we have other Op var, commute to make var Op other */
+ operator = get_commutator(operator);
+ if (!operator)
+ {
+ /* Use default selectivity (should we raise an error instead?) */
+ ReleaseVariableStats(vardata);
+ PG_RETURN_FLOAT8(default_range_selectivity(operator));
+ }
+ }
+
+ /*
+ * OK, there's a Var and a Const we're dealing with here. We need the
+ * Const to be of same range type as the column, else we can't do anything
+ * useful. (Such cases will likely fail at runtime, but here we'd rather
+ * just return a default estimate.)
+ *
+ * If the operator is "range @> element", the constant should be of the
+ * element type of the range column. Convert it to a range that includes
+ * only that single point, so that we don't need special handling for that
+ * in what follows.
+ */
+ if (operator == OID_RANGE_CONTAINS_ELEM_OP)
+ {
+ typcache = range_get_typcache(fcinfo, vardata.vartype);
+
+ if (((Const *) other)->consttype == typcache->rngelemtype->type_id)
+ {
+ RangeBound lower,
+ upper;
+
+ lower.inclusive = true;
+ lower.val = ((Const *) other)->constvalue;
+ lower.infinite = false;
+ lower.lower = true;
+ upper.inclusive = true;
+ upper.val = ((Const *) other)->constvalue;
+ upper.infinite = false;
+ upper.lower = false;
+ constrange = range_serialize(typcache, &lower, &upper, false);
+ }
+ }
+ else if (operator == OID_RANGE_ELEM_CONTAINED_OP)
+ {
+ /*
+ * Here, the Var is the elem, not the range. For now we just punt and
+ * return the default estimate. In future we could disassemble the
+ * range constant and apply scalarineqsel ...
+ */
+ }
+ else if (((Const *) other)->consttype == vardata.vartype)
+ {
+ /* Both sides are the same range type */
+ typcache = range_get_typcache(fcinfo, vardata.vartype);
+
+ constrange = DatumGetRangeTypeP(((Const *) other)->constvalue);
+ }
+
+ /*
+ * If we got a valid constant on one side of the operator, proceed to
+ * estimate using statistics. Otherwise punt and return a default constant
+ * estimate. Note that calc_rangesel need not handle
+ * OID_RANGE_ELEM_CONTAINED_OP.
+ */
+ if (constrange)
+ selec = calc_rangesel(typcache, &vardata, constrange, operator);
+ else
+ selec = default_range_selectivity(operator);
+
+ ReleaseVariableStats(vardata);
+
+ CLAMP_PROBABILITY(selec);
+
+ PG_RETURN_FLOAT8((float8) selec);
+}
+
+static double
+calc_rangesel(TypeCacheEntry *typcache, VariableStatData *vardata,
+ const RangeType *constval, Oid operator)
+{
+ double hist_selec;
+ double selec;
+ float4 empty_frac,
+ null_frac;
+
+ /*
+ * First look up the fraction of NULLs and empty ranges from pg_statistic.
+ */
+ if (HeapTupleIsValid(vardata->statsTuple))
+ {
+ Form_pg_statistic stats;
+ AttStatsSlot sslot;
+
+ stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
+ null_frac = stats->stanullfrac;
+
+ /* Try to get fraction of empty ranges */
+ if (get_attstatsslot(&sslot, vardata->statsTuple,
+ STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
+ InvalidOid,
+ ATTSTATSSLOT_NUMBERS))
+ {
+ if (sslot.nnumbers != 1)
+ elog(ERROR, "invalid empty fraction statistic"); /* shouldn't happen */
+ empty_frac = sslot.numbers[0];
+ free_attstatsslot(&sslot);
+ }
+ else
+ {
+ /* No empty fraction statistic. Assume no empty ranges. */
+ empty_frac = 0.0;
+ }
+ }
+ else
+ {
+ /*
+ * No stats are available. Follow through the calculations below
+ * anyway, assuming no NULLs and no empty ranges. This still allows us
+ * to give a better-than-nothing estimate based on whether the
+ * constant is an empty range or not.
+ */
+ null_frac = 0.0;
+ empty_frac = 0.0;
+ }
+
+ if (RangeIsEmpty(constval))
+ {
+ /*
+ * An empty range matches all ranges, all empty ranges, or nothing,
+ * depending on the operator
+ */
+ switch (operator)
+ {
+ /* these return false if either argument is empty */
+ case OID_RANGE_OVERLAP_OP:
+ case OID_RANGE_OVERLAPS_LEFT_OP:
+ case OID_RANGE_OVERLAPS_RIGHT_OP:
+ case OID_RANGE_LEFT_OP:
+ case OID_RANGE_RIGHT_OP:
+ /* nothing is less than an empty range */
+ case OID_RANGE_LESS_OP:
+ selec = 0.0;
+ break;
+
+ /* only empty ranges can be contained by an empty range */
+ case OID_RANGE_CONTAINED_OP:
+ /* only empty ranges are <= an empty range */
+ case OID_RANGE_LESS_EQUAL_OP:
+ selec = empty_frac;
+ break;
+
+ /* everything contains an empty range */
+ case OID_RANGE_CONTAINS_OP:
+ /* everything is >= an empty range */
+ case OID_RANGE_GREATER_EQUAL_OP:
+ selec = 1.0;
+ break;
+
+ /* all non-empty ranges are > an empty range */
+ case OID_RANGE_GREATER_OP:
+ selec = 1.0 - empty_frac;
+ break;
+
+ /* an element cannot be empty */
+ case OID_RANGE_CONTAINS_ELEM_OP:
+ default:
+ elog(ERROR, "unexpected operator %u", operator);
+ selec = 0.0; /* keep compiler quiet */
+ break;
+ }
+ }
+ else
+ {
+ /*
+ * Calculate selectivity using bound histograms. If that fails for
+ * some reason, e.g. no histogram in pg_statistic, use the default
+ * constant estimate for the fraction of non-empty values. This is
+ * still somewhat better than just returning the default estimate,
+ * because this still takes into account the fraction of empty and
+ * NULL tuples, if we had statistics for them.
+ */
+ hist_selec = calc_hist_selectivity(typcache, vardata, constval,
+ operator);
+ if (hist_selec < 0.0)
+ hist_selec = default_range_selectivity(operator);
+
+ /*
+ * Now merge the results for the empty ranges and histogram
+ * calculations, realizing that the histogram covers only the
+ * non-null, non-empty values.
+ */
+ if (operator == OID_RANGE_CONTAINED_OP)
+ {
+ /* empty is contained by anything non-empty */
+ selec = (1.0 - empty_frac) * hist_selec + empty_frac;
+ }
+ else
+ {
+ /* with any other operator, empty Op non-empty matches nothing */
+ selec = (1.0 - empty_frac) * hist_selec;
+ }
+ }
+
+ /* all range operators are strict */
+ selec *= (1.0 - null_frac);
+
+ /* result should be in range, but make sure... */
+ CLAMP_PROBABILITY(selec);
+
+ return selec;
+}
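
A tiny worked sketch of how the fractions above combine, with made-up numbers
(illustrative only): the histogram estimate applies to the non-empty, non-NULL
part of the column, so it is scaled by the empty and NULL fractions.

    #include <stdio.h>

    int
    main(void)
    {
        double  null_frac = 0.10;   /* fraction of NULLs, from pg_statistic */
        double  empty_frac = 0.20;  /* fraction of empty ranges */
        double  hist_selec = 0.50;  /* selectivity among non-empty values */
        double  selec;

        /* for an operator other than <@, "empty Op non-empty" matches nothing */
        selec = (1.0 - empty_frac) * hist_selec;

        /* all range operators are strict, so NULL rows never match */
        selec *= (1.0 - null_frac);

        /* prints 0.36: 80% non-empty, half of those match, 90% non-NULL */
        printf("selec = %.2f\n", selec);
        return 0;
    }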
+
+/*
+ * Calculate range operator selectivity using histograms of range bounds.
+ *
+ * This estimate is for the portion of values that are not empty and not
+ * NULL.
+ */
+static double
+calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata,
+ const RangeType *constval, Oid operator)
+{
+ AttStatsSlot hslot;
+ AttStatsSlot lslot;
+ int nhist;
+ RangeBound *hist_lower;
+ RangeBound *hist_upper;
+ int i;
+ RangeBound const_lower;
+ RangeBound const_upper;
+ bool empty;
+ double hist_selec;
+
+ /* Can't use the histogram with insecure range support functions */
+ if (!statistic_proc_security_check(vardata,
+ typcache->rng_cmp_proc_finfo.fn_oid))
+ return -1;
+ if (OidIsValid(typcache->rng_subdiff_finfo.fn_oid) &&
+ !statistic_proc_security_check(vardata,
+ typcache->rng_subdiff_finfo.fn_oid))
+ return -1;
+
+ /* Try to get histogram of ranges */
+ if (!(HeapTupleIsValid(vardata->statsTuple) &&
+ get_attstatsslot(&hslot, vardata->statsTuple,
+ STATISTIC_KIND_BOUNDS_HISTOGRAM, InvalidOid,
+ ATTSTATSSLOT_VALUES)))
+ return -1.0;
+
+ /* check that it's a histogram, not just a dummy entry */
+ if (hslot.nvalues < 2)
+ {
+ free_attstatsslot(&hslot);
+ return -1.0;
+ }
+
+ /*
+ * Convert histogram of ranges into histograms of its lower and upper
+ * bounds.
+ */
+ nhist = hslot.nvalues;
+ hist_lower = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
+ hist_upper = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
+ for (i = 0; i < nhist; i++)
+ {
+ range_deserialize(typcache, DatumGetRangeTypeP(hslot.values[i]),
+ &hist_lower[i], &hist_upper[i], &empty);
+ /* The histogram should not contain any empty ranges */
+ if (empty)
+ elog(ERROR, "bounds histogram contains an empty range");
+ }
+
+ /* @> and <@ also need a histogram of range lengths */
+ if (operator == OID_RANGE_CONTAINS_OP ||
+ operator == OID_RANGE_CONTAINED_OP)
+ {
+ if (!(HeapTupleIsValid(vardata->statsTuple) &&
+ get_attstatsslot(&lslot, vardata->statsTuple,
+ STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
+ InvalidOid,
+ ATTSTATSSLOT_VALUES)))
+ {
+ free_attstatsslot(&hslot);
+ return -1.0;
+ }
+
+ /* check that it's a histogram, not just a dummy entry */
+ if (lslot.nvalues < 2)
+ {
+ free_attstatsslot(&lslot);
+ free_attstatsslot(&hslot);
+ return -1.0;
+ }
+ }
+ else
+ memset(&lslot, 0, sizeof(lslot));
+
+ /* Extract the bounds of the constant value. */
+ range_deserialize(typcache, constval, &const_lower, &const_upper, &empty);
+ Assert(!empty);
+
+ /*
+ * Calculate selectivity by comparing the lower or upper bound of the
+ * constant with the histogram of lower or upper bounds.
+ */
+ switch (operator)
+ {
+ case OID_RANGE_LESS_OP:
+
+ /*
+ * The regular b-tree comparison operators (<, <=, >, >=) compare
+ * the lower bounds first, and the upper bounds for values with
+ * equal lower bounds. Estimate that by comparing the lower bounds
+ * only. This gives a fairly accurate estimate assuming there
+ * aren't many rows with a lower bound equal to the constant's
+ * lower bound.
+ */
+ hist_selec =
+ calc_hist_selectivity_scalar(typcache, &const_lower,
+ hist_lower, nhist, false);
+ break;
+
+ case OID_RANGE_LESS_EQUAL_OP:
+ hist_selec =
+ calc_hist_selectivity_scalar(typcache, &const_lower,
+ hist_lower, nhist, true);
+ break;
+
+ case OID_RANGE_GREATER_OP:
+ hist_selec =
+ 1 - calc_hist_selectivity_scalar(typcache, &const_lower,
+ hist_lower, nhist, false);
+ break;
+
+ case OID_RANGE_GREATER_EQUAL_OP:
+ hist_selec =
+ 1 - calc_hist_selectivity_scalar(typcache, &const_lower,
+ hist_lower, nhist, true);
+ break;
+
+ case OID_RANGE_LEFT_OP:
+ /* var << const when upper(var) < lower(const) */
+ hist_selec =
+ calc_hist_selectivity_scalar(typcache, &const_lower,
+ hist_upper, nhist, false);
+ break;
+
+ case OID_RANGE_RIGHT_OP:
+ /* var >> const when lower(var) > upper(const) */
+ hist_selec =
+ 1 - calc_hist_selectivity_scalar(typcache, &const_upper,
+ hist_lower, nhist, true);
+ break;
+
+ case OID_RANGE_OVERLAPS_RIGHT_OP:
+ /* compare lower bounds */
+ hist_selec =
+ 1 - calc_hist_selectivity_scalar(typcache, &const_lower,
+ hist_lower, nhist, false);
+ break;
+
+ case OID_RANGE_OVERLAPS_LEFT_OP:
+ /* compare upper bounds */
+ hist_selec =
+ calc_hist_selectivity_scalar(typcache, &const_upper,
+ hist_upper, nhist, true);
+ break;
+
+ case OID_RANGE_OVERLAP_OP:
+ case OID_RANGE_CONTAINS_ELEM_OP:
+
+ /*
+ * A && B <=> NOT (A << B OR A >> B).
+ *
+ * Since A << B and A >> B are mutually exclusive events we can
+ * sum their probabilities to find probability of (A << B OR A >>
+ * B).
+ *
+ * "range @> elem" is equivalent to "range && [elem,elem]". The
+ * caller already constructed the singular range from the element
+ * constant, so just treat it the same as &&.
+ */
+ hist_selec =
+ calc_hist_selectivity_scalar(typcache, &const_lower, hist_upper,
+ nhist, false);
+ hist_selec +=
+ (1.0 - calc_hist_selectivity_scalar(typcache, &const_upper, hist_lower,
+ nhist, true));
+ hist_selec = 1.0 - hist_selec;
+ break;
+
+ case OID_RANGE_CONTAINS_OP:
+ hist_selec =
+ calc_hist_selectivity_contains(typcache, &const_lower,
+ &const_upper, hist_lower, nhist,
+ lslot.values, lslot.nvalues);
+ break;
+
+ case OID_RANGE_CONTAINED_OP:
+ if (const_lower.infinite)
+ {
+ /*
+ * Lower bound no longer matters. Just estimate the fraction
+ * with an upper bound <= const upper bound
+ */
+ hist_selec =
+ calc_hist_selectivity_scalar(typcache, &const_upper,
+ hist_upper, nhist, true);
+ }
+ else if (const_upper.infinite)
+ {
+ hist_selec =
+ 1.0 - calc_hist_selectivity_scalar(typcache, &const_lower,
+ hist_lower, nhist, false);
+ }
+ else
+ {
+ hist_selec =
+ calc_hist_selectivity_contained(typcache, &const_lower,
+ &const_upper, hist_lower, nhist,
+ lslot.values, lslot.nvalues);
+ }
+ break;
+
+ default:
+ elog(ERROR, "unknown range operator %u", operator);
+ hist_selec = -1.0; /* keep compiler quiet */
+ break;
+ }
+
+ free_attstatsslot(&lslot);
+ free_attstatsslot(&hslot);
+
+ return hist_selec;
+}
+
+
+/*
+ * Look up the fraction of values less than (or equal, if 'equal' argument
+ * is true) a given const in a histogram of range bounds.
+ */
+static double
+calc_hist_selectivity_scalar(TypeCacheEntry *typcache, const RangeBound *constbound,
+ const RangeBound *hist, int hist_nvalues, bool equal)
+{
+ Selectivity selec;
+ int index;
+
+ /*
+ * Find the histogram bin the given constant falls into. Estimate
+ * selectivity as the number of preceding whole bins.
+ */
+ index = rbound_bsearch(typcache, constbound, hist, hist_nvalues, equal);
+ selec = (Selectivity) (Max(index, 0)) / (Selectivity) (hist_nvalues - 1);
+
+ /* Adjust using linear interpolation within the bin */
+ if (index >= 0 && index < hist_nvalues - 1)
+ selec += get_position(typcache, constbound, &hist[index],
+ &hist[index + 1]) / (Selectivity) (hist_nvalues - 1);
+
+ return selec;
+}
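
A simplified, self-contained version of the same "whole bins plus interpolated
fraction" estimate, using plain doubles, a linear scan, and direct arithmetic
in place of rbound_bsearch and get_position; purely illustrative, not part of
the patch.

    #include <stdio.h>

    static double
    scalar_hist_selectivity(const double *hist, int nvalues, double constval)
    {
        int     index = -1;
        int     i;
        double  selec, pos;

        /* greatest index whose bound is less than constval */
        for (i = 0; i < nvalues; i++)
            if (hist[i] < constval)
                index = i;

        /* fraction represented by the preceding whole bins */
        selec = (double) (index > 0 ? index : 0) / (double) (nvalues - 1);

        /* linear interpolation within the bin the constant falls into */
        if (index >= 0 && index < nvalues - 1)
        {
            pos = (constval - hist[index]) / (hist[index + 1] - hist[index]);
            selec += pos / (double) (nvalues - 1);
        }
        return selec;
    }

    int
    main(void)
    {
        double  hist[] = {0, 10, 20, 40, 80};  /* 5 bounds => 4 equal-depth bins */

        /* 25 lies a quarter of the way into bin 2: 2/4 + 0.25/4 = 0.5625 */
        printf("%.4f\n", scalar_hist_selectivity(hist, 5, 25.0));
        return 0;
    }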
+
+/*
+ * Binary search on an array of range bounds. Returns the greatest index of
+ * the range bound in the array which is less than (or less than or equal
+ * to, if the "equal" flag is set) the given range bound. If all range
+ * bounds in the array are greater than (or greater than or equal to) the
+ * given range bound, returns -1.
+ *
+ * This function is used in scalar operator selectivity estimation. It is
+ * also used to find the histogram bin in which to stop interpolating the
+ * portion of bounds that are less than or equal to the given bound.
+ */
+static int
+rbound_bsearch(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist,
+ int hist_length, bool equal)
+{
+ int lower = -1,
+ upper = hist_length - 1,
+ cmp,
+ middle;
+
+ while (lower < upper)
+ {
+ middle = (lower + upper + 1) / 2;
+ cmp = range_cmp_bounds(typcache, &hist[middle], value);
+
+ if (cmp < 0 || (equal && cmp == 0))
+ lower = middle;
+ else
+ upper = middle - 1;
+ }
+ return lower;
+}
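
The same search contract on a plain sorted double array, to show the "greatest
index less than (or equal to) the value, else -1" behaviour and the effect of
the "equal" flag; an illustrative sketch only, not part of the patch.

    #include <stdio.h>
    #include <stdbool.h>

    static int
    dbl_bsearch(const double *hist, int hist_length, double value, bool equal)
    {
        int     lower = -1,
                upper = hist_length - 1,
                middle;

        while (lower < upper)
        {
            middle = (lower + upper + 1) / 2;
            if (hist[middle] < value || (equal && hist[middle] == value))
                lower = middle;
            else
                upper = middle - 1;
        }
        return lower;
    }

    int
    main(void)
    {
        double  hist[] = {0, 10, 20, 40, 80};

        printf("%d\n", dbl_bsearch(hist, 5, 20.0, false)); /* 1: 10 is last < 20 */
        printf("%d\n", dbl_bsearch(hist, 5, 20.0, true));  /* 2: 20 is last <= 20 */
        printf("%d\n", dbl_bsearch(hist, 5, -5.0, false)); /* -1: nothing smaller */
        return 0;
    }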
+
+
+/*
+ * Binary search on length histogram. Returns greatest index of range length in
+ * histogram which is less than (less than or equal) the given length value. If
+ * all lengths in the histogram are greater than (greater than or equal) the
+ * given length, returns -1.
+ */
+static int
+length_hist_bsearch(Datum *length_hist_values, int length_hist_nvalues,
+ double value, bool equal)
+{
+ int lower = -1,
+ upper = length_hist_nvalues - 1,
+ middle;
+
+ while (lower < upper)
+ {
+ double middleval;
+
+ middle = (lower + upper + 1) / 2;
+
+ middleval = DatumGetFloat8(length_hist_values[middle]);
+ if (middleval < value || (equal && middleval <= value))
+ lower = middle;
+ else
+ upper = middle - 1;
+ }
+ return lower;
+}
+
+/*
+ * Get relative position of value in histogram bin in [0,1] range.
+ */
+static float8
+get_position(TypeCacheEntry *typcache, const RangeBound *value, const RangeBound *hist1,
+ const RangeBound *hist2)
+{
+ bool has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
+ float8 position;
+
+ if (!hist1->infinite && !hist2->infinite)
+ {
+ float8 bin_width;
+
+ /*
+ * Both bounds are finite. Assuming the subtype's comparison function
+ * works sanely, the value must be finite, too, because it lies
+ * somewhere between the bounds. If it doesn't, arbitrarily return
+ * 0.5.
+ */
+ if (value->infinite)
+ return 0.5;
+
+ /* Can't interpolate without subdiff function */
+ if (!has_subdiff)
+ return 0.5;
+
+ /* Calculate relative position using subdiff function. */
+ bin_width = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
+ typcache->rng_collation,
+ hist2->val,
+ hist1->val));
+ if (isnan(bin_width) || bin_width <= 0.0)
+ return 0.5; /* punt for NaN or zero-width bin */
+
+ position = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
+ typcache->rng_collation,
+ value->val,
+ hist1->val))
+ / bin_width;
+
+ if (isnan(position))
+ return 0.5; /* punt for NaN from subdiff, Inf/Inf, etc */
+
+ /* Relative position must be in [0,1] range */
+ position = Max(position, 0.0);
+ position = Min(position, 1.0);
+ return position;
+ }
+ else if (hist1->infinite && !hist2->infinite)
+ {
+ /*
+ * Lower bin boundary is -infinite, upper is finite. If the value is
+ * -infinite, return 0.0 to indicate it's equal to the lower bound.
+ * Otherwise return 1.0 to indicate it's infinitely far from the lower
+ * bound.
+ */
+ return ((value->infinite && value->lower) ? 0.0 : 1.0);
+ }
+ else if (!hist1->infinite && hist2->infinite)
+ {
+ /* same as above, but in reverse */
+ return ((value->infinite && !value->lower) ? 1.0 : 0.0);
+ }
+ else
+ {
+ /*
+ * If both bin boundaries are infinite, they should be equal to each
+ * other, and the value should also be infinite and equal to both
+ * bounds. (But don't Assert that, to avoid crashing if a user creates
+ * a datatype with a broken comparison function).
+ *
+ * Assume the value to lie in the middle of the infinite bounds.
+ */
+ return 0.5;
+ }
+}
+
+
+/*
+ * Get relative position of value in a length histogram bin in [0,1] range.
+ */
+static double
+get_len_position(double value, double hist1, double hist2)
+{
+ if (!isinf(hist1) && !isinf(hist2))
+ {
+ /*
+ * Both bounds are finite. The value should be finite too, because it
+ * lies somewhere between the bounds. If it doesn't, just return
+ * something.
+ */
+ if (isinf(value))
+ return 0.5;
+
+ return 1.0 - (hist2 - value) / (hist2 - hist1);
+ }
+ else if (isinf(hist1) && !isinf(hist2))
+ {
+ /*
+ * Lower bin boundary is -infinite, upper is finite. Return 1.0 to
+ * indicate the value is infinitely far from the lower bound.
+ */
+ return 1.0;
+ }
+ else if (!isinf(hist1) && isinf(hist2))
+ {
+ /*
+ * Upper bin boundary is infinite, lower is finite. Return 0.0 to
+ * indicate the value is infinitely far from the upper bound.
+ */
+ return 0.0;
+ }
+ else
+ {
+ /*
+ * If both bin boundaries are infinite, they should be equal to each
+ * other, and the value should also be infinite and equal to both
+ * bounds. (But don't Assert that, to avoid crashing unnecessarily if
+ * the caller messes up)
+ *
+ * Assume the value to lie in the middle of the infinite bounds.
+ */
+ return 0.5;
+ }
+}
+
+/*
+ * Measure distance between two range bounds.
+ */
+static float8
+get_distance(TypeCacheEntry *typcache, const RangeBound *bound1, const RangeBound *bound2)
+{
+ bool has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
+
+ if (!bound1->infinite && !bound2->infinite)
+ {
+ /*
+ * Neither bound is infinite, use subdiff function or return default
+ * value of 1.0 if no subdiff is available.
+ */
+ if (has_subdiff)
+ {
+ float8 res;
+
+ res = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
+ typcache->rng_collation,
+ bound2->val,
+ bound1->val));
+ /* Reject possible NaN result, also negative result */
+ if (isnan(res) || res < 0.0)
+ return 1.0;
+ else
+ return res;
+ }
+ else
+ return 1.0;
+ }
+ else if (bound1->infinite && bound2->infinite)
+ {
+ /* Both bounds are infinite */
+ if (bound1->lower == bound2->lower)
+ return 0.0;
+ else
+ return get_float8_infinity();
+ }
+ else
+ {
+ /* One bound is infinite, the other is not */
+ return get_float8_infinity();
+ }
+}
+
+/*
+ * Calculate the average of function P(x), in the interval [length1, length2],
+ * where P(x) is the fraction of tuples with length < x (or length <= x if
+ * 'equal' is true).
+ */
+static double
+calc_length_hist_frac(Datum *length_hist_values, int length_hist_nvalues,
+ double length1, double length2, bool equal)
+{
+ double frac;
+ double A,
+ B,
+ PA,
+ PB;
+ double pos;
+ int i;
+ double area;
+
+ Assert(length2 >= length1);
+
+ if (length2 < 0.0)
+ return 0.0; /* shouldn't happen, but doesn't hurt to check */
+
+ /* All lengths in the table are <= infinite. */
+ if (isinf(length2) && equal)
+ return 1.0;
+
+ /*----------
+ * The average of a function between A and B can be calculated by the
+ * formula:
+ *
+ * B
+ * 1 /
+ * ------- | P(x)dx
+ * B - A /
+ * A
+ *
+ * The geometrical interpretation of the integral is the area under the
+ * graph of P(x). P(x) is defined by the length histogram. We calculate
+ * the area in a piecewise fashion, iterating through the length histogram
+ * bins. Each bin is a trapezoid:
+ *
+ * P(x2)
+ * /|
+ * / |
+ * P(x1)/ |
+ * | |
+ * | |
+ * ---+---+--
+ * x1 x2
+ *
+ * where x1 and x2 are the boundaries of the current histogram bin, and
+ * P(x1) and P(x2) are the cumulative fractions of tuples at those
+ * boundaries.
+ *
+ * The area of each trapezoid is 1/2 * (P(x2) + P(x1)) * (x2 - x1)
+ *
+ * The first bin contains the lower bound passed by the caller, so we
+ * use linear interpolation between the previous and next histogram bin
+ * boundary to calculate P(x1). Likewise for the last bin: we use linear
+ * interpolation to calculate P(x2). For the bins in between, x1 and x2
+ * lie on histogram bin boundaries, so P(x1) and P(x2) are simply:
+ * P(x1) = (bin index) / (number of bins)
+ * P(x2) = (bin index + 1) / (number of bins)
+ */
+
+ /* First bin, the one that contains lower bound */
+ i = length_hist_bsearch(length_hist_values, length_hist_nvalues, length1, equal);
+ if (i >= length_hist_nvalues - 1)
+ return 1.0;
+
+ if (i < 0)
+ {
+ i = 0;
+ pos = 0.0;
+ }
+ else
+ {
+ /* interpolate length1's position in the bin */
+ pos = get_len_position(length1,
+ DatumGetFloat8(length_hist_values[i]),
+ DatumGetFloat8(length_hist_values[i + 1]));
+ }
+ PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1);
+ B = length1;
+
+ /*
+ * In the degenerate case that length1 == length2, simply return
+ * P(length1). This is not merely an optimization: if length1 == length2,
+ * we'd divide by zero later on.
+ */
+ if (length2 == length1)
+ return PB;
+
+ /*
+ * Loop through all the bins, until we hit the last bin, the one that
+ * contains the upper bound. (if lower and upper bounds are in the same
+ * bin, this falls out immediately)
+ */
+ area = 0.0;
+ for (; i < length_hist_nvalues - 1; i++)
+ {
+ double bin_upper = DatumGetFloat8(length_hist_values[i + 1]);
+
+ /* check if we've reached the last bin */
+ if (!(bin_upper < length2 || (equal && bin_upper <= length2)))
+ break;
+
+ /* the upper bound of previous bin is the lower bound of this bin */
+ A = B;
+ PA = PB;
+
+ B = bin_upper;
+ PB = (double) i / (double) (length_hist_nvalues - 1);
+
+ /*
+ * Add the area of this trapezoid to the total. The point of the
+ * if-check is to avoid NaN, in the corner case that PA == PB == 0,
+ * and B - A == Inf. The area of a zero-height trapezoid (PA == PB ==
+ * 0) is zero, regardless of the width (B - A).
+ */
+ if (PA > 0 || PB > 0)
+ area += 0.5 * (PB + PA) * (B - A);
+ }
+
+ /* Last bin */
+ A = B;
+ PA = PB;
+
+ B = length2; /* last bin ends at the query upper bound */
+ if (i >= length_hist_nvalues - 1)
+ pos = 0.0;
+ else
+ {
+ if (DatumGetFloat8(length_hist_values[i]) == DatumGetFloat8(length_hist_values[i + 1]))
+ pos = 0.0;
+ else
+ pos = get_len_position(length2, DatumGetFloat8(length_hist_values[i]), DatumGetFloat8(length_hist_values[i + 1]));
+ }
+ PB = (((double) i) + pos) / (double) (length_hist_nvalues - 1);
+
+ if (PA > 0 || PB > 0)
+ area += 0.5 * (PB + PA) * (B - A);
+
+ /*
+ * Ok, we have calculated the area, ie. the integral. Divide by width to
+ * get the requested average.
+ *
+ * Avoid NaN arising from infinite / infinite. This happens at least if
+ * length2 is infinite. It's not clear what the correct value would be in
+ * that case, so 0.5 seems as good as any value.
+ */
+ if (isinf(area) && isinf(length2))
+ frac = 0.5;
+ else
+ frac = area / (length2 - length1);
+
+ return frac;
+}
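
A tiny worked example of the trapezoid average described above, with the query
bounds chosen to fall exactly on bin boundaries so the P(x) values are simple;
all numbers are made up and this is not part of the patch.

    #include <stdio.h>

    int
    main(void)
    {
        /* length histogram {0, 1, 2, 4}: 3 equal-depth bins */
        double  x[] = {1.0, 2.0, 4.0};          /* boundaries within [length1, length2] */
        double  P[] = {1.0 / 3, 2.0 / 3, 1.0};  /* cumulative fraction at each boundary */
        double  area = 0.0;
        int     i;

        /* sum the trapezoid areas 0.5 * (P(x1) + P(x2)) * (x2 - x1) */
        for (i = 0; i < 2; i++)
            area += 0.5 * (P[i] + P[i + 1]) * (x[i + 1] - x[i]);

        /* area = 13/6, width = 3, so the average of P over [1,4] is ~0.722 */
        printf("avg P = %.3f\n", area / (x[2] - x[0]));
        return 0;
    }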
+
+/*
+ * Calculate selectivity of "var <@ const" operator, ie. estimate the fraction
+ * of ranges that fall within the constant lower and upper bounds. This uses
+ * the histograms of range lower bounds and range lengths, on the assumption
+ * that the range lengths are independent of the lower bounds.
+ *
+ * The caller has already checked that constant lower and upper bounds are
+ * finite.
+ */
+static double
+calc_hist_selectivity_contained(TypeCacheEntry *typcache,
+ const RangeBound *lower, RangeBound *upper,
+ const RangeBound *hist_lower, int hist_nvalues,
+ Datum *length_hist_values, int length_hist_nvalues)
+{
+ int i,
+ upper_index;
+ float8 prev_dist;
+ double bin_width;
+ double upper_bin_width;
+ double sum_frac;
+
+ /*
+ * Begin by finding the bin containing the upper bound, in the lower bound
+ * histogram. Any range with a lower bound > constant upper bound can't
+ * match, ie. there are no matches in bins greater than upper_index.
+ */
+ upper->inclusive = !upper->inclusive;
+ upper->lower = true;
+ upper_index = rbound_bsearch(typcache, upper, hist_lower, hist_nvalues,
+ false);
+
+ /*
+ * If the upper bound value is below the histogram's lower limit, there
+ * are no matches.
+ */
+ if (upper_index < 0)
+ return 0.0;
+
+ /*
+ * If the upper bound value is at or beyond the histogram's upper limit,
+ * start our loop at the last actual bin, as though the upper bound were
+ * within that bin; get_position will clamp its result to 1.0 anyway.
+ * (This corresponds to assuming that the data population above the
+ * histogram's upper limit is empty, exactly like what we just assumed for
+ * the lower limit.)
+ */
+ upper_index = Min(upper_index, hist_nvalues - 2);
+
+ /*
+ * Calculate upper_bin_width, ie. the fraction of the (upper_index,
+ * upper_index + 1) bin which is greater than upper bound of query range
+ * using linear interpolation of subdiff function.
+ */
+ upper_bin_width = get_position(typcache, upper,
+ &hist_lower[upper_index],
+ &hist_lower[upper_index + 1]);
+
+ /*
+ * In the loop, dist and prev_dist are the distance of the "current" bin's
+ * lower and upper bounds from the constant upper bound.
+ *
+ * bin_width represents the width of the current bin. Normally it is 1.0,
+ * meaning a full width bin, but can be less in the corner cases: start
+ * and end of the loop. We start with bin_width = upper_bin_width, because
+ * we begin at the bin containing the upper bound.
+ */
+ prev_dist = 0.0;
+ bin_width = upper_bin_width;
+
+ sum_frac = 0.0;
+ for (i = upper_index; i >= 0; i--)
+ {
+ double dist;
+ double length_hist_frac;
+ bool final_bin = false;
+
+ /*
+ * dist -- distance from upper bound of query range to lower bound of
+ * the current bin in the lower bound histogram. Or to the lower bound
+ * of the constant range, if this is the final bin, containing the
+ * constant lower bound.
+ */
+ if (range_cmp_bounds(typcache, &hist_lower[i], lower) < 0)
+ {
+ dist = get_distance(typcache, lower, upper);
+
+ /*
+ * Subtract from bin_width the portion of this bin that we want to
+ * ignore.
+ */
+ bin_width -= get_position(typcache, lower, &hist_lower[i],
+ &hist_lower[i + 1]);
+ if (bin_width < 0.0)
+ bin_width = 0.0;
+ final_bin = true;
+ }
+ else
+ dist = get_distance(typcache, &hist_lower[i], upper);
+
+ /*
+ * Estimate the fraction of tuples in this bin that are narrow enough
+ * to not exceed the distance to the upper bound of the query range.
+ */
+ length_hist_frac = calc_length_hist_frac(length_hist_values,
+ length_hist_nvalues,
+ prev_dist, dist, true);
+
+ /*
+ * Add the fraction of tuples in this bin, with a suitable length, to
+ * the total.
+ */
+ sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1);
+
+ if (final_bin)
+ break;
+
+ bin_width = 1.0;
+ prev_dist = dist;
+ }
+
+ return sum_frac;
+}
+
+/*
+ * Calculate selectivity of "var @> const" operator, ie. estimate the fraction
+ * of ranges that contain the constant lower and upper bounds. This uses
+ * the histograms of range lower bounds and range lengths, on the assumption
+ * that the range lengths are independent of the lower bounds.
+ */
+static double
+calc_hist_selectivity_contains(TypeCacheEntry *typcache,
+ const RangeBound *lower, const RangeBound *upper,
+ const RangeBound *hist_lower, int hist_nvalues,
+ Datum *length_hist_values, int length_hist_nvalues)
+{
+ int i,
+ lower_index;
+ double bin_width,
+ lower_bin_width;
+ double sum_frac;
+ float8 prev_dist;
+
+ /* Find the bin containing the lower bound of query range. */
+ lower_index = rbound_bsearch(typcache, lower, hist_lower, hist_nvalues,
+ true);
+
+ /*
+ * If the lower bound value is below the histogram's lower limit, there
+ * are no matches.
+ */
+ if (lower_index < 0)
+ return 0.0;
+
+ /*
+ * If the lower bound value is at or beyond the histogram's upper limit,
+ * start our loop at the last actual bin, as though the lower bound were
+ * within that bin; get_position will clamp its result to 1.0 anyway.
+ * (This corresponds to assuming that the data population above the
+ * histogram's upper limit is empty, exactly like what we just assumed for
+ * the lower limit.)
+ */
+ lower_index = Min(lower_index, hist_nvalues - 2);
+
+ /*
+ * Calculate lower_bin_width, ie. the fraction of the (lower_index,
+ * lower_index + 1) bin which is greater than lower bound of query range
+ * using linear interpolation of subdiff function.
+ */
+ lower_bin_width = get_position(typcache, lower, &hist_lower[lower_index],
+ &hist_lower[lower_index + 1]);
+
+ /*
+ * Loop through all the lower bound bins, smaller than the query lower
+ * bound. In the loop, dist and prev_dist are the distance of the
+ * "current" bin's lower and upper bounds from the constant upper bound.
+ * We begin from query lower bound, and walk backwards, so the first bin's
+ * upper bound is the query lower bound, and its distance to the query
+ * upper bound is the length of the query range.
+ *
+ * bin_width represents the width of the current bin. Normally it is 1.0,
+ * meaning a full width bin, except for the first bin, which is only
+ * counted up to the constant lower bound.
+ */
+ prev_dist = get_distance(typcache, lower, upper);
+ sum_frac = 0.0;
+ bin_width = lower_bin_width;
+ for (i = lower_index; i >= 0; i--)
+ {
+ float8 dist;
+ double length_hist_frac;
+
+ /*
+ * dist -- distance from upper bound of query range to current value
+ * of lower bound histogram or lower bound of query range (if we've
+ * reach it).
+ */
+ dist = get_distance(typcache, &hist_lower[i], upper);
+
+ /*
+ * Get average fraction of length histogram which covers intervals
+ * longer than (or equal to) distance to upper bound of query range.
+ */
+ length_hist_frac =
+ 1.0 - calc_length_hist_frac(length_hist_values,
+ length_hist_nvalues,
+ prev_dist, dist, false);
+
+ sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1);
+
+ bin_width = 1.0;
+ prev_dist = dist;
+ }
+
+ return sum_frac;
+}
diff --git a/src/backend/utils/adt/rangetypes_spgist.c b/src/backend/utils/adt/rangetypes_spgist.c
new file mode 100644
index 0000000..1190b80
--- /dev/null
+++ b/src/backend/utils/adt/rangetypes_spgist.c
@@ -0,0 +1,1000 @@
+/*-------------------------------------------------------------------------
+ *
+ * rangetypes_spgist.c
+ * implementation of quad tree over ranges mapped to 2d-points for SP-GiST.
+ *
+ * Quad tree is a data structure similar to a binary tree, but is adapted to
+ * 2d data. Each inner node of a quad tree contains a point (centroid) which
+ * divides the 2d-space into 4 quadrants. Each quadrant is associated with a
+ * child node.
+ *
+ * Ranges are mapped to 2d-points so that the lower bound is one dimension,
+ * and the upper bound is another. By convention, we visualize the lower bound
+ * to be the horizontal axis, and upper bound the vertical axis.
+ *
+ * One quirk with this mapping is the handling of empty ranges. An empty range
+ * doesn't have lower and upper bounds, so it cannot be mapped to 2d space in
+ * a straightforward way. To cope with that, the root node can have a 5th
+ * quadrant, which is reserved for empty ranges. Furthermore, there can be
+ * inner nodes in the tree with no centroid. They contain only two child nodes,
+ * one for empty ranges and another for non-empty ones. Such a node can appear
+ * as the root node, or in the tree under the 5th child of the root node (in
+ * which case it will only contain empty ranges).
+ *
+ * The SP-GiST picksplit function uses medians along both axes as the centroid.
+ * This implementation only uses the comparison function of the range element
+ * datatype, therefore it works for any range type.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/rangetypes_spgist.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/spgist.h"
+#include "access/stratnum.h"
+#include "catalog/pg_type.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/rangetypes.h"
+
+static int16 getQuadrant(TypeCacheEntry *typcache, const RangeType *centroid,
+ const RangeType *tst);
+static int bound_cmp(const void *a, const void *b, void *arg);
+
+static int adjacent_inner_consistent(TypeCacheEntry *typcache,
+ const RangeBound *arg, const RangeBound *centroid,
+ const RangeBound *prev);
+static int adjacent_cmp_bounds(TypeCacheEntry *typcache, const RangeBound *arg,
+ const RangeBound *centroid);
+
+/*
+ * SP-GiST 'config' interface function.
+ */
+Datum
+spg_range_quad_config(PG_FUNCTION_ARGS)
+{
+ /* spgConfigIn *cfgin = (spgConfigIn *) PG_GETARG_POINTER(0); */
+ spgConfigOut *cfg = (spgConfigOut *) PG_GETARG_POINTER(1);
+
+ cfg->prefixType = ANYRANGEOID;
+ cfg->labelType = VOIDOID; /* we don't need node labels */
+ cfg->canReturnData = true;
+ cfg->longValuesOK = false;
+ PG_RETURN_VOID();
+}
+
+/*----------
+ * Determine which quadrant a 2d-mapped range falls into, relative to the
+ * centroid.
+ *
+ * Quadrants are numbered like this:
+ *
+ * 4 | 1
+ * ----+----
+ * 3 | 2
+ *
+ * Where the lower bound of range is the horizontal axis and upper bound the
+ * vertical axis.
+ *
+ * Ranges on one of the axes are taken to lie in the quadrant with higher value
+ * along perpendicular axis. That is, a value on the horizontal axis is taken
+ * to belong to quadrant 1 or 4, and a value on the vertical axis is taken to
+ * belong to quadrant 1 or 2. A range equal to centroid is taken to lie in
+ * quadrant 1.
+ *
+ * Empty ranges are taken to lie in the special quadrant 5.
+ *----------
+ */
+static int16
+getQuadrant(TypeCacheEntry *typcache, const RangeType *centroid, const RangeType *tst)
+{
+ RangeBound centroidLower,
+ centroidUpper;
+ bool centroidEmpty;
+ RangeBound lower,
+ upper;
+ bool empty;
+
+ range_deserialize(typcache, centroid, &centroidLower, &centroidUpper,
+ &centroidEmpty);
+ range_deserialize(typcache, tst, &lower, &upper, &empty);
+
+ if (empty)
+ return 5;
+
+ if (range_cmp_bounds(typcache, &lower, &centroidLower) >= 0)
+ {
+ if (range_cmp_bounds(typcache, &upper, &centroidUpper) >= 0)
+ return 1;
+ else
+ return 2;
+ }
+ else
+ {
+ if (range_cmp_bounds(typcache, &upper, &centroidUpper) >= 0)
+ return 4;
+ else
+ return 3;
+ }
+}
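
A sketch of the same quadrant rule using plain numeric bounds instead of
RangeBounds (inclusivity and infinities are ignored); illustrative only, not
part of the patch.

    #include <stdio.h>

    static int
    quadrant(double clower, double cupper, double lower, double upper)
    {
        /* mirrors getQuadrant: compare both bounds against the centroid's */
        if (lower >= clower)
            return (upper >= cupper) ? 1 : 2;
        else
            return (upper >= cupper) ? 4 : 3;
    }

    int
    main(void)
    {
        /* centroid [10, 20) */
        printf("%d\n", quadrant(10, 20, 12, 25)); /* 1: both bounds >= centroid's */
        printf("%d\n", quadrant(10, 20, 12, 15)); /* 2: lower >=, upper below */
        printf("%d\n", quadrant(10, 20,  5,  8)); /* 3: both bounds below */
        printf("%d\n", quadrant(10, 20,  5, 25)); /* 4: lower below, upper >= */
        return 0;
    }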
+
+/*
+ * Choose SP-GiST function: choose path for addition of new range.
+ */
+Datum
+spg_range_quad_choose(PG_FUNCTION_ARGS)
+{
+ spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0);
+ spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1);
+ RangeType *inRange = DatumGetRangeTypeP(in->datum),
+ *centroid;
+ int16 quadrant;
+ TypeCacheEntry *typcache;
+
+ if (in->allTheSame)
+ {
+ out->resultType = spgMatchNode;
+ /* nodeN will be set by core */
+ out->result.matchNode.levelAdd = 0;
+ out->result.matchNode.restDatum = RangeTypePGetDatum(inRange);
+ PG_RETURN_VOID();
+ }
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(inRange));
+
+ /*
+ * A node with no centroid divides ranges purely on whether they're empty
+ * or not. All empty ranges go to child node 0, all non-empty ranges go to
+ * node 1.
+ */
+ if (!in->hasPrefix)
+ {
+ out->resultType = spgMatchNode;
+ if (RangeIsEmpty(inRange))
+ out->result.matchNode.nodeN = 0;
+ else
+ out->result.matchNode.nodeN = 1;
+ out->result.matchNode.levelAdd = 1;
+ out->result.matchNode.restDatum = RangeTypePGetDatum(inRange);
+ PG_RETURN_VOID();
+ }
+
+ centroid = DatumGetRangeTypeP(in->prefixDatum);
+ quadrant = getQuadrant(typcache, centroid, inRange);
+
+ Assert(quadrant <= in->nNodes);
+
+ /* Select node matching to quadrant number */
+ out->resultType = spgMatchNode;
+ out->result.matchNode.nodeN = quadrant - 1;
+ out->result.matchNode.levelAdd = 1;
+ out->result.matchNode.restDatum = RangeTypePGetDatum(inRange);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Bound comparison for sorting.
+ */
+static int
+bound_cmp(const void *a, const void *b, void *arg)
+{
+ RangeBound *ba = (RangeBound *) a;
+ RangeBound *bb = (RangeBound *) b;
+ TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
+
+ return range_cmp_bounds(typcache, ba, bb);
+}
+
+/*
+ * Picksplit SP-GiST function: split ranges into nodes. Select "centroid"
+ * range and distribute ranges according to quadrants.
+ */
+Datum
+spg_range_quad_picksplit(PG_FUNCTION_ARGS)
+{
+ spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0);
+ spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1);
+ int i;
+ int j;
+ int nonEmptyCount;
+ RangeType *centroid;
+ bool empty;
+ TypeCacheEntry *typcache;
+
+ /* Use the median values of lower and upper bounds as the centroid range */
+ RangeBound *lowerBounds,
+ *upperBounds;
+
+ typcache = range_get_typcache(fcinfo,
+ RangeTypeGetOid(DatumGetRangeTypeP(in->datums[0])));
+
+ /* Allocate memory for bounds */
+ lowerBounds = palloc(sizeof(RangeBound) * in->nTuples);
+ upperBounds = palloc(sizeof(RangeBound) * in->nTuples);
+ j = 0;
+
+ /* Deserialize bounds of ranges, count non-empty ranges */
+ for (i = 0; i < in->nTuples; i++)
+ {
+ range_deserialize(typcache, DatumGetRangeTypeP(in->datums[i]),
+ &lowerBounds[j], &upperBounds[j], &empty);
+ if (!empty)
+ j++;
+ }
+ nonEmptyCount = j;
+
+ /*
+ * All the ranges are empty. The best we can do is to construct an inner
+ * node with no centroid, and put all ranges into node 0. If non-empty
+ * ranges are added later, they will be routed to node 1.
+ */
+ if (nonEmptyCount == 0)
+ {
+ out->nNodes = 2;
+ out->hasPrefix = false;
+ /* Prefix is empty */
+ out->prefixDatum = PointerGetDatum(NULL);
+ out->nodeLabels = NULL;
+
+ out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples);
+ out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples);
+
+ /* Place all ranges into node 0 */
+ for (i = 0; i < in->nTuples; i++)
+ {
+ RangeType *range = DatumGetRangeTypeP(in->datums[i]);
+
+ out->leafTupleDatums[i] = RangeTypePGetDatum(range);
+ out->mapTuplesToNodes[i] = 0;
+ }
+ PG_RETURN_VOID();
+ }
+
+ /* Sort range bounds in order to find medians */
+ qsort_arg(lowerBounds, nonEmptyCount, sizeof(RangeBound),
+ bound_cmp, typcache);
+ qsort_arg(upperBounds, nonEmptyCount, sizeof(RangeBound),
+ bound_cmp, typcache);
+
+ /* Construct "centroid" range from medians of lower and upper bounds */
+ centroid = range_serialize(typcache, &lowerBounds[nonEmptyCount / 2],
+ &upperBounds[nonEmptyCount / 2], false);
+ out->hasPrefix = true;
+ out->prefixDatum = RangeTypePGetDatum(centroid);
+
+ /* Create node for empty ranges only if it is a root node */
+ out->nNodes = (in->level == 0) ? 5 : 4;
+ out->nodeLabels = NULL; /* we don't need node labels */
+
+ out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples);
+ out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples);
+
+ /*
+ * Assign ranges to corresponding nodes according to quadrants relative to
+ * "centroid" range.
+ */
+ for (i = 0; i < in->nTuples; i++)
+ {
+ RangeType *range = DatumGetRangeTypeP(in->datums[i]);
+ int16 quadrant = getQuadrant(typcache, centroid, range);
+
+ out->leafTupleDatums[i] = RangeTypePGetDatum(range);
+ out->mapTuplesToNodes[i] = quadrant - 1;
+ }
+
+ PG_RETURN_VOID();
+}
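
A small sketch of the centroid selection above: the medians of the
independently sorted lower and upper bounds are combined, so the centroid need
not equal any input range. Plain numbers, illustrative only, not part of the
patch.

    #include <stdio.h>

    int
    main(void)
    {
        /* already-sorted lower and upper bounds of 4 non-empty ranges */
        double  lowerBounds[] = {1, 3, 5, 7};
        double  upperBounds[] = {2, 6, 8, 10};
        int     nonEmptyCount = 4;

        /* prints: centroid = [5, 8) */
        printf("centroid = [%g, %g)\n",
               lowerBounds[nonEmptyCount / 2],
               upperBounds[nonEmptyCount / 2]);
        return 0;
    }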
+
+/*
+ * SP-GiST consistent function for inner nodes: check which nodes are
+ * consistent with given set of queries.
+ */
+Datum
+spg_range_quad_inner_consistent(PG_FUNCTION_ARGS)
+{
+ spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0);
+ spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1);
+ int which;
+ int i;
+ MemoryContext oldCtx;
+
+ /*
+ * For the adjacent search we also need the previous centroid (if any) to
+ * improve the precision of the consistent check. In that case the
+ * needPrevious flag is set and the centroid is passed down in
+ * traversalValue.
+ */
+ bool needPrevious = false;
+
+ if (in->allTheSame)
+ {
+ /* Report that all nodes should be visited */
+ out->nNodes = in->nNodes;
+ out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
+ for (i = 0; i < in->nNodes; i++)
+ out->nodeNumbers[i] = i;
+ PG_RETURN_VOID();
+ }
+
+ if (!in->hasPrefix)
+ {
+ /*
+ * No centroid on this inner node. Such a node has two child nodes,
+ * the first for empty ranges, and the second for non-empty ones.
+ */
+ Assert(in->nNodes == 2);
+
+ /*
+ * The Nth bit of the "which" variable means that the (N - 1)th node
+ * should be visited. Initially all bits are set; bits for nodes that
+ * should be skipped will be cleared.
+ */
+ which = (1 << 1) | (1 << 2);
+ for (i = 0; i < in->nkeys; i++)
+ {
+ StrategyNumber strategy = in->scankeys[i].sk_strategy;
+ bool empty;
+
+ /*
+ * The only strategy when second argument of operator is not range
+ * is RANGESTRAT_CONTAINS_ELEM.
+ */
+ if (strategy != RANGESTRAT_CONTAINS_ELEM)
+ empty = RangeIsEmpty(DatumGetRangeTypeP(in->scankeys[i].sk_argument));
+ else
+ empty = false;
+
+ switch (strategy)
+ {
+ case RANGESTRAT_BEFORE:
+ case RANGESTRAT_OVERLEFT:
+ case RANGESTRAT_OVERLAPS:
+ case RANGESTRAT_OVERRIGHT:
+ case RANGESTRAT_AFTER:
+ case RANGESTRAT_ADJACENT:
+ /* These strategies return false if any argument is empty */
+ if (empty)
+ which = 0;
+ else
+ which &= (1 << 2);
+ break;
+
+ case RANGESTRAT_CONTAINS:
+
+ /*
+ * All ranges contain an empty range. Only non-empty
+ * ranges can contain a non-empty range.
+ */
+ if (!empty)
+ which &= (1 << 2);
+ break;
+
+ case RANGESTRAT_CONTAINED_BY:
+
+ /*
+ * Only an empty range is contained by an empty range.
+ * Both empty and non-empty ranges can be contained by a
+ * non-empty range.
+ */
+ if (empty)
+ which &= (1 << 1);
+ break;
+
+ case RANGESTRAT_CONTAINS_ELEM:
+ which &= (1 << 2);
+ break;
+
+ case RANGESTRAT_EQ:
+ if (empty)
+ which &= (1 << 1);
+ else
+ which &= (1 << 2);
+ break;
+
+ default:
+ elog(ERROR, "unrecognized range strategy: %d", strategy);
+ break;
+ }
+ if (which == 0)
+ break; /* no need to consider remaining conditions */
+ }
+ }
+ else
+ {
+ RangeBound centroidLower,
+ centroidUpper;
+ bool centroidEmpty;
+ TypeCacheEntry *typcache;
+ RangeType *centroid;
+
+ /* This node has a centroid. Fetch it. */
+ centroid = DatumGetRangeTypeP(in->prefixDatum);
+ typcache = range_get_typcache(fcinfo,
+ RangeTypeGetOid(DatumGetRangeTypeP(centroid)));
+ range_deserialize(typcache, centroid, &centroidLower, &centroidUpper,
+ &centroidEmpty);
+
+ Assert(in->nNodes == 4 || in->nNodes == 5);
+
+ /*
+ * The Nth bit of the "which" variable means that the (N - 1)th node
+ * (Nth quadrant) should be visited. Initially all bits are set; bits
+ * for nodes that can be skipped will be cleared.
+ */
+ which = (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4) | (1 << 5);
+
+ for (i = 0; i < in->nkeys; i++)
+ {
+ StrategyNumber strategy;
+ RangeBound lower,
+ upper;
+ bool empty;
+ RangeType *range = NULL;
+
+ RangeType *prevCentroid = NULL;
+ RangeBound prevLower,
+ prevUpper;
+ bool prevEmpty;
+
+ /* Restrictions on range bounds according to scan strategy */
+ RangeBound *minLower = NULL,
+ *maxLower = NULL,
+ *minUpper = NULL,
+ *maxUpper = NULL;
+
+ /* Are the restrictions on range bounds inclusive? */
+ bool inclusive = true;
+ bool strictEmpty = true;
+ int cmp,
+ which1,
+ which2;
+
+ strategy = in->scankeys[i].sk_strategy;
+
+ /*
+ * RANGESTRAT_CONTAINS_ELEM is just like RANGESTRAT_CONTAINS, but
+ * the argument is a single element. Expand the single element to
+ * a range containing only the element, and treat it like
+ * RANGESTRAT_CONTAINS.
+ */
+ if (strategy == RANGESTRAT_CONTAINS_ELEM)
+ {
+ lower.inclusive = true;
+ lower.infinite = false;
+ lower.lower = true;
+ lower.val = in->scankeys[i].sk_argument;
+
+ upper.inclusive = true;
+ upper.infinite = false;
+ upper.lower = false;
+ upper.val = in->scankeys[i].sk_argument;
+
+ empty = false;
+
+ strategy = RANGESTRAT_CONTAINS;
+ }
+ else
+ {
+ range = DatumGetRangeTypeP(in->scankeys[i].sk_argument);
+ range_deserialize(typcache, range, &lower, &upper, &empty);
+ }
+
+ /*
+ * Most strategies are handled by forming a bounding box from the
+ * search key, defined by a minLower, maxLower, minUpper,
+ * maxUpper. Some modify 'which' directly, to specify exactly
+ * which quadrants need to be visited.
+ *
+ * For most strategies, nothing matches an empty search key, and
+ * an empty range never matches a non-empty key. If a strategy
+ * does not behave like that wrt. empty ranges, set strictEmpty to
+ * false.
+ */
+ switch (strategy)
+ {
+ case RANGESTRAT_BEFORE:
+
+ /*
+ * Range A is before range B if upper bound of A is lower
+ * than lower bound of B.
+ */
+ maxUpper = &lower;
+ inclusive = false;
+ break;
+
+ case RANGESTRAT_OVERLEFT:
+
+ /*
+ * Range A is overleft to range B if upper bound of A is
+ * less than or equal to upper bound of B.
+ */
+ maxUpper = &upper;
+ break;
+
+ case RANGESTRAT_OVERLAPS:
+
+ /*
+ * Non-empty ranges overlap if the lower bound of each range
+ * is less than or equal to the upper bound of the other range.
+ */
+ maxLower = &upper;
+ minUpper = &lower;
+ break;
+
+ case RANGESTRAT_OVERRIGHT:
+
+ /*
+ * Range A is overright to range B if lower bound of A is
+ * greater than or equal to lower bound of B.
+ */
+ minLower = &lower;
+ break;
+
+ case RANGESTRAT_AFTER:
+
+ /*
+ * Range A is after range B if lower bound of A is greater
+ * than upper bound of B.
+ */
+ minLower = &upper;
+ inclusive = false;
+ break;
+
+ case RANGESTRAT_ADJACENT:
+ if (empty)
+ break; /* Skip to strictEmpty check. */
+
+ /*
+ * The previously selected quadrant could rule out the
+ * possibility of the lower or upper bound being adjacent.
+ * Deserialize the previous centroid range, if present, to
+ * check for this.
+ */
+ if (in->traversalValue)
+ {
+ prevCentroid = DatumGetRangeTypeP(in->traversalValue);
+ range_deserialize(typcache, prevCentroid,
+ &prevLower, &prevUpper, &prevEmpty);
+ }
+
+ /*
+ * For a range's upper bound to be adjacent to the
+ * argument's lower bound, it will be found along the line
+ * adjacent to (and just below) Y=lower. Therefore, if the
+ * argument's lower bound is less than the centroid's
+ * upper bound, the line falls in quadrants 2 and 3; if
+ * greater, the line falls in quadrants 1 and 4. (see
+ * adjacent_cmp_bounds for description of edge cases).
+ */
+ cmp = adjacent_inner_consistent(typcache, &lower,
+ &centroidUpper,
+ prevCentroid ? &prevUpper : NULL);
+ if (cmp > 0)
+ which1 = (1 << 1) | (1 << 4);
+ else if (cmp < 0)
+ which1 = (1 << 2) | (1 << 3);
+ else
+ which1 = 0;
+
+ /*
+ * Also search for ranges adjacent to the argument's upper
+ * bound. They will be found along the line adjacent to
+ * (and just right of) X=upper, which falls in quadrants 3
+ * and 4, or 1 and 2.
+ */
+ cmp = adjacent_inner_consistent(typcache, &upper,
+ &centroidLower,
+ prevCentroid ? &prevLower : NULL);
+ if (cmp > 0)
+ which2 = (1 << 1) | (1 << 2);
+ else if (cmp < 0)
+ which2 = (1 << 3) | (1 << 4);
+ else
+ which2 = 0;
+
+ /* We must chase down ranges adjacent to either bound. */
+ which &= which1 | which2;
+
+ needPrevious = true;
+ break;
+
+ case RANGESTRAT_CONTAINS:
+
+ /*
+ * Non-empty range A contains non-empty range B if the lower
+ * bound of A is less than or equal to the lower bound of B,
+ * and the upper bound of A is greater than or equal to the
+ * upper bound of B.
+ *
+ * All non-empty ranges contain an empty range.
+ */
+ strictEmpty = false;
+ if (!empty)
+ {
+ which &= (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
+ maxLower = &lower;
+ minUpper = &upper;
+ }
+ break;
+
+ case RANGESTRAT_CONTAINED_BY:
+ /* The opposite of contains. */
+ strictEmpty = false;
+ if (empty)
+ {
+ /* An empty range is only contained by an empty range */
+ which &= (1 << 5);
+ }
+ else
+ {
+ minLower = &lower;
+ maxUpper = &upper;
+ }
+ break;
+
+ case RANGESTRAT_EQ:
+
+ /*
+ * An equal range can only be in the quadrant where the
+ * argument itself would be placed.
+ */
+ strictEmpty = false;
+ which &= (1 << getQuadrant(typcache, centroid, range));
+ break;
+
+ default:
+ elog(ERROR, "unrecognized range strategy: %d", strategy);
+ break;
+ }
+
+ if (strictEmpty)
+ {
+ if (empty)
+ {
+ /* Scan key is empty; no branch can satisfy it */
+ which = 0;
+ break;
+ }
+ else
+ {
+ /* Shouldn't visit tree branch with empty ranges */
+ which &= (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
+ }
+ }
+
+ /*
+ * Using the bounding box, see which quadrants we have to descend
+ * into.
+ */
+ if (minLower)
+ {
+ /*
+ * If the centroid's lower bound is less than or equal to the
+ * minimum lower bound, anything in the 3rd and 4th quadrants
+ * will have an even smaller lower bound, and thus can't
+ * match.
+ */
+ if (range_cmp_bounds(typcache, &centroidLower, minLower) <= 0)
+ which &= (1 << 1) | (1 << 2) | (1 << 5);
+ }
+ if (maxLower)
+ {
+ /*
+ * If the centroid's lower bound is greater than the maximum
+ * lower bound, anything in the 1st and 2nd quadrants will
+ * also have a greater than or equal lower bound, and thus
+ * can't match. If the centroid's lower bound is equal to the
+ * maximum lower bound, we can still exclude the 1st and 2nd
+ * quadrants if we're looking for a value strictly greater
+ * than the maximum.
+ */
+ int cmp;
+
+ cmp = range_cmp_bounds(typcache, &centroidLower, maxLower);
+ if (cmp > 0 || (!inclusive && cmp == 0))
+ which &= (1 << 3) | (1 << 4) | (1 << 5);
+ }
+ if (minUpper)
+ {
+ /*
+ * If the centroid's upper bound is less than or equal to the
+ * minimum upper bound, anything in the 2nd and 3rd quadrants
+ * will have an even smaller upper bound, and thus can't
+ * match.
+ */
+ if (range_cmp_bounds(typcache, &centroidUpper, minUpper) <= 0)
+ which &= (1 << 1) | (1 << 4) | (1 << 5);
+ }
+ if (maxUpper)
+ {
+ /*
+ * If the centroid's upper bound is greater than the maximum
+ * upper bound, anything in the 1st and 4th quadrants will
+ * also have a greater than or equal upper bound, and thus
+ * can't match. If the centroid's upper bound is equal to the
+ * maximum upper bound, we can still exclude the 1st and 4th
+ * quadrants if we're looking for a value strictly greater
+ * than the maximum.
+ */
+ int cmp;
+
+ cmp = range_cmp_bounds(typcache, &centroidUpper, maxUpper);
+ if (cmp > 0 || (!inclusive && cmp == 0))
+ which &= (1 << 2) | (1 << 3) | (1 << 5);
+ }
+
+ if (which == 0)
+ break; /* no need to consider remaining conditions */
+ }
+ }
+
+ /* We must descend into the quadrant(s) identified by 'which' */
+ out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
+ if (needPrevious)
+ out->traversalValues = (void **) palloc(sizeof(void *) * in->nNodes);
+ out->nNodes = 0;
+
+ /*
+ * Elements of traversalValues should be allocated in
+ * traversalMemoryContext
+ */
+ oldCtx = MemoryContextSwitchTo(in->traversalMemoryContext);
+
+ for (i = 1; i <= in->nNodes; i++)
+ {
+ if (which & (1 << i))
+ {
+ /* Save previous prefix if needed */
+ if (needPrevious)
+ {
+ Datum previousCentroid;
+
+ /*
+ * We know that in->prefixDatum here is a varlena, because
+ * it's a range
+ */
+ previousCentroid = datumCopy(in->prefixDatum, false, -1);
+ out->traversalValues[out->nNodes] = (void *) previousCentroid;
+ }
+ out->nodeNumbers[out->nNodes] = i - 1;
+ out->nNodes++;
+ }
+ }
+
+ MemoryContextSwitchTo(oldCtx);
+
+ PG_RETURN_VOID();
+}
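+
+/*
+ * Worked example (added for illustration, with hypothetical values): the
+ * pruning rules above imply that quadrant 1 holds ranges whose lower and
+ * upper bounds are both >= the centroid's, quadrant 2 lower >= / upper <,
+ * quadrant 3 both <, quadrant 4 lower < / upper >=, and quadrant 5 the
+ * empty ranges.  For an OVERLAPS query with argument [25, 30) and centroid
+ * [10, 20): maxLower points at the argument's upper bound (30) and minUpper
+ * at its lower bound (25).  The centroid's lower bound 10 is not above 30,
+ * so maxLower prunes nothing, but the centroid's upper bound 20 is <= 25,
+ * so minUpper prunes quadrants 2 and 3 (the strictEmpty check already
+ * removed quadrant 5).  Only quadrants 1 and 4, whose ranges have upper
+ * bounds >= 20 and so might still reach 25, are descended into.
+ */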
+
+/*
+ * adjacent_cmp_bounds
+ *
+ * Given an argument and centroid bound, this function determines if any
+ * bounds that are adjacent to the argument are smaller than, or greater than
+ * or equal to, the centroid. For brevity, we call the arg < centroid case
+ * "left", and the arg >= centroid case "right". This corresponds to how the quadrants are
+ * arranged, if you imagine that "left" is equivalent to "down" and "right"
+ * is equivalent to "up".
+ *
+ * For the "left" case, returns -1, and for the "right" case, returns 1.
+ */
+static int
+adjacent_cmp_bounds(TypeCacheEntry *typcache, const RangeBound *arg,
+ const RangeBound *centroid)
+{
+ int cmp;
+
+ Assert(arg->lower != centroid->lower);
+
+ cmp = range_cmp_bounds(typcache, arg, centroid);
+
+ if (centroid->lower)
+ {
+ /*------
+ * The argument is an upper bound, we are searching for adjacent lower
+ * bounds. A matching adjacent lower bound must be *larger* than the
+ * argument, but only just.
+ *
+ * The following table illustrates the desired result with a fixed
+ * argument bound, and different centroids. The CMP column shows
+ * the value of 'cmp' variable, and ADJ shows whether the argument
+ * and centroid are adjacent, per bounds_adjacent(). (N) means we
+ * don't need to check for that case, because it's implied by CMP.
+ * With the argument range [..., 500), the adjacent range we're
+ * searching for is [500, ...):
+ *
+ * ARGUMENT CENTROID CMP ADJ
+ * [..., 500) [498, ...) > (N) [500, ...) is to the right
+ * [..., 500) [499, ...) = (N) [500, ...) is to the right
+ * [..., 500) [500, ...) < Y [500, ...) is to the right
+ * [..., 500) [501, ...) < N [500, ...) is to the left
+ *
+ * So, we must search left when the argument is smaller than, and not
+ * adjacent, to the centroid. Otherwise search right.
+ *------
+ */
+ if (cmp < 0 && !bounds_adjacent(typcache, *arg, *centroid))
+ return -1;
+ else
+ return 1;
+ }
+ else
+ {
+ /*------
+ * The argument is a lower bound, we are searching for adjacent upper
+ * bounds. A matching adjacent upper bound must be *smaller* than the
+ * argument, but only just.
+ *
+ * ARGUMENT CENTROID CMP ADJ
+ * [500, ...) [..., 499) > (N) [..., 500) is to the right
+ * [500, ...) [..., 500) > (Y) [..., 500) is to the right
+ * [500, ...) [..., 501) = (N) [..., 500) is to the left
+ * [500, ...) [..., 502) < (N) [..., 500) is to the left
+ *
+ * We must search left when the argument is smaller than or equal to
+ * the centroid. Otherwise search right. We don't need to check
+ * whether the argument is adjacent with the centroid, because it
+ * doesn't matter.
+ *------
+ */
+ if (cmp <= 0)
+ return -1;
+ else
+ return 1;
+ }
+}
+
+/*----------
+ * adjacent_inner_consistent
+ *
+ * Like adjacent_cmp_bounds, but also takes into account the previous
+ * level's centroid. We might've traversed left (or right) at the previous
+ * node, in search for ranges adjacent to the other bound, even though we
+ * already ruled out the possibility for any matches in that direction for
+ * this bound. By comparing the argument with the previous centroid, and
+ * the previous centroid with the current centroid, we can determine which
+ * direction we should've moved in at previous level, and which direction we
+ * actually moved.
+ *
+ * If there can be any matches to the left, returns -1. If to the right,
+ * returns 1. If there can be no matches below this centroid, because we
+ * already ruled them out at the previous level, returns 0.
+ *
+ * XXX: Comparing just the previous and current level isn't foolproof; we
+ * might still search some branches unnecessarily. For example, imagine that
+ * we are searching for value 15, and we traverse the following centroids
+ * (only considering one bound for the moment):
+ *
+ * Level 1: 20
+ * Level 2: 50
+ * Level 3: 25
+ *
+ * At this point, previous centroid is 50, current centroid is 25, and the
+ * target value is to the left. But because we already moved right from
+ * centroid 20 to 50 in the first level, there cannot be any values < 20 in
+ * the current branch. But we don't know that just by looking at the previous
+ * and current centroid, so we traverse left, unnecessarily. The reason we are
+ * down this branch is that we're searching for matches with the *other*
+ * bound. If we kept track of which bound we are searching for explicitly,
+ * instead of deducing that from the previous and current centroid, we could
+ * avoid some unnecessary work.
+ *----------
+ */
+static int
+adjacent_inner_consistent(TypeCacheEntry *typcache, const RangeBound *arg,
+ const RangeBound *centroid, const RangeBound *prev)
+{
+ if (prev)
+ {
+ int prevcmp;
+ int cmp;
+
+ /*
+ * Which direction were we supposed to traverse at previous level,
+ * left or right?
+ */
+ prevcmp = adjacent_cmp_bounds(typcache, arg, prev);
+
+ /* and which direction did we actually go? */
+ cmp = range_cmp_bounds(typcache, centroid, prev);
+
+ /* if the two don't agree, there's nothing to see here */
+ if ((prevcmp < 0 && cmp >= 0) || (prevcmp > 0 && cmp < 0))
+ return 0;
+ }
+
+ return adjacent_cmp_bounds(typcache, arg, centroid);
+}
+
+/*
+ * SP-GiST consistent function for leaf nodes: check leaf value against query
+ * using corresponding function.
+ */
+Datum
+spg_range_quad_leaf_consistent(PG_FUNCTION_ARGS)
+{
+ spgLeafConsistentIn *in = (spgLeafConsistentIn *) PG_GETARG_POINTER(0);
+ spgLeafConsistentOut *out = (spgLeafConsistentOut *) PG_GETARG_POINTER(1);
+ RangeType *leafRange = DatumGetRangeTypeP(in->leafDatum);
+ TypeCacheEntry *typcache;
+ bool res;
+ int i;
+
+ /* all tests are exact */
+ out->recheck = false;
+
+ /* leafDatum is what it is... */
+ out->leafValue = in->leafDatum;
+
+ typcache = range_get_typcache(fcinfo, RangeTypeGetOid(leafRange));
+
+ /* Perform the required comparison(s) */
+ res = true;
+ for (i = 0; i < in->nkeys; i++)
+ {
+ Datum keyDatum = in->scankeys[i].sk_argument;
+
+ /* Call the function corresponding to the scan strategy */
+ switch (in->scankeys[i].sk_strategy)
+ {
+ case RANGESTRAT_BEFORE:
+ res = range_before_internal(typcache, leafRange,
+ DatumGetRangeTypeP(keyDatum));
+ break;
+ case RANGESTRAT_OVERLEFT:
+ res = range_overleft_internal(typcache, leafRange,
+ DatumGetRangeTypeP(keyDatum));
+ break;
+ case RANGESTRAT_OVERLAPS:
+ res = range_overlaps_internal(typcache, leafRange,
+ DatumGetRangeTypeP(keyDatum));
+ break;
+ case RANGESTRAT_OVERRIGHT:
+ res = range_overright_internal(typcache, leafRange,
+ DatumGetRangeTypeP(keyDatum));
+ break;
+ case RANGESTRAT_AFTER:
+ res = range_after_internal(typcache, leafRange,
+ DatumGetRangeTypeP(keyDatum));
+ break;
+ case RANGESTRAT_ADJACENT:
+ res = range_adjacent_internal(typcache, leafRange,
+ DatumGetRangeTypeP(keyDatum));
+ break;
+ case RANGESTRAT_CONTAINS:
+ res = range_contains_internal(typcache, leafRange,
+ DatumGetRangeTypeP(keyDatum));
+ break;
+ case RANGESTRAT_CONTAINED_BY:
+ res = range_contained_by_internal(typcache, leafRange,
+ DatumGetRangeTypeP(keyDatum));
+ break;
+ case RANGESTRAT_CONTAINS_ELEM:
+ res = range_contains_elem_internal(typcache, leafRange,
+ keyDatum);
+ break;
+ case RANGESTRAT_EQ:
+ res = range_eq_internal(typcache, leafRange,
+ DatumGetRangeTypeP(keyDatum));
+ break;
+ default:
+ elog(ERROR, "unrecognized range strategy: %d",
+ in->scankeys[i].sk_strategy);
+ break;
+ }
+
+ /*
+ * If the leaf datum doesn't match a query key, there is no need to
+ * check the remaining keys.
+ */
+ if (!res)
+ break;
+ }
+
+ PG_RETURN_BOOL(res);
+}
diff --git a/src/backend/utils/adt/rangetypes_typanalyze.c b/src/backend/utils/adt/rangetypes_typanalyze.c
new file mode 100644
index 0000000..2043d3f
--- /dev/null
+++ b/src/backend/utils/adt/rangetypes_typanalyze.c
@@ -0,0 +1,427 @@
+/*-------------------------------------------------------------------------
+ *
+ * rangetypes_typanalyze.c
+ * Functions for gathering statistics from range columns
+ *
+ * For a range type column, histograms of lower and upper bounds, and
+ * the fraction of NULL and empty ranges are collected.
+ *
+ * Both histograms have the same length, and they are combined into a
+ * single array of ranges. This has the same shape as the histogram that
+ * std_typanalyze would collect, but the values are different. Each range
+ * in the array is a valid range, even though the lower and upper bounds
+ * come from different tuples. In theory, the standard scalar selectivity
+ * functions could be used with the combined histogram.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/rangetypes_typanalyze.c
+ *
+ *-------------------------------------------------------------------------
+ */
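+
+/*
+ * Illustrative example (added for exposition, hypothetical sample): for
+ * ranges [1,10), [2,4) and [5,9), the sorted lower bounds are {1,2,5} and
+ * the sorted upper bounds are {4,9,10}; pairing them positionally gives the
+ * combined bounds histogram {[1,4), [2,9), [5,10)}.  Every entry is a valid
+ * range even though its two bounds come from different sample rows.
+ */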
+#include "postgres.h"
+
+#include "catalog/pg_operator.h"
+#include "commands/vacuum.h"
+#include "utils/float.h"
+#include "utils/fmgrprotos.h"
+#include "utils/lsyscache.h"
+#include "utils/rangetypes.h"
+#include "utils/multirangetypes.h"
+
+static int float8_qsort_cmp(const void *a1, const void *a2, void *arg);
+static int range_bound_qsort_cmp(const void *a1, const void *a2, void *arg);
+static void compute_range_stats(VacAttrStats *stats,
+ AnalyzeAttrFetchFunc fetchfunc, int samplerows,
+ double totalrows);
+
+/*
+ * range_typanalyze -- typanalyze function for range columns
+ */
+Datum
+range_typanalyze(PG_FUNCTION_ARGS)
+{
+ VacAttrStats *stats = (VacAttrStats *) PG_GETARG_POINTER(0);
+ TypeCacheEntry *typcache;
+ Form_pg_attribute attr = stats->attr;
+
+ /* Get information about range type; note column might be a domain */
+ typcache = range_get_typcache(fcinfo, getBaseType(stats->attrtypid));
+
+ if (attr->attstattarget < 0)
+ attr->attstattarget = default_statistics_target;
+
+ stats->compute_stats = compute_range_stats;
+ stats->extra_data = typcache;
+ /* same as in std_typanalyze */
+ stats->minrows = 300 * attr->attstattarget;
+
+ PG_RETURN_BOOL(true);
+}
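+
+/*
+ * For example (illustrative): with the default statistics target of 100,
+ * minrows comes out to 30000 sample rows, the same as std_typanalyze.
+ */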
+
+/*
+ * multirange_typanalyze -- typanalyze function for multirange columns
+ *
+ * We do the same analysis as for ranges, but on the smallest range that
+ * completely includes the multirange.
+ */
+Datum
+multirange_typanalyze(PG_FUNCTION_ARGS)
+{
+ VacAttrStats *stats = (VacAttrStats *) PG_GETARG_POINTER(0);
+ TypeCacheEntry *typcache;
+ Form_pg_attribute attr = stats->attr;
+
+ /* Get information about multirange type; note column might be a domain */
+ typcache = multirange_get_typcache(fcinfo, getBaseType(stats->attrtypid));
+
+ if (attr->attstattarget < 0)
+ attr->attstattarget = default_statistics_target;
+
+ stats->compute_stats = compute_range_stats;
+ stats->extra_data = typcache;
+ /* same as in std_typanalyze */
+ stats->minrows = 300 * attr->attstattarget;
+
+ PG_RETURN_BOOL(true);
+}
+
+/*
+ * Comparison function for sorting float8s, used for range lengths.
+ */
+static int
+float8_qsort_cmp(const void *a1, const void *a2, void *arg)
+{
+ const float8 *f1 = (const float8 *) a1;
+ const float8 *f2 = (const float8 *) a2;
+
+ if (*f1 < *f2)
+ return -1;
+ else if (*f1 == *f2)
+ return 0;
+ else
+ return 1;
+}
+
+/*
+ * Comparison function for sorting RangeBounds.
+ */
+static int
+range_bound_qsort_cmp(const void *a1, const void *a2, void *arg)
+{
+ RangeBound *b1 = (RangeBound *) a1;
+ RangeBound *b2 = (RangeBound *) a2;
+ TypeCacheEntry *typcache = (TypeCacheEntry *) arg;
+
+ return range_cmp_bounds(typcache, b1, b2);
+}
+
+/*
+ * compute_range_stats() -- compute statistics for a range column
+ */
+static void
+compute_range_stats(VacAttrStats *stats, AnalyzeAttrFetchFunc fetchfunc,
+ int samplerows, double totalrows)
+{
+ TypeCacheEntry *typcache = (TypeCacheEntry *) stats->extra_data;
+ TypeCacheEntry *mltrng_typcache = NULL;
+ bool has_subdiff;
+ int null_cnt = 0;
+ int non_null_cnt = 0;
+ int non_empty_cnt = 0;
+ int empty_cnt = 0;
+ int range_no;
+ int slot_idx;
+ int num_bins = stats->attr->attstattarget;
+ int num_hist;
+ float8 *lengths;
+ RangeBound *lowers,
+ *uppers;
+ double total_width = 0;
+
+ if (typcache->typtype == TYPTYPE_MULTIRANGE)
+ {
+ mltrng_typcache = typcache;
+ typcache = typcache->rngtype;
+ }
+ else
+ Assert(typcache->typtype == TYPTYPE_RANGE);
+ has_subdiff = OidIsValid(typcache->rng_subdiff_finfo.fn_oid);
+
+ /* Allocate memory to hold range bounds and lengths of the sample ranges. */
+ lowers = (RangeBound *) palloc(sizeof(RangeBound) * samplerows);
+ uppers = (RangeBound *) palloc(sizeof(RangeBound) * samplerows);
+ lengths = (float8 *) palloc(sizeof(float8) * samplerows);
+
+ /* Loop over the sample ranges. */
+ for (range_no = 0; range_no < samplerows; range_no++)
+ {
+ Datum value;
+ bool isnull,
+ empty;
+ MultirangeType *multirange;
+ RangeType *range;
+ RangeBound lower,
+ upper;
+ float8 length;
+
+ vacuum_delay_point();
+
+ value = fetchfunc(stats, range_no, &isnull);
+ if (isnull)
+ {
+ /* range is null, just count that */
+ null_cnt++;
+ continue;
+ }
+
+ /*
+ * XXX: should we ignore wide values, like std_typanalyze does, to
+ * avoid bloating the statistics table?
+ */
+ total_width += VARSIZE_ANY(DatumGetPointer(value));
+
+ /* Get range and deserialize it for further analysis. */
+ if (mltrng_typcache != NULL)
+ {
+ /* Treat multiranges like a big range without gaps. */
+ multirange = DatumGetMultirangeTypeP(value);
+ if (!MultirangeIsEmpty(multirange))
+ {
+ RangeBound tmp;
+
+ multirange_get_bounds(typcache, multirange, 0,
+ &lower, &tmp);
+ multirange_get_bounds(typcache, multirange,
+ multirange->rangeCount - 1,
+ &tmp, &upper);
+ empty = false;
+ }
+ else
+ {
+ empty = true;
+ }
+ }
+ else
+ {
+ range = DatumGetRangeTypeP(value);
+ range_deserialize(typcache, range, &lower, &upper, &empty);
+ }
+
+ if (!empty)
+ {
+ /* Remember bounds and length for further usage in histograms */
+ lowers[non_empty_cnt] = lower;
+ uppers[non_empty_cnt] = upper;
+
+ if (lower.infinite || upper.infinite)
+ {
+ /* Length of any kind of an infinite range is infinite */
+ length = get_float8_infinity();
+ }
+ else if (has_subdiff)
+ {
+ /*
+ * For an ordinary range, use subdiff function between upper
+ * and lower bound values.
+ */
+ length = DatumGetFloat8(FunctionCall2Coll(&typcache->rng_subdiff_finfo,
+ typcache->rng_collation,
+ upper.val, lower.val));
+ }
+ else
+ {
+ /* Use default value of 1.0 if no subdiff is available. */
+ length = 1.0;
+ }
+ lengths[non_empty_cnt] = length;
+
+ non_empty_cnt++;
+ }
+ else
+ empty_cnt++;
+
+ non_null_cnt++;
+ }
+
+ slot_idx = 0;
+
+ /* We can only compute real stats if we found some non-null values. */
+ if (non_null_cnt > 0)
+ {
+ Datum *bound_hist_values;
+ Datum *length_hist_values;
+ int pos,
+ posfrac,
+ delta,
+ deltafrac,
+ i;
+ MemoryContext old_cxt;
+ float4 *emptyfrac;
+
+ stats->stats_valid = true;
+ /* Do the simple null-frac and width stats */
+ stats->stanullfrac = (double) null_cnt / (double) samplerows;
+ stats->stawidth = total_width / (double) non_null_cnt;
+
+ /* Estimate that non-null values are unique */
+ stats->stadistinct = -1.0 * (1.0 - stats->stanullfrac);
+
+ /* Must copy the target values into anl_context */
+ old_cxt = MemoryContextSwitchTo(stats->anl_context);
+
+ /*
+ * Generate a bounds histogram slot entry if there are at least two
+ * values.
+ */
+ if (non_empty_cnt >= 2)
+ {
+ /* Sort bound values */
+ qsort_interruptible(lowers, non_empty_cnt, sizeof(RangeBound),
+ range_bound_qsort_cmp, typcache);
+ qsort_interruptible(uppers, non_empty_cnt, sizeof(RangeBound),
+ range_bound_qsort_cmp, typcache);
+
+ num_hist = non_empty_cnt;
+ if (num_hist > num_bins)
+ num_hist = num_bins + 1;
+
+ bound_hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
+
+ /*
+ * The object of this loop is to construct ranges from first and
+ * last entries in lowers[] and uppers[] along with evenly-spaced
+ * values in between. So the i'th value is a range of lowers[(i *
+ * (nvals - 1)) / (num_hist - 1)] and uppers[(i * (nvals - 1)) /
+ * (num_hist - 1)]. But computing that subscript directly risks
+ * integer overflow when the stats target is more than a couple
+ * thousand. Instead we add (nvals - 1) / (num_hist - 1) to pos
+ * at each step, tracking the integral and fractional parts of the
+ * sum separately.
+ */
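+ /*
+ * Illustrative numbers (hypothetical): with non_empty_cnt = 8 and
+ * num_hist = 4, delta = 2 and deltafrac = 1, so the loop below uses
+ * positions 0, 2, 4 and 7 -- the first and last entries plus roughly
+ * evenly spaced ones in between -- without ever computing
+ * i * (nvals - 1) directly.
+ */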
+ delta = (non_empty_cnt - 1) / (num_hist - 1);
+ deltafrac = (non_empty_cnt - 1) % (num_hist - 1);
+ pos = posfrac = 0;
+
+ for (i = 0; i < num_hist; i++)
+ {
+ bound_hist_values[i] = PointerGetDatum(range_serialize(typcache,
+ &lowers[pos],
+ &uppers[pos],
+ false));
+ pos += delta;
+ posfrac += deltafrac;
+ if (posfrac >= (num_hist - 1))
+ {
+ /* fractional part exceeds 1, carry to integer part */
+ pos++;
+ posfrac -= (num_hist - 1);
+ }
+ }
+
+ stats->stakind[slot_idx] = STATISTIC_KIND_BOUNDS_HISTOGRAM;
+ stats->stavalues[slot_idx] = bound_hist_values;
+ stats->numvalues[slot_idx] = num_hist;
+
+ /* Store ranges even if we're analyzing a multirange column */
+ stats->statypid[slot_idx] = typcache->type_id;
+ stats->statyplen[slot_idx] = typcache->typlen;
+ stats->statypbyval[slot_idx] = typcache->typbyval;
+ stats->statypalign[slot_idx] = typcache->typalign;
+
+ slot_idx++;
+ }
+
+ /*
+ * Generate a length histogram slot entry if there are at least two
+ * values.
+ */
+ if (non_empty_cnt >= 2)
+ {
+ /*
+ * Sort the range lengths in ascending order before filling the
+ * histogram.
+ */
+ qsort_interruptible(lengths, non_empty_cnt, sizeof(float8),
+ float8_qsort_cmp, NULL);
+
+ num_hist = non_empty_cnt;
+ if (num_hist > num_bins)
+ num_hist = num_bins + 1;
+
+ length_hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
+
+ /*
+ * The object of this loop is to copy the first and last lengths[]
+ * entries along with evenly-spaced values in between. So the i'th
+ * value is lengths[(i * (nvals - 1)) / (num_hist - 1)]. But
+ * computing that subscript directly risks integer overflow when
+ * the stats target is more than a couple thousand. Instead we
+ * add (nvals - 1) / (num_hist - 1) to pos at each step, tracking
+ * the integral and fractional parts of the sum separately.
+ */
+ delta = (non_empty_cnt - 1) / (num_hist - 1);
+ deltafrac = (non_empty_cnt - 1) % (num_hist - 1);
+ pos = posfrac = 0;
+
+ for (i = 0; i < num_hist; i++)
+ {
+ length_hist_values[i] = Float8GetDatum(lengths[pos]);
+ pos += delta;
+ posfrac += deltafrac;
+ if (posfrac >= (num_hist - 1))
+ {
+ /* fractional part exceeds 1, carry to integer part */
+ pos++;
+ posfrac -= (num_hist - 1);
+ }
+ }
+ }
+ else
+ {
+ /*
+ * Even when we don't create the histogram, store an empty array
+ * to mean "no histogram". We can't just leave stavalues NULL,
+ * because get_attstatsslot() errors out if you ask for stavalues and
+ * it's NULL. We'll still store the empty fraction in stanumbers.
+ */
+ length_hist_values = palloc(0);
+ num_hist = 0;
+ }
+ stats->staop[slot_idx] = Float8LessOperator;
+ stats->stacoll[slot_idx] = InvalidOid;
+ stats->stavalues[slot_idx] = length_hist_values;
+ stats->numvalues[slot_idx] = num_hist;
+ stats->statypid[slot_idx] = FLOAT8OID;
+ stats->statyplen[slot_idx] = sizeof(float8);
+ stats->statypbyval[slot_idx] = FLOAT8PASSBYVAL;
+ stats->statypalign[slot_idx] = 'd';
+
+ /* Store the fraction of empty ranges */
+ emptyfrac = (float4 *) palloc(sizeof(float4));
+ *emptyfrac = ((double) empty_cnt) / ((double) non_null_cnt);
+ stats->stanumbers[slot_idx] = emptyfrac;
+ stats->numnumbers[slot_idx] = 1;
+
+ stats->stakind[slot_idx] = STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM;
+ slot_idx++;
+
+ MemoryContextSwitchTo(old_cxt);
+ }
+ else if (null_cnt > 0)
+ {
+ /* We found only nulls; assume the column is entirely null */
+ stats->stats_valid = true;
+ stats->stanullfrac = 1.0;
+ stats->stawidth = 0; /* "unknown" */
+ stats->stadistinct = 0.0; /* "unknown" */
+ }
+
+ /*
+ * We don't need to bother cleaning up any of our temporary palloc's. The
+ * hashtable should also go away, as it used a child memory context.
+ */
+}
diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c
new file mode 100644
index 0000000..0e0c5d5
--- /dev/null
+++ b/src/backend/utils/adt/regexp.c
@@ -0,0 +1,1996 @@
+/*-------------------------------------------------------------------------
+ *
+ * regexp.c
+ * Postgres' interface to the regular expression package.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/regexp.c
+ *
+ * Alistair Crooks added the code for the regex caching
+ * agc - cached the regular expressions used - there's a good chance
+ * that we'll get a hit, so this saves a compile step for every
+ * attempted match. I haven't actually measured the speed improvement,
+ * but it `looks' a lot quicker visually when watching regression
+ * test output.
+ *
+ * agc - incorporated Keith Bostic's Berkeley regex code into
+ * the tree for all ports. To distinguish this regex code from any that
+ * is existent on a platform, I've prepended the string "pg_" to
+ * the functions regcomp, regerror, regexec and regfree.
+ * Fixed a bug that was originally a typo by me, where `i' was used
+ * instead of `oldest' when compiling regular expressions - benign
+ * results mostly, although occasionally it bit you...
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "catalog/pg_type.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "regex/regex.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/varlena.h"
+
+#define PG_GETARG_TEXT_PP_IF_EXISTS(_n) \
+ (PG_NARGS() > (_n) ? PG_GETARG_TEXT_PP(_n) : NULL)
+
+
+/* all the options of interest for regex functions */
+typedef struct pg_re_flags
+{
+ int cflags; /* compile flags for Spencer's regex code */
+ bool glob; /* do it globally (for each occurrence) */
+} pg_re_flags;
+
+/* cross-call state for regexp_match and regexp_split functions */
+typedef struct regexp_matches_ctx
+{
+ text *orig_str; /* data string in original TEXT form */
+ int nmatches; /* number of places where pattern matched */
+ int npatterns; /* number of capturing subpatterns */
+ /* We store start char index and end+1 char index for each match */
+ /* so the number of entries in match_locs is nmatches * npatterns * 2 */
+ int *match_locs; /* 0-based character indexes */
+ int next_match; /* 0-based index of next match to process */
+ /* workspace for build_regexp_match_result() */
+ Datum *elems; /* has npatterns elements */
+ bool *nulls; /* has npatterns elements */
+ pg_wchar *wide_str; /* wide-char version of original string */
+ char *conv_buf; /* conversion buffer, if needed */
+ int conv_bufsiz; /* size thereof */
+} regexp_matches_ctx;
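+
+/*
+ * Indexing sketch (added for exposition; regexp_instr() below performs the
+ * same arithmetic): for 1-based match number n and 1-based subpattern s,
+ * the start offset lives at match_locs[((n - 1) * npatterns + (s - 1)) * 2]
+ * and the end+1 offset in the element right after it.
+ */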
+
+/*
+ * We cache precompiled regular expressions using a "self organizing list"
+ * structure, in which recently-used items tend to be near the front.
+ * Whenever we use an entry, it's moved up to the front of the list.
+ * Over time, an item's average position corresponds to its frequency of use.
+ *
+ * When we first create an entry, it's inserted at the front of
+ * the array, dropping the entry at the end of the array if necessary to
+ * make room. (This might seem to be weighting the new entry too heavily,
+ * but if we insert new entries further back, we'll be unable to adjust to
+ * a sudden shift in the query mix where we are presented with MAX_CACHED_RES
+ * never-before-seen items used circularly. We ought to be able to handle
+ * that case, so we have to insert at the front.)
+ *
+ * Knuth mentions a variant strategy in which a used item is moved up just
+ * one place in the list. Although he says this uses fewer comparisons on
+ * average, it seems not to adapt very well to the situation where you have
+ * both some reusable patterns and a steady stream of non-reusable patterns.
+ * A reusable pattern that isn't used at least as often as non-reusable
+ * patterns are seen will "fail to keep up" and will drop off the end of the
+ * cache. With move-to-front, a reusable pattern is guaranteed to stay in
+ * the cache as long as it's used at least once in every MAX_CACHED_RES uses.
+ */
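+
+/*
+ * Illustration (hypothetical): with MAX_CACHED_RES = 4 and the cache
+ * ordered [A, B, C, D], using C moves it to the front, giving [C, A, B, D];
+ * compiling a brand-new pattern E then drops D, giving [E, C, A, B].
+ */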
+
+/* this is the maximum number of cached regular expressions */
+#ifndef MAX_CACHED_RES
+#define MAX_CACHED_RES 32
+#endif
+
+/* this structure describes one cached regular expression */
+typedef struct cached_re_str
+{
+ char *cre_pat; /* original RE (not null terminated!) */
+ int cre_pat_len; /* length of original RE, in bytes */
+ int cre_flags; /* compile flags: extended,icase etc */
+ Oid cre_collation; /* collation to use */
+ regex_t cre_re; /* the compiled regular expression */
+} cached_re_str;
+
+static int num_res = 0; /* # of cached re's */
+static cached_re_str re_array[MAX_CACHED_RES]; /* cached re's */
+
+
+/* Local functions */
+static regexp_matches_ctx *setup_regexp_matches(text *orig_str, text *pattern,
+ pg_re_flags *flags,
+ int start_search,
+ Oid collation,
+ bool use_subpatterns,
+ bool ignore_degenerate,
+ bool fetching_unmatched);
+static ArrayType *build_regexp_match_result(regexp_matches_ctx *matchctx);
+static Datum build_regexp_split_result(regexp_matches_ctx *splitctx);
+
+
+/*
+ * RE_compile_and_cache - compile a RE, caching if possible
+ *
+ * Returns regex_t *
+ *
+ * text_re --- the pattern, expressed as a TEXT object
+ * cflags --- compile options for the pattern
+ * collation --- collation to use for LC_CTYPE-dependent behavior
+ *
+ * Pattern is given in the database encoding. We internally convert to
+ * an array of pg_wchar, which is what Spencer's regex package wants.
+ */
+regex_t *
+RE_compile_and_cache(text *text_re, int cflags, Oid collation)
+{
+ int text_re_len = VARSIZE_ANY_EXHDR(text_re);
+ char *text_re_val = VARDATA_ANY(text_re);
+ pg_wchar *pattern;
+ int pattern_len;
+ int i;
+ int regcomp_result;
+ cached_re_str re_temp;
+ char errMsg[100];
+
+ /*
+ * Look for a match among previously compiled REs. Since the data
+ * structure is self-organizing with most-used entries at the front, our
+ * search strategy can just be to scan from the front.
+ */
+ for (i = 0; i < num_res; i++)
+ {
+ if (re_array[i].cre_pat_len == text_re_len &&
+ re_array[i].cre_flags == cflags &&
+ re_array[i].cre_collation == collation &&
+ memcmp(re_array[i].cre_pat, text_re_val, text_re_len) == 0)
+ {
+ /*
+ * Found a match; move it to front if not there already.
+ */
+ if (i > 0)
+ {
+ re_temp = re_array[i];
+ memmove(&re_array[1], &re_array[0], i * sizeof(cached_re_str));
+ re_array[0] = re_temp;
+ }
+
+ return &re_array[0].cre_re;
+ }
+ }
+
+ /*
+ * Couldn't find it, so try to compile the new RE. To avoid leaking
+ * resources on failure, we build into the re_temp local.
+ */
+
+ /* Convert pattern string to wide characters */
+ pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
+ pattern_len = pg_mb2wchar_with_len(text_re_val,
+ pattern,
+ text_re_len);
+
+ regcomp_result = pg_regcomp(&re_temp.cre_re,
+ pattern,
+ pattern_len,
+ cflags,
+ collation);
+
+ pfree(pattern);
+
+ if (regcomp_result != REG_OKAY)
+ {
+ /* re didn't compile (no need for pg_regfree, if so) */
+
+ /*
+ * Here and in other places in this file, do CHECK_FOR_INTERRUPTS
+ * before reporting a regex error. This is so that if the regex
+ * library aborts and returns REG_CANCEL, we don't print an error
+ * message that implies the regex was invalid.
+ */
+ CHECK_FOR_INTERRUPTS();
+
+ pg_regerror(regcomp_result, &re_temp.cre_re, errMsg, sizeof(errMsg));
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+ errmsg("invalid regular expression: %s", errMsg)));
+ }
+
+ /*
+ * We use malloc/free for the cre_pat field because the storage has to
+ * persist across transactions, and because we want to get control back on
+ * out-of-memory. The Max() is because some malloc implementations return
+ * NULL for malloc(0).
+ */
+ re_temp.cre_pat = malloc(Max(text_re_len, 1));
+ if (re_temp.cre_pat == NULL)
+ {
+ pg_regfree(&re_temp.cre_re);
+ ereport(ERROR,
+ (errcode(ERRCODE_OUT_OF_MEMORY),
+ errmsg("out of memory")));
+ }
+ memcpy(re_temp.cre_pat, text_re_val, text_re_len);
+ re_temp.cre_pat_len = text_re_len;
+ re_temp.cre_flags = cflags;
+ re_temp.cre_collation = collation;
+
+ /*
+ * Okay, we have a valid new item in re_temp; insert it into the storage
+ * array. Discard last entry if needed.
+ */
+ if (num_res >= MAX_CACHED_RES)
+ {
+ --num_res;
+ Assert(num_res < MAX_CACHED_RES);
+ pg_regfree(&re_array[num_res].cre_re);
+ free(re_array[num_res].cre_pat);
+ }
+
+ if (num_res > 0)
+ memmove(&re_array[1], &re_array[0], num_res * sizeof(cached_re_str));
+
+ re_array[0] = re_temp;
+ num_res++;
+
+ return &re_array[0].cre_re;
+}
+
+/*
+ * RE_wchar_execute - execute a RE on pg_wchar data
+ *
+ * Returns true on match, false on no match
+ *
+ * re --- the compiled pattern as returned by RE_compile_and_cache
+ * data --- the data to match against (need not be null-terminated)
+ * data_len --- the length of the data string
+ * start_search -- the offset in the data to start searching
+ * nmatch, pmatch --- optional return area for match details
+ *
+ * Data is given as array of pg_wchar which is what Spencer's regex package
+ * wants.
+ */
+static bool
+RE_wchar_execute(regex_t *re, pg_wchar *data, int data_len,
+ int start_search, int nmatch, regmatch_t *pmatch)
+{
+ int regexec_result;
+ char errMsg[100];
+
+ /* Perform RE match and return result */
+ regexec_result = pg_regexec(re,
+ data,
+ data_len,
+ start_search,
+ NULL, /* no details */
+ nmatch,
+ pmatch,
+ 0);
+
+ if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
+ {
+ /* re failed??? */
+ CHECK_FOR_INTERRUPTS();
+ pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+ errmsg("regular expression failed: %s", errMsg)));
+ }
+
+ return (regexec_result == REG_OKAY);
+}
+
+/*
+ * RE_execute - execute a RE
+ *
+ * Returns true on match, false on no match
+ *
+ * re --- the compiled pattern as returned by RE_compile_and_cache
+ * dat --- the data to match against (need not be null-terminated)
+ * dat_len --- the length of the data string
+ * nmatch, pmatch --- optional return area for match details
+ *
+ * Data is given in the database encoding. We internally
+ * convert to array of pg_wchar which is what Spencer's regex package wants.
+ */
+static bool
+RE_execute(regex_t *re, char *dat, int dat_len,
+ int nmatch, regmatch_t *pmatch)
+{
+ pg_wchar *data;
+ int data_len;
+ bool match;
+
+ /* Convert data string to wide characters */
+ data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
+ data_len = pg_mb2wchar_with_len(dat, data, dat_len);
+
+ /* Perform RE match and return result */
+ match = RE_wchar_execute(re, data, data_len, 0, nmatch, pmatch);
+
+ pfree(data);
+ return match;
+}
+
+/*
+ * RE_compile_and_execute - compile and execute a RE
+ *
+ * Returns true on match, false on no match
+ *
+ * text_re --- the pattern, expressed as a TEXT object
+ * dat --- the data to match against (need not be null-terminated)
+ * dat_len --- the length of the data string
+ * cflags --- compile options for the pattern
+ * collation --- collation to use for LC_CTYPE-dependent behavior
+ * nmatch, pmatch --- optional return area for match details
+ *
+ * Both pattern and data are given in the database encoding. We internally
+ * convert to array of pg_wchar which is what Spencer's regex package wants.
+ */
+bool
+RE_compile_and_execute(text *text_re, char *dat, int dat_len,
+ int cflags, Oid collation,
+ int nmatch, regmatch_t *pmatch)
+{
+ regex_t *re;
+
+ /* Use REG_NOSUB if caller does not want sub-match details */
+ if (nmatch < 2)
+ cflags |= REG_NOSUB;
+
+ /* Compile RE */
+ re = RE_compile_and_cache(text_re, cflags, collation);
+
+ return RE_execute(re, dat, dat_len, nmatch, pmatch);
+}
+
+
+/*
+ * parse_re_flags - parse the options argument of regexp_match and friends
+ *
+ * flags --- output argument, filled with desired options
+ * opts --- TEXT object, or NULL for defaults
+ *
+ * This accepts all the options allowed by any of the callers; callers that
+ * don't want some have to reject them after the fact.
+ */
+static void
+parse_re_flags(pg_re_flags *flags, text *opts)
+{
+ /* regex flavor is always folded into the compile flags */
+ flags->cflags = REG_ADVANCED;
+ flags->glob = false;
+
+ if (opts)
+ {
+ char *opt_p = VARDATA_ANY(opts);
+ int opt_len = VARSIZE_ANY_EXHDR(opts);
+ int i;
+
+ for (i = 0; i < opt_len; i++)
+ {
+ switch (opt_p[i])
+ {
+ case 'g':
+ flags->glob = true;
+ break;
+ case 'b': /* BREs (but why???) */
+ flags->cflags &= ~(REG_ADVANCED | REG_EXTENDED | REG_QUOTE);
+ break;
+ case 'c': /* case sensitive */
+ flags->cflags &= ~REG_ICASE;
+ break;
+ case 'e': /* plain EREs */
+ flags->cflags |= REG_EXTENDED;
+ flags->cflags &= ~(REG_ADVANCED | REG_QUOTE);
+ break;
+ case 'i': /* case insensitive */
+ flags->cflags |= REG_ICASE;
+ break;
+ case 'm': /* Perloid synonym for n */
+ case 'n': /* \n affects ^ $ . [^ */
+ flags->cflags |= REG_NEWLINE;
+ break;
+ case 'p': /* ~Perl, \n affects . [^ */
+ flags->cflags |= REG_NLSTOP;
+ flags->cflags &= ~REG_NLANCH;
+ break;
+ case 'q': /* literal string */
+ flags->cflags |= REG_QUOTE;
+ flags->cflags &= ~(REG_ADVANCED | REG_EXTENDED);
+ break;
+ case 's': /* single line, \n ordinary */
+ flags->cflags &= ~REG_NEWLINE;
+ break;
+ case 't': /* tight syntax */
+ flags->cflags &= ~REG_EXPANDED;
+ break;
+ case 'w': /* weird, \n affects ^ $ only */
+ flags->cflags &= ~REG_NLSTOP;
+ flags->cflags |= REG_NLANCH;
+ break;
+ case 'x': /* expanded syntax */
+ flags->cflags |= REG_EXPANDED;
+ break;
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid regular expression option: \"%.*s\"",
+ pg_mblen(opt_p + i), opt_p + i)));
+ break;
+ }
+ }
+ }
+}
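+
+/*
+ * For illustration (hypothetical call): an options string of 'gi' leaves
+ * cflags as REG_ADVANCED | REG_ICASE and sets glob = true, while 'q' would
+ * turn the pattern into a literal string (REG_QUOTE).
+ */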
+
+
+/*
+ * interface routines called by the function manager
+ */
+
+Datum
+nameregexeq(PG_FUNCTION_ARGS)
+{
+ Name n = PG_GETARG_NAME(0);
+ text *p = PG_GETARG_TEXT_PP(1);
+
+ PG_RETURN_BOOL(RE_compile_and_execute(p,
+ NameStr(*n),
+ strlen(NameStr(*n)),
+ REG_ADVANCED,
+ PG_GET_COLLATION(),
+ 0, NULL));
+}
+
+Datum
+nameregexne(PG_FUNCTION_ARGS)
+{
+ Name n = PG_GETARG_NAME(0);
+ text *p = PG_GETARG_TEXT_PP(1);
+
+ PG_RETURN_BOOL(!RE_compile_and_execute(p,
+ NameStr(*n),
+ strlen(NameStr(*n)),
+ REG_ADVANCED,
+ PG_GET_COLLATION(),
+ 0, NULL));
+}
+
+Datum
+textregexeq(PG_FUNCTION_ARGS)
+{
+ text *s = PG_GETARG_TEXT_PP(0);
+ text *p = PG_GETARG_TEXT_PP(1);
+
+ PG_RETURN_BOOL(RE_compile_and_execute(p,
+ VARDATA_ANY(s),
+ VARSIZE_ANY_EXHDR(s),
+ REG_ADVANCED,
+ PG_GET_COLLATION(),
+ 0, NULL));
+}
+
+Datum
+textregexne(PG_FUNCTION_ARGS)
+{
+ text *s = PG_GETARG_TEXT_PP(0);
+ text *p = PG_GETARG_TEXT_PP(1);
+
+ PG_RETURN_BOOL(!RE_compile_and_execute(p,
+ VARDATA_ANY(s),
+ VARSIZE_ANY_EXHDR(s),
+ REG_ADVANCED,
+ PG_GET_COLLATION(),
+ 0, NULL));
+}
+
+
+/*
+ * routines that use the regexp stuff, but ignore the case.
+ * for this, we use the REG_ICASE flag to pg_regcomp
+ */
+
+
+Datum
+nameicregexeq(PG_FUNCTION_ARGS)
+{
+ Name n = PG_GETARG_NAME(0);
+ text *p = PG_GETARG_TEXT_PP(1);
+
+ PG_RETURN_BOOL(RE_compile_and_execute(p,
+ NameStr(*n),
+ strlen(NameStr(*n)),
+ REG_ADVANCED | REG_ICASE,
+ PG_GET_COLLATION(),
+ 0, NULL));
+}
+
+Datum
+nameicregexne(PG_FUNCTION_ARGS)
+{
+ Name n = PG_GETARG_NAME(0);
+ text *p = PG_GETARG_TEXT_PP(1);
+
+ PG_RETURN_BOOL(!RE_compile_and_execute(p,
+ NameStr(*n),
+ strlen(NameStr(*n)),
+ REG_ADVANCED | REG_ICASE,
+ PG_GET_COLLATION(),
+ 0, NULL));
+}
+
+Datum
+texticregexeq(PG_FUNCTION_ARGS)
+{
+ text *s = PG_GETARG_TEXT_PP(0);
+ text *p = PG_GETARG_TEXT_PP(1);
+
+ PG_RETURN_BOOL(RE_compile_and_execute(p,
+ VARDATA_ANY(s),
+ VARSIZE_ANY_EXHDR(s),
+ REG_ADVANCED | REG_ICASE,
+ PG_GET_COLLATION(),
+ 0, NULL));
+}
+
+Datum
+texticregexne(PG_FUNCTION_ARGS)
+{
+ text *s = PG_GETARG_TEXT_PP(0);
+ text *p = PG_GETARG_TEXT_PP(1);
+
+ PG_RETURN_BOOL(!RE_compile_and_execute(p,
+ VARDATA_ANY(s),
+ VARSIZE_ANY_EXHDR(s),
+ REG_ADVANCED | REG_ICASE,
+ PG_GET_COLLATION(),
+ 0, NULL));
+}
+
+
+/*
+ * textregexsubstr()
+ * Return a substring matched by a regular expression.
+ */
+Datum
+textregexsubstr(PG_FUNCTION_ARGS)
+{
+ text *s = PG_GETARG_TEXT_PP(0);
+ text *p = PG_GETARG_TEXT_PP(1);
+ regex_t *re;
+ regmatch_t pmatch[2];
+ int so,
+ eo;
+
+ /* Compile RE */
+ re = RE_compile_and_cache(p, REG_ADVANCED, PG_GET_COLLATION());
+
+ /*
+ * We pass two regmatch_t structs to get info about the overall match and
+ * the match for the first parenthesized subexpression (if any). If there
+ * is a parenthesized subexpression, we return what it matched; else
+ * return what the whole regexp matched.
+ */
+ if (!RE_execute(re,
+ VARDATA_ANY(s), VARSIZE_ANY_EXHDR(s),
+ 2, pmatch))
+ PG_RETURN_NULL(); /* definitely no match */
+
+ if (re->re_nsub > 0)
+ {
+ /* has parenthesized subexpressions, use the first one */
+ so = pmatch[1].rm_so;
+ eo = pmatch[1].rm_eo;
+ }
+ else
+ {
+ /* no parenthesized subexpression, use whole match */
+ so = pmatch[0].rm_so;
+ eo = pmatch[0].rm_eo;
+ }
+
+ /*
+ * It is possible to have a match to the whole pattern but no match for a
+ * subexpression; for example 'foo(bar)?' is considered to match 'foo' but
+ * there is no subexpression match. So this extra test for match failure
+ * is not redundant.
+ */
+ if (so < 0 || eo < 0)
+ PG_RETURN_NULL();
+
+ return DirectFunctionCall3(text_substr,
+ PointerGetDatum(s),
+ Int32GetDatum(so + 1),
+ Int32GetDatum(eo - so));
+}
+
+/*
+ * textregexreplace_noopt()
+ * Return a string matched by a regular expression, with replacement.
+ *
+ * This version doesn't have an option argument: we default to case
+ * sensitive match, replace the first instance only.
+ */
+Datum
+textregexreplace_noopt(PG_FUNCTION_ARGS)
+{
+ text *s = PG_GETARG_TEXT_PP(0);
+ text *p = PG_GETARG_TEXT_PP(1);
+ text *r = PG_GETARG_TEXT_PP(2);
+
+ PG_RETURN_TEXT_P(replace_text_regexp(s, p, r,
+ REG_ADVANCED, PG_GET_COLLATION(),
+ 0, 1));
+}
+
+/*
+ * textregexreplace()
+ * Return a string matched by a regular expression, with replacement.
+ */
+Datum
+textregexreplace(PG_FUNCTION_ARGS)
+{
+ text *s = PG_GETARG_TEXT_PP(0);
+ text *p = PG_GETARG_TEXT_PP(1);
+ text *r = PG_GETARG_TEXT_PP(2);
+ text *opt = PG_GETARG_TEXT_PP(3);
+ pg_re_flags flags;
+
+ /*
+ * regexp_replace() with four arguments will be preferentially resolved as
+ * this form when the fourth argument is of type UNKNOWN. However, the
+ * user might have intended to call textregexreplace_extended_no_n. If we
+ * see flags that look like an integer, emit the same error that
+ * parse_re_flags would, but add a HINT about how to fix it.
+ */
+ if (VARSIZE_ANY_EXHDR(opt) > 0)
+ {
+ char *opt_p = VARDATA_ANY(opt);
+
+ if (*opt_p >= '0' && *opt_p <= '9')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid regular expression option: \"%.*s\"",
+ pg_mblen(opt_p), opt_p),
+ errhint("If you meant to use regexp_replace() with a start parameter, cast the fourth argument to integer explicitly.")));
+ }
+
+ parse_re_flags(&flags, opt);
+
+ PG_RETURN_TEXT_P(replace_text_regexp(s, p, r,
+ flags.cflags, PG_GET_COLLATION(),
+ 0, flags.glob ? 0 : 1));
+}
+
+/*
+ * textregexreplace_extended()
+ * Return a string matched by a regular expression, with replacement.
+ * Extends textregexreplace by allowing a start position and the
+ * choice of the occurrence to replace (0 means all occurrences).
+ */
+Datum
+textregexreplace_extended(PG_FUNCTION_ARGS)
+{
+ text *s = PG_GETARG_TEXT_PP(0);
+ text *p = PG_GETARG_TEXT_PP(1);
+ text *r = PG_GETARG_TEXT_PP(2);
+ int start = 1;
+ int n = 1;
+ text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(5);
+ pg_re_flags re_flags;
+
+ /* Collect optional parameters */
+ if (PG_NARGS() > 3)
+ {
+ start = PG_GETARG_INT32(3);
+ if (start <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid value for parameter \"%s\": %d",
+ "start", start)));
+ }
+ if (PG_NARGS() > 4)
+ {
+ n = PG_GETARG_INT32(4);
+ if (n < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid value for parameter \"%s\": %d",
+ "n", n)));
+ }
+
+ /* Determine options */
+ parse_re_flags(&re_flags, flags);
+
+ /* If N was not specified, deduce it from the 'g' flag */
+ if (PG_NARGS() <= 4)
+ n = re_flags.glob ? 0 : 1;
+
+ /* Do the replacement(s) */
+ PG_RETURN_TEXT_P(replace_text_regexp(s, p, r,
+ re_flags.cflags, PG_GET_COLLATION(),
+ start - 1, n));
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+textregexreplace_extended_no_n(PG_FUNCTION_ARGS)
+{
+ return textregexreplace_extended(fcinfo);
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+textregexreplace_extended_no_flags(PG_FUNCTION_ARGS)
+{
+ return textregexreplace_extended(fcinfo);
+}
+
+/*
+ * similar_to_escape(), similar_escape()
+ *
+ * Convert a SQL "SIMILAR TO" regexp pattern to POSIX style, so it can be
+ * used by our regexp engine.
+ *
+ * similar_escape_internal() is the common workhorse for three SQL-exposed
+ * functions. esc_text can be passed as NULL to select the default escape
+ * (which is '\'), or as an empty string to select no escape character.
+ */
+static text *
+similar_escape_internal(text *pat_text, text *esc_text)
+{
+ text *result;
+ char *p,
+ *e,
+ *r;
+ int plen,
+ elen;
+ bool afterescape = false;
+ bool incharclass = false;
+ int nquotes = 0;
+
+ p = VARDATA_ANY(pat_text);
+ plen = VARSIZE_ANY_EXHDR(pat_text);
+ if (esc_text == NULL)
+ {
+ /* No ESCAPE clause provided; default to backslash as escape */
+ e = "\\";
+ elen = 1;
+ }
+ else
+ {
+ e = VARDATA_ANY(esc_text);
+ elen = VARSIZE_ANY_EXHDR(esc_text);
+ if (elen == 0)
+ e = NULL; /* no escape character */
+ else if (elen > 1)
+ {
+ int escape_mblen = pg_mbstrlen_with_len(e, elen);
+
+ if (escape_mblen > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
+ errmsg("invalid escape string"),
+ errhint("Escape string must be empty or one character.")));
+ }
+ }
+
+ /*----------
+ * We surround the transformed input string with
+ * ^(?: ... )$
+ * which requires some explanation. We need "^" and "$" to force
+ * the pattern to match the entire input string as per the SQL spec.
+ * The "(?:" and ")" are a non-capturing set of parens; we have to have
+ * parens in case the string contains "|", else the "^" and "$" will
+ * be bound into the first and last alternatives which is not what we
+ * want, and the parens must be non capturing because we don't want them
+ * to count when selecting output for SUBSTRING.
+ *
+ * When the pattern is divided into three parts by escape-double-quotes,
+ * what we emit is
+ * ^(?:part1){1,1}?(part2){1,1}(?:part3)$
+ * which requires even more explanation. The "{1,1}?" on part1 makes it
+ * non-greedy so that it will match the smallest possible amount of text
+ * not the largest, as required by SQL. The plain parens around part2
+ * are capturing parens so that that part is what controls the result of
+ * SUBSTRING. The "{1,1}" forces part2 to be greedy, so that it matches
+ * the largest possible amount of text; hence part3 must match the
+ * smallest amount of text, as required by SQL. We don't need an explicit
+ * greediness marker on part3. Note that this also confines the effects
+ * of any "|" characters to the respective part, which is what we want.
+ *
+ * The SQL spec says that SUBSTRING's pattern must contain exactly two
+ * escape-double-quotes, but we only complain if there's more than two.
+ * With none, we act as though part1 and part3 are empty; with one, we
+ * act as though part3 is empty. Both behaviors fall out of omitting
+ * the relevant part separators in the above expansion. If the result
+ * of this function is used in a plain regexp match (SIMILAR TO), the
+ * escape-double-quotes have no effect on the match behavior.
+ *----------
+ */
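+
+ /*
+ * Illustrative conversions (hypothetical inputs): with the default escape,
+ * the SIMILAR TO pattern '_b%' becomes '^(?:.b.*)$'; with escape '#', the
+ * SUBSTRING pattern '%#"bb#"%' becomes '^(?:.*){1,1}?(bb){1,1}(?:.*)$'.
+ */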
+
+ /*
+ * We need room for the prefix/postfix and part separators, plus as many
+ * as 3 output bytes per input byte; since the input is at most 1GB this
+ * can't overflow size_t.
+ */
+ result = (text *) palloc(VARHDRSZ + 23 + 3 * (size_t) plen);
+ r = VARDATA(result);
+
+ *r++ = '^';
+ *r++ = '(';
+ *r++ = '?';
+ *r++ = ':';
+
+ while (plen > 0)
+ {
+ char pchar = *p;
+
+ /*
+ * If both the escape character and the current character from the
+ * pattern are multi-byte, we need to take the slow path.
+ *
+ * But if one of them is single-byte, we can process the pattern one
+ * byte at a time, ignoring multi-byte characters. (This works
+ * because all server-encodings have the property that a valid
+ * multi-byte character representation cannot contain the
+ * representation of a valid single-byte character.)
+ */
+
+ if (elen > 1)
+ {
+ int mblen = pg_mblen(p);
+
+ if (mblen > 1)
+ {
+ /* slow, multi-byte path */
+ if (afterescape)
+ {
+ *r++ = '\\';
+ memcpy(r, p, mblen);
+ r += mblen;
+ afterescape = false;
+ }
+ else if (e && elen == mblen && memcmp(e, p, mblen) == 0)
+ {
+ /* SQL escape character; do not send to output */
+ afterescape = true;
+ }
+ else
+ {
+ /*
+ * We know it's a multi-byte character, so we don't need
+ * to do all the comparisons to single-byte characters
+ * that we do below.
+ */
+ memcpy(r, p, mblen);
+ r += mblen;
+ }
+
+ p += mblen;
+ plen -= mblen;
+
+ continue;
+ }
+ }
+
+ /* fast path */
+ if (afterescape)
+ {
+ if (pchar == '"' && !incharclass) /* escape-double-quote? */
+ {
+ /* emit appropriate part separator, per notes above */
+ if (nquotes == 0)
+ {
+ *r++ = ')';
+ *r++ = '{';
+ *r++ = '1';
+ *r++ = ',';
+ *r++ = '1';
+ *r++ = '}';
+ *r++ = '?';
+ *r++ = '(';
+ }
+ else if (nquotes == 1)
+ {
+ *r++ = ')';
+ *r++ = '{';
+ *r++ = '1';
+ *r++ = ',';
+ *r++ = '1';
+ *r++ = '}';
+ *r++ = '(';
+ *r++ = '?';
+ *r++ = ':';
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER),
+ errmsg("SQL regular expression may not contain more than two escape-double-quote separators")));
+ nquotes++;
+ }
+ else
+ {
+ /*
+ * We allow any character at all to be escaped; notably, this
+ * allows access to POSIX character-class escapes such as
+ * "\d". The SQL spec is considerably more restrictive.
+ */
+ *r++ = '\\';
+ *r++ = pchar;
+ }
+ afterescape = false;
+ }
+ else if (e && pchar == *e)
+ {
+ /* SQL escape character; do not send to output */
+ afterescape = true;
+ }
+ else if (incharclass)
+ {
+ if (pchar == '\\')
+ *r++ = '\\';
+ *r++ = pchar;
+ if (pchar == ']')
+ incharclass = false;
+ }
+ else if (pchar == '[')
+ {
+ *r++ = pchar;
+ incharclass = true;
+ }
+ else if (pchar == '%')
+ {
+ *r++ = '.';
+ *r++ = '*';
+ }
+ else if (pchar == '_')
+ *r++ = '.';
+ else if (pchar == '(')
+ {
+ /* convert to non-capturing parenthesis */
+ *r++ = '(';
+ *r++ = '?';
+ *r++ = ':';
+ }
+ else if (pchar == '\\' || pchar == '.' ||
+ pchar == '^' || pchar == '$')
+ {
+ *r++ = '\\';
+ *r++ = pchar;
+ }
+ else
+ *r++ = pchar;
+ p++, plen--;
+ }
+
+ *r++ = ')';
+ *r++ = '$';
+
+ SET_VARSIZE(result, r - ((char *) result));
+
+ return result;
+}
+
+/*
+ * similar_to_escape(pattern, escape)
+ */
+Datum
+similar_to_escape_2(PG_FUNCTION_ARGS)
+{
+ text *pat_text = PG_GETARG_TEXT_PP(0);
+ text *esc_text = PG_GETARG_TEXT_PP(1);
+ text *result;
+
+ result = similar_escape_internal(pat_text, esc_text);
+
+ PG_RETURN_TEXT_P(result);
+}
+
+/*
+ * similar_to_escape(pattern)
+ * Inserts a default escape character.
+ */
+Datum
+similar_to_escape_1(PG_FUNCTION_ARGS)
+{
+ text *pat_text = PG_GETARG_TEXT_PP(0);
+ text *result;
+
+ result = similar_escape_internal(pat_text, NULL);
+
+ PG_RETURN_TEXT_P(result);
+}
+
+/*
+ * similar_escape(pattern, escape)
+ *
+ * Legacy function for compatibility with views stored using the
+ * pre-v13 expansion of SIMILAR TO. Unlike the above functions, this
+ * is non-strict, which leads to not-per-spec handling of "ESCAPE NULL".
+ */
+Datum
+similar_escape(PG_FUNCTION_ARGS)
+{
+ text *pat_text;
+ text *esc_text;
+ text *result;
+
+ /* This function is not strict, so must test explicitly */
+ if (PG_ARGISNULL(0))
+ PG_RETURN_NULL();
+ pat_text = PG_GETARG_TEXT_PP(0);
+
+ if (PG_ARGISNULL(1))
+ esc_text = NULL; /* use default escape character */
+ else
+ esc_text = PG_GETARG_TEXT_PP(1);
+
+ result = similar_escape_internal(pat_text, esc_text);
+
+ PG_RETURN_TEXT_P(result);
+}
+
+/*
+ * regexp_count()
+ * Return the number of matches of a pattern within a string.
+ */
+Datum
+regexp_count(PG_FUNCTION_ARGS)
+{
+ text *str = PG_GETARG_TEXT_PP(0);
+ text *pattern = PG_GETARG_TEXT_PP(1);
+ int start = 1;
+ text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(3);
+ pg_re_flags re_flags;
+ regexp_matches_ctx *matchctx;
+
+ /* Collect optional parameters */
+ if (PG_NARGS() > 2)
+ {
+ start = PG_GETARG_INT32(2);
+ if (start <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid value for parameter \"%s\": %d",
+ "start", start)));
+ }
+
+ /* Determine options */
+ parse_re_flags(&re_flags, flags);
+ /* User mustn't specify 'g' */
+ if (re_flags.glob)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ /* translator: %s is a SQL function name */
+ errmsg("%s does not support the \"global\" option",
+ "regexp_count()")));
+ /* But we find all the matches anyway */
+ re_flags.glob = true;
+
+ /* Do the matching */
+ matchctx = setup_regexp_matches(str, pattern, &re_flags, start - 1,
+ PG_GET_COLLATION(),
+ false, /* can ignore subexprs */
+ false, false);
+
+ PG_RETURN_INT32(matchctx->nmatches);
+}
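+
+/*
+ * Usage sketch (hypothetical data):
+ *   SELECT regexp_count('ababab', 'ab');      -- returns 3
+ *   SELECT regexp_count('ababab', 'ab', 2);   -- starts at character 2, returns 2
+ */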
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_count_no_start(PG_FUNCTION_ARGS)
+{
+ return regexp_count(fcinfo);
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_count_no_flags(PG_FUNCTION_ARGS)
+{
+ return regexp_count(fcinfo);
+}
+
+/*
+ * regexp_instr()
+ * Return the match's position within the string
+ */
+Datum
+regexp_instr(PG_FUNCTION_ARGS)
+{
+ text *str = PG_GETARG_TEXT_PP(0);
+ text *pattern = PG_GETARG_TEXT_PP(1);
+ int start = 1;
+ int n = 1;
+ int endoption = 0;
+ text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(5);
+ int subexpr = 0;
+ int pos;
+ pg_re_flags re_flags;
+ regexp_matches_ctx *matchctx;
+
+ /* Collect optional parameters */
+ if (PG_NARGS() > 2)
+ {
+ start = PG_GETARG_INT32(2);
+ if (start <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid value for parameter \"%s\": %d",
+ "start", start)));
+ }
+ if (PG_NARGS() > 3)
+ {
+ n = PG_GETARG_INT32(3);
+ if (n <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid value for parameter \"%s\": %d",
+ "n", n)));
+ }
+ if (PG_NARGS() > 4)
+ {
+ endoption = PG_GETARG_INT32(4);
+ if (endoption != 0 && endoption != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid value for parameter \"%s\": %d",
+ "endoption", endoption)));
+ }
+ if (PG_NARGS() > 6)
+ {
+ subexpr = PG_GETARG_INT32(6);
+ if (subexpr < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid value for parameter \"%s\": %d",
+ "subexpr", subexpr)));
+ }
+
+ /* Determine options */
+ parse_re_flags(&re_flags, flags);
+ /* User mustn't specify 'g' */
+ if (re_flags.glob)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ /* translator: %s is a SQL function name */
+ errmsg("%s does not support the \"global\" option",
+ "regexp_instr()")));
+ /* But we find all the matches anyway */
+ re_flags.glob = true;
+
+ /* Do the matching */
+ matchctx = setup_regexp_matches(str, pattern, &re_flags, start - 1,
+ PG_GET_COLLATION(),
+ (subexpr > 0), /* need submatches? */
+ false, false);
+
+ /* When n exceeds the number of matches, return 0 (covers the no-match case) */
+ if (n > matchctx->nmatches)
+ PG_RETURN_INT32(0);
+
+ /* When subexpr exceeds number of subexpressions return 0 */
+ if (subexpr > matchctx->npatterns)
+ PG_RETURN_INT32(0);
+
+ /* Select the appropriate match position to return */
+ pos = (n - 1) * matchctx->npatterns;
+ if (subexpr > 0)
+ pos += subexpr - 1;
+ pos *= 2;
+ if (endoption == 1)
+ pos += 1;
+
+ if (matchctx->match_locs[pos] >= 0)
+ PG_RETURN_INT32(matchctx->match_locs[pos] + 1);
+ else
+ PG_RETURN_INT32(0); /* position not identifiable */
+}
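+
+/*
+ * Usage sketch (hypothetical data):
+ *   SELECT regexp_instr('abcabc', 'b');           -- returns 2
+ *   SELECT regexp_instr('abcabc', 'b', 1, 2);     -- second match, returns 5
+ *   SELECT regexp_instr('abcabc', 'b', 1, 2, 1);  -- end position, returns 6
+ */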
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_instr_no_start(PG_FUNCTION_ARGS)
+{
+ return regexp_instr(fcinfo);
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_instr_no_n(PG_FUNCTION_ARGS)
+{
+ return regexp_instr(fcinfo);
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_instr_no_endoption(PG_FUNCTION_ARGS)
+{
+ return regexp_instr(fcinfo);
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_instr_no_flags(PG_FUNCTION_ARGS)
+{
+ return regexp_instr(fcinfo);
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_instr_no_subexpr(PG_FUNCTION_ARGS)
+{
+ return regexp_instr(fcinfo);
+}
+
+/*
+ * regexp_like()
+ * Test for a pattern match within a string.
+ */
+Datum
+regexp_like(PG_FUNCTION_ARGS)
+{
+ text *str = PG_GETARG_TEXT_PP(0);
+ text *pattern = PG_GETARG_TEXT_PP(1);
+ text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
+ pg_re_flags re_flags;
+
+ /* Determine options */
+ parse_re_flags(&re_flags, flags);
+ /* User mustn't specify 'g' */
+ if (re_flags.glob)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ /* translator: %s is a SQL function name */
+ errmsg("%s does not support the \"global\" option",
+ "regexp_like()")));
+
+ /* Otherwise it's like textregexeq/texticregexeq */
+ PG_RETURN_BOOL(RE_compile_and_execute(pattern,
+ VARDATA_ANY(str),
+ VARSIZE_ANY_EXHDR(str),
+ re_flags.cflags,
+ PG_GET_COLLATION(),
+ 0, NULL));
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_like_no_flags(PG_FUNCTION_ARGS)
+{
+ return regexp_like(fcinfo);
+}
+
+/*
+ * regexp_match()
+ * Return the first substring(s) matching a pattern within a string.
+ */
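+/*
+ * For example, regexp_match('foobarbequebaz', '(bar)(beque)') returns the
+ * text array {bar,beque}; with no parenthesized subexpressions the result
+ * is a single-element array holding the whole match, and NULL if there is
+ * no match at all.
+ */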
+Datum
+regexp_match(PG_FUNCTION_ARGS)
+{
+ text *orig_str = PG_GETARG_TEXT_PP(0);
+ text *pattern = PG_GETARG_TEXT_PP(1);
+ text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
+ pg_re_flags re_flags;
+ regexp_matches_ctx *matchctx;
+
+ /* Determine options */
+ parse_re_flags(&re_flags, flags);
+ /* User mustn't specify 'g' */
+ if (re_flags.glob)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ /* translator: %s is a SQL function name */
+ errmsg("%s does not support the \"global\" option",
+ "regexp_match()"),
+ errhint("Use the regexp_matches function instead.")));
+
+ matchctx = setup_regexp_matches(orig_str, pattern, &re_flags, 0,
+ PG_GET_COLLATION(), true, false, false);
+
+ if (matchctx->nmatches == 0)
+ PG_RETURN_NULL();
+
+ Assert(matchctx->nmatches == 1);
+
+ /* Create workspace that build_regexp_match_result needs */
+ matchctx->elems = (Datum *) palloc(sizeof(Datum) * matchctx->npatterns);
+ matchctx->nulls = (bool *) palloc(sizeof(bool) * matchctx->npatterns);
+
+ PG_RETURN_DATUM(PointerGetDatum(build_regexp_match_result(matchctx)));
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_match_no_flags(PG_FUNCTION_ARGS)
+{
+ return regexp_match(fcinfo);
+}
+
+/*
+ * regexp_matches()
+ * Return a table of all matches of a pattern within a string.
+ */
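+/*
+ * For example, regexp_matches('foobarbequebazilbarfbonk',
+ * '(b[^b]+)(b[^b]+)', 'g') returns two rows, {bar,beque} and {bazil,barf};
+ * without the 'g' flag only the first match is reported.
+ */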
+Datum
+regexp_matches(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ regexp_matches_ctx *matchctx;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ text *pattern = PG_GETARG_TEXT_PP(1);
+ text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
+ pg_re_flags re_flags;
+ MemoryContext oldcontext;
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ /* Determine options */
+ parse_re_flags(&re_flags, flags);
+
+ /* be sure to copy the input string into the multi-call ctx */
+ matchctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
+ &re_flags, 0,
+ PG_GET_COLLATION(),
+ true, false, false);
+
+ /* Pre-create workspace that build_regexp_match_result needs */
+ matchctx->elems = (Datum *) palloc(sizeof(Datum) * matchctx->npatterns);
+ matchctx->nulls = (bool *) palloc(sizeof(bool) * matchctx->npatterns);
+
+ MemoryContextSwitchTo(oldcontext);
+ funcctx->user_fctx = (void *) matchctx;
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ matchctx = (regexp_matches_ctx *) funcctx->user_fctx;
+
+ if (matchctx->next_match < matchctx->nmatches)
+ {
+ ArrayType *result_ary;
+
+ result_ary = build_regexp_match_result(matchctx);
+ matchctx->next_match++;
+ SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
+ }
+
+ SRF_RETURN_DONE(funcctx);
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_matches_no_flags(PG_FUNCTION_ARGS)
+{
+ return regexp_matches(fcinfo);
+}
+
+/*
+ * setup_regexp_matches --- do the initial matching for regexp_match,
+ * regexp_split, and related functions
+ *
+ * To avoid having to re-find the compiled pattern on each call, we do
+ * all the matching in one swoop. The returned regexp_matches_ctx contains
+ * the locations of all the substrings matching the pattern.
+ *
+ * start_search: the character (not byte) offset in orig_str at which to
+ * begin the search. Returned positions are relative to orig_str anyway.
+ * use_subpatterns: collect data about matches to parenthesized subexpressions.
+ * ignore_degenerate: ignore zero-length matches.
+ * fetching_unmatched: caller wants to fetch unmatched substrings.
+ *
+ * We don't currently assume that fetching_unmatched is exclusive of fetching
+ * the matched text too; if it's set, the conversion buffer is large enough to
+ * fetch any single matched or unmatched string, but not any larger
+ * substring. (In practice, when splitting the matches are usually small
+ * anyway, and it didn't seem worth complicating the code further.)
+ */
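+/*
+ * The match_locs array built here holds, for each match, npatterns
+ * (start, end) character-offset pairs into orig_str (-1 for a subexpression
+ * that did not participate in the match), plus one extra entry after the
+ * last match recording the overall string length for the benefit of the
+ * splitting code.
+ */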
+static regexp_matches_ctx *
+setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
+ int start_search,
+ Oid collation,
+ bool use_subpatterns,
+ bool ignore_degenerate,
+ bool fetching_unmatched)
+{
+ regexp_matches_ctx *matchctx = palloc0(sizeof(regexp_matches_ctx));
+ int eml = pg_database_encoding_max_length();
+ int orig_len;
+ pg_wchar *wide_str;
+ int wide_len;
+ int cflags;
+ regex_t *cpattern;
+ regmatch_t *pmatch;
+ int pmatch_len;
+ int array_len;
+ int array_idx;
+ int prev_match_end;
+ int prev_valid_match_end;
+ int maxlen = 0; /* largest fetch length in characters */
+
+ /* save original string --- we'll extract result substrings from it */
+ matchctx->orig_str = orig_str;
+
+ /* convert string to pg_wchar form for matching */
+ orig_len = VARSIZE_ANY_EXHDR(orig_str);
+ wide_str = (pg_wchar *) palloc(sizeof(pg_wchar) * (orig_len + 1));
+ wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);
+
+ /* set up the compiled pattern */
+ cflags = re_flags->cflags;
+ if (!use_subpatterns)
+ cflags |= REG_NOSUB;
+ cpattern = RE_compile_and_cache(pattern, cflags, collation);
+
+ /* do we want to remember subpatterns? */
+ if (use_subpatterns && cpattern->re_nsub > 0)
+ {
+ matchctx->npatterns = cpattern->re_nsub;
+ pmatch_len = cpattern->re_nsub + 1;
+ }
+ else
+ {
+ use_subpatterns = false;
+ matchctx->npatterns = 1;
+ pmatch_len = 1;
+ }
+
+ /* temporary output space for RE package */
+ pmatch = palloc(sizeof(regmatch_t) * pmatch_len);
+
+ /*
+ * the real output space (grown dynamically if needed)
+ *
+ * use values 2^n-1, not 2^n, so that we hit the limit at 2^28-1 rather
+ * than at 2^27
+ */
+ array_len = re_flags->glob ? 255 : 31;
+ matchctx->match_locs = (int *) palloc(sizeof(int) * array_len);
+ array_idx = 0;
+
+ /* search for the pattern, perhaps repeatedly */
+ prev_match_end = 0;
+ prev_valid_match_end = 0;
+ while (RE_wchar_execute(cpattern, wide_str, wide_len, start_search,
+ pmatch_len, pmatch))
+ {
+ /*
+ * If requested, ignore degenerate matches, which are zero-length
+ * matches occurring at the start or end of a string or just after a
+ * previous match.
+ */
+ if (!ignore_degenerate ||
+ (pmatch[0].rm_so < wide_len &&
+ pmatch[0].rm_eo > prev_match_end))
+ {
+ /* enlarge output space if needed */
+ while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
+ {
+ array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
+ if (array_len > MaxAllocSize / sizeof(int))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("too many regular expression matches")));
+ matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
+ sizeof(int) * array_len);
+ }
+
+ /* save this match's locations */
+ if (use_subpatterns)
+ {
+ int i;
+
+ for (i = 1; i <= matchctx->npatterns; i++)
+ {
+ int so = pmatch[i].rm_so;
+ int eo = pmatch[i].rm_eo;
+
+ matchctx->match_locs[array_idx++] = so;
+ matchctx->match_locs[array_idx++] = eo;
+ if (so >= 0 && eo >= 0 && (eo - so) > maxlen)
+ maxlen = (eo - so);
+ }
+ }
+ else
+ {
+ int so = pmatch[0].rm_so;
+ int eo = pmatch[0].rm_eo;
+
+ matchctx->match_locs[array_idx++] = so;
+ matchctx->match_locs[array_idx++] = eo;
+ if (so >= 0 && eo >= 0 && (eo - so) > maxlen)
+ maxlen = (eo - so);
+ }
+ matchctx->nmatches++;
+
+ /*
+ * check length of unmatched portion between end of previous valid
+ * (nondegenerate, or degenerate but not ignored) match and start
+ * of current one
+ */
+ if (fetching_unmatched &&
+ pmatch[0].rm_so >= 0 &&
+ (pmatch[0].rm_so - prev_valid_match_end) > maxlen)
+ maxlen = (pmatch[0].rm_so - prev_valid_match_end);
+ prev_valid_match_end = pmatch[0].rm_eo;
+ }
+ prev_match_end = pmatch[0].rm_eo;
+
+ /* if not glob, stop after one match */
+ if (!re_flags->glob)
+ break;
+
+ /*
+ * Advance search position. Normally we start the next search at the
+ * end of the previous match; but if the match was of zero length, we
+ * have to advance by one character, or we'd just find the same match
+ * again.
+ */
+ start_search = prev_match_end;
+ if (pmatch[0].rm_so == pmatch[0].rm_eo)
+ start_search++;
+ if (start_search > wide_len)
+ break;
+ }
+
+ /*
+ * check length of unmatched portion between end of last match and end of
+ * input string
+ */
+ if (fetching_unmatched &&
+ (wide_len - prev_valid_match_end) > maxlen)
+ maxlen = (wide_len - prev_valid_match_end);
+
+ /*
+ * Keep a note of the end position of the string for the benefit of
+ * splitting code.
+ */
+ matchctx->match_locs[array_idx] = wide_len;
+
+ if (eml > 1)
+ {
+ int64 maxsiz = eml * (int64) maxlen;
+ int conv_bufsiz;
+
+ /*
+ * Make the conversion buffer large enough for any substring of
+ * interest.
+ *
+ * Worst case: assume we need the maximum size (maxlen*eml), but take
+ * advantage of the fact that the original string length in bytes is
+ * an upper bound on the byte length of any fetched substring (and we
+ * know that len+1 is safe to allocate because the varlena header is
+ * longer than 1 byte).
+ */
+ if (maxsiz > orig_len)
+ conv_bufsiz = orig_len + 1;
+ else
+ conv_bufsiz = maxsiz + 1; /* safe since maxsiz < 2^30 */
+
+ matchctx->conv_buf = palloc(conv_bufsiz);
+ matchctx->conv_bufsiz = conv_bufsiz;
+ matchctx->wide_str = wide_str;
+ }
+ else
+ {
+ /* No need to keep the wide string if we're in a single-byte charset. */
+ pfree(wide_str);
+ matchctx->wide_str = NULL;
+ matchctx->conv_buf = NULL;
+ matchctx->conv_bufsiz = 0;
+ }
+
+ /* Clean up temp storage */
+ pfree(pmatch);
+
+ return matchctx;
+}
+
+/*
+ * build_regexp_match_result - build output array for current match
+ */
+static ArrayType *
+build_regexp_match_result(regexp_matches_ctx *matchctx)
+{
+ char *buf = matchctx->conv_buf;
+ Datum *elems = matchctx->elems;
+ bool *nulls = matchctx->nulls;
+ int dims[1];
+ int lbs[1];
+ int loc;
+ int i;
+
+ /* Extract matching substrings from the original string */
+ loc = matchctx->next_match * matchctx->npatterns * 2;
+ for (i = 0; i < matchctx->npatterns; i++)
+ {
+ int so = matchctx->match_locs[loc++];
+ int eo = matchctx->match_locs[loc++];
+
+ if (so < 0 || eo < 0)
+ {
+ elems[i] = (Datum) 0;
+ nulls[i] = true;
+ }
+ else if (buf)
+ {
+ int len = pg_wchar2mb_with_len(matchctx->wide_str + so,
+ buf,
+ eo - so);
+
+ Assert(len < matchctx->conv_bufsiz);
+ elems[i] = PointerGetDatum(cstring_to_text_with_len(buf, len));
+ nulls[i] = false;
+ }
+ else
+ {
+ elems[i] = DirectFunctionCall3(text_substr,
+ PointerGetDatum(matchctx->orig_str),
+ Int32GetDatum(so + 1),
+ Int32GetDatum(eo - so));
+ nulls[i] = false;
+ }
+ }
+
+ /* And form an array */
+ dims[0] = matchctx->npatterns;
+ lbs[0] = 1;
+ /* XXX: this hardcodes assumptions about the text type */
+ return construct_md_array(elems, nulls, 1, dims, lbs,
+ TEXTOID, -1, false, TYPALIGN_INT);
+}
+
+/*
+ * regexp_split_to_table()
+ * Split the string at matches of the pattern, returning the
+ * split-out substrings as a table.
+ */
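+/*
+ * For example, regexp_split_to_table('hello world', '\s+') returns two
+ * rows, 'hello' and 'world'.
+ */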
+Datum
+regexp_split_to_table(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ regexp_matches_ctx *splitctx;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ text *pattern = PG_GETARG_TEXT_PP(1);
+ text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
+ pg_re_flags re_flags;
+ MemoryContext oldcontext;
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ /* Determine options */
+ parse_re_flags(&re_flags, flags);
+ /* User mustn't specify 'g' */
+ if (re_flags.glob)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ /* translator: %s is a SQL function name */
+ errmsg("%s does not support the \"global\" option",
+ "regexp_split_to_table()")));
+ /* But we find all the matches anyway */
+ re_flags.glob = true;
+
+ /* be sure to copy the input string into the multi-call ctx */
+ splitctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
+ &re_flags, 0,
+ PG_GET_COLLATION(),
+ false, true, true);
+
+ MemoryContextSwitchTo(oldcontext);
+ funcctx->user_fctx = (void *) splitctx;
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ splitctx = (regexp_matches_ctx *) funcctx->user_fctx;
+
+ if (splitctx->next_match <= splitctx->nmatches)
+ {
+ Datum result = build_regexp_split_result(splitctx);
+
+ splitctx->next_match++;
+ SRF_RETURN_NEXT(funcctx, result);
+ }
+
+ SRF_RETURN_DONE(funcctx);
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_split_to_table_no_flags(PG_FUNCTION_ARGS)
+{
+ return regexp_split_to_table(fcinfo);
+}
+
+/*
+ * regexp_split_to_array()
+ * Split the string at matches of the pattern, returning the
+ * split-out substrings as an array.
+ */
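+/*
+ * For example, regexp_split_to_array('hello world', '\s+') returns the
+ * array {hello,world}.
+ */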
+Datum
+regexp_split_to_array(PG_FUNCTION_ARGS)
+{
+ ArrayBuildState *astate = NULL;
+ pg_re_flags re_flags;
+ regexp_matches_ctx *splitctx;
+
+ /* Determine options */
+ parse_re_flags(&re_flags, PG_GETARG_TEXT_PP_IF_EXISTS(2));
+ /* User mustn't specify 'g' */
+ if (re_flags.glob)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ /* translator: %s is a SQL function name */
+ errmsg("%s does not support the \"global\" option",
+ "regexp_split_to_array()")));
+ /* But we find all the matches anyway */
+ re_flags.glob = true;
+
+ splitctx = setup_regexp_matches(PG_GETARG_TEXT_PP(0),
+ PG_GETARG_TEXT_PP(1),
+ &re_flags, 0,
+ PG_GET_COLLATION(),
+ false, true, true);
+
+ while (splitctx->next_match <= splitctx->nmatches)
+ {
+ astate = accumArrayResult(astate,
+ build_regexp_split_result(splitctx),
+ false,
+ TEXTOID,
+ CurrentMemoryContext);
+ splitctx->next_match++;
+ }
+
+ PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_split_to_array_no_flags(PG_FUNCTION_ARGS)
+{
+ return regexp_split_to_array(fcinfo);
+}
+
+/*
+ * build_regexp_split_result - build output string for current match
+ *
+ * We return the string between the current match and the previous one,
+ * or the string after the last match when next_match == nmatches.
+ */
+static Datum
+build_regexp_split_result(regexp_matches_ctx *splitctx)
+{
+ char *buf = splitctx->conv_buf;
+ int startpos;
+ int endpos;
+
+ if (splitctx->next_match > 0)
+ startpos = splitctx->match_locs[splitctx->next_match * 2 - 1];
+ else
+ startpos = 0;
+ if (startpos < 0)
+ elog(ERROR, "invalid match ending position");
+
+ endpos = splitctx->match_locs[splitctx->next_match * 2];
+ if (endpos < startpos)
+ elog(ERROR, "invalid match starting position");
+
+ if (buf)
+ {
+ int len;
+
+ len = pg_wchar2mb_with_len(splitctx->wide_str + startpos,
+ buf,
+ endpos - startpos);
+ Assert(len < splitctx->conv_bufsiz);
+ return PointerGetDatum(cstring_to_text_with_len(buf, len));
+ }
+ else
+ {
+ return DirectFunctionCall3(text_substr,
+ PointerGetDatum(splitctx->orig_str),
+ Int32GetDatum(startpos + 1),
+ Int32GetDatum(endpos - startpos));
+ }
+}
+
+/*
+ * regexp_substr()
+ * Return the substring that matches a regular expression pattern
+ */
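+/*
+ * For example, regexp_substr('abc def ghi', '[a-z]+', 1, 2) returns 'def',
+ * the second match when searching from position 1; NULL is returned when
+ * there is no such match.
+ */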
+Datum
+regexp_substr(PG_FUNCTION_ARGS)
+{
+ text *str = PG_GETARG_TEXT_PP(0);
+ text *pattern = PG_GETARG_TEXT_PP(1);
+ int start = 1;
+ int n = 1;
+ text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(4);
+ int subexpr = 0;
+ int so,
+ eo,
+ pos;
+ pg_re_flags re_flags;
+ regexp_matches_ctx *matchctx;
+
+ /* Collect optional parameters */
+ if (PG_NARGS() > 2)
+ {
+ start = PG_GETARG_INT32(2);
+ if (start <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid value for parameter \"%s\": %d",
+ "start", start)));
+ }
+ if (PG_NARGS() > 3)
+ {
+ n = PG_GETARG_INT32(3);
+ if (n <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid value for parameter \"%s\": %d",
+ "n", n)));
+ }
+ if (PG_NARGS() > 5)
+ {
+ subexpr = PG_GETARG_INT32(5);
+ if (subexpr < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid value for parameter \"%s\": %d",
+ "subexpr", subexpr)));
+ }
+
+ /* Determine options */
+ parse_re_flags(&re_flags, flags);
+ /* User mustn't specify 'g' */
+ if (re_flags.glob)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ /* translator: %s is a SQL function name */
+ errmsg("%s does not support the \"global\" option",
+ "regexp_substr()")));
+ /* But we find all the matches anyway */
+ re_flags.glob = true;
+
+ /* Do the matching */
+ matchctx = setup_regexp_matches(str, pattern, &re_flags, start - 1,
+ PG_GET_COLLATION(),
+ (subexpr > 0), /* need submatches? */
+ false, false);
+
+ /* When n exceeds matches return NULL (includes case of no matches) */
+ if (n > matchctx->nmatches)
+ PG_RETURN_NULL();
+
+ /* When subexpr exceeds number of subexpressions return NULL */
+ if (subexpr > matchctx->npatterns)
+ PG_RETURN_NULL();
+
+ /* Select the appropriate match position to return */
+ pos = (n - 1) * matchctx->npatterns;
+ if (subexpr > 0)
+ pos += subexpr - 1;
+ pos *= 2;
+ so = matchctx->match_locs[pos];
+ eo = matchctx->match_locs[pos + 1];
+
+ if (so < 0 || eo < 0)
+ PG_RETURN_NULL(); /* unidentifiable location */
+
+ PG_RETURN_DATUM(DirectFunctionCall3(text_substr,
+ PointerGetDatum(matchctx->orig_str),
+ Int32GetDatum(so + 1),
+ Int32GetDatum(eo - so)));
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_substr_no_start(PG_FUNCTION_ARGS)
+{
+ return regexp_substr(fcinfo);
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_substr_no_n(PG_FUNCTION_ARGS)
+{
+ return regexp_substr(fcinfo);
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_substr_no_flags(PG_FUNCTION_ARGS)
+{
+ return regexp_substr(fcinfo);
+}
+
+/* This is separate to keep the opr_sanity regression test from complaining */
+Datum
+regexp_substr_no_subexpr(PG_FUNCTION_ARGS)
+{
+ return regexp_substr(fcinfo);
+}
+
+/*
+ * regexp_fixed_prefix - extract fixed prefix, if any, for a regexp
+ *
+ * The result is NULL if there is no fixed prefix, else a palloc'd string.
+ * If it is an exact match, not just a prefix, *exact is returned as true.
+ */
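+/*
+ * For example, '^abc' yields the fixed prefix "abc"; '^abc$' additionally
+ * sets *exact, since only that one string can match; and an unanchored
+ * pattern such as 'abc' yields NULL.
+ */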
+char *
+regexp_fixed_prefix(text *text_re, bool case_insensitive, Oid collation,
+ bool *exact)
+{
+ char *result;
+ regex_t *re;
+ int cflags;
+ int re_result;
+ pg_wchar *str;
+ size_t slen;
+ size_t maxlen;
+ char errMsg[100];
+
+ *exact = false; /* default result */
+
+ /* Compile RE */
+ cflags = REG_ADVANCED;
+ if (case_insensitive)
+ cflags |= REG_ICASE;
+
+ re = RE_compile_and_cache(text_re, cflags | REG_NOSUB, collation);
+
+ /* Examine it to see if there's a fixed prefix */
+ re_result = pg_regprefix(re, &str, &slen);
+
+ switch (re_result)
+ {
+ case REG_NOMATCH:
+ return NULL;
+
+ case REG_PREFIX:
+ /* continue with wchar conversion */
+ break;
+
+ case REG_EXACT:
+ *exact = true;
+ /* continue with wchar conversion */
+ break;
+
+ default:
+ /* re failed??? */
+ CHECK_FOR_INTERRUPTS();
+ pg_regerror(re_result, re, errMsg, sizeof(errMsg));
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+ errmsg("regular expression failed: %s", errMsg)));
+ break;
+ }
+
+ /* Convert pg_wchar result back to database encoding */
+ maxlen = pg_database_encoding_max_length() * slen + 1;
+ result = (char *) palloc(maxlen);
+ slen = pg_wchar2mb_with_len(str, result, slen);
+ Assert(slen < maxlen);
+
+ free(str);
+
+ return result;
+}
diff --git a/src/backend/utils/adt/regproc.c b/src/backend/utils/adt/regproc.c
new file mode 100644
index 0000000..6d4c1c2
--- /dev/null
+++ b/src/backend/utils/adt/regproc.c
@@ -0,0 +1,2062 @@
+/*-------------------------------------------------------------------------
+ *
+ * regproc.c
+ * Functions for the built-in types regproc, regclass, regtype, etc.
+ *
+ * These types are all binary-compatible with type Oid, and rely on Oid
+ * for comparison and so forth. Their only interesting behavior is in
+ * special I/O conversion routines.
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/regproc.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+
+#include "access/htup_details.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_class.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_proc.h"
+#include "catalog/pg_ts_config.h"
+#include "catalog/pg_ts_dict.h"
+#include "catalog/pg_type.h"
+#include "lib/stringinfo.h"
+#include "miscadmin.h"
+#include "parser/parse_type.h"
+#include "parser/scansup.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/regproc.h"
+#include "utils/syscache.h"
+#include "utils/varlena.h"
+
+static void parseNameAndArgTypes(const char *string, bool allowNone,
+ List **names, int *nargs, Oid *argtypes);
+
+
+/*****************************************************************************
+ * USER I/O ROUTINES *
+ *****************************************************************************/
+
+/*
+ * regprocin - converts "proname" to proc OID
+ *
+ * We also accept a numeric OID, for symmetry with the output routine.
+ *
+ * '-' signifies unknown (OID 0). In all other cases, the input must
+ * match an existing pg_proc entry.
+ */
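+/*
+ * For example, 'now'::regproc resolves to the OID of the function now(),
+ * whereas 'sum'::regproc fails because more than one function has that
+ * name; regprocedure must be used to disambiguate by argument types.
+ */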
+Datum
+regprocin(PG_FUNCTION_ARGS)
+{
+ char *pro_name_or_oid = PG_GETARG_CSTRING(0);
+ RegProcedure result = InvalidOid;
+ List *names;
+ FuncCandidateList clist;
+
+ /* '-' ? */
+ if (strcmp(pro_name_or_oid, "-") == 0)
+ PG_RETURN_OID(InvalidOid);
+
+ /* Numeric OID? */
+ if (pro_name_or_oid[0] >= '0' &&
+ pro_name_or_oid[0] <= '9' &&
+ strspn(pro_name_or_oid, "0123456789") == strlen(pro_name_or_oid))
+ {
+ result = DatumGetObjectId(DirectFunctionCall1(oidin,
+ CStringGetDatum(pro_name_or_oid)));
+ PG_RETURN_OID(result);
+ }
+
+ /* Else it's a name, possibly schema-qualified */
+
+ /*
+ * We should never get here in bootstrap mode, as all references should
+ * have been resolved by genbki.pl.
+ */
+ if (IsBootstrapProcessingMode())
+ elog(ERROR, "regproc values must be OIDs in bootstrap mode");
+
+ /*
+ * Normal case: parse the name into components and see if it matches any
+ * pg_proc entries in the current search path.
+ */
+ names = stringToQualifiedNameList(pro_name_or_oid);
+ clist = FuncnameGetCandidates(names, -1, NIL, false, false, false, false);
+
+ if (clist == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("function \"%s\" does not exist", pro_name_or_oid)));
+ else if (clist->next != NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_AMBIGUOUS_FUNCTION),
+ errmsg("more than one function named \"%s\"",
+ pro_name_or_oid)));
+
+ result = clist->oid;
+
+ PG_RETURN_OID(result);
+}
+
+/*
+ * to_regproc - converts "proname" to proc OID
+ *
+ * If the name is not found, we return NULL.
+ */
+Datum
+to_regproc(PG_FUNCTION_ARGS)
+{
+ char *pro_name = text_to_cstring(PG_GETARG_TEXT_PP(0));
+ List *names;
+ FuncCandidateList clist;
+
+ /*
+ * Parse the name into components and see if it matches any pg_proc
+ * entries in the current search path.
+ */
+ names = stringToQualifiedNameList(pro_name);
+ clist = FuncnameGetCandidates(names, -1, NIL, false, false, false, true);
+
+ if (clist == NULL || clist->next != NULL)
+ PG_RETURN_NULL();
+
+ PG_RETURN_OID(clist->oid);
+}
+
+/*
+ * regprocout - converts proc OID to "pro_name"
+ */
+Datum
+regprocout(PG_FUNCTION_ARGS)
+{
+ RegProcedure proid = PG_GETARG_OID(0);
+ char *result;
+ HeapTuple proctup;
+
+ if (proid == InvalidOid)
+ {
+ result = pstrdup("-");
+ PG_RETURN_CSTRING(result);
+ }
+
+ proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(proid));
+
+ if (HeapTupleIsValid(proctup))
+ {
+ Form_pg_proc procform = (Form_pg_proc) GETSTRUCT(proctup);
+ char *proname = NameStr(procform->proname);
+
+ /*
+ * In bootstrap mode, skip the fancy namespace stuff and just return
+ * the proc name. (This path is only needed for debugging output
+ * anyway.)
+ */
+ if (IsBootstrapProcessingMode())
+ result = pstrdup(proname);
+ else
+ {
+ char *nspname;
+ FuncCandidateList clist;
+
+ /*
+ * Would this proc be found (uniquely!) by regprocin? If not,
+ * qualify it.
+ */
+ clist = FuncnameGetCandidates(list_make1(makeString(proname)),
+ -1, NIL, false, false, false, false);
+ if (clist != NULL && clist->next == NULL &&
+ clist->oid == proid)
+ nspname = NULL;
+ else
+ nspname = get_namespace_name(procform->pronamespace);
+
+ result = quote_qualified_identifier(nspname, proname);
+ }
+
+ ReleaseSysCache(proctup);
+ }
+ else
+ {
+ /* If OID doesn't match any pg_proc entry, return it numerically */
+ result = (char *) palloc(NAMEDATALEN);
+ snprintf(result, NAMEDATALEN, "%u", proid);
+ }
+
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * regprocrecv - converts external binary format to regproc
+ */
+Datum
+regprocrecv(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidrecv, so share code */
+ return oidrecv(fcinfo);
+}
+
+/*
+ * regprocsend - converts regproc to binary format
+ */
+Datum
+regprocsend(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidsend, so share code */
+ return oidsend(fcinfo);
+}
+
+
+/*
+ * regprocedurein - converts "proname(args)" to proc OID
+ *
+ * We also accept a numeric OID, for symmetry with the output routine.
+ *
+ * '-' signifies unknown (OID 0). In all other cases, the input must
+ * match an existing pg_proc entry.
+ */
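+/*
+ * For example, 'sum(int4)'::regprocedure resolves to the OID of the integer
+ * variant of sum(), even though several functions share that name.
+ */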
+Datum
+regprocedurein(PG_FUNCTION_ARGS)
+{
+ char *pro_name_or_oid = PG_GETARG_CSTRING(0);
+ RegProcedure result = InvalidOid;
+ List *names;
+ int nargs;
+ Oid argtypes[FUNC_MAX_ARGS];
+ FuncCandidateList clist;
+
+ /* '-' ? */
+ if (strcmp(pro_name_or_oid, "-") == 0)
+ PG_RETURN_OID(InvalidOid);
+
+ /* Numeric OID? */
+ if (pro_name_or_oid[0] >= '0' &&
+ pro_name_or_oid[0] <= '9' &&
+ strspn(pro_name_or_oid, "0123456789") == strlen(pro_name_or_oid))
+ {
+ result = DatumGetObjectId(DirectFunctionCall1(oidin,
+ CStringGetDatum(pro_name_or_oid)));
+ PG_RETURN_OID(result);
+ }
+
+ /* The rest of this wouldn't work in bootstrap mode */
+ if (IsBootstrapProcessingMode())
+ elog(ERROR, "regprocedure values must be OIDs in bootstrap mode");
+
+ /*
+ * Else it's a name and arguments. Parse the name and arguments, look up
+ * potential matches in the current namespace search list, and scan to see
+ * which one exactly matches the given argument types. (There will not be
+ * more than one match.)
+ */
+ parseNameAndArgTypes(pro_name_or_oid, false, &names, &nargs, argtypes);
+
+ clist = FuncnameGetCandidates(names, nargs, NIL, false, false,
+ false, false);
+
+ for (; clist; clist = clist->next)
+ {
+ if (memcmp(clist->args, argtypes, nargs * sizeof(Oid)) == 0)
+ break;
+ }
+
+ if (clist == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("function \"%s\" does not exist", pro_name_or_oid)));
+
+ result = clist->oid;
+
+ PG_RETURN_OID(result);
+}
+
+/*
+ * to_regprocedure - converts "proname(args)" to proc OID
+ *
+ * If the name is not found, we return NULL.
+ */
+Datum
+to_regprocedure(PG_FUNCTION_ARGS)
+{
+ char *pro_name = text_to_cstring(PG_GETARG_TEXT_PP(0));
+ List *names;
+ int nargs;
+ Oid argtypes[FUNC_MAX_ARGS];
+ FuncCandidateList clist;
+
+ /*
+ * Parse the name and arguments, look up potential matches in the current
+ * namespace search list, and scan to see which one exactly matches the
+ * given argument types. (There will not be more than one match.)
+ */
+ parseNameAndArgTypes(pro_name, false, &names, &nargs, argtypes);
+
+ clist = FuncnameGetCandidates(names, nargs, NIL, false, false, false, true);
+
+ for (; clist; clist = clist->next)
+ {
+ if (memcmp(clist->args, argtypes, nargs * sizeof(Oid)) == 0)
+ PG_RETURN_OID(clist->oid);
+ }
+
+ PG_RETURN_NULL();
+}
+
+/*
+ * format_procedure - converts proc OID to "pro_name(args)"
+ *
+ * This exports the useful functionality of regprocedureout for use
+ * in other backend modules. The result is a palloc'd string.
+ */
+char *
+format_procedure(Oid procedure_oid)
+{
+ return format_procedure_extended(procedure_oid, 0);
+}
+
+char *
+format_procedure_qualified(Oid procedure_oid)
+{
+ return format_procedure_extended(procedure_oid, FORMAT_PROC_FORCE_QUALIFY);
+}
+
+/*
+ * format_procedure_extended - converts procedure OID to "pro_name(args)"
+ *
+ * This exports the useful functionality of regprocedureout for use
+ * in other backend modules. The result is a palloc'd string, or NULL.
+ *
+ * This is the common code behind format_procedure() and
+ * format_procedure_qualified() above.
+ *
+ * The following bits in 'flags' modify the behavior:
+ * - FORMAT_PROC_INVALID_AS_NULL
+ * if the procedure OID is invalid or unknown, return NULL instead
+ * of the numeric OID.
+ * - FORMAT_PROC_FORCE_QUALIFY
+ * always schema-qualify procedure names, regardless of search_path
+ */
+char *
+format_procedure_extended(Oid procedure_oid, bits16 flags)
+{
+ char *result;
+ HeapTuple proctup;
+
+ proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(procedure_oid));
+
+ if (HeapTupleIsValid(proctup))
+ {
+ Form_pg_proc procform = (Form_pg_proc) GETSTRUCT(proctup);
+ char *proname = NameStr(procform->proname);
+ int nargs = procform->pronargs;
+ int i;
+ char *nspname;
+ StringInfoData buf;
+
+ /* XXX no support here for bootstrap mode */
+ Assert(!IsBootstrapProcessingMode());
+
+ initStringInfo(&buf);
+
+ /*
+ * Would this proc be found (given the right args) by regprocedurein?
+ * If not, or if caller requests it, we need to qualify it.
+ */
+ if ((flags & FORMAT_PROC_FORCE_QUALIFY) == 0 &&
+ FunctionIsVisible(procedure_oid))
+ nspname = NULL;
+ else
+ nspname = get_namespace_name(procform->pronamespace);
+
+ appendStringInfo(&buf, "%s(",
+ quote_qualified_identifier(nspname, proname));
+ for (i = 0; i < nargs; i++)
+ {
+ Oid thisargtype = procform->proargtypes.values[i];
+
+ if (i > 0)
+ appendStringInfoChar(&buf, ',');
+ appendStringInfoString(&buf,
+ (flags & FORMAT_PROC_FORCE_QUALIFY) != 0 ?
+ format_type_be_qualified(thisargtype) :
+ format_type_be(thisargtype));
+ }
+ appendStringInfoChar(&buf, ')');
+
+ result = buf.data;
+
+ ReleaseSysCache(proctup);
+ }
+ else if ((flags & FORMAT_PROC_INVALID_AS_NULL) != 0)
+ {
+ /* If object is undefined, return NULL as wanted by caller */
+ result = NULL;
+ }
+ else
+ {
+ /* If OID doesn't match any pg_proc entry, return it numerically */
+ result = (char *) palloc(NAMEDATALEN);
+ snprintf(result, NAMEDATALEN, "%u", procedure_oid);
+ }
+
+ return result;
+}
+
+/*
+ * Output an objname/objargs representation for the procedure with the
+ * given OID. If it doesn't exist, an error is thrown unless missing_ok is
+ * true, in which case the output lists are left untouched.
+ *
+ * This can be used to feed get_object_address.
+ */
+void
+format_procedure_parts(Oid procedure_oid, List **objnames, List **objargs,
+ bool missing_ok)
+{
+ HeapTuple proctup;
+ Form_pg_proc procform;
+ int nargs;
+ int i;
+
+ proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(procedure_oid));
+
+ if (!HeapTupleIsValid(proctup))
+ {
+ if (!missing_ok)
+ elog(ERROR, "cache lookup failed for procedure with OID %u", procedure_oid);
+ return;
+ }
+
+ procform = (Form_pg_proc) GETSTRUCT(proctup);
+ nargs = procform->pronargs;
+
+ *objnames = list_make2(get_namespace_name_or_temp(procform->pronamespace),
+ pstrdup(NameStr(procform->proname)));
+ *objargs = NIL;
+ for (i = 0; i < nargs; i++)
+ {
+ Oid thisargtype = procform->proargtypes.values[i];
+
+ *objargs = lappend(*objargs, format_type_be_qualified(thisargtype));
+ }
+
+ ReleaseSysCache(proctup);
+}
+
+/*
+ * regprocedureout - converts proc OID to "pro_name(args)"
+ */
+Datum
+regprocedureout(PG_FUNCTION_ARGS)
+{
+ RegProcedure proid = PG_GETARG_OID(0);
+ char *result;
+
+ if (proid == InvalidOid)
+ result = pstrdup("-");
+ else
+ result = format_procedure(proid);
+
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * regprocedurerecv - converts external binary format to regprocedure
+ */
+Datum
+regprocedurerecv(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidrecv, so share code */
+ return oidrecv(fcinfo);
+}
+
+/*
+ * regproceduresend - converts regprocedure to binary format
+ */
+Datum
+regproceduresend(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidsend, so share code */
+ return oidsend(fcinfo);
+}
+
+
+/*
+ * regoperin - converts "oprname" to operator OID
+ *
+ * We also accept a numeric OID, for symmetry with the output routine.
+ *
+ * '0' signifies unknown (OID 0). In all other cases, the input must
+ * match an existing pg_operator entry.
+ */
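+/*
+ * For example, '||/'::regoper resolves to the (unique) cube-root operator,
+ * whereas '+'::regoper fails because many operators share that name;
+ * regoperator must be used to disambiguate by argument types.
+ */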
+Datum
+regoperin(PG_FUNCTION_ARGS)
+{
+ char *opr_name_or_oid = PG_GETARG_CSTRING(0);
+ Oid result = InvalidOid;
+ List *names;
+ FuncCandidateList clist;
+
+ /* '0' ? */
+ if (strcmp(opr_name_or_oid, "0") == 0)
+ PG_RETURN_OID(InvalidOid);
+
+ /* Numeric OID? */
+ if (opr_name_or_oid[0] >= '0' &&
+ opr_name_or_oid[0] <= '9' &&
+ strspn(opr_name_or_oid, "0123456789") == strlen(opr_name_or_oid))
+ {
+ result = DatumGetObjectId(DirectFunctionCall1(oidin,
+ CStringGetDatum(opr_name_or_oid)));
+ PG_RETURN_OID(result);
+ }
+
+ /* Else it's a name, possibly schema-qualified */
+
+ /* The rest of this wouldn't work in bootstrap mode */
+ if (IsBootstrapProcessingMode())
+ elog(ERROR, "regoper values must be OIDs in bootstrap mode");
+
+ /*
+ * Normal case: parse the name into components and see if it matches any
+ * pg_operator entries in the current search path.
+ */
+ names = stringToQualifiedNameList(opr_name_or_oid);
+ clist = OpernameGetCandidates(names, '\0', false);
+
+ if (clist == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("operator does not exist: %s", opr_name_or_oid)));
+ else if (clist->next != NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_AMBIGUOUS_FUNCTION),
+ errmsg("more than one operator named %s",
+ opr_name_or_oid)));
+
+ result = clist->oid;
+
+ PG_RETURN_OID(result);
+}
+
+/*
+ * to_regoper - converts "oprname" to operator OID
+ *
+ * If the name is not found, we return NULL.
+ */
+Datum
+to_regoper(PG_FUNCTION_ARGS)
+{
+ char *opr_name = text_to_cstring(PG_GETARG_TEXT_PP(0));
+ List *names;
+ FuncCandidateList clist;
+
+ /*
+ * Parse the name into components and see if it matches any pg_operator
+ * entries in the current search path.
+ */
+ names = stringToQualifiedNameList(opr_name);
+ clist = OpernameGetCandidates(names, '\0', true);
+
+ if (clist == NULL || clist->next != NULL)
+ PG_RETURN_NULL();
+
+ PG_RETURN_OID(clist->oid);
+}
+
+/*
+ * regoperout - converts operator OID to "opr_name"
+ */
+Datum
+regoperout(PG_FUNCTION_ARGS)
+{
+ Oid oprid = PG_GETARG_OID(0);
+ char *result;
+ HeapTuple opertup;
+
+ if (oprid == InvalidOid)
+ {
+ result = pstrdup("0");
+ PG_RETURN_CSTRING(result);
+ }
+
+ opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(oprid));
+
+ if (HeapTupleIsValid(opertup))
+ {
+ Form_pg_operator operform = (Form_pg_operator) GETSTRUCT(opertup);
+ char *oprname = NameStr(operform->oprname);
+
+ /*
+ * In bootstrap mode, skip the fancy namespace stuff and just return
+ * the oper name. (This path is only needed for debugging output
+ * anyway.)
+ */
+ if (IsBootstrapProcessingMode())
+ result = pstrdup(oprname);
+ else
+ {
+ FuncCandidateList clist;
+
+ /*
+ * Would this oper be found (uniquely!) by regoperin? If not,
+ * qualify it.
+ */
+ clist = OpernameGetCandidates(list_make1(makeString(oprname)),
+ '\0', false);
+ if (clist != NULL && clist->next == NULL &&
+ clist->oid == oprid)
+ result = pstrdup(oprname);
+ else
+ {
+ const char *nspname;
+
+ nspname = get_namespace_name(operform->oprnamespace);
+ nspname = quote_identifier(nspname);
+ result = (char *) palloc(strlen(nspname) + strlen(oprname) + 2);
+ sprintf(result, "%s.%s", nspname, oprname);
+ }
+ }
+
+ ReleaseSysCache(opertup);
+ }
+ else
+ {
+ /*
+ * If OID doesn't match any pg_operator entry, return it numerically
+ */
+ result = (char *) palloc(NAMEDATALEN);
+ snprintf(result, NAMEDATALEN, "%u", oprid);
+ }
+
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * regoperrecv - converts external binary format to regoper
+ */
+Datum
+regoperrecv(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidrecv, so share code */
+ return oidrecv(fcinfo);
+}
+
+/*
+ * regopersend - converts regoper to binary format
+ */
+Datum
+regopersend(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidsend, so share code */
+ return oidsend(fcinfo);
+}
+
+
+/*
+ * regoperatorin - converts "oprname(args)" to operator OID
+ *
+ * We also accept a numeric OID, for symmetry with the output routine.
+ *
+ * '0' signifies unknown (OID 0). In all other cases, the input must
+ * match an existing pg_operator entry.
+ */
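+/*
+ * For example, '+(integer,integer)'::regoperator resolves to integer
+ * addition, and '-(NONE,integer)'::regoperator to unary integer minus.
+ */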
+Datum
+regoperatorin(PG_FUNCTION_ARGS)
+{
+ char *opr_name_or_oid = PG_GETARG_CSTRING(0);
+ Oid result;
+ List *names;
+ int nargs;
+ Oid argtypes[FUNC_MAX_ARGS];
+
+ /* '0' ? */
+ if (strcmp(opr_name_or_oid, "0") == 0)
+ PG_RETURN_OID(InvalidOid);
+
+ /* Numeric OID? */
+ if (opr_name_or_oid[0] >= '0' &&
+ opr_name_or_oid[0] <= '9' &&
+ strspn(opr_name_or_oid, "0123456789") == strlen(opr_name_or_oid))
+ {
+ result = DatumGetObjectId(DirectFunctionCall1(oidin,
+ CStringGetDatum(opr_name_or_oid)));
+ PG_RETURN_OID(result);
+ }
+
+ /* The rest of this wouldn't work in bootstrap mode */
+ if (IsBootstrapProcessingMode())
+ elog(ERROR, "regoperator values must be OIDs in bootstrap mode");
+
+ /*
+ * Else it's a name and arguments. Parse the name and arguments, look up
+ * potential matches in the current namespace search list, and scan to see
+ * which one exactly matches the given argument types. (There will not be
+ * more than one match.)
+ */
+ parseNameAndArgTypes(opr_name_or_oid, true, &names, &nargs, argtypes);
+ if (nargs == 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_PARAMETER),
+ errmsg("missing argument"),
+ errhint("Use NONE to denote the missing argument of a unary operator.")));
+ if (nargs != 2)
+ ereport(ERROR,
+ (errcode(ERRCODE_TOO_MANY_ARGUMENTS),
+ errmsg("too many arguments"),
+ errhint("Provide two argument types for operator.")));
+
+ result = OpernameGetOprid(names, argtypes[0], argtypes[1]);
+
+ if (!OidIsValid(result))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("operator does not exist: %s", opr_name_or_oid)));
+
+ PG_RETURN_OID(result);
+}
+
+/*
+ * to_regoperator - converts "oprname(args)" to operator OID
+ *
+ * If the name is not found, we return NULL.
+ */
+Datum
+to_regoperator(PG_FUNCTION_ARGS)
+{
+ char *opr_name_or_oid = text_to_cstring(PG_GETARG_TEXT_PP(0));
+ Oid result;
+ List *names;
+ int nargs;
+ Oid argtypes[FUNC_MAX_ARGS];
+
+ /*
+ * Parse the name and arguments, look up potential matches in the current
+ * namespace search list, and scan to see which one exactly matches the
+ * given argument types. (There will not be more than one match.)
+ */
+ parseNameAndArgTypes(opr_name_or_oid, true, &names, &nargs, argtypes);
+ if (nargs == 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_PARAMETER),
+ errmsg("missing argument"),
+ errhint("Use NONE to denote the missing argument of a unary operator.")));
+ if (nargs != 2)
+ ereport(ERROR,
+ (errcode(ERRCODE_TOO_MANY_ARGUMENTS),
+ errmsg("too many arguments"),
+ errhint("Provide two argument types for operator.")));
+
+ result = OpernameGetOprid(names, argtypes[0], argtypes[1]);
+
+ if (!OidIsValid(result))
+ PG_RETURN_NULL();
+
+ PG_RETURN_OID(result);
+}
+
+/*
+ * format_operator_extended - converts operator OID to "opr_name(args)"
+ *
+ * This exports the useful functionality of regoperatorout for use
+ * in other backend modules. The result is a palloc'd string, or NULL.
+ *
+ * The following bits in 'flags' modify the behavior:
+ * - FORMAT_OPERATOR_INVALID_AS_NULL
+ * if the operator OID is invalid or unknown, return NULL instead
+ * of the numeric OID.
+ * - FORMAT_OPERATOR_FORCE_QUALIFY
+ * always schema-qualify operator names, regardless of search_path
+ */
+char *
+format_operator_extended(Oid operator_oid, bits16 flags)
+{
+ char *result;
+ HeapTuple opertup;
+
+ opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(operator_oid));
+
+ if (HeapTupleIsValid(opertup))
+ {
+ Form_pg_operator operform = (Form_pg_operator) GETSTRUCT(opertup);
+ char *oprname = NameStr(operform->oprname);
+ char *nspname;
+ StringInfoData buf;
+
+ /* XXX no support here for bootstrap mode */
+ Assert(!IsBootstrapProcessingMode());
+
+ initStringInfo(&buf);
+
+ /*
+ * Would this oper be found (given the right args) by regoperatorin?
+ * If not, or if caller explicitly requests it, we need to qualify it.
+ */
+ if ((flags & FORMAT_OPERATOR_FORCE_QUALIFY) != 0 ||
+ !OperatorIsVisible(operator_oid))
+ {
+ nspname = get_namespace_name(operform->oprnamespace);
+ appendStringInfo(&buf, "%s.",
+ quote_identifier(nspname));
+ }
+
+ appendStringInfo(&buf, "%s(", oprname);
+
+ if (operform->oprleft)
+ appendStringInfo(&buf, "%s,",
+ (flags & FORMAT_OPERATOR_FORCE_QUALIFY) != 0 ?
+ format_type_be_qualified(operform->oprleft) :
+ format_type_be(operform->oprleft));
+ else
+ appendStringInfoString(&buf, "NONE,");
+
+ if (operform->oprright)
+ appendStringInfo(&buf, "%s)",
+ (flags & FORMAT_OPERATOR_FORCE_QUALIFY) != 0 ?
+ format_type_be_qualified(operform->oprright) :
+ format_type_be(operform->oprright));
+ else
+ appendStringInfoString(&buf, "NONE)");
+
+ result = buf.data;
+
+ ReleaseSysCache(opertup);
+ }
+ else if ((flags & FORMAT_OPERATOR_INVALID_AS_NULL) != 0)
+ {
+ /* If object is undefined, return NULL as wanted by caller */
+ result = NULL;
+ }
+ else
+ {
+ /*
+ * If OID doesn't match any pg_operator entry, return it numerically
+ */
+ result = (char *) palloc(NAMEDATALEN);
+ snprintf(result, NAMEDATALEN, "%u", operator_oid);
+ }
+
+ return result;
+}
+
+char *
+format_operator(Oid operator_oid)
+{
+ return format_operator_extended(operator_oid, 0);
+}
+
+char *
+format_operator_qualified(Oid operator_oid)
+{
+ return format_operator_extended(operator_oid,
+ FORMAT_OPERATOR_FORCE_QUALIFY);
+}
+
+void
+format_operator_parts(Oid operator_oid, List **objnames, List **objargs,
+ bool missing_ok)
+{
+ HeapTuple opertup;
+ Form_pg_operator oprForm;
+
+ opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(operator_oid));
+ if (!HeapTupleIsValid(opertup))
+ {
+ if (!missing_ok)
+ elog(ERROR, "cache lookup failed for operator with OID %u",
+ operator_oid);
+ return;
+ }
+
+ oprForm = (Form_pg_operator) GETSTRUCT(opertup);
+ *objnames = list_make2(get_namespace_name_or_temp(oprForm->oprnamespace),
+ pstrdup(NameStr(oprForm->oprname)));
+ *objargs = NIL;
+ if (oprForm->oprleft)
+ *objargs = lappend(*objargs,
+ format_type_be_qualified(oprForm->oprleft));
+ if (oprForm->oprright)
+ *objargs = lappend(*objargs,
+ format_type_be_qualified(oprForm->oprright));
+
+ ReleaseSysCache(opertup);
+}
+
+/*
+ * regoperatorout - converts operator OID to "opr_name(args)"
+ */
+Datum
+regoperatorout(PG_FUNCTION_ARGS)
+{
+ Oid oprid = PG_GETARG_OID(0);
+ char *result;
+
+ if (oprid == InvalidOid)
+ result = pstrdup("0");
+ else
+ result = format_operator(oprid);
+
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * regoperatorrecv - converts external binary format to regoperator
+ */
+Datum
+regoperatorrecv(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidrecv, so share code */
+ return oidrecv(fcinfo);
+}
+
+/*
+ * regoperatorsend - converts regoperator to binary format
+ */
+Datum
+regoperatorsend(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidsend, so share code */
+ return oidsend(fcinfo);
+}
+
+
+/*
+ * regclassin - converts "classname" to class OID
+ *
+ * We also accept a numeric OID, for symmetry with the output routine.
+ *
+ * '-' signifies unknown (OID 0). In all other cases, the input must
+ * match an existing pg_class entry.
+ */
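+/*
+ * For example, 'pg_class'::regclass resolves to the OID of the pg_class
+ * system catalog; a schema-qualified name such as 'pg_catalog.pg_class'
+ * works as well.
+ */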
+Datum
+regclassin(PG_FUNCTION_ARGS)
+{
+ char *class_name_or_oid = PG_GETARG_CSTRING(0);
+ Oid result = InvalidOid;
+ List *names;
+
+ /* '-' ? */
+ if (strcmp(class_name_or_oid, "-") == 0)
+ PG_RETURN_OID(InvalidOid);
+
+ /* Numeric OID? */
+ if (class_name_or_oid[0] >= '0' &&
+ class_name_or_oid[0] <= '9' &&
+ strspn(class_name_or_oid, "0123456789") == strlen(class_name_or_oid))
+ {
+ result = DatumGetObjectId(DirectFunctionCall1(oidin,
+ CStringGetDatum(class_name_or_oid)));
+ PG_RETURN_OID(result);
+ }
+
+ /* Else it's a name, possibly schema-qualified */
+
+ /* The rest of this wouldn't work in bootstrap mode */
+ if (IsBootstrapProcessingMode())
+ elog(ERROR, "regclass values must be OIDs in bootstrap mode");
+
+ /*
+ * Normal case: parse the name into components and see if it matches any
+ * pg_class entries in the current search path.
+ */
+ names = stringToQualifiedNameList(class_name_or_oid);
+
+ /* We might not even have permissions on this relation; don't lock it. */
+ result = RangeVarGetRelid(makeRangeVarFromNameList(names), NoLock, false);
+
+ PG_RETURN_OID(result);
+}
+
+/*
+ * to_regclass - converts "classname" to class OID
+ *
+ * If the name is not found, we return NULL.
+ */
+Datum
+to_regclass(PG_FUNCTION_ARGS)
+{
+ char *class_name = text_to_cstring(PG_GETARG_TEXT_PP(0));
+ Oid result;
+ List *names;
+
+ /*
+ * Parse the name into components and see if it matches any pg_class
+ * entries in the current search path.
+ */
+ names = stringToQualifiedNameList(class_name);
+
+ /* We might not even have permissions on this relation; don't lock it. */
+ result = RangeVarGetRelid(makeRangeVarFromNameList(names), NoLock, true);
+
+ if (OidIsValid(result))
+ PG_RETURN_OID(result);
+ else
+ PG_RETURN_NULL();
+}
+
+/*
+ * regclassout - converts class OID to "class_name"
+ */
+Datum
+regclassout(PG_FUNCTION_ARGS)
+{
+ Oid classid = PG_GETARG_OID(0);
+ char *result;
+ HeapTuple classtup;
+
+ if (classid == InvalidOid)
+ {
+ result = pstrdup("-");
+ PG_RETURN_CSTRING(result);
+ }
+
+ classtup = SearchSysCache1(RELOID, ObjectIdGetDatum(classid));
+
+ if (HeapTupleIsValid(classtup))
+ {
+ Form_pg_class classform = (Form_pg_class) GETSTRUCT(classtup);
+ char *classname = NameStr(classform->relname);
+
+ /*
+ * In bootstrap mode, skip the fancy namespace stuff and just return
+ * the class name. (This path is only needed for debugging output
+ * anyway.)
+ */
+ if (IsBootstrapProcessingMode())
+ result = pstrdup(classname);
+ else
+ {
+ char *nspname;
+
+ /*
+ * Would this class be found by regclassin? If not, qualify it.
+ */
+ if (RelationIsVisible(classid))
+ nspname = NULL;
+ else
+ nspname = get_namespace_name(classform->relnamespace);
+
+ result = quote_qualified_identifier(nspname, classname);
+ }
+
+ ReleaseSysCache(classtup);
+ }
+ else
+ {
+ /* If OID doesn't match any pg_class entry, return it numerically */
+ result = (char *) palloc(NAMEDATALEN);
+ snprintf(result, NAMEDATALEN, "%u", classid);
+ }
+
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * regclassrecv - converts external binary format to regclass
+ */
+Datum
+regclassrecv(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidrecv, so share code */
+ return oidrecv(fcinfo);
+}
+
+/*
+ * regclasssend - converts regclass to binary format
+ */
+Datum
+regclasssend(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidsend, so share code */
+ return oidsend(fcinfo);
+}
+
+
+/*
+ * regcollationin - converts "collationname" to collation OID
+ *
+ * We also accept a numeric OID, for symmetry with the output routine.
+ *
+ * '-' signifies unknown (OID 0). In all other cases, the input must
+ * match an existing pg_collation entry.
+ */
+Datum
+regcollationin(PG_FUNCTION_ARGS)
+{
+ char *collation_name_or_oid = PG_GETARG_CSTRING(0);
+ Oid result = InvalidOid;
+ List *names;
+
+ /* '-' ? */
+ if (strcmp(collation_name_or_oid, "-") == 0)
+ PG_RETURN_OID(InvalidOid);
+
+ /* Numeric OID? */
+ if (collation_name_or_oid[0] >= '0' &&
+ collation_name_or_oid[0] <= '9' &&
+ strspn(collation_name_or_oid, "0123456789") == strlen(collation_name_or_oid))
+ {
+ result = DatumGetObjectId(DirectFunctionCall1(oidin,
+ CStringGetDatum(collation_name_or_oid)));
+ PG_RETURN_OID(result);
+ }
+
+ /* Else it's a name, possibly schema-qualified */
+
+ /* The rest of this wouldn't work in bootstrap mode */
+ if (IsBootstrapProcessingMode())
+ elog(ERROR, "regcollation values must be OIDs in bootstrap mode");
+
+ /*
+ * Normal case: parse the name into components and see if it matches any
+ * pg_collation entries in the current search path.
+ */
+ names = stringToQualifiedNameList(collation_name_or_oid);
+
+ result = get_collation_oid(names, false);
+
+ PG_RETURN_OID(result);
+}
+
+/*
+ * to_regcollation - converts "collationname" to collation OID
+ *
+ * If the name is not found, we return NULL.
+ */
+Datum
+to_regcollation(PG_FUNCTION_ARGS)
+{
+ char *collation_name = text_to_cstring(PG_GETARG_TEXT_PP(0));
+ Oid result;
+ List *names;
+
+ /*
+ * Parse the name into components and see if it matches any pg_collation
+ * entries in the current search path.
+ */
+ names = stringToQualifiedNameList(collation_name);
+
+ /* We might not even have permissions on this relation; don't lock it. */
+ result = get_collation_oid(names, true);
+
+ if (OidIsValid(result))
+ PG_RETURN_OID(result);
+ else
+ PG_RETURN_NULL();
+}
+
+/*
+ * regcollationout - converts collation OID to "collation_name"
+ */
+Datum
+regcollationout(PG_FUNCTION_ARGS)
+{
+ Oid collationid = PG_GETARG_OID(0);
+ char *result;
+ HeapTuple collationtup;
+
+ if (collationid == InvalidOid)
+ {
+ result = pstrdup("-");
+ PG_RETURN_CSTRING(result);
+ }
+
+ collationtup = SearchSysCache1(COLLOID, ObjectIdGetDatum(collationid));
+
+ if (HeapTupleIsValid(collationtup))
+ {
+ Form_pg_collation collationform = (Form_pg_collation) GETSTRUCT(collationtup);
+ char *collationname = NameStr(collationform->collname);
+
+ /*
+ * In bootstrap mode, skip the fancy namespace stuff and just return
+ * the collation name. (This path is only needed for debugging output
+ * anyway.)
+ */
+ if (IsBootstrapProcessingMode())
+ result = pstrdup(collationname);
+ else
+ {
+ char *nspname;
+
+ /*
+ * Would this collation be found by regcollationin? If not,
+ * qualify it.
+ */
+ if (CollationIsVisible(collationid))
+ nspname = NULL;
+ else
+ nspname = get_namespace_name(collationform->collnamespace);
+
+ result = quote_qualified_identifier(nspname, collationname);
+ }
+
+ ReleaseSysCache(collationtup);
+ }
+ else
+ {
+ /* If OID doesn't match any pg_collation entry, return it numerically */
+ result = (char *) palloc(NAMEDATALEN);
+ snprintf(result, NAMEDATALEN, "%u", collationid);
+ }
+
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * regcollationrecv - converts external binary format to regcollation
+ */
+Datum
+regcollationrecv(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidrecv, so share code */
+ return oidrecv(fcinfo);
+}
+
+/*
+ * regcollationsend - converts regcollation to binary format
+ */
+Datum
+regcollationsend(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidsend, so share code */
+ return oidsend(fcinfo);
+}
+
+
+/*
+ * regtypein - converts "typename" to type OID
+ *
+ * The type name can be specified using the full type syntax recognized by
+ * the parser; for example, DOUBLE PRECISION and INTEGER[] will work and be
+ * translated to the correct type OIDs. (We ignore any typmod info
+ * generated by the parser, however.)
+ *
+ * We also accept a numeric OID, for symmetry with the output routine,
+ * and for possible use in bootstrap mode.
+ *
+ * '-' signifies unknown (OID 0). In all other cases, the input must
+ * match an existing pg_type entry.
+ */
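+/*
+ * For example, 'integer'::regtype, 'int4'::regtype, and
+ * 'pg_catalog.int4'::regtype all resolve to the same type OID, and
+ * decorated forms such as 'double precision'::regtype and
+ * 'integer[]'::regtype are accepted too.
+ */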
+Datum
+regtypein(PG_FUNCTION_ARGS)
+{
+ char *typ_name_or_oid = PG_GETARG_CSTRING(0);
+ Oid result = InvalidOid;
+ int32 typmod;
+
+ /* '-' ? */
+ if (strcmp(typ_name_or_oid, "-") == 0)
+ PG_RETURN_OID(InvalidOid);
+
+ /* Numeric OID? */
+ if (typ_name_or_oid[0] >= '0' &&
+ typ_name_or_oid[0] <= '9' &&
+ strspn(typ_name_or_oid, "0123456789") == strlen(typ_name_or_oid))
+ {
+ result = DatumGetObjectId(DirectFunctionCall1(oidin,
+ CStringGetDatum(typ_name_or_oid)));
+ PG_RETURN_OID(result);
+ }
+
+ /* Else it's a type name, possibly schema-qualified or decorated */
+
+ /* The rest of this wouldn't work in bootstrap mode */
+ if (IsBootstrapProcessingMode())
+ elog(ERROR, "regtype values must be OIDs in bootstrap mode");
+
+ /*
+ * Normal case: invoke the full parser to deal with special cases such as
+ * array syntax.
+ */
+ parseTypeString(typ_name_or_oid, &result, &typmod, false);
+
+ PG_RETURN_OID(result);
+}
+
+/*
+ * to_regtype - converts "typename" to type OID
+ *
+ * If the name is not found, we return NULL.
+ */
+Datum
+to_regtype(PG_FUNCTION_ARGS)
+{
+ char *typ_name = text_to_cstring(PG_GETARG_TEXT_PP(0));
+ Oid result;
+ int32 typmod;
+
+ /*
+ * Invoke the full parser to deal with special cases such as array syntax.
+ */
+ parseTypeString(typ_name, &result, &typmod, true);
+
+ if (OidIsValid(result))
+ PG_RETURN_OID(result);
+ else
+ PG_RETURN_NULL();
+}
+
+/*
+ * regtypeout - converts type OID to "typ_name"
+ */
+Datum
+regtypeout(PG_FUNCTION_ARGS)
+{
+ Oid typid = PG_GETARG_OID(0);
+ char *result;
+ HeapTuple typetup;
+
+ if (typid == InvalidOid)
+ {
+ result = pstrdup("-");
+ PG_RETURN_CSTRING(result);
+ }
+
+ typetup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid));
+
+ if (HeapTupleIsValid(typetup))
+ {
+ Form_pg_type typeform = (Form_pg_type) GETSTRUCT(typetup);
+
+ /*
+ * In bootstrap mode, skip the fancy namespace stuff and just return
+ * the type name. (This path is only needed for debugging output
+ * anyway.)
+ */
+ if (IsBootstrapProcessingMode())
+ {
+ char *typname = NameStr(typeform->typname);
+
+ result = pstrdup(typname);
+ }
+ else
+ result = format_type_be(typid);
+
+ ReleaseSysCache(typetup);
+ }
+ else
+ {
+ /* If OID doesn't match any pg_type entry, return it numerically */
+ result = (char *) palloc(NAMEDATALEN);
+ snprintf(result, NAMEDATALEN, "%u", typid);
+ }
+
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * regtyperecv - converts external binary format to regtype
+ */
+Datum
+regtyperecv(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidrecv, so share code */
+ return oidrecv(fcinfo);
+}
+
+/*
+ * regtypesend - converts regtype to binary format
+ */
+Datum
+regtypesend(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidsend, so share code */
+ return oidsend(fcinfo);
+}
+
+
+/*
+ * regconfigin - converts "tsconfigname" to tsconfig OID
+ *
+ * We also accept a numeric OID, for symmetry with the output routine.
+ *
+ * '-' signifies unknown (OID 0). In all other cases, the input must
+ * match an existing pg_ts_config entry.
+ */
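+/*
+ * For example, 'english'::regconfig resolves to the OID of the standard
+ * English text search configuration.
+ */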
+Datum
+regconfigin(PG_FUNCTION_ARGS)
+{
+ char *cfg_name_or_oid = PG_GETARG_CSTRING(0);
+ Oid result;
+ List *names;
+
+ /* '-' ? */
+ if (strcmp(cfg_name_or_oid, "-") == 0)
+ PG_RETURN_OID(InvalidOid);
+
+ /* Numeric OID? */
+ if (cfg_name_or_oid[0] >= '0' &&
+ cfg_name_or_oid[0] <= '9' &&
+ strspn(cfg_name_or_oid, "0123456789") == strlen(cfg_name_or_oid))
+ {
+ result = DatumGetObjectId(DirectFunctionCall1(oidin,
+ CStringGetDatum(cfg_name_or_oid)));
+ PG_RETURN_OID(result);
+ }
+
+ /* The rest of this wouldn't work in bootstrap mode */
+ if (IsBootstrapProcessingMode())
+ elog(ERROR, "regconfig values must be OIDs in bootstrap mode");
+
+ /*
+ * Normal case: parse the name into components and see if it matches any
+ * pg_ts_config entries in the current search path.
+ */
+ names = stringToQualifiedNameList(cfg_name_or_oid);
+
+ result = get_ts_config_oid(names, false);
+
+ PG_RETURN_OID(result);
+}
+
+/*
+ * regconfigout - converts tsconfig OID to "tsconfigname"
+ */
+Datum
+regconfigout(PG_FUNCTION_ARGS)
+{
+ Oid cfgid = PG_GETARG_OID(0);
+ char *result;
+ HeapTuple cfgtup;
+
+ if (cfgid == InvalidOid)
+ {
+ result = pstrdup("-");
+ PG_RETURN_CSTRING(result);
+ }
+
+ cfgtup = SearchSysCache1(TSCONFIGOID, ObjectIdGetDatum(cfgid));
+
+ if (HeapTupleIsValid(cfgtup))
+ {
+ Form_pg_ts_config cfgform = (Form_pg_ts_config) GETSTRUCT(cfgtup);
+ char *cfgname = NameStr(cfgform->cfgname);
+ char *nspname;
+
+ /*
+ * Would this config be found by regconfigin? If not, qualify it.
+ */
+ if (TSConfigIsVisible(cfgid))
+ nspname = NULL;
+ else
+ nspname = get_namespace_name(cfgform->cfgnamespace);
+
+ result = quote_qualified_identifier(nspname, cfgname);
+
+ ReleaseSysCache(cfgtup);
+ }
+ else
+ {
+ /* If OID doesn't match any pg_ts_config row, return it numerically */
+ result = (char *) palloc(NAMEDATALEN);
+ snprintf(result, NAMEDATALEN, "%u", cfgid);
+ }
+
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * regconfigrecv - converts external binary format to regconfig
+ */
+Datum
+regconfigrecv(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidrecv, so share code */
+ return oidrecv(fcinfo);
+}
+
+/*
+ * regconfigsend - converts regconfig to binary format
+ */
+Datum
+regconfigsend(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidsend, so share code */
+ return oidsend(fcinfo);
+}
+
+
+/*
+ * regdictionaryin - converts "tsdictionaryname" to tsdictionary OID
+ *
+ * We also accept a numeric OID, for symmetry with the output routine.
+ *
+ * '-' signifies unknown (OID 0). In all other cases, the input must
+ * match an existing pg_ts_dict entry.
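+ *
+ * For example (illustrative only), the built-in "simple" dictionary can be
+ * looked up with
+ *     SELECT 'simple'::regdictionary;
+ * which resolves the name via the current search path.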
+ */
+Datum
+regdictionaryin(PG_FUNCTION_ARGS)
+{
+ char *dict_name_or_oid = PG_GETARG_CSTRING(0);
+ Oid result;
+ List *names;
+
+ /* '-' ? */
+ if (strcmp(dict_name_or_oid, "-") == 0)
+ PG_RETURN_OID(InvalidOid);
+
+ /* Numeric OID? */
+ if (dict_name_or_oid[0] >= '0' &&
+ dict_name_or_oid[0] <= '9' &&
+ strspn(dict_name_or_oid, "0123456789") == strlen(dict_name_or_oid))
+ {
+ result = DatumGetObjectId(DirectFunctionCall1(oidin,
+ CStringGetDatum(dict_name_or_oid)));
+ PG_RETURN_OID(result);
+ }
+
+ /* The rest of this wouldn't work in bootstrap mode */
+ if (IsBootstrapProcessingMode())
+ elog(ERROR, "regdictionary values must be OIDs in bootstrap mode");
+
+ /*
+ * Normal case: parse the name into components and see if it matches any
+ * pg_ts_dict entries in the current search path.
+ */
+ names = stringToQualifiedNameList(dict_name_or_oid);
+
+ result = get_ts_dict_oid(names, false);
+
+ PG_RETURN_OID(result);
+}
+
+/*
+ * regdictionaryout - converts tsdictionary OID to "tsdictionaryname"
+ */
+Datum
+regdictionaryout(PG_FUNCTION_ARGS)
+{
+ Oid dictid = PG_GETARG_OID(0);
+ char *result;
+ HeapTuple dicttup;
+
+ if (dictid == InvalidOid)
+ {
+ result = pstrdup("-");
+ PG_RETURN_CSTRING(result);
+ }
+
+ dicttup = SearchSysCache1(TSDICTOID, ObjectIdGetDatum(dictid));
+
+ if (HeapTupleIsValid(dicttup))
+ {
+ Form_pg_ts_dict dictform = (Form_pg_ts_dict) GETSTRUCT(dicttup);
+ char *dictname = NameStr(dictform->dictname);
+ char *nspname;
+
+ /*
+ * Would this dictionary be found by regdictionaryin? If not, qualify
+ * it.
+ */
+ if (TSDictionaryIsVisible(dictid))
+ nspname = NULL;
+ else
+ nspname = get_namespace_name(dictform->dictnamespace);
+
+ result = quote_qualified_identifier(nspname, dictname);
+
+ ReleaseSysCache(dicttup);
+ }
+ else
+ {
+ /* If OID doesn't match any pg_ts_dict row, return it numerically */
+ result = (char *) palloc(NAMEDATALEN);
+ snprintf(result, NAMEDATALEN, "%u", dictid);
+ }
+
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * regdictionaryrecv - converts external binary format to regdictionary
+ */
+Datum
+regdictionaryrecv(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidrecv, so share code */
+ return oidrecv(fcinfo);
+}
+
+/*
+ * regdictionarysend - converts regdictionary to binary format
+ */
+Datum
+regdictionarysend(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidsend, so share code */
+ return oidsend(fcinfo);
+}
+
+/*
+ * regrolein - converts "rolename" to role OID
+ *
+ * We also accept a numeric OID, for symmetry with the output routine.
+ *
+ * '-' signifies unknown (OID 0). In all other cases, the input must
+ * match an existing pg_authid entry.
+ */
+Datum
+regrolein(PG_FUNCTION_ARGS)
+{
+ char *role_name_or_oid = PG_GETARG_CSTRING(0);
+ Oid result;
+ List *names;
+
+ /* '-' ? */
+ if (strcmp(role_name_or_oid, "-") == 0)
+ PG_RETURN_OID(InvalidOid);
+
+ /* Numeric OID? */
+ if (role_name_or_oid[0] >= '0' &&
+ role_name_or_oid[0] <= '9' &&
+ strspn(role_name_or_oid, "0123456789") == strlen(role_name_or_oid))
+ {
+ result = DatumGetObjectId(DirectFunctionCall1(oidin,
+ CStringGetDatum(role_name_or_oid)));
+ PG_RETURN_OID(result);
+ }
+
+ /* The rest of this wouldn't work in bootstrap mode */
+ if (IsBootstrapProcessingMode())
+ elog(ERROR, "regrole values must be OIDs in bootstrap mode");
+
+ /* Normal case: see if the name matches any pg_authid entry. */
+ names = stringToQualifiedNameList(role_name_or_oid);
+
+ if (list_length(names) != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_NAME),
+ errmsg("invalid name syntax")));
+
+ result = get_role_oid(strVal(linitial(names)), false);
+
+ PG_RETURN_OID(result);
+}
+
+/*
+ * to_regrole - converts "rolename" to role OID
+ *
+ * If the name is not found, we return NULL.
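+ *
+ * For example (with a hypothetical role name),
+ *     SELECT to_regrole('alice');
+ * yields the role's OID if it exists and NULL otherwise, so it can be used
+ * for existence tests without risking an error.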
+ */
+Datum
+to_regrole(PG_FUNCTION_ARGS)
+{
+ char *role_name = text_to_cstring(PG_GETARG_TEXT_PP(0));
+ Oid result;
+ List *names;
+
+ names = stringToQualifiedNameList(role_name);
+
+ if (list_length(names) != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_NAME),
+ errmsg("invalid name syntax")));
+
+ result = get_role_oid(strVal(linitial(names)), true);
+
+ if (OidIsValid(result))
+ PG_RETURN_OID(result);
+ else
+ PG_RETURN_NULL();
+}
+
+/*
+ * regroleout - converts role OID to "role_name"
+ */
+Datum
+regroleout(PG_FUNCTION_ARGS)
+{
+ Oid roleoid = PG_GETARG_OID(0);
+ char *result;
+
+ if (roleoid == InvalidOid)
+ {
+ result = pstrdup("-");
+ PG_RETURN_CSTRING(result);
+ }
+
+ result = GetUserNameFromId(roleoid, true);
+
+ if (result)
+ {
+ /* pstrdup is not really necessary, but it avoids a compiler warning */
+ result = pstrdup(quote_identifier(result));
+ }
+ else
+ {
+ /* If OID doesn't match any role, return it numerically */
+ result = (char *) palloc(NAMEDATALEN);
+ snprintf(result, NAMEDATALEN, "%u", roleoid);
+ }
+
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * regrolerecv - converts external binary format to regrole
+ */
+Datum
+regrolerecv(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidrecv, so share code */
+ return oidrecv(fcinfo);
+}
+
+/*
+ * regrolesend - converts regrole to binary format
+ */
+Datum
+regrolesend(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidsend, so share code */
+ return oidsend(fcinfo);
+}
+
+/*
+ * regnamespacein - converts "nspname" to namespace OID
+ *
+ * We also accept a numeric OID, for symmetry with the output routine.
+ *
+ * '-' signifies unknown (OID 0). In all other cases, the input must
+ * match an existing pg_namespace entry.
+ */
+Datum
+regnamespacein(PG_FUNCTION_ARGS)
+{
+ char *nsp_name_or_oid = PG_GETARG_CSTRING(0);
+ Oid result;
+ List *names;
+
+ /* '-' ? */
+ if (strcmp(nsp_name_or_oid, "-") == 0)
+ PG_RETURN_OID(InvalidOid);
+
+ /* Numeric OID? */
+ if (nsp_name_or_oid[0] >= '0' &&
+ nsp_name_or_oid[0] <= '9' &&
+ strspn(nsp_name_or_oid, "0123456789") == strlen(nsp_name_or_oid))
+ {
+ result = DatumGetObjectId(DirectFunctionCall1(oidin,
+ CStringGetDatum(nsp_name_or_oid)));
+ PG_RETURN_OID(result);
+ }
+
+ /* The rest of this wouldn't work in bootstrap mode */
+ if (IsBootstrapProcessingMode())
+ elog(ERROR, "regnamespace values must be OIDs in bootstrap mode");
+
+ /* Normal case: see if the name matches any pg_namespace entry. */
+ names = stringToQualifiedNameList(nsp_name_or_oid);
+
+ if (list_length(names) != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_NAME),
+ errmsg("invalid name syntax")));
+
+ result = get_namespace_oid(strVal(linitial(names)), false);
+
+ PG_RETURN_OID(result);
+}
+
+/*
+ * to_regnamespace - converts "nspname" to namespace OID
+ *
+ * If the name is not found, we return NULL.
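+ *
+ * For example,
+ *     SELECT to_regnamespace('pg_catalog');
+ * yields that schema's OID, while a name that matches no schema yields NULL
+ * rather than an error.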
+ */
+Datum
+to_regnamespace(PG_FUNCTION_ARGS)
+{
+ char *nsp_name = text_to_cstring(PG_GETARG_TEXT_PP(0));
+ Oid result;
+ List *names;
+
+ names = stringToQualifiedNameList(nsp_name);
+
+ if (list_length(names) != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_NAME),
+ errmsg("invalid name syntax")));
+
+ result = get_namespace_oid(strVal(linitial(names)), true);
+
+ if (OidIsValid(result))
+ PG_RETURN_OID(result);
+ else
+ PG_RETURN_NULL();
+}
+
+/*
+ * regnamespaceout - converts namespace OID to "nsp_name"
+ */
+Datum
+regnamespaceout(PG_FUNCTION_ARGS)
+{
+ Oid nspid = PG_GETARG_OID(0);
+ char *result;
+
+ if (nspid == InvalidOid)
+ {
+ result = pstrdup("-");
+ PG_RETURN_CSTRING(result);
+ }
+
+ result = get_namespace_name(nspid);
+
+ if (result)
+ {
+ /* pstrdup is not really necessary, but it avoids a compiler warning */
+ result = pstrdup(quote_identifier(result));
+ }
+ else
+ {
+ /* If OID doesn't match any namespace, return it numerically */
+ result = (char *) palloc(NAMEDATALEN);
+ snprintf(result, NAMEDATALEN, "%u", nspid);
+ }
+
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * regnamespacerecv - converts external binary format to regnamespace
+ */
+Datum
+regnamespacerecv(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidrecv, so share code */
+ return oidrecv(fcinfo);
+}
+
+/*
+ * regnamespacesend - converts regnamespace to binary format
+ */
+Datum
+regnamespacesend(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as oidsend, so share code */
+ return oidsend(fcinfo);
+}
+
+/*
+ * text_regclass: convert text to regclass
+ *
+ * This could be replaced by CoerceViaIO, except that we need to treat
+ * text-to-regclass as an implicit cast to support legacy forms of nextval()
+ * and related functions.
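+ *
+ * For example (with a hypothetical sequence name), nextval('my_seq') relies
+ * on this implicit text-to-regclass cast to resolve the sequence by name.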
+ */
+Datum
+text_regclass(PG_FUNCTION_ARGS)
+{
+ text *relname = PG_GETARG_TEXT_PP(0);
+ Oid result;
+ RangeVar *rv;
+
+ rv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
+
+ /* We might not even have permissions on this relation; don't lock it. */
+ result = RangeVarGetRelid(rv, NoLock, false);
+
+ PG_RETURN_OID(result);
+}
+
+
+/*
+ * Given a C string, parse it into a qualified-name list.
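+ *
+ * For example (with hypothetical names), the input "myschema.mytable" is
+ * split into a two-element list of Strings, "myschema" and "mytable";
+ * quoting and case-folding follow SplitIdentifierString's rules.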
+ */
+List *
+stringToQualifiedNameList(const char *string)
+{
+ char *rawname;
+ List *result = NIL;
+ List *namelist;
+ ListCell *l;
+
+ /* We need a modifiable copy of the input string. */
+ rawname = pstrdup(string);
+
+ if (!SplitIdentifierString(rawname, '.', &namelist))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_NAME),
+ errmsg("invalid name syntax")));
+
+ if (namelist == NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_NAME),
+ errmsg("invalid name syntax")));
+
+ foreach(l, namelist)
+ {
+ char *curname = (char *) lfirst(l);
+
+ result = lappend(result, makeString(pstrdup(curname)));
+ }
+
+ pfree(rawname);
+ list_free(namelist);
+
+ return result;
+}
+
+/*****************************************************************************
+ * SUPPORT ROUTINES *
+ *****************************************************************************/
+
+/*
+ * Given a C string, parse it into a qualified function or operator name
+ * followed by a parenthesized list of type names. Reduce the
+ * type names to an array of OIDs (returned into *nargs and *argtypes;
+ * the argtypes array should be of size FUNC_MAX_ARGS). The function or
+ * operator name is returned in *names as a List of Strings.
+ *
+ * If allowNone is true, accept "NONE" and return it as InvalidOid (this is
+ * for unary operators).
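+ *
+ * For example (with hypothetical names), the input
+ *     myschema.myfunc(integer, text)
+ * sets *names to (myschema, myfunc), *nargs to 2, and argtypes[0..1] to the
+ * OIDs of integer and text.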
+ */
+static void
+parseNameAndArgTypes(const char *string, bool allowNone, List **names,
+ int *nargs, Oid *argtypes)
+{
+ char *rawname;
+ char *ptr;
+ char *ptr2;
+ char *typename;
+ bool in_quote;
+ bool had_comma;
+ int paren_count;
+ Oid typeid;
+ int32 typmod;
+
+ /* We need a modifiable copy of the input string. */
+ rawname = pstrdup(string);
+
+ /* Scan to find the expected left paren; mustn't be quoted */
+ in_quote = false;
+ for (ptr = rawname; *ptr; ptr++)
+ {
+ if (*ptr == '"')
+ in_quote = !in_quote;
+ else if (*ptr == '(' && !in_quote)
+ break;
+ }
+ if (*ptr == '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("expected a left parenthesis")));
+
+ /* Separate the name and parse it into a list */
+ *ptr++ = '\0';
+ *names = stringToQualifiedNameList(rawname);
+
+ /* Check for the trailing right parenthesis and remove it */
+ ptr2 = ptr + strlen(ptr);
+ while (--ptr2 > ptr)
+ {
+ if (!scanner_isspace(*ptr2))
+ break;
+ }
+ if (*ptr2 != ')')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("expected a right parenthesis")));
+
+ *ptr2 = '\0';
+
+ /* Separate the remaining string into comma-separated type names */
+ *nargs = 0;
+ had_comma = false;
+
+ for (;;)
+ {
+ /* allow leading whitespace */
+ while (scanner_isspace(*ptr))
+ ptr++;
+ if (*ptr == '\0')
+ {
+ /* End of string. Okay unless we had a comma before. */
+ if (had_comma)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("expected a type name")));
+ break;
+ }
+ typename = ptr;
+ /* Find end of type name --- end of string or comma */
+ /* ... but not a quoted or parenthesized comma */
+ in_quote = false;
+ paren_count = 0;
+ for (; *ptr; ptr++)
+ {
+ if (*ptr == '"')
+ in_quote = !in_quote;
+ else if (*ptr == ',' && !in_quote && paren_count == 0)
+ break;
+ else if (!in_quote)
+ {
+ switch (*ptr)
+ {
+ case '(':
+ case '[':
+ paren_count++;
+ break;
+ case ')':
+ case ']':
+ paren_count--;
+ break;
+ }
+ }
+ }
+ if (in_quote || paren_count != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("improper type name")));
+
+ ptr2 = ptr;
+ if (*ptr == ',')
+ {
+ had_comma = true;
+ *ptr++ = '\0';
+ }
+ else
+ {
+ had_comma = false;
+ Assert(*ptr == '\0');
+ }
+ /* Lop off trailing whitespace */
+ while (--ptr2 >= typename)
+ {
+ if (!scanner_isspace(*ptr2))
+ break;
+ *ptr2 = '\0';
+ }
+
+ if (allowNone && pg_strcasecmp(typename, "none") == 0)
+ {
+ /* Special case for NONE */
+ typeid = InvalidOid;
+ typmod = -1;
+ }
+ else
+ {
+ /* Use full parser to resolve the type name */
+ parseTypeString(typename, &typeid, &typmod, false);
+ }
+ if (*nargs >= FUNC_MAX_ARGS)
+ ereport(ERROR,
+ (errcode(ERRCODE_TOO_MANY_ARGUMENTS),
+ errmsg("too many arguments")));
+
+ argtypes[*nargs] = typeid;
+ (*nargs)++;
+ }
+
+ pfree(rawname);
+}
diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c
new file mode 100644
index 0000000..51b3fdc
--- /dev/null
+++ b/src/backend/utils/adt/ri_triggers.c
@@ -0,0 +1,3020 @@
+/*-------------------------------------------------------------------------
+ *
+ * ri_triggers.c
+ *
+ * Generic trigger procedures for referential integrity constraint
+ * checks.
+ *
+ * Note about memory management: the private hashtables kept here live
+ * across query and transaction boundaries; in fact, they live as long as
+ * the backend does. This works because the hashtable structures
+ * themselves are allocated by dynahash.c in its permanent DynaHashCxt,
+ * and the SPI plans they point to are saved using SPI_keepplan().
+ * There is not currently any provision for throwing away a no-longer-needed
+ * plan --- consider improving this someday.
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ * src/backend/utils/adt/ri_triggers.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "access/sysattr.h"
+#include "access/table.h"
+#include "access/tableam.h"
+#include "access/xact.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_constraint.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_type.h"
+#include "commands/trigger.h"
+#include "executor/executor.h"
+#include "executor/spi.h"
+#include "lib/ilist.h"
+#include "miscadmin.h"
+#include "parser/parse_coerce.h"
+#include "parser/parse_relation.h"
+#include "storage/bufmgr.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/fmgroids.h"
+#include "utils/guc.h"
+#include "utils/inval.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/rls.h"
+#include "utils/ruleutils.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+
+/*
+ * Local definitions
+ */
+
+#define RI_MAX_NUMKEYS INDEX_MAX_KEYS
+
+#define RI_INIT_CONSTRAINTHASHSIZE 64
+#define RI_INIT_QUERYHASHSIZE (RI_INIT_CONSTRAINTHASHSIZE * 4)
+
+#define RI_KEYS_ALL_NULL 0
+#define RI_KEYS_SOME_NULL 1
+#define RI_KEYS_NONE_NULL 2
+
+/* RI query type codes */
+/* these queries are executed against the PK (referenced) table: */
+#define RI_PLAN_CHECK_LOOKUPPK 1
+#define RI_PLAN_CHECK_LOOKUPPK_FROM_PK 2
+#define RI_PLAN_LAST_ON_PK RI_PLAN_CHECK_LOOKUPPK_FROM_PK
+/* these queries are executed against the FK (referencing) table: */
+#define RI_PLAN_CASCADE_ONDELETE 3
+#define RI_PLAN_CASCADE_ONUPDATE 4
+/* For RESTRICT, the same plan can be used for both ON DELETE and ON UPDATE triggers. */
+#define RI_PLAN_RESTRICT 5
+#define RI_PLAN_SETNULL_ONDELETE 6
+#define RI_PLAN_SETNULL_ONUPDATE 7
+#define RI_PLAN_SETDEFAULT_ONDELETE 8
+#define RI_PLAN_SETDEFAULT_ONUPDATE 9
+
+#define MAX_QUOTED_NAME_LEN (NAMEDATALEN*2+3)
+#define MAX_QUOTED_REL_NAME_LEN (MAX_QUOTED_NAME_LEN*2)
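+/* (The *2+3 leaves room for every character of a name to be doubled by
+ * quoting, plus the surrounding quotes and a terminating NUL; a quoted
+ * relation name may additionally carry a quoted schema prefix.) */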
+
+#define RIAttName(rel, attnum) NameStr(*attnumAttName(rel, attnum))
+#define RIAttType(rel, attnum) attnumTypeId(rel, attnum)
+#define RIAttCollation(rel, attnum) attnumCollationId(rel, attnum)
+
+#define RI_TRIGTYPE_INSERT 1
+#define RI_TRIGTYPE_UPDATE 2
+#define RI_TRIGTYPE_DELETE 3
+
+
+/*
+ * RI_ConstraintInfo
+ *
+ * Information extracted from an FK pg_constraint entry. This is cached in
+ * ri_constraint_cache.
+ */
+typedef struct RI_ConstraintInfo
+{
+ Oid constraint_id; /* OID of pg_constraint entry (hash key) */
+ bool valid; /* successfully initialized? */
+ Oid constraint_root_id; /* OID of topmost ancestor constraint;
+ * same as constraint_id if not inherited */
+ uint32 oidHashValue; /* hash value of constraint_id */
+ uint32 rootHashValue; /* hash value of constraint_root_id */
+ NameData conname; /* name of the FK constraint */
+ Oid pk_relid; /* referenced relation */
+ Oid fk_relid; /* referencing relation */
+ char confupdtype; /* foreign key's ON UPDATE action */
+ char confdeltype; /* foreign key's ON DELETE action */
+ int ndelsetcols; /* number of columns referenced in ON DELETE
+ * SET clause */
+ int16 confdelsetcols[RI_MAX_NUMKEYS]; /* attnums of cols to set on
+ * delete */
+ char confmatchtype; /* foreign key's match type */
+ int nkeys; /* number of key columns */
+ int16 pk_attnums[RI_MAX_NUMKEYS]; /* attnums of referenced cols */
+ int16 fk_attnums[RI_MAX_NUMKEYS]; /* attnums of referencing cols */
+ Oid pf_eq_oprs[RI_MAX_NUMKEYS]; /* equality operators (PK = FK) */
+ Oid pp_eq_oprs[RI_MAX_NUMKEYS]; /* equality operators (PK = PK) */
+ Oid ff_eq_oprs[RI_MAX_NUMKEYS]; /* equality operators (FK = FK) */
+ dlist_node valid_link; /* Link in list of valid entries */
+} RI_ConstraintInfo;
+
+/*
+ * RI_QueryKey
+ *
+ * The key identifying a prepared SPI plan in our query hashtable
+ */
+typedef struct RI_QueryKey
+{
+ Oid constr_id; /* OID of pg_constraint entry */
+ int32 constr_queryno; /* query type ID, see RI_PLAN_XXX above */
+} RI_QueryKey;
+
+/*
+ * RI_QueryHashEntry
+ */
+typedef struct RI_QueryHashEntry
+{
+ RI_QueryKey key;
+ SPIPlanPtr plan;
+} RI_QueryHashEntry;
+
+/*
+ * RI_CompareKey
+ *
+ * The key identifying an entry showing how to compare two values
+ */
+typedef struct RI_CompareKey
+{
+ Oid eq_opr; /* the equality operator to apply */
+ Oid typeid; /* the data type to apply it to */
+} RI_CompareKey;
+
+/*
+ * RI_CompareHashEntry
+ */
+typedef struct RI_CompareHashEntry
+{
+ RI_CompareKey key;
+ bool valid; /* successfully initialized? */
+ FmgrInfo eq_opr_finfo; /* call info for equality fn */
+ FmgrInfo cast_func_finfo; /* in case we must coerce input */
+} RI_CompareHashEntry;
+
+
+/*
+ * Local data
+ */
+static HTAB *ri_constraint_cache = NULL;
+static HTAB *ri_query_cache = NULL;
+static HTAB *ri_compare_cache = NULL;
+static dlist_head ri_constraint_cache_valid_list;
+static int ri_constraint_cache_valid_count = 0;
+
+
+/*
+ * Local function prototypes
+ */
+static bool ri_Check_Pk_Match(Relation pk_rel, Relation fk_rel,
+ TupleTableSlot *oldslot,
+ const RI_ConstraintInfo *riinfo);
+static Datum ri_restrict(TriggerData *trigdata, bool is_no_action);
+static Datum ri_set(TriggerData *trigdata, bool is_set_null, int tgkind);
+static void quoteOneName(char *buffer, const char *name);
+static void quoteRelationName(char *buffer, Relation rel);
+static void ri_GenerateQual(StringInfo buf,
+ const char *sep,
+ const char *leftop, Oid leftoptype,
+ Oid opoid,
+ const char *rightop, Oid rightoptype);
+static void ri_GenerateQualCollation(StringInfo buf, Oid collation);
+static int ri_NullCheck(TupleDesc tupdesc, TupleTableSlot *slot,
+ const RI_ConstraintInfo *riinfo, bool rel_is_pk);
+static void ri_BuildQueryKey(RI_QueryKey *key,
+ const RI_ConstraintInfo *riinfo,
+ int32 constr_queryno);
+static bool ri_KeysEqual(Relation rel, TupleTableSlot *oldslot, TupleTableSlot *newslot,
+ const RI_ConstraintInfo *riinfo, bool rel_is_pk);
+static bool ri_AttributesEqual(Oid eq_opr, Oid typeid,
+ Datum oldvalue, Datum newvalue);
+
+static void ri_InitHashTables(void);
+static void InvalidateConstraintCacheCallBack(Datum arg, int cacheid, uint32 hashvalue);
+static SPIPlanPtr ri_FetchPreparedPlan(RI_QueryKey *key);
+static void ri_HashPreparedPlan(RI_QueryKey *key, SPIPlanPtr plan);
+static RI_CompareHashEntry *ri_HashCompareOp(Oid eq_opr, Oid typeid);
+
+static void ri_CheckTrigger(FunctionCallInfo fcinfo, const char *funcname,
+ int tgkind);
+static const RI_ConstraintInfo *ri_FetchConstraintInfo(Trigger *trigger,
+ Relation trig_rel, bool rel_is_pk);
+static const RI_ConstraintInfo *ri_LoadConstraintInfo(Oid constraintOid);
+static Oid get_ri_constraint_root(Oid constrOid);
+static SPIPlanPtr ri_PlanCheck(const char *querystr, int nargs, Oid *argtypes,
+ RI_QueryKey *qkey, Relation fk_rel, Relation pk_rel);
+static bool ri_PerformCheck(const RI_ConstraintInfo *riinfo,
+ RI_QueryKey *qkey, SPIPlanPtr qplan,
+ Relation fk_rel, Relation pk_rel,
+ TupleTableSlot *oldslot, TupleTableSlot *newslot,
+ bool detectNewRows, int expect_OK);
+static void ri_ExtractValues(Relation rel, TupleTableSlot *slot,
+ const RI_ConstraintInfo *riinfo, bool rel_is_pk,
+ Datum *vals, char *nulls);
+static void ri_ReportViolation(const RI_ConstraintInfo *riinfo,
+ Relation pk_rel, Relation fk_rel,
+ TupleTableSlot *violatorslot, TupleDesc tupdesc,
+ int queryno, bool partgone) pg_attribute_noreturn();
+
+
+/*
+ * RI_FKey_check -
+ *
+ * Check foreign key existence (combined for INSERT and UPDATE).
+ */
+static Datum
+RI_FKey_check(TriggerData *trigdata)
+{
+ const RI_ConstraintInfo *riinfo;
+ Relation fk_rel;
+ Relation pk_rel;
+ TupleTableSlot *newslot;
+ RI_QueryKey qkey;
+ SPIPlanPtr qplan;
+
+ riinfo = ri_FetchConstraintInfo(trigdata->tg_trigger,
+ trigdata->tg_relation, false);
+
+ if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+ newslot = trigdata->tg_newslot;
+ else
+ newslot = trigdata->tg_trigslot;
+
+ /*
+ * We should not even consider checking the row if it is no longer valid,
+ * since it was either deleted (so the deferred check should be skipped)
+ * or updated (in which case only the latest version of the row should be
+ * checked). Test its liveness according to SnapshotSelf. We need pin
+ * and lock on the buffer to call HeapTupleSatisfiesVisibility. Caller
+ * should be holding pin, but not lock.
+ */
+ if (!table_tuple_satisfies_snapshot(trigdata->tg_relation, newslot, SnapshotSelf))
+ return PointerGetDatum(NULL);
+
+ /*
+ * Get the relation descriptors of the FK and PK tables.
+ *
+ * pk_rel is opened in RowShareLock mode since that's what our eventual
+ * SELECT FOR KEY SHARE will get on it.
+ */
+ fk_rel = trigdata->tg_relation;
+ pk_rel = table_open(riinfo->pk_relid, RowShareLock);
+
+ switch (ri_NullCheck(RelationGetDescr(fk_rel), newslot, riinfo, false))
+ {
+ case RI_KEYS_ALL_NULL:
+
+ /*
+ * No further check needed - an all-NULL key passes every type of
+ * foreign key constraint.
+ */
+ table_close(pk_rel, RowShareLock);
+ return PointerGetDatum(NULL);
+
+ case RI_KEYS_SOME_NULL:
+
+ /*
+ * This is the only case that differs between the three kinds of
+ * MATCH.
+ */
+ switch (riinfo->confmatchtype)
+ {
+ case FKCONSTR_MATCH_FULL:
+
+ /*
+ * Not allowed - MATCH FULL says either all or none of the
+ * attributes can be NULLs
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_FOREIGN_KEY_VIOLATION),
+ errmsg("insert or update on table \"%s\" violates foreign key constraint \"%s\"",
+ RelationGetRelationName(fk_rel),
+ NameStr(riinfo->conname)),
+ errdetail("MATCH FULL does not allow mixing of null and nonnull key values."),
+ errtableconstraint(fk_rel,
+ NameStr(riinfo->conname))));
+ table_close(pk_rel, RowShareLock);
+ return PointerGetDatum(NULL);
+
+ case FKCONSTR_MATCH_SIMPLE:
+
+ /*
+ * MATCH SIMPLE - if ANY column is null, the key passes
+ * the constraint.
+ */
+ table_close(pk_rel, RowShareLock);
+ return PointerGetDatum(NULL);
+
+#ifdef NOT_USED
+ case FKCONSTR_MATCH_PARTIAL:
+
+ /*
+ * MATCH PARTIAL - all non-null columns must match. (not
+ * implemented, can be done by modifying the query below
+ * to only include non-null columns, or by writing a
+ * special version here)
+ */
+ break;
+#endif
+ }
+
+ case RI_KEYS_NONE_NULL:
+
+ /*
+ * Have a full qualified key - continue below for all three kinds
+ * of MATCH.
+ */
+ break;
+ }
+
+ if (SPI_connect() != SPI_OK_CONNECT)
+ elog(ERROR, "SPI_connect failed");
+
+ /* Fetch or prepare a saved plan for the real check */
+ ri_BuildQueryKey(&qkey, riinfo, RI_PLAN_CHECK_LOOKUPPK);
+
+ if ((qplan = ri_FetchPreparedPlan(&qkey)) == NULL)
+ {
+ StringInfoData querybuf;
+ char pkrelname[MAX_QUOTED_REL_NAME_LEN];
+ char attname[MAX_QUOTED_NAME_LEN];
+ char paramname[16];
+ const char *querysep;
+ Oid queryoids[RI_MAX_NUMKEYS];
+ const char *pk_only;
+
+ /* ----------
+ * The query string built is
+ * SELECT 1 FROM [ONLY] <pktable> x WHERE pkatt1 = $1 [AND ...]
+ * FOR KEY SHARE OF x
+ * The type id's for the $ parameters are those of the
+ * corresponding FK attributes.
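+ *
+ * For a hypothetical two-column key the built string looks roughly like
+ *     SELECT 1 FROM ONLY "public"."pktab" x
+ *         WHERE "a" = $1 AND "b" = $2 FOR KEY SHARE OF x
+ * (illustrative only; ri_GenerateQual actually emits the schema-qualified
+ * operator and adds casts where needed).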
+ * ----------
+ */
+ initStringInfo(&querybuf);
+ pk_only = pk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ?
+ "" : "ONLY ";
+ quoteRelationName(pkrelname, pk_rel);
+ appendStringInfo(&querybuf, "SELECT 1 FROM %s%s x",
+ pk_only, pkrelname);
+ querysep = "WHERE";
+ for (int i = 0; i < riinfo->nkeys; i++)
+ {
+ Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]);
+ Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]);
+
+ quoteOneName(attname,
+ RIAttName(pk_rel, riinfo->pk_attnums[i]));
+ sprintf(paramname, "$%d", i + 1);
+ ri_GenerateQual(&querybuf, querysep,
+ attname, pk_type,
+ riinfo->pf_eq_oprs[i],
+ paramname, fk_type);
+ querysep = "AND";
+ queryoids[i] = fk_type;
+ }
+ appendStringInfoString(&querybuf, " FOR KEY SHARE OF x");
+
+ /* Prepare and save the plan */
+ qplan = ri_PlanCheck(querybuf.data, riinfo->nkeys, queryoids,
+ &qkey, fk_rel, pk_rel);
+ }
+
+ /*
+ * Now check that foreign key exists in PK table
+ *
+ * XXX detectNewRows must be true when a partitioned table is on the
+ * referenced side. The reason is that our snapshot must be fresh in
+ * order for the hack in find_inheritance_children() to work.
+ */
+ ri_PerformCheck(riinfo, &qkey, qplan,
+ fk_rel, pk_rel,
+ NULL, newslot,
+ pk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE,
+ SPI_OK_SELECT);
+
+ if (SPI_finish() != SPI_OK_FINISH)
+ elog(ERROR, "SPI_finish failed");
+
+ table_close(pk_rel, RowShareLock);
+
+ return PointerGetDatum(NULL);
+}
+
+
+/*
+ * RI_FKey_check_ins -
+ *
+ * Check foreign key existence at insert event on FK table.
+ */
+Datum
+RI_FKey_check_ins(PG_FUNCTION_ARGS)
+{
+ /* Check that this is a valid trigger call on the right time and event. */
+ ri_CheckTrigger(fcinfo, "RI_FKey_check_ins", RI_TRIGTYPE_INSERT);
+
+ /* Share code with UPDATE case. */
+ return RI_FKey_check((TriggerData *) fcinfo->context);
+}
+
+
+/*
+ * RI_FKey_check_upd -
+ *
+ * Check foreign key existence at update event on FK table.
+ */
+Datum
+RI_FKey_check_upd(PG_FUNCTION_ARGS)
+{
+ /* Check that this is a valid trigger call on the right time and event. */
+ ri_CheckTrigger(fcinfo, "RI_FKey_check_upd", RI_TRIGTYPE_UPDATE);
+
+ /* Share code with INSERT case. */
+ return RI_FKey_check((TriggerData *) fcinfo->context);
+}
+
+
+/*
+ * ri_Check_Pk_Match
+ *
+ * Check to see if another PK row has been created that provides the same
+ * key values as the "oldslot" that's been modified or deleted in our trigger
+ * event. Returns true if a match is found in the PK table.
+ *
+ * We assume the caller checked that the oldslot contains no NULL key values,
+ * since otherwise a match is impossible.
+ */
+static bool
+ri_Check_Pk_Match(Relation pk_rel, Relation fk_rel,
+ TupleTableSlot *oldslot,
+ const RI_ConstraintInfo *riinfo)
+{
+ SPIPlanPtr qplan;
+ RI_QueryKey qkey;
+ bool result;
+
+ /* Only called for non-null rows */
+ Assert(ri_NullCheck(RelationGetDescr(pk_rel), oldslot, riinfo, true) == RI_KEYS_NONE_NULL);
+
+ if (SPI_connect() != SPI_OK_CONNECT)
+ elog(ERROR, "SPI_connect failed");
+
+ /*
+ * Fetch or prepare a saved plan for checking PK table with values coming
+ * from a PK row
+ */
+ ri_BuildQueryKey(&qkey, riinfo, RI_PLAN_CHECK_LOOKUPPK_FROM_PK);
+
+ if ((qplan = ri_FetchPreparedPlan(&qkey)) == NULL)
+ {
+ StringInfoData querybuf;
+ char pkrelname[MAX_QUOTED_REL_NAME_LEN];
+ char attname[MAX_QUOTED_NAME_LEN];
+ char paramname[16];
+ const char *querysep;
+ const char *pk_only;
+ Oid queryoids[RI_MAX_NUMKEYS];
+
+ /* ----------
+ * The query string built is
+ * SELECT 1 FROM [ONLY] <pktable> x WHERE pkatt1 = $1 [AND ...]
+ * FOR KEY SHARE OF x
+ * The type id's for the $ parameters are those of the
+ * PK attributes themselves.
+ * ----------
+ */
+ initStringInfo(&querybuf);
+ pk_only = pk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ?
+ "" : "ONLY ";
+ quoteRelationName(pkrelname, pk_rel);
+ appendStringInfo(&querybuf, "SELECT 1 FROM %s%s x",
+ pk_only, pkrelname);
+ querysep = "WHERE";
+ for (int i = 0; i < riinfo->nkeys; i++)
+ {
+ Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]);
+
+ quoteOneName(attname,
+ RIAttName(pk_rel, riinfo->pk_attnums[i]));
+ sprintf(paramname, "$%d", i + 1);
+ ri_GenerateQual(&querybuf, querysep,
+ attname, pk_type,
+ riinfo->pp_eq_oprs[i],
+ paramname, pk_type);
+ querysep = "AND";
+ queryoids[i] = pk_type;
+ }
+ appendStringInfoString(&querybuf, " FOR KEY SHARE OF x");
+
+ /* Prepare and save the plan */
+ qplan = ri_PlanCheck(querybuf.data, riinfo->nkeys, queryoids,
+ &qkey, fk_rel, pk_rel);
+ }
+
+ /*
+ * We have a plan now. Run it.
+ */
+ result = ri_PerformCheck(riinfo, &qkey, qplan,
+ fk_rel, pk_rel,
+ oldslot, NULL,
+ true, /* treat like update */
+ SPI_OK_SELECT);
+
+ if (SPI_finish() != SPI_OK_FINISH)
+ elog(ERROR, "SPI_finish failed");
+
+ return result;
+}
+
+
+/*
+ * RI_FKey_noaction_del -
+ *
+ * Give an error and roll back the current transaction if the
+ * delete has resulted in a violation of the given referential
+ * integrity constraint.
+ */
+Datum
+RI_FKey_noaction_del(PG_FUNCTION_ARGS)
+{
+ /* Check that this is a valid trigger call on the right time and event. */
+ ri_CheckTrigger(fcinfo, "RI_FKey_noaction_del", RI_TRIGTYPE_DELETE);
+
+ /* Share code with RESTRICT/UPDATE cases. */
+ return ri_restrict((TriggerData *) fcinfo->context, true);
+}
+
+/*
+ * RI_FKey_restrict_del -
+ *
+ * Restrict delete from PK table to rows unreferenced by foreign key.
+ *
+ * The SQL standard intends that this referential action occur exactly when
+ * the delete is performed, rather than after. This appears to be
+ * the only difference between "NO ACTION" and "RESTRICT". In Postgres
+ * we still implement this as an AFTER trigger, but it's non-deferrable.
+ */
+Datum
+RI_FKey_restrict_del(PG_FUNCTION_ARGS)
+{
+ /* Check that this is a valid trigger call on the right time and event. */
+ ri_CheckTrigger(fcinfo, "RI_FKey_restrict_del", RI_TRIGTYPE_DELETE);
+
+ /* Share code with NO ACTION/UPDATE cases. */
+ return ri_restrict((TriggerData *) fcinfo->context, false);
+}
+
+/*
+ * RI_FKey_noaction_upd -
+ *
+ * Give an error and roll back the current transaction if the
+ * update has resulted in a violation of the given referential
+ * integrity constraint.
+ */
+Datum
+RI_FKey_noaction_upd(PG_FUNCTION_ARGS)
+{
+ /* Check that this is a valid trigger call on the right time and event. */
+ ri_CheckTrigger(fcinfo, "RI_FKey_noaction_upd", RI_TRIGTYPE_UPDATE);
+
+ /* Share code with RESTRICT/DELETE cases. */
+ return ri_restrict((TriggerData *) fcinfo->context, true);
+}
+
+/*
+ * RI_FKey_restrict_upd -
+ *
+ * Restrict update of PK to rows unreferenced by foreign key.
+ *
+ * The SQL standard intends that this referential action occur exactly when
+ * the update is performed, rather than after. This appears to be
+ * the only difference between "NO ACTION" and "RESTRICT". In Postgres
+ * we still implement this as an AFTER trigger, but it's non-deferrable.
+ */
+Datum
+RI_FKey_restrict_upd(PG_FUNCTION_ARGS)
+{
+ /* Check that this is a valid trigger call on the right time and event. */
+ ri_CheckTrigger(fcinfo, "RI_FKey_restrict_upd", RI_TRIGTYPE_UPDATE);
+
+ /* Share code with NO ACTION/DELETE cases. */
+ return ri_restrict((TriggerData *) fcinfo->context, false);
+}
+
+/*
+ * ri_restrict -
+ *
+ * Common code for ON DELETE RESTRICT, ON DELETE NO ACTION,
+ * ON UPDATE RESTRICT, and ON UPDATE NO ACTION.
+ */
+static Datum
+ri_restrict(TriggerData *trigdata, bool is_no_action)
+{
+ const RI_ConstraintInfo *riinfo;
+ Relation fk_rel;
+ Relation pk_rel;
+ TupleTableSlot *oldslot;
+ RI_QueryKey qkey;
+ SPIPlanPtr qplan;
+
+ riinfo = ri_FetchConstraintInfo(trigdata->tg_trigger,
+ trigdata->tg_relation, true);
+
+ /*
+ * Get the relation descriptors of the FK and PK tables and the old tuple.
+ *
+ * fk_rel is opened in RowShareLock mode since that's what our eventual
+ * SELECT FOR KEY SHARE will get on it.
+ */
+ fk_rel = table_open(riinfo->fk_relid, RowShareLock);
+ pk_rel = trigdata->tg_relation;
+ oldslot = trigdata->tg_trigslot;
+
+ /*
+ * If another PK row now exists providing the old key values, we should
+ * not do anything. However, this check should only be made in the NO
+ * ACTION case; in RESTRICT cases we don't wish to allow another row to be
+ * substituted.
+ */
+ if (is_no_action &&
+ ri_Check_Pk_Match(pk_rel, fk_rel, oldslot, riinfo))
+ {
+ table_close(fk_rel, RowShareLock);
+ return PointerGetDatum(NULL);
+ }
+
+ if (SPI_connect() != SPI_OK_CONNECT)
+ elog(ERROR, "SPI_connect failed");
+
+ /*
+ * Fetch or prepare a saved plan for the restrict lookup (it's the same
+ * query for delete and update cases)
+ */
+ ri_BuildQueryKey(&qkey, riinfo, RI_PLAN_RESTRICT);
+
+ if ((qplan = ri_FetchPreparedPlan(&qkey)) == NULL)
+ {
+ StringInfoData querybuf;
+ char fkrelname[MAX_QUOTED_REL_NAME_LEN];
+ char attname[MAX_QUOTED_NAME_LEN];
+ char paramname[16];
+ const char *querysep;
+ Oid queryoids[RI_MAX_NUMKEYS];
+ const char *fk_only;
+
+ /* ----------
+ * The query string built is
+ * SELECT 1 FROM [ONLY] <fktable> x WHERE $1 = fkatt1 [AND ...]
+ * FOR KEY SHARE OF x
+ * The type id's for the $ parameters are those of the
+ * corresponding PK attributes.
+ * ----------
+ */
+ initStringInfo(&querybuf);
+ fk_only = fk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ?
+ "" : "ONLY ";
+ quoteRelationName(fkrelname, fk_rel);
+ appendStringInfo(&querybuf, "SELECT 1 FROM %s%s x",
+ fk_only, fkrelname);
+ querysep = "WHERE";
+ for (int i = 0; i < riinfo->nkeys; i++)
+ {
+ Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]);
+ Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]);
+ Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]);
+ Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]);
+
+ quoteOneName(attname,
+ RIAttName(fk_rel, riinfo->fk_attnums[i]));
+ sprintf(paramname, "$%d", i + 1);
+ ri_GenerateQual(&querybuf, querysep,
+ paramname, pk_type,
+ riinfo->pf_eq_oprs[i],
+ attname, fk_type);
+ if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll))
+ ri_GenerateQualCollation(&querybuf, pk_coll);
+ querysep = "AND";
+ queryoids[i] = pk_type;
+ }
+ appendStringInfoString(&querybuf, " FOR KEY SHARE OF x");
+
+ /* Prepare and save the plan */
+ qplan = ri_PlanCheck(querybuf.data, riinfo->nkeys, queryoids,
+ &qkey, fk_rel, pk_rel);
+ }
+
+ /*
+ * We have a plan now. Run it to check for existing references.
+ */
+ ri_PerformCheck(riinfo, &qkey, qplan,
+ fk_rel, pk_rel,
+ oldslot, NULL,
+ true, /* must detect new rows */
+ SPI_OK_SELECT);
+
+ if (SPI_finish() != SPI_OK_FINISH)
+ elog(ERROR, "SPI_finish failed");
+
+ table_close(fk_rel, RowShareLock);
+
+ return PointerGetDatum(NULL);
+}
+
+
+/*
+ * RI_FKey_cascade_del -
+ *
+ * Cascaded delete foreign key references at delete event on PK table.
+ */
+Datum
+RI_FKey_cascade_del(PG_FUNCTION_ARGS)
+{
+ TriggerData *trigdata = (TriggerData *) fcinfo->context;
+ const RI_ConstraintInfo *riinfo;
+ Relation fk_rel;
+ Relation pk_rel;
+ TupleTableSlot *oldslot;
+ RI_QueryKey qkey;
+ SPIPlanPtr qplan;
+
+ /* Check that this is a valid trigger call on the right time and event. */
+ ri_CheckTrigger(fcinfo, "RI_FKey_cascade_del", RI_TRIGTYPE_DELETE);
+
+ riinfo = ri_FetchConstraintInfo(trigdata->tg_trigger,
+ trigdata->tg_relation, true);
+
+ /*
+ * Get the relation descriptors of the FK and PK tables and the old tuple.
+ *
+ * fk_rel is opened in RowExclusiveLock mode since that's what our
+ * eventual DELETE will get on it.
+ */
+ fk_rel = table_open(riinfo->fk_relid, RowExclusiveLock);
+ pk_rel = trigdata->tg_relation;
+ oldslot = trigdata->tg_trigslot;
+
+ if (SPI_connect() != SPI_OK_CONNECT)
+ elog(ERROR, "SPI_connect failed");
+
+ /* Fetch or prepare a saved plan for the cascaded delete */
+ ri_BuildQueryKey(&qkey, riinfo, RI_PLAN_CASCADE_ONDELETE);
+
+ if ((qplan = ri_FetchPreparedPlan(&qkey)) == NULL)
+ {
+ StringInfoData querybuf;
+ char fkrelname[MAX_QUOTED_REL_NAME_LEN];
+ char attname[MAX_QUOTED_NAME_LEN];
+ char paramname[16];
+ const char *querysep;
+ Oid queryoids[RI_MAX_NUMKEYS];
+ const char *fk_only;
+
+ /* ----------
+ * The query string built is
+ * DELETE FROM [ONLY] <fktable> WHERE $1 = fkatt1 [AND ...]
+ * The type id's for the $ parameters are those of the
+ * corresponding PK attributes.
+ * ----------
+ */
+ initStringInfo(&querybuf);
+ fk_only = fk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ?
+ "" : "ONLY ";
+ quoteRelationName(fkrelname, fk_rel);
+ appendStringInfo(&querybuf, "DELETE FROM %s%s",
+ fk_only, fkrelname);
+ querysep = "WHERE";
+ for (int i = 0; i < riinfo->nkeys; i++)
+ {
+ Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]);
+ Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]);
+ Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]);
+ Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]);
+
+ quoteOneName(attname,
+ RIAttName(fk_rel, riinfo->fk_attnums[i]));
+ sprintf(paramname, "$%d", i + 1);
+ ri_GenerateQual(&querybuf, querysep,
+ paramname, pk_type,
+ riinfo->pf_eq_oprs[i],
+ attname, fk_type);
+ if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll))
+ ri_GenerateQualCollation(&querybuf, pk_coll);
+ querysep = "AND";
+ queryoids[i] = pk_type;
+ }
+
+ /* Prepare and save the plan */
+ qplan = ri_PlanCheck(querybuf.data, riinfo->nkeys, queryoids,
+ &qkey, fk_rel, pk_rel);
+ }
+
+ /*
+ * We have a plan now. Build up the arguments from the key values in the
+ * deleted PK tuple and delete the referencing rows
+ */
+ ri_PerformCheck(riinfo, &qkey, qplan,
+ fk_rel, pk_rel,
+ oldslot, NULL,
+ true, /* must detect new rows */
+ SPI_OK_DELETE);
+
+ if (SPI_finish() != SPI_OK_FINISH)
+ elog(ERROR, "SPI_finish failed");
+
+ table_close(fk_rel, RowExclusiveLock);
+
+ return PointerGetDatum(NULL);
+}
+
+
+/*
+ * RI_FKey_cascade_upd -
+ *
+ * Cascaded update foreign key references at update event on PK table.
+ */
+Datum
+RI_FKey_cascade_upd(PG_FUNCTION_ARGS)
+{
+ TriggerData *trigdata = (TriggerData *) fcinfo->context;
+ const RI_ConstraintInfo *riinfo;
+ Relation fk_rel;
+ Relation pk_rel;
+ TupleTableSlot *newslot;
+ TupleTableSlot *oldslot;
+ RI_QueryKey qkey;
+ SPIPlanPtr qplan;
+
+ /* Check that this is a valid trigger call on the right time and event. */
+ ri_CheckTrigger(fcinfo, "RI_FKey_cascade_upd", RI_TRIGTYPE_UPDATE);
+
+ riinfo = ri_FetchConstraintInfo(trigdata->tg_trigger,
+ trigdata->tg_relation, true);
+
+ /*
+ * Get the relation descriptors of the FK and PK tables and the new and
+ * old tuple.
+ *
+ * fk_rel is opened in RowExclusiveLock mode since that's what our
+ * eventual UPDATE will get on it.
+ */
+ fk_rel = table_open(riinfo->fk_relid, RowExclusiveLock);
+ pk_rel = trigdata->tg_relation;
+ newslot = trigdata->tg_newslot;
+ oldslot = trigdata->tg_trigslot;
+
+ if (SPI_connect() != SPI_OK_CONNECT)
+ elog(ERROR, "SPI_connect failed");
+
+ /* Fetch or prepare a saved plan for the cascaded update */
+ ri_BuildQueryKey(&qkey, riinfo, RI_PLAN_CASCADE_ONUPDATE);
+
+ if ((qplan = ri_FetchPreparedPlan(&qkey)) == NULL)
+ {
+ StringInfoData querybuf;
+ StringInfoData qualbuf;
+ char fkrelname[MAX_QUOTED_REL_NAME_LEN];
+ char attname[MAX_QUOTED_NAME_LEN];
+ char paramname[16];
+ const char *querysep;
+ const char *qualsep;
+ Oid queryoids[RI_MAX_NUMKEYS * 2];
+ const char *fk_only;
+
+ /* ----------
+ * The query string built is
+ * UPDATE [ONLY] <fktable> SET fkatt1 = $1 [, ...]
+ * WHERE $n = fkatt1 [AND ...]
+ * The type id's for the $ parameters are those of the
+ * corresponding PK attributes. Note that we are assuming
+ * there is an assignment cast from the PK to the FK type;
+ * else the parser will fail.
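+ *
+ * For a hypothetical two-column key this yields roughly
+ *     UPDATE ONLY "public"."fktab" SET "a" = $1, "b" = $2
+ *         WHERE $3 = "a" AND $4 = "b"
+ * where $1/$2 carry the new PK values and $3/$4 the old ones
+ * (illustrative only; the real qual uses schema-qualified operators).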
+ * ----------
+ */
+ initStringInfo(&querybuf);
+ initStringInfo(&qualbuf);
+ fk_only = fk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ?
+ "" : "ONLY ";
+ quoteRelationName(fkrelname, fk_rel);
+ appendStringInfo(&querybuf, "UPDATE %s%s SET",
+ fk_only, fkrelname);
+ querysep = "";
+ qualsep = "WHERE";
+ for (int i = 0, j = riinfo->nkeys; i < riinfo->nkeys; i++, j++)
+ {
+ Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]);
+ Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]);
+ Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]);
+ Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]);
+
+ quoteOneName(attname,
+ RIAttName(fk_rel, riinfo->fk_attnums[i]));
+ appendStringInfo(&querybuf,
+ "%s %s = $%d",
+ querysep, attname, i + 1);
+ sprintf(paramname, "$%d", j + 1);
+ ri_GenerateQual(&qualbuf, qualsep,
+ paramname, pk_type,
+ riinfo->pf_eq_oprs[i],
+ attname, fk_type);
+ if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll))
+ ri_GenerateQualCollation(&querybuf, pk_coll);
+ querysep = ",";
+ qualsep = "AND";
+ queryoids[i] = pk_type;
+ queryoids[j] = pk_type;
+ }
+ appendBinaryStringInfo(&querybuf, qualbuf.data, qualbuf.len);
+
+ /* Prepare and save the plan */
+ qplan = ri_PlanCheck(querybuf.data, riinfo->nkeys * 2, queryoids,
+ &qkey, fk_rel, pk_rel);
+ }
+
+ /*
+ * We have a plan now. Run it to update the existing references.
+ */
+ ri_PerformCheck(riinfo, &qkey, qplan,
+ fk_rel, pk_rel,
+ oldslot, newslot,
+ true, /* must detect new rows */
+ SPI_OK_UPDATE);
+
+ if (SPI_finish() != SPI_OK_FINISH)
+ elog(ERROR, "SPI_finish failed");
+
+ table_close(fk_rel, RowExclusiveLock);
+
+ return PointerGetDatum(NULL);
+}
+
+
+/*
+ * RI_FKey_setnull_del -
+ *
+ * Set foreign key references to NULL values at delete event on PK table.
+ */
+Datum
+RI_FKey_setnull_del(PG_FUNCTION_ARGS)
+{
+ /* Check that this is a valid trigger call on the right time and event. */
+ ri_CheckTrigger(fcinfo, "RI_FKey_setnull_del", RI_TRIGTYPE_DELETE);
+
+ /* Share code with UPDATE case */
+ return ri_set((TriggerData *) fcinfo->context, true, RI_TRIGTYPE_DELETE);
+}
+
+/*
+ * RI_FKey_setnull_upd -
+ *
+ * Set foreign key references to NULL at update event on PK table.
+ */
+Datum
+RI_FKey_setnull_upd(PG_FUNCTION_ARGS)
+{
+ /* Check that this is a valid trigger call on the right time and event. */
+ ri_CheckTrigger(fcinfo, "RI_FKey_setnull_upd", RI_TRIGTYPE_UPDATE);
+
+ /* Share code with DELETE case */
+ return ri_set((TriggerData *) fcinfo->context, true, RI_TRIGTYPE_UPDATE);
+}
+
+/*
+ * RI_FKey_setdefault_del -
+ *
+ * Set foreign key references to defaults at delete event on PK table.
+ */
+Datum
+RI_FKey_setdefault_del(PG_FUNCTION_ARGS)
+{
+ /* Check that this is a valid trigger call on the right time and event. */
+ ri_CheckTrigger(fcinfo, "RI_FKey_setdefault_del", RI_TRIGTYPE_DELETE);
+
+ /* Share code with UPDATE case */
+ return ri_set((TriggerData *) fcinfo->context, false, RI_TRIGTYPE_DELETE);
+}
+
+/*
+ * RI_FKey_setdefault_upd -
+ *
+ * Set foreign key references to defaults at update event on PK table.
+ */
+Datum
+RI_FKey_setdefault_upd(PG_FUNCTION_ARGS)
+{
+ /* Check that this is a valid trigger call on the right time and event. */
+ ri_CheckTrigger(fcinfo, "RI_FKey_setdefault_upd", RI_TRIGTYPE_UPDATE);
+
+ /* Share code with DELETE case */
+ return ri_set((TriggerData *) fcinfo->context, false, RI_TRIGTYPE_UPDATE);
+}
+
+/*
+ * ri_set -
+ *
+ * Common code for ON DELETE SET NULL, ON DELETE SET DEFAULT, ON UPDATE SET
+ * NULL, and ON UPDATE SET DEFAULT.
+ */
+static Datum
+ri_set(TriggerData *trigdata, bool is_set_null, int tgkind)
+{
+ const RI_ConstraintInfo *riinfo;
+ Relation fk_rel;
+ Relation pk_rel;
+ TupleTableSlot *oldslot;
+ RI_QueryKey qkey;
+ SPIPlanPtr qplan;
+ int32 queryno;
+
+ riinfo = ri_FetchConstraintInfo(trigdata->tg_trigger,
+ trigdata->tg_relation, true);
+
+ /*
+ * Get the relation descriptors of the FK and PK tables and the old tuple.
+ *
+ * fk_rel is opened in RowExclusiveLock mode since that's what our
+ * eventual UPDATE will get on it.
+ */
+ fk_rel = table_open(riinfo->fk_relid, RowExclusiveLock);
+ pk_rel = trigdata->tg_relation;
+ oldslot = trigdata->tg_trigslot;
+
+ if (SPI_connect() != SPI_OK_CONNECT)
+ elog(ERROR, "SPI_connect failed");
+
+ /*
+ * Fetch or prepare a saved plan for the trigger.
+ */
+ switch (tgkind)
+ {
+ case RI_TRIGTYPE_UPDATE:
+ queryno = is_set_null
+ ? RI_PLAN_SETNULL_ONUPDATE
+ : RI_PLAN_SETDEFAULT_ONUPDATE;
+ break;
+ case RI_TRIGTYPE_DELETE:
+ queryno = is_set_null
+ ? RI_PLAN_SETNULL_ONDELETE
+ : RI_PLAN_SETDEFAULT_ONDELETE;
+ break;
+ default:
+ elog(ERROR, "invalid tgkind passed to ri_set");
+ }
+
+ ri_BuildQueryKey(&qkey, riinfo, queryno);
+
+ if ((qplan = ri_FetchPreparedPlan(&qkey)) == NULL)
+ {
+ StringInfoData querybuf;
+ char fkrelname[MAX_QUOTED_REL_NAME_LEN];
+ char attname[MAX_QUOTED_NAME_LEN];
+ char paramname[16];
+ const char *querysep;
+ const char *qualsep;
+ Oid queryoids[RI_MAX_NUMKEYS];
+ const char *fk_only;
+ int num_cols_to_set;
+ const int16 *set_cols;
+
+ switch (tgkind)
+ {
+ case RI_TRIGTYPE_UPDATE:
+ num_cols_to_set = riinfo->nkeys;
+ set_cols = riinfo->fk_attnums;
+ break;
+ case RI_TRIGTYPE_DELETE:
+
+ /*
+ * If confdelsetcols are present, then we only update the
+ * columns specified in that array; otherwise we update all
+ * the referencing columns.
+ */
+ if (riinfo->ndelsetcols != 0)
+ {
+ num_cols_to_set = riinfo->ndelsetcols;
+ set_cols = riinfo->confdelsetcols;
+ }
+ else
+ {
+ num_cols_to_set = riinfo->nkeys;
+ set_cols = riinfo->fk_attnums;
+ }
+ break;
+ default:
+ elog(ERROR, "invalid tgkind passed to ri_set");
+ }
+
+ /* ----------
+ * The query string built is
+ * UPDATE [ONLY] <fktable> SET fkatt1 = {NULL|DEFAULT} [, ...]
+ * WHERE $1 = fkatt1 [AND ...]
+ * The type id's for the $ parameters are those of the
+ * corresponding PK attributes.
+ * ----------
+ */
+ initStringInfo(&querybuf);
+ fk_only = fk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ?
+ "" : "ONLY ";
+ quoteRelationName(fkrelname, fk_rel);
+ appendStringInfo(&querybuf, "UPDATE %s%s SET",
+ fk_only, fkrelname);
+
+ /*
+ * Add assignment clauses
+ */
+ querysep = "";
+ for (int i = 0; i < num_cols_to_set; i++)
+ {
+ quoteOneName(attname, RIAttName(fk_rel, set_cols[i]));
+ appendStringInfo(&querybuf,
+ "%s %s = %s",
+ querysep, attname,
+ is_set_null ? "NULL" : "DEFAULT");
+ querysep = ",";
+ }
+
+ /*
+ * Add WHERE clause
+ */
+ qualsep = "WHERE";
+ for (int i = 0; i < riinfo->nkeys; i++)
+ {
+ Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]);
+ Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]);
+ Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]);
+ Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]);
+
+ quoteOneName(attname,
+ RIAttName(fk_rel, riinfo->fk_attnums[i]));
+
+ sprintf(paramname, "$%d", i + 1);
+ ri_GenerateQual(&querybuf, qualsep,
+ paramname, pk_type,
+ riinfo->pf_eq_oprs[i],
+ attname, fk_type);
+ if (pk_coll != fk_coll && !get_collation_isdeterministic(pk_coll))
+ ri_GenerateQualCollation(&querybuf, pk_coll);
+ qualsep = "AND";
+ queryoids[i] = pk_type;
+ }
+
+ /* Prepare and save the plan */
+ qplan = ri_PlanCheck(querybuf.data, riinfo->nkeys, queryoids,
+ &qkey, fk_rel, pk_rel);
+ }
+
+ /*
+ * We have a plan now. Run it to update the existing references.
+ */
+ ri_PerformCheck(riinfo, &qkey, qplan,
+ fk_rel, pk_rel,
+ oldslot, NULL,
+ true, /* must detect new rows */
+ SPI_OK_UPDATE);
+
+ if (SPI_finish() != SPI_OK_FINISH)
+ elog(ERROR, "SPI_finish failed");
+
+ table_close(fk_rel, RowExclusiveLock);
+
+ if (is_set_null)
+ return PointerGetDatum(NULL);
+ else
+ {
+ /*
+ * If we just deleted or updated the PK row whose key was equal to the
+ * FK columns' default values, and a referencing row exists in the FK
+ * table, we would have updated that row to the same values it already
+ * had --- and RI_FKey_fk_upd_check_required would hence believe no
+ * check is necessary. So we need to do another lookup now and in
+ * case a reference still exists, abort the operation. That is
+ * already implemented in the NO ACTION trigger, so just run it. (This
+ * recheck is only needed in the SET DEFAULT case, since CASCADE would
+ * remove such rows in case of a DELETE operation or would change the
+ * FK key values in case of an UPDATE, while SET NULL is certain to
+ * result in rows that satisfy the FK constraint.)
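+ *
+ * As a concrete (hypothetical) example: if the FK column's default is
+ * 0 and the PK row with key 0 is deleted, the SET DEFAULT update
+ * rewrites the referencing rows to 0 again, and only this recheck
+ * notices that the referenced key no longer exists.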
+ */
+ return ri_restrict(trigdata, true);
+ }
+}
+
+
+/*
+ * RI_FKey_pk_upd_check_required -
+ *
+ * Check if we really need to fire the RI trigger for an update or delete to a PK
+ * relation. This is called by the AFTER trigger queue manager to see if
+ * it can skip queuing an instance of an RI trigger. Returns true if the
+ * trigger must be fired, false if we can prove the constraint will still
+ * be satisfied.
+ *
+ * newslot will be NULL if this is called for a delete.
+ */
+bool
+RI_FKey_pk_upd_check_required(Trigger *trigger, Relation pk_rel,
+ TupleTableSlot *oldslot, TupleTableSlot *newslot)
+{
+ const RI_ConstraintInfo *riinfo;
+
+ riinfo = ri_FetchConstraintInfo(trigger, pk_rel, true);
+
+ /*
+ * If any old key value is NULL, the row could not have been referenced by
+ * an FK row, so no check is needed.
+ */
+ if (ri_NullCheck(RelationGetDescr(pk_rel), oldslot, riinfo, true) != RI_KEYS_NONE_NULL)
+ return false;
+
+ /* If all old and new key values are equal, no check is needed */
+ if (newslot && ri_KeysEqual(pk_rel, oldslot, newslot, riinfo, true))
+ return false;
+
+ /* Else we need to fire the trigger. */
+ return true;
+}
+
+/*
+ * RI_FKey_fk_upd_check_required -
+ *
+ * Check if we really need to fire the RI trigger for an update to an FK
+ * relation. This is called by the AFTER trigger queue manager to see if
+ * it can skip queuing an instance of an RI trigger. Returns true if the
+ * trigger must be fired, false if we can prove the constraint will still
+ * be satisfied.
+ */
+bool
+RI_FKey_fk_upd_check_required(Trigger *trigger, Relation fk_rel,
+ TupleTableSlot *oldslot, TupleTableSlot *newslot)
+{
+ const RI_ConstraintInfo *riinfo;
+ int ri_nullcheck;
+ Datum xminDatum;
+ TransactionId xmin;
+ bool isnull;
+
+ /*
+ * AfterTriggerSaveEvent() handles things such that this function is never
+ * called for partitioned tables.
+ */
+ Assert(fk_rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE);
+
+ riinfo = ri_FetchConstraintInfo(trigger, fk_rel, false);
+
+ ri_nullcheck = ri_NullCheck(RelationGetDescr(fk_rel), newslot, riinfo, false);
+
+ /*
+ * If all new key values are NULL, the row satisfies the constraint, so no
+ * check is needed.
+ */
+ if (ri_nullcheck == RI_KEYS_ALL_NULL)
+ return false;
+
+ /*
+ * If some new key values are NULL, the behavior depends on the match
+ * type.
+ */
+ else if (ri_nullcheck == RI_KEYS_SOME_NULL)
+ {
+ switch (riinfo->confmatchtype)
+ {
+ case FKCONSTR_MATCH_SIMPLE:
+
+ /*
+ * If any new key value is NULL, the row must satisfy the
+ * constraint, so no check is needed.
+ */
+ return false;
+
+ case FKCONSTR_MATCH_PARTIAL:
+
+ /*
+ * Don't know, must run full check.
+ */
+ break;
+
+ case FKCONSTR_MATCH_FULL:
+
+ /*
+ * If some new key values are NULL, the row fails the
+ * constraint. We must not throw error here, because the row
+ * might get invalidated before the constraint is to be
+ * checked, but we should queue the event to apply the check
+ * later.
+ */
+ return true;
+ }
+ }
+
+ /*
+ * We reach this point when no new key values are NULL, or when we could
+ * not decide above.
+ */
+
+ /*
+ * If the original row was inserted by our own transaction, we must fire
+ * the trigger whether or not the keys are equal. This is because our
+ * UPDATE will invalidate the INSERT so that the INSERT RI trigger will
+ * not do anything; so we had better do the UPDATE check. (We could skip
+ * this if we knew the INSERT trigger already fired, but there is no easy
+ * way to know that.)
+ */
+ xminDatum = slot_getsysattr(oldslot, MinTransactionIdAttributeNumber, &isnull);
+ Assert(!isnull);
+ xmin = DatumGetTransactionId(xminDatum);
+ if (TransactionIdIsCurrentTransactionId(xmin))
+ return true;
+
+ /* If all old and new key values are equal, no check is needed */
+ if (ri_KeysEqual(fk_rel, oldslot, newslot, riinfo, false))
+ return false;
+
+ /* Else we need to fire the trigger. */
+ return true;
+}
+
+/*
+ * RI_Initial_Check -
+ *
+ * Check an entire table for non-matching values using a single query.
+ * This is not a trigger procedure, but is called during ALTER TABLE
+ * ADD FOREIGN KEY to validate the initial table contents.
+ *
+ * We expect that the caller has made provision to prevent any problems
+ * caused by concurrent actions. This could be either by locking rel and
+ * pkrel at ShareRowExclusiveLock or higher, or by otherwise ensuring
+ * that triggers implementing the checks are already active.
+ * Hence, we do not need to lock individual rows for the check.
+ *
+ * If the check fails because the current user doesn't have permissions
+ * to read both tables, return false to let our caller know that they will
+ * need to do something else to check the constraint.
+ */
+bool
+RI_Initial_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel)
+{
+ const RI_ConstraintInfo *riinfo;
+ StringInfoData querybuf;
+ char pkrelname[MAX_QUOTED_REL_NAME_LEN];
+ char fkrelname[MAX_QUOTED_REL_NAME_LEN];
+ char pkattname[MAX_QUOTED_NAME_LEN + 3];
+ char fkattname[MAX_QUOTED_NAME_LEN + 3];
+ RangeTblEntry *pkrte;
+ RangeTblEntry *fkrte;
+ const char *sep;
+ const char *fk_only;
+ const char *pk_only;
+ int save_nestlevel;
+ char workmembuf[32];
+ int spi_result;
+ SPIPlanPtr qplan;
+
+ riinfo = ri_FetchConstraintInfo(trigger, fk_rel, false);
+
+ /*
+ * Check to make sure current user has enough permissions to do the test
+ * query. (If not, caller can fall back to the trigger method, which
+ * works because it changes user IDs on the fly.)
+ *
+ * XXX are there any other show-stopper conditions to check?
+ */
+ pkrte = makeNode(RangeTblEntry);
+ pkrte->rtekind = RTE_RELATION;
+ pkrte->relid = RelationGetRelid(pk_rel);
+ pkrte->relkind = pk_rel->rd_rel->relkind;
+ pkrte->rellockmode = AccessShareLock;
+ pkrte->requiredPerms = ACL_SELECT;
+
+ fkrte = makeNode(RangeTblEntry);
+ fkrte->rtekind = RTE_RELATION;
+ fkrte->relid = RelationGetRelid(fk_rel);
+ fkrte->relkind = fk_rel->rd_rel->relkind;
+ fkrte->rellockmode = AccessShareLock;
+ fkrte->requiredPerms = ACL_SELECT;
+
+ for (int i = 0; i < riinfo->nkeys; i++)
+ {
+ int attno;
+
+ attno = riinfo->pk_attnums[i] - FirstLowInvalidHeapAttributeNumber;
+ pkrte->selectedCols = bms_add_member(pkrte->selectedCols, attno);
+
+ attno = riinfo->fk_attnums[i] - FirstLowInvalidHeapAttributeNumber;
+ fkrte->selectedCols = bms_add_member(fkrte->selectedCols, attno);
+ }
+
+ if (!ExecCheckRTPerms(list_make2(fkrte, pkrte), false))
+ return false;
+
+ /*
+ * Also punt if RLS is enabled on either table, unless this role has the
+ * bypassrls privilege or owns the involved table(s) that have RLS
+ * enabled.
+ */
+ if (!has_bypassrls_privilege(GetUserId()) &&
+ ((pk_rel->rd_rel->relrowsecurity &&
+ !pg_class_ownercheck(pkrte->relid, GetUserId())) ||
+ (fk_rel->rd_rel->relrowsecurity &&
+ !pg_class_ownercheck(fkrte->relid, GetUserId()))))
+ return false;
+
+ /*----------
+ * The query string built is:
+ * SELECT fk.keycols FROM [ONLY] relname fk
+ * LEFT OUTER JOIN [ONLY] pkrelname pk
+ * ON (pk.pkkeycol1=fk.keycol1 [AND ...])
+ * WHERE pk.pkkeycol1 IS NULL AND
+ * For MATCH SIMPLE:
+ * (fk.keycol1 IS NOT NULL [AND ...])
+ * For MATCH FULL:
+ * (fk.keycol1 IS NOT NULL [OR ...])
+ *
+ * We attach COLLATE clauses to the operators when comparing columns
+ * that have different collations.
+ *----------
+ */
+ initStringInfo(&querybuf);
+ appendStringInfoString(&querybuf, "SELECT ");
+ sep = "";
+ for (int i = 0; i < riinfo->nkeys; i++)
+ {
+ quoteOneName(fkattname,
+ RIAttName(fk_rel, riinfo->fk_attnums[i]));
+ appendStringInfo(&querybuf, "%sfk.%s", sep, fkattname);
+ sep = ", ";
+ }
+
+ quoteRelationName(pkrelname, pk_rel);
+ quoteRelationName(fkrelname, fk_rel);
+ fk_only = fk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ?
+ "" : "ONLY ";
+ pk_only = pk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ?
+ "" : "ONLY ";
+ appendStringInfo(&querybuf,
+ " FROM %s%s fk LEFT OUTER JOIN %s%s pk ON",
+ fk_only, fkrelname, pk_only, pkrelname);
+
+ strcpy(pkattname, "pk.");
+ strcpy(fkattname, "fk.");
+ sep = "(";
+ for (int i = 0; i < riinfo->nkeys; i++)
+ {
+ Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]);
+ Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]);
+ Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]);
+ Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]);
+
+ quoteOneName(pkattname + 3,
+ RIAttName(pk_rel, riinfo->pk_attnums[i]));
+ quoteOneName(fkattname + 3,
+ RIAttName(fk_rel, riinfo->fk_attnums[i]));
+ ri_GenerateQual(&querybuf, sep,
+ pkattname, pk_type,
+ riinfo->pf_eq_oprs[i],
+ fkattname, fk_type);
+ if (pk_coll != fk_coll)
+ ri_GenerateQualCollation(&querybuf, pk_coll);
+ sep = "AND";
+ }
+
+ /*
+ * It's sufficient to test any one pk attribute for null to detect a join
+ * failure.
+ */
+ quoteOneName(pkattname, RIAttName(pk_rel, riinfo->pk_attnums[0]));
+ appendStringInfo(&querybuf, ") WHERE pk.%s IS NULL AND (", pkattname);
+
+ sep = "";
+ for (int i = 0; i < riinfo->nkeys; i++)
+ {
+ quoteOneName(fkattname, RIAttName(fk_rel, riinfo->fk_attnums[i]));
+ appendStringInfo(&querybuf,
+ "%sfk.%s IS NOT NULL",
+ sep, fkattname);
+ switch (riinfo->confmatchtype)
+ {
+ case FKCONSTR_MATCH_SIMPLE:
+ sep = " AND ";
+ break;
+ case FKCONSTR_MATCH_FULL:
+ sep = " OR ";
+ break;
+ }
+ }
+ appendStringInfoChar(&querybuf, ')');
+
+ /*
+ * Temporarily increase work_mem so that the check query can be executed
+ * more efficiently. It seems okay to do this because the query is simple
+ * enough to not use a multiple of work_mem, and one typically would not
+ * have many large foreign-key validations happening concurrently. So
+ * this seems to meet the criteria for being considered a "maintenance"
+ * operation, and accordingly we use maintenance_work_mem. However, we
+ * must also set hash_mem_multiplier to 1, since it is surely not okay to
+ * let that get applied to the maintenance_work_mem value.
+ *
+ * We use the equivalent of a function SET option to allow the setting to
+ * persist for exactly the duration of the check query. guc.c also takes
+ * care of undoing the setting on error.
+ */
+ save_nestlevel = NewGUCNestLevel();
+
+ snprintf(workmembuf, sizeof(workmembuf), "%d", maintenance_work_mem);
+ (void) set_config_option("work_mem", workmembuf,
+ PGC_USERSET, PGC_S_SESSION,
+ GUC_ACTION_SAVE, true, 0, false);
+ (void) set_config_option("hash_mem_multiplier", "1",
+ PGC_USERSET, PGC_S_SESSION,
+ GUC_ACTION_SAVE, true, 0, false);
+
+ if (SPI_connect() != SPI_OK_CONNECT)
+ elog(ERROR, "SPI_connect failed");
+
+ /*
+ * Generate the plan. We don't need to cache it, and there are no
+ * arguments to the plan.
+ */
+ qplan = SPI_prepare(querybuf.data, 0, NULL);
+
+ if (qplan == NULL)
+ elog(ERROR, "SPI_prepare returned %s for %s",
+ SPI_result_code_string(SPI_result), querybuf.data);
+
+ /*
+ * Run the plan. For safety we force a current snapshot to be used. (In
+ * transaction-snapshot mode, this arguably violates transaction isolation
+ * rules, but we really haven't got much choice.) We don't need to
+ * register the snapshot, because SPI_execute_snapshot will see to it. We
+ * need at most one tuple returned, so pass limit = 1.
+ */
+ spi_result = SPI_execute_snapshot(qplan,
+ NULL, NULL,
+ GetLatestSnapshot(),
+ InvalidSnapshot,
+ true, false, 1);
+
+ /* Check result */
+ if (spi_result != SPI_OK_SELECT)
+ elog(ERROR, "SPI_execute_snapshot returned %s", SPI_result_code_string(spi_result));
+
+ /* Did we find a tuple violating the constraint? */
+ if (SPI_processed > 0)
+ {
+ TupleTableSlot *slot;
+ HeapTuple tuple = SPI_tuptable->vals[0];
+ TupleDesc tupdesc = SPI_tuptable->tupdesc;
+ RI_ConstraintInfo fake_riinfo;
+
+ slot = MakeSingleTupleTableSlot(tupdesc, &TTSOpsVirtual);
+
+ heap_deform_tuple(tuple, tupdesc,
+ slot->tts_values, slot->tts_isnull);
+ ExecStoreVirtualTuple(slot);
+
+ /*
+ * The columns to look at in the result tuple are 1..N, not whatever
+ * they are in the fk_rel. Hack up riinfo so that the subroutines
+ * called here will behave properly.
+ *
+ * In addition to this, we have to pass the correct tupdesc to
+ * ri_ReportViolation, overriding its normal habit of using the pk_rel
+ * or fk_rel's tupdesc.
+ */
+ memcpy(&fake_riinfo, riinfo, sizeof(RI_ConstraintInfo));
+ for (int i = 0; i < fake_riinfo.nkeys; i++)
+ fake_riinfo.fk_attnums[i] = i + 1;
+
+ /*
+ * If it's MATCH FULL, and there are any nulls in the FK keys,
+ * complain about that rather than the lack of a match. MATCH FULL
+ * disallows partially-null FK rows.
+ */
+ if (fake_riinfo.confmatchtype == FKCONSTR_MATCH_FULL &&
+ ri_NullCheck(tupdesc, slot, &fake_riinfo, false) != RI_KEYS_NONE_NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_FOREIGN_KEY_VIOLATION),
+ errmsg("insert or update on table \"%s\" violates foreign key constraint \"%s\"",
+ RelationGetRelationName(fk_rel),
+ NameStr(fake_riinfo.conname)),
+ errdetail("MATCH FULL does not allow mixing of null and nonnull key values."),
+ errtableconstraint(fk_rel,
+ NameStr(fake_riinfo.conname))));
+
+ /*
+ * We tell ri_ReportViolation we were doing the RI_PLAN_CHECK_LOOKUPPK
+ * query, which isn't true, but will cause it to use
+ * fake_riinfo.fk_attnums as we need.
+ */
+ ri_ReportViolation(&fake_riinfo,
+ pk_rel, fk_rel,
+ slot, tupdesc,
+ RI_PLAN_CHECK_LOOKUPPK, false);
+
+ ExecDropSingleTupleTableSlot(slot);
+ }
+
+ if (SPI_finish() != SPI_OK_FINISH)
+ elog(ERROR, "SPI_finish failed");
+
+ /*
+ * Restore work_mem and hash_mem_multiplier.
+ */
+ AtEOXact_GUC(true, save_nestlevel);
+
+ return true;
+}
+
+/*
+ * RI_PartitionRemove_Check -
+ *
+ * Verify no referencing values exist, when a partition is detached on
+ * the referenced side of a foreign key constraint.
+ */
+void
+RI_PartitionRemove_Check(Trigger *trigger, Relation fk_rel, Relation pk_rel)
+{
+ const RI_ConstraintInfo *riinfo;
+ StringInfoData querybuf;
+ char *constraintDef;
+ char pkrelname[MAX_QUOTED_REL_NAME_LEN];
+ char fkrelname[MAX_QUOTED_REL_NAME_LEN];
+ char pkattname[MAX_QUOTED_NAME_LEN + 3];
+ char fkattname[MAX_QUOTED_NAME_LEN + 3];
+ const char *sep;
+ const char *fk_only;
+ int save_nestlevel;
+ char workmembuf[32];
+ int spi_result;
+ SPIPlanPtr qplan;
+ int i;
+
+ riinfo = ri_FetchConstraintInfo(trigger, fk_rel, false);
+
+ /*
+ * We don't check permissions before displaying the error message, on the
+ * assumption that the user detaching the partition must have enough
+ * privileges to examine the table contents anyhow.
+ */
+
+ /*----------
+ * The query string built is:
+ * SELECT fk.keycols FROM [ONLY] relname fk
+ * JOIN pkrelname pk
+ * ON (pk.pkkeycol1=fk.keycol1 [AND ...])
+ * WHERE (<partition constraint>) AND
+ * For MATCH SIMPLE:
+ * (fk.keycol1 IS NOT NULL [AND ...])
+ * For MATCH FULL:
+ * (fk.keycol1 IS NOT NULL [OR ...])
+ *
+ * We attach COLLATE clauses to the operators when comparing columns
+ * that have different collations.
+ *----------
+ */
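+ /*
+ * Illustration only (hypothetical shape): the query built below mirrors
+ * the one in RI_Initial_Check(), except that it uses a plain JOIN rather
+ * than a LEFT OUTER JOIN with an IS NULL test, and the WHERE clause
+ * begins with pk_rel's partition constraint applied to the "pk" alias.
+ * A hypothetical range partition might yield, e.g.
+ * ... WHERE ((pk."id" >= 1) AND (pk."id" < 100)) AND (fk."a" IS NOT NULL)
+ */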
+ initStringInfo(&querybuf);
+ appendStringInfoString(&querybuf, "SELECT ");
+ sep = "";
+ for (i = 0; i < riinfo->nkeys; i++)
+ {
+ quoteOneName(fkattname,
+ RIAttName(fk_rel, riinfo->fk_attnums[i]));
+ appendStringInfo(&querybuf, "%sfk.%s", sep, fkattname);
+ sep = ", ";
+ }
+
+ quoteRelationName(pkrelname, pk_rel);
+ quoteRelationName(fkrelname, fk_rel);
+ fk_only = fk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE ?
+ "" : "ONLY ";
+ appendStringInfo(&querybuf,
+ " FROM %s%s fk JOIN %s pk ON",
+ fk_only, fkrelname, pkrelname);
+ strcpy(pkattname, "pk.");
+ strcpy(fkattname, "fk.");
+ sep = "(";
+ for (i = 0; i < riinfo->nkeys; i++)
+ {
+ Oid pk_type = RIAttType(pk_rel, riinfo->pk_attnums[i]);
+ Oid fk_type = RIAttType(fk_rel, riinfo->fk_attnums[i]);
+ Oid pk_coll = RIAttCollation(pk_rel, riinfo->pk_attnums[i]);
+ Oid fk_coll = RIAttCollation(fk_rel, riinfo->fk_attnums[i]);
+
+ quoteOneName(pkattname + 3,
+ RIAttName(pk_rel, riinfo->pk_attnums[i]));
+ quoteOneName(fkattname + 3,
+ RIAttName(fk_rel, riinfo->fk_attnums[i]));
+ ri_GenerateQual(&querybuf, sep,
+ pkattname, pk_type,
+ riinfo->pf_eq_oprs[i],
+ fkattname, fk_type);
+ if (pk_coll != fk_coll)
+ ri_GenerateQualCollation(&querybuf, pk_coll);
+ sep = "AND";
+ }
+
+ /*
+ * Start the WHERE clause with the partition constraint (except if this is
+ * the default partition and there's no other partition, because the
+ * partition constraint is the empty string in that case.)
+ */
+ constraintDef = pg_get_partconstrdef_string(RelationGetRelid(pk_rel), "pk");
+ if (constraintDef && constraintDef[0] != '\0')
+ appendStringInfo(&querybuf, ") WHERE %s AND (",
+ constraintDef);
+ else
+ appendStringInfoString(&querybuf, ") WHERE (");
+
+ sep = "";
+ for (i = 0; i < riinfo->nkeys; i++)
+ {
+ quoteOneName(fkattname, RIAttName(fk_rel, riinfo->fk_attnums[i]));
+ appendStringInfo(&querybuf,
+ "%sfk.%s IS NOT NULL",
+ sep, fkattname);
+ switch (riinfo->confmatchtype)
+ {
+ case FKCONSTR_MATCH_SIMPLE:
+ sep = " AND ";
+ break;
+ case FKCONSTR_MATCH_FULL:
+ sep = " OR ";
+ break;
+ }
+ }
+ appendStringInfoChar(&querybuf, ')');
+
+ /*
+ * Temporarily increase work_mem so that the check query can be executed
+ * more efficiently. It seems okay to do this because the query is simple
+ * enough to not use a multiple of work_mem, and one typically would not
+ * have many large foreign-key validations happening concurrently. So
+ * this seems to meet the criteria for being considered a "maintenance"
+ * operation, and accordingly we use maintenance_work_mem. However, we
+ * must also set hash_mem_multiplier to 1, since it is surely not okay to
+ * let that get applied to the maintenance_work_mem value.
+ *
+ * We use the equivalent of a function SET option to allow the setting to
+ * persist for exactly the duration of the check query. guc.c also takes
+ * care of undoing the setting on error.
+ */
+ save_nestlevel = NewGUCNestLevel();
+
+ snprintf(workmembuf, sizeof(workmembuf), "%d", maintenance_work_mem);
+ (void) set_config_option("work_mem", workmembuf,
+ PGC_USERSET, PGC_S_SESSION,
+ GUC_ACTION_SAVE, true, 0, false);
+ (void) set_config_option("hash_mem_multiplier", "1",
+ PGC_USERSET, PGC_S_SESSION,
+ GUC_ACTION_SAVE, true, 0, false);
+
+ if (SPI_connect() != SPI_OK_CONNECT)
+ elog(ERROR, "SPI_connect failed");
+
+ /*
+ * Generate the plan. We don't need to cache it, and there are no
+ * arguments to the plan.
+ */
+ qplan = SPI_prepare(querybuf.data, 0, NULL);
+
+ if (qplan == NULL)
+ elog(ERROR, "SPI_prepare returned %s for %s",
+ SPI_result_code_string(SPI_result), querybuf.data);
+
+ /*
+ * Run the plan. For safety we force a current snapshot to be used. (In
+ * transaction-snapshot mode, this arguably violates transaction isolation
+ * rules, but we really haven't got much choice.) We don't need to
+ * register the snapshot, because SPI_execute_snapshot will see to it. We
+ * need at most one tuple returned, so pass limit = 1.
+ */
+ spi_result = SPI_execute_snapshot(qplan,
+ NULL, NULL,
+ GetLatestSnapshot(),
+ InvalidSnapshot,
+ true, false, 1);
+
+ /* Check result */
+ if (spi_result != SPI_OK_SELECT)
+ elog(ERROR, "SPI_execute_snapshot returned %s", SPI_result_code_string(spi_result));
+
+ /* Did we find a tuple that would violate the constraint? */
+ if (SPI_processed > 0)
+ {
+ TupleTableSlot *slot;
+ HeapTuple tuple = SPI_tuptable->vals[0];
+ TupleDesc tupdesc = SPI_tuptable->tupdesc;
+ RI_ConstraintInfo fake_riinfo;
+
+ slot = MakeSingleTupleTableSlot(tupdesc, &TTSOpsVirtual);
+
+ heap_deform_tuple(tuple, tupdesc,
+ slot->tts_values, slot->tts_isnull);
+ ExecStoreVirtualTuple(slot);
+
+ /*
+ * The columns to look at in the result tuple are 1..N, not whatever
+ * they are in the fk_rel. Hack up riinfo so that ri_ReportViolation
+ * will behave properly.
+ *
+ * In addition to this, we have to pass the correct tupdesc to
+ * ri_ReportViolation, overriding its normal habit of using the pk_rel
+ * or fk_rel's tupdesc.
+ */
+ memcpy(&fake_riinfo, riinfo, sizeof(RI_ConstraintInfo));
+ for (i = 0; i < fake_riinfo.nkeys; i++)
+ fake_riinfo.pk_attnums[i] = i + 1;
+
+ ri_ReportViolation(&fake_riinfo, pk_rel, fk_rel,
+ slot, tupdesc, 0, true);
+ }
+
+ if (SPI_finish() != SPI_OK_FINISH)
+ elog(ERROR, "SPI_finish failed");
+
+ /*
+ * Restore work_mem and hash_mem_multiplier.
+ */
+ AtEOXact_GUC(true, save_nestlevel);
+}
+
+
+/* ----------
+ * Local functions below
+ * ----------
+ */
+
+
+/*
+ * quoteOneName --- safely quote a single SQL name
+ *
+ * buffer must be MAX_QUOTED_NAME_LEN long (includes room for \0)
+ */
+static void
+quoteOneName(char *buffer, const char *name)
+{
+ /* Rather than trying to be smart, just always quote it. */
+ *buffer++ = '"';
+ while (*name)
+ {
+ if (*name == '"')
+ *buffer++ = '"';
+ *buffer++ = *name++;
+ }
+ *buffer++ = '"';
+ *buffer = '\0';
+}
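+
+/*
+ * Illustration (hypothetical input): quoteOneName() always wraps the name
+ * in double quotes and doubles any embedded quote, so the name se"ason
+ * comes out as "se""ason".
+ */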
+
+/*
+ * quoteRelationName --- safely quote a fully qualified relation name
+ *
+ * buffer must be MAX_QUOTED_REL_NAME_LEN long (includes room for \0)
+ */
+static void
+quoteRelationName(char *buffer, Relation rel)
+{
+ quoteOneName(buffer, get_namespace_name(RelationGetNamespace(rel)));
+ buffer += strlen(buffer);
+ *buffer++ = '.';
+ quoteOneName(buffer, RelationGetRelationName(rel));
+}
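+
+/*
+ * Illustration (hypothetical relation): for a table "order line" in schema
+ * public, quoteRelationName() produces "public"."order line".
+ */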
+
+/*
+ * ri_GenerateQual --- generate a WHERE clause equating two variables
+ *
+ * This basically appends " sep leftop op rightop" to buf, adding casts
+ * and schema qualification as needed to ensure that the parser will select
+ * the operator we specify. leftop and rightop should be parenthesized
+ * if they aren't variables or parameters.
+ */
+static void
+ri_GenerateQual(StringInfo buf,
+ const char *sep,
+ const char *leftop, Oid leftoptype,
+ Oid opoid,
+ const char *rightop, Oid rightoptype)
+{
+ appendStringInfo(buf, " %s ", sep);
+ generate_operator_clause(buf, leftop, leftoptype, opoid,
+ rightop, rightoptype);
+}
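+
+/*
+ * Illustration (hypothetical arguments): with sep "AND", leftop pk."id",
+ * rightop fk."a", and the integer equality operator, this appends roughly
+ *    AND pk."id" OPERATOR(pg_catalog.=) fk."a"
+ * to buf; generate_operator_clause() schema-qualifies the operator and adds
+ * casts if needed so the parser resolves exactly the operator requested.
+ */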
+
+/*
+ * ri_GenerateQualCollation --- add a COLLATE spec to a WHERE clause
+ *
+ * At present, we intentionally do not use this function for RI queries that
+ * compare a variable to a $n parameter. Since parameter symbols always have
+ * default collation, the effect will be to use the variable's collation.
+ * Now that is only strictly correct when testing the referenced column, since
+ * the SQL standard specifies that RI comparisons should use the referenced
+ * column's collation. However, so long as all collations have the same
+ * notion of equality (which they do, because texteq reduces to bitwise
+ * equality), there's no visible semantic impact from using the referencing
+ * column's collation when testing it, and this is a good thing to do because
+ * it lets us use a normal index on the referencing column. However, we do
+ * have to use this function when directly comparing the referencing and
+ * referenced columns, if they are of different collations; else the parser
+ * will fail to resolve the collation to use.
+ */
+static void
+ri_GenerateQualCollation(StringInfo buf, Oid collation)
+{
+ HeapTuple tp;
+ Form_pg_collation colltup;
+ char *collname;
+ char onename[MAX_QUOTED_NAME_LEN];
+
+ /* Nothing to do if it's a noncollatable data type */
+ if (!OidIsValid(collation))
+ return;
+
+ tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
+ if (!HeapTupleIsValid(tp))
+ elog(ERROR, "cache lookup failed for collation %u", collation);
+ colltup = (Form_pg_collation) GETSTRUCT(tp);
+ collname = NameStr(colltup->collname);
+
+ /*
+ * We qualify the name always, for simplicity and to ensure the query is
+ * not search-path-dependent.
+ */
+ quoteOneName(onename, get_namespace_name(colltup->collnamespace));
+ appendStringInfo(buf, " COLLATE %s", onename);
+ quoteOneName(onename, collname);
+ appendStringInfo(buf, ".%s", onename);
+
+ ReleaseSysCache(tp);
+}
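+
+/*
+ * Illustration (hypothetical collation): for the built-in "C" collation
+ * this appends  COLLATE "pg_catalog"."C"  to buf, keeping the generated
+ * query independent of search_path.
+ */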
+
+/* ----------
+ * ri_BuildQueryKey -
+ *
+ * Construct a hashtable key for a prepared SPI plan of an FK constraint.
+ *
+ * key: output argument, *key is filled in based on the other arguments
+ * riinfo: info derived from pg_constraint entry
+ * constr_queryno: an internal number identifying the query type
+ * (see RI_PLAN_XXX constants at head of file)
+ * ----------
+ */
+static void
+ri_BuildQueryKey(RI_QueryKey *key, const RI_ConstraintInfo *riinfo,
+ int32 constr_queryno)
+{
+ /*
+ * Inherited constraints with a common ancestor can share ri_query_cache
+ * entries for all query types except RI_PLAN_CHECK_LOOKUPPK_FROM_PK.
+ * Except in that case, the query processes the other table involved in
+ * the FK constraint (i.e., not the table on which the trigger has been
+ * fired), and so it will be the same for all members of the inheritance
+ * tree. So we may use the root constraint's OID in the hash key, rather
+ * than the constraint's own OID. This avoids creating duplicate SPI
+ * plans, saving lots of work and memory when there are many partitions
+ * with similar FK constraints.
+ *
+ * (Note that we must still have a separate RI_ConstraintInfo for each
+ * constraint, because partitions can have different column orders,
+ * resulting in different pk_attnums[] or fk_attnums[] array contents.)
+ *
+ * We assume struct RI_QueryKey contains no padding bytes, else we'd need
+ * to use memset to clear them.
+ */
+ if (constr_queryno != RI_PLAN_CHECK_LOOKUPPK_FROM_PK)
+ key->constr_id = riinfo->constraint_root_id;
+ else
+ key->constr_id = riinfo->constraint_id;
+ key->constr_queryno = constr_queryno;
+}
+
+/*
+ * Check that RI trigger function was called in expected context
+ */
+static void
+ri_CheckTrigger(FunctionCallInfo fcinfo, const char *funcname, int tgkind)
+{
+ TriggerData *trigdata = (TriggerData *) fcinfo->context;
+
+ if (!CALLED_AS_TRIGGER(fcinfo))
+ ereport(ERROR,
+ (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
+ errmsg("function \"%s\" was not called by trigger manager", funcname)));
+
+ /*
+ * Check proper event
+ */
+ if (!TRIGGER_FIRED_AFTER(trigdata->tg_event) ||
+ !TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
+ ereport(ERROR,
+ (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
+ errmsg("function \"%s\" must be fired AFTER ROW", funcname)));
+
+ switch (tgkind)
+ {
+ case RI_TRIGTYPE_INSERT:
+ if (!TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+ ereport(ERROR,
+ (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
+ errmsg("function \"%s\" must be fired for INSERT", funcname)));
+ break;
+ case RI_TRIGTYPE_UPDATE:
+ if (!TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+ ereport(ERROR,
+ (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
+ errmsg("function \"%s\" must be fired for UPDATE", funcname)));
+ break;
+ case RI_TRIGTYPE_DELETE:
+ if (!TRIGGER_FIRED_BY_DELETE(trigdata->tg_event))
+ ereport(ERROR,
+ (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
+ errmsg("function \"%s\" must be fired for DELETE", funcname)));
+ break;
+ }
+}
+
+
+/*
+ * Fetch the RI_ConstraintInfo struct for the trigger's FK constraint.
+ */
+static const RI_ConstraintInfo *
+ri_FetchConstraintInfo(Trigger *trigger, Relation trig_rel, bool rel_is_pk)
+{
+ Oid constraintOid = trigger->tgconstraint;
+ const RI_ConstraintInfo *riinfo;
+
+ /*
+ * Check that the FK constraint's OID is available; it might not be if
+ * we've been invoked via an ordinary trigger or an old-style "constraint
+ * trigger".
+ */
+ if (!OidIsValid(constraintOid))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("no pg_constraint entry for trigger \"%s\" on table \"%s\"",
+ trigger->tgname, RelationGetRelationName(trig_rel)),
+ errhint("Remove this referential integrity trigger and its mates, then do ALTER TABLE ADD CONSTRAINT.")));
+
+ /* Find or create a hashtable entry for the constraint */
+ riinfo = ri_LoadConstraintInfo(constraintOid);
+
+ /* Do some easy cross-checks against the trigger call data */
+ if (rel_is_pk)
+ {
+ if (riinfo->fk_relid != trigger->tgconstrrelid ||
+ riinfo->pk_relid != RelationGetRelid(trig_rel))
+ elog(ERROR, "wrong pg_constraint entry for trigger \"%s\" on table \"%s\"",
+ trigger->tgname, RelationGetRelationName(trig_rel));
+ }
+ else
+ {
+ if (riinfo->fk_relid != RelationGetRelid(trig_rel) ||
+ riinfo->pk_relid != trigger->tgconstrrelid)
+ elog(ERROR, "wrong pg_constraint entry for trigger \"%s\" on table \"%s\"",
+ trigger->tgname, RelationGetRelationName(trig_rel));
+ }
+
+ if (riinfo->confmatchtype != FKCONSTR_MATCH_FULL &&
+ riinfo->confmatchtype != FKCONSTR_MATCH_PARTIAL &&
+ riinfo->confmatchtype != FKCONSTR_MATCH_SIMPLE)
+ elog(ERROR, "unrecognized confmatchtype: %d",
+ riinfo->confmatchtype);
+
+ if (riinfo->confmatchtype == FKCONSTR_MATCH_PARTIAL)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("MATCH PARTIAL not yet implemented")));
+
+ return riinfo;
+}
+
+/*
+ * Fetch or create the RI_ConstraintInfo struct for an FK constraint.
+ */
+static const RI_ConstraintInfo *
+ri_LoadConstraintInfo(Oid constraintOid)
+{
+ RI_ConstraintInfo *riinfo;
+ bool found;
+ HeapTuple tup;
+ Form_pg_constraint conForm;
+
+ /*
+ * On the first call initialize the hashtable
+ */
+ if (!ri_constraint_cache)
+ ri_InitHashTables();
+
+ /*
+ * Find or create a hash entry. If we find a valid one, just return it.
+ */
+ riinfo = (RI_ConstraintInfo *) hash_search(ri_constraint_cache,
+ (void *) &constraintOid,
+ HASH_ENTER, &found);
+ if (!found)
+ riinfo->valid = false;
+ else if (riinfo->valid)
+ return riinfo;
+
+ /*
+ * Fetch the pg_constraint row so we can fill in the entry.
+ */
+ tup = SearchSysCache1(CONSTROID, ObjectIdGetDatum(constraintOid));
+ if (!HeapTupleIsValid(tup)) /* should not happen */
+ elog(ERROR, "cache lookup failed for constraint %u", constraintOid);
+ conForm = (Form_pg_constraint) GETSTRUCT(tup);
+
+ if (conForm->contype != CONSTRAINT_FOREIGN) /* should not happen */
+ elog(ERROR, "constraint %u is not a foreign key constraint",
+ constraintOid);
+
+ /* And extract data */
+ Assert(riinfo->constraint_id == constraintOid);
+ if (OidIsValid(conForm->conparentid))
+ riinfo->constraint_root_id =
+ get_ri_constraint_root(conForm->conparentid);
+ else
+ riinfo->constraint_root_id = constraintOid;
+ riinfo->oidHashValue = GetSysCacheHashValue1(CONSTROID,
+ ObjectIdGetDatum(constraintOid));
+ riinfo->rootHashValue = GetSysCacheHashValue1(CONSTROID,
+ ObjectIdGetDatum(riinfo->constraint_root_id));
+ memcpy(&riinfo->conname, &conForm->conname, sizeof(NameData));
+ riinfo->pk_relid = conForm->confrelid;
+ riinfo->fk_relid = conForm->conrelid;
+ riinfo->confupdtype = conForm->confupdtype;
+ riinfo->confdeltype = conForm->confdeltype;
+ riinfo->confmatchtype = conForm->confmatchtype;
+
+ DeconstructFkConstraintRow(tup,
+ &riinfo->nkeys,
+ riinfo->fk_attnums,
+ riinfo->pk_attnums,
+ riinfo->pf_eq_oprs,
+ riinfo->pp_eq_oprs,
+ riinfo->ff_eq_oprs,
+ &riinfo->ndelsetcols,
+ riinfo->confdelsetcols);
+
+ ReleaseSysCache(tup);
+
+ /*
+ * For efficient processing of invalidation messages below, we keep a
+ * doubly-linked list, and a count, of all currently valid entries.
+ */
+ dlist_push_tail(&ri_constraint_cache_valid_list, &riinfo->valid_link);
+ ri_constraint_cache_valid_count++;
+
+ riinfo->valid = true;
+
+ return riinfo;
+}
+
+/*
+ * get_ri_constraint_root
+ * Returns the OID of the constraint's root parent
+ */
+static Oid
+get_ri_constraint_root(Oid constrOid)
+{
+ for (;;)
+ {
+ HeapTuple tuple;
+ Oid constrParentOid;
+
+ tuple = SearchSysCache1(CONSTROID, ObjectIdGetDatum(constrOid));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for constraint %u", constrOid);
+ constrParentOid = ((Form_pg_constraint) GETSTRUCT(tuple))->conparentid;
+ ReleaseSysCache(tuple);
+ if (!OidIsValid(constrParentOid))
+ break; /* we reached the root constraint */
+ constrOid = constrParentOid;
+ }
+ return constrOid;
+}
+
+/*
+ * Callback for pg_constraint inval events
+ *
+ * While most syscache callbacks just flush all their entries, pg_constraint
+ * gets enough update traffic that it's probably worth being smarter.
+ * Invalidate any ri_constraint_cache entry associated with the syscache
+ * entry with the specified hash value, or all entries if hashvalue == 0.
+ *
+ * Note: at the time a cache invalidation message is processed there may be
+ * active references to the cache. Because of this we never remove entries
+ * from the cache, but only mark them invalid, which is harmless to active
+ * uses. (Any query using an entry should hold a lock sufficient to keep that
+ * data from changing under it --- but we may get cache flushes anyway.)
+ */
+static void
+InvalidateConstraintCacheCallBack(Datum arg, int cacheid, uint32 hashvalue)
+{
+ dlist_mutable_iter iter;
+
+ Assert(ri_constraint_cache != NULL);
+
+ /*
+ * If the list of currently valid entries gets excessively large, we mark
+ * them all invalid so we can empty the list. This arrangement avoids
+ * O(N^2) behavior in situations where a session touches many foreign keys
+ * and also does many ALTER TABLEs, such as a restore from pg_dump.
+ */
+ if (ri_constraint_cache_valid_count > 1000)
+ hashvalue = 0; /* pretend it's a cache reset */
+
+ dlist_foreach_modify(iter, &ri_constraint_cache_valid_list)
+ {
+ RI_ConstraintInfo *riinfo = dlist_container(RI_ConstraintInfo,
+ valid_link, iter.cur);
+
+ /*
+ * We must invalidate not only entries directly matching the given
+ * hash value, but also child entries, in case the invalidation
+ * affects a root constraint.
+ */
+ if (hashvalue == 0 ||
+ riinfo->oidHashValue == hashvalue ||
+ riinfo->rootHashValue == hashvalue)
+ {
+ riinfo->valid = false;
+ /* Remove invalidated entries from the list, too */
+ dlist_delete(iter.cur);
+ ri_constraint_cache_valid_count--;
+ }
+ }
+}
+
+
+/*
+ * Prepare execution plan for a query to enforce an RI restriction
+ */
+static SPIPlanPtr
+ri_PlanCheck(const char *querystr, int nargs, Oid *argtypes,
+ RI_QueryKey *qkey, Relation fk_rel, Relation pk_rel)
+{
+ SPIPlanPtr qplan;
+ Relation query_rel;
+ Oid save_userid;
+ int save_sec_context;
+
+ /*
+ * Use the query type code to determine whether the query is run against
+ * the PK or FK table; we'll do the check as that table's owner
+ */
+ if (qkey->constr_queryno <= RI_PLAN_LAST_ON_PK)
+ query_rel = pk_rel;
+ else
+ query_rel = fk_rel;
+
+ /* Switch to proper UID to perform check as */
+ GetUserIdAndSecContext(&save_userid, &save_sec_context);
+ SetUserIdAndSecContext(RelationGetForm(query_rel)->relowner,
+ save_sec_context | SECURITY_LOCAL_USERID_CHANGE |
+ SECURITY_NOFORCE_RLS);
+
+ /* Create the plan */
+ qplan = SPI_prepare(querystr, nargs, argtypes);
+
+ if (qplan == NULL)
+ elog(ERROR, "SPI_prepare returned %s for %s", SPI_result_code_string(SPI_result), querystr);
+
+ /* Restore UID and security context */
+ SetUserIdAndSecContext(save_userid, save_sec_context);
+
+ /* Save the plan */
+ SPI_keepplan(qplan);
+ ri_HashPreparedPlan(qkey, qplan);
+
+ return qplan;
+}
+
+/*
+ * Perform a query to enforce an RI restriction
+ */
+static bool
+ri_PerformCheck(const RI_ConstraintInfo *riinfo,
+ RI_QueryKey *qkey, SPIPlanPtr qplan,
+ Relation fk_rel, Relation pk_rel,
+ TupleTableSlot *oldslot, TupleTableSlot *newslot,
+ bool detectNewRows, int expect_OK)
+{
+ Relation query_rel,
+ source_rel;
+ bool source_is_pk;
+ Snapshot test_snapshot;
+ Snapshot crosscheck_snapshot;
+ int limit;
+ int spi_result;
+ Oid save_userid;
+ int save_sec_context;
+ Datum vals[RI_MAX_NUMKEYS * 2];
+ char nulls[RI_MAX_NUMKEYS * 2];
+
+ /*
+ * Use the query type code to determine whether the query is run against
+ * the PK or FK table; we'll do the check as that table's owner
+ */
+ if (qkey->constr_queryno <= RI_PLAN_LAST_ON_PK)
+ query_rel = pk_rel;
+ else
+ query_rel = fk_rel;
+
+ /*
+ * The values for the query are taken from the table on which the trigger
+ * is called - it is normally the other one with respect to query_rel. An
+ * exception is ri_Check_Pk_Match(), which uses the PK table for both (and
+ * sets queryno to RI_PLAN_CHECK_LOOKUPPK_FROM_PK). We might eventually
+ * need some less klugy way to determine this.
+ */
+ if (qkey->constr_queryno == RI_PLAN_CHECK_LOOKUPPK)
+ {
+ source_rel = fk_rel;
+ source_is_pk = false;
+ }
+ else
+ {
+ source_rel = pk_rel;
+ source_is_pk = true;
+ }
+
+ /* Extract the parameters to be passed into the query */
+ if (newslot)
+ {
+ ri_ExtractValues(source_rel, newslot, riinfo, source_is_pk,
+ vals, nulls);
+ if (oldslot)
+ ri_ExtractValues(source_rel, oldslot, riinfo, source_is_pk,
+ vals + riinfo->nkeys, nulls + riinfo->nkeys);
+ }
+ else
+ {
+ ri_ExtractValues(source_rel, oldslot, riinfo, source_is_pk,
+ vals, nulls);
+ }
+
+ /*
+ * In READ COMMITTED mode, we just need to use an up-to-date regular
+ * snapshot, and we will see all rows that could be interesting. But in
+ * transaction-snapshot mode, we can't change the transaction snapshot. If
+ * the caller passes detectNewRows == false then it's okay to do the query
+ * with the transaction snapshot; otherwise we use a current snapshot, and
+ * tell the executor to error out if it finds any rows under the current
+ * snapshot that wouldn't be visible per the transaction snapshot. Note
+ * that SPI_execute_snapshot will register the snapshots, so we don't need
+ * to bother here.
+ */
+ if (IsolationUsesXactSnapshot() && detectNewRows)
+ {
+ CommandCounterIncrement(); /* be sure all my own work is visible */
+ test_snapshot = GetLatestSnapshot();
+ crosscheck_snapshot = GetTransactionSnapshot();
+ }
+ else
+ {
+ /* the default SPI behavior is okay */
+ test_snapshot = InvalidSnapshot;
+ crosscheck_snapshot = InvalidSnapshot;
+ }
+
+ /*
+ * If this is a select query (e.g., for a 'no action' or 'restrict'
+ * trigger), we only need to see if there is a single row in the table
+ * matching the key. Otherwise, limit = 0, because we want the query to
+ * affect ALL the matching rows.
+ */
+ limit = (expect_OK == SPI_OK_SELECT) ? 1 : 0;
+
+ /* Switch to proper UID to perform check as */
+ GetUserIdAndSecContext(&save_userid, &save_sec_context);
+ SetUserIdAndSecContext(RelationGetForm(query_rel)->relowner,
+ save_sec_context | SECURITY_LOCAL_USERID_CHANGE |
+ SECURITY_NOFORCE_RLS);
+
+ /* Finally we can run the query. */
+ spi_result = SPI_execute_snapshot(qplan,
+ vals, nulls,
+ test_snapshot, crosscheck_snapshot,
+ false, false, limit);
+
+ /* Restore UID and security context */
+ SetUserIdAndSecContext(save_userid, save_sec_context);
+
+ /* Check result */
+ if (spi_result < 0)
+ elog(ERROR, "SPI_execute_snapshot returned %s", SPI_result_code_string(spi_result));
+
+ if (expect_OK >= 0 && spi_result != expect_OK)
+ ereport(ERROR,
+ (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("referential integrity query on \"%s\" from constraint \"%s\" on \"%s\" gave unexpected result",
+ RelationGetRelationName(pk_rel),
+ NameStr(riinfo->conname),
+ RelationGetRelationName(fk_rel)),
+ errhint("This is most likely due to a rule having rewritten the query.")));
+
+ /* XXX wouldn't it be clearer to do this part at the caller? */
+ if (qkey->constr_queryno != RI_PLAN_CHECK_LOOKUPPK_FROM_PK &&
+ expect_OK == SPI_OK_SELECT &&
+ (SPI_processed == 0) == (qkey->constr_queryno == RI_PLAN_CHECK_LOOKUPPK))
+ ri_ReportViolation(riinfo,
+ pk_rel, fk_rel,
+ newslot ? newslot : oldslot,
+ NULL,
+ qkey->constr_queryno, false);
+
+ return SPI_processed != 0;
+}
+
+/*
+ * Extract fields from a tuple into Datum/nulls arrays
+ */
+static void
+ri_ExtractValues(Relation rel, TupleTableSlot *slot,
+ const RI_ConstraintInfo *riinfo, bool rel_is_pk,
+ Datum *vals, char *nulls)
+{
+ const int16 *attnums;
+ bool isnull;
+
+ if (rel_is_pk)
+ attnums = riinfo->pk_attnums;
+ else
+ attnums = riinfo->fk_attnums;
+
+ for (int i = 0; i < riinfo->nkeys; i++)
+ {
+ vals[i] = slot_getattr(slot, attnums[i], &isnull);
+ nulls[i] = isnull ? 'n' : ' ';
+ }
+}
+
+/*
+ * Produce an error report
+ *
+ * If the failed constraint was on insert/update to the FK table,
+ * we want the key names and values extracted from there, and the error
+ * message to look like 'key blah is not present in PK'.
+ * Otherwise, the attr names and values come from the PK table and the
+ * message looks like 'key blah is still referenced from FK'.
+ */
+static void
+ri_ReportViolation(const RI_ConstraintInfo *riinfo,
+ Relation pk_rel, Relation fk_rel,
+ TupleTableSlot *violatorslot, TupleDesc tupdesc,
+ int queryno, bool partgone)
+{
+ StringInfoData key_names;
+ StringInfoData key_values;
+ bool onfk;
+ const int16 *attnums;
+ Oid rel_oid;
+ AclResult aclresult;
+ bool has_perm = true;
+
+ /*
+ * Determine which relation to complain about. If tupdesc wasn't passed
+ * by caller, assume the violator tuple came from there.
+ */
+ onfk = (queryno == RI_PLAN_CHECK_LOOKUPPK);
+ if (onfk)
+ {
+ attnums = riinfo->fk_attnums;
+ rel_oid = fk_rel->rd_id;
+ if (tupdesc == NULL)
+ tupdesc = fk_rel->rd_att;
+ }
+ else
+ {
+ attnums = riinfo->pk_attnums;
+ rel_oid = pk_rel->rd_id;
+ if (tupdesc == NULL)
+ tupdesc = pk_rel->rd_att;
+ }
+
+ /*
+ * Check permissions - if the user does not have access to view the data in
+ * any of the key columns, then we don't include the errdetail() below.
+ *
+ * Check if RLS is enabled on the relation first. If so, we don't return
+ * any specifics to avoid leaking data.
+ *
+ * Check table-level permissions next and, failing that, column-level
+ * privileges.
+ *
+ * When a partition at the referenced side is being detached/dropped, we
+ * needn't check, since the user must be the table owner anyway.
+ */
+ if (partgone)
+ has_perm = true;
+ else if (check_enable_rls(rel_oid, InvalidOid, true) != RLS_ENABLED)
+ {
+ aclresult = pg_class_aclcheck(rel_oid, GetUserId(), ACL_SELECT);
+ if (aclresult != ACLCHECK_OK)
+ {
+ /* Try for column-level permissions */
+ for (int idx = 0; idx < riinfo->nkeys; idx++)
+ {
+ aclresult = pg_attribute_aclcheck(rel_oid, attnums[idx],
+ GetUserId(),
+ ACL_SELECT);
+
+ /* No access to the key */
+ if (aclresult != ACLCHECK_OK)
+ {
+ has_perm = false;
+ break;
+ }
+ }
+ }
+ }
+ else
+ has_perm = false;
+
+ if (has_perm)
+ {
+ /* Get printable versions of the keys involved */
+ initStringInfo(&key_names);
+ initStringInfo(&key_values);
+ for (int idx = 0; idx < riinfo->nkeys; idx++)
+ {
+ int fnum = attnums[idx];
+ Form_pg_attribute att = TupleDescAttr(tupdesc, fnum - 1);
+ char *name,
+ *val;
+ Datum datum;
+ bool isnull;
+
+ name = NameStr(att->attname);
+
+ datum = slot_getattr(violatorslot, fnum, &isnull);
+ if (!isnull)
+ {
+ Oid foutoid;
+ bool typisvarlena;
+
+ getTypeOutputInfo(att->atttypid, &foutoid, &typisvarlena);
+ val = OidOutputFunctionCall(foutoid, datum);
+ }
+ else
+ val = "null";
+
+ if (idx > 0)
+ {
+ appendStringInfoString(&key_names, ", ");
+ appendStringInfoString(&key_values, ", ");
+ }
+ appendStringInfoString(&key_names, name);
+ appendStringInfoString(&key_values, val);
+ }
+ }
+
+ if (partgone)
+ ereport(ERROR,
+ (errcode(ERRCODE_FOREIGN_KEY_VIOLATION),
+ errmsg("removing partition \"%s\" violates foreign key constraint \"%s\"",
+ RelationGetRelationName(pk_rel),
+ NameStr(riinfo->conname)),
+ errdetail("Key (%s)=(%s) is still referenced from table \"%s\".",
+ key_names.data, key_values.data,
+ RelationGetRelationName(fk_rel)),
+ errtableconstraint(fk_rel, NameStr(riinfo->conname))));
+ else if (onfk)
+ ereport(ERROR,
+ (errcode(ERRCODE_FOREIGN_KEY_VIOLATION),
+ errmsg("insert or update on table \"%s\" violates foreign key constraint \"%s\"",
+ RelationGetRelationName(fk_rel),
+ NameStr(riinfo->conname)),
+ has_perm ?
+ errdetail("Key (%s)=(%s) is not present in table \"%s\".",
+ key_names.data, key_values.data,
+ RelationGetRelationName(pk_rel)) :
+ errdetail("Key is not present in table \"%s\".",
+ RelationGetRelationName(pk_rel)),
+ errtableconstraint(fk_rel, NameStr(riinfo->conname))));
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_FOREIGN_KEY_VIOLATION),
+ errmsg("update or delete on table \"%s\" violates foreign key constraint \"%s\" on table \"%s\"",
+ RelationGetRelationName(pk_rel),
+ NameStr(riinfo->conname),
+ RelationGetRelationName(fk_rel)),
+ has_perm ?
+ errdetail("Key (%s)=(%s) is still referenced from table \"%s\".",
+ key_names.data, key_values.data,
+ RelationGetRelationName(fk_rel)) :
+ errdetail("Key is still referenced from table \"%s\".",
+ RelationGetRelationName(fk_rel)),
+ errtableconstraint(fk_rel, NameStr(riinfo->conname))));
+}
+
+
+/*
+ * ri_NullCheck -
+ *
+ * Determine the NULL state of all key values in a tuple
+ *
+ * Returns one of RI_KEYS_ALL_NULL, RI_KEYS_NONE_NULL or RI_KEYS_SOME_NULL.
+ */
+static int
+ri_NullCheck(TupleDesc tupDesc,
+ TupleTableSlot *slot,
+ const RI_ConstraintInfo *riinfo, bool rel_is_pk)
+{
+ const int16 *attnums;
+ bool allnull = true;
+ bool nonenull = true;
+
+ if (rel_is_pk)
+ attnums = riinfo->pk_attnums;
+ else
+ attnums = riinfo->fk_attnums;
+
+ for (int i = 0; i < riinfo->nkeys; i++)
+ {
+ if (slot_attisnull(slot, attnums[i]))
+ nonenull = false;
+ else
+ allnull = false;
+ }
+
+ if (allnull)
+ return RI_KEYS_ALL_NULL;
+
+ if (nonenull)
+ return RI_KEYS_NONE_NULL;
+
+ return RI_KEYS_SOME_NULL;
+}
+
+
+/*
+ * ri_InitHashTables -
+ *
+ * Initialize our internal hash tables.
+ */
+static void
+ri_InitHashTables(void)
+{
+ HASHCTL ctl;
+
+ ctl.keysize = sizeof(Oid);
+ ctl.entrysize = sizeof(RI_ConstraintInfo);
+ ri_constraint_cache = hash_create("RI constraint cache",
+ RI_INIT_CONSTRAINTHASHSIZE,
+ &ctl, HASH_ELEM | HASH_BLOBS);
+
+ /* Arrange to flush cache on pg_constraint changes */
+ CacheRegisterSyscacheCallback(CONSTROID,
+ InvalidateConstraintCacheCallBack,
+ (Datum) 0);
+
+ ctl.keysize = sizeof(RI_QueryKey);
+ ctl.entrysize = sizeof(RI_QueryHashEntry);
+ ri_query_cache = hash_create("RI query cache",
+ RI_INIT_QUERYHASHSIZE,
+ &ctl, HASH_ELEM | HASH_BLOBS);
+
+ ctl.keysize = sizeof(RI_CompareKey);
+ ctl.entrysize = sizeof(RI_CompareHashEntry);
+ ri_compare_cache = hash_create("RI compare cache",
+ RI_INIT_QUERYHASHSIZE,
+ &ctl, HASH_ELEM | HASH_BLOBS);
+}
+
+
+/*
+ * ri_FetchPreparedPlan -
+ *
+ * Lookup for a query key in our private hash table of prepared
+ * and saved SPI execution plans. Return the plan if found or NULL.
+ */
+static SPIPlanPtr
+ri_FetchPreparedPlan(RI_QueryKey *key)
+{
+ RI_QueryHashEntry *entry;
+ SPIPlanPtr plan;
+
+ /*
+ * On the first call initialize the hashtable
+ */
+ if (!ri_query_cache)
+ ri_InitHashTables();
+
+ /*
+ * Lookup for the key
+ */
+ entry = (RI_QueryHashEntry *) hash_search(ri_query_cache,
+ (void *) key,
+ HASH_FIND, NULL);
+ if (entry == NULL)
+ return NULL;
+
+ /*
+ * Check whether the plan is still valid. If it isn't, we don't want to
+ * simply rely on plancache.c to regenerate it; rather we should start
+ * from scratch and rebuild the query text too. This is to cover cases
+ * such as table/column renames. We depend on the plancache machinery to
+ * detect possible invalidations, though.
+ *
+ * CAUTION: this check is only trustworthy if the caller has already
+ * locked both FK and PK rels.
+ */
+ plan = entry->plan;
+ if (plan && SPI_plan_is_valid(plan))
+ return plan;
+
+ /*
+ * Otherwise we might as well flush the cached plan now, to free a little
+ * memory space before we make a new one.
+ */
+ entry->plan = NULL;
+ if (plan)
+ SPI_freeplan(plan);
+
+ return NULL;
+}
+
+
+/*
+ * ri_HashPreparedPlan -
+ *
+ * Add another plan to our private SPI query plan hashtable.
+ */
+static void
+ri_HashPreparedPlan(RI_QueryKey *key, SPIPlanPtr plan)
+{
+ RI_QueryHashEntry *entry;
+ bool found;
+
+ /*
+ * On the first call initialize the hashtable
+ */
+ if (!ri_query_cache)
+ ri_InitHashTables();
+
+ /*
+ * Add the new plan. We might be overwriting an entry previously found
+ * invalid by ri_FetchPreparedPlan.
+ */
+ entry = (RI_QueryHashEntry *) hash_search(ri_query_cache,
+ (void *) key,
+ HASH_ENTER, &found);
+ Assert(!found || entry->plan == NULL);
+ entry->plan = plan;
+}
+
+
+/*
+ * ri_KeysEqual -
+ *
+ * Check if all key values in OLD and NEW are equal.
+ *
+ * Note: at some point we might wish to redefine this as checking for
+ * "IS NOT DISTINCT" rather than "=", that is, allow two nulls to be
+ * considered equal. Currently there is no need since all callers have
+ * previously found at least one of the rows to contain no nulls.
+ */
+static bool
+ri_KeysEqual(Relation rel, TupleTableSlot *oldslot, TupleTableSlot *newslot,
+ const RI_ConstraintInfo *riinfo, bool rel_is_pk)
+{
+ const int16 *attnums;
+
+ if (rel_is_pk)
+ attnums = riinfo->pk_attnums;
+ else
+ attnums = riinfo->fk_attnums;
+
+ /* XXX: could be worthwhile to fetch all necessary attrs at once */
+ for (int i = 0; i < riinfo->nkeys; i++)
+ {
+ Datum oldvalue;
+ Datum newvalue;
+ bool isnull;
+
+ /*
+ * Get one attribute's oldvalue. If it is NULL, they're not equal.
+ */
+ oldvalue = slot_getattr(oldslot, attnums[i], &isnull);
+ if (isnull)
+ return false;
+
+ /*
+ * Get one attribute's newvalue. If it is NULL, they're not equal.
+ */
+ newvalue = slot_getattr(newslot, attnums[i], &isnull);
+ if (isnull)
+ return false;
+
+ if (rel_is_pk)
+ {
+ /*
+ * If we are looking at the PK table, then do a bytewise
+ * comparison. We must propagate PK changes if the value is
+ * changed to one that "looks" different but would compare as
+ * equal using the equality operator. This only makes a
+ * difference for ON UPDATE CASCADE, but for consistency we treat
+ * all changes to the PK the same.
+ */
+ Form_pg_attribute att = TupleDescAttr(oldslot->tts_tupleDescriptor, attnums[i] - 1);
+
+ if (!datum_image_eq(oldvalue, newvalue, att->attbyval, att->attlen))
+ return false;
+ }
+ else
+ {
+ /*
+ * For the FK table, compare with the appropriate equality
+ * operator. Changes that compare equal will still satisfy the
+ * constraint after the update.
+ */
+ if (!ri_AttributesEqual(riinfo->ff_eq_oprs[i], RIAttType(rel, attnums[i]),
+ oldvalue, newvalue))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+
+/*
+ * ri_AttributesEqual -
+ *
+ * Call the appropriate equality comparison operator for two values.
+ *
+ * NB: we have already checked that neither value is null.
+ */
+static bool
+ri_AttributesEqual(Oid eq_opr, Oid typeid,
+ Datum oldvalue, Datum newvalue)
+{
+ RI_CompareHashEntry *entry = ri_HashCompareOp(eq_opr, typeid);
+
+ /* Do we need to cast the values? */
+ if (OidIsValid(entry->cast_func_finfo.fn_oid))
+ {
+ oldvalue = FunctionCall3(&entry->cast_func_finfo,
+ oldvalue,
+ Int32GetDatum(-1), /* typmod */
+ BoolGetDatum(false)); /* implicit coercion */
+ newvalue = FunctionCall3(&entry->cast_func_finfo,
+ newvalue,
+ Int32GetDatum(-1), /* typmod */
+ BoolGetDatum(false)); /* implicit coercion */
+ }
+
+ /*
+ * Apply the comparison operator.
+ *
+ * Note: This function is part of a call stack that determines whether an
+ * update to a row is significant enough that it needs checking or action
+ * on the other side of a foreign-key constraint. Therefore, the
+ * comparison here would need to be done with the collation of the *other*
+ * table. For simplicity (e.g., we might not even have the other table
+ * open), we'll just use the default collation here, which could lead to
+ * some false negatives. All this would break if we ever allow
+ * database-wide collations to be nondeterministic.
+ */
+ return DatumGetBool(FunctionCall2Coll(&entry->eq_opr_finfo,
+ DEFAULT_COLLATION_OID,
+ oldvalue, newvalue));
+}
+
+/*
+ * ri_HashCompareOp -
+ *
+ * See if we know how to compare two values, and create a new hash entry
+ * if not.
+ */
+static RI_CompareHashEntry *
+ri_HashCompareOp(Oid eq_opr, Oid typeid)
+{
+ RI_CompareKey key;
+ RI_CompareHashEntry *entry;
+ bool found;
+
+ /*
+ * On the first call initialize the hashtable
+ */
+ if (!ri_compare_cache)
+ ri_InitHashTables();
+
+ /*
+ * Find or create a hash entry. Note we're assuming RI_CompareKey
+ * contains no struct padding.
+ */
+ key.eq_opr = eq_opr;
+ key.typeid = typeid;
+ entry = (RI_CompareHashEntry *) hash_search(ri_compare_cache,
+ (void *) &key,
+ HASH_ENTER, &found);
+ if (!found)
+ entry->valid = false;
+
+ /*
+ * If not already initialized, do so. Since we'll keep this hash entry
+ * for the life of the backend, put any subsidiary info for the function
+ * cache structs into TopMemoryContext.
+ */
+ if (!entry->valid)
+ {
+ Oid lefttype,
+ righttype,
+ castfunc;
+ CoercionPathType pathtype;
+
+ /* We always need to know how to call the equality operator */
+ fmgr_info_cxt(get_opcode(eq_opr), &entry->eq_opr_finfo,
+ TopMemoryContext);
+
+ /*
+ * If we chose to use a cast from FK to PK type, we may have to apply
+ * the cast function to get to the operator's input type.
+ *
+ * XXX eventually it would be good to support array-coercion cases
+ * here and in ri_AttributesEqual(). At the moment there is no point
+ * because cases involving nonidentical array types will be rejected
+ * at constraint creation time.
+ *
+ * XXX perhaps also consider supporting CoerceViaIO? No need at the
+ * moment since that will never be generated for implicit coercions.
+ */
+ op_input_types(eq_opr, &lefttype, &righttype);
+ Assert(lefttype == righttype);
+ if (typeid == lefttype)
+ castfunc = InvalidOid; /* simplest case */
+ else
+ {
+ pathtype = find_coercion_pathway(lefttype, typeid,
+ COERCION_IMPLICIT,
+ &castfunc);
+ if (pathtype != COERCION_PATH_FUNC &&
+ pathtype != COERCION_PATH_RELABELTYPE)
+ {
+ /*
+ * The declared input type of the eq_opr might be a
+ * polymorphic type such as ANYARRAY or ANYENUM, or other
+ * special cases such as RECORD; find_coercion_pathway
+ * currently doesn't subsume these special cases.
+ */
+ if (!IsBinaryCoercible(typeid, lefttype))
+ elog(ERROR, "no conversion function from %s to %s",
+ format_type_be(typeid),
+ format_type_be(lefttype));
+ }
+ }
+ if (OidIsValid(castfunc))
+ fmgr_info_cxt(castfunc, &entry->cast_func_finfo,
+ TopMemoryContext);
+ else
+ entry->cast_func_finfo.fn_oid = InvalidOid;
+ entry->valid = true;
+ }
+
+ return entry;
+}
+
+
+/*
+ * Given a trigger function OID, determine whether it is an RI trigger,
+ * and if so whether it is attached to PK or FK relation.
+ */
+int
+RI_FKey_trigger_type(Oid tgfoid)
+{
+ switch (tgfoid)
+ {
+ case F_RI_FKEY_CASCADE_DEL:
+ case F_RI_FKEY_CASCADE_UPD:
+ case F_RI_FKEY_RESTRICT_DEL:
+ case F_RI_FKEY_RESTRICT_UPD:
+ case F_RI_FKEY_SETNULL_DEL:
+ case F_RI_FKEY_SETNULL_UPD:
+ case F_RI_FKEY_SETDEFAULT_DEL:
+ case F_RI_FKEY_SETDEFAULT_UPD:
+ case F_RI_FKEY_NOACTION_DEL:
+ case F_RI_FKEY_NOACTION_UPD:
+ return RI_TRIGGER_PK;
+
+ case F_RI_FKEY_CHECK_INS:
+ case F_RI_FKEY_CHECK_UPD:
+ return RI_TRIGGER_FK;
+ }
+
+ return RI_TRIGGER_NONE;
+}
diff --git a/src/backend/utils/adt/rowtypes.c b/src/backend/utils/adt/rowtypes.c
new file mode 100644
index 0000000..db843a0
--- /dev/null
+++ b/src/backend/utils/adt/rowtypes.c
@@ -0,0 +1,2017 @@
+/*-------------------------------------------------------------------------
+ *
+ * rowtypes.c
+ * I/O and comparison functions for generic composite types.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/rowtypes.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+
+#include "access/detoast.h"
+#include "access/htup_details.h"
+#include "catalog/pg_type.h"
+#include "common/hashfn.h"
+#include "funcapi.h"
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/lsyscache.h"
+#include "utils/typcache.h"
+
+
+/*
+ * structure to cache metadata needed for record I/O
+ */
+typedef struct ColumnIOData
+{
+ Oid column_type;
+ Oid typiofunc;
+ Oid typioparam;
+ bool typisvarlena;
+ FmgrInfo proc;
+} ColumnIOData;
+
+typedef struct RecordIOData
+{
+ Oid record_type;
+ int32 record_typmod;
+ int ncolumns;
+ ColumnIOData columns[FLEXIBLE_ARRAY_MEMBER];
+} RecordIOData;
+
+/*
+ * structure to cache metadata needed for record comparison
+ */
+typedef struct ColumnCompareData
+{
+ TypeCacheEntry *typentry; /* has everything we need, actually */
+} ColumnCompareData;
+
+typedef struct RecordCompareData
+{
+ int ncolumns; /* allocated length of columns[] */
+ Oid record1_type;
+ int32 record1_typmod;
+ Oid record2_type;
+ int32 record2_typmod;
+ ColumnCompareData columns[FLEXIBLE_ARRAY_MEMBER];
+} RecordCompareData;
+
+
+/*
+ * record_in - input routine for any composite type.
+ */
+Datum
+record_in(PG_FUNCTION_ARGS)
+{
+ char *string = PG_GETARG_CSTRING(0);
+ Oid tupType = PG_GETARG_OID(1);
+ int32 tupTypmod = PG_GETARG_INT32(2);
+ HeapTupleHeader result;
+ TupleDesc tupdesc;
+ HeapTuple tuple;
+ RecordIOData *my_extra;
+ bool needComma = false;
+ int ncolumns;
+ int i;
+ char *ptr;
+ Datum *values;
+ bool *nulls;
+ StringInfoData buf;
+
+ check_stack_depth(); /* recurses for record-type columns */
+
+ /*
+ * Give a friendly error message if we did not get enough info to identify
+ * the target record type. (lookup_rowtype_tupdesc would fail anyway, but
+ * with a non-user-friendly message.) In ordinary SQL usage, we'll get -1
+ * for typmod, since composite types and RECORD have no type modifiers at
+ * the SQL level, and thus must fail for RECORD. However some callers can
+ * supply a valid typmod, and then we can do something useful for RECORD.
+ */
+ if (tupType == RECORDOID && tupTypmod < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("input of anonymous composite types is not implemented")));
+
+ /*
+ * This comes from the composite type's pg_type.oid and stores system oids
+ * in user tables, specifically DatumTupleFields. This oid must be
+ * preserved by binary upgrades.
+ */
+ tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
+ ncolumns = tupdesc->natts;
+
+ /*
+ * We arrange to look up the needed I/O info just once per series of
+ * calls, assuming the record type doesn't change underneath us.
+ */
+ my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
+ if (my_extra == NULL ||
+ my_extra->ncolumns != ncolumns)
+ {
+ fcinfo->flinfo->fn_extra =
+ MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ offsetof(RecordIOData, columns) +
+ ncolumns * sizeof(ColumnIOData));
+ my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
+ my_extra->record_type = InvalidOid;
+ my_extra->record_typmod = 0;
+ }
+
+ if (my_extra->record_type != tupType ||
+ my_extra->record_typmod != tupTypmod)
+ {
+ MemSet(my_extra, 0,
+ offsetof(RecordIOData, columns) +
+ ncolumns * sizeof(ColumnIOData));
+ my_extra->record_type = tupType;
+ my_extra->record_typmod = tupTypmod;
+ my_extra->ncolumns = ncolumns;
+ }
+
+ values = (Datum *) palloc(ncolumns * sizeof(Datum));
+ nulls = (bool *) palloc(ncolumns * sizeof(bool));
+
+ /*
+ * Scan the string. We use "buf" to accumulate the de-quoted data for
+ * each column, which is then fed to the appropriate input converter.
+ */
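+ /*
+ * Illustration (hypothetical literal for a three-column row type): the
+ * input (1,"a,b",) parses as the fields 1, a,b (the quotes protect the
+ * embedded comma and are stripped), and NULL (a completely empty field).
+ */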
+ ptr = string;
+ /* Allow leading whitespace */
+ while (*ptr && isspace((unsigned char) *ptr))
+ ptr++;
+ if (*ptr++ != '(')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed record literal: \"%s\"", string),
+ errdetail("Missing left parenthesis.")));
+
+ initStringInfo(&buf);
+
+ for (i = 0; i < ncolumns; i++)
+ {
+ Form_pg_attribute att = TupleDescAttr(tupdesc, i);
+ ColumnIOData *column_info = &my_extra->columns[i];
+ Oid column_type = att->atttypid;
+ char *column_data;
+
+ /* Ignore dropped columns in datatype, but fill with nulls */
+ if (att->attisdropped)
+ {
+ values[i] = (Datum) 0;
+ nulls[i] = true;
+ continue;
+ }
+
+ if (needComma)
+ {
+ /* Skip comma that separates prior field from this one */
+ if (*ptr == ',')
+ ptr++;
+ else
+ /* *ptr must be ')' */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed record literal: \"%s\"", string),
+ errdetail("Too few columns.")));
+ }
+
+ /* Check for null: completely empty input means null */
+ if (*ptr == ',' || *ptr == ')')
+ {
+ column_data = NULL;
+ nulls[i] = true;
+ }
+ else
+ {
+ /* Extract string for this column */
+ bool inquote = false;
+
+ resetStringInfo(&buf);
+ while (inquote || !(*ptr == ',' || *ptr == ')'))
+ {
+ char ch = *ptr++;
+
+ if (ch == '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed record literal: \"%s\"",
+ string),
+ errdetail("Unexpected end of input.")));
+ if (ch == '\\')
+ {
+ if (*ptr == '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed record literal: \"%s\"",
+ string),
+ errdetail("Unexpected end of input.")));
+ appendStringInfoChar(&buf, *ptr++);
+ }
+ else if (ch == '"')
+ {
+ if (!inquote)
+ inquote = true;
+ else if (*ptr == '"')
+ {
+ /* doubled quote within quote sequence */
+ appendStringInfoChar(&buf, *ptr++);
+ }
+ else
+ inquote = false;
+ }
+ else
+ appendStringInfoChar(&buf, ch);
+ }
+
+ column_data = buf.data;
+ nulls[i] = false;
+ }
+
+ /*
+ * Convert the column value
+ */
+ if (column_info->column_type != column_type)
+ {
+ getTypeInputInfo(column_type,
+ &column_info->typiofunc,
+ &column_info->typioparam);
+ fmgr_info_cxt(column_info->typiofunc, &column_info->proc,
+ fcinfo->flinfo->fn_mcxt);
+ column_info->column_type = column_type;
+ }
+
+ values[i] = InputFunctionCall(&column_info->proc,
+ column_data,
+ column_info->typioparam,
+ att->atttypmod);
+
+ /*
+ * Prep for next column
+ */
+ needComma = true;
+ }
+
+ if (*ptr++ != ')')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed record literal: \"%s\"", string),
+ errdetail("Too many columns.")));
+ /* Allow trailing whitespace */
+ while (*ptr && isspace((unsigned char) *ptr))
+ ptr++;
+ if (*ptr)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("malformed record literal: \"%s\"", string),
+ errdetail("Junk after right parenthesis.")));
+
+ tuple = heap_form_tuple(tupdesc, values, nulls);
+
+ /*
+ * We cannot return tuple->t_data because heap_form_tuple allocates it as
+ * part of a larger chunk, and our caller may expect to be able to pfree
+ * our result. So must copy the info into a new palloc chunk.
+ */
+ result = (HeapTupleHeader) palloc(tuple->t_len);
+ memcpy(result, tuple->t_data, tuple->t_len);
+
+ heap_freetuple(tuple);
+ pfree(buf.data);
+ pfree(values);
+ pfree(nulls);
+ ReleaseTupleDesc(tupdesc);
+
+ PG_RETURN_HEAPTUPLEHEADER(result);
+}
+
+/*
+ * record_out - output routine for any composite type.
+ */
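+/*
+ * Illustration (hypothetical row): a three-column row holding 1, NULL and
+ * the text a"b is emitted as (1,,"a""b"): NULL fields emit nothing, and
+ * quoted values have embedded quotes and backslashes doubled.
+ */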
+Datum
+record_out(PG_FUNCTION_ARGS)
+{
+ HeapTupleHeader rec = PG_GETARG_HEAPTUPLEHEADER(0);
+ Oid tupType;
+ int32 tupTypmod;
+ TupleDesc tupdesc;
+ HeapTupleData tuple;
+ RecordIOData *my_extra;
+ bool needComma = false;
+ int ncolumns;
+ int i;
+ Datum *values;
+ bool *nulls;
+ StringInfoData buf;
+
+ check_stack_depth(); /* recurses for record-type columns */
+
+ /* Extract type info from the tuple itself */
+ tupType = HeapTupleHeaderGetTypeId(rec);
+ tupTypmod = HeapTupleHeaderGetTypMod(rec);
+ tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
+ ncolumns = tupdesc->natts;
+
+ /* Build a temporary HeapTuple control structure */
+ tuple.t_len = HeapTupleHeaderGetDatumLength(rec);
+ ItemPointerSetInvalid(&(tuple.t_self));
+ tuple.t_tableOid = InvalidOid;
+ tuple.t_data = rec;
+
+ /*
+ * We arrange to look up the needed I/O info just once per series of
+ * calls, assuming the record type doesn't change underneath us.
+ */
+ my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
+ if (my_extra == NULL ||
+ my_extra->ncolumns != ncolumns)
+ {
+ fcinfo->flinfo->fn_extra =
+ MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ offsetof(RecordIOData, columns) +
+ ncolumns * sizeof(ColumnIOData));
+ my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
+ my_extra->record_type = InvalidOid;
+ my_extra->record_typmod = 0;
+ }
+
+ if (my_extra->record_type != tupType ||
+ my_extra->record_typmod != tupTypmod)
+ {
+ MemSet(my_extra, 0,
+ offsetof(RecordIOData, columns) +
+ ncolumns * sizeof(ColumnIOData));
+ my_extra->record_type = tupType;
+ my_extra->record_typmod = tupTypmod;
+ my_extra->ncolumns = ncolumns;
+ }
+
+ values = (Datum *) palloc(ncolumns * sizeof(Datum));
+ nulls = (bool *) palloc(ncolumns * sizeof(bool));
+
+ /* Break down the tuple into fields */
+ heap_deform_tuple(&tuple, tupdesc, values, nulls);
+
+ /* And build the result string */
+ initStringInfo(&buf);
+
+ appendStringInfoChar(&buf, '(');
+
+ for (i = 0; i < ncolumns; i++)
+ {
+ Form_pg_attribute att = TupleDescAttr(tupdesc, i);
+ ColumnIOData *column_info = &my_extra->columns[i];
+ Oid column_type = att->atttypid;
+ Datum attr;
+ char *value;
+ char *tmp;
+ bool nq;
+
+ /* Ignore dropped columns in datatype */
+ if (att->attisdropped)
+ continue;
+
+ if (needComma)
+ appendStringInfoChar(&buf, ',');
+ needComma = true;
+
+ if (nulls[i])
+ {
+ /* emit nothing... */
+ continue;
+ }
+
+ /*
+ * Convert the column value to text
+ */
+ if (column_info->column_type != column_type)
+ {
+ getTypeOutputInfo(column_type,
+ &column_info->typiofunc,
+ &column_info->typisvarlena);
+ fmgr_info_cxt(column_info->typiofunc, &column_info->proc,
+ fcinfo->flinfo->fn_mcxt);
+ column_info->column_type = column_type;
+ }
+
+ attr = values[i];
+ value = OutputFunctionCall(&column_info->proc, attr);
+
+ /* Detect whether we need double quotes for this value */
+ nq = (value[0] == '\0'); /* force quotes for empty string */
+ for (tmp = value; *tmp; tmp++)
+ {
+ char ch = *tmp;
+
+ if (ch == '"' || ch == '\\' ||
+ ch == '(' || ch == ')' || ch == ',' ||
+ isspace((unsigned char) ch))
+ {
+ nq = true;
+ break;
+ }
+ }
+
+ /* And emit the string */
+ if (nq)
+ appendStringInfoCharMacro(&buf, '"');
+ for (tmp = value; *tmp; tmp++)
+ {
+ char ch = *tmp;
+
+ if (ch == '"' || ch == '\\')
+ appendStringInfoCharMacro(&buf, ch);
+ appendStringInfoCharMacro(&buf, ch);
+ }
+ if (nq)
+ appendStringInfoCharMacro(&buf, '"');
+ }
+
+ appendStringInfoChar(&buf, ')');
+
+ pfree(values);
+ pfree(nulls);
+ ReleaseTupleDesc(tupdesc);
+
+ PG_RETURN_CSTRING(buf.data);
+}
+
+/*
+ * record_recv - binary input routine for any composite type.
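+ *
+ * The wire format parsed below is: an int32 count of non-dropped columns,
+ * then for each such column an int32 type OID, an int32 data length (-1
+ * denoting a NULL), and that many bytes of the column type's binary
+ * representation.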
+ */
+Datum
+record_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ Oid tupType = PG_GETARG_OID(1);
+ int32 tupTypmod = PG_GETARG_INT32(2);
+ HeapTupleHeader result;
+ TupleDesc tupdesc;
+ HeapTuple tuple;
+ RecordIOData *my_extra;
+ int ncolumns;
+ int usercols;
+ int validcols;
+ int i;
+ Datum *values;
+ bool *nulls;
+
+ check_stack_depth(); /* recurses for record-type columns */
+
+ /*
+ * Give a friendly error message if we did not get enough info to identify
+ * the target record type. (lookup_rowtype_tupdesc would fail anyway, but
+ * with a non-user-friendly message.) In ordinary SQL usage, we'll get -1
+ * for typmod, since composite types and RECORD have no type modifiers at
+ * the SQL level, and thus must fail for RECORD. However some callers can
+ * supply a valid typmod, and then we can do something useful for RECORD.
+ */
+ if (tupType == RECORDOID && tupTypmod < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("input of anonymous composite types is not implemented")));
+
+ tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
+ ncolumns = tupdesc->natts;
+
+ /*
+ * We arrange to look up the needed I/O info just once per series of
+ * calls, assuming the record type doesn't change underneath us.
+ */
+ my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
+ if (my_extra == NULL ||
+ my_extra->ncolumns != ncolumns)
+ {
+ fcinfo->flinfo->fn_extra =
+ MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ offsetof(RecordIOData, columns) +
+ ncolumns * sizeof(ColumnIOData));
+ my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
+ my_extra->record_type = InvalidOid;
+ my_extra->record_typmod = 0;
+ }
+
+ if (my_extra->record_type != tupType ||
+ my_extra->record_typmod != tupTypmod)
+ {
+ MemSet(my_extra, 0,
+ offsetof(RecordIOData, columns) +
+ ncolumns * sizeof(ColumnIOData));
+ my_extra->record_type = tupType;
+ my_extra->record_typmod = tupTypmod;
+ my_extra->ncolumns = ncolumns;
+ }
+
+ values = (Datum *) palloc(ncolumns * sizeof(Datum));
+ nulls = (bool *) palloc(ncolumns * sizeof(bool));
+
+ /* Fetch number of columns user thinks it has */
+ usercols = pq_getmsgint(buf, 4);
+
+ /* Need to scan to count nondeleted columns */
+ validcols = 0;
+ for (i = 0; i < ncolumns; i++)
+ {
+ if (!TupleDescAttr(tupdesc, i)->attisdropped)
+ validcols++;
+ }
+ if (usercols != validcols)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("wrong number of columns: %d, expected %d",
+ usercols, validcols)));
+
+ /* Process each column */
+ for (i = 0; i < ncolumns; i++)
+ {
+ Form_pg_attribute att = TupleDescAttr(tupdesc, i);
+ ColumnIOData *column_info = &my_extra->columns[i];
+ Oid column_type = att->atttypid;
+ Oid coltypoid;
+ int itemlen;
+ StringInfoData item_buf;
+ StringInfo bufptr;
+ char csave;
+
+ /* Ignore dropped columns in datatype, but fill with nulls */
+ if (att->attisdropped)
+ {
+ values[i] = (Datum) 0;
+ nulls[i] = true;
+ continue;
+ }
+
+ /* Check column type recorded in the data */
+ coltypoid = pq_getmsgint(buf, sizeof(Oid));
+
+ /*
+ * From a security standpoint, it doesn't matter whether the input's
+ * column type matches what we expect: the column type's receive
+ * function has to be robust enough to cope with invalid data.
+ * However, from a user-friendliness standpoint, it's nicer to
+ * complain about type mismatches than to throw "improper binary
+ * format" errors. But there's a problem: only built-in types have
+ * OIDs that are stable enough to believe that a mismatch is a real
+ * issue. So complain only if both OIDs are in the built-in range.
+ * Otherwise, carry on with the column type we "should" be getting.
+ */
+ if (coltypoid != column_type &&
+ coltypoid < FirstGenbkiObjectId &&
+ column_type < FirstGenbkiObjectId)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("binary data has type %u (%s) instead of expected %u (%s) in record column %d",
+ coltypoid,
+ format_type_extended(coltypoid, -1,
+ FORMAT_TYPE_ALLOW_INVALID),
+ column_type,
+ format_type_extended(column_type, -1,
+ FORMAT_TYPE_ALLOW_INVALID),
+ i + 1)));
+
+ /* Get and check the item length */
+ itemlen = pq_getmsgint(buf, 4);
+ if (itemlen < -1 || itemlen > (buf->len - buf->cursor))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("insufficient data left in message")));
+
+ if (itemlen == -1)
+ {
+ /* -1 length means NULL */
+ bufptr = NULL;
+ nulls[i] = true;
+ csave = 0; /* keep compiler quiet */
+ }
+ else
+ {
+ /*
+ * Rather than copying data around, we just set up a phony
+ * StringInfo pointing to the correct portion of the input buffer.
+ * We assume we can scribble on the input buffer so as to maintain
+ * the convention that StringInfos have a trailing null.
+ */
+ item_buf.data = &buf->data[buf->cursor];
+ item_buf.maxlen = itemlen + 1;
+ item_buf.len = itemlen;
+ item_buf.cursor = 0;
+
+ buf->cursor += itemlen;
+
+ csave = buf->data[buf->cursor];
+ buf->data[buf->cursor] = '\0';
+
+ bufptr = &item_buf;
+ nulls[i] = false;
+ }
+
+ /* Now call the column's receiveproc */
+ if (column_info->column_type != column_type)
+ {
+ getTypeBinaryInputInfo(column_type,
+ &column_info->typiofunc,
+ &column_info->typioparam);
+ fmgr_info_cxt(column_info->typiofunc, &column_info->proc,
+ fcinfo->flinfo->fn_mcxt);
+ column_info->column_type = column_type;
+ }
+
+ values[i] = ReceiveFunctionCall(&column_info->proc,
+ bufptr,
+ column_info->typioparam,
+ att->atttypmod);
+
+ if (bufptr)
+ {
+ /* Trouble if it didn't eat the whole buffer */
+ if (item_buf.cursor != itemlen)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("improper binary format in record column %d",
+ i + 1)));
+
+ buf->data[buf->cursor] = csave;
+ }
+ }
+
+ tuple = heap_form_tuple(tupdesc, values, nulls);
+
+ /*
+ * We cannot return tuple->t_data because heap_form_tuple allocates it as
+ * part of a larger chunk, and our caller may expect to be able to pfree
+ * our result. So we must copy the info into a new palloc chunk.
+ */
+ result = (HeapTupleHeader) palloc(tuple->t_len);
+ memcpy(result, tuple->t_data, tuple->t_len);
+
+ heap_freetuple(tuple);
+ pfree(values);
+ pfree(nulls);
+ ReleaseTupleDesc(tupdesc);
+
+ PG_RETURN_HEAPTUPLEHEADER(result);
+}
+
+/*
+ * record_send - binary output routine for any composite type.
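+ *
+ * Emits the same wire format that record_recv expects: an int32 count of
+ * non-dropped columns, then per column an int32 type OID, an int32 data
+ * length (-1 for NULL), and the column value's binary representation.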
+ */
+Datum
+record_send(PG_FUNCTION_ARGS)
+{
+ HeapTupleHeader rec = PG_GETARG_HEAPTUPLEHEADER(0);
+ Oid tupType;
+ int32 tupTypmod;
+ TupleDesc tupdesc;
+ HeapTupleData tuple;
+ RecordIOData *my_extra;
+ int ncolumns;
+ int validcols;
+ int i;
+ Datum *values;
+ bool *nulls;
+ StringInfoData buf;
+
+ check_stack_depth(); /* recurses for record-type columns */
+
+ /* Extract type info from the tuple itself */
+ tupType = HeapTupleHeaderGetTypeId(rec);
+ tupTypmod = HeapTupleHeaderGetTypMod(rec);
+ tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
+ ncolumns = tupdesc->natts;
+
+ /* Build a temporary HeapTuple control structure */
+ tuple.t_len = HeapTupleHeaderGetDatumLength(rec);
+ ItemPointerSetInvalid(&(tuple.t_self));
+ tuple.t_tableOid = InvalidOid;
+ tuple.t_data = rec;
+
+ /*
+ * We arrange to look up the needed I/O info just once per series of
+ * calls, assuming the record type doesn't change underneath us.
+ */
+ my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
+ if (my_extra == NULL ||
+ my_extra->ncolumns != ncolumns)
+ {
+ fcinfo->flinfo->fn_extra =
+ MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ offsetof(RecordIOData, columns) +
+ ncolumns * sizeof(ColumnIOData));
+ my_extra = (RecordIOData *) fcinfo->flinfo->fn_extra;
+ my_extra->record_type = InvalidOid;
+ my_extra->record_typmod = 0;
+ }
+
+ if (my_extra->record_type != tupType ||
+ my_extra->record_typmod != tupTypmod)
+ {
+ MemSet(my_extra, 0,
+ offsetof(RecordIOData, columns) +
+ ncolumns * sizeof(ColumnIOData));
+ my_extra->record_type = tupType;
+ my_extra->record_typmod = tupTypmod;
+ my_extra->ncolumns = ncolumns;
+ }
+
+ values = (Datum *) palloc(ncolumns * sizeof(Datum));
+ nulls = (bool *) palloc(ncolumns * sizeof(bool));
+
+ /* Break down the tuple into fields */
+ heap_deform_tuple(&tuple, tupdesc, values, nulls);
+
+ /* And build the result string */
+ pq_begintypsend(&buf);
+
+ /* Need to scan to count nondeleted columns */
+ validcols = 0;
+ for (i = 0; i < ncolumns; i++)
+ {
+ if (!TupleDescAttr(tupdesc, i)->attisdropped)
+ validcols++;
+ }
+ pq_sendint32(&buf, validcols);
+
+ for (i = 0; i < ncolumns; i++)
+ {
+ Form_pg_attribute att = TupleDescAttr(tupdesc, i);
+ ColumnIOData *column_info = &my_extra->columns[i];
+ Oid column_type = att->atttypid;
+ Datum attr;
+ bytea *outputbytes;
+
+ /* Ignore dropped columns in datatype */
+ if (att->attisdropped)
+ continue;
+
+ pq_sendint32(&buf, column_type);
+
+ if (nulls[i])
+ {
+ /* emit -1 data length to signify a NULL */
+ pq_sendint32(&buf, -1);
+ continue;
+ }
+
+ /*
+ * Convert the column value to binary
+ */
+ if (column_info->column_type != column_type)
+ {
+ getTypeBinaryOutputInfo(column_type,
+ &column_info->typiofunc,
+ &column_info->typisvarlena);
+ fmgr_info_cxt(column_info->typiofunc, &column_info->proc,
+ fcinfo->flinfo->fn_mcxt);
+ column_info->column_type = column_type;
+ }
+
+ attr = values[i];
+ outputbytes = SendFunctionCall(&column_info->proc, attr);
+ pq_sendint32(&buf, VARSIZE(outputbytes) - VARHDRSZ);
+ pq_sendbytes(&buf, VARDATA(outputbytes),
+ VARSIZE(outputbytes) - VARHDRSZ);
+ }
+
+ pfree(values);
+ pfree(nulls);
+ ReleaseTupleDesc(tupdesc);
+
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+
+/*
+ * record_cmp()
+ * Internal comparison function for records.
+ *
+ * Returns -1, 0 or 1
+ *
+ * Do not assume that the two inputs are exactly the same record type;
+ * for instance we might be comparing an anonymous ROW() construct against a
+ * named composite type. We will compare as long as they have the same number
+ * of non-dropped columns of the same types.
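+ *
+ * Per the null handling in the loop below, two NULL fields compare as
+ * equal and a NULL field sorts after any non-NULL value, so for example
+ * ROW(1, NULL) sorts after ROW(1, 2).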
+ */
+static int
+record_cmp(FunctionCallInfo fcinfo)
+{
+ HeapTupleHeader record1 = PG_GETARG_HEAPTUPLEHEADER(0);
+ HeapTupleHeader record2 = PG_GETARG_HEAPTUPLEHEADER(1);
+ int result = 0;
+ Oid tupType1;
+ Oid tupType2;
+ int32 tupTypmod1;
+ int32 tupTypmod2;
+ TupleDesc tupdesc1;
+ TupleDesc tupdesc2;
+ HeapTupleData tuple1;
+ HeapTupleData tuple2;
+ int ncolumns1;
+ int ncolumns2;
+ RecordCompareData *my_extra;
+ int ncols;
+ Datum *values1;
+ Datum *values2;
+ bool *nulls1;
+ bool *nulls2;
+ int i1;
+ int i2;
+ int j;
+
+ check_stack_depth(); /* recurses for record-type columns */
+
+ /* Extract type info from the tuples */
+ tupType1 = HeapTupleHeaderGetTypeId(record1);
+ tupTypmod1 = HeapTupleHeaderGetTypMod(record1);
+ tupdesc1 = lookup_rowtype_tupdesc(tupType1, tupTypmod1);
+ ncolumns1 = tupdesc1->natts;
+ tupType2 = HeapTupleHeaderGetTypeId(record2);
+ tupTypmod2 = HeapTupleHeaderGetTypMod(record2);
+ tupdesc2 = lookup_rowtype_tupdesc(tupType2, tupTypmod2);
+ ncolumns2 = tupdesc2->natts;
+
+ /* Build temporary HeapTuple control structures */
+ tuple1.t_len = HeapTupleHeaderGetDatumLength(record1);
+ ItemPointerSetInvalid(&(tuple1.t_self));
+ tuple1.t_tableOid = InvalidOid;
+ tuple1.t_data = record1;
+ tuple2.t_len = HeapTupleHeaderGetDatumLength(record2);
+ ItemPointerSetInvalid(&(tuple2.t_self));
+ tuple2.t_tableOid = InvalidOid;
+ tuple2.t_data = record2;
+
+ /*
+ * We arrange to look up the needed comparison info just once per series
+ * of calls, assuming the record types don't change underneath us.
+ */
+ ncols = Max(ncolumns1, ncolumns2);
+ my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra;
+ if (my_extra == NULL ||
+ my_extra->ncolumns < ncols)
+ {
+ fcinfo->flinfo->fn_extra =
+ MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ offsetof(RecordCompareData, columns) +
+ ncols * sizeof(ColumnCompareData));
+ my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra;
+ my_extra->ncolumns = ncols;
+ my_extra->record1_type = InvalidOid;
+ my_extra->record1_typmod = 0;
+ my_extra->record2_type = InvalidOid;
+ my_extra->record2_typmod = 0;
+ }
+
+ if (my_extra->record1_type != tupType1 ||
+ my_extra->record1_typmod != tupTypmod1 ||
+ my_extra->record2_type != tupType2 ||
+ my_extra->record2_typmod != tupTypmod2)
+ {
+ MemSet(my_extra->columns, 0, ncols * sizeof(ColumnCompareData));
+ my_extra->record1_type = tupType1;
+ my_extra->record1_typmod = tupTypmod1;
+ my_extra->record2_type = tupType2;
+ my_extra->record2_typmod = tupTypmod2;
+ }
+
+ /* Break down the tuples into fields */
+ values1 = (Datum *) palloc(ncolumns1 * sizeof(Datum));
+ nulls1 = (bool *) palloc(ncolumns1 * sizeof(bool));
+ heap_deform_tuple(&tuple1, tupdesc1, values1, nulls1);
+ values2 = (Datum *) palloc(ncolumns2 * sizeof(Datum));
+ nulls2 = (bool *) palloc(ncolumns2 * sizeof(bool));
+ heap_deform_tuple(&tuple2, tupdesc2, values2, nulls2);
+
+ /*
+ * Scan corresponding columns, allowing for dropped columns in different
+ * places in the two rows. i1 and i2 are physical column indexes, j is
+ * the logical column index.
+ */
+ i1 = i2 = j = 0;
+ while (i1 < ncolumns1 || i2 < ncolumns2)
+ {
+ Form_pg_attribute att1;
+ Form_pg_attribute att2;
+ TypeCacheEntry *typentry;
+ Oid collation;
+
+ /*
+ * Skip dropped columns
+ */
+ if (i1 < ncolumns1 && TupleDescAttr(tupdesc1, i1)->attisdropped)
+ {
+ i1++;
+ continue;
+ }
+ if (i2 < ncolumns2 && TupleDescAttr(tupdesc2, i2)->attisdropped)
+ {
+ i2++;
+ continue;
+ }
+ if (i1 >= ncolumns1 || i2 >= ncolumns2)
+ break; /* we'll deal with mismatch below loop */
+
+ att1 = TupleDescAttr(tupdesc1, i1);
+ att2 = TupleDescAttr(tupdesc2, i2);
+
+ /*
+ * Have two matching columns, they must be same type
+ */
+ if (att1->atttypid != att2->atttypid)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("cannot compare dissimilar column types %s and %s at record column %d",
+ format_type_be(att1->atttypid),
+ format_type_be(att2->atttypid),
+ j + 1)));
+
+ /*
+ * If they're not same collation, we don't complain here, but the
+ * comparison function might.
+ */
+ collation = att1->attcollation;
+ if (collation != att2->attcollation)
+ collation = InvalidOid;
+
+ /*
+ * Look up the comparison function if not done already
+ */
+ typentry = my_extra->columns[j].typentry;
+ if (typentry == NULL ||
+ typentry->type_id != att1->atttypid)
+ {
+ typentry = lookup_type_cache(att1->atttypid,
+ TYPECACHE_CMP_PROC_FINFO);
+ if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify a comparison function for type %s",
+ format_type_be(typentry->type_id))));
+ my_extra->columns[j].typentry = typentry;
+ }
+
+ /*
+ * We consider two NULLs equal; NULL > not-NULL.
+ */
+ if (!nulls1[i1] || !nulls2[i2])
+ {
+ LOCAL_FCINFO(locfcinfo, 2);
+ int32 cmpresult;
+
+ if (nulls1[i1])
+ {
+ /* arg1 is greater than arg2 */
+ result = 1;
+ break;
+ }
+ if (nulls2[i2])
+ {
+ /* arg1 is less than arg2 */
+ result = -1;
+ break;
+ }
+
+ /* Compare the pair of elements */
+ InitFunctionCallInfoData(*locfcinfo, &typentry->cmp_proc_finfo, 2,
+ collation, NULL, NULL);
+ locfcinfo->args[0].value = values1[i1];
+ locfcinfo->args[0].isnull = false;
+ locfcinfo->args[1].value = values2[i2];
+ locfcinfo->args[1].isnull = false;
+ cmpresult = DatumGetInt32(FunctionCallInvoke(locfcinfo));
+
+ /* We don't expect comparison support functions to return null */
+ Assert(!locfcinfo->isnull);
+
+ if (cmpresult < 0)
+ {
+ /* arg1 is less than arg2 */
+ result = -1;
+ break;
+ }
+ else if (cmpresult > 0)
+ {
+ /* arg1 is greater than arg2 */
+ result = 1;
+ break;
+ }
+ }
+
+ /* equal, so continue to next column */
+ i1++, i2++, j++;
+ }
+
+ /*
+ * If we didn't break out of the loop early, check for column count
+ * mismatch. (We do not report such mismatch if we found unequal column
+ * values; is that a feature or a bug?)
+ */
+ if (result == 0)
+ {
+ if (i1 != ncolumns1 || i2 != ncolumns2)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("cannot compare record types with different numbers of columns")));
+ }
+
+ pfree(values1);
+ pfree(nulls1);
+ pfree(values2);
+ pfree(nulls2);
+ ReleaseTupleDesc(tupdesc1);
+ ReleaseTupleDesc(tupdesc2);
+
+ /* Avoid leaking memory when handed toasted input. */
+ PG_FREE_IF_COPY(record1, 0);
+ PG_FREE_IF_COPY(record2, 1);
+
+ return result;
+}
+
+/*
+ * record_eq :
+ * compares two records for equality
+ * result :
+ * returns true if the records are equal, false otherwise.
+ *
+ * Note: we do not use record_cmp here, since equality may be meaningful in
+ * datatypes that don't have a total ordering (and hence no btree support).
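+ *
+ * As in record_cmp, two NULL fields are treated as equal, so for example
+ * ROW(1, NULL) and ROW(1, NULL) are reported equal by this function.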
+ */
+Datum
+record_eq(PG_FUNCTION_ARGS)
+{
+ HeapTupleHeader record1 = PG_GETARG_HEAPTUPLEHEADER(0);
+ HeapTupleHeader record2 = PG_GETARG_HEAPTUPLEHEADER(1);
+ bool result = true;
+ Oid tupType1;
+ Oid tupType2;
+ int32 tupTypmod1;
+ int32 tupTypmod2;
+ TupleDesc tupdesc1;
+ TupleDesc tupdesc2;
+ HeapTupleData tuple1;
+ HeapTupleData tuple2;
+ int ncolumns1;
+ int ncolumns2;
+ RecordCompareData *my_extra;
+ int ncols;
+ Datum *values1;
+ Datum *values2;
+ bool *nulls1;
+ bool *nulls2;
+ int i1;
+ int i2;
+ int j;
+
+ check_stack_depth(); /* recurses for record-type columns */
+
+ /* Extract type info from the tuples */
+ tupType1 = HeapTupleHeaderGetTypeId(record1);
+ tupTypmod1 = HeapTupleHeaderGetTypMod(record1);
+ tupdesc1 = lookup_rowtype_tupdesc(tupType1, tupTypmod1);
+ ncolumns1 = tupdesc1->natts;
+ tupType2 = HeapTupleHeaderGetTypeId(record2);
+ tupTypmod2 = HeapTupleHeaderGetTypMod(record2);
+ tupdesc2 = lookup_rowtype_tupdesc(tupType2, tupTypmod2);
+ ncolumns2 = tupdesc2->natts;
+
+ /* Build temporary HeapTuple control structures */
+ tuple1.t_len = HeapTupleHeaderGetDatumLength(record1);
+ ItemPointerSetInvalid(&(tuple1.t_self));
+ tuple1.t_tableOid = InvalidOid;
+ tuple1.t_data = record1;
+ tuple2.t_len = HeapTupleHeaderGetDatumLength(record2);
+ ItemPointerSetInvalid(&(tuple2.t_self));
+ tuple2.t_tableOid = InvalidOid;
+ tuple2.t_data = record2;
+
+ /*
+ * We arrange to look up the needed comparison info just once per series
+ * of calls, assuming the record types don't change underneath us.
+ */
+ ncols = Max(ncolumns1, ncolumns2);
+ my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra;
+ if (my_extra == NULL ||
+ my_extra->ncolumns < ncols)
+ {
+ fcinfo->flinfo->fn_extra =
+ MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ offsetof(RecordCompareData, columns) +
+ ncols * sizeof(ColumnCompareData));
+ my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra;
+ my_extra->ncolumns = ncols;
+ my_extra->record1_type = InvalidOid;
+ my_extra->record1_typmod = 0;
+ my_extra->record2_type = InvalidOid;
+ my_extra->record2_typmod = 0;
+ }
+
+ if (my_extra->record1_type != tupType1 ||
+ my_extra->record1_typmod != tupTypmod1 ||
+ my_extra->record2_type != tupType2 ||
+ my_extra->record2_typmod != tupTypmod2)
+ {
+ MemSet(my_extra->columns, 0, ncols * sizeof(ColumnCompareData));
+ my_extra->record1_type = tupType1;
+ my_extra->record1_typmod = tupTypmod1;
+ my_extra->record2_type = tupType2;
+ my_extra->record2_typmod = tupTypmod2;
+ }
+
+ /* Break down the tuples into fields */
+ values1 = (Datum *) palloc(ncolumns1 * sizeof(Datum));
+ nulls1 = (bool *) palloc(ncolumns1 * sizeof(bool));
+ heap_deform_tuple(&tuple1, tupdesc1, values1, nulls1);
+ values2 = (Datum *) palloc(ncolumns2 * sizeof(Datum));
+ nulls2 = (bool *) palloc(ncolumns2 * sizeof(bool));
+ heap_deform_tuple(&tuple2, tupdesc2, values2, nulls2);
+
+ /*
+ * Scan corresponding columns, allowing for dropped columns in different
+ * places in the two rows. i1 and i2 are physical column indexes, j is
+ * the logical column index.
+ */
+ i1 = i2 = j = 0;
+ while (i1 < ncolumns1 || i2 < ncolumns2)
+ {
+ LOCAL_FCINFO(locfcinfo, 2);
+ Form_pg_attribute att1;
+ Form_pg_attribute att2;
+ TypeCacheEntry *typentry;
+ Oid collation;
+ bool oprresult;
+
+ /*
+ * Skip dropped columns
+ */
+ if (i1 < ncolumns1 && TupleDescAttr(tupdesc1, i1)->attisdropped)
+ {
+ i1++;
+ continue;
+ }
+ if (i2 < ncolumns2 && TupleDescAttr(tupdesc2, i2)->attisdropped)
+ {
+ i2++;
+ continue;
+ }
+ if (i1 >= ncolumns1 || i2 >= ncolumns2)
+ break; /* we'll deal with mismatch below loop */
+
+ att1 = TupleDescAttr(tupdesc1, i1);
+ att2 = TupleDescAttr(tupdesc2, i2);
+
+ /*
+ * Have two matching columns, they must be same type
+ */
+ if (att1->atttypid != att2->atttypid)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("cannot compare dissimilar column types %s and %s at record column %d",
+ format_type_be(att1->atttypid),
+ format_type_be(att2->atttypid),
+ j + 1)));
+
+ /*
+ * If they're not same collation, we don't complain here, but the
+ * equality function might.
+ */
+ collation = att1->attcollation;
+ if (collation != att2->attcollation)
+ collation = InvalidOid;
+
+ /*
+ * Look up the equality function if not done already
+ */
+ typentry = my_extra->columns[j].typentry;
+ if (typentry == NULL ||
+ typentry->type_id != att1->atttypid)
+ {
+ typentry = lookup_type_cache(att1->atttypid,
+ TYPECACHE_EQ_OPR_FINFO);
+ if (!OidIsValid(typentry->eq_opr_finfo.fn_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify an equality operator for type %s",
+ format_type_be(typentry->type_id))));
+ my_extra->columns[j].typentry = typentry;
+ }
+
+ /*
+ * We consider two NULLs equal; NULL > not-NULL.
+ */
+ if (!nulls1[i1] || !nulls2[i2])
+ {
+ if (nulls1[i1] || nulls2[i2])
+ {
+ result = false;
+ break;
+ }
+
+ /* Compare the pair of elements */
+ InitFunctionCallInfoData(*locfcinfo, &typentry->eq_opr_finfo, 2,
+ collation, NULL, NULL);
+ locfcinfo->args[0].value = values1[i1];
+ locfcinfo->args[0].isnull = false;
+ locfcinfo->args[1].value = values2[i2];
+ locfcinfo->args[1].isnull = false;
+ oprresult = DatumGetBool(FunctionCallInvoke(locfcinfo));
+ if (locfcinfo->isnull || !oprresult)
+ {
+ result = false;
+ break;
+ }
+ }
+
+ /* equal, so continue to next column */
+ i1++, i2++, j++;
+ }
+
+ /*
+ * If we didn't break out of the loop early, check for column count
+ * mismatch. (We do not report such mismatch if we found unequal column
+ * values; is that a feature or a bug?)
+ */
+ if (result)
+ {
+ if (i1 != ncolumns1 || i2 != ncolumns2)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("cannot compare record types with different numbers of columns")));
+ }
+
+ pfree(values1);
+ pfree(nulls1);
+ pfree(values2);
+ pfree(nulls2);
+ ReleaseTupleDesc(tupdesc1);
+ ReleaseTupleDesc(tupdesc2);
+
+ /* Avoid leaking memory when handed toasted input. */
+ PG_FREE_IF_COPY(record1, 0);
+ PG_FREE_IF_COPY(record2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+record_ne(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(!DatumGetBool(record_eq(fcinfo)));
+}
+
+Datum
+record_lt(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(record_cmp(fcinfo) < 0);
+}
+
+Datum
+record_gt(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(record_cmp(fcinfo) > 0);
+}
+
+Datum
+record_le(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(record_cmp(fcinfo) <= 0);
+}
+
+Datum
+record_ge(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(record_cmp(fcinfo) >= 0);
+}
+
+Datum
+btrecordcmp(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_INT32(record_cmp(fcinfo));
+}
+
+
+/*
+ * record_image_cmp :
+ * Internal byte-oriented comparison function for records.
+ *
+ * Returns -1, 0 or 1
+ *
+ * Note: The normal concepts of "equality" do not apply here; two values
+ * that compare as equal may still have different representations, and such
+ * values are not considered identical. As an example, for the citext type
+ * 'A' and 'a' are equal, but they are not identical.
+ */
+static int
+record_image_cmp(FunctionCallInfo fcinfo)
+{
+ HeapTupleHeader record1 = PG_GETARG_HEAPTUPLEHEADER(0);
+ HeapTupleHeader record2 = PG_GETARG_HEAPTUPLEHEADER(1);
+ int result = 0;
+ Oid tupType1;
+ Oid tupType2;
+ int32 tupTypmod1;
+ int32 tupTypmod2;
+ TupleDesc tupdesc1;
+ TupleDesc tupdesc2;
+ HeapTupleData tuple1;
+ HeapTupleData tuple2;
+ int ncolumns1;
+ int ncolumns2;
+ RecordCompareData *my_extra;
+ int ncols;
+ Datum *values1;
+ Datum *values2;
+ bool *nulls1;
+ bool *nulls2;
+ int i1;
+ int i2;
+ int j;
+
+ /* Extract type info from the tuples */
+ tupType1 = HeapTupleHeaderGetTypeId(record1);
+ tupTypmod1 = HeapTupleHeaderGetTypMod(record1);
+ tupdesc1 = lookup_rowtype_tupdesc(tupType1, tupTypmod1);
+ ncolumns1 = tupdesc1->natts;
+ tupType2 = HeapTupleHeaderGetTypeId(record2);
+ tupTypmod2 = HeapTupleHeaderGetTypMod(record2);
+ tupdesc2 = lookup_rowtype_tupdesc(tupType2, tupTypmod2);
+ ncolumns2 = tupdesc2->natts;
+
+ /* Build temporary HeapTuple control structures */
+ tuple1.t_len = HeapTupleHeaderGetDatumLength(record1);
+ ItemPointerSetInvalid(&(tuple1.t_self));
+ tuple1.t_tableOid = InvalidOid;
+ tuple1.t_data = record1;
+ tuple2.t_len = HeapTupleHeaderGetDatumLength(record2);
+ ItemPointerSetInvalid(&(tuple2.t_self));
+ tuple2.t_tableOid = InvalidOid;
+ tuple2.t_data = record2;
+
+ /*
+ * We arrange to look up the needed comparison info just once per series
+ * of calls, assuming the record types don't change underneath us.
+ */
+ ncols = Max(ncolumns1, ncolumns2);
+ my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra;
+ if (my_extra == NULL ||
+ my_extra->ncolumns < ncols)
+ {
+ fcinfo->flinfo->fn_extra =
+ MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ offsetof(RecordCompareData, columns) +
+ ncols * sizeof(ColumnCompareData));
+ my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra;
+ my_extra->ncolumns = ncols;
+ my_extra->record1_type = InvalidOid;
+ my_extra->record1_typmod = 0;
+ my_extra->record2_type = InvalidOid;
+ my_extra->record2_typmod = 0;
+ }
+
+ if (my_extra->record1_type != tupType1 ||
+ my_extra->record1_typmod != tupTypmod1 ||
+ my_extra->record2_type != tupType2 ||
+ my_extra->record2_typmod != tupTypmod2)
+ {
+ MemSet(my_extra->columns, 0, ncols * sizeof(ColumnCompareData));
+ my_extra->record1_type = tupType1;
+ my_extra->record1_typmod = tupTypmod1;
+ my_extra->record2_type = tupType2;
+ my_extra->record2_typmod = tupTypmod2;
+ }
+
+ /* Break down the tuples into fields */
+ values1 = (Datum *) palloc(ncolumns1 * sizeof(Datum));
+ nulls1 = (bool *) palloc(ncolumns1 * sizeof(bool));
+ heap_deform_tuple(&tuple1, tupdesc1, values1, nulls1);
+ values2 = (Datum *) palloc(ncolumns2 * sizeof(Datum));
+ nulls2 = (bool *) palloc(ncolumns2 * sizeof(bool));
+ heap_deform_tuple(&tuple2, tupdesc2, values2, nulls2);
+
+ /*
+ * Scan corresponding columns, allowing for dropped columns in different
+ * places in the two rows. i1 and i2 are physical column indexes, j is
+ * the logical column index.
+ */
+ i1 = i2 = j = 0;
+ while (i1 < ncolumns1 || i2 < ncolumns2)
+ {
+ Form_pg_attribute att1;
+ Form_pg_attribute att2;
+
+ /*
+ * Skip dropped columns
+ */
+ if (i1 < ncolumns1 && TupleDescAttr(tupdesc1, i1)->attisdropped)
+ {
+ i1++;
+ continue;
+ }
+ if (i2 < ncolumns2 && TupleDescAttr(tupdesc2, i2)->attisdropped)
+ {
+ i2++;
+ continue;
+ }
+ if (i1 >= ncolumns1 || i2 >= ncolumns2)
+ break; /* we'll deal with mismatch below loop */
+
+ att1 = TupleDescAttr(tupdesc1, i1);
+ att2 = TupleDescAttr(tupdesc2, i2);
+
+ /*
+ * Have two matching columns, they must be same type
+ */
+ if (att1->atttypid != att2->atttypid)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("cannot compare dissimilar column types %s and %s at record column %d",
+ format_type_be(att1->atttypid),
+ format_type_be(att2->atttypid),
+ j + 1)));
+
+ /*
+ * The same type should have the same length (or both should be
+ * variable).
+ */
+ Assert(att1->attlen == att2->attlen);
+
+ /*
+ * We consider two NULLs equal; NULL > not-NULL.
+ */
+ if (!nulls1[i1] || !nulls2[i2])
+ {
+ int cmpresult = 0;
+
+ if (nulls1[i1])
+ {
+ /* arg1 is greater than arg2 */
+ result = 1;
+ break;
+ }
+ if (nulls2[i2])
+ {
+ /* arg1 is less than arg2 */
+ result = -1;
+ break;
+ }
+
+ /* Compare the pair of elements */
+ if (att1->attbyval)
+ {
+ if (values1[i1] != values2[i2])
+ cmpresult = (values1[i1] < values2[i2]) ? -1 : 1;
+ }
+ else if (att1->attlen > 0)
+ {
+ cmpresult = memcmp(DatumGetPointer(values1[i1]),
+ DatumGetPointer(values2[i2]),
+ att1->attlen);
+ }
+ else if (att1->attlen == -1)
+ {
+ Size len1,
+ len2;
+ struct varlena *arg1val;
+ struct varlena *arg2val;
+
+ len1 = toast_raw_datum_size(values1[i1]);
+ len2 = toast_raw_datum_size(values2[i2]);
+ arg1val = PG_DETOAST_DATUM_PACKED(values1[i1]);
+ arg2val = PG_DETOAST_DATUM_PACKED(values2[i2]);
+
+ cmpresult = memcmp(VARDATA_ANY(arg1val),
+ VARDATA_ANY(arg2val),
+ Min(len1, len2) - VARHDRSZ);
+ if ((cmpresult == 0) && (len1 != len2))
+ cmpresult = (len1 < len2) ? -1 : 1;
+
+ if ((Pointer) arg1val != (Pointer) values1[i1])
+ pfree(arg1val);
+ if ((Pointer) arg2val != (Pointer) values2[i2])
+ pfree(arg2val);
+ }
+ else
+ elog(ERROR, "unexpected attlen: %d", att1->attlen);
+
+ if (cmpresult < 0)
+ {
+ /* arg1 is less than arg2 */
+ result = -1;
+ break;
+ }
+ else if (cmpresult > 0)
+ {
+ /* arg1 is greater than arg2 */
+ result = 1;
+ break;
+ }
+ }
+
+ /* equal, so continue to next column */
+ i1++, i2++, j++;
+ }
+
+ /*
+ * If we didn't break out of the loop early, check for column count
+ * mismatch. (We do not report such mismatch if we found unequal column
+ * values; is that a feature or a bug?)
+ */
+ if (result == 0)
+ {
+ if (i1 != ncolumns1 || i2 != ncolumns2)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("cannot compare record types with different numbers of columns")));
+ }
+
+ pfree(values1);
+ pfree(nulls1);
+ pfree(values2);
+ pfree(nulls2);
+ ReleaseTupleDesc(tupdesc1);
+ ReleaseTupleDesc(tupdesc2);
+
+ /* Avoid leaking memory when handed toasted input. */
+ PG_FREE_IF_COPY(record1, 0);
+ PG_FREE_IF_COPY(record2, 1);
+
+ return result;
+}
+
+/*
+ * record_image_eq :
+ * compares two records for identical contents, based on byte images
+ * result :
+ * returns true if the records are identical, false otherwise.
+ *
+ * Note: we do not use record_image_cmp here, since we can avoid
+ * de-toasting for unequal lengths this way.
+ */
+Datum
+record_image_eq(PG_FUNCTION_ARGS)
+{
+ HeapTupleHeader record1 = PG_GETARG_HEAPTUPLEHEADER(0);
+ HeapTupleHeader record2 = PG_GETARG_HEAPTUPLEHEADER(1);
+ bool result = true;
+ Oid tupType1;
+ Oid tupType2;
+ int32 tupTypmod1;
+ int32 tupTypmod2;
+ TupleDesc tupdesc1;
+ TupleDesc tupdesc2;
+ HeapTupleData tuple1;
+ HeapTupleData tuple2;
+ int ncolumns1;
+ int ncolumns2;
+ RecordCompareData *my_extra;
+ int ncols;
+ Datum *values1;
+ Datum *values2;
+ bool *nulls1;
+ bool *nulls2;
+ int i1;
+ int i2;
+ int j;
+
+ /* Extract type info from the tuples */
+ tupType1 = HeapTupleHeaderGetTypeId(record1);
+ tupTypmod1 = HeapTupleHeaderGetTypMod(record1);
+ tupdesc1 = lookup_rowtype_tupdesc(tupType1, tupTypmod1);
+ ncolumns1 = tupdesc1->natts;
+ tupType2 = HeapTupleHeaderGetTypeId(record2);
+ tupTypmod2 = HeapTupleHeaderGetTypMod(record2);
+ tupdesc2 = lookup_rowtype_tupdesc(tupType2, tupTypmod2);
+ ncolumns2 = tupdesc2->natts;
+
+ /* Build temporary HeapTuple control structures */
+ tuple1.t_len = HeapTupleHeaderGetDatumLength(record1);
+ ItemPointerSetInvalid(&(tuple1.t_self));
+ tuple1.t_tableOid = InvalidOid;
+ tuple1.t_data = record1;
+ tuple2.t_len = HeapTupleHeaderGetDatumLength(record2);
+ ItemPointerSetInvalid(&(tuple2.t_self));
+ tuple2.t_tableOid = InvalidOid;
+ tuple2.t_data = record2;
+
+ /*
+ * We arrange to look up the needed comparison info just once per series
+ * of calls, assuming the record types don't change underneath us.
+ */
+ ncols = Max(ncolumns1, ncolumns2);
+ my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra;
+ if (my_extra == NULL ||
+ my_extra->ncolumns < ncols)
+ {
+ fcinfo->flinfo->fn_extra =
+ MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ offsetof(RecordCompareData, columns) +
+ ncols * sizeof(ColumnCompareData));
+ my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra;
+ my_extra->ncolumns = ncols;
+ my_extra->record1_type = InvalidOid;
+ my_extra->record1_typmod = 0;
+ my_extra->record2_type = InvalidOid;
+ my_extra->record2_typmod = 0;
+ }
+
+ if (my_extra->record1_type != tupType1 ||
+ my_extra->record1_typmod != tupTypmod1 ||
+ my_extra->record2_type != tupType2 ||
+ my_extra->record2_typmod != tupTypmod2)
+ {
+ MemSet(my_extra->columns, 0, ncols * sizeof(ColumnCompareData));
+ my_extra->record1_type = tupType1;
+ my_extra->record1_typmod = tupTypmod1;
+ my_extra->record2_type = tupType2;
+ my_extra->record2_typmod = tupTypmod2;
+ }
+
+ /* Break down the tuples into fields */
+ values1 = (Datum *) palloc(ncolumns1 * sizeof(Datum));
+ nulls1 = (bool *) palloc(ncolumns1 * sizeof(bool));
+ heap_deform_tuple(&tuple1, tupdesc1, values1, nulls1);
+ values2 = (Datum *) palloc(ncolumns2 * sizeof(Datum));
+ nulls2 = (bool *) palloc(ncolumns2 * sizeof(bool));
+ heap_deform_tuple(&tuple2, tupdesc2, values2, nulls2);
+
+ /*
+ * Scan corresponding columns, allowing for dropped columns in different
+ * places in the two rows. i1 and i2 are physical column indexes, j is
+ * the logical column index.
+ */
+ i1 = i2 = j = 0;
+ while (i1 < ncolumns1 || i2 < ncolumns2)
+ {
+ Form_pg_attribute att1;
+ Form_pg_attribute att2;
+
+ /*
+ * Skip dropped columns
+ */
+ if (i1 < ncolumns1 && TupleDescAttr(tupdesc1, i1)->attisdropped)
+ {
+ i1++;
+ continue;
+ }
+ if (i2 < ncolumns2 && TupleDescAttr(tupdesc2, i2)->attisdropped)
+ {
+ i2++;
+ continue;
+ }
+ if (i1 >= ncolumns1 || i2 >= ncolumns2)
+ break; /* we'll deal with mismatch below loop */
+
+ att1 = TupleDescAttr(tupdesc1, i1);
+ att2 = TupleDescAttr(tupdesc2, i2);
+
+ /*
+ * Have two matching columns, they must be same type
+ */
+ if (att1->atttypid != att2->atttypid)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("cannot compare dissimilar column types %s and %s at record column %d",
+ format_type_be(att1->atttypid),
+ format_type_be(att2->atttypid),
+ j + 1)));
+
+ /*
+ * We consider two NULLs equal; NULL > not-NULL.
+ */
+ if (!nulls1[i1] || !nulls2[i2])
+ {
+ if (nulls1[i1] || nulls2[i2])
+ {
+ result = false;
+ break;
+ }
+
+ /* Compare the pair of elements */
+ result = datum_image_eq(values1[i1], values2[i2], att1->attbyval, att2->attlen);
+ if (!result)
+ break;
+ }
+
+ /* equal, so continue to next column */
+ i1++, i2++, j++;
+ }
+
+ /*
+ * If we didn't break out of the loop early, check for column count
+ * mismatch. (We do not report such mismatch if we found unequal column
+ * values; is that a feature or a bug?)
+ */
+ if (result)
+ {
+ if (i1 != ncolumns1 || i2 != ncolumns2)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("cannot compare record types with different numbers of columns")));
+ }
+
+ pfree(values1);
+ pfree(nulls1);
+ pfree(values2);
+ pfree(nulls2);
+ ReleaseTupleDesc(tupdesc1);
+ ReleaseTupleDesc(tupdesc2);
+
+ /* Avoid leaking memory when handed toasted input. */
+ PG_FREE_IF_COPY(record1, 0);
+ PG_FREE_IF_COPY(record2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+record_image_ne(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(!DatumGetBool(record_image_eq(fcinfo)));
+}
+
+Datum
+record_image_lt(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(record_image_cmp(fcinfo) < 0);
+}
+
+Datum
+record_image_gt(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(record_image_cmp(fcinfo) > 0);
+}
+
+Datum
+record_image_le(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(record_image_cmp(fcinfo) <= 0);
+}
+
+Datum
+record_image_ge(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(record_image_cmp(fcinfo) >= 0);
+}
+
+Datum
+btrecordimagecmp(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_INT32(record_image_cmp(fcinfo));
+}
+
+
+/*
+ * Row type hash functions
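+ *
+ * Both functions below hash each non-dropped column with the column type's
+ * (extended) hash support function, treat NULL fields as hashing to 0, and
+ * combine the per-column hashes with the same multiply-by-31 step used by
+ * hash_array(): result = result * 31 + element_hash, written below as
+ * (result << 5) - result + element_hash.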
+ */
+
+Datum
+hash_record(PG_FUNCTION_ARGS)
+{
+ HeapTupleHeader record = PG_GETARG_HEAPTUPLEHEADER(0);
+ uint32 result = 0;
+ Oid tupType;
+ int32 tupTypmod;
+ TupleDesc tupdesc;
+ HeapTupleData tuple;
+ int ncolumns;
+ RecordCompareData *my_extra;
+ Datum *values;
+ bool *nulls;
+
+ check_stack_depth(); /* recurses for record-type columns */
+
+ /* Extract type info from tuple */
+ tupType = HeapTupleHeaderGetTypeId(record);
+ tupTypmod = HeapTupleHeaderGetTypMod(record);
+ tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
+ ncolumns = tupdesc->natts;
+
+ /* Build temporary HeapTuple control structure */
+ tuple.t_len = HeapTupleHeaderGetDatumLength(record);
+ ItemPointerSetInvalid(&(tuple.t_self));
+ tuple.t_tableOid = InvalidOid;
+ tuple.t_data = record;
+
+ /*
+ * We arrange to look up the needed hashing info just once per series of
+ * calls, assuming the record type doesn't change underneath us.
+ */
+ my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra;
+ if (my_extra == NULL ||
+ my_extra->ncolumns < ncolumns)
+ {
+ fcinfo->flinfo->fn_extra =
+ MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ offsetof(RecordCompareData, columns) +
+ ncolumns * sizeof(ColumnCompareData));
+ my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra;
+ my_extra->ncolumns = ncolumns;
+ my_extra->record1_type = InvalidOid;
+ my_extra->record1_typmod = 0;
+ }
+
+ if (my_extra->record1_type != tupType ||
+ my_extra->record1_typmod != tupTypmod)
+ {
+ MemSet(my_extra->columns, 0, ncolumns * sizeof(ColumnCompareData));
+ my_extra->record1_type = tupType;
+ my_extra->record1_typmod = tupTypmod;
+ }
+
+ /* Break down the tuple into fields */
+ values = (Datum *) palloc(ncolumns * sizeof(Datum));
+ nulls = (bool *) palloc(ncolumns * sizeof(bool));
+ heap_deform_tuple(&tuple, tupdesc, values, nulls);
+
+ for (int i = 0; i < ncolumns; i++)
+ {
+ Form_pg_attribute att;
+ TypeCacheEntry *typentry;
+ uint32 element_hash;
+
+ att = TupleDescAttr(tupdesc, i);
+
+ if (att->attisdropped)
+ continue;
+
+ /*
+ * Look up the hash function if not done already
+ */
+ typentry = my_extra->columns[i].typentry;
+ if (typentry == NULL ||
+ typentry->type_id != att->atttypid)
+ {
+ typentry = lookup_type_cache(att->atttypid,
+ TYPECACHE_HASH_PROC_FINFO);
+ if (!OidIsValid(typentry->hash_proc_finfo.fn_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify a hash function for type %s",
+ format_type_be(typentry->type_id))));
+ my_extra->columns[i].typentry = typentry;
+ }
+
+ /* Compute hash of element */
+ if (nulls[i])
+ {
+ element_hash = 0;
+ }
+ else
+ {
+ LOCAL_FCINFO(locfcinfo, 1);
+
+ InitFunctionCallInfoData(*locfcinfo, &typentry->hash_proc_finfo, 1,
+ att->attcollation, NULL, NULL);
+ locfcinfo->args[0].value = values[i];
+ locfcinfo->args[0].isnull = false;
+ element_hash = DatumGetUInt32(FunctionCallInvoke(locfcinfo));
+
+ /* We don't expect hash support functions to return null */
+ Assert(!locfcinfo->isnull);
+ }
+
+ /* see hash_array() */
+ result = (result << 5) - result + element_hash;
+ }
+
+ pfree(values);
+ pfree(nulls);
+ ReleaseTupleDesc(tupdesc);
+
+ /* Avoid leaking memory when handed toasted input. */
+ PG_FREE_IF_COPY(record, 0);
+
+ PG_RETURN_UINT32(result);
+}
+
+Datum
+hash_record_extended(PG_FUNCTION_ARGS)
+{
+ HeapTupleHeader record = PG_GETARG_HEAPTUPLEHEADER(0);
+ uint64 seed = PG_GETARG_INT64(1);
+ uint64 result = 0;
+ Oid tupType;
+ int32 tupTypmod;
+ TupleDesc tupdesc;
+ HeapTupleData tuple;
+ int ncolumns;
+ RecordCompareData *my_extra;
+ Datum *values;
+ bool *nulls;
+
+ check_stack_depth(); /* recurses for record-type columns */
+
+ /* Extract type info from tuple */
+ tupType = HeapTupleHeaderGetTypeId(record);
+ tupTypmod = HeapTupleHeaderGetTypMod(record);
+ tupdesc = lookup_rowtype_tupdesc(tupType, tupTypmod);
+ ncolumns = tupdesc->natts;
+
+ /* Build temporary HeapTuple control structure */
+ tuple.t_len = HeapTupleHeaderGetDatumLength(record);
+ ItemPointerSetInvalid(&(tuple.t_self));
+ tuple.t_tableOid = InvalidOid;
+ tuple.t_data = record;
+
+ /*
+ * We arrange to look up the needed hashing info just once per series of
+ * calls, assuming the record type doesn't change underneath us.
+ */
+ my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra;
+ if (my_extra == NULL ||
+ my_extra->ncolumns < ncolumns)
+ {
+ fcinfo->flinfo->fn_extra =
+ MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ offsetof(RecordCompareData, columns) +
+ ncolumns * sizeof(ColumnCompareData));
+ my_extra = (RecordCompareData *) fcinfo->flinfo->fn_extra;
+ my_extra->ncolumns = ncolumns;
+ my_extra->record1_type = InvalidOid;
+ my_extra->record1_typmod = 0;
+ }
+
+ if (my_extra->record1_type != tupType ||
+ my_extra->record1_typmod != tupTypmod)
+ {
+ MemSet(my_extra->columns, 0, ncolumns * sizeof(ColumnCompareData));
+ my_extra->record1_type = tupType;
+ my_extra->record1_typmod = tupTypmod;
+ }
+
+ /* Break down the tuple into fields */
+ values = (Datum *) palloc(ncolumns * sizeof(Datum));
+ nulls = (bool *) palloc(ncolumns * sizeof(bool));
+ heap_deform_tuple(&tuple, tupdesc, values, nulls);
+
+ for (int i = 0; i < ncolumns; i++)
+ {
+ Form_pg_attribute att;
+ TypeCacheEntry *typentry;
+ uint64 element_hash;
+
+ att = TupleDescAttr(tupdesc, i);
+
+ if (att->attisdropped)
+ continue;
+
+ /*
+ * Look up the hash function if not done already
+ */
+ typentry = my_extra->columns[i].typentry;
+ if (typentry == NULL ||
+ typentry->type_id != att->atttypid)
+ {
+ typentry = lookup_type_cache(att->atttypid,
+ TYPECACHE_HASH_EXTENDED_PROC_FINFO);
+ if (!OidIsValid(typentry->hash_extended_proc_finfo.fn_oid))
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_FUNCTION),
+ errmsg("could not identify an extended hash function for type %s",
+ format_type_be(typentry->type_id))));
+ my_extra->columns[i].typentry = typentry;
+ }
+
+ /* Compute hash of element */
+ if (nulls[i])
+ {
+ element_hash = 0;
+ }
+ else
+ {
+ LOCAL_FCINFO(locfcinfo, 2);
+
+ InitFunctionCallInfoData(*locfcinfo, &typentry->hash_extended_proc_finfo, 2,
+ att->attcollation, NULL, NULL);
+ locfcinfo->args[0].value = values[i];
+ locfcinfo->args[0].isnull = false;
+ locfcinfo->args[1].value = Int64GetDatum(seed);
+ locfcinfo->args[1].isnull = false;
+ element_hash = DatumGetUInt64(FunctionCallInvoke(locfcinfo));
+
+ /* We don't expect hash support functions to return null */
+ Assert(!locfcinfo->isnull);
+ }
+
+ /* see hash_array_extended() */
+ result = (result << 5) - result + element_hash;
+ }
+
+ pfree(values);
+ pfree(nulls);
+ ReleaseTupleDesc(tupdesc);
+
+ /* Avoid leaking memory when handed toasted input. */
+ PG_FREE_IF_COPY(record, 0);
+
+ PG_RETURN_UINT64(result);
+}
diff --git a/src/backend/utils/adt/ruleutils.c b/src/backend/utils/adt/ruleutils.c
new file mode 100644
index 0000000..a1c1831
--- /dev/null
+++ b/src/backend/utils/adt/ruleutils.c
@@ -0,0 +1,12406 @@
+/*-------------------------------------------------------------------------
+ *
+ * ruleutils.c
+ * Functions to convert stored expressions/querytrees back to
+ * source text
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/ruleutils.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "access/amapi.h"
+#include "access/htup_details.h"
+#include "access/relation.h"
+#include "access/sysattr.h"
+#include "access/table.h"
+#include "catalog/pg_aggregate.h"
+#include "catalog/pg_am.h"
+#include "catalog/pg_authid.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_constraint.h"
+#include "catalog/pg_depend.h"
+#include "catalog/pg_language.h"
+#include "catalog/pg_opclass.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_partitioned_table.h"
+#include "catalog/pg_proc.h"
+#include "catalog/pg_statistic_ext.h"
+#include "catalog/pg_trigger.h"
+#include "catalog/pg_type.h"
+#include "commands/defrem.h"
+#include "commands/tablespace.h"
+#include "common/keywords.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/pathnodes.h"
+#include "optimizer/optimizer.h"
+#include "parser/parse_agg.h"
+#include "parser/parse_func.h"
+#include "parser/parse_node.h"
+#include "parser/parse_oper.h"
+#include "parser/parse_relation.h"
+#include "parser/parser.h"
+#include "parser/parsetree.h"
+#include "rewrite/rewriteHandler.h"
+#include "rewrite/rewriteManip.h"
+#include "rewrite/rewriteSupport.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/fmgroids.h"
+#include "utils/guc.h"
+#include "utils/hsearch.h"
+#include "utils/lsyscache.h"
+#include "utils/partcache.h"
+#include "utils/rel.h"
+#include "utils/ruleutils.h"
+#include "utils/snapmgr.h"
+#include "utils/syscache.h"
+#include "utils/typcache.h"
+#include "utils/varlena.h"
+#include "utils/xml.h"
+
+/* ----------
+ * Pretty formatting constants
+ * ----------
+ */
+
+/* Indent counts */
+#define PRETTYINDENT_STD 8
+#define PRETTYINDENT_JOIN 4
+#define PRETTYINDENT_VAR 4
+
+#define PRETTYINDENT_LIMIT 40 /* wrap limit */
+
+/* Pretty flags */
+#define PRETTYFLAG_PAREN 0x0001
+#define PRETTYFLAG_INDENT 0x0002
+#define PRETTYFLAG_SCHEMA 0x0004
+
+/* Standard conversion of a "bool pretty" option to detailed flags */
+#define GET_PRETTY_FLAGS(pretty) \
+ ((pretty) ? (PRETTYFLAG_PAREN | PRETTYFLAG_INDENT | PRETTYFLAG_SCHEMA) \
+ : PRETTYFLAG_INDENT)
+
+/* Default line length for pretty-print wrapping: 0 means wrap always */
+#define WRAP_COLUMN_DEFAULT 0
+
+/* macros to test if pretty action needed */
+#define PRETTY_PAREN(context) ((context)->prettyFlags & PRETTYFLAG_PAREN)
+#define PRETTY_INDENT(context) ((context)->prettyFlags & PRETTYFLAG_INDENT)
+#define PRETTY_SCHEMA(context) ((context)->prettyFlags & PRETTYFLAG_SCHEMA)
+
+
+/* ----------
+ * Local data types
+ * ----------
+ */
+
+/* Context info needed for invoking a recursive querytree display routine */
+typedef struct
+{
+ StringInfo buf; /* output buffer to append to */
+ List *namespaces; /* List of deparse_namespace nodes */
+ List *windowClause; /* Current query level's WINDOW clause */
+ List *windowTList; /* targetlist for resolving WINDOW clause */
+ int prettyFlags; /* enabling of pretty-print functions */
+ int wrapColumn; /* max line length, or -1 for no limit */
+ int indentLevel; /* current indent level for pretty-print */
+ bool varprefix; /* true to print prefixes on Vars */
+ ParseExprKind special_exprkind; /* set only for exprkinds needing special
+ * handling */
+ Bitmapset *appendparents; /* if not null, map child Vars of these relids
+ * back to the parent rel */
+} deparse_context;
+
+/*
+ * Each level of query context around a subtree needs a level of Var namespace.
+ * A Var having varlevelsup=N refers to the N'th item (counting from 0) in
+ * the current context's namespaces list.
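+ * (So varlevelsup = 0 denotes the current query level and varlevelsup = 1
+ * the immediately enclosing one.)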
+ *
+ * rtable is the list of actual RTEs from the Query or PlannedStmt.
+ * rtable_names holds the alias name to be used for each RTE (either a C
+ * string, or NULL for nameless RTEs such as unnamed joins).
+ * rtable_columns holds the column alias names to be used for each RTE.
+ *
+ * subplans is a list of Plan trees for SubPlans and CTEs (it's only used
+ * in the PlannedStmt case).
+ * ctes is a list of CommonTableExpr nodes (only used in the Query case).
+ * appendrels, if not null (it's only used in the PlannedStmt case), is an
+ * array of AppendRelInfo nodes, indexed by child relid. We use that to map
+ * child-table Vars to their inheritance parents.
+ *
+ * In some cases we need to make names of merged JOIN USING columns unique
+ * across the whole query, not only per-RTE. If so, unique_using is true
+ * and using_names is a list of C strings representing names already assigned
+ * to USING columns.
+ *
+ * When deparsing plan trees, there is always just a single item in the
+ * deparse_namespace list (since a plan tree never contains Vars with
+ * varlevelsup > 0). We store the Plan node that is the immediate
+ * parent of the expression to be deparsed, as well as a list of that
+ * Plan's ancestors. In addition, we store its outer and inner subplan nodes,
+ * as well as their targetlists, and the index tlist if the current plan node
+ * might contain INDEX_VAR Vars. (These fields could be derived on-the-fly
+ * from the current Plan node, but it seems notationally clearer to set them
+ * up as separate fields.)
+ */
+typedef struct
+{
+ List *rtable; /* List of RangeTblEntry nodes */
+ List *rtable_names; /* Parallel list of names for RTEs */
+ List *rtable_columns; /* Parallel list of deparse_columns structs */
+ List *subplans; /* List of Plan trees for SubPlans */
+ List *ctes; /* List of CommonTableExpr nodes */
+ AppendRelInfo **appendrels; /* Array of AppendRelInfo nodes, or NULL */
+ /* Workspace for column alias assignment: */
+ bool unique_using; /* Are we making USING names globally unique */
+ List *using_names; /* List of assigned names for USING columns */
+ /* Remaining fields are used only when deparsing a Plan tree: */
+ Plan *plan; /* immediate parent of current expression */
+ List *ancestors; /* ancestors of plan */
+ Plan *outer_plan; /* outer subnode, or NULL if none */
+ Plan *inner_plan; /* inner subnode, or NULL if none */
+ List *outer_tlist; /* referent for OUTER_VAR Vars */
+ List *inner_tlist; /* referent for INNER_VAR Vars */
+ List *index_tlist; /* referent for INDEX_VAR Vars */
+ /* Special namespace representing a function signature: */
+ char *funcname;
+ int numargs;
+ char **argnames;
+} deparse_namespace;
+
+/*
+ * Per-relation data about column alias names.
+ *
+ * Selecting aliases is unreasonably complicated because of the need to dump
+ * rules/views whose underlying tables may have had columns added, deleted, or
+ * renamed since the query was parsed. We must nonetheless print the rule/view
+ * in a form that can be reloaded and will produce the same results as before.
+ *
+ * For each RTE used in the query, we must assign column aliases that are
+ * unique within that RTE. SQL does not require this of the original query,
+ * but due to factors such as *-expansion we need to be able to uniquely
+ * reference every column in a decompiled query. As long as we qualify all
+ * column references, per-RTE uniqueness is sufficient for that.
+ *
+ * However, we can't ensure per-column name uniqueness for unnamed join RTEs,
+ * since they just inherit column names from their input RTEs, and we can't
+ * rename the columns at the join level. Most of the time this isn't an issue
+ * because we don't need to reference the join's output columns as such; we
+ * can reference the input columns instead. That approach can fail for merged
+ * JOIN USING columns, however, so when we have one of those in an unnamed
+ * join, we have to make that column's alias globally unique across the whole
+ * query to ensure it can be referenced unambiguously.
+ *
+ * Another problem is that a JOIN USING clause requires the columns to be
+ * merged to have the same aliases in both input RTEs, and that no other
+ * columns in those RTEs or their children conflict with the USING names.
+ * To handle that, we do USING-column alias assignment in a recursive
+ * traversal of the query's jointree. When descending through a JOIN with
+ * USING, we preassign the USING column names to the child columns, overriding
+ * other rules for column alias assignment. We also mark each RTE with a list
+ * of all USING column names selected for joins containing that RTE, so that
+ * when we assign other columns' aliases later, we can avoid conflicts.
+ *
+ * Another problem is that if a JOIN's input tables have had columns added or
+ * deleted since the query was parsed, we must generate a column alias list
+ * for the join that matches the current set of input columns --- otherwise, a
+ * change in the number of columns in the left input would throw off matching
+ * of aliases to columns of the right input. Thus, positions in the printable
+ * column alias list are not necessarily one-for-one with varattnos of the
+ * JOIN, so we need a separate new_colnames[] array for printing purposes.
+ */
+typedef struct
+{
+ /*
+ * colnames is an array containing column aliases to use for columns that
+ * existed when the query was parsed. Dropped columns have NULL entries.
+ * This array can be directly indexed by varattno to get a Var's name.
+ *
+ * Non-NULL entries are guaranteed unique within the RTE, *except* when
+ * this is for an unnamed JOIN RTE. In that case we merely copy up names
+ * from the two input RTEs.
+ *
+ * During the recursive descent in set_using_names(), forcible assignment
+ * of a child RTE's column name is represented by pre-setting that element
+ * of the child's colnames array. So at that stage, NULL entries in this
+ * array just mean that no name has been preassigned, not necessarily that
+ * the column is dropped.
+ */
+ int num_cols; /* length of colnames[] array */
+ char **colnames; /* array of C strings and NULLs */
+
+ /*
+ * new_colnames is an array containing column aliases to use for columns
+ * that would exist if the query was re-parsed against the current
+ * definitions of its base tables. This is what to print as the column
+ * alias list for the RTE. This array does not include dropped columns,
+ * but it will include columns added since original parsing. Indexes in
+ * it therefore have little to do with current varattno values. As above,
+ * entries are unique unless this is for an unnamed JOIN RTE. (In such an
+ * RTE, we never actually print this array, but we must compute it anyway
+ * for possible use in computing column names of upper joins.) The
+ * parallel array is_new_col marks which of these columns are new since
+ * original parsing. Entries with is_new_col false must match the
+ * non-NULL colnames entries one-for-one.
+ */
+ int num_new_cols; /* length of new_colnames[] array */
+ char **new_colnames; /* array of C strings */
+ bool *is_new_col; /* array of bool flags */
+
+ /* This flag tells whether we should actually print a column alias list */
+ bool printaliases;
+
+ /* This list has all names used as USING names in joins above this RTE */
+ List *parentUsing; /* names assigned to parent merged columns */
+
+ /*
+ * If this struct is for a JOIN RTE, we fill these fields during the
+ * set_using_names() pass to describe its relationship to its child RTEs.
+ *
+ * leftattnos and rightattnos are arrays with one entry per existing
+ * output column of the join (hence, indexable by join varattno). For a
+ * simple reference to a column of the left child, leftattnos[i] is the
+ * child RTE's attno and rightattnos[i] is zero; and conversely for a
+ * column of the right child. But for merged columns produced by JOIN
+ * USING/NATURAL JOIN, both leftattnos[i] and rightattnos[i] are nonzero.
+ * Note that a simple reference might be to a child RTE column that's been
+ * dropped; but that's OK since the column could not be used in the query.
+ *
+ * If it's a JOIN USING, usingNames holds the alias names selected for the
+ * merged columns (these might be different from the original USING list,
+ * if we had to modify names to achieve uniqueness).
+ */
+ int leftrti; /* rangetable index of left child */
+ int rightrti; /* rangetable index of right child */
+ int *leftattnos; /* left-child varattnos of join cols, or 0 */
+ int *rightattnos; /* right-child varattnos of join cols, or 0 */
+ List *usingNames; /* names assigned to merged columns */
+} deparse_columns;
+
+/* This macro is analogous to rt_fetch(), but for deparse_columns structs */
+#define deparse_columns_fetch(rangetable_index, dpns) \
+ ((deparse_columns *) list_nth((dpns)->rtable_columns, (rangetable_index)-1))
+
+/*
+ * Entry in set_rtable_names' hash table
+ */
+typedef struct
+{
+ char name[NAMEDATALEN]; /* Hash key --- must be first */
+ int counter; /* Largest addition used so far for name */
+} NameHashEntry;
+
+/* Callback signature for resolve_special_varno() */
+typedef void (*rsv_callback) (Node *node, deparse_context *context,
+ void *callback_arg);
+
+
+/* ----------
+ * Global data
+ * ----------
+ */
+static SPIPlanPtr plan_getrulebyoid = NULL;
+static const char *query_getrulebyoid = "SELECT * FROM pg_catalog.pg_rewrite WHERE oid = $1";
+static SPIPlanPtr plan_getviewrule = NULL;
+static const char *query_getviewrule = "SELECT * FROM pg_catalog.pg_rewrite WHERE ev_class = $1 AND rulename = $2";
+
+/* GUC parameters */
+bool quote_all_identifiers = false;
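+
+/*
+ * When quote_all_identifiers is set, deparsed output double-quotes every
+ * identifier (e.g. "public"."tab"."col" rather than public.tab.col); this
+ * is what pg_dump's --quote-all-identifiers option relies on.
+ */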
+
+
+/* ----------
+ * Local functions
+ *
+ * Most of these functions used to use fixed-size buffers to build their
+ * results. Now, they take an (already initialized) StringInfo object
+ * as a parameter, and append their text output to its contents.
+ * ----------
+ */
+static char *deparse_expression_pretty(Node *expr, List *dpcontext,
+ bool forceprefix, bool showimplicit,
+ int prettyFlags, int startIndent);
+static char *pg_get_viewdef_worker(Oid viewoid,
+ int prettyFlags, int wrapColumn);
+static char *pg_get_triggerdef_worker(Oid trigid, bool pretty);
+static int decompile_column_index_array(Datum column_index_array, Oid relId,
+ StringInfo buf);
+static char *pg_get_ruledef_worker(Oid ruleoid, int prettyFlags);
+static char *pg_get_indexdef_worker(Oid indexrelid, int colno,
+ const Oid *excludeOps,
+ bool attrsOnly, bool keysOnly,
+ bool showTblSpc, bool inherits,
+ int prettyFlags, bool missing_ok);
+static char *pg_get_statisticsobj_worker(Oid statextid, bool columns_only,
+ bool missing_ok);
+static char *pg_get_partkeydef_worker(Oid relid, int prettyFlags,
+ bool attrsOnly, bool missing_ok);
+static char *pg_get_constraintdef_worker(Oid constraintId, bool fullCommand,
+ int prettyFlags, bool missing_ok);
+static text *pg_get_expr_worker(text *expr, Oid relid, const char *relname,
+ int prettyFlags);
+static int print_function_arguments(StringInfo buf, HeapTuple proctup,
+ bool print_table_args, bool print_defaults);
+static void print_function_rettype(StringInfo buf, HeapTuple proctup);
+static void print_function_trftypes(StringInfo buf, HeapTuple proctup);
+static void print_function_sqlbody(StringInfo buf, HeapTuple proctup);
+static void set_rtable_names(deparse_namespace *dpns, List *parent_namespaces,
+ Bitmapset *rels_used);
+static void set_deparse_for_query(deparse_namespace *dpns, Query *query,
+ List *parent_namespaces);
+static void set_simple_column_names(deparse_namespace *dpns);
+static bool has_dangerous_join_using(deparse_namespace *dpns, Node *jtnode);
+static void set_using_names(deparse_namespace *dpns, Node *jtnode,
+ List *parentUsing);
+static void set_relation_column_names(deparse_namespace *dpns,
+ RangeTblEntry *rte,
+ deparse_columns *colinfo);
+static void set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
+ deparse_columns *colinfo);
+static bool colname_is_unique(const char *colname, deparse_namespace *dpns,
+ deparse_columns *colinfo);
+static char *make_colname_unique(char *colname, deparse_namespace *dpns,
+ deparse_columns *colinfo);
+static void expand_colnames_array_to(deparse_columns *colinfo, int n);
+static void identify_join_columns(JoinExpr *j, RangeTblEntry *jrte,
+ deparse_columns *colinfo);
+static char *get_rtable_name(int rtindex, deparse_context *context);
+static void set_deparse_plan(deparse_namespace *dpns, Plan *plan);
+static Plan *find_recursive_union(deparse_namespace *dpns,
+ WorkTableScan *wtscan);
+static void push_child_plan(deparse_namespace *dpns, Plan *plan,
+ deparse_namespace *save_dpns);
+static void pop_child_plan(deparse_namespace *dpns,
+ deparse_namespace *save_dpns);
+static void push_ancestor_plan(deparse_namespace *dpns, ListCell *ancestor_cell,
+ deparse_namespace *save_dpns);
+static void pop_ancestor_plan(deparse_namespace *dpns,
+ deparse_namespace *save_dpns);
+static void make_ruledef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
+ int prettyFlags);
+static void make_viewdef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
+ int prettyFlags, int wrapColumn);
+static void get_query_def(Query *query, StringInfo buf, List *parentnamespace,
+ TupleDesc resultDesc, bool colNamesVisible,
+ int prettyFlags, int wrapColumn, int startIndent);
+static void get_values_def(List *values_lists, deparse_context *context);
+static void get_with_clause(Query *query, deparse_context *context);
+static void get_select_query_def(Query *query, deparse_context *context,
+ TupleDesc resultDesc, bool colNamesVisible);
+static void get_insert_query_def(Query *query, deparse_context *context,
+ bool colNamesVisible);
+static void get_update_query_def(Query *query, deparse_context *context,
+ bool colNamesVisible);
+static void get_update_query_targetlist_def(Query *query, List *targetList,
+ deparse_context *context,
+ RangeTblEntry *rte);
+static void get_delete_query_def(Query *query, deparse_context *context,
+ bool colNamesVisible);
+static void get_merge_query_def(Query *query, deparse_context *context,
+ bool colNamesVisible);
+static void get_utility_query_def(Query *query, deparse_context *context);
+static void get_basic_select_query(Query *query, deparse_context *context,
+ TupleDesc resultDesc, bool colNamesVisible);
+static void get_target_list(List *targetList, deparse_context *context,
+ TupleDesc resultDesc, bool colNamesVisible);
+static void get_setop_query(Node *setOp, Query *query,
+ deparse_context *context,
+ TupleDesc resultDesc, bool colNamesVisible);
+static Node *get_rule_sortgroupclause(Index ref, List *tlist,
+ bool force_colno,
+ deparse_context *context);
+static void get_rule_groupingset(GroupingSet *gset, List *targetlist,
+ bool omit_parens, deparse_context *context);
+static void get_rule_orderby(List *orderList, List *targetList,
+ bool force_colno, deparse_context *context);
+static void get_rule_windowclause(Query *query, deparse_context *context);
+static void get_rule_windowspec(WindowClause *wc, List *targetList,
+ deparse_context *context);
+static char *get_variable(Var *var, int levelsup, bool istoplevel,
+ deparse_context *context);
+static void get_special_variable(Node *node, deparse_context *context,
+ void *callback_arg);
+static void resolve_special_varno(Node *node, deparse_context *context,
+ rsv_callback callback, void *callback_arg);
+static Node *find_param_referent(Param *param, deparse_context *context,
+ deparse_namespace **dpns_p, ListCell **ancestor_cell_p);
+static void get_parameter(Param *param, deparse_context *context);
+static const char *get_simple_binary_op_name(OpExpr *expr);
+static bool isSimpleNode(Node *node, Node *parentNode, int prettyFlags);
+static void appendContextKeyword(deparse_context *context, const char *str,
+ int indentBefore, int indentAfter, int indentPlus);
+static void removeStringInfoSpaces(StringInfo str);
+static void get_rule_expr(Node *node, deparse_context *context,
+ bool showimplicit);
+static void get_rule_expr_toplevel(Node *node, deparse_context *context,
+ bool showimplicit);
+static void get_rule_list_toplevel(List *lst, deparse_context *context,
+ bool showimplicit);
+static void get_rule_expr_funccall(Node *node, deparse_context *context,
+ bool showimplicit);
+static bool looks_like_function(Node *node);
+static void get_oper_expr(OpExpr *expr, deparse_context *context);
+static void get_func_expr(FuncExpr *expr, deparse_context *context,
+ bool showimplicit);
+static void get_agg_expr(Aggref *aggref, deparse_context *context,
+ Aggref *original_aggref);
+static void get_agg_combine_expr(Node *node, deparse_context *context,
+ void *callback_arg);
+static void get_windowfunc_expr(WindowFunc *wfunc, deparse_context *context);
+static bool get_func_sql_syntax(FuncExpr *expr, deparse_context *context);
+static void get_coercion_expr(Node *arg, deparse_context *context,
+ Oid resulttype, int32 resulttypmod,
+ Node *parentNode);
+static void get_const_expr(Const *constval, deparse_context *context,
+ int showtype);
+static void get_const_collation(Const *constval, deparse_context *context);
+static void simple_quote_literal(StringInfo buf, const char *val);
+static void get_sublink_expr(SubLink *sublink, deparse_context *context);
+static void get_tablefunc(TableFunc *tf, deparse_context *context,
+ bool showimplicit);
+static void get_from_clause(Query *query, const char *prefix,
+ deparse_context *context);
+static void get_from_clause_item(Node *jtnode, Query *query,
+ deparse_context *context);
+static void get_rte_alias(RangeTblEntry *rte, int varno, bool use_as,
+ deparse_context *context);
+static void get_column_alias_list(deparse_columns *colinfo,
+ deparse_context *context);
+static void get_from_clause_coldeflist(RangeTblFunction *rtfunc,
+ deparse_columns *colinfo,
+ deparse_context *context);
+static void get_tablesample_def(TableSampleClause *tablesample,
+ deparse_context *context);
+static void get_opclass_name(Oid opclass, Oid actual_datatype,
+ StringInfo buf);
+static Node *processIndirection(Node *node, deparse_context *context);
+static void printSubscripts(SubscriptingRef *sbsref, deparse_context *context);
+static char *get_relation_name(Oid relid);
+static char *generate_relation_name(Oid relid, List *namespaces);
+static char *generate_qualified_relation_name(Oid relid);
+static char *generate_function_name(Oid funcid, int nargs,
+ List *argnames, Oid *argtypes,
+ bool has_variadic, bool *use_variadic_p,
+ ParseExprKind special_exprkind);
+static char *generate_operator_name(Oid operid, Oid arg1, Oid arg2);
+static void add_cast_to(StringInfo buf, Oid typid);
+static char *generate_qualified_type_name(Oid typid);
+static text *string_to_text(char *str);
+static char *flatten_reloptions(Oid relid);
+static void get_reloptions(StringInfo buf, Datum reloptions);
+
+#define only_marker(rte) ((rte)->inh ? "" : "ONLY ")
+
+
+/* ----------
+ * pg_get_ruledef - Do it all and return a text
+ * that could be used as a statement
+ * to recreate the rule
+ * ----------
+ */
+Datum
+pg_get_ruledef(PG_FUNCTION_ARGS)
+{
+ Oid ruleoid = PG_GETARG_OID(0);
+ int prettyFlags;
+ char *res;
+
+ prettyFlags = PRETTYFLAG_INDENT;
+
+ res = pg_get_ruledef_worker(ruleoid, prettyFlags);
+
+ if (res == NULL)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(string_to_text(res));
+}
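+
+/*
+ * Illustrative SQL usage: given rule OIDs from pg_rewrite, for example
+ *
+ *		SELECT r.rulename, pg_get_ruledef(r.oid)
+ *		FROM pg_catalog.pg_rewrite r;
+ *
+ * each result is a complete CREATE RULE statement.
+ */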
+
+
+Datum
+pg_get_ruledef_ext(PG_FUNCTION_ARGS)
+{
+ Oid ruleoid = PG_GETARG_OID(0);
+ bool pretty = PG_GETARG_BOOL(1);
+ int prettyFlags;
+ char *res;
+
+ prettyFlags = GET_PRETTY_FLAGS(pretty);
+
+ res = pg_get_ruledef_worker(ruleoid, prettyFlags);
+
+ if (res == NULL)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(string_to_text(res));
+}
+
+
+static char *
+pg_get_ruledef_worker(Oid ruleoid, int prettyFlags)
+{
+ Datum args[1];
+ char nulls[1];
+ int spirc;
+ HeapTuple ruletup;
+ TupleDesc rulettc;
+ StringInfoData buf;
+
+ /*
+ * Do this first so that string is alloc'd in outer context not SPI's.
+ */
+ initStringInfo(&buf);
+
+ /*
+ * Connect to SPI manager
+ */
+ if (SPI_connect() != SPI_OK_CONNECT)
+ elog(ERROR, "SPI_connect failed");
+
+ /*
+	 * On the first call, prepare the plan to look up pg_rewrite.  We read
+	 * pg_rewrite through the SPI manager instead of using the syscache so
+	 * that the normal permission check for read access on pg_rewrite is
+	 * applied.
+ */
+ if (plan_getrulebyoid == NULL)
+ {
+ Oid argtypes[1];
+ SPIPlanPtr plan;
+
+ argtypes[0] = OIDOID;
+ plan = SPI_prepare(query_getrulebyoid, 1, argtypes);
+ if (plan == NULL)
+ elog(ERROR, "SPI_prepare failed for \"%s\"", query_getrulebyoid);
+ SPI_keepplan(plan);
+ plan_getrulebyoid = plan;
+ }
+
+ /*
+ * Get the pg_rewrite tuple for this rule
+ */
+ args[0] = ObjectIdGetDatum(ruleoid);
+ nulls[0] = ' ';
+ spirc = SPI_execute_plan(plan_getrulebyoid, args, nulls, true, 0);
+ if (spirc != SPI_OK_SELECT)
+ elog(ERROR, "failed to get pg_rewrite tuple for rule %u", ruleoid);
+ if (SPI_processed != 1)
+ {
+ /*
+		 * There is no tuple data available here; just keep the output buffer
+		 * empty.
+ */
+ }
+ else
+ {
+ /*
+ * Get the rule's definition and put it into executor's memory
+ */
+ ruletup = SPI_tuptable->vals[0];
+ rulettc = SPI_tuptable->tupdesc;
+ make_ruledef(&buf, ruletup, rulettc, prettyFlags);
+ }
+
+ /*
+ * Disconnect from SPI manager
+ */
+ if (SPI_finish() != SPI_OK_FINISH)
+ elog(ERROR, "SPI_finish failed");
+
+ if (buf.len == 0)
+ return NULL;
+
+ return buf.data;
+}
+
+
+/* ----------
+ * pg_get_viewdef - Mainly the same thing, but we
+ * only return the SELECT part of a view
+ * ----------
+ */
+Datum
+pg_get_viewdef(PG_FUNCTION_ARGS)
+{
+ /* By OID */
+ Oid viewoid = PG_GETARG_OID(0);
+ int prettyFlags;
+ char *res;
+
+ prettyFlags = PRETTYFLAG_INDENT;
+
+ res = pg_get_viewdef_worker(viewoid, prettyFlags, WRAP_COLUMN_DEFAULT);
+
+ if (res == NULL)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(string_to_text(res));
+}
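+
+/*
+ * Illustrative SQL usage of this function and the variants below:
+ *
+ *		SELECT pg_get_viewdef('pg_catalog.pg_roles'::regclass);
+ *		SELECT pg_get_viewdef('pg_catalog.pg_roles'::regclass, true);	-- pretty
+ *		SELECT pg_get_viewdef('pg_catalog.pg_roles'::regclass, 80);	-- wrap at 80
+ */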
+
+
+Datum
+pg_get_viewdef_ext(PG_FUNCTION_ARGS)
+{
+ /* By OID */
+ Oid viewoid = PG_GETARG_OID(0);
+ bool pretty = PG_GETARG_BOOL(1);
+ int prettyFlags;
+ char *res;
+
+ prettyFlags = GET_PRETTY_FLAGS(pretty);
+
+ res = pg_get_viewdef_worker(viewoid, prettyFlags, WRAP_COLUMN_DEFAULT);
+
+ if (res == NULL)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(string_to_text(res));
+}
+
+Datum
+pg_get_viewdef_wrap(PG_FUNCTION_ARGS)
+{
+ /* By OID */
+ Oid viewoid = PG_GETARG_OID(0);
+ int wrap = PG_GETARG_INT32(1);
+ int prettyFlags;
+ char *res;
+
+ /* calling this implies we want pretty printing */
+ prettyFlags = GET_PRETTY_FLAGS(true);
+
+ res = pg_get_viewdef_worker(viewoid, prettyFlags, wrap);
+
+ if (res == NULL)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(string_to_text(res));
+}
+
+Datum
+pg_get_viewdef_name(PG_FUNCTION_ARGS)
+{
+ /* By qualified name */
+ text *viewname = PG_GETARG_TEXT_PP(0);
+ int prettyFlags;
+ RangeVar *viewrel;
+ Oid viewoid;
+ char *res;
+
+ prettyFlags = PRETTYFLAG_INDENT;
+
+ /* Look up view name. Can't lock it - we might not have privileges. */
+ viewrel = makeRangeVarFromNameList(textToQualifiedNameList(viewname));
+ viewoid = RangeVarGetRelid(viewrel, NoLock, false);
+
+ res = pg_get_viewdef_worker(viewoid, prettyFlags, WRAP_COLUMN_DEFAULT);
+
+ if (res == NULL)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(string_to_text(res));
+}
+
+
+Datum
+pg_get_viewdef_name_ext(PG_FUNCTION_ARGS)
+{
+ /* By qualified name */
+ text *viewname = PG_GETARG_TEXT_PP(0);
+ bool pretty = PG_GETARG_BOOL(1);
+ int prettyFlags;
+ RangeVar *viewrel;
+ Oid viewoid;
+ char *res;
+
+ prettyFlags = GET_PRETTY_FLAGS(pretty);
+
+ /* Look up view name. Can't lock it - we might not have privileges. */
+ viewrel = makeRangeVarFromNameList(textToQualifiedNameList(viewname));
+ viewoid = RangeVarGetRelid(viewrel, NoLock, false);
+
+ res = pg_get_viewdef_worker(viewoid, prettyFlags, WRAP_COLUMN_DEFAULT);
+
+ if (res == NULL)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(string_to_text(res));
+}
+
+/*
+ * Common code for by-OID and by-name variants of pg_get_viewdef
+ */
+static char *
+pg_get_viewdef_worker(Oid viewoid, int prettyFlags, int wrapColumn)
+{
+ Datum args[2];
+ char nulls[2];
+ int spirc;
+ HeapTuple ruletup;
+ TupleDesc rulettc;
+ StringInfoData buf;
+
+ /*
+ * Do this first so that string is alloc'd in outer context not SPI's.
+ */
+ initStringInfo(&buf);
+
+ /*
+ * Connect to SPI manager
+ */
+ if (SPI_connect() != SPI_OK_CONNECT)
+ elog(ERROR, "SPI_connect failed");
+
+ /*
+	 * On the first call, prepare the plan to look up pg_rewrite.  We read
+	 * pg_rewrite through the SPI manager instead of using the syscache so
+	 * that the normal permission check for read access on pg_rewrite is
+	 * applied.
+ */
+ if (plan_getviewrule == NULL)
+ {
+ Oid argtypes[2];
+ SPIPlanPtr plan;
+
+ argtypes[0] = OIDOID;
+ argtypes[1] = NAMEOID;
+ plan = SPI_prepare(query_getviewrule, 2, argtypes);
+ if (plan == NULL)
+ elog(ERROR, "SPI_prepare failed for \"%s\"", query_getviewrule);
+ SPI_keepplan(plan);
+ plan_getviewrule = plan;
+ }
+
+ /*
+ * Get the pg_rewrite tuple for the view's SELECT rule
+ */
+ args[0] = ObjectIdGetDatum(viewoid);
+ args[1] = DirectFunctionCall1(namein, CStringGetDatum(ViewSelectRuleName));
+ nulls[0] = ' ';
+ nulls[1] = ' ';
+ spirc = SPI_execute_plan(plan_getviewrule, args, nulls, true, 0);
+ if (spirc != SPI_OK_SELECT)
+ elog(ERROR, "failed to get pg_rewrite tuple for view %u", viewoid);
+ if (SPI_processed != 1)
+ {
+ /*
+		 * There is no tuple data available here; just keep the output buffer
+		 * empty.
+ */
+ }
+ else
+ {
+ /*
+ * Get the rule's definition and put it into executor's memory
+ */
+ ruletup = SPI_tuptable->vals[0];
+ rulettc = SPI_tuptable->tupdesc;
+ make_viewdef(&buf, ruletup, rulettc, prettyFlags, wrapColumn);
+ }
+
+ /*
+ * Disconnect from SPI manager
+ */
+ if (SPI_finish() != SPI_OK_FINISH)
+ elog(ERROR, "SPI_finish failed");
+
+ if (buf.len == 0)
+ return NULL;
+
+ return buf.data;
+}
+
+/* ----------
+ * pg_get_triggerdef - Get the definition of a trigger
+ * ----------
+ */
+Datum
+pg_get_triggerdef(PG_FUNCTION_ARGS)
+{
+ Oid trigid = PG_GETARG_OID(0);
+ char *res;
+
+ res = pg_get_triggerdef_worker(trigid, false);
+
+ if (res == NULL)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(string_to_text(res));
+}
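+
+/*
+ * Illustrative SQL usage (the trigger name is hypothetical):
+ *
+ *		SELECT pg_get_triggerdef(oid, true)
+ *		FROM pg_catalog.pg_trigger
+ *		WHERE tgname = 'my_audit_trigger';
+ */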
+
+Datum
+pg_get_triggerdef_ext(PG_FUNCTION_ARGS)
+{
+ Oid trigid = PG_GETARG_OID(0);
+ bool pretty = PG_GETARG_BOOL(1);
+ char *res;
+
+ res = pg_get_triggerdef_worker(trigid, pretty);
+
+ if (res == NULL)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(string_to_text(res));
+}
+
+static char *
+pg_get_triggerdef_worker(Oid trigid, bool pretty)
+{
+ HeapTuple ht_trig;
+ Form_pg_trigger trigrec;
+ StringInfoData buf;
+ Relation tgrel;
+ ScanKeyData skey[1];
+ SysScanDesc tgscan;
+ int findx = 0;
+ char *tgname;
+ char *tgoldtable;
+ char *tgnewtable;
+ Datum value;
+ bool isnull;
+
+ /*
+ * Fetch the pg_trigger tuple by the Oid of the trigger
+ */
+ tgrel = table_open(TriggerRelationId, AccessShareLock);
+
+ ScanKeyInit(&skey[0],
+ Anum_pg_trigger_oid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(trigid));
+
+ tgscan = systable_beginscan(tgrel, TriggerOidIndexId, true,
+ NULL, 1, skey);
+
+ ht_trig = systable_getnext(tgscan);
+
+ if (!HeapTupleIsValid(ht_trig))
+ {
+ systable_endscan(tgscan);
+ table_close(tgrel, AccessShareLock);
+ return NULL;
+ }
+
+ trigrec = (Form_pg_trigger) GETSTRUCT(ht_trig);
+
+ /*
+ * Start the trigger definition. Note that the trigger's name should never
+ * be schema-qualified, but the trigger rel's name may be.
+ */
+ initStringInfo(&buf);
+
+ tgname = NameStr(trigrec->tgname);
+ appendStringInfo(&buf, "CREATE %sTRIGGER %s ",
+ OidIsValid(trigrec->tgconstraint) ? "CONSTRAINT " : "",
+ quote_identifier(tgname));
+
+ if (TRIGGER_FOR_BEFORE(trigrec->tgtype))
+ appendStringInfoString(&buf, "BEFORE");
+ else if (TRIGGER_FOR_AFTER(trigrec->tgtype))
+ appendStringInfoString(&buf, "AFTER");
+ else if (TRIGGER_FOR_INSTEAD(trigrec->tgtype))
+ appendStringInfoString(&buf, "INSTEAD OF");
+ else
+ elog(ERROR, "unexpected tgtype value: %d", trigrec->tgtype);
+
+ if (TRIGGER_FOR_INSERT(trigrec->tgtype))
+ {
+ appendStringInfoString(&buf, " INSERT");
+ findx++;
+ }
+ if (TRIGGER_FOR_DELETE(trigrec->tgtype))
+ {
+ if (findx > 0)
+ appendStringInfoString(&buf, " OR DELETE");
+ else
+ appendStringInfoString(&buf, " DELETE");
+ findx++;
+ }
+ if (TRIGGER_FOR_UPDATE(trigrec->tgtype))
+ {
+ if (findx > 0)
+ appendStringInfoString(&buf, " OR UPDATE");
+ else
+ appendStringInfoString(&buf, " UPDATE");
+ findx++;
+ /* tgattr is first var-width field, so OK to access directly */
+ if (trigrec->tgattr.dim1 > 0)
+ {
+ int i;
+
+ appendStringInfoString(&buf, " OF ");
+ for (i = 0; i < trigrec->tgattr.dim1; i++)
+ {
+ char *attname;
+
+ if (i > 0)
+ appendStringInfoString(&buf, ", ");
+ attname = get_attname(trigrec->tgrelid,
+ trigrec->tgattr.values[i], false);
+ appendStringInfoString(&buf, quote_identifier(attname));
+ }
+ }
+ }
+ if (TRIGGER_FOR_TRUNCATE(trigrec->tgtype))
+ {
+ if (findx > 0)
+ appendStringInfoString(&buf, " OR TRUNCATE");
+ else
+ appendStringInfoString(&buf, " TRUNCATE");
+ findx++;
+ }
+
+ /*
+ * In non-pretty mode, always schema-qualify the target table name for
+ * safety. In pretty mode, schema-qualify only if not visible.
+ */
+ appendStringInfo(&buf, " ON %s ",
+ pretty ?
+ generate_relation_name(trigrec->tgrelid, NIL) :
+ generate_qualified_relation_name(trigrec->tgrelid));
+
+ if (OidIsValid(trigrec->tgconstraint))
+ {
+ if (OidIsValid(trigrec->tgconstrrelid))
+ appendStringInfo(&buf, "FROM %s ",
+ generate_relation_name(trigrec->tgconstrrelid, NIL));
+ if (!trigrec->tgdeferrable)
+ appendStringInfoString(&buf, "NOT ");
+ appendStringInfoString(&buf, "DEFERRABLE INITIALLY ");
+ if (trigrec->tginitdeferred)
+ appendStringInfoString(&buf, "DEFERRED ");
+ else
+ appendStringInfoString(&buf, "IMMEDIATE ");
+ }
+
+ value = fastgetattr(ht_trig, Anum_pg_trigger_tgoldtable,
+ tgrel->rd_att, &isnull);
+ if (!isnull)
+ tgoldtable = NameStr(*DatumGetName(value));
+ else
+ tgoldtable = NULL;
+ value = fastgetattr(ht_trig, Anum_pg_trigger_tgnewtable,
+ tgrel->rd_att, &isnull);
+ if (!isnull)
+ tgnewtable = NameStr(*DatumGetName(value));
+ else
+ tgnewtable = NULL;
+ if (tgoldtable != NULL || tgnewtable != NULL)
+ {
+ appendStringInfoString(&buf, "REFERENCING ");
+ if (tgoldtable != NULL)
+ appendStringInfo(&buf, "OLD TABLE AS %s ",
+ quote_identifier(tgoldtable));
+ if (tgnewtable != NULL)
+ appendStringInfo(&buf, "NEW TABLE AS %s ",
+ quote_identifier(tgnewtable));
+ }
+
+ if (TRIGGER_FOR_ROW(trigrec->tgtype))
+ appendStringInfoString(&buf, "FOR EACH ROW ");
+ else
+ appendStringInfoString(&buf, "FOR EACH STATEMENT ");
+
+ /* If the trigger has a WHEN qualification, add that */
+ value = fastgetattr(ht_trig, Anum_pg_trigger_tgqual,
+ tgrel->rd_att, &isnull);
+ if (!isnull)
+ {
+ Node *qual;
+ char relkind;
+ deparse_context context;
+ deparse_namespace dpns;
+ RangeTblEntry *oldrte;
+ RangeTblEntry *newrte;
+
+ appendStringInfoString(&buf, "WHEN (");
+
+ qual = stringToNode(TextDatumGetCString(value));
+
+ relkind = get_rel_relkind(trigrec->tgrelid);
+
+ /* Build minimal OLD and NEW RTEs for the rel */
+ oldrte = makeNode(RangeTblEntry);
+ oldrte->rtekind = RTE_RELATION;
+ oldrte->relid = trigrec->tgrelid;
+ oldrte->relkind = relkind;
+ oldrte->rellockmode = AccessShareLock;
+ oldrte->alias = makeAlias("old", NIL);
+ oldrte->eref = oldrte->alias;
+ oldrte->lateral = false;
+ oldrte->inh = false;
+ oldrte->inFromCl = true;
+
+ newrte = makeNode(RangeTblEntry);
+ newrte->rtekind = RTE_RELATION;
+ newrte->relid = trigrec->tgrelid;
+ newrte->relkind = relkind;
+ newrte->rellockmode = AccessShareLock;
+ newrte->alias = makeAlias("new", NIL);
+ newrte->eref = newrte->alias;
+ newrte->lateral = false;
+ newrte->inh = false;
+ newrte->inFromCl = true;
+
+ /* Build two-element rtable */
+ memset(&dpns, 0, sizeof(dpns));
+ dpns.rtable = list_make2(oldrte, newrte);
+ dpns.subplans = NIL;
+ dpns.ctes = NIL;
+ dpns.appendrels = NULL;
+ set_rtable_names(&dpns, NIL, NULL);
+ set_simple_column_names(&dpns);
+
+ /* Set up context with one-deep namespace stack */
+ context.buf = &buf;
+ context.namespaces = list_make1(&dpns);
+ context.windowClause = NIL;
+ context.windowTList = NIL;
+ context.varprefix = true;
+ context.prettyFlags = GET_PRETTY_FLAGS(pretty);
+ context.wrapColumn = WRAP_COLUMN_DEFAULT;
+ context.indentLevel = PRETTYINDENT_STD;
+ context.special_exprkind = EXPR_KIND_NONE;
+ context.appendparents = NULL;
+
+ get_rule_expr(qual, &context, false);
+
+ appendStringInfoString(&buf, ") ");
+ }
+
+ appendStringInfo(&buf, "EXECUTE FUNCTION %s(",
+ generate_function_name(trigrec->tgfoid, 0,
+ NIL, NULL,
+ false, NULL, EXPR_KIND_NONE));
+
+ if (trigrec->tgnargs > 0)
+ {
+ char *p;
+ int i;
+
+ value = fastgetattr(ht_trig, Anum_pg_trigger_tgargs,
+ tgrel->rd_att, &isnull);
+ if (isnull)
+ elog(ERROR, "tgargs is null for trigger %u", trigid);
+ p = (char *) VARDATA_ANY(DatumGetByteaPP(value));
+ for (i = 0; i < trigrec->tgnargs; i++)
+ {
+ if (i > 0)
+ appendStringInfoString(&buf, ", ");
+ simple_quote_literal(&buf, p);
+ /* advance p to next string embedded in tgargs */
+ while (*p)
+ p++;
+ p++;
+ }
+ }
+
+ /* We deliberately do not put semi-colon at end */
+ appendStringInfoChar(&buf, ')');
+
+ /* Clean up */
+ systable_endscan(tgscan);
+
+ table_close(tgrel, AccessShareLock);
+
+ return buf.data;
+}
+
+/* ----------
+ * pg_get_indexdef - Get the definition of an index
+ *
+ * In the extended version, there is a colno argument as well as pretty bool.
+ * if colno == 0, we want a complete index definition.
+ * if colno > 0, we only want the Nth index key's variable or expression.
+ *
+ * Note that the SQL-function versions of this omit any info about the
+ * index tablespace; this is intentional because pg_dump wants it that way.
+ * However pg_get_indexdef_string() includes the index tablespace.
+ * ----------
+ */
+Datum
+pg_get_indexdef(PG_FUNCTION_ARGS)
+{
+ Oid indexrelid = PG_GETARG_OID(0);
+ int prettyFlags;
+ char *res;
+
+ prettyFlags = PRETTYFLAG_INDENT;
+
+ res = pg_get_indexdef_worker(indexrelid, 0, NULL,
+ false, false,
+ false, false,
+ prettyFlags, true);
+
+ if (res == NULL)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(string_to_text(res));
+}
+
+Datum
+pg_get_indexdef_ext(PG_FUNCTION_ARGS)
+{
+ Oid indexrelid = PG_GETARG_OID(0);
+ int32 colno = PG_GETARG_INT32(1);
+ bool pretty = PG_GETARG_BOOL(2);
+ int prettyFlags;
+ char *res;
+
+ prettyFlags = GET_PRETTY_FLAGS(pretty);
+
+ res = pg_get_indexdef_worker(indexrelid, colno, NULL,
+ colno != 0, false,
+ false, false,
+ prettyFlags, true);
+
+ if (res == NULL)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(string_to_text(res));
+}
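+
+/*
+ * Illustrative SQL usage of the colno argument (index and column names are
+ * hypothetical): for an index defined on (a, lower(b)),
+ *
+ *		SELECT pg_get_indexdef('my_idx'::regclass);			-- full CREATE INDEX
+ *		SELECT pg_get_indexdef('my_idx'::regclass, 2, true);	-- just "lower(b)"
+ */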
+
+/*
+ * Internal version for use by ALTER TABLE.
+ * Includes a tablespace clause in the result.
+ * Returns a palloc'd C string; no pretty-printing.
+ */
+char *
+pg_get_indexdef_string(Oid indexrelid)
+{
+ return pg_get_indexdef_worker(indexrelid, 0, NULL,
+ false, false,
+ true, true,
+ 0, false);
+}
+
+/* Internal version that just reports the key-column definitions */
+char *
+pg_get_indexdef_columns(Oid indexrelid, bool pretty)
+{
+ int prettyFlags;
+
+ prettyFlags = GET_PRETTY_FLAGS(pretty);
+
+ return pg_get_indexdef_worker(indexrelid, 0, NULL,
+ true, true,
+ false, false,
+ prettyFlags, false);
+}
+
+/* Internal version, extensible with flags to control its behavior */
+char *
+pg_get_indexdef_columns_extended(Oid indexrelid, bits16 flags)
+{
+ bool pretty = ((flags & RULE_INDEXDEF_PRETTY) != 0);
+ bool keys_only = ((flags & RULE_INDEXDEF_KEYS_ONLY) != 0);
+ int prettyFlags;
+
+ prettyFlags = GET_PRETTY_FLAGS(pretty);
+
+ return pg_get_indexdef_worker(indexrelid, 0, NULL,
+ true, keys_only,
+ false, false,
+ prettyFlags, false);
+}
+
+/*
+ * Internal workhorse to decompile an index definition.
+ *
+ * This is now used for exclusion constraints as well: if excludeOps is not
+ * NULL then it points to an array of exclusion operator OIDs.
+ */
+static char *
+pg_get_indexdef_worker(Oid indexrelid, int colno,
+ const Oid *excludeOps,
+ bool attrsOnly, bool keysOnly,
+ bool showTblSpc, bool inherits,
+ int prettyFlags, bool missing_ok)
+{
+ /* might want a separate isConstraint parameter later */
+ bool isConstraint = (excludeOps != NULL);
+ HeapTuple ht_idx;
+ HeapTuple ht_idxrel;
+ HeapTuple ht_am;
+ Form_pg_index idxrec;
+ Form_pg_class idxrelrec;
+ Form_pg_am amrec;
+ IndexAmRoutine *amroutine;
+ List *indexprs;
+ ListCell *indexpr_item;
+ List *context;
+ Oid indrelid;
+ int keyno;
+ Datum indcollDatum;
+ Datum indclassDatum;
+ Datum indoptionDatum;
+ bool isnull;
+ oidvector *indcollation;
+ oidvector *indclass;
+ int2vector *indoption;
+ StringInfoData buf;
+ char *str;
+ char *sep;
+
+ /*
+ * Fetch the pg_index tuple by the Oid of the index
+ */
+ ht_idx = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexrelid));
+ if (!HeapTupleIsValid(ht_idx))
+ {
+ if (missing_ok)
+ return NULL;
+ elog(ERROR, "cache lookup failed for index %u", indexrelid);
+ }
+ idxrec = (Form_pg_index) GETSTRUCT(ht_idx);
+
+ indrelid = idxrec->indrelid;
+ Assert(indexrelid == idxrec->indexrelid);
+
+ /* Must get indcollation, indclass, and indoption the hard way */
+ indcollDatum = SysCacheGetAttr(INDEXRELID, ht_idx,
+ Anum_pg_index_indcollation, &isnull);
+ Assert(!isnull);
+ indcollation = (oidvector *) DatumGetPointer(indcollDatum);
+
+ indclassDatum = SysCacheGetAttr(INDEXRELID, ht_idx,
+ Anum_pg_index_indclass, &isnull);
+ Assert(!isnull);
+ indclass = (oidvector *) DatumGetPointer(indclassDatum);
+
+ indoptionDatum = SysCacheGetAttr(INDEXRELID, ht_idx,
+ Anum_pg_index_indoption, &isnull);
+ Assert(!isnull);
+ indoption = (int2vector *) DatumGetPointer(indoptionDatum);
+
+ /*
+ * Fetch the pg_class tuple of the index relation
+ */
+ ht_idxrel = SearchSysCache1(RELOID, ObjectIdGetDatum(indexrelid));
+ if (!HeapTupleIsValid(ht_idxrel))
+ elog(ERROR, "cache lookup failed for relation %u", indexrelid);
+ idxrelrec = (Form_pg_class) GETSTRUCT(ht_idxrel);
+
+ /*
+ * Fetch the pg_am tuple of the index' access method
+ */
+ ht_am = SearchSysCache1(AMOID, ObjectIdGetDatum(idxrelrec->relam));
+ if (!HeapTupleIsValid(ht_am))
+ elog(ERROR, "cache lookup failed for access method %u",
+ idxrelrec->relam);
+ amrec = (Form_pg_am) GETSTRUCT(ht_am);
+
+ /* Fetch the index AM's API struct */
+ amroutine = GetIndexAmRoutine(amrec->amhandler);
+
+ /*
+ * Get the index expressions, if any. (NOTE: we do not use the relcache
+ * versions of the expressions and predicate, because we want to display
+ * non-const-folded expressions.)
+ */
+ if (!heap_attisnull(ht_idx, Anum_pg_index_indexprs, NULL))
+ {
+ Datum exprsDatum;
+ bool isnull;
+ char *exprsString;
+
+ exprsDatum = SysCacheGetAttr(INDEXRELID, ht_idx,
+ Anum_pg_index_indexprs, &isnull);
+ Assert(!isnull);
+ exprsString = TextDatumGetCString(exprsDatum);
+ indexprs = (List *) stringToNode(exprsString);
+ pfree(exprsString);
+ }
+ else
+ indexprs = NIL;
+
+ indexpr_item = list_head(indexprs);
+
+ context = deparse_context_for(get_relation_name(indrelid), indrelid);
+
+ /*
+ * Start the index definition. Note that the index's name should never be
+ * schema-qualified, but the indexed rel's name may be.
+ */
+ initStringInfo(&buf);
+
+ if (!attrsOnly)
+ {
+ if (!isConstraint)
+ appendStringInfo(&buf, "CREATE %sINDEX %s ON %s%s USING %s (",
+ idxrec->indisunique ? "UNIQUE " : "",
+ quote_identifier(NameStr(idxrelrec->relname)),
+ idxrelrec->relkind == RELKIND_PARTITIONED_INDEX
+ && !inherits ? "ONLY " : "",
+ (prettyFlags & PRETTYFLAG_SCHEMA) ?
+ generate_relation_name(indrelid, NIL) :
+ generate_qualified_relation_name(indrelid),
+ quote_identifier(NameStr(amrec->amname)));
+ else /* currently, must be EXCLUDE constraint */
+ appendStringInfo(&buf, "EXCLUDE USING %s (",
+ quote_identifier(NameStr(amrec->amname)));
+ }
+
+ /*
+ * Report the indexed attributes
+ */
+ sep = "";
+ for (keyno = 0; keyno < idxrec->indnatts; keyno++)
+ {
+ AttrNumber attnum = idxrec->indkey.values[keyno];
+ Oid keycoltype;
+ Oid keycolcollation;
+
+ /*
+ * Ignore non-key attributes if told to.
+ */
+ if (keysOnly && keyno >= idxrec->indnkeyatts)
+ break;
+
+ /* Otherwise, print INCLUDE to divide key and non-key attrs. */
+ if (!colno && keyno == idxrec->indnkeyatts)
+ {
+ appendStringInfoString(&buf, ") INCLUDE (");
+ sep = "";
+ }
+
+ if (!colno)
+ appendStringInfoString(&buf, sep);
+ sep = ", ";
+
+ if (attnum != 0)
+ {
+ /* Simple index column */
+ char *attname;
+ int32 keycoltypmod;
+
+ attname = get_attname(indrelid, attnum, false);
+ if (!colno || colno == keyno + 1)
+ appendStringInfoString(&buf, quote_identifier(attname));
+ get_atttypetypmodcoll(indrelid, attnum,
+ &keycoltype, &keycoltypmod,
+ &keycolcollation);
+ }
+ else
+ {
+ /* expressional index */
+ Node *indexkey;
+
+ if (indexpr_item == NULL)
+ elog(ERROR, "too few entries in indexprs list");
+ indexkey = (Node *) lfirst(indexpr_item);
+ indexpr_item = lnext(indexprs, indexpr_item);
+ /* Deparse */
+ str = deparse_expression_pretty(indexkey, context, false, false,
+ prettyFlags, 0);
+ if (!colno || colno == keyno + 1)
+ {
+ /* Need parens if it's not a bare function call */
+ if (looks_like_function(indexkey))
+ appendStringInfoString(&buf, str);
+ else
+ appendStringInfo(&buf, "(%s)", str);
+ }
+ keycoltype = exprType(indexkey);
+ keycolcollation = exprCollation(indexkey);
+ }
+
+ /* Print additional decoration for (selected) key columns */
+ if (!attrsOnly && keyno < idxrec->indnkeyatts &&
+ (!colno || colno == keyno + 1))
+ {
+ int16 opt = indoption->values[keyno];
+ Oid indcoll = indcollation->values[keyno];
+ Datum attoptions = get_attoptions(indexrelid, keyno + 1);
+ bool has_options = attoptions != (Datum) 0;
+
+ /* Add collation, if not default for column */
+ if (OidIsValid(indcoll) && indcoll != keycolcollation)
+ appendStringInfo(&buf, " COLLATE %s",
+ generate_collation_name((indcoll)));
+
+ /* Add the operator class name, if not default */
+ get_opclass_name(indclass->values[keyno],
+ has_options ? InvalidOid : keycoltype, &buf);
+
+ if (has_options)
+ {
+ appendStringInfoString(&buf, " (");
+ get_reloptions(&buf, attoptions);
+ appendStringInfoChar(&buf, ')');
+ }
+
+ /* Add options if relevant */
+ if (amroutine->amcanorder)
+ {
+ /* if it supports sort ordering, report DESC and NULLS opts */
+ if (opt & INDOPTION_DESC)
+ {
+ appendStringInfoString(&buf, " DESC");
+ /* NULLS FIRST is the default in this case */
+ if (!(opt & INDOPTION_NULLS_FIRST))
+ appendStringInfoString(&buf, " NULLS LAST");
+ }
+ else
+ {
+ if (opt & INDOPTION_NULLS_FIRST)
+ appendStringInfoString(&buf, " NULLS FIRST");
+ }
+ }
+
+ /* Add the exclusion operator if relevant */
+ if (excludeOps != NULL)
+ appendStringInfo(&buf, " WITH %s",
+ generate_operator_name(excludeOps[keyno],
+ keycoltype,
+ keycoltype));
+ }
+ }
+
+ if (!attrsOnly)
+ {
+ appendStringInfoChar(&buf, ')');
+
+ if (idxrec->indnullsnotdistinct)
+			appendStringInfoString(&buf, " NULLS NOT DISTINCT");
+
+ /*
+ * If it has options, append "WITH (options)"
+ */
+ str = flatten_reloptions(indexrelid);
+ if (str)
+ {
+ appendStringInfo(&buf, " WITH (%s)", str);
+ pfree(str);
+ }
+
+ /*
+ * Print tablespace, but only if requested
+ */
+ if (showTblSpc)
+ {
+ Oid tblspc;
+
+ tblspc = get_rel_tablespace(indexrelid);
+ if (OidIsValid(tblspc))
+ {
+ if (isConstraint)
+ appendStringInfoString(&buf, " USING INDEX");
+ appendStringInfo(&buf, " TABLESPACE %s",
+ quote_identifier(get_tablespace_name(tblspc)));
+ }
+ }
+
+ /*
+ * If it's a partial index, decompile and append the predicate
+ */
+ if (!heap_attisnull(ht_idx, Anum_pg_index_indpred, NULL))
+ {
+ Node *node;
+ Datum predDatum;
+ bool isnull;
+ char *predString;
+
+ /* Convert text string to node tree */
+ predDatum = SysCacheGetAttr(INDEXRELID, ht_idx,
+ Anum_pg_index_indpred, &isnull);
+ Assert(!isnull);
+ predString = TextDatumGetCString(predDatum);
+ node = (Node *) stringToNode(predString);
+ pfree(predString);
+
+ /* Deparse */
+ str = deparse_expression_pretty(node, context, false, false,
+ prettyFlags, 0);
+ if (isConstraint)
+ appendStringInfo(&buf, " WHERE (%s)", str);
+ else
+ appendStringInfo(&buf, " WHERE %s", str);
+ }
+ }
+
+ /* Clean up */
+ ReleaseSysCache(ht_idx);
+ ReleaseSysCache(ht_idxrel);
+ ReleaseSysCache(ht_am);
+
+ return buf.data;
+}
+
+/* ----------
+ * pg_get_querydef
+ *
+ * Public entry point to deparse one query parsetree.
+ * The pretty flags are determined by GET_PRETTY_FLAGS(pretty).
+ *
+ * The result is a palloc'd C string.
+ * ----------
+ */
+char *
+pg_get_querydef(Query *query, bool pretty)
+{
+ StringInfoData buf;
+ int prettyFlags;
+
+ prettyFlags = GET_PRETTY_FLAGS(pretty);
+
+ initStringInfo(&buf);
+
+ get_query_def(query, &buf, NIL, NULL, true,
+ prettyFlags, WRAP_COLUMN_DEFAULT, 0);
+
+ return buf.data;
+}
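+
+/*
+ * For example, a caller holding a Query *query (hypothetical variable) can
+ * obtain its SQL text with:
+ *
+ *		char	   *sql = pg_get_querydef(query, false);
+ */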
+
+/*
+ * pg_get_statisticsobjdef
+ * Get the definition of an extended statistics object
+ */
+Datum
+pg_get_statisticsobjdef(PG_FUNCTION_ARGS)
+{
+ Oid statextid = PG_GETARG_OID(0);
+ char *res;
+
+ res = pg_get_statisticsobj_worker(statextid, false, true);
+
+ if (res == NULL)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(string_to_text(res));
+}
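+
+/*
+ * Illustrative SQL usage (the statistics object name is hypothetical):
+ *
+ *		SELECT pg_get_statisticsobjdef(oid)
+ *		FROM pg_catalog.pg_statistic_ext
+ *		WHERE stxname = 'my_stats';
+ */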
+
+/*
+ * Internal version for use by ALTER TABLE.
+ * Returns a palloc'd C string; no pretty-printing.
+ */
+char *
+pg_get_statisticsobjdef_string(Oid statextid)
+{
+ return pg_get_statisticsobj_worker(statextid, false, false);
+}
+
+/*
+ * pg_get_statisticsobjdef_columns
+ * Get columns and expressions for an extended statistics object
+ */
+Datum
+pg_get_statisticsobjdef_columns(PG_FUNCTION_ARGS)
+{
+ Oid statextid = PG_GETARG_OID(0);
+ char *res;
+
+ res = pg_get_statisticsobj_worker(statextid, true, true);
+
+ if (res == NULL)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(string_to_text(res));
+}
+
+/*
+ * Internal workhorse to decompile an extended statistics object.
+ */
+static char *
+pg_get_statisticsobj_worker(Oid statextid, bool columns_only, bool missing_ok)
+{
+ Form_pg_statistic_ext statextrec;
+ HeapTuple statexttup;
+ StringInfoData buf;
+ int colno;
+ char *nsp;
+ ArrayType *arr;
+ char *enabled;
+ Datum datum;
+ bool isnull;
+ bool ndistinct_enabled;
+ bool dependencies_enabled;
+ bool mcv_enabled;
+ int i;
+ List *context;
+ ListCell *lc;
+ List *exprs = NIL;
+ bool has_exprs;
+ int ncolumns;
+
+ statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid));
+
+ if (!HeapTupleIsValid(statexttup))
+ {
+ if (missing_ok)
+ return NULL;
+ elog(ERROR, "cache lookup failed for statistics object %u", statextid);
+ }
+
+	/* Does the statistics object have expressions? */
+ has_exprs = !heap_attisnull(statexttup, Anum_pg_statistic_ext_stxexprs, NULL);
+
+ statextrec = (Form_pg_statistic_ext) GETSTRUCT(statexttup);
+
+ /*
+ * Get the statistics expressions, if any. (NOTE: we do not use the
+ * relcache versions of the expressions, because we want to display
+ * non-const-folded expressions.)
+ */
+ if (has_exprs)
+ {
+ Datum exprsDatum;
+ bool isnull;
+ char *exprsString;
+
+ exprsDatum = SysCacheGetAttr(STATEXTOID, statexttup,
+ Anum_pg_statistic_ext_stxexprs, &isnull);
+ Assert(!isnull);
+ exprsString = TextDatumGetCString(exprsDatum);
+ exprs = (List *) stringToNode(exprsString);
+ pfree(exprsString);
+ }
+ else
+ exprs = NIL;
+
+ /* count the number of columns (attributes and expressions) */
+ ncolumns = statextrec->stxkeys.dim1 + list_length(exprs);
+
+ initStringInfo(&buf);
+
+ if (!columns_only)
+ {
+ nsp = get_namespace_name_or_temp(statextrec->stxnamespace);
+ appendStringInfo(&buf, "CREATE STATISTICS %s",
+ quote_qualified_identifier(nsp,
+ NameStr(statextrec->stxname)));
+
+ /*
+ * Decode the stxkind column so that we know which stats types to
+ * print.
+ */
+ datum = SysCacheGetAttr(STATEXTOID, statexttup,
+ Anum_pg_statistic_ext_stxkind, &isnull);
+ Assert(!isnull);
+ arr = DatumGetArrayTypeP(datum);
+ if (ARR_NDIM(arr) != 1 ||
+ ARR_HASNULL(arr) ||
+ ARR_ELEMTYPE(arr) != CHAROID)
+ elog(ERROR, "stxkind is not a 1-D char array");
+ enabled = (char *) ARR_DATA_PTR(arr);
+
+ ndistinct_enabled = false;
+ dependencies_enabled = false;
+ mcv_enabled = false;
+
+ for (i = 0; i < ARR_DIMS(arr)[0]; i++)
+ {
+ if (enabled[i] == STATS_EXT_NDISTINCT)
+ ndistinct_enabled = true;
+ else if (enabled[i] == STATS_EXT_DEPENDENCIES)
+ dependencies_enabled = true;
+ else if (enabled[i] == STATS_EXT_MCV)
+ mcv_enabled = true;
+
+ /* ignore STATS_EXT_EXPRESSIONS (it's built automatically) */
+ }
+
+ /*
+ * If any option is disabled, then we'll need to append the types
+ * clause to show which options are enabled. We omit the types clause
+ * on purpose when all options are enabled, so a pg_dump/pg_restore
+ * will create all statistics types on a newer postgres version, if
+ * the statistics had all options enabled on the original version.
+ *
+		 * But if the statistics object is defined on just a single column,
+		 * it has to be expression statistics.  In that case we don't need to
+		 * specify kinds.
+ */
+ if ((!ndistinct_enabled || !dependencies_enabled || !mcv_enabled) &&
+ (ncolumns > 1))
+ {
+ bool gotone = false;
+
+ appendStringInfoString(&buf, " (");
+
+ if (ndistinct_enabled)
+ {
+ appendStringInfoString(&buf, "ndistinct");
+ gotone = true;
+ }
+
+ if (dependencies_enabled)
+ {
+ appendStringInfo(&buf, "%sdependencies", gotone ? ", " : "");
+ gotone = true;
+ }
+
+ if (mcv_enabled)
+ appendStringInfo(&buf, "%smcv", gotone ? ", " : "");
+
+ appendStringInfoChar(&buf, ')');
+ }
+
+ appendStringInfoString(&buf, " ON ");
+ }
+
+ /* decode simple column references */
+ for (colno = 0; colno < statextrec->stxkeys.dim1; colno++)
+ {
+ AttrNumber attnum = statextrec->stxkeys.values[colno];
+ char *attname;
+
+ if (colno > 0)
+ appendStringInfoString(&buf, ", ");
+
+ attname = get_attname(statextrec->stxrelid, attnum, false);
+
+ appendStringInfoString(&buf, quote_identifier(attname));
+ }
+
+ context = deparse_context_for(get_relation_name(statextrec->stxrelid),
+ statextrec->stxrelid);
+
+ foreach(lc, exprs)
+ {
+ Node *expr = (Node *) lfirst(lc);
+ char *str;
+ int prettyFlags = PRETTYFLAG_PAREN;
+
+ str = deparse_expression_pretty(expr, context, false, false,
+ prettyFlags, 0);
+
+ if (colno > 0)
+ appendStringInfoString(&buf, ", ");
+
+ /* Need parens if it's not a bare function call */
+ if (looks_like_function(expr))
+ appendStringInfoString(&buf, str);
+ else
+ appendStringInfo(&buf, "(%s)", str);
+
+ colno++;
+ }
+
+ if (!columns_only)
+ appendStringInfo(&buf, " FROM %s",
+ generate_relation_name(statextrec->stxrelid, NIL));
+
+ ReleaseSysCache(statexttup);
+
+ return buf.data;
+}
+
+/*
+ * Generate text array of expressions for statistics object.
+ */
+Datum
+pg_get_statisticsobjdef_expressions(PG_FUNCTION_ARGS)
+{
+ Oid statextid = PG_GETARG_OID(0);
+ Form_pg_statistic_ext statextrec;
+ HeapTuple statexttup;
+ Datum datum;
+ bool isnull;
+ List *context;
+ ListCell *lc;
+ List *exprs = NIL;
+ bool has_exprs;
+ char *tmp;
+ ArrayBuildState *astate = NULL;
+
+ statexttup = SearchSysCache1(STATEXTOID, ObjectIdGetDatum(statextid));
+
+ if (!HeapTupleIsValid(statexttup))
+ PG_RETURN_NULL();
+
+ /* Does the stats object have expressions? */
+ has_exprs = !heap_attisnull(statexttup, Anum_pg_statistic_ext_stxexprs, NULL);
+
+ /* no expressions? we're done */
+ if (!has_exprs)
+ {
+ ReleaseSysCache(statexttup);
+ PG_RETURN_NULL();
+ }
+
+ statextrec = (Form_pg_statistic_ext) GETSTRUCT(statexttup);
+
+ /*
+ * Get the statistics expressions, and deparse them into text values.
+ */
+ datum = SysCacheGetAttr(STATEXTOID, statexttup,
+ Anum_pg_statistic_ext_stxexprs, &isnull);
+
+ Assert(!isnull);
+ tmp = TextDatumGetCString(datum);
+ exprs = (List *) stringToNode(tmp);
+ pfree(tmp);
+
+ context = deparse_context_for(get_relation_name(statextrec->stxrelid),
+ statextrec->stxrelid);
+
+ foreach(lc, exprs)
+ {
+ Node *expr = (Node *) lfirst(lc);
+ char *str;
+ int prettyFlags = PRETTYFLAG_INDENT;
+
+ str = deparse_expression_pretty(expr, context, false, false,
+ prettyFlags, 0);
+
+ astate = accumArrayResult(astate,
+ PointerGetDatum(cstring_to_text(str)),
+ false,
+ TEXTOID,
+ CurrentMemoryContext);
+ }
+
+ ReleaseSysCache(statexttup);
+
+ PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
+}
+
+/*
+ * pg_get_partkeydef
+ *
+ * Returns the partition key specification, ie, the following:
+ *
+ * PARTITION BY { RANGE | LIST | HASH } (column opt_collation opt_opclass [, ...])
+ */
+Datum
+pg_get_partkeydef(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ char *res;
+
+ res = pg_get_partkeydef_worker(relid, PRETTYFLAG_INDENT, false, true);
+
+ if (res == NULL)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(string_to_text(res));
+}
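+
+/*
+ * Illustrative SQL usage (the table name is hypothetical): for a table
+ * created with PARTITION BY RANGE (logdate),
+ *
+ *		SELECT pg_get_partkeydef('measurement'::regclass);
+ *
+ * would return "RANGE (logdate)", which callers such as pg_dump prefix with
+ * "PARTITION BY".
+ */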
+
+/* Internal version that just reports the column definitions */
+char *
+pg_get_partkeydef_columns(Oid relid, bool pretty)
+{
+ int prettyFlags;
+
+ prettyFlags = GET_PRETTY_FLAGS(pretty);
+
+ return pg_get_partkeydef_worker(relid, prettyFlags, true, false);
+}
+
+/*
+ * Internal workhorse to decompile a partition key definition.
+ */
+static char *
+pg_get_partkeydef_worker(Oid relid, int prettyFlags,
+ bool attrsOnly, bool missing_ok)
+{
+ Form_pg_partitioned_table form;
+ HeapTuple tuple;
+ oidvector *partclass;
+ oidvector *partcollation;
+ List *partexprs;
+ ListCell *partexpr_item;
+ List *context;
+ Datum datum;
+ bool isnull;
+ StringInfoData buf;
+ int keyno;
+ char *str;
+ char *sep;
+
+ tuple = SearchSysCache1(PARTRELID, ObjectIdGetDatum(relid));
+ if (!HeapTupleIsValid(tuple))
+ {
+ if (missing_ok)
+ return NULL;
+ elog(ERROR, "cache lookup failed for partition key of %u", relid);
+ }
+
+ form = (Form_pg_partitioned_table) GETSTRUCT(tuple);
+
+ Assert(form->partrelid == relid);
+
+ /* Must get partclass and partcollation the hard way */
+ datum = SysCacheGetAttr(PARTRELID, tuple,
+ Anum_pg_partitioned_table_partclass, &isnull);
+ Assert(!isnull);
+ partclass = (oidvector *) DatumGetPointer(datum);
+
+ datum = SysCacheGetAttr(PARTRELID, tuple,
+ Anum_pg_partitioned_table_partcollation, &isnull);
+ Assert(!isnull);
+ partcollation = (oidvector *) DatumGetPointer(datum);
+
+
+ /*
+ * Get the expressions, if any. (NOTE: we do not use the relcache
+ * versions of the expressions, because we want to display
+ * non-const-folded expressions.)
+ */
+ if (!heap_attisnull(tuple, Anum_pg_partitioned_table_partexprs, NULL))
+ {
+ Datum exprsDatum;
+ bool isnull;
+ char *exprsString;
+
+ exprsDatum = SysCacheGetAttr(PARTRELID, tuple,
+ Anum_pg_partitioned_table_partexprs, &isnull);
+ Assert(!isnull);
+ exprsString = TextDatumGetCString(exprsDatum);
+ partexprs = (List *) stringToNode(exprsString);
+
+ if (!IsA(partexprs, List))
+ elog(ERROR, "unexpected node type found in partexprs: %d",
+ (int) nodeTag(partexprs));
+
+ pfree(exprsString);
+ }
+ else
+ partexprs = NIL;
+
+ partexpr_item = list_head(partexprs);
+ context = deparse_context_for(get_relation_name(relid), relid);
+
+ initStringInfo(&buf);
+
+ switch (form->partstrat)
+ {
+ case PARTITION_STRATEGY_HASH:
+ if (!attrsOnly)
+ appendStringInfoString(&buf, "HASH");
+ break;
+ case PARTITION_STRATEGY_LIST:
+ if (!attrsOnly)
+ appendStringInfoString(&buf, "LIST");
+ break;
+ case PARTITION_STRATEGY_RANGE:
+ if (!attrsOnly)
+ appendStringInfoString(&buf, "RANGE");
+ break;
+ default:
+ elog(ERROR, "unexpected partition strategy: %d",
+ (int) form->partstrat);
+ }
+
+ if (!attrsOnly)
+ appendStringInfoString(&buf, " (");
+ sep = "";
+ for (keyno = 0; keyno < form->partnatts; keyno++)
+ {
+ AttrNumber attnum = form->partattrs.values[keyno];
+ Oid keycoltype;
+ Oid keycolcollation;
+ Oid partcoll;
+
+ appendStringInfoString(&buf, sep);
+ sep = ", ";
+ if (attnum != 0)
+ {
+ /* Simple attribute reference */
+ char *attname;
+ int32 keycoltypmod;
+
+ attname = get_attname(relid, attnum, false);
+ appendStringInfoString(&buf, quote_identifier(attname));
+ get_atttypetypmodcoll(relid, attnum,
+ &keycoltype, &keycoltypmod,
+ &keycolcollation);
+ }
+ else
+ {
+ /* Expression */
+ Node *partkey;
+
+ if (partexpr_item == NULL)
+ elog(ERROR, "too few entries in partexprs list");
+ partkey = (Node *) lfirst(partexpr_item);
+ partexpr_item = lnext(partexprs, partexpr_item);
+
+ /* Deparse */
+ str = deparse_expression_pretty(partkey, context, false, false,
+ prettyFlags, 0);
+ /* Need parens if it's not a bare function call */
+ if (looks_like_function(partkey))
+ appendStringInfoString(&buf, str);
+ else
+ appendStringInfo(&buf, "(%s)", str);
+
+ keycoltype = exprType(partkey);
+ keycolcollation = exprCollation(partkey);
+ }
+
+ /* Add collation, if not default for column */
+ partcoll = partcollation->values[keyno];
+ if (!attrsOnly && OidIsValid(partcoll) && partcoll != keycolcollation)
+ appendStringInfo(&buf, " COLLATE %s",
+ generate_collation_name((partcoll)));
+
+ /* Add the operator class name, if not default */
+ if (!attrsOnly)
+ get_opclass_name(partclass->values[keyno], keycoltype, &buf);
+ }
+
+ if (!attrsOnly)
+ appendStringInfoChar(&buf, ')');
+
+ /* Clean up */
+ ReleaseSysCache(tuple);
+
+ return buf.data;
+}
+
+/*
+ * pg_get_partition_constraintdef
+ *
+ * Returns partition constraint expression as a string for the input relation
+ */
+Datum
+pg_get_partition_constraintdef(PG_FUNCTION_ARGS)
+{
+ Oid relationId = PG_GETARG_OID(0);
+ Expr *constr_expr;
+ int prettyFlags;
+ List *context;
+ char *consrc;
+
+ constr_expr = get_partition_qual_relid(relationId);
+
+ /* Quick exit if no partition constraint */
+ if (constr_expr == NULL)
+ PG_RETURN_NULL();
+
+ /*
+ * Deparse and return the constraint expression.
+ */
+ prettyFlags = PRETTYFLAG_INDENT;
+ context = deparse_context_for(get_relation_name(relationId), relationId);
+ consrc = deparse_expression_pretty((Node *) constr_expr, context, false,
+ false, prettyFlags, 0);
+
+ PG_RETURN_TEXT_P(string_to_text(consrc));
+}
+
+/*
+ * pg_get_partconstrdef_string
+ *
+ * Returns the partition constraint as a C-string for the input relation, with
+ * the given alias. No pretty-printing.
+ */
+char *
+pg_get_partconstrdef_string(Oid partitionId, char *aliasname)
+{
+ Expr *constr_expr;
+ List *context;
+
+ constr_expr = get_partition_qual_relid(partitionId);
+ context = deparse_context_for(aliasname, partitionId);
+
+ return deparse_expression((Node *) constr_expr, context, true, false);
+}
+
+/*
+ * pg_get_constraintdef
+ *
+ * Returns the definition for the constraint, ie, everything that needs to
+ * appear after "ALTER TABLE ... ADD CONSTRAINT <constraintname>".
+ */
+Datum
+pg_get_constraintdef(PG_FUNCTION_ARGS)
+{
+ Oid constraintId = PG_GETARG_OID(0);
+ int prettyFlags;
+ char *res;
+
+ prettyFlags = PRETTYFLAG_INDENT;
+
+ res = pg_get_constraintdef_worker(constraintId, false, prettyFlags, true);
+
+ if (res == NULL)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(string_to_text(res));
+}
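+
+/*
+ * Illustrative SQL usage (the table name is hypothetical):
+ *
+ *		SELECT conname, pg_get_constraintdef(oid, true)
+ *		FROM pg_catalog.pg_constraint
+ *		WHERE conrelid = 'my_table'::regclass;
+ */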
+
+Datum
+pg_get_constraintdef_ext(PG_FUNCTION_ARGS)
+{
+ Oid constraintId = PG_GETARG_OID(0);
+ bool pretty = PG_GETARG_BOOL(1);
+ int prettyFlags;
+ char *res;
+
+ prettyFlags = GET_PRETTY_FLAGS(pretty);
+
+ res = pg_get_constraintdef_worker(constraintId, false, prettyFlags, true);
+
+ if (res == NULL)
+ PG_RETURN_NULL();
+
+ PG_RETURN_TEXT_P(string_to_text(res));
+}
+
+/*
+ * Internal version that returns a full ALTER TABLE ... ADD CONSTRAINT command
+ */
+char *
+pg_get_constraintdef_command(Oid constraintId)
+{
+ return pg_get_constraintdef_worker(constraintId, true, 0, false);
+}
+
+/*
+ * As of 9.4, we now use an MVCC snapshot for this.
+ */
+static char *
+pg_get_constraintdef_worker(Oid constraintId, bool fullCommand,
+ int prettyFlags, bool missing_ok)
+{
+ HeapTuple tup;
+ Form_pg_constraint conForm;
+ StringInfoData buf;
+ SysScanDesc scandesc;
+ ScanKeyData scankey[1];
+ Snapshot snapshot = RegisterSnapshot(GetTransactionSnapshot());
+ Relation relation = table_open(ConstraintRelationId, AccessShareLock);
+
+ ScanKeyInit(&scankey[0],
+ Anum_pg_constraint_oid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(constraintId));
+
+ scandesc = systable_beginscan(relation,
+ ConstraintOidIndexId,
+ true,
+ snapshot,
+ 1,
+ scankey);
+
+ /*
+ * We later use the tuple with SysCacheGetAttr() as if we had obtained it
+ * via SearchSysCache, which works fine.
+ */
+ tup = systable_getnext(scandesc);
+
+ UnregisterSnapshot(snapshot);
+
+ if (!HeapTupleIsValid(tup))
+ {
+ if (missing_ok)
+ {
+ systable_endscan(scandesc);
+ table_close(relation, AccessShareLock);
+ return NULL;
+ }
+ elog(ERROR, "could not find tuple for constraint %u", constraintId);
+ }
+
+ conForm = (Form_pg_constraint) GETSTRUCT(tup);
+
+ initStringInfo(&buf);
+
+ if (fullCommand)
+ {
+ if (OidIsValid(conForm->conrelid))
+ {
+ /*
+ * Currently, callers want ALTER TABLE (without ONLY) for CHECK
+ * constraints, and other types of constraints don't inherit
+ * anyway so it doesn't matter whether we say ONLY or not. Someday
+ * we might need to let callers specify whether to put ONLY in the
+ * command.
+ */
+ appendStringInfo(&buf, "ALTER TABLE %s ADD CONSTRAINT %s ",
+ generate_qualified_relation_name(conForm->conrelid),
+ quote_identifier(NameStr(conForm->conname)));
+ }
+ else
+ {
+ /* Must be a domain constraint */
+ Assert(OidIsValid(conForm->contypid));
+ appendStringInfo(&buf, "ALTER DOMAIN %s ADD CONSTRAINT %s ",
+ generate_qualified_type_name(conForm->contypid),
+ quote_identifier(NameStr(conForm->conname)));
+ }
+ }
+
+ switch (conForm->contype)
+ {
+ case CONSTRAINT_FOREIGN:
+ {
+ Datum val;
+ bool isnull;
+ const char *string;
+
+ /* Start off the constraint definition */
+ appendStringInfoString(&buf, "FOREIGN KEY (");
+
+ /* Fetch and build referencing-column list */
+ val = SysCacheGetAttr(CONSTROID, tup,
+ Anum_pg_constraint_conkey, &isnull);
+ if (isnull)
+ elog(ERROR, "null conkey for constraint %u",
+ constraintId);
+
+ decompile_column_index_array(val, conForm->conrelid, &buf);
+
+ /* add foreign relation name */
+ appendStringInfo(&buf, ") REFERENCES %s(",
+ generate_relation_name(conForm->confrelid,
+ NIL));
+
+ /* Fetch and build referenced-column list */
+ val = SysCacheGetAttr(CONSTROID, tup,
+ Anum_pg_constraint_confkey, &isnull);
+ if (isnull)
+ elog(ERROR, "null confkey for constraint %u",
+ constraintId);
+
+ decompile_column_index_array(val, conForm->confrelid, &buf);
+
+ appendStringInfoChar(&buf, ')');
+
+ /* Add match type */
+ switch (conForm->confmatchtype)
+ {
+ case FKCONSTR_MATCH_FULL:
+ string = " MATCH FULL";
+ break;
+ case FKCONSTR_MATCH_PARTIAL:
+ string = " MATCH PARTIAL";
+ break;
+ case FKCONSTR_MATCH_SIMPLE:
+ string = "";
+ break;
+ default:
+ elog(ERROR, "unrecognized confmatchtype: %d",
+ conForm->confmatchtype);
+ string = ""; /* keep compiler quiet */
+ break;
+ }
+ appendStringInfoString(&buf, string);
+
+ /* Add ON UPDATE and ON DELETE clauses, if needed */
+ switch (conForm->confupdtype)
+ {
+ case FKCONSTR_ACTION_NOACTION:
+ string = NULL; /* suppress default */
+ break;
+ case FKCONSTR_ACTION_RESTRICT:
+ string = "RESTRICT";
+ break;
+ case FKCONSTR_ACTION_CASCADE:
+ string = "CASCADE";
+ break;
+ case FKCONSTR_ACTION_SETNULL:
+ string = "SET NULL";
+ break;
+ case FKCONSTR_ACTION_SETDEFAULT:
+ string = "SET DEFAULT";
+ break;
+ default:
+ elog(ERROR, "unrecognized confupdtype: %d",
+ conForm->confupdtype);
+ string = NULL; /* keep compiler quiet */
+ break;
+ }
+ if (string)
+ appendStringInfo(&buf, " ON UPDATE %s", string);
+
+ switch (conForm->confdeltype)
+ {
+ case FKCONSTR_ACTION_NOACTION:
+ string = NULL; /* suppress default */
+ break;
+ case FKCONSTR_ACTION_RESTRICT:
+ string = "RESTRICT";
+ break;
+ case FKCONSTR_ACTION_CASCADE:
+ string = "CASCADE";
+ break;
+ case FKCONSTR_ACTION_SETNULL:
+ string = "SET NULL";
+ break;
+ case FKCONSTR_ACTION_SETDEFAULT:
+ string = "SET DEFAULT";
+ break;
+ default:
+ elog(ERROR, "unrecognized confdeltype: %d",
+ conForm->confdeltype);
+ string = NULL; /* keep compiler quiet */
+ break;
+ }
+ if (string)
+ appendStringInfo(&buf, " ON DELETE %s", string);
+
+ /*
+ * Add columns specified to SET NULL or SET DEFAULT if
+ * provided.
+ */
+ val = SysCacheGetAttr(CONSTROID, tup,
+ Anum_pg_constraint_confdelsetcols, &isnull);
+ if (!isnull)
+ {
+					appendStringInfoString(&buf, " (");
+					decompile_column_index_array(val, conForm->conrelid, &buf);
+					appendStringInfoChar(&buf, ')');
+ }
+
+ break;
+ }
+ case CONSTRAINT_PRIMARY:
+ case CONSTRAINT_UNIQUE:
+ {
+ Datum val;
+ bool isnull;
+ Oid indexId;
+ int keyatts;
+ HeapTuple indtup;
+
+ /* Start off the constraint definition */
+ if (conForm->contype == CONSTRAINT_PRIMARY)
+ appendStringInfoString(&buf, "PRIMARY KEY ");
+ else
+ appendStringInfoString(&buf, "UNIQUE ");
+
+ indexId = conForm->conindid;
+
+ indtup = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
+ if (!HeapTupleIsValid(indtup))
+ elog(ERROR, "cache lookup failed for index %u", indexId);
+ if (conForm->contype == CONSTRAINT_UNIQUE &&
+ ((Form_pg_index) GETSTRUCT(indtup))->indnullsnotdistinct)
+ appendStringInfoString(&buf, "NULLS NOT DISTINCT ");
+
+ appendStringInfoString(&buf, "(");
+
+ /* Fetch and build target column list */
+ val = SysCacheGetAttr(CONSTROID, tup,
+ Anum_pg_constraint_conkey, &isnull);
+ if (isnull)
+ elog(ERROR, "null conkey for constraint %u",
+ constraintId);
+
+ keyatts = decompile_column_index_array(val, conForm->conrelid, &buf);
+
+ appendStringInfoChar(&buf, ')');
+
+				/* Build including column list (from pg_index.indkey) */
+ val = SysCacheGetAttr(INDEXRELID, indtup,
+ Anum_pg_index_indnatts, &isnull);
+ if (isnull)
+ elog(ERROR, "null indnatts for index %u", indexId);
+ if (DatumGetInt32(val) > keyatts)
+ {
+ Datum cols;
+ Datum *keys;
+ int nKeys;
+ int j;
+
+ appendStringInfoString(&buf, " INCLUDE (");
+
+ cols = SysCacheGetAttr(INDEXRELID, indtup,
+ Anum_pg_index_indkey, &isnull);
+ if (isnull)
+ elog(ERROR, "null indkey for index %u", indexId);
+
+ deconstruct_array(DatumGetArrayTypeP(cols),
+ INT2OID, 2, true, TYPALIGN_SHORT,
+ &keys, NULL, &nKeys);
+
+ for (j = keyatts; j < nKeys; j++)
+ {
+ char *colName;
+
+ colName = get_attname(conForm->conrelid,
+ DatumGetInt16(keys[j]), false);
+ if (j > keyatts)
+ appendStringInfoString(&buf, ", ");
+ appendStringInfoString(&buf, quote_identifier(colName));
+ }
+
+ appendStringInfoChar(&buf, ')');
+ }
+ ReleaseSysCache(indtup);
+
+ /* XXX why do we only print these bits if fullCommand? */
+ if (fullCommand && OidIsValid(indexId))
+ {
+ char *options = flatten_reloptions(indexId);
+ Oid tblspc;
+
+ if (options)
+ {
+ appendStringInfo(&buf, " WITH (%s)", options);
+ pfree(options);
+ }
+
+ /*
+ * Print the tablespace, unless it's the database default.
+ * This is to help ALTER TABLE usage of this facility,
+ * which needs this behavior to recreate exact catalog
+ * state.
+ */
+ tblspc = get_rel_tablespace(indexId);
+ if (OidIsValid(tblspc))
+ appendStringInfo(&buf, " USING INDEX TABLESPACE %s",
+ quote_identifier(get_tablespace_name(tblspc)));
+ }
+
+ break;
+ }
+ case CONSTRAINT_CHECK:
+ {
+ Datum val;
+ bool isnull;
+ char *conbin;
+ char *consrc;
+ Node *expr;
+ List *context;
+
+ /* Fetch constraint expression in parsetree form */
+ val = SysCacheGetAttr(CONSTROID, tup,
+ Anum_pg_constraint_conbin, &isnull);
+ if (isnull)
+ elog(ERROR, "null conbin for constraint %u",
+ constraintId);
+
+ conbin = TextDatumGetCString(val);
+ expr = stringToNode(conbin);
+
+ /* Set up deparsing context for Var nodes in constraint */
+ if (conForm->conrelid != InvalidOid)
+ {
+ /* relation constraint */
+ context = deparse_context_for(get_relation_name(conForm->conrelid),
+ conForm->conrelid);
+ }
+ else
+ {
+ /* domain constraint --- can't have Vars */
+ context = NIL;
+ }
+
+ consrc = deparse_expression_pretty(expr, context, false, false,
+ prettyFlags, 0);
+
+ /*
+ * Now emit the constraint definition, adding NO INHERIT if
+ * necessary.
+ *
+ * There are cases where the constraint expression will be
+ * fully parenthesized and we don't need the outer parens ...
+ * but there are other cases where we do need 'em. Be
+ * conservative for now.
+ *
+ * Note that simply checking for leading '(' and trailing ')'
+ * would NOT be good enough, consider "(x > 0) AND (y > 0)".
+ */
+ appendStringInfo(&buf, "CHECK (%s)%s",
+ consrc,
+ conForm->connoinherit ? " NO INHERIT" : "");
+ break;
+ }
+ case CONSTRAINT_TRIGGER:
+
+ /*
+ * There isn't an ALTER TABLE syntax for creating a user-defined
+ * constraint trigger, but it seems better to print something than
+ * throw an error; if we throw error then this function couldn't
+ * safely be applied to all rows of pg_constraint.
+ */
+ appendStringInfoString(&buf, "TRIGGER");
+ break;
+ case CONSTRAINT_EXCLUSION:
+ {
+ Oid indexOid = conForm->conindid;
+ Datum val;
+ bool isnull;
+ Datum *elems;
+ int nElems;
+ int i;
+ Oid *operators;
+
+ /* Extract operator OIDs from the pg_constraint tuple */
+ val = SysCacheGetAttr(CONSTROID, tup,
+ Anum_pg_constraint_conexclop,
+ &isnull);
+ if (isnull)
+ elog(ERROR, "null conexclop for constraint %u",
+ constraintId);
+
+ deconstruct_array(DatumGetArrayTypeP(val),
+ OIDOID, sizeof(Oid), true, TYPALIGN_INT,
+ &elems, NULL, &nElems);
+
+ operators = (Oid *) palloc(nElems * sizeof(Oid));
+ for (i = 0; i < nElems; i++)
+ operators[i] = DatumGetObjectId(elems[i]);
+
+ /* pg_get_indexdef_worker does the rest */
+ /* suppress tablespace because pg_dump wants it that way */
+ appendStringInfoString(&buf,
+ pg_get_indexdef_worker(indexOid,
+ 0,
+ operators,
+ false,
+ false,
+ false,
+ false,
+ prettyFlags,
+ false));
+ break;
+ }
+ default:
+ elog(ERROR, "invalid constraint type \"%c\"", conForm->contype);
+ break;
+ }
+
+ if (conForm->condeferrable)
+ appendStringInfoString(&buf, " DEFERRABLE");
+ if (conForm->condeferred)
+ appendStringInfoString(&buf, " INITIALLY DEFERRED");
+ if (!conForm->convalidated)
+ appendStringInfoString(&buf, " NOT VALID");
+
+ /* Cleanup */
+ systable_endscan(scandesc);
+ table_close(relation, AccessShareLock);
+
+ return buf.data;
+}
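+
+/*
+ * Illustrative sketch of the output built above for a foreign key that
+ * carries a column list with its referential action (hypothetical table and
+ * column names):
+ *
+ *     FOREIGN KEY (order_id) REFERENCES orders(id) ON DELETE SET NULL (order_id)
+ *
+ * The trailing parenthesized list comes from confdelsetcols, decompiled by
+ * decompile_column_index_array() below.
+ */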
+
+
+/*
+ * Convert an int16[] Datum into a comma-separated list of column names
+ * for the indicated relation; append the list to buf. Returns the number
+ * of keys.
+ */
+static int
+decompile_column_index_array(Datum column_index_array, Oid relId,
+ StringInfo buf)
+{
+ Datum *keys;
+ int nKeys;
+ int j;
+
+ /* Extract data from array of int16 */
+ deconstruct_array(DatumGetArrayTypeP(column_index_array),
+ INT2OID, 2, true, TYPALIGN_SHORT,
+ &keys, NULL, &nKeys);
+
+ for (j = 0; j < nKeys; j++)
+ {
+ char *colName;
+
+ colName = get_attname(relId, DatumGetInt16(keys[j]), false);
+
+ if (j == 0)
+ appendStringInfoString(buf, quote_identifier(colName));
+ else
+ appendStringInfo(buf, ", %s", quote_identifier(colName));
+ }
+
+ return nKeys;
+}
+
+
+/* ----------
+ * pg_get_expr - Decompile an expression tree
+ *
+ * Input: an expression tree in nodeToString form, and a relation OID
+ *
+ * Output: reverse-listed expression
+ *
+ * Currently, the expression can only refer to a single relation, namely
+ * the one specified by the second parameter. This is sufficient for
+ * partial indexes, column default expressions, etc. We also support
+ * Var-free expressions, for which the OID can be InvalidOid.
+ *
+ * We expect this function to work, or throw a reasonably clean error,
+ * for any node tree that can appear in a catalog pg_node_tree column.
+ * Query trees, such as those appearing in pg_rewrite.ev_action, are
+ * not supported. Nor are expressions in more than one relation, which
+ * can appear in places like pg_rewrite.ev_qual.
+ * ----------
+ */
+Datum
+pg_get_expr(PG_FUNCTION_ARGS)
+{
+ text *expr = PG_GETARG_TEXT_PP(0);
+ Oid relid = PG_GETARG_OID(1);
+ int prettyFlags;
+ char *relname;
+
+ prettyFlags = PRETTYFLAG_INDENT;
+
+ if (OidIsValid(relid))
+ {
+ /* Get the name for the relation */
+ relname = get_rel_name(relid);
+
+ /*
+ * If the OID isn't actually valid, don't throw an error, just return
+ * NULL. This is a bit questionable, but it's what we've done
+ * historically, and it can help avoid unwanted failures when
+ * examining catalog entries for just-deleted relations.
+ */
+ if (relname == NULL)
+ PG_RETURN_NULL();
+ }
+ else
+ relname = NULL;
+
+ PG_RETURN_TEXT_P(pg_get_expr_worker(expr, relid, relname, prettyFlags));
+}
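+
+/*
+ * Illustrative usage (hypothetical query): deparse every stored column
+ * default against its owning table with
+ *
+ *     SELECT pg_get_expr(adbin, adrelid) FROM pg_attrdef;
+ *
+ * Passing 0 (InvalidOid) as the relation is only valid for Var-free
+ * expressions; see pg_get_expr_worker() below.
+ */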
+
+Datum
+pg_get_expr_ext(PG_FUNCTION_ARGS)
+{
+ text *expr = PG_GETARG_TEXT_PP(0);
+ Oid relid = PG_GETARG_OID(1);
+ bool pretty = PG_GETARG_BOOL(2);
+ int prettyFlags;
+ char *relname;
+
+ prettyFlags = GET_PRETTY_FLAGS(pretty);
+
+ if (OidIsValid(relid))
+ {
+ /* Get the name for the relation */
+ relname = get_rel_name(relid);
+ /* See notes above */
+ if (relname == NULL)
+ PG_RETURN_NULL();
+ }
+ else
+ relname = NULL;
+
+ PG_RETURN_TEXT_P(pg_get_expr_worker(expr, relid, relname, prettyFlags));
+}
+
+static text *
+pg_get_expr_worker(text *expr, Oid relid, const char *relname, int prettyFlags)
+{
+ Node *node;
+ Node *tst;
+ Relids relids;
+ List *context;
+ char *exprstr;
+ char *str;
+
+ /* Convert input pg_node_tree (really TEXT) object to C string */
+ exprstr = text_to_cstring(expr);
+
+ /* Convert expression to node tree */
+ node = (Node *) stringToNode(exprstr);
+
+ pfree(exprstr);
+
+ /*
+ * Throw error if the input is a querytree rather than an expression tree.
+ * While we could support queries here, there seems no very good reason
+ * to. In most such catalog columns, we'll see a List of Query nodes, or
+ * even nested Lists, so drill down to a non-List node before checking.
+ */
+ tst = node;
+ while (tst && IsA(tst, List))
+ tst = linitial((List *) tst);
+ if (tst && IsA(tst, Query))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("input is a query, not an expression")));
+
+ /*
+ * Throw error if the expression contains Vars we won't be able to
+ * deparse.
+ */
+ relids = pull_varnos(NULL, node);
+ if (OidIsValid(relid))
+ {
+ if (!bms_is_subset(relids, bms_make_singleton(1)))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("expression contains variables of more than one relation")));
+ }
+ else
+ {
+ if (!bms_is_empty(relids))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("expression contains variables")));
+ }
+
+ /* Prepare deparse context if needed */
+ if (OidIsValid(relid))
+ context = deparse_context_for(relname, relid);
+ else
+ context = NIL;
+
+ /* Deparse */
+ str = deparse_expression_pretty(node, context, false, false,
+ prettyFlags, 0);
+
+ return string_to_text(str);
+}
+
+
+/* ----------
+ * pg_get_userbyid - Get a user name by roleid and
+ * fallback to 'unknown (OID=n)'
+ * ----------
+ */
+Datum
+pg_get_userbyid(PG_FUNCTION_ARGS)
+{
+ Oid roleid = PG_GETARG_OID(0);
+ Name result;
+ HeapTuple roletup;
+ Form_pg_authid role_rec;
+
+ /*
+ * Allocate space for the result
+ */
+ result = (Name) palloc(NAMEDATALEN);
+ memset(NameStr(*result), 0, NAMEDATALEN);
+
+ /*
+ * Get the pg_authid entry and print the result
+ */
+ roletup = SearchSysCache1(AUTHOID, ObjectIdGetDatum(roleid));
+ if (HeapTupleIsValid(roletup))
+ {
+ role_rec = (Form_pg_authid) GETSTRUCT(roletup);
+ *result = role_rec->rolname;
+ ReleaseSysCache(roletup);
+ }
+ else
+ sprintf(NameStr(*result), "unknown (OID=%u)", roleid);
+
+ PG_RETURN_NAME(result);
+}
+
+
+/*
+ * pg_get_serial_sequence
+ * Get the name of the sequence used by an identity or serial column,
+ * formatted suitably for passing to setval, nextval or currval.
+ * First parameter is not treated as double-quoted, second parameter
+ * is --- see documentation for reason.
+ */
+Datum
+pg_get_serial_sequence(PG_FUNCTION_ARGS)
+{
+ text *tablename = PG_GETARG_TEXT_PP(0);
+ text *columnname = PG_GETARG_TEXT_PP(1);
+ RangeVar *tablerv;
+ Oid tableOid;
+ char *column;
+ AttrNumber attnum;
+ Oid sequenceId = InvalidOid;
+ Relation depRel;
+ ScanKeyData key[3];
+ SysScanDesc scan;
+ HeapTuple tup;
+
+ /* Look up table name. Can't lock it - we might not have privileges. */
+ tablerv = makeRangeVarFromNameList(textToQualifiedNameList(tablename));
+ tableOid = RangeVarGetRelid(tablerv, NoLock, false);
+
+ /* Get the number of the column */
+ column = text_to_cstring(columnname);
+
+ attnum = get_attnum(tableOid, column);
+ if (attnum == InvalidAttrNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("column \"%s\" of relation \"%s\" does not exist",
+ column, tablerv->relname)));
+
+ /* Search the dependency table for the dependent sequence */
+ depRel = table_open(DependRelationId, AccessShareLock);
+
+ ScanKeyInit(&key[0],
+ Anum_pg_depend_refclassid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(RelationRelationId));
+ ScanKeyInit(&key[1],
+ Anum_pg_depend_refobjid,
+ BTEqualStrategyNumber, F_OIDEQ,
+ ObjectIdGetDatum(tableOid));
+ ScanKeyInit(&key[2],
+ Anum_pg_depend_refobjsubid,
+ BTEqualStrategyNumber, F_INT4EQ,
+ Int32GetDatum(attnum));
+
+ scan = systable_beginscan(depRel, DependReferenceIndexId, true,
+ NULL, 3, key);
+
+ while (HeapTupleIsValid(tup = systable_getnext(scan)))
+ {
+ Form_pg_depend deprec = (Form_pg_depend) GETSTRUCT(tup);
+
+ /*
+ * Look for an auto dependency (serial column) or internal dependency
+ * (identity column) of a sequence on a column. (We need the relkind
+ * test because indexes can also have auto dependencies on columns.)
+ */
+ if (deprec->classid == RelationRelationId &&
+ deprec->objsubid == 0 &&
+ (deprec->deptype == DEPENDENCY_AUTO ||
+ deprec->deptype == DEPENDENCY_INTERNAL) &&
+ get_rel_relkind(deprec->objid) == RELKIND_SEQUENCE)
+ {
+ sequenceId = deprec->objid;
+ break;
+ }
+ }
+
+ systable_endscan(scan);
+ table_close(depRel, AccessShareLock);
+
+ if (OidIsValid(sequenceId))
+ {
+ char *result;
+
+ result = generate_qualified_relation_name(sequenceId);
+
+ PG_RETURN_TEXT_P(string_to_text(result));
+ }
+
+ PG_RETURN_NULL();
+}
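+
+/*
+ * Illustrative usage (hypothetical names): given
+ *
+ *     CREATE TABLE "MySchema"."MyTable" (id serial);
+ *
+ * the call
+ *
+ *     SELECT pg_get_serial_sequence('"MySchema"."MyTable"', 'id');
+ *
+ * returns something like "MySchema"."MyTable_id_seq", suitable for passing
+ * to nextval(), currval() or setval().  The first argument is parsed as a
+ * possibly schema-qualified, possibly quoted name; the second is taken
+ * literally, hence no double quotes around the column name.
+ */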
+
+
+/*
+ * pg_get_functiondef
+ * Returns the complete "CREATE OR REPLACE FUNCTION ..." statement for
+ * the specified function.
+ *
+ * Note: if you change the output format of this function, be careful not
+ * to break psql's rules (in \ef and \sf) for identifying the start of the
+ * function body. To wit: the function body starts on a line that begins with
+ * "AS ", "BEGIN ", or "RETURN ", and no preceding line will look like that.
+ */
+Datum
+pg_get_functiondef(PG_FUNCTION_ARGS)
+{
+ Oid funcid = PG_GETARG_OID(0);
+ StringInfoData buf;
+ StringInfoData dq;
+ HeapTuple proctup;
+ Form_pg_proc proc;
+ bool isfunction;
+ Datum tmp;
+ bool isnull;
+ const char *prosrc;
+ const char *name;
+ const char *nsp;
+ float4 procost;
+ int oldlen;
+
+ initStringInfo(&buf);
+
+ /* Look up the function */
+ proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
+ if (!HeapTupleIsValid(proctup))
+ PG_RETURN_NULL();
+
+ proc = (Form_pg_proc) GETSTRUCT(proctup);
+ name = NameStr(proc->proname);
+
+ if (proc->prokind == PROKIND_AGGREGATE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is an aggregate function", name)));
+
+ isfunction = (proc->prokind != PROKIND_PROCEDURE);
+
+ /*
+ * We always qualify the function name, to ensure the right function gets
+ * replaced.
+ */
+ nsp = get_namespace_name_or_temp(proc->pronamespace);
+ appendStringInfo(&buf, "CREATE OR REPLACE %s %s(",
+ isfunction ? "FUNCTION" : "PROCEDURE",
+ quote_qualified_identifier(nsp, name));
+ (void) print_function_arguments(&buf, proctup, false, true);
+ appendStringInfoString(&buf, ")\n");
+ if (isfunction)
+ {
+ appendStringInfoString(&buf, " RETURNS ");
+ print_function_rettype(&buf, proctup);
+ appendStringInfoChar(&buf, '\n');
+ }
+
+ print_function_trftypes(&buf, proctup);
+
+ appendStringInfo(&buf, " LANGUAGE %s\n",
+ quote_identifier(get_language_name(proc->prolang, false)));
+
+ /* Emit some miscellaneous options on one line */
+ oldlen = buf.len;
+
+ if (proc->prokind == PROKIND_WINDOW)
+ appendStringInfoString(&buf, " WINDOW");
+ switch (proc->provolatile)
+ {
+ case PROVOLATILE_IMMUTABLE:
+ appendStringInfoString(&buf, " IMMUTABLE");
+ break;
+ case PROVOLATILE_STABLE:
+ appendStringInfoString(&buf, " STABLE");
+ break;
+ case PROVOLATILE_VOLATILE:
+ break;
+ }
+
+ switch (proc->proparallel)
+ {
+ case PROPARALLEL_SAFE:
+ appendStringInfoString(&buf, " PARALLEL SAFE");
+ break;
+ case PROPARALLEL_RESTRICTED:
+ appendStringInfoString(&buf, " PARALLEL RESTRICTED");
+ break;
+ case PROPARALLEL_UNSAFE:
+ break;
+ }
+
+ if (proc->proisstrict)
+ appendStringInfoString(&buf, " STRICT");
+ if (proc->prosecdef)
+ appendStringInfoString(&buf, " SECURITY DEFINER");
+ if (proc->proleakproof)
+ appendStringInfoString(&buf, " LEAKPROOF");
+
+ /* This code for the default cost and rows should match functioncmds.c */
+ if (proc->prolang == INTERNALlanguageId ||
+ proc->prolang == ClanguageId)
+ procost = 1;
+ else
+ procost = 100;
+ if (proc->procost != procost)
+ appendStringInfo(&buf, " COST %g", proc->procost);
+
+ if (proc->prorows > 0 && proc->prorows != 1000)
+ appendStringInfo(&buf, " ROWS %g", proc->prorows);
+
+ if (proc->prosupport)
+ {
+ Oid argtypes[1];
+
+ /*
+ * We should qualify the support function's name if it wouldn't be
+ * resolved by lookup in the current search path.
+ */
+ argtypes[0] = INTERNALOID;
+ appendStringInfo(&buf, " SUPPORT %s",
+ generate_function_name(proc->prosupport, 1,
+ NIL, argtypes,
+ false, NULL, EXPR_KIND_NONE));
+ }
+
+ if (oldlen != buf.len)
+ appendStringInfoChar(&buf, '\n');
+
+ /* Emit any proconfig options, one per line */
+ tmp = SysCacheGetAttr(PROCOID, proctup, Anum_pg_proc_proconfig, &isnull);
+ if (!isnull)
+ {
+ ArrayType *a = DatumGetArrayTypeP(tmp);
+ int i;
+
+ Assert(ARR_ELEMTYPE(a) == TEXTOID);
+ Assert(ARR_NDIM(a) == 1);
+ Assert(ARR_LBOUND(a)[0] == 1);
+
+ for (i = 1; i <= ARR_DIMS(a)[0]; i++)
+ {
+ Datum d;
+
+ d = array_ref(a, 1, &i,
+ -1 /* varlenarray */ ,
+ -1 /* TEXT's typlen */ ,
+ false /* TEXT's typbyval */ ,
+ TYPALIGN_INT /* TEXT's typalign */ ,
+ &isnull);
+ if (!isnull)
+ {
+ char *configitem = TextDatumGetCString(d);
+ char *pos;
+
+ pos = strchr(configitem, '=');
+ if (pos == NULL)
+ continue;
+ *pos++ = '\0';
+
+ appendStringInfo(&buf, " SET %s TO ",
+ quote_identifier(configitem));
+
+ /*
+ * Variables that are marked GUC_LIST_QUOTE were already fully
+ * quoted by flatten_set_variable_args() before they were put
+ * into the proconfig array. However, because the quoting
+ * rules used there aren't exactly like SQL's, we have to
+ * break the list value apart and then quote the elements as
+ * string literals. (The elements may be double-quoted as-is,
+ * but we can't just feed them to the SQL parser; it would do
+ * the wrong thing with elements that are zero-length or
+ * longer than NAMEDATALEN.)
+ *
+ * Variables that are not so marked should just be emitted as
+ * simple string literals. If the variable is not known to
+ * guc.c, we'll do that; this makes it unsafe to use
+ * GUC_LIST_QUOTE for extension variables.
+ */
+ if (GetConfigOptionFlags(configitem, true) & GUC_LIST_QUOTE)
+ {
+ List *namelist;
+ ListCell *lc;
+
+ /* Parse string into list of identifiers */
+ if (!SplitGUCList(pos, ',', &namelist))
+ {
+ /* this shouldn't fail really */
+ elog(ERROR, "invalid list syntax in proconfig item");
+ }
+ foreach(lc, namelist)
+ {
+ char *curname = (char *) lfirst(lc);
+
+ simple_quote_literal(&buf, curname);
+ if (lnext(namelist, lc))
+ appendStringInfoString(&buf, ", ");
+ }
+ }
+ else
+ simple_quote_literal(&buf, pos);
+ appendStringInfoChar(&buf, '\n');
+ }
+ }
+ }
+
+ /* And finally the function definition ... */
+ (void) SysCacheGetAttr(PROCOID, proctup, Anum_pg_proc_prosqlbody, &isnull);
+ if (proc->prolang == SQLlanguageId && !isnull)
+ {
+ print_function_sqlbody(&buf, proctup);
+ }
+ else
+ {
+ appendStringInfoString(&buf, "AS ");
+
+ tmp = SysCacheGetAttr(PROCOID, proctup, Anum_pg_proc_probin, &isnull);
+ if (!isnull)
+ {
+ simple_quote_literal(&buf, TextDatumGetCString(tmp));
+ appendStringInfoString(&buf, ", "); /* assume prosrc isn't null */
+ }
+
+ tmp = SysCacheGetAttr(PROCOID, proctup, Anum_pg_proc_prosrc, &isnull);
+ if (isnull)
+ elog(ERROR, "null prosrc");
+ prosrc = TextDatumGetCString(tmp);
+
+ /*
+ * We always use dollar quoting. Figure out a suitable delimiter.
+ *
+ * Since the user is likely to be editing the function body string, we
+ * shouldn't use a short delimiter that he might easily create a
+ * conflict with. Hence prefer "$function$"/"$procedure$", but extend
+ * if needed.
+ */
+ initStringInfo(&dq);
+ appendStringInfoChar(&dq, '$');
+ appendStringInfoString(&dq, (isfunction ? "function" : "procedure"));
+ while (strstr(prosrc, dq.data) != NULL)
+ appendStringInfoChar(&dq, 'x');
+ appendStringInfoChar(&dq, '$');
+
+ appendBinaryStringInfo(&buf, dq.data, dq.len);
+ appendStringInfoString(&buf, prosrc);
+ appendBinaryStringInfo(&buf, dq.data, dq.len);
+ }
+
+ appendStringInfoChar(&buf, '\n');
+
+ ReleaseSysCache(proctup);
+
+ PG_RETURN_TEXT_P(string_to_text(buf.data));
+}
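+
+/*
+ * Illustrative output sketch (hypothetical function): for
+ *
+ *     CREATE FUNCTION add_one(i int) RETURNS int LANGUAGE sql
+ *         AS 'SELECT i + 1';
+ *
+ * pg_get_functiondef() produces roughly
+ *
+ *     CREATE OR REPLACE FUNCTION public.add_one(i integer)
+ *      RETURNS integer
+ *      LANGUAGE sql
+ *     AS $function$SELECT i + 1$function$
+ *
+ * with the body dollar-quoted as described above, so that psql's \ef and
+ * \sf can find the start of the body on the "AS " line.
+ */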
+
+/*
+ * pg_get_function_arguments
+ * Get a nicely-formatted list of arguments for a function.
+ * This is everything that would go between the parentheses in
+ * CREATE FUNCTION.
+ */
+Datum
+pg_get_function_arguments(PG_FUNCTION_ARGS)
+{
+ Oid funcid = PG_GETARG_OID(0);
+ StringInfoData buf;
+ HeapTuple proctup;
+
+ proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
+ if (!HeapTupleIsValid(proctup))
+ PG_RETURN_NULL();
+
+ initStringInfo(&buf);
+
+ (void) print_function_arguments(&buf, proctup, false, true);
+
+ ReleaseSysCache(proctup);
+
+ PG_RETURN_TEXT_P(string_to_text(buf.data));
+}
+
+/*
+ * pg_get_function_identity_arguments
+ * Get a formatted list of arguments for a function.
+ * This is everything that would go between the parentheses in
+ * ALTER FUNCTION, etc. In particular, don't print defaults.
+ */
+Datum
+pg_get_function_identity_arguments(PG_FUNCTION_ARGS)
+{
+ Oid funcid = PG_GETARG_OID(0);
+ StringInfoData buf;
+ HeapTuple proctup;
+
+ proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
+ if (!HeapTupleIsValid(proctup))
+ PG_RETURN_NULL();
+
+ initStringInfo(&buf);
+
+ (void) print_function_arguments(&buf, proctup, false, false);
+
+ ReleaseSysCache(proctup);
+
+ PG_RETURN_TEXT_P(string_to_text(buf.data));
+}
+
+/*
+ * pg_get_function_result
+ * Get a nicely-formatted version of the result type of a function.
+ * This is what would appear after RETURNS in CREATE FUNCTION.
+ */
+Datum
+pg_get_function_result(PG_FUNCTION_ARGS)
+{
+ Oid funcid = PG_GETARG_OID(0);
+ StringInfoData buf;
+ HeapTuple proctup;
+
+ proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
+ if (!HeapTupleIsValid(proctup))
+ PG_RETURN_NULL();
+
+ if (((Form_pg_proc) GETSTRUCT(proctup))->prokind == PROKIND_PROCEDURE)
+ {
+ ReleaseSysCache(proctup);
+ PG_RETURN_NULL();
+ }
+
+ initStringInfo(&buf);
+
+ print_function_rettype(&buf, proctup);
+
+ ReleaseSysCache(proctup);
+
+ PG_RETURN_TEXT_P(string_to_text(buf.data));
+}
+
+/*
+ * Guts of pg_get_function_result: append the function's return type
+ * to the specified buffer.
+ */
+static void
+print_function_rettype(StringInfo buf, HeapTuple proctup)
+{
+ Form_pg_proc proc = (Form_pg_proc) GETSTRUCT(proctup);
+ int ntabargs = 0;
+ StringInfoData rbuf;
+
+ initStringInfo(&rbuf);
+
+ if (proc->proretset)
+ {
+ /* It might be a table function; try to print the arguments */
+ appendStringInfoString(&rbuf, "TABLE(");
+ ntabargs = print_function_arguments(&rbuf, proctup, true, false);
+ if (ntabargs > 0)
+ appendStringInfoChar(&rbuf, ')');
+ else
+ resetStringInfo(&rbuf);
+ }
+
+ if (ntabargs == 0)
+ {
+ /* Not a table function, so do the normal thing */
+ if (proc->proretset)
+ appendStringInfoString(&rbuf, "SETOF ");
+ appendStringInfoString(&rbuf, format_type_be(proc->prorettype));
+ }
+
+ appendBinaryStringInfo(buf, rbuf.data, rbuf.len);
+}
+
+/*
+ * Common code for pg_get_function_arguments and pg_get_function_result:
+ * append the desired subset of arguments to buf. We print only TABLE
+ * arguments when print_table_args is true, and all the others when it's false.
+ * We print argument defaults only if print_defaults is true.
+ * Function return value is the number of arguments printed.
+ */
+static int
+print_function_arguments(StringInfo buf, HeapTuple proctup,
+ bool print_table_args, bool print_defaults)
+{
+ Form_pg_proc proc = (Form_pg_proc) GETSTRUCT(proctup);
+ int numargs;
+ Oid *argtypes;
+ char **argnames;
+ char *argmodes;
+ int insertorderbyat = -1;
+ int argsprinted;
+ int inputargno;
+ int nlackdefaults;
+ List *argdefaults = NIL;
+ ListCell *nextargdefault = NULL;
+ int i;
+
+ numargs = get_func_arg_info(proctup,
+ &argtypes, &argnames, &argmodes);
+
+ nlackdefaults = numargs;
+ if (print_defaults && proc->pronargdefaults > 0)
+ {
+ Datum proargdefaults;
+ bool isnull;
+
+ proargdefaults = SysCacheGetAttr(PROCOID, proctup,
+ Anum_pg_proc_proargdefaults,
+ &isnull);
+ if (!isnull)
+ {
+ char *str;
+
+ str = TextDatumGetCString(proargdefaults);
+ argdefaults = castNode(List, stringToNode(str));
+ pfree(str);
+ nextargdefault = list_head(argdefaults);
+ /* nlackdefaults counts only *input* arguments lacking defaults */
+ nlackdefaults = proc->pronargs - list_length(argdefaults);
+ }
+ }
+
+ /* Check for special treatment of ordered-set aggregates */
+ if (proc->prokind == PROKIND_AGGREGATE)
+ {
+ HeapTuple aggtup;
+ Form_pg_aggregate agg;
+
+ aggtup = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(proc->oid));
+ if (!HeapTupleIsValid(aggtup))
+ elog(ERROR, "cache lookup failed for aggregate %u",
+ proc->oid);
+ agg = (Form_pg_aggregate) GETSTRUCT(aggtup);
+ if (AGGKIND_IS_ORDERED_SET(agg->aggkind))
+ insertorderbyat = agg->aggnumdirectargs;
+ ReleaseSysCache(aggtup);
+ }
+
+ argsprinted = 0;
+ inputargno = 0;
+ for (i = 0; i < numargs; i++)
+ {
+ Oid argtype = argtypes[i];
+ char *argname = argnames ? argnames[i] : NULL;
+ char argmode = argmodes ? argmodes[i] : PROARGMODE_IN;
+ const char *modename;
+ bool isinput;
+
+ switch (argmode)
+ {
+ case PROARGMODE_IN:
+
+ /*
+ * For procedures, explicitly mark all argument modes, so as
+ * to avoid ambiguity with the SQL syntax for DROP PROCEDURE.
+ */
+ if (proc->prokind == PROKIND_PROCEDURE)
+ modename = "IN ";
+ else
+ modename = "";
+ isinput = true;
+ break;
+ case PROARGMODE_INOUT:
+ modename = "INOUT ";
+ isinput = true;
+ break;
+ case PROARGMODE_OUT:
+ modename = "OUT ";
+ isinput = false;
+ break;
+ case PROARGMODE_VARIADIC:
+ modename = "VARIADIC ";
+ isinput = true;
+ break;
+ case PROARGMODE_TABLE:
+ modename = "";
+ isinput = false;
+ break;
+ default:
+ elog(ERROR, "invalid parameter mode '%c'", argmode);
+ modename = NULL; /* keep compiler quiet */
+ isinput = false;
+ break;
+ }
+ if (isinput)
+ inputargno++; /* this is a 1-based counter */
+
+ if (print_table_args != (argmode == PROARGMODE_TABLE))
+ continue;
+
+ if (argsprinted == insertorderbyat)
+ {
+ if (argsprinted)
+ appendStringInfoChar(buf, ' ');
+ appendStringInfoString(buf, "ORDER BY ");
+ }
+ else if (argsprinted)
+ appendStringInfoString(buf, ", ");
+
+ appendStringInfoString(buf, modename);
+ if (argname && argname[0])
+ appendStringInfo(buf, "%s ", quote_identifier(argname));
+ appendStringInfoString(buf, format_type_be(argtype));
+ if (print_defaults && isinput && inputargno > nlackdefaults)
+ {
+ Node *expr;
+
+ Assert(nextargdefault != NULL);
+ expr = (Node *) lfirst(nextargdefault);
+ nextargdefault = lnext(argdefaults, nextargdefault);
+
+ appendStringInfo(buf, " DEFAULT %s",
+ deparse_expression(expr, NIL, false, false));
+ }
+ argsprinted++;
+
+ /* nasty hack: print the last arg twice for variadic ordered-set agg */
+ if (argsprinted == insertorderbyat && i == numargs - 1)
+ {
+ i--;
+ /* aggs shouldn't have defaults anyway, but just to be sure ... */
+ print_defaults = false;
+ }
+ }
+
+ return argsprinted;
+}
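+
+/*
+ * Illustrative sketch for the ordered-set case above: percentile_cont has
+ * pg_proc argument types (float8, float8) with one direct argument, so the
+ * loop emits
+ *
+ *     double precision ORDER BY double precision
+ *
+ * i.e. "ORDER BY" is inserted once argsprinted reaches aggnumdirectargs.
+ */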
+
+static bool
+is_input_argument(int nth, const char *argmodes)
+{
+ return (!argmodes
+ || argmodes[nth] == PROARGMODE_IN
+ || argmodes[nth] == PROARGMODE_INOUT
+ || argmodes[nth] == PROARGMODE_VARIADIC);
+}
+
+/*
+ * Append used transformed types to specified buffer
+ */
+static void
+print_function_trftypes(StringInfo buf, HeapTuple proctup)
+{
+ Oid *trftypes;
+ int ntypes;
+
+ ntypes = get_func_trftypes(proctup, &trftypes);
+ if (ntypes > 0)
+ {
+ int i;
+
+ appendStringInfoString(buf, " TRANSFORM ");
+ for (i = 0; i < ntypes; i++)
+ {
+ if (i != 0)
+ appendStringInfoString(buf, ", ");
+ appendStringInfo(buf, "FOR TYPE %s", format_type_be(trftypes[i]));
+ }
+ appendStringInfoChar(buf, '\n');
+ }
+}
+
+/*
+ * Get textual representation of a function argument's default value. The
+ * second argument of this function is the argument number among all arguments
+ * (i.e. proallargtypes, *not* proargtypes), starting with 1, because that's
+ * how information_schema.sql uses it.
+ */
+Datum
+pg_get_function_arg_default(PG_FUNCTION_ARGS)
+{
+ Oid funcid = PG_GETARG_OID(0);
+ int32 nth_arg = PG_GETARG_INT32(1);
+ HeapTuple proctup;
+ Form_pg_proc proc;
+ int numargs;
+ Oid *argtypes;
+ char **argnames;
+ char *argmodes;
+ int i;
+ List *argdefaults;
+ Node *node;
+ char *str;
+ int nth_inputarg;
+ Datum proargdefaults;
+ bool isnull;
+ int nth_default;
+
+ proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
+ if (!HeapTupleIsValid(proctup))
+ PG_RETURN_NULL();
+
+ numargs = get_func_arg_info(proctup, &argtypes, &argnames, &argmodes);
+ if (nth_arg < 1 || nth_arg > numargs || !is_input_argument(nth_arg - 1, argmodes))
+ {
+ ReleaseSysCache(proctup);
+ PG_RETURN_NULL();
+ }
+
+ nth_inputarg = 0;
+ for (i = 0; i < nth_arg; i++)
+ if (is_input_argument(i, argmodes))
+ nth_inputarg++;
+
+ proargdefaults = SysCacheGetAttr(PROCOID, proctup,
+ Anum_pg_proc_proargdefaults,
+ &isnull);
+ if (isnull)
+ {
+ ReleaseSysCache(proctup);
+ PG_RETURN_NULL();
+ }
+
+ str = TextDatumGetCString(proargdefaults);
+ argdefaults = castNode(List, stringToNode(str));
+ pfree(str);
+
+ proc = (Form_pg_proc) GETSTRUCT(proctup);
+
+ /*
+ * Calculate index into proargdefaults: proargdefaults corresponds to the
+ * last N input arguments, where N = pronargdefaults.
+ */
+ nth_default = nth_inputarg - 1 - (proc->pronargs - proc->pronargdefaults);
+
+ if (nth_default < 0 || nth_default >= list_length(argdefaults))
+ {
+ ReleaseSysCache(proctup);
+ PG_RETURN_NULL();
+ }
+ node = list_nth(argdefaults, nth_default);
+ str = deparse_expression(node, NIL, false, false);
+
+ ReleaseSysCache(proctup);
+
+ PG_RETURN_TEXT_P(string_to_text(str));
+}
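+
+/*
+ * Worked example for the nth_default computation above (hypothetical
+ * function): for
+ *
+ *     CREATE FUNCTION f(a int, b int DEFAULT 1, c int DEFAULT 2) ...
+ *
+ * pronargs = 3 and pronargdefaults = 2, so requesting argument 3 yields
+ * nth_inputarg = 3 and nth_default = 3 - 1 - (3 - 2) = 1, selecting the
+ * second element of proargdefaults, which deparses as "2".
+ */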
+
+static void
+print_function_sqlbody(StringInfo buf, HeapTuple proctup)
+{
+ int numargs;
+ Oid *argtypes;
+ char **argnames;
+ char *argmodes;
+ deparse_namespace dpns = {0};
+ Datum tmp;
+ bool isnull;
+ Node *n;
+
+ dpns.funcname = pstrdup(NameStr(((Form_pg_proc) GETSTRUCT(proctup))->proname));
+ numargs = get_func_arg_info(proctup,
+ &argtypes, &argnames, &argmodes);
+ dpns.numargs = numargs;
+ dpns.argnames = argnames;
+
+ tmp = SysCacheGetAttr(PROCOID, proctup, Anum_pg_proc_prosqlbody, &isnull);
+ Assert(!isnull);
+ n = stringToNode(TextDatumGetCString(tmp));
+
+ if (IsA(n, List))
+ {
+ List *stmts;
+ ListCell *lc;
+
+ stmts = linitial(castNode(List, n));
+
+ appendStringInfoString(buf, "BEGIN ATOMIC\n");
+
+ foreach(lc, stmts)
+ {
+ Query *query = lfirst_node(Query, lc);
+
+ /* It seems advisable to get at least AccessShareLock on rels */
+ AcquireRewriteLocks(query, false, false);
+ get_query_def(query, buf, list_make1(&dpns), NULL, false,
+ PRETTYFLAG_INDENT, WRAP_COLUMN_DEFAULT, 1);
+ appendStringInfoChar(buf, ';');
+ appendStringInfoChar(buf, '\n');
+ }
+
+ appendStringInfoString(buf, "END");
+ }
+ else
+ {
+ Query *query = castNode(Query, n);
+
+ /* It seems advisable to get at least AccessShareLock on rels */
+ AcquireRewriteLocks(query, false, false);
+ get_query_def(query, buf, list_make1(&dpns), NULL, false,
+ 0, WRAP_COLUMN_DEFAULT, 0);
+ }
+}
+
+Datum
+pg_get_function_sqlbody(PG_FUNCTION_ARGS)
+{
+ Oid funcid = PG_GETARG_OID(0);
+ StringInfoData buf;
+ HeapTuple proctup;
+ bool isnull;
+
+ initStringInfo(&buf);
+
+ /* Look up the function */
+ proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
+ if (!HeapTupleIsValid(proctup))
+ PG_RETURN_NULL();
+
+ (void) SysCacheGetAttr(PROCOID, proctup, Anum_pg_proc_prosqlbody, &isnull);
+ if (isnull)
+ {
+ ReleaseSysCache(proctup);
+ PG_RETURN_NULL();
+ }
+
+ print_function_sqlbody(&buf, proctup);
+
+ ReleaseSysCache(proctup);
+
+ PG_RETURN_TEXT_P(cstring_to_text(buf.data));
+}
+
+
+/*
+ * deparse_expression - General utility for deparsing expressions
+ *
+ * calls deparse_expression_pretty with all prettyPrinting disabled
+ */
+char *
+deparse_expression(Node *expr, List *dpcontext,
+ bool forceprefix, bool showimplicit)
+{
+ return deparse_expression_pretty(expr, dpcontext, forceprefix,
+ showimplicit, 0, 0);
+}
+
+/* ----------
+ * deparse_expression_pretty - General utility for deparsing expressions
+ *
+ * expr is the node tree to be deparsed. It must be a transformed expression
+ * tree (ie, not the raw output of gram.y).
+ *
+ * dpcontext is a list of deparse_namespace nodes representing the context
+ * for interpreting Vars in the node tree. It can be NIL if no Vars are
+ * expected.
+ *
+ * forceprefix is true to force all Vars to be prefixed with their table names.
+ *
+ * showimplicit is true to force all implicit casts to be shown explicitly.
+ *
+ * Tries to pretty up the output according to prettyFlags and startIndent.
+ *
+ * The result is a palloc'd string.
+ * ----------
+ */
+static char *
+deparse_expression_pretty(Node *expr, List *dpcontext,
+ bool forceprefix, bool showimplicit,
+ int prettyFlags, int startIndent)
+{
+ StringInfoData buf;
+ deparse_context context;
+
+ initStringInfo(&buf);
+ context.buf = &buf;
+ context.namespaces = dpcontext;
+ context.windowClause = NIL;
+ context.windowTList = NIL;
+ context.varprefix = forceprefix;
+ context.prettyFlags = prettyFlags;
+ context.wrapColumn = WRAP_COLUMN_DEFAULT;
+ context.indentLevel = startIndent;
+ context.special_exprkind = EXPR_KIND_NONE;
+ context.appendparents = NULL;
+
+ get_rule_expr(expr, &context, showimplicit);
+
+ return buf.data;
+}
+
+/* ----------
+ * deparse_context_for - Build deparse context for a single relation
+ *
+ * Given the reference name (alias) and OID of a relation, build deparsing
+ * context for an expression referencing only that relation (as varno 1,
+ * varlevelsup 0). This is sufficient for many uses of deparse_expression.
+ * ----------
+ */
+List *
+deparse_context_for(const char *aliasname, Oid relid)
+{
+ deparse_namespace *dpns;
+ RangeTblEntry *rte;
+
+ dpns = (deparse_namespace *) palloc0(sizeof(deparse_namespace));
+
+ /* Build a minimal RTE for the rel */
+ rte = makeNode(RangeTblEntry);
+ rte->rtekind = RTE_RELATION;
+ rte->relid = relid;
+ rte->relkind = RELKIND_RELATION; /* no need for exactness here */
+ rte->rellockmode = AccessShareLock;
+ rte->alias = makeAlias(aliasname, NIL);
+ rte->eref = rte->alias;
+ rte->lateral = false;
+ rte->inh = false;
+ rte->inFromCl = true;
+
+ /* Build one-element rtable */
+ dpns->rtable = list_make1(rte);
+ dpns->subplans = NIL;
+ dpns->ctes = NIL;
+ dpns->appendrels = NULL;
+ set_rtable_names(dpns, NIL, NULL);
+ set_simple_column_names(dpns);
+
+ /* Return a one-deep namespace stack */
+ return list_make1(dpns);
+}
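+
+/*
+ * Minimal internal-usage sketch (assumes "expr" is a transformed expression
+ * over relation "relid", referenced as varno 1):
+ *
+ *     List *context = deparse_context_for(get_rel_name(relid), relid);
+ *     char *sql = deparse_expression(expr, context, false, false);
+ *
+ * This is essentially what pg_get_expr_worker() and
+ * pg_get_constraintdef_worker() do above.
+ */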
+
+/*
+ * deparse_context_for_plan_tree - Build deparse context for a Plan tree
+ *
+ * When deparsing an expression in a Plan tree, we use the plan's rangetable
+ * to resolve names of simple Vars. The initialization of column names for
+ * this is rather expensive if the rangetable is large, and it'll be the same
+ * for every expression in the Plan tree; so we do it just once and re-use
+ * the result of this function for each expression. (Note that the result
+ * is not usable until set_deparse_context_plan() is applied to it.)
+ *
+ * In addition to the PlannedStmt, pass the per-RTE alias names
+ * assigned by a previous call to select_rtable_names_for_explain.
+ */
+List *
+deparse_context_for_plan_tree(PlannedStmt *pstmt, List *rtable_names)
+{
+ deparse_namespace *dpns;
+
+ dpns = (deparse_namespace *) palloc0(sizeof(deparse_namespace));
+
+ /* Initialize fields that stay the same across the whole plan tree */
+ dpns->rtable = pstmt->rtable;
+ dpns->rtable_names = rtable_names;
+ dpns->subplans = pstmt->subplans;
+ dpns->ctes = NIL;
+ if (pstmt->appendRelations)
+ {
+ /* Set up the array, indexed by child relid */
+ int ntables = list_length(dpns->rtable);
+ ListCell *lc;
+
+ dpns->appendrels = (AppendRelInfo **)
+ palloc0((ntables + 1) * sizeof(AppendRelInfo *));
+ foreach(lc, pstmt->appendRelations)
+ {
+ AppendRelInfo *appinfo = lfirst_node(AppendRelInfo, lc);
+ Index crelid = appinfo->child_relid;
+
+ Assert(crelid > 0 && crelid <= ntables);
+ Assert(dpns->appendrels[crelid] == NULL);
+ dpns->appendrels[crelid] = appinfo;
+ }
+ }
+ else
+ dpns->appendrels = NULL; /* don't need it */
+
+ /*
+ * Set up column name aliases. We will get rather bogus results for join
+ * RTEs, but that doesn't matter because plan trees don't contain any join
+ * alias Vars.
+ */
+ set_simple_column_names(dpns);
+
+ /* Return a one-deep namespace stack */
+ return list_make1(dpns);
+}
+
+/*
+ * set_deparse_context_plan - Specify Plan node containing expression
+ *
+ * When deparsing an expression in a Plan tree, we might have to resolve
+ * OUTER_VAR, INNER_VAR, or INDEX_VAR references. To do this, the caller must
+ * provide the parent Plan node. Then OUTER_VAR and INNER_VAR references
+ * can be resolved by drilling down into the left and right child plans.
+ * Similarly, INDEX_VAR references can be resolved by reference to the
+ * indextlist given in a parent IndexOnlyScan node, or to the scan tlist in
+ * ForeignScan and CustomScan nodes. (Note that we don't currently support
+ * deparsing of indexquals in regular IndexScan or BitmapIndexScan nodes;
+ * for those, we can only deparse the indexqualorig fields, which won't
+ * contain INDEX_VAR Vars.)
+ *
+ * The ancestors list is a list of the Plan's parent Plan and SubPlan nodes,
+ * the most-closely-nested first. This is needed to resolve PARAM_EXEC
+ * Params. Note we assume that all the Plan nodes share the same rtable.
+ *
+ * Once this function has been called, deparse_expression() can be called on
+ * subsidiary expression(s) of the specified Plan node. To deparse
+ * expressions of a different Plan node in the same Plan tree, re-call this
+ * function to identify the new parent Plan node.
+ *
+ * The result is the same List passed in; this is a notational convenience.
+ */
+List *
+set_deparse_context_plan(List *dpcontext, Plan *plan, List *ancestors)
+{
+ deparse_namespace *dpns;
+
+ /* Should always have one-entry namespace list for Plan deparsing */
+ Assert(list_length(dpcontext) == 1);
+ dpns = (deparse_namespace *) linitial(dpcontext);
+
+ /* Set our attention on the specific plan node passed in */
+ dpns->ancestors = ancestors;
+ set_deparse_plan(dpns, plan);
+
+ return dpcontext;
+}
+
+/*
+ * select_rtable_names_for_explain - Select RTE aliases for EXPLAIN
+ *
+ * Determine the relation aliases we'll use during an EXPLAIN operation.
+ * This is just a frontend to set_rtable_names. We have to expose the aliases
+ * to EXPLAIN because EXPLAIN needs to know the right alias names to print.
+ */
+List *
+select_rtable_names_for_explain(List *rtable, Bitmapset *rels_used)
+{
+ deparse_namespace dpns;
+
+ memset(&dpns, 0, sizeof(dpns));
+ dpns.rtable = rtable;
+ dpns.subplans = NIL;
+ dpns.ctes = NIL;
+ dpns.appendrels = NULL;
+ set_rtable_names(&dpns, NIL, rels_used);
+ /* We needn't bother computing column aliases yet */
+
+ return dpns.rtable_names;
+}
+
+/*
+ * set_rtable_names: select RTE aliases to be used in printing a query
+ *
+ * We fill in dpns->rtable_names with a list of names that is one-for-one with
+ * the already-filled dpns->rtable list. Each RTE name is unique among those
+ * in the new namespace plus any ancestor namespaces listed in
+ * parent_namespaces.
+ *
+ * If rels_used isn't NULL, only RTE indexes listed in it are given aliases.
+ *
+ * Note that this function is only concerned with relation names, not column
+ * names.
+ */
+static void
+set_rtable_names(deparse_namespace *dpns, List *parent_namespaces,
+ Bitmapset *rels_used)
+{
+ HASHCTL hash_ctl;
+ HTAB *names_hash;
+ NameHashEntry *hentry;
+ bool found;
+ int rtindex;
+ ListCell *lc;
+
+ dpns->rtable_names = NIL;
+ /* nothing more to do if empty rtable */
+ if (dpns->rtable == NIL)
+ return;
+
+ /*
+ * We use a hash table to hold known names, so that this process is O(N)
+ * not O(N^2) for N names.
+ */
+ hash_ctl.keysize = NAMEDATALEN;
+ hash_ctl.entrysize = sizeof(NameHashEntry);
+ hash_ctl.hcxt = CurrentMemoryContext;
+ names_hash = hash_create("set_rtable_names names",
+ list_length(dpns->rtable),
+ &hash_ctl,
+ HASH_ELEM | HASH_STRINGS | HASH_CONTEXT);
+
+ /* Preload the hash table with names appearing in parent_namespaces */
+ foreach(lc, parent_namespaces)
+ {
+ deparse_namespace *olddpns = (deparse_namespace *) lfirst(lc);
+ ListCell *lc2;
+
+ foreach(lc2, olddpns->rtable_names)
+ {
+ char *oldname = (char *) lfirst(lc2);
+
+ if (oldname == NULL)
+ continue;
+ hentry = (NameHashEntry *) hash_search(names_hash,
+ oldname,
+ HASH_ENTER,
+ &found);
+ /* we do not complain about duplicate names in parent namespaces */
+ hentry->counter = 0;
+ }
+ }
+
+ /* Now we can scan the rtable */
+ rtindex = 1;
+ foreach(lc, dpns->rtable)
+ {
+ RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
+ char *refname;
+
+ /* Just in case this takes an unreasonable amount of time ... */
+ CHECK_FOR_INTERRUPTS();
+
+ if (rels_used && !bms_is_member(rtindex, rels_used))
+ {
+ /* Ignore unreferenced RTE */
+ refname = NULL;
+ }
+ else if (rte->alias)
+ {
+ /* If RTE has a user-defined alias, prefer that */
+ refname = rte->alias->aliasname;
+ }
+ else if (rte->rtekind == RTE_RELATION)
+ {
+ /* Use the current actual name of the relation */
+ refname = get_rel_name(rte->relid);
+ }
+ else if (rte->rtekind == RTE_JOIN)
+ {
+ /* Unnamed join has no refname */
+ refname = NULL;
+ }
+ else
+ {
+ /* Otherwise use whatever the parser assigned */
+ refname = rte->eref->aliasname;
+ }
+
+ /*
+ * If the selected name isn't unique, append digits to make it so, and
+ * make a new hash entry for it once we've got a unique name. For a
+ * very long input name, we might have to truncate to stay within
+ * NAMEDATALEN.
+ */
+ if (refname)
+ {
+ hentry = (NameHashEntry *) hash_search(names_hash,
+ refname,
+ HASH_ENTER,
+ &found);
+ if (found)
+ {
+ /* Name already in use, must choose a new one */
+ int refnamelen = strlen(refname);
+ char *modname = (char *) palloc(refnamelen + 16);
+ NameHashEntry *hentry2;
+
+ do
+ {
+ hentry->counter++;
+ for (;;)
+ {
+ memcpy(modname, refname, refnamelen);
+ sprintf(modname + refnamelen, "_%d", hentry->counter);
+ if (strlen(modname) < NAMEDATALEN)
+ break;
+ /* drop chars from refname to keep all the digits */
+ refnamelen = pg_mbcliplen(refname, refnamelen,
+ refnamelen - 1);
+ }
+ hentry2 = (NameHashEntry *) hash_search(names_hash,
+ modname,
+ HASH_ENTER,
+ &found);
+ } while (found);
+ hentry2->counter = 0; /* init new hash entry */
+ refname = modname;
+ }
+ else
+ {
+ /* Name not previously used, need only initialize hentry */
+ hentry->counter = 0;
+ }
+ }
+
+ dpns->rtable_names = lappend(dpns->rtable_names, refname);
+ rtindex++;
+ }
+
+ hash_destroy(names_hash);
+}
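+
+/*
+ * Illustrative aliasing example (hypothetical rtable): three RTEs that all
+ * resolve to the refname "t" come out of the loop above as "t", "t_1" and
+ * "t_2"; the base name is truncated with pg_mbcliplen() if appending the
+ * digits would not fit within NAMEDATALEN.
+ */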
+
+/*
+ * set_deparse_for_query: set up deparse_namespace for deparsing a Query tree
+ *
+ * For convenience, this is defined to initialize the deparse_namespace struct
+ * from scratch.
+ */
+static void
+set_deparse_for_query(deparse_namespace *dpns, Query *query,
+ List *parent_namespaces)
+{
+ ListCell *lc;
+ ListCell *lc2;
+
+ /* Initialize *dpns and fill rtable/ctes links */
+ memset(dpns, 0, sizeof(deparse_namespace));
+ dpns->rtable = query->rtable;
+ dpns->subplans = NIL;
+ dpns->ctes = query->cteList;
+ dpns->appendrels = NULL;
+
+ /* Assign a unique relation alias to each RTE */
+ set_rtable_names(dpns, parent_namespaces, NULL);
+
+ /* Initialize dpns->rtable_columns to contain zeroed structs */
+ dpns->rtable_columns = NIL;
+ while (list_length(dpns->rtable_columns) < list_length(dpns->rtable))
+ dpns->rtable_columns = lappend(dpns->rtable_columns,
+ palloc0(sizeof(deparse_columns)));
+
+ /* If it's a utility query, it won't have a jointree */
+ if (query->jointree)
+ {
+ /* Detect whether global uniqueness of USING names is needed */
+ dpns->unique_using =
+ has_dangerous_join_using(dpns, (Node *) query->jointree);
+
+ /*
+ * Select names for columns merged by USING, via a recursive pass over
+ * the query jointree.
+ */
+ set_using_names(dpns, (Node *) query->jointree, NIL);
+ }
+
+ /*
+ * Now assign remaining column aliases for each RTE. We do this in a
+ * linear scan of the rtable, so as to process RTEs whether or not they
+ * are in the jointree (we mustn't miss NEW.*, INSERT target relations,
+ * etc). JOIN RTEs must be processed after their children, but this is
+ * okay because they appear later in the rtable list than their children
+ * (cf Asserts in identify_join_columns()).
+ */
+ forboth(lc, dpns->rtable, lc2, dpns->rtable_columns)
+ {
+ RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
+ deparse_columns *colinfo = (deparse_columns *) lfirst(lc2);
+
+ if (rte->rtekind == RTE_JOIN)
+ set_join_column_names(dpns, rte, colinfo);
+ else
+ set_relation_column_names(dpns, rte, colinfo);
+ }
+}
+
+/*
+ * set_simple_column_names: fill in column aliases for non-query situations
+ *
+ * This handles EXPLAIN and cases where we only have relation RTEs. Without
+ * a join tree, we can't do anything smart about join RTEs, but we don't
+ * need to (note that EXPLAIN should never see join alias Vars anyway).
+ * If we do hit a join RTE we'll just process it like a non-table base RTE.
+ */
+static void
+set_simple_column_names(deparse_namespace *dpns)
+{
+ ListCell *lc;
+ ListCell *lc2;
+
+ /* Initialize dpns->rtable_columns to contain zeroed structs */
+ dpns->rtable_columns = NIL;
+ while (list_length(dpns->rtable_columns) < list_length(dpns->rtable))
+ dpns->rtable_columns = lappend(dpns->rtable_columns,
+ palloc0(sizeof(deparse_columns)));
+
+ /* Assign unique column aliases within each RTE */
+ forboth(lc, dpns->rtable, lc2, dpns->rtable_columns)
+ {
+ RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
+ deparse_columns *colinfo = (deparse_columns *) lfirst(lc2);
+
+ set_relation_column_names(dpns, rte, colinfo);
+ }
+}
+
+/*
+ * has_dangerous_join_using: search jointree for unnamed JOIN USING
+ *
+ * Merged columns of a JOIN USING may act differently from either of the input
+ * columns, either because they are merged with COALESCE (in a FULL JOIN) or
+ * because an implicit coercion of the underlying input column is required.
+ * In such a case the column must be referenced as a column of the JOIN not as
+ * a column of either input. And this is problematic if the join is unnamed
+ * (alias-less): we cannot qualify the column's name with an RTE name, since
+ * there is none. (Forcibly assigning an alias to the join is not a solution,
+ * since that will prevent legal references to tables below the join.)
+ * To ensure that every column in the query is unambiguously referenceable,
+ * we must assign such merged columns names that are globally unique across
+ * the whole query, aliasing other columns out of the way as necessary.
+ *
+ * Because the ensuing re-aliasing is fairly damaging to the readability of
+ * the query, we don't do this unless we have to. So, we must pre-scan
+ * the join tree to see if we have to, before starting set_using_names().
+ */
+static bool
+has_dangerous_join_using(deparse_namespace *dpns, Node *jtnode)
+{
+ if (IsA(jtnode, RangeTblRef))
+ {
+ /* nothing to do here */
+ }
+ else if (IsA(jtnode, FromExpr))
+ {
+ FromExpr *f = (FromExpr *) jtnode;
+ ListCell *lc;
+
+ foreach(lc, f->fromlist)
+ {
+ if (has_dangerous_join_using(dpns, (Node *) lfirst(lc)))
+ return true;
+ }
+ }
+ else if (IsA(jtnode, JoinExpr))
+ {
+ JoinExpr *j = (JoinExpr *) jtnode;
+
+ /* Is it an unnamed JOIN with USING? */
+ if (j->alias == NULL && j->usingClause)
+ {
+ /*
+ * Yes, so check each join alias var to see if any of them are not
+ * simple references to underlying columns. If so, we have a
+ * dangerous situation and must pick unique aliases.
+ */
+ RangeTblEntry *jrte = rt_fetch(j->rtindex, dpns->rtable);
+
+ /* We need only examine the merged columns */
+ for (int i = 0; i < jrte->joinmergedcols; i++)
+ {
+ Node *aliasvar = list_nth(jrte->joinaliasvars, i);
+
+ if (!IsA(aliasvar, Var))
+ return true;
+ }
+ }
+
+ /* Nope, but inspect children */
+ if (has_dangerous_join_using(dpns, j->larg))
+ return true;
+ if (has_dangerous_join_using(dpns, j->rarg))
+ return true;
+ }
+ else
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(jtnode));
+ return false;
+}
+
+/*
+ * set_using_names: select column aliases to be used for merged USING columns
+ *
+ * We do this during a recursive descent of the query jointree.
+ * dpns->unique_using must already be set to determine the global strategy.
+ *
+ * Column alias info is saved in the dpns->rtable_columns list, which is
+ * assumed to be filled with pre-zeroed deparse_columns structs.
+ *
+ * parentUsing is a list of all USING aliases assigned in parent joins of
+ * the current jointree node. (The passed-in list must not be modified.)
+ */
+static void
+set_using_names(deparse_namespace *dpns, Node *jtnode, List *parentUsing)
+{
+ if (IsA(jtnode, RangeTblRef))
+ {
+ /* nothing to do now */
+ }
+ else if (IsA(jtnode, FromExpr))
+ {
+ FromExpr *f = (FromExpr *) jtnode;
+ ListCell *lc;
+
+ foreach(lc, f->fromlist)
+ set_using_names(dpns, (Node *) lfirst(lc), parentUsing);
+ }
+ else if (IsA(jtnode, JoinExpr))
+ {
+ JoinExpr *j = (JoinExpr *) jtnode;
+ RangeTblEntry *rte = rt_fetch(j->rtindex, dpns->rtable);
+ deparse_columns *colinfo = deparse_columns_fetch(j->rtindex, dpns);
+ int *leftattnos;
+ int *rightattnos;
+ deparse_columns *leftcolinfo;
+ deparse_columns *rightcolinfo;
+ int i;
+ ListCell *lc;
+
+ /* Get info about the shape of the join */
+ identify_join_columns(j, rte, colinfo);
+ leftattnos = colinfo->leftattnos;
+ rightattnos = colinfo->rightattnos;
+
+ /* Look up the not-yet-filled-in child deparse_columns structs */
+ leftcolinfo = deparse_columns_fetch(colinfo->leftrti, dpns);
+ rightcolinfo = deparse_columns_fetch(colinfo->rightrti, dpns);
+
+ /*
+ * If this join is unnamed, then we cannot substitute new aliases at
+ * this level, so any name requirements pushed down to here must be
+ * pushed down again to the children.
+ */
+ if (rte->alias == NULL)
+ {
+ for (i = 0; i < colinfo->num_cols; i++)
+ {
+ char *colname = colinfo->colnames[i];
+
+ if (colname == NULL)
+ continue;
+
+ /* Push down to left column, unless it's a system column */
+ if (leftattnos[i] > 0)
+ {
+ expand_colnames_array_to(leftcolinfo, leftattnos[i]);
+ leftcolinfo->colnames[leftattnos[i] - 1] = colname;
+ }
+
+ /* Same on the righthand side */
+ if (rightattnos[i] > 0)
+ {
+ expand_colnames_array_to(rightcolinfo, rightattnos[i]);
+ rightcolinfo->colnames[rightattnos[i] - 1] = colname;
+ }
+ }
+ }
+
+ /*
+ * If there's a USING clause, select the USING column names and push
+ * those names down to the children. We have two strategies:
+ *
+ * If dpns->unique_using is true, we force all USING names to be
+ * unique across the whole query level. In principle we'd only need
+ * the names of dangerous USING columns to be globally unique, but to
+ * safely assign all USING names in a single pass, we have to enforce
+ * the same uniqueness rule for all of them. However, if a USING
+ * column's name has been pushed down from the parent, we should use
+ * it as-is rather than making a uniqueness adjustment. This is
+ * necessary when we're at an unnamed join, and it creates no risk of
+ * ambiguity. Also, if there's a user-written output alias for a
+ * merged column, we prefer to use that rather than the input name;
+ * this simplifies the logic and seems likely to lead to less aliasing
+ * overall.
+ *
+ * If dpns->unique_using is false, we only need USING names to be
+ * unique within their own join RTE. We still need to honor
+ * pushed-down names, though.
+ *
+ * Though significantly different in results, these two strategies are
+ * implemented by the same code, with only the difference of whether
+ * to put assigned names into dpns->using_names.
+ */
+ if (j->usingClause)
+ {
+ /* Copy the input parentUsing list so we don't modify it */
+ parentUsing = list_copy(parentUsing);
+
+ /* USING names must correspond to the first join output columns */
+ expand_colnames_array_to(colinfo, list_length(j->usingClause));
+ i = 0;
+ foreach(lc, j->usingClause)
+ {
+ char *colname = strVal(lfirst(lc));
+
+ /* Assert it's a merged column */
+ Assert(leftattnos[i] != 0 && rightattnos[i] != 0);
+
+ /* Adopt passed-down name if any, else select unique name */
+ if (colinfo->colnames[i] != NULL)
+ colname = colinfo->colnames[i];
+ else
+ {
+ /* Prefer user-written output alias if any */
+ if (rte->alias && i < list_length(rte->alias->colnames))
+ colname = strVal(list_nth(rte->alias->colnames, i));
+ /* Make it appropriately unique */
+ colname = make_colname_unique(colname, dpns, colinfo);
+ if (dpns->unique_using)
+ dpns->using_names = lappend(dpns->using_names,
+ colname);
+ /* Save it as output column name, too */
+ colinfo->colnames[i] = colname;
+ }
+
+ /* Remember selected names for use later */
+ colinfo->usingNames = lappend(colinfo->usingNames, colname);
+ parentUsing = lappend(parentUsing, colname);
+
+ /* Push down to left column, unless it's a system column */
+ if (leftattnos[i] > 0)
+ {
+ expand_colnames_array_to(leftcolinfo, leftattnos[i]);
+ leftcolinfo->colnames[leftattnos[i] - 1] = colname;
+ }
+
+ /* Same on the righthand side */
+ if (rightattnos[i] > 0)
+ {
+ expand_colnames_array_to(rightcolinfo, rightattnos[i]);
+ rightcolinfo->colnames[rightattnos[i] - 1] = colname;
+ }
+
+ i++;
+ }
+ }
+
+ /* Mark child deparse_columns structs with correct parentUsing info */
+ leftcolinfo->parentUsing = parentUsing;
+ rightcolinfo->parentUsing = parentUsing;
+
+ /* Now recursively assign USING column names in children */
+ set_using_names(dpns, j->larg, parentUsing);
+ set_using_names(dpns, j->rarg, parentUsing);
+ }
+ else
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(jtnode));
+}
+
+/*
+ * set_relation_column_names: select column aliases for a non-join RTE
+ *
+ * Column alias info is saved in *colinfo, which is assumed to be pre-zeroed.
+ * If any colnames entries are already filled in, those override local
+ * choices.
+ */
+static void
+set_relation_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
+ deparse_columns *colinfo)
+{
+ int ncolumns;
+ char **real_colnames;
+ bool changed_any;
+ int noldcolumns;
+ int i;
+ int j;
+
+ /*
+ * Construct an array of the current "real" column names of the RTE.
+ * real_colnames[] will be indexed by physical column number, with NULL
+ * entries for dropped columns.
+ */
+ if (rte->rtekind == RTE_RELATION)
+ {
+ /* Relation --- look to the system catalogs for up-to-date info */
+ Relation rel;
+ TupleDesc tupdesc;
+
+ rel = relation_open(rte->relid, AccessShareLock);
+ tupdesc = RelationGetDescr(rel);
+
+ ncolumns = tupdesc->natts;
+ real_colnames = (char **) palloc(ncolumns * sizeof(char *));
+
+ for (i = 0; i < ncolumns; i++)
+ {
+ Form_pg_attribute attr = TupleDescAttr(tupdesc, i);
+
+ if (attr->attisdropped)
+ real_colnames[i] = NULL;
+ else
+ real_colnames[i] = pstrdup(NameStr(attr->attname));
+ }
+ relation_close(rel, AccessShareLock);
+ }
+ else
+ {
+ /* Otherwise get the column names from eref or expandRTE() */
+ List *colnames;
+ ListCell *lc;
+
+ /*
+ * Functions returning composites have the annoying property that some
+ * of the composite type's columns might have been dropped since the
+ * query was parsed. If possible, use expandRTE() to handle that
+ * case, since it has the tedious logic needed to find out about
+ * dropped columns. However, if we're explaining a plan, then we
+ * don't have rte->functions because the planner thinks that won't be
+ * needed later, and that breaks expandRTE(). So in that case we have
+ * to rely on rte->eref, which may lead us to report a dropped
+ * column's old name; that seems close enough for EXPLAIN's purposes.
+ *
+ * For non-RELATION, non-FUNCTION RTEs, we can just look at rte->eref,
+ * which should be sufficiently up-to-date: no other RTE types can
+ * have columns get dropped from under them after parsing.
+ */
+ if (rte->rtekind == RTE_FUNCTION && rte->functions != NIL)
+ {
+ /* Since we're not creating Vars, rtindex etc. don't matter */
+ expandRTE(rte, 1, 0, -1, true /* include dropped */ ,
+ &colnames, NULL);
+ }
+ else
+ colnames = rte->eref->colnames;
+
+ ncolumns = list_length(colnames);
+ real_colnames = (char **) palloc(ncolumns * sizeof(char *));
+
+ i = 0;
+ foreach(lc, colnames)
+ {
+ /*
+ * If the column name we find here is an empty string, then it's a
+ * dropped column, so change to NULL.
+ */
+ char *cname = strVal(lfirst(lc));
+
+ if (cname[0] == '\0')
+ cname = NULL;
+ real_colnames[i] = cname;
+ i++;
+ }
+ }
+
+ /*
+ * Ensure colinfo->colnames has a slot for each column. (It could be long
+ * enough already, if we pushed down a name for the last column.) Note:
+ * it's possible that there are now more columns than there were when the
+ * query was parsed, ie colnames could be longer than rte->eref->colnames.
+ * We must assign unique aliases to the new columns too, else there could
+ * be unresolved conflicts when the view/rule is reloaded.
+ */
+ expand_colnames_array_to(colinfo, ncolumns);
+ Assert(colinfo->num_cols == ncolumns);
+
+ /*
+ * Make sufficiently large new_colnames and is_new_col arrays, too.
+ *
+ * Note: because we leave colinfo->num_new_cols zero until after the loop,
+ * colname_is_unique will not consult that array, which is fine because it
+ * would only be duplicate effort.
+ */
+ colinfo->new_colnames = (char **) palloc(ncolumns * sizeof(char *));
+ colinfo->is_new_col = (bool *) palloc(ncolumns * sizeof(bool));
+
+ /*
+ * Scan the columns, select a unique alias for each one, and store it in
+ * colinfo->colnames and colinfo->new_colnames. The former array has NULL
+ * entries for dropped columns, the latter omits them. Also mark
+ * new_colnames entries as to whether they are new since parse time; this
+ * is the case for entries beyond the length of rte->eref->colnames.
+ */
+ noldcolumns = list_length(rte->eref->colnames);
+ changed_any = false;
+ j = 0;
+ for (i = 0; i < ncolumns; i++)
+ {
+ char *real_colname = real_colnames[i];
+ char *colname = colinfo->colnames[i];
+
+ /* Skip dropped columns */
+ if (real_colname == NULL)
+ {
+ Assert(colname == NULL); /* colnames[i] is already NULL */
+ continue;
+ }
+
+ /* If alias already assigned, that's what to use */
+ if (colname == NULL)
+ {
+ /* If user wrote an alias, prefer that over real column name */
+ if (rte->alias && i < list_length(rte->alias->colnames))
+ colname = strVal(list_nth(rte->alias->colnames, i));
+ else
+ colname = real_colname;
+
+ /* Unique-ify and insert into colinfo */
+ colname = make_colname_unique(colname, dpns, colinfo);
+
+ colinfo->colnames[i] = colname;
+ }
+
+ /* Put names of non-dropped columns in new_colnames[] too */
+ colinfo->new_colnames[j] = colname;
+ /* And mark them as new or not */
+ colinfo->is_new_col[j] = (i >= noldcolumns);
+ j++;
+
+ /* Remember if any assigned aliases differ from "real" name */
+ if (!changed_any && strcmp(colname, real_colname) != 0)
+ changed_any = true;
+ }
+
+ /*
+ * Set correct length for new_colnames[] array. (Note: if columns have
+ * been added, colinfo->num_cols includes them, which is not quite right
+ * but is harmless, since any new columns must be at the end where
+ * they won't affect varattnos of pre-existing columns.)
+ */
+ colinfo->num_new_cols = j;
+
+ /*
+ * For a relation RTE, we need only print the alias column names if any
+ * are different from the underlying "real" names. For a function RTE,
+ * always emit a complete column alias list; this is to protect against
+ * possible instability of the default column names (eg, from altering
+ * parameter names). For tablefunc RTEs, we never print aliases, because
+ * the column names are part of the clause itself. For other RTE types,
+ * print if we changed anything OR if there were user-written column
+ * aliases (since the latter would be part of the underlying "reality").
+ */
+ if (rte->rtekind == RTE_RELATION)
+ colinfo->printaliases = changed_any;
+ else if (rte->rtekind == RTE_FUNCTION)
+ colinfo->printaliases = true;
+ else if (rte->rtekind == RTE_TABLEFUNC)
+ colinfo->printaliases = false;
+ else if (rte->alias && rte->alias->colnames != NIL)
+ colinfo->printaliases = true;
+ else
+ colinfo->printaliases = changed_any;
+}
+
+/*
+ * set_join_column_names: select column aliases for a join RTE
+ *
+ * Column alias info is saved in *colinfo, which is assumed to be pre-zeroed.
+ * If any colnames entries are already filled in, those override local
+ * choices. Also, names for USING columns were already chosen by
+ * set_using_names(). We further expect that column alias selection has been
+ * completed for both input RTEs.
+ */
+static void
+set_join_column_names(deparse_namespace *dpns, RangeTblEntry *rte,
+ deparse_columns *colinfo)
+{
+ deparse_columns *leftcolinfo;
+ deparse_columns *rightcolinfo;
+ bool changed_any;
+ int noldcolumns;
+ int nnewcolumns;
+ Bitmapset *leftmerged = NULL;
+ Bitmapset *rightmerged = NULL;
+ int i;
+ int j;
+ int ic;
+ int jc;
+
+ /* Look up the previously-filled-in child deparse_columns structs */
+ leftcolinfo = deparse_columns_fetch(colinfo->leftrti, dpns);
+ rightcolinfo = deparse_columns_fetch(colinfo->rightrti, dpns);
+
+ /*
+ * Ensure colinfo->colnames has a slot for each column. (It could be long
+ * enough already, if we pushed down a name for the last column.) Note:
+ * it's possible that one or both inputs now have more columns than there
+ * were when the query was parsed, but we'll deal with that below. We
+ * only need entries in colnames for pre-existing columns.
+ */
+ noldcolumns = list_length(rte->eref->colnames);
+ expand_colnames_array_to(colinfo, noldcolumns);
+ Assert(colinfo->num_cols == noldcolumns);
+
+ /*
+ * Scan the join output columns, select an alias for each one, and store
+ * it in colinfo->colnames. If there are USING columns, set_using_names()
+ * already selected their names, so we can start the loop at the first
+ * non-merged column.
+ */
+ changed_any = false;
+ for (i = list_length(colinfo->usingNames); i < noldcolumns; i++)
+ {
+ char *colname = colinfo->colnames[i];
+ char *real_colname;
+
+ /* Join column must refer to at least one input column */
+ Assert(colinfo->leftattnos[i] != 0 || colinfo->rightattnos[i] != 0);
+
+ /* Get the child column name */
+ if (colinfo->leftattnos[i] > 0)
+ real_colname = leftcolinfo->colnames[colinfo->leftattnos[i] - 1];
+ else if (colinfo->rightattnos[i] > 0)
+ real_colname = rightcolinfo->colnames[colinfo->rightattnos[i] - 1];
+ else
+ {
+ /* We're joining system columns --- use eref name */
+ real_colname = strVal(list_nth(rte->eref->colnames, i));
+ }
+
+ /* If child col has been dropped, no need to assign a join colname */
+ if (real_colname == NULL)
+ {
+ colinfo->colnames[i] = NULL;
+ continue;
+ }
+
+ /* In an unnamed join, just report child column names as-is */
+ if (rte->alias == NULL)
+ {
+ colinfo->colnames[i] = real_colname;
+ continue;
+ }
+
+ /* If alias already assigned, that's what to use */
+ if (colname == NULL)
+ {
+ /* If user wrote an alias, prefer that over real column name */
+ if (rte->alias && i < list_length(rte->alias->colnames))
+ colname = strVal(list_nth(rte->alias->colnames, i));
+ else
+ colname = real_colname;
+
+ /* Unique-ify and insert into colinfo */
+ colname = make_colname_unique(colname, dpns, colinfo);
+
+ colinfo->colnames[i] = colname;
+ }
+
+ /* Remember if any assigned aliases differ from "real" name */
+ if (!changed_any && strcmp(colname, real_colname) != 0)
+ changed_any = true;
+ }
+
+ /*
+ * Calculate number of columns the join would have if it were re-parsed
+ * now, and create storage for the new_colnames and is_new_col arrays.
+ *
+ * Note: colname_is_unique will be consulting new_colnames[] during the
+ * loops below, so its not-yet-filled entries must be zeroes.
+ */
+ nnewcolumns = leftcolinfo->num_new_cols + rightcolinfo->num_new_cols -
+ list_length(colinfo->usingNames);
+ colinfo->num_new_cols = nnewcolumns;
+ colinfo->new_colnames = (char **) palloc0(nnewcolumns * sizeof(char *));
+ colinfo->is_new_col = (bool *) palloc0(nnewcolumns * sizeof(bool));
+
+ /*
+ * Generating the new_colnames array is a bit tricky since any new columns
+ * added since parse time must be inserted in the right places. This code
+ * must match the parser, which will order a join's columns as merged
+ * columns first (in USING-clause order), then non-merged columns from the
+ * left input (in attnum order), then non-merged columns from the right
+ * input (ditto). If one of the inputs is itself a join, its columns will
+ * be ordered according to the same rule, which means newly-added columns
+ * might not be at the end. We can figure out what's what by consulting
+ * the leftattnos and rightattnos arrays plus the input is_new_col arrays.
+ *
+ * In these loops, i indexes leftattnos/rightattnos (so it's join varattno
+ * less one), j indexes new_colnames/is_new_col, and ic/jc have similar
+ * meanings for the current child RTE.
+ */
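+ /*
+ * For example (illustrative): in a view over "t1 JOIN t2 USING (id)", a
+ * column added to t1 after the view was created belongs among t1's
+ * non-merged columns, i.e. after "id" but before all of t2's columns,
+ * rather than at the end of the join's output.
+ */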
+
+ /* Handle merged columns; they are first and can't be new */
+ i = j = 0;
+ while (i < noldcolumns &&
+ colinfo->leftattnos[i] != 0 &&
+ colinfo->rightattnos[i] != 0)
+ {
+ /* column name is already determined and known unique */
+ colinfo->new_colnames[j] = colinfo->colnames[i];
+ colinfo->is_new_col[j] = false;
+
+ /* build bitmapsets of child attnums of merged columns */
+ if (colinfo->leftattnos[i] > 0)
+ leftmerged = bms_add_member(leftmerged, colinfo->leftattnos[i]);
+ if (colinfo->rightattnos[i] > 0)
+ rightmerged = bms_add_member(rightmerged, colinfo->rightattnos[i]);
+
+ i++, j++;
+ }
+
+ /* Handle non-merged left-child columns */
+ ic = 0;
+ for (jc = 0; jc < leftcolinfo->num_new_cols; jc++)
+ {
+ char *child_colname = leftcolinfo->new_colnames[jc];
+
+ if (!leftcolinfo->is_new_col[jc])
+ {
+ /* Advance ic to next non-dropped old column of left child */
+ while (ic < leftcolinfo->num_cols &&
+ leftcolinfo->colnames[ic] == NULL)
+ ic++;
+ Assert(ic < leftcolinfo->num_cols);
+ ic++;
+ /* If it is a merged column, we already processed it */
+ if (bms_is_member(ic, leftmerged))
+ continue;
+ /* Else, advance i to the corresponding existing join column */
+ while (i < colinfo->num_cols &&
+ colinfo->colnames[i] == NULL)
+ i++;
+ Assert(i < colinfo->num_cols);
+ Assert(ic == colinfo->leftattnos[i]);
+ /* Use the already-assigned name of this column */
+ colinfo->new_colnames[j] = colinfo->colnames[i];
+ i++;
+ }
+ else
+ {
+ /*
+ * Unique-ify the new child column name and assign, unless we're
+ * in an unnamed join, in which case just copy
+ */
+ if (rte->alias != NULL)
+ {
+ colinfo->new_colnames[j] =
+ make_colname_unique(child_colname, dpns, colinfo);
+ if (!changed_any &&
+ strcmp(colinfo->new_colnames[j], child_colname) != 0)
+ changed_any = true;
+ }
+ else
+ colinfo->new_colnames[j] = child_colname;
+ }
+
+ colinfo->is_new_col[j] = leftcolinfo->is_new_col[jc];
+ j++;
+ }
+
+ /* Handle non-merged right-child columns in exactly the same way */
+ ic = 0;
+ for (jc = 0; jc < rightcolinfo->num_new_cols; jc++)
+ {
+ char *child_colname = rightcolinfo->new_colnames[jc];
+
+ if (!rightcolinfo->is_new_col[jc])
+ {
+ /* Advance ic to next non-dropped old column of right child */
+ while (ic < rightcolinfo->num_cols &&
+ rightcolinfo->colnames[ic] == NULL)
+ ic++;
+ Assert(ic < rightcolinfo->num_cols);
+ ic++;
+ /* If it is a merged column, we already processed it */
+ if (bms_is_member(ic, rightmerged))
+ continue;
+ /* Else, advance i to the corresponding existing join column */
+ while (i < colinfo->num_cols &&
+ colinfo->colnames[i] == NULL)
+ i++;
+ Assert(i < colinfo->num_cols);
+ Assert(ic == colinfo->rightattnos[i]);
+ /* Use the already-assigned name of this column */
+ colinfo->new_colnames[j] = colinfo->colnames[i];
+ i++;
+ }
+ else
+ {
+ /*
+ * Unique-ify the new child column name and assign, unless we're
+ * in an unnamed join, in which case just copy
+ */
+ if (rte->alias != NULL)
+ {
+ colinfo->new_colnames[j] =
+ make_colname_unique(child_colname, dpns, colinfo);
+ if (!changed_any &&
+ strcmp(colinfo->new_colnames[j], child_colname) != 0)
+ changed_any = true;
+ }
+ else
+ colinfo->new_colnames[j] = child_colname;
+ }
+
+ colinfo->is_new_col[j] = rightcolinfo->is_new_col[jc];
+ j++;
+ }
+
+ /* Assert we processed the right number of columns */
+#ifdef USE_ASSERT_CHECKING
+ while (i < colinfo->num_cols && colinfo->colnames[i] == NULL)
+ i++;
+ Assert(i == colinfo->num_cols);
+ Assert(j == nnewcolumns);
+#endif
+
+ /*
+ * For a named join, print column aliases if we changed any from the child
+ * names. Unnamed joins cannot print aliases.
+ */
+ if (rte->alias != NULL)
+ colinfo->printaliases = changed_any;
+ else
+ colinfo->printaliases = false;
+}
+
+/*
+ * colname_is_unique: is colname distinct from already-chosen column names?
+ *
+ * dpns is query-wide info, colinfo is for the column's RTE
+ */
+static bool
+colname_is_unique(const char *colname, deparse_namespace *dpns,
+ deparse_columns *colinfo)
+{
+ int i;
+ ListCell *lc;
+
+ /* Check against already-assigned column aliases within RTE */
+ for (i = 0; i < colinfo->num_cols; i++)
+ {
+ char *oldname = colinfo->colnames[i];
+
+ if (oldname && strcmp(oldname, colname) == 0)
+ return false;
+ }
+
+ /*
+ * If we're building a new_colnames array, check that too (this will be
+ * partially but not completely redundant with the previous checks)
+ */
+ for (i = 0; i < colinfo->num_new_cols; i++)
+ {
+ char *oldname = colinfo->new_colnames[i];
+
+ if (oldname && strcmp(oldname, colname) == 0)
+ return false;
+ }
+
+ /* Also check against USING-column names that must be globally unique */
+ foreach(lc, dpns->using_names)
+ {
+ char *oldname = (char *) lfirst(lc);
+
+ if (strcmp(oldname, colname) == 0)
+ return false;
+ }
+
+ /* Also check against names already assigned for parent-join USING cols */
+ foreach(lc, colinfo->parentUsing)
+ {
+ char *oldname = (char *) lfirst(lc);
+
+ if (strcmp(oldname, colname) == 0)
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * make_colname_unique: modify colname if necessary to make it unique
+ *
+ * dpns is query-wide info, colinfo is for the column's RTE
+ */
+static char *
+make_colname_unique(char *colname, deparse_namespace *dpns,
+ deparse_columns *colinfo)
+{
+ /*
+ * If the selected name isn't unique, append digits to make it so. For a
+ * very long input name, we might have to truncate to stay within
+ * NAMEDATALEN.
+ */
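+ /*
+ * For example (illustrative): if "id" is already taken, we try "id_1",
+ * "id_2", and so on; for a base name close to NAMEDATALEN we clip
+ * characters off its end (multibyte-safely, via pg_mbcliplen) so that
+ * the numeric suffix always fits.
+ */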
+ if (!colname_is_unique(colname, dpns, colinfo))
+ {
+ int colnamelen = strlen(colname);
+ char *modname = (char *) palloc(colnamelen + 16);
+ int i = 0;
+
+ do
+ {
+ i++;
+ for (;;)
+ {
+ memcpy(modname, colname, colnamelen);
+ sprintf(modname + colnamelen, "_%d", i);
+ if (strlen(modname) < NAMEDATALEN)
+ break;
+ /* drop chars from colname to keep all the digits */
+ colnamelen = pg_mbcliplen(colname, colnamelen,
+ colnamelen - 1);
+ }
+ } while (!colname_is_unique(modname, dpns, colinfo));
+ colname = modname;
+ }
+ return colname;
+}
+
+/*
+ * expand_colnames_array_to: make colinfo->colnames at least n items long
+ *
+ * Any added array entries are initialized to zero.
+ */
+static void
+expand_colnames_array_to(deparse_columns *colinfo, int n)
+{
+ if (n > colinfo->num_cols)
+ {
+ if (colinfo->colnames == NULL)
+ colinfo->colnames = (char **) palloc0(n * sizeof(char *));
+ else
+ {
+ colinfo->colnames = (char **) repalloc(colinfo->colnames,
+ n * sizeof(char *));
+ memset(colinfo->colnames + colinfo->num_cols, 0,
+ (n - colinfo->num_cols) * sizeof(char *));
+ }
+ colinfo->num_cols = n;
+ }
+}
+
+/*
+ * identify_join_columns: figure out where columns of a join come from
+ *
+ * Fills the join-specific fields of the colinfo struct, except for
+ * usingNames which is filled later.
+ */
+static void
+identify_join_columns(JoinExpr *j, RangeTblEntry *jrte,
+ deparse_columns *colinfo)
+{
+ int numjoincols;
+ int jcolno;
+ int rcolno;
+ ListCell *lc;
+
+ /* Extract left/right child RT indexes */
+ if (IsA(j->larg, RangeTblRef))
+ colinfo->leftrti = ((RangeTblRef *) j->larg)->rtindex;
+ else if (IsA(j->larg, JoinExpr))
+ colinfo->leftrti = ((JoinExpr *) j->larg)->rtindex;
+ else
+ elog(ERROR, "unrecognized node type in jointree: %d",
+ (int) nodeTag(j->larg));
+ if (IsA(j->rarg, RangeTblRef))
+ colinfo->rightrti = ((RangeTblRef *) j->rarg)->rtindex;
+ else if (IsA(j->rarg, JoinExpr))
+ colinfo->rightrti = ((JoinExpr *) j->rarg)->rtindex;
+ else
+ elog(ERROR, "unrecognized node type in jointree: %d",
+ (int) nodeTag(j->rarg));
+
+ /* Assert children will be processed earlier than join in second pass */
+ Assert(colinfo->leftrti < j->rtindex);
+ Assert(colinfo->rightrti < j->rtindex);
+
+ /* Initialize result arrays with zeroes */
+ numjoincols = list_length(jrte->joinaliasvars);
+ Assert(numjoincols == list_length(jrte->eref->colnames));
+ colinfo->leftattnos = (int *) palloc0(numjoincols * sizeof(int));
+ colinfo->rightattnos = (int *) palloc0(numjoincols * sizeof(int));
+
+ /*
+ * Deconstruct RTE's joinleftcols/joinrightcols into desired format.
+ * Recall that the column(s) merged due to USING are the first column(s)
+ * of the join output. We need not do anything special while scanning
+ * joinleftcols, but while scanning joinrightcols we must distinguish
+ * merged from unmerged columns.
+ */
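+ /*
+ * For example (illustrative): for "t1(id, a) JOIN t2(id, b) USING (id)"
+ * the join output is (id, a, b) with joinmergedcols = 1, so we end up
+ * with leftattnos = {1, 2, 0} and rightattnos = {1, 0, 2}; a zero means
+ * the output column is not sourced from that input.
+ */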
+ jcolno = 0;
+ foreach(lc, jrte->joinleftcols)
+ {
+ int leftattno = lfirst_int(lc);
+
+ colinfo->leftattnos[jcolno++] = leftattno;
+ }
+ rcolno = 0;
+ foreach(lc, jrte->joinrightcols)
+ {
+ int rightattno = lfirst_int(lc);
+
+ if (rcolno < jrte->joinmergedcols) /* merged column? */
+ colinfo->rightattnos[rcolno] = rightattno;
+ else
+ colinfo->rightattnos[jcolno++] = rightattno;
+ rcolno++;
+ }
+ Assert(jcolno == numjoincols);
+}
+
+/*
+ * get_rtable_name: convenience function to get a previously assigned RTE alias
+ *
+ * The RTE must belong to the topmost namespace level in "context".
+ */
+static char *
+get_rtable_name(int rtindex, deparse_context *context)
+{
+ deparse_namespace *dpns = (deparse_namespace *) linitial(context->namespaces);
+
+ Assert(rtindex > 0 && rtindex <= list_length(dpns->rtable_names));
+ return (char *) list_nth(dpns->rtable_names, rtindex - 1);
+}
+
+/*
+ * set_deparse_plan: set up deparse_namespace to parse subexpressions
+ * of a given Plan node
+ *
+ * This sets the plan, outer_plan, inner_plan, outer_tlist, inner_tlist,
+ * and index_tlist fields. Caller must already have adjusted the ancestors
+ * list if necessary. Note that the rtable, subplans, and ctes fields do
+ * not need to change when shifting attention to different plan nodes in a
+ * single plan tree.
+ */
+static void
+set_deparse_plan(deparse_namespace *dpns, Plan *plan)
+{
+ dpns->plan = plan;
+
+ /*
+ * We special-case Append and MergeAppend to pretend that the first child
+ * plan is the OUTER referent; we have to interpret OUTER Vars in their
+ * tlists according to one of the children, and the first one is the most
+ * natural choice.
+ */
+ if (IsA(plan, Append))
+ dpns->outer_plan = linitial(((Append *) plan)->appendplans);
+ else if (IsA(plan, MergeAppend))
+ dpns->outer_plan = linitial(((MergeAppend *) plan)->mergeplans);
+ else
+ dpns->outer_plan = outerPlan(plan);
+
+ if (dpns->outer_plan)
+ dpns->outer_tlist = dpns->outer_plan->targetlist;
+ else
+ dpns->outer_tlist = NIL;
+
+ /*
+ * For a SubqueryScan, pretend the subplan is INNER referent. (We don't
+ * use OUTER because that could someday conflict with the normal meaning.)
+ * Likewise, for a CteScan, pretend the subquery's plan is INNER referent.
+ * For a WorkTableScan, locate the parent RecursiveUnion plan node and use
+ * that as INNER referent.
+ *
+ * For MERGE, make the inner tlist point to the merge source tlist, which
+ * is the same as the targetlist that the ModifyTable's source plan
+ * provides. For ON CONFLICT .. UPDATE we just need the inner tlist to
+ * point to the excluded expression's tlist. (As with SubqueryScan, we
+ * don't want to reuse OUTER; it's used for RETURNING in some ModifyTable
+ * cases, although not for INSERT .. ON CONFLICT.)
+ */
+ if (IsA(plan, SubqueryScan))
+ dpns->inner_plan = ((SubqueryScan *) plan)->subplan;
+ else if (IsA(plan, CteScan))
+ dpns->inner_plan = list_nth(dpns->subplans,
+ ((CteScan *) plan)->ctePlanId - 1);
+ else if (IsA(plan, WorkTableScan))
+ dpns->inner_plan = find_recursive_union(dpns,
+ (WorkTableScan *) plan);
+ else if (IsA(plan, ModifyTable))
+ dpns->inner_plan = plan;
+ else
+ dpns->inner_plan = innerPlan(plan);
+
+ if (IsA(plan, ModifyTable))
+ {
+ if (((ModifyTable *) plan)->operation == CMD_MERGE)
+ dpns->inner_tlist = dpns->outer_tlist;
+ else
+ dpns->inner_tlist = ((ModifyTable *) plan)->exclRelTlist;
+ }
+ else if (dpns->inner_plan)
+ dpns->inner_tlist = dpns->inner_plan->targetlist;
+ else
+ dpns->inner_tlist = NIL;
+
+ /* Set up referent for INDEX_VAR Vars, if needed */
+ if (IsA(plan, IndexOnlyScan))
+ dpns->index_tlist = ((IndexOnlyScan *) plan)->indextlist;
+ else if (IsA(plan, ForeignScan))
+ dpns->index_tlist = ((ForeignScan *) plan)->fdw_scan_tlist;
+ else if (IsA(plan, CustomScan))
+ dpns->index_tlist = ((CustomScan *) plan)->custom_scan_tlist;
+ else
+ dpns->index_tlist = NIL;
+}
+
+/*
+ * Locate the ancestor plan node that is the RecursiveUnion generating
+ * the WorkTableScan's work table. We can match on wtParam, since that
+ * should be unique within the plan tree.
+ */
+static Plan *
+find_recursive_union(deparse_namespace *dpns, WorkTableScan *wtscan)
+{
+ ListCell *lc;
+
+ foreach(lc, dpns->ancestors)
+ {
+ Plan *ancestor = (Plan *) lfirst(lc);
+
+ if (IsA(ancestor, RecursiveUnion) &&
+ ((RecursiveUnion *) ancestor)->wtParam == wtscan->wtParam)
+ return ancestor;
+ }
+ elog(ERROR, "could not find RecursiveUnion for WorkTableScan with wtParam %d",
+ wtscan->wtParam);
+ return NULL;
+}
+
+/*
+ * push_child_plan: temporarily transfer deparsing attention to a child plan
+ *
+ * When expanding an OUTER_VAR or INNER_VAR reference, we must adjust the
+ * deparse context in case the referenced expression itself uses
+ * OUTER_VAR/INNER_VAR. We modify the top stack entry in-place to avoid
+ * affecting levelsup issues (although in a Plan tree there really shouldn't
+ * be any).
+ *
+ * Caller must provide a local deparse_namespace variable to save the
+ * previous state for pop_child_plan.
+ */
+static void
+push_child_plan(deparse_namespace *dpns, Plan *plan,
+ deparse_namespace *save_dpns)
+{
+ /* Save state for restoration later */
+ *save_dpns = *dpns;
+
+ /* Link current plan node into ancestors list */
+ dpns->ancestors = lcons(dpns->plan, dpns->ancestors);
+
+ /* Set attention on selected child */
+ set_deparse_plan(dpns, plan);
+}
+
+/*
+ * pop_child_plan: undo the effects of push_child_plan
+ */
+static void
+pop_child_plan(deparse_namespace *dpns, deparse_namespace *save_dpns)
+{
+ List *ancestors;
+
+ /* Get rid of ancestors list cell added by push_child_plan */
+ ancestors = list_delete_first(dpns->ancestors);
+
+ /* Restore fields changed by push_child_plan */
+ *dpns = *save_dpns;
+
+ /* Make sure dpns->ancestors is right (may be unnecessary) */
+ dpns->ancestors = ancestors;
+}
+
+/*
+ * push_ancestor_plan: temporarily transfer deparsing attention to an
+ * ancestor plan
+ *
+ * When expanding a Param reference, we must adjust the deparse context
+ * to match the plan node that contains the expression being printed;
+ * otherwise we'd fail if that expression itself contains a Param or
+ * OUTER_VAR/INNER_VAR/INDEX_VAR variable.
+ *
+ * The target ancestor is conveniently identified by the ListCell holding it
+ * in dpns->ancestors.
+ *
+ * Caller must provide a local deparse_namespace variable to save the
+ * previous state for pop_ancestor_plan.
+ */
+static void
+push_ancestor_plan(deparse_namespace *dpns, ListCell *ancestor_cell,
+ deparse_namespace *save_dpns)
+{
+ Plan *plan = (Plan *) lfirst(ancestor_cell);
+
+ /* Save state for restoration later */
+ *save_dpns = *dpns;
+
+ /* Build a new ancestor list with just this node's ancestors */
+ dpns->ancestors =
+ list_copy_tail(dpns->ancestors,
+ list_cell_number(dpns->ancestors, ancestor_cell) + 1);
+
+ /* Set attention on selected ancestor */
+ set_deparse_plan(dpns, plan);
+}
+
+/*
+ * pop_ancestor_plan: undo the effects of push_ancestor_plan
+ */
+static void
+pop_ancestor_plan(deparse_namespace *dpns, deparse_namespace *save_dpns)
+{
+ /* Free the ancestor list made in push_ancestor_plan */
+ list_free(dpns->ancestors);
+
+ /* Restore fields changed by push_ancestor_plan */
+ *dpns = *save_dpns;
+}
+
+
+/* ----------
+ * make_ruledef - reconstruct the CREATE RULE command
+ * for a given pg_rewrite tuple
+ * ----------
+ */
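+/*
+ * Typical output (illustrative, with made-up names):
+ *     CREATE RULE myrule AS ON UPDATE TO mytab WHERE <qual> DO INSTEAD <action>;
+ * Multiple actions are wrapped in parentheses and separated by semicolons.
+ */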
+static void
+make_ruledef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
+ int prettyFlags)
+{
+ char *rulename;
+ char ev_type;
+ Oid ev_class;
+ bool is_instead;
+ char *ev_qual;
+ char *ev_action;
+ List *actions;
+ Relation ev_relation;
+ TupleDesc viewResultDesc = NULL;
+ int fno;
+ Datum dat;
+ bool isnull;
+
+ /*
+ * Get the attribute values from the rules tuple
+ */
+ fno = SPI_fnumber(rulettc, "rulename");
+ dat = SPI_getbinval(ruletup, rulettc, fno, &isnull);
+ Assert(!isnull);
+ rulename = NameStr(*(DatumGetName(dat)));
+
+ fno = SPI_fnumber(rulettc, "ev_type");
+ dat = SPI_getbinval(ruletup, rulettc, fno, &isnull);
+ Assert(!isnull);
+ ev_type = DatumGetChar(dat);
+
+ fno = SPI_fnumber(rulettc, "ev_class");
+ dat = SPI_getbinval(ruletup, rulettc, fno, &isnull);
+ Assert(!isnull);
+ ev_class = DatumGetObjectId(dat);
+
+ fno = SPI_fnumber(rulettc, "is_instead");
+ dat = SPI_getbinval(ruletup, rulettc, fno, &isnull);
+ Assert(!isnull);
+ is_instead = DatumGetBool(dat);
+
+ fno = SPI_fnumber(rulettc, "ev_qual");
+ ev_qual = SPI_getvalue(ruletup, rulettc, fno);
+ Assert(ev_qual != NULL);
+
+ fno = SPI_fnumber(rulettc, "ev_action");
+ ev_action = SPI_getvalue(ruletup, rulettc, fno);
+ Assert(ev_action != NULL);
+ actions = (List *) stringToNode(ev_action);
+ if (actions == NIL)
+ elog(ERROR, "invalid empty ev_action list");
+
+ ev_relation = table_open(ev_class, AccessShareLock);
+
+ /*
+ * Build the rule's definition text
+ */
+ appendStringInfo(buf, "CREATE RULE %s AS",
+ quote_identifier(rulename));
+
+ if (prettyFlags & PRETTYFLAG_INDENT)
+ appendStringInfoString(buf, "\n ON ");
+ else
+ appendStringInfoString(buf, " ON ");
+
+ /* The event the rule is fired for */
+ switch (ev_type)
+ {
+ case '1':
+ appendStringInfoString(buf, "SELECT");
+ viewResultDesc = RelationGetDescr(ev_relation);
+ break;
+
+ case '2':
+ appendStringInfoString(buf, "UPDATE");
+ break;
+
+ case '3':
+ appendStringInfoString(buf, "INSERT");
+ break;
+
+ case '4':
+ appendStringInfoString(buf, "DELETE");
+ break;
+
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("rule \"%s\" has unsupported event type %d",
+ rulename, ev_type)));
+ break;
+ }
+
+ /* The relation the rule is fired on */
+ appendStringInfo(buf, " TO %s",
+ (prettyFlags & PRETTYFLAG_SCHEMA) ?
+ generate_relation_name(ev_class, NIL) :
+ generate_qualified_relation_name(ev_class));
+
+ /* If the rule has an event qualification, add it */
+ if (strcmp(ev_qual, "<>") != 0)
+ {
+ Node *qual;
+ Query *query;
+ deparse_context context;
+ deparse_namespace dpns;
+
+ if (prettyFlags & PRETTYFLAG_INDENT)
+ appendStringInfoString(buf, "\n ");
+ appendStringInfoString(buf, " WHERE ");
+
+ qual = stringToNode(ev_qual);
+
+ /*
+ * We need to make a context for recognizing any Vars in the qual
+ * (which can only be references to OLD and NEW). Use the rtable of
+ * the first query in the action list for this purpose.
+ */
+ query = (Query *) linitial(actions);
+
+ /*
+ * If the action is INSERT...SELECT, OLD/NEW have been pushed down
+ * into the SELECT, and that's what we need to look at. (Ugly kluge
+ * ... try to fix this when we redesign querytrees.)
+ */
+ query = getInsertSelectQuery(query, NULL);
+
+ /* Must acquire locks right away; see notes in get_query_def() */
+ AcquireRewriteLocks(query, false, false);
+
+ context.buf = buf;
+ context.namespaces = list_make1(&dpns);
+ context.windowClause = NIL;
+ context.windowTList = NIL;
+ context.varprefix = (list_length(query->rtable) != 1);
+ context.prettyFlags = prettyFlags;
+ context.wrapColumn = WRAP_COLUMN_DEFAULT;
+ context.indentLevel = PRETTYINDENT_STD;
+ context.special_exprkind = EXPR_KIND_NONE;
+ context.appendparents = NULL;
+
+ set_deparse_for_query(&dpns, query, NIL);
+
+ get_rule_expr(qual, &context, false);
+ }
+
+ appendStringInfoString(buf, " DO ");
+
+ /* The INSTEAD keyword, if the rule is an INSTEAD rule */
+ if (is_instead)
+ appendStringInfoString(buf, "INSTEAD ");
+
+ /* Finally the rule's actions */
+ if (list_length(actions) > 1)
+ {
+ ListCell *action;
+ Query *query;
+
+ appendStringInfoChar(buf, '(');
+ foreach(action, actions)
+ {
+ query = (Query *) lfirst(action);
+ get_query_def(query, buf, NIL, viewResultDesc, true,
+ prettyFlags, WRAP_COLUMN_DEFAULT, 0);
+ if (prettyFlags)
+ appendStringInfoString(buf, ";\n");
+ else
+ appendStringInfoString(buf, "; ");
+ }
+ appendStringInfoString(buf, ");");
+ }
+ else
+ {
+ Query *query;
+
+ query = (Query *) linitial(actions);
+ get_query_def(query, buf, NIL, viewResultDesc, true,
+ prettyFlags, WRAP_COLUMN_DEFAULT, 0);
+ appendStringInfoChar(buf, ';');
+ }
+
+ table_close(ev_relation, AccessShareLock);
+}
+
+
+/* ----------
+ * make_viewdef - reconstruct the SELECT part of a
+ * view rewrite rule
+ * ----------
+ */
+static void
+make_viewdef(StringInfo buf, HeapTuple ruletup, TupleDesc rulettc,
+ int prettyFlags, int wrapColumn)
+{
+ Query *query;
+ char ev_type;
+ Oid ev_class;
+ bool is_instead;
+ char *ev_qual;
+ char *ev_action;
+ List *actions;
+ Relation ev_relation;
+ int fno;
+ Datum dat;
+ bool isnull;
+
+ /*
+ * Get the attribute values from the rules tuple
+ */
+ fno = SPI_fnumber(rulettc, "ev_type");
+ dat = SPI_getbinval(ruletup, rulettc, fno, &isnull);
+ Assert(!isnull);
+ ev_type = DatumGetChar(dat);
+
+ fno = SPI_fnumber(rulettc, "ev_class");
+ dat = SPI_getbinval(ruletup, rulettc, fno, &isnull);
+ Assert(!isnull);
+ ev_class = DatumGetObjectId(dat);
+
+ fno = SPI_fnumber(rulettc, "is_instead");
+ dat = SPI_getbinval(ruletup, rulettc, fno, &isnull);
+ Assert(!isnull);
+ is_instead = DatumGetBool(dat);
+
+ fno = SPI_fnumber(rulettc, "ev_qual");
+ ev_qual = SPI_getvalue(ruletup, rulettc, fno);
+ Assert(ev_qual != NULL);
+
+ fno = SPI_fnumber(rulettc, "ev_action");
+ ev_action = SPI_getvalue(ruletup, rulettc, fno);
+ Assert(ev_action != NULL);
+ actions = (List *) stringToNode(ev_action);
+
+ if (list_length(actions) != 1)
+ {
+ /* keep output buffer empty and leave */
+ return;
+ }
+
+ query = (Query *) linitial(actions);
+
+ if (ev_type != '1' || !is_instead ||
+ strcmp(ev_qual, "<>") != 0 || query->commandType != CMD_SELECT)
+ {
+ /* keep output buffer empty and leave */
+ return;
+ }
+
+ ev_relation = table_open(ev_class, AccessShareLock);
+
+ get_query_def(query, buf, NIL, RelationGetDescr(ev_relation), true,
+ prettyFlags, wrapColumn, 0);
+ appendStringInfoChar(buf, ';');
+
+ table_close(ev_relation, AccessShareLock);
+}
+
+
+/* ----------
+ * get_query_def - Parse back one query parsetree
+ *
+ * query: parsetree to be displayed
+ * buf: output text is appended to buf
+ * parentnamespace: list (initially empty) of outer-level deparse_namespace's
+ * resultDesc: if not NULL, the output tuple descriptor for the view
+ * represented by a SELECT query. We use the column names from it
+ * to label SELECT output columns, in preference to names in the query
+ * colNamesVisible: true if the surrounding context cares about the output
+ * column names at all (as, for example, an EXISTS() context does not);
+ * when false, we can suppress dummy column labels such as "?column?"
+ * prettyFlags: bitmask of PRETTYFLAG_XXX options
+ * wrapColumn: maximum line length, or -1 to disable wrapping
+ * startIndent: initial indentation amount
+ * ----------
+ */
+static void
+get_query_def(Query *query, StringInfo buf, List *parentnamespace,
+ TupleDesc resultDesc, bool colNamesVisible,
+ int prettyFlags, int wrapColumn, int startIndent)
+{
+ deparse_context context;
+ deparse_namespace dpns;
+
+ /* Guard against excessively long or deeply-nested queries */
+ CHECK_FOR_INTERRUPTS();
+ check_stack_depth();
+
+ /*
+ * Before we begin to examine the query, acquire locks on referenced
+ * relations, and fix up deleted columns in JOIN RTEs. This ensures
+ * consistent results. Note we assume it's OK to scribble on the passed
+ * querytree!
+ *
+ * We are only deparsing the query (we are not about to execute it), so we
+ * only need AccessShareLock on the relations it mentions.
+ */
+ AcquireRewriteLocks(query, false, false);
+
+ context.buf = buf;
+ context.namespaces = lcons(&dpns, list_copy(parentnamespace));
+ context.windowClause = NIL;
+ context.windowTList = NIL;
+ context.varprefix = (parentnamespace != NIL ||
+ list_length(query->rtable) != 1);
+ context.prettyFlags = prettyFlags;
+ context.wrapColumn = wrapColumn;
+ context.indentLevel = startIndent;
+ context.special_exprkind = EXPR_KIND_NONE;
+ context.appendparents = NULL;
+
+ set_deparse_for_query(&dpns, query, parentnamespace);
+
+ switch (query->commandType)
+ {
+ case CMD_SELECT:
+ get_select_query_def(query, &context, resultDesc, colNamesVisible);
+ break;
+
+ case CMD_UPDATE:
+ get_update_query_def(query, &context, colNamesVisible);
+ break;
+
+ case CMD_INSERT:
+ get_insert_query_def(query, &context, colNamesVisible);
+ break;
+
+ case CMD_DELETE:
+ get_delete_query_def(query, &context, colNamesVisible);
+ break;
+
+ case CMD_MERGE:
+ get_merge_query_def(query, &context, colNamesVisible);
+ break;
+
+ case CMD_NOTHING:
+ appendStringInfoString(buf, "NOTHING");
+ break;
+
+ case CMD_UTILITY:
+ get_utility_query_def(query, &context);
+ break;
+
+ default:
+ elog(ERROR, "unrecognized query command type: %d",
+ query->commandType);
+ break;
+ }
+}
+
+/* ----------
+ * get_values_def - Parse back a VALUES list
+ * ----------
+ */
+static void
+get_values_def(List *values_lists, deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ bool first_list = true;
+ ListCell *vtl;
+
+ appendStringInfoString(buf, "VALUES ");
+
+ foreach(vtl, values_lists)
+ {
+ List *sublist = (List *) lfirst(vtl);
+ bool first_col = true;
+ ListCell *lc;
+
+ if (first_list)
+ first_list = false;
+ else
+ appendStringInfoString(buf, ", ");
+
+ appendStringInfoChar(buf, '(');
+ foreach(lc, sublist)
+ {
+ Node *col = (Node *) lfirst(lc);
+
+ if (first_col)
+ first_col = false;
+ else
+ appendStringInfoChar(buf, ',');
+
+ /*
+ * Print the value. Whole-row Vars need special treatment.
+ */
+ get_rule_expr_toplevel(col, context, false);
+ }
+ appendStringInfoChar(buf, ')');
+ }
+}
+
+/* ----------
+ * get_with_clause - Parse back a WITH clause
+ * ----------
+ */
+static void
+get_with_clause(Query *query, deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ const char *sep;
+ ListCell *l;
+
+ if (query->cteList == NIL)
+ return;
+
+ if (PRETTY_INDENT(context))
+ {
+ context->indentLevel += PRETTYINDENT_STD;
+ appendStringInfoChar(buf, ' ');
+ }
+
+ if (query->hasRecursive)
+ sep = "WITH RECURSIVE ";
+ else
+ sep = "WITH ";
+ foreach(l, query->cteList)
+ {
+ CommonTableExpr *cte = (CommonTableExpr *) lfirst(l);
+
+ appendStringInfoString(buf, sep);
+ appendStringInfoString(buf, quote_identifier(cte->ctename));
+ if (cte->aliascolnames)
+ {
+ bool first = true;
+ ListCell *col;
+
+ appendStringInfoChar(buf, '(');
+ foreach(col, cte->aliascolnames)
+ {
+ if (first)
+ first = false;
+ else
+ appendStringInfoString(buf, ", ");
+ appendStringInfoString(buf,
+ quote_identifier(strVal(lfirst(col))));
+ }
+ appendStringInfoChar(buf, ')');
+ }
+ appendStringInfoString(buf, " AS ");
+ switch (cte->ctematerialized)
+ {
+ case CTEMaterializeDefault:
+ break;
+ case CTEMaterializeAlways:
+ appendStringInfoString(buf, "MATERIALIZED ");
+ break;
+ case CTEMaterializeNever:
+ appendStringInfoString(buf, "NOT MATERIALIZED ");
+ break;
+ }
+ appendStringInfoChar(buf, '(');
+ if (PRETTY_INDENT(context))
+ appendContextKeyword(context, "", 0, 0, 0);
+ get_query_def((Query *) cte->ctequery, buf, context->namespaces, NULL,
+ true,
+ context->prettyFlags, context->wrapColumn,
+ context->indentLevel);
+ if (PRETTY_INDENT(context))
+ appendContextKeyword(context, "", 0, 0, 0);
+ appendStringInfoChar(buf, ')');
+
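+ /*
+ * Deparse the optional SEARCH clause; the result looks like, e.g.
+ * (illustrative): SEARCH DEPTH FIRST BY id, parent_id SET ordercol
+ */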
+ if (cte->search_clause)
+ {
+ bool first = true;
+ ListCell *lc;
+
+ appendStringInfo(buf, " SEARCH %s FIRST BY ",
+ cte->search_clause->search_breadth_first ? "BREADTH" : "DEPTH");
+
+ foreach(lc, cte->search_clause->search_col_list)
+ {
+ if (first)
+ first = false;
+ else
+ appendStringInfoString(buf, ", ");
+ appendStringInfoString(buf,
+ quote_identifier(strVal(lfirst(lc))));
+ }
+
+ appendStringInfo(buf, " SET %s", quote_identifier(cte->search_clause->search_seq_column));
+ }
+
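+ /*
+ * Deparse the optional CYCLE clause; the result looks like, e.g.
+ * (illustrative): CYCLE id SET is_cycle USING path
+ * The "TO ... DEFAULT ..." part is emitted only when the cycle mark
+ * values are not the default true/false pair.
+ */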
+ if (cte->cycle_clause)
+ {
+ bool first = true;
+ ListCell *lc;
+
+ appendStringInfoString(buf, " CYCLE ");
+
+ foreach(lc, cte->cycle_clause->cycle_col_list)
+ {
+ if (first)
+ first = false;
+ else
+ appendStringInfoString(buf, ", ");
+ appendStringInfoString(buf,
+ quote_identifier(strVal(lfirst(lc))));
+ }
+
+ appendStringInfo(buf, " SET %s", quote_identifier(cte->cycle_clause->cycle_mark_column));
+
+ {
+ Const *cmv = castNode(Const, cte->cycle_clause->cycle_mark_value);
+ Const *cmd = castNode(Const, cte->cycle_clause->cycle_mark_default);
+
+ if (!(cmv->consttype == BOOLOID && !cmv->constisnull && DatumGetBool(cmv->constvalue) == true &&
+ cmd->consttype == BOOLOID && !cmd->constisnull && DatumGetBool(cmd->constvalue) == false))
+ {
+ appendStringInfoString(buf, " TO ");
+ get_rule_expr(cte->cycle_clause->cycle_mark_value, context, false);
+ appendStringInfoString(buf, " DEFAULT ");
+ get_rule_expr(cte->cycle_clause->cycle_mark_default, context, false);
+ }
+ }
+
+ appendStringInfo(buf, " USING %s", quote_identifier(cte->cycle_clause->cycle_path_column));
+ }
+
+ sep = ", ";
+ }
+
+ if (PRETTY_INDENT(context))
+ {
+ context->indentLevel -= PRETTYINDENT_STD;
+ appendContextKeyword(context, "", 0, 0, 0);
+ }
+ else
+ appendStringInfoChar(buf, ' ');
+}
+
+/* ----------
+ * get_select_query_def - Parse back a SELECT parsetree
+ * ----------
+ */
+static void
+get_select_query_def(Query *query, deparse_context *context,
+ TupleDesc resultDesc, bool colNamesVisible)
+{
+ StringInfo buf = context->buf;
+ List *save_windowclause;
+ List *save_windowtlist;
+ bool force_colno;
+ ListCell *l;
+
+ /* Insert the WITH clause if given */
+ get_with_clause(query, context);
+
+ /* Set up context for possible window functions */
+ save_windowclause = context->windowClause;
+ context->windowClause = query->windowClause;
+ save_windowtlist = context->windowTList;
+ context->windowTList = query->targetList;
+
+ /*
+ * If the Query node has a setOperations tree, then it's the top level of
+ * a UNION/INTERSECT/EXCEPT query; only the WITH, ORDER BY and LIMIT
+ * fields are interesting in the top query itself.
+ */
+ if (query->setOperations)
+ {
+ get_setop_query(query->setOperations, query, context, resultDesc,
+ colNamesVisible);
+ /* ORDER BY clauses must be simple in this case */
+ force_colno = true;
+ }
+ else
+ {
+ get_basic_select_query(query, context, resultDesc, colNamesVisible);
+ force_colno = false;
+ }
+
+ /* Add the ORDER BY clause if given */
+ if (query->sortClause != NIL)
+ {
+ appendContextKeyword(context, " ORDER BY ",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+ get_rule_orderby(query->sortClause, query->targetList,
+ force_colno, context);
+ }
+
+ /*
+ * Add the LIMIT/OFFSET clauses if given. If non-default limit options are
+ * in use, fall back to the SQL-standard FETCH FIRST spelling.
+ */
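+ /*
+ * For example (illustrative): a WITH TIES limit deparses as
+ * "FETCH FIRST <count> ROWS WITH TIES", while an ordinary limit deparses
+ * as "LIMIT <count>" (or "LIMIT ALL" when the count is a null constant).
+ */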
+ if (query->limitOffset != NULL)
+ {
+ appendContextKeyword(context, " OFFSET ",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
+ get_rule_expr(query->limitOffset, context, false);
+ }
+ if (query->limitCount != NULL)
+ {
+ if (query->limitOption == LIMIT_OPTION_WITH_TIES)
+ {
+ appendContextKeyword(context, " FETCH FIRST ",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
+ get_rule_expr(query->limitCount, context, false);
+ appendStringInfoString(buf, " ROWS WITH TIES");
+ }
+ else
+ {
+ appendContextKeyword(context, " LIMIT ",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
+ if (IsA(query->limitCount, Const) &&
+ ((Const *) query->limitCount)->constisnull)
+ appendStringInfoString(buf, "ALL");
+ else
+ get_rule_expr(query->limitCount, context, false);
+ }
+ }
+
+ /* Add FOR [KEY] UPDATE/SHARE clauses if present */
+ if (query->hasForUpdate)
+ {
+ foreach(l, query->rowMarks)
+ {
+ RowMarkClause *rc = (RowMarkClause *) lfirst(l);
+
+ /* don't print implicit clauses */
+ if (rc->pushedDown)
+ continue;
+
+ switch (rc->strength)
+ {
+ case LCS_NONE:
+ /* we intentionally throw an error for LCS_NONE */
+ elog(ERROR, "unrecognized LockClauseStrength %d",
+ (int) rc->strength);
+ break;
+ case LCS_FORKEYSHARE:
+ appendContextKeyword(context, " FOR KEY SHARE",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
+ break;
+ case LCS_FORSHARE:
+ appendContextKeyword(context, " FOR SHARE",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
+ break;
+ case LCS_FORNOKEYUPDATE:
+ appendContextKeyword(context, " FOR NO KEY UPDATE",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
+ break;
+ case LCS_FORUPDATE:
+ appendContextKeyword(context, " FOR UPDATE",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
+ break;
+ }
+
+ appendStringInfo(buf, " OF %s",
+ quote_identifier(get_rtable_name(rc->rti,
+ context)));
+ if (rc->waitPolicy == LockWaitError)
+ appendStringInfoString(buf, " NOWAIT");
+ else if (rc->waitPolicy == LockWaitSkip)
+ appendStringInfoString(buf, " SKIP LOCKED");
+ }
+ }
+
+ context->windowClause = save_windowclause;
+ context->windowTList = save_windowtlist;
+}
+
+/*
+ * Detect whether query looks like SELECT ... FROM VALUES(),
+ * with no need to rename the output columns of the VALUES RTE.
+ * If so, return the VALUES RTE. Otherwise return NULL.
+ */
+static RangeTblEntry *
+get_simple_values_rte(Query *query, TupleDesc resultDesc)
+{
+ RangeTblEntry *result = NULL;
+ ListCell *lc;
+
+ /*
+ * We want to detect a match even if the Query also contains OLD or NEW
+ * rule RTEs. So the idea is to scan the rtable and see if there is only
+ * one inFromCl RTE that is a VALUES RTE.
+ */
+ foreach(lc, query->rtable)
+ {
+ RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
+
+ if (rte->rtekind == RTE_VALUES && rte->inFromCl)
+ {
+ if (result)
+ return NULL; /* multiple VALUES (probably not possible) */
+ result = rte;
+ }
+ else if (rte->rtekind == RTE_RELATION && !rte->inFromCl)
+ continue; /* ignore rule entries */
+ else
+ return NULL; /* something else -> not simple VALUES */
+ }
+
+ /*
+ * We don't need to check the targetlist in any great detail, because
+ * parser/analyze.c will never generate a "bare" VALUES RTE --- they only
+ * appear inside auto-generated sub-queries with very restricted
+ * structure. However, DefineView might have modified the tlist by
+ * injecting new column aliases, or we might have some other column
+ * aliases forced by a resultDesc. We can only simplify if the RTE's
+ * column names match the names that get_target_list() would select.
+ */
+ if (result)
+ {
+ ListCell *lcn;
+ int colno;
+
+ if (list_length(query->targetList) != list_length(result->eref->colnames))
+ return NULL; /* this probably cannot happen */
+ colno = 0;
+ forboth(lc, query->targetList, lcn, result->eref->colnames)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(lc);
+ char *cname = strVal(lfirst(lcn));
+ char *colname;
+
+ if (tle->resjunk)
+ return NULL; /* this probably cannot happen */
+
+ /* compute name that get_target_list would use for column */
+ colno++;
+ if (resultDesc && colno <= resultDesc->natts)
+ colname = NameStr(TupleDescAttr(resultDesc, colno - 1)->attname);
+ else
+ colname = tle->resname;
+
+ /* does it match the VALUES RTE? */
+ if (colname == NULL || strcmp(colname, cname) != 0)
+ return NULL; /* column name has been changed */
+ }
+ }
+
+ return result;
+}
+
+static void
+get_basic_select_query(Query *query, deparse_context *context,
+ TupleDesc resultDesc, bool colNamesVisible)
+{
+ StringInfo buf = context->buf;
+ RangeTblEntry *values_rte;
+ char *sep;
+ ListCell *l;
+
+ if (PRETTY_INDENT(context))
+ {
+ context->indentLevel += PRETTYINDENT_STD;
+ appendStringInfoChar(buf, ' ');
+ }
+
+ /*
+ * If the query looks like SELECT * FROM (VALUES ...), then print just the
+ * VALUES part. This reverses what transformValuesClause() did at parse
+ * time.
+ */
+ values_rte = get_simple_values_rte(query, resultDesc);
+ if (values_rte)
+ {
+ get_values_def(values_rte->values_lists, context);
+ return;
+ }
+
+ /*
+ * Build up the query string - first we say SELECT
+ */
+ if (query->isReturn)
+ appendStringInfoString(buf, "RETURN");
+ else
+ appendStringInfoString(buf, "SELECT");
+
+ /* Add the DISTINCT clause if given */
+ if (query->distinctClause != NIL)
+ {
+ if (query->hasDistinctOn)
+ {
+ appendStringInfoString(buf, " DISTINCT ON (");
+ sep = "";
+ foreach(l, query->distinctClause)
+ {
+ SortGroupClause *srt = (SortGroupClause *) lfirst(l);
+
+ appendStringInfoString(buf, sep);
+ get_rule_sortgroupclause(srt->tleSortGroupRef, query->targetList,
+ false, context);
+ sep = ", ";
+ }
+ appendStringInfoChar(buf, ')');
+ }
+ else
+ appendStringInfoString(buf, " DISTINCT");
+ }
+
+ /* Then we tell what to select (the targetlist) */
+ get_target_list(query->targetList, context, resultDesc, colNamesVisible);
+
+ /* Add the FROM clause if needed */
+ get_from_clause(query, " FROM ", context);
+
+ /* Add the WHERE clause if given */
+ if (query->jointree->quals != NULL)
+ {
+ appendContextKeyword(context, " WHERE ",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+ get_rule_expr(query->jointree->quals, context, false);
+ }
+
+ /* Add the GROUP BY clause if given */
+ if (query->groupClause != NULL || query->groupingSets != NULL)
+ {
+ ParseExprKind save_exprkind;
+
+ appendContextKeyword(context, " GROUP BY ",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+ if (query->groupDistinct)
+ appendStringInfoString(buf, "DISTINCT ");
+
+ save_exprkind = context->special_exprkind;
+ context->special_exprkind = EXPR_KIND_GROUP_BY;
+
+ if (query->groupingSets == NIL)
+ {
+ sep = "";
+ foreach(l, query->groupClause)
+ {
+ SortGroupClause *grp = (SortGroupClause *) lfirst(l);
+
+ appendStringInfoString(buf, sep);
+ get_rule_sortgroupclause(grp->tleSortGroupRef, query->targetList,
+ false, context);
+ sep = ", ";
+ }
+ }
+ else
+ {
+ sep = "";
+ foreach(l, query->groupingSets)
+ {
+ GroupingSet *grp = lfirst(l);
+
+ appendStringInfoString(buf, sep);
+ get_rule_groupingset(grp, query->targetList, true, context);
+ sep = ", ";
+ }
+ }
+
+ context->special_exprkind = save_exprkind;
+ }
+
+ /* Add the HAVING clause if given */
+ if (query->havingQual != NULL)
+ {
+ appendContextKeyword(context, " HAVING ",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 0);
+ get_rule_expr(query->havingQual, context, false);
+ }
+
+ /* Add the WINDOW clause if needed */
+ if (query->windowClause != NIL)
+ get_rule_windowclause(query, context);
+}
+
+/* ----------
+ * get_target_list - Parse back a SELECT target list
+ *
+ * This is also used for RETURNING lists in INSERT/UPDATE/DELETE.
+ *
+ * resultDesc and colNamesVisible are as for get_query_def()
+ * ----------
+ */
+static void
+get_target_list(List *targetList, deparse_context *context,
+ TupleDesc resultDesc, bool colNamesVisible)
+{
+ StringInfo buf = context->buf;
+ StringInfoData targetbuf;
+ bool last_was_multiline = false;
+ char *sep;
+ int colno;
+ ListCell *l;
+
+ /* we use targetbuf to hold each TLE's text temporarily */
+ initStringInfo(&targetbuf);
+
+ sep = " ";
+ colno = 0;
+ foreach(l, targetList)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(l);
+ char *colname;
+ char *attname;
+
+ if (tle->resjunk)
+ continue; /* ignore junk entries */
+
+ appendStringInfoString(buf, sep);
+ sep = ", ";
+ colno++;
+
+ /*
+ * Put the new field text into targetbuf so we can decide after we've
+ * got it whether or not it needs to go on a new line.
+ */
+ resetStringInfo(&targetbuf);
+ context->buf = &targetbuf;
+
+ /*
+ * We special-case Var nodes rather than using get_rule_expr. This is
+ * needed because get_rule_expr will display a whole-row Var as
+ * "foo.*", which is the preferred notation in most contexts, but at
+ * the top level of a SELECT list it's not right (the parser will
+ * expand that notation into multiple columns, yielding behavior
+ * different from a whole-row Var). We need to call get_variable
+ * directly so that we can tell it to do the right thing, and so that
+ * we can get the attribute name which is the default AS label.
+ */
+ if (tle->expr && (IsA(tle->expr, Var)))
+ {
+ attname = get_variable((Var *) tle->expr, 0, true, context);
+ }
+ else
+ {
+ get_rule_expr((Node *) tle->expr, context, true);
+
+ /*
+ * When colNamesVisible is true, we should always show the
+ * assigned column name explicitly. Otherwise, show it only if
+ * it's not FigureColname's fallback.
+ */
+ attname = colNamesVisible ? NULL : "?column?";
+ }
+
+ /*
+ * Figure out what the result column should be called. In the context
+ * of a view, use the view's tuple descriptor (so as to pick up the
+ * effects of any column RENAME that's been done on the view).
+ * Otherwise, just use what we can find in the TLE.
+ */
+ if (resultDesc && colno <= resultDesc->natts)
+ colname = NameStr(TupleDescAttr(resultDesc, colno - 1)->attname);
+ else
+ colname = tle->resname;
+
+ /* Show AS unless the column's name is correct as-is */
+ if (colname) /* resname could be NULL */
+ {
+ if (attname == NULL || strcmp(attname, colname) != 0)
+ appendStringInfo(&targetbuf, " AS %s", quote_identifier(colname));
+ }
+
+ /* Restore context's output buffer */
+ context->buf = buf;
+
+ /* Consider line-wrapping if enabled */
+ if (PRETTY_INDENT(context) && context->wrapColumn >= 0)
+ {
+ int leading_nl_pos;
+
+ /* Does the new field start with a new line? */
+ if (targetbuf.len > 0 && targetbuf.data[0] == '\n')
+ leading_nl_pos = 0;
+ else
+ leading_nl_pos = -1;
+
+ /* If so, we shouldn't add anything */
+ if (leading_nl_pos >= 0)
+ {
+ /* instead, remove any trailing spaces currently in buf */
+ removeStringInfoSpaces(buf);
+ }
+ else
+ {
+ char *trailing_nl;
+
+ /* Locate the start of the current line in the output buffer */
+ trailing_nl = strrchr(buf->data, '\n');
+ if (trailing_nl == NULL)
+ trailing_nl = buf->data;
+ else
+ trailing_nl++;
+
+ /*
+ * Add a newline, plus some indentation, if the new field is
+ * not the first and either the new field would cause an
+ * overflow or the last field used more than one line.
+ */
+ if (colno > 1 &&
+ ((strlen(trailing_nl) + targetbuf.len > context->wrapColumn) ||
+ last_was_multiline))
+ appendContextKeyword(context, "", -PRETTYINDENT_STD,
+ PRETTYINDENT_STD, PRETTYINDENT_VAR);
+ }
+
+ /* Remember this field's multiline status for next iteration */
+ last_was_multiline =
+ (strchr(targetbuf.data + leading_nl_pos + 1, '\n') != NULL);
+ }
+
+ /* Add the new field */
+ appendBinaryStringInfo(buf, targetbuf.data, targetbuf.len);
+ }
+
+ /* clean up */
+ pfree(targetbuf.data);
+}
+
+static void
+get_setop_query(Node *setOp, Query *query, deparse_context *context,
+ TupleDesc resultDesc, bool colNamesVisible)
+{
+ StringInfo buf = context->buf;
+ bool need_paren;
+
+ /* Guard against excessively long or deeply-nested queries */
+ CHECK_FOR_INTERRUPTS();
+ check_stack_depth();
+
+ if (IsA(setOp, RangeTblRef))
+ {
+ RangeTblRef *rtr = (RangeTblRef *) setOp;
+ RangeTblEntry *rte = rt_fetch(rtr->rtindex, query->rtable);
+ Query *subquery = rte->subquery;
+
+ Assert(subquery != NULL);
+ Assert(subquery->setOperations == NULL);
+ /* Need parens if WITH, ORDER BY, FOR UPDATE, or LIMIT; see gram.y */
+ need_paren = (subquery->cteList ||
+ subquery->sortClause ||
+ subquery->rowMarks ||
+ subquery->limitOffset ||
+ subquery->limitCount);
+ if (need_paren)
+ appendStringInfoChar(buf, '(');
+ get_query_def(subquery, buf, context->namespaces, resultDesc,
+ colNamesVisible,
+ context->prettyFlags, context->wrapColumn,
+ context->indentLevel);
+ if (need_paren)
+ appendStringInfoChar(buf, ')');
+ }
+ else if (IsA(setOp, SetOperationStmt))
+ {
+ SetOperationStmt *op = (SetOperationStmt *) setOp;
+ int subindent;
+
+ /*
+ * We force parens when nesting two SetOperationStmts, except when the
+ * lefthand input is another setop of the same kind. Syntactically,
+ * we could omit parens in rather more cases, but it seems best to use
+ * parens to flag cases where the setop operator changes. If we use
+ * parens, we also increase the indentation level for the child query.
+ *
+ * There are some cases in which parens are needed around a leaf query
+ * too, but those are more easily handled at the next level down (see
+ * code above).
+ */
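+ /*
+ * For example (illustrative): "a UNION b UNION c" is printed without
+ * parentheses, while "(a UNION b) INTERSECT c" keeps the parentheses
+ * around the lefthand UNION.
+ */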
+ if (IsA(op->larg, SetOperationStmt))
+ {
+ SetOperationStmt *lop = (SetOperationStmt *) op->larg;
+
+ if (op->op == lop->op && op->all == lop->all)
+ need_paren = false;
+ else
+ need_paren = true;
+ }
+ else
+ need_paren = false;
+
+ if (need_paren)
+ {
+ appendStringInfoChar(buf, '(');
+ subindent = PRETTYINDENT_STD;
+ appendContextKeyword(context, "", subindent, 0, 0);
+ }
+ else
+ subindent = 0;
+
+ get_setop_query(op->larg, query, context, resultDesc, colNamesVisible);
+
+ if (need_paren)
+ appendContextKeyword(context, ") ", -subindent, 0, 0);
+ else if (PRETTY_INDENT(context))
+ appendContextKeyword(context, "", -subindent, 0, 0);
+ else
+ appendStringInfoChar(buf, ' ');
+
+ switch (op->op)
+ {
+ case SETOP_UNION:
+ appendStringInfoString(buf, "UNION ");
+ break;
+ case SETOP_INTERSECT:
+ appendStringInfoString(buf, "INTERSECT ");
+ break;
+ case SETOP_EXCEPT:
+ appendStringInfoString(buf, "EXCEPT ");
+ break;
+ default:
+ elog(ERROR, "unrecognized set op: %d",
+ (int) op->op);
+ }
+ if (op->all)
+ appendStringInfoString(buf, "ALL ");
+
+ /* Always parenthesize if RHS is another setop */
+ need_paren = IsA(op->rarg, SetOperationStmt);
+
+ /*
+ * The indentation code here is deliberately a bit different from that
+ * for the lefthand input, because we want the line breaks in
+ * different places.
+ */
+ if (need_paren)
+ {
+ appendStringInfoChar(buf, '(');
+ subindent = PRETTYINDENT_STD;
+ }
+ else
+ subindent = 0;
+ appendContextKeyword(context, "", subindent, 0, 0);
+
+ get_setop_query(op->rarg, query, context, resultDesc, false);
+
+ if (PRETTY_INDENT(context))
+ context->indentLevel -= subindent;
+ if (need_paren)
+ appendContextKeyword(context, ")", 0, 0, 0);
+ }
+ else
+ {
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(setOp));
+ }
+}
+
+/*
+ * Display a sort/group clause.
+ *
+ * Also returns the expression tree, so caller need not find it again.
+ */
+static Node *
+get_rule_sortgroupclause(Index ref, List *tlist, bool force_colno,
+ deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ TargetEntry *tle;
+ Node *expr;
+
+ tle = get_sortgroupref_tle(ref, tlist);
+ expr = (Node *) tle->expr;
+
+ /*
+ * Use column-number form if requested by caller. Otherwise, if
+ * expression is a constant, force it to be dumped with an explicit cast
+ * as decoration --- this is because a simple integer constant is
+ * ambiguous (and will be misinterpreted by findTargetlistEntry()) if we
+ * dump it without any decoration. If it's anything more complex than a
+ * simple Var, then force extra parens around it, to ensure it can't be
+ * misinterpreted as a cube() or rollup() construct.
+ */
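+ /*
+ * For example (illustrative): a Const of value 2 must be printed as
+ * "2::integer" rather than bare "2", since "ORDER BY 2" would be read
+ * back as a reference to the second output column.
+ */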
+ if (force_colno)
+ {
+ Assert(!tle->resjunk);
+ appendStringInfo(buf, "%d", tle->resno);
+ }
+ else if (expr && IsA(expr, Const))
+ get_const_expr((Const *) expr, context, 1);
+ else if (!expr || IsA(expr, Var))
+ get_rule_expr(expr, context, true);
+ else
+ {
+ /*
+ * We must force parens for function-like expressions even if
+ * PRETTY_PAREN is off, since those are the ones in danger of
+ * misparsing. For other expressions we need to force them only if
+ * PRETTY_PAREN is on, since otherwise the expression will output them
+ * itself. (We can't skip the parens.)
+ */
+ bool need_paren = (PRETTY_PAREN(context)
+ || IsA(expr, FuncExpr)
+ || IsA(expr, Aggref)
+ || IsA(expr, WindowFunc));
+
+ if (need_paren)
+ appendStringInfoChar(context->buf, '(');
+ get_rule_expr(expr, context, true);
+ if (need_paren)
+ appendStringInfoChar(context->buf, ')');
+ }
+
+ return expr;
+}
+
+/*
+ * Display a GroupingSet
+ */
+static void
+get_rule_groupingset(GroupingSet *gset, List *targetlist,
+ bool omit_parens, deparse_context *context)
+{
+ ListCell *l;
+ StringInfo buf = context->buf;
+ bool omit_child_parens = true;
+ char *sep = "";
+
+ switch (gset->kind)
+ {
+ case GROUPING_SET_EMPTY:
+ appendStringInfoString(buf, "()");
+ return;
+
+ case GROUPING_SET_SIMPLE:
+ {
+ if (!omit_parens || list_length(gset->content) != 1)
+ appendStringInfoChar(buf, '(');
+
+ foreach(l, gset->content)
+ {
+ Index ref = lfirst_int(l);
+
+ appendStringInfoString(buf, sep);
+ get_rule_sortgroupclause(ref, targetlist,
+ false, context);
+ sep = ", ";
+ }
+
+ if (!omit_parens || list_length(gset->content) != 1)
+ appendStringInfoChar(buf, ')');
+ }
+ return;
+
+ case GROUPING_SET_ROLLUP:
+ appendStringInfoString(buf, "ROLLUP(");
+ break;
+ case GROUPING_SET_CUBE:
+ appendStringInfoString(buf, "CUBE(");
+ break;
+ case GROUPING_SET_SETS:
+ appendStringInfoString(buf, "GROUPING SETS (");
+ omit_child_parens = false;
+ break;
+ }
+
+ foreach(l, gset->content)
+ {
+ appendStringInfoString(buf, sep);
+ get_rule_groupingset(lfirst(l), targetlist, omit_child_parens, context);
+ sep = ", ";
+ }
+
+ appendStringInfoChar(buf, ')');
+}
+
+/*
+ * Display an ORDER BY list.
+ */
+static void
+get_rule_orderby(List *orderList, List *targetList,
+ bool force_colno, deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ const char *sep;
+ ListCell *l;
+
+ sep = "";
+ foreach(l, orderList)
+ {
+ SortGroupClause *srt = (SortGroupClause *) lfirst(l);
+ Node *sortexpr;
+ Oid sortcoltype;
+ TypeCacheEntry *typentry;
+
+ appendStringInfoString(buf, sep);
+ sortexpr = get_rule_sortgroupclause(srt->tleSortGroupRef, targetList,
+ force_colno, context);
+ sortcoltype = exprType(sortexpr);
+ /* See whether operator is default < or > for datatype */
+ typentry = lookup_type_cache(sortcoltype,
+ TYPECACHE_LT_OPR | TYPECACHE_GT_OPR);
+ if (srt->sortop == typentry->lt_opr)
+ {
+ /* ASC is default, so emit nothing for it */
+ if (srt->nulls_first)
+ appendStringInfoString(buf, " NULLS FIRST");
+ }
+ else if (srt->sortop == typentry->gt_opr)
+ {
+ appendStringInfoString(buf, " DESC");
+ /* DESC defaults to NULLS FIRST */
+ if (!srt->nulls_first)
+ appendStringInfoString(buf, " NULLS LAST");
+ }
+ else
+ {
+ appendStringInfo(buf, " USING %s",
+ generate_operator_name(srt->sortop,
+ sortcoltype,
+ sortcoltype));
+ /* be specific to eliminate ambiguity */
+ if (srt->nulls_first)
+ appendStringInfoString(buf, " NULLS FIRST");
+ else
+ appendStringInfoString(buf, " NULLS LAST");
+ }
+ sep = ", ";
+ }
+}
+
+/*
+ * Display a WINDOW clause.
+ *
+ * Note that the windowClause list might contain only anonymous window
+ * specifications, in which case we should print nothing here.
+ */
+static void
+get_rule_windowclause(Query *query, deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ const char *sep;
+ ListCell *l;
+
+ sep = NULL;
+ foreach(l, query->windowClause)
+ {
+ WindowClause *wc = (WindowClause *) lfirst(l);
+
+ if (wc->name == NULL)
+ continue; /* ignore anonymous windows */
+
+ if (sep == NULL)
+ appendContextKeyword(context, " WINDOW ",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+ else
+ appendStringInfoString(buf, sep);
+
+ appendStringInfo(buf, "%s AS ", quote_identifier(wc->name));
+
+ get_rule_windowspec(wc, query->targetList, context);
+
+ sep = ", ";
+ }
+}
+
+/*
+ * Display a window definition
+ */
+static void
+get_rule_windowspec(WindowClause *wc, List *targetList,
+ deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ bool needspace = false;
+ const char *sep;
+ ListCell *l;
+
+ appendStringInfoChar(buf, '(');
+ if (wc->refname)
+ {
+ appendStringInfoString(buf, quote_identifier(wc->refname));
+ needspace = true;
+ }
+ /* partition clauses are always inherited, so only print if no refname */
+ if (wc->partitionClause && !wc->refname)
+ {
+ if (needspace)
+ appendStringInfoChar(buf, ' ');
+ appendStringInfoString(buf, "PARTITION BY ");
+ sep = "";
+ foreach(l, wc->partitionClause)
+ {
+ SortGroupClause *grp = (SortGroupClause *) lfirst(l);
+
+ appendStringInfoString(buf, sep);
+ get_rule_sortgroupclause(grp->tleSortGroupRef, targetList,
+ false, context);
+ sep = ", ";
+ }
+ needspace = true;
+ }
+ /* print ordering clause only if not inherited */
+ if (wc->orderClause && !wc->copiedOrder)
+ {
+ if (needspace)
+ appendStringInfoChar(buf, ' ');
+ appendStringInfoString(buf, "ORDER BY ");
+ get_rule_orderby(wc->orderClause, targetList, false, context);
+ needspace = true;
+ }
+ /* framing clause is never inherited, so print unless it's default */
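+ /*
+ * Illustrative output: a non-default frame such as
+ * "ROWS BETWEEN 1 PRECEDING AND CURRENT ROW EXCLUDE TIES" is
+ * reassembled piece by piece below from the FRAMEOPTION_* bits and the
+ * start/end offset expressions.
+ */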
+ if (wc->frameOptions & FRAMEOPTION_NONDEFAULT)
+ {
+ if (needspace)
+ appendStringInfoChar(buf, ' ');
+ if (wc->frameOptions & FRAMEOPTION_RANGE)
+ appendStringInfoString(buf, "RANGE ");
+ else if (wc->frameOptions & FRAMEOPTION_ROWS)
+ appendStringInfoString(buf, "ROWS ");
+ else if (wc->frameOptions & FRAMEOPTION_GROUPS)
+ appendStringInfoString(buf, "GROUPS ");
+ else
+ Assert(false);
+ if (wc->frameOptions & FRAMEOPTION_BETWEEN)
+ appendStringInfoString(buf, "BETWEEN ");
+ if (wc->frameOptions & FRAMEOPTION_START_UNBOUNDED_PRECEDING)
+ appendStringInfoString(buf, "UNBOUNDED PRECEDING ");
+ else if (wc->frameOptions & FRAMEOPTION_START_CURRENT_ROW)
+ appendStringInfoString(buf, "CURRENT ROW ");
+ else if (wc->frameOptions & FRAMEOPTION_START_OFFSET)
+ {
+ get_rule_expr(wc->startOffset, context, false);
+ if (wc->frameOptions & FRAMEOPTION_START_OFFSET_PRECEDING)
+ appendStringInfoString(buf, " PRECEDING ");
+ else if (wc->frameOptions & FRAMEOPTION_START_OFFSET_FOLLOWING)
+ appendStringInfoString(buf, " FOLLOWING ");
+ else
+ Assert(false);
+ }
+ else
+ Assert(false);
+ if (wc->frameOptions & FRAMEOPTION_BETWEEN)
+ {
+ appendStringInfoString(buf, "AND ");
+ if (wc->frameOptions & FRAMEOPTION_END_UNBOUNDED_FOLLOWING)
+ appendStringInfoString(buf, "UNBOUNDED FOLLOWING ");
+ else if (wc->frameOptions & FRAMEOPTION_END_CURRENT_ROW)
+ appendStringInfoString(buf, "CURRENT ROW ");
+ else if (wc->frameOptions & FRAMEOPTION_END_OFFSET)
+ {
+ get_rule_expr(wc->endOffset, context, false);
+ if (wc->frameOptions & FRAMEOPTION_END_OFFSET_PRECEDING)
+ appendStringInfoString(buf, " PRECEDING ");
+ else if (wc->frameOptions & FRAMEOPTION_END_OFFSET_FOLLOWING)
+ appendStringInfoString(buf, " FOLLOWING ");
+ else
+ Assert(false);
+ }
+ else
+ Assert(false);
+ }
+ if (wc->frameOptions & FRAMEOPTION_EXCLUDE_CURRENT_ROW)
+ appendStringInfoString(buf, "EXCLUDE CURRENT ROW ");
+ else if (wc->frameOptions & FRAMEOPTION_EXCLUDE_GROUP)
+ appendStringInfoString(buf, "EXCLUDE GROUP ");
+ else if (wc->frameOptions & FRAMEOPTION_EXCLUDE_TIES)
+ appendStringInfoString(buf, "EXCLUDE TIES ");
+ /* we will now have a trailing space; remove it */
+ buf->len--;
+ }
+ appendStringInfoChar(buf, ')');
+}
+
+/* ----------
+ * get_insert_query_def - Parse back an INSERT parsetree
+ * ----------
+ */
+static void
+get_insert_query_def(Query *query, deparse_context *context,
+ bool colNamesVisible)
+{
+ StringInfo buf = context->buf;
+ RangeTblEntry *select_rte = NULL;
+ RangeTblEntry *values_rte = NULL;
+ RangeTblEntry *rte;
+ char *sep;
+ ListCell *l;
+ List *strippedexprs;
+
+ /* Insert the WITH clause if given */
+ get_with_clause(query, context);
+
+ /*
+ * If it's an INSERT ... SELECT or multi-row VALUES, there will be a
+ * single RTE for the SELECT or VALUES.  A plain single-row VALUES has
+ * neither kind of RTE.
+ */
+ foreach(l, query->rtable)
+ {
+ rte = (RangeTblEntry *) lfirst(l);
+
+ if (rte->rtekind == RTE_SUBQUERY)
+ {
+ if (select_rte)
+ elog(ERROR, "too many subquery RTEs in INSERT");
+ select_rte = rte;
+ }
+
+ if (rte->rtekind == RTE_VALUES)
+ {
+ if (values_rte)
+ elog(ERROR, "too many values RTEs in INSERT");
+ values_rte = rte;
+ }
+ }
+ if (select_rte && values_rte)
+ elog(ERROR, "both subquery and values RTEs in INSERT");
+
+ /*
+ * Start the query with INSERT INTO relname
+ */
+ rte = rt_fetch(query->resultRelation, query->rtable);
+ Assert(rte->rtekind == RTE_RELATION);
+
+ if (PRETTY_INDENT(context))
+ {
+ context->indentLevel += PRETTYINDENT_STD;
+ appendStringInfoChar(buf, ' ');
+ }
+ appendStringInfo(buf, "INSERT INTO %s",
+ generate_relation_name(rte->relid, NIL));
+
+ /* Print the relation alias, if needed; INSERT requires explicit AS */
+ get_rte_alias(rte, query->resultRelation, true, context);
+
+ /* always want a space here */
+ appendStringInfoChar(buf, ' ');
+
+ /*
+ * Add the insert-column-names list. Any indirection decoration needed on
+ * the column names can be inferred from the top targetlist.
+ */
+ strippedexprs = NIL;
+ sep = "";
+ if (query->targetList)
+ appendStringInfoChar(buf, '(');
+ foreach(l, query->targetList)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(l);
+
+ if (tle->resjunk)
+ continue; /* ignore junk entries */
+
+ appendStringInfoString(buf, sep);
+ sep = ", ";
+
+ /*
+ * Put out name of target column; look in the catalogs, not at
+ * tle->resname, since resname will fail to track RENAME.
+ */
+ appendStringInfoString(buf,
+ quote_identifier(get_attname(rte->relid,
+ tle->resno,
+ false)));
+
+ /*
+ * Print any indirection needed (subfields or subscripts), and strip
+ * off the top-level nodes representing the indirection assignments.
+ * Add the stripped expressions to strippedexprs. (If it's a
+ * single-VALUES statement, the stripped expressions are the VALUES to
+ * print below. Otherwise they're just Vars and not really
+ * interesting.)
+ */
+ strippedexprs = lappend(strippedexprs,
+ processIndirection((Node *) tle->expr,
+ context));
+ }
+ if (query->targetList)
+ appendStringInfoString(buf, ") ");
+
+ if (query->override)
+ {
+ if (query->override == OVERRIDING_SYSTEM_VALUE)
+ appendStringInfoString(buf, "OVERRIDING SYSTEM VALUE ");
+ else if (query->override == OVERRIDING_USER_VALUE)
+ appendStringInfoString(buf, "OVERRIDING USER VALUE ");
+ }
+
+ if (select_rte)
+ {
+ /* Add the SELECT */
+ get_query_def(select_rte->subquery, buf, context->namespaces, NULL,
+ false,
+ context->prettyFlags, context->wrapColumn,
+ context->indentLevel);
+ }
+ else if (values_rte)
+ {
+ /* Add the multi-VALUES expression lists */
+ get_values_def(values_rte->values_lists, context);
+ }
+ else if (strippedexprs)
+ {
+ /* Add the single-VALUES expression list */
+ appendContextKeyword(context, "VALUES (",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 2);
+ get_rule_list_toplevel(strippedexprs, context, false);
+ appendStringInfoChar(buf, ')');
+ }
+ else
+ {
+ /* No expressions, so it must be DEFAULT VALUES */
+ appendStringInfoString(buf, "DEFAULT VALUES");
+ }
+
+ /* Add ON CONFLICT if present */
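+ /*
+ * Illustrative shape of the output, assuming a unique column "id":
+ * " ON CONFLICT(id) DO UPDATE SET val = excluded.val".
+ */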
+ if (query->onConflict)
+ {
+ OnConflictExpr *confl = query->onConflict;
+
+ appendStringInfoString(buf, " ON CONFLICT");
+
+ if (confl->arbiterElems)
+ {
+ /* Add the conflict target (arbiter index) column list */
+ appendStringInfoChar(buf, '(');
+ get_rule_expr((Node *) confl->arbiterElems, context, false);
+ appendStringInfoChar(buf, ')');
+
+ /* Add a WHERE clause (for partial indexes) if given */
+ if (confl->arbiterWhere != NULL)
+ {
+ bool save_varprefix;
+
+ /*
+ * Force non-prefixing of Vars, since the parser assumes that they
+ * belong to the target relation.  The WHERE clause does not use
+ * InferenceElem, so this must be done separately here.
+ */
+ save_varprefix = context->varprefix;
+ context->varprefix = false;
+
+ appendContextKeyword(context, " WHERE ",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+ get_rule_expr(confl->arbiterWhere, context, false);
+
+ context->varprefix = save_varprefix;
+ }
+ }
+ else if (OidIsValid(confl->constraint))
+ {
+ char *constraint = get_constraint_name(confl->constraint);
+
+ if (!constraint)
+ elog(ERROR, "cache lookup failed for constraint %u",
+ confl->constraint);
+ appendStringInfo(buf, " ON CONSTRAINT %s",
+ quote_identifier(constraint));
+ }
+
+ if (confl->action == ONCONFLICT_NOTHING)
+ {
+ appendStringInfoString(buf, " DO NOTHING");
+ }
+ else
+ {
+ appendStringInfoString(buf, " DO UPDATE SET ");
+ /* Deparse targetlist */
+ get_update_query_targetlist_def(query, confl->onConflictSet,
+ context, rte);
+
+ /* Add a WHERE clause if given */
+ if (confl->onConflictWhere != NULL)
+ {
+ appendContextKeyword(context, " WHERE ",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+ get_rule_expr(confl->onConflictWhere, context, false);
+ }
+ }
+ }
+
+ /* Add RETURNING if present */
+ if (query->returningList)
+ {
+ appendContextKeyword(context, " RETURNING",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+ get_target_list(query->returningList, context, NULL, colNamesVisible);
+ }
+}
+
+
+/* ----------
+ * get_update_query_def - Parse back an UPDATE parsetree
+ * ----------
+ */
+static void
+get_update_query_def(Query *query, deparse_context *context,
+ bool colNamesVisible)
+{
+ StringInfo buf = context->buf;
+ RangeTblEntry *rte;
+
+ /* Insert the WITH clause if given */
+ get_with_clause(query, context);
+
+ /*
+ * Start the query with UPDATE relname SET
+ */
+ rte = rt_fetch(query->resultRelation, query->rtable);
+ Assert(rte->rtekind == RTE_RELATION);
+ if (PRETTY_INDENT(context))
+ {
+ appendStringInfoChar(buf, ' ');
+ context->indentLevel += PRETTYINDENT_STD;
+ }
+ appendStringInfo(buf, "UPDATE %s%s",
+ only_marker(rte),
+ generate_relation_name(rte->relid, NIL));
+
+ /* Print the relation alias, if needed */
+ get_rte_alias(rte, query->resultRelation, false, context);
+
+ appendStringInfoString(buf, " SET ");
+
+ /* Deparse targetlist */
+ get_update_query_targetlist_def(query, query->targetList, context, rte);
+
+ /* Add the FROM clause if needed */
+ get_from_clause(query, " FROM ", context);
+
+ /* Add a WHERE clause if given */
+ if (query->jointree->quals != NULL)
+ {
+ appendContextKeyword(context, " WHERE ",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+ get_rule_expr(query->jointree->quals, context, false);
+ }
+
+ /* Add RETURNING if present */
+ if (query->returningList)
+ {
+ appendContextKeyword(context, " RETURNING",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+ get_target_list(query->returningList, context, NULL, colNamesVisible);
+ }
+}
+
+
+/* ----------
+ * get_update_query_targetlist_def - Parse back an UPDATE targetlist
+ * ----------
+ */
+static void
+get_update_query_targetlist_def(Query *query, List *targetList,
+ deparse_context *context, RangeTblEntry *rte)
+{
+ StringInfo buf = context->buf;
+ ListCell *l;
+ ListCell *next_ma_cell;
+ int remaining_ma_columns;
+ const char *sep;
+ SubLink *cur_ma_sublink;
+ List *ma_sublinks;
+
+ /*
+ * Prepare to deal with MULTIEXPR assignments: collect the source SubLinks
+ * into a list. We expect them to appear, in ID order, in resjunk tlist
+ * entries.
+ */
+ ma_sublinks = NIL;
+ if (query->hasSubLinks) /* else there can't be any */
+ {
+ foreach(l, targetList)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(l);
+
+ if (tle->resjunk && IsA(tle->expr, SubLink))
+ {
+ SubLink *sl = (SubLink *) tle->expr;
+
+ if (sl->subLinkType == MULTIEXPR_SUBLINK)
+ {
+ ma_sublinks = lappend(ma_sublinks, sl);
+ Assert(sl->subLinkId == list_length(ma_sublinks));
+ }
+ }
+ }
+ }
+ next_ma_cell = list_head(ma_sublinks);
+ cur_ma_sublink = NULL;
+ remaining_ma_columns = 0;
+
+ /* Add the comma separated list of 'attname = value' */
+ sep = "";
+ foreach(l, targetList)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(l);
+ Node *expr;
+
+ if (tle->resjunk)
+ continue; /* ignore junk entries */
+
+ /* Emit separator (OK whether we're in multiassignment or not) */
+ appendStringInfoString(buf, sep);
+ sep = ", ";
+
+ /*
+ * Check to see if we're starting a multiassignment group: if so,
+ * output a left paren.
+ */
+ if (next_ma_cell != NULL && cur_ma_sublink == NULL)
+ {
+ /*
+ * We must dig down into the expr to see if it's a PARAM_MULTIEXPR
+ * Param. That could be buried under FieldStores and
+ * SubscriptingRefs and CoerceToDomains (cf processIndirection()),
+ * and underneath those there could be an implicit type coercion.
+ * Because we would ignore implicit type coercions anyway, we
+ * don't need to be as careful as processIndirection() is about
+ * descending past implicit CoerceToDomains.
+ */
+ expr = (Node *) tle->expr;
+ while (expr)
+ {
+ if (IsA(expr, FieldStore))
+ {
+ FieldStore *fstore = (FieldStore *) expr;
+
+ expr = (Node *) linitial(fstore->newvals);
+ }
+ else if (IsA(expr, SubscriptingRef))
+ {
+ SubscriptingRef *sbsref = (SubscriptingRef *) expr;
+
+ if (sbsref->refassgnexpr == NULL)
+ break;
+
+ expr = (Node *) sbsref->refassgnexpr;
+ }
+ else if (IsA(expr, CoerceToDomain))
+ {
+ CoerceToDomain *cdomain = (CoerceToDomain *) expr;
+
+ if (cdomain->coercionformat != COERCE_IMPLICIT_CAST)
+ break;
+ expr = (Node *) cdomain->arg;
+ }
+ else
+ break;
+ }
+ expr = strip_implicit_coercions(expr);
+
+ if (expr && IsA(expr, Param) &&
+ ((Param *) expr)->paramkind == PARAM_MULTIEXPR)
+ {
+ cur_ma_sublink = (SubLink *) lfirst(next_ma_cell);
+ next_ma_cell = lnext(ma_sublinks, next_ma_cell);
+ remaining_ma_columns = count_nonjunk_tlist_entries(((Query *) cur_ma_sublink->subselect)->targetList);
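+
+ /*
+ * A PARAM_MULTIEXPR Param packs the sublink ID into the high 16 bits
+ * of its paramid and the column number into the low 16 bits; e.g. the
+ * first column of sublink 1 carries paramid (1 << 16) | 1 = 65537.
+ * The Assert below verifies that we're looking at column 1 of the
+ * sublink we just consumed from ma_sublinks.
+ */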
+ Assert(((Param *) expr)->paramid ==
+ ((cur_ma_sublink->subLinkId << 16) | 1));
+ appendStringInfoChar(buf, '(');
+ }
+ }
+
+ /*
+ * Put out name of target column; look in the catalogs, not at
+ * tle->resname, since resname will fail to track RENAME.
+ */
+ appendStringInfoString(buf,
+ quote_identifier(get_attname(rte->relid,
+ tle->resno,
+ false)));
+
+ /*
+ * Print any indirection needed (subfields or subscripts), and strip
+ * off the top-level nodes representing the indirection assignments.
+ */
+ expr = processIndirection((Node *) tle->expr, context);
+
+ /*
+ * If we're in a multiassignment, skip printing anything more unless
+ * this is the last column; for the last column, what we print should
+ * be the sublink, not the Param.
+ */
+ if (cur_ma_sublink != NULL)
+ {
+ if (--remaining_ma_columns > 0)
+ continue; /* not the last column of multiassignment */
+ appendStringInfoChar(buf, ')');
+ expr = (Node *) cur_ma_sublink;
+ cur_ma_sublink = NULL;
+ }
+
+ appendStringInfoString(buf, " = ");
+
+ get_rule_expr(expr, context, false);
+ }
+}
+
+
+/* ----------
+ * get_delete_query_def - Parse back a DELETE parsetree
+ * ----------
+ */
+static void
+get_delete_query_def(Query *query, deparse_context *context,
+ bool colNamesVisible)
+{
+ StringInfo buf = context->buf;
+ RangeTblEntry *rte;
+
+ /* Insert the WITH clause if given */
+ get_with_clause(query, context);
+
+ /*
+ * Start the query with DELETE FROM relname
+ */
+ rte = rt_fetch(query->resultRelation, query->rtable);
+ Assert(rte->rtekind == RTE_RELATION);
+ if (PRETTY_INDENT(context))
+ {
+ appendStringInfoChar(buf, ' ');
+ context->indentLevel += PRETTYINDENT_STD;
+ }
+ appendStringInfo(buf, "DELETE FROM %s%s",
+ only_marker(rte),
+ generate_relation_name(rte->relid, NIL));
+
+ /* Print the relation alias, if needed */
+ get_rte_alias(rte, query->resultRelation, false, context);
+
+ /* Add the USING clause if given */
+ get_from_clause(query, " USING ", context);
+
+ /* Add a WHERE clause if given */
+ if (query->jointree->quals != NULL)
+ {
+ appendContextKeyword(context, " WHERE ",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+ get_rule_expr(query->jointree->quals, context, false);
+ }
+
+ /* Add RETURNING if present */
+ if (query->returningList)
+ {
+ appendContextKeyword(context, " RETURNING",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 1);
+ get_target_list(query->returningList, context, NULL, colNamesVisible);
+ }
+}
+
+
+/* ----------
+ * get_merge_query_def - Parse back a MERGE parsetree
+ * ----------
+ */
+static void
+get_merge_query_def(Query *query, deparse_context *context,
+ bool colNamesVisible)
+{
+ StringInfo buf = context->buf;
+ RangeTblEntry *rte;
+ ListCell *lc;
+
+ /* Insert the WITH clause if given */
+ get_with_clause(query, context);
+
+ /*
+ * Start the query with MERGE INTO relname
+ */
+ rte = rt_fetch(query->resultRelation, query->rtable);
+ Assert(rte->rtekind == RTE_RELATION);
+ if (PRETTY_INDENT(context))
+ {
+ appendStringInfoChar(buf, ' ');
+ context->indentLevel += PRETTYINDENT_STD;
+ }
+ appendStringInfo(buf, "MERGE INTO %s%s",
+ only_marker(rte),
+ generate_relation_name(rte->relid, NIL));
+
+ /* Print the relation alias, if needed */
+ get_rte_alias(rte, query->resultRelation, false, context);
+
+ /* Print the source relation and join clause */
+ get_from_clause(query, " USING ", context);
+ appendContextKeyword(context, " ON ",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 2);
+ get_rule_expr(query->jointree->quals, context, false);
+
+ /* Print each merge action */
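+ /*
+ * Each action deparses along the lines of (illustrative only)
+ * " WHEN MATCHED AND <qual> THEN UPDATE SET col = expr" or
+ * " WHEN NOT MATCHED THEN INSERT (col, ...) VALUES (expr, ...)".
+ */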
+ foreach(lc, query->mergeActionList)
+ {
+ MergeAction *action = lfirst_node(MergeAction, lc);
+
+ appendContextKeyword(context, " WHEN ",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 2);
+ appendStringInfo(buf, "%sMATCHED", action->matched ? "" : "NOT ");
+
+ if (action->qual)
+ {
+ appendContextKeyword(context, " AND ",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 3);
+ get_rule_expr(action->qual, context, false);
+ }
+ appendContextKeyword(context, " THEN ",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 3);
+
+ if (action->commandType == CMD_INSERT)
+ {
+ /* This generally matches get_insert_query_def() */
+ List *strippedexprs = NIL;
+ const char *sep = "";
+ ListCell *lc2;
+
+ appendStringInfoString(buf, "INSERT");
+
+ if (action->targetList)
+ appendStringInfoString(buf, " (");
+ foreach(lc2, action->targetList)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(lc2);
+
+ Assert(!tle->resjunk);
+
+ appendStringInfoString(buf, sep);
+ sep = ", ";
+
+ appendStringInfoString(buf,
+ quote_identifier(get_attname(rte->relid,
+ tle->resno,
+ false)));
+ strippedexprs = lappend(strippedexprs,
+ processIndirection((Node *) tle->expr,
+ context));
+ }
+ if (action->targetList)
+ appendStringInfoChar(buf, ')');
+
+ if (action->override)
+ {
+ if (action->override == OVERRIDING_SYSTEM_VALUE)
+ appendStringInfoString(buf, " OVERRIDING SYSTEM VALUE");
+ else if (action->override == OVERRIDING_USER_VALUE)
+ appendStringInfoString(buf, " OVERRIDING USER VALUE");
+ }
+
+ if (strippedexprs)
+ {
+ appendContextKeyword(context, " VALUES (",
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 4);
+ get_rule_list_toplevel(strippedexprs, context, false);
+ appendStringInfoChar(buf, ')');
+ }
+ else
+ appendStringInfoString(buf, " DEFAULT VALUES");
+ }
+ else if (action->commandType == CMD_UPDATE)
+ {
+ appendStringInfoString(buf, "UPDATE SET ");
+ get_update_query_targetlist_def(query, action->targetList,
+ context, rte);
+ }
+ else if (action->commandType == CMD_DELETE)
+ appendStringInfoString(buf, "DELETE");
+ else if (action->commandType == CMD_NOTHING)
+ appendStringInfoString(buf, "DO NOTHING");
+ }
+
+ /* No RETURNING support in MERGE yet */
+ Assert(query->returningList == NIL);
+}
+
+
+/* ----------
+ * get_utility_query_def - Parse back a UTILITY parsetree
+ * ----------
+ */
+static void
+get_utility_query_def(Query *query, deparse_context *context)
+{
+ StringInfo buf = context->buf;
+
+ if (query->utilityStmt && IsA(query->utilityStmt, NotifyStmt))
+ {
+ NotifyStmt *stmt = (NotifyStmt *) query->utilityStmt;
+
+ appendContextKeyword(context, "",
+ 0, PRETTYINDENT_STD, 1);
+ appendStringInfo(buf, "NOTIFY %s",
+ quote_identifier(stmt->conditionname));
+ if (stmt->payload)
+ {
+ appendStringInfoString(buf, ", ");
+ simple_quote_literal(buf, stmt->payload);
+ }
+ }
+ else
+ {
+ /* Currently only NOTIFY utility commands can appear in rules */
+ elog(ERROR, "unexpected utility statement type");
+ }
+}
+
+/*
+ * Display a Var appropriately.
+ *
+ * In some cases (currently only when recursing into an unnamed join)
+ * the Var's varlevelsup has to be interpreted with respect to a context
+ * above the current one; levelsup indicates the offset.
+ *
+ * If istoplevel is true, the Var is at the top level of a SELECT's
+ * targetlist, which means we need special treatment of whole-row Vars.
+ * Instead of the normal "tab.*", we'll print "tab.*::typename", which is a
+ * dirty hack to prevent "tab.*" from being expanded into multiple columns.
+ * (The parser will strip the useless coercion, so no inefficiency is added in
+ * dump and reload.) We used to print just "tab" in such cases, but that is
+ * ambiguous and will yield the wrong result if "tab" is also a plain column
+ * name in the query.
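+ *
+ * For example, a top-level whole-row reference to a table "tab" prints as
+ * "tab.*::tab" rather than plain "tab.*" or "tab".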
+ *
+ * Returns the attname of the Var, or NULL if the Var has no attname (because
+ * it is a whole-row Var or a subplan output reference).
+ */
+static char *
+get_variable(Var *var, int levelsup, bool istoplevel, deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ RangeTblEntry *rte;
+ AttrNumber attnum;
+ int netlevelsup;
+ deparse_namespace *dpns;
+ int varno;
+ AttrNumber varattno;
+ deparse_columns *colinfo;
+ char *refname;
+ char *attname;
+
+ /* Find appropriate nesting depth */
+ netlevelsup = var->varlevelsup + levelsup;
+ if (netlevelsup >= list_length(context->namespaces))
+ elog(ERROR, "bogus varlevelsup: %d offset %d",
+ var->varlevelsup, levelsup);
+ dpns = (deparse_namespace *) list_nth(context->namespaces,
+ netlevelsup);
+
+ /*
+ * If we have a syntactic referent for the Var, and we're working from a
+ * parse tree, prefer to use the syntactic referent. Otherwise, fall back
+ * on the semantic referent. (Forcing use of the semantic referent when
+ * printing plan trees is a design choice that's perhaps more motivated by
+ * backwards compatibility than anything else. But it does have the
+ * advantage of making plans more explicit.)
+ */
+ if (var->varnosyn > 0 && dpns->plan == NULL)
+ {
+ varno = var->varnosyn;
+ varattno = var->varattnosyn;
+ }
+ else
+ {
+ varno = var->varno;
+ varattno = var->varattno;
+ }
+
+ /*
+ * Try to find the relevant RTE in this rtable. In a plan tree, it's
+ * likely that varno is OUTER_VAR or INNER_VAR, in which case we must dig
+ * down into the subplans, or INDEX_VAR, which is resolved similarly. Also
+ * find the aliases previously assigned for this RTE.
+ */
+ if (varno >= 1 && varno <= list_length(dpns->rtable))
+ {
+ /*
+ * We might have been asked to map child Vars to some parent relation.
+ */
+ if (context->appendparents && dpns->appendrels)
+ {
+ int pvarno = varno;
+ AttrNumber pvarattno = varattno;
+ AppendRelInfo *appinfo = dpns->appendrels[pvarno];
+ bool found = false;
+
+ /* Only map up to inheritance parents, not UNION ALL appendrels */
+ while (appinfo &&
+ rt_fetch(appinfo->parent_relid,
+ dpns->rtable)->rtekind == RTE_RELATION)
+ {
+ found = false;
+ if (pvarattno > 0) /* system columns stay as-is */
+ {
+ if (pvarattno > appinfo->num_child_cols)
+ break; /* safety check */
+ pvarattno = appinfo->parent_colnos[pvarattno - 1];
+ if (pvarattno == 0)
+ break; /* Var is local to child */
+ }
+
+ pvarno = appinfo->parent_relid;
+ found = true;
+
+ /* If the parent is itself a child, continue up. */
+ Assert(pvarno > 0 && pvarno <= list_length(dpns->rtable));
+ appinfo = dpns->appendrels[pvarno];
+ }
+
+ /*
+ * If we found an ancestral rel, and that rel is included in
+ * appendparents, print that column not the original one.
+ */
+ if (found && bms_is_member(pvarno, context->appendparents))
+ {
+ varno = pvarno;
+ varattno = pvarattno;
+ }
+ }
+
+ rte = rt_fetch(varno, dpns->rtable);
+ refname = (char *) list_nth(dpns->rtable_names, varno - 1);
+ colinfo = deparse_columns_fetch(varno, dpns);
+ attnum = varattno;
+ }
+ else
+ {
+ resolve_special_varno((Node *) var, context,
+ get_special_variable, NULL);
+ return NULL;
+ }
+
+ /*
+ * The planner will sometimes emit Vars referencing resjunk elements of a
+ * subquery's target list (this is currently only possible if it chooses
+ * to generate a "physical tlist" for a SubqueryScan or CteScan node).
+ * Although we prefer to print subquery-referencing Vars using the
+ * subquery's alias, that's not possible for resjunk items since they have
+ * no alias. So in that case, drill down to the subplan and print the
+ * contents of the referenced tlist item. This works because in a plan
+ * tree, such Vars can only occur in a SubqueryScan or CteScan node, and
+ * we'll have set dpns->inner_plan to reference the child plan node.
+ */
+ if ((rte->rtekind == RTE_SUBQUERY || rte->rtekind == RTE_CTE) &&
+ attnum > list_length(rte->eref->colnames) &&
+ dpns->inner_plan)
+ {
+ TargetEntry *tle;
+ deparse_namespace save_dpns;
+
+ tle = get_tle_by_resno(dpns->inner_tlist, attnum);
+ if (!tle)
+ elog(ERROR, "invalid attnum %d for relation \"%s\"",
+ attnum, rte->eref->aliasname);
+
+ Assert(netlevelsup == 0);
+ push_child_plan(dpns, dpns->inner_plan, &save_dpns);
+
+ /*
+ * Force parentheses because our caller probably assumed a Var is a
+ * simple expression.
+ */
+ if (!IsA(tle->expr, Var))
+ appendStringInfoChar(buf, '(');
+ get_rule_expr((Node *) tle->expr, context, true);
+ if (!IsA(tle->expr, Var))
+ appendStringInfoChar(buf, ')');
+
+ pop_child_plan(dpns, &save_dpns);
+ return NULL;
+ }
+
+ /*
+ * If it's an unnamed join, look at the expansion of the alias variable.
+ * If it's a simple reference to one of the input vars, then recursively
+ * print the name of that var instead. When it's not a simple reference,
+ * we have to just print the unqualified join column name. (This can only
+ * happen with "dangerous" merged columns in a JOIN USING; we took pains
+ * previously to make the unqualified column name unique in such cases.)
+ *
+ * This wouldn't work in decompiling plan trees, because we don't store
+ * joinaliasvars lists after planning; but a plan tree should never
+ * contain a join alias variable.
+ */
+ if (rte->rtekind == RTE_JOIN && rte->alias == NULL)
+ {
+ if (rte->joinaliasvars == NIL)
+ elog(ERROR, "cannot decompile join alias var in plan tree");
+ if (attnum > 0)
+ {
+ Var *aliasvar;
+
+ aliasvar = (Var *) list_nth(rte->joinaliasvars, attnum - 1);
+ /* we intentionally don't strip implicit coercions here */
+ if (aliasvar && IsA(aliasvar, Var))
+ {
+ return get_variable(aliasvar, var->varlevelsup + levelsup,
+ istoplevel, context);
+ }
+ }
+
+ /*
+ * Unnamed join has no refname. (Note: since it's unnamed, there is
+ * no way the user could have referenced it to create a whole-row Var
+ * for it. So we don't have to cover that case below.)
+ */
+ Assert(refname == NULL);
+ }
+
+ if (attnum == InvalidAttrNumber)
+ attname = NULL;
+ else if (attnum > 0)
+ {
+ /* Get column name to use from the colinfo struct */
+ if (attnum > colinfo->num_cols)
+ elog(ERROR, "invalid attnum %d for relation \"%s\"",
+ attnum, rte->eref->aliasname);
+ attname = colinfo->colnames[attnum - 1];
+
+ /*
+ * If we find a Var referencing a dropped column, it seems better to
+ * print something (anything) than to fail. In general this should
+ * not happen, but it used to be possible for some cases involving
+ * functions returning named composite types, and perhaps there are
+ * still bugs out there.
+ */
+ if (attname == NULL)
+ attname = "?dropped?column?";
+ }
+ else
+ {
+ /* System column - name is fixed, get it from the catalog */
+ attname = get_rte_attribute_name(rte, attnum);
+ }
+
+ if (refname && (context->varprefix || attname == NULL))
+ {
+ appendStringInfoString(buf, quote_identifier(refname));
+ appendStringInfoChar(buf, '.');
+ }
+ if (attname)
+ appendStringInfoString(buf, quote_identifier(attname));
+ else
+ {
+ appendStringInfoChar(buf, '*');
+ if (istoplevel)
+ appendStringInfo(buf, "::%s",
+ format_type_with_typemod(var->vartype,
+ var->vartypmod));
+ }
+
+ return attname;
+}
+
+/*
+ * Deparse a Var which references OUTER_VAR, INNER_VAR, or INDEX_VAR. This
+ * routine is actually a callback for resolve_special_varno, which handles
+ * finding the correct TargetEntry. We get the expression contained in that
+ * TargetEntry and just need to deparse it, a job we can throw back on
+ * get_rule_expr.
+ */
+static void
+get_special_variable(Node *node, deparse_context *context, void *callback_arg)
+{
+ StringInfo buf = context->buf;
+
+ /*
+ * For a non-Var referent, force parentheses because our caller probably
+ * assumed a Var is a simple expression.
+ */
+ if (!IsA(node, Var))
+ appendStringInfoChar(buf, '(');
+ get_rule_expr(node, context, true);
+ if (!IsA(node, Var))
+ appendStringInfoChar(buf, ')');
+}
+
+/*
+ * Chase through plan references to special varnos (OUTER_VAR, INNER_VAR,
+ * INDEX_VAR) until we find a real Var or some kind of non-Var node; then,
+ * invoke the callback provided.
+ */
+static void
+resolve_special_varno(Node *node, deparse_context *context,
+ rsv_callback callback, void *callback_arg)
+{
+ Var *var;
+ deparse_namespace *dpns;
+
+ /* This function is recursive, so let's be paranoid. */
+ check_stack_depth();
+
+ /* If it's not a Var, invoke the callback. */
+ if (!IsA(node, Var))
+ {
+ (*callback) (node, context, callback_arg);
+ return;
+ }
+
+ /* Find appropriate nesting depth */
+ var = (Var *) node;
+ dpns = (deparse_namespace *) list_nth(context->namespaces,
+ var->varlevelsup);
+
+ /*
+ * If varno is special, recurse. (Don't worry about varnosyn; if we're
+ * here, we already decided not to use that.)
+ */
+ if (var->varno == OUTER_VAR && dpns->outer_tlist)
+ {
+ TargetEntry *tle;
+ deparse_namespace save_dpns;
+ Bitmapset *save_appendparents;
+
+ tle = get_tle_by_resno(dpns->outer_tlist, var->varattno);
+ if (!tle)
+ elog(ERROR, "bogus varattno for OUTER_VAR var: %d", var->varattno);
+
+ /*
+ * If we're descending to the first child of an Append or MergeAppend,
+ * update appendparents. This will affect deparsing of all Vars
+ * appearing within the eventually-resolved subexpression.
+ */
+ save_appendparents = context->appendparents;
+
+ if (IsA(dpns->plan, Append))
+ context->appendparents = bms_union(context->appendparents,
+ ((Append *) dpns->plan)->apprelids);
+ else if (IsA(dpns->plan, MergeAppend))
+ context->appendparents = bms_union(context->appendparents,
+ ((MergeAppend *) dpns->plan)->apprelids);
+
+ push_child_plan(dpns, dpns->outer_plan, &save_dpns);
+ resolve_special_varno((Node *) tle->expr, context,
+ callback, callback_arg);
+ pop_child_plan(dpns, &save_dpns);
+ context->appendparents = save_appendparents;
+ return;
+ }
+ else if (var->varno == INNER_VAR && dpns->inner_tlist)
+ {
+ TargetEntry *tle;
+ deparse_namespace save_dpns;
+
+ tle = get_tle_by_resno(dpns->inner_tlist, var->varattno);
+ if (!tle)
+ elog(ERROR, "bogus varattno for INNER_VAR var: %d", var->varattno);
+
+ push_child_plan(dpns, dpns->inner_plan, &save_dpns);
+ resolve_special_varno((Node *) tle->expr, context,
+ callback, callback_arg);
+ pop_child_plan(dpns, &save_dpns);
+ return;
+ }
+ else if (var->varno == INDEX_VAR && dpns->index_tlist)
+ {
+ TargetEntry *tle;
+
+ tle = get_tle_by_resno(dpns->index_tlist, var->varattno);
+ if (!tle)
+ elog(ERROR, "bogus varattno for INDEX_VAR var: %d", var->varattno);
+
+ resolve_special_varno((Node *) tle->expr, context,
+ callback, callback_arg);
+ return;
+ }
+ else if (var->varno < 1 || var->varno > list_length(dpns->rtable))
+ elog(ERROR, "bogus varno: %d", var->varno);
+
+ /* Not special. Just invoke the callback. */
+ (*callback) (node, context, callback_arg);
+}
+
+/*
+ * Get the name of a field of an expression of composite type. The
+ * expression is usually a Var, but we handle other cases too.
+ *
+ * levelsup is an extra offset to interpret the Var's varlevelsup correctly.
+ *
+ * This is fairly straightforward when the expression has a named composite
+ * type; we need only look up the type in the catalogs. However, the type
+ * could also be RECORD. Since no actual table or view column is allowed to
+ * have type RECORD, a Var of type RECORD must refer to a JOIN or FUNCTION RTE
+ * or to a subquery output. We drill down to find the ultimate defining
+ * expression and attempt to infer the field name from it. We ereport if we
+ * can't determine the name.
+ *
+ * Similarly, a PARAM of type RECORD has to refer to some expression of
+ * a determinable composite type.
+ */
+static const char *
+get_name_for_var_field(Var *var, int fieldno,
+ int levelsup, deparse_context *context)
+{
+ RangeTblEntry *rte;
+ AttrNumber attnum;
+ int netlevelsup;
+ deparse_namespace *dpns;
+ int varno;
+ AttrNumber varattno;
+ TupleDesc tupleDesc;
+ Node *expr;
+
+ /*
+ * If it's a RowExpr that was expanded from a whole-row Var, use the
+ * column names attached to it. (We could let get_expr_result_tupdesc()
+ * handle this, but it's much cheaper to just pull out the name we need.)
+ */
+ if (IsA(var, RowExpr))
+ {
+ RowExpr *r = (RowExpr *) var;
+
+ if (fieldno > 0 && fieldno <= list_length(r->colnames))
+ return strVal(list_nth(r->colnames, fieldno - 1));
+ }
+
+ /*
+ * If it's a Param of type RECORD, try to find what the Param refers to.
+ */
+ if (IsA(var, Param))
+ {
+ Param *param = (Param *) var;
+ ListCell *ancestor_cell;
+
+ expr = find_param_referent(param, context, &dpns, &ancestor_cell);
+ if (expr)
+ {
+ /* Found a match, so recurse to decipher the field name */
+ deparse_namespace save_dpns;
+ const char *result;
+
+ push_ancestor_plan(dpns, ancestor_cell, &save_dpns);
+ result = get_name_for_var_field((Var *) expr, fieldno,
+ 0, context);
+ pop_ancestor_plan(dpns, &save_dpns);
+ return result;
+ }
+ }
+
+ /*
+ * If it's a Var of type RECORD, we have to find what the Var refers to;
+ * if not, we can use get_expr_result_tupdesc().
+ */
+ if (!IsA(var, Var) ||
+ var->vartype != RECORDOID)
+ {
+ tupleDesc = get_expr_result_tupdesc((Node *) var, false);
+ /* Got the tupdesc, so we can extract the field name */
+ Assert(fieldno >= 1 && fieldno <= tupleDesc->natts);
+ return NameStr(TupleDescAttr(tupleDesc, fieldno - 1)->attname);
+ }
+
+ /* Find appropriate nesting depth */
+ netlevelsup = var->varlevelsup + levelsup;
+ if (netlevelsup >= list_length(context->namespaces))
+ elog(ERROR, "bogus varlevelsup: %d offset %d",
+ var->varlevelsup, levelsup);
+ dpns = (deparse_namespace *) list_nth(context->namespaces,
+ netlevelsup);
+
+ /*
+ * If we have a syntactic referent for the Var, and we're working from a
+ * parse tree, prefer to use the syntactic referent. Otherwise, fall back
+ * on the semantic referent. (See comments in get_variable().)
+ */
+ if (var->varnosyn > 0 && dpns->plan == NULL)
+ {
+ varno = var->varnosyn;
+ varattno = var->varattnosyn;
+ }
+ else
+ {
+ varno = var->varno;
+ varattno = var->varattno;
+ }
+
+ /*
+ * Try to find the relevant RTE in this rtable. In a plan tree, it's
+ * likely that varno is OUTER_VAR or INNER_VAR, in which case we must dig
+ * down into the subplans, or INDEX_VAR, which is resolved similarly.
+ *
+ * Note: unlike get_variable and resolve_special_varno, we need not worry
+ * about inheritance mapping: a child Var should have the same datatype as
+ * its parent, and here we're really only interested in the Var's type.
+ */
+ if (varno >= 1 && varno <= list_length(dpns->rtable))
+ {
+ rte = rt_fetch(varno, dpns->rtable);
+ attnum = varattno;
+ }
+ else if (varno == OUTER_VAR && dpns->outer_tlist)
+ {
+ TargetEntry *tle;
+ deparse_namespace save_dpns;
+ const char *result;
+
+ tle = get_tle_by_resno(dpns->outer_tlist, varattno);
+ if (!tle)
+ elog(ERROR, "bogus varattno for OUTER_VAR var: %d", varattno);
+
+ Assert(netlevelsup == 0);
+ push_child_plan(dpns, dpns->outer_plan, &save_dpns);
+
+ result = get_name_for_var_field((Var *) tle->expr, fieldno,
+ levelsup, context);
+
+ pop_child_plan(dpns, &save_dpns);
+ return result;
+ }
+ else if (varno == INNER_VAR && dpns->inner_tlist)
+ {
+ TargetEntry *tle;
+ deparse_namespace save_dpns;
+ const char *result;
+
+ tle = get_tle_by_resno(dpns->inner_tlist, varattno);
+ if (!tle)
+ elog(ERROR, "bogus varattno for INNER_VAR var: %d", varattno);
+
+ Assert(netlevelsup == 0);
+ push_child_plan(dpns, dpns->inner_plan, &save_dpns);
+
+ result = get_name_for_var_field((Var *) tle->expr, fieldno,
+ levelsup, context);
+
+ pop_child_plan(dpns, &save_dpns);
+ return result;
+ }
+ else if (varno == INDEX_VAR && dpns->index_tlist)
+ {
+ TargetEntry *tle;
+ const char *result;
+
+ tle = get_tle_by_resno(dpns->index_tlist, varattno);
+ if (!tle)
+ elog(ERROR, "bogus varattno for INDEX_VAR var: %d", varattno);
+
+ Assert(netlevelsup == 0);
+
+ result = get_name_for_var_field((Var *) tle->expr, fieldno,
+ levelsup, context);
+
+ return result;
+ }
+ else
+ {
+ elog(ERROR, "bogus varno: %d", varno);
+ return NULL; /* keep compiler quiet */
+ }
+
+ if (attnum == InvalidAttrNumber)
+ {
+ /* Var is whole-row reference to RTE, so select the right field */
+ return get_rte_attribute_name(rte, fieldno);
+ }
+
+ /*
+ * This part has essentially the same logic as the parser's
+ * expandRecordVariable() function, but we are dealing with a different
+ * representation of the input context, and we only need one field name
+ * not a TupleDesc. Also, we need special cases for finding subquery and
+ * CTE subplans when deparsing Plan trees.
+ */
+ expr = (Node *) var; /* default if we can't drill down */
+
+ switch (rte->rtekind)
+ {
+ case RTE_RELATION:
+ case RTE_VALUES:
+ case RTE_NAMEDTUPLESTORE:
+ case RTE_RESULT:
+
+ /*
+ * This case should not occur: a column of a table, values list,
+ * or ENR shouldn't have type RECORD. Fall through and fail (most
+ * likely) at the bottom.
+ */
+ break;
+ case RTE_SUBQUERY:
+ /* Subselect-in-FROM: examine sub-select's output expr */
+ {
+ if (rte->subquery)
+ {
+ TargetEntry *ste = get_tle_by_resno(rte->subquery->targetList,
+ attnum);
+
+ if (ste == NULL || ste->resjunk)
+ elog(ERROR, "subquery %s does not have attribute %d",
+ rte->eref->aliasname, attnum);
+ expr = (Node *) ste->expr;
+ if (IsA(expr, Var))
+ {
+ /*
+ * Recurse into the sub-select to see what its Var
+ * refers to. We have to build an additional level of
+ * namespace to keep in step with varlevelsup in the
+ * subselect; furthermore, the subquery RTE might be
+ * from an outer query level, in which case the
+ * namespace for the subselect must have that outer
+ * level as parent namespace.
+ */
+ List *save_nslist = context->namespaces;
+ List *parent_namespaces;
+ deparse_namespace mydpns;
+ const char *result;
+
+ parent_namespaces = list_copy_tail(context->namespaces,
+ netlevelsup);
+
+ set_deparse_for_query(&mydpns, rte->subquery,
+ parent_namespaces);
+
+ context->namespaces = lcons(&mydpns, parent_namespaces);
+
+ result = get_name_for_var_field((Var *) expr, fieldno,
+ 0, context);
+
+ context->namespaces = save_nslist;
+
+ return result;
+ }
+ /* else fall through to inspect the expression */
+ }
+ else
+ {
+ /*
+ * We're deparsing a Plan tree so we don't have complete
+ * RTE entries (in particular, rte->subquery is NULL). But
+ * the only place we'd see a Var directly referencing a
+ * SUBQUERY RTE is in a SubqueryScan plan node, and we can
+ * look into the child plan's tlist instead.
+ */
+ TargetEntry *tle;
+ deparse_namespace save_dpns;
+ const char *result;
+
+ if (!dpns->inner_plan)
+ elog(ERROR, "failed to find plan for subquery %s",
+ rte->eref->aliasname);
+ tle = get_tle_by_resno(dpns->inner_tlist, attnum);
+ if (!tle)
+ elog(ERROR, "bogus varattno for subquery var: %d",
+ attnum);
+ Assert(netlevelsup == 0);
+ push_child_plan(dpns, dpns->inner_plan, &save_dpns);
+
+ result = get_name_for_var_field((Var *) tle->expr, fieldno,
+ levelsup, context);
+
+ pop_child_plan(dpns, &save_dpns);
+ return result;
+ }
+ }
+ break;
+ case RTE_JOIN:
+ /* Join RTE --- recursively inspect the alias variable */
+ if (rte->joinaliasvars == NIL)
+ elog(ERROR, "cannot decompile join alias var in plan tree");
+ Assert(attnum > 0 && attnum <= list_length(rte->joinaliasvars));
+ expr = (Node *) list_nth(rte->joinaliasvars, attnum - 1);
+ Assert(expr != NULL);
+ /* we intentionally don't strip implicit coercions here */
+ if (IsA(expr, Var))
+ return get_name_for_var_field((Var *) expr, fieldno,
+ var->varlevelsup + levelsup,
+ context);
+ /* else fall through to inspect the expression */
+ break;
+ case RTE_FUNCTION:
+ case RTE_TABLEFUNC:
+
+ /*
+ * We couldn't get here unless a function is declared with one of
+ * its result columns as RECORD, which is not allowed.
+ */
+ break;
+ case RTE_CTE:
+ /* CTE reference: examine subquery's output expr */
+ {
+ CommonTableExpr *cte = NULL;
+ Index ctelevelsup;
+ ListCell *lc;
+
+ /*
+ * Try to find the referenced CTE using the namespace stack.
+ */
+ ctelevelsup = rte->ctelevelsup + netlevelsup;
+ if (ctelevelsup >= list_length(context->namespaces))
+ lc = NULL;
+ else
+ {
+ deparse_namespace *ctedpns;
+
+ ctedpns = (deparse_namespace *)
+ list_nth(context->namespaces, ctelevelsup);
+ foreach(lc, ctedpns->ctes)
+ {
+ cte = (CommonTableExpr *) lfirst(lc);
+ if (strcmp(cte->ctename, rte->ctename) == 0)
+ break;
+ }
+ }
+ if (lc != NULL)
+ {
+ Query *ctequery = (Query *) cte->ctequery;
+ TargetEntry *ste = get_tle_by_resno(GetCTETargetList(cte),
+ attnum);
+
+ if (ste == NULL || ste->resjunk)
+ elog(ERROR, "CTE %s does not have attribute %d",
+ rte->eref->aliasname, attnum);
+ expr = (Node *) ste->expr;
+ if (IsA(expr, Var))
+ {
+ /*
+ * Recurse into the CTE to see what its Var refers to.
+ * We have to build an additional level of namespace
+ * to keep in step with varlevelsup in the CTE;
+ * furthermore it could be an outer CTE (compare
+ * SUBQUERY case above).
+ */
+ List *save_nslist = context->namespaces;
+ List *parent_namespaces;
+ deparse_namespace mydpns;
+ const char *result;
+
+ parent_namespaces = list_copy_tail(context->namespaces,
+ ctelevelsup);
+
+ set_deparse_for_query(&mydpns, ctequery,
+ parent_namespaces);
+
+ context->namespaces = lcons(&mydpns, parent_namespaces);
+
+ result = get_name_for_var_field((Var *) expr, fieldno,
+ 0, context);
+
+ context->namespaces = save_nslist;
+
+ return result;
+ }
+ /* else fall through to inspect the expression */
+ }
+ else
+ {
+ /*
+ * We're deparsing a Plan tree so we don't have a CTE
+ * list. But the only places we'd see a Var directly
+ * referencing a CTE RTE are in CteScan or WorkTableScan
+ * plan nodes. For those cases, set_deparse_plan arranged
+ * for dpns->inner_plan to be the plan node that emits the
+ * CTE or RecursiveUnion result, and we can look at its
+ * tlist instead.
+ */
+ TargetEntry *tle;
+ deparse_namespace save_dpns;
+ const char *result;
+
+ if (!dpns->inner_plan)
+ elog(ERROR, "failed to find plan for CTE %s",
+ rte->eref->aliasname);
+ tle = get_tle_by_resno(dpns->inner_tlist, attnum);
+ if (!tle)
+ elog(ERROR, "bogus varattno for subquery var: %d",
+ attnum);
+ Assert(netlevelsup == 0);
+ push_child_plan(dpns, dpns->inner_plan, &save_dpns);
+
+ result = get_name_for_var_field((Var *) tle->expr, fieldno,
+ levelsup, context);
+
+ pop_child_plan(dpns, &save_dpns);
+ return result;
+ }
+ }
+ break;
+ }
+
+ /*
+ * We now have an expression we can't expand any more, so see if
+ * get_expr_result_tupdesc() can do anything with it.
+ */
+ tupleDesc = get_expr_result_tupdesc(expr, false);
+ /* Got the tupdesc, so we can extract the field name */
+ Assert(fieldno >= 1 && fieldno <= tupleDesc->natts);
+ return NameStr(TupleDescAttr(tupleDesc, fieldno - 1)->attname);
+}
+
+/*
+ * Try to find the referenced expression for a PARAM_EXEC Param that might
+ * reference a parameter supplied by an upper NestLoop or SubPlan plan node.
+ *
+ * If successful, return the expression and set *dpns_p and *ancestor_cell_p
+ * appropriately for calling push_ancestor_plan(). If no referent can be
+ * found, return NULL.
+ */
+static Node *
+find_param_referent(Param *param, deparse_context *context,
+ deparse_namespace **dpns_p, ListCell **ancestor_cell_p)
+{
+ /* Initialize output parameters to prevent compiler warnings */
+ *dpns_p = NULL;
+ *ancestor_cell_p = NULL;
+
+ /*
+ * If it's a PARAM_EXEC parameter, look for a matching NestLoopParam or
+ * SubPlan argument. This will necessarily be in some ancestor of the
+ * current expression's Plan node.
+ */
+ if (param->paramkind == PARAM_EXEC)
+ {
+ deparse_namespace *dpns;
+ Plan *child_plan;
+ bool in_same_plan_level;
+ ListCell *lc;
+
+ dpns = (deparse_namespace *) linitial(context->namespaces);
+ child_plan = dpns->plan;
+ in_same_plan_level = true;
+
+ foreach(lc, dpns->ancestors)
+ {
+ Node *ancestor = (Node *) lfirst(lc);
+ ListCell *lc2;
+
+ /*
+ * NestLoops transmit params to their inner child only; also, once
+ * we've crawled up out of a subplan, this couldn't possibly be
+ * the right match.
+ */
+ if (IsA(ancestor, NestLoop) &&
+ child_plan == innerPlan(ancestor) &&
+ in_same_plan_level)
+ {
+ NestLoop *nl = (NestLoop *) ancestor;
+
+ foreach(lc2, nl->nestParams)
+ {
+ NestLoopParam *nlp = (NestLoopParam *) lfirst(lc2);
+
+ if (nlp->paramno == param->paramid)
+ {
+ /* Found a match, so return it */
+ *dpns_p = dpns;
+ *ancestor_cell_p = lc;
+ return (Node *) nlp->paramval;
+ }
+ }
+ }
+
+ /*
+ * If ancestor is a SubPlan, check the arguments it provides.
+ */
+ if (IsA(ancestor, SubPlan))
+ {
+ SubPlan *subplan = (SubPlan *) ancestor;
+ ListCell *lc3;
+ ListCell *lc4;
+
+ forboth(lc3, subplan->parParam, lc4, subplan->args)
+ {
+ int paramid = lfirst_int(lc3);
+ Node *arg = (Node *) lfirst(lc4);
+
+ if (paramid == param->paramid)
+ {
+ /*
+ * Found a match, so return it. But, since Vars in
+ * the arg are to be evaluated in the surrounding
+ * context, we have to point to the next ancestor item
+ * that is *not* a SubPlan.
+ */
+ ListCell *rest;
+
+ for_each_cell(rest, dpns->ancestors,
+ lnext(dpns->ancestors, lc))
+ {
+ Node *ancestor2 = (Node *) lfirst(rest);
+
+ if (!IsA(ancestor2, SubPlan))
+ {
+ *dpns_p = dpns;
+ *ancestor_cell_p = rest;
+ return arg;
+ }
+ }
+ elog(ERROR, "SubPlan cannot be outermost ancestor");
+ }
+ }
+
+ /* We have emerged from a subplan. */
+ in_same_plan_level = false;
+
+ /* SubPlan isn't a kind of Plan, so skip the rest */
+ continue;
+ }
+
+ /*
+ * Check to see if we're emerging from an initplan of the current
+ * ancestor plan. Initplans never have any parParams, so no need
+ * to search that list, but we need to know if we should reset
+ * in_same_plan_level.
+ */
+ foreach(lc2, ((Plan *) ancestor)->initPlan)
+ {
+ SubPlan *subplan = lfirst_node(SubPlan, lc2);
+
+ if (child_plan != (Plan *) list_nth(dpns->subplans,
+ subplan->plan_id - 1))
+ continue;
+
+ /* No parameters to be had here. */
+ Assert(subplan->parParam == NIL);
+
+ /* We have emerged from an initplan. */
+ in_same_plan_level = false;
+ break;
+ }
+
+ /* No luck, crawl up to next ancestor */
+ child_plan = (Plan *) ancestor;
+ }
+ }
+
+ /* No referent found */
+ return NULL;
+}
+
+/*
+ * Display a Param appropriately.
+ */
+static void
+get_parameter(Param *param, deparse_context *context)
+{
+ Node *expr;
+ deparse_namespace *dpns;
+ ListCell *ancestor_cell;
+
+ /*
+ * If it's a PARAM_EXEC parameter, try to locate the expression from which
+ * the parameter was computed. Note that failing to find a referent isn't
+ * an error, since the Param might well be a subplan output rather than an
+ * input.
+ */
+ expr = find_param_referent(param, context, &dpns, &ancestor_cell);
+ if (expr)
+ {
+ /* Found a match, so print it */
+ deparse_namespace save_dpns;
+ bool save_varprefix;
+ bool need_paren;
+
+ /* Switch attention to the ancestor plan node */
+ push_ancestor_plan(dpns, ancestor_cell, &save_dpns);
+
+ /*
+ * Force prefixing of Vars, since they won't belong to the relation
+ * being scanned in the original plan node.
+ */
+ save_varprefix = context->varprefix;
+ context->varprefix = true;
+
+ /*
+ * A Param's expansion is typically a Var, Aggref, GroupingFunc, or
+ * upper-level Param, which wouldn't need extra parentheses.
+ * Otherwise, insert parens to ensure the expression looks atomic.
+ */
+ need_paren = !(IsA(expr, Var) ||
+ IsA(expr, Aggref) ||
+ IsA(expr, GroupingFunc) ||
+ IsA(expr, Param));
+ if (need_paren)
+ appendStringInfoChar(context->buf, '(');
+
+ get_rule_expr(expr, context, false);
+
+ if (need_paren)
+ appendStringInfoChar(context->buf, ')');
+
+ context->varprefix = save_varprefix;
+
+ pop_ancestor_plan(dpns, &save_dpns);
+
+ return;
+ }
+
+ /*
+ * If it's an external parameter, see if the outermost namespace provides
+ * function argument names.
+ */
+ if (param->paramkind == PARAM_EXTERN && context->namespaces != NIL)
+ {
+ dpns = llast(context->namespaces);
+ if (dpns->argnames &&
+ param->paramid > 0 &&
+ param->paramid <= dpns->numargs)
+ {
+ char *argname = dpns->argnames[param->paramid - 1];
+
+ if (argname)
+ {
+ bool should_qualify = false;
+ ListCell *lc;
+
+ /*
+ * Qualify the parameter name if there are any other deparse
+ * namespaces with range tables. This avoids qualifying in
+ * trivial cases like "RETURN a + b", but makes it safe in all
+ * other cases.
+ */
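+
+ /*
+ * Illustrative example: in a SQL-body function f(a int) whose
+ * statement also reads from a table, the parameter would print as
+ * "f.a"; in a trivial body such as "RETURN a + 1" it stays
+ * unqualified as "a".
+ */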
+ foreach(lc, context->namespaces)
+ {
+ deparse_namespace *dpns = lfirst(lc);
+
+ if (list_length(dpns->rtable_names) > 0)
+ {
+ should_qualify = true;
+ break;
+ }
+ }
+ if (should_qualify)
+ {
+ appendStringInfoString(context->buf, quote_identifier(dpns->funcname));
+ appendStringInfoChar(context->buf, '.');
+ }
+
+ appendStringInfoString(context->buf, quote_identifier(argname));
+ return;
+ }
+ }
+ }
+
+ /*
+ * Not PARAM_EXEC, or couldn't find referent: just print $N.
+ */
+ appendStringInfo(context->buf, "$%d", param->paramid);
+}
+
+/*
+ * get_simple_binary_op_name
+ *
+ * Helper function for isSimpleNode: returns the operator's name if it is
+ * a single-character binary operator, or NULL otherwise.
+ */
+static const char *
+get_simple_binary_op_name(OpExpr *expr)
+{
+ List *args = expr->args;
+
+ if (list_length(args) == 2)
+ {
+ /* binary operator */
+ Node *arg1 = (Node *) linitial(args);
+ Node *arg2 = (Node *) lsecond(args);
+ const char *op;
+
+ op = generate_operator_name(expr->opno, exprType(arg1), exprType(arg2));
+ if (strlen(op) == 1)
+ return op;
+ }
+ return NULL;
+}
+
+
+/*
+ * isSimpleNode - check if given node is simple (doesn't need parenthesizing)
+ *
+ * true : simple in the context of parent node's type
+ * false : not simple
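+ *
+ * For example, when deparsing "a + b * c" with pretty-printing on, the
+ * inner "b * c" OpExpr counts as simple relative to its "+" parent
+ * (higher-priority operator), so no parentheses are emitted around it;
+ * whereas "c + d" under a "*" parent is not simple, so "b * (c + d)"
+ * keeps its parentheses.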
+ */
+static bool
+isSimpleNode(Node *node, Node *parentNode, int prettyFlags)
+{
+ if (!node)
+ return false;
+
+ switch (nodeTag(node))
+ {
+ case T_Var:
+ case T_Const:
+ case T_Param:
+ case T_CoerceToDomainValue:
+ case T_SetToDefault:
+ case T_CurrentOfExpr:
+ /* single words: always simple */
+ return true;
+
+ case T_SubscriptingRef:
+ case T_ArrayExpr:
+ case T_RowExpr:
+ case T_CoalesceExpr:
+ case T_MinMaxExpr:
+ case T_SQLValueFunction:
+ case T_XmlExpr:
+ case T_NextValueExpr:
+ case T_NullIfExpr:
+ case T_Aggref:
+ case T_GroupingFunc:
+ case T_WindowFunc:
+ case T_FuncExpr:
+ /* function-like: name(..) or name[..] */
+ return true;
+
+ /* CASE keywords act as parentheses */
+ case T_CaseExpr:
+ return true;
+
+ case T_FieldSelect:
+
+ /*
+ * appears simple since . has top precedence, unless parent is
+ * T_FieldSelect itself!
+ */
+ return !IsA(parentNode, FieldSelect);
+
+ case T_FieldStore:
+
+ /*
+ * treat like FieldSelect (probably doesn't matter)
+ */
+ return !IsA(parentNode, FieldStore);
+
+ case T_CoerceToDomain:
+ /* maybe simple, check args */
+ return isSimpleNode((Node *) ((CoerceToDomain *) node)->arg,
+ node, prettyFlags);
+ case T_RelabelType:
+ return isSimpleNode((Node *) ((RelabelType *) node)->arg,
+ node, prettyFlags);
+ case T_CoerceViaIO:
+ return isSimpleNode((Node *) ((CoerceViaIO *) node)->arg,
+ node, prettyFlags);
+ case T_ArrayCoerceExpr:
+ return isSimpleNode((Node *) ((ArrayCoerceExpr *) node)->arg,
+ node, prettyFlags);
+ case T_ConvertRowtypeExpr:
+ return isSimpleNode((Node *) ((ConvertRowtypeExpr *) node)->arg,
+ node, prettyFlags);
+
+ case T_OpExpr:
+ {
+ /* depends on parent node type; needs further checking */
+ if (prettyFlags & PRETTYFLAG_PAREN && IsA(parentNode, OpExpr))
+ {
+ const char *op;
+ const char *parentOp;
+ bool is_lopriop;
+ bool is_hipriop;
+ bool is_lopriparent;
+ bool is_hipriparent;
+
+ op = get_simple_binary_op_name((OpExpr *) node);
+ if (!op)
+ return false;
+
+ /* We know only the basic operators + - and * / % */
+ is_lopriop = (strchr("+-", *op) != NULL);
+ is_hipriop = (strchr("*/%", *op) != NULL);
+ if (!(is_lopriop || is_hipriop))
+ return false;
+
+ parentOp = get_simple_binary_op_name((OpExpr *) parentNode);
+ if (!parentOp)
+ return false;
+
+ is_lopriparent = (strchr("+-", *parentOp) != NULL);
+ is_hipriparent = (strchr("*/%", *parentOp) != NULL);
+ if (!(is_lopriparent || is_hipriparent))
+ return false;
+
+ if (is_hipriop && is_lopriparent)
+ return true; /* op binds tighter than parent */
+
+ if (is_lopriop && is_hipriparent)
+ return false;
+
+ /*
+ * Operators are same priority --- can skip parens only if
+ * we have (a - b) - c, not a - (b - c).
+ */
+ if (node == (Node *) linitial(((OpExpr *) parentNode)->args))
+ return true;
+
+ return false;
+ }
+ /* else do the same stuff as for T_SubLink et al. */
+ }
+ /* FALLTHROUGH */
+
+ case T_SubLink:
+ case T_NullTest:
+ case T_BooleanTest:
+ case T_DistinctExpr:
+ switch (nodeTag(parentNode))
+ {
+ case T_FuncExpr:
+ {
+ /* special handling for casts and COERCE_SQL_SYNTAX */
+ CoercionForm type = ((FuncExpr *) parentNode)->funcformat;
+
+ if (type == COERCE_EXPLICIT_CAST ||
+ type == COERCE_IMPLICIT_CAST ||
+ type == COERCE_SQL_SYNTAX)
+ return false;
+ return true; /* own parentheses */
+ }
+ case T_BoolExpr: /* lower precedence */
+ case T_SubscriptingRef: /* other separators */
+ case T_ArrayExpr: /* other separators */
+ case T_RowExpr: /* other separators */
+ case T_CoalesceExpr: /* own parentheses */
+ case T_MinMaxExpr: /* own parentheses */
+ case T_XmlExpr: /* own parentheses */
+ case T_NullIfExpr: /* other separators */
+ case T_Aggref: /* own parentheses */
+ case T_GroupingFunc: /* own parentheses */
+ case T_WindowFunc: /* own parentheses */
+ case T_CaseExpr: /* other separators */
+ return true;
+ default:
+ return false;
+ }
+
+ case T_BoolExpr:
+ switch (nodeTag(parentNode))
+ {
+ case T_BoolExpr:
+ if (prettyFlags & PRETTYFLAG_PAREN)
+ {
+ BoolExprType type;
+ BoolExprType parentType;
+
+ type = ((BoolExpr *) node)->boolop;
+ parentType = ((BoolExpr *) parentNode)->boolop;
+ switch (type)
+ {
+ case NOT_EXPR:
+ case AND_EXPR:
+ if (parentType == AND_EXPR || parentType == OR_EXPR)
+ return true;
+ break;
+ case OR_EXPR:
+ if (parentType == OR_EXPR)
+ return true;
+ break;
+ }
+ }
+ return false;
+ case T_FuncExpr:
+ {
+ /* special handling for casts and COERCE_SQL_SYNTAX */
+ CoercionForm type = ((FuncExpr *) parentNode)->funcformat;
+
+ if (type == COERCE_EXPLICIT_CAST ||
+ type == COERCE_IMPLICIT_CAST ||
+ type == COERCE_SQL_SYNTAX)
+ return false;
+ return true; /* own parentheses */
+ }
+ case T_SubscriptingRef: /* other separators */
+ case T_ArrayExpr: /* other separators */
+ case T_RowExpr: /* other separators */
+ case T_CoalesceExpr: /* own parentheses */
+ case T_MinMaxExpr: /* own parentheses */
+ case T_XmlExpr: /* own parentheses */
+ case T_NullIfExpr: /* other separators */
+ case T_Aggref: /* own parentheses */
+ case T_GroupingFunc: /* own parentheses */
+ case T_WindowFunc: /* own parentheses */
+ case T_CaseExpr: /* other separators */
+ return true;
+ default:
+ return false;
+ }
+
+ default:
+ break;
+ }
+ /* those we don't know: in dubio complexo */
+ return false;
+}
+
+
+/*
+ * appendContextKeyword - append a keyword to buffer
+ *
+ * If prettyPrint is enabled, perform a line break, and adjust indentation.
+ * Otherwise, just append the keyword.
+ */
+static void
+appendContextKeyword(deparse_context *context, const char *str,
+ int indentBefore, int indentAfter, int indentPlus)
+{
+ StringInfo buf = context->buf;
+
+ if (PRETTY_INDENT(context))
+ {
+ int indentAmount;
+
+ context->indentLevel += indentBefore;
+
+ /* remove any trailing spaces currently in the buffer ... */
+ removeStringInfoSpaces(buf);
+ /* ... then add a newline and some spaces */
+ appendStringInfoChar(buf, '\n');
+
+ if (context->indentLevel < PRETTYINDENT_LIMIT)
+ indentAmount = Max(context->indentLevel, 0) + indentPlus;
+ else
+ {
+ /*
+ * If we're indented more than PRETTYINDENT_LIMIT characters, try
+ * to conserve horizontal space by reducing the per-level
+ * indentation. For best results the scale factor here should
+ * divide all the indent amounts that get added to indentLevel
+ * (PRETTYINDENT_STD, etc). It's important that the indentation
+ * not grow unboundedly, else deeply-nested trees use O(N^2)
+ * whitespace; so we also wrap modulo PRETTYINDENT_LIMIT.
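+ * For example, if PRETTYINDENT_LIMIT is 40 and PRETTYINDENT_STD is 8, an
+ * indentLevel of 100 maps to 40 + 60/4 = 55, which wraps to 15 before
+ * indentPlus is added.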
+ */
+ indentAmount = PRETTYINDENT_LIMIT +
+ (context->indentLevel - PRETTYINDENT_LIMIT) /
+ (PRETTYINDENT_STD / 2);
+ indentAmount %= PRETTYINDENT_LIMIT;
+ /* scale/wrap logic affects indentLevel, but not indentPlus */
+ indentAmount += indentPlus;
+ }
+ appendStringInfoSpaces(buf, indentAmount);
+
+ appendStringInfoString(buf, str);
+
+ context->indentLevel += indentAfter;
+ if (context->indentLevel < 0)
+ context->indentLevel = 0;
+ }
+ else
+ appendStringInfoString(buf, str);
+}
+
+/*
+ * removeStringInfoSpaces - delete trailing spaces from a buffer.
+ *
+ * Possibly this should move to stringinfo.c at some point.
+ */
+static void
+removeStringInfoSpaces(StringInfo str)
+{
+ while (str->len > 0 && str->data[str->len - 1] == ' ')
+ str->data[--(str->len)] = '\0';
+}
+
+
+/*
+ * get_rule_expr_paren - deparse expr using get_rule_expr,
+ * embracing the string with parentheses if necessary for prettyPrint.
+ *
+ * Never embrace if prettyFlags=0, because it's done in the calling node.
+ *
+ * Any node that does *not* embrace its argument node by sql syntax (with
+ * parentheses, non-operator keywords like CASE/WHEN/ON, or comma etc) should
+ * use get_rule_expr_paren instead of get_rule_expr so parentheses can be
+ * added.
+ */
+static void
+get_rule_expr_paren(Node *node, deparse_context *context,
+ bool showimplicit, Node *parentNode)
+{
+ bool need_paren;
+
+ need_paren = PRETTY_PAREN(context) &&
+ !isSimpleNode(node, parentNode, context->prettyFlags);
+
+ if (need_paren)
+ appendStringInfoChar(context->buf, '(');
+
+ get_rule_expr(node, context, showimplicit);
+
+ if (need_paren)
+ appendStringInfoChar(context->buf, ')');
+}
+
+
+/* ----------
+ * get_rule_expr - Parse back an expression
+ *
+ * Note: showimplicit determines whether we display any implicit cast that
+ * is present at the top of the expression tree. It is a passed argument,
+ * not a field of the context struct, because we change the value as we
+ * recurse down into the expression. In general we suppress implicit casts
+ * when the result type is known with certainty (eg, the arguments of an
+ * OR must be boolean). We display implicit casts for arguments of functions
+ * and operators, since this is needed to be certain that the same function
+ * or operator will be chosen when the expression is re-parsed.
+ * ----------
+ */
+static void
+get_rule_expr(Node *node, deparse_context *context,
+ bool showimplicit)
+{
+ StringInfo buf = context->buf;
+
+ if (node == NULL)
+ return;
+
+ /* Guard against excessively long or deeply-nested queries */
+ CHECK_FOR_INTERRUPTS();
+ check_stack_depth();
+
+ /*
+ * Each level of get_rule_expr must emit an indivisible term
+ * (parenthesized if necessary) to ensure result is reparsed into the same
+ * expression tree. The only exception is that when the input is a List,
+ * we emit the component items comma-separated with no surrounding
+ * decoration; this is convenient for most callers.
+ */
+ switch (nodeTag(node))
+ {
+ case T_Var:
+ (void) get_variable((Var *) node, 0, false, context);
+ break;
+
+ case T_Const:
+ get_const_expr((Const *) node, context, 0);
+ break;
+
+ case T_Param:
+ get_parameter((Param *) node, context);
+ break;
+
+ case T_Aggref:
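+ /* pass the node as its own original_aggref; see get_agg_expr */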
+ get_agg_expr((Aggref *) node, context, (Aggref *) node);
+ break;
+
+ case T_GroupingFunc:
+ {
+ GroupingFunc *gexpr = (GroupingFunc *) node;
+
+ appendStringInfoString(buf, "GROUPING(");
+ get_rule_expr((Node *) gexpr->args, context, true);
+ appendStringInfoChar(buf, ')');
+ }
+ break;
+
+ case T_WindowFunc:
+ get_windowfunc_expr((WindowFunc *) node, context);
+ break;
+
+ case T_SubscriptingRef:
+ {
+ SubscriptingRef *sbsref = (SubscriptingRef *) node;
+ bool need_parens;
+
+ /*
+ * If the argument is a CaseTestExpr, we must be inside a
+ * FieldStore, ie, we are assigning to an element of an array
+ * within a composite column. Since we already punted on
+ * displaying the FieldStore's target information, just punt
+ * here too, and display only the assignment source
+ * expression.
+ */
+ if (IsA(sbsref->refexpr, CaseTestExpr))
+ {
+ Assert(sbsref->refassgnexpr);
+ get_rule_expr((Node *) sbsref->refassgnexpr,
+ context, showimplicit);
+ break;
+ }
+
+ /*
+ * Parenthesize the argument unless it's a simple Var or a
+ * FieldSelect. (In particular, if it's another
+ * SubscriptingRef, we *must* parenthesize to avoid
+ * confusion.)
+ */
+ need_parens = !IsA(sbsref->refexpr, Var) &&
+ !IsA(sbsref->refexpr, FieldSelect);
+ if (need_parens)
+ appendStringInfoChar(buf, '(');
+ get_rule_expr((Node *) sbsref->refexpr, context, showimplicit);
+ if (need_parens)
+ appendStringInfoChar(buf, ')');
+
+ /*
+ * If there's a refassgnexpr, we want to print the node in the
+ * format "container[subscripts] := refassgnexpr". This is
+ * not legal SQL, so decompilation of INSERT or UPDATE
+ * statements should always use processIndirection as part of
+ * the statement-level syntax. We should only see this when
+ * EXPLAIN tries to print the targetlist of a plan resulting
+ * from such a statement.
+ */
+ if (sbsref->refassgnexpr)
+ {
+ Node *refassgnexpr;
+
+ /*
+ * Use processIndirection to print this node's subscripts
+ * as well as any additional field selections or
+ * subscripting in immediate descendants. It returns the
+ * RHS expr that is actually being "assigned".
+ */
+ refassgnexpr = processIndirection(node, context);
+ appendStringInfoString(buf, " := ");
+ get_rule_expr(refassgnexpr, context, showimplicit);
+ }
+ else
+ {
+ /* Just an ordinary container fetch, so print subscripts */
+ printSubscripts(sbsref, context);
+ }
+ }
+ break;
+
+ case T_FuncExpr:
+ get_func_expr((FuncExpr *) node, context, showimplicit);
+ break;
+
+ case T_NamedArgExpr:
+ {
+ NamedArgExpr *na = (NamedArgExpr *) node;
+
+ appendStringInfo(buf, "%s => ", quote_identifier(na->name));
+ get_rule_expr((Node *) na->arg, context, showimplicit);
+ }
+ break;
+
+ case T_OpExpr:
+ get_oper_expr((OpExpr *) node, context);
+ break;
+
+ case T_DistinctExpr:
+ {
+ DistinctExpr *expr = (DistinctExpr *) node;
+ List *args = expr->args;
+ Node *arg1 = (Node *) linitial(args);
+ Node *arg2 = (Node *) lsecond(args);
+
+ if (!PRETTY_PAREN(context))
+ appendStringInfoChar(buf, '(');
+ get_rule_expr_paren(arg1, context, true, node);
+ appendStringInfoString(buf, " IS DISTINCT FROM ");
+ get_rule_expr_paren(arg2, context, true, node);
+ if (!PRETTY_PAREN(context))
+ appendStringInfoChar(buf, ')');
+ }
+ break;
+
+ case T_NullIfExpr:
+ {
+ NullIfExpr *nullifexpr = (NullIfExpr *) node;
+
+ appendStringInfoString(buf, "NULLIF(");
+ get_rule_expr((Node *) nullifexpr->args, context, true);
+ appendStringInfoChar(buf, ')');
+ }
+ break;
+
+ case T_ScalarArrayOpExpr:
+ {
+ ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node;
+ List *args = expr->args;
+ Node *arg1 = (Node *) linitial(args);
+ Node *arg2 = (Node *) lsecond(args);
+
+ if (!PRETTY_PAREN(context))
+ appendStringInfoChar(buf, '(');
+ get_rule_expr_paren(arg1, context, true, node);
+ appendStringInfo(buf, " %s %s (",
+ generate_operator_name(expr->opno,
+ exprType(arg1),
+ get_base_element_type(exprType(arg2))),
+ expr->useOr ? "ANY" : "ALL");
+ get_rule_expr_paren(arg2, context, true, node);
+
+ /*
+ * There's inherent ambiguity in "x op ANY/ALL (y)" when y is
+ * a bare sub-SELECT. Since we're here, the sub-SELECT must
+ * be meant as a scalar sub-SELECT yielding an array value to
+ * be used in ScalarArrayOpExpr; but the grammar will
+ * preferentially interpret such a construct as an ANY/ALL
+ * SubLink. To prevent misparsing the output that way, insert
+ * a dummy coercion (which will be stripped by parse analysis,
+ * so no inefficiency is added in dump and reload). This is
+ * indeed most likely what the user wrote to get the construct
+ * accepted in the first place.
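+ * For example, the output then reads "x = ANY ((SELECT ...)::integer[])"
+ * rather than the ambiguous "x = ANY ((SELECT ...))".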
+ */
+ if (IsA(arg2, SubLink) &&
+ ((SubLink *) arg2)->subLinkType == EXPR_SUBLINK)
+ appendStringInfo(buf, "::%s",
+ format_type_with_typemod(exprType(arg2),
+ exprTypmod(arg2)));
+ appendStringInfoChar(buf, ')');
+ if (!PRETTY_PAREN(context))
+ appendStringInfoChar(buf, ')');
+ }
+ break;
+
+ case T_BoolExpr:
+ {
+ BoolExpr *expr = (BoolExpr *) node;
+ Node *first_arg = linitial(expr->args);
+ ListCell *arg;
+
+ switch (expr->boolop)
+ {
+ case AND_EXPR:
+ if (!PRETTY_PAREN(context))
+ appendStringInfoChar(buf, '(');
+ get_rule_expr_paren(first_arg, context,
+ false, node);
+ for_each_from(arg, expr->args, 1)
+ {
+ appendStringInfoString(buf, " AND ");
+ get_rule_expr_paren((Node *) lfirst(arg), context,
+ false, node);
+ }
+ if (!PRETTY_PAREN(context))
+ appendStringInfoChar(buf, ')');
+ break;
+
+ case OR_EXPR:
+ if (!PRETTY_PAREN(context))
+ appendStringInfoChar(buf, '(');
+ get_rule_expr_paren(first_arg, context,
+ false, node);
+ for_each_from(arg, expr->args, 1)
+ {
+ appendStringInfoString(buf, " OR ");
+ get_rule_expr_paren((Node *) lfirst(arg), context,
+ false, node);
+ }
+ if (!PRETTY_PAREN(context))
+ appendStringInfoChar(buf, ')');
+ break;
+
+ case NOT_EXPR:
+ if (!PRETTY_PAREN(context))
+ appendStringInfoChar(buf, '(');
+ appendStringInfoString(buf, "NOT ");
+ get_rule_expr_paren(first_arg, context,
+ false, node);
+ if (!PRETTY_PAREN(context))
+ appendStringInfoChar(buf, ')');
+ break;
+
+ default:
+ elog(ERROR, "unrecognized boolop: %d",
+ (int) expr->boolop);
+ }
+ }
+ break;
+
+ case T_SubLink:
+ get_sublink_expr((SubLink *) node, context);
+ break;
+
+ case T_SubPlan:
+ {
+ SubPlan *subplan = (SubPlan *) node;
+
+ /*
+ * We cannot see an already-planned subplan in rule deparsing,
+ * only while EXPLAINing a query plan. We don't try to
+ * reconstruct the original SQL, just reference the subplan
+ * that appears elsewhere in EXPLAIN's result.
+ */
+ if (subplan->useHashTable)
+ appendStringInfo(buf, "(hashed %s)", subplan->plan_name);
+ else
+ appendStringInfo(buf, "(%s)", subplan->plan_name);
+ }
+ break;
+
+ case T_AlternativeSubPlan:
+ {
+ AlternativeSubPlan *asplan = (AlternativeSubPlan *) node;
+ ListCell *lc;
+
+ /*
+ * This case cannot be reached in normal usage, since no
+ * AlternativeSubPlan can appear either in parsetrees or
+ * finished plan trees. We keep it just in case somebody
+ * wants to use this code to print planner data structures.
+ */
+ appendStringInfoString(buf, "(alternatives: ");
+ foreach(lc, asplan->subplans)
+ {
+ SubPlan *splan = lfirst_node(SubPlan, lc);
+
+ if (splan->useHashTable)
+ appendStringInfo(buf, "hashed %s", splan->plan_name);
+ else
+ appendStringInfoString(buf, splan->plan_name);
+ if (lnext(asplan->subplans, lc))
+ appendStringInfoString(buf, " or ");
+ }
+ appendStringInfoChar(buf, ')');
+ }
+ break;
+
+ case T_FieldSelect:
+ {
+ FieldSelect *fselect = (FieldSelect *) node;
+ Node *arg = (Node *) fselect->arg;
+ int fno = fselect->fieldnum;
+ const char *fieldname;
+ bool need_parens;
+
+ /*
+ * Parenthesize the argument unless it's a SubscriptingRef or
+ * another FieldSelect. Note in particular that it would be
+ * WRONG to not parenthesize a Var argument; simplicity is not
+ * the issue here, having the right number of names is.
+ */
+ need_parens = !IsA(arg, SubscriptingRef) &&
+ !IsA(arg, FieldSelect);
+ if (need_parens)
+ appendStringInfoChar(buf, '(');
+ get_rule_expr(arg, context, true);
+ if (need_parens)
+ appendStringInfoChar(buf, ')');
+
+ /*
+ * Get and print the field name.
+ */
+ fieldname = get_name_for_var_field((Var *) arg, fno,
+ 0, context);
+ appendStringInfo(buf, ".%s", quote_identifier(fieldname));
+ }
+ break;
+
+ case T_FieldStore:
+ {
+ FieldStore *fstore = (FieldStore *) node;
+ bool need_parens;
+
+ /*
+ * There is no good way to represent a FieldStore as real SQL,
+ * so decompilation of INSERT or UPDATE statements should
+ * always use processIndirection as part of the
+ * statement-level syntax. We should only get here when
+ * EXPLAIN tries to print the targetlist of a plan resulting
+ * from such a statement. The plan case is even harder than
+ * ordinary rules would be, because the planner tries to
+ * collapse multiple assignments to the same field or subfield
+ * into one FieldStore; so we can see a list of target fields
+ * not just one, and the arguments could be FieldStores
+ * themselves. We don't bother to try to print the target
+ * field names; we just print the source arguments, with a
+ * ROW() around them if there's more than one. This isn't
+ * terribly complete, but it's probably good enough for
+ * EXPLAIN's purposes; especially since anything more would be
+ * either hopelessly confusing or an even poorer
+ * representation of what the plan is actually doing.
+ */
+ need_parens = (list_length(fstore->newvals) != 1);
+ if (need_parens)
+ appendStringInfoString(buf, "ROW(");
+ get_rule_expr((Node *) fstore->newvals, context, showimplicit);
+ if (need_parens)
+ appendStringInfoChar(buf, ')');
+ }
+ break;
+
+ case T_RelabelType:
+ {
+ RelabelType *relabel = (RelabelType *) node;
+ Node *arg = (Node *) relabel->arg;
+
+ if (relabel->relabelformat == COERCE_IMPLICIT_CAST &&
+ !showimplicit)
+ {
+ /* don't show the implicit cast */
+ get_rule_expr_paren(arg, context, false, node);
+ }
+ else
+ {
+ get_coercion_expr(arg, context,
+ relabel->resulttype,
+ relabel->resulttypmod,
+ node);
+ }
+ }
+ break;
+
+ case T_CoerceViaIO:
+ {
+ CoerceViaIO *iocoerce = (CoerceViaIO *) node;
+ Node *arg = (Node *) iocoerce->arg;
+
+ if (iocoerce->coerceformat == COERCE_IMPLICIT_CAST &&
+ !showimplicit)
+ {
+ /* don't show the implicit cast */
+ get_rule_expr_paren(arg, context, false, node);
+ }
+ else
+ {
+ get_coercion_expr(arg, context,
+ iocoerce->resulttype,
+ -1,
+ node);
+ }
+ }
+ break;
+
+ case T_ArrayCoerceExpr:
+ {
+ ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) node;
+ Node *arg = (Node *) acoerce->arg;
+
+ if (acoerce->coerceformat == COERCE_IMPLICIT_CAST &&
+ !showimplicit)
+ {
+ /* don't show the implicit cast */
+ get_rule_expr_paren(arg, context, false, node);
+ }
+ else
+ {
+ get_coercion_expr(arg, context,
+ acoerce->resulttype,
+ acoerce->resulttypmod,
+ node);
+ }
+ }
+ break;
+
+ case T_ConvertRowtypeExpr:
+ {
+ ConvertRowtypeExpr *convert = (ConvertRowtypeExpr *) node;
+ Node *arg = (Node *) convert->arg;
+
+ if (convert->convertformat == COERCE_IMPLICIT_CAST &&
+ !showimplicit)
+ {
+ /* don't show the implicit cast */
+ get_rule_expr_paren(arg, context, false, node);
+ }
+ else
+ {
+ get_coercion_expr(arg, context,
+ convert->resulttype, -1,
+ node);
+ }
+ }
+ break;
+
+ case T_CollateExpr:
+ {
+ CollateExpr *collate = (CollateExpr *) node;
+ Node *arg = (Node *) collate->arg;
+
+ if (!PRETTY_PAREN(context))
+ appendStringInfoChar(buf, '(');
+ get_rule_expr_paren(arg, context, showimplicit, node);
+ appendStringInfo(buf, " COLLATE %s",
+ generate_collation_name(collate->collOid));
+ if (!PRETTY_PAREN(context))
+ appendStringInfoChar(buf, ')');
+ }
+ break;
+
+ case T_CaseExpr:
+ {
+ CaseExpr *caseexpr = (CaseExpr *) node;
+ ListCell *temp;
+
+ appendContextKeyword(context, "CASE",
+ 0, PRETTYINDENT_VAR, 0);
+ if (caseexpr->arg)
+ {
+ appendStringInfoChar(buf, ' ');
+ get_rule_expr((Node *) caseexpr->arg, context, true);
+ }
+ foreach(temp, caseexpr->args)
+ {
+ CaseWhen *when = (CaseWhen *) lfirst(temp);
+ Node *w = (Node *) when->expr;
+
+ if (caseexpr->arg)
+ {
+ /*
+ * The parser should have produced WHEN clauses of the
+ * form "CaseTestExpr = RHS", possibly with an
+ * implicit coercion inserted above the CaseTestExpr.
+ * For accurate decompilation of rules it's essential
+ * that we show just the RHS. However in an
+ * expression that's been through the optimizer, the
+ * WHEN clause could be almost anything (since the
+ * equality operator could have been expanded into an
+ * inline function). If we don't recognize the form
+ * of the WHEN clause, just punt and display it as-is.
+ */
+ if (IsA(w, OpExpr))
+ {
+ List *args = ((OpExpr *) w)->args;
+
+ if (list_length(args) == 2 &&
+ IsA(strip_implicit_coercions(linitial(args)),
+ CaseTestExpr))
+ w = (Node *) lsecond(args);
+ }
+ }
+
+ if (!PRETTY_INDENT(context))
+ appendStringInfoChar(buf, ' ');
+ appendContextKeyword(context, "WHEN ",
+ 0, 0, 0);
+ get_rule_expr(w, context, false);
+ appendStringInfoString(buf, " THEN ");
+ get_rule_expr((Node *) when->result, context, true);
+ }
+ if (!PRETTY_INDENT(context))
+ appendStringInfoChar(buf, ' ');
+ appendContextKeyword(context, "ELSE ",
+ 0, 0, 0);
+ get_rule_expr((Node *) caseexpr->defresult, context, true);
+ if (!PRETTY_INDENT(context))
+ appendStringInfoChar(buf, ' ');
+ appendContextKeyword(context, "END",
+ -PRETTYINDENT_VAR, 0, 0);
+ }
+ break;
+
+ case T_CaseTestExpr:
+ {
+ /*
+ * Normally we should never get here, since for expressions
+ * that can contain this node type we attempt to avoid
+ * recursing to it. But in an optimized expression we might
+ * be unable to avoid that (see comments for CaseExpr). If we
+ * do see one, print it as CASE_TEST_EXPR.
+ */
+ appendStringInfoString(buf, "CASE_TEST_EXPR");
+ }
+ break;
+
+ case T_ArrayExpr:
+ {
+ ArrayExpr *arrayexpr = (ArrayExpr *) node;
+
+ appendStringInfoString(buf, "ARRAY[");
+ get_rule_expr((Node *) arrayexpr->elements, context, true);
+ appendStringInfoChar(buf, ']');
+
+ /*
+ * If the array isn't empty, we assume its elements are
+ * coerced to the desired type. If it's empty, though, we
+ * need an explicit coercion to the array type.
+ */
+ if (arrayexpr->elements == NIL)
+ appendStringInfo(buf, "::%s",
+ format_type_with_typemod(arrayexpr->array_typeid, -1));
+ }
+ break;
+
+ case T_RowExpr:
+ {
+ RowExpr *rowexpr = (RowExpr *) node;
+ TupleDesc tupdesc = NULL;
+ ListCell *arg;
+ int i;
+ char *sep;
+
+ /*
+ * If it's a named type and not RECORD, we may have to skip
+ * dropped columns and/or claim there are NULLs for added
+ * columns.
+ */
+ if (rowexpr->row_typeid != RECORDOID)
+ {
+ tupdesc = lookup_rowtype_tupdesc(rowexpr->row_typeid, -1);
+ Assert(list_length(rowexpr->args) <= tupdesc->natts);
+ }
+
+ /*
+ * SQL99 allows "ROW" to be omitted when there is more than
+ * one column, but for simplicity we always print it.
+ */
+ appendStringInfoString(buf, "ROW(");
+ sep = "";
+ i = 0;
+ foreach(arg, rowexpr->args)
+ {
+ Node *e = (Node *) lfirst(arg);
+
+ if (tupdesc == NULL ||
+ !TupleDescAttr(tupdesc, i)->attisdropped)
+ {
+ appendStringInfoString(buf, sep);
+ /* Whole-row Vars need special treatment here */
+ get_rule_expr_toplevel(e, context, true);
+ sep = ", ";
+ }
+ i++;
+ }
+ if (tupdesc != NULL)
+ {
+ while (i < tupdesc->natts)
+ {
+ if (!TupleDescAttr(tupdesc, i)->attisdropped)
+ {
+ appendStringInfoString(buf, sep);
+ appendStringInfoString(buf, "NULL");
+ sep = ", ";
+ }
+ i++;
+ }
+
+ ReleaseTupleDesc(tupdesc);
+ }
+ appendStringInfoChar(buf, ')');
+ if (rowexpr->row_format == COERCE_EXPLICIT_CAST)
+ appendStringInfo(buf, "::%s",
+ format_type_with_typemod(rowexpr->row_typeid, -1));
+ }
+ break;
+
+ case T_RowCompareExpr:
+ {
+ RowCompareExpr *rcexpr = (RowCompareExpr *) node;
+
+ /*
+ * SQL99 allows "ROW" to be omitted when there is more than
+ * one column, but for simplicity we always print it. Within
+ * a ROW expression, whole-row Vars need special treatment, so
+ * use get_rule_list_toplevel.
+ */
+ appendStringInfoString(buf, "(ROW(");
+ get_rule_list_toplevel(rcexpr->largs, context, true);
+
+ /*
+ * We assume that the name of the first-column operator will
+ * do for all the rest too. This is definitely open to
+ * failure, eg if some but not all operators were renamed
+ * since the construct was parsed, but there seems no way to
+ * be perfect.
+ */
+ appendStringInfo(buf, ") %s ROW(",
+ generate_operator_name(linitial_oid(rcexpr->opnos),
+ exprType(linitial(rcexpr->largs)),
+ exprType(linitial(rcexpr->rargs))));
+ get_rule_list_toplevel(rcexpr->rargs, context, true);
+ appendStringInfoString(buf, "))");
+ }
+ break;
+
+ case T_CoalesceExpr:
+ {
+ CoalesceExpr *coalesceexpr = (CoalesceExpr *) node;
+
+ appendStringInfoString(buf, "COALESCE(");
+ get_rule_expr((Node *) coalesceexpr->args, context, true);
+ appendStringInfoChar(buf, ')');
+ }
+ break;
+
+ case T_MinMaxExpr:
+ {
+ MinMaxExpr *minmaxexpr = (MinMaxExpr *) node;
+
+ switch (minmaxexpr->op)
+ {
+ case IS_GREATEST:
+ appendStringInfoString(buf, "GREATEST(");
+ break;
+ case IS_LEAST:
+ appendStringInfoString(buf, "LEAST(");
+ break;
+ }
+ get_rule_expr((Node *) minmaxexpr->args, context, true);
+ appendStringInfoChar(buf, ')');
+ }
+ break;
+
+ case T_SQLValueFunction:
+ {
+ SQLValueFunction *svf = (SQLValueFunction *) node;
+
+ /*
+ * Note: this code knows that typmod for time, timestamp, and
+ * timestamptz just prints as integer.
+ */
+ switch (svf->op)
+ {
+ case SVFOP_CURRENT_DATE:
+ appendStringInfoString(buf, "CURRENT_DATE");
+ break;
+ case SVFOP_CURRENT_TIME:
+ appendStringInfoString(buf, "CURRENT_TIME");
+ break;
+ case SVFOP_CURRENT_TIME_N:
+ appendStringInfo(buf, "CURRENT_TIME(%d)", svf->typmod);
+ break;
+ case SVFOP_CURRENT_TIMESTAMP:
+ appendStringInfoString(buf, "CURRENT_TIMESTAMP");
+ break;
+ case SVFOP_CURRENT_TIMESTAMP_N:
+ appendStringInfo(buf, "CURRENT_TIMESTAMP(%d)",
+ svf->typmod);
+ break;
+ case SVFOP_LOCALTIME:
+ appendStringInfoString(buf, "LOCALTIME");
+ break;
+ case SVFOP_LOCALTIME_N:
+ appendStringInfo(buf, "LOCALTIME(%d)", svf->typmod);
+ break;
+ case SVFOP_LOCALTIMESTAMP:
+ appendStringInfoString(buf, "LOCALTIMESTAMP");
+ break;
+ case SVFOP_LOCALTIMESTAMP_N:
+ appendStringInfo(buf, "LOCALTIMESTAMP(%d)",
+ svf->typmod);
+ break;
+ case SVFOP_CURRENT_ROLE:
+ appendStringInfoString(buf, "CURRENT_ROLE");
+ break;
+ case SVFOP_CURRENT_USER:
+ appendStringInfoString(buf, "CURRENT_USER");
+ break;
+ case SVFOP_USER:
+ appendStringInfoString(buf, "USER");
+ break;
+ case SVFOP_SESSION_USER:
+ appendStringInfoString(buf, "SESSION_USER");
+ break;
+ case SVFOP_CURRENT_CATALOG:
+ appendStringInfoString(buf, "CURRENT_CATALOG");
+ break;
+ case SVFOP_CURRENT_SCHEMA:
+ appendStringInfoString(buf, "CURRENT_SCHEMA");
+ break;
+ }
+ }
+ break;
+
+ case T_XmlExpr:
+ {
+ XmlExpr *xexpr = (XmlExpr *) node;
+ bool needcomma = false;
+ ListCell *arg;
+ ListCell *narg;
+ Const *con;
+
+ switch (xexpr->op)
+ {
+ case IS_XMLCONCAT:
+ appendStringInfoString(buf, "XMLCONCAT(");
+ break;
+ case IS_XMLELEMENT:
+ appendStringInfoString(buf, "XMLELEMENT(");
+ break;
+ case IS_XMLFOREST:
+ appendStringInfoString(buf, "XMLFOREST(");
+ break;
+ case IS_XMLPARSE:
+ appendStringInfoString(buf, "XMLPARSE(");
+ break;
+ case IS_XMLPI:
+ appendStringInfoString(buf, "XMLPI(");
+ break;
+ case IS_XMLROOT:
+ appendStringInfoString(buf, "XMLROOT(");
+ break;
+ case IS_XMLSERIALIZE:
+ appendStringInfoString(buf, "XMLSERIALIZE(");
+ break;
+ case IS_DOCUMENT:
+ break;
+ }
+ if (xexpr->op == IS_XMLPARSE || xexpr->op == IS_XMLSERIALIZE)
+ {
+ if (xexpr->xmloption == XMLOPTION_DOCUMENT)
+ appendStringInfoString(buf, "DOCUMENT ");
+ else
+ appendStringInfoString(buf, "CONTENT ");
+ }
+ if (xexpr->name)
+ {
+ appendStringInfo(buf, "NAME %s",
+ quote_identifier(map_xml_name_to_sql_identifier(xexpr->name)));
+ needcomma = true;
+ }
+ if (xexpr->named_args)
+ {
+ if (xexpr->op != IS_XMLFOREST)
+ {
+ if (needcomma)
+ appendStringInfoString(buf, ", ");
+ appendStringInfoString(buf, "XMLATTRIBUTES(");
+ needcomma = false;
+ }
+ forboth(arg, xexpr->named_args, narg, xexpr->arg_names)
+ {
+ Node *e = (Node *) lfirst(arg);
+ char *argname = strVal(lfirst(narg));
+
+ if (needcomma)
+ appendStringInfoString(buf, ", ");
+ get_rule_expr((Node *) e, context, true);
+ appendStringInfo(buf, " AS %s",
+ quote_identifier(map_xml_name_to_sql_identifier(argname)));
+ needcomma = true;
+ }
+ if (xexpr->op != IS_XMLFOREST)
+ appendStringInfoChar(buf, ')');
+ }
+ if (xexpr->args)
+ {
+ if (needcomma)
+ appendStringInfoString(buf, ", ");
+ switch (xexpr->op)
+ {
+ case IS_XMLCONCAT:
+ case IS_XMLELEMENT:
+ case IS_XMLFOREST:
+ case IS_XMLPI:
+ case IS_XMLSERIALIZE:
+ /* no extra decoration needed */
+ get_rule_expr((Node *) xexpr->args, context, true);
+ break;
+ case IS_XMLPARSE:
+ Assert(list_length(xexpr->args) == 2);
+
+ get_rule_expr((Node *) linitial(xexpr->args),
+ context, true);
+
+ con = lsecond_node(Const, xexpr->args);
+ Assert(!con->constisnull);
+ if (DatumGetBool(con->constvalue))
+ appendStringInfoString(buf,
+ " PRESERVE WHITESPACE");
+ else
+ appendStringInfoString(buf,
+ " STRIP WHITESPACE");
+ break;
+ case IS_XMLROOT:
+ Assert(list_length(xexpr->args) == 3);
+
+ get_rule_expr((Node *) linitial(xexpr->args),
+ context, true);
+
+ appendStringInfoString(buf, ", VERSION ");
+ con = (Const *) lsecond(xexpr->args);
+ if (IsA(con, Const) &&
+ con->constisnull)
+ appendStringInfoString(buf, "NO VALUE");
+ else
+ get_rule_expr((Node *) con, context, false);
+
+ con = lthird_node(Const, xexpr->args);
+ if (con->constisnull)
+ /* suppress STANDALONE NO VALUE */ ;
+ else
+ {
+ switch (DatumGetInt32(con->constvalue))
+ {
+ case XML_STANDALONE_YES:
+ appendStringInfoString(buf,
+ ", STANDALONE YES");
+ break;
+ case XML_STANDALONE_NO:
+ appendStringInfoString(buf,
+ ", STANDALONE NO");
+ break;
+ case XML_STANDALONE_NO_VALUE:
+ appendStringInfoString(buf,
+ ", STANDALONE NO VALUE");
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+ case IS_DOCUMENT:
+ get_rule_expr_paren((Node *) xexpr->args, context, false, node);
+ break;
+ }
+ }
+ if (xexpr->op == IS_XMLSERIALIZE)
+ appendStringInfo(buf, " AS %s",
+ format_type_with_typemod(xexpr->type,
+ xexpr->typmod));
+ if (xexpr->op == IS_DOCUMENT)
+ appendStringInfoString(buf, " IS DOCUMENT");
+ else
+ appendStringInfoChar(buf, ')');
+ }
+ break;
+
+ case T_NullTest:
+ {
+ NullTest *ntest = (NullTest *) node;
+
+ if (!PRETTY_PAREN(context))
+ appendStringInfoChar(buf, '(');
+ get_rule_expr_paren((Node *) ntest->arg, context, true, node);
+
+ /*
+ * For scalar inputs, we prefer to print as IS [NOT] NULL,
+ * which is shorter and traditional. If it's a rowtype input
+ * but we're applying a scalar test, must print IS [NOT]
+ * DISTINCT FROM NULL to be semantically correct.
+ */
+ if (ntest->argisrow ||
+ !type_is_rowtype(exprType((Node *) ntest->arg)))
+ {
+ switch (ntest->nulltesttype)
+ {
+ case IS_NULL:
+ appendStringInfoString(buf, " IS NULL");
+ break;
+ case IS_NOT_NULL:
+ appendStringInfoString(buf, " IS NOT NULL");
+ break;
+ default:
+ elog(ERROR, "unrecognized nulltesttype: %d",
+ (int) ntest->nulltesttype);
+ }
+ }
+ else
+ {
+ switch (ntest->nulltesttype)
+ {
+ case IS_NULL:
+ appendStringInfoString(buf, " IS NOT DISTINCT FROM NULL");
+ break;
+ case IS_NOT_NULL:
+ appendStringInfoString(buf, " IS DISTINCT FROM NULL");
+ break;
+ default:
+ elog(ERROR, "unrecognized nulltesttype: %d",
+ (int) ntest->nulltesttype);
+ }
+ }
+ if (!PRETTY_PAREN(context))
+ appendStringInfoChar(buf, ')');
+ }
+ break;
+
+ case T_BooleanTest:
+ {
+ BooleanTest *btest = (BooleanTest *) node;
+
+ if (!PRETTY_PAREN(context))
+ appendStringInfoChar(buf, '(');
+ get_rule_expr_paren((Node *) btest->arg, context, false, node);
+ switch (btest->booltesttype)
+ {
+ case IS_TRUE:
+ appendStringInfoString(buf, " IS TRUE");
+ break;
+ case IS_NOT_TRUE:
+ appendStringInfoString(buf, " IS NOT TRUE");
+ break;
+ case IS_FALSE:
+ appendStringInfoString(buf, " IS FALSE");
+ break;
+ case IS_NOT_FALSE:
+ appendStringInfoString(buf, " IS NOT FALSE");
+ break;
+ case IS_UNKNOWN:
+ appendStringInfoString(buf, " IS UNKNOWN");
+ break;
+ case IS_NOT_UNKNOWN:
+ appendStringInfoString(buf, " IS NOT UNKNOWN");
+ break;
+ default:
+ elog(ERROR, "unrecognized booltesttype: %d",
+ (int) btest->booltesttype);
+ }
+ if (!PRETTY_PAREN(context))
+ appendStringInfoChar(buf, ')');
+ }
+ break;
+
+ case T_CoerceToDomain:
+ {
+ CoerceToDomain *ctest = (CoerceToDomain *) node;
+ Node *arg = (Node *) ctest->arg;
+
+ if (ctest->coercionformat == COERCE_IMPLICIT_CAST &&
+ !showimplicit)
+ {
+ /* don't show the implicit cast */
+ get_rule_expr(arg, context, false);
+ }
+ else
+ {
+ get_coercion_expr(arg, context,
+ ctest->resulttype,
+ ctest->resulttypmod,
+ node);
+ }
+ }
+ break;
+
+ case T_CoerceToDomainValue:
+ appendStringInfoString(buf, "VALUE");
+ break;
+
+ case T_SetToDefault:
+ appendStringInfoString(buf, "DEFAULT");
+ break;
+
+ case T_CurrentOfExpr:
+ {
+ CurrentOfExpr *cexpr = (CurrentOfExpr *) node;
+
+ if (cexpr->cursor_name)
+ appendStringInfo(buf, "CURRENT OF %s",
+ quote_identifier(cexpr->cursor_name));
+ else
+ appendStringInfo(buf, "CURRENT OF $%d",
+ cexpr->cursor_param);
+ }
+ break;
+
+ case T_NextValueExpr:
+ {
+ NextValueExpr *nvexpr = (NextValueExpr *) node;
+
+ /*
+ * This isn't exactly nextval(), but that seems close enough
+ * for EXPLAIN's purposes.
+ */
+ appendStringInfoString(buf, "nextval(");
+ simple_quote_literal(buf,
+ generate_relation_name(nvexpr->seqid,
+ NIL));
+ appendStringInfoChar(buf, ')');
+ }
+ break;
+
+ case T_InferenceElem:
+ {
+ InferenceElem *iexpr = (InferenceElem *) node;
+ bool save_varprefix;
+ bool need_parens;
+
+ /*
+ * InferenceElem can only refer to the target relation, so a
+ * prefix is not useful, and indeed would cause parse errors.
+ */
+ save_varprefix = context->varprefix;
+ context->varprefix = false;
+
+ /*
+ * Parenthesize the element unless it's a simple Var or a bare
+ * function call. Follows pg_get_indexdef_worker().
+ */
+ need_parens = !IsA(iexpr->expr, Var);
+ if (IsA(iexpr->expr, FuncExpr) &&
+ ((FuncExpr *) iexpr->expr)->funcformat ==
+ COERCE_EXPLICIT_CALL)
+ need_parens = false;
+
+ if (need_parens)
+ appendStringInfoChar(buf, '(');
+ get_rule_expr((Node *) iexpr->expr,
+ context, false);
+ if (need_parens)
+ appendStringInfoChar(buf, ')');
+
+ context->varprefix = save_varprefix;
+
+ if (iexpr->infercollid)
+ appendStringInfo(buf, " COLLATE %s",
+ generate_collation_name(iexpr->infercollid));
+
+ /* Add the operator class name, if not default */
+ if (iexpr->inferopclass)
+ {
+ Oid inferopclass = iexpr->inferopclass;
+ Oid inferopcinputtype = get_opclass_input_type(iexpr->inferopclass);
+
+ get_opclass_name(inferopclass, inferopcinputtype, buf);
+ }
+ }
+ break;
+
+ case T_PartitionBoundSpec:
+ {
+ PartitionBoundSpec *spec = (PartitionBoundSpec *) node;
+ ListCell *cell;
+ char *sep;
+
+ if (spec->is_default)
+ {
+ appendStringInfoString(buf, "DEFAULT");
+ break;
+ }
+
+ switch (spec->strategy)
+ {
+ case PARTITION_STRATEGY_HASH:
+ Assert(spec->modulus > 0 && spec->remainder >= 0);
+ Assert(spec->modulus > spec->remainder);
+
+ appendStringInfoString(buf, "FOR VALUES");
+ appendStringInfo(buf, " WITH (modulus %d, remainder %d)",
+ spec->modulus, spec->remainder);
+ break;
+
+ case PARTITION_STRATEGY_LIST:
+ Assert(spec->listdatums != NIL);
+
+ appendStringInfoString(buf, "FOR VALUES IN (");
+ sep = "";
+ foreach(cell, spec->listdatums)
+ {
+ Const *val = lfirst_node(Const, cell);
+
+ appendStringInfoString(buf, sep);
+ get_const_expr(val, context, -1);
+ sep = ", ";
+ }
+
+ appendStringInfoChar(buf, ')');
+ break;
+
+ case PARTITION_STRATEGY_RANGE:
+ Assert(spec->lowerdatums != NIL &&
+ spec->upperdatums != NIL &&
+ list_length(spec->lowerdatums) ==
+ list_length(spec->upperdatums));
+
+ appendStringInfo(buf, "FOR VALUES FROM %s TO %s",
+ get_range_partbound_string(spec->lowerdatums),
+ get_range_partbound_string(spec->upperdatums));
+ break;
+
+ default:
+ elog(ERROR, "unrecognized partition strategy: %d",
+ (int) spec->strategy);
+ break;
+ }
+ }
+ break;
+
+ case T_List:
+ {
+ char *sep;
+ ListCell *l;
+
+ sep = "";
+ foreach(l, (List *) node)
+ {
+ appendStringInfoString(buf, sep);
+ get_rule_expr((Node *) lfirst(l), context, showimplicit);
+ sep = ", ";
+ }
+ }
+ break;
+
+ case T_TableFunc:
+ get_tablefunc((TableFunc *) node, context, showimplicit);
+ break;
+
+ default:
+ elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node));
+ break;
+ }
+}
+
+/*
+ * get_rule_expr_toplevel - Parse back a toplevel expression
+ *
+ * Same as get_rule_expr(), except that if the expr is just a Var, we pass
+ * istoplevel = true not false to get_variable(). This causes whole-row Vars
+ * to get printed with decoration that will prevent expansion of "*".
+ * We need to use this in contexts such as ROW() and VALUES(), where the
+ * parser would expand "foo.*" appearing at top level. (In principle we'd
+ * use this in get_target_list() too, but that has additional worries about
+ * whether to print AS, so it needs to invoke get_variable() directly anyway.)
+ */
+static void
+get_rule_expr_toplevel(Node *node, deparse_context *context,
+ bool showimplicit)
+{
+ if (node && IsA(node, Var))
+ (void) get_variable((Var *) node, 0, true, context);
+ else
+ get_rule_expr(node, context, showimplicit);
+}
+
+/*
+ * get_rule_list_toplevel - Parse back a list of toplevel expressions
+ *
+ * Apply get_rule_expr_toplevel() to each element of a List.
+ *
+ * This adds commas between the expressions, but caller is responsible
+ * for printing surrounding decoration.
+ */
+static void
+get_rule_list_toplevel(List *lst, deparse_context *context,
+ bool showimplicit)
+{
+ const char *sep;
+ ListCell *lc;
+
+ sep = "";
+ foreach(lc, lst)
+ {
+ Node *e = (Node *) lfirst(lc);
+
+ appendStringInfoString(context->buf, sep);
+ get_rule_expr_toplevel(e, context, showimplicit);
+ sep = ", ";
+ }
+}
+
+/*
+ * get_rule_expr_funccall - Parse back a function-call expression
+ *
+ * Same as get_rule_expr(), except that we guarantee that the output will
+ * look like a function call, or like one of the things the grammar treats as
+ * equivalent to a function call (see the func_expr_windowless production).
+ * This is needed in places where the grammar uses func_expr_windowless and
+ * you can't substitute a parenthesized a_expr. If what we have isn't going
+ * to look like a function call, wrap it in a dummy CAST() expression, which
+ * will satisfy the grammar --- and, indeed, is likely what the user wrote to
+ * produce such a thing.
+ */
+static void
+get_rule_expr_funccall(Node *node, deparse_context *context,
+ bool showimplicit)
+{
+ if (looks_like_function(node))
+ get_rule_expr(node, context, showimplicit);
+ else
+ {
+ StringInfo buf = context->buf;
+
+ appendStringInfoString(buf, "CAST(");
+ /* no point in showing any top-level implicit cast */
+ get_rule_expr(node, context, false);
+ appendStringInfo(buf, " AS %s)",
+ format_type_with_typemod(exprType(node),
+ exprTypmod(node)));
+ }
+}
+
+/*
+ * Helper function to identify node types that satisfy func_expr_windowless.
+ * If in doubt, "false" is always a safe answer.
+ */
+static bool
+looks_like_function(Node *node)
+{
+ if (node == NULL)
+ return false; /* probably shouldn't happen */
+ switch (nodeTag(node))
+ {
+ case T_FuncExpr:
+ /* OK, unless it's going to deparse as a cast */
+ return (((FuncExpr *) node)->funcformat == COERCE_EXPLICIT_CALL ||
+ ((FuncExpr *) node)->funcformat == COERCE_SQL_SYNTAX);
+ case T_NullIfExpr:
+ case T_CoalesceExpr:
+ case T_MinMaxExpr:
+ case T_SQLValueFunction:
+ case T_XmlExpr:
+ /* these are all accepted by func_expr_common_subexpr */
+ return true;
+ default:
+ break;
+ }
+ return false;
+}
+
+
+/*
+ * get_oper_expr - Parse back an OpExpr node
+ */
+static void
+get_oper_expr(OpExpr *expr, deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ Oid opno = expr->opno;
+ List *args = expr->args;
+
+ if (!PRETTY_PAREN(context))
+ appendStringInfoChar(buf, '(');
+ if (list_length(args) == 2)
+ {
+ /* binary operator */
+ Node *arg1 = (Node *) linitial(args);
+ Node *arg2 = (Node *) lsecond(args);
+
+ get_rule_expr_paren(arg1, context, true, (Node *) expr);
+ appendStringInfo(buf, " %s ",
+ generate_operator_name(opno,
+ exprType(arg1),
+ exprType(arg2)));
+ get_rule_expr_paren(arg2, context, true, (Node *) expr);
+ }
+ else
+ {
+ /* prefix operator */
+ Node *arg = (Node *) linitial(args);
+
+ appendStringInfo(buf, "%s ",
+ generate_operator_name(opno,
+ InvalidOid,
+ exprType(arg)));
+ get_rule_expr_paren(arg, context, true, (Node *) expr);
+ }
+ if (!PRETTY_PAREN(context))
+ appendStringInfoChar(buf, ')');
+}
+
+/*
+ * get_func_expr - Parse back a FuncExpr node
+ */
+static void
+get_func_expr(FuncExpr *expr, deparse_context *context,
+ bool showimplicit)
+{
+ StringInfo buf = context->buf;
+ Oid funcoid = expr->funcid;
+ Oid argtypes[FUNC_MAX_ARGS];
+ int nargs;
+ List *argnames;
+ bool use_variadic;
+ ListCell *l;
+
+ /*
+ * If the function call came from an implicit coercion, then just show the
+ * first argument --- unless caller wants to see implicit coercions.
+ */
+ if (expr->funcformat == COERCE_IMPLICIT_CAST && !showimplicit)
+ {
+ get_rule_expr_paren((Node *) linitial(expr->args), context,
+ false, (Node *) expr);
+ return;
+ }
+
+ /*
+ * If the function call came from a cast, then show the first argument
+ * plus an explicit cast operation.
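+ * (An implicit cast can only reach this point when showimplicit is true,
+ * since the branch above returns otherwise.)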
+ */
+ if (expr->funcformat == COERCE_EXPLICIT_CAST ||
+ expr->funcformat == COERCE_IMPLICIT_CAST)
+ {
+ Node *arg = linitial(expr->args);
+ Oid rettype = expr->funcresulttype;
+ int32 coercedTypmod;
+
+ /* Get the typmod if this is a length-coercion function */
+ (void) exprIsLengthCoercion((Node *) expr, &coercedTypmod);
+
+ get_coercion_expr(arg, context,
+ rettype, coercedTypmod,
+ (Node *) expr);
+
+ return;
+ }
+
+ /*
+ * If the function was called using one of the SQL spec's random special
+ * syntaxes, try to reproduce that. If we don't recognize the function,
+ * fall through.
+ */
+ if (expr->funcformat == COERCE_SQL_SYNTAX)
+ {
+ if (get_func_sql_syntax(expr, context))
+ return;
+ }
+
+ /*
+ * Normal function: display as proname(args). First we need to extract
+ * the argument datatypes.
+ */
+ if (list_length(expr->args) > FUNC_MAX_ARGS)
+ ereport(ERROR,
+ (errcode(ERRCODE_TOO_MANY_ARGUMENTS),
+ errmsg("too many arguments")));
+ nargs = 0;
+ argnames = NIL;
+ foreach(l, expr->args)
+ {
+ Node *arg = (Node *) lfirst(l);
+
+ if (IsA(arg, NamedArgExpr))
+ argnames = lappend(argnames, ((NamedArgExpr *) arg)->name);
+ argtypes[nargs] = exprType(arg);
+ nargs++;
+ }
+
+ appendStringInfo(buf, "%s(",
+ generate_function_name(funcoid, nargs,
+ argnames, argtypes,
+ expr->funcvariadic,
+ &use_variadic,
+ context->special_exprkind));
+ nargs = 0;
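+ /* now print the arguments, prefixing the last with VARIADIC if needed */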
+ foreach(l, expr->args)
+ {
+ if (nargs++ > 0)
+ appendStringInfoString(buf, ", ");
+ if (use_variadic && lnext(expr->args, l) == NULL)
+ appendStringInfoString(buf, "VARIADIC ");
+ get_rule_expr((Node *) lfirst(l), context, true);
+ }
+ appendStringInfoChar(buf, ')');
+}
+
+/*
+ * get_agg_expr - Parse back an Aggref node
+ */
+static void
+get_agg_expr(Aggref *aggref, deparse_context *context,
+ Aggref *original_aggref)
+{
+ StringInfo buf = context->buf;
+ Oid argtypes[FUNC_MAX_ARGS];
+ int nargs;
+ bool use_variadic;
+
+ /*
+ * For a combining aggregate, we look up and deparse the corresponding
+ * partial aggregate instead. This is necessary because our input
+ * argument list has been replaced; the new argument list always has just
+ * one element, which will point to a partial Aggref that supplies us with
+ * transition states to combine.
+ */
+ if (DO_AGGSPLIT_COMBINE(aggref->aggsplit))
+ {
+ TargetEntry *tle;
+
+ Assert(list_length(aggref->args) == 1);
+ tle = linitial_node(TargetEntry, aggref->args);
+ resolve_special_varno((Node *) tle->expr, context,
+ get_agg_combine_expr, original_aggref);
+ return;
+ }
+
+ /*
+ * Mark as PARTIAL, if appropriate. We look to the original aggref so as
+ * to avoid printing this when recursing from the code just above.
+ */
+ if (DO_AGGSPLIT_SKIPFINAL(original_aggref->aggsplit))
+ appendStringInfoString(buf, "PARTIAL ");
+
+ /* Extract the argument types as seen by the parser */
+ nargs = get_aggregate_argtypes(aggref, argtypes);
+
+ /* Print the aggregate name, schema-qualified if needed */
+ appendStringInfo(buf, "%s(%s",
+ generate_function_name(aggref->aggfnoid, nargs,
+ NIL, argtypes,
+ aggref->aggvariadic,
+ &use_variadic,
+ context->special_exprkind),
+ (aggref->aggdistinct != NIL) ? "DISTINCT " : "");
+
+ if (AGGKIND_IS_ORDERED_SET(aggref->aggkind))
+ {
+ /*
+ * Ordered-set aggregates do not use "*" syntax. Also, we needn't
+ * worry about inserting VARIADIC. So we can just dump the direct
+ * args as-is.
+ */
+ Assert(!aggref->aggvariadic);
+ get_rule_expr((Node *) aggref->aggdirectargs, context, true);
+ Assert(aggref->aggorder != NIL);
+ appendStringInfoString(buf, ") WITHIN GROUP (ORDER BY ");
+ get_rule_orderby(aggref->aggorder, aggref->args, false, context);
+ }
+ else
+ {
+ /* aggstar can be set only in zero-argument aggregates */
+ if (aggref->aggstar)
+ appendStringInfoChar(buf, '*');
+ else
+ {
+ ListCell *l;
+ int i;
+
+ i = 0;
+ foreach(l, aggref->args)
+ {
+ TargetEntry *tle = (TargetEntry *) lfirst(l);
+ Node *arg = (Node *) tle->expr;
+
+ Assert(!IsA(arg, NamedArgExpr));
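+ /* resjunk entries are ORDER BY-only columns, not real arguments */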
+ if (tle->resjunk)
+ continue;
+ if (i++ > 0)
+ appendStringInfoString(buf, ", ");
+ if (use_variadic && i == nargs)
+ appendStringInfoString(buf, "VARIADIC ");
+ get_rule_expr(arg, context, true);
+ }
+ }
+
+ if (aggref->aggorder != NIL)
+ {
+ appendStringInfoString(buf, " ORDER BY ");
+ get_rule_orderby(aggref->aggorder, aggref->args, false, context);
+ }
+ }
+
+ if (aggref->aggfilter != NULL)
+ {
+ appendStringInfoString(buf, ") FILTER (WHERE ");
+ get_rule_expr((Node *) aggref->aggfilter, context, false);
+ }
+
+ appendStringInfoChar(buf, ')');
+}
+
+/*
+ * This is a helper function for get_agg_expr(). It's used when we deparse
+ * a combining Aggref; resolve_special_varno locates the corresponding partial
+ * Aggref and then calls this.
+ */
+static void
+get_agg_combine_expr(Node *node, deparse_context *context, void *callback_arg)
+{
+ Aggref *aggref;
+ Aggref *original_aggref = callback_arg;
+
+ if (!IsA(node, Aggref))
+ elog(ERROR, "combining Aggref does not point to an Aggref");
+
+ aggref = (Aggref *) node;
+ get_agg_expr(aggref, context, original_aggref);
+}
+
+/*
+ * get_windowfunc_expr - Parse back a WindowFunc node
+ */
+static void
+get_windowfunc_expr(WindowFunc *wfunc, deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ Oid argtypes[FUNC_MAX_ARGS];
+ int nargs;
+ List *argnames;
+ ListCell *l;
+
+ if (list_length(wfunc->args) > FUNC_MAX_ARGS)
+ ereport(ERROR,
+ (errcode(ERRCODE_TOO_MANY_ARGUMENTS),
+ errmsg("too many arguments")));
+ nargs = 0;
+ argnames = NIL;
+ foreach(l, wfunc->args)
+ {
+ Node *arg = (Node *) lfirst(l);
+
+ if (IsA(arg, NamedArgExpr))
+ argnames = lappend(argnames, ((NamedArgExpr *) arg)->name);
+ argtypes[nargs] = exprType(arg);
+ nargs++;
+ }
+
+ appendStringInfo(buf, "%s(",
+ generate_function_name(wfunc->winfnoid, nargs,
+ argnames, argtypes,
+ false, NULL,
+ context->special_exprkind));
+ /* winstar can be set only in zero-argument aggregates */
+ if (wfunc->winstar)
+ appendStringInfoChar(buf, '*');
+ else
+ get_rule_expr((Node *) wfunc->args, context, true);
+
+ if (wfunc->aggfilter != NULL)
+ {
+ appendStringInfoString(buf, ") FILTER (WHERE ");
+ get_rule_expr((Node *) wfunc->aggfilter, context, false);
+ }
+
+ appendStringInfoString(buf, ") OVER ");
+
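+ /* find the window clause matching this winref and print name or spec */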
+ foreach(l, context->windowClause)
+ {
+ WindowClause *wc = (WindowClause *) lfirst(l);
+
+ if (wc->winref == wfunc->winref)
+ {
+ if (wc->name)
+ appendStringInfoString(buf, quote_identifier(wc->name));
+ else
+ get_rule_windowspec(wc, context->windowTList, context);
+ break;
+ }
+ }
+ if (l == NULL)
+ {
+ if (context->windowClause)
+ elog(ERROR, "could not find window clause for winref %u",
+ wfunc->winref);
+
+ /*
+ * In EXPLAIN, we don't have window context information available, so
+ * we have to settle for this:
+ */
+ appendStringInfoString(buf, "(?)");
+ }
+}
+
+/*
+ * get_func_sql_syntax - Parse back a SQL-syntax function call
+ *
+ * Returns true if we successfully deparsed, false if we did not
+ * recognize the function.
+ */
+static bool
+get_func_sql_syntax(FuncExpr *expr, deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ Oid funcoid = expr->funcid;
+
+ switch (funcoid)
+ {
+ case F_TIMEZONE_INTERVAL_TIMESTAMP:
+ case F_TIMEZONE_INTERVAL_TIMESTAMPTZ:
+ case F_TIMEZONE_INTERVAL_TIMETZ:
+ case F_TIMEZONE_TEXT_TIMESTAMP:
+ case F_TIMEZONE_TEXT_TIMESTAMPTZ:
+ case F_TIMEZONE_TEXT_TIMETZ:
+ /* AT TIME ZONE ... note reversed argument order */
+ appendStringInfoChar(buf, '(');
+ get_rule_expr_paren((Node *) lsecond(expr->args), context, false,
+ (Node *) expr);
+ appendStringInfoString(buf, " AT TIME ZONE ");
+ get_rule_expr_paren((Node *) linitial(expr->args), context, false,
+ (Node *) expr);
+ appendStringInfoChar(buf, ')');
+ return true;
+
+ case F_OVERLAPS_TIMESTAMPTZ_INTERVAL_TIMESTAMPTZ_INTERVAL:
+ case F_OVERLAPS_TIMESTAMPTZ_INTERVAL_TIMESTAMPTZ_TIMESTAMPTZ:
+ case F_OVERLAPS_TIMESTAMPTZ_TIMESTAMPTZ_TIMESTAMPTZ_INTERVAL:
+ case F_OVERLAPS_TIMESTAMPTZ_TIMESTAMPTZ_TIMESTAMPTZ_TIMESTAMPTZ:
+ case F_OVERLAPS_TIMESTAMP_INTERVAL_TIMESTAMP_INTERVAL:
+ case F_OVERLAPS_TIMESTAMP_INTERVAL_TIMESTAMP_TIMESTAMP:
+ case F_OVERLAPS_TIMESTAMP_TIMESTAMP_TIMESTAMP_INTERVAL:
+ case F_OVERLAPS_TIMESTAMP_TIMESTAMP_TIMESTAMP_TIMESTAMP:
+ case F_OVERLAPS_TIMETZ_TIMETZ_TIMETZ_TIMETZ:
+ case F_OVERLAPS_TIME_INTERVAL_TIME_INTERVAL:
+ case F_OVERLAPS_TIME_INTERVAL_TIME_TIME:
+ case F_OVERLAPS_TIME_TIME_TIME_INTERVAL:
+ case F_OVERLAPS_TIME_TIME_TIME_TIME:
+ /* (x1, x2) OVERLAPS (y1, y2) */
+ appendStringInfoString(buf, "((");
+ get_rule_expr((Node *) linitial(expr->args), context, false);
+ appendStringInfoString(buf, ", ");
+ get_rule_expr((Node *) lsecond(expr->args), context, false);
+ appendStringInfoString(buf, ") OVERLAPS (");
+ get_rule_expr((Node *) lthird(expr->args), context, false);
+ appendStringInfoString(buf, ", ");
+ get_rule_expr((Node *) lfourth(expr->args), context, false);
+ appendStringInfoString(buf, "))");
+ return true;
+
+ case F_EXTRACT_TEXT_DATE:
+ case F_EXTRACT_TEXT_TIME:
+ case F_EXTRACT_TEXT_TIMETZ:
+ case F_EXTRACT_TEXT_TIMESTAMP:
+ case F_EXTRACT_TEXT_TIMESTAMPTZ:
+ case F_EXTRACT_TEXT_INTERVAL:
+ /* EXTRACT (x FROM y) */
+ appendStringInfoString(buf, "EXTRACT(");
+ {
+ Const *con = (Const *) linitial(expr->args);
+
+ Assert(IsA(con, Const) &&
+ con->consttype == TEXTOID &&
+ !con->constisnull);
+ appendStringInfoString(buf, TextDatumGetCString(con->constvalue));
+ }
+ appendStringInfoString(buf, " FROM ");
+ get_rule_expr((Node *) lsecond(expr->args), context, false);
+ appendStringInfoChar(buf, ')');
+ return true;
+
+ case F_IS_NORMALIZED:
+ /* IS xxx NORMALIZED */
+ appendStringInfoString(buf, "(");
+ get_rule_expr_paren((Node *) linitial(expr->args), context, false,
+ (Node *) expr);
+ appendStringInfoString(buf, " IS");
+ if (list_length(expr->args) == 2)
+ {
+ Const *con = (Const *) lsecond(expr->args);
+
+ Assert(IsA(con, Const) &&
+ con->consttype == TEXTOID &&
+ !con->constisnull);
+ appendStringInfo(buf, " %s",
+ TextDatumGetCString(con->constvalue));
+ }
+ appendStringInfoString(buf, " NORMALIZED)");
+ return true;
+
+ case F_PG_COLLATION_FOR:
+ /* COLLATION FOR */
+ appendStringInfoString(buf, "COLLATION FOR (");
+ get_rule_expr((Node *) linitial(expr->args), context, false);
+ appendStringInfoChar(buf, ')');
+ return true;
+
+ case F_NORMALIZE:
+ /* NORMALIZE() */
+ appendStringInfoString(buf, "NORMALIZE(");
+ get_rule_expr((Node *) linitial(expr->args), context, false);
+ if (list_length(expr->args) == 2)
+ {
+ Const *con = (Const *) lsecond(expr->args);
+
+ Assert(IsA(con, Const) &&
+ con->consttype == TEXTOID &&
+ !con->constisnull);
+ appendStringInfo(buf, ", %s",
+ TextDatumGetCString(con->constvalue));
+ }
+ appendStringInfoChar(buf, ')');
+ return true;
+
+ case F_OVERLAY_BIT_BIT_INT4:
+ case F_OVERLAY_BIT_BIT_INT4_INT4:
+ case F_OVERLAY_BYTEA_BYTEA_INT4:
+ case F_OVERLAY_BYTEA_BYTEA_INT4_INT4:
+ case F_OVERLAY_TEXT_TEXT_INT4:
+ case F_OVERLAY_TEXT_TEXT_INT4_INT4:
+ /* OVERLAY() */
+ appendStringInfoString(buf, "OVERLAY(");
+ get_rule_expr((Node *) linitial(expr->args), context, false);
+ appendStringInfoString(buf, " PLACING ");
+ get_rule_expr((Node *) lsecond(expr->args), context, false);
+ appendStringInfoString(buf, " FROM ");
+ get_rule_expr((Node *) lthird(expr->args), context, false);
+ if (list_length(expr->args) == 4)
+ {
+ appendStringInfoString(buf, " FOR ");
+ get_rule_expr((Node *) lfourth(expr->args), context, false);
+ }
+ appendStringInfoChar(buf, ')');
+ return true;
+
+ case F_POSITION_BIT_BIT:
+ case F_POSITION_BYTEA_BYTEA:
+ case F_POSITION_TEXT_TEXT:
+ /* POSITION() ... extra parens since args are b_expr not a_expr */
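+ /* note reversed argument order, as for AT TIME ZONE above */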
+ appendStringInfoString(buf, "POSITION((");
+ get_rule_expr((Node *) lsecond(expr->args), context, false);
+ appendStringInfoString(buf, ") IN (");
+ get_rule_expr((Node *) linitial(expr->args), context, false);
+ appendStringInfoString(buf, "))");
+ return true;
+
+ case F_SUBSTRING_BIT_INT4:
+ case F_SUBSTRING_BIT_INT4_INT4:
+ case F_SUBSTRING_BYTEA_INT4:
+ case F_SUBSTRING_BYTEA_INT4_INT4:
+ case F_SUBSTRING_TEXT_INT4:
+ case F_SUBSTRING_TEXT_INT4_INT4:
+ /* SUBSTRING FROM/FOR (i.e., integer-position variants) */
+ appendStringInfoString(buf, "SUBSTRING(");
+ get_rule_expr((Node *) linitial(expr->args), context, false);
+ appendStringInfoString(buf, " FROM ");
+ get_rule_expr((Node *) lsecond(expr->args), context, false);
+ if (list_length(expr->args) == 3)
+ {
+ appendStringInfoString(buf, " FOR ");
+ get_rule_expr((Node *) lthird(expr->args), context, false);
+ }
+ appendStringInfoChar(buf, ')');
+ return true;
+
+ case F_SUBSTRING_TEXT_TEXT_TEXT:
+ /* SUBSTRING SIMILAR/ESCAPE */
+ appendStringInfoString(buf, "SUBSTRING(");
+ get_rule_expr((Node *) linitial(expr->args), context, false);
+ appendStringInfoString(buf, " SIMILAR ");
+ get_rule_expr((Node *) lsecond(expr->args), context, false);
+ appendStringInfoString(buf, " ESCAPE ");
+ get_rule_expr((Node *) lthird(expr->args), context, false);
+ appendStringInfoChar(buf, ')');
+ return true;
+
+ case F_BTRIM_BYTEA_BYTEA:
+ case F_BTRIM_TEXT:
+ case F_BTRIM_TEXT_TEXT:
+ /* TRIM() */
+ appendStringInfoString(buf, "TRIM(BOTH");
+ if (list_length(expr->args) == 2)
+ {
+ appendStringInfoChar(buf, ' ');
+ get_rule_expr((Node *) lsecond(expr->args), context, false);
+ }
+ appendStringInfoString(buf, " FROM ");
+ get_rule_expr((Node *) linitial(expr->args), context, false);
+ appendStringInfoChar(buf, ')');
+ return true;
+
+ case F_LTRIM_BYTEA_BYTEA:
+ case F_LTRIM_TEXT:
+ case F_LTRIM_TEXT_TEXT:
+ /* TRIM() */
+ appendStringInfoString(buf, "TRIM(LEADING");
+ if (list_length(expr->args) == 2)
+ {
+ appendStringInfoChar(buf, ' ');
+ get_rule_expr((Node *) lsecond(expr->args), context, false);
+ }
+ appendStringInfoString(buf, " FROM ");
+ get_rule_expr((Node *) linitial(expr->args), context, false);
+ appendStringInfoChar(buf, ')');
+ return true;
+
+ case F_RTRIM_BYTEA_BYTEA:
+ case F_RTRIM_TEXT:
+ case F_RTRIM_TEXT_TEXT:
+ /* TRIM() */
+ appendStringInfoString(buf, "TRIM(TRAILING");
+ if (list_length(expr->args) == 2)
+ {
+ appendStringInfoChar(buf, ' ');
+ get_rule_expr((Node *) lsecond(expr->args), context, false);
+ }
+ appendStringInfoString(buf, " FROM ");
+ get_rule_expr((Node *) linitial(expr->args), context, false);
+ appendStringInfoChar(buf, ')');
+ return true;
+
+ case F_XMLEXISTS:
+ /* XMLEXISTS ... extra parens because args are c_expr */
+ appendStringInfoString(buf, "XMLEXISTS((");
+ get_rule_expr((Node *) linitial(expr->args), context, false);
+ appendStringInfoString(buf, ") PASSING (");
+ get_rule_expr((Node *) lsecond(expr->args), context, false);
+ appendStringInfoString(buf, "))");
+ return true;
+ }
+ return false;
+}
+
+/* ----------
+ * get_coercion_expr
+ *
+ * Make a string representation of a value coerced to a specific type
+ * ----------
+ */
+static void
+get_coercion_expr(Node *arg, deparse_context *context,
+ Oid resulttype, int32 resulttypmod,
+ Node *parentNode)
+{
+ StringInfo buf = context->buf;
+
+ /*
+ * Since parse_coerce.c doesn't immediately collapse application of
+ * length-coercion functions to constants, what we'll typically see in
+ * such cases is a Const with typmod -1 and a length-coercion function
+ * right above it. Avoid generating redundant output. However, beware of
+ * suppressing casts when the user actually wrote something like
+ * 'foo'::text::char(3).
+ *
+ * Note: it might seem that we are missing the possibility of needing to
+ * print a COLLATE clause for such a Const. However, a Const could only
+ * have nondefault collation in a post-constant-folding tree, in which the
+ * length coercion would have been folded too. See also the special
+ * handling of CollateExpr in coerce_to_target_type(): any collation
+ * marking will be above the coercion node, not below it.
+ */
+ if (arg && IsA(arg, Const) &&
+ ((Const *) arg)->consttype == resulttype &&
+ ((Const *) arg)->consttypmod == -1)
+ {
+ /* Show the constant without normal ::typename decoration */
+ get_const_expr((Const *) arg, context, -1);
+ }
+ else
+ {
+ if (!PRETTY_PAREN(context))
+ appendStringInfoChar(buf, '(');
+ get_rule_expr_paren(arg, context, false, parentNode);
+ if (!PRETTY_PAREN(context))
+ appendStringInfoChar(buf, ')');
+ }
+
+ /*
+ * Never emit resulttype(arg) functional notation. A pg_proc entry could
+ * take precedence, and a resulttype in pg_temp would require schema
+ * qualification that format_type_with_typemod() would usually omit. We've
+ * standardized on arg::resulttype, but CAST(arg AS resulttype) notation
+ * would work fine.
+ */
+ appendStringInfo(buf, "::%s",
+ format_type_with_typemod(resulttype, resulttypmod));
+}
+
+/* ----------
+ * get_const_expr
+ *
+ * Make a string representation of a Const
+ *
+ * showtype can be -1 to never show "::typename" decoration, or +1 to always
+ * show it, or 0 to show it only if the constant wouldn't be assumed to be
+ * the right type by default.
+ *
+ * If the Const's collation isn't default for its type, show that too.
+ * We mustn't do this when showtype is -1 (since that means the caller will
+ * print "::typename", and we can't put a COLLATE clause in between). It's
+ * the caller's responsibility that the collation isn't missed in such cases.
+ * ----------
+ */
+static void
+get_const_expr(Const *constval, deparse_context *context, int showtype)
+{
+ StringInfo buf = context->buf;
+ Oid typoutput;
+ bool typIsVarlena;
+ char *extval;
+ bool needlabel = false;
+
+ if (constval->constisnull)
+ {
+ /*
+ * Always label the type of a NULL constant to prevent misdecisions
+ * about type when reparsing.
+ */
+ appendStringInfoString(buf, "NULL");
+ if (showtype >= 0)
+ {
+ appendStringInfo(buf, "::%s",
+ format_type_with_typemod(constval->consttype,
+ constval->consttypmod));
+ get_const_collation(constval, context);
+ }
+ return;
+ }
+
+ getTypeOutputInfo(constval->consttype,
+ &typoutput, &typIsVarlena);
+
+ extval = OidOutputFunctionCall(typoutput, constval->constvalue);
+
+ switch (constval->consttype)
+ {
+ case INT4OID:
+
+ /*
+ * INT4 can be printed without any decoration, unless it is
+ * negative; in that case print it as '-nnn'::integer to ensure
+ * that the output will re-parse as a constant, not as a constant
+ * plus operator. In most cases we could get away with printing
+ * (-nnn) instead, because of the way that gram.y handles negative
+ * literals; but that doesn't work for INT_MIN, and it doesn't
+ * seem that much prettier anyway.
+ */
+ if (extval[0] != '-')
+ appendStringInfoString(buf, extval);
+ else
+ {
+ appendStringInfo(buf, "'%s'", extval);
+ needlabel = true; /* we must attach a cast */
+ }
+ break;
+
+ case NUMERICOID:
+
+ /*
+ * NUMERIC can be printed without quotes if it looks like a float
+ * constant (not an integer, and not Infinity or NaN) and doesn't
+ * have a leading sign (for the same reason as for INT4).
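+ * Thus 3.14 and 1e6 print bare, while 42, -3.5, NaN, and Infinity get
+ * quoted (and flagged as needing a cast label).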
+ */
+ if (isdigit((unsigned char) extval[0]) &&
+ strcspn(extval, "eE.") != strlen(extval))
+ {
+ appendStringInfoString(buf, extval);
+ }
+ else
+ {
+ appendStringInfo(buf, "'%s'", extval);
+ needlabel = true; /* we must attach a cast */
+ }
+ break;
+
+ case BOOLOID:
+ if (strcmp(extval, "t") == 0)
+ appendStringInfoString(buf, "true");
+ else
+ appendStringInfoString(buf, "false");
+ break;
+
+ default:
+ simple_quote_literal(buf, extval);
+ break;
+ }
+
+ pfree(extval);
+
+ if (showtype < 0)
+ return;
+
+ /*
+ * For showtype == 0, append ::typename unless the constant will be
+ * implicitly typed as the right type when it is read in.
+ *
+ * XXX this code has to be kept in sync with the behavior of the parser,
+ * especially make_const.
+ */
+ switch (constval->consttype)
+ {
+ case BOOLOID:
+ case UNKNOWNOID:
+ /* These types can be left unlabeled */
+ needlabel = false;
+ break;
+ case INT4OID:
+ /* We determined above whether a label is needed */
+ break;
+ case NUMERICOID:
+
+ /*
+ * Float-looking constants will be typed as numeric, which we
+ * checked above; but if there's a nondefault typmod we need to
+ * show it.
+ */
+ needlabel |= (constval->consttypmod >= 0);
+ break;
+ default:
+ needlabel = true;
+ break;
+ }
+ if (needlabel || showtype > 0)
+ appendStringInfo(buf, "::%s",
+ format_type_with_typemod(constval->consttype,
+ constval->consttypmod));
+
+ get_const_collation(constval, context);
+}
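+
+/*
+ * For illustration of the cases above (a reading of this function, not an
+ * exhaustive list): with showtype == 0, the integer constant 42 deparses as
+ * plain 42 while -42 becomes '-42'::integer; a numeric value such as 3.5
+ * with default typmod deparses unlabeled; a text constant becomes
+ * 'foo'::text; and a NULL of type text is always labeled as NULL::text.
+ */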
+
+/*
+ * helper for get_const_expr: append COLLATE if needed
+ */
+static void
+get_const_collation(Const *constval, deparse_context *context)
+{
+ StringInfo buf = context->buf;
+
+ if (OidIsValid(constval->constcollid))
+ {
+ Oid typcollation = get_typcollation(constval->consttype);
+
+ if (constval->constcollid != typcollation)
+ {
+ appendStringInfo(buf, " COLLATE %s",
+ generate_collation_name(constval->constcollid));
+ }
+ }
+}
+
+/*
+ * simple_quote_literal - Format a string as a SQL literal, append to buf
+ */
+static void
+simple_quote_literal(StringInfo buf, const char *val)
+{
+ const char *valptr;
+
+ /*
+ * We form the string literal according to the prevailing setting of
+ * standard_conforming_strings; we never use E''. The user is responsible
+ * for making sure the result is used correctly.
+ */
+ appendStringInfoChar(buf, '\'');
+ for (valptr = val; *valptr; valptr++)
+ {
+ char ch = *valptr;
+
+ if (SQL_STR_DOUBLE(ch, !standard_conforming_strings))
+ appendStringInfoChar(buf, ch);
+ appendStringInfoChar(buf, ch);
+ }
+ appendStringInfoChar(buf, '\'');
+}
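+
+/*
+ * For example, the value O'Brien is emitted as 'O''Brien'. When
+ * standard_conforming_strings is off, backslashes are doubled as well, so
+ * a\b would be emitted as 'a\\b'.
+ */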
+
+
+/* ----------
+ * get_sublink_expr - Parse back a sublink
+ * ----------
+ */
+static void
+get_sublink_expr(SubLink *sublink, deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ Query *query = (Query *) (sublink->subselect);
+ char *opname = NULL;
+ bool need_paren;
+
+ if (sublink->subLinkType == ARRAY_SUBLINK)
+ appendStringInfoString(buf, "ARRAY(");
+ else
+ appendStringInfoChar(buf, '(');
+
+ /*
+ * Note that we print the name of only the first operator, when there are
+ * multiple combining operators. This is an approximation that could go
+ * wrong in various scenarios (operators in different schemas, renamed
+ * operators, etc) but there is not a whole lot we can do about it, since
+ * the syntax allows only one operator to be shown.
+ */
+ if (sublink->testexpr)
+ {
+ if (IsA(sublink->testexpr, OpExpr))
+ {
+ /* single combining operator */
+ OpExpr *opexpr = (OpExpr *) sublink->testexpr;
+
+ get_rule_expr(linitial(opexpr->args), context, true);
+ opname = generate_operator_name(opexpr->opno,
+ exprType(linitial(opexpr->args)),
+ exprType(lsecond(opexpr->args)));
+ }
+ else if (IsA(sublink->testexpr, BoolExpr))
+ {
+ /* multiple combining operators, = or <> cases */
+ char *sep;
+ ListCell *l;
+
+ appendStringInfoChar(buf, '(');
+ sep = "";
+ foreach(l, ((BoolExpr *) sublink->testexpr)->args)
+ {
+ OpExpr *opexpr = lfirst_node(OpExpr, l);
+
+ appendStringInfoString(buf, sep);
+ get_rule_expr(linitial(opexpr->args), context, true);
+ if (!opname)
+ opname = generate_operator_name(opexpr->opno,
+ exprType(linitial(opexpr->args)),
+ exprType(lsecond(opexpr->args)));
+ sep = ", ";
+ }
+ appendStringInfoChar(buf, ')');
+ }
+ else if (IsA(sublink->testexpr, RowCompareExpr))
+ {
+ /* multiple combining operators, < <= > >= cases */
+ RowCompareExpr *rcexpr = (RowCompareExpr *) sublink->testexpr;
+
+ appendStringInfoChar(buf, '(');
+ get_rule_expr((Node *) rcexpr->largs, context, true);
+ opname = generate_operator_name(linitial_oid(rcexpr->opnos),
+ exprType(linitial(rcexpr->largs)),
+ exprType(linitial(rcexpr->rargs)));
+ appendStringInfoChar(buf, ')');
+ }
+ else
+ elog(ERROR, "unrecognized testexpr type: %d",
+ (int) nodeTag(sublink->testexpr));
+ }
+
+ need_paren = true;
+
+ switch (sublink->subLinkType)
+ {
+ case EXISTS_SUBLINK:
+ appendStringInfoString(buf, "EXISTS ");
+ break;
+
+ case ANY_SUBLINK:
+ if (strcmp(opname, "=") == 0) /* Represent = ANY as IN */
+ appendStringInfoString(buf, " IN ");
+ else
+ appendStringInfo(buf, " %s ANY ", opname);
+ break;
+
+ case ALL_SUBLINK:
+ appendStringInfo(buf, " %s ALL ", opname);
+ break;
+
+ case ROWCOMPARE_SUBLINK:
+ appendStringInfo(buf, " %s ", opname);
+ break;
+
+ case EXPR_SUBLINK:
+ case MULTIEXPR_SUBLINK:
+ case ARRAY_SUBLINK:
+ need_paren = false;
+ break;
+
+ case CTE_SUBLINK: /* shouldn't occur in a SubLink */
+ default:
+ elog(ERROR, "unrecognized sublink type: %d",
+ (int) sublink->subLinkType);
+ break;
+ }
+
+ if (need_paren)
+ appendStringInfoChar(buf, '(');
+
+ get_query_def(query, buf, context->namespaces, NULL, false,
+ context->prettyFlags, context->wrapColumn,
+ context->indentLevel);
+
+ if (need_paren)
+ appendStringInfoString(buf, "))");
+ else
+ appendStringInfoChar(buf, ')');
+}
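+
+/*
+ * Examples of the output format: an EXISTS sublink deparses as
+ * (EXISTS (SELECT ...)); an ANY sublink whose combining operator is "="
+ * deparses as (expr IN (SELECT ...)); other ANY operators deparse as,
+ * e.g., (expr > ANY (SELECT ...)).
+ */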
+
+
+/* ----------
+ * get_tablefunc - Parse back a table function
+ * ----------
+ */
+static void
+get_tablefunc(TableFunc *tf, deparse_context *context, bool showimplicit)
+{
+ StringInfo buf = context->buf;
+
+ /* XMLTABLE is the only existing implementation. */
+
+ appendStringInfoString(buf, "XMLTABLE(");
+
+ if (tf->ns_uris != NIL)
+ {
+ ListCell *lc1,
+ *lc2;
+ bool first = true;
+
+ appendStringInfoString(buf, "XMLNAMESPACES (");
+ forboth(lc1, tf->ns_uris, lc2, tf->ns_names)
+ {
+ Node *expr = (Node *) lfirst(lc1);
+ String *ns_node = lfirst_node(String, lc2);
+
+ if (!first)
+ appendStringInfoString(buf, ", ");
+ else
+ first = false;
+
+ if (ns_node != NULL)
+ {
+ get_rule_expr(expr, context, showimplicit);
+ appendStringInfo(buf, " AS %s", strVal(ns_node));
+ }
+ else
+ {
+ appendStringInfoString(buf, "DEFAULT ");
+ get_rule_expr(expr, context, showimplicit);
+ }
+ }
+ appendStringInfoString(buf, "), ");
+ }
+
+ appendStringInfoChar(buf, '(');
+ get_rule_expr((Node *) tf->rowexpr, context, showimplicit);
+ appendStringInfoString(buf, ") PASSING (");
+ get_rule_expr((Node *) tf->docexpr, context, showimplicit);
+ appendStringInfoChar(buf, ')');
+
+ if (tf->colexprs != NIL)
+ {
+ ListCell *l1;
+ ListCell *l2;
+ ListCell *l3;
+ ListCell *l4;
+ ListCell *l5;
+ int colnum = 0;
+
+ appendStringInfoString(buf, " COLUMNS ");
+ forfive(l1, tf->colnames, l2, tf->coltypes, l3, tf->coltypmods,
+ l4, tf->colexprs, l5, tf->coldefexprs)
+ {
+ char *colname = strVal(lfirst(l1));
+ Oid typid = lfirst_oid(l2);
+ int32 typmod = lfirst_int(l3);
+ Node *colexpr = (Node *) lfirst(l4);
+ Node *coldefexpr = (Node *) lfirst(l5);
+ bool ordinality = (tf->ordinalitycol == colnum);
+ bool notnull = bms_is_member(colnum, tf->notnulls);
+
+ if (colnum > 0)
+ appendStringInfoString(buf, ", ");
+ colnum++;
+
+ appendStringInfo(buf, "%s %s", quote_identifier(colname),
+ ordinality ? "FOR ORDINALITY" :
+ format_type_with_typemod(typid, typmod));
+ if (ordinality)
+ continue;
+
+ if (coldefexpr != NULL)
+ {
+ appendStringInfoString(buf, " DEFAULT (");
+ get_rule_expr((Node *) coldefexpr, context, showimplicit);
+ appendStringInfoChar(buf, ')');
+ }
+ if (colexpr != NULL)
+ {
+ appendStringInfoString(buf, " PATH (");
+ get_rule_expr((Node *) colexpr, context, showimplicit);
+ appendStringInfoChar(buf, ')');
+ }
+ if (notnull)
+ appendStringInfoString(buf, " NOT NULL");
+ }
+ }
+
+ appendStringInfoChar(buf, ')');
+}
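+
+/*
+ * Roughly, the output has the shape
+ * XMLTABLE((row_expr) PASSING (doc_expr) COLUMNS name type PATH (expr), ...,
+ * ord FOR ORDINALITY, ...), with DEFAULT (...) and NOT NULL decorations
+ * added per column as needed.
+ */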
+
+/* ----------
+ * get_from_clause - Parse back a FROM clause
+ *
+ * "prefix" is the keyword that denotes the start of the list of FROM
+ * elements. It is FROM when used to parse back SELECT and UPDATE, but
+ * is USING when parsing back DELETE.
+ * ----------
+ */
+static void
+get_from_clause(Query *query, const char *prefix, deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ bool first = true;
+ ListCell *l;
+
+ /*
+ * We use the query's jointree as a guide to what to print. However, we
+ * must ignore auto-added RTEs that are marked not inFromCl. (These can
+ * only appear at the top level of the jointree, so it's sufficient to
+ * check here.) This check also ensures we ignore the rule pseudo-RTEs
+ * for NEW and OLD.
+ */
+ foreach(l, query->jointree->fromlist)
+ {
+ Node *jtnode = (Node *) lfirst(l);
+
+ if (IsA(jtnode, RangeTblRef))
+ {
+ int varno = ((RangeTblRef *) jtnode)->rtindex;
+ RangeTblEntry *rte = rt_fetch(varno, query->rtable);
+
+ if (!rte->inFromCl)
+ continue;
+ }
+
+ if (first)
+ {
+ appendContextKeyword(context, prefix,
+ -PRETTYINDENT_STD, PRETTYINDENT_STD, 2);
+ first = false;
+
+ get_from_clause_item(jtnode, query, context);
+ }
+ else
+ {
+ StringInfoData itembuf;
+
+ appendStringInfoString(buf, ", ");
+
+ /*
+ * Put the new FROM item's text into itembuf so we can decide
+ * after we've got it whether or not it needs to go on a new line.
+ */
+ initStringInfo(&itembuf);
+ context->buf = &itembuf;
+
+ get_from_clause_item(jtnode, query, context);
+
+ /* Restore context's output buffer */
+ context->buf = buf;
+
+ /* Consider line-wrapping if enabled */
+ if (PRETTY_INDENT(context) && context->wrapColumn >= 0)
+ {
+ /* Does the new item start with a new line? */
+ if (itembuf.len > 0 && itembuf.data[0] == '\n')
+ {
+ /* If so, add nothing; just remove any trailing spaces currently in buf */
+ removeStringInfoSpaces(buf);
+ }
+ else
+ {
+ char *trailing_nl;
+
+ /* Locate the start of the current line in the buffer */
+ trailing_nl = strrchr(buf->data, '\n');
+ if (trailing_nl == NULL)
+ trailing_nl = buf->data;
+ else
+ trailing_nl++;
+
+ /*
+ * Add a newline, plus some indentation, if the new item
+ * would cause an overflow.
+ */
+ if (strlen(trailing_nl) + itembuf.len > context->wrapColumn)
+ appendContextKeyword(context, "", -PRETTYINDENT_STD,
+ PRETTYINDENT_STD,
+ PRETTYINDENT_VAR);
+ }
+ }
+
+ /* Add the new item */
+ appendBinaryStringInfo(buf, itembuf.data, itembuf.len);
+
+ /* clean up */
+ pfree(itembuf.data);
+ }
+ }
+}
+
+static void
+get_from_clause_item(Node *jtnode, Query *query, deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ deparse_namespace *dpns = (deparse_namespace *) linitial(context->namespaces);
+
+ if (IsA(jtnode, RangeTblRef))
+ {
+ int varno = ((RangeTblRef *) jtnode)->rtindex;
+ RangeTblEntry *rte = rt_fetch(varno, query->rtable);
+ deparse_columns *colinfo = deparse_columns_fetch(varno, dpns);
+ RangeTblFunction *rtfunc1 = NULL;
+
+ if (rte->lateral)
+ appendStringInfoString(buf, "LATERAL ");
+
+ /* Print the FROM item proper */
+ switch (rte->rtekind)
+ {
+ case RTE_RELATION:
+ /* Normal relation RTE */
+ appendStringInfo(buf, "%s%s",
+ only_marker(rte),
+ generate_relation_name(rte->relid,
+ context->namespaces));
+ break;
+ case RTE_SUBQUERY:
+ /* Subquery RTE */
+ appendStringInfoChar(buf, '(');
+ get_query_def(rte->subquery, buf, context->namespaces, NULL,
+ true,
+ context->prettyFlags, context->wrapColumn,
+ context->indentLevel);
+ appendStringInfoChar(buf, ')');
+ break;
+ case RTE_FUNCTION:
+ /* Function RTE */
+ rtfunc1 = (RangeTblFunction *) linitial(rte->functions);
+
+ /*
+ * Omit ROWS FROM() syntax for just one function, unless it
+ * has both a coldeflist and WITH ORDINALITY. If it has both,
+ * we must use ROWS FROM() syntax to avoid ambiguity about
+ * whether the coldeflist includes the ordinality column.
+ */
+ if (list_length(rte->functions) == 1 &&
+ (rtfunc1->funccolnames == NIL || !rte->funcordinality))
+ {
+ get_rule_expr_funccall(rtfunc1->funcexpr, context, true);
+ /* we'll print the coldeflist below, if it has one */
+ }
+ else
+ {
+ bool all_unnest;
+ ListCell *lc;
+
+ /*
+ * If all the function calls in the list are to unnest,
+ * and none need a coldeflist, then collapse the list back
+ * down to UNNEST(args). (If we had more than one
+ * built-in unnest function, this would get more
+ * difficult.)
+ *
+ * XXX This is pretty ugly, since it makes not-terribly-
+ * future-proof assumptions about what the parser would do
+ * with the output; but the alternative is to emit our
+ * nonstandard ROWS FROM() notation for what might have
+ * been a perfectly spec-compliant multi-argument
+ * UNNEST().
+ */
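+ /* e.g., FROM unnest(a, b) is printed back as UNNEST(a, b) */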
+ all_unnest = true;
+ foreach(lc, rte->functions)
+ {
+ RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc);
+
+ if (!IsA(rtfunc->funcexpr, FuncExpr) ||
+ ((FuncExpr *) rtfunc->funcexpr)->funcid != F_UNNEST_ANYARRAY ||
+ rtfunc->funccolnames != NIL)
+ {
+ all_unnest = false;
+ break;
+ }
+ }
+
+ if (all_unnest)
+ {
+ List *allargs = NIL;
+
+ foreach(lc, rte->functions)
+ {
+ RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc);
+ List *args = ((FuncExpr *) rtfunc->funcexpr)->args;
+
+ allargs = list_concat(allargs, args);
+ }
+
+ appendStringInfoString(buf, "UNNEST(");
+ get_rule_expr((Node *) allargs, context, true);
+ appendStringInfoChar(buf, ')');
+ }
+ else
+ {
+ int funcno = 0;
+
+ appendStringInfoString(buf, "ROWS FROM(");
+ foreach(lc, rte->functions)
+ {
+ RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc);
+
+ if (funcno > 0)
+ appendStringInfoString(buf, ", ");
+ get_rule_expr_funccall(rtfunc->funcexpr, context, true);
+ if (rtfunc->funccolnames != NIL)
+ {
+ /* Reconstruct the column definition list */
+ appendStringInfoString(buf, " AS ");
+ get_from_clause_coldeflist(rtfunc,
+ NULL,
+ context);
+ }
+ funcno++;
+ }
+ appendStringInfoChar(buf, ')');
+ }
+ /* prevent printing duplicate coldeflist below */
+ rtfunc1 = NULL;
+ }
+ if (rte->funcordinality)
+ appendStringInfoString(buf, " WITH ORDINALITY");
+ break;
+ case RTE_TABLEFUNC:
+ get_tablefunc(rte->tablefunc, context, true);
+ break;
+ case RTE_VALUES:
+ /* Values list RTE */
+ appendStringInfoChar(buf, '(');
+ get_values_def(rte->values_lists, context);
+ appendStringInfoChar(buf, ')');
+ break;
+ case RTE_CTE:
+ appendStringInfoString(buf, quote_identifier(rte->ctename));
+ break;
+ default:
+ elog(ERROR, "unrecognized RTE kind: %d", (int) rte->rtekind);
+ break;
+ }
+
+ /* Print the relation alias, if needed */
+ get_rte_alias(rte, varno, false, context);
+
+ /* Print the column definitions or aliases, if needed */
+ if (rtfunc1 && rtfunc1->funccolnames != NIL)
+ {
+ /* Reconstruct the columndef list, which is also the aliases */
+ get_from_clause_coldeflist(rtfunc1, colinfo, context);
+ }
+ else
+ {
+ /* Else print column aliases as needed */
+ get_column_alias_list(colinfo, context);
+ }
+
+ /* Tablesample clause must go after any alias */
+ if (rte->rtekind == RTE_RELATION && rte->tablesample)
+ get_tablesample_def(rte->tablesample, context);
+ }
+ else if (IsA(jtnode, JoinExpr))
+ {
+ JoinExpr *j = (JoinExpr *) jtnode;
+ deparse_columns *colinfo = deparse_columns_fetch(j->rtindex, dpns);
+ bool need_paren_on_right;
+
+ need_paren_on_right = PRETTY_PAREN(context) &&
+ !IsA(j->rarg, RangeTblRef) &&
+ !(IsA(j->rarg, JoinExpr) && ((JoinExpr *) j->rarg)->alias != NULL);
+
+ if (!PRETTY_PAREN(context) || j->alias != NULL)
+ appendStringInfoChar(buf, '(');
+
+ get_from_clause_item(j->larg, query, context);
+
+ switch (j->jointype)
+ {
+ case JOIN_INNER:
+ if (j->quals)
+ appendContextKeyword(context, " JOIN ",
+ -PRETTYINDENT_STD,
+ PRETTYINDENT_STD,
+ PRETTYINDENT_JOIN);
+ else
+ appendContextKeyword(context, " CROSS JOIN ",
+ -PRETTYINDENT_STD,
+ PRETTYINDENT_STD,
+ PRETTYINDENT_JOIN);
+ break;
+ case JOIN_LEFT:
+ appendContextKeyword(context, " LEFT JOIN ",
+ -PRETTYINDENT_STD,
+ PRETTYINDENT_STD,
+ PRETTYINDENT_JOIN);
+ break;
+ case JOIN_FULL:
+ appendContextKeyword(context, " FULL JOIN ",
+ -PRETTYINDENT_STD,
+ PRETTYINDENT_STD,
+ PRETTYINDENT_JOIN);
+ break;
+ case JOIN_RIGHT:
+ appendContextKeyword(context, " RIGHT JOIN ",
+ -PRETTYINDENT_STD,
+ PRETTYINDENT_STD,
+ PRETTYINDENT_JOIN);
+ break;
+ default:
+ elog(ERROR, "unrecognized join type: %d",
+ (int) j->jointype);
+ }
+
+ if (need_paren_on_right)
+ appendStringInfoChar(buf, '(');
+ get_from_clause_item(j->rarg, query, context);
+ if (need_paren_on_right)
+ appendStringInfoChar(buf, ')');
+
+ if (j->usingClause)
+ {
+ ListCell *lc;
+ bool first = true;
+
+ appendStringInfoString(buf, " USING (");
+ /* Use the assigned names, not what's in usingClause */
+ foreach(lc, colinfo->usingNames)
+ {
+ char *colname = (char *) lfirst(lc);
+
+ if (first)
+ first = false;
+ else
+ appendStringInfoString(buf, ", ");
+ appendStringInfoString(buf, quote_identifier(colname));
+ }
+ appendStringInfoChar(buf, ')');
+
+ if (j->join_using_alias)
+ appendStringInfo(buf, " AS %s",
+ quote_identifier(j->join_using_alias->aliasname));
+ }
+ else if (j->quals)
+ {
+ appendStringInfoString(buf, " ON ");
+ if (!PRETTY_PAREN(context))
+ appendStringInfoChar(buf, '(');
+ get_rule_expr(j->quals, context, false);
+ if (!PRETTY_PAREN(context))
+ appendStringInfoChar(buf, ')');
+ }
+ else if (j->jointype != JOIN_INNER)
+ {
+ /* If we didn't say CROSS JOIN above, we must provide an ON */
+ appendStringInfoString(buf, " ON TRUE");
+ }
+
+ if (!PRETTY_PAREN(context) || j->alias != NULL)
+ appendStringInfoChar(buf, ')');
+
+ /* Yes, it's correct to put alias after the right paren ... */
+ if (j->alias != NULL)
+ {
+ /*
+ * Note that it's correct to emit an alias clause if and only if
+ * there was one originally. Otherwise we'd be converting a named
+ * join to unnamed or vice versa, which creates semantic
+ * subtleties we don't want. However, we might print a different
+ * alias name than was there originally.
+ */
+ appendStringInfo(buf, " %s",
+ quote_identifier(get_rtable_name(j->rtindex,
+ context)));
+ get_column_alias_list(colinfo, context);
+ }
+ }
+ else
+ elog(ERROR, "unrecognized node type: %d",
+ (int) nodeTag(jtnode));
+}
+
+/*
+ * get_rte_alias - print the relation's alias, if needed
+ *
+ * If printed, the alias is preceded by a space, or by " AS " if use_as is true.
+ */
+static void
+get_rte_alias(RangeTblEntry *rte, int varno, bool use_as,
+ deparse_context *context)
+{
+ deparse_namespace *dpns = (deparse_namespace *) linitial(context->namespaces);
+ char *refname = get_rtable_name(varno, context);
+ deparse_columns *colinfo = deparse_columns_fetch(varno, dpns);
+ bool printalias = false;
+
+ if (rte->alias != NULL)
+ {
+ /* Always print alias if user provided one */
+ printalias = true;
+ }
+ else if (colinfo->printaliases)
+ {
+ /* Always print alias if we need to print column aliases */
+ printalias = true;
+ }
+ else if (rte->rtekind == RTE_RELATION)
+ {
+ /*
+ * No need to print alias if it's same as relation name (this would
+ * normally be the case, but not if set_rtable_names had to resolve a
+ * conflict).
+ */
+ if (strcmp(refname, get_relation_name(rte->relid)) != 0)
+ printalias = true;
+ }
+ else if (rte->rtekind == RTE_FUNCTION)
+ {
+ /*
+ * For a function RTE, always print alias. This covers possible
+ * renaming of the function and/or instability of the FigureColname
+ * rules for things that aren't simple functions. Note we'd need to
+ * force it anyway for the columndef list case.
+ */
+ printalias = true;
+ }
+ else if (rte->rtekind == RTE_SUBQUERY ||
+ rte->rtekind == RTE_VALUES)
+ {
+ /* Alias is syntactically required for SUBQUERY and VALUES */
+ printalias = true;
+ }
+ else if (rte->rtekind == RTE_CTE)
+ {
+ /*
+ * No need to print alias if it's same as CTE name (this would
+ * normally be the case, but not if set_rtable_names had to resolve a
+ * conflict).
+ */
+ if (strcmp(refname, rte->ctename) != 0)
+ printalias = true;
+ }
+
+ if (printalias)
+ appendStringInfo(context->buf, "%s%s",
+ use_as ? " AS " : " ",
+ quote_identifier(refname));
+}
+
+/*
+ * get_column_alias_list - print column alias list for an RTE
+ *
+ * Caller must already have printed the relation's alias name.
+ */
+static void
+get_column_alias_list(deparse_columns *colinfo, deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ int i;
+ bool first = true;
+
+ /* Don't print aliases if not needed */
+ if (!colinfo->printaliases)
+ return;
+
+ for (i = 0; i < colinfo->num_new_cols; i++)
+ {
+ char *colname = colinfo->new_colnames[i];
+
+ if (first)
+ {
+ appendStringInfoChar(buf, '(');
+ first = false;
+ }
+ else
+ appendStringInfoString(buf, ", ");
+ appendStringInfoString(buf, quote_identifier(colname));
+ }
+ if (!first)
+ appendStringInfoChar(buf, ')');
+}
+
+/*
+ * get_from_clause_coldeflist - reproduce FROM clause coldeflist
+ *
+ * When printing a top-level coldeflist (which is syntactically also the
+ * relation's column alias list), use column names from colinfo. But when
+ * printing a coldeflist embedded inside ROWS FROM(), we prefer to use the
+ * original coldeflist's names, which are available in rtfunc->funccolnames.
+ * Pass NULL for colinfo to select the latter behavior.
+ *
+ * The coldeflist is appended immediately (no space) to buf. Caller is
+ * responsible for ensuring that an alias or AS is present before it.
+ */
+static void
+get_from_clause_coldeflist(RangeTblFunction *rtfunc,
+ deparse_columns *colinfo,
+ deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ ListCell *l1;
+ ListCell *l2;
+ ListCell *l3;
+ ListCell *l4;
+ int i;
+
+ appendStringInfoChar(buf, '(');
+
+ i = 0;
+ forfour(l1, rtfunc->funccoltypes,
+ l2, rtfunc->funccoltypmods,
+ l3, rtfunc->funccolcollations,
+ l4, rtfunc->funccolnames)
+ {
+ Oid atttypid = lfirst_oid(l1);
+ int32 atttypmod = lfirst_int(l2);
+ Oid attcollation = lfirst_oid(l3);
+ char *attname;
+
+ if (colinfo)
+ attname = colinfo->colnames[i];
+ else
+ attname = strVal(lfirst(l4));
+
+ Assert(attname); /* shouldn't be any dropped columns here */
+
+ if (i > 0)
+ appendStringInfoString(buf, ", ");
+ appendStringInfo(buf, "%s %s",
+ quote_identifier(attname),
+ format_type_with_typemod(atttypid, atttypmod));
+ if (OidIsValid(attcollation) &&
+ attcollation != get_typcollation(atttypid))
+ appendStringInfo(buf, " COLLATE %s",
+ generate_collation_name(attcollation));
+
+ i++;
+ }
+
+ appendStringInfoChar(buf, ')');
+}
+
+/*
+ * get_tablesample_def - print a TableSampleClause
+ */
+static void
+get_tablesample_def(TableSampleClause *tablesample, deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ Oid argtypes[1];
+ int nargs;
+ ListCell *l;
+
+ /*
+ * We should qualify the handler's function name if it wouldn't be
+ * resolved by lookup in the current search path.
+ */
+ argtypes[0] = INTERNALOID;
+ appendStringInfo(buf, " TABLESAMPLE %s (",
+ generate_function_name(tablesample->tsmhandler, 1,
+ NIL, argtypes,
+ false, NULL, EXPR_KIND_NONE));
+
+ nargs = 0;
+ foreach(l, tablesample->args)
+ {
+ if (nargs++ > 0)
+ appendStringInfoString(buf, ", ");
+ get_rule_expr((Node *) lfirst(l), context, false);
+ }
+ appendStringInfoChar(buf, ')');
+
+ if (tablesample->repeatable != NULL)
+ {
+ appendStringInfoString(buf, " REPEATABLE (");
+ get_rule_expr((Node *) tablesample->repeatable, context, false);
+ appendStringInfoChar(buf, ')');
+ }
+}
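+
+/*
+ * For the built-in sampling methods this produces text like
+ * " TABLESAMPLE bernoulli (...) REPEATABLE (...)", since the handler
+ * function name doubles as the method name.
+ */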
+
+/*
+ * get_opclass_name - fetch name of an index operator class
+ *
+ * The opclass name is appended (after a space) to buf.
+ *
+ * Output is suppressed if the opclass is the default for the given
+ * actual_datatype. (If you don't want this behavior, just pass
+ * InvalidOid for actual_datatype.)
+ */
+static void
+get_opclass_name(Oid opclass, Oid actual_datatype,
+ StringInfo buf)
+{
+ HeapTuple ht_opc;
+ Form_pg_opclass opcrec;
+ char *opcname;
+ char *nspname;
+
+ ht_opc = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass));
+ if (!HeapTupleIsValid(ht_opc))
+ elog(ERROR, "cache lookup failed for opclass %u", opclass);
+ opcrec = (Form_pg_opclass) GETSTRUCT(ht_opc);
+
+ if (!OidIsValid(actual_datatype) ||
+ GetDefaultOpClass(actual_datatype, opcrec->opcmethod) != opclass)
+ {
+ /* Okay, we need the opclass name. Do we need to qualify it? */
+ opcname = NameStr(opcrec->opcname);
+ if (OpclassIsVisible(opclass))
+ appendStringInfo(buf, " %s", quote_identifier(opcname));
+ else
+ {
+ nspname = get_namespace_name_or_temp(opcrec->opcnamespace);
+ appendStringInfo(buf, " %s.%s",
+ quote_identifier(nspname),
+ quote_identifier(opcname));
+ }
+ }
+ ReleaseSysCache(ht_opc);
+}
+
+/*
+ * generate_opclass_name
+ * Compute the name to display for an opclass specified by OID
+ *
+ * The result includes all necessary quoting and schema-prefixing.
+ */
+char *
+generate_opclass_name(Oid opclass)
+{
+ StringInfoData buf;
+
+ initStringInfo(&buf);
+ get_opclass_name(opclass, InvalidOid, &buf);
+
+ return &buf.data[1]; /* get_opclass_name() prepends space */
+}
+
+/*
+ * processIndirection - take care of array and subfield assignment
+ *
+ * We strip any top-level FieldStore or assignment SubscriptingRef nodes that
+ * appear in the input, printing them as decoration for the base column
+ * name (which we assume the caller just printed). We might also need to
+ * strip CoerceToDomain nodes, but only ones that appear above assignment
+ * nodes.
+ *
+ * Returns the subexpression that's to be assigned.
+ */
+static Node *
+processIndirection(Node *node, deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ CoerceToDomain *cdomain = NULL;
+
+ for (;;)
+ {
+ if (node == NULL)
+ break;
+ if (IsA(node, FieldStore))
+ {
+ FieldStore *fstore = (FieldStore *) node;
+ Oid typrelid;
+ char *fieldname;
+
+ /* lookup tuple type */
+ typrelid = get_typ_typrelid(fstore->resulttype);
+ if (!OidIsValid(typrelid))
+ elog(ERROR, "argument type %s of FieldStore is not a tuple type",
+ format_type_be(fstore->resulttype));
+
+ /*
+ * Print the field name. There should only be one target field in
+ * stored rules. There could be more than that in executable
+ * target lists, but this function cannot be used for that case.
+ */
+ Assert(list_length(fstore->fieldnums) == 1);
+ fieldname = get_attname(typrelid,
+ linitial_int(fstore->fieldnums), false);
+ appendStringInfo(buf, ".%s", quote_identifier(fieldname));
+
+ /*
+ * We ignore arg since it should be an uninteresting reference to
+ * the target column or subcolumn.
+ */
+ node = (Node *) linitial(fstore->newvals);
+ }
+ else if (IsA(node, SubscriptingRef))
+ {
+ SubscriptingRef *sbsref = (SubscriptingRef *) node;
+
+ if (sbsref->refassgnexpr == NULL)
+ break;
+
+ printSubscripts(sbsref, context);
+
+ /*
+ * We ignore refexpr since it should be an uninteresting reference
+ * to the target column or subcolumn.
+ */
+ node = (Node *) sbsref->refassgnexpr;
+ }
+ else if (IsA(node, CoerceToDomain))
+ {
+ cdomain = (CoerceToDomain *) node;
+ /* If it's an explicit domain coercion, we're done */
+ if (cdomain->coercionformat != COERCE_IMPLICIT_CAST)
+ break;
+ /* Tentatively descend past the CoerceToDomain */
+ node = (Node *) cdomain->arg;
+ }
+ else
+ break;
+ }
+
+ /*
+ * If we descended past a CoerceToDomain whose argument turned out not to
+ * be a FieldStore or array assignment, back up to the CoerceToDomain.
+ * (This is not enough to be fully correct if there are nested implicit
+ * CoerceToDomains, but such cases shouldn't ever occur.)
+ */
+ if (cdomain && node == (Node *) cdomain->arg)
+ node = (Node *) cdomain;
+
+ return node;
+}
+
+static void
+printSubscripts(SubscriptingRef *sbsref, deparse_context *context)
+{
+ StringInfo buf = context->buf;
+ ListCell *lowlist_item;
+ ListCell *uplist_item;
+
+ lowlist_item = list_head(sbsref->reflowerindexpr); /* could be NULL */
+ foreach(uplist_item, sbsref->refupperindexpr)
+ {
+ appendStringInfoChar(buf, '[');
+ if (lowlist_item)
+ {
+ /* If subexpression is NULL, get_rule_expr prints nothing */
+ get_rule_expr((Node *) lfirst(lowlist_item), context, false);
+ appendStringInfoChar(buf, ':');
+ lowlist_item = lnext(sbsref->reflowerindexpr, lowlist_item);
+ }
+ /* If subexpression is NULL, get_rule_expr prints nothing */
+ get_rule_expr((Node *) lfirst(uplist_item), context, false);
+ appendStringInfoChar(buf, ']');
+ }
+}
+
+/*
+ * quote_identifier - Quote an identifier only if needed
+ *
+ * When quotes are needed, we palloc the required space; slightly
+ * space-wasteful but well worth it for notational simplicity.
+ */
+const char *
+quote_identifier(const char *ident)
+{
+ /*
+ * Can avoid quoting if ident starts with a lowercase letter or underscore
+ * and contains only lowercase letters, digits, and underscores, *and* is
+ * not any SQL keyword. Otherwise, supply quotes.
+ */
+ int nquotes = 0;
+ bool safe;
+ const char *ptr;
+ char *result;
+ char *optr;
+
+ /*
+ * would like to use <ctype.h> macros here, but they might yield unwanted
+ * locale-specific results...
+ */
+ safe = ((ident[0] >= 'a' && ident[0] <= 'z') || ident[0] == '_');
+
+ for (ptr = ident; *ptr; ptr++)
+ {
+ char ch = *ptr;
+
+ if ((ch >= 'a' && ch <= 'z') ||
+ (ch >= '0' && ch <= '9') ||
+ (ch == '_'))
+ {
+ /* okay */
+ }
+ else
+ {
+ safe = false;
+ if (ch == '"')
+ nquotes++;
+ }
+ }
+
+ if (quote_all_identifiers)
+ safe = false;
+
+ if (safe)
+ {
+ /*
+ * Check for keyword. We quote keywords except for unreserved ones.
+ * (In some cases we could avoid quoting a col_name or type_func_name
+ * keyword, but it seems much harder than it's worth to tell that.)
+ *
+ * Note: ScanKeywordLookup() does case-insensitive comparison, but
+ * that's fine, since we already know we have all-lower-case.
+ */
+ int kwnum = ScanKeywordLookup(ident, &ScanKeywords);
+
+ if (kwnum >= 0 && ScanKeywordCategories[kwnum] != UNRESERVED_KEYWORD)
+ safe = false;
+ }
+
+ if (safe)
+ return ident; /* no change needed */
+
+ result = (char *) palloc(strlen(ident) + nquotes + 2 + 1);
+
+ optr = result;
+ *optr++ = '"';
+ for (ptr = ident; *ptr; ptr++)
+ {
+ char ch = *ptr;
+
+ if (ch == '"')
+ *optr++ = '"';
+ *optr++ = ch;
+ }
+ *optr++ = '"';
+ *optr = '\0';
+
+ return result;
+}
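+
+/*
+ * For example: foo_1 is returned as-is, while Foo, select, and foo bar are
+ * returned double-quoted, and any embedded double quotes are doubled
+ * (foo"bar becomes "foo""bar").
+ */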
+
+/*
+ * quote_qualified_identifier - Quote a possibly-qualified identifier
+ *
+ * Return a name of the form qualifier.ident, or just ident if qualifier
+ * is NULL, quoting each component if necessary. The result is palloc'd.
+ */
+char *
+quote_qualified_identifier(const char *qualifier,
+ const char *ident)
+{
+ StringInfoData buf;
+
+ initStringInfo(&buf);
+ if (qualifier)
+ appendStringInfo(&buf, "%s.", quote_identifier(qualifier));
+ appendStringInfoString(&buf, quote_identifier(ident));
+ return buf.data;
+}
+
+/*
+ * get_relation_name
+ * Get the unqualified name of a relation specified by OID
+ *
+ * This differs from the underlying get_rel_name() function in that it will
+ * throw error instead of silently returning NULL if the OID is bad.
+ */
+static char *
+get_relation_name(Oid relid)
+{
+ char *relname = get_rel_name(relid);
+
+ if (!relname)
+ elog(ERROR, "cache lookup failed for relation %u", relid);
+ return relname;
+}
+
+/*
+ * generate_relation_name
+ * Compute the name to display for a relation specified by OID
+ *
+ * The result includes all necessary quoting and schema-prefixing.
+ *
+ * If namespaces isn't NIL, it must be a list of deparse_namespace nodes.
+ * We will forcibly qualify the relation name if it equals any CTE name
+ * visible in the namespace list.
+ */
+static char *
+generate_relation_name(Oid relid, List *namespaces)
+{
+ HeapTuple tp;
+ Form_pg_class reltup;
+ bool need_qual;
+ ListCell *nslist;
+ char *relname;
+ char *nspname;
+ char *result;
+
+ tp = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+ if (!HeapTupleIsValid(tp))
+ elog(ERROR, "cache lookup failed for relation %u", relid);
+ reltup = (Form_pg_class) GETSTRUCT(tp);
+ relname = NameStr(reltup->relname);
+
+ /* Check for conflicting CTE name */
+ need_qual = false;
+ foreach(nslist, namespaces)
+ {
+ deparse_namespace *dpns = (deparse_namespace *) lfirst(nslist);
+ ListCell *ctlist;
+
+ foreach(ctlist, dpns->ctes)
+ {
+ CommonTableExpr *cte = (CommonTableExpr *) lfirst(ctlist);
+
+ if (strcmp(cte->ctename, relname) == 0)
+ {
+ need_qual = true;
+ break;
+ }
+ }
+ if (need_qual)
+ break;
+ }
+
+ /* Otherwise, qualify the name if not visible in search path */
+ if (!need_qual)
+ need_qual = !RelationIsVisible(relid);
+
+ if (need_qual)
+ nspname = get_namespace_name_or_temp(reltup->relnamespace);
+ else
+ nspname = NULL;
+
+ result = quote_qualified_identifier(nspname, relname);
+
+ ReleaseSysCache(tp);
+
+ return result;
+}
+
+/*
+ * generate_qualified_relation_name
+ * Compute the name to display for a relation specified by OID
+ *
+ * As above, but unconditionally schema-qualify the name.
+ */
+static char *
+generate_qualified_relation_name(Oid relid)
+{
+ HeapTuple tp;
+ Form_pg_class reltup;
+ char *relname;
+ char *nspname;
+ char *result;
+
+ tp = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+ if (!HeapTupleIsValid(tp))
+ elog(ERROR, "cache lookup failed for relation %u", relid);
+ reltup = (Form_pg_class) GETSTRUCT(tp);
+ relname = NameStr(reltup->relname);
+
+ nspname = get_namespace_name_or_temp(reltup->relnamespace);
+ if (!nspname)
+ elog(ERROR, "cache lookup failed for namespace %u",
+ reltup->relnamespace);
+
+ result = quote_qualified_identifier(nspname, relname);
+
+ ReleaseSysCache(tp);
+
+ return result;
+}
+
+/*
+ * generate_function_name
+ * Compute the name to display for a function specified by OID,
+ * given that it is being called with the specified actual arg names and
+ * types. (Those matter because of ambiguous-function resolution rules.)
+ *
+ * If we're dealing with a potentially variadic function (in practice, this
+ * means a FuncExpr or Aggref, not some other way of calling a function), then
+ * has_variadic must specify whether variadic arguments have been merged,
+ * and *use_variadic_p will be set to indicate whether to print VARIADIC in
+ * the output. For non-FuncExpr cases, has_variadic should be false and
+ * use_variadic_p can be NULL.
+ *
+ * The result includes all necessary quoting and schema-prefixing.
+ */
+static char *
+generate_function_name(Oid funcid, int nargs, List *argnames, Oid *argtypes,
+ bool has_variadic, bool *use_variadic_p,
+ ParseExprKind special_exprkind)
+{
+ char *result;
+ HeapTuple proctup;
+ Form_pg_proc procform;
+ char *proname;
+ bool use_variadic;
+ char *nspname;
+ FuncDetailCode p_result;
+ Oid p_funcid;
+ Oid p_rettype;
+ bool p_retset;
+ int p_nvargs;
+ Oid p_vatype;
+ Oid *p_true_typeids;
+ bool force_qualify = false;
+
+ proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid));
+ if (!HeapTupleIsValid(proctup))
+ elog(ERROR, "cache lookup failed for function %u", funcid);
+ procform = (Form_pg_proc) GETSTRUCT(proctup);
+ proname = NameStr(procform->proname);
+
+ /*
+ * Due to parser hacks to avoid needing to reserve CUBE, we need to force
+ * qualification in some special cases.
+ */
+ if (special_exprkind == EXPR_KIND_GROUP_BY)
+ {
+ if (strcmp(proname, "cube") == 0 || strcmp(proname, "rollup") == 0)
+ force_qualify = true;
+ }
+
+ /*
+ * Determine whether VARIADIC should be printed. We must do this first
+ * since it affects the lookup rules in func_get_detail().
+ *
+ * We always print VARIADIC if the function has a merged variadic-array
+ * argument. Note that this is always the case for functions taking a
+ * VARIADIC argument type other than VARIADIC ANY. If we omitted VARIADIC
+ * and printed the array elements as separate arguments, the call could
+ * match a newer non-VARIADIC function.
+ */
+ if (use_variadic_p)
+ {
+ /* Parser should not have set funcvariadic unless fn is variadic */
+ Assert(!has_variadic || OidIsValid(procform->provariadic));
+ use_variadic = has_variadic;
+ *use_variadic_p = use_variadic;
+ }
+ else
+ {
+ Assert(!has_variadic);
+ use_variadic = false;
+ }
+
+ /*
+ * The idea here is to schema-qualify only if the parser would fail to
+ * resolve the correct function given the unqualified func name with the
+ * specified argtypes and VARIADIC flag. But if we already decided to
+ * force qualification, then we can skip the lookup and pretend we didn't
+ * find it.
+ */
+ if (!force_qualify)
+ p_result = func_get_detail(list_make1(makeString(proname)),
+ NIL, argnames, nargs, argtypes,
+ !use_variadic, true, false,
+ &p_funcid, &p_rettype,
+ &p_retset, &p_nvargs, &p_vatype,
+ &p_true_typeids, NULL);
+ else
+ {
+ p_result = FUNCDETAIL_NOTFOUND;
+ p_funcid = InvalidOid;
+ }
+
+ if ((p_result == FUNCDETAIL_NORMAL ||
+ p_result == FUNCDETAIL_AGGREGATE ||
+ p_result == FUNCDETAIL_WINDOWFUNC) &&
+ p_funcid == funcid)
+ nspname = NULL;
+ else
+ nspname = get_namespace_name_or_temp(procform->pronamespace);
+
+ result = quote_qualified_identifier(nspname, proname);
+
+ ReleaseSysCache(proctup);
+
+ return result;
+}
+
+/*
+ * generate_operator_name
+ * Compute the name to display for an operator specified by OID,
+ * given that it is being called with the specified actual arg types.
+ * (Arg types matter because of ambiguous-operator resolution rules.
+ * Pass InvalidOid for unused arg of a unary operator.)
+ *
+ * The result includes all necessary quoting and schema-prefixing,
+ * plus the OPERATOR() decoration needed to use a qualified operator name
+ * in an expression.
+ */
+static char *
+generate_operator_name(Oid operid, Oid arg1, Oid arg2)
+{
+ StringInfoData buf;
+ HeapTuple opertup;
+ Form_pg_operator operform;
+ char *oprname;
+ char *nspname;
+ Operator p_result;
+
+ initStringInfo(&buf);
+
+ opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(operid));
+ if (!HeapTupleIsValid(opertup))
+ elog(ERROR, "cache lookup failed for operator %u", operid);
+ operform = (Form_pg_operator) GETSTRUCT(opertup);
+ oprname = NameStr(operform->oprname);
+
+ /*
+ * The idea here is to schema-qualify only if the parser would fail to
+ * resolve the correct operator given the unqualified op name with the
+ * specified argtypes.
+ */
+ switch (operform->oprkind)
+ {
+ case 'b':
+ p_result = oper(NULL, list_make1(makeString(oprname)), arg1, arg2,
+ true, -1);
+ break;
+ case 'l':
+ p_result = left_oper(NULL, list_make1(makeString(oprname)), arg2,
+ true, -1);
+ break;
+ default:
+ elog(ERROR, "unrecognized oprkind: %d", operform->oprkind);
+ p_result = NULL; /* keep compiler quiet */
+ break;
+ }
+
+ if (p_result != NULL && oprid(p_result) == operid)
+ nspname = NULL;
+ else
+ {
+ nspname = get_namespace_name_or_temp(operform->oprnamespace);
+ appendStringInfo(&buf, "OPERATOR(%s.", quote_identifier(nspname));
+ }
+
+ appendStringInfoString(&buf, oprname);
+
+ if (nspname)
+ appendStringInfoChar(&buf, ')');
+
+ if (p_result != NULL)
+ ReleaseSysCache(p_result);
+
+ ReleaseSysCache(opertup);
+
+ return buf.data;
+}
+
+/*
+ * generate_operator_clause --- generate a binary-operator WHERE clause
+ *
+ * This is used for internally-generated-and-executed SQL queries, where
+ * precision is essential and readability is secondary. The basic
+ * requirement is to append "leftop op rightop" to buf, where leftop and
+ * rightop are given as strings and are assumed to yield types leftoptype
+ * and rightoptype; the operator is identified by OID. The complexity
+ * comes from needing to be sure that the parser will select the desired
+ * operator when the query is parsed. We always name the operator using
+ * OPERATOR(schema.op) syntax, so as to avoid search-path uncertainties.
+ * We have to emit casts too, if either input isn't already the input type
+ * of the operator; else we are at the mercy of the parser's heuristics for
+ * ambiguous-operator resolution. The caller must ensure that leftop and
+ * rightop are suitable arguments for a cast operation; it's best to insert
+ * parentheses if they aren't just variables or parameters.
+ */
+void
+generate_operator_clause(StringInfo buf,
+ const char *leftop, Oid leftoptype,
+ Oid opoid,
+ const char *rightop, Oid rightoptype)
+{
+ HeapTuple opertup;
+ Form_pg_operator operform;
+ char *oprname;
+ char *nspname;
+
+ opertup = SearchSysCache1(OPEROID, ObjectIdGetDatum(opoid));
+ if (!HeapTupleIsValid(opertup))
+ elog(ERROR, "cache lookup failed for operator %u", opoid);
+ operform = (Form_pg_operator) GETSTRUCT(opertup);
+ Assert(operform->oprkind == 'b');
+ oprname = NameStr(operform->oprname);
+
+ nspname = get_namespace_name(operform->oprnamespace);
+
+ appendStringInfoString(buf, leftop);
+ if (leftoptype != operform->oprleft)
+ add_cast_to(buf, operform->oprleft);
+ appendStringInfo(buf, " OPERATOR(%s.", quote_identifier(nspname));
+ appendStringInfoString(buf, oprname);
+ appendStringInfo(buf, ") %s", rightop);
+ if (rightoptype != operform->oprright)
+ add_cast_to(buf, operform->oprright);
+
+ ReleaseSysCache(opertup);
+}
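+
+/*
+ * For instance, with the built-in text equality operator and operands that
+ * already have the operator's input types, this appends something like
+ * "leftcol OPERATOR(pg_catalog.=) rightcol"; a mismatched input type gets
+ * an explicit cast such as "leftcol::pg_catalog.text" first.
+ */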
+
+/*
+ * Add a cast specification to buf. We spell out the type name the hard way,
+ * intentionally not using format_type_be(). This is to avoid corner cases
+ * for CHARACTER, BIT, and perhaps other types, where specifying the type
+ * using SQL-standard syntax results in undesirable data truncation. By
+ * doing it this way we can be certain that the cast will have default (-1)
+ * target typmod.
+ */
+static void
+add_cast_to(StringInfo buf, Oid typid)
+{
+ HeapTuple typetup;
+ Form_pg_type typform;
+ char *typname;
+ char *nspname;
+
+ typetup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid));
+ if (!HeapTupleIsValid(typetup))
+ elog(ERROR, "cache lookup failed for type %u", typid);
+ typform = (Form_pg_type) GETSTRUCT(typetup);
+
+ typname = NameStr(typform->typname);
+ nspname = get_namespace_name_or_temp(typform->typnamespace);
+
+ appendStringInfo(buf, "::%s.%s",
+ quote_identifier(nspname), quote_identifier(typname));
+
+ ReleaseSysCache(typetup);
+}
+
+/*
+ * generate_qualified_type_name
+ * Compute the name to display for a type specified by OID
+ *
+ * This is different from format_type_be() in that we unconditionally
+ * schema-qualify the name. That also means no special syntax for
+ * SQL-standard type names ... although in current usage, this should
+ * only get used for domains, so such cases wouldn't occur anyway.
+ */
+static char *
+generate_qualified_type_name(Oid typid)
+{
+ HeapTuple tp;
+ Form_pg_type typtup;
+ char *typname;
+ char *nspname;
+ char *result;
+
+ tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typid));
+ if (!HeapTupleIsValid(tp))
+ elog(ERROR, "cache lookup failed for type %u", typid);
+ typtup = (Form_pg_type) GETSTRUCT(tp);
+ typname = NameStr(typtup->typname);
+
+ nspname = get_namespace_name_or_temp(typtup->typnamespace);
+ if (!nspname)
+ elog(ERROR, "cache lookup failed for namespace %u",
+ typtup->typnamespace);
+
+ result = quote_qualified_identifier(nspname, typname);
+
+ ReleaseSysCache(tp);
+
+ return result;
+}
+
+/*
+ * generate_collation_name
+ * Compute the name to display for a collation specified by OID
+ *
+ * The result includes all necessary quoting and schema-prefixing.
+ */
+char *
+generate_collation_name(Oid collid)
+{
+ HeapTuple tp;
+ Form_pg_collation colltup;
+ char *collname;
+ char *nspname;
+ char *result;
+
+ tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
+ if (!HeapTupleIsValid(tp))
+ elog(ERROR, "cache lookup failed for collation %u", collid);
+ colltup = (Form_pg_collation) GETSTRUCT(tp);
+ collname = NameStr(colltup->collname);
+
+ if (!CollationIsVisible(collid))
+ nspname = get_namespace_name_or_temp(colltup->collnamespace);
+ else
+ nspname = NULL;
+
+ result = quote_qualified_identifier(nspname, collname);
+
+ ReleaseSysCache(tp);
+
+ return result;
+}
+
+/*
+ * Given a C string, produce a TEXT datum.
+ *
+ * We assume that the input was palloc'd and may be freed.
+ */
+static text *
+string_to_text(char *str)
+{
+ text *result;
+
+ result = cstring_to_text(str);
+ pfree(str);
+ return result;
+}
+
+/*
+ * Generate a C string representing relation options from a text[] datum.
+ */
+static void
+get_reloptions(StringInfo buf, Datum reloptions)
+{
+ Datum *options;
+ int noptions;
+ int i;
+
+ deconstruct_array(DatumGetArrayTypeP(reloptions),
+ TEXTOID, -1, false, TYPALIGN_INT,
+ &options, NULL, &noptions);
+
+ for (i = 0; i < noptions; i++)
+ {
+ char *option = TextDatumGetCString(options[i]);
+ char *name;
+ char *separator;
+ char *value;
+
+ /*
+ * Each array element should have the form name=value. If the "=" is
+ * missing for some reason, treat it like an empty value.
+ */
+ name = option;
+ separator = strchr(option, '=');
+ if (separator)
+ {
+ *separator = '\0';
+ value = separator + 1;
+ }
+ else
+ value = "";
+
+ if (i > 0)
+ appendStringInfoString(buf, ", ");
+ appendStringInfo(buf, "%s=", quote_identifier(name));
+
+ /*
+ * In general we need to quote the value; but to avoid unnecessary
+ * clutter, do not quote if it is an identifier that would not need
+ * quoting. (We could also allow numbers, but that is a bit trickier
+ * than it looks --- for example, are leading zeroes significant? We
+ * don't want to assume very much here about what custom reloptions
+ * might mean.)
+ */
+ if (quote_identifier(value) == value)
+ appendStringInfoString(buf, value);
+ else
+ simple_quote_literal(buf, value);
+
+ pfree(option);
+ }
+}
+
+/*
+ * Generate a C string representing a relation's reloptions, or NULL if none.
+ */
+static char *
+flatten_reloptions(Oid relid)
+{
+ char *result = NULL;
+ HeapTuple tuple;
+ Datum reloptions;
+ bool isnull;
+
+ tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for relation %u", relid);
+
+ reloptions = SysCacheGetAttr(RELOID, tuple,
+ Anum_pg_class_reloptions, &isnull);
+ if (!isnull)
+ {
+ StringInfoData buf;
+
+ initStringInfo(&buf);
+ get_reloptions(&buf, reloptions);
+
+ result = buf.data;
+ }
+
+ ReleaseSysCache(tuple);
+
+ return result;
+}
+
+/*
+ * get_range_partbound_string
+ * A C string representation of one range partition bound
+ */
+char *
+get_range_partbound_string(List *bound_datums)
+{
+ deparse_context context;
+ StringInfo buf = makeStringInfo();
+ ListCell *cell;
+ char *sep;
+
+ memset(&context, 0, sizeof(deparse_context));
+ context.buf = buf;
+
+ appendStringInfoChar(buf, '(');
+ sep = "";
+ foreach(cell, bound_datums)
+ {
+ PartitionRangeDatum *datum =
+ lfirst_node(PartitionRangeDatum, cell);
+
+ appendStringInfoString(buf, sep);
+ if (datum->kind == PARTITION_RANGE_DATUM_MINVALUE)
+ appendStringInfoString(buf, "MINVALUE");
+ else if (datum->kind == PARTITION_RANGE_DATUM_MAXVALUE)
+ appendStringInfoString(buf, "MAXVALUE");
+ else
+ {
+ Const *val = castNode(Const, datum->value);
+
+ get_const_expr(val, &context, -1);
+ }
+ sep = ", ";
+ }
+ appendStringInfoChar(buf, ')');
+
+ return buf->data;
+}
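+
+/*
+ * For example, a bound list of (10, MAXVALUE) comes out as "(10, MAXVALUE)";
+ * constants are printed without "::type" decoration because showtype is -1.
+ */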
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
new file mode 100644
index 0000000..2dd399d
--- /dev/null
+++ b/src/backend/utils/adt/selfuncs.c
@@ -0,0 +1,7961 @@
+/*-------------------------------------------------------------------------
+ *
+ * selfuncs.c
+ * Selectivity functions and index cost estimation functions for
+ * standard operators and index access methods.
+ *
+ * Selectivity routines are registered in the pg_operator catalog
+ * in the "oprrest" and "oprjoin" attributes.
+ *
+ * Index cost functions are located via the index AM's API struct,
+ * which is obtained from the handler function registered in pg_am.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/selfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*----------
+ * Operator selectivity estimation functions are called to estimate the
+ * selectivity of WHERE clauses whose top-level operator is their operator.
+ * We divide the problem into two cases:
+ * Restriction clause estimation: the clause involves vars of just
+ * one relation.
+ * Join clause estimation: the clause involves vars of multiple rels.
+ * Join selectivity estimation is far more difficult and usually less accurate
+ * than restriction estimation.
+ *
+ * When dealing with the inner scan of a nestloop join, we consider the
+ * join's joinclauses as restriction clauses for the inner relation, and
+ * treat vars of the outer relation as parameters (a/k/a constants of unknown
+ * values). So, restriction estimators need to be able to accept an argument
+ * telling which relation is to be treated as the variable.
+ *
+ * The call convention for a restriction estimator (oprrest function) is
+ *
+ * Selectivity oprrest (PlannerInfo *root,
+ * Oid operator,
+ * List *args,
+ * int varRelid);
+ *
+ * root: general information about the query (rtable and RelOptInfo lists
+ * are particularly important for the estimator).
+ * operator: OID of the specific operator in question.
+ * args: argument list from the operator clause.
+ * varRelid: if not zero, the relid (rtable index) of the relation to
+ * be treated as the variable relation. May be zero if the args list
+ * is known to contain vars of only one relation.
+ *
+ * This is represented at the SQL level (in pg_proc) as
+ *
+ * float8 oprrest (internal, oid, internal, int4);
+ *
+ * The result is a selectivity, that is, a fraction (0 to 1) of the rows
+ * of the relation that are expected to produce a TRUE result for the
+ * given operator.
+ *
+ * The call convention for a join estimator (oprjoin function) is similar
+ * except that varRelid is not needed, and instead join information is
+ * supplied:
+ *
+ * Selectivity oprjoin (PlannerInfo *root,
+ * Oid operator,
+ * List *args,
+ * JoinType jointype,
+ * SpecialJoinInfo *sjinfo);
+ *
+ * float8 oprjoin (internal, oid, internal, int2, internal);
+ *
+ * (Before Postgres 8.4, join estimators had only the first four of these
+ * parameters. That signature is still allowed, but deprecated.) The
+ * relationship between jointype and sjinfo is explained in the comments for
+ * clause_selectivity() --- the short version is that jointype is usually
+ * best ignored in favor of examining sjinfo.
+ *
+ * Join selectivity for regular inner and outer joins is defined as the
+ * fraction (0 to 1) of the cross product of the relations that is expected
+ * to produce a TRUE result for the given operator. For both semi and anti
+ * joins, however, the selectivity is defined as the fraction of the left-hand
+ * side relation's rows that are expected to have a match (ie, at least one
+ * row with a TRUE result) in the right-hand side.
+ *
+ * For both oprrest and oprjoin functions, the operator's input collation OID
+ * (if any) is passed using the standard fmgr mechanism, so that the estimator
+ * function can fetch it with PG_GET_COLLATION(). Note, however, that all
+ * statistics in pg_statistic are currently built using the relevant column's
+ * collation.
+ *----------
+ */
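+
+/*
+ * As a minimal sketch of the oprrest convention described above (using a
+ * hypothetical "myopsel" estimator, not one defined in this file), a
+ * constant-selectivity estimator returning 0.005 would be simply:
+ *
+ *	Datum
+ *	myopsel(PG_FUNCTION_ARGS)
+ *	{
+ *		PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ *		Oid			operator = PG_GETARG_OID(1);
+ *		List	   *args = (List *) PG_GETARG_POINTER(2);
+ *		int			varRelid = PG_GETARG_INT32(3);
+ *
+ *		PG_RETURN_FLOAT8(0.005);
+ *	}
+ *
+ * Real estimators, such as eqsel() below, use those arguments to identify
+ * the variable and fetch its statistics (see get_restriction_variable()).
+ */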
+
+#include "postgres.h"
+
+#include <ctype.h>
+#include <math.h>
+
+#include "access/brin.h"
+#include "access/brin_page.h"
+#include "access/gin.h"
+#include "access/table.h"
+#include "access/tableam.h"
+#include "access/visibilitymap.h"
+#include "catalog/pg_am.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_statistic.h"
+#include "catalog/pg_statistic_ext.h"
+#include "executor/nodeAgg.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "optimizer/clauses.h"
+#include "optimizer/cost.h"
+#include "optimizer/optimizer.h"
+#include "optimizer/pathnode.h"
+#include "optimizer/paths.h"
+#include "optimizer/plancat.h"
+#include "parser/parse_clause.h"
+#include "parser/parsetree.h"
+#include "statistics/statistics.h"
+#include "storage/bufmgr.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/date.h"
+#include "utils/datum.h"
+#include "utils/fmgroids.h"
+#include "utils/index_selfuncs.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/pg_locale.h"
+#include "utils/rel.h"
+#include "utils/selfuncs.h"
+#include "utils/snapmgr.h"
+#include "utils/spccache.h"
+#include "utils/syscache.h"
+#include "utils/timestamp.h"
+#include "utils/typcache.h"
+
+
+/* Hooks for plugins to get control when we ask for stats */
+get_relation_stats_hook_type get_relation_stats_hook = NULL;
+get_index_stats_hook_type get_index_stats_hook = NULL;
+
+static double eqsel_internal(PG_FUNCTION_ARGS, bool negate);
+static double eqjoinsel_inner(Oid opfuncoid, Oid collation,
+ VariableStatData *vardata1, VariableStatData *vardata2,
+ double nd1, double nd2,
+ bool isdefault1, bool isdefault2,
+ AttStatsSlot *sslot1, AttStatsSlot *sslot2,
+ Form_pg_statistic stats1, Form_pg_statistic stats2,
+ bool have_mcvs1, bool have_mcvs2);
+static double eqjoinsel_semi(Oid opfuncoid, Oid collation,
+ VariableStatData *vardata1, VariableStatData *vardata2,
+ double nd1, double nd2,
+ bool isdefault1, bool isdefault2,
+ AttStatsSlot *sslot1, AttStatsSlot *sslot2,
+ Form_pg_statistic stats1, Form_pg_statistic stats2,
+ bool have_mcvs1, bool have_mcvs2,
+ RelOptInfo *inner_rel);
+static bool estimate_multivariate_ndistinct(PlannerInfo *root,
+ RelOptInfo *rel, List **varinfos, double *ndistinct);
+static bool convert_to_scalar(Datum value, Oid valuetypid, Oid collid,
+ double *scaledvalue,
+ Datum lobound, Datum hibound, Oid boundstypid,
+ double *scaledlobound, double *scaledhibound);
+static double convert_numeric_to_scalar(Datum value, Oid typid, bool *failure);
+static void convert_string_to_scalar(char *value,
+ double *scaledvalue,
+ char *lobound,
+ double *scaledlobound,
+ char *hibound,
+ double *scaledhibound);
+static void convert_bytea_to_scalar(Datum value,
+ double *scaledvalue,
+ Datum lobound,
+ double *scaledlobound,
+ Datum hibound,
+ double *scaledhibound);
+static double convert_one_string_to_scalar(char *value,
+ int rangelo, int rangehi);
+static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
+ int rangelo, int rangehi);
+static char *convert_string_datum(Datum value, Oid typid, Oid collid,
+ bool *failure);
+static double convert_timevalue_to_scalar(Datum value, Oid typid,
+ bool *failure);
+static void examine_simple_variable(PlannerInfo *root, Var *var,
+ VariableStatData *vardata);
+static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
+ Oid sortop, Oid collation,
+ Datum *min, Datum *max);
+static void get_stats_slot_range(AttStatsSlot *sslot,
+ Oid opfuncoid, FmgrInfo *opproc,
+ Oid collation, int16 typLen, bool typByVal,
+ Datum *min, Datum *max, bool *p_have_data);
+static bool get_actual_variable_range(PlannerInfo *root,
+ VariableStatData *vardata,
+ Oid sortop, Oid collation,
+ Datum *min, Datum *max);
+static bool get_actual_variable_endpoint(Relation heapRel,
+ Relation indexRel,
+ ScanDirection indexscandir,
+ ScanKey scankeys,
+ int16 typLen,
+ bool typByVal,
+ TupleTableSlot *tableslot,
+ MemoryContext outercontext,
+ Datum *endpointDatum);
+static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);
+
+
+/*
+ * eqsel - Selectivity of "=" for any data types.
+ *
+ * Note: this routine is also used to estimate selectivity for some
+ * operators that are not "=" but have comparable selectivity behavior,
+ * such as "~=" (geometric approximate-match). Even for "=", we must
+ * keep in mind that the left and right datatypes may differ.
+ */
+Datum
+eqsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, false));
+}
+
+/*
+ * Common code for eqsel() and neqsel()
+ */
+static double
+eqsel_internal(PG_FUNCTION_ARGS, bool negate)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Oid operator = PG_GETARG_OID(1);
+ List *args = (List *) PG_GETARG_POINTER(2);
+ int varRelid = PG_GETARG_INT32(3);
+ Oid collation = PG_GET_COLLATION();
+ VariableStatData vardata;
+ Node *other;
+ bool varonleft;
+ double selec;
+
+ /*
+ * When asked about <>, we do the estimation using the corresponding =
+ * operator, then convert to <> via "1.0 - eq_selectivity - nullfrac".
+ */
+ if (negate)
+ {
+ operator = get_negator(operator);
+ if (!OidIsValid(operator))
+ {
+ /* Use default selectivity (should we raise an error instead?) */
+ return 1.0 - DEFAULT_EQ_SEL;
+ }
+ }
+
+ /*
+ * If expression is not variable = something or something = variable, then
+ * punt and return a default estimate.
+ */
+ if (!get_restriction_variable(root, args, varRelid,
+ &vardata, &other, &varonleft))
+ return negate ? (1.0 - DEFAULT_EQ_SEL) : DEFAULT_EQ_SEL;
+
+ /*
+ * We can do a lot better if the something is a constant. (Note: the
+ * Const might result from estimation rather than being a simple constant
+ * in the query.)
+ */
+ if (IsA(other, Const))
+ selec = var_eq_const(&vardata, operator, collation,
+ ((Const *) other)->constvalue,
+ ((Const *) other)->constisnull,
+ varonleft, negate);
+ else
+ selec = var_eq_non_const(&vardata, operator, collation, other,
+ varonleft, negate);
+
+ ReleaseVariableStats(vardata);
+
+ return selec;
+}
+
+/*
+ * var_eq_const --- eqsel for var = const case
+ *
+ * This is exported so that some other estimation functions can use it.
+ */
+double
+var_eq_const(VariableStatData *vardata, Oid operator, Oid collation,
+ Datum constval, bool constisnull,
+ bool varonleft, bool negate)
+{
+ double selec;
+ double nullfrac = 0.0;
+ bool isdefault;
+ Oid opfuncoid;
+
+ /*
+ * If the constant is NULL, assume operator is strict and return zero, ie,
+ * operator will never return TRUE. (It's zero even for a negator op.)
+ */
+ if (constisnull)
+ return 0.0;
+
+ /*
+ * Grab the nullfrac for use below. Note we allow use of nullfrac
+ * regardless of security check.
+ */
+ if (HeapTupleIsValid(vardata->statsTuple))
+ {
+ Form_pg_statistic stats;
+
+ stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
+ nullfrac = stats->stanullfrac;
+ }
+
+ /*
+ * If we matched the var to a unique index or DISTINCT clause, assume
+ * there is exactly one match regardless of anything else. (This is
+ * slightly bogus, since the index or clause's equality operator might be
+ * different from ours, but it's much more likely to be right than
+ * ignoring the information.)
+ */
+ if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
+ {
+ selec = 1.0 / vardata->rel->tuples;
+ }
+ else if (HeapTupleIsValid(vardata->statsTuple) &&
+ statistic_proc_security_check(vardata,
+ (opfuncoid = get_opcode(operator))))
+ {
+ AttStatsSlot sslot;
+ bool match = false;
+ int i;
+
+ /*
+ * Is the constant "=" to any of the column's most common values?
+ * (Although the given operator may not really be "=", we will assume
+ * that seeing whether it returns TRUE is an appropriate test. If you
+ * don't like this, maybe you shouldn't be using eqsel for your
+ * operator...)
+ */
+ if (get_attstatsslot(&sslot, vardata->statsTuple,
+ STATISTIC_KIND_MCV, InvalidOid,
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
+ {
+ LOCAL_FCINFO(fcinfo, 2);
+ FmgrInfo eqproc;
+
+ fmgr_info(opfuncoid, &eqproc);
+
+ /*
+ * Save a few cycles by setting up the fcinfo struct just once.
+ * Using FunctionCallInvoke directly also avoids failure if the
+ * eqproc returns NULL, though really equality functions should
+ * never do that.
+ */
+ InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
+ NULL, NULL);
+ fcinfo->args[0].isnull = false;
+ fcinfo->args[1].isnull = false;
+ /* be careful to apply operator right way 'round */
+ if (varonleft)
+ fcinfo->args[1].value = constval;
+ else
+ fcinfo->args[0].value = constval;
+
+ for (i = 0; i < sslot.nvalues; i++)
+ {
+ Datum fresult;
+
+ if (varonleft)
+ fcinfo->args[0].value = sslot.values[i];
+ else
+ fcinfo->args[1].value = sslot.values[i];
+ fcinfo->isnull = false;
+ fresult = FunctionCallInvoke(fcinfo);
+ if (!fcinfo->isnull && DatumGetBool(fresult))
+ {
+ match = true;
+ break;
+ }
+ }
+ }
+ else
+ {
+ /* no most-common-value info available */
+ i = 0; /* keep compiler quiet */
+ }
+
+ if (match)
+ {
+ /*
+ * Constant is "=" to this common value. We know selectivity
+ * exactly (or as exactly as ANALYZE could calculate it, anyway).
+ */
+ selec = sslot.numbers[i];
+ }
+ else
+ {
+ /*
+ * Comparison is against a constant that is neither NULL nor any
+ * of the common values. Its selectivity cannot be more than
+ * this:
+ */
+ double sumcommon = 0.0;
+ double otherdistinct;
+
+ for (i = 0; i < sslot.nnumbers; i++)
+ sumcommon += sslot.numbers[i];
+ selec = 1.0 - sumcommon - nullfrac;
+ CLAMP_PROBABILITY(selec);
+
+ /*
+ * and in fact it's probably a good deal less. We approximate that
+ * all the not-common values share this remaining fraction
+ * equally, so we divide by the number of other distinct values.
+ */
+ otherdistinct = get_variable_numdistinct(vardata, &isdefault) -
+ sslot.nnumbers;
+ if (otherdistinct > 1)
+ selec /= otherdistinct;
+
+ /*
+ * Another cross-check: selectivity shouldn't be estimated as more
+ * than the least common "most common value".
+ */
+ if (sslot.nnumbers > 0 && selec > sslot.numbers[sslot.nnumbers - 1])
+ selec = sslot.numbers[sslot.nnumbers - 1];
+ }
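+ /*
+ * For illustration, with hypothetical stats: nullfrac = 0.10 and MCV
+ * frequencies summing to 0.50 leave 0.40 for the non-MCV population;
+ * with 110 distinct values of which 10 are MCVs, that 0.40 is spread
+ * over 100 other values, giving selec = 0.004 (further clamped to the
+ * least common MCV's frequency if that is smaller).
+ */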
+
+ free_attstatsslot(&sslot);
+ }
+ else
+ {
+ /*
+ * No ANALYZE stats available, so make a guess using estimated number
+ * of distinct values and assuming they are equally common. (The guess
+ * is unlikely to be very good, but we do know a few special cases.)
+ */
+ selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
+ }
+
+ /* now adjust if we wanted <> rather than = */
+ if (negate)
+ selec = 1.0 - selec - nullfrac;
+
+ /* result should be in range, but make sure... */
+ CLAMP_PROBABILITY(selec);
+
+ return selec;
+}
+
+/*
+ * var_eq_non_const --- eqsel for var = something-other-than-const case
+ *
+ * This is exported so that some other estimation functions can use it.
+ */
+double
+var_eq_non_const(VariableStatData *vardata, Oid operator, Oid collation,
+ Node *other,
+ bool varonleft, bool negate)
+{
+ double selec;
+ double nullfrac = 0.0;
+ bool isdefault;
+
+ /*
+ * Grab the nullfrac for use below.
+ */
+ if (HeapTupleIsValid(vardata->statsTuple))
+ {
+ Form_pg_statistic stats;
+
+ stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
+ nullfrac = stats->stanullfrac;
+ }
+
+ /*
+ * If we matched the var to a unique index or DISTINCT clause, assume
+ * there is exactly one match regardless of anything else. (This is
+ * slightly bogus, since the index or clause's equality operator might be
+ * different from ours, but it's much more likely to be right than
+ * ignoring the information.)
+ */
+ if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
+ {
+ selec = 1.0 / vardata->rel->tuples;
+ }
+ else if (HeapTupleIsValid(vardata->statsTuple))
+ {
+ double ndistinct;
+ AttStatsSlot sslot;
+
+ /*
+ * Search is for a value that we do not know a priori, but we will
+ * assume it is not NULL. Estimate the selectivity as non-null
+ * fraction divided by number of distinct values, so that we get a
+ * result averaged over all possible values whether common or
+ * uncommon. (Essentially, we are assuming that the not-yet-known
+ * comparison value is equally likely to be any of the possible
+ * values, regardless of their frequency in the table. Is that a good
+ * idea?)
+ */
+ selec = 1.0 - nullfrac;
+ ndistinct = get_variable_numdistinct(vardata, &isdefault);
+ if (ndistinct > 1)
+ selec /= ndistinct;
+
+ /*
+ * Cross-check: selectivity should never be estimated as more than the
+ * most common value's.
+ */
+ if (get_attstatsslot(&sslot, vardata->statsTuple,
+ STATISTIC_KIND_MCV, InvalidOid,
+ ATTSTATSSLOT_NUMBERS))
+ {
+ if (sslot.nnumbers > 0 && selec > sslot.numbers[0])
+ selec = sslot.numbers[0];
+ free_attstatsslot(&sslot);
+ }
+ }
+ else
+ {
+ /*
+ * No ANALYZE stats available, so make a guess using estimated number
+ * of distinct values and assuming they are equally common. (The guess
+ * is unlikely to be very good, but we do know a few special cases.)
+ */
+ selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
+ }
+
+ /* now adjust if we wanted <> rather than = */
+ if (negate)
+ selec = 1.0 - selec - nullfrac;
+
+ /* result should be in range, but make sure... */
+ CLAMP_PROBABILITY(selec);
+
+ return selec;
+}
+
+/*
+ * neqsel - Selectivity of "!=" for any data types.
+ *
+ * This routine is also used for some operators that are not "!="
+ * but have comparable selectivity behavior. See above comments
+ * for eqsel().
+ */
+Datum
+neqsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, true));
+}
+
+/*
+ * scalarineqsel - Selectivity of "<", "<=", ">", ">=" for scalars.
+ *
+ * This is the guts of scalarltsel/scalarlesel/scalargtsel/scalargesel.
+ * The isgt and iseq flags distinguish which of the four cases apply.
+ *
+ * The caller has commuted the clause, if necessary, so that we can treat
+ * the variable as being on the left. The caller must also make sure that
+ * the other side of the clause is a non-null Const, and dissect that into
+ * a value and datatype. (This definition simplifies some callers that
+ * want to estimate against a computed value instead of a Const node.)
+ *
+ * This routine works for any datatype (or pair of datatypes) known to
+ * convert_to_scalar(). If it is applied to some other datatype,
+ * it will return an approximate estimate based on assuming that the constant
+ * value falls in the middle of the bin identified by binary search.
+ */
+static double
+scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq,
+ Oid collation,
+ VariableStatData *vardata, Datum constval, Oid consttype)
+{
+ Form_pg_statistic stats;
+ FmgrInfo opproc;
+ double mcv_selec,
+ hist_selec,
+ sumcommon;
+ double selec;
+
+ if (!HeapTupleIsValid(vardata->statsTuple))
+ {
+ /*
+ * No stats are available. Typically this means we have to fall back
+ * on the default estimate; but if the variable is CTID then we can
+ * make an estimate based on comparing the constant to the table size.
+ */
+ if (vardata->var && IsA(vardata->var, Var) &&
+ ((Var *) vardata->var)->varattno == SelfItemPointerAttributeNumber)
+ {
+ ItemPointer itemptr;
+ double block;
+ double density;
+
+ /*
+ * If the relation's empty, we're going to include all of it.
+ * (This is mostly to avoid divide-by-zero below.)
+ */
+ if (vardata->rel->pages == 0)
+ return 1.0;
+
+ itemptr = (ItemPointer) DatumGetPointer(constval);
+ block = ItemPointerGetBlockNumberNoCheck(itemptr);
+
+ /*
+ * Determine the average number of tuples per page (density).
+ *
+ * Since the last page will, on average, be only half full, we can
+ * estimate it to have half as many tuples as earlier pages. So
+ * give it half the weight of a regular page.
+ */
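+ /*
+ * For illustration, with hypothetical numbers: a table of 100 pages and
+ * 10050 tuples gives a density of about 10050 / 99.5 = 101 tuples per
+ * page; a constant ctid of (50,30) then maps to roughly block 50 +
+ * 30/101 = 50.3, and the "<=" selectivity comes out near 50.3 / 99.5 =
+ * 0.51.
+ */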
+ density = vardata->rel->tuples / (vardata->rel->pages - 0.5);
+
+ /* If target is the last page, use half the density. */
+ if (block >= vardata->rel->pages - 1)
+ density *= 0.5;
+
+ /*
+ * Using the average tuples per page, calculate how far into the
+ * page the itemptr is likely to be and adjust block accordingly,
+ * by adding that fraction of a whole block (but never more than a
+ * whole block, no matter how high the itemptr's offset is). Here
+ * we are ignoring the possibility of dead-tuple line pointers,
+ * which is fairly bogus, but we lack the info to do better.
+ */
+ if (density > 0.0)
+ {
+ OffsetNumber offset = ItemPointerGetOffsetNumberNoCheck(itemptr);
+
+ block += Min(offset / density, 1.0);
+ }
+
+ /*
+ * Convert relative block number to selectivity. Again, the last
+ * page has only half weight.
+ */
+ selec = block / (vardata->rel->pages - 0.5);
+
+ /*
+ * The calculation so far gave us a selectivity for the "<=" case.
+ * We'll have one fewer tuple for "<" and one additional tuple for
+ * ">=", the latter of which we'll reverse the selectivity for
+ * below, so we can simply subtract one tuple for both cases. The
+ * cases that need this adjustment can be identified by iseq being
+ * equal to isgt.
+ */
+ if (iseq == isgt && vardata->rel->tuples >= 1.0)
+ selec -= (1.0 / vardata->rel->tuples);
+
+ /* Finally, reverse the selectivity for the ">", ">=" cases. */
+ if (isgt)
+ selec = 1.0 - selec;
+
+ CLAMP_PROBABILITY(selec);
+ return selec;
+ }
+
+ /* no stats available, so default result */
+ return DEFAULT_INEQ_SEL;
+ }
+ stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
+
+ fmgr_info(get_opcode(operator), &opproc);
+
+ /*
+ * If we have most-common-values info, add up the fractions of the MCV
+ * entries that satisfy MCV OP CONST. These fractions contribute directly
+ * to the result selectivity. Also add up the total fraction represented
+ * by MCV entries.
+ */
+ mcv_selec = mcv_selectivity(vardata, &opproc, collation, constval, true,
+ &sumcommon);
+
+ /*
+ * If there is a histogram, determine which bin the constant falls in, and
+ * compute the resulting contribution to selectivity.
+ */
+ hist_selec = ineq_histogram_selectivity(root, vardata,
+ operator, &opproc, isgt, iseq,
+ collation,
+ constval, consttype);
+
+ /*
+ * Now merge the results from the MCV and histogram calculations,
+ * realizing that the histogram covers only the non-null values that are
+ * not listed in MCV.
+ */
+ selec = 1.0 - stats->stanullfrac - sumcommon;
+
+ if (hist_selec >= 0.0)
+ selec *= hist_selec;
+ else
+ {
+ /*
+ * If no histogram but there are values not accounted for by MCV,
+ * arbitrarily assume half of them will match.
+ */
+ selec *= 0.5;
+ }
+
+ selec += mcv_selec;
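+
+ /*
+ * For illustration, with hypothetical inputs: nullfrac = 0.10, sumcommon
+ * = 0.30, hist_selec = 0.50 and mcv_selec = 0.12 combine as
+ * (1.0 - 0.10 - 0.30) * 0.50 + 0.12 = 0.42.
+ */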
+
+ /* result should be in range, but make sure... */
+ CLAMP_PROBABILITY(selec);
+
+ return selec;
+}
+
+/*
+ * mcv_selectivity - Examine the MCV list for selectivity estimates
+ *
+ * Determine the fraction of the variable's MCV population that satisfies
+ * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft. Also
+ * compute the fraction of the total column population represented by the MCV
+ * list. This code will work for any boolean-returning predicate operator.
+ *
+ * The function result is the MCV selectivity, and the fraction of the
+ * total population is returned into *sumcommonp. Zeroes are returned
+ * if there is no MCV list.
+ */
+double
+mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc, Oid collation,
+ Datum constval, bool varonleft,
+ double *sumcommonp)
+{
+ double mcv_selec,
+ sumcommon;
+ AttStatsSlot sslot;
+ int i;
+
+ mcv_selec = 0.0;
+ sumcommon = 0.0;
+
+ if (HeapTupleIsValid(vardata->statsTuple) &&
+ statistic_proc_security_check(vardata, opproc->fn_oid) &&
+ get_attstatsslot(&sslot, vardata->statsTuple,
+ STATISTIC_KIND_MCV, InvalidOid,
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
+ {
+ LOCAL_FCINFO(fcinfo, 2);
+
+ /*
+ * We invoke the opproc "by hand" so that we won't fail on NULL
+ * results. Such cases won't arise for normal comparison functions,
+ * but generic_restriction_selectivity could perhaps be used with
+ * operators that can return NULL. A small side benefit is to not
+ * need to re-initialize the fcinfo struct from scratch each time.
+ */
+ InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
+ NULL, NULL);
+ fcinfo->args[0].isnull = false;
+ fcinfo->args[1].isnull = false;
+ /* be careful to apply operator right way 'round */
+ if (varonleft)
+ fcinfo->args[1].value = constval;
+ else
+ fcinfo->args[0].value = constval;
+
+ for (i = 0; i < sslot.nvalues; i++)
+ {
+ Datum fresult;
+
+ if (varonleft)
+ fcinfo->args[0].value = sslot.values[i];
+ else
+ fcinfo->args[1].value = sslot.values[i];
+ fcinfo->isnull = false;
+ fresult = FunctionCallInvoke(fcinfo);
+ if (!fcinfo->isnull && DatumGetBool(fresult))
+ mcv_selec += sslot.numbers[i];
+ sumcommon += sslot.numbers[i];
+ }
+ free_attstatsslot(&sslot);
+ }
+
+ *sumcommonp = sumcommon;
+ return mcv_selec;
+}
+
+/*
+ * histogram_selectivity - Examine the histogram for selectivity estimates
+ *
+ * Determine the fraction of the variable's histogram entries that satisfy
+ * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft.
+ *
+ * This code will work for any boolean-returning predicate operator, whether
+ * or not it has anything to do with the histogram sort operator. We are
+ * essentially using the histogram just as a representative sample. However,
+ * small histograms are unlikely to be all that representative, so the caller
+ * should be prepared to fall back on some other estimation approach when the
+ * histogram is missing or very small. It may also be prudent to combine this
+ * approach with another one when the histogram is small.
+ *
+ * If the actual histogram size is not at least min_hist_size, we won't bother
+ * to do the calculation at all. Also, if the n_skip parameter is > 0, we
+ * ignore the first and last n_skip histogram elements, on the grounds that
+ * they are outliers and hence not very representative. Typical values for
+ * these parameters are 10 and 1.
+ *
+ * The function result is the selectivity, or -1 if there is no histogram
+ * or it's smaller than min_hist_size.
+ *
+ * The output parameter *hist_size receives the actual histogram size,
+ * or zero if no histogram. Callers may use this number to decide how
+ * much faith to put in the function result.
+ *
+ * Note that the result disregards both the most-common-values (if any) and
+ * null entries. The caller is expected to combine this result with
+ * statistics for those portions of the column population. It may also be
+ * prudent to clamp the result range, ie, disbelieve exact 0 or 1 outputs.
+ */
+double
+histogram_selectivity(VariableStatData *vardata,
+ FmgrInfo *opproc, Oid collation,
+ Datum constval, bool varonleft,
+ int min_hist_size, int n_skip,
+ int *hist_size)
+{
+ double result;
+ AttStatsSlot sslot;
+
+ /* check sanity of parameters */
+ Assert(n_skip >= 0);
+ Assert(min_hist_size > 2 * n_skip);
+
+ if (HeapTupleIsValid(vardata->statsTuple) &&
+ statistic_proc_security_check(vardata, opproc->fn_oid) &&
+ get_attstatsslot(&sslot, vardata->statsTuple,
+ STATISTIC_KIND_HISTOGRAM, InvalidOid,
+ ATTSTATSSLOT_VALUES))
+ {
+ *hist_size = sslot.nvalues;
+ if (sslot.nvalues >= min_hist_size)
+ {
+ LOCAL_FCINFO(fcinfo, 2);
+ int nmatch = 0;
+ int i;
+
+ /*
+ * We invoke the opproc "by hand" so that we won't fail on NULL
+ * results. Such cases won't arise for normal comparison
+ * functions, but generic_restriction_selectivity could perhaps be
+ * used with operators that can return NULL. A small side benefit
+ * is to not need to re-initialize the fcinfo struct from scratch
+ * each time.
+ */
+ InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
+ NULL, NULL);
+ fcinfo->args[0].isnull = false;
+ fcinfo->args[1].isnull = false;
+ /* be careful to apply operator right way 'round */
+ if (varonleft)
+ fcinfo->args[1].value = constval;
+ else
+ fcinfo->args[0].value = constval;
+
+ for (i = n_skip; i < sslot.nvalues - n_skip; i++)
+ {
+ Datum fresult;
+
+ if (varonleft)
+ fcinfo->args[0].value = sslot.values[i];
+ else
+ fcinfo->args[1].value = sslot.values[i];
+ fcinfo->isnull = false;
+ fresult = FunctionCallInvoke(fcinfo);
+ if (!fcinfo->isnull && DatumGetBool(fresult))
+ nmatch++;
+ }
+ result = ((double) nmatch) / ((double) (sslot.nvalues - 2 * n_skip));
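+
+ /*
+ * For illustration: with a 100-entry histogram and n_skip = 1, a
+ * hypothetical 33 matches among the 98 non-skipped entries yields a
+ * result of 33 / 98, or about 0.34.
+ */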
+ }
+ else
+ result = -1;
+ free_attstatsslot(&sslot);
+ }
+ else
+ {
+ *hist_size = 0;
+ result = -1;
+ }
+
+ return result;
+}
+
+/*
+ * generic_restriction_selectivity - Selectivity for almost anything
+ *
+ * This function estimates selectivity for operators that we don't have any
+ * special knowledge about, but are on data types that we collect standard
+ * MCV and/or histogram statistics for. (Additional assumptions are that
+ * the operator is strict and immutable, or at least stable.)
+ *
+ * If we have "VAR OP CONST" or "CONST OP VAR", selectivity is estimated by
+ * applying the operator to each element of the column's MCV and/or histogram
+ * stats, and merging the results using the assumption that the histogram is
+ * a reasonable random sample of the column's non-MCV population. Note that
+ * if the operator's semantics are related to the histogram ordering, this
+ * might not be such a great assumption; other functions such as
+ * scalarineqsel() are probably a better match in such cases.
+ *
+ * Otherwise, fall back to the default selectivity provided by the caller.
+ */
+double
+generic_restriction_selectivity(PlannerInfo *root, Oid oproid, Oid collation,
+ List *args, int varRelid,
+ double default_selectivity)
+{
+ double selec;
+ VariableStatData vardata;
+ Node *other;
+ bool varonleft;
+
+ /*
+ * If expression is not variable OP something or something OP variable,
+ * then punt and return the default estimate.
+ */
+ if (!get_restriction_variable(root, args, varRelid,
+ &vardata, &other, &varonleft))
+ return default_selectivity;
+
+ /*
+ * If the something is a NULL constant, assume operator is strict and
+ * return zero, ie, operator will never return TRUE.
+ */
+ if (IsA(other, Const) &&
+ ((Const *) other)->constisnull)
+ {
+ ReleaseVariableStats(vardata);
+ return 0.0;
+ }
+
+ if (IsA(other, Const))
+ {
+ /* Variable is being compared to a known non-null constant */
+ Datum constval = ((Const *) other)->constvalue;
+ FmgrInfo opproc;
+ double mcvsum;
+ double mcvsel;
+ double nullfrac;
+ int hist_size;
+
+ fmgr_info(get_opcode(oproid), &opproc);
+
+ /*
+ * Calculate the selectivity for the column's most common values.
+ */
+ mcvsel = mcv_selectivity(&vardata, &opproc, collation,
+ constval, varonleft,
+ &mcvsum);
+
+ /*
+ * If the histogram is large enough, see what fraction of it matches
+ * the query, and assume that's representative of the non-MCV
+ * population. Otherwise use the default selectivity for the non-MCV
+ * population.
+ */
+ selec = histogram_selectivity(&vardata, &opproc, collation,
+ constval, varonleft,
+ 10, 1, &hist_size);
+ if (selec < 0)
+ {
+ /* Nope, fall back on default */
+ selec = default_selectivity;
+ }
+ else if (hist_size < 100)
+ {
+ /*
+ * For histogram sizes from 10 to 100, we combine the histogram
+ * and default selectivities, putting increasingly more trust in
+ * the histogram for larger sizes.
+ */
+ double hist_weight = hist_size / 100.0;
+
+ selec = selec * hist_weight +
+ default_selectivity * (1.0 - hist_weight);
+ }
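+
+ /*
+ * For illustration, with hypothetical numbers: hist_size = 40 gives
+ * hist_weight = 0.4, so a histogram fraction of 0.10 blended with a
+ * default_selectivity of 0.005 yields 0.4 * 0.10 + 0.6 * 0.005 = 0.043.
+ */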
+
+ /* In any case, don't believe extremely small or large estimates. */
+ if (selec < 0.0001)
+ selec = 0.0001;
+ else if (selec > 0.9999)
+ selec = 0.9999;
+
+ /* Don't forget to account for nulls. */
+ if (HeapTupleIsValid(vardata.statsTuple))
+ nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
+ else
+ nullfrac = 0.0;
+
+ /*
+ * Now merge the results from the MCV and histogram calculations,
+ * realizing that the histogram covers only the non-null values that
+ * are not listed in MCV.
+ */
+ selec *= 1.0 - nullfrac - mcvsum;
+ selec += mcvsel;
+ }
+ else
+ {
+ /* Comparison value is not constant, so we can't do anything */
+ selec = default_selectivity;
+ }
+
+ ReleaseVariableStats(vardata);
+
+ /* result should be in range, but make sure... */
+ CLAMP_PROBABILITY(selec);
+
+ return selec;
+}
+
+/*
+ * ineq_histogram_selectivity - Examine the histogram for scalarineqsel
+ *
+ * Determine the fraction of the variable's histogram population that
+ * satisfies the inequality condition, ie, VAR < (or <=, >, >=) CONST.
+ * The isgt and iseq flags distinguish which of the four cases apply.
+ *
+ * While opproc could be looked up from the operator OID, common callers
+ * also need to call it separately, so we make the caller pass both.
+ *
+ * Returns -1 if there is no histogram (valid results will always be >= 0).
+ *
+ * Note that the result disregards both the most-common-values (if any) and
+ * null entries. The caller is expected to combine this result with
+ * statistics for those portions of the column population.
+ *
+ * This is exported so that some other estimation functions can use it.
+ */
+double
+ineq_histogram_selectivity(PlannerInfo *root,
+ VariableStatData *vardata,
+ Oid opoid, FmgrInfo *opproc, bool isgt, bool iseq,
+ Oid collation,
+ Datum constval, Oid consttype)
+{
+ double hist_selec;
+ AttStatsSlot sslot;
+
+ hist_selec = -1.0;
+
+ /*
+ * Someday, ANALYZE might store more than one histogram per rel/att,
+ * corresponding to more than one possible sort ordering defined for the
+ * column type. Right now, we know there is only one, so just grab it and
+ * see if it matches the query.
+ *
+ * Note that we can't use opoid as search argument; the staop appearing in
+ * pg_statistic will be for the relevant '<' operator, but what we have
+ * might be some other inequality operator such as '>='. (Even if opoid
+ * is a '<' operator, it could be cross-type.) Hence we must use
+ * comparison_ops_are_compatible() to see if the operators match.
+ */
+ if (HeapTupleIsValid(vardata->statsTuple) &&
+ statistic_proc_security_check(vardata, opproc->fn_oid) &&
+ get_attstatsslot(&sslot, vardata->statsTuple,
+ STATISTIC_KIND_HISTOGRAM, InvalidOid,
+ ATTSTATSSLOT_VALUES))
+ {
+ if (sslot.nvalues > 1 &&
+ sslot.stacoll == collation &&
+ comparison_ops_are_compatible(sslot.staop, opoid))
+ {
+ /*
+ * Use binary search to find the desired location, namely the
+ * right end of the histogram bin containing the comparison value,
+ * which is the leftmost entry for which the comparison operator
+ * succeeds (if isgt) or fails (if !isgt).
+ *
+ * In this loop, we pay no attention to whether the operator iseq
+ * or not; that detail will be mopped up below. (We cannot tell,
+ * anyway, whether the operator thinks the values are equal.)
+ *
+ * If the binary search accesses the first or last histogram
+ * entry, we try to replace that endpoint with the true column min
+ * or max as found by get_actual_variable_range(). This
+ * ameliorates misestimates when the min or max is moving as a
+ * result of changes since the last ANALYZE. Note that this could
+ * result in effectively including MCVs into the histogram that
+ * weren't there before, but we don't try to correct for that.
+ */
+ double histfrac;
+ int lobound = 0; /* first possible slot to search */
+ int hibound = sslot.nvalues; /* last+1 slot to search */
+ bool have_end = false;
+
+ /*
+ * If there are only two histogram entries, we'll want up-to-date
+ * values for both. (If there are more than two, we need at most
+ * one of them to be updated, so we deal with that within the
+ * loop.)
+ */
+ if (sslot.nvalues == 2)
+ have_end = get_actual_variable_range(root,
+ vardata,
+ sslot.staop,
+ collation,
+ &sslot.values[0],
+ &sslot.values[1]);
+
+ while (lobound < hibound)
+ {
+ int probe = (lobound + hibound) / 2;
+ bool ltcmp;
+
+ /*
+ * If we find ourselves about to compare to the first or last
+ * histogram entry, first try to replace it with the actual
+ * current min or max (unless we already did so above).
+ */
+ if (probe == 0 && sslot.nvalues > 2)
+ have_end = get_actual_variable_range(root,
+ vardata,
+ sslot.staop,
+ collation,
+ &sslot.values[0],
+ NULL);
+ else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2)
+ have_end = get_actual_variable_range(root,
+ vardata,
+ sslot.staop,
+ collation,
+ NULL,
+ &sslot.values[probe]);
+
+ ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
+ collation,
+ sslot.values[probe],
+ constval));
+ if (isgt)
+ ltcmp = !ltcmp;
+ if (ltcmp)
+ lobound = probe + 1;
+ else
+ hibound = probe;
+ }
+
+ if (lobound <= 0)
+ {
+ /*
+ * Constant is below lower histogram boundary. More
+ * precisely, we have found that no entry in the histogram
+ * satisfies the inequality clause (if !isgt) or they all do
+ * (if isgt). We estimate that that's true of the entire
+ * table, so set histfrac to 0.0 (which we'll flip to 1.0
+ * below, if isgt).
+ */
+ histfrac = 0.0;
+ }
+ else if (lobound >= sslot.nvalues)
+ {
+ /*
+ * Inverse case: constant is above upper histogram boundary.
+ */
+ histfrac = 1.0;
+ }
+ else
+ {
+ /* We have values[i-1] <= constant <= values[i]. */
+ int i = lobound;
+ double eq_selec = 0;
+ double val,
+ high,
+ low;
+ double binfrac;
+
+ /*
+ * In the cases where we'll need it below, obtain an estimate
+ * of the selectivity of "x = constval". We use a calculation
+ * similar to what var_eq_const() does for a non-MCV constant,
+ * ie, estimate that all distinct non-MCV values occur equally
+ * often. But multiplication by "1.0 - sumcommon - nullfrac"
+ * will be done by our caller, so we shouldn't do that here.
+ * Therefore we can't try to clamp the estimate by reference
+ * to the least common MCV; the result would be too small.
+ *
+ * Note: since this is effectively assuming that constval
+ * isn't an MCV, it's logically dubious if constval in fact is
+ * one. But we have to apply *some* correction for equality,
+ * and anyway we cannot tell if constval is an MCV, since we
+ * don't have a suitable equality operator at hand.
+ */
+ if (i == 1 || isgt == iseq)
+ {
+ double otherdistinct;
+ bool isdefault;
+ AttStatsSlot mcvslot;
+
+ /* Get estimated number of distinct values */
+ otherdistinct = get_variable_numdistinct(vardata,
+ &isdefault);
+
+ /* Subtract off the number of known MCVs */
+ if (get_attstatsslot(&mcvslot, vardata->statsTuple,
+ STATISTIC_KIND_MCV, InvalidOid,
+ ATTSTATSSLOT_NUMBERS))
+ {
+ otherdistinct -= mcvslot.nnumbers;
+ free_attstatsslot(&mcvslot);
+ }
+
+ /* If result doesn't seem sane, leave eq_selec at 0 */
+ if (otherdistinct > 1)
+ eq_selec = 1.0 / otherdistinct;
+ }
+
+ /*
+ * Convert the constant and the two nearest bin boundary
+ * values to a uniform comparison scale, and do a linear
+ * interpolation within this bin.
+ */
+ if (convert_to_scalar(constval, consttype, collation,
+ &val,
+ sslot.values[i - 1], sslot.values[i],
+ vardata->vartype,
+ &low, &high))
+ {
+ if (high <= low)
+ {
+ /* cope if bin boundaries appear identical */
+ binfrac = 0.5;
+ }
+ else if (val <= low)
+ binfrac = 0.0;
+ else if (val >= high)
+ binfrac = 1.0;
+ else
+ {
+ binfrac = (val - low) / (high - low);
+
+ /*
+ * Watch out for the possibility that we got a NaN or
+ * Infinity from the division. This can happen
+ * despite the previous checks, if for example "low"
+ * is -Infinity.
+ */
+ if (isnan(binfrac) ||
+ binfrac < 0.0 || binfrac > 1.0)
+ binfrac = 0.5;
+ }
+ }
+ else
+ {
+ /*
+ * Ideally we'd produce an error here, on the grounds that
+ * the given operator shouldn't have scalarXXsel
+ * registered as its selectivity func unless we can deal
+ * with its operand types. But currently, all manner of
+ * stuff is invoking scalarXXsel, so give a default
+ * estimate until that can be fixed.
+ */
+ binfrac = 0.5;
+ }
+
+ /*
+ * Now, compute the overall selectivity across the values
+ * represented by the histogram. We have i-1 full bins and
+ * binfrac partial bin below the constant.
+ */
+ histfrac = (double) (i - 1) + binfrac;
+ histfrac /= (double) (sslot.nvalues - 1);
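+
+ /*
+ * For illustration, with a hypothetical 101-entry histogram (100
+ * bins): a constant falling in bin i = 41 with binfrac = 0.25 gives
+ * histfrac = (40 + 0.25) / 100 = 0.4025.
+ */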
+
+ /*
+ * At this point, histfrac is an estimate of the fraction of
+ * the population represented by the histogram that satisfies
+ * "x <= constval". Somewhat remarkably, this statement is
+ * true regardless of which operator we were doing the probes
+ * with, so long as convert_to_scalar() delivers reasonable
+ * results. If the probe constant is equal to some histogram
+ * entry, we would have considered the bin to the left of that
+ * entry if probing with "<" or ">=", or the bin to the right
+ * if probing with "<=" or ">"; but binfrac would have come
+ * out as 1.0 in the first case and 0.0 in the second, leading
+ * to the same histfrac in either case. For probe constants
+ * between histogram entries, we find the same bin and get the
+ * same estimate with any operator.
+ *
+ * The fact that the estimate corresponds to "x <= constval"
+ * and not "x < constval" is because of the way that ANALYZE
+ * constructs the histogram: each entry is, effectively, the
+ * rightmost value in its sample bucket. So selectivity
+ * values that are exact multiples of 1/(histogram_size-1)
+ * should be understood as estimates including a histogram
+ * entry plus everything to its left.
+ *
+ * However, that breaks down for the first histogram entry,
+ * which necessarily is the leftmost value in its sample
+ * bucket. That means the first histogram bin is slightly
+ * narrower than the rest, by an amount equal to eq_selec.
+ * Another way to say that is that we want "x <= leftmost" to
+ * be estimated as eq_selec not zero. So, if we're dealing
+ * with the first bin (i==1), rescale to make that true while
+ * adjusting the rest of that bin linearly.
+ */
+ if (i == 1)
+ histfrac += eq_selec * (1.0 - binfrac);
+
+ /*
+ * "x <= constval" is good if we want an estimate for "<=" or
+ * ">", but if we are estimating for "<" or ">=", we now need
+ * to decrease the estimate by eq_selec.
+ */
+ if (isgt == iseq)
+ histfrac -= eq_selec;
+ }
+
+ /*
+ * Now the estimate is finished for "<" and "<=" cases. If we are
+ * estimating for ">" or ">=", flip it.
+ */
+ hist_selec = isgt ? (1.0 - histfrac) : histfrac;
+
+ /*
+ * The histogram boundaries are only approximate to begin with,
+ * and may well be out of date anyway. Therefore, don't believe
+ * extremely small or large selectivity estimates --- unless we
+ * got actual current endpoint values from the table, in which
+ * case just do the usual sanity clamp. Somewhat arbitrarily, we
+ * set the cutoff for other cases at a hundredth of the histogram
+ * resolution.
+ */
+ if (have_end)
+ CLAMP_PROBABILITY(hist_selec);
+ else
+ {
+ double cutoff = 0.01 / (double) (sslot.nvalues - 1);
+
+ if (hist_selec < cutoff)
+ hist_selec = cutoff;
+ else if (hist_selec > 1.0 - cutoff)
+ hist_selec = 1.0 - cutoff;
+ }
+ }
+ else if (sslot.nvalues > 1)
+ {
+ /*
+ * If we get here, we have a histogram but it's not sorted the way
+ * we want. Do a brute-force search to see how many of the
+ * entries satisfy the comparison condition, and take that
+ * fraction as our estimate. (This is identical to the inner loop
+ * of histogram_selectivity; maybe share code?)
+ */
+ LOCAL_FCINFO(fcinfo, 2);
+ int nmatch = 0;
+
+ InitFunctionCallInfoData(*fcinfo, opproc, 2, collation,
+ NULL, NULL);
+ fcinfo->args[0].isnull = false;
+ fcinfo->args[1].isnull = false;
+ fcinfo->args[1].value = constval;
+ for (int i = 0; i < sslot.nvalues; i++)
+ {
+ Datum fresult;
+
+ fcinfo->args[0].value = sslot.values[i];
+ fcinfo->isnull = false;
+ fresult = FunctionCallInvoke(fcinfo);
+ if (!fcinfo->isnull && DatumGetBool(fresult))
+ nmatch++;
+ }
+ hist_selec = ((double) nmatch) / ((double) sslot.nvalues);
+
+ /*
+ * As above, clamp to a hundredth of the histogram resolution.
+ * This case is surely even less trustworthy than the normal one,
+ * so we shouldn't believe exact 0 or 1 selectivity. (Maybe the
+ * clamp should be more restrictive in this case?)
+ */
+ {
+ double cutoff = 0.01 / (double) (sslot.nvalues - 1);
+
+ if (hist_selec < cutoff)
+ hist_selec = cutoff;
+ else if (hist_selec > 1.0 - cutoff)
+ hist_selec = 1.0 - cutoff;
+ }
+ }
+
+ free_attstatsslot(&sslot);
+ }
+
+ return hist_selec;
+}
+
+/*
+ * Common wrapper function for the selectivity estimators that simply
+ * invoke scalarineqsel().
+ */
+static Datum
+scalarineqsel_wrapper(PG_FUNCTION_ARGS, bool isgt, bool iseq)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Oid operator = PG_GETARG_OID(1);
+ List *args = (List *) PG_GETARG_POINTER(2);
+ int varRelid = PG_GETARG_INT32(3);
+ Oid collation = PG_GET_COLLATION();
+ VariableStatData vardata;
+ Node *other;
+ bool varonleft;
+ Datum constval;
+ Oid consttype;
+ double selec;
+
+ /*
+ * If expression is not variable op something or something op variable,
+ * then punt and return a default estimate.
+ */
+ if (!get_restriction_variable(root, args, varRelid,
+ &vardata, &other, &varonleft))
+ PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+
+ /*
+ * Can't do anything useful if the something is not a constant, either.
+ */
+ if (!IsA(other, Const))
+ {
+ ReleaseVariableStats(vardata);
+ PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+ }
+
+ /*
+ * If the constant is NULL, assume operator is strict and return zero, ie,
+ * operator will never return TRUE.
+ */
+ if (((Const *) other)->constisnull)
+ {
+ ReleaseVariableStats(vardata);
+ PG_RETURN_FLOAT8(0.0);
+ }
+ constval = ((Const *) other)->constvalue;
+ consttype = ((Const *) other)->consttype;
+
+ /*
+ * Force the var to be on the left to simplify logic in scalarineqsel.
+ */
+ if (!varonleft)
+ {
+ operator = get_commutator(operator);
+ if (!operator)
+ {
+ /* Use default selectivity (should we raise an error instead?) */
+ ReleaseVariableStats(vardata);
+ PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+ }
+ isgt = !isgt;
+ }
+
+ /* The rest of the work is done by scalarineqsel(). */
+ selec = scalarineqsel(root, operator, isgt, iseq, collation,
+ &vardata, constval, consttype);
+
+ ReleaseVariableStats(vardata);
+
+ PG_RETURN_FLOAT8((float8) selec);
+}
+
+/*
+ * scalarltsel - Selectivity of "<" for scalars.
+ */
+Datum
+scalarltsel(PG_FUNCTION_ARGS)
+{
+ return scalarineqsel_wrapper(fcinfo, false, false);
+}
+
+/*
+ * scalarlesel - Selectivity of "<=" for scalars.
+ */
+Datum
+scalarlesel(PG_FUNCTION_ARGS)
+{
+ return scalarineqsel_wrapper(fcinfo, false, true);
+}
+
+/*
+ * scalargtsel - Selectivity of ">" for scalars.
+ */
+Datum
+scalargtsel(PG_FUNCTION_ARGS)
+{
+ return scalarineqsel_wrapper(fcinfo, true, false);
+}
+
+/*
+ * scalargesel - Selectivity of ">=" for scalars.
+ */
+Datum
+scalargesel(PG_FUNCTION_ARGS)
+{
+ return scalarineqsel_wrapper(fcinfo, true, true);
+}
+
+/*
+ * boolvarsel - Selectivity of Boolean variable.
+ *
+ * This can actually be called on any boolean-valued expression. If it
+ * involves only Vars of the specified relation, and if there are statistics
+ * about the Var or expression (the latter is possible if it's indexed) then
+ * we'll produce a real estimate; otherwise it's just a default.
+ */
+Selectivity
+boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
+{
+ VariableStatData vardata;
+ double selec;
+
+ examine_variable(root, arg, varRelid, &vardata);
+ if (HeapTupleIsValid(vardata.statsTuple))
+ {
+ /*
+ * A boolean variable V is equivalent to the clause V = 't', so we
+ * compute the selectivity as if that is what we have.
+ */
+ selec = var_eq_const(&vardata, BooleanEqualOperator, InvalidOid,
+ BoolGetDatum(true), false, true, false);
+ }
+ else
+ {
+ /* Otherwise, the default estimate is 0.5 */
+ selec = 0.5;
+ }
+ ReleaseVariableStats(vardata);
+ return selec;
+}
+
+/*
+ * booltestsel - Selectivity of BooleanTest Node.
+ */
+Selectivity
+booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
+ int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
+{
+ VariableStatData vardata;
+ double selec;
+
+ examine_variable(root, arg, varRelid, &vardata);
+
+ if (HeapTupleIsValid(vardata.statsTuple))
+ {
+ Form_pg_statistic stats;
+ double freq_null;
+ AttStatsSlot sslot;
+
+ stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
+ freq_null = stats->stanullfrac;
+
+ if (get_attstatsslot(&sslot, vardata.statsTuple,
+ STATISTIC_KIND_MCV, InvalidOid,
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)
+ && sslot.nnumbers > 0)
+ {
+ double freq_true;
+ double freq_false;
+
+ /*
+ * Get first MCV frequency and derive frequency for true.
+ */
+ if (DatumGetBool(sslot.values[0]))
+ freq_true = sslot.numbers[0];
+ else
+ freq_true = 1.0 - sslot.numbers[0] - freq_null;
+
+ /*
+ * Next derive frequency for false. Then use these as appropriate
+ * to derive frequency for each case.
+ */
+ freq_false = 1.0 - freq_true - freq_null;
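+
+ /*
+ * For illustration, with hypothetical stats: freq_null = 0.10 and a
+ * first MCV of 'false' with frequency 0.60 give freq_true = 1.0 -
+ * 0.60 - 0.10 = 0.30 and freq_false = 1.0 - 0.30 - 0.10 = 0.60.
+ */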
+
+ switch (booltesttype)
+ {
+ case IS_UNKNOWN:
+ /* select only NULL values */
+ selec = freq_null;
+ break;
+ case IS_NOT_UNKNOWN:
+ /* select non-NULL values */
+ selec = 1.0 - freq_null;
+ break;
+ case IS_TRUE:
+ /* select only TRUE values */
+ selec = freq_true;
+ break;
+ case IS_NOT_TRUE:
+ /* select non-TRUE values */
+ selec = 1.0 - freq_true;
+ break;
+ case IS_FALSE:
+ /* select only FALSE values */
+ selec = freq_false;
+ break;
+ case IS_NOT_FALSE:
+ /* select non-FALSE values */
+ selec = 1.0 - freq_false;
+ break;
+ default:
+ elog(ERROR, "unrecognized booltesttype: %d",
+ (int) booltesttype);
+ selec = 0.0; /* Keep compiler quiet */
+ break;
+ }
+
+ free_attstatsslot(&sslot);
+ }
+ else
+ {
+ /*
+ * No most-common-value info available. Still have null fraction
+ * information, so use it for IS [NOT] UNKNOWN. Otherwise adjust
+ * for null fraction and assume a 50-50 split of TRUE and FALSE.
+ */
+ switch (booltesttype)
+ {
+ case IS_UNKNOWN:
+ /* select only NULL values */
+ selec = freq_null;
+ break;
+ case IS_NOT_UNKNOWN:
+ /* select non-NULL values */
+ selec = 1.0 - freq_null;
+ break;
+ case IS_TRUE:
+ case IS_FALSE:
+ /* Assume we select half of the non-NULL values */
+ selec = (1.0 - freq_null) / 2.0;
+ break;
+ case IS_NOT_TRUE:
+ case IS_NOT_FALSE:
+ /* Assume we select NULLs plus half of the non-NULLs */
+ /* equiv. to freq_null + (1.0 - freq_null) / 2.0 */
+ selec = (freq_null + 1.0) / 2.0;
+ break;
+ default:
+ elog(ERROR, "unrecognized booltesttype: %d",
+ (int) booltesttype);
+ selec = 0.0; /* Keep compiler quiet */
+ break;
+ }
+ }
+ }
+ else
+ {
+ /*
+ * If we can't get variable statistics for the argument, perhaps
+ * clause_selectivity can do something with it. We ignore the
+ * possibility of a NULL value when using clause_selectivity, and just
+ * assume the value is either TRUE or FALSE.
+ */
+ switch (booltesttype)
+ {
+ case IS_UNKNOWN:
+ selec = DEFAULT_UNK_SEL;
+ break;
+ case IS_NOT_UNKNOWN:
+ selec = DEFAULT_NOT_UNK_SEL;
+ break;
+ case IS_TRUE:
+ case IS_NOT_FALSE:
+ selec = (double) clause_selectivity(root, arg,
+ varRelid,
+ jointype, sjinfo);
+ break;
+ case IS_FALSE:
+ case IS_NOT_TRUE:
+ selec = 1.0 - (double) clause_selectivity(root, arg,
+ varRelid,
+ jointype, sjinfo);
+ break;
+ default:
+ elog(ERROR, "unrecognized booltesttype: %d",
+ (int) booltesttype);
+ selec = 0.0; /* Keep compiler quiet */
+ break;
+ }
+ }
+
+ ReleaseVariableStats(vardata);
+
+ /* result should be in range, but make sure... */
+ CLAMP_PROBABILITY(selec);
+
+ return (Selectivity) selec;
+}
+
+/*
+ * nulltestsel - Selectivity of NullTest Node.
+ */
+Selectivity
+nulltestsel(PlannerInfo *root, NullTestType nulltesttype, Node *arg,
+ int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
+{
+ VariableStatData vardata;
+ double selec;
+
+ examine_variable(root, arg, varRelid, &vardata);
+
+ if (HeapTupleIsValid(vardata.statsTuple))
+ {
+ Form_pg_statistic stats;
+ double freq_null;
+
+ stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
+ freq_null = stats->stanullfrac;
+
+ switch (nulltesttype)
+ {
+ case IS_NULL:
+
+ /*
+ * Use freq_null directly.
+ */
+ selec = freq_null;
+ break;
+ case IS_NOT_NULL:
+
+ /*
+ * Select not unknown (not null) values. Calculate from
+ * freq_null.
+ */
+ selec = 1.0 - freq_null;
+ break;
+ default:
+ elog(ERROR, "unrecognized nulltesttype: %d",
+ (int) nulltesttype);
+ return (Selectivity) 0; /* keep compiler quiet */
+ }
+ }
+ else if (vardata.var && IsA(vardata.var, Var) &&
+ ((Var *) vardata.var)->varattno < 0)
+ {
+ /*
+ * There are no stats for system columns, but we know they are never
+ * NULL.
+ */
+ selec = (nulltesttype == IS_NULL) ? 0.0 : 1.0;
+ }
+ else
+ {
+ /*
+ * No ANALYZE stats available, so make a guess
+ */
+ switch (nulltesttype)
+ {
+ case IS_NULL:
+ selec = DEFAULT_UNK_SEL;
+ break;
+ case IS_NOT_NULL:
+ selec = DEFAULT_NOT_UNK_SEL;
+ break;
+ default:
+ elog(ERROR, "unrecognized nulltesttype: %d",
+ (int) nulltesttype);
+ return (Selectivity) 0; /* keep compiler quiet */
+ }
+ }
+
+ ReleaseVariableStats(vardata);
+
+ /* result should be in range, but make sure... */
+ CLAMP_PROBABILITY(selec);
+
+ return (Selectivity) selec;
+}
+
+/*
+ * strip_array_coercion - strip binary-compatible relabeling from an array expr
+ *
+ * For array values, the parser normally generates ArrayCoerceExpr conversions,
+ * but it seems possible that RelabelType might show up. Also, the planner
+ * is not currently tense about collapsing stacked ArrayCoerceExpr nodes,
+ * so we need to be ready to deal with more than one level.
+ */
+static Node *
+strip_array_coercion(Node *node)
+{
+ for (;;)
+ {
+ if (node && IsA(node, ArrayCoerceExpr))
+ {
+ ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) node;
+
+ /*
+ * If the per-element expression is just a RelabelType on top of
+ * CaseTestExpr, then we know it's a binary-compatible relabeling.
+ */
+ if (IsA(acoerce->elemexpr, RelabelType) &&
+ IsA(((RelabelType *) acoerce->elemexpr)->arg, CaseTestExpr))
+ node = (Node *) acoerce->arg;
+ else
+ break;
+ }
+ else if (node && IsA(node, RelabelType))
+ {
+ /* We don't really expect this case, but may as well cope */
+ node = (Node *) ((RelabelType *) node)->arg;
+ }
+ else
+ break;
+ }
+ return node;
+}
+
+/*
+ * scalararraysel - Selectivity of ScalarArrayOpExpr Node.
+ */
+Selectivity
+scalararraysel(PlannerInfo *root,
+ ScalarArrayOpExpr *clause,
+ bool is_join_clause,
+ int varRelid,
+ JoinType jointype,
+ SpecialJoinInfo *sjinfo)
+{
+ Oid operator = clause->opno;
+ bool useOr = clause->useOr;
+ bool isEquality = false;
+ bool isInequality = false;
+ Node *leftop;
+ Node *rightop;
+ Oid nominal_element_type;
+ Oid nominal_element_collation;
+ TypeCacheEntry *typentry;
+ RegProcedure oprsel;
+ FmgrInfo oprselproc;
+ Selectivity s1;
+ Selectivity s1disjoint;
+
+ /* First, deconstruct the expression */
+ Assert(list_length(clause->args) == 2);
+ leftop = (Node *) linitial(clause->args);
+ rightop = (Node *) lsecond(clause->args);
+
+ /* aggressively reduce both sides to constants */
+ leftop = estimate_expression_value(root, leftop);
+ rightop = estimate_expression_value(root, rightop);
+
+ /* get nominal (after relabeling) element type of rightop */
+ nominal_element_type = get_base_element_type(exprType(rightop));
+ if (!OidIsValid(nominal_element_type))
+ return (Selectivity) 0.5; /* probably shouldn't happen */
+ /* get nominal collation, too, for generating constants */
+ nominal_element_collation = exprCollation(rightop);
+
+ /* look through any binary-compatible relabeling of rightop */
+ rightop = strip_array_coercion(rightop);
+
+ /*
+ * Detect whether the operator is the default equality or inequality
+ * operator of the array element type.
+ */
+ typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR);
+ if (OidIsValid(typentry->eq_opr))
+ {
+ if (operator == typentry->eq_opr)
+ isEquality = true;
+ else if (get_negator(operator) == typentry->eq_opr)
+ isInequality = true;
+ }
+
+ /*
+ * If it is equality or inequality, we might be able to estimate this as a
+ * form of array containment; for instance "const = ANY(column)" can be
+ * treated as "ARRAY[const] <@ column". scalararraysel_containment tries
+ * that, and returns the selectivity estimate if successful, or -1 if not.
+ */
+ if ((isEquality || isInequality) && !is_join_clause)
+ {
+ s1 = scalararraysel_containment(root, leftop, rightop,
+ nominal_element_type,
+ isEquality, useOr, varRelid);
+ if (s1 >= 0.0)
+ return s1;
+ }
+
+ /*
+ * Look up the underlying operator's selectivity estimator. Punt if it
+ * hasn't got one.
+ */
+ if (is_join_clause)
+ oprsel = get_oprjoin(operator);
+ else
+ oprsel = get_oprrest(operator);
+ if (!oprsel)
+ return (Selectivity) 0.5;
+ fmgr_info(oprsel, &oprselproc);
+
+ /*
+ * In the array-containment check above, we must only believe that an
+ * operator is equality or inequality if it is the default btree equality
+ * operator (or its negator) for the element type, since those are the
+ * operators that array containment will use. But in what follows, we can
+ * be a little laxer, and also believe that any operators using eqsel() or
+ * neqsel() as selectivity estimator act like equality or inequality.
+ */
+ if (oprsel == F_EQSEL || oprsel == F_EQJOINSEL)
+ isEquality = true;
+ else if (oprsel == F_NEQSEL || oprsel == F_NEQJOINSEL)
+ isInequality = true;
+
+ /*
+ * We consider three cases:
+ *
+ * 1. rightop is an Array constant: deconstruct the array, apply the
+ * operator's selectivity function for each array element, and merge the
+ * results in the same way that clausesel.c does for AND/OR combinations.
+ *
+ * 2. rightop is an ARRAY[] construct: apply the operator's selectivity
+ * function for each element of the ARRAY[] construct, and merge.
+ *
+ * 3. otherwise, make a guess ...
+ */
+ if (rightop && IsA(rightop, Const))
+ {
+ Datum arraydatum = ((Const *) rightop)->constvalue;
+ bool arrayisnull = ((Const *) rightop)->constisnull;
+ ArrayType *arrayval;
+ int16 elmlen;
+ bool elmbyval;
+ char elmalign;
+ int num_elems;
+ Datum *elem_values;
+ bool *elem_nulls;
+ int i;
+
+ if (arrayisnull) /* qual can't succeed if null array */
+ return (Selectivity) 0.0;
+ arrayval = DatumGetArrayTypeP(arraydatum);
+ get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
+ &elmlen, &elmbyval, &elmalign);
+ deconstruct_array(arrayval,
+ ARR_ELEMTYPE(arrayval),
+ elmlen, elmbyval, elmalign,
+ &elem_values, &elem_nulls, &num_elems);
+
+ /*
+ * For generic operators, we assume the probability of success is
+ * independent for each array element. But for "= ANY" or "<> ALL",
+ * if the array elements are distinct (which'd typically be the case)
+ * then the probabilities are disjoint, and we should just sum them.
+ *
+ * If we were being really tense we would try to confirm that the
+ * elements are all distinct, but that would be expensive and it
+ * doesn't seem to be worth the cycles; it would amount to penalizing
+ * well-written queries in favor of poorly-written ones. However, we
+ * do protect ourselves a little bit by checking whether the
+ * disjointness assumption leads to an impossible (out of range)
+ * probability; if so, we fall back to the normal calculation.
+ */
+ s1 = s1disjoint = (useOr ? 0.0 : 1.0);
+
+ for (i = 0; i < num_elems; i++)
+ {
+ List *args;
+ Selectivity s2;
+
+ args = list_make2(leftop,
+ makeConst(nominal_element_type,
+ -1,
+ nominal_element_collation,
+ elmlen,
+ elem_values[i],
+ elem_nulls[i],
+ elmbyval));
+ if (is_join_clause)
+ s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
+ clause->inputcollid,
+ PointerGetDatum(root),
+ ObjectIdGetDatum(operator),
+ PointerGetDatum(args),
+ Int16GetDatum(jointype),
+ PointerGetDatum(sjinfo)));
+ else
+ s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
+ clause->inputcollid,
+ PointerGetDatum(root),
+ ObjectIdGetDatum(operator),
+ PointerGetDatum(args),
+ Int32GetDatum(varRelid)));
+
+ if (useOr)
+ {
+ s1 = s1 + s2 - s1 * s2;
+ if (isEquality)
+ s1disjoint += s2;
+ }
+ else
+ {
+ s1 = s1 * s2;
+ if (isInequality)
+ s1disjoint += s2 - 1.0;
+ }
+ }
+
+ /* accept disjoint-probability estimate if in range */
+ if ((useOr ? isEquality : isInequality) &&
+ s1disjoint >= 0.0 && s1disjoint <= 1.0)
+ s1 = s1disjoint;
+ }
+ else if (rightop && IsA(rightop, ArrayExpr) &&
+ !((ArrayExpr *) rightop)->multidims)
+ {
+ ArrayExpr *arrayexpr = (ArrayExpr *) rightop;
+ int16 elmlen;
+ bool elmbyval;
+ ListCell *l;
+
+ get_typlenbyval(arrayexpr->element_typeid,
+ &elmlen, &elmbyval);
+
+ /*
+ * We use the assumption of disjoint probabilities here too, although
+ * the odds of equal array elements are rather higher if the elements
+ * are not all constants (which they won't be, else constant folding
+ * would have reduced the ArrayExpr to a Const). In this path it's
+ * critical to have the sanity check on the s1disjoint estimate.
+ */
+ s1 = s1disjoint = (useOr ? 0.0 : 1.0);
+
+ foreach(l, arrayexpr->elements)
+ {
+ Node *elem = (Node *) lfirst(l);
+ List *args;
+ Selectivity s2;
+
+ /*
+ * Theoretically, if elem isn't of nominal_element_type we should
+ * insert a RelabelType, but it seems unlikely that any operator
+ * estimation function would really care ...
+ */
+ args = list_make2(leftop, elem);
+ if (is_join_clause)
+ s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
+ clause->inputcollid,
+ PointerGetDatum(root),
+ ObjectIdGetDatum(operator),
+ PointerGetDatum(args),
+ Int16GetDatum(jointype),
+ PointerGetDatum(sjinfo)));
+ else
+ s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
+ clause->inputcollid,
+ PointerGetDatum(root),
+ ObjectIdGetDatum(operator),
+ PointerGetDatum(args),
+ Int32GetDatum(varRelid)));
+
+ if (useOr)
+ {
+ s1 = s1 + s2 - s1 * s2;
+ if (isEquality)
+ s1disjoint += s2;
+ }
+ else
+ {
+ s1 = s1 * s2;
+ if (isInequality)
+ s1disjoint += s2 - 1.0;
+ }
+ }
+
+ /* accept disjoint-probability estimate if in range */
+ if ((useOr ? isEquality : isInequality) &&
+ s1disjoint >= 0.0 && s1disjoint <= 1.0)
+ s1 = s1disjoint;
+ }
+ else
+ {
+ CaseTestExpr *dummyexpr;
+ List *args;
+ Selectivity s2;
+ int i;
+
+ /*
+ * We need a dummy rightop to pass to the operator selectivity
+ * routine. It can be pretty much anything that doesn't look like a
+ * constant; CaseTestExpr is a convenient choice.
+ */
+ dummyexpr = makeNode(CaseTestExpr);
+ dummyexpr->typeId = nominal_element_type;
+ dummyexpr->typeMod = -1;
+ dummyexpr->collation = clause->inputcollid;
+ args = list_make2(leftop, dummyexpr);
+ if (is_join_clause)
+ s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
+ clause->inputcollid,
+ PointerGetDatum(root),
+ ObjectIdGetDatum(operator),
+ PointerGetDatum(args),
+ Int16GetDatum(jointype),
+ PointerGetDatum(sjinfo)));
+ else
+ s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
+ clause->inputcollid,
+ PointerGetDatum(root),
+ ObjectIdGetDatum(operator),
+ PointerGetDatum(args),
+ Int32GetDatum(varRelid)));
+ s1 = useOr ? 0.0 : 1.0;
+
+ /*
+ * Arbitrarily assume 10 elements in the eventual array value (see
+ * also estimate_array_length). We don't risk an assumption of
+ * disjoint probabilities here.
+ */
+ for (i = 0; i < 10; i++)
+ {
+ if (useOr)
+ s1 = s1 + s2 - s1 * s2;
+ else
+ s1 = s1 * s2;
+ }
+ }
+
+ /* result should be in range, but make sure... */
+ CLAMP_PROBABILITY(s1);
+
+ return s1;
+}
+
+/*
+ * Estimate number of elements in the array yielded by an expression.
+ *
+ * It's important that this agree with scalararraysel.
+ */
+int
+estimate_array_length(Node *arrayexpr)
+{
+ /* look through any binary-compatible relabeling of arrayexpr */
+ arrayexpr = strip_array_coercion(arrayexpr);
+
+ if (arrayexpr && IsA(arrayexpr, Const))
+ {
+ Datum arraydatum = ((Const *) arrayexpr)->constvalue;
+ bool arrayisnull = ((Const *) arrayexpr)->constisnull;
+ ArrayType *arrayval;
+
+ if (arrayisnull)
+ return 0;
+ arrayval = DatumGetArrayTypeP(arraydatum);
+ return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
+ }
+ else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
+ !((ArrayExpr *) arrayexpr)->multidims)
+ {
+ return list_length(((ArrayExpr *) arrayexpr)->elements);
+ }
+ else
+ {
+ /* default guess --- see also scalararraysel */
+ return 10;
+ }
+}
+
+/*
+ * rowcomparesel - Selectivity of RowCompareExpr Node.
+ *
+ * We estimate RowCompare selectivity by considering just the first (high
+ * order) columns, which makes it equivalent to an ordinary OpExpr. While
+ * this estimate could be refined by considering additional columns, it
+ * seems unlikely that we could do a lot better without multi-column
+ * statistics.
+ */
+Selectivity
+rowcomparesel(PlannerInfo *root,
+ RowCompareExpr *clause,
+ int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
+{
+ Selectivity s1;
+ Oid opno = linitial_oid(clause->opnos);
+ Oid inputcollid = linitial_oid(clause->inputcollids);
+ List *opargs;
+ bool is_join_clause;
+
+ /* Build equivalent arg list for single operator */
+ opargs = list_make2(linitial(clause->largs), linitial(clause->rargs));
+
+ /*
+ * Decide if it's a join clause. This should match clausesel.c's
+ * treat_as_join_clause(), except that we intentionally consider only the
+ * leading columns and not the rest of the clause.
+ */
+ if (varRelid != 0)
+ {
+ /*
+ * Caller is forcing restriction mode (eg, because we are examining an
+ * inner indexscan qual).
+ */
+ is_join_clause = false;
+ }
+ else if (sjinfo == NULL)
+ {
+ /*
+ * It must be a restriction clause, since it's being evaluated at a
+ * scan node.
+ */
+ is_join_clause = false;
+ }
+ else
+ {
+ /*
+ * Otherwise, it's a join if there's more than one relation used.
+ */
+ is_join_clause = (NumRelids(root, (Node *) opargs) > 1);
+ }
+
+ if (is_join_clause)
+ {
+ /* Estimate selectivity for a join clause. */
+ s1 = join_selectivity(root, opno,
+ opargs,
+ inputcollid,
+ jointype,
+ sjinfo);
+ }
+ else
+ {
+ /* Estimate selectivity for a restriction clause. */
+ s1 = restriction_selectivity(root, opno,
+ opargs,
+ inputcollid,
+ varRelid);
+ }
+
+ return s1;
+}
+
+/*
+ * eqjoinsel - Join selectivity of "="
+ */
+Datum
+eqjoinsel(PG_FUNCTION_ARGS)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Oid operator = PG_GETARG_OID(1);
+ List *args = (List *) PG_GETARG_POINTER(2);
+
+#ifdef NOT_USED
+ JoinType jointype = (JoinType) PG_GETARG_INT16(3);
+#endif
+ SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
+ Oid collation = PG_GET_COLLATION();
+ double selec;
+ double selec_inner;
+ VariableStatData vardata1;
+ VariableStatData vardata2;
+ double nd1;
+ double nd2;
+ bool isdefault1;
+ bool isdefault2;
+ Oid opfuncoid;
+ AttStatsSlot sslot1;
+ AttStatsSlot sslot2;
+ Form_pg_statistic stats1 = NULL;
+ Form_pg_statistic stats2 = NULL;
+ bool have_mcvs1 = false;
+ bool have_mcvs2 = false;
+ bool join_is_reversed;
+ RelOptInfo *inner_rel;
+
+ get_join_variables(root, args, sjinfo,
+ &vardata1, &vardata2, &join_is_reversed);
+
+ nd1 = get_variable_numdistinct(&vardata1, &isdefault1);
+ nd2 = get_variable_numdistinct(&vardata2, &isdefault2);
+
+ opfuncoid = get_opcode(operator);
+
+ memset(&sslot1, 0, sizeof(sslot1));
+ memset(&sslot2, 0, sizeof(sslot2));
+
+ if (HeapTupleIsValid(vardata1.statsTuple))
+ {
+ /* note we allow use of nullfrac regardless of security check */
+ stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple);
+ if (statistic_proc_security_check(&vardata1, opfuncoid))
+ have_mcvs1 = get_attstatsslot(&sslot1, vardata1.statsTuple,
+ STATISTIC_KIND_MCV, InvalidOid,
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
+ }
+
+ if (HeapTupleIsValid(vardata2.statsTuple))
+ {
+ /* note we allow use of nullfrac regardless of security check */
+ stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple);
+ if (statistic_proc_security_check(&vardata2, opfuncoid))
+ have_mcvs2 = get_attstatsslot(&sslot2, vardata2.statsTuple,
+ STATISTIC_KIND_MCV, InvalidOid,
+ ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
+ }
+
+ /* We need to compute the inner-join selectivity in all cases */
+ selec_inner = eqjoinsel_inner(opfuncoid, collation,
+ &vardata1, &vardata2,
+ nd1, nd2,
+ isdefault1, isdefault2,
+ &sslot1, &sslot2,
+ stats1, stats2,
+ have_mcvs1, have_mcvs2);
+
+ switch (sjinfo->jointype)
+ {
+ case JOIN_INNER:
+ case JOIN_LEFT:
+ case JOIN_FULL:
+ selec = selec_inner;
+ break;
+ case JOIN_SEMI:
+ case JOIN_ANTI:
+
+ /*
+ * Look up the join's inner relation. min_righthand is sufficient
+ * information because neither SEMI nor ANTI joins permit any
+ * reassociation into or out of their RHS, so the righthand will
+ * always be exactly that set of rels.
+ */
+ inner_rel = find_join_input_rel(root, sjinfo->min_righthand);
+
+ if (!join_is_reversed)
+ selec = eqjoinsel_semi(opfuncoid, collation,
+ &vardata1, &vardata2,
+ nd1, nd2,
+ isdefault1, isdefault2,
+ &sslot1, &sslot2,
+ stats1, stats2,
+ have_mcvs1, have_mcvs2,
+ inner_rel);
+ else
+ {
+ Oid commop = get_commutator(operator);
+ Oid commopfuncoid = OidIsValid(commop) ? get_opcode(commop) : InvalidOid;
+
+ selec = eqjoinsel_semi(commopfuncoid, collation,
+ &vardata2, &vardata1,
+ nd2, nd1,
+ isdefault2, isdefault1,
+ &sslot2, &sslot1,
+ stats2, stats1,
+ have_mcvs2, have_mcvs1,
+ inner_rel);
+ }
+
+ /*
+ * We should never estimate the output of a semijoin to be more
+ * rows than we estimate for an inner join with the same input
+ * rels and join condition; it's obviously impossible for that to
+ * happen. The former estimate is N1 * Ssemi while the latter is
+ * N1 * N2 * Sinner, so we may clamp Ssemi <= N2 * Sinner. Doing
+ * this is worthwhile because of the shakier estimation rules we
+ * use in eqjoinsel_semi, particularly in cases where it has to
+ * punt entirely.
+ */
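+ /*
+ * Illustrative arithmetic (numbers invented for this comment, not taken
+ * from any catalog): with inner_rel->rows = 1000 and selec_inner =
+ * 0.0005, an inner join could emit at most 1000 * 0.0005 = 0.5 rows per
+ * outer row, so a semijoin estimate such as 0.8 would be clamped back to
+ * 0.5 by the Min() below.
+ */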
+ selec = Min(selec, inner_rel->rows * selec_inner);
+ break;
+ default:
+ /* other values not expected here */
+ elog(ERROR, "unrecognized join type: %d",
+ (int) sjinfo->jointype);
+ selec = 0; /* keep compiler quiet */
+ break;
+ }
+
+ free_attstatsslot(&sslot1);
+ free_attstatsslot(&sslot2);
+
+ ReleaseVariableStats(vardata1);
+ ReleaseVariableStats(vardata2);
+
+ CLAMP_PROBABILITY(selec);
+
+ PG_RETURN_FLOAT8((float8) selec);
+}
+
+/*
+ * eqjoinsel_inner --- eqjoinsel for normal inner join
+ *
+ * We also use this for LEFT/FULL outer joins; it's not presently clear
+ * that it's worth trying to distinguish them here.
+ */
+static double
+eqjoinsel_inner(Oid opfuncoid, Oid collation,
+ VariableStatData *vardata1, VariableStatData *vardata2,
+ double nd1, double nd2,
+ bool isdefault1, bool isdefault2,
+ AttStatsSlot *sslot1, AttStatsSlot *sslot2,
+ Form_pg_statistic stats1, Form_pg_statistic stats2,
+ bool have_mcvs1, bool have_mcvs2)
+{
+ double selec;
+
+ if (have_mcvs1 && have_mcvs2)
+ {
+ /*
+ * We have most-common-value lists for both relations. Run through
+ * the lists to see which MCVs actually join to each other with the
+ * given operator. This allows us to determine the exact join
+ * selectivity for the portion of the relations represented by the MCV
+ * lists. We still have to estimate for the remaining population, but
+ * in a skewed distribution this gives us a big leg up in accuracy.
+ * For motivation see the analysis in Y. Ioannidis and S.
+ * Christodoulakis, "On the propagation of errors in the size of join
+ * results", Technical Report 1018, Computer Science Dept., University
+ * of Wisconsin, Madison, March 1991 (available from ftp.cs.wisc.edu).
+ */
+ LOCAL_FCINFO(fcinfo, 2);
+ FmgrInfo eqproc;
+ bool *hasmatch1;
+ bool *hasmatch2;
+ double nullfrac1 = stats1->stanullfrac;
+ double nullfrac2 = stats2->stanullfrac;
+ double matchprodfreq,
+ matchfreq1,
+ matchfreq2,
+ unmatchfreq1,
+ unmatchfreq2,
+ otherfreq1,
+ otherfreq2,
+ totalsel1,
+ totalsel2;
+ int i,
+ nmatches;
+
+ fmgr_info(opfuncoid, &eqproc);
+
+ /*
+ * Save a few cycles by setting up the fcinfo struct just once. Using
+ * FunctionCallInvoke directly also avoids failure if the eqproc
+ * returns NULL, though really equality functions should never do
+ * that.
+ */
+ InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
+ NULL, NULL);
+ fcinfo->args[0].isnull = false;
+ fcinfo->args[1].isnull = false;
+
+ hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
+ hasmatch2 = (bool *) palloc0(sslot2->nvalues * sizeof(bool));
+
+ /*
+ * Note we assume that each MCV will match at most one member of the
+ * other MCV list. If the operator isn't really equality, there could
+ * be multiple matches --- but we don't look for them, both for speed
+ * and because the math wouldn't add up...
+ */
+ matchprodfreq = 0.0;
+ nmatches = 0;
+ for (i = 0; i < sslot1->nvalues; i++)
+ {
+ int j;
+
+ fcinfo->args[0].value = sslot1->values[i];
+
+ for (j = 0; j < sslot2->nvalues; j++)
+ {
+ Datum fresult;
+
+ if (hasmatch2[j])
+ continue;
+ fcinfo->args[1].value = sslot2->values[j];
+ fcinfo->isnull = false;
+ fresult = FunctionCallInvoke(fcinfo);
+ if (!fcinfo->isnull && DatumGetBool(fresult))
+ {
+ hasmatch1[i] = hasmatch2[j] = true;
+ matchprodfreq += sslot1->numbers[i] * sslot2->numbers[j];
+ nmatches++;
+ break;
+ }
+ }
+ }
+ CLAMP_PROBABILITY(matchprodfreq);
+ /* Sum up frequencies of matched and unmatched MCVs */
+ matchfreq1 = unmatchfreq1 = 0.0;
+ for (i = 0; i < sslot1->nvalues; i++)
+ {
+ if (hasmatch1[i])
+ matchfreq1 += sslot1->numbers[i];
+ else
+ unmatchfreq1 += sslot1->numbers[i];
+ }
+ CLAMP_PROBABILITY(matchfreq1);
+ CLAMP_PROBABILITY(unmatchfreq1);
+ matchfreq2 = unmatchfreq2 = 0.0;
+ for (i = 0; i < sslot2->nvalues; i++)
+ {
+ if (hasmatch2[i])
+ matchfreq2 += sslot2->numbers[i];
+ else
+ unmatchfreq2 += sslot2->numbers[i];
+ }
+ CLAMP_PROBABILITY(matchfreq2);
+ CLAMP_PROBABILITY(unmatchfreq2);
+ pfree(hasmatch1);
+ pfree(hasmatch2);
+
+ /*
+ * Compute total frequency of non-null values that are not in the MCV
+ * lists.
+ */
+ otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1;
+ otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2;
+ CLAMP_PROBABILITY(otherfreq1);
+ CLAMP_PROBABILITY(otherfreq2);
+
+ /*
+ * We can estimate the total selectivity from the point of view of
+ * relation 1 as: the known selectivity for matched MCVs, plus
+ * unmatched MCVs that are assumed to match against random members of
+ * relation 2's non-MCV population, plus non-MCV values that are
+ * assumed to match against random members of relation 2's unmatched
+ * MCVs plus non-MCV values.
+ */
+ totalsel1 = matchprodfreq;
+ if (nd2 > sslot2->nvalues)
+ totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - sslot2->nvalues);
+ if (nd2 > nmatches)
+ totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /
+ (nd2 - nmatches);
+ /* Same estimate from the point of view of relation 2. */
+ totalsel2 = matchprodfreq;
+ if (nd1 > sslot1->nvalues)
+ totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - sslot1->nvalues);
+ if (nd1 > nmatches)
+ totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
+ (nd1 - nmatches);
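+
+ /*
+ * Sketch of the arithmetic with invented frequencies: if matchprodfreq =
+ * 0.04, unmatchfreq1 = 0.10, otherfreq1 = 0.20, unmatchfreq2 = 0.15,
+ * otherfreq2 = 0.30, nd2 = 120, sslot2->nvalues = 20 and nmatches = 15,
+ * then totalsel1 = 0.04 + 0.10 * 0.30 / 100 + 0.20 * 0.45 / 105, which
+ * is about 0.0412; totalsel2 is computed symmetrically from nd1's
+ * figures, and the smaller of the two is used below.
+ */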
+
+ /*
+ * Use the smaller of the two estimates. This can be justified in
+ * essentially the same terms as given below for the no-stats case: to
+ * a first approximation, we are estimating from the point of view of
+ * the relation with smaller nd.
+ */
+ selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2;
+ }
+ else
+ {
+ /*
+ * We do not have MCV lists for both sides. Estimate the join
+ * selectivity as MIN(1/nd1,1/nd2)*(1-nullfrac1)*(1-nullfrac2). This
+ * is plausible if we assume that the join operator is strict and the
+ * non-null values are about equally distributed: a given non-null
+ * tuple of rel1 will join to either zero or N2*(1-nullfrac2)/nd2 rows
+ * of rel2, so total join rows are at most
+ * N1*(1-nullfrac1)*N2*(1-nullfrac2)/nd2 giving a join selectivity of
+ * not more than (1-nullfrac1)*(1-nullfrac2)/nd2. By the same logic it
+ * is not more than (1-nullfrac1)*(1-nullfrac2)/nd1, so the expression
+ * with MIN() is an upper bound. Using the MIN() means we estimate
+ * from the point of view of the relation with smaller nd (since the
+ * larger nd is determining the MIN). It is reasonable to assume that
+ * most tuples in this rel will have join partners, so the bound is
+ * probably reasonably tight and should be taken as-is.
+ *
+ * XXX Can we be smarter if we have an MCV list for just one side? It
+ * seems that if we assume equal distribution for the other side, we
+ * end up with the same answer anyway.
+ */
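+ /*
+ * Worked example with assumed statistics (not derived from anything
+ * above): nd1 = 100, nd2 = 1000, nullfrac1 = 0.1, nullfrac2 = 0 give
+ * MIN(1/100, 1/1000) * 0.9 * 1.0 = 0.0009, i.e. we end up dividing by
+ * the larger of the two ndistinct values.
+ */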
+ double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
+ double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
+
+ selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
+ if (nd1 > nd2)
+ selec /= nd1;
+ else
+ selec /= nd2;
+ }
+
+ return selec;
+}
+
+/*
+ * eqjoinsel_semi --- eqjoinsel for semi join
+ *
+ * (Also used for anti join, which we are supposed to estimate the same way.)
+ * Caller has ensured that vardata1 is the LHS variable.
+ * Unlike eqjoinsel_inner, we have to cope with opfuncoid being InvalidOid.
+ */
+static double
+eqjoinsel_semi(Oid opfuncoid, Oid collation,
+ VariableStatData *vardata1, VariableStatData *vardata2,
+ double nd1, double nd2,
+ bool isdefault1, bool isdefault2,
+ AttStatsSlot *sslot1, AttStatsSlot *sslot2,
+ Form_pg_statistic stats1, Form_pg_statistic stats2,
+ bool have_mcvs1, bool have_mcvs2,
+ RelOptInfo *inner_rel)
+{
+ double selec;
+
+ /*
+ * We clamp nd2 to be not more than what we estimate the inner relation's
+ * size to be. This is intuitively somewhat reasonable since obviously
+ * there can't be more than that many distinct values coming from the
+ * inner rel. The reason for the asymmetry (ie, that we don't clamp nd1
+ * likewise) is that this is the only pathway by which restriction clauses
+ * applied to the inner rel will affect the join result size estimate,
+ * since set_joinrel_size_estimates will multiply SEMI/ANTI selectivity by
+ * only the outer rel's size. If we clamped nd1 we'd be double-counting
+ * the selectivity of outer-rel restrictions.
+ *
+ * We can apply this clamping both with respect to the base relation from
+ * which the join variable comes (if there is just one), and to the
+ * immediate inner input relation of the current join.
+ *
+ * If we clamp, we can treat nd2 as being a non-default estimate; it's not
+ * great, maybe, but it didn't come out of nowhere either. This is most
+ * helpful when the inner relation is empty and consequently has no stats.
+ */
+ if (vardata2->rel)
+ {
+ if (nd2 >= vardata2->rel->rows)
+ {
+ nd2 = vardata2->rel->rows;
+ isdefault2 = false;
+ }
+ }
+ if (nd2 >= inner_rel->rows)
+ {
+ nd2 = inner_rel->rows;
+ isdefault2 = false;
+ }
+
+ if (have_mcvs1 && have_mcvs2 && OidIsValid(opfuncoid))
+ {
+ /*
+ * We have most-common-value lists for both relations. Run through
+ * the lists to see which MCVs actually join to each other with the
+ * given operator. This allows us to determine the exact join
+ * selectivity for the portion of the relations represented by the MCV
+ * lists. We still have to estimate for the remaining population, but
+ * in a skewed distribution this gives us a big leg up in accuracy.
+ */
+ LOCAL_FCINFO(fcinfo, 2);
+ FmgrInfo eqproc;
+ bool *hasmatch1;
+ bool *hasmatch2;
+ double nullfrac1 = stats1->stanullfrac;
+ double matchfreq1,
+ uncertainfrac,
+ uncertain;
+ int i,
+ nmatches,
+ clamped_nvalues2;
+
+ /*
+ * The clamping above could have resulted in nd2 being less than
+ * sslot2->nvalues; in which case, we assume that precisely the nd2
+ * most common values in the relation will appear in the join input,
+ * and so compare to only the first nd2 members of the MCV list. Of
+ * course this is frequently wrong, but it's the best bet we can make.
+ */
+ clamped_nvalues2 = Min(sslot2->nvalues, nd2);
+
+ fmgr_info(opfuncoid, &eqproc);
+
+ /*
+ * Save a few cycles by setting up the fcinfo struct just once. Using
+ * FunctionCallInvoke directly also avoids failure if the eqproc
+ * returns NULL, though really equality functions should never do
+ * that.
+ */
+ InitFunctionCallInfoData(*fcinfo, &eqproc, 2, collation,
+ NULL, NULL);
+ fcinfo->args[0].isnull = false;
+ fcinfo->args[1].isnull = false;
+
+ hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
+ hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
+
+ /*
+ * Note we assume that each MCV will match at most one member of the
+ * other MCV list. If the operator isn't really equality, there could
+ * be multiple matches --- but we don't look for them, both for speed
+ * and because the math wouldn't add up...
+ */
+ nmatches = 0;
+ for (i = 0; i < sslot1->nvalues; i++)
+ {
+ int j;
+
+ fcinfo->args[0].value = sslot1->values[i];
+
+ for (j = 0; j < clamped_nvalues2; j++)
+ {
+ Datum fresult;
+
+ if (hasmatch2[j])
+ continue;
+ fcinfo->args[1].value = sslot2->values[j];
+ fcinfo->isnull = false;
+ fresult = FunctionCallInvoke(fcinfo);
+ if (!fcinfo->isnull && DatumGetBool(fresult))
+ {
+ hasmatch1[i] = hasmatch2[j] = true;
+ nmatches++;
+ break;
+ }
+ }
+ }
+ /* Sum up frequencies of matched MCVs */
+ matchfreq1 = 0.0;
+ for (i = 0; i < sslot1->nvalues; i++)
+ {
+ if (hasmatch1[i])
+ matchfreq1 += sslot1->numbers[i];
+ }
+ CLAMP_PROBABILITY(matchfreq1);
+ pfree(hasmatch1);
+ pfree(hasmatch2);
+
+ /*
+ * Now we need to estimate the fraction of relation 1 that has at
+ * least one join partner. We know for certain that the matched MCVs
+ * do, so that gives us a lower bound, but we're really in the dark
+ * about everything else. Our crude approach is: if nd1 <= nd2 then
+ * assume all non-null rel1 rows have join partners, else assume for
+ * the uncertain rows that a fraction nd2/nd1 have join partners. We
+ * can discount the known-matched MCVs from the distinct-values counts
+ * before doing the division.
+ *
+ * Crude as the above is, it's completely useless if we don't have
+ * reliable ndistinct values for both sides. Hence, if either nd1 or
+ * nd2 is default, punt and assume half of the uncertain rows have
+ * join partners.
+ */
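+ /*
+ * Hypothetical figures to illustrate the heuristic: matchfreq1 = 0.2,
+ * nullfrac1 = 0.05, nmatches = 10, nd1 = 110, nd2 = 60. After
+ * discounting the matches, nd1 = 100 and nd2 = 50, so uncertainfrac =
+ * 50/100 = 0.5; the uncertain fraction of rel1 is 1 - 0.2 - 0.05 = 0.75,
+ * giving selec = 0.2 + 0.5 * 0.75 = 0.575.
+ */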
+ if (!isdefault1 && !isdefault2)
+ {
+ nd1 -= nmatches;
+ nd2 -= nmatches;
+ if (nd1 <= nd2 || nd2 < 0)
+ uncertainfrac = 1.0;
+ else
+ uncertainfrac = nd2 / nd1;
+ }
+ else
+ uncertainfrac = 0.5;
+ uncertain = 1.0 - matchfreq1 - nullfrac1;
+ CLAMP_PROBABILITY(uncertain);
+ selec = matchfreq1 + uncertainfrac * uncertain;
+ }
+ else
+ {
+ /*
+ * Without MCV lists for both sides, we can only use the heuristic
+ * about nd1 vs nd2.
+ */
+ double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
+
+ if (!isdefault1 && !isdefault2)
+ {
+ if (nd1 <= nd2 || nd2 < 0)
+ selec = 1.0 - nullfrac1;
+ else
+ selec = (nd2 / nd1) * (1.0 - nullfrac1);
+ }
+ else
+ selec = 0.5 * (1.0 - nullfrac1);
+ }
+
+ return selec;
+}
+
+/*
+ * neqjoinsel - Join selectivity of "!="
+ */
+Datum
+neqjoinsel(PG_FUNCTION_ARGS)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Oid operator = PG_GETARG_OID(1);
+ List *args = (List *) PG_GETARG_POINTER(2);
+ JoinType jointype = (JoinType) PG_GETARG_INT16(3);
+ SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
+ Oid collation = PG_GET_COLLATION();
+ float8 result;
+
+ if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
+ {
+ /*
+ * For semi-joins, if there is more than one distinct value in the RHS
+ * relation then every non-null LHS row must find a row to join since
+ * it can only be equal to one of them. We'll assume that there is
+ * always more than one distinct RHS value for the sake of stability,
+ * though in theory we could have special cases for empty RHS
+ * (selectivity = 0) and single-distinct-value RHS (selectivity =
+ * fraction of LHS that has the same value as the single RHS value).
+ *
+ * For anti-joins, if we use the same assumption that there is more
+ * than one distinct key in the RHS relation, then every non-null LHS
+ * row must be suppressed by the anti-join.
+ *
+ * So either way, the selectivity estimate should be 1 - nullfrac.
+ */
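+ /*
+ * For example (an assumed statistic, purely illustrative): with
+ * stanullfrac = 0.02 on the relevant join column, the estimate is simply
+ * 1 - 0.02 = 0.98, regardless of the RHS contents, given the
+ * more-than-one-distinct-value assumption above.
+ */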
+ VariableStatData leftvar;
+ VariableStatData rightvar;
+ bool reversed;
+ HeapTuple statsTuple;
+ double nullfrac;
+
+ get_join_variables(root, args, sjinfo, &leftvar, &rightvar, &reversed);
+ statsTuple = reversed ? rightvar.statsTuple : leftvar.statsTuple;
+ if (HeapTupleIsValid(statsTuple))
+ nullfrac = ((Form_pg_statistic) GETSTRUCT(statsTuple))->stanullfrac;
+ else
+ nullfrac = 0.0;
+ ReleaseVariableStats(leftvar);
+ ReleaseVariableStats(rightvar);
+
+ result = 1.0 - nullfrac;
+ }
+ else
+ {
+ /*
+ * We want 1 - eqjoinsel() where the equality operator is the one
+ * associated with this != operator, that is, its negator.
+ */
+ Oid eqop = get_negator(operator);
+
+ if (eqop)
+ {
+ result =
+ DatumGetFloat8(DirectFunctionCall5Coll(eqjoinsel,
+ collation,
+ PointerGetDatum(root),
+ ObjectIdGetDatum(eqop),
+ PointerGetDatum(args),
+ Int16GetDatum(jointype),
+ PointerGetDatum(sjinfo)));
+ }
+ else
+ {
+ /* Use default selectivity (should we raise an error instead?) */
+ result = DEFAULT_EQ_SEL;
+ }
+ result = 1.0 - result;
+ }
+
+ PG_RETURN_FLOAT8(result);
+}
+
+/*
+ * scalarltjoinsel - Join selectivity of "<" for scalars
+ */
+Datum
+scalarltjoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+}
+
+/*
+ * scalarlejoinsel - Join selectivity of "<=" for scalars
+ */
+Datum
+scalarlejoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+}
+
+/*
+ * scalargtjoinsel - Join selectivity of ">" for scalars
+ */
+Datum
+scalargtjoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+}
+
+/*
+ * scalargejoinsel - Join selectivity of ">=" for scalars
+ */
+Datum
+scalargejoinsel(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+}
+
+
+/*
+ * mergejoinscansel - Scan selectivity of merge join.
+ *
+ * A merge join will stop as soon as it exhausts either input stream.
+ * Therefore, if we can estimate the ranges of both input variables,
+ * we can estimate how much of the input will actually be read. This
+ * can have a considerable impact on the cost when using indexscans.
+ *
+ * Also, we can estimate how much of each input has to be read before the
+ * first join pair is found, which will affect the join's startup time.
+ *
+ * clause should be a clause already known to be mergejoinable. opfamily,
+ * strategy, and nulls_first specify the sort ordering being used.
+ *
+ * The outputs are:
+ * *leftstart is set to the fraction of the left-hand variable expected
+ * to be scanned before the first join pair is found (0 to 1).
+ * *leftend is set to the fraction of the left-hand variable expected
+ * to be scanned before the join terminates (0 to 1).
+ * *rightstart, *rightend similarly for the right-hand variable.
+ */
+void
+mergejoinscansel(PlannerInfo *root, Node *clause,
+ Oid opfamily, int strategy, bool nulls_first,
+ Selectivity *leftstart, Selectivity *leftend,
+ Selectivity *rightstart, Selectivity *rightend)
+{
+ Node *left,
+ *right;
+ VariableStatData leftvar,
+ rightvar;
+ int op_strategy;
+ Oid op_lefttype;
+ Oid op_righttype;
+ Oid opno,
+ collation,
+ lsortop,
+ rsortop,
+ lstatop,
+ rstatop,
+ ltop,
+ leop,
+ revltop,
+ revleop;
+ bool isgt;
+ Datum leftmin,
+ leftmax,
+ rightmin,
+ rightmax;
+ double selec;
+
+ /* Set default results if we can't figure anything out. */
+ /* XXX should default "start" fraction be a bit more than 0? */
+ *leftstart = *rightstart = 0.0;
+ *leftend = *rightend = 1.0;
+
+ /* Deconstruct the merge clause */
+ if (!is_opclause(clause))
+ return; /* shouldn't happen */
+ opno = ((OpExpr *) clause)->opno;
+ collation = ((OpExpr *) clause)->inputcollid;
+ left = get_leftop((Expr *) clause);
+ right = get_rightop((Expr *) clause);
+ if (!right)
+ return; /* shouldn't happen */
+
+ /* Look for stats for the inputs */
+ examine_variable(root, left, 0, &leftvar);
+ examine_variable(root, right, 0, &rightvar);
+
+ /* Extract the operator's declared left/right datatypes */
+ get_op_opfamily_properties(opno, opfamily, false,
+ &op_strategy,
+ &op_lefttype,
+ &op_righttype);
+ Assert(op_strategy == BTEqualStrategyNumber);
+
+ /*
+ * Look up the various operators we need. If we don't find them all, it
+ * probably means the opfamily is broken, but we just fail silently.
+ *
+ * Note: we expect that pg_statistic histograms will be sorted by the '<'
+ * operator, regardless of which sort direction we are considering.
+ */
+ switch (strategy)
+ {
+ case BTLessStrategyNumber:
+ isgt = false;
+ if (op_lefttype == op_righttype)
+ {
+ /* easy case */
+ ltop = get_opfamily_member(opfamily,
+ op_lefttype, op_righttype,
+ BTLessStrategyNumber);
+ leop = get_opfamily_member(opfamily,
+ op_lefttype, op_righttype,
+ BTLessEqualStrategyNumber);
+ lsortop = ltop;
+ rsortop = ltop;
+ lstatop = lsortop;
+ rstatop = rsortop;
+ revltop = ltop;
+ revleop = leop;
+ }
+ else
+ {
+ ltop = get_opfamily_member(opfamily,
+ op_lefttype, op_righttype,
+ BTLessStrategyNumber);
+ leop = get_opfamily_member(opfamily,
+ op_lefttype, op_righttype,
+ BTLessEqualStrategyNumber);
+ lsortop = get_opfamily_member(opfamily,
+ op_lefttype, op_lefttype,
+ BTLessStrategyNumber);
+ rsortop = get_opfamily_member(opfamily,
+ op_righttype, op_righttype,
+ BTLessStrategyNumber);
+ lstatop = lsortop;
+ rstatop = rsortop;
+ revltop = get_opfamily_member(opfamily,
+ op_righttype, op_lefttype,
+ BTLessStrategyNumber);
+ revleop = get_opfamily_member(opfamily,
+ op_righttype, op_lefttype,
+ BTLessEqualStrategyNumber);
+ }
+ break;
+ case BTGreaterStrategyNumber:
+ /* descending-order case */
+ isgt = true;
+ if (op_lefttype == op_righttype)
+ {
+ /* easy case */
+ ltop = get_opfamily_member(opfamily,
+ op_lefttype, op_righttype,
+ BTGreaterStrategyNumber);
+ leop = get_opfamily_member(opfamily,
+ op_lefttype, op_righttype,
+ BTGreaterEqualStrategyNumber);
+ lsortop = ltop;
+ rsortop = ltop;
+ lstatop = get_opfamily_member(opfamily,
+ op_lefttype, op_lefttype,
+ BTLessStrategyNumber);
+ rstatop = lstatop;
+ revltop = ltop;
+ revleop = leop;
+ }
+ else
+ {
+ ltop = get_opfamily_member(opfamily,
+ op_lefttype, op_righttype,
+ BTGreaterStrategyNumber);
+ leop = get_opfamily_member(opfamily,
+ op_lefttype, op_righttype,
+ BTGreaterEqualStrategyNumber);
+ lsortop = get_opfamily_member(opfamily,
+ op_lefttype, op_lefttype,
+ BTGreaterStrategyNumber);
+ rsortop = get_opfamily_member(opfamily,
+ op_righttype, op_righttype,
+ BTGreaterStrategyNumber);
+ lstatop = get_opfamily_member(opfamily,
+ op_lefttype, op_lefttype,
+ BTLessStrategyNumber);
+ rstatop = get_opfamily_member(opfamily,
+ op_righttype, op_righttype,
+ BTLessStrategyNumber);
+ revltop = get_opfamily_member(opfamily,
+ op_righttype, op_lefttype,
+ BTGreaterStrategyNumber);
+ revleop = get_opfamily_member(opfamily,
+ op_righttype, op_lefttype,
+ BTGreaterEqualStrategyNumber);
+ }
+ break;
+ default:
+ goto fail; /* shouldn't get here */
+ }
+
+ if (!OidIsValid(lsortop) ||
+ !OidIsValid(rsortop) ||
+ !OidIsValid(lstatop) ||
+ !OidIsValid(rstatop) ||
+ !OidIsValid(ltop) ||
+ !OidIsValid(leop) ||
+ !OidIsValid(revltop) ||
+ !OidIsValid(revleop))
+ goto fail; /* insufficient info in catalogs */
+
+ /* Try to get ranges of both inputs */
+ if (!isgt)
+ {
+ if (!get_variable_range(root, &leftvar, lstatop, collation,
+ &leftmin, &leftmax))
+ goto fail; /* no range available from stats */
+ if (!get_variable_range(root, &rightvar, rstatop, collation,
+ &rightmin, &rightmax))
+ goto fail; /* no range available from stats */
+ }
+ else
+ {
+ /* need to swap the max and min */
+ if (!get_variable_range(root, &leftvar, lstatop, collation,
+ &leftmax, &leftmin))
+ goto fail; /* no range available from stats */
+ if (!get_variable_range(root, &rightvar, rstatop, collation,
+ &rightmax, &rightmin))
+ goto fail; /* no range available from stats */
+ }
+
+ /*
+ * Now, the fraction of the left variable that will be scanned is the
+ * fraction that's <= the right-side maximum value. But only believe
+ * non-default estimates, else stick with our 1.0.
+ */
+ selec = scalarineqsel(root, leop, isgt, true, collation, &leftvar,
+ rightmax, op_righttype);
+ if (selec != DEFAULT_INEQ_SEL)
+ *leftend = selec;
+
+ /* And similarly for the right variable. */
+ selec = scalarineqsel(root, revleop, isgt, true, collation, &rightvar,
+ leftmax, op_lefttype);
+ if (selec != DEFAULT_INEQ_SEL)
+ *rightend = selec;
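+
+ /*
+ * For intuition (hypothetical ranges, not computed here): if the left
+ * column spans 1..100 and the right column spans 1..50 under an
+ * ascending sort, roughly half of the left input is <= the right-side
+ * maximum, so *leftend comes out near 0.5 while *rightend stays near
+ * 1.0; the adjustment below keeps the smaller estimate and resets the
+ * other to exactly 1.0.
+ */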
+
+ /*
+ * Only one of the two "end" fractions can really be less than 1.0;
+ * believe the smaller estimate and reset the other one to exactly 1.0. If
+ * we get exactly equal estimates (as can easily happen with self-joins),
+ * believe neither.
+ */
+ if (*leftend > *rightend)
+ *leftend = 1.0;
+ else if (*leftend < *rightend)
+ *rightend = 1.0;
+ else
+ *leftend = *rightend = 1.0;
+
+ /*
+ * Also, the fraction of the left variable that will be scanned before the
+ * first join pair is found is the fraction that's < the right-side
+ * minimum value. But only believe non-default estimates, else stick with
+ * our own default.
+ */
+ selec = scalarineqsel(root, ltop, isgt, false, collation, &leftvar,
+ rightmin, op_righttype);
+ if (selec != DEFAULT_INEQ_SEL)
+ *leftstart = selec;
+
+ /* And similarly for the right variable. */
+ selec = scalarineqsel(root, revltop, isgt, false, collation, &rightvar,
+ leftmin, op_lefttype);
+ if (selec != DEFAULT_INEQ_SEL)
+ *rightstart = selec;
+
+ /*
+ * Only one of the two "start" fractions can really be more than zero;
+ * believe the larger estimate and reset the other one to exactly 0.0. If
+ * we get exactly equal estimates (as can easily happen with self-joins),
+ * believe neither.
+ */
+ if (*leftstart < *rightstart)
+ *leftstart = 0.0;
+ else if (*leftstart > *rightstart)
+ *rightstart = 0.0;
+ else
+ *leftstart = *rightstart = 0.0;
+
+ /*
+ * If the sort order is nulls-first, we're going to have to skip over any
+ * nulls too. These would not have been counted by scalarineqsel, and we
+ * can safely add in this fraction regardless of whether we believe
+ * scalarineqsel's results or not. But be sure to clamp the sum to 1.0!
+ */
+ if (nulls_first)
+ {
+ Form_pg_statistic stats;
+
+ if (HeapTupleIsValid(leftvar.statsTuple))
+ {
+ stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
+ *leftstart += stats->stanullfrac;
+ CLAMP_PROBABILITY(*leftstart);
+ *leftend += stats->stanullfrac;
+ CLAMP_PROBABILITY(*leftend);
+ }
+ if (HeapTupleIsValid(rightvar.statsTuple))
+ {
+ stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
+ *rightstart += stats->stanullfrac;
+ CLAMP_PROBABILITY(*rightstart);
+ *rightend += stats->stanullfrac;
+ CLAMP_PROBABILITY(*rightend);
+ }
+ }
+
+ /* Disbelieve start >= end, just in case that can happen */
+ if (*leftstart >= *leftend)
+ {
+ *leftstart = 0.0;
+ *leftend = 1.0;
+ }
+ if (*rightstart >= *rightend)
+ {
+ *rightstart = 0.0;
+ *rightend = 1.0;
+ }
+
+fail:
+ ReleaseVariableStats(leftvar);
+ ReleaseVariableStats(rightvar);
+}
+
+
+/*
+ * matchingsel -- generic matching-operator selectivity support
+ *
+ * Use these for any operators that (a) are on data types for which we collect
+ * standard statistics, and (b) have behavior for which the default estimate
+ * (twice DEFAULT_EQ_SEL) is sane. Typically that is good for match-like
+ * operators.
+ */
+
+Datum
+matchingsel(PG_FUNCTION_ARGS)
+{
+ PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+ Oid operator = PG_GETARG_OID(1);
+ List *args = (List *) PG_GETARG_POINTER(2);
+ int varRelid = PG_GETARG_INT32(3);
+ Oid collation = PG_GET_COLLATION();
+ double selec;
+
+ /* Use generic restriction selectivity logic. */
+ selec = generic_restriction_selectivity(root, operator, collation,
+ args, varRelid,
+ DEFAULT_MATCHING_SEL);
+
+ PG_RETURN_FLOAT8((float8) selec);
+}
+
+Datum
+matchingjoinsel(PG_FUNCTION_ARGS)
+{
+ /* Just punt, for the moment. */
+ PG_RETURN_FLOAT8(DEFAULT_MATCHING_SEL);
+}
+
+
+/*
+ * Helper routine for estimate_num_groups: add an item to a list of
+ * GroupVarInfos, but only if it's not known equal to any of the existing
+ * entries.
+ */
+typedef struct
+{
+ Node *var; /* might be an expression, not just a Var */
+ RelOptInfo *rel; /* relation it belongs to */
+ double ndistinct; /* # distinct values */
+ bool isdefault; /* true if DEFAULT_NUM_DISTINCT was used */
+} GroupVarInfo;
+
+static List *
+add_unique_group_var(PlannerInfo *root, List *varinfos,
+ Node *var, VariableStatData *vardata)
+{
+ GroupVarInfo *varinfo;
+ double ndistinct;
+ bool isdefault;
+ ListCell *lc;
+
+ ndistinct = get_variable_numdistinct(vardata, &isdefault);
+
+ foreach(lc, varinfos)
+ {
+ varinfo = (GroupVarInfo *) lfirst(lc);
+
+ /* Drop exact duplicates */
+ if (equal(var, varinfo->var))
+ return varinfos;
+
+ /*
+ * Drop known-equal vars, but only if they belong to different
+ * relations (see comments for estimate_num_groups)
+ */
+ if (vardata->rel != varinfo->rel &&
+ exprs_known_equal(root, var, varinfo->var))
+ {
+ if (varinfo->ndistinct <= ndistinct)
+ {
+ /* Keep older item, forget new one */
+ return varinfos;
+ }
+ else
+ {
+ /* Delete the older item */
+ varinfos = foreach_delete_current(varinfos, lc);
+ }
+ }
+ }
+
+ varinfo = (GroupVarInfo *) palloc(sizeof(GroupVarInfo));
+
+ varinfo->var = var;
+ varinfo->rel = vardata->rel;
+ varinfo->ndistinct = ndistinct;
+ varinfo->isdefault = isdefault;
+ varinfos = lappend(varinfos, varinfo);
+ return varinfos;
+}
+
+/*
+ * estimate_num_groups - Estimate number of groups in a grouped query
+ *
+ * Given a query having a GROUP BY clause, estimate how many groups there
+ * will be --- ie, the number of distinct combinations of the GROUP BY
+ * expressions.
+ *
+ * This routine is also used to estimate the number of rows emitted by
+ * a DISTINCT filtering step; that is an isomorphic problem. (Note:
+ * actually, we only use it for DISTINCT when there's no grouping or
+ * aggregation ahead of the DISTINCT.)
+ *
+ * Inputs:
+ * root - the query
+ * groupExprs - list of expressions being grouped by
+ * input_rows - number of rows estimated to arrive at the group/unique
+ * filter step
+ * pgset - NULL, or a List** pointing to a grouping set to filter the
+ * groupExprs against
+ *
+ * Outputs:
+ * estinfo - When passed as non-NULL, the function will set bits in the
+ * "flags" field in order to provide callers with additional information
+ * about the estimation. Currently, we only set the SELFLAG_USED_DEFAULT
+ * bit if we used any default values in the estimation.
+ *
+ * Given the lack of any cross-correlation statistics in the system, it's
+ * impossible to do anything really trustworthy with GROUP BY conditions
+ * involving multiple Vars. We should however avoid assuming the worst
+ * case (all possible cross-product terms actually appear as groups) since
+ * very often the grouped-by Vars are highly correlated. Our current approach
+ * is as follows:
+ * 1. Expressions yielding boolean are assumed to contribute two groups,
+ * independently of their content, and are ignored in the subsequent
+ * steps. This is mainly because tests like "col IS NULL" break the
+ * heuristic used in step 2 especially badly.
+ * 2. Reduce the given expressions to a list of unique Vars used. For
+ * example, GROUP BY a, a + b is treated the same as GROUP BY a, b.
+ * It is clearly correct not to count the same Var more than once.
+ * It is also reasonable to treat f(x) the same as x: f() cannot
+ * increase the number of distinct values (unless it is volatile,
+ * which we consider unlikely for grouping), but it probably won't
+ * reduce the number of distinct values much either.
+ * As a special case, if a GROUP BY expression can be matched to an
+ * expressional index for which we have statistics, then we treat the
+ * whole expression as though it were just a Var.
+ * 3. If the list contains Vars of different relations that are known equal
+ * due to equivalence classes, then drop all but one of the Vars from each
+ * known-equal set, keeping the one with smallest estimated # of values
+ * (since the extra values of the others can't appear in joined rows).
+ * Note the reason we only consider Vars of different relations is that
+ * if we considered ones of the same rel, we'd be double-counting the
+ * restriction selectivity of the equality in the next step.
+ * 4. For Vars within a single source rel, we multiply together the numbers
+ * of values, clamp to the number of rows in the rel (divided by 10 if
+ * more than one Var), and then multiply by a factor based on the
+ * selectivity of the restriction clauses for that rel. When there's
+ * more than one Var, the initial product is probably too high (it's the
+ * worst case) but clamping to a fraction of the rel's rows seems to be a
+ * helpful heuristic for not letting the estimate get out of hand. (The
+ * factor of 10 is derived from pre-Postgres-7.4 practice.) The factor
+ * we multiply by to adjust for the restriction selectivity assumes that
+ * the restriction clauses are independent of the grouping, which may not
+ * be a valid assumption, but it's hard to do better.
+ * 5. If there are Vars from multiple rels, we repeat step 4 for each such
+ * rel, and multiply the results together.
+ * Note that rels not containing grouped Vars are ignored completely, as are
+ * join clauses. Such rels cannot increase the number of groups, and we
+ * assume such clauses do not reduce the number either (somewhat bogus,
+ * but we don't have the info to do better).
+ */
+double
+estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
+ List **pgset, EstimationInfo *estinfo)
+{
+ List *varinfos = NIL;
+ double srf_multiplier = 1.0;
+ double numdistinct;
+ ListCell *l;
+ int i;
+
+ /* Zero the estinfo output parameter, if non-NULL */
+ if (estinfo != NULL)
+ memset(estinfo, 0, sizeof(EstimationInfo));
+
+ /*
+ * We don't ever want to return an estimate of zero groups, as that tends
+ * to lead to division-by-zero and other unpleasantness. The input_rows
+ * estimate is usually already at least 1, but clamp it just in case it
+ * isn't.
+ */
+ input_rows = clamp_row_est(input_rows);
+
+ /*
+ * If no grouping columns, there's exactly one group. (This can't happen
+ * for normal cases with GROUP BY or DISTINCT, but it is possible for
+ * corner cases with set operations.)
+ */
+ if (groupExprs == NIL || (pgset && list_length(*pgset) < 1))
+ return 1.0;
+
+ /*
+ * Count groups derived from boolean grouping expressions. For other
+ * expressions, find the unique Vars used, treating an expression as a Var
+ * if we can find stats for it. For each one, record the statistical
+ * estimate of number of distinct values (total in its table, without
+ * regard for filtering).
+ */
+ numdistinct = 1.0;
+
+ i = 0;
+ foreach(l, groupExprs)
+ {
+ Node *groupexpr = (Node *) lfirst(l);
+ double this_srf_multiplier;
+ VariableStatData vardata;
+ List *varshere;
+ ListCell *l2;
+
+ /* is expression in this grouping set? */
+ if (pgset && !list_member_int(*pgset, i++))
+ continue;
+
+ /*
+ * Set-returning functions in grouping columns are a bit problematic.
+ * The code below will effectively ignore their SRF nature and come up
+ * with a numdistinct estimate as though they were scalar functions.
+ * We compensate by scaling up the end result by the largest SRF
+ * rowcount estimate. (This will be an overestimate if the SRF
+ * produces multiple copies of any output value, but it seems best to
+ * assume the SRF's outputs are distinct. In any case, it's probably
+ * pointless to worry too much about this without much better
+ * estimates for SRF output rowcounts than we have today.)
+ */
+ this_srf_multiplier = expression_returns_set_rows(root, groupexpr);
+ if (srf_multiplier < this_srf_multiplier)
+ srf_multiplier = this_srf_multiplier;
+
+ /* Short-circuit for expressions returning boolean */
+ if (exprType(groupexpr) == BOOLOID)
+ {
+ numdistinct *= 2.0;
+ continue;
+ }
+
+ /*
+ * If examine_variable is able to deduce anything about the GROUP BY
+ * expression, treat it as a single variable even if it's really more
+ * complicated.
+ *
+ * XXX This has the consequence that if there's a statistics object on
+ * the expression, we don't split it into individual Vars. This
+ * affects our selection of statistics in
+ * estimate_multivariate_ndistinct, because it's probably better to
+ * use more accurate estimate for each expression and treat them as
+ * independent, than to combine estimates for the extracted variables
+ * when we don't know how that relates to the expressions.
+ */
+ examine_variable(root, groupexpr, 0, &vardata);
+ if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
+ {
+ varinfos = add_unique_group_var(root, varinfos,
+ groupexpr, &vardata);
+ ReleaseVariableStats(vardata);
+ continue;
+ }
+ ReleaseVariableStats(vardata);
+
+ /*
+ * Else pull out the component Vars. Handle PlaceHolderVars by
+ * recursing into their arguments (effectively assuming that the
+ * PlaceHolderVar doesn't change the number of groups, which boils
+ * down to ignoring the possible addition of nulls to the result set).
+ */
+ varshere = pull_var_clause(groupexpr,
+ PVC_RECURSE_AGGREGATES |
+ PVC_RECURSE_WINDOWFUNCS |
+ PVC_RECURSE_PLACEHOLDERS);
+
+ /*
+ * If we find any variable-free GROUP BY item, then either it is a
+ * constant (and we can ignore it) or it contains a volatile function;
+ * in the latter case we punt and assume that each input row will
+ * yield a distinct group.
+ */
+ if (varshere == NIL)
+ {
+ if (contain_volatile_functions(groupexpr))
+ return input_rows;
+ continue;
+ }
+
+ /*
+ * Else add variables to varinfos list
+ */
+ foreach(l2, varshere)
+ {
+ Node *var = (Node *) lfirst(l2);
+
+ examine_variable(root, var, 0, &vardata);
+ varinfos = add_unique_group_var(root, varinfos, var, &vardata);
+ ReleaseVariableStats(vardata);
+ }
+ }
+
+ /*
+ * If now no Vars, we must have an all-constant or all-boolean GROUP BY
+ * list.
+ */
+ if (varinfos == NIL)
+ {
+ /* Apply SRF multiplier as we would do in the long path */
+ numdistinct *= srf_multiplier;
+ /* Round off */
+ numdistinct = ceil(numdistinct);
+ /* Guard against out-of-range answers */
+ if (numdistinct > input_rows)
+ numdistinct = input_rows;
+ if (numdistinct < 1.0)
+ numdistinct = 1.0;
+ return numdistinct;
+ }
+
+ /*
+ * Group Vars by relation and estimate total numdistinct.
+ *
+ * For each iteration of the outer loop, we process the frontmost Var in
+ * varinfos, plus all other Vars in the same relation. We remove these
+ * Vars from the newvarinfos list for the next iteration. This is the
+ * easiest way to group Vars of same rel together.
+ */
+ do
+ {
+ GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
+ RelOptInfo *rel = varinfo1->rel;
+ double reldistinct = 1;
+ double relmaxndistinct = reldistinct;
+ int relvarcount = 0;
+ List *newvarinfos = NIL;
+ List *relvarinfos = NIL;
+
+ /*
+ * Split the list of varinfos in two - one for the current rel, one
+ * for remaining Vars on other rels.
+ */
+ relvarinfos = lappend(relvarinfos, varinfo1);
+ for_each_from(l, varinfos, 1)
+ {
+ GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
+
+ if (varinfo2->rel == varinfo1->rel)
+ {
+ /* varinfos on current rel */
+ relvarinfos = lappend(relvarinfos, varinfo2);
+ }
+ else
+ {
+ /* not time to process varinfo2 yet */
+ newvarinfos = lappend(newvarinfos, varinfo2);
+ }
+ }
+
+ /*
+ * Get the numdistinct estimate for the Vars of this rel. We
+ * iteratively search for multivariate n-distinct with maximum number
+ * of vars; assuming that each var group is independent of the others,
+ * we multiply them together. Any remaining relvarinfos after no more
+ * multivariate matches are found are assumed independent too, so
+ * their individual ndistinct estimates are multiplied also.
+ *
+ * While iterating, count how many separate numdistinct values we
+ * apply. We apply a fudge factor below, but only if we multiplied
+ * more than one such value.
+ */
+ while (relvarinfos)
+ {
+ double mvndistinct;
+
+ if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
+ &mvndistinct))
+ {
+ reldistinct *= mvndistinct;
+ if (relmaxndistinct < mvndistinct)
+ relmaxndistinct = mvndistinct;
+ relvarcount++;
+ }
+ else
+ {
+ foreach(l, relvarinfos)
+ {
+ GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
+
+ reldistinct *= varinfo2->ndistinct;
+ if (relmaxndistinct < varinfo2->ndistinct)
+ relmaxndistinct = varinfo2->ndistinct;
+ relvarcount++;
+
+ /*
+ * When varinfo2's isdefault is set then we'd better set
+ * the SELFLAG_USED_DEFAULT bit in the EstimationInfo.
+ */
+ if (estinfo != NULL && varinfo2->isdefault)
+ estinfo->flags |= SELFLAG_USED_DEFAULT;
+ }
+
+ /* we're done with this relation */
+ relvarinfos = NIL;
+ }
+ }
+
+ /*
+ * Sanity check --- don't divide by zero if empty relation.
+ */
+ Assert(IS_SIMPLE_REL(rel));
+ if (rel->tuples > 0)
+ {
+ /*
+ * Clamp to size of rel, or size of rel / 10 if multiple Vars. The
+ * fudge factor is because the Vars are probably correlated but we
+ * don't know by how much. We should never clamp to less than the
+ * largest ndistinct value for any of the Vars, though, since
+ * there will surely be at least that many groups.
+ */
+ double clamp = rel->tuples;
+
+ if (relvarcount > 1)
+ {
+ clamp *= 0.1;
+ if (clamp < relmaxndistinct)
+ {
+ clamp = relmaxndistinct;
+ /* for sanity in case some ndistinct is too large: */
+ if (clamp > rel->tuples)
+ clamp = rel->tuples;
+ }
+ }
+ if (reldistinct > clamp)
+ reldistinct = clamp;
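+
+ /*
+ * Numeric illustration (values invented): with rel->tuples = 10000 and
+ * two grouped Vars having ndistinct 300 and 50, the raw product is
+ * 15000; the clamp is 10000 * 0.1 = 1000, which is above
+ * relmaxndistinct = 300, so reldistinct is cut from 15000 to 1000.
+ */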
+
+ /*
+ * Update the estimate based on the restriction selectivity,
+ * guarding against division by zero when reldistinct is zero.
+ * Also skip this if we know that we are returning all rows.
+ */
+ if (reldistinct > 0 && rel->rows < rel->tuples)
+ {
+ /*
+ * Given a table containing N rows with n distinct values in a
+ * uniform distribution, if we select p rows at random then
+ * the expected number of distinct values selected is
+ *
+ * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
+ *
+ * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
+ *
+ * See "Approximating block accesses in database
+ * organizations", S. B. Yao, Communications of the ACM,
+ * Volume 20 Issue 4, April 1977 Pages 260-261.
+ *
+ * Alternatively, re-arranging the terms from the factorials,
+ * this may be written as
+ *
+ * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
+ *
+ * This form of the formula is more efficient to compute in
+ * the common case where p is larger than N/n. Additionally,
+ * as pointed out by Dell'Era, if i << N for all terms in the
+ * product, it can be approximated by
+ *
+ * n * (1 - ((N-p)/N)^(N/n))
+ *
+ * See "Expected distinct values when selecting from a bag
+ * without replacement", Alberto Dell'Era,
+ * http://www.adellera.it/investigations/distinct_balls/.
+ *
+ * The condition i << N is equivalent to n >> 1, so this is a
+ * good approximation when the number of distinct values in
+ * the table is large. It turns out that this formula also
+ * works well even when n is small.
+ */
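+ /*
+ * Rough sanity check with invented values: for a table of N = 10000
+ * tuples, n = 100 distinct values, and a scan returning p = 1000 rows,
+ * the approximation gives 100 * (1 - (9000/10000)^(10000/100)), i.e.
+ * 100 * (1 - 0.9^100), which is about 99.997: nearly every distinct
+ * value is expected to survive the filter.
+ */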
+ reldistinct *=
+ (1 - pow((rel->tuples - rel->rows) / rel->tuples,
+ rel->tuples / reldistinct));
+ }
+ reldistinct = clamp_row_est(reldistinct);
+
+ /*
+ * Update estimate of total distinct groups.
+ */
+ numdistinct *= reldistinct;
+ }
+
+ varinfos = newvarinfos;
+ } while (varinfos != NIL);
+
+ /* Now we can account for the effects of any SRFs */
+ numdistinct *= srf_multiplier;
+
+ /* Round off */
+ numdistinct = ceil(numdistinct);
+
+ /* Guard against out-of-range answers */
+ if (numdistinct > input_rows)
+ numdistinct = input_rows;
+ if (numdistinct < 1.0)
+ numdistinct = 1.0;
+
+ return numdistinct;
+}
+
+/*
+ * Estimate hash bucket statistics when the specified expression is used
+ * as a hash key for the given number of buckets.
+ *
+ * This attempts to determine two values:
+ *
+ * 1. The frequency of the most common value of the expression (returns
+ * zero into *mcv_freq if we can't get that).
+ *
+ * 2. The "bucketsize fraction", ie, average number of entries in a bucket
+ * divided by total tuples in relation.
+ *
+ * XXX This is really pretty bogus since we're effectively assuming that the
+ * distribution of hash keys will be the same after applying restriction
+ * clauses as it was in the underlying relation. However, we are not nearly
+ * smart enough to figure out how the restrict clauses might change the
+ * distribution, so this will have to do for now.
+ *
+ * We are passed the number of buckets the executor will use for the given
+ * input relation. If the data were perfectly distributed, with the same
+ * number of tuples going into each available bucket, then the bucketsize
+ * fraction would be 1/nbuckets. But this happy state of affairs will occur
+ * only if (a) there are at least nbuckets distinct data values, and (b)
+ * we have a not-too-skewed data distribution. Otherwise the buckets will
+ * be nonuniformly occupied. If the other relation in the join has a key
+ * distribution similar to this one's, then the most-loaded buckets are
+ * exactly those that will be probed most often. Therefore, the "average"
+ * bucket size for costing purposes should really be taken as something close
+ * to the "worst case" bucket size. We try to estimate this by adjusting the
+ * fraction if there are too few distinct data values, and then scaling up
+ * by the ratio of the most common value's frequency to the average frequency.
+ *
+ * If no statistics are available, use a default estimate of 0.1. This will
+ * discourage use of a hash rather strongly if the inner relation is large,
+ * which is what we want. We do not want to hash unless we know that the
+ * inner rel is well-dispersed (or the alternatives seem much worse).
+ *
+ * The caller should also check that the mcv_freq is not so large that the
+ * most common value would by itself require an impractically large bucket.
+ * In a hash join, the executor can split buckets if they get too big, but
+ * obviously that doesn't help for a bucket that contains many duplicates of
+ * the same value.
+ */
+void
+estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets,
+ Selectivity *mcv_freq,
+ Selectivity *bucketsize_frac)
+{
+ VariableStatData vardata;
+ double estfract,
+ ndistinct,
+ stanullfrac,
+ avgfreq;
+ bool isdefault;
+ AttStatsSlot sslot;
+
+ examine_variable(root, hashkey, 0, &vardata);
+
+ /* Look up the frequency of the most common value, if available */
+ *mcv_freq = 0.0;
+
+ if (HeapTupleIsValid(vardata.statsTuple))
+ {
+ if (get_attstatsslot(&sslot, vardata.statsTuple,
+ STATISTIC_KIND_MCV, InvalidOid,
+ ATTSTATSSLOT_NUMBERS))
+ {
+ /*
+ * The first MCV stat is for the most common value.
+ */
+ if (sslot.nnumbers > 0)
+ *mcv_freq = sslot.numbers[0];
+ free_attstatsslot(&sslot);
+ }
+ }
+
+ /* Get number of distinct values */
+ ndistinct = get_variable_numdistinct(&vardata, &isdefault);
+
+ /*
+ * If ndistinct isn't real, punt. We normally return 0.1, but if the
+ * mcv_freq is known to be even higher than that, use it instead.
+ */
+ if (isdefault)
+ {
+ *bucketsize_frac = (Selectivity) Max(0.1, *mcv_freq);
+ ReleaseVariableStats(vardata);
+ return;
+ }
+
+ /* Get fraction that are null */
+ if (HeapTupleIsValid(vardata.statsTuple))
+ {
+ Form_pg_statistic stats;
+
+ stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
+ stanullfrac = stats->stanullfrac;
+ }
+ else
+ stanullfrac = 0.0;
+
+ /* Compute avg freq of all distinct data values in raw relation */
+ avgfreq = (1.0 - stanullfrac) / ndistinct;
+
+ /*
+ * Adjust ndistinct to account for restriction clauses. Observe we are
+ * assuming that the data distribution is affected uniformly by the
+ * restriction clauses!
+ *
+ * XXX Possibly better way, but much more expensive: multiply by
+ * selectivity of rel's restriction clauses that mention the target Var.
+ */
+ if (vardata.rel && vardata.rel->tuples > 0)
+ {
+ ndistinct *= vardata.rel->rows / vardata.rel->tuples;
+ ndistinct = clamp_row_est(ndistinct);
+ }
+
+ /*
+ * Initial estimate of bucketsize fraction is 1/nbuckets as long as the
+ * number of buckets is less than the expected number of distinct values;
+ * otherwise it is 1/ndistinct.
+ */
+ if (ndistinct > nbuckets)
+ estfract = 1.0 / nbuckets;
+ else
+ estfract = 1.0 / ndistinct;
+
+ /*
+ * Adjust estimated bucketsize upward to account for skewed distribution.
+ */
+ if (avgfreq > 0.0 && *mcv_freq > avgfreq)
+ estfract *= *mcv_freq / avgfreq;
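+
+ /*
+ * Example with assumed statistics (stanullfrac = 0, no restriction
+ * adjustment): nbuckets = 1024 and ndistinct = 200 give an initial
+ * estfract of 1/200 = 0.005 and avgfreq = 0.005; an mcv_freq of 0.10
+ * then scales estfract by 0.10 / 0.005 = 20, for a worst-case-leaning
+ * bucketsize fraction of 0.10 before clamping.
+ */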
+
+ /*
+ * Clamp bucketsize to sane range (the above adjustment could easily
+ * produce an out-of-range result). We set the lower bound a little above
+ * zero, since zero isn't a very sane result.
+ */
+ if (estfract < 1.0e-6)
+ estfract = 1.0e-6;
+ else if (estfract > 1.0)
+ estfract = 1.0;
+
+ *bucketsize_frac = (Selectivity) estfract;
+
+ ReleaseVariableStats(vardata);
+}
+
+/*
+ * estimate_hashagg_tablesize
+ * estimate the number of bytes that a hash aggregate hashtable will
+ * require based on the agg_costs, path width and number of groups.
+ *
+ * We return the result as "double" to forestall any possible overflow
+ * problem in the multiplication by dNumGroups.
+ *
+ * XXX this may be over-estimating the size now that hashagg knows to omit
+ * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
+ * grouping columns not in the hashed set are counted here even though hashagg
+ * won't store them. Is this a problem?
+ */
+double
+estimate_hashagg_tablesize(PlannerInfo *root, Path *path,
+ const AggClauseCosts *agg_costs, double dNumGroups)
+{
+ Size hashentrysize;
+
+ hashentrysize = hash_agg_entry_size(list_length(root->aggtransinfos),
+ path->pathtarget->width,
+ agg_costs->transitionSpace);
+
+ /*
+ * Note that this disregards the effect of fill-factor and growth policy
+ * of the hash table. That's probably ok, given that the default
+ * fill-factor is relatively high. It'd be hard to meaningfully factor in
+ * "double-in-size" growth policies here.
+ */
+ return hashentrysize * dNumGroups;
+}
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Support routines
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * Find applicable ndistinct statistics for the given list of VarInfos (which
+ * must all belong to the given rel), and update *ndistinct to the estimate of
+ * the MVNDistinctItem that best matches. If a match is found, *varinfos is
+ * updated to remove the list of matched varinfos.
+ *
+ * Varinfos that aren't for simple Vars are ignored.
+ *
+ * Return true if we're able to find a match, false otherwise.
+ */
+static bool
+estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
+ List **varinfos, double *ndistinct)
+{
+ ListCell *lc;
+ int nmatches_vars;
+ int nmatches_exprs;
+ Oid statOid = InvalidOid;
+ MVNDistinct *stats;
+ StatisticExtInfo *matched_info = NULL;
+ RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
+
+ /* bail out immediately if the table has no extended statistics */
+ if (!rel->statlist)
+ return false;
+
+ /* look for the ndistinct statistics object matching the most vars */
+ nmatches_vars = 0; /* we require at least two matches */
+ nmatches_exprs = 0;
+ foreach(lc, rel->statlist)
+ {
+ ListCell *lc2;
+ StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
+ int nshared_vars = 0;
+ int nshared_exprs = 0;
+
+ /* skip statistics of other kinds */
+ if (info->kind != STATS_EXT_NDISTINCT)
+ continue;
+
+ /* skip statistics with mismatching stxdinherit value */
+ if (info->inherit != rte->inh)
+ continue;
+
+ /*
+ * Determine how many expressions (and variables in non-matched
+ * expressions) match. We'll then use these numbers to pick the
+ * statistics object that best matches the clauses.
+ */
+ foreach(lc2, *varinfos)
+ {
+ ListCell *lc3;
+ GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc2);
+ AttrNumber attnum;
+
+ Assert(varinfo->rel == rel);
+
+ /* simple Var, search in statistics keys directly */
+ if (IsA(varinfo->var, Var))
+ {
+ attnum = ((Var *) varinfo->var)->varattno;
+
+ /*
+ * Ignore system attributes - we don't support statistics on
+ * them, so can't match them (and it'd fail as the values are
+ * negative).
+ */
+ if (!AttrNumberIsForUserDefinedAttr(attnum))
+ continue;
+
+ if (bms_is_member(attnum, info->keys))
+ nshared_vars++;
+
+ continue;
+ }
+
+ /* expression - see if it's in the statistics object */
+ foreach(lc3, info->exprs)
+ {
+ Node *expr = (Node *) lfirst(lc3);
+
+ if (equal(varinfo->var, expr))
+ {
+ nshared_exprs++;
+ break;
+ }
+ }
+ }
+
+ if (nshared_vars + nshared_exprs < 2)
+ continue;
+
+ /*
+ * Does this statistics object match more columns than the currently
+ * best object? If so, use this one instead.
+ *
+ * XXX This should break ties using name of the object, or something
+ * like that, to make the outcome stable.
+ */
+ if ((nshared_exprs > nmatches_exprs) ||
+ (((nshared_exprs == nmatches_exprs)) && (nshared_vars > nmatches_vars)))
+ {
+ statOid = info->statOid;
+ nmatches_vars = nshared_vars;
+ nmatches_exprs = nshared_exprs;
+ matched_info = info;
+ }
+ }
+
+ /* No match? */
+ if (statOid == InvalidOid)
+ return false;
+
+ Assert(nmatches_vars + nmatches_exprs > 1);
+
+ stats = statext_ndistinct_load(statOid, rte->inh);
+
+ /*
+ * If we have a match, search it for the specific item that matches (there
+ * must be one), and construct the output values.
+ */
+ if (stats)
+ {
+ int i;
+ List *newlist = NIL;
+ MVNDistinctItem *item = NULL;
+ ListCell *lc2;
+ Bitmapset *matched = NULL;
+ AttrNumber attnum_offset;
+
+ /*
+ * How much do we need to offset the attnums? If there are no
+ * expressions, no offset is needed. Otherwise offset enough to move
+ * the lowest one (which is equal to the number of expressions) to 1.
+ */
+ if (matched_info->exprs)
+ attnum_offset = (list_length(matched_info->exprs) + 1);
+ else
+ attnum_offset = 0;
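+
+ /*
+ * For example (hypothetical statistics object): with three expressions,
+ * attnum_offset is 4, so expression attnums -1, -2, -3 map to 3, 2, 1
+ * and ordinary column attnums 1, 2, ... map to 5, 6, ...; everything
+ * lands in the positive range the bitmap code expects.
+ */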
+
+ /* see what actually matched */
+ foreach(lc2, *varinfos)
+ {
+ ListCell *lc3;
+ int idx;
+ bool found = false;
+
+ GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc2);
+
+ /*
+ * Process a simple Var expression, by matching it to keys
+ * directly. If there's a matching expression, we'll try matching
+ * it later.
+ */
+ if (IsA(varinfo->var, Var))
+ {
+ AttrNumber attnum = ((Var *) varinfo->var)->varattno;
+
+ /*
+ * Ignore expressions on system attributes. Can't rely on the
+ * bms check for negative values.
+ */
+ if (!AttrNumberIsForUserDefinedAttr(attnum))
+ continue;
+
+ /* Is the variable covered by the statistics object? */
+ if (!bms_is_member(attnum, matched_info->keys))
+ continue;
+
+ attnum = attnum + attnum_offset;
+
+ /* ensure sufficient offset */
+ Assert(AttrNumberIsForUserDefinedAttr(attnum));
+
+ matched = bms_add_member(matched, attnum);
+
+ found = true;
+ }
+
+ /*
+ * XXX Maybe we should allow searching the expressions even if we
+ * found an attribute matching the expression? That would handle
+ * trivial expressions like "(a)" but it seems fairly useless.
+ */
+ if (found)
+ continue;
+
+ /* expression - see if it's in the statistics object */
+ idx = 0;
+ foreach(lc3, matched_info->exprs)
+ {
+ Node *expr = (Node *) lfirst(lc3);
+
+ if (equal(varinfo->var, expr))
+ {
+ AttrNumber attnum = -(idx + 1);
+
+ attnum = attnum + attnum_offset;
+
+ /* ensure sufficient offset */
+ Assert(AttrNumberIsForUserDefinedAttr(attnum));
+
+ matched = bms_add_member(matched, attnum);
+
+ /* there should be just one matching expression */
+ break;
+ }
+
+ idx++;
+ }
+ }
+
+ /* Find the specific item that exactly matches the combination */
+ for (i = 0; i < stats->nitems; i++)
+ {
+ int j;
+ MVNDistinctItem *tmpitem = &stats->items[i];
+
+ if (tmpitem->nattributes != bms_num_members(matched))
+ continue;
+
+ /* assume it's the right item */
+ item = tmpitem;
+
+ /* check that all item attributes/expressions fit the match */
+ for (j = 0; j < tmpitem->nattributes; j++)
+ {
+ AttrNumber attnum = tmpitem->attributes[j];
+
+ /*
+ * Thanks to how we constructed the matched bitmap above, we
+ * can just offset all attnums the same way.
+ */
+ attnum = attnum + attnum_offset;
+
+ if (!bms_is_member(attnum, matched))
+ {
+ /* nah, it's not this item */
+ item = NULL;
+ break;
+ }
+ }
+
+ /*
+ * If the item has all the matched attributes, we know it's the
+ * right one - there can't be a better one matching more attributes.
+ */
+ if (item)
+ break;
+ }
+
+ /*
+ * Make sure we found an item. There has to be one, because ndistinct
+ * statistics includes all combinations of attributes.
+ */
+ if (!item)
+ elog(ERROR, "corrupt MVNDistinct entry");
+
+ /* Form the output varinfo list, keeping only unmatched ones */
+ foreach(lc, *varinfos)
+ {
+ GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
+ ListCell *lc3;
+ bool found = false;
+
+ /*
+ * Let's look at plain variables first, because it's the most
+ * common case and the check is quite cheap. We can simply get the
+ * attnum and check (with an offset) matched bitmap.
+ */
+ if (IsA(varinfo->var, Var))
+ {
+ AttrNumber attnum = ((Var *) varinfo->var)->varattno;
+
+ /*
+ * If it's a system attribute, we're done. We don't support
+ * extended statistics on system attributes, so it's clearly
+ * not matched. Just keep the expression and continue.
+ */
+ if (!AttrNumberIsForUserDefinedAttr(attnum))
+ {
+ newlist = lappend(newlist, varinfo);
+ continue;
+ }
+
+ /* apply the same offset as above */
+ attnum += attnum_offset;
+
+ /* if it's not matched, keep the varinfo */
+ if (!bms_is_member(attnum, matched))
+ newlist = lappend(newlist, varinfo);
+
+ /* The rest of the loop deals with complex expressions. */
+ continue;
+ }
+
+ /*
+ * Process complex expressions, not just simple Vars.
+ *
+ * First, we search for an exact match of an expression. If we
+ * find one, we can just discard the whole GroupExprInfo, with all
+ * the variables we extracted from it.
+ *
+ * Otherwise we inspect the individual vars and try matching them
+ * to variables in the item.
+ */
+ foreach(lc3, matched_info->exprs)
+ {
+ Node *expr = (Node *) lfirst(lc3);
+
+ if (equal(varinfo->var, expr))
+ {
+ found = true;
+ break;
+ }
+ }
+
+ /* found exact match, skip */
+ if (found)
+ continue;
+
+ newlist = lappend(newlist, varinfo);
+ }
+
+ *varinfos = newlist;
+ *ndistinct = item->ndistinct;
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * convert_to_scalar
+ * Convert non-NULL values of the indicated types to the comparison
+ * scale needed by scalarineqsel().
+ * Returns "true" if successful.
+ *
+ * XXX this routine is a hack: ideally we should look up the conversion
+ * subroutines in pg_type.
+ *
+ * All numeric datatypes are simply converted to their equivalent
+ * "double" values. (NUMERIC values that are outside the range of "double"
+ * are clamped to +/- HUGE_VAL.)
+ *
+ * String datatypes are converted by convert_string_to_scalar(),
+ * which is explained below. The reason why this routine deals with
+ * three values at a time, not just one, is that we need it for strings.
+ *
+ * The bytea datatype is just enough different from strings that it has
+ * to be treated separately.
+ *
+ * The several datatypes representing absolute times are all converted
+ * to Timestamp, which is actually an int64, and then we promote that to
+ * a double. Note this will give correct results even for the "special"
+ * values of Timestamp, since those are chosen to compare correctly;
+ * see timestamp_cmp.
+ *
+ * The several datatypes representing relative times (intervals) are all
+ * converted to measurements expressed in seconds.
+ */
+static bool
+convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue,
+ Datum lobound, Datum hibound, Oid boundstypid,
+ double *scaledlobound, double *scaledhibound)
+{
+ bool failure = false;
+
+ /*
+ * Both the valuetypid and the boundstypid should exactly match the
+ * declared input type(s) of the operator we are invoked for. However,
+ * extensions might try to use scalarineqsel as estimator for operators
+ * with input type(s) we don't handle here; in such cases, we want to
+ * return false, not fail. In any case, we mustn't assume that valuetypid
+ * and boundstypid are identical.
+ *
+ * XXX The histogram we are interpolating between points of could belong
+ * to a column that's only binary-compatible with the declared type. In
+ * essence we are assuming that the semantics of binary-compatible types
+ * are enough alike that we can use a histogram generated with one type's
+ * operators to estimate selectivity for the other's. This is outright
+ * wrong in some cases --- in particular signed versus unsigned
+ * interpretation could trip us up. But it's useful enough in the
+ * majority of cases that we do it anyway. Should think about more
+ * rigorous ways to do it.
+ */
+ switch (valuetypid)
+ {
+ /*
+ * Built-in numeric types
+ */
+ case BOOLOID:
+ case INT2OID:
+ case INT4OID:
+ case INT8OID:
+ case FLOAT4OID:
+ case FLOAT8OID:
+ case NUMERICOID:
+ case OIDOID:
+ case REGPROCOID:
+ case REGPROCEDUREOID:
+ case REGOPEROID:
+ case REGOPERATOROID:
+ case REGCLASSOID:
+ case REGTYPEOID:
+ case REGCOLLATIONOID:
+ case REGCONFIGOID:
+ case REGDICTIONARYOID:
+ case REGROLEOID:
+ case REGNAMESPACEOID:
+ *scaledvalue = convert_numeric_to_scalar(value, valuetypid,
+ &failure);
+ *scaledlobound = convert_numeric_to_scalar(lobound, boundstypid,
+ &failure);
+ *scaledhibound = convert_numeric_to_scalar(hibound, boundstypid,
+ &failure);
+ return !failure;
+
+ /*
+ * Built-in string types
+ */
+ case CHAROID:
+ case BPCHAROID:
+ case VARCHAROID:
+ case TEXTOID:
+ case NAMEOID:
+ {
+ char *valstr = convert_string_datum(value, valuetypid,
+ collid, &failure);
+ char *lostr = convert_string_datum(lobound, boundstypid,
+ collid, &failure);
+ char *histr = convert_string_datum(hibound, boundstypid,
+ collid, &failure);
+
+ /*
+ * Bail out if any of the values is not of string type. We
+ * might leak converted strings for the other value(s), but
+ * that's not worth troubling over.
+ */
+ if (failure)
+ return false;
+
+ convert_string_to_scalar(valstr, scaledvalue,
+ lostr, scaledlobound,
+ histr, scaledhibound);
+ pfree(valstr);
+ pfree(lostr);
+ pfree(histr);
+ return true;
+ }
+
+ /*
+ * Built-in bytea type
+ */
+ case BYTEAOID:
+ {
+ /* We only support bytea vs bytea comparison */
+ if (boundstypid != BYTEAOID)
+ return false;
+ convert_bytea_to_scalar(value, scaledvalue,
+ lobound, scaledlobound,
+ hibound, scaledhibound);
+ return true;
+ }
+
+ /*
+ * Built-in time types
+ */
+ case TIMESTAMPOID:
+ case TIMESTAMPTZOID:
+ case DATEOID:
+ case INTERVALOID:
+ case TIMEOID:
+ case TIMETZOID:
+ *scaledvalue = convert_timevalue_to_scalar(value, valuetypid,
+ &failure);
+ *scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid,
+ &failure);
+ *scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid,
+ &failure);
+ return !failure;
+
+ /*
+ * Built-in network types
+ */
+ case INETOID:
+ case CIDROID:
+ case MACADDROID:
+ case MACADDR8OID:
+ *scaledvalue = convert_network_to_scalar(value, valuetypid,
+ &failure);
+ *scaledlobound = convert_network_to_scalar(lobound, boundstypid,
+ &failure);
+ *scaledhibound = convert_network_to_scalar(hibound, boundstypid,
+ &failure);
+ return !failure;
+ }
+ /* Don't know how to convert */
+ *scaledvalue = *scaledlobound = *scaledhibound = 0;
+ return false;
+}
+
+/*
+ * Do convert_to_scalar()'s work for any numeric data type.
+ *
+ * On failure (e.g., unsupported typid), set *failure to true;
+ * otherwise, that variable is not changed.
+ */
+static double
+convert_numeric_to_scalar(Datum value, Oid typid, bool *failure)
+{
+ switch (typid)
+ {
+ case BOOLOID:
+ return (double) DatumGetBool(value);
+ case INT2OID:
+ return (double) DatumGetInt16(value);
+ case INT4OID:
+ return (double) DatumGetInt32(value);
+ case INT8OID:
+ return (double) DatumGetInt64(value);
+ case FLOAT4OID:
+ return (double) DatumGetFloat4(value);
+ case FLOAT8OID:
+ return (double) DatumGetFloat8(value);
+ case NUMERICOID:
+ /* Note: out-of-range values will be clamped to +-HUGE_VAL */
+ return (double)
+ DatumGetFloat8(DirectFunctionCall1(numeric_float8_no_overflow,
+ value));
+ case OIDOID:
+ case REGPROCOID:
+ case REGPROCEDUREOID:
+ case REGOPEROID:
+ case REGOPERATOROID:
+ case REGCLASSOID:
+ case REGTYPEOID:
+ case REGCOLLATIONOID:
+ case REGCONFIGOID:
+ case REGDICTIONARYOID:
+ case REGROLEOID:
+ case REGNAMESPACEOID:
+ /* we can treat OIDs as integers... */
+ return (double) DatumGetObjectId(value);
+ }
+
+ *failure = true;
+ return 0;
+}
+
+/*
+ * Do convert_to_scalar()'s work for any character-string data type.
+ *
+ * String datatypes are converted to a scale that ranges from 0 to 1,
+ * where we visualize the bytes of the string as fractional digits.
+ *
+ * We do not want the base to be 256, however, since that tends to
+ * generate inflated selectivity estimates; few databases will have
+ * occurrences of all 256 possible byte values at each position.
+ * Instead, use the smallest and largest byte values seen in the bounds
+ * as the estimated range for each byte, after some fudging to deal with
+ * the fact that we probably aren't going to see the full range that way.
+ *
+ * An additional refinement is that we discard any common prefix of the
+ * three strings before computing the scaled values. This allows us to
+ * "zoom in" when we encounter a narrow data range. An example is a phone
+ * number database where all the values begin with the same area code.
+ * (Actually, the bounds will be adjacent histogram-bin-boundary values,
+ * so this is more likely to happen than you might think.)
+ */
+static void
+convert_string_to_scalar(char *value,
+ double *scaledvalue,
+ char *lobound,
+ double *scaledlobound,
+ char *hibound,
+ double *scaledhibound)
+{
+ int rangelo,
+ rangehi;
+ char *sptr;
+
+ rangelo = rangehi = (unsigned char) hibound[0];
+ for (sptr = lobound; *sptr; sptr++)
+ {
+ if (rangelo > (unsigned char) *sptr)
+ rangelo = (unsigned char) *sptr;
+ if (rangehi < (unsigned char) *sptr)
+ rangehi = (unsigned char) *sptr;
+ }
+ for (sptr = hibound; *sptr; sptr++)
+ {
+ if (rangelo > (unsigned char) *sptr)
+ rangelo = (unsigned char) *sptr;
+ if (rangehi < (unsigned char) *sptr)
+ rangehi = (unsigned char) *sptr;
+ }
+ /* If range includes any upper-case ASCII chars, make it include all */
+ if (rangelo <= 'Z' && rangehi >= 'A')
+ {
+ if (rangelo > 'A')
+ rangelo = 'A';
+ if (rangehi < 'Z')
+ rangehi = 'Z';
+ }
+ /* Ditto lower-case */
+ if (rangelo <= 'z' && rangehi >= 'a')
+ {
+ if (rangelo > 'a')
+ rangelo = 'a';
+ if (rangehi < 'z')
+ rangehi = 'z';
+ }
+ /* Ditto digits */
+ if (rangelo <= '9' && rangehi >= '0')
+ {
+ if (rangelo > '0')
+ rangelo = '0';
+ if (rangehi < '9')
+ rangehi = '9';
+ }
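+ /* e.g., with bounds "Alpha" and "Omega" the range works out to 'A'..'z' here */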
+
+ /*
+ * If the range includes fewer than 10 chars, assume we have not got
+ * enough data, and make it include the regular ASCII set.
+ */
+ if (rangehi - rangelo < 9)
+ {
+ rangelo = ' ';
+ rangehi = 127;
+ }
+
+ /*
+ * Now strip any common prefix of the three strings.
+ */
+ while (*lobound)
+ {
+ if (*lobound != *hibound || *lobound != *value)
+ break;
+ lobound++, hibound++, value++;
+ }
+
+ /*
+ * Now we can do the conversions.
+ */
+ *scaledvalue = convert_one_string_to_scalar(value, rangelo, rangehi);
+ *scaledlobound = convert_one_string_to_scalar(lobound, rangelo, rangehi);
+ *scaledhibound = convert_one_string_to_scalar(hibound, rangelo, rangehi);
+}
+
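+/*
+ * Map one string onto the 0..1 scale, treating its bytes as fractional
+ * digits in base (rangehi - rangelo + 1). For illustration (an assumed
+ * example, not taken from any real histogram): with rangelo = 'a' and
+ * rangehi = 'z' the base is 26, so "ba" maps to 1/26 + 0/26^2, about 0.038.
+ */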
+static double
+convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
+{
+ int slen = strlen(value);
+ double num,
+ denom,
+ base;
+
+ if (slen <= 0)
+ return 0.0; /* empty string has scalar value 0 */
+
+ /*
+ * There seems little point in considering more than a dozen bytes from
+ * the string. Since base is at least 10, that will give us nominal
+ * resolution of at least 12 decimal digits, which is surely far more
+ * precision than this estimation technique has got anyway (especially in
+ * non-C locales). Also, even with the maximum possible base of 256, this
+ * ensures denom cannot grow larger than 256^13 = 2.03e31, which will not
+ * overflow on any known machine.
+ */
+ if (slen > 12)
+ slen = 12;
+
+ /* Convert initial characters to fraction */
+ base = rangehi - rangelo + 1;
+ num = 0.0;
+ denom = base;
+ while (slen-- > 0)
+ {
+ int ch = (unsigned char) *value++;
+
+ if (ch < rangelo)
+ ch = rangelo - 1;
+ else if (ch > rangehi)
+ ch = rangehi + 1;
+ num += ((double) (ch - rangelo)) / denom;
+ denom *= base;
+ }
+
+ return num;
+}
+
+/*
+ * Convert a string-type Datum into a palloc'd, null-terminated string.
+ *
+ * On failure (e.g., unsupported typid), set *failure to true;
+ * otherwise, that variable is not changed. (We'll return NULL on failure.)
+ *
+ * When using a non-C locale, we must pass the string through strxfrm()
+ * before continuing, so as to generate correct locale-specific results.
+ */
+static char *
+convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
+{
+ char *val;
+
+ switch (typid)
+ {
+ case CHAROID:
+ val = (char *) palloc(2);
+ val[0] = DatumGetChar(value);
+ val[1] = '\0';
+ break;
+ case BPCHAROID:
+ case VARCHAROID:
+ case TEXTOID:
+ val = TextDatumGetCString(value);
+ break;
+ case NAMEOID:
+ {
+ NameData *nm = (NameData *) DatumGetPointer(value);
+
+ val = pstrdup(NameStr(*nm));
+ break;
+ }
+ default:
+ *failure = true;
+ return NULL;
+ }
+
+ if (!lc_collate_is_c(collid))
+ {
+ char *xfrmstr;
+ size_t xfrmlen;
+ size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
+
+ /*
+ * XXX: We could guess at a suitable output buffer size and only call
+ * strxfrm twice if our guess is too small.
+ *
+ * XXX: strxfrm doesn't support UTF-8 encoding on Win32; it can return
+ * bogus data or set an error. This is not really a problem unless it
+ * crashes, since it will only cause an estimation error and nothing
+ * fatal.
+ */
+ xfrmlen = strxfrm(NULL, val, 0);
+#ifdef WIN32
+
+ /*
+ * On Windows, strxfrm returns INT_MAX when an error occurs. Instead
+ * of trying to allocate this much memory (and fail), just return the
+ * original string unmodified as if we were in the C locale.
+ */
+ if (xfrmlen == INT_MAX)
+ return val;
+#endif
+ xfrmstr = (char *) palloc(xfrmlen + 1);
+ xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);
+
+ /*
+ * Some systems (e.g., glibc) can return a smaller value from the
+ * second call than the first; thus the Assert must be <= not ==.
+ */
+ Assert(xfrmlen2 <= xfrmlen);
+ pfree(val);
+ val = xfrmstr;
+ }
+
+ return val;
+}
+
+/*
+ * Do convert_to_scalar()'s work for any bytea data type.
+ *
+ * Very similar to convert_string_to_scalar except we can't assume
+ * null-termination and therefore pass explicit lengths around.
+ *
+ * Also, assumptions about likely "normal" ranges of characters have been
+ * removed - a data range of 0..255 is always used, for now. (Perhaps
+ * someday we will add information about actual byte data range to
+ * pg_statistic.)
+ */
+static void
+convert_bytea_to_scalar(Datum value,
+ double *scaledvalue,
+ Datum lobound,
+ double *scaledlobound,
+ Datum hibound,
+ double *scaledhibound)
+{
+ bytea *valuep = DatumGetByteaPP(value);
+ bytea *loboundp = DatumGetByteaPP(lobound);
+ bytea *hiboundp = DatumGetByteaPP(hibound);
+ int rangelo,
+ rangehi,
+ valuelen = VARSIZE_ANY_EXHDR(valuep),
+ loboundlen = VARSIZE_ANY_EXHDR(loboundp),
+ hiboundlen = VARSIZE_ANY_EXHDR(hiboundp),
+ i,
+ minlen;
+ unsigned char *valstr = (unsigned char *) VARDATA_ANY(valuep);
+ unsigned char *lostr = (unsigned char *) VARDATA_ANY(loboundp);
+ unsigned char *histr = (unsigned char *) VARDATA_ANY(hiboundp);
+
+ /*
+ * Assume bytea data is uniformly distributed across all byte values.
+ */
+ rangelo = 0;
+ rangehi = 255;
+
+ /*
+ * Now strip any common prefix of the three strings.
+ */
+ minlen = Min(Min(valuelen, loboundlen), hiboundlen);
+ for (i = 0; i < minlen; i++)
+ {
+ if (*lostr != *histr || *lostr != *valstr)
+ break;
+ lostr++, histr++, valstr++;
+ loboundlen--, hiboundlen--, valuelen--;
+ }
+
+ /*
+ * Now we can do the conversions.
+ */
+ *scaledvalue = convert_one_bytea_to_scalar(valstr, valuelen, rangelo, rangehi);
+ *scaledlobound = convert_one_bytea_to_scalar(lostr, loboundlen, rangelo, rangehi);
+ *scaledhibound = convert_one_bytea_to_scalar(histr, hiboundlen, rangelo, rangehi);
+}
+
+static double
+convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
+ int rangelo, int rangehi)
+{
+ double num,
+ denom,
+ base;
+
+ if (valuelen <= 0)
+ return 0.0; /* empty string has scalar value 0 */
+
+ /*
+ * Since the base is 256, we need not consider more than about 10 bytes
+ * (even this many seems like overkill).
+ */
+ if (valuelen > 10)
+ valuelen = 10;
+
+ /* Convert initial characters to fraction */
+ base = rangehi - rangelo + 1;
+ num = 0.0;
+ denom = base;
+ while (valuelen-- > 0)
+ {
+ int ch = *value++;
+
+ if (ch < rangelo)
+ ch = rangelo - 1;
+ else if (ch > rangehi)
+ ch = rangehi + 1;
+ num += ((double) (ch - rangelo)) / denom;
+ denom *= base;
+ }
+
+ return num;
+}
+
+/*
+ * Do convert_to_scalar()'s work for any timevalue data type.
+ *
+ * On failure (e.g., unsupported typid), set *failure to true;
+ * otherwise, that variable is not changed.
+ */
+static double
+convert_timevalue_to_scalar(Datum value, Oid typid, bool *failure)
+{
+ switch (typid)
+ {
+ case TIMESTAMPOID:
+ return DatumGetTimestamp(value);
+ case TIMESTAMPTZOID:
+ return DatumGetTimestampTz(value);
+ case DATEOID:
+ return date2timestamp_no_overflow(DatumGetDateADT(value));
+ case INTERVALOID:
+ {
+ Interval *interval = DatumGetIntervalP(value);
+
+ /*
+ * Convert the month part of Interval to days using assumed
+ * average month length of 365.25/12.0 days. Not too
+ * accurate, but plenty good enough for our purposes.
+ */
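+ /* e.g., a '1 mon 1 day' interval scales to (30.4375 + 1) * USECS_PER_DAY */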
+ return interval->time + interval->day * (double) USECS_PER_DAY +
+ interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY);
+ }
+ case TIMEOID:
+ return DatumGetTimeADT(value);
+ case TIMETZOID:
+ {
+ TimeTzADT *timetz = DatumGetTimeTzADTP(value);
+
+ /* use GMT-equivalent time */
+ return (double) (timetz->time + (timetz->zone * 1000000.0));
+ }
+ }
+
+ *failure = true;
+ return 0;
+}
+
+
+/*
+ * get_restriction_variable
+ * Examine the args of a restriction clause to see if it's of the
+ * form (variable op pseudoconstant) or (pseudoconstant op variable),
+ * where "variable" could be either a Var or an expression in vars of a
+ * single relation. If so, extract information about the variable,
+ * and also indicate which side it was on and the other argument.
+ *
+ * Inputs:
+ * root: the planner info
+ * args: clause argument list
+ * varRelid: see specs for restriction selectivity functions
+ *
+ * Outputs: (these are valid only if true is returned)
+ * *vardata: gets information about variable (see examine_variable)
+ * *other: gets other clause argument, aggressively reduced to a constant
+ * *varonleft: set true if variable is on the left, false if on the right
+ *
+ * Returns true if a variable is identified, otherwise false.
+ *
+ * Note: if there are Vars on both sides of the clause, we must fail, because
+ * callers are expecting that the other side will act like a pseudoconstant.
+ */
+bool
+get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
+ VariableStatData *vardata, Node **other,
+ bool *varonleft)
+{
+ Node *left,
+ *right;
+ VariableStatData rdata;
+
+ /* Fail if not a binary opclause (probably shouldn't happen) */
+ if (list_length(args) != 2)
+ return false;
+
+ left = (Node *) linitial(args);
+ right = (Node *) lsecond(args);
+
+ /*
+ * Examine both sides. Note that when varRelid is nonzero, Vars of other
+ * relations will be treated as pseudoconstants.
+ */
+ examine_variable(root, left, varRelid, vardata);
+ examine_variable(root, right, varRelid, &rdata);
+
+ /*
+ * If one side is a variable and the other not, we win.
+ */
+ if (vardata->rel && rdata.rel == NULL)
+ {
+ *varonleft = true;
+ *other = estimate_expression_value(root, rdata.var);
+ /* Assume we need no ReleaseVariableStats(rdata) here */
+ return true;
+ }
+
+ if (vardata->rel == NULL && rdata.rel)
+ {
+ *varonleft = false;
+ *other = estimate_expression_value(root, vardata->var);
+ /* Assume we need no ReleaseVariableStats(*vardata) here */
+ *vardata = rdata;
+ return true;
+ }
+
+ /* Oops, clause has wrong structure (probably var op var) */
+ ReleaseVariableStats(*vardata);
+ ReleaseVariableStats(rdata);
+
+ return false;
+}
+
+/*
+ * get_join_variables
+ * Apply examine_variable() to each side of a join clause.
+ * Also, attempt to identify whether the join clause has the same
+ * or reversed sense compared to the SpecialJoinInfo.
+ *
+ * We consider the join clause "normal" if it is "lhs_var OP rhs_var",
+ * or "reversed" if it is "rhs_var OP lhs_var". In complicated cases
+ * where we can't tell for sure, we default to assuming it's normal.
+ */
+void
+get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
+ VariableStatData *vardata1, VariableStatData *vardata2,
+ bool *join_is_reversed)
+{
+ Node *left,
+ *right;
+
+ if (list_length(args) != 2)
+ elog(ERROR, "join operator should take two arguments");
+
+ left = (Node *) linitial(args);
+ right = (Node *) lsecond(args);
+
+ examine_variable(root, left, 0, vardata1);
+ examine_variable(root, right, 0, vardata2);
+
+ if (vardata1->rel &&
+ bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
+ *join_is_reversed = true; /* var1 is on RHS */
+ else if (vardata2->rel &&
+ bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
+ *join_is_reversed = true; /* var2 is on LHS */
+ else
+ *join_is_reversed = false;
+}
+
+/* statext_expressions_load copies the tuple, so just pfree it. */
+static void
+ReleaseDummy(HeapTuple tuple)
+{
+ pfree(tuple);
+}
+
+/*
+ * examine_variable
+ * Try to look up statistical data about an expression.
+ * Fill in a VariableStatData struct to describe the expression.
+ *
+ * Inputs:
+ * root: the planner info
+ * node: the expression tree to examine
+ * varRelid: see specs for restriction selectivity functions
+ *
+ * Outputs: *vardata is filled as follows:
+ * var: the input expression (with any binary relabeling stripped, if
+ * it is or contains a variable; but otherwise the type is preserved)
+ * rel: RelOptInfo for relation containing variable; NULL if expression
+ * contains no Vars (NOTE this could point to a RelOptInfo of a
+ * subquery, not one in the current query).
+ * statsTuple: the pg_statistic entry for the variable, if one exists;
+ * otherwise NULL.
+ * freefunc: pointer to a function to release statsTuple with.
+ * vartype: exposed type of the expression; this should always match
+ * the declared input type of the operator we are estimating for.
+ * atttype, atttypmod: actual type/typmod of the "var" expression. This is
+ * commonly the same as the exposed type of the variable argument,
+ * but can be different in binary-compatible-type cases.
+ * isunique: true if we were able to match the var to a unique index or a
+ * single-column DISTINCT clause, implying its values are unique for
+ * this query. (Caution: this should be trusted for statistical
+ * purposes only, since we do not check indimmediate nor verify that
+ * the exact same definition of equality applies.)
+ * acl_ok: true if current user has permission to read the column(s)
+ * underlying the pg_statistic entry. This is consulted by
+ * statistic_proc_security_check().
+ *
+ * Caller is responsible for doing ReleaseVariableStats() before exiting.
+ */
+void
+examine_variable(PlannerInfo *root, Node *node, int varRelid,
+ VariableStatData *vardata)
+{
+ Node *basenode;
+ Relids varnos;
+ RelOptInfo *onerel;
+
+ /* Make sure we don't return dangling pointers in vardata */
+ MemSet(vardata, 0, sizeof(VariableStatData));
+
+ /* Save the exposed type of the expression */
+ vardata->vartype = exprType(node);
+
+ /* Look inside any binary-compatible relabeling */
+
+ if (IsA(node, RelabelType))
+ basenode = (Node *) ((RelabelType *) node)->arg;
+ else
+ basenode = node;
+
+ /* Fast path for a simple Var */
+
+ if (IsA(basenode, Var) &&
+ (varRelid == 0 || varRelid == ((Var *) basenode)->varno))
+ {
+ Var *var = (Var *) basenode;
+
+ /* Set up result fields other than the stats tuple */
+ vardata->var = basenode; /* return Var without relabeling */
+ vardata->rel = find_base_rel(root, var->varno);
+ vardata->atttype = var->vartype;
+ vardata->atttypmod = var->vartypmod;
+ vardata->isunique = has_unique_index(vardata->rel, var->varattno);
+
+ /* Try to locate some stats */
+ examine_simple_variable(root, var, vardata);
+
+ return;
+ }
+
+ /*
+ * Okay, it's a more complicated expression. Determine variable
+ * membership. Note that when varRelid isn't zero, only vars of that
+ * relation are considered "real" vars.
+ */
+ varnos = pull_varnos(root, basenode);
+
+ onerel = NULL;
+
+ switch (bms_membership(varnos))
+ {
+ case BMS_EMPTY_SET:
+ /* No Vars at all ... must be pseudo-constant clause */
+ break;
+ case BMS_SINGLETON:
+ if (varRelid == 0 || bms_is_member(varRelid, varnos))
+ {
+ onerel = find_base_rel(root,
+ (varRelid ? varRelid : bms_singleton_member(varnos)));
+ vardata->rel = onerel;
+ node = basenode; /* strip any relabeling */
+ }
+ /* else treat it as a constant */
+ break;
+ case BMS_MULTIPLE:
+ if (varRelid == 0)
+ {
+ /* treat it as a variable of a join relation */
+ vardata->rel = find_join_rel(root, varnos);
+ node = basenode; /* strip any relabeling */
+ }
+ else if (bms_is_member(varRelid, varnos))
+ {
+ /* ignore the vars belonging to other relations */
+ vardata->rel = find_base_rel(root, varRelid);
+ node = basenode; /* strip any relabeling */
+ /* note: no point in expressional-index search here */
+ }
+ /* else treat it as a constant */
+ break;
+ }
+
+ bms_free(varnos);
+
+ vardata->var = node;
+ vardata->atttype = exprType(node);
+ vardata->atttypmod = exprTypmod(node);
+
+ if (onerel)
+ {
+ /*
+ * We have an expression in vars of a single relation. Try to match
+ * it to expressional index columns, in hopes of finding some
+ * statistics.
+ *
+ * Note that we consider all index columns including INCLUDE columns,
+ * since there could be stats for such columns. But the test for
+ * uniqueness needs to be warier.
+ *
+ * XXX it's conceivable that there are multiple matches with different
+ * index opfamilies; if so, we need to pick one that matches the
+ * operator we are estimating for. FIXME later.
+ */
+ ListCell *ilist;
+ ListCell *slist;
+
+ foreach(ilist, onerel->indexlist)
+ {
+ IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
+ ListCell *indexpr_item;
+ int pos;
+
+ indexpr_item = list_head(index->indexprs);
+ if (indexpr_item == NULL)
+ continue; /* no expressions here... */
+
+ for (pos = 0; pos < index->ncolumns; pos++)
+ {
+ if (index->indexkeys[pos] == 0)
+ {
+ Node *indexkey;
+
+ if (indexpr_item == NULL)
+ elog(ERROR, "too few entries in indexprs list");
+ indexkey = (Node *) lfirst(indexpr_item);
+ if (indexkey && IsA(indexkey, RelabelType))
+ indexkey = (Node *) ((RelabelType *) indexkey)->arg;
+ if (equal(node, indexkey))
+ {
+ /*
+ * Found a match ... is it a unique index? Tests here
+ * should match has_unique_index().
+ */
+ if (index->unique &&
+ index->nkeycolumns == 1 &&
+ pos == 0 &&
+ (index->indpred == NIL || index->predOK))
+ vardata->isunique = true;
+
+ /*
+ * Has it got stats? We only consider stats for
+ * non-partial indexes, since partial indexes probably
+ * don't reflect whole-relation statistics; the above
+ * check for uniqueness is the only info we take from
+ * a partial index.
+ *
+ * An index stats hook, however, must make its own
+ * decisions about what to do with partial indexes.
+ */
+ if (get_index_stats_hook &&
+ (*get_index_stats_hook) (root, index->indexoid,
+ pos + 1, vardata))
+ {
+ /*
+ * The hook took control of acquiring a stats
+ * tuple. If it did supply a tuple, it'd better
+ * have supplied a freefunc.
+ */
+ if (HeapTupleIsValid(vardata->statsTuple) &&
+ !vardata->freefunc)
+ elog(ERROR, "no function provided to release variable stats with");
+ }
+ else if (index->indpred == NIL)
+ {
+ vardata->statsTuple =
+ SearchSysCache3(STATRELATTINH,
+ ObjectIdGetDatum(index->indexoid),
+ Int16GetDatum(pos + 1),
+ BoolGetDatum(false));
+ vardata->freefunc = ReleaseSysCache;
+
+ if (HeapTupleIsValid(vardata->statsTuple))
+ {
+ /* Get index's table for permission check */
+ RangeTblEntry *rte;
+ Oid userid;
+
+ rte = planner_rt_fetch(index->rel->relid, root);
+ Assert(rte->rtekind == RTE_RELATION);
+
+ /*
+ * Use checkAsUser if it's set, in case we're
+ * accessing the table via a view.
+ */
+ userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
+
+ /*
+ * For simplicity, we insist on the whole
+ * table being selectable, rather than trying
+ * to identify which column(s) the index
+ * depends on. Also require all rows to be
+ * selectable --- there must be no
+ * securityQuals from security barrier views
+ * or RLS policies.
+ */
+ vardata->acl_ok =
+ rte->securityQuals == NIL &&
+ (pg_class_aclcheck(rte->relid, userid,
+ ACL_SELECT) == ACLCHECK_OK);
+
+ /*
+ * If the user doesn't have permissions to
+ * access an inheritance child relation, check
+ * the permissions of the table actually
+ * mentioned in the query, since most likely
+ * the user does have that permission. Note
+ * that whole-table select privilege on the
+ * parent doesn't quite guarantee that the
+ * user could read all columns of the child.
+ * But in practice it's unlikely that any
+ * interesting security violation could result
+ * from allowing access to the expression
+ * index's stats, so we allow it anyway. See
+ * similar code in examine_simple_variable()
+ * for additional comments.
+ */
+ if (!vardata->acl_ok &&
+ root->append_rel_array != NULL)
+ {
+ AppendRelInfo *appinfo;
+ Index varno = index->rel->relid;
+
+ appinfo = root->append_rel_array[varno];
+ while (appinfo &&
+ planner_rt_fetch(appinfo->parent_relid,
+ root)->rtekind == RTE_RELATION)
+ {
+ varno = appinfo->parent_relid;
+ appinfo = root->append_rel_array[varno];
+ }
+ if (varno != index->rel->relid)
+ {
+ /* Repeat access check on this rel */
+ rte = planner_rt_fetch(varno, root);
+ Assert(rte->rtekind == RTE_RELATION);
+
+ userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
+
+ vardata->acl_ok =
+ rte->securityQuals == NIL &&
+ (pg_class_aclcheck(rte->relid,
+ userid,
+ ACL_SELECT) == ACLCHECK_OK);
+ }
+ }
+ }
+ else
+ {
+ /* suppress leakproofness checks later */
+ vardata->acl_ok = true;
+ }
+ }
+ if (vardata->statsTuple)
+ break;
+ }
+ indexpr_item = lnext(index->indexprs, indexpr_item);
+ }
+ }
+ if (vardata->statsTuple)
+ break;
+ }
+
+ /*
+ * Search extended statistics for one with a matching expression.
+ * There might be multiple ones, so just grab the first one. In the
+ * future, we might consider the statistics target (and pick the most
+ * accurate statistics) and maybe some other parameters.
+ */
+ foreach(slist, onerel->statlist)
+ {
+ StatisticExtInfo *info = (StatisticExtInfo *) lfirst(slist);
+ RangeTblEntry *rte = planner_rt_fetch(onerel->relid, root);
+ ListCell *expr_item;
+ int pos;
+
+ /*
+ * Stop once we've found statistics for the expression (either
+ * from extended stats, or for an index in the preceding loop).
+ */
+ if (vardata->statsTuple)
+ break;
+
+ /* skip stats without per-expression stats */
+ if (info->kind != STATS_EXT_EXPRESSIONS)
+ continue;
+
+ /* skip stats with mismatching stxdinherit value */
+ if (info->inherit != rte->inh)
+ continue;
+
+ pos = 0;
+ foreach(expr_item, info->exprs)
+ {
+ Node *expr = (Node *) lfirst(expr_item);
+
+ Assert(expr);
+
+ /* strip RelabelType before comparing it */
+ if (expr && IsA(expr, RelabelType))
+ expr = (Node *) ((RelabelType *) expr)->arg;
+
+ /* found a match, see if we can extract pg_statistic row */
+ if (equal(node, expr))
+ {
+ Oid userid;
+
+ /*
+ * XXX Not sure if we should cache the tuple somewhere.
+ * Now we just create a new copy every time.
+ */
+ vardata->statsTuple =
+ statext_expressions_load(info->statOid, rte->inh, pos);
+
+ vardata->freefunc = ReleaseDummy;
+
+ /*
+ * Use checkAsUser if it's set, in case we're accessing
+ * the table via a view.
+ */
+ userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
+
+ /*
+ * For simplicity, we insist on the whole table being
+ * selectable, rather than trying to identify which
+ * column(s) the statistics object depends on. Also
+ * require all rows to be selectable --- there must be no
+ * securityQuals from security barrier views or RLS
+ * policies.
+ */
+ vardata->acl_ok =
+ rte->securityQuals == NIL &&
+ (pg_class_aclcheck(rte->relid, userid,
+ ACL_SELECT) == ACLCHECK_OK);
+
+ /*
+ * If the user doesn't have permissions to access an
+ * inheritance child relation, check the permissions of
+ * the table actually mentioned in the query, since most
+ * likely the user does have that permission. Note that
+ * whole-table select privilege on the parent doesn't
+ * quite guarantee that the user could read all columns of
+ * the child. But in practice it's unlikely that any
+ * interesting security violation could result from
+ * allowing access to the expression stats, so we allow it
+ * anyway. See similar code in examine_simple_variable()
+ * for additional comments.
+ */
+ if (!vardata->acl_ok &&
+ root->append_rel_array != NULL)
+ {
+ AppendRelInfo *appinfo;
+ Index varno = onerel->relid;
+
+ appinfo = root->append_rel_array[varno];
+ while (appinfo &&
+ planner_rt_fetch(appinfo->parent_relid,
+ root)->rtekind == RTE_RELATION)
+ {
+ varno = appinfo->parent_relid;
+ appinfo = root->append_rel_array[varno];
+ }
+ if (varno != onerel->relid)
+ {
+ /* Repeat access check on this rel */
+ rte = planner_rt_fetch(varno, root);
+ Assert(rte->rtekind == RTE_RELATION);
+
+ userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
+
+ vardata->acl_ok =
+ rte->securityQuals == NIL &&
+ (pg_class_aclcheck(rte->relid,
+ userid,
+ ACL_SELECT) == ACLCHECK_OK);
+ }
+ }
+
+ break;
+ }
+
+ pos++;
+ }
+ }
+ }
+}
+
+/*
+ * examine_simple_variable
+ * Handle a simple Var for examine_variable
+ *
+ * This is split out as a subroutine so that we can recurse to deal with
+ * Vars referencing subqueries.
+ *
+ * We already filled in all the fields of *vardata except for the stats tuple.
+ */
+static void
+examine_simple_variable(PlannerInfo *root, Var *var,
+ VariableStatData *vardata)
+{
+ RangeTblEntry *rte = root->simple_rte_array[var->varno];
+
+ Assert(IsA(rte, RangeTblEntry));
+
+ if (get_relation_stats_hook &&
+ (*get_relation_stats_hook) (root, rte, var->varattno, vardata))
+ {
+ /*
+ * The hook took control of acquiring a stats tuple. If it did supply
+ * a tuple, it'd better have supplied a freefunc.
+ */
+ if (HeapTupleIsValid(vardata->statsTuple) &&
+ !vardata->freefunc)
+ elog(ERROR, "no function provided to release variable stats with");
+ }
+ else if (rte->rtekind == RTE_RELATION)
+ {
+ /*
+ * Plain table or parent of an inheritance appendrel, so look up the
+ * column in pg_statistic
+ */
+ vardata->statsTuple = SearchSysCache3(STATRELATTINH,
+ ObjectIdGetDatum(rte->relid),
+ Int16GetDatum(var->varattno),
+ BoolGetDatum(rte->inh));
+ vardata->freefunc = ReleaseSysCache;
+
+ if (HeapTupleIsValid(vardata->statsTuple))
+ {
+ Oid userid;
+
+ /*
+ * Check if user has permission to read this column. We require
+ * all rows to be accessible, so there must be no securityQuals
+ * from security barrier views or RLS policies. Use checkAsUser
+ * if it's set, in case we're accessing the table via a view.
+ */
+ userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
+
+ vardata->acl_ok =
+ rte->securityQuals == NIL &&
+ ((pg_class_aclcheck(rte->relid, userid,
+ ACL_SELECT) == ACLCHECK_OK) ||
+ (pg_attribute_aclcheck(rte->relid, var->varattno, userid,
+ ACL_SELECT) == ACLCHECK_OK));
+
+ /*
+ * If the user doesn't have permissions to access an inheritance
+ * child relation or specifically this attribute, check the
+ * permissions of the table/column actually mentioned in the
+ * query, since most likely the user does have that permission
+ * (else the query will fail at runtime), and if the user can read
+ * the column there then he can get the values of the child table
+ * too. To do that, we must find out which of the root parent's
+ * attributes the child relation's attribute corresponds to.
+ */
+ if (!vardata->acl_ok && var->varattno > 0 &&
+ root->append_rel_array != NULL)
+ {
+ AppendRelInfo *appinfo;
+ Index varno = var->varno;
+ int varattno = var->varattno;
+ bool found = false;
+
+ appinfo = root->append_rel_array[varno];
+
+ /*
+ * Partitions are mapped to their immediate parent, not the
+ * root parent, so must be ready to walk up multiple
+ * AppendRelInfos. But stop if we hit a parent that is not
+ * RTE_RELATION --- that's a flattened UNION ALL subquery, not
+ * an inheritance parent.
+ */
+ while (appinfo &&
+ planner_rt_fetch(appinfo->parent_relid,
+ root)->rtekind == RTE_RELATION)
+ {
+ int parent_varattno;
+
+ found = false;
+ if (varattno <= 0 || varattno > appinfo->num_child_cols)
+ break; /* safety check */
+ parent_varattno = appinfo->parent_colnos[varattno - 1];
+ if (parent_varattno == 0)
+ break; /* Var is local to child */
+
+ varno = appinfo->parent_relid;
+ varattno = parent_varattno;
+ found = true;
+
+ /* If the parent is itself a child, continue up. */
+ appinfo = root->append_rel_array[varno];
+ }
+
+ /*
+ * In rare cases, the Var may be local to the child table, in
+ * which case, we've got to live with having no access to this
+ * column's stats.
+ */
+ if (!found)
+ return;
+
+ /* Repeat the access check on this parent rel & column */
+ rte = planner_rt_fetch(varno, root);
+ Assert(rte->rtekind == RTE_RELATION);
+
+ userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
+
+ vardata->acl_ok =
+ rte->securityQuals == NIL &&
+ ((pg_class_aclcheck(rte->relid, userid,
+ ACL_SELECT) == ACLCHECK_OK) ||
+ (pg_attribute_aclcheck(rte->relid, varattno, userid,
+ ACL_SELECT) == ACLCHECK_OK));
+ }
+ }
+ else
+ {
+ /* suppress any possible leakproofness checks later */
+ vardata->acl_ok = true;
+ }
+ }
+ else if (rte->rtekind == RTE_SUBQUERY && !rte->inh)
+ {
+ /*
+ * Plain subquery (not one that was converted to an appendrel).
+ */
+ Query *subquery = rte->subquery;
+ RelOptInfo *rel;
+ TargetEntry *ste;
+
+ /*
+ * Punt if it's a whole-row var rather than a plain column reference.
+ */
+ if (var->varattno == InvalidAttrNumber)
+ return;
+
+ /*
+ * Punt if subquery uses set operations or GROUP BY, as these will
+ * mash underlying columns' stats beyond recognition. (Set ops are
+ * particularly nasty; if we forged ahead, we would return stats
+ * relevant to only the leftmost subselect...) DISTINCT is also
+ * problematic, but we check that later because there is a possibility
+ * of learning something even with it.
+ */
+ if (subquery->setOperations ||
+ subquery->groupClause ||
+ subquery->groupingSets)
+ return;
+
+ /*
+ * OK, fetch RelOptInfo for subquery. Note that we don't change the
+ * rel returned in vardata, since caller expects it to be a rel of the
+ * caller's query level. Because we might already be recursing, we
+ * can't use that rel pointer either, but have to look up the Var's
+ * rel afresh.
+ */
+ rel = find_base_rel(root, var->varno);
+
+ /* If the subquery hasn't been planned yet, we have to punt */
+ if (rel->subroot == NULL)
+ return;
+ Assert(IsA(rel->subroot, PlannerInfo));
+
+ /*
+ * Switch our attention to the subquery as mangled by the planner. It
+ * was okay to look at the pre-planning version for the tests above,
+ * but now we need a Var that will refer to the subroot's live
+ * RelOptInfos. For instance, if any subquery pullup happened during
+ * planning, Vars in the targetlist might have gotten replaced, and we
+ * need to see the replacement expressions.
+ */
+ subquery = rel->subroot->parse;
+ Assert(IsA(subquery, Query));
+
+ /* Get the subquery output expression referenced by the upper Var */
+ ste = get_tle_by_resno(subquery->targetList, var->varattno);
+ if (ste == NULL || ste->resjunk)
+ elog(ERROR, "subquery %s does not have attribute %d",
+ rte->eref->aliasname, var->varattno);
+ var = (Var *) ste->expr;
+
+ /*
+ * If subquery uses DISTINCT, we can't make use of any stats for the
+ * variable ... but, if it's the only DISTINCT column, we are entitled
+ * to consider it unique. We do the test this way so that it works
+ * for cases involving DISTINCT ON.
+ */
+ if (subquery->distinctClause)
+ {
+ if (list_length(subquery->distinctClause) == 1 &&
+ targetIsInSortList(ste, InvalidOid, subquery->distinctClause))
+ vardata->isunique = true;
+ /* cannot go further */
+ return;
+ }
+
+ /*
+ * If the sub-query originated from a view with the security_barrier
+ * attribute, we must not look at the variable's statistics, though it
+ * seems all right to notice the existence of a DISTINCT clause. So
+ * stop here.
+ *
+ * This is probably a harsher restriction than necessary; it's
+ * certainly OK for the selectivity estimator (which is a C function,
+ * and therefore omnipotent anyway) to look at the statistics. But
+ * many selectivity estimators will happily *invoke the operator
+ * function* to try to work out a good estimate - and that's not OK.
+ * So for now, don't dig down for stats.
+ */
+ if (rte->security_barrier)
+ return;
+
+ /* Can only handle a simple Var of subquery's query level */
+ if (var && IsA(var, Var) &&
+ var->varlevelsup == 0)
+ {
+ /*
+ * OK, recurse into the subquery. Note that the original setting
+ * of vardata->isunique (which will surely be false) is left
+ * unchanged in this situation. That's what we want, since even
+ * if the underlying column is unique, the subquery may have
+ * joined to other tables in a way that creates duplicates.
+ */
+ examine_simple_variable(rel->subroot, var, vardata);
+ }
+ }
+ else
+ {
+ /*
+ * Otherwise, the Var comes from a FUNCTION, VALUES, or CTE RTE. (We
+ * won't see RTE_JOIN here because join alias Vars have already been
+ * flattened.) There's not much we can do with function outputs, but
+ * maybe someday try to be smarter about VALUES and/or CTEs.
+ */
+ }
+}
+
+/*
+ * Check whether it is permitted to call func_oid passing some of the
+ * pg_statistic data in vardata. We allow this either if the user has SELECT
+ * privileges on the table or column underlying the pg_statistic data or if
+ * the function is marked leak-proof.
+ */
+bool
+statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
+{
+ if (vardata->acl_ok)
+ return true;
+
+ if (!OidIsValid(func_oid))
+ return false;
+
+ if (get_func_leakproof(func_oid))
+ return true;
+
+ ereport(DEBUG2,
+ (errmsg_internal("not using statistics because function \"%s\" is not leak-proof",
+ get_func_name(func_oid))));
+ return false;
+}
+
+/*
+ * get_variable_numdistinct
+ * Estimate the number of distinct values of a variable.
+ *
+ * vardata: results of examine_variable
+ * *isdefault: set to true if the result is a default rather than based on
+ * anything meaningful.
+ *
+ * NB: be careful to produce a positive integral result, since callers may
+ * compare the result to exact integer counts, or might divide by it.
+ */
+double
+get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
+{
+ double stadistinct;
+ double stanullfrac = 0.0;
+ double ntuples;
+
+ *isdefault = false;
+
+ /*
+ * Determine the stadistinct value to use. There are cases where we can
+ * get an estimate even without a pg_statistic entry, or can get a better
+ * value than is in pg_statistic. Grab stanullfrac too if we can find it
+ * (otherwise, assume no nulls, for lack of any better idea).
+ */
+ if (HeapTupleIsValid(vardata->statsTuple))
+ {
+ /* Use the pg_statistic entry */
+ Form_pg_statistic stats;
+
+ stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
+ stadistinct = stats->stadistinct;
+ stanullfrac = stats->stanullfrac;
+ }
+ else if (vardata->vartype == BOOLOID)
+ {
+ /*
+ * Special-case boolean columns: presumably, two distinct values.
+ *
+ * Are there any other datatypes we should wire in special estimates
+ * for?
+ */
+ stadistinct = 2.0;
+ }
+ else if (vardata->rel && vardata->rel->rtekind == RTE_VALUES)
+ {
+ /*
+ * If the Var represents a column of a VALUES RTE, assume it's unique.
+ * This could of course be very wrong, but it should tend to be true
+ * in well-written queries. We could consider examining the VALUES'
+ * contents to get some real statistics; but that only works if the
+ * entries are all constants, and it would be pretty expensive anyway.
+ */
+ stadistinct = -1.0; /* unique (and all non null) */
+ }
+ else
+ {
+ /*
+ * We don't keep statistics for system columns, but in some cases we
+ * can infer distinctness anyway.
+ */
+ if (vardata->var && IsA(vardata->var, Var))
+ {
+ switch (((Var *) vardata->var)->varattno)
+ {
+ case SelfItemPointerAttributeNumber:
+ stadistinct = -1.0; /* unique (and all non null) */
+ break;
+ case TableOidAttributeNumber:
+ stadistinct = 1.0; /* only 1 value */
+ break;
+ default:
+ stadistinct = 0.0; /* means "unknown" */
+ break;
+ }
+ }
+ else
+ stadistinct = 0.0; /* means "unknown" */
+
+ /*
+ * XXX consider using estimate_num_groups on expressions?
+ */
+ }
+
+ /*
+ * If there is a unique index or DISTINCT clause for the variable, assume
+ * it is unique no matter what pg_statistic says; the statistics could be
+ * out of date, or we might have found a partial unique index that proves
+ * the var is unique for this query. However, we'd better still believe
+ * the null-fraction statistic.
+ */
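+ /* e.g., a unique column with stanullfrac = 0.1 ends up with stadistinct = -0.9 */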
+ if (vardata->isunique)
+ stadistinct = -1.0 * (1.0 - stanullfrac);
+
+ /*
+ * If we had an absolute estimate, use that.
+ */
+ if (stadistinct > 0.0)
+ return clamp_row_est(stadistinct);
+
+ /*
+ * Otherwise we need to get the relation size; punt if not available.
+ */
+ if (vardata->rel == NULL)
+ {
+ *isdefault = true;
+ return DEFAULT_NUM_DISTINCT;
+ }
+ ntuples = vardata->rel->tuples;
+ if (ntuples <= 0.0)
+ {
+ *isdefault = true;
+ return DEFAULT_NUM_DISTINCT;
+ }
+
+ /*
+ * If we had a relative estimate, use that.
+ */
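+ /* negative stadistinct means a fraction of ntuples: -0.5 over 1000 rows gives 500 */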
+ if (stadistinct < 0.0)
+ return clamp_row_est(-stadistinct * ntuples);
+
+ /*
+ * With no data, estimate ndistinct = ntuples if the table is small, else
+ * use default. We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so
+ * that the behavior isn't discontinuous.
+ */
+ if (ntuples < DEFAULT_NUM_DISTINCT)
+ return clamp_row_est(ntuples);
+
+ *isdefault = true;
+ return DEFAULT_NUM_DISTINCT;
+}
+
+/*
+ * get_variable_range
+ * Estimate the minimum and maximum value of the specified variable.
+ * If successful, store values in *min and *max, and return true.
+ * If no data available, return false.
+ *
+ * sortop is the "<" comparison operator to use. This should generally
+ * be "<" not ">", as only the former is likely to be found in pg_statistic.
+ * The collation must be specified too.
+ */
+static bool
+get_variable_range(PlannerInfo *root, VariableStatData *vardata,
+ Oid sortop, Oid collation,
+ Datum *min, Datum *max)
+{
+ Datum tmin = 0;
+ Datum tmax = 0;
+ bool have_data = false;
+ int16 typLen;
+ bool typByVal;
+ Oid opfuncoid;
+ FmgrInfo opproc;
+ AttStatsSlot sslot;
+
+ /*
+ * XXX It's very tempting to try to use the actual column min and max, if
+ * we can get them relatively-cheaply with an index probe. However, since
+ * this function is called many times during join planning, that could
+ * have unpleasant effects on planning speed. Need more investigation
+ * before enabling this.
+ */
+#ifdef NOT_USED
+ if (get_actual_variable_range(root, vardata, sortop, collation, min, max))
+ return true;
+#endif
+
+ if (!HeapTupleIsValid(vardata->statsTuple))
+ {
+ /* no stats available, so default result */
+ return false;
+ }
+
+ /*
+ * If we can't apply the sortop to the stats data, just fail. In
+ * principle, if there's a histogram and no MCVs, we could return the
+ * histogram endpoints without ever applying the sortop ... but it's
+ * probably not worth trying, because whatever the caller wants to do with
+ * the endpoints would likely fail the security check too.
+ */
+ if (!statistic_proc_security_check(vardata,
+ (opfuncoid = get_opcode(sortop))))
+ return false;
+
+ opproc.fn_oid = InvalidOid; /* mark this as not looked up yet */
+
+ get_typlenbyval(vardata->atttype, &typLen, &typByVal);
+
+ /*
+ * If there is a histogram with the ordering we want, grab the first and
+ * last values.
+ */
+ if (get_attstatsslot(&sslot, vardata->statsTuple,
+ STATISTIC_KIND_HISTOGRAM, sortop,
+ ATTSTATSSLOT_VALUES))
+ {
+ if (sslot.stacoll == collation && sslot.nvalues > 0)
+ {
+ tmin = datumCopy(sslot.values[0], typByVal, typLen);
+ tmax = datumCopy(sslot.values[sslot.nvalues - 1], typByVal, typLen);
+ have_data = true;
+ }
+ free_attstatsslot(&sslot);
+ }
+
+ /*
+ * Otherwise, if there is a histogram with some other ordering, scan it
+ * and get the min and max values according to the ordering we want. This
+ * of course may not find values that are really extremal according to our
+ * ordering, but it beats ignoring available data.
+ */
+ if (!have_data &&
+ get_attstatsslot(&sslot, vardata->statsTuple,
+ STATISTIC_KIND_HISTOGRAM, InvalidOid,
+ ATTSTATSSLOT_VALUES))
+ {
+ get_stats_slot_range(&sslot, opfuncoid, &opproc,
+ collation, typLen, typByVal,
+ &tmin, &tmax, &have_data);
+ free_attstatsslot(&sslot);
+ }
+
+ /*
+ * If we have most-common-values info, look for extreme MCVs. This is
+ * needed even if we also have a histogram, since the histogram excludes
+ * the MCVs. However, if we *only* have MCVs and no histogram, we should
+ * be pretty wary of deciding that that is a full representation of the
+ * data. Proceed only if the MCVs represent the whole table (to within
+ * roundoff error).
+ */
+ if (get_attstatsslot(&sslot, vardata->statsTuple,
+ STATISTIC_KIND_MCV, InvalidOid,
+ have_data ? ATTSTATSSLOT_VALUES :
+ (ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)))
+ {
+ bool use_mcvs = have_data;
+
+ if (!have_data)
+ {
+ double sumcommon = 0.0;
+ double nullfrac;
+ int i;
+
+ for (i = 0; i < sslot.nnumbers; i++)
+ sumcommon += sslot.numbers[i];
+ nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata->statsTuple))->stanullfrac;
+ if (sumcommon + nullfrac > 0.99999)
+ use_mcvs = true;
+ }
+
+ if (use_mcvs)
+ get_stats_slot_range(&sslot, opfuncoid, &opproc,
+ collation, typLen, typByVal,
+ &tmin, &tmax, &have_data);
+ free_attstatsslot(&sslot);
+ }
+
+ *min = tmin;
+ *max = tmax;
+ return have_data;
+}
+
+/*
+ * get_stats_slot_range: scan sslot for min/max values
+ *
+ * Subroutine for get_variable_range: update min/max/have_data according
+ * to what we find in the statistics array.
+ */
+static void
+get_stats_slot_range(AttStatsSlot *sslot, Oid opfuncoid, FmgrInfo *opproc,
+ Oid collation, int16 typLen, bool typByVal,
+ Datum *min, Datum *max, bool *p_have_data)
+{
+ Datum tmin = *min;
+ Datum tmax = *max;
+ bool have_data = *p_have_data;
+ bool found_tmin = false;
+ bool found_tmax = false;
+
+ /* Look up the comparison function, if we didn't already do so */
+ if (opproc->fn_oid != opfuncoid)
+ fmgr_info(opfuncoid, opproc);
+
+ /* Scan all the slot's values */
+ for (int i = 0; i < sslot->nvalues; i++)
+ {
+ if (!have_data)
+ {
+ tmin = tmax = sslot->values[i];
+ found_tmin = found_tmax = true;
+ *p_have_data = have_data = true;
+ continue;
+ }
+ if (DatumGetBool(FunctionCall2Coll(opproc,
+ collation,
+ sslot->values[i], tmin)))
+ {
+ tmin = sslot->values[i];
+ found_tmin = true;
+ }
+ if (DatumGetBool(FunctionCall2Coll(opproc,
+ collation,
+ tmax, sslot->values[i])))
+ {
+ tmax = sslot->values[i];
+ found_tmax = true;
+ }
+ }
+
+ /*
+ * Copy the slot's values, if we found new extreme values.
+ */
+ if (found_tmin)
+ *min = datumCopy(tmin, typByVal, typLen);
+ if (found_tmax)
+ *max = datumCopy(tmax, typByVal, typLen);
+}
+
+
+/*
+ * get_actual_variable_range
+ * Attempt to identify the current *actual* minimum and/or maximum
+ * of the specified variable, by looking for a suitable btree index
+ * and fetching its low and/or high values.
+ * If successful, store values in *min and *max, and return true.
+ * (Either pointer can be NULL if that endpoint isn't needed.)
+ * If unsuccessful, return false.
+ *
+ * sortop is the "<" comparison operator to use.
+ * collation is the required collation.
+ */
+static bool
+get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
+ Oid sortop, Oid collation,
+ Datum *min, Datum *max)
+{
+ bool have_data = false;
+ RelOptInfo *rel = vardata->rel;
+ RangeTblEntry *rte;
+ ListCell *lc;
+
+ /* No hope if no relation or it doesn't have indexes */
+ if (rel == NULL || rel->indexlist == NIL)
+ return false;
+ /* If it has indexes it must be a plain relation */
+ rte = root->simple_rte_array[rel->relid];
+ Assert(rte->rtekind == RTE_RELATION);
+
+ /* Search through the indexes to see if any match our problem */
+ foreach(lc, rel->indexlist)
+ {
+ IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
+ ScanDirection indexscandir;
+
+ /* Ignore non-btree indexes */
+ if (index->relam != BTREE_AM_OID)
+ continue;
+
+ /*
+ * Ignore partial indexes --- we only want stats that cover the entire
+ * relation.
+ */
+ if (index->indpred != NIL)
+ continue;
+
+ /*
+ * The index list might include hypothetical indexes inserted by a
+ * get_relation_info hook --- don't try to access them.
+ */
+ if (index->hypothetical)
+ continue;
+
+ /*
+ * The first index column must match the desired variable, sortop, and
+ * collation --- but we can use a descending-order index.
+ */
+ if (collation != index->indexcollations[0])
+ continue; /* test first 'cause it's cheapest */
+ if (!match_index_to_operand(vardata->var, 0, index))
+ continue;
+ switch (get_op_opfamily_strategy(sortop, index->sortopfamily[0]))
+ {
+ case BTLessStrategyNumber:
+ if (index->reverse_sort[0])
+ indexscandir = BackwardScanDirection;
+ else
+ indexscandir = ForwardScanDirection;
+ break;
+ case BTGreaterStrategyNumber:
+ if (index->reverse_sort[0])
+ indexscandir = ForwardScanDirection;
+ else
+ indexscandir = BackwardScanDirection;
+ break;
+ default:
+ /* index doesn't match the sortop */
+ continue;
+ }
+
+ /*
+ * Found a suitable index to extract data from. Set up some data that
+ * can be used by both invocations of get_actual_variable_endpoint.
+ */
+ {
+ MemoryContext tmpcontext;
+ MemoryContext oldcontext;
+ Relation heapRel;
+ Relation indexRel;
+ TupleTableSlot *slot;
+ int16 typLen;
+ bool typByVal;
+ ScanKeyData scankeys[1];
+
+ /* Make sure any cruft gets recycled when we're done */
+ tmpcontext = AllocSetContextCreate(CurrentMemoryContext,
+ "get_actual_variable_range workspace",
+ ALLOCSET_DEFAULT_SIZES);
+ oldcontext = MemoryContextSwitchTo(tmpcontext);
+
+ /*
+ * Open the table and index so we can read from them. We should
+ * already have some type of lock on each.
+ */
+ heapRel = table_open(rte->relid, NoLock);
+ indexRel = index_open(index->indexoid, NoLock);
+
+ /* build some stuff needed for indexscan execution */
+ slot = table_slot_create(heapRel, NULL);
+ get_typlenbyval(vardata->atttype, &typLen, &typByVal);
+
+ /* set up an IS NOT NULL scan key so that we ignore nulls */
+ ScanKeyEntryInitialize(&scankeys[0],
+ SK_ISNULL | SK_SEARCHNOTNULL,
+ 1, /* index col to scan */
+ InvalidStrategy, /* no strategy */
+ InvalidOid, /* no strategy subtype */
+ InvalidOid, /* no collation */
+ InvalidOid, /* no reg proc for this */
+ (Datum) 0); /* constant */
+
+ /* If min is requested ... */
+ if (min)
+ {
+ have_data = get_actual_variable_endpoint(heapRel,
+ indexRel,
+ indexscandir,
+ scankeys,
+ typLen,
+ typByVal,
+ slot,
+ oldcontext,
+ min);
+ }
+ else
+ {
+ /* If min not requested, still want to fetch max */
+ have_data = true;
+ }
+
+ /* If max is requested, and we didn't already fail ... */
+ if (max && have_data)
+ {
+ /* scan in the opposite direction; all else is the same */
+ have_data = get_actual_variable_endpoint(heapRel,
+ indexRel,
+ -indexscandir,
+ scankeys,
+ typLen,
+ typByVal,
+ slot,
+ oldcontext,
+ max);
+ }
+
+ /* Clean everything up */
+ ExecDropSingleTupleTableSlot(slot);
+
+ index_close(indexRel, NoLock);
+ table_close(heapRel, NoLock);
+
+ MemoryContextSwitchTo(oldcontext);
+ MemoryContextDelete(tmpcontext);
+
+ /* And we're done */
+ break;
+ }
+ }
+
+ return have_data;
+}
+
+/*
+ * Get one endpoint datum (min or max depending on indexscandir) from the
+ * specified index. Return true if successful, false if not.
+ * On success, endpoint value is stored to *endpointDatum (and copied into
+ * outercontext).
+ *
+ * scankeys is a 1-element scankey array set up to reject nulls.
+ * typLen/typByVal describe the datatype of the index's first column.
+ * tableslot is a slot suitable to hold table tuples, in case we need
+ * to probe the heap.
+ * (We could compute these values locally, but that would mean computing them
+ * twice when get_actual_variable_range needs both the min and the max.)
+ *
+ * Failure occurs either when the index is empty, or we decide that it's
+ * taking too long to find a suitable tuple.
+ */
+static bool
+get_actual_variable_endpoint(Relation heapRel,
+ Relation indexRel,
+ ScanDirection indexscandir,
+ ScanKey scankeys,
+ int16 typLen,
+ bool typByVal,
+ TupleTableSlot *tableslot,
+ MemoryContext outercontext,
+ Datum *endpointDatum)
+{
+ bool have_data = false;
+ SnapshotData SnapshotNonVacuumable;
+ IndexScanDesc index_scan;
+ Buffer vmbuffer = InvalidBuffer;
+ BlockNumber last_heap_block = InvalidBlockNumber;
+ int n_visited_heap_pages = 0;
+ ItemPointer tid;
+ Datum values[INDEX_MAX_KEYS];
+ bool isnull[INDEX_MAX_KEYS];
+ MemoryContext oldcontext;
+
+ /*
+ * We use the index-only-scan machinery for this. With mostly-static
+ * tables that's a win because it avoids a heap visit. It's also a win
+ * for dynamic data, but the reason is less obvious; read on for details.
+ *
+ * In principle, we should scan the index with our current active
+ * snapshot, which is the best approximation we've got to what the query
+ * will see when executed. But that won't be exact if a new snap is taken
+ * before running the query, and it can be very expensive if a lot of
+ * recently-dead or uncommitted rows exist at the beginning or end of the
+ * index (because we'll laboriously fetch each one and reject it).
+ * Instead, we use SnapshotNonVacuumable. That will accept recently-dead
+ * and uncommitted rows as well as normal visible rows. On the other
+ * hand, it will reject known-dead rows, and thus not give a bogus answer
+ * when the extreme value has been deleted (unless the deletion was quite
+ * recent); that case motivates not using SnapshotAny here.
+ *
+ * A crucial point here is that SnapshotNonVacuumable, with
+ * GlobalVisTestFor(heapRel) as horizon, yields the inverse of the
+ * condition that the indexscan will use to decide that index entries are
+ * killable (see heap_hot_search_buffer()). Therefore, if the snapshot
+ * rejects a tuple (or more precisely, all tuples of a HOT chain) and we
+ * have to continue scanning past it, we know that the indexscan will mark
+ * that index entry killed. That means that the next
+ * get_actual_variable_endpoint() call will not have to re-consider that
+ * index entry. In this way we avoid repetitive work when this function
+ * is used a lot during planning.
+ *
+ * But using SnapshotNonVacuumable creates a hazard of its own. In a
+ * recently-created index, some index entries may point at "broken" HOT
+ * chains in which not all the tuple versions contain data matching the
+ * index entry. The live tuple version(s) certainly do match the index,
+ * but SnapshotNonVacuumable can accept recently-dead tuple versions that
+ * don't match. Hence, if we took data from the selected heap tuple, we
+ * might get a bogus answer that's not close to the index extremal value,
+ * or could even be NULL. We avoid this hazard because we take the data
+ * from the index entry not the heap.
+ *
+ * Despite all this care, there are situations where we might find many
+ * non-visible tuples near the end of the index. We don't want to expend
+ * a huge amount of time here, so we give up once we've read too many heap
+ * pages. When we fail for that reason, the caller will end up using
+ * whatever extremal value is recorded in pg_statistic.
+ */
+ InitNonVacuumableSnapshot(SnapshotNonVacuumable,
+ GlobalVisTestFor(heapRel));
+
+ index_scan = index_beginscan(heapRel, indexRel,
+ &SnapshotNonVacuumable,
+ 1, 0);
+ /* Set it up for index-only scan */
+ index_scan->xs_want_itup = true;
+ index_rescan(index_scan, scankeys, 1, NULL, 0);
+
+ /* Fetch first/next tuple in specified direction */
+ while ((tid = index_getnext_tid(index_scan, indexscandir)) != NULL)
+ {
+ BlockNumber block = ItemPointerGetBlockNumber(tid);
+
+ if (!VM_ALL_VISIBLE(heapRel,
+ block,
+ &vmbuffer))
+ {
+ /* Rats, we have to visit the heap to check visibility */
+ if (!index_fetch_heap(index_scan, tableslot))
+ {
+ /*
+ * No visible tuple for this index entry, so we need to
+ * advance to the next entry. Before doing so, count heap
+ * page fetches and give up if we've done too many.
+ *
+ * We don't charge a page fetch if this is the same heap page
+ * as the previous tuple. This is on the conservative side,
+ * since other recently-accessed pages are probably still in
+ * buffers too; but it's good enough for this heuristic.
+ */
+#define VISITED_PAGES_LIMIT 100
+
+ if (block != last_heap_block)
+ {
+ last_heap_block = block;
+ n_visited_heap_pages++;
+ if (n_visited_heap_pages > VISITED_PAGES_LIMIT)
+ break;
+ }
+
+ continue; /* no visible tuple, try next index entry */
+ }
+
+ /* We don't actually need the heap tuple for anything */
+ ExecClearTuple(tableslot);
+
+ /*
+ * We don't care whether there's more than one visible tuple in
+ * the HOT chain; if any are visible, that's good enough.
+ */
+ }
+
+ /*
+ * We expect that btree will return data in IndexTuple not HeapTuple
+ * format. It's not lossy either.
+ */
+ if (!index_scan->xs_itup)
+ elog(ERROR, "no data returned for index-only scan");
+ if (index_scan->xs_recheck)
+ elog(ERROR, "unexpected recheck indication from btree");
+
+ /* OK to deconstruct the index tuple */
+ index_deform_tuple(index_scan->xs_itup,
+ index_scan->xs_itupdesc,
+ values, isnull);
+
+ /* Shouldn't have got a null, but be careful */
+ if (isnull[0])
+ elog(ERROR, "found unexpected null value in index \"%s\"",
+ RelationGetRelationName(indexRel));
+
+ /* Copy the index column value out to caller's context */
+ oldcontext = MemoryContextSwitchTo(outercontext);
+ *endpointDatum = datumCopy(values[0], typByVal, typLen);
+ MemoryContextSwitchTo(oldcontext);
+ have_data = true;
+ break;
+ }
+
+ if (vmbuffer != InvalidBuffer)
+ ReleaseBuffer(vmbuffer);
+ index_endscan(index_scan);
+
+ return have_data;
+}
+
+/*
+ * find_join_input_rel
+ * Look up the input relation for a join.
+ *
+ * We assume that the input relation's RelOptInfo must have been constructed
+ * already.
+ */
+static RelOptInfo *
+find_join_input_rel(PlannerInfo *root, Relids relids)
+{
+ RelOptInfo *rel = NULL;
+
+ switch (bms_membership(relids))
+ {
+ case BMS_EMPTY_SET:
+ /* should not happen */
+ break;
+ case BMS_SINGLETON:
+ rel = find_base_rel(root, bms_singleton_member(relids));
+ break;
+ case BMS_MULTIPLE:
+ rel = find_join_rel(root, relids);
+ break;
+ }
+
+ if (rel == NULL)
+ elog(ERROR, "could not find RelOptInfo for given relids");
+
+ return rel;
+}
+
+
+/*-------------------------------------------------------------------------
+ *
+ * Index cost estimation functions
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * Extract the actual indexquals (as RestrictInfos) from an IndexClause list
+ */
+List *
+get_quals_from_indexclauses(List *indexclauses)
+{
+ List *result = NIL;
+ ListCell *lc;
+
+ foreach(lc, indexclauses)
+ {
+ IndexClause *iclause = lfirst_node(IndexClause, lc);
+ ListCell *lc2;
+
+ foreach(lc2, iclause->indexquals)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
+
+ result = lappend(result, rinfo);
+ }
+ }
+ return result;
+}
+
+/*
+ * Compute the total evaluation cost of the comparison operands in a list
+ * of index qual expressions. Since we know these will be evaluated just
+ * once per scan, there's no need to distinguish startup from per-row cost.
+ *
+ * This can be used either on the result of get_quals_from_indexclauses(),
+ * or directly on an indexorderbys list. In both cases, we expect that the
+ * index key expression is on the left side of binary clauses.
+ */
+Cost
+index_other_operands_eval_cost(PlannerInfo *root, List *indexquals)
+{
+ Cost qual_arg_cost = 0;
+ ListCell *lc;
+
+ foreach(lc, indexquals)
+ {
+ Expr *clause = (Expr *) lfirst(lc);
+ Node *other_operand;
+ QualCost index_qual_cost;
+
+ /*
+ * Index quals will have RestrictInfos, indexorderbys won't. Look
+ * through RestrictInfo if present.
+ */
+ if (IsA(clause, RestrictInfo))
+ clause = ((RestrictInfo *) clause)->clause;
+
+ if (IsA(clause, OpExpr))
+ {
+ OpExpr *op = (OpExpr *) clause;
+
+ other_operand = (Node *) lsecond(op->args);
+ }
+ else if (IsA(clause, RowCompareExpr))
+ {
+ RowCompareExpr *rc = (RowCompareExpr *) clause;
+
+ other_operand = (Node *) rc->rargs;
+ }
+ else if (IsA(clause, ScalarArrayOpExpr))
+ {
+ ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
+
+ other_operand = (Node *) lsecond(saop->args);
+ }
+ else if (IsA(clause, NullTest))
+ {
+ other_operand = NULL;
+ }
+ else
+ {
+ elog(ERROR, "unsupported indexqual type: %d",
+ (int) nodeTag(clause));
+ other_operand = NULL; /* keep compiler quiet */
+ }
+
+ cost_qual_eval_node(&index_qual_cost, other_operand, root);
+ qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
+ }
+ return qual_arg_cost;
+}
+
+void
+genericcostestimate(PlannerInfo *root,
+ IndexPath *path,
+ double loop_count,
+ GenericCosts *costs)
+{
+ IndexOptInfo *index = path->indexinfo;
+ List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
+ List *indexOrderBys = path->indexorderbys;
+ Cost indexStartupCost;
+ Cost indexTotalCost;
+ Selectivity indexSelectivity;
+ double indexCorrelation;
+ double numIndexPages;
+ double numIndexTuples;
+ double spc_random_page_cost;
+ double num_sa_scans;
+ double num_outer_scans;
+ double num_scans;
+ double qual_op_cost;
+ double qual_arg_cost;
+ List *selectivityQuals;
+ ListCell *l;
+
+ /*
+ * If the index is partial, AND the index predicate with the explicitly
+ * given indexquals to produce a more accurate idea of the index
+ * selectivity.
+ */
+ selectivityQuals = add_predicate_to_index_quals(index, indexQuals);
+
+ /*
+ * Check for ScalarArrayOpExpr index quals, and estimate the number of
+ * index scans that will be performed.
+ */
+ num_sa_scans = 1;
+ foreach(l, indexQuals)
+ {
+ RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
+
+ if (IsA(rinfo->clause, ScalarArrayOpExpr))
+ {
+ ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
+ int alength = estimate_array_length(lsecond(saop->args));
+
+ if (alength > 1)
+ num_sa_scans *= alength;
+ }
+ }
+
+ /* Estimate the fraction of main-table tuples that will be visited */
+ indexSelectivity = clauselist_selectivity(root, selectivityQuals,
+ index->rel->relid,
+ JOIN_INNER,
+ NULL);
+
+ /*
+ * If caller didn't give us an estimate, estimate the number of index
+ * tuples that will be visited. We do it in this rather peculiar-looking
+ * way in order to get the right answer for partial indexes.
+ */
+ numIndexTuples = costs->numIndexTuples;
+ if (numIndexTuples <= 0.0)
+ {
+ numIndexTuples = indexSelectivity * index->rel->tuples;
+
+ /*
+ * The above calculation counts all the tuples visited across all
+ * scans induced by ScalarArrayOpExpr nodes. We want to consider the
+ * average per-indexscan number, so adjust. This is a handy place to
+ * round to integer, too. (If caller supplied tuple estimate, it's
+ * responsible for handling these considerations.)
+ */
+ numIndexTuples = rint(numIndexTuples / num_sa_scans);
+ }
+
+ /*
+ * We can bound the number of tuples by the index size in any case. Also,
+ * always estimate at least one tuple is touched, even when
+ * indexSelectivity estimate is tiny.
+ */
+ if (numIndexTuples > index->tuples)
+ numIndexTuples = index->tuples;
+ if (numIndexTuples < 1.0)
+ numIndexTuples = 1.0;
+
+ /*
+ * Estimate the number of index pages that will be retrieved.
+ *
+ * We use the simplistic method of taking a pro-rata fraction of the total
+ * number of index pages. In effect, this counts only leaf pages and not
+ * any overhead such as index metapage or upper tree levels.
+ *
+ * In practice access to upper index levels is often nearly free because
+ * those tend to stay in cache under load; moreover, the cost involved is
+ * highly dependent on index type. We therefore ignore such costs here
+ * and leave it to the caller to add a suitable charge if needed.
+ */
+ if (index->pages > 1 && index->tuples > 1)
+ numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
+ else
+ numIndexPages = 1.0;
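+
+ /*
+ * For illustration, with arbitrary example numbers: numIndexTuples = 5000
+ * against an index of 1000 pages and 1,000,000 tuples gives
+ * ceil(5000 * 1000 / 1000000) = 5 leaf pages.
+ */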
+
+ /* fetch estimated page cost for tablespace containing index */
+ get_tablespace_page_costs(index->reltablespace,
+ &spc_random_page_cost,
+ NULL);
+
+ /*
+ * Now compute the disk access costs.
+ *
+ * The above calculations are all per-index-scan. However, if we are in a
+ * nestloop inner scan, we can expect the scan to be repeated (with
+ * different search keys) for each row of the outer relation. Likewise,
+ * ScalarArrayOpExpr quals result in multiple index scans. This creates
+ * the potential for cache effects to reduce the number of disk page
+ * fetches needed. We want to estimate the average per-scan I/O cost in
+ * the presence of caching.
+ *
+ * We use the Mackert-Lohman formula (see costsize.c for details) to
+ * estimate the total number of page fetches that occur. While this
+ * wasn't what it was designed for, it seems a reasonable model anyway.
+ * Note that we are counting pages not tuples anymore, so we take N = T =
+ * index size, as if there were one "tuple" per page.
+ */
+ num_outer_scans = loop_count;
+ num_scans = num_sa_scans * num_outer_scans;
+
+ if (num_scans > 1)
+ {
+ double pages_fetched;
+
+ /* total page fetches ignoring cache effects */
+ pages_fetched = numIndexPages * num_scans;
+
+ /* use Mackert and Lohman formula to adjust for cache effects */
+ pages_fetched = index_pages_fetched(pages_fetched,
+ index->pages,
+ (double) index->pages,
+ root);
+
+ /*
+ * Now compute the total disk access cost, and then report a pro-rated
+ * share for each outer scan. (Don't pro-rate for ScalarArrayOpExpr,
+ * since that's internal to the indexscan.)
+ */
+ indexTotalCost = (pages_fetched * spc_random_page_cost)
+ / num_outer_scans;
+ }
+ else
+ {
+ /*
+ * For a single index scan, we just charge spc_random_page_cost per
+ * page touched.
+ */
+ indexTotalCost = numIndexPages * spc_random_page_cost;
+ }
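+
+ /*
+ * For illustration, continuing the arbitrary numbers above: a single scan
+ * over 5 leaf pages at the default random_page_cost of 4.0 makes
+ * indexTotalCost = 5 * 4.0 = 20.0 at this point.
+ */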
+
+ /*
+ * CPU cost: any complex expressions in the indexquals will need to be
+ * evaluated once at the start of the scan to reduce them to runtime keys
+ * to pass to the index AM (see nodeIndexscan.c). We model the per-tuple
+ * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
+ * indexqual operator. Because we have numIndexTuples as a per-scan
+ * number, we have to multiply by num_sa_scans to get the correct result
+ * for ScalarArrayOpExpr cases. Similarly add in costs for any index
+ * ORDER BY expressions.
+ *
+ * Note: this neglects the possible costs of rechecking lossy operators.
+ * Detecting that that might be needed seems more expensive than it's
+ * worth, though, considering all the other inaccuracies here ...
+ */
+ qual_arg_cost = index_other_operands_eval_cost(root, indexQuals) +
+ index_other_operands_eval_cost(root, indexOrderBys);
+ qual_op_cost = cpu_operator_cost *
+ (list_length(indexQuals) + list_length(indexOrderBys));
+
+ indexStartupCost = qual_arg_cost;
+ indexTotalCost += qual_arg_cost;
+ indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
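+
+ /*
+ * For illustration, continuing the arbitrary numbers above: one simple
+ * OpExpr indexqual gives qual_op_cost = cpu_operator_cost = 0.0025, so
+ * with the default cpu_index_tuple_cost of 0.005 and 5000 index tuples
+ * per scan, the per-tuple charge added here is
+ * 5000 * 1 * (0.005 + 0.0025) = 37.5.
+ */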
+
+ /*
+ * Generic assumption about index correlation: there isn't any.
+ */
+ indexCorrelation = 0.0;
+
+ /*
+ * Return everything to caller.
+ */
+ costs->indexStartupCost = indexStartupCost;
+ costs->indexTotalCost = indexTotalCost;
+ costs->indexSelectivity = indexSelectivity;
+ costs->indexCorrelation = indexCorrelation;
+ costs->numIndexPages = numIndexPages;
+ costs->numIndexTuples = numIndexTuples;
+ costs->spc_random_page_cost = spc_random_page_cost;
+ costs->num_sa_scans = num_sa_scans;
+}
+
+/*
+ * If the index is partial, add its predicate to the given qual list.
+ *
+ * ANDing the index predicate with the explicitly given indexquals produces
+ * a more accurate idea of the index's selectivity. However, we need to be
+ * careful not to insert redundant clauses, because clauselist_selectivity()
+ * is easily fooled into computing a too-low selectivity estimate. Our
+ * approach is to add only the predicate clause(s) that cannot be proven to
+ * be implied by the given indexquals. This successfully handles cases such
+ * as a qual "x = 42" used with a partial index "WHERE x >= 40 AND x < 50".
+ * There are many other cases where we won't detect redundancy, leading to a
+ * too-low selectivity estimate, which will bias the system in favor of using
+ * partial indexes where possible. That is not necessarily bad though.
+ *
+ * Note that indexQuals contains RestrictInfo nodes while the indpred
+ * does not, so the output list will be mixed. This is OK for both
+ * predicate_implied_by() and clauselist_selectivity(), but might be
+ * problematic if the result were passed to other things.
+ */
+List *
+add_predicate_to_index_quals(IndexOptInfo *index, List *indexQuals)
+{
+ List *predExtraQuals = NIL;
+ ListCell *lc;
+
+ if (index->indpred == NIL)
+ return indexQuals;
+
+ foreach(lc, index->indpred)
+ {
+ Node *predQual = (Node *) lfirst(lc);
+ List *oneQual = list_make1(predQual);
+
+ if (!predicate_implied_by(oneQual, indexQuals, false))
+ predExtraQuals = list_concat(predExtraQuals, oneQual);
+ }
+ return list_concat(predExtraQuals, indexQuals);
+}
+
+
+void
+btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
+{
+ IndexOptInfo *index = path->indexinfo;
+ GenericCosts costs;
+ Oid relid;
+ AttrNumber colnum;
+ VariableStatData vardata;
+ double numIndexTuples;
+ Cost descentCost;
+ List *indexBoundQuals;
+ int indexcol;
+ bool eqQualHere;
+ bool found_saop;
+ bool found_is_null_op;
+ double num_sa_scans;
+ ListCell *lc;
+
+ /*
+ * For a btree scan, only leading '=' quals plus inequality quals for the
+ * immediately next attribute contribute to index selectivity (these are
+ * the "boundary quals" that determine the starting and stopping points of
+ * the index scan). Additional quals can suppress visits to the heap, so
+ * it's OK to count them in indexSelectivity, but they should not count
+ * for estimating numIndexTuples. So we must examine the given indexquals
+ * to find out which ones count as boundary quals. We rely on the
+ * knowledge that they are given in index column order.
+ *
+ * For a RowCompareExpr, we consider only the first column, just as
+ * rowcomparesel() does.
+ *
+ * If there's a ScalarArrayOpExpr in the quals, we'll actually perform N
+ * index scans not one, but the ScalarArrayOpExpr's operator can be
+ * considered to act the same as it normally does.
+ */
+ indexBoundQuals = NIL;
+ indexcol = 0;
+ eqQualHere = false;
+ found_saop = false;
+ found_is_null_op = false;
+ num_sa_scans = 1;
+ foreach(lc, path->indexclauses)
+ {
+ IndexClause *iclause = lfirst_node(IndexClause, lc);
+ ListCell *lc2;
+
+ if (indexcol != iclause->indexcol)
+ {
+ /* Beginning of a new column's quals */
+ if (!eqQualHere)
+ break; /* done if no '=' qual for indexcol */
+ eqQualHere = false;
+ indexcol++;
+ if (indexcol != iclause->indexcol)
+ break; /* no quals at all for indexcol */
+ }
+
+ /* Examine each indexqual associated with this index clause */
+ foreach(lc2, iclause->indexquals)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
+ Expr *clause = rinfo->clause;
+ Oid clause_op = InvalidOid;
+ int op_strategy;
+
+ if (IsA(clause, OpExpr))
+ {
+ OpExpr *op = (OpExpr *) clause;
+
+ clause_op = op->opno;
+ }
+ else if (IsA(clause, RowCompareExpr))
+ {
+ RowCompareExpr *rc = (RowCompareExpr *) clause;
+
+ clause_op = linitial_oid(rc->opnos);
+ }
+ else if (IsA(clause, ScalarArrayOpExpr))
+ {
+ ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
+ Node *other_operand = (Node *) lsecond(saop->args);
+ int alength = estimate_array_length(other_operand);
+
+ clause_op = saop->opno;
+ found_saop = true;
+ /* count number of SA scans induced by indexBoundQuals only */
+ if (alength > 1)
+ num_sa_scans *= alength;
+ }
+ else if (IsA(clause, NullTest))
+ {
+ NullTest *nt = (NullTest *) clause;
+
+ if (nt->nulltesttype == IS_NULL)
+ {
+ found_is_null_op = true;
+ /* IS NULL is like = for selectivity purposes */
+ eqQualHere = true;
+ }
+ }
+ else
+ elog(ERROR, "unsupported indexqual type: %d",
+ (int) nodeTag(clause));
+
+ /* check for equality operator */
+ if (OidIsValid(clause_op))
+ {
+ op_strategy = get_op_opfamily_strategy(clause_op,
+ index->opfamily[indexcol]);
+ Assert(op_strategy != 0); /* not a member of opfamily?? */
+ if (op_strategy == BTEqualStrategyNumber)
+ eqQualHere = true;
+ }
+
+ indexBoundQuals = lappend(indexBoundQuals, rinfo);
+ }
+ }
+
+ /*
+ * If index is unique and we found an '=' clause for each column, we can
+ * just assume numIndexTuples = 1 and skip the expensive
+ * clauselist_selectivity calculations. However, a ScalarArrayOp or
+ * NullTest invalidates that theory, even though it sets eqQualHere.
+ */
+ if (index->unique &&
+ indexcol == index->nkeycolumns - 1 &&
+ eqQualHere &&
+ !found_saop &&
+ !found_is_null_op)
+ numIndexTuples = 1.0;
+ else
+ {
+ List *selectivityQuals;
+ Selectivity btreeSelectivity;
+
+ /*
+ * If the index is partial, AND the index predicate with the
+ * index-bound quals to produce a more accurate idea of the number of
+ * rows covered by the bound conditions.
+ */
+ selectivityQuals = add_predicate_to_index_quals(index, indexBoundQuals);
+
+ btreeSelectivity = clauselist_selectivity(root, selectivityQuals,
+ index->rel->relid,
+ JOIN_INNER,
+ NULL);
+ numIndexTuples = btreeSelectivity * index->rel->tuples;
+
+ /*
+ * As in genericcostestimate(), we have to adjust for any
+ * ScalarArrayOpExpr quals included in indexBoundQuals, and then round
+ * to integer.
+ */
+ numIndexTuples = rint(numIndexTuples / num_sa_scans);
+ }
+
+ /*
+ * Now do generic index cost estimation.
+ */
+ MemSet(&costs, 0, sizeof(costs));
+ costs.numIndexTuples = numIndexTuples;
+
+ genericcostestimate(root, path, loop_count, &costs);
+
+ /*
+ * Add a CPU-cost component to represent the costs of initial btree
+ * descent. We don't charge any I/O cost for touching upper btree levels,
+ * since they tend to stay in cache, but we still have to do about log2(N)
+ * comparisons to descend a btree of N leaf tuples. We charge one
+ * cpu_operator_cost per comparison.
+ *
+ * If there are ScalarArrayOpExprs, charge this once per SA scan. The
+ * ones after the first one are not startup cost so far as the overall
+ * plan is concerned, so add them only to "total" cost.
+ */
+ if (index->tuples > 1) /* avoid computing log(0) */
+ {
+ descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost;
+ costs.indexStartupCost += descentCost;
+ costs.indexTotalCost += costs.num_sa_scans * descentCost;
+ }
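+
+ /*
+ * For illustration, with an arbitrary example: a btree of 1,000,000 leaf
+ * tuples needs ceil(log2(1000000)) = 20 comparisons per descent, i.e. a
+ * charge of 20 * cpu_operator_cost = 0.05 at the default setting.
+ */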
+
+ /*
+ * Even though we're not charging I/O cost for touching upper btree pages,
+ * it's still reasonable to charge some CPU cost per page descended
+ * through. Moreover, if we had no such charge at all, bloated indexes
+ * would appear to have the same search cost as unbloated ones, at least
+ * in cases where only a single leaf page is expected to be visited. This
+ * cost is somewhat arbitrarily set at 50x cpu_operator_cost per page
+ * touched. The number of such pages is btree tree height plus one (ie,
+ * we charge for the leaf page too). As above, charge once per SA scan.
+ */
+ descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
+ costs.indexStartupCost += descentCost;
+ costs.indexTotalCost += costs.num_sa_scans * descentCost;
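+
+ /*
+ * For illustration, with an arbitrary example: a tree height of 2 gives
+ * (2 + 1) * 50.0 * cpu_operator_cost = 0.375 at the default
+ * cpu_operator_cost of 0.0025.
+ */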
+
+ /*
+ * If we can get an estimate of the first column's ordering correlation C
+ * from pg_statistic, estimate the index correlation as C for a
+ * single-column index, or C * 0.75 for multiple columns. (The idea here
+ * is that multiple columns dilute the importance of the first column's
+ * ordering, but don't negate it entirely. Before 8.0 we divided the
+ * correlation by the number of columns, but that seems too strong.)
+ */
+ MemSet(&vardata, 0, sizeof(vardata));
+
+ if (index->indexkeys[0] != 0)
+ {
+ /* Simple variable --- look to stats for the underlying table */
+ RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root);
+
+ Assert(rte->rtekind == RTE_RELATION);
+ relid = rte->relid;
+ Assert(relid != InvalidOid);
+ colnum = index->indexkeys[0];
+
+ if (get_relation_stats_hook &&
+ (*get_relation_stats_hook) (root, rte, colnum, &vardata))
+ {
+ /*
+ * The hook took control of acquiring a stats tuple. If it did
+ * supply a tuple, it'd better have supplied a freefunc.
+ */
+ if (HeapTupleIsValid(vardata.statsTuple) &&
+ !vardata.freefunc)
+ elog(ERROR, "no function provided to release variable stats with");
+ }
+ else
+ {
+ vardata.statsTuple = SearchSysCache3(STATRELATTINH,
+ ObjectIdGetDatum(relid),
+ Int16GetDatum(colnum),
+ BoolGetDatum(rte->inh));
+ vardata.freefunc = ReleaseSysCache;
+ }
+ }
+ else
+ {
+ /* Expression --- maybe there are stats for the index itself */
+ relid = index->indexoid;
+ colnum = 1;
+
+ if (get_index_stats_hook &&
+ (*get_index_stats_hook) (root, relid, colnum, &vardata))
+ {
+ /*
+ * The hook took control of acquiring a stats tuple. If it did
+ * supply a tuple, it'd better have supplied a freefunc.
+ */
+ if (HeapTupleIsValid(vardata.statsTuple) &&
+ !vardata.freefunc)
+ elog(ERROR, "no function provided to release variable stats with");
+ }
+ else
+ {
+ vardata.statsTuple = SearchSysCache3(STATRELATTINH,
+ ObjectIdGetDatum(relid),
+ Int16GetDatum(colnum),
+ BoolGetDatum(false));
+ vardata.freefunc = ReleaseSysCache;
+ }
+ }
+
+ if (HeapTupleIsValid(vardata.statsTuple))
+ {
+ Oid sortop;
+ AttStatsSlot sslot;
+
+ sortop = get_opfamily_member(index->opfamily[0],
+ index->opcintype[0],
+ index->opcintype[0],
+ BTLessStrategyNumber);
+ if (OidIsValid(sortop) &&
+ get_attstatsslot(&sslot, vardata.statsTuple,
+ STATISTIC_KIND_CORRELATION, sortop,
+ ATTSTATSSLOT_NUMBERS))
+ {
+ double varCorrelation;
+
+ Assert(sslot.nnumbers == 1);
+ varCorrelation = sslot.numbers[0];
+
+ if (index->reverse_sort[0])
+ varCorrelation = -varCorrelation;
+
+ if (index->nkeycolumns > 1)
+ costs.indexCorrelation = varCorrelation * 0.75;
+ else
+ costs.indexCorrelation = varCorrelation;
+
+ free_attstatsslot(&sslot);
+ }
+ }
+
+ ReleaseVariableStats(vardata);
+
+ *indexStartupCost = costs.indexStartupCost;
+ *indexTotalCost = costs.indexTotalCost;
+ *indexSelectivity = costs.indexSelectivity;
+ *indexCorrelation = costs.indexCorrelation;
+ *indexPages = costs.numIndexPages;
+}
+
+void
+hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
+{
+ GenericCosts costs;
+
+ MemSet(&costs, 0, sizeof(costs));
+
+ genericcostestimate(root, path, loop_count, &costs);
+
+ /*
+ * A hash index has no descent costs as such, since the index AM can go
+ * directly to the target bucket after computing the hash value. There
+ * are a couple of other hash-specific costs that we could conceivably add
+ * here, though:
+ *
+ * Ideally we'd charge spc_random_page_cost for each page in the target
+ * bucket, not just the numIndexPages pages that genericcostestimate
+ * thought we'd visit. However in most cases we don't know which bucket
+ * that will be. There's no point in considering the average bucket size
+ * because the hash AM makes sure that's always one page.
+ *
+ * Likewise, we could consider charging some CPU for each index tuple in
+ * the bucket, if we knew how many there were. But the per-tuple cost is
+ * just a hash value comparison, not a general datatype-dependent
+ * comparison, so any such charge ought to be quite a bit less than
+ * cpu_operator_cost; which makes it probably not worth worrying about.
+ *
+ * A bigger issue is that chance hash-value collisions will result in
+ * wasted probes into the heap. We don't currently attempt to model this
+ * cost on the grounds that it's rare, but maybe it's not rare enough.
+ * (Any fix for this ought to consider the generic lossy-operator problem,
+ * though; it's not entirely hash-specific.)
+ */
+
+ *indexStartupCost = costs.indexStartupCost;
+ *indexTotalCost = costs.indexTotalCost;
+ *indexSelectivity = costs.indexSelectivity;
+ *indexCorrelation = costs.indexCorrelation;
+ *indexPages = costs.numIndexPages;
+}
+
+void
+gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
+{
+ IndexOptInfo *index = path->indexinfo;
+ GenericCosts costs;
+ Cost descentCost;
+
+ MemSet(&costs, 0, sizeof(costs));
+
+ genericcostestimate(root, path, loop_count, &costs);
+
+ /*
+ * We model index descent costs similarly to those for btree, but to do
+ * that we first need an idea of the tree height. We somewhat arbitrarily
+ * assume that the fanout is 100, meaning the tree height is at most
+ * log100(index->pages).
+ *
+ * Although this computation isn't really expensive enough to require
+ * caching, we might as well use index->tree_height to cache it.
+ */
+ if (index->tree_height < 0) /* unknown? */
+ {
+ if (index->pages > 1) /* avoid computing log(0) */
+ index->tree_height = (int) (log(index->pages) / log(100.0));
+ else
+ index->tree_height = 0;
+ }
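+
+ /*
+ * For illustration, with an arbitrary example: an index of 10,000 pages
+ * gives (int) (log(10000) / log(100.0)) = 2, i.e. an assumed tree height
+ * of 2.
+ */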
+
+ /*
+ * Add a CPU-cost component to represent the costs of initial descent. We
+ * just use log(N) here not log2(N) since the branching factor isn't
+ * necessarily two anyway. As for btree, charge once per SA scan.
+ */
+ if (index->tuples > 1) /* avoid computing log(0) */
+ {
+ descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
+ costs.indexStartupCost += descentCost;
+ costs.indexTotalCost += costs.num_sa_scans * descentCost;
+ }
+
+ /*
+ * Likewise add a per-page charge, calculated the same as for btrees.
+ */
+ descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
+ costs.indexStartupCost += descentCost;
+ costs.indexTotalCost += costs.num_sa_scans * descentCost;
+
+ *indexStartupCost = costs.indexStartupCost;
+ *indexTotalCost = costs.indexTotalCost;
+ *indexSelectivity = costs.indexSelectivity;
+ *indexCorrelation = costs.indexCorrelation;
+ *indexPages = costs.numIndexPages;
+}
+
+void
+spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
+{
+ IndexOptInfo *index = path->indexinfo;
+ GenericCosts costs;
+ Cost descentCost;
+
+ MemSet(&costs, 0, sizeof(costs));
+
+ genericcostestimate(root, path, loop_count, &costs);
+
+ /*
+ * We model index descent costs similarly to those for btree, but to do
+ * that we first need an idea of the tree height. We somewhat arbitrarily
+ * assume that the fanout is 100, meaning the tree height is at most
+ * log100(index->pages).
+ *
+ * Although this computation isn't really expensive enough to require
+ * caching, we might as well use index->tree_height to cache it.
+ */
+ if (index->tree_height < 0) /* unknown? */
+ {
+ if (index->pages > 1) /* avoid computing log(0) */
+ index->tree_height = (int) (log(index->pages) / log(100.0));
+ else
+ index->tree_height = 0;
+ }
+
+ /*
+ * Add a CPU-cost component to represent the costs of initial descent. We
+ * just use log(N) here not log2(N) since the branching factor isn't
+ * necessarily two anyway. As for btree, charge once per SA scan.
+ */
+ if (index->tuples > 1) /* avoid computing log(0) */
+ {
+ descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
+ costs.indexStartupCost += descentCost;
+ costs.indexTotalCost += costs.num_sa_scans * descentCost;
+ }
+
+ /*
+ * Likewise add a per-page charge, calculated the same as for btrees.
+ */
+ descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
+ costs.indexStartupCost += descentCost;
+ costs.indexTotalCost += costs.num_sa_scans * descentCost;
+
+ *indexStartupCost = costs.indexStartupCost;
+ *indexTotalCost = costs.indexTotalCost;
+ *indexSelectivity = costs.indexSelectivity;
+ *indexCorrelation = costs.indexCorrelation;
+ *indexPages = costs.numIndexPages;
+}
+
+
+/*
+ * Support routines for gincostestimate
+ */
+
+typedef struct
+{
+ bool attHasFullScan[INDEX_MAX_KEYS];
+ bool attHasNormalScan[INDEX_MAX_KEYS];
+ double partialEntries;
+ double exactEntries;
+ double searchEntries;
+ double arrayScans;
+} GinQualCounts;
+
+/*
+ * Estimate the number of index terms that need to be searched for while
+ * testing the given GIN query, and increment the counts in *counts
+ * appropriately. If the query is unsatisfiable, return false.
+ */
+static bool
+gincost_pattern(IndexOptInfo *index, int indexcol,
+ Oid clause_op, Datum query,
+ GinQualCounts *counts)
+{
+ FmgrInfo flinfo;
+ Oid extractProcOid;
+ Oid collation;
+ int strategy_op;
+ Oid lefttype,
+ righttype;
+ int32 nentries = 0;
+ bool *partial_matches = NULL;
+ Pointer *extra_data = NULL;
+ bool *nullFlags = NULL;
+ int32 searchMode = GIN_SEARCH_MODE_DEFAULT;
+ int32 i;
+
+ Assert(indexcol < index->nkeycolumns);
+
+ /*
+ * Get the operator's strategy number and declared input data types within
+ * the index opfamily. (We don't need the latter, but we use
+ * get_op_opfamily_properties because it will throw error if it fails to
+ * find a matching pg_amop entry.)
+ */
+ get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false,
+ &strategy_op, &lefttype, &righttype);
+
+ /*
+ * GIN always uses the "default" support functions, which are those with
+ * lefttype == righttype == the opclass' opcintype (see
+ * IndexSupportInitialize in relcache.c).
+ */
+ extractProcOid = get_opfamily_proc(index->opfamily[indexcol],
+ index->opcintype[indexcol],
+ index->opcintype[indexcol],
+ GIN_EXTRACTQUERY_PROC);
+
+ if (!OidIsValid(extractProcOid))
+ {
+ /* should not happen; throw same error as index_getprocinfo */
+ elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
+ GIN_EXTRACTQUERY_PROC, indexcol + 1,
+ get_rel_name(index->indexoid));
+ }
+
+ /*
+ * Choose collation to pass to extractProc (should match initGinState).
+ */
+ if (OidIsValid(index->indexcollations[indexcol]))
+ collation = index->indexcollations[indexcol];
+ else
+ collation = DEFAULT_COLLATION_OID;
+
+ fmgr_info(extractProcOid, &flinfo);
+
+ set_fn_opclass_options(&flinfo, index->opclassoptions[indexcol]);
+
+ FunctionCall7Coll(&flinfo,
+ collation,
+ query,
+ PointerGetDatum(&nentries),
+ UInt16GetDatum(strategy_op),
+ PointerGetDatum(&partial_matches),
+ PointerGetDatum(&extra_data),
+ PointerGetDatum(&nullFlags),
+ PointerGetDatum(&searchMode));
+
+ if (nentries <= 0 && searchMode == GIN_SEARCH_MODE_DEFAULT)
+ {
+ /* No match is possible */
+ return false;
+ }
+
+ for (i = 0; i < nentries; i++)
+ {
+ /*
+ * For a partial match we don't have any information to estimate the
+ * number of matched index entries, so we just estimate it as 100.
+ */
+ if (partial_matches && partial_matches[i])
+ counts->partialEntries += 100;
+ else
+ counts->exactEntries++;
+
+ counts->searchEntries++;
+ }
+
+ if (searchMode == GIN_SEARCH_MODE_DEFAULT)
+ {
+ counts->attHasNormalScan[indexcol] = true;
+ }
+ else if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY)
+ {
+ /* Treat "include empty" like an exact-match item */
+ counts->attHasNormalScan[indexcol] = true;
+ counts->exactEntries++;
+ counts->searchEntries++;
+ }
+ else
+ {
+ /* It's GIN_SEARCH_MODE_ALL */
+ counts->attHasFullScan[indexcol] = true;
+ }
+
+ return true;
+}
+
+/*
+ * Estimate the number of index terms that need to be searched for while
+ * testing the given GIN index clause, and increment the counts in *counts
+ * appropriately. If the query is unsatisfiable, return false.
+ */
+static bool
+gincost_opexpr(PlannerInfo *root,
+ IndexOptInfo *index,
+ int indexcol,
+ OpExpr *clause,
+ GinQualCounts *counts)
+{
+ Oid clause_op = clause->opno;
+ Node *operand = (Node *) lsecond(clause->args);
+
+ /* aggressively reduce to a constant, and look through relabeling */
+ operand = estimate_expression_value(root, operand);
+
+ if (IsA(operand, RelabelType))
+ operand = (Node *) ((RelabelType *) operand)->arg;
+
+ /*
+ * We can't call the extractQuery method for an unknown operand, so
+ * unless the operand is a Const we can't do much; just assume there will
+ * be one ordinary search entry from the operand at runtime.
+ */
+ if (!IsA(operand, Const))
+ {
+ counts->exactEntries++;
+ counts->searchEntries++;
+ return true;
+ }
+
+ /* If Const is null, there can be no matches */
+ if (((Const *) operand)->constisnull)
+ return false;
+
+ /* Otherwise, apply extractQuery and get the actual term counts */
+ return gincost_pattern(index, indexcol, clause_op,
+ ((Const *) operand)->constvalue,
+ counts);
+}
+
+/*
+ * Estimate the number of index terms that need to be searched for while
+ * testing the given GIN index clause, and increment the counts in *counts
+ * appropriately. If the query is unsatisfiable, return false.
+ *
+ * A ScalarArrayOpExpr will give rise to N separate indexscans at runtime,
+ * each of which involves one value from the RHS array, plus all the
+ * non-array quals (if any). To model this, we average the counts across
+ * the RHS elements, and add the averages to the counts in *counts (which
+ * correspond to per-indexscan costs). We also multiply counts->arrayScans
+ * by N, causing gincostestimate to scale up its estimates accordingly.
+ */
+static bool
+gincost_scalararrayopexpr(PlannerInfo *root,
+ IndexOptInfo *index,
+ int indexcol,
+ ScalarArrayOpExpr *clause,
+ double numIndexEntries,
+ GinQualCounts *counts)
+{
+ Oid clause_op = clause->opno;
+ Node *rightop = (Node *) lsecond(clause->args);
+ ArrayType *arrayval;
+ int16 elmlen;
+ bool elmbyval;
+ char elmalign;
+ int numElems;
+ Datum *elemValues;
+ bool *elemNulls;
+ GinQualCounts arraycounts;
+ int numPossible = 0;
+ int i;
+
+ Assert(clause->useOr);
+
+ /* aggressively reduce to a constant, and look through relabeling */
+ rightop = estimate_expression_value(root, rightop);
+
+ if (IsA(rightop, RelabelType))
+ rightop = (Node *) ((RelabelType *) rightop)->arg;
+
+ /*
+ * We can't call the extractQuery method for an unknown operand, so
+ * unless the operand is a Const we can't do much; just assume there will
+ * be one ordinary search entry from each array entry at runtime, and
+ * fall back on a probably-bad estimate of the number of array entries.
+ */
+ if (!IsA(rightop, Const))
+ {
+ counts->exactEntries++;
+ counts->searchEntries++;
+ counts->arrayScans *= estimate_array_length(rightop);
+ return true;
+ }
+
+ /* If Const is null, there can be no matches */
+ if (((Const *) rightop)->constisnull)
+ return false;
+
+ /* Otherwise, extract the array elements and iterate over them */
+ arrayval = DatumGetArrayTypeP(((Const *) rightop)->constvalue);
+ get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
+ &elmlen, &elmbyval, &elmalign);
+ deconstruct_array(arrayval,
+ ARR_ELEMTYPE(arrayval),
+ elmlen, elmbyval, elmalign,
+ &elemValues, &elemNulls, &numElems);
+
+ memset(&arraycounts, 0, sizeof(arraycounts));
+
+ for (i = 0; i < numElems; i++)
+ {
+ GinQualCounts elemcounts;
+
+ /* NULL can't match anything, so ignore, as the executor will */
+ if (elemNulls[i])
+ continue;
+
+ /* Otherwise, apply extractQuery and get the actual term counts */
+ memset(&elemcounts, 0, sizeof(elemcounts));
+
+ if (gincost_pattern(index, indexcol, clause_op, elemValues[i],
+ &elemcounts))
+ {
+ /* We ignore array elements that are unsatisfiable patterns */
+ numPossible++;
+
+ if (elemcounts.attHasFullScan[indexcol] &&
+ !elemcounts.attHasNormalScan[indexcol])
+ {
+ /*
+ * Full index scan will be required. We treat this as if
+ * every key in the index had been listed in the query; is
+ * that reasonable?
+ */
+ elemcounts.partialEntries = 0;
+ elemcounts.exactEntries = numIndexEntries;
+ elemcounts.searchEntries = numIndexEntries;
+ }
+ arraycounts.partialEntries += elemcounts.partialEntries;
+ arraycounts.exactEntries += elemcounts.exactEntries;
+ arraycounts.searchEntries += elemcounts.searchEntries;
+ }
+ }
+
+ if (numPossible == 0)
+ {
+ /* No satisfiable patterns in the array */
+ return false;
+ }
+
+ /*
+ * Now add the averages to the global counts. This will give us an
+ * estimate of the average number of terms searched for in each indexscan,
+ * including contributions from both array and non-array quals.
+ */
+ counts->partialEntries += arraycounts.partialEntries / numPossible;
+ counts->exactEntries += arraycounts.exactEntries / numPossible;
+ counts->searchEntries += arraycounts.searchEntries / numPossible;
+
+ counts->arrayScans *= numPossible;
+
+ return true;
+}
+
+/*
+ * GIN has search behavior completely different from other index types
+ */
+void
+gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
+{
+ IndexOptInfo *index = path->indexinfo;
+ List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
+ List *selectivityQuals;
+ double numPages = index->pages,
+ numTuples = index->tuples;
+ double numEntryPages,
+ numDataPages,
+ numPendingPages,
+ numEntries;
+ GinQualCounts counts;
+ bool matchPossible;
+ bool fullIndexScan;
+ double partialScale;
+ double entryPagesFetched,
+ dataPagesFetched,
+ dataPagesFetchedBySel;
+ double qual_op_cost,
+ qual_arg_cost,
+ spc_random_page_cost,
+ outer_scans;
+ Relation indexRel;
+ GinStatsData ginStats;
+ ListCell *lc;
+ int i;
+
+ /*
+ * Obtain statistical information from the meta page, if possible. Else
+ * set ginStats to zeroes, and we'll cope below.
+ */
+ if (!index->hypothetical)
+ {
+ /* Lock should have already been obtained in plancat.c */
+ indexRel = index_open(index->indexoid, NoLock);
+ ginGetStats(indexRel, &ginStats);
+ index_close(indexRel, NoLock);
+ }
+ else
+ {
+ memset(&ginStats, 0, sizeof(ginStats));
+ }
+
+ /*
+ * Assuming we got valid (nonzero) stats at all, nPendingPages can be
+ * trusted, but the other fields are data as of the last VACUUM. We can
+ * scale them up to account for growth since then, but that method only
+ * goes so far; in the worst case, the stats might be for a completely
+ * empty index, and scaling them will produce pretty bogus numbers.
+ * Somewhat arbitrarily, set the cutoff for doing scaling at 4X growth; if
+ * it's grown more than that, fall back to estimating things only from the
+ * assumed-accurate index size. But we'll trust nPendingPages in any case
+ * so long as it's not clearly insane, ie, more than the index size.
+ */
+ if (ginStats.nPendingPages < numPages)
+ numPendingPages = ginStats.nPendingPages;
+ else
+ numPendingPages = 0;
+
+ if (numPages > 0 && ginStats.nTotalPages <= numPages &&
+ ginStats.nTotalPages > numPages / 4 &&
+ ginStats.nEntryPages > 0 && ginStats.nEntries > 0)
+ {
+ /*
+ * OK, the stats seem close enough to sane to be trusted. But we
+ * still need to scale them by the ratio numPages / nTotalPages to
+ * account for growth since the last VACUUM.
+ */
+ double scale = numPages / ginStats.nTotalPages;
+
+ numEntryPages = ceil(ginStats.nEntryPages * scale);
+ numDataPages = ceil(ginStats.nDataPages * scale);
+ numEntries = ceil(ginStats.nEntries * scale);
+ /* ensure we didn't round up too much */
+ numEntryPages = Min(numEntryPages, numPages - numPendingPages);
+ numDataPages = Min(numDataPages,
+ numPages - numPendingPages - numEntryPages);
+ }
+ else
+ {
+ /*
+ * We might get here because it's a hypothetical index, or an index
+ * created pre-9.1 and never vacuumed since upgrading (in which case
+ * its stats would read as zeroes), or just because it's grown too
+ * much since the last VACUUM for us to put our faith in scaling.
+ *
+ * Invent some plausible internal statistics based on the index page
+ * count (and clamp that to at least 10 pages, just in case). We
+ * estimate that 90% of the index is entry pages, and the rest is data
+ * pages. Estimate 100 entries per entry page; this is rather bogus
+ * since it'll depend on the size of the keys, but it's more robust
+ * than trying to predict the number of entries per heap tuple.
+ */
+ numPages = Max(numPages, 10);
+ numEntryPages = floor((numPages - numPendingPages) * 0.90);
+ numDataPages = numPages - numPendingPages - numEntryPages;
+ numEntries = floor(numEntryPages * 100);
+ }
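+
+ /*
+ * For illustration, with an arbitrary example: a 1000-page index with no
+ * pending pages would be guessed at 900 entry pages, 100 data pages, and
+ * 900 * 100 = 90,000 entries.
+ */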
+
+ /* In an empty index, numEntries could be zero. Avoid divide-by-zero */
+ if (numEntries < 1)
+ numEntries = 1;
+
+ /*
+ * If the index is partial, AND the index predicate with the index-bound
+ * quals to produce a more accurate idea of the number of rows covered by
+ * the bound conditions.
+ */
+ selectivityQuals = add_predicate_to_index_quals(index, indexQuals);
+
+ /* Estimate the fraction of main-table tuples that will be visited */
+ *indexSelectivity = clauselist_selectivity(root, selectivityQuals,
+ index->rel->relid,
+ JOIN_INNER,
+ NULL);
+
+ /* fetch estimated page cost for tablespace containing index */
+ get_tablespace_page_costs(index->reltablespace,
+ &spc_random_page_cost,
+ NULL);
+
+ /*
+ * Generic assumption about index correlation: there isn't any.
+ */
+ *indexCorrelation = 0.0;
+
+ /*
+ * Examine quals to estimate number of search entries & partial matches
+ */
+ memset(&counts, 0, sizeof(counts));
+ counts.arrayScans = 1;
+ matchPossible = true;
+
+ foreach(lc, path->indexclauses)
+ {
+ IndexClause *iclause = lfirst_node(IndexClause, lc);
+ ListCell *lc2;
+
+ foreach(lc2, iclause->indexquals)
+ {
+ RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
+ Expr *clause = rinfo->clause;
+
+ if (IsA(clause, OpExpr))
+ {
+ matchPossible = gincost_opexpr(root,
+ index,
+ iclause->indexcol,
+ (OpExpr *) clause,
+ &counts);
+ if (!matchPossible)
+ break;
+ }
+ else if (IsA(clause, ScalarArrayOpExpr))
+ {
+ matchPossible = gincost_scalararrayopexpr(root,
+ index,
+ iclause->indexcol,
+ (ScalarArrayOpExpr *) clause,
+ numEntries,
+ &counts);
+ if (!matchPossible)
+ break;
+ }
+ else
+ {
+ /* shouldn't be anything else for a GIN index */
+ elog(ERROR, "unsupported GIN indexqual type: %d",
+ (int) nodeTag(clause));
+ }
+ }
+ }
+
+ /* Fall out if there were any provably-unsatisfiable quals */
+ if (!matchPossible)
+ {
+ *indexStartupCost = 0;
+ *indexTotalCost = 0;
+ *indexSelectivity = 0;
+ return;
+ }
+
+ /*
+ * If attribute has a full scan and at the same time doesn't have normal
+ * scan, then we'll have to scan all non-null entries of that attribute.
+ * Currently, we don't have per-attribute statistics for GIN. Thus, we
+ * must assume the whole GIN index has to be scanned in this case.
+ */
+ fullIndexScan = false;
+ for (i = 0; i < index->nkeycolumns; i++)
+ {
+ if (counts.attHasFullScan[i] && !counts.attHasNormalScan[i])
+ {
+ fullIndexScan = true;
+ break;
+ }
+ }
+
+ if (fullIndexScan || indexQuals == NIL)
+ {
+ /*
+ * Full index scan will be required. We treat this as if every key in
+ * the index had been listed in the query; is that reasonable?
+ */
+ counts.partialEntries = 0;
+ counts.exactEntries = numEntries;
+ counts.searchEntries = numEntries;
+ }
+
+ /* Will we have more than one iteration of a nestloop scan? */
+ outer_scans = loop_count;
+
+ /*
+ * Compute the cost to begin the scan; first of all, account for the
+ * pending list.
+ */
+ entryPagesFetched = numPendingPages;
+
+ /*
+ * Estimate the number of entry pages read. We need to perform
+ * counts.searchEntries searches. A power function is the appropriate
+ * shape, but since leaf pages usually hold a much greater number of
+ * tuples, we keep the exponent small. This includes all searches in the
+ * entry tree, including the search for the first entry in the
+ * partial-match algorithm.
+ */
+ entryPagesFetched += ceil(counts.searchEntries * rint(pow(numEntryPages, 0.15)));
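+
+ /*
+ * For illustration, continuing the arbitrary numbers above: with 900
+ * entry pages, rint(pow(900, 0.15)) is about 3, so three search entries
+ * add roughly ceil(3 * 3) = 9 entry pages to the estimate.
+ */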
+
+ /*
+ * Add an estimate of the entry pages read by the partial-match
+ * algorithm. It's a scan over the leaf pages of the entry tree. We don't
+ * have any useful stats here, so estimate it as a proportion. Because
+ * counts.partialEntries is really pretty bogus (see code above), it's
+ * possible that it exceeds numEntries; clamp the proportion to ensure
+ * sanity.
+ */
+ partialScale = counts.partialEntries / numEntries;
+ partialScale = Min(partialScale, 1.0);
+
+ entryPagesFetched += ceil(numEntryPages * partialScale);
+
+ /*
+ * The partial-match algorithm reads all data pages before doing the
+ * actual scan, so it's a startup cost. Again, we don't have any useful
+ * stats here, so estimate it as a proportion.
+ */
+ dataPagesFetched = ceil(numDataPages * partialScale);
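+
+ /*
+ * For illustration, continuing the arbitrary numbers above: one partial
+ * match counted as 100 entries against 90,000 total entries gives a
+ * partialScale of about 0.0011, adding roughly one entry page and one
+ * data page to the startup estimate.
+ */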
+
+ /*
+ * Calculate cache effects if more than one scan due to nestloops or array
+ * quals. The result is pro-rated per nestloop scan, but the array qual
+ * factor shouldn't be pro-rated (compare genericcostestimate).
+ */
+ if (outer_scans > 1 || counts.arrayScans > 1)
+ {
+ entryPagesFetched *= outer_scans * counts.arrayScans;
+ entryPagesFetched = index_pages_fetched(entryPagesFetched,
+ (BlockNumber) numEntryPages,
+ numEntryPages, root);
+ entryPagesFetched /= outer_scans;
+ dataPagesFetched *= outer_scans * counts.arrayScans;
+ dataPagesFetched = index_pages_fetched(dataPagesFetched,
+ (BlockNumber) numDataPages,
+ numDataPages, root);
+ dataPagesFetched /= outer_scans;
+ }
+
+ /*
+ * Here we use random page cost because logically-close pages could be far
+ * apart on disk.
+ */
+ *indexStartupCost = (entryPagesFetched + dataPagesFetched) * spc_random_page_cost;
+
+ /*
+ * Now compute the number of data pages fetched during the scan.
+ *
+ * We assume every entry has the same number of items, and that there is
+ * no overlap between them. (XXX: tsvector and array opclasses collect
+ * statistics on the frequency of individual keys; it would be nice to use
+ * those here.)
+ */
+ dataPagesFetched = ceil(numDataPages * counts.exactEntries / numEntries);
+
+ /*
+ * If there is a lot of overlap among the entries, in particular if one of
+ * the entries is very frequent, the above calculation can grossly
+ * under-estimate. As a simple cross-check, calculate a lower bound based
+ * on the overall selectivity of the quals. At a minimum, we must read
+ * one item pointer for each matching entry.
+ *
+ * The width of each item pointer varies, based on the level of
+ * compression. We don't have statistics on that, but an average of
+ * around 3 bytes per item is fairly typical.
+ */
+ dataPagesFetchedBySel = ceil(*indexSelectivity *
+ (numTuples / (BLCKSZ / 3)));
+ if (dataPagesFetchedBySel > dataPagesFetched)
+ dataPagesFetched = dataPagesFetchedBySel;
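+
+ /*
+ * For illustration, with arbitrary example numbers: at the default 8 kB
+ * BLCKSZ, BLCKSZ / 3 is about 2730 item pointers per page, so a
+ * selectivity of 0.01 over 1,000,000 heap tuples implies at least
+ * ceil(0.01 * 1000000 / 2730) = 4 data pages.
+ */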
+
+ /* Account for cache effects, the same as above */
+ if (outer_scans > 1 || counts.arrayScans > 1)
+ {
+ dataPagesFetched *= outer_scans * counts.arrayScans;
+ dataPagesFetched = index_pages_fetched(dataPagesFetched,
+ (BlockNumber) numDataPages,
+ numDataPages, root);
+ dataPagesFetched /= outer_scans;
+ }
+
+ /* And apply random_page_cost as the cost per page */
+ *indexTotalCost = *indexStartupCost +
+ dataPagesFetched * spc_random_page_cost;
+
+ /*
+ * Add on index qual eval costs, much as in genericcostestimate. But we
+ * can disregard indexorderbys, since GIN doesn't support those.
+ */
+ qual_arg_cost = index_other_operands_eval_cost(root, indexQuals);
+ qual_op_cost = cpu_operator_cost * list_length(indexQuals);
+
+ *indexStartupCost += qual_arg_cost;
+ *indexTotalCost += qual_arg_cost;
+ *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
+ *indexPages = dataPagesFetched;
+}
+
+/*
+ * BRIN has search behavior completely different from other index types
+ */
+void
+brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
+ Cost *indexStartupCost, Cost *indexTotalCost,
+ Selectivity *indexSelectivity, double *indexCorrelation,
+ double *indexPages)
+{
+ IndexOptInfo *index = path->indexinfo;
+ List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
+ double numPages = index->pages;
+ RelOptInfo *baserel = index->rel;
+ RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
+ Cost spc_seq_page_cost;
+ Cost spc_random_page_cost;
+ double qual_arg_cost;
+ double qualSelectivity;
+ BrinStatsData statsData;
+ double indexRanges;
+ double minimalRanges;
+ double estimatedRanges;
+ double selec;
+ Relation indexRel;
+ ListCell *l;
+ VariableStatData vardata;
+
+ Assert(rte->rtekind == RTE_RELATION);
+
+ /* fetch estimated page cost for the tablespace containing the index */
+ get_tablespace_page_costs(index->reltablespace,
+ &spc_random_page_cost,
+ &spc_seq_page_cost);
+
+ /*
+ * Obtain some data from the index itself, if possible. Otherwise invent
+ * some plausible internal statistics based on the relation page count.
+ */
+ if (!index->hypothetical)
+ {
+ /*
+ * A lock should have already been obtained on the index in plancat.c.
+ */
+ indexRel = index_open(index->indexoid, NoLock);
+ brinGetStats(indexRel, &statsData);
+ index_close(indexRel, NoLock);
+
+ /* work out the actual number of ranges in the index */
+ indexRanges = Max(ceil((double) baserel->pages /
+ statsData.pagesPerRange), 1.0);
+ }
+ else
+ {
+ /*
+ * Assume default number of pages per range, and estimate the number
+ * of ranges based on that.
+ */
+ indexRanges = Max(ceil((double) baserel->pages /
+ BRIN_DEFAULT_PAGES_PER_RANGE), 1.0);
+
+ statsData.pagesPerRange = BRIN_DEFAULT_PAGES_PER_RANGE;
+ statsData.revmapNumPages = (indexRanges / REVMAP_PAGE_MAXITEMS) + 1;
+ }
+
+ /*
+ * Compute index correlation
+ *
+ * Because we can use all index quals equally when scanning, we can use
+ * the largest correlation (in absolute value) among columns used by the
+ * query. Start at zero, the worst possible case. If we cannot find any
+ * correlation statistics, we will keep it as 0.
+ */
+ *indexCorrelation = 0;
+
+ foreach(l, path->indexclauses)
+ {
+ IndexClause *iclause = lfirst_node(IndexClause, l);
+ AttrNumber attnum = index->indexkeys[iclause->indexcol];
+
+ /* attempt to lookup stats in relation for this index column */
+ if (attnum != 0)
+ {
+ /* Simple variable -- look to stats for the underlying table */
+ if (get_relation_stats_hook &&
+ (*get_relation_stats_hook) (root, rte, attnum, &vardata))
+ {
+ /*
+ * The hook took control of acquiring a stats tuple. If it
+ * did supply a tuple, it'd better have supplied a freefunc.
+ */
+ if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc)
+ elog(ERROR,
+ "no function provided to release variable stats with");
+ }
+ else
+ {
+ vardata.statsTuple =
+ SearchSysCache3(STATRELATTINH,
+ ObjectIdGetDatum(rte->relid),
+ Int16GetDatum(attnum),
+ BoolGetDatum(false));
+ vardata.freefunc = ReleaseSysCache;
+ }
+ }
+ else
+ {
+ /*
+ * Looks like we've found an expression column in the index. Let's
+ * see if there's any stats for it.
+ */
+
+ /* get the attnum from the 0-based index. */
+ attnum = iclause->indexcol + 1;
+
+ if (get_index_stats_hook &&
+ (*get_index_stats_hook) (root, index->indexoid, attnum, &vardata))
+ {
+ /*
+ * The hook took control of acquiring a stats tuple. If it
+ * did supply a tuple, it'd better have supplied a freefunc.
+ */
+ if (HeapTupleIsValid(vardata.statsTuple) &&
+ !vardata.freefunc)
+ elog(ERROR, "no function provided to release variable stats with");
+ }
+ else
+ {
+ vardata.statsTuple = SearchSysCache3(STATRELATTINH,
+ ObjectIdGetDatum(index->indexoid),
+ Int16GetDatum(attnum),
+ BoolGetDatum(false));
+ vardata.freefunc = ReleaseSysCache;
+ }
+ }
+
+ if (HeapTupleIsValid(vardata.statsTuple))
+ {
+ AttStatsSlot sslot;
+
+ if (get_attstatsslot(&sslot, vardata.statsTuple,
+ STATISTIC_KIND_CORRELATION, InvalidOid,
+ ATTSTATSSLOT_NUMBERS))
+ {
+ double varCorrelation = 0.0;
+
+ if (sslot.nnumbers > 0)
+ varCorrelation = Abs(sslot.numbers[0]);
+
+ if (varCorrelation > *indexCorrelation)
+ *indexCorrelation = varCorrelation;
+
+ free_attstatsslot(&sslot);
+ }
+ }
+
+ ReleaseVariableStats(vardata);
+ }
+
+ qualSelectivity = clauselist_selectivity(root, indexQuals,
+ baserel->relid,
+ JOIN_INNER, NULL);
+
+ /*
+ * Now calculate the minimum possible ranges we could match with if all of
+ * the rows were in the perfect order in the table's heap.
+ */
+ minimalRanges = ceil(indexRanges * qualSelectivity);
+
+ /*
+ * Now estimate the number of ranges that we'll touch by using the
+ * indexCorrelation from the stats. Careful not to divide by zero (note
+ * we're using the absolute value of the correlation).
+ */
+ if (*indexCorrelation < 1.0e-10)
+ estimatedRanges = indexRanges;
+ else
+ estimatedRanges = Min(minimalRanges / *indexCorrelation, indexRanges);
+
+ /* we expect to visit this portion of the table */
+ selec = estimatedRanges / indexRanges;
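+
+ /*
+ * For illustration, with arbitrary example numbers: 1000 ranges, a qual
+ * selectivity of 0.01 and an index correlation of 0.5 give minimalRanges
+ * = ceil(1000 * 0.01) = 10, estimatedRanges = Min(10 / 0.5, 1000) = 20,
+ * so we expect to visit 20 / 1000 = 0.02 of the table.
+ */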
+
+ CLAMP_PROBABILITY(selec);
+
+ *indexSelectivity = selec;
+
+ /*
+ * Compute the index qual costs, much as in genericcostestimate, to add to
+ * the index costs. We can disregard indexorderbys, since BRIN doesn't
+ * support those.
+ */
+ qual_arg_cost = index_other_operands_eval_cost(root, indexQuals);
+
+ /*
+ * Compute the startup cost as the cost to read the whole revmap
+ * sequentially, including the cost to execute the index quals.
+ */
+ *indexStartupCost =
+ spc_seq_page_cost * statsData.revmapNumPages * loop_count;
+ *indexStartupCost += qual_arg_cost;
+
+ /*
+ * To read a BRIN index there might be a bit of back and forth over
+ * regular pages, as revmap might point to them out of sequential order;
+ * calculate the total cost as reading the whole index in random order.
+ */
+ *indexTotalCost = *indexStartupCost +
+ spc_random_page_cost * (numPages - statsData.revmapNumPages) * loop_count;
+
+ /*
+ * Charge a small amount per range tuple that we expect to match. This
+ * is meant to reflect the costs of manipulating the bitmap. The BRIN scan
+ * will set a bit for each page in the range when we find a matching
+ * range, so we must multiply the charge by the number of pages in the
+ * range.
+ */
+ *indexTotalCost += 0.1 * cpu_operator_cost * estimatedRanges *
+ statsData.pagesPerRange;
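+
+ /*
+ * For illustration, continuing the arbitrary numbers above: 20 matching
+ * ranges at the default 128 pages per range add
+ * 0.1 * cpu_operator_cost * 20 * 128 = 0.64 to the total cost at the
+ * default cpu_operator_cost of 0.0025.
+ */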
+
+ *indexPages = index->pages;
+}
diff --git a/src/backend/utils/adt/tid.c b/src/backend/utils/adt/tid.c
new file mode 100644
index 0000000..83ac589
--- /dev/null
+++ b/src/backend/utils/adt/tid.c
@@ -0,0 +1,429 @@
+/*-------------------------------------------------------------------------
+ *
+ * tid.c
+ * Functions for the built-in type tuple id
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/tid.c
+ *
+ * NOTES
+ * input routine largely stolen from boxin().
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <math.h>
+#include <limits.h>
+
+#include "access/heapam.h"
+#include "access/sysattr.h"
+#include "access/tableam.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_type.h"
+#include "common/hashfn.h"
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "parser/parsetree.h"
+#include "utils/acl.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+#include "utils/varlena.h"
+
+
+#define DatumGetItemPointer(X) ((ItemPointer) DatumGetPointer(X))
+#define ItemPointerGetDatum(X) PointerGetDatum(X)
+#define PG_GETARG_ITEMPOINTER(n) DatumGetItemPointer(PG_GETARG_DATUM(n))
+#define PG_RETURN_ITEMPOINTER(x) return ItemPointerGetDatum(x)
+
+#define LDELIM '('
+#define RDELIM ')'
+#define DELIM ','
+#define NTIDARGS 2
+
+static ItemPointer currtid_for_view(Relation viewrel, ItemPointer tid);
+
+/* ----------------------------------------------------------------
+ * tidin
+ * ----------------------------------------------------------------
+ */
+Datum
+tidin(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+ char *p,
+ *coord[NTIDARGS];
+ int i;
+ ItemPointer result;
+ BlockNumber blockNumber;
+ OffsetNumber offsetNumber;
+ char *badp;
+ unsigned long cvt;
+
+ for (i = 0, p = str; *p && i < NTIDARGS && *p != RDELIM; p++)
+ if (*p == DELIM || (*p == LDELIM && i == 0))
+ coord[i++] = p + 1;
+
+ if (i < NTIDARGS)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "tid", str)));
+
+ errno = 0;
+ cvt = strtoul(coord[0], &badp, 10);
+ if (errno || *badp != DELIM)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "tid", str)));
+ blockNumber = (BlockNumber) cvt;
+
+ /*
+ * Cope with possibility that unsigned long is wider than BlockNumber, in
+ * which case strtoul will not raise an error for some values that are out
+ * of the range of BlockNumber. (See similar code in oidin().)
+ */
+#if SIZEOF_LONG > 4
+ if (cvt != (unsigned long) blockNumber &&
+ cvt != (unsigned long) ((int32) blockNumber))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "tid", str)));
+#endif
+
+ cvt = strtoul(coord[1], &badp, 10);
+ if (errno || *badp != RDELIM ||
+ cvt > USHRT_MAX)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "tid", str)));
+ offsetNumber = (OffsetNumber) cvt;
+
+ result = (ItemPointer) palloc(sizeof(ItemPointerData));
+
+ ItemPointerSet(result, blockNumber, offsetNumber);
+
+ PG_RETURN_ITEMPOINTER(result);
+}
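+
+/*
+ * For illustration (hypothetical usage): tidin() accepts the text form
+ * "(block,offset)", so e.g. SELECT '(0,1)'::tid produces the item pointer
+ * for offset 1 on block 0.
+ */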
+
+/* ----------------------------------------------------------------
+ * tidout
+ * ----------------------------------------------------------------
+ */
+Datum
+tidout(PG_FUNCTION_ARGS)
+{
+ ItemPointer itemPtr = PG_GETARG_ITEMPOINTER(0);
+ BlockNumber blockNumber;
+ OffsetNumber offsetNumber;
+ char buf[32];
+
+ blockNumber = ItemPointerGetBlockNumberNoCheck(itemPtr);
+ offsetNumber = ItemPointerGetOffsetNumberNoCheck(itemPtr);
+
+ /* Perhaps someday we should output this as a record. */
+ snprintf(buf, sizeof(buf), "(%u,%u)", blockNumber, offsetNumber);
+
+ PG_RETURN_CSTRING(pstrdup(buf));
+}
+
+/*
+ * tidrecv - converts external binary format to tid
+ */
+Datum
+tidrecv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ ItemPointer result;
+ BlockNumber blockNumber;
+ OffsetNumber offsetNumber;
+
+ blockNumber = pq_getmsgint(buf, sizeof(blockNumber));
+ offsetNumber = pq_getmsgint(buf, sizeof(offsetNumber));
+
+ result = (ItemPointer) palloc(sizeof(ItemPointerData));
+
+ ItemPointerSet(result, blockNumber, offsetNumber);
+
+ PG_RETURN_ITEMPOINTER(result);
+}
+
+/*
+ * tidsend - converts tid to binary format
+ */
+Datum
+tidsend(PG_FUNCTION_ARGS)
+{
+ ItemPointer itemPtr = PG_GETARG_ITEMPOINTER(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendint32(&buf, ItemPointerGetBlockNumberNoCheck(itemPtr));
+ pq_sendint16(&buf, ItemPointerGetOffsetNumberNoCheck(itemPtr));
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/*****************************************************************************
+ * PUBLIC ROUTINES *
+ *****************************************************************************/
+
+Datum
+tideq(PG_FUNCTION_ARGS)
+{
+ ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0);
+ ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1);
+
+ PG_RETURN_BOOL(ItemPointerCompare(arg1, arg2) == 0);
+}
+
+Datum
+tidne(PG_FUNCTION_ARGS)
+{
+ ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0);
+ ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1);
+
+ PG_RETURN_BOOL(ItemPointerCompare(arg1, arg2) != 0);
+}
+
+Datum
+tidlt(PG_FUNCTION_ARGS)
+{
+ ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0);
+ ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1);
+
+ PG_RETURN_BOOL(ItemPointerCompare(arg1, arg2) < 0);
+}
+
+Datum
+tidle(PG_FUNCTION_ARGS)
+{
+ ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0);
+ ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1);
+
+ PG_RETURN_BOOL(ItemPointerCompare(arg1, arg2) <= 0);
+}
+
+Datum
+tidgt(PG_FUNCTION_ARGS)
+{
+ ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0);
+ ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1);
+
+ PG_RETURN_BOOL(ItemPointerCompare(arg1, arg2) > 0);
+}
+
+Datum
+tidge(PG_FUNCTION_ARGS)
+{
+ ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0);
+ ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1);
+
+ PG_RETURN_BOOL(ItemPointerCompare(arg1, arg2) >= 0);
+}
+
+Datum
+bttidcmp(PG_FUNCTION_ARGS)
+{
+ ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0);
+ ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1);
+
+ PG_RETURN_INT32(ItemPointerCompare(arg1, arg2));
+}
+
+Datum
+tidlarger(PG_FUNCTION_ARGS)
+{
+ ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0);
+ ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1);
+
+ PG_RETURN_ITEMPOINTER(ItemPointerCompare(arg1, arg2) >= 0 ? arg1 : arg2);
+}
+
+Datum
+tidsmaller(PG_FUNCTION_ARGS)
+{
+ ItemPointer arg1 = PG_GETARG_ITEMPOINTER(0);
+ ItemPointer arg2 = PG_GETARG_ITEMPOINTER(1);
+
+ PG_RETURN_ITEMPOINTER(ItemPointerCompare(arg1, arg2) <= 0 ? arg1 : arg2);
+}
+
+Datum
+hashtid(PG_FUNCTION_ARGS)
+{
+ ItemPointer key = PG_GETARG_ITEMPOINTER(0);
+
+	/*
+	 * While you'll probably have a lot of trouble with a compiler that
+	 * insists on appending pad space to struct ItemPointerData, we can at
+	 * least make this code work by not using sizeof(ItemPointerData);
+	 * instead we rely on knowing the sizes of the component fields.
+	 */
+ return hash_any((unsigned char *) key,
+ sizeof(BlockIdData) + sizeof(OffsetNumber));
+}
+
+Datum
+hashtidextended(PG_FUNCTION_ARGS)
+{
+ ItemPointer key = PG_GETARG_ITEMPOINTER(0);
+ uint64 seed = PG_GETARG_INT64(1);
+
+ /* As above */
+ return hash_any_extended((unsigned char *) key,
+ sizeof(BlockIdData) + sizeof(OffsetNumber),
+ seed);
+}
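+
+/*
+ * For illustration: on common builds BlockIdData is 4 bytes and OffsetNumber
+ * is 2 bytes, so the two hash functions above hash exactly the 6 data bytes
+ * of the item pointer instead of sizeof(ItemPointerData), which a compiler
+ * could pad.
+ */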
+
+
+/*
+ * Functions to get latest tid of a specified tuple.
+ *
+ * Maybe these implementations should be moved to another place
+ */
+
+/*
+ * Utility wrapper for current CTID functions.
+ * Returns the TID of the latest version of the tuple that "tid"
+ * points at, for relation "rel".
+ */
+static ItemPointer
+currtid_internal(Relation rel, ItemPointer tid)
+{
+ ItemPointer result;
+ AclResult aclresult;
+ Snapshot snapshot;
+ TableScanDesc scan;
+
+ result = (ItemPointer) palloc(sizeof(ItemPointerData));
+
+ aclresult = pg_class_aclcheck(RelationGetRelid(rel), GetUserId(),
+ ACL_SELECT);
+ if (aclresult != ACLCHECK_OK)
+ aclcheck_error(aclresult, get_relkind_objtype(rel->rd_rel->relkind),
+ RelationGetRelationName(rel));
+
+ if (rel->rd_rel->relkind == RELKIND_VIEW)
+ return currtid_for_view(rel, tid);
+
+ if (!RELKIND_HAS_STORAGE(rel->rd_rel->relkind))
+ elog(ERROR, "cannot look at latest visible tid for relation \"%s.%s\"",
+ get_namespace_name(RelationGetNamespace(rel)),
+ RelationGetRelationName(rel));
+
+ ItemPointerCopy(tid, result);
+
+ snapshot = RegisterSnapshot(GetLatestSnapshot());
+ scan = table_beginscan_tid(rel, snapshot);
+ table_tuple_get_latest_tid(scan, result);
+ table_endscan(scan);
+ UnregisterSnapshot(snapshot);
+
+ return result;
+}
+
+/*
+ * Handle CTIDs of views.
+ * The view must expose a "ctid" column, and that column must
+ * correspond to the CTID of a base relation.
+ */
+static ItemPointer
+currtid_for_view(Relation viewrel, ItemPointer tid)
+{
+ TupleDesc att = RelationGetDescr(viewrel);
+ RuleLock *rulelock;
+ RewriteRule *rewrite;
+ int i,
+ natts = att->natts,
+ tididx = -1;
+
+ for (i = 0; i < natts; i++)
+ {
+ Form_pg_attribute attr = TupleDescAttr(att, i);
+
+ if (strcmp(NameStr(attr->attname), "ctid") == 0)
+ {
+ if (attr->atttypid != TIDOID)
+ elog(ERROR, "ctid isn't of type TID");
+ tididx = i;
+ break;
+ }
+ }
+ if (tididx < 0)
+ elog(ERROR, "currtid cannot handle views with no CTID");
+ rulelock = viewrel->rd_rules;
+ if (!rulelock)
+ elog(ERROR, "the view has no rules");
+ for (i = 0; i < rulelock->numLocks; i++)
+ {
+ rewrite = rulelock->rules[i];
+ if (rewrite->event == CMD_SELECT)
+ {
+ Query *query;
+ TargetEntry *tle;
+
+ if (list_length(rewrite->actions) != 1)
+ elog(ERROR, "only one select rule is allowed in views");
+ query = (Query *) linitial(rewrite->actions);
+ tle = get_tle_by_resno(query->targetList, tididx + 1);
+ if (tle && tle->expr && IsA(tle->expr, Var))
+ {
+ Var *var = (Var *) tle->expr;
+ RangeTblEntry *rte;
+
+ if (!IS_SPECIAL_VARNO(var->varno) &&
+ var->varattno == SelfItemPointerAttributeNumber)
+ {
+ rte = rt_fetch(var->varno, query->rtable);
+ if (rte)
+ {
+ ItemPointer result;
+ Relation rel;
+
+ rel = table_open(rte->relid, AccessShareLock);
+ result = currtid_internal(rel, tid);
+ table_close(rel, AccessShareLock);
+ return result;
+ }
+ }
+ }
+ break;
+ }
+ }
+ elog(ERROR, "currtid cannot handle this view");
+ return NULL;
+}
+
+/*
+ * currtid_byrelname
+ *		Get the TID of the latest version of the tuple that the given CTID
+ *		points at, for the named relation.
+ */
+Datum
+currtid_byrelname(PG_FUNCTION_ARGS)
+{
+ text *relname = PG_GETARG_TEXT_PP(0);
+ ItemPointer tid = PG_GETARG_ITEMPOINTER(1);
+ ItemPointer result;
+ RangeVar *relrv;
+ Relation rel;
+
+ relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
+ rel = table_openrv(relrv, AccessShareLock);
+
+	/* grab the latest tuple version associated with this CTID */
+ result = currtid_internal(rel, tid);
+
+ table_close(rel, AccessShareLock);
+
+ PG_RETURN_ITEMPOINTER(result);
+}
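+
+/*
+ * For illustration (assuming this function's usual SQL-level name, currtid2,
+ * historically kept for ODBC): a call such as
+ *   SELECT currtid2('mytable', '(0,1)'::tid);
+ * returns the TID of the latest visible version of the row that (0,1)
+ * pointed at.
+ */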
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
new file mode 100644
index 0000000..f70f829
--- /dev/null
+++ b/src/backend/utils/adt/timestamp.c
@@ -0,0 +1,5921 @@
+/*-------------------------------------------------------------------------
+ *
+ * timestamp.c
+ * Functions for the built-in SQL types "timestamp" and "interval".
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/timestamp.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include <ctype.h>
+#include <math.h>
+#include <limits.h>
+#include <sys/time.h>
+
+#include "access/xact.h"
+#include "catalog/pg_type.h"
+#include "common/int.h"
+#include "common/int128.h"
+#include "funcapi.h"
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "parser/scansup.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/date.h"
+#include "utils/datetime.h"
+#include "utils/float.h"
+#include "utils/numeric.h"
+#include "utils/sortsupport.h"
+
+/*
+ * gcc's -ffast-math switch breaks routines that expect exact results from
+ * expressions like timeval / SECS_PER_HOUR, where timeval is double.
+ */
+#ifdef __FAST_MATH__
+#error -ffast-math is known to break this code
+#endif
+
+#define SAMESIGN(a,b) (((a) < 0) == ((b) < 0))
+
+/* Set at postmaster start */
+TimestampTz PgStartTime;
+
+/* Set at configuration reload */
+TimestampTz PgReloadTime;
+
+typedef struct
+{
+ Timestamp current;
+ Timestamp finish;
+ Interval step;
+ int step_sign;
+} generate_series_timestamp_fctx;
+
+typedef struct
+{
+ TimestampTz current;
+ TimestampTz finish;
+ Interval step;
+ int step_sign;
+} generate_series_timestamptz_fctx;
+
+
+static TimeOffset time2t(const int hour, const int min, const int sec, const fsec_t fsec);
+static Timestamp dt2local(Timestamp dt, int timezone);
+static void AdjustIntervalForTypmod(Interval *interval, int32 typmod);
+static TimestampTz timestamp2timestamptz(Timestamp timestamp);
+static Timestamp timestamptz2timestamp(TimestampTz timestamp);
+
+
+/* common code for timestamptypmodin and timestamptztypmodin */
+static int32
+anytimestamp_typmodin(bool istz, ArrayType *ta)
+{
+ int32 *tl;
+ int n;
+
+ tl = ArrayGetIntegerTypmods(ta, &n);
+
+	/*
+	 * We're not too concerned about a good error message here, because the
+	 * grammar shouldn't allow a wrong number of modifiers for TIMESTAMP.
+	 */
+ if (n != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid type modifier")));
+
+ return anytimestamp_typmod_check(istz, tl[0]);
+}
+
+/* exported so parse_expr.c can use it */
+int32
+anytimestamp_typmod_check(bool istz, int32 typmod)
+{
+ if (typmod < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("TIMESTAMP(%d)%s precision must not be negative",
+ typmod, (istz ? " WITH TIME ZONE" : ""))));
+ if (typmod > MAX_TIMESTAMP_PRECISION)
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("TIMESTAMP(%d)%s precision reduced to maximum allowed, %d",
+ typmod, (istz ? " WITH TIME ZONE" : ""),
+ MAX_TIMESTAMP_PRECISION)));
+ typmod = MAX_TIMESTAMP_PRECISION;
+ }
+
+ return typmod;
+}
+
+/* common code for timestamptypmodout and timestamptztypmodout */
+static char *
+anytimestamp_typmodout(bool istz, int32 typmod)
+{
+ const char *tz = istz ? " with time zone" : " without time zone";
+
+ if (typmod >= 0)
+ return psprintf("(%d)%s", (int) typmod, tz);
+ else
+ return psprintf("%s", tz);
+}
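+
+/*
+ * For illustration: anytimestamp_typmodout(true, 3) yields "(3) with time
+ * zone", while any negative typmod yields just " with time zone".
+ */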
+
+
+/*****************************************************************************
+ * USER I/O ROUTINES *
+ *****************************************************************************/
+
+/* timestamp_in()
+ * Convert a string to internal form.
+ */
+Datum
+timestamp_in(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 typmod = PG_GETARG_INT32(2);
+ Timestamp result;
+ fsec_t fsec;
+ struct pg_tm tt,
+ *tm = &tt;
+ int tz;
+ int dtype;
+ int nf;
+ int dterr;
+ char *field[MAXDATEFIELDS];
+ int ftype[MAXDATEFIELDS];
+ char workbuf[MAXDATELEN + MAXDATEFIELDS];
+
+ dterr = ParseDateTime(str, workbuf, sizeof(workbuf),
+ field, ftype, MAXDATEFIELDS, &nf);
+ if (dterr == 0)
+ dterr = DecodeDateTime(field, ftype, nf, &dtype, tm, &fsec, &tz);
+ if (dterr != 0)
+ DateTimeParseError(dterr, str, "timestamp");
+
+ switch (dtype)
+ {
+ case DTK_DATE:
+ if (tm2timestamp(tm, fsec, NULL, &result) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range: \"%s\"", str)));
+ break;
+
+ case DTK_EPOCH:
+ result = SetEpochTimestamp();
+ break;
+
+ case DTK_LATE:
+ TIMESTAMP_NOEND(result);
+ break;
+
+ case DTK_EARLY:
+ TIMESTAMP_NOBEGIN(result);
+ break;
+
+ default:
+ elog(ERROR, "unexpected dtype %d while parsing timestamp \"%s\"",
+ dtype, str);
+ TIMESTAMP_NOEND(result);
+ }
+
+ AdjustTimestampForTypmod(&result, typmod);
+
+ PG_RETURN_TIMESTAMP(result);
+}
+
+/* timestamp_out()
+ * Convert a timestamp to external form.
+ */
+Datum
+timestamp_out(PG_FUNCTION_ARGS)
+{
+ Timestamp timestamp = PG_GETARG_TIMESTAMP(0);
+ char *result;
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+ char buf[MAXDATELEN + 1];
+
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ EncodeSpecialTimestamp(timestamp, buf);
+ else if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) == 0)
+ EncodeDateTime(tm, fsec, false, 0, NULL, DateStyle, buf);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ result = pstrdup(buf);
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * timestamp_recv - converts external binary format to timestamp
+ */
+Datum
+timestamp_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 typmod = PG_GETARG_INT32(2);
+ Timestamp timestamp;
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+
+ timestamp = (Timestamp) pq_getmsgint64(buf);
+
+ /* range check: see if timestamp_out would like it */
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ /* ok */ ;
+ else if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) != 0 ||
+ !IS_VALID_TIMESTAMP(timestamp))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ AdjustTimestampForTypmod(&timestamp, typmod);
+
+ PG_RETURN_TIMESTAMP(timestamp);
+}
+
+/*
+ * timestamp_send - converts timestamp to binary format
+ */
+Datum
+timestamp_send(PG_FUNCTION_ARGS)
+{
+ Timestamp timestamp = PG_GETARG_TIMESTAMP(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendint64(&buf, timestamp);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+Datum
+timestamptypmodin(PG_FUNCTION_ARGS)
+{
+ ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
+
+ PG_RETURN_INT32(anytimestamp_typmodin(false, ta));
+}
+
+Datum
+timestamptypmodout(PG_FUNCTION_ARGS)
+{
+ int32 typmod = PG_GETARG_INT32(0);
+
+ PG_RETURN_CSTRING(anytimestamp_typmodout(false, typmod));
+}
+
+
+/*
+ * timestamp_support()
+ *
+ * Planner support function for the timestamp_scale() and timestamptz_scale()
+ * length coercion functions (we need not distinguish them here).
+ */
+Datum
+timestamp_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+ Node *ret = NULL;
+
+ if (IsA(rawreq, SupportRequestSimplify))
+ {
+ SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq;
+
+ ret = TemporalSimplify(MAX_TIMESTAMP_PRECISION, (Node *) req->fcall);
+ }
+
+ PG_RETURN_POINTER(ret);
+}
+
+/* timestamp_scale()
+ * Adjust time type for specified scale factor.
+ * Used by PostgreSQL type system to stuff columns.
+ */
+Datum
+timestamp_scale(PG_FUNCTION_ARGS)
+{
+ Timestamp timestamp = PG_GETARG_TIMESTAMP(0);
+ int32 typmod = PG_GETARG_INT32(1);
+ Timestamp result;
+
+ result = timestamp;
+
+ AdjustTimestampForTypmod(&result, typmod);
+
+ PG_RETURN_TIMESTAMP(result);
+}
+
+/*
+ * AdjustTimestampForTypmodError --- round off a timestamp to suit given typmod
+ * Works for either timestamp or timestamptz.
+ */
+bool
+AdjustTimestampForTypmodError(Timestamp *time, int32 typmod, bool *error)
+{
+ static const int64 TimestampScales[MAX_TIMESTAMP_PRECISION + 1] = {
+ INT64CONST(1000000),
+ INT64CONST(100000),
+ INT64CONST(10000),
+ INT64CONST(1000),
+ INT64CONST(100),
+ INT64CONST(10),
+ INT64CONST(1)
+ };
+
+ static const int64 TimestampOffsets[MAX_TIMESTAMP_PRECISION + 1] = {
+ INT64CONST(500000),
+ INT64CONST(50000),
+ INT64CONST(5000),
+ INT64CONST(500),
+ INT64CONST(50),
+ INT64CONST(5),
+ INT64CONST(0)
+ };
+
+ if (!TIMESTAMP_NOT_FINITE(*time)
+ && (typmod != -1) && (typmod != MAX_TIMESTAMP_PRECISION))
+ {
+ if (typmod < 0 || typmod > MAX_TIMESTAMP_PRECISION)
+ {
+ if (error)
+ {
+ *error = true;
+ return false;
+ }
+
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("timestamp(%d) precision must be between %d and %d",
+ typmod, 0, MAX_TIMESTAMP_PRECISION)));
+ }
+
+ if (*time >= INT64CONST(0))
+ {
+ *time = ((*time + TimestampOffsets[typmod]) / TimestampScales[typmod]) *
+ TimestampScales[typmod];
+ }
+ else
+ {
+ *time = -((((-*time) + TimestampOffsets[typmod]) / TimestampScales[typmod])
+ * TimestampScales[typmod]);
+ }
+ }
+
+ return true;
+}
+
+void
+AdjustTimestampForTypmod(Timestamp *time, int32 typmod)
+{
+ (void) AdjustTimestampForTypmodError(time, typmod, NULL);
+}
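+
+/*
+ * For illustration (hypothetical value): with typmod = 3 the scale is 1000
+ * and the offset 500, so the functions above round 1234567 microseconds to
+ * ((1234567 + 500) / 1000) * 1000 = 1235000, i.e. millisecond precision.
+ */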
+
+/* timestamptz_in()
+ * Convert a string to internal form.
+ */
+Datum
+timestamptz_in(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 typmod = PG_GETARG_INT32(2);
+ TimestampTz result;
+ fsec_t fsec;
+ struct pg_tm tt,
+ *tm = &tt;
+ int tz;
+ int dtype;
+ int nf;
+ int dterr;
+ char *field[MAXDATEFIELDS];
+ int ftype[MAXDATEFIELDS];
+ char workbuf[MAXDATELEN + MAXDATEFIELDS];
+
+ dterr = ParseDateTime(str, workbuf, sizeof(workbuf),
+ field, ftype, MAXDATEFIELDS, &nf);
+ if (dterr == 0)
+ dterr = DecodeDateTime(field, ftype, nf, &dtype, tm, &fsec, &tz);
+ if (dterr != 0)
+ DateTimeParseError(dterr, str, "timestamp with time zone");
+
+ switch (dtype)
+ {
+ case DTK_DATE:
+ if (tm2timestamp(tm, fsec, &tz, &result) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range: \"%s\"", str)));
+ break;
+
+ case DTK_EPOCH:
+ result = SetEpochTimestamp();
+ break;
+
+ case DTK_LATE:
+ TIMESTAMP_NOEND(result);
+ break;
+
+ case DTK_EARLY:
+ TIMESTAMP_NOBEGIN(result);
+ break;
+
+ default:
+ elog(ERROR, "unexpected dtype %d while parsing timestamptz \"%s\"",
+ dtype, str);
+ TIMESTAMP_NOEND(result);
+ }
+
+ AdjustTimestampForTypmod(&result, typmod);
+
+ PG_RETURN_TIMESTAMPTZ(result);
+}
+
+/*
+ * Try to parse a timezone specification, and return its timezone offset value
+ * if it's acceptable. Otherwise, an error is thrown.
+ *
+ * Note: some code paths update tm->tm_isdst, and some don't; current callers
+ * don't care, so we don't bother being consistent.
+ */
+static int
+parse_sane_timezone(struct pg_tm *tm, text *zone)
+{
+ char tzname[TZ_STRLEN_MAX + 1];
+ int rt;
+ int tz;
+
+ text_to_cstring_buffer(zone, tzname, sizeof(tzname));
+
+ /*
+ * Look up the requested timezone. First we try to interpret it as a
+ * numeric timezone specification; if DecodeTimezone decides it doesn't
+ * like the format, we look in the timezone abbreviation table (to handle
+ * cases like "EST"), and if that also fails, we look in the timezone
+ * database (to handle cases like "America/New_York"). (This matches the
+ * order in which timestamp input checks the cases; it's important because
+ * the timezone database unwisely uses a few zone names that are identical
+ * to offset abbreviations.)
+ *
+ * Note pg_tzset happily parses numeric input that DecodeTimezone would
+ * reject. To avoid having it accept input that would otherwise be seen
+ * as invalid, it's enough to disallow having a digit in the first
+ * position of our input string.
+ */
+ if (isdigit((unsigned char) *tzname))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "numeric time zone", tzname),
+ errhint("Numeric time zones must have \"-\" or \"+\" as first character.")));
+
+ rt = DecodeTimezone(tzname, &tz);
+ if (rt != 0)
+ {
+ char *lowzone;
+ int type,
+ val;
+ pg_tz *tzp;
+
+ if (rt == DTERR_TZDISP_OVERFLOW)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("numeric time zone \"%s\" out of range", tzname)));
+ else if (rt != DTERR_BAD_FORMAT)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("time zone \"%s\" not recognized", tzname)));
+
+ /* DecodeTimezoneAbbrev requires lowercase input */
+ lowzone = downcase_truncate_identifier(tzname,
+ strlen(tzname),
+ false);
+ type = DecodeTimezoneAbbrev(0, lowzone, &val, &tzp);
+
+ if (type == TZ || type == DTZ)
+ {
+ /* fixed-offset abbreviation */
+ tz = -val;
+ }
+ else if (type == DYNTZ)
+ {
+ /* dynamic-offset abbreviation, resolve using specified time */
+ tz = DetermineTimeZoneAbbrevOffset(tm, tzname, tzp);
+ }
+ else
+ {
+ /* try it as a full zone name */
+ tzp = pg_tzset(tzname);
+ if (tzp)
+ tz = DetermineTimeZoneOffset(tm, tzp);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("time zone \"%s\" not recognized", tzname)));
+ }
+ }
+
+ return tz;
+}
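+
+/*
+ * For illustration: "+05:30" is resolved by DecodeTimezone(), "EST" via the
+ * abbreviation table (after downcasing), and "America/New_York" via the full
+ * timezone database, following the order of attempts described above.
+ */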
+
+/*
+ * make_timestamp_internal
+ * workhorse for make_timestamp and make_timestamptz
+ */
+static Timestamp
+make_timestamp_internal(int year, int month, int day,
+ int hour, int min, double sec)
+{
+ struct pg_tm tm;
+ TimeOffset date;
+ TimeOffset time;
+ int dterr;
+ bool bc = false;
+ Timestamp result;
+
+ tm.tm_year = year;
+ tm.tm_mon = month;
+ tm.tm_mday = day;
+
+ /* Handle negative years as BC */
+ if (tm.tm_year < 0)
+ {
+ bc = true;
+ tm.tm_year = -tm.tm_year;
+ }
+
+ dterr = ValidateDate(DTK_DATE_M, false, false, bc, &tm);
+
+ if (dterr != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_FIELD_OVERFLOW),
+ errmsg("date field value out of range: %d-%02d-%02d",
+ year, month, day)));
+
+ if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("date out of range: %d-%02d-%02d",
+ year, month, day)));
+
+ date = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE;
+
+ /* Check for time overflow */
+ if (float_time_overflows(hour, min, sec))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_FIELD_OVERFLOW),
+ errmsg("time field value out of range: %d:%02d:%02g",
+ hour, min, sec)));
+
+ /* This should match tm2time */
+ time = (((hour * MINS_PER_HOUR + min) * SECS_PER_MINUTE)
+ * USECS_PER_SEC) + (int64) rint(sec * USECS_PER_SEC);
+
+ result = date * USECS_PER_DAY + time;
+ /* check for major overflow */
+ if ((result - time) / USECS_PER_DAY != date)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range: %d-%02d-%02d %d:%02d:%02g",
+ year, month, day,
+ hour, min, sec)));
+
+ /* check for just-barely overflow (okay except time-of-day wraps) */
+ /* caution: we want to allow 1999-12-31 24:00:00 */
+ if ((result < 0 && date > 0) ||
+ (result > 0 && date < -1))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range: %d-%02d-%02d %d:%02d:%02g",
+ year, month, day,
+ hour, min, sec)));
+
+ /* final range check catches just-out-of-range timestamps */
+ if (!IS_VALID_TIMESTAMP(result))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range: %d-%02d-%02d %d:%02d:%02g",
+ year, month, day,
+ hour, min, sec)));
+
+ return result;
+}
+
+/*
+ * make_timestamp() - timestamp constructor
+ */
+Datum
+make_timestamp(PG_FUNCTION_ARGS)
+{
+ int32 year = PG_GETARG_INT32(0);
+ int32 month = PG_GETARG_INT32(1);
+ int32 mday = PG_GETARG_INT32(2);
+ int32 hour = PG_GETARG_INT32(3);
+ int32 min = PG_GETARG_INT32(4);
+ float8 sec = PG_GETARG_FLOAT8(5);
+ Timestamp result;
+
+ result = make_timestamp_internal(year, month, mday,
+ hour, min, sec);
+
+ PG_RETURN_TIMESTAMP(result);
+}
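+
+/*
+ * For illustration (hypothetical call): SELECT make_timestamp(2022, 6, 30,
+ * 12, 34, 56.5) builds the timestamp 2022-06-30 12:34:56.5 via
+ * make_timestamp_internal() above.
+ */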
+
+/*
+ * make_timestamptz() - timestamp with time zone constructor
+ */
+Datum
+make_timestamptz(PG_FUNCTION_ARGS)
+{
+ int32 year = PG_GETARG_INT32(0);
+ int32 month = PG_GETARG_INT32(1);
+ int32 mday = PG_GETARG_INT32(2);
+ int32 hour = PG_GETARG_INT32(3);
+ int32 min = PG_GETARG_INT32(4);
+ float8 sec = PG_GETARG_FLOAT8(5);
+ Timestamp result;
+
+ result = make_timestamp_internal(year, month, mday,
+ hour, min, sec);
+
+ PG_RETURN_TIMESTAMPTZ(timestamp2timestamptz(result));
+}
+
+/*
+ * Construct a timestamp with time zone.
+ * As above, but the time zone is specified as seventh argument.
+ */
+Datum
+make_timestamptz_at_timezone(PG_FUNCTION_ARGS)
+{
+ int32 year = PG_GETARG_INT32(0);
+ int32 month = PG_GETARG_INT32(1);
+ int32 mday = PG_GETARG_INT32(2);
+ int32 hour = PG_GETARG_INT32(3);
+ int32 min = PG_GETARG_INT32(4);
+ float8 sec = PG_GETARG_FLOAT8(5);
+ text *zone = PG_GETARG_TEXT_PP(6);
+ TimestampTz result;
+ Timestamp timestamp;
+ struct pg_tm tt;
+ int tz;
+ fsec_t fsec;
+
+ timestamp = make_timestamp_internal(year, month, mday,
+ hour, min, sec);
+
+ if (timestamp2tm(timestamp, NULL, &tt, &fsec, NULL, NULL) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ tz = parse_sane_timezone(&tt, zone);
+
+ result = dt2local(timestamp, -tz);
+
+ if (!IS_VALID_TIMESTAMP(result))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ PG_RETURN_TIMESTAMPTZ(result);
+}
+
+/*
+ * to_timestamp(double precision)
+ * Convert UNIX epoch to timestamptz.
+ */
+Datum
+float8_timestamptz(PG_FUNCTION_ARGS)
+{
+ float8 seconds = PG_GETARG_FLOAT8(0);
+ TimestampTz result;
+
+ /* Deal with NaN and infinite inputs ... */
+ if (isnan(seconds))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp cannot be NaN")));
+
+ if (isinf(seconds))
+ {
+ if (seconds < 0)
+ TIMESTAMP_NOBEGIN(result);
+ else
+ TIMESTAMP_NOEND(result);
+ }
+ else
+ {
+ /* Out of range? */
+ if (seconds <
+ (float8) SECS_PER_DAY * (DATETIME_MIN_JULIAN - UNIX_EPOCH_JDATE)
+ || seconds >=
+ (float8) SECS_PER_DAY * (TIMESTAMP_END_JULIAN - UNIX_EPOCH_JDATE))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range: \"%g\"", seconds)));
+
+ /* Convert UNIX epoch to Postgres epoch */
+ seconds -= ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY);
+
+ seconds = rint(seconds * USECS_PER_SEC);
+ result = (int64) seconds;
+
+ /* Recheck in case roundoff produces something just out of range */
+ if (!IS_VALID_TIMESTAMP(result))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range: \"%g\"",
+ PG_GETARG_FLOAT8(0))));
+ }
+
+ PG_RETURN_TIMESTAMP(result);
+}
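+
+/*
+ * For illustration: to_timestamp(0) shifts the Unix epoch onto the Postgres
+ * epoch base and yields 1970-01-01 00:00:00+00.
+ */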
+
+/* timestamptz_out()
+ * Convert a timestamp to external form.
+ */
+Datum
+timestamptz_out(PG_FUNCTION_ARGS)
+{
+ TimestampTz dt = PG_GETARG_TIMESTAMPTZ(0);
+ char *result;
+ int tz;
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+ const char *tzn;
+ char buf[MAXDATELEN + 1];
+
+ if (TIMESTAMP_NOT_FINITE(dt))
+ EncodeSpecialTimestamp(dt, buf);
+ else if (timestamp2tm(dt, &tz, tm, &fsec, &tzn, NULL) == 0)
+ EncodeDateTime(tm, fsec, true, tz, tzn, DateStyle, buf);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ result = pstrdup(buf);
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * timestamptz_recv - converts external binary format to timestamptz
+ */
+Datum
+timestamptz_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 typmod = PG_GETARG_INT32(2);
+ TimestampTz timestamp;
+ int tz;
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+
+ timestamp = (TimestampTz) pq_getmsgint64(buf);
+
+ /* range check: see if timestamptz_out would like it */
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ /* ok */ ;
+ else if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0 ||
+ !IS_VALID_TIMESTAMP(timestamp))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ AdjustTimestampForTypmod(&timestamp, typmod);
+
+ PG_RETURN_TIMESTAMPTZ(timestamp);
+}
+
+/*
+ * timestamptz_send - converts timestamptz to binary format
+ */
+Datum
+timestamptz_send(PG_FUNCTION_ARGS)
+{
+ TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendint64(&buf, timestamp);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+Datum
+timestamptztypmodin(PG_FUNCTION_ARGS)
+{
+ ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
+
+ PG_RETURN_INT32(anytimestamp_typmodin(true, ta));
+}
+
+Datum
+timestamptztypmodout(PG_FUNCTION_ARGS)
+{
+ int32 typmod = PG_GETARG_INT32(0);
+
+ PG_RETURN_CSTRING(anytimestamp_typmodout(true, typmod));
+}
+
+
+/* timestamptz_scale()
+ * Adjust time type for specified scale factor.
+ * Used by PostgreSQL type system to stuff columns.
+ */
+Datum
+timestamptz_scale(PG_FUNCTION_ARGS)
+{
+ TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(0);
+ int32 typmod = PG_GETARG_INT32(1);
+ TimestampTz result;
+
+ result = timestamp;
+
+ AdjustTimestampForTypmod(&result, typmod);
+
+ PG_RETURN_TIMESTAMPTZ(result);
+}
+
+
+/* interval_in()
+ * Convert a string to internal form.
+ *
+ * External format(s):
+ * Uses the generic date/time parsing and decoding routines.
+ */
+Datum
+interval_in(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 typmod = PG_GETARG_INT32(2);
+ Interval *result;
+ struct pg_itm_in tt,
+ *itm_in = &tt;
+ int dtype;
+ int nf;
+ int range;
+ int dterr;
+ char *field[MAXDATEFIELDS];
+ int ftype[MAXDATEFIELDS];
+ char workbuf[256];
+
+ itm_in->tm_year = 0;
+ itm_in->tm_mon = 0;
+ itm_in->tm_mday = 0;
+ itm_in->tm_usec = 0;
+
+ if (typmod >= 0)
+ range = INTERVAL_RANGE(typmod);
+ else
+ range = INTERVAL_FULL_RANGE;
+
+ dterr = ParseDateTime(str, workbuf, sizeof(workbuf), field,
+ ftype, MAXDATEFIELDS, &nf);
+ if (dterr == 0)
+ dterr = DecodeInterval(field, ftype, nf, range,
+ &dtype, itm_in);
+
+ /* if those functions think it's a bad format, try ISO8601 style */
+ if (dterr == DTERR_BAD_FORMAT)
+ dterr = DecodeISO8601Interval(str,
+ &dtype, itm_in);
+
+ if (dterr != 0)
+ {
+ if (dterr == DTERR_FIELD_OVERFLOW)
+ dterr = DTERR_INTERVAL_OVERFLOW;
+ DateTimeParseError(dterr, str, "interval");
+ }
+
+ result = (Interval *) palloc(sizeof(Interval));
+
+ switch (dtype)
+ {
+ case DTK_DELTA:
+ if (itmin2interval(itm_in, result) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+ break;
+
+ default:
+ elog(ERROR, "unexpected dtype %d while parsing interval \"%s\"",
+ dtype, str);
+ }
+
+ AdjustIntervalForTypmod(result, typmod);
+
+ PG_RETURN_INTERVAL_P(result);
+}
+
+/* interval_out()
+ * Convert a time span to external form.
+ */
+Datum
+interval_out(PG_FUNCTION_ARGS)
+{
+ Interval *span = PG_GETARG_INTERVAL_P(0);
+ char *result;
+ struct pg_itm tt,
+ *itm = &tt;
+ char buf[MAXDATELEN + 1];
+
+ interval2itm(*span, itm);
+ EncodeInterval(itm, IntervalStyle, buf);
+
+ result = pstrdup(buf);
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * interval_recv - converts external binary format to interval
+ */
+Datum
+interval_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 typmod = PG_GETARG_INT32(2);
+ Interval *interval;
+
+ interval = (Interval *) palloc(sizeof(Interval));
+
+ interval->time = pq_getmsgint64(buf);
+ interval->day = pq_getmsgint(buf, sizeof(interval->day));
+ interval->month = pq_getmsgint(buf, sizeof(interval->month));
+
+ AdjustIntervalForTypmod(interval, typmod);
+
+ PG_RETURN_INTERVAL_P(interval);
+}
+
+/*
+ * interval_send - converts interval to binary format
+ */
+Datum
+interval_send(PG_FUNCTION_ARGS)
+{
+ Interval *interval = PG_GETARG_INTERVAL_P(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendint64(&buf, interval->time);
+ pq_sendint32(&buf, interval->day);
+ pq_sendint32(&buf, interval->month);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/*
+ * The interval typmod stores a "range" in its high 16 bits and a "precision"
+ * in its low 16 bits. Both contribute to defining the resolution of the
+ * type. Range addresses resolution granules larger than one second, and
+ * precision specifies resolution below one second. This representation can
+ * express all SQL standard resolutions, but we implement them all in terms of
+ * truncating rightward from some position. Range is a bitmap of permitted
+ * fields, but only the temporally-smallest such field is significant to our
+ * calculations. Precision is a count of sub-second decimal places to retain.
+ * Setting all bits (INTERVAL_FULL_PRECISION) gives the same truncation
+ * semantics as choosing MAX_INTERVAL_PRECISION.
+ */
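+
+/*
+ * For illustration (hypothetical modifier): INTERVAL MINUTE TO SECOND(2)
+ * stores INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND) in the high 16 bits
+ * of the typmod and the precision 2 in the low 16 bits; values then get
+ * rounded to two fractional-second digits by AdjustIntervalForTypmod().
+ */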
+Datum
+intervaltypmodin(PG_FUNCTION_ARGS)
+{
+ ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
+ int32 *tl;
+ int n;
+ int32 typmod;
+
+ tl = ArrayGetIntegerTypmods(ta, &n);
+
+ /*
+ * tl[0] - interval range (fields bitmask) tl[1] - precision (optional)
+ *
+ * Note we must validate tl[0] even though it's normally guaranteed
+ * correct by the grammar --- consider SELECT 'foo'::"interval"(1000).
+ */
+ if (n > 0)
+ {
+ switch (tl[0])
+ {
+ case INTERVAL_MASK(YEAR):
+ case INTERVAL_MASK(MONTH):
+ case INTERVAL_MASK(DAY):
+ case INTERVAL_MASK(HOUR):
+ case INTERVAL_MASK(MINUTE):
+ case INTERVAL_MASK(SECOND):
+ case INTERVAL_MASK(YEAR) | INTERVAL_MASK(MONTH):
+ case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR):
+ case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE):
+ case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND):
+ case INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE):
+ case INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND):
+ case INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND):
+ case INTERVAL_FULL_RANGE:
+ /* all OK */
+ break;
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid INTERVAL type modifier")));
+ }
+ }
+
+ if (n == 1)
+ {
+ if (tl[0] != INTERVAL_FULL_RANGE)
+ typmod = INTERVAL_TYPMOD(INTERVAL_FULL_PRECISION, tl[0]);
+ else
+ typmod = -1;
+ }
+ else if (n == 2)
+ {
+ if (tl[1] < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("INTERVAL(%d) precision must not be negative",
+ tl[1])));
+ if (tl[1] > MAX_INTERVAL_PRECISION)
+ {
+ ereport(WARNING,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("INTERVAL(%d) precision reduced to maximum allowed, %d",
+ tl[1], MAX_INTERVAL_PRECISION)));
+ typmod = INTERVAL_TYPMOD(MAX_INTERVAL_PRECISION, tl[0]);
+ }
+ else
+ typmod = INTERVAL_TYPMOD(tl[1], tl[0]);
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid INTERVAL type modifier")));
+ typmod = 0; /* keep compiler quiet */
+ }
+
+ PG_RETURN_INT32(typmod);
+}
+
+Datum
+intervaltypmodout(PG_FUNCTION_ARGS)
+{
+ int32 typmod = PG_GETARG_INT32(0);
+ char *res = (char *) palloc(64);
+ int fields;
+ int precision;
+ const char *fieldstr;
+
+ if (typmod < 0)
+ {
+ *res = '\0';
+ PG_RETURN_CSTRING(res);
+ }
+
+ fields = INTERVAL_RANGE(typmod);
+ precision = INTERVAL_PRECISION(typmod);
+
+ switch (fields)
+ {
+ case INTERVAL_MASK(YEAR):
+ fieldstr = " year";
+ break;
+ case INTERVAL_MASK(MONTH):
+ fieldstr = " month";
+ break;
+ case INTERVAL_MASK(DAY):
+ fieldstr = " day";
+ break;
+ case INTERVAL_MASK(HOUR):
+ fieldstr = " hour";
+ break;
+ case INTERVAL_MASK(MINUTE):
+ fieldstr = " minute";
+ break;
+ case INTERVAL_MASK(SECOND):
+ fieldstr = " second";
+ break;
+ case INTERVAL_MASK(YEAR) | INTERVAL_MASK(MONTH):
+ fieldstr = " year to month";
+ break;
+ case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR):
+ fieldstr = " day to hour";
+ break;
+ case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE):
+ fieldstr = " day to minute";
+ break;
+ case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND):
+ fieldstr = " day to second";
+ break;
+ case INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE):
+ fieldstr = " hour to minute";
+ break;
+ case INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND):
+ fieldstr = " hour to second";
+ break;
+ case INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND):
+ fieldstr = " minute to second";
+ break;
+ case INTERVAL_FULL_RANGE:
+ fieldstr = "";
+ break;
+ default:
+ elog(ERROR, "invalid INTERVAL typmod: 0x%x", typmod);
+ fieldstr = "";
+ break;
+ }
+
+ if (precision != INTERVAL_FULL_PRECISION)
+ snprintf(res, 64, "%s(%d)", fieldstr, precision);
+ else
+ snprintf(res, 64, "%s", fieldstr);
+
+ PG_RETURN_CSTRING(res);
+}
+
+/*
+ * Given an interval typmod value, return a code for the least-significant
+ * field that the typmod allows to be nonzero, for instance given
+ * INTERVAL DAY TO HOUR we want to identify "hour".
+ *
+ * The results should be ordered by field significance, which means
+ * we can't use the dt.h macros YEAR etc, because for some odd reason
+ * they aren't ordered that way. Instead, arbitrarily represent
+ * SECOND = 0, MINUTE = 1, HOUR = 2, DAY = 3, MONTH = 4, YEAR = 5.
+ */
+static int
+intervaltypmodleastfield(int32 typmod)
+{
+ if (typmod < 0)
+ return 0; /* SECOND */
+
+ switch (INTERVAL_RANGE(typmod))
+ {
+ case INTERVAL_MASK(YEAR):
+ return 5; /* YEAR */
+ case INTERVAL_MASK(MONTH):
+ return 4; /* MONTH */
+ case INTERVAL_MASK(DAY):
+ return 3; /* DAY */
+ case INTERVAL_MASK(HOUR):
+ return 2; /* HOUR */
+ case INTERVAL_MASK(MINUTE):
+ return 1; /* MINUTE */
+ case INTERVAL_MASK(SECOND):
+ return 0; /* SECOND */
+ case INTERVAL_MASK(YEAR) | INTERVAL_MASK(MONTH):
+ return 4; /* MONTH */
+ case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR):
+ return 2; /* HOUR */
+ case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE):
+ return 1; /* MINUTE */
+ case INTERVAL_MASK(DAY) | INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND):
+ return 0; /* SECOND */
+ case INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE):
+ return 1; /* MINUTE */
+ case INTERVAL_MASK(HOUR) | INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND):
+ return 0; /* SECOND */
+ case INTERVAL_MASK(MINUTE) | INTERVAL_MASK(SECOND):
+ return 0; /* SECOND */
+ case INTERVAL_FULL_RANGE:
+ return 0; /* SECOND */
+ default:
+ elog(ERROR, "invalid INTERVAL typmod: 0x%x", typmod);
+ break;
+ }
+ return 0; /* can't get here, but keep compiler quiet */
+}
+
+
+/*
+ * interval_support()
+ *
+ * Planner support function for interval_scale().
+ *
+ * Flatten superfluous calls to interval_scale(). The interval typmod is
+ * complex to permit accepting and regurgitating all SQL standard variations.
+ * For truncation purposes, it boils down to a single, simple granularity.
+ */
+Datum
+interval_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+ Node *ret = NULL;
+
+ if (IsA(rawreq, SupportRequestSimplify))
+ {
+ SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq;
+ FuncExpr *expr = req->fcall;
+ Node *typmod;
+
+ Assert(list_length(expr->args) >= 2);
+
+ typmod = (Node *) lsecond(expr->args);
+
+ if (IsA(typmod, Const) && !((Const *) typmod)->constisnull)
+ {
+ Node *source = (Node *) linitial(expr->args);
+ int32 new_typmod = DatumGetInt32(((Const *) typmod)->constvalue);
+ bool noop;
+
+ if (new_typmod < 0)
+ noop = true;
+ else
+ {
+ int32 old_typmod = exprTypmod(source);
+ int old_least_field;
+ int new_least_field;
+ int old_precis;
+ int new_precis;
+
+ old_least_field = intervaltypmodleastfield(old_typmod);
+ new_least_field = intervaltypmodleastfield(new_typmod);
+ if (old_typmod < 0)
+ old_precis = INTERVAL_FULL_PRECISION;
+ else
+ old_precis = INTERVAL_PRECISION(old_typmod);
+ new_precis = INTERVAL_PRECISION(new_typmod);
+
+ /*
+ * Cast is a no-op if least field stays the same or decreases
+ * while precision stays the same or increases. But
+ * precision, which is to say, sub-second precision, only
+ * affects ranges that include SECOND.
+ */
+ noop = (new_least_field <= old_least_field) &&
+ (old_least_field > 0 /* SECOND */ ||
+ new_precis >= MAX_INTERVAL_PRECISION ||
+ new_precis >= old_precis);
+ }
+ if (noop)
+ ret = relabel_to_typmod(source, new_typmod);
+ }
+ }
+
+ PG_RETURN_POINTER(ret);
+}
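+
+/*
+ * For illustration (hypothetical cast): relabelling an expression of type
+ * interval DAY TO HOUR as interval DAY TO SECOND(0) is a no-op here: the new
+ * range truncates no more coarsely than the old one, and a value already
+ * truncated to whole hours has no sub-second detail for the new precision to
+ * drop.
+ */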
+
+/* interval_scale()
+ * Adjust interval type for specified fields.
+ * Used by PostgreSQL type system to stuff columns.
+ */
+Datum
+interval_scale(PG_FUNCTION_ARGS)
+{
+ Interval *interval = PG_GETARG_INTERVAL_P(0);
+ int32 typmod = PG_GETARG_INT32(1);
+ Interval *result;
+
+ result = palloc(sizeof(Interval));
+ *result = *interval;
+
+ AdjustIntervalForTypmod(result, typmod);
+
+ PG_RETURN_INTERVAL_P(result);
+}
+
+/*
+ * Adjust interval for specified precision, in both YEAR to SECOND
+ * range and sub-second precision.
+ */
+static void
+AdjustIntervalForTypmod(Interval *interval, int32 typmod)
+{
+ static const int64 IntervalScales[MAX_INTERVAL_PRECISION + 1] = {
+ INT64CONST(1000000),
+ INT64CONST(100000),
+ INT64CONST(10000),
+ INT64CONST(1000),
+ INT64CONST(100),
+ INT64CONST(10),
+ INT64CONST(1)
+ };
+
+ static const int64 IntervalOffsets[MAX_INTERVAL_PRECISION + 1] = {
+ INT64CONST(500000),
+ INT64CONST(50000),
+ INT64CONST(5000),
+ INT64CONST(500),
+ INT64CONST(50),
+ INT64CONST(5),
+ INT64CONST(0)
+ };
+
+	/*
+	 * Unspecified range and precision? Then there is nothing to adjust.
+	 * Setting typmod to -1 is the convention for all data types.
+	 */
+ if (typmod >= 0)
+ {
+ int range = INTERVAL_RANGE(typmod);
+ int precision = INTERVAL_PRECISION(typmod);
+
+ /*
+ * Our interpretation of intervals with a limited set of fields is
+ * that fields to the right of the last one specified are zeroed out,
+ * but those to the left of it remain valid. Thus for example there
+ * is no operational difference between INTERVAL YEAR TO MONTH and
+ * INTERVAL MONTH. In some cases we could meaningfully enforce that
+ * higher-order fields are zero; for example INTERVAL DAY could reject
+ * nonzero "month" field. However that seems a bit pointless when we
+		 * a nonzero "month" field. However, that seems a bit pointless when we
+ * highest expected field, since we do not have any equivalent of
+ * SQL's <interval leading field precision>.) If we ever decide to
+ * revisit this, interval_support will likely require adjusting.
+ *
+ * Note: before PG 8.4 we interpreted a limited set of fields as
+ * actually causing a "modulo" operation on a given value, potentially
+ * losing high-order as well as low-order information. But there is
+ * no support for such behavior in the standard, and it seems fairly
+ * undesirable on data consistency grounds anyway. Now we only
+ * perform truncation or rounding of low-order fields.
+ */
+ if (range == INTERVAL_FULL_RANGE)
+ {
+ /* Do nothing... */
+ }
+ else if (range == INTERVAL_MASK(YEAR))
+ {
+ interval->month = (interval->month / MONTHS_PER_YEAR) * MONTHS_PER_YEAR;
+ interval->day = 0;
+ interval->time = 0;
+ }
+ else if (range == INTERVAL_MASK(MONTH))
+ {
+ interval->day = 0;
+ interval->time = 0;
+ }
+ /* YEAR TO MONTH */
+ else if (range == (INTERVAL_MASK(YEAR) | INTERVAL_MASK(MONTH)))
+ {
+ interval->day = 0;
+ interval->time = 0;
+ }
+ else if (range == INTERVAL_MASK(DAY))
+ {
+ interval->time = 0;
+ }
+ else if (range == INTERVAL_MASK(HOUR))
+ {
+ interval->time = (interval->time / USECS_PER_HOUR) *
+ USECS_PER_HOUR;
+ }
+ else if (range == INTERVAL_MASK(MINUTE))
+ {
+ interval->time = (interval->time / USECS_PER_MINUTE) *
+ USECS_PER_MINUTE;
+ }
+ else if (range == INTERVAL_MASK(SECOND))
+ {
+ /* fractional-second rounding will be dealt with below */
+ }
+ /* DAY TO HOUR */
+ else if (range == (INTERVAL_MASK(DAY) |
+ INTERVAL_MASK(HOUR)))
+ {
+ interval->time = (interval->time / USECS_PER_HOUR) *
+ USECS_PER_HOUR;
+ }
+ /* DAY TO MINUTE */
+ else if (range == (INTERVAL_MASK(DAY) |
+ INTERVAL_MASK(HOUR) |
+ INTERVAL_MASK(MINUTE)))
+ {
+ interval->time = (interval->time / USECS_PER_MINUTE) *
+ USECS_PER_MINUTE;
+ }
+ /* DAY TO SECOND */
+ else if (range == (INTERVAL_MASK(DAY) |
+ INTERVAL_MASK(HOUR) |
+ INTERVAL_MASK(MINUTE) |
+ INTERVAL_MASK(SECOND)))
+ {
+ /* fractional-second rounding will be dealt with below */
+ }
+ /* HOUR TO MINUTE */
+ else if (range == (INTERVAL_MASK(HOUR) |
+ INTERVAL_MASK(MINUTE)))
+ {
+ interval->time = (interval->time / USECS_PER_MINUTE) *
+ USECS_PER_MINUTE;
+ }
+ /* HOUR TO SECOND */
+ else if (range == (INTERVAL_MASK(HOUR) |
+ INTERVAL_MASK(MINUTE) |
+ INTERVAL_MASK(SECOND)))
+ {
+ /* fractional-second rounding will be dealt with below */
+ }
+ /* MINUTE TO SECOND */
+ else if (range == (INTERVAL_MASK(MINUTE) |
+ INTERVAL_MASK(SECOND)))
+ {
+ /* fractional-second rounding will be dealt with below */
+ }
+ else
+ elog(ERROR, "unrecognized interval typmod: %d", typmod);
+
+ /* Need to adjust sub-second precision? */
+ if (precision != INTERVAL_FULL_PRECISION)
+ {
+ if (precision < 0 || precision > MAX_INTERVAL_PRECISION)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("interval(%d) precision must be between %d and %d",
+ precision, 0, MAX_INTERVAL_PRECISION)));
+
+ if (interval->time >= INT64CONST(0))
+ {
+ interval->time = ((interval->time +
+ IntervalOffsets[precision]) /
+ IntervalScales[precision]) *
+ IntervalScales[precision];
+ }
+ else
+ {
+ interval->time = -(((-interval->time +
+ IntervalOffsets[precision]) /
+ IntervalScales[precision]) *
+ IntervalScales[precision]);
+ }
+ }
+ }
+}
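+
+/*
+ * For illustration (hypothetical value): under INTERVAL HOUR, a time field of
+ * 3 hours 25 minutes (12300000000 microseconds) is truncated above to
+ * (12300000000 / USECS_PER_HOUR) * USECS_PER_HOUR = 10800000000, i.e. exactly
+ * 3 hours; the month and day fields are left untouched.
+ */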
+
+/*
+ * make_interval - numeric Interval constructor
+ */
+Datum
+make_interval(PG_FUNCTION_ARGS)
+{
+ int32 years = PG_GETARG_INT32(0);
+ int32 months = PG_GETARG_INT32(1);
+ int32 weeks = PG_GETARG_INT32(2);
+ int32 days = PG_GETARG_INT32(3);
+ int32 hours = PG_GETARG_INT32(4);
+ int32 mins = PG_GETARG_INT32(5);
+ double secs = PG_GETARG_FLOAT8(6);
+ Interval *result;
+
+ /*
+ * Reject out-of-range inputs. We really ought to check the integer
+ * inputs as well, but it's not entirely clear what limits to apply.
+ */
+ if (isinf(secs) || isnan(secs))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+
+ result = (Interval *) palloc(sizeof(Interval));
+ result->month = years * MONTHS_PER_YEAR + months;
+ result->day = weeks * 7 + days;
+
+ secs = rint(secs * USECS_PER_SEC);
+ result->time = hours * ((int64) SECS_PER_HOUR * USECS_PER_SEC) +
+ mins * ((int64) SECS_PER_MINUTE * USECS_PER_SEC) +
+ (int64) secs;
+
+ PG_RETURN_INTERVAL_P(result);
+}
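+
+/*
+ * For illustration (hypothetical call): SELECT make_interval(weeks => 2,
+ * hours => 3) yields '14 days 03:00:00', since each week contributes 7 days
+ * and the hours go into the time field.
+ */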
+
+/* EncodeSpecialTimestamp()
+ * Convert reserved timestamp data type to string.
+ */
+void
+EncodeSpecialTimestamp(Timestamp dt, char *str)
+{
+ if (TIMESTAMP_IS_NOBEGIN(dt))
+ strcpy(str, EARLY);
+ else if (TIMESTAMP_IS_NOEND(dt))
+ strcpy(str, LATE);
+ else /* shouldn't happen */
+ elog(ERROR, "invalid argument for EncodeSpecialTimestamp");
+}
+
+Datum
+now(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_TIMESTAMPTZ(GetCurrentTransactionStartTimestamp());
+}
+
+Datum
+statement_timestamp(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_TIMESTAMPTZ(GetCurrentStatementStartTimestamp());
+}
+
+Datum
+clock_timestamp(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_TIMESTAMPTZ(GetCurrentTimestamp());
+}
+
+Datum
+pg_postmaster_start_time(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_TIMESTAMPTZ(PgStartTime);
+}
+
+Datum
+pg_conf_load_time(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_TIMESTAMPTZ(PgReloadTime);
+}
+
+/*
+ * GetCurrentTimestamp -- get the current operating system time
+ *
+ * Result is in the form of a TimestampTz value, and is expressed to the
+ * full precision of the gettimeofday() syscall
+ */
+TimestampTz
+GetCurrentTimestamp(void)
+{
+ TimestampTz result;
+ struct timeval tp;
+
+ gettimeofday(&tp, NULL);
+
+ result = (TimestampTz) tp.tv_sec -
+ ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY);
+ result = (result * USECS_PER_SEC) + tp.tv_usec;
+
+ return result;
+}
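+
+/*
+ * For illustration: at 2000-01-01 00:00:00 UTC, tv_sec is 946684800, which is
+ * exactly (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY, so the
+ * result above is 0 -- the Postgres epoch.
+ */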
+
+/*
+ * GetSQLCurrentTimestamp -- implements CURRENT_TIMESTAMP, CURRENT_TIMESTAMP(n)
+ */
+TimestampTz
+GetSQLCurrentTimestamp(int32 typmod)
+{
+ TimestampTz ts;
+
+ ts = GetCurrentTransactionStartTimestamp();
+ if (typmod >= 0)
+ AdjustTimestampForTypmod(&ts, typmod);
+ return ts;
+}
+
+/*
+ * GetSQLLocalTimestamp -- implements LOCALTIMESTAMP, LOCALTIMESTAMP(n)
+ */
+Timestamp
+GetSQLLocalTimestamp(int32 typmod)
+{
+ Timestamp ts;
+
+ ts = timestamptz2timestamp(GetCurrentTransactionStartTimestamp());
+ if (typmod >= 0)
+ AdjustTimestampForTypmod(&ts, typmod);
+ return ts;
+}
+
+/*
+ * timeofday(*) -- returns the current time as text.
+ */
+Datum
+timeofday(PG_FUNCTION_ARGS)
+{
+ struct timeval tp;
+ char templ[128];
+ char buf[128];
+ pg_time_t tt;
+
+ gettimeofday(&tp, NULL);
+ tt = (pg_time_t) tp.tv_sec;
+ pg_strftime(templ, sizeof(templ), "%a %b %d %H:%M:%S.%%06d %Y %Z",
+ pg_localtime(&tt, session_timezone));
+ snprintf(buf, sizeof(buf), templ, tp.tv_usec);
+
+ PG_RETURN_TEXT_P(cstring_to_text(buf));
+}
+
+/*
+ * TimestampDifference -- convert the difference between two timestamps
+ * into integer seconds and microseconds
+ *
+ * This is typically used to calculate a wait timeout for select(2),
+ * which explains the otherwise-odd choice of output format.
+ *
+ * Both inputs must be ordinary finite timestamps (in current usage,
+ * they'll be results from GetCurrentTimestamp()).
+ *
+ * We expect start_time <= stop_time. If not, we return zeros,
+ * since then we're already past the previously determined stop_time.
+ */
+void
+TimestampDifference(TimestampTz start_time, TimestampTz stop_time,
+ long *secs, int *microsecs)
+{
+ TimestampTz diff = stop_time - start_time;
+
+ if (diff <= 0)
+ {
+ *secs = 0;
+ *microsecs = 0;
+ }
+ else
+ {
+ *secs = (long) (diff / USECS_PER_SEC);
+ *microsecs = (int) (diff % USECS_PER_SEC);
+ }
+}
+
+/*
+ * TimestampDifferenceMilliseconds -- convert the difference between two
+ * timestamps into integer milliseconds
+ *
+ * This is typically used to calculate a wait timeout for WaitLatch()
+ * or a related function. The choice of "long" as the result type
+ * is to harmonize with that. It is caller's responsibility that the
+ * input timestamps not be so far apart as to risk overflow of "long"
+ * (which'd happen at about 25 days on machines with 32-bit "long").
+ *
+ * Both inputs must be ordinary finite timestamps (in current usage,
+ * they'll be results from GetCurrentTimestamp()).
+ *
+ * We expect start_time <= stop_time. If not, we return zero,
+ * since then we're already past the previously determined stop_time.
+ *
+ * Note we round up any fractional millisecond, since waiting for just
+ * less than the intended timeout is undesirable.
+ */
+long
+TimestampDifferenceMilliseconds(TimestampTz start_time, TimestampTz stop_time)
+{
+ TimestampTz diff = stop_time - start_time;
+
+ if (diff <= 0)
+ return 0;
+ else
+ return (long) ((diff + 999) / 1000);
+}
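+
+/*
+ * For illustration: a difference of 1500 microseconds becomes
+ * (1500 + 999) / 1000 = 2 milliseconds, rounding any fraction up as described
+ * above.
+ */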
+
+/*
+ * TimestampDifferenceExceeds -- report whether the difference between two
+ * timestamps is >= a threshold (expressed in milliseconds)
+ *
+ * Both inputs must be ordinary finite timestamps (in current usage,
+ * they'll be results from GetCurrentTimestamp()).
+ */
+bool
+TimestampDifferenceExceeds(TimestampTz start_time,
+ TimestampTz stop_time,
+ int msec)
+{
+ TimestampTz diff = stop_time - start_time;
+
+ return (diff >= msec * INT64CONST(1000));
+}
+
+/*
+ * Convert a time_t to TimestampTz.
+ *
+ * We do not use time_t internally in Postgres, but this is provided for use
+ * by functions that need to interpret, say, a stat(2) result.
+ *
+ * To avoid having the function's ABI vary depending on the width of time_t,
+ * we declare the argument as pg_time_t, which is cast-compatible with
+ * time_t but always 64 bits wide (unless the platform has no 64-bit type).
+ * This detail should be invisible to callers, at least at source code level.
+ */
+TimestampTz
+time_t_to_timestamptz(pg_time_t tm)
+{
+ TimestampTz result;
+
+ result = (TimestampTz) tm -
+ ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY);
+ result *= USECS_PER_SEC;
+
+ return result;
+}
+
+/*
+ * Convert a TimestampTz to time_t.
+ *
+ * This too is just marginally useful, but some places need it.
+ *
+ * To avoid having the function's ABI vary depending on the width of time_t,
+ * we declare the result as pg_time_t, which is cast-compatible with
+ * time_t but always 64 bits wide (unless the platform has no 64-bit type).
+ * This detail should be invisible to callers, at least at source code level.
+ */
+pg_time_t
+timestamptz_to_time_t(TimestampTz t)
+{
+ pg_time_t result;
+
+ result = (pg_time_t) (t / USECS_PER_SEC +
+ ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY));
+
+ return result;
+}
+
+/*
+ * Produce a C-string representation of a TimestampTz.
+ *
+ * This is mostly for use in emitting messages. The primary difference
+ * from timestamptz_out is that we force the output format to ISO. Note
+ * also that the result is in a static buffer, not pstrdup'd.
+ *
+ * See also pg_strftime.
+ */
+const char *
+timestamptz_to_str(TimestampTz t)
+{
+ static char buf[MAXDATELEN + 1];
+ int tz;
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+ const char *tzn;
+
+ if (TIMESTAMP_NOT_FINITE(t))
+ EncodeSpecialTimestamp(t, buf);
+ else if (timestamp2tm(t, &tz, tm, &fsec, &tzn, NULL) == 0)
+ EncodeDateTime(tm, fsec, true, tz, tzn, USE_ISO_DATES, buf);
+ else
+ strlcpy(buf, "(timestamp out of range)", sizeof(buf));
+
+ return buf;
+}
+
+
+void
+dt2time(Timestamp jd, int *hour, int *min, int *sec, fsec_t *fsec)
+{
+ TimeOffset time;
+
+ time = jd;
+
+ *hour = time / USECS_PER_HOUR;
+ time -= (*hour) * USECS_PER_HOUR;
+ *min = time / USECS_PER_MINUTE;
+ time -= (*min) * USECS_PER_MINUTE;
+ *sec = time / USECS_PER_SEC;
+ *fsec = time - (*sec * USECS_PER_SEC);
+} /* dt2time() */
+
+
+/*
+ * timestamp2tm() - Convert timestamp data type to POSIX time structure.
+ *
+ * Note that year is _not_ 1900-based, but is an explicit full value.
+ * Also, month is one-based, _not_ zero-based.
+ * Returns:
+ * 0 on success
+ * -1 on out of range
+ *
+ * If attimezone is NULL, the global timezone setting will be used.
+ */
+int
+timestamp2tm(Timestamp dt, int *tzp, struct pg_tm *tm, fsec_t *fsec, const char **tzn, pg_tz *attimezone)
+{
+ Timestamp date;
+ Timestamp time;
+ pg_time_t utime;
+
+ /* Use session timezone if caller asks for default */
+ if (attimezone == NULL)
+ attimezone = session_timezone;
+
+ time = dt;
+ TMODULO(time, date, USECS_PER_DAY);
+
+ if (time < INT64CONST(0))
+ {
+ time += USECS_PER_DAY;
+ date -= 1;
+ }
+
+ /* add offset to go from J2000 back to standard Julian date */
+ date += POSTGRES_EPOCH_JDATE;
+
+ /* Julian day routine does not work for negative Julian days */
+ if (date < 0 || date > (Timestamp) INT_MAX)
+ return -1;
+
+ j2date((int) date, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
+ dt2time(time, &tm->tm_hour, &tm->tm_min, &tm->tm_sec, fsec);
+
+ /* Done if no TZ conversion wanted */
+ if (tzp == NULL)
+ {
+ tm->tm_isdst = -1;
+ tm->tm_gmtoff = 0;
+ tm->tm_zone = NULL;
+ if (tzn != NULL)
+ *tzn = NULL;
+ return 0;
+ }
+
+ /*
+ * If the time falls within the range of pg_time_t, use pg_localtime() to
+ * rotate to the local time zone.
+ *
+ * First, convert to an integral timestamp, avoiding possibly
+ * platform-specific roundoff-in-wrong-direction errors, and adjust to
+ * Unix epoch. Then see if we can convert to pg_time_t without loss. This
+ * coding avoids hardwiring any assumptions about the width of pg_time_t,
+ * so it should behave sanely on machines without int64.
+ */
+ dt = (dt - *fsec) / USECS_PER_SEC +
+ (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY;
+ utime = (pg_time_t) dt;
+ if ((Timestamp) utime == dt)
+ {
+ struct pg_tm *tx = pg_localtime(&utime, attimezone);
+
+ tm->tm_year = tx->tm_year + 1900;
+ tm->tm_mon = tx->tm_mon + 1;
+ tm->tm_mday = tx->tm_mday;
+ tm->tm_hour = tx->tm_hour;
+ tm->tm_min = tx->tm_min;
+ tm->tm_sec = tx->tm_sec;
+ tm->tm_isdst = tx->tm_isdst;
+ tm->tm_gmtoff = tx->tm_gmtoff;
+ tm->tm_zone = tx->tm_zone;
+ *tzp = -tm->tm_gmtoff;
+ if (tzn != NULL)
+ *tzn = tm->tm_zone;
+ }
+ else
+ {
+ /*
+ * When out of range of pg_time_t, treat as GMT
+ */
+ *tzp = 0;
+ /* Mark this as *no* time zone available */
+ tm->tm_isdst = -1;
+ tm->tm_gmtoff = 0;
+ tm->tm_zone = NULL;
+ if (tzn != NULL)
+ *tzn = NULL;
+ }
+
+ return 0;
+}
+
+
+/* tm2timestamp()
+ * Convert a tm structure to a timestamp data type.
+ * Note that year is _not_ 1900-based, but is an explicit full value.
+ * Also, month is one-based, _not_ zero-based.
+ *
+ * Returns -1 on failure (value out of range).
+ */
+int
+tm2timestamp(struct pg_tm *tm, fsec_t fsec, int *tzp, Timestamp *result)
+{
+ TimeOffset date;
+ TimeOffset time;
+
+ /* Prevent overflow in Julian-day routines */
+ if (!IS_VALID_JULIAN(tm->tm_year, tm->tm_mon, tm->tm_mday))
+ {
+ *result = 0; /* keep compiler quiet */
+ return -1;
+ }
+
+ date = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) - POSTGRES_EPOCH_JDATE;
+ time = time2t(tm->tm_hour, tm->tm_min, tm->tm_sec, fsec);
+
+ *result = date * USECS_PER_DAY + time;
+ /* check for major overflow */
+ if ((*result - time) / USECS_PER_DAY != date)
+ {
+ *result = 0; /* keep compiler quiet */
+ return -1;
+ }
+ /* check for just-barely overflow (okay except time-of-day wraps) */
+ /* caution: we want to allow 1999-12-31 24:00:00 */
+ if ((*result < 0 && date > 0) ||
+ (*result > 0 && date < -1))
+ {
+ *result = 0; /* keep compiler quiet */
+ return -1;
+ }
+ if (tzp != NULL)
+ *result = dt2local(*result, -(*tzp));
+
+ /* final range check catches just-out-of-range timestamps */
+ if (!IS_VALID_TIMESTAMP(*result))
+ {
+ *result = 0; /* keep compiler quiet */
+ return -1;
+ }
+
+ return 0;
+}
+
+
+/* interval2itm()
+ * Convert an Interval to a pg_itm structure.
+ * Note: overflow is not possible, because the pg_itm fields are
+ * wide enough for all possible conversion results.
+ */
+void
+interval2itm(Interval span, struct pg_itm *itm)
+{
+ TimeOffset time;
+ TimeOffset tfrac;
+
+ itm->tm_year = span.month / MONTHS_PER_YEAR;
+ itm->tm_mon = span.month % MONTHS_PER_YEAR;
+ itm->tm_mday = span.day;
+ time = span.time;
+
+ tfrac = time / USECS_PER_HOUR;
+ time -= tfrac * USECS_PER_HOUR;
+ itm->tm_hour = tfrac;
+ tfrac = time / USECS_PER_MINUTE;
+ time -= tfrac * USECS_PER_MINUTE;
+ itm->tm_min = (int) tfrac;
+ tfrac = time / USECS_PER_SEC;
+ time -= tfrac * USECS_PER_SEC;
+ itm->tm_sec = (int) tfrac;
+ itm->tm_usec = (int) time;
+}
+
+/* itm2interval()
+ * Convert a pg_itm structure to an Interval.
+ * Returns 0 if OK, -1 on overflow.
+ */
+int
+itm2interval(struct pg_itm *itm, Interval *span)
+{
+ int64 total_months = (int64) itm->tm_year * MONTHS_PER_YEAR + itm->tm_mon;
+
+ if (total_months > INT_MAX || total_months < INT_MIN)
+ return -1;
+ span->month = (int32) total_months;
+ span->day = itm->tm_mday;
+ if (pg_mul_s64_overflow(itm->tm_hour, USECS_PER_HOUR,
+ &span->time))
+ return -1;
+ /* tm_min, tm_sec are 32 bits, so intermediate products can't overflow */
+ if (pg_add_s64_overflow(span->time, itm->tm_min * USECS_PER_MINUTE,
+ &span->time))
+ return -1;
+ if (pg_add_s64_overflow(span->time, itm->tm_sec * USECS_PER_SEC,
+ &span->time))
+ return -1;
+ if (pg_add_s64_overflow(span->time, itm->tm_usec,
+ &span->time))
+ return -1;
+ return 0;
+}
+
+/* itmin2interval()
+ * Convert a pg_itm_in structure to an Interval.
+ * Returns 0 if OK, -1 on overflow.
+ */
+int
+itmin2interval(struct pg_itm_in *itm_in, Interval *span)
+{
+ int64 total_months = (int64) itm_in->tm_year * MONTHS_PER_YEAR + itm_in->tm_mon;
+
+ if (total_months > INT_MAX || total_months < INT_MIN)
+ return -1;
+ span->month = (int32) total_months;
+ span->day = itm_in->tm_mday;
+ span->time = itm_in->tm_usec;
+ return 0;
+}
+
+static TimeOffset
+time2t(const int hour, const int min, const int sec, const fsec_t fsec)
+{
+ return (((((hour * MINS_PER_HOUR) + min) * SECS_PER_MINUTE) + sec) * USECS_PER_SEC) + fsec;
+}
+
+static Timestamp
+dt2local(Timestamp dt, int tz)
+{
+ dt -= (tz * USECS_PER_SEC);
+ return dt;
+}
+
+
+/*****************************************************************************
+ * PUBLIC ROUTINES *
+ *****************************************************************************/
+
+
+Datum
+timestamp_finite(PG_FUNCTION_ARGS)
+{
+ Timestamp timestamp = PG_GETARG_TIMESTAMP(0);
+
+ PG_RETURN_BOOL(!TIMESTAMP_NOT_FINITE(timestamp));
+}
+
+Datum
+interval_finite(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(true);
+}
+
+
+/*----------------------------------------------------------
+ * Relational operators for timestamp.
+ *---------------------------------------------------------*/
+
+void
+GetEpochTime(struct pg_tm *tm)
+{
+ struct pg_tm *t0;
+ pg_time_t epoch = 0;
+
+ t0 = pg_gmtime(&epoch);
+
+ if (t0 == NULL)
+ elog(ERROR, "could not convert epoch to timestamp: %m");
+
+ tm->tm_year = t0->tm_year;
+ tm->tm_mon = t0->tm_mon;
+ tm->tm_mday = t0->tm_mday;
+ tm->tm_hour = t0->tm_hour;
+ tm->tm_min = t0->tm_min;
+ tm->tm_sec = t0->tm_sec;
+
+ tm->tm_year += 1900;
+ tm->tm_mon++;
+}
+
+Timestamp
+SetEpochTimestamp(void)
+{
+ Timestamp dt;
+ struct pg_tm tt,
+ *tm = &tt;
+
+ GetEpochTime(tm);
+ /* we don't bother to test for failure ... */
+ tm2timestamp(tm, 0, NULL, &dt);
+
+ return dt;
+} /* SetEpochTimestamp() */
+
+/*
+ * We are currently sharing some code between timestamp and timestamptz.
+ * The comparison functions are among them. - thomas 2001-09-25
+ *
+ * timestamp_relop - is timestamp1 relop timestamp2
+ */
+int
+timestamp_cmp_internal(Timestamp dt1, Timestamp dt2)
+{
+ return (dt1 < dt2) ? -1 : ((dt1 > dt2) ? 1 : 0);
+}
+
+Datum
+timestamp_eq(PG_FUNCTION_ARGS)
+{
+ Timestamp dt1 = PG_GETARG_TIMESTAMP(0);
+ Timestamp dt2 = PG_GETARG_TIMESTAMP(1);
+
+ PG_RETURN_BOOL(timestamp_cmp_internal(dt1, dt2) == 0);
+}
+
+Datum
+timestamp_ne(PG_FUNCTION_ARGS)
+{
+ Timestamp dt1 = PG_GETARG_TIMESTAMP(0);
+ Timestamp dt2 = PG_GETARG_TIMESTAMP(1);
+
+ PG_RETURN_BOOL(timestamp_cmp_internal(dt1, dt2) != 0);
+}
+
+Datum
+timestamp_lt(PG_FUNCTION_ARGS)
+{
+ Timestamp dt1 = PG_GETARG_TIMESTAMP(0);
+ Timestamp dt2 = PG_GETARG_TIMESTAMP(1);
+
+ PG_RETURN_BOOL(timestamp_cmp_internal(dt1, dt2) < 0);
+}
+
+Datum
+timestamp_gt(PG_FUNCTION_ARGS)
+{
+ Timestamp dt1 = PG_GETARG_TIMESTAMP(0);
+ Timestamp dt2 = PG_GETARG_TIMESTAMP(1);
+
+ PG_RETURN_BOOL(timestamp_cmp_internal(dt1, dt2) > 0);
+}
+
+Datum
+timestamp_le(PG_FUNCTION_ARGS)
+{
+ Timestamp dt1 = PG_GETARG_TIMESTAMP(0);
+ Timestamp dt2 = PG_GETARG_TIMESTAMP(1);
+
+ PG_RETURN_BOOL(timestamp_cmp_internal(dt1, dt2) <= 0);
+}
+
+Datum
+timestamp_ge(PG_FUNCTION_ARGS)
+{
+ Timestamp dt1 = PG_GETARG_TIMESTAMP(0);
+ Timestamp dt2 = PG_GETARG_TIMESTAMP(1);
+
+ PG_RETURN_BOOL(timestamp_cmp_internal(dt1, dt2) >= 0);
+}
+
+Datum
+timestamp_cmp(PG_FUNCTION_ARGS)
+{
+ Timestamp dt1 = PG_GETARG_TIMESTAMP(0);
+ Timestamp dt2 = PG_GETARG_TIMESTAMP(1);
+
+ PG_RETURN_INT32(timestamp_cmp_internal(dt1, dt2));
+}
+
+#if SIZEOF_DATUM < 8
+/* note: this is used for timestamptz also */
+static int
+timestamp_fastcmp(Datum x, Datum y, SortSupport ssup)
+{
+ Timestamp a = DatumGetTimestamp(x);
+ Timestamp b = DatumGetTimestamp(y);
+
+ return timestamp_cmp_internal(a, b);
+}
+#endif
+
+Datum
+timestamp_sortsupport(PG_FUNCTION_ARGS)
+{
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+
+#if SIZEOF_DATUM >= 8
+
+ /*
+ * If this build has pass-by-value timestamps, then we can use a standard
+ * comparator function.
+ */
+ ssup->comparator = ssup_datum_signed_cmp;
+#else
+ ssup->comparator = timestamp_fastcmp;
+#endif
+ PG_RETURN_VOID();
+}
+
+Datum
+timestamp_hash(PG_FUNCTION_ARGS)
+{
+ return hashint8(fcinfo);
+}
+
+Datum
+timestamp_hash_extended(PG_FUNCTION_ARGS)
+{
+ return hashint8extended(fcinfo);
+}
+
+/*
+ * Cross-type comparison functions for timestamp vs timestamptz
+ */
+
+int32
+timestamp_cmp_timestamptz_internal(Timestamp timestampVal, TimestampTz dt2)
+{
+ TimestampTz dt1;
+ int overflow;
+
+ dt1 = timestamp2timestamptz_opt_overflow(timestampVal, &overflow);
+ if (overflow > 0)
+ {
+ /* dt1 is larger than any finite timestamp, but less than infinity */
+ return TIMESTAMP_IS_NOEND(dt2) ? -1 : +1;
+ }
+ if (overflow < 0)
+ {
+ /* dt1 is less than any finite timestamp, but more than -infinity */
+ return TIMESTAMP_IS_NOBEGIN(dt2) ? +1 : -1;
+ }
+
+ return timestamptz_cmp_internal(dt1, dt2);
+}
+
+Datum
+timestamp_eq_timestamptz(PG_FUNCTION_ARGS)
+{
+ Timestamp timestampVal = PG_GETARG_TIMESTAMP(0);
+ TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1);
+
+ PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt2) == 0);
+}
+
+Datum
+timestamp_ne_timestamptz(PG_FUNCTION_ARGS)
+{
+ Timestamp timestampVal = PG_GETARG_TIMESTAMP(0);
+ TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1);
+
+ PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt2) != 0);
+}
+
+Datum
+timestamp_lt_timestamptz(PG_FUNCTION_ARGS)
+{
+ Timestamp timestampVal = PG_GETARG_TIMESTAMP(0);
+ TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1);
+
+ PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt2) < 0);
+}
+
+Datum
+timestamp_gt_timestamptz(PG_FUNCTION_ARGS)
+{
+ Timestamp timestampVal = PG_GETARG_TIMESTAMP(0);
+ TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1);
+
+ PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt2) > 0);
+}
+
+Datum
+timestamp_le_timestamptz(PG_FUNCTION_ARGS)
+{
+ Timestamp timestampVal = PG_GETARG_TIMESTAMP(0);
+ TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1);
+
+ PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt2) <= 0);
+}
+
+Datum
+timestamp_ge_timestamptz(PG_FUNCTION_ARGS)
+{
+ Timestamp timestampVal = PG_GETARG_TIMESTAMP(0);
+ TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1);
+
+ PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt2) >= 0);
+}
+
+Datum
+timestamp_cmp_timestamptz(PG_FUNCTION_ARGS)
+{
+ Timestamp timestampVal = PG_GETARG_TIMESTAMP(0);
+ TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1);
+
+ PG_RETURN_INT32(timestamp_cmp_timestamptz_internal(timestampVal, dt2));
+}
+
+Datum
+timestamptz_eq_timestamp(PG_FUNCTION_ARGS)
+{
+ TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0);
+ Timestamp timestampVal = PG_GETARG_TIMESTAMP(1);
+
+ PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt1) == 0);
+}
+
+Datum
+timestamptz_ne_timestamp(PG_FUNCTION_ARGS)
+{
+ TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0);
+ Timestamp timestampVal = PG_GETARG_TIMESTAMP(1);
+
+ PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt1) != 0);
+}
+
+Datum
+timestamptz_lt_timestamp(PG_FUNCTION_ARGS)
+{
+ TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0);
+ Timestamp timestampVal = PG_GETARG_TIMESTAMP(1);
+
+ PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt1) > 0);
+}
+
+Datum
+timestamptz_gt_timestamp(PG_FUNCTION_ARGS)
+{
+ TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0);
+ Timestamp timestampVal = PG_GETARG_TIMESTAMP(1);
+
+ PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt1) < 0);
+}
+
+Datum
+timestamptz_le_timestamp(PG_FUNCTION_ARGS)
+{
+ TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0);
+ Timestamp timestampVal = PG_GETARG_TIMESTAMP(1);
+
+ PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt1) >= 0);
+}
+
+Datum
+timestamptz_ge_timestamp(PG_FUNCTION_ARGS)
+{
+ TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0);
+ Timestamp timestampVal = PG_GETARG_TIMESTAMP(1);
+
+ PG_RETURN_BOOL(timestamp_cmp_timestamptz_internal(timestampVal, dt1) <= 0);
+}
+
+Datum
+timestamptz_cmp_timestamp(PG_FUNCTION_ARGS)
+{
+ TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0);
+ Timestamp timestampVal = PG_GETARG_TIMESTAMP(1);
+
+ PG_RETURN_INT32(-timestamp_cmp_timestamptz_internal(timestampVal, dt1));
+}
+
+
+/*
+ * interval_relop - is interval1 relop interval2
+ *
+ * Interval comparison is based on converting interval values to a linear
+ * representation expressed in the units of the time field (microseconds,
+ * in the case of integer timestamps) with days assumed to be always 24 hours
+ * and months assumed to be always 30 days. To avoid overflow, we need a
+ * wider-than-int64 datatype for the linear representation, so use INT128.
+ */
+
+static inline INT128
+interval_cmp_value(const Interval *interval)
+{
+ INT128 span;
+ int64 days;
+
+ /*
+ * Combine the month and day fields into an integral number of days.
+ * Because the inputs are int32, int64 arithmetic suffices here.
+ */
+ days = interval->month * INT64CONST(30);
+ days += interval->day;
+
+ /* Widen time field to 128 bits */
+ span = int64_to_int128(interval->time);
+
+ /* Scale up days to microseconds, forming a 128-bit product */
+ int128_add_int64_mul_int64(&span, days, USECS_PER_DAY);
+
+ return span;
+}
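+
+/*
+ * Under this linearization '1 month', '30 days', and '720 hours' all map to
+ * the same 128-bit value, so for example
+ *
+ *   SELECT interval '1 month' = interval '30 days';   -- true
+ *
+ * even though the two values behave differently in timestamp arithmetic.
+ */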
+
+static int
+interval_cmp_internal(Interval *interval1, Interval *interval2)
+{
+ INT128 span1 = interval_cmp_value(interval1);
+ INT128 span2 = interval_cmp_value(interval2);
+
+ return int128_compare(span1, span2);
+}
+
+Datum
+interval_eq(PG_FUNCTION_ARGS)
+{
+ Interval *interval1 = PG_GETARG_INTERVAL_P(0);
+ Interval *interval2 = PG_GETARG_INTERVAL_P(1);
+
+ PG_RETURN_BOOL(interval_cmp_internal(interval1, interval2) == 0);
+}
+
+Datum
+interval_ne(PG_FUNCTION_ARGS)
+{
+ Interval *interval1 = PG_GETARG_INTERVAL_P(0);
+ Interval *interval2 = PG_GETARG_INTERVAL_P(1);
+
+ PG_RETURN_BOOL(interval_cmp_internal(interval1, interval2) != 0);
+}
+
+Datum
+interval_lt(PG_FUNCTION_ARGS)
+{
+ Interval *interval1 = PG_GETARG_INTERVAL_P(0);
+ Interval *interval2 = PG_GETARG_INTERVAL_P(1);
+
+ PG_RETURN_BOOL(interval_cmp_internal(interval1, interval2) < 0);
+}
+
+Datum
+interval_gt(PG_FUNCTION_ARGS)
+{
+ Interval *interval1 = PG_GETARG_INTERVAL_P(0);
+ Interval *interval2 = PG_GETARG_INTERVAL_P(1);
+
+ PG_RETURN_BOOL(interval_cmp_internal(interval1, interval2) > 0);
+}
+
+Datum
+interval_le(PG_FUNCTION_ARGS)
+{
+ Interval *interval1 = PG_GETARG_INTERVAL_P(0);
+ Interval *interval2 = PG_GETARG_INTERVAL_P(1);
+
+ PG_RETURN_BOOL(interval_cmp_internal(interval1, interval2) <= 0);
+}
+
+Datum
+interval_ge(PG_FUNCTION_ARGS)
+{
+ Interval *interval1 = PG_GETARG_INTERVAL_P(0);
+ Interval *interval2 = PG_GETARG_INTERVAL_P(1);
+
+ PG_RETURN_BOOL(interval_cmp_internal(interval1, interval2) >= 0);
+}
+
+Datum
+interval_cmp(PG_FUNCTION_ARGS)
+{
+ Interval *interval1 = PG_GETARG_INTERVAL_P(0);
+ Interval *interval2 = PG_GETARG_INTERVAL_P(1);
+
+ PG_RETURN_INT32(interval_cmp_internal(interval1, interval2));
+}
+
+/*
+ * Hashing for intervals
+ *
+ * We must produce equal hashvals for values that interval_cmp_internal()
+ * considers equal. So, compute the net span the same way it does,
+ * and then hash that.
+ */
+Datum
+interval_hash(PG_FUNCTION_ARGS)
+{
+ Interval *interval = PG_GETARG_INTERVAL_P(0);
+ INT128 span = interval_cmp_value(interval);
+ int64 span64;
+
+ /*
+ * Use only the least significant 64 bits for hashing. The upper 64 bits
+ * seldom add any useful information, and besides we must do it like this
+ * for compatibility with hashes calculated before use of INT128 was
+ * introduced.
+ */
+ span64 = int128_to_int64(span);
+
+ return DirectFunctionCall1(hashint8, Int64GetDatumFast(span64));
+}
+
+Datum
+interval_hash_extended(PG_FUNCTION_ARGS)
+{
+ Interval *interval = PG_GETARG_INTERVAL_P(0);
+ INT128 span = interval_cmp_value(interval);
+ int64 span64;
+
+ /* Same approach as interval_hash */
+ span64 = int128_to_int64(span);
+
+ return DirectFunctionCall2(hashint8extended, Int64GetDatumFast(span64),
+ PG_GETARG_DATUM(1));
+}
+
+/* overlaps_timestamp() --- implements the SQL OVERLAPS operator.
+ *
+ * Algorithm is per SQL spec. This is much harder than you'd think
+ * because the spec requires us to deliver a non-null answer in some cases
+ * where some of the inputs are null.
+ */
+Datum
+overlaps_timestamp(PG_FUNCTION_ARGS)
+{
+ /*
+ * The arguments are Timestamps, but we leave them as generic Datums to
+ * avoid unnecessary conversions between value and reference forms --- not
+ * to mention possible dereferences of null pointers.
+ */
+ Datum ts1 = PG_GETARG_DATUM(0);
+ Datum te1 = PG_GETARG_DATUM(1);
+ Datum ts2 = PG_GETARG_DATUM(2);
+ Datum te2 = PG_GETARG_DATUM(3);
+ bool ts1IsNull = PG_ARGISNULL(0);
+ bool te1IsNull = PG_ARGISNULL(1);
+ bool ts2IsNull = PG_ARGISNULL(2);
+ bool te2IsNull = PG_ARGISNULL(3);
+
+#define TIMESTAMP_GT(t1,t2) \
+ DatumGetBool(DirectFunctionCall2(timestamp_gt,t1,t2))
+#define TIMESTAMP_LT(t1,t2) \
+ DatumGetBool(DirectFunctionCall2(timestamp_lt,t1,t2))
+
+ /*
+ * If both endpoints of interval 1 are null, the result is null (unknown).
+ * If just one endpoint is null, take ts1 as the non-null one. Otherwise,
+ * take ts1 as the lesser endpoint.
+ */
+ if (ts1IsNull)
+ {
+ if (te1IsNull)
+ PG_RETURN_NULL();
+ /* swap null for non-null */
+ ts1 = te1;
+ te1IsNull = true;
+ }
+ else if (!te1IsNull)
+ {
+ if (TIMESTAMP_GT(ts1, te1))
+ {
+ Datum tt = ts1;
+
+ ts1 = te1;
+ te1 = tt;
+ }
+ }
+
+ /* Likewise for interval 2. */
+ if (ts2IsNull)
+ {
+ if (te2IsNull)
+ PG_RETURN_NULL();
+ /* swap null for non-null */
+ ts2 = te2;
+ te2IsNull = true;
+ }
+ else if (!te2IsNull)
+ {
+ if (TIMESTAMP_GT(ts2, te2))
+ {
+ Datum tt = ts2;
+
+ ts2 = te2;
+ te2 = tt;
+ }
+ }
+
+ /*
+ * At this point neither ts1 nor ts2 is null, so we can consider three
+ * cases: ts1 > ts2, ts1 < ts2, ts1 = ts2
+ */
+ if (TIMESTAMP_GT(ts1, ts2))
+ {
+ /*
+ * This case is ts1 < te2 OR te1 < te2, which may look redundant but
+ * in the presence of nulls it's not quite completely so.
+ */
+ if (te2IsNull)
+ PG_RETURN_NULL();
+ if (TIMESTAMP_LT(ts1, te2))
+ PG_RETURN_BOOL(true);
+ if (te1IsNull)
+ PG_RETURN_NULL();
+
+ /*
+ * If te1 is not null then we had ts1 <= te1 above, and we just found
+ * ts1 >= te2, hence te1 >= te2.
+ */
+ PG_RETURN_BOOL(false);
+ }
+ else if (TIMESTAMP_LT(ts1, ts2))
+ {
+ /* This case is ts2 < te1 OR te2 < te1 */
+ if (te1IsNull)
+ PG_RETURN_NULL();
+ if (TIMESTAMP_LT(ts2, te1))
+ PG_RETURN_BOOL(true);
+ if (te2IsNull)
+ PG_RETURN_NULL();
+
+ /*
+ * If te2 is not null then we had ts2 <= te2 above, and we just found
+ * ts2 >= te1, hence te2 >= te1.
+ */
+ PG_RETURN_BOOL(false);
+ }
+ else
+ {
+ /*
+ * For ts1 = ts2 the spec says te1 <> te2 OR te1 = te2, which is a
+ * rather silly way of saying "true if both are non-null, else null".
+ */
+ if (te1IsNull || te2IsNull)
+ PG_RETURN_NULL();
+ PG_RETURN_BOOL(true);
+ }
+
+#undef TIMESTAMP_GT
+#undef TIMESTAMP_LT
+}
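+
+/*
+ * Illustrative usage (per the SQL spec's half-open interpretation, periods
+ * whose endpoints merely touch do not overlap):
+ *
+ *   SELECT (timestamp '2001-02-16 10:00', timestamp '2001-02-16 12:00')
+ *          OVERLAPS
+ *          (timestamp '2001-02-16 11:00', timestamp '2001-02-16 13:00');
+ *   -- true
+ *
+ *   SELECT (timestamp '2001-02-16 10:00', timestamp '2001-02-16 11:00')
+ *          OVERLAPS
+ *          (timestamp '2001-02-16 11:00', timestamp '2001-02-16 12:00');
+ *   -- false
+ */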
+
+
+/*----------------------------------------------------------
+ * "Arithmetic" operators on date/times.
+ *---------------------------------------------------------*/
+
+Datum
+timestamp_smaller(PG_FUNCTION_ARGS)
+{
+ Timestamp dt1 = PG_GETARG_TIMESTAMP(0);
+ Timestamp dt2 = PG_GETARG_TIMESTAMP(1);
+ Timestamp result;
+
+ /* use timestamp_cmp_internal to be sure this agrees with comparisons */
+ if (timestamp_cmp_internal(dt1, dt2) < 0)
+ result = dt1;
+ else
+ result = dt2;
+ PG_RETURN_TIMESTAMP(result);
+}
+
+Datum
+timestamp_larger(PG_FUNCTION_ARGS)
+{
+ Timestamp dt1 = PG_GETARG_TIMESTAMP(0);
+ Timestamp dt2 = PG_GETARG_TIMESTAMP(1);
+ Timestamp result;
+
+ if (timestamp_cmp_internal(dt1, dt2) > 0)
+ result = dt1;
+ else
+ result = dt2;
+ PG_RETURN_TIMESTAMP(result);
+}
+
+
+Datum
+timestamp_mi(PG_FUNCTION_ARGS)
+{
+ Timestamp dt1 = PG_GETARG_TIMESTAMP(0);
+ Timestamp dt2 = PG_GETARG_TIMESTAMP(1);
+ Interval *result;
+
+ result = (Interval *) palloc(sizeof(Interval));
+
+ if (TIMESTAMP_NOT_FINITE(dt1) || TIMESTAMP_NOT_FINITE(dt2))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("cannot subtract infinite timestamps")));
+
+ result->time = dt1 - dt2;
+
+ result->month = 0;
+ result->day = 0;
+
+ /*----------
+ * This is wrong, but removing it breaks a lot of regression tests.
+ * For example:
+ *
+ * test=> SET timezone = 'EST5EDT';
+ * test=> SELECT
+ * test-> ('2005-10-30 13:22:00-05'::timestamptz -
+ * test(> '2005-10-29 13:22:00-04'::timestamptz);
+ * ?column?
+ * ----------------
+ * 1 day 01:00:00
+ * (1 row)
+ *
+ * so adding that to the first timestamp gets:
+ *
+ * test=> SELECT
+ * test-> ('2005-10-29 13:22:00-04'::timestamptz +
+ * test(> ('2005-10-30 13:22:00-05'::timestamptz -
+ * test(> '2005-10-29 13:22:00-04'::timestamptz)) at time zone 'EST';
+ * timezone
+ * --------------------
+ * 2005-10-30 14:22:00
+ * (1 row)
+ *----------
+ */
+ result = DatumGetIntervalP(DirectFunctionCall1(interval_justify_hours,
+ IntervalPGetDatum(result)));
+
+ PG_RETURN_INTERVAL_P(result);
+}
+
+/*
+ * interval_justify_interval()
+ *
+ * Adjust interval so 'month', 'day', and 'time' portions are within
+ * customary bounds. Specifically:
+ *
+ * 0 <= abs(time) < 24 hours
+ * 0 <= abs(day) < 30 days
+ *
+ * Also, the sign bit on all three fields is made equal, so either
+ * all three fields are negative or all are positive.
+ */
+Datum
+interval_justify_interval(PG_FUNCTION_ARGS)
+{
+ Interval *span = PG_GETARG_INTERVAL_P(0);
+ Interval *result;
+ TimeOffset wholeday;
+ int32 wholemonth;
+
+ result = (Interval *) palloc(sizeof(Interval));
+ result->month = span->month;
+ result->day = span->day;
+ result->time = span->time;
+
+ /* pre-justify days if it might prevent overflow */
+ if ((result->day > 0 && result->time > 0) ||
+ (result->day < 0 && result->time < 0))
+ {
+ wholemonth = result->day / DAYS_PER_MONTH;
+ result->day -= wholemonth * DAYS_PER_MONTH;
+ if (pg_add_s32_overflow(result->month, wholemonth, &result->month))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+ }
+
+ /*
+ * Since TimeOffset is int64, abs(wholeday) can't exceed about 1.07e8. If
+ * we pre-justified then abs(result->day) is less than DAYS_PER_MONTH, so
+ * this addition can't overflow. If we didn't pre-justify, then day and
+ * time are of different signs, so it still can't overflow.
+ */
+ TMODULO(result->time, wholeday, USECS_PER_DAY);
+ result->day += wholeday;
+
+ wholemonth = result->day / DAYS_PER_MONTH;
+ result->day -= wholemonth * DAYS_PER_MONTH;
+ if (pg_add_s32_overflow(result->month, wholemonth, &result->month))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+
+ if (result->month > 0 &&
+ (result->day < 0 || (result->day == 0 && result->time < 0)))
+ {
+ result->day += DAYS_PER_MONTH;
+ result->month--;
+ }
+ else if (result->month < 0 &&
+ (result->day > 0 || (result->day == 0 && result->time > 0)))
+ {
+ result->day -= DAYS_PER_MONTH;
+ result->month++;
+ }
+
+ if (result->day > 0 && result->time < 0)
+ {
+ result->time += USECS_PER_DAY;
+ result->day--;
+ }
+ else if (result->day < 0 && result->time > 0)
+ {
+ result->time -= USECS_PER_DAY;
+ result->day++;
+ }
+
+ PG_RETURN_INTERVAL_P(result);
+}
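+
+/*
+ * Illustrative usage:
+ *
+ *   SELECT justify_interval(interval '1 mon -1 hour');
+ *   -- 29 days 23:00:00
+ */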
+
+/*
+ * interval_justify_hours()
+ *
+ * Adjust interval so 'time' contains less than a whole day, adding
+ * the excess to 'day'. This is useful for
+ * situations (such as non-TZ) where '1 day' = '24 hours' is valid,
+ * e.g. interval subtraction and division.
+ */
+Datum
+interval_justify_hours(PG_FUNCTION_ARGS)
+{
+ Interval *span = PG_GETARG_INTERVAL_P(0);
+ Interval *result;
+ TimeOffset wholeday;
+
+ result = (Interval *) palloc(sizeof(Interval));
+ result->month = span->month;
+ result->day = span->day;
+ result->time = span->time;
+
+ TMODULO(result->time, wholeday, USECS_PER_DAY);
+ if (pg_add_s32_overflow(result->day, wholeday, &result->day))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+
+ if (result->day > 0 && result->time < 0)
+ {
+ result->time += USECS_PER_DAY;
+ result->day--;
+ }
+ else if (result->day < 0 && result->time > 0)
+ {
+ result->time -= USECS_PER_DAY;
+ result->day++;
+ }
+
+ PG_RETURN_INTERVAL_P(result);
+}
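+
+/*
+ * Illustrative usage:
+ *
+ *   SELECT justify_hours(interval '27 hours');
+ *   -- 1 day 03:00:00
+ */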
+
+/*
+ * interval_justify_days()
+ *
+ * Adjust interval so 'day' contains less than 30 days, adding
+ * the excess to 'month'.
+ */
+Datum
+interval_justify_days(PG_FUNCTION_ARGS)
+{
+ Interval *span = PG_GETARG_INTERVAL_P(0);
+ Interval *result;
+ int32 wholemonth;
+
+ result = (Interval *) palloc(sizeof(Interval));
+ result->month = span->month;
+ result->day = span->day;
+ result->time = span->time;
+
+ wholemonth = result->day / DAYS_PER_MONTH;
+ result->day -= wholemonth * DAYS_PER_MONTH;
+ if (pg_add_s32_overflow(result->month, wholemonth, &result->month))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+
+ if (result->month > 0 && result->day < 0)
+ {
+ result->day += DAYS_PER_MONTH;
+ result->month--;
+ }
+ else if (result->month < 0 && result->day > 0)
+ {
+ result->day -= DAYS_PER_MONTH;
+ result->month++;
+ }
+
+ PG_RETURN_INTERVAL_P(result);
+}
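+
+/*
+ * Illustrative usage:
+ *
+ *   SELECT justify_days(interval '35 days');
+ *   -- 1 mon 5 days
+ */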
+
+/* timestamp_pl_interval()
+ * Add an interval to a timestamp data type.
+ * Note that interval has provisions for qualitative year/month and day
+ * units, so try to do the right thing with them.
+ * To add a month, increment the month, and use the same day of month.
+ * Then, if the next month has fewer days, set the day of month
+ * to the last day of month.
+ * To add a day, increment the mday, and use the same time of day.
+ * Lastly, add in the "quantitative time".
+ */
+Datum
+timestamp_pl_interval(PG_FUNCTION_ARGS)
+{
+ Timestamp timestamp = PG_GETARG_TIMESTAMP(0);
+ Interval *span = PG_GETARG_INTERVAL_P(1);
+ Timestamp result;
+
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ result = timestamp;
+ else
+ {
+ if (span->month != 0)
+ {
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+
+ if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ tm->tm_mon += span->month;
+ if (tm->tm_mon > MONTHS_PER_YEAR)
+ {
+ tm->tm_year += (tm->tm_mon - 1) / MONTHS_PER_YEAR;
+ tm->tm_mon = ((tm->tm_mon - 1) % MONTHS_PER_YEAR) + 1;
+ }
+ else if (tm->tm_mon < 1)
+ {
+ tm->tm_year += tm->tm_mon / MONTHS_PER_YEAR - 1;
+ tm->tm_mon = tm->tm_mon % MONTHS_PER_YEAR + MONTHS_PER_YEAR;
+ }
+
+ /* adjust for end of month boundary problems... */
+ if (tm->tm_mday > day_tab[isleap(tm->tm_year)][tm->tm_mon - 1])
+ tm->tm_mday = (day_tab[isleap(tm->tm_year)][tm->tm_mon - 1]);
+
+ if (tm2timestamp(tm, fsec, NULL, &timestamp) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ }
+
+ if (span->day != 0)
+ {
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+ int julian;
+
+ if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ /* Add days by converting to and from Julian */
+ julian = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) + span->day;
+ j2date(julian, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
+
+ if (tm2timestamp(tm, fsec, NULL, &timestamp) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ }
+
+ timestamp += span->time;
+
+ if (!IS_VALID_TIMESTAMP(timestamp))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ result = timestamp;
+ }
+
+ PG_RETURN_TIMESTAMP(result);
+}
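+
+/*
+ * Illustrative end-of-month clamping (2001 is not a leap year):
+ *
+ *   SELECT timestamp '2001-01-31' + interval '1 month';
+ *   -- 2001-02-28 00:00:00
+ */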
+
+Datum
+timestamp_mi_interval(PG_FUNCTION_ARGS)
+{
+ Timestamp timestamp = PG_GETARG_TIMESTAMP(0);
+ Interval *span = PG_GETARG_INTERVAL_P(1);
+ Interval tspan;
+
+ tspan.month = -span->month;
+ tspan.day = -span->day;
+ tspan.time = -span->time;
+
+ return DirectFunctionCall2(timestamp_pl_interval,
+ TimestampGetDatum(timestamp),
+ PointerGetDatum(&tspan));
+}
+
+
+/* timestamptz_pl_interval()
+ * Add an interval to a timestamp with time zone data type.
+ * Note that interval has provisions for qualitative year/month
+ * units, so try to do the right thing with them.
+ * To add a month, increment the month, and use the same day of month.
+ * Then, if the next month has fewer days, set the day of month
+ * to the last day of month.
+ * Lastly, add in the "quantitative time".
+ */
+Datum
+timestamptz_pl_interval(PG_FUNCTION_ARGS)
+{
+ TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(0);
+ Interval *span = PG_GETARG_INTERVAL_P(1);
+ TimestampTz result;
+ int tz;
+
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ result = timestamp;
+ else
+ {
+ if (span->month != 0)
+ {
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+
+ if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ tm->tm_mon += span->month;
+ if (tm->tm_mon > MONTHS_PER_YEAR)
+ {
+ tm->tm_year += (tm->tm_mon - 1) / MONTHS_PER_YEAR;
+ tm->tm_mon = ((tm->tm_mon - 1) % MONTHS_PER_YEAR) + 1;
+ }
+ else if (tm->tm_mon < 1)
+ {
+ tm->tm_year += tm->tm_mon / MONTHS_PER_YEAR - 1;
+ tm->tm_mon = tm->tm_mon % MONTHS_PER_YEAR + MONTHS_PER_YEAR;
+ }
+
+ /* adjust for end of month boundary problems... */
+ if (tm->tm_mday > day_tab[isleap(tm->tm_year)][tm->tm_mon - 1])
+ tm->tm_mday = (day_tab[isleap(tm->tm_year)][tm->tm_mon - 1]);
+
+ tz = DetermineTimeZoneOffset(tm, session_timezone);
+
+ if (tm2timestamp(tm, fsec, &tz, &timestamp) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ }
+
+ if (span->day != 0)
+ {
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+ int julian;
+
+ if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ /* Add days by converting to and from Julian */
+ julian = date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) + span->day;
+ j2date(julian, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
+
+ tz = DetermineTimeZoneOffset(tm, session_timezone);
+
+ if (tm2timestamp(tm, fsec, &tz, &timestamp) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ }
+
+ timestamp += span->time;
+
+ if (!IS_VALID_TIMESTAMP(timestamp))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ result = timestamp;
+ }
+
+ PG_RETURN_TIMESTAMP(result);
+}
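+
+/*
+ * Illustrative DST-aware behavior, assuming the session timezone is
+ * 'America/Denver' (US DST began 2005-04-03 under the pre-2007 rules):
+ *
+ *   SELECT timestamptz '2005-04-02 12:00:00-07' + interval '1 day';
+ *   -- 2005-04-03 12:00:00-06
+ *
+ *   SELECT timestamptz '2005-04-02 12:00:00-07' + interval '24 hours';
+ *   -- 2005-04-03 13:00:00-06
+ */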
+
+Datum
+timestamptz_mi_interval(PG_FUNCTION_ARGS)
+{
+ TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(0);
+ Interval *span = PG_GETARG_INTERVAL_P(1);
+ Interval tspan;
+
+ tspan.month = -span->month;
+ tspan.day = -span->day;
+ tspan.time = -span->time;
+
+ return DirectFunctionCall2(timestamptz_pl_interval,
+ TimestampGetDatum(timestamp),
+ PointerGetDatum(&tspan));
+}
+
+
+Datum
+interval_um(PG_FUNCTION_ARGS)
+{
+ Interval *interval = PG_GETARG_INTERVAL_P(0);
+ Interval *result;
+
+ result = (Interval *) palloc(sizeof(Interval));
+
+ result->time = -interval->time;
+ /* overflow check copied from int4um */
+ if (interval->time != 0 && SAMESIGN(result->time, interval->time))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+ result->day = -interval->day;
+ if (interval->day != 0 && SAMESIGN(result->day, interval->day))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+ result->month = -interval->month;
+ if (interval->month != 0 && SAMESIGN(result->month, interval->month))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+
+ PG_RETURN_INTERVAL_P(result);
+}
+
+
+Datum
+interval_smaller(PG_FUNCTION_ARGS)
+{
+ Interval *interval1 = PG_GETARG_INTERVAL_P(0);
+ Interval *interval2 = PG_GETARG_INTERVAL_P(1);
+ Interval *result;
+
+ /* use interval_cmp_internal to be sure this agrees with comparisons */
+ if (interval_cmp_internal(interval1, interval2) < 0)
+ result = interval1;
+ else
+ result = interval2;
+ PG_RETURN_INTERVAL_P(result);
+}
+
+Datum
+interval_larger(PG_FUNCTION_ARGS)
+{
+ Interval *interval1 = PG_GETARG_INTERVAL_P(0);
+ Interval *interval2 = PG_GETARG_INTERVAL_P(1);
+ Interval *result;
+
+ if (interval_cmp_internal(interval1, interval2) > 0)
+ result = interval1;
+ else
+ result = interval2;
+ PG_RETURN_INTERVAL_P(result);
+}
+
+Datum
+interval_pl(PG_FUNCTION_ARGS)
+{
+ Interval *span1 = PG_GETARG_INTERVAL_P(0);
+ Interval *span2 = PG_GETARG_INTERVAL_P(1);
+ Interval *result;
+
+ result = (Interval *) palloc(sizeof(Interval));
+
+ result->month = span1->month + span2->month;
+ /* overflow check copied from int4pl */
+ if (SAMESIGN(span1->month, span2->month) &&
+ !SAMESIGN(result->month, span1->month))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+
+ result->day = span1->day + span2->day;
+ if (SAMESIGN(span1->day, span2->day) &&
+ !SAMESIGN(result->day, span1->day))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+
+ result->time = span1->time + span2->time;
+ if (SAMESIGN(span1->time, span2->time) &&
+ !SAMESIGN(result->time, span1->time))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+
+ PG_RETURN_INTERVAL_P(result);
+}
+
+Datum
+interval_mi(PG_FUNCTION_ARGS)
+{
+ Interval *span1 = PG_GETARG_INTERVAL_P(0);
+ Interval *span2 = PG_GETARG_INTERVAL_P(1);
+ Interval *result;
+
+ result = (Interval *) palloc(sizeof(Interval));
+
+ result->month = span1->month - span2->month;
+ /* overflow check copied from int4mi */
+ if (!SAMESIGN(span1->month, span2->month) &&
+ !SAMESIGN(result->month, span1->month))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+
+ result->day = span1->day - span2->day;
+ if (!SAMESIGN(span1->day, span2->day) &&
+ !SAMESIGN(result->day, span1->day))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+
+ result->time = span1->time - span2->time;
+ if (!SAMESIGN(span1->time, span2->time) &&
+ !SAMESIGN(result->time, span1->time))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+
+ PG_RETURN_INTERVAL_P(result);
+}
+
+/*
+ * There is no interval_abs(): it is unclear what value to return:
+ * http://archives.postgresql.org/pgsql-general/2009-10/msg01031.php
+ * http://archives.postgresql.org/pgsql-general/2009-11/msg00041.php
+ */
+
+Datum
+interval_mul(PG_FUNCTION_ARGS)
+{
+ Interval *span = PG_GETARG_INTERVAL_P(0);
+ float8 factor = PG_GETARG_FLOAT8(1);
+ double month_remainder_days,
+ sec_remainder,
+ result_double;
+ int32 orig_month = span->month,
+ orig_day = span->day;
+ Interval *result;
+
+ result = (Interval *) palloc(sizeof(Interval));
+
+ result_double = span->month * factor;
+ if (isnan(result_double) ||
+ result_double > INT_MAX || result_double < INT_MIN)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+ result->month = (int32) result_double;
+
+ result_double = span->day * factor;
+ if (isnan(result_double) ||
+ result_double > INT_MAX || result_double < INT_MIN)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+ result->day = (int32) result_double;
+
+ /*
+ * The above correctly handles the whole-number part of the month and day
+ * products, but we have to do something with any fractional part
+ * resulting when the factor is non-integral. We cascade the fractions
+ * down to lower units using the conversion factors DAYS_PER_MONTH and
+ * SECS_PER_DAY. Note we do NOT cascade up, since we are not forced to do
+ * so by the representation. The user can choose to cascade up later,
+ * using justify_hours and/or justify_days.
+ */
+
+ /*
+ * Cascade the fractional part of the months product into days.
+ *
+ * Floating point calculations are inherently imprecise, so these
+ * calculations are crafted to produce the most reliable result possible.
+ * TSROUND() is needed to more accurately produce whole numbers where
+ * appropriate.
+ */
+ month_remainder_days = (orig_month * factor - result->month) * DAYS_PER_MONTH;
+ month_remainder_days = TSROUND(month_remainder_days);
+ sec_remainder = (orig_day * factor - result->day +
+ month_remainder_days - (int) month_remainder_days) * SECS_PER_DAY;
+ sec_remainder = TSROUND(sec_remainder);
+
+ /*
+ * The seconds remainder might reach 24:00:00 due to rounding, or exceed 24
+ * hours because of the time cascaded down from months and days; it can
+ * also exceed 24 hours when that cascade combines with the scaling of the
+ * time field itself. Fold any whole days back into the day field.
+ */
+ if (Abs(sec_remainder) >= SECS_PER_DAY)
+ {
+ result->day += (int) (sec_remainder / SECS_PER_DAY);
+ sec_remainder -= (int) (sec_remainder / SECS_PER_DAY) * SECS_PER_DAY;
+ }
+
+ /* cascade units down */
+ result->day += (int32) month_remainder_days;
+ result_double = rint(span->time * factor + sec_remainder * USECS_PER_SEC);
+ if (isnan(result_double) || !FLOAT8_FITS_IN_INT64(result_double))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+ result->time = (int64) result_double;
+
+ PG_RETURN_INTERVAL_P(result);
+}
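+
+/*
+ * Illustrative usage; note that the fractional month cascades down into
+ * days rather than up:
+ *
+ *   SELECT interval '1 hour' * 3.5;     -- 03:30:00
+ *   SELECT interval '1 month' * 1.5;    -- 1 mon 15 days
+ */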
+
+Datum
+mul_d_interval(PG_FUNCTION_ARGS)
+{
+ /* Args are float8 and Interval *, but leave them as generic Datum */
+ Datum factor = PG_GETARG_DATUM(0);
+ Datum span = PG_GETARG_DATUM(1);
+
+ return DirectFunctionCall2(interval_mul, span, factor);
+}
+
+Datum
+interval_div(PG_FUNCTION_ARGS)
+{
+ Interval *span = PG_GETARG_INTERVAL_P(0);
+ float8 factor = PG_GETARG_FLOAT8(1);
+ double month_remainder_days,
+ sec_remainder;
+ int32 orig_month = span->month,
+ orig_day = span->day;
+ Interval *result;
+
+ result = (Interval *) palloc(sizeof(Interval));
+
+ if (factor == 0.0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DIVISION_BY_ZERO),
+ errmsg("division by zero")));
+
+ result->month = (int32) (span->month / factor);
+ result->day = (int32) (span->day / factor);
+
+ /*
+ * Cascade the fractional part of the months quotient into days. See
+ * comment in interval_mul().
+ */
+ month_remainder_days = (orig_month / factor - result->month) * DAYS_PER_MONTH;
+ month_remainder_days = TSROUND(month_remainder_days);
+ sec_remainder = (orig_day / factor - result->day +
+ month_remainder_days - (int) month_remainder_days) * SECS_PER_DAY;
+ sec_remainder = TSROUND(sec_remainder);
+ if (Abs(sec_remainder) >= SECS_PER_DAY)
+ {
+ result->day += (int) (sec_remainder / SECS_PER_DAY);
+ sec_remainder -= (int) (sec_remainder / SECS_PER_DAY) * SECS_PER_DAY;
+ }
+
+ /* cascade units down */
+ result->day += (int32) month_remainder_days;
+ result->time = rint(span->time / factor + sec_remainder * USECS_PER_SEC);
+
+ PG_RETURN_INTERVAL_P(result);
+}
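+
+/*
+ * Illustrative usage:
+ *
+ *   SELECT interval '1 hour' / 1.5;     -- 00:40:00
+ */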
+
+
+/*
+ * in_range support functions for timestamps and intervals.
+ *
+ * Per SQL spec, we support these with interval as the offset type.
+ * The spec's restriction that the offset not be negative is a bit hard to
+ * decipher for intervals, but we choose to interpret it the same as our
+ * interval comparison operators would.
+ */
+
+Datum
+in_range_timestamptz_interval(PG_FUNCTION_ARGS)
+{
+ TimestampTz val = PG_GETARG_TIMESTAMPTZ(0);
+ TimestampTz base = PG_GETARG_TIMESTAMPTZ(1);
+ Interval *offset = PG_GETARG_INTERVAL_P(2);
+ bool sub = PG_GETARG_BOOL(3);
+ bool less = PG_GETARG_BOOL(4);
+ TimestampTz sum;
+
+ if (int128_compare(interval_cmp_value(offset), int64_to_int128(0)) < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE),
+ errmsg("invalid preceding or following size in window function")));
+
+ /* We don't currently bother to avoid overflow hazards here */
+ if (sub)
+ sum = DatumGetTimestampTz(DirectFunctionCall2(timestamptz_mi_interval,
+ TimestampTzGetDatum(base),
+ IntervalPGetDatum(offset)));
+ else
+ sum = DatumGetTimestampTz(DirectFunctionCall2(timestamptz_pl_interval,
+ TimestampTzGetDatum(base),
+ IntervalPGetDatum(offset)));
+
+ if (less)
+ PG_RETURN_BOOL(val <= sum);
+ else
+ PG_RETURN_BOOL(val >= sum);
+}
+
+Datum
+in_range_timestamp_interval(PG_FUNCTION_ARGS)
+{
+ Timestamp val = PG_GETARG_TIMESTAMP(0);
+ Timestamp base = PG_GETARG_TIMESTAMP(1);
+ Interval *offset = PG_GETARG_INTERVAL_P(2);
+ bool sub = PG_GETARG_BOOL(3);
+ bool less = PG_GETARG_BOOL(4);
+ Timestamp sum;
+
+ if (int128_compare(interval_cmp_value(offset), int64_to_int128(0)) < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE),
+ errmsg("invalid preceding or following size in window function")));
+
+ /* We don't currently bother to avoid overflow hazards here */
+ if (sub)
+ sum = DatumGetTimestamp(DirectFunctionCall2(timestamp_mi_interval,
+ TimestampGetDatum(base),
+ IntervalPGetDatum(offset)));
+ else
+ sum = DatumGetTimestamp(DirectFunctionCall2(timestamp_pl_interval,
+ TimestampGetDatum(base),
+ IntervalPGetDatum(offset)));
+
+ if (less)
+ PG_RETURN_BOOL(val <= sum);
+ else
+ PG_RETURN_BOOL(val >= sum);
+}
+
+Datum
+in_range_interval_interval(PG_FUNCTION_ARGS)
+{
+ Interval *val = PG_GETARG_INTERVAL_P(0);
+ Interval *base = PG_GETARG_INTERVAL_P(1);
+ Interval *offset = PG_GETARG_INTERVAL_P(2);
+ bool sub = PG_GETARG_BOOL(3);
+ bool less = PG_GETARG_BOOL(4);
+ Interval *sum;
+
+ if (int128_compare(interval_cmp_value(offset), int64_to_int128(0)) < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PRECEDING_OR_FOLLOWING_SIZE),
+ errmsg("invalid preceding or following size in window function")));
+
+ /* We don't currently bother to avoid overflow hazards here */
+ if (sub)
+ sum = DatumGetIntervalP(DirectFunctionCall2(interval_mi,
+ IntervalPGetDatum(base),
+ IntervalPGetDatum(offset)));
+ else
+ sum = DatumGetIntervalP(DirectFunctionCall2(interval_pl,
+ IntervalPGetDatum(base),
+ IntervalPGetDatum(offset)));
+
+ if (less)
+ PG_RETURN_BOOL(interval_cmp_internal(val, sum) <= 0);
+ else
+ PG_RETURN_BOOL(interval_cmp_internal(val, sum) >= 0);
+}
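+
+/*
+ * These support functions are what back window frames with interval
+ * offsets; a sketch against a hypothetical table t(ts timestamptz,
+ * x numeric):
+ *
+ *   SELECT ts,
+ *          sum(x) OVER (ORDER BY ts
+ *                       RANGE BETWEEN interval '1 hour' PRECEDING
+ *                             AND CURRENT ROW)
+ *   FROM t;
+ */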
+
+
+/*
+ * interval_accum, interval_accum_inv, and interval_avg implement the
+ * AVG(interval) aggregate.
+ *
+ * The transition datatype for this aggregate is a 2-element array of
+ * intervals, where the first is the running sum and the second contains
+ * the number of values so far in its 'time' field. This is a bit ugly
+ * but it beats inventing a specialized datatype for the purpose.
+ */
+
+Datum
+interval_accum(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ Interval *newval = PG_GETARG_INTERVAL_P(1);
+ Datum *transdatums;
+ int ndatums;
+ Interval sumX,
+ N;
+ Interval *newsum;
+ ArrayType *result;
+
+ deconstruct_array(transarray,
+ INTERVALOID, sizeof(Interval), false, TYPALIGN_DOUBLE,
+ &transdatums, NULL, &ndatums);
+ if (ndatums != 2)
+ elog(ERROR, "expected 2-element interval array");
+
+ sumX = *(DatumGetIntervalP(transdatums[0]));
+ N = *(DatumGetIntervalP(transdatums[1]));
+
+ newsum = DatumGetIntervalP(DirectFunctionCall2(interval_pl,
+ IntervalPGetDatum(&sumX),
+ IntervalPGetDatum(newval)));
+ N.time += 1;
+
+ transdatums[0] = IntervalPGetDatum(newsum);
+ transdatums[1] = IntervalPGetDatum(&N);
+
+ result = construct_array(transdatums, 2,
+ INTERVALOID, sizeof(Interval), false, TYPALIGN_DOUBLE);
+
+ PG_RETURN_ARRAYTYPE_P(result);
+}
+
+Datum
+interval_combine(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray1 = PG_GETARG_ARRAYTYPE_P(0);
+ ArrayType *transarray2 = PG_GETARG_ARRAYTYPE_P(1);
+ Datum *transdatums1;
+ Datum *transdatums2;
+ int ndatums1;
+ int ndatums2;
+ Interval sum1,
+ N1;
+ Interval sum2,
+ N2;
+
+ Interval *newsum;
+ ArrayType *result;
+
+ deconstruct_array(transarray1,
+ INTERVALOID, sizeof(Interval), false, TYPALIGN_DOUBLE,
+ &transdatums1, NULL, &ndatums1);
+ if (ndatums1 != 2)
+ elog(ERROR, "expected 2-element interval array");
+
+ sum1 = *(DatumGetIntervalP(transdatums1[0]));
+ N1 = *(DatumGetIntervalP(transdatums1[1]));
+
+ deconstruct_array(transarray2,
+ INTERVALOID, sizeof(Interval), false, TYPALIGN_DOUBLE,
+ &transdatums2, NULL, &ndatums2);
+ if (ndatums2 != 2)
+ elog(ERROR, "expected 2-element interval array");
+
+ sum2 = *(DatumGetIntervalP(transdatums2[0]));
+ N2 = *(DatumGetIntervalP(transdatums2[1]));
+
+ newsum = DatumGetIntervalP(DirectFunctionCall2(interval_pl,
+ IntervalPGetDatum(&sum1),
+ IntervalPGetDatum(&sum2)));
+ N1.time += N2.time;
+
+ transdatums1[0] = IntervalPGetDatum(newsum);
+ transdatums1[1] = IntervalPGetDatum(&N1);
+
+ result = construct_array(transdatums1, 2,
+ INTERVALOID, sizeof(Interval), false, TYPALIGN_DOUBLE);
+
+ PG_RETURN_ARRAYTYPE_P(result);
+}
+
+Datum
+interval_accum_inv(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ Interval *newval = PG_GETARG_INTERVAL_P(1);
+ Datum *transdatums;
+ int ndatums;
+ Interval sumX,
+ N;
+ Interval *newsum;
+ ArrayType *result;
+
+ deconstruct_array(transarray,
+ INTERVALOID, sizeof(Interval), false, TYPALIGN_DOUBLE,
+ &transdatums, NULL, &ndatums);
+ if (ndatums != 2)
+ elog(ERROR, "expected 2-element interval array");
+
+ sumX = *(DatumGetIntervalP(transdatums[0]));
+ N = *(DatumGetIntervalP(transdatums[1]));
+
+ newsum = DatumGetIntervalP(DirectFunctionCall2(interval_mi,
+ IntervalPGetDatum(&sumX),
+ IntervalPGetDatum(newval)));
+ N.time -= 1;
+
+ transdatums[0] = IntervalPGetDatum(newsum);
+ transdatums[1] = IntervalPGetDatum(&N);
+
+ result = construct_array(transdatums, 2,
+ INTERVALOID, sizeof(Interval), false, TYPALIGN_DOUBLE);
+
+ PG_RETURN_ARRAYTYPE_P(result);
+}
+
+Datum
+interval_avg(PG_FUNCTION_ARGS)
+{
+ ArrayType *transarray = PG_GETARG_ARRAYTYPE_P(0);
+ Datum *transdatums;
+ int ndatums;
+ Interval sumX,
+ N;
+
+ deconstruct_array(transarray,
+ INTERVALOID, sizeof(Interval), false, TYPALIGN_DOUBLE,
+ &transdatums, NULL, &ndatums);
+ if (ndatums != 2)
+ elog(ERROR, "expected 2-element interval array");
+
+ sumX = *(DatumGetIntervalP(transdatums[0]));
+ N = *(DatumGetIntervalP(transdatums[1]));
+
+ /* SQL defines AVG of no values to be NULL */
+ if (N.time == 0)
+ PG_RETURN_NULL();
+
+ return DirectFunctionCall2(interval_div,
+ IntervalPGetDatum(&sumX),
+ Float8GetDatum((double) N.time));
+}
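+
+/*
+ * Illustrative usage:
+ *
+ *   SELECT avg(x)
+ *   FROM (VALUES (interval '1 day'), (interval '2 days')) AS t(x);
+ *   -- 1 day 12:00:00
+ */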
+
+
+/* timestamp_age()
+ * Calculate time difference while retaining year/month fields.
+ * Note that this does not result in an accurate absolute time span
+ * since year and month are out of context once the arithmetic
+ * is done.
+ */
+Datum
+timestamp_age(PG_FUNCTION_ARGS)
+{
+ Timestamp dt1 = PG_GETARG_TIMESTAMP(0);
+ Timestamp dt2 = PG_GETARG_TIMESTAMP(1);
+ Interval *result;
+ fsec_t fsec1,
+ fsec2;
+ struct pg_itm tt,
+ *tm = &tt;
+ struct pg_tm tt1,
+ *tm1 = &tt1;
+ struct pg_tm tt2,
+ *tm2 = &tt2;
+
+ result = (Interval *) palloc(sizeof(Interval));
+
+ if (timestamp2tm(dt1, NULL, tm1, &fsec1, NULL, NULL) == 0 &&
+ timestamp2tm(dt2, NULL, tm2, &fsec2, NULL, NULL) == 0)
+ {
+ /* form the symbolic difference */
+ tm->tm_usec = fsec1 - fsec2;
+ tm->tm_sec = tm1->tm_sec - tm2->tm_sec;
+ tm->tm_min = tm1->tm_min - tm2->tm_min;
+ tm->tm_hour = tm1->tm_hour - tm2->tm_hour;
+ tm->tm_mday = tm1->tm_mday - tm2->tm_mday;
+ tm->tm_mon = tm1->tm_mon - tm2->tm_mon;
+ tm->tm_year = tm1->tm_year - tm2->tm_year;
+
+ /* flip sign if necessary... */
+ if (dt1 < dt2)
+ {
+ tm->tm_usec = -tm->tm_usec;
+ tm->tm_sec = -tm->tm_sec;
+ tm->tm_min = -tm->tm_min;
+ tm->tm_hour = -tm->tm_hour;
+ tm->tm_mday = -tm->tm_mday;
+ tm->tm_mon = -tm->tm_mon;
+ tm->tm_year = -tm->tm_year;
+ }
+
+ /* propagate any negative fields into the next higher field */
+ while (tm->tm_usec < 0)
+ {
+ tm->tm_usec += USECS_PER_SEC;
+ tm->tm_sec--;
+ }
+
+ while (tm->tm_sec < 0)
+ {
+ tm->tm_sec += SECS_PER_MINUTE;
+ tm->tm_min--;
+ }
+
+ while (tm->tm_min < 0)
+ {
+ tm->tm_min += MINS_PER_HOUR;
+ tm->tm_hour--;
+ }
+
+ while (tm->tm_hour < 0)
+ {
+ tm->tm_hour += HOURS_PER_DAY;
+ tm->tm_mday--;
+ }
+
+ while (tm->tm_mday < 0)
+ {
+ if (dt1 < dt2)
+ {
+ tm->tm_mday += day_tab[isleap(tm1->tm_year)][tm1->tm_mon - 1];
+ tm->tm_mon--;
+ }
+ else
+ {
+ tm->tm_mday += day_tab[isleap(tm2->tm_year)][tm2->tm_mon - 1];
+ tm->tm_mon--;
+ }
+ }
+
+ while (tm->tm_mon < 0)
+ {
+ tm->tm_mon += MONTHS_PER_YEAR;
+ tm->tm_year--;
+ }
+
+ /* recover sign if necessary... */
+ if (dt1 < dt2)
+ {
+ tm->tm_usec = -tm->tm_usec;
+ tm->tm_sec = -tm->tm_sec;
+ tm->tm_min = -tm->tm_min;
+ tm->tm_hour = -tm->tm_hour;
+ tm->tm_mday = -tm->tm_mday;
+ tm->tm_mon = -tm->tm_mon;
+ tm->tm_year = -tm->tm_year;
+ }
+
+ if (itm2interval(tm, result) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ PG_RETURN_INTERVAL_P(result);
+}
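+
+/*
+ * Illustrative usage:
+ *
+ *   SELECT age(timestamp '2001-04-10', timestamp '1957-06-13');
+ *   -- 43 years 9 mons 27 days
+ */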
+
+
+/* timestamptz_age()
+ * Calculate time difference while retaining year/month fields.
+ * Note that this does not result in an accurate absolute time span
+ * since year and month are out of context once the arithmetic
+ * is done.
+ */
+Datum
+timestamptz_age(PG_FUNCTION_ARGS)
+{
+ TimestampTz dt1 = PG_GETARG_TIMESTAMPTZ(0);
+ TimestampTz dt2 = PG_GETARG_TIMESTAMPTZ(1);
+ Interval *result;
+ fsec_t fsec1,
+ fsec2;
+ struct pg_itm tt,
+ *tm = &tt;
+ struct pg_tm tt1,
+ *tm1 = &tt1;
+ struct pg_tm tt2,
+ *tm2 = &tt2;
+ int tz1;
+ int tz2;
+
+ result = (Interval *) palloc(sizeof(Interval));
+
+ if (timestamp2tm(dt1, &tz1, tm1, &fsec1, NULL, NULL) == 0 &&
+ timestamp2tm(dt2, &tz2, tm2, &fsec2, NULL, NULL) == 0)
+ {
+ /* form the symbolic difference */
+ tm->tm_usec = fsec1 - fsec2;
+ tm->tm_sec = tm1->tm_sec - tm2->tm_sec;
+ tm->tm_min = tm1->tm_min - tm2->tm_min;
+ tm->tm_hour = tm1->tm_hour - tm2->tm_hour;
+ tm->tm_mday = tm1->tm_mday - tm2->tm_mday;
+ tm->tm_mon = tm1->tm_mon - tm2->tm_mon;
+ tm->tm_year = tm1->tm_year - tm2->tm_year;
+
+ /* flip sign if necessary... */
+ if (dt1 < dt2)
+ {
+ tm->tm_usec = -tm->tm_usec;
+ tm->tm_sec = -tm->tm_sec;
+ tm->tm_min = -tm->tm_min;
+ tm->tm_hour = -tm->tm_hour;
+ tm->tm_mday = -tm->tm_mday;
+ tm->tm_mon = -tm->tm_mon;
+ tm->tm_year = -tm->tm_year;
+ }
+
+ /* propagate any negative fields into the next higher field */
+ while (tm->tm_usec < 0)
+ {
+ tm->tm_usec += USECS_PER_SEC;
+ tm->tm_sec--;
+ }
+
+ while (tm->tm_sec < 0)
+ {
+ tm->tm_sec += SECS_PER_MINUTE;
+ tm->tm_min--;
+ }
+
+ while (tm->tm_min < 0)
+ {
+ tm->tm_min += MINS_PER_HOUR;
+ tm->tm_hour--;
+ }
+
+ while (tm->tm_hour < 0)
+ {
+ tm->tm_hour += HOURS_PER_DAY;
+ tm->tm_mday--;
+ }
+
+ while (tm->tm_mday < 0)
+ {
+ if (dt1 < dt2)
+ {
+ tm->tm_mday += day_tab[isleap(tm1->tm_year)][tm1->tm_mon - 1];
+ tm->tm_mon--;
+ }
+ else
+ {
+ tm->tm_mday += day_tab[isleap(tm2->tm_year)][tm2->tm_mon - 1];
+ tm->tm_mon--;
+ }
+ }
+
+ while (tm->tm_mon < 0)
+ {
+ tm->tm_mon += MONTHS_PER_YEAR;
+ tm->tm_year--;
+ }
+
+ /*
+ * Note: we deliberately ignore any difference between tz1 and tz2.
+ */
+
+ /* recover sign if necessary... */
+ if (dt1 < dt2)
+ {
+ tm->tm_usec = -tm->tm_usec;
+ tm->tm_sec = -tm->tm_sec;
+ tm->tm_min = -tm->tm_min;
+ tm->tm_hour = -tm->tm_hour;
+ tm->tm_mday = -tm->tm_mday;
+ tm->tm_mon = -tm->tm_mon;
+ tm->tm_year = -tm->tm_year;
+ }
+
+ if (itm2interval(tm, result) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ PG_RETURN_INTERVAL_P(result);
+}
+
+
+/*----------------------------------------------------------
+ * Conversion operators.
+ *---------------------------------------------------------*/
+
+
+/* timestamp_bin()
+ * Bin timestamp into specified interval.
+ */
+Datum
+timestamp_bin(PG_FUNCTION_ARGS)
+{
+ Interval *stride = PG_GETARG_INTERVAL_P(0);
+ Timestamp timestamp = PG_GETARG_TIMESTAMP(1);
+ Timestamp origin = PG_GETARG_TIMESTAMP(2);
+ Timestamp result,
+ tm_diff,
+ stride_usecs,
+ tm_delta;
+
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ PG_RETURN_TIMESTAMP(timestamp);
+
+ if (TIMESTAMP_NOT_FINITE(origin))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("origin out of range")));
+
+ if (stride->month != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("timestamps cannot be binned into intervals containing months or years")));
+
+ stride_usecs = stride->day * USECS_PER_DAY + stride->time;
+
+ if (stride_usecs <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("stride must be greater than zero")));
+
+ tm_diff = timestamp - origin;
+ tm_delta = tm_diff - tm_diff % stride_usecs;
+
+ /*
+ * Make sure the returned timestamp is at the start of the bin, even if
+ * the origin is in the future.
+ */
+ if (origin > timestamp && stride_usecs > 1)
+ tm_delta -= stride_usecs;
+
+ result = origin + tm_delta;
+
+ PG_RETURN_TIMESTAMP(result);
+}
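+
+/*
+ * Illustrative usage:
+ *
+ *   SELECT date_bin('15 minutes', timestamp '2020-02-11 15:44:17',
+ *                   timestamp '2001-01-01');
+ *   -- 2020-02-11 15:30:00
+ */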
+
+/* timestamp_trunc()
+ * Truncate timestamp to specified units.
+ */
+Datum
+timestamp_trunc(PG_FUNCTION_ARGS)
+{
+ text *units = PG_GETARG_TEXT_PP(0);
+ Timestamp timestamp = PG_GETARG_TIMESTAMP(1);
+ Timestamp result;
+ int type,
+ val;
+ char *lowunits;
+ fsec_t fsec;
+ struct pg_tm tt,
+ *tm = &tt;
+
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ PG_RETURN_TIMESTAMP(timestamp);
+
+ lowunits = downcase_truncate_identifier(VARDATA_ANY(units),
+ VARSIZE_ANY_EXHDR(units),
+ false);
+
+ type = DecodeUnits(0, lowunits, &val);
+
+ if (type == UNITS)
+ {
+ if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ switch (val)
+ {
+ case DTK_WEEK:
+ {
+ int woy;
+
+ woy = date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday);
+
+ /*
+ * If it is week 52/53 and the month is January, then the
+ * week must belong to the previous year. Also, some
+ * December dates belong to the next year.
+ */
+ if (woy >= 52 && tm->tm_mon == 1)
+ --tm->tm_year;
+ if (woy <= 1 && tm->tm_mon == MONTHS_PER_YEAR)
+ ++tm->tm_year;
+ isoweek2date(woy, &(tm->tm_year), &(tm->tm_mon), &(tm->tm_mday));
+ tm->tm_hour = 0;
+ tm->tm_min = 0;
+ tm->tm_sec = 0;
+ fsec = 0;
+ break;
+ }
+ case DTK_MILLENNIUM:
+ /* see comments in timestamptz_trunc */
+ if (tm->tm_year > 0)
+ tm->tm_year = ((tm->tm_year + 999) / 1000) * 1000 - 999;
+ else
+ tm->tm_year = -((999 - (tm->tm_year - 1)) / 1000) * 1000 + 1;
+ /* FALL THRU */
+ case DTK_CENTURY:
+ /* see comments in timestamptz_trunc */
+ if (tm->tm_year > 0)
+ tm->tm_year = ((tm->tm_year + 99) / 100) * 100 - 99;
+ else
+ tm->tm_year = -((99 - (tm->tm_year - 1)) / 100) * 100 + 1;
+ /* FALL THRU */
+ case DTK_DECADE:
+ /* see comments in timestamptz_trunc */
+ if (val != DTK_MILLENNIUM && val != DTK_CENTURY)
+ {
+ if (tm->tm_year > 0)
+ tm->tm_year = (tm->tm_year / 10) * 10;
+ else
+ tm->tm_year = -((8 - (tm->tm_year - 1)) / 10) * 10;
+ }
+ /* FALL THRU */
+ case DTK_YEAR:
+ tm->tm_mon = 1;
+ /* FALL THRU */
+ case DTK_QUARTER:
+ tm->tm_mon = (3 * ((tm->tm_mon - 1) / 3)) + 1;
+ /* FALL THRU */
+ case DTK_MONTH:
+ tm->tm_mday = 1;
+ /* FALL THRU */
+ case DTK_DAY:
+ tm->tm_hour = 0;
+ /* FALL THRU */
+ case DTK_HOUR:
+ tm->tm_min = 0;
+ /* FALL THRU */
+ case DTK_MINUTE:
+ tm->tm_sec = 0;
+ /* FALL THRU */
+ case DTK_SECOND:
+ fsec = 0;
+ break;
+
+ case DTK_MILLISEC:
+ fsec = (fsec / 1000) * 1000;
+ break;
+
+ case DTK_MICROSEC:
+ break;
+
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("unit \"%s\" not supported for type %s",
+ lowunits, format_type_be(TIMESTAMPOID))));
+ result = 0;
+ }
+
+ if (tm2timestamp(tm, fsec, NULL, &result) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unit \"%s\" not recognized for type %s",
+ lowunits, format_type_be(TIMESTAMPOID))));
+ result = 0;
+ }
+
+ PG_RETURN_TIMESTAMP(result);
+}
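+
+/*
+ * Illustrative example (not part of the original source): this function
+ * backs date_trunc(text, timestamp), e.g.
+ *   SELECT date_trunc('hour', TIMESTAMP '2001-02-16 20:38:40');
+ * returns 2001-02-16 20:00:00.
+ */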
+
+/* timestamptz_bin()
+ * Bin timestamptz into specified interval using specified origin.
+ */
+Datum
+timestamptz_bin(PG_FUNCTION_ARGS)
+{
+ Interval *stride = PG_GETARG_INTERVAL_P(0);
+ TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(1);
+ TimestampTz origin = PG_GETARG_TIMESTAMPTZ(2);
+ TimestampTz result,
+ stride_usecs,
+ tm_diff,
+ tm_delta;
+
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ PG_RETURN_TIMESTAMPTZ(timestamp);
+
+ if (TIMESTAMP_NOT_FINITE(origin))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("origin out of range")));
+
+ if (stride->month != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("timestamps cannot be binned into intervals containing months or years")));
+
+ stride_usecs = stride->day * USECS_PER_DAY + stride->time;
+
+ if (stride_usecs <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("stride must be greater than zero")));
+
+ tm_diff = timestamp - origin;
+ tm_delta = tm_diff - tm_diff % stride_usecs;
+
+ /*
+ * Make sure the returned timestamp is at the start of the bin, even if
+ * the origin is in the future.
+ */
+ if (origin > timestamp && stride_usecs > 1)
+ tm_delta -= stride_usecs;
+
+ result = origin + tm_delta;
+
+ PG_RETURN_TIMESTAMPTZ(result);
+}
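+
+/*
+ * Illustrative note (not part of the original source): binning operates on
+ * the raw microsecond count, so a stride of '1 day' means exactly 24 hours
+ * and bin boundaries do not shift across DST transitions in the session
+ * time zone.
+ */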
+
+/*
+ * Common code for timestamptz_trunc() and timestamptz_trunc_zone().
+ *
+ * tzp identifies the zone to truncate with respect to. We assume
+ * infinite timestamps have already been rejected.
+ */
+static TimestampTz
+timestamptz_trunc_internal(text *units, TimestampTz timestamp, pg_tz *tzp)
+{
+ TimestampTz result;
+ int tz;
+ int type,
+ val;
+ bool redotz = false;
+ char *lowunits;
+ fsec_t fsec;
+ struct pg_tm tt,
+ *tm = &tt;
+
+ lowunits = downcase_truncate_identifier(VARDATA_ANY(units),
+ VARSIZE_ANY_EXHDR(units),
+ false);
+
+ type = DecodeUnits(0, lowunits, &val);
+
+ if (type == UNITS)
+ {
+ if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, tzp) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ switch (val)
+ {
+ case DTK_WEEK:
+ {
+ int woy;
+
+ woy = date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday);
+
+ /*
+ * If it is week 52/53 and the month is January, then the
+ * week must belong to the previous year. Also, some
+ * December dates belong to the next year.
+ */
+ if (woy >= 52 && tm->tm_mon == 1)
+ --tm->tm_year;
+ if (woy <= 1 && tm->tm_mon == MONTHS_PER_YEAR)
+ ++tm->tm_year;
+ isoweek2date(woy, &(tm->tm_year), &(tm->tm_mon), &(tm->tm_mday));
+ tm->tm_hour = 0;
+ tm->tm_min = 0;
+ tm->tm_sec = 0;
+ fsec = 0;
+ redotz = true;
+ break;
+ }
+ /* one may consider DTK_THOUSAND and DTK_HUNDRED... */
+ case DTK_MILLENNIUM:
+
+ /*
+ * truncating to the millennium? what is this supposed to
+ * mean? let us put the first year of the millennium... i.e.
+ * -1000, 1, 1001, 2001...
+ */
+ if (tm->tm_year > 0)
+ tm->tm_year = ((tm->tm_year + 999) / 1000) * 1000 - 999;
+ else
+ tm->tm_year = -((999 - (tm->tm_year - 1)) / 1000) * 1000 + 1;
+ /* FALL THRU */
+ case DTK_CENTURY:
+ /* truncating to the century? as above: -100, 1, 101... */
+ if (tm->tm_year > 0)
+ tm->tm_year = ((tm->tm_year + 99) / 100) * 100 - 99;
+ else
+ tm->tm_year = -((99 - (tm->tm_year - 1)) / 100) * 100 + 1;
+ /* FALL THRU */
+ case DTK_DECADE:
+
+ /*
+ * truncating to the decade? first year of the decade. must
+ * not be applied if year was truncated before!
+ */
+ if (val != DTK_MILLENNIUM && val != DTK_CENTURY)
+ {
+ if (tm->tm_year > 0)
+ tm->tm_year = (tm->tm_year / 10) * 10;
+ else
+ tm->tm_year = -((8 - (tm->tm_year - 1)) / 10) * 10;
+ }
+ /* FALL THRU */
+ case DTK_YEAR:
+ tm->tm_mon = 1;
+ /* FALL THRU */
+ case DTK_QUARTER:
+ tm->tm_mon = (3 * ((tm->tm_mon - 1) / 3)) + 1;
+ /* FALL THRU */
+ case DTK_MONTH:
+ tm->tm_mday = 1;
+ /* FALL THRU */
+ case DTK_DAY:
+ tm->tm_hour = 0;
+ redotz = true; /* for all cases >= DAY */
+ /* FALL THRU */
+ case DTK_HOUR:
+ tm->tm_min = 0;
+ /* FALL THRU */
+ case DTK_MINUTE:
+ tm->tm_sec = 0;
+ /* FALL THRU */
+ case DTK_SECOND:
+ fsec = 0;
+ break;
+ case DTK_MILLISEC:
+ fsec = (fsec / 1000) * 1000;
+ break;
+ case DTK_MICROSEC:
+ break;
+
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("unit \"%s\" not supported for type %s",
+ lowunits, format_type_be(TIMESTAMPTZOID))));
+ result = 0;
+ }
+
+ if (redotz)
+ tz = DetermineTimeZoneOffset(tm, tzp);
+
+ if (tm2timestamp(tm, fsec, &tz, &result) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unit \"%s\" not recognized for type %s",
+ lowunits, format_type_be(TIMESTAMPTZOID))));
+ result = 0;
+ }
+
+ return result;
+}
+
+/* timestamptz_trunc()
+ * Truncate timestamptz to specified units in session timezone.
+ */
+Datum
+timestamptz_trunc(PG_FUNCTION_ARGS)
+{
+ text *units = PG_GETARG_TEXT_PP(0);
+ TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(1);
+ TimestampTz result;
+
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ PG_RETURN_TIMESTAMPTZ(timestamp);
+
+ result = timestamptz_trunc_internal(units, timestamp, session_timezone);
+
+ PG_RETURN_TIMESTAMPTZ(result);
+}
+
+/* timestamptz_trunc_zone()
+ * Truncate timestamptz to specified units in specified timezone.
+ */
+Datum
+timestamptz_trunc_zone(PG_FUNCTION_ARGS)
+{
+ text *units = PG_GETARG_TEXT_PP(0);
+ TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(1);
+ text *zone = PG_GETARG_TEXT_PP(2);
+ TimestampTz result;
+ char tzname[TZ_STRLEN_MAX + 1];
+ char *lowzone;
+ int type,
+ val;
+ pg_tz *tzp;
+
+ /*
+ * timestamptz_zone() doesn't look up the zone for infinite inputs, so we
+ * don't do so here either.
+ */
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ PG_RETURN_TIMESTAMP(timestamp);
+
+ /*
+ * Look up the requested timezone (see notes in timestamptz_zone()).
+ */
+ text_to_cstring_buffer(zone, tzname, sizeof(tzname));
+
+ /* DecodeTimezoneAbbrev requires lowercase input */
+ lowzone = downcase_truncate_identifier(tzname,
+ strlen(tzname),
+ false);
+
+ type = DecodeTimezoneAbbrev(0, lowzone, &val, &tzp);
+
+ if (type == TZ || type == DTZ)
+ {
+ /* fixed-offset abbreviation, get a pg_tz descriptor for that */
+ tzp = pg_tzset_offset(-val);
+ }
+ else if (type == DYNTZ)
+ {
+ /* dynamic-offset abbreviation, use its referenced timezone */
+ }
+ else
+ {
+ /* try it as a full zone name */
+ tzp = pg_tzset(tzname);
+ if (!tzp)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("time zone \"%s\" not recognized", tzname)));
+ }
+
+ result = timestamptz_trunc_internal(units, timestamp, tzp);
+
+ PG_RETURN_TIMESTAMPTZ(result);
+}
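+
+/*
+ * Illustrative example (not part of the original source): the three-argument
+ * form of date_trunc() reaches this function, e.g.
+ *   SELECT date_trunc('day', TIMESTAMPTZ '2001-02-16 20:38:40+00',
+ *                     'Australia/Sydney');
+ * returns 2001-02-16 13:00:00+00, i.e. local midnight in Sydney.
+ */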
+
+/* interval_trunc()
+ * Extract specified field from interval.
+ */
+Datum
+interval_trunc(PG_FUNCTION_ARGS)
+{
+ text *units = PG_GETARG_TEXT_PP(0);
+ Interval *interval = PG_GETARG_INTERVAL_P(1);
+ Interval *result;
+ int type,
+ val;
+ char *lowunits;
+ struct pg_itm tt,
+ *tm = &tt;
+
+ result = (Interval *) palloc(sizeof(Interval));
+
+ lowunits = downcase_truncate_identifier(VARDATA_ANY(units),
+ VARSIZE_ANY_EXHDR(units),
+ false);
+
+ type = DecodeUnits(0, lowunits, &val);
+
+ if (type == UNITS)
+ {
+ interval2itm(*interval, tm);
+ switch (val)
+ {
+ case DTK_MILLENNIUM:
+ /* caution: C division may have negative remainder */
+ tm->tm_year = (tm->tm_year / 1000) * 1000;
+ /* FALL THRU */
+ case DTK_CENTURY:
+ /* caution: C division may have negative remainder */
+ tm->tm_year = (tm->tm_year / 100) * 100;
+ /* FALL THRU */
+ case DTK_DECADE:
+ /* caution: C division may have negative remainder */
+ tm->tm_year = (tm->tm_year / 10) * 10;
+ /* FALL THRU */
+ case DTK_YEAR:
+ tm->tm_mon = 0;
+ /* FALL THRU */
+ case DTK_QUARTER:
+ tm->tm_mon = 3 * (tm->tm_mon / 3);
+ /* FALL THRU */
+ case DTK_MONTH:
+ tm->tm_mday = 0;
+ /* FALL THRU */
+ case DTK_DAY:
+ tm->tm_hour = 0;
+ /* FALL THRU */
+ case DTK_HOUR:
+ tm->tm_min = 0;
+ /* FALL THRU */
+ case DTK_MINUTE:
+ tm->tm_sec = 0;
+ /* FALL THRU */
+ case DTK_SECOND:
+ tm->tm_usec = 0;
+ break;
+ case DTK_MILLISEC:
+ tm->tm_usec = (tm->tm_usec / 1000) * 1000;
+ break;
+ case DTK_MICROSEC:
+ break;
+
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("unit \"%s\" not supported for type %s",
+ lowunits, format_type_be(INTERVALOID)),
+ (val == DTK_WEEK) ? errdetail("Months usually have fractional weeks.") : 0));
+ }
+
+ if (itm2interval(tm, result) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("interval out of range")));
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unit \"%s\" not recognized for type %s",
+ lowunits, format_type_be(INTERVALOID))));
+ }
+
+ PG_RETURN_INTERVAL_P(result);
+}
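+
+/*
+ * Illustrative example (not part of the original source):
+ *   SELECT date_trunc('hour', INTERVAL '2 days 3 hours 40 minutes');
+ * returns '2 days 03:00:00'.
+ */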
+
+/* isoweek2j()
+ *
+ * Return the Julian day which corresponds to the first day (Monday) of the given ISO 8601 year and week.
+ * Julian days are used to convert between ISO week dates and Gregorian dates.
+ */
+int
+isoweek2j(int year, int week)
+{
+ int day0,
+ day4;
+
+ /* fourth day of current year */
+ day4 = date2j(year, 1, 4);
+
+ /* day0 == offset to first day of week (Monday) */
+ day0 = j2day(day4 - 1);
+
+ return ((week - 1) * 7) + (day4 - day0);
+}
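+
+/*
+ * Illustrative example (not part of the original source): ISO year 2005,
+ * week 1 starts on Monday 2005-01-03, so isoweek2j(2005, 1) returns the
+ * Julian day number of that date.
+ */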
+
+/* isoweek2date()
+ * Convert ISO week of year number to date.
+ * The year field must be specified with the ISO year!
+ * karel 2000/08/07
+ */
+void
+isoweek2date(int woy, int *year, int *mon, int *mday)
+{
+ j2date(isoweek2j(*year, woy), year, mon, mday);
+}
+
+/* isoweekdate2date()
+ *
+ * Convert an ISO 8601 week date (ISO year, ISO week) into a Gregorian date.
+ * The Gregorian day of week is passed in so that weekday strings can be supplied.
+ * Populates year, mon, and mday with the correct Gregorian values.
+ * year must be passed in as the ISO year.
+ */
+void
+isoweekdate2date(int isoweek, int wday, int *year, int *mon, int *mday)
+{
+ int jday;
+
+ jday = isoweek2j(*year, isoweek);
+ /* convert Gregorian week start (Sunday=1) to ISO week start (Monday=1) */
+ if (wday > 1)
+ jday += wday - 2;
+ else
+ jday += 6;
+ j2date(jday, year, mon, mday);
+}
+
+/* date2isoweek()
+ *
+ * Returns ISO week number of year.
+ */
+int
+date2isoweek(int year, int mon, int mday)
+{
+ float8 result;
+ int day0,
+ day4,
+ dayn;
+
+ /* current day */
+ dayn = date2j(year, mon, mday);
+
+ /* fourth day of current year */
+ day4 = date2j(year, 1, 4);
+
+ /* day0 == offset to first day of week (Monday) */
+ day0 = j2day(day4 - 1);
+
+ /*
+ * We need the first week containing a Thursday, otherwise this day falls
+ * into the previous year for purposes of counting weeks
+ */
+ if (dayn < day4 - day0)
+ {
+ day4 = date2j(year - 1, 1, 4);
+
+ /* day0 == offset to first day of week (Monday) */
+ day0 = j2day(day4 - 1);
+ }
+
+ result = (dayn - (day4 - day0)) / 7 + 1;
+
+ /*
+ * Sometimes the last few days in a year will fall into the first week of
+ * the next year, so check for this.
+ */
+ if (result >= 52)
+ {
+ day4 = date2j(year + 1, 1, 4);
+
+ /* day0 == offset to first day of week (Monday) */
+ day0 = j2day(day4 - 1);
+
+ if (dayn >= day4 - day0)
+ result = (dayn - (day4 - day0)) / 7 + 1;
+ }
+
+ return (int) result;
+}
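+
+/*
+ * Illustrative example (not part of the original source): 2005-01-01 falls
+ * in ISO week 2004-W53, so date2isoweek(2005, 1, 1) returns 53.
+ */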
+
+
+/* date2isoyear()
+ *
+ * Returns ISO 8601 year number.
+ * Note: zero or negative results follow the year-zero-exists convention.
+ */
+int
+date2isoyear(int year, int mon, int mday)
+{
+ float8 result;
+ int day0,
+ day4,
+ dayn;
+
+ /* current day */
+ dayn = date2j(year, mon, mday);
+
+ /* fourth day of current year */
+ day4 = date2j(year, 1, 4);
+
+ /* day0 == offset to first day of week (Monday) */
+ day0 = j2day(day4 - 1);
+
+ /*
+ * We need the first week containing a Thursday, otherwise this day falls
+ * into the previous year for purposes of counting weeks
+ */
+ if (dayn < day4 - day0)
+ {
+ day4 = date2j(year - 1, 1, 4);
+
+ /* day0 == offset to first day of week (Monday) */
+ day0 = j2day(day4 - 1);
+
+ year--;
+ }
+
+ result = (dayn - (day4 - day0)) / 7 + 1;
+
+ /*
+ * Sometimes the last few days in a year will fall into the first week of
+ * the next year, so check for this.
+ */
+ if (result >= 52)
+ {
+ day4 = date2j(year + 1, 1, 4);
+
+ /* day0 == offset to first day of week (Monday) */
+ day0 = j2day(day4 - 1);
+
+ if (dayn >= day4 - day0)
+ year++;
+ }
+
+ return year;
+}
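+
+/*
+ * Illustrative example (not part of the original source): 2005-01-01 belongs
+ * to ISO week 2004-W53, so date2isoyear(2005, 1, 1) returns 2004.
+ */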
+
+
+/* date2isoyearday()
+ *
+ * Returns the ISO 8601 day-of-year, given a Gregorian year, month and day.
+ * Possible return values are 1 through 371 (at most 364 in ISO years with only 52 weeks).
+ */
+int
+date2isoyearday(int year, int mon, int mday)
+{
+ return date2j(year, mon, mday) - isoweek2j(date2isoyear(year, mon, mday), 1) + 1;
+}
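+
+/*
+ * Illustrative example (not part of the original source): 2005-01-01 is the
+ * Saturday of ISO week 2004-W53, i.e. day (53 - 1) * 7 + 6 = 370 of ISO year
+ * 2004, so date2isoyearday(2005, 1, 1) returns 370.
+ */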
+
+/*
+ * NonFiniteTimestampTzPart
+ *
+ * Used by timestamp_part and timestamptz_part when extracting from infinite
+ * timestamp[tz]. Returns +/-Infinity if that is the appropriate result,
+ * otherwise returns zero (which should be taken as meaning to return NULL).
+ *
+ * Errors thrown here for invalid units should exactly match those that
+ * would be thrown in the calling functions, else there will be unexpected
+ * discrepancies between finite- and infinite-input cases.
+ */
+static float8
+NonFiniteTimestampTzPart(int type, int unit, char *lowunits,
+ bool isNegative, bool isTz)
+{
+ if ((type != UNITS) && (type != RESERV))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unit \"%s\" not recognized for type %s",
+ lowunits,
+ format_type_be(isTz ? TIMESTAMPTZOID : TIMESTAMPOID))));
+
+ switch (unit)
+ {
+ /* Oscillating units */
+ case DTK_MICROSEC:
+ case DTK_MILLISEC:
+ case DTK_SECOND:
+ case DTK_MINUTE:
+ case DTK_HOUR:
+ case DTK_DAY:
+ case DTK_MONTH:
+ case DTK_QUARTER:
+ case DTK_WEEK:
+ case DTK_DOW:
+ case DTK_ISODOW:
+ case DTK_DOY:
+ case DTK_TZ:
+ case DTK_TZ_MINUTE:
+ case DTK_TZ_HOUR:
+ return 0.0;
+
+ /* Monotonically-increasing units */
+ case DTK_YEAR:
+ case DTK_DECADE:
+ case DTK_CENTURY:
+ case DTK_MILLENNIUM:
+ case DTK_JULIAN:
+ case DTK_ISOYEAR:
+ case DTK_EPOCH:
+ if (isNegative)
+ return -get_float8_infinity();
+ else
+ return get_float8_infinity();
+
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("unit \"%s\" not supported for type %s",
+ lowunits,
+ format_type_be(isTz ? TIMESTAMPTZOID : TIMESTAMPOID))));
+ return 0.0; /* keep compiler quiet */
+ }
+}
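+
+/*
+ * Illustrative note (not part of the original source): with this helper,
+ * extract(year from TIMESTAMP 'infinity') yields Infinity, while
+ * extract(hour from TIMESTAMP 'infinity') yields NULL, because hours
+ * oscillate rather than grow without bound.
+ */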
+
+/* timestamp_part() and extract_timestamp()
+ * Extract specified field from timestamp.
+ */
+static Datum
+timestamp_part_common(PG_FUNCTION_ARGS, bool retnumeric)
+{
+ text *units = PG_GETARG_TEXT_PP(0);
+ Timestamp timestamp = PG_GETARG_TIMESTAMP(1);
+ int64 intresult;
+ Timestamp epoch;
+ int type,
+ val;
+ char *lowunits;
+ fsec_t fsec;
+ struct pg_tm tt,
+ *tm = &tt;
+
+ lowunits = downcase_truncate_identifier(VARDATA_ANY(units),
+ VARSIZE_ANY_EXHDR(units),
+ false);
+
+ type = DecodeUnits(0, lowunits, &val);
+ if (type == UNKNOWN_FIELD)
+ type = DecodeSpecial(0, lowunits, &val);
+
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ {
+ double r = NonFiniteTimestampTzPart(type, val, lowunits,
+ TIMESTAMP_IS_NOBEGIN(timestamp),
+ false);
+
+ if (r)
+ {
+ if (retnumeric)
+ {
+ if (r < 0)
+ return DirectFunctionCall3(numeric_in,
+ CStringGetDatum("-Infinity"),
+ ObjectIdGetDatum(InvalidOid),
+ Int32GetDatum(-1));
+ else if (r > 0)
+ return DirectFunctionCall3(numeric_in,
+ CStringGetDatum("Infinity"),
+ ObjectIdGetDatum(InvalidOid),
+ Int32GetDatum(-1));
+ }
+ else
+ PG_RETURN_FLOAT8(r);
+ }
+ else
+ PG_RETURN_NULL();
+ }
+
+ if (type == UNITS)
+ {
+ if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ switch (val)
+ {
+ case DTK_MICROSEC:
+ intresult = tm->tm_sec * INT64CONST(1000000) + fsec;
+ break;
+
+ case DTK_MILLISEC:
+ if (retnumeric)
+ /*---
+ * tm->tm_sec * 1000 + fsec / 1000
+ * = (tm->tm_sec * 1'000'000 + fsec) / 1000
+ */
+ PG_RETURN_NUMERIC(int64_div_fast_to_numeric(tm->tm_sec * INT64CONST(1000000) + fsec, 3));
+ else
+ PG_RETURN_FLOAT8(tm->tm_sec * 1000.0 + fsec / 1000.0);
+ break;
+
+ case DTK_SECOND:
+ if (retnumeric)
+ /*---
+ * tm->tm_sec + fsec / 1'000'000
+ * = (tm->tm_sec * 1'000'000 + fsec) / 1'000'000
+ */
+ PG_RETURN_NUMERIC(int64_div_fast_to_numeric(tm->tm_sec * INT64CONST(1000000) + fsec, 6));
+ else
+ PG_RETURN_FLOAT8(tm->tm_sec + fsec / 1000000.0);
+ break;
+
+ case DTK_MINUTE:
+ intresult = tm->tm_min;
+ break;
+
+ case DTK_HOUR:
+ intresult = tm->tm_hour;
+ break;
+
+ case DTK_DAY:
+ intresult = tm->tm_mday;
+ break;
+
+ case DTK_MONTH:
+ intresult = tm->tm_mon;
+ break;
+
+ case DTK_QUARTER:
+ intresult = (tm->tm_mon - 1) / 3 + 1;
+ break;
+
+ case DTK_WEEK:
+ intresult = date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday);
+ break;
+
+ case DTK_YEAR:
+ if (tm->tm_year > 0)
+ intresult = tm->tm_year;
+ else
+ /* there is no year 0, just 1 BC and 1 AD */
+ intresult = tm->tm_year - 1;
+ break;
+
+ case DTK_DECADE:
+
+ /*
+ * what is a decade wrt dates? let us assume that decade 199
+ * is 1990 thru 1999... decade 0 starts on year 1 BC, and -1
+ * is 11 BC thru 2 BC...
+ */
+ if (tm->tm_year >= 0)
+ intresult = tm->tm_year / 10;
+ else
+ intresult = -((8 - (tm->tm_year - 1)) / 10);
+ break;
+
+ case DTK_CENTURY:
+
+ /* ----
+ * centuries AD, c>0: year in [ (c-1)* 100 + 1 : c*100 ]
+ * centuries BC, c<0: year in [ c*100 : (c+1) * 100 - 1]
+ * there is no number 0 century.
+ * ----
+ */
+ if (tm->tm_year > 0)
+ intresult = (tm->tm_year + 99) / 100;
+ else
+ /* caution: C division may have negative remainder */
+ intresult = -((99 - (tm->tm_year - 1)) / 100);
+ break;
+
+ case DTK_MILLENNIUM:
+ /* see comments above. */
+ if (tm->tm_year > 0)
+ intresult = (tm->tm_year + 999) / 1000;
+ else
+ intresult = -((999 - (tm->tm_year - 1)) / 1000);
+ break;
+
+ case DTK_JULIAN:
+ if (retnumeric)
+ PG_RETURN_NUMERIC(numeric_add_opt_error(int64_to_numeric(date2j(tm->tm_year, tm->tm_mon, tm->tm_mday)),
+ numeric_div_opt_error(int64_to_numeric(((((tm->tm_hour * MINS_PER_HOUR) + tm->tm_min) * SECS_PER_MINUTE) + tm->tm_sec) * INT64CONST(1000000) + fsec),
+ int64_to_numeric(SECS_PER_DAY * INT64CONST(1000000)),
+ NULL),
+ NULL));
+ else
+ PG_RETURN_FLOAT8(date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) +
+ ((((tm->tm_hour * MINS_PER_HOUR) + tm->tm_min) * SECS_PER_MINUTE) +
+ tm->tm_sec + (fsec / 1000000.0)) / (double) SECS_PER_DAY);
+ break;
+
+ case DTK_ISOYEAR:
+ intresult = date2isoyear(tm->tm_year, tm->tm_mon, tm->tm_mday);
+ /* Adjust BC years */
+ if (intresult <= 0)
+ intresult -= 1;
+ break;
+
+ case DTK_DOW:
+ case DTK_ISODOW:
+ intresult = j2day(date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
+ if (val == DTK_ISODOW && intresult == 0)
+ intresult = 7;
+ break;
+
+ case DTK_DOY:
+ intresult = (date2j(tm->tm_year, tm->tm_mon, tm->tm_mday)
+ - date2j(tm->tm_year, 1, 1) + 1);
+ break;
+
+ case DTK_TZ:
+ case DTK_TZ_MINUTE:
+ case DTK_TZ_HOUR:
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("unit \"%s\" not supported for type %s",
+ lowunits, format_type_be(TIMESTAMPOID))));
+ intresult = 0;
+ }
+ }
+ else if (type == RESERV)
+ {
+ switch (val)
+ {
+ case DTK_EPOCH:
+ epoch = SetEpochTimestamp();
+ /* (timestamp - epoch) / 1000000 */
+ if (retnumeric)
+ {
+ Numeric result;
+
+ if (timestamp < (PG_INT64_MAX + epoch))
+ result = int64_div_fast_to_numeric(timestamp - epoch, 6);
+ else
+ {
+ result = numeric_div_opt_error(numeric_sub_opt_error(int64_to_numeric(timestamp),
+ int64_to_numeric(epoch),
+ NULL),
+ int64_to_numeric(1000000),
+ NULL);
+ result = DatumGetNumeric(DirectFunctionCall2(numeric_round,
+ NumericGetDatum(result),
+ Int32GetDatum(6)));
+ }
+ PG_RETURN_NUMERIC(result);
+ }
+ else
+ {
+ float8 result;
+
+ /* try to avoid precision loss in subtraction */
+ if (timestamp < (PG_INT64_MAX + epoch))
+ result = (timestamp - epoch) / 1000000.0;
+ else
+ result = ((float8) timestamp - epoch) / 1000000.0;
+ PG_RETURN_FLOAT8(result);
+ }
+ break;
+
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("unit \"%s\" not supported for type %s",
+ lowunits, format_type_be(TIMESTAMPOID))));
+ intresult = 0;
+ }
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unit \"%s\" not recognized for type %s",
+ lowunits, format_type_be(TIMESTAMPOID))));
+ intresult = 0;
+ }
+
+ if (retnumeric)
+ PG_RETURN_NUMERIC(int64_to_numeric(intresult));
+ else
+ PG_RETURN_FLOAT8(intresult);
+}
+
+Datum
+timestamp_part(PG_FUNCTION_ARGS)
+{
+ return timestamp_part_common(fcinfo, false);
+}
+
+Datum
+extract_timestamp(PG_FUNCTION_ARGS)
+{
+ return timestamp_part_common(fcinfo, true);
+}
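+
+/*
+ * Illustrative example (not part of the original source):
+ *   SELECT extract(hour from TIMESTAMP '2001-02-16 20:38:40');
+ * returns 20.  extract() takes the retnumeric path and returns numeric,
+ * while date_part() returns double precision.
+ */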
+
+/* timestamptz_part() and extract_timestamptz()
+ * Extract specified field from timestamp with time zone.
+ */
+static Datum
+timestamptz_part_common(PG_FUNCTION_ARGS, bool retnumeric)
+{
+ text *units = PG_GETARG_TEXT_PP(0);
+ TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(1);
+ int64 intresult;
+ Timestamp epoch;
+ int tz;
+ int type,
+ val;
+ char *lowunits;
+ fsec_t fsec;
+ struct pg_tm tt,
+ *tm = &tt;
+
+ lowunits = downcase_truncate_identifier(VARDATA_ANY(units),
+ VARSIZE_ANY_EXHDR(units),
+ false);
+
+ type = DecodeUnits(0, lowunits, &val);
+ if (type == UNKNOWN_FIELD)
+ type = DecodeSpecial(0, lowunits, &val);
+
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ {
+ double r = NonFiniteTimestampTzPart(type, val, lowunits,
+ TIMESTAMP_IS_NOBEGIN(timestamp),
+ true);
+
+ if (r)
+ {
+ if (retnumeric)
+ {
+ if (r < 0)
+ return DirectFunctionCall3(numeric_in,
+ CStringGetDatum("-Infinity"),
+ ObjectIdGetDatum(InvalidOid),
+ Int32GetDatum(-1));
+ else if (r > 0)
+ return DirectFunctionCall3(numeric_in,
+ CStringGetDatum("Infinity"),
+ ObjectIdGetDatum(InvalidOid),
+ Int32GetDatum(-1));
+ }
+ else
+ PG_RETURN_FLOAT8(r);
+ }
+ else
+ PG_RETURN_NULL();
+ }
+
+ if (type == UNITS)
+ {
+ if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ switch (val)
+ {
+ case DTK_TZ:
+ intresult = -tz;
+ break;
+
+ case DTK_TZ_MINUTE:
+ intresult = (-tz / SECS_PER_MINUTE) % MINS_PER_HOUR;
+ break;
+
+ case DTK_TZ_HOUR:
+ intresult = -tz / SECS_PER_HOUR;
+ break;
+
+ case DTK_MICROSEC:
+ intresult = tm->tm_sec * INT64CONST(1000000) + fsec;
+ break;
+
+ case DTK_MILLISEC:
+ if (retnumeric)
+ /*---
+ * tm->tm_sec * 1000 + fsec / 1000
+ * = (tm->tm_sec * 1'000'000 + fsec) / 1000
+ */
+ PG_RETURN_NUMERIC(int64_div_fast_to_numeric(tm->tm_sec * INT64CONST(1000000) + fsec, 3));
+ else
+ PG_RETURN_FLOAT8(tm->tm_sec * 1000.0 + fsec / 1000.0);
+ break;
+
+ case DTK_SECOND:
+ if (retnumeric)
+ /*---
+ * tm->tm_sec + fsec / 1'000'000
+ * = (tm->tm_sec * 1'000'000 + fsec) / 1'000'000
+ */
+ PG_RETURN_NUMERIC(int64_div_fast_to_numeric(tm->tm_sec * INT64CONST(1000000) + fsec, 6));
+ else
+ PG_RETURN_FLOAT8(tm->tm_sec + fsec / 1000000.0);
+ break;
+
+ case DTK_MINUTE:
+ intresult = tm->tm_min;
+ break;
+
+ case DTK_HOUR:
+ intresult = tm->tm_hour;
+ break;
+
+ case DTK_DAY:
+ intresult = tm->tm_mday;
+ break;
+
+ case DTK_MONTH:
+ intresult = tm->tm_mon;
+ break;
+
+ case DTK_QUARTER:
+ intresult = (tm->tm_mon - 1) / 3 + 1;
+ break;
+
+ case DTK_WEEK:
+ intresult = date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday);
+ break;
+
+ case DTK_YEAR:
+ if (tm->tm_year > 0)
+ intresult = tm->tm_year;
+ else
+ /* there is no year 0, just 1 BC and 1 AD */
+ intresult = tm->tm_year - 1;
+ break;
+
+ case DTK_DECADE:
+ /* see comments in timestamp_part */
+ if (tm->tm_year > 0)
+ intresult = tm->tm_year / 10;
+ else
+ intresult = -((8 - (tm->tm_year - 1)) / 10);
+ break;
+
+ case DTK_CENTURY:
+ /* see comments in timestamp_part */
+ if (tm->tm_year > 0)
+ intresult = (tm->tm_year + 99) / 100;
+ else
+ intresult = -((99 - (tm->tm_year - 1)) / 100);
+ break;
+
+ case DTK_MILLENNIUM:
+ /* see comments in timestamp_part */
+ if (tm->tm_year > 0)
+ intresult = (tm->tm_year + 999) / 1000;
+ else
+ intresult = -((999 - (tm->tm_year - 1)) / 1000);
+ break;
+
+ case DTK_JULIAN:
+ if (retnumeric)
+ PG_RETURN_NUMERIC(numeric_add_opt_error(int64_to_numeric(date2j(tm->tm_year, tm->tm_mon, tm->tm_mday)),
+ numeric_div_opt_error(int64_to_numeric(((((tm->tm_hour * MINS_PER_HOUR) + tm->tm_min) * SECS_PER_MINUTE) + tm->tm_sec) * INT64CONST(1000000) + fsec),
+ int64_to_numeric(SECS_PER_DAY * INT64CONST(1000000)),
+ NULL),
+ NULL));
+ else
+ PG_RETURN_FLOAT8(date2j(tm->tm_year, tm->tm_mon, tm->tm_mday) +
+ ((((tm->tm_hour * MINS_PER_HOUR) + tm->tm_min) * SECS_PER_MINUTE) +
+ tm->tm_sec + (fsec / 1000000.0)) / (double) SECS_PER_DAY);
+ break;
+
+ case DTK_ISOYEAR:
+ intresult = date2isoyear(tm->tm_year, tm->tm_mon, tm->tm_mday);
+ /* Adjust BC years */
+ if (intresult <= 0)
+ intresult -= 1;
+ break;
+
+ case DTK_DOW:
+ case DTK_ISODOW:
+ intresult = j2day(date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
+ if (val == DTK_ISODOW && intresult == 0)
+ intresult = 7;
+ break;
+
+ case DTK_DOY:
+ intresult = (date2j(tm->tm_year, tm->tm_mon, tm->tm_mday)
+ - date2j(tm->tm_year, 1, 1) + 1);
+ break;
+
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("unit \"%s\" not supported for type %s",
+ lowunits, format_type_be(TIMESTAMPTZOID))));
+ intresult = 0;
+ }
+ }
+ else if (type == RESERV)
+ {
+ switch (val)
+ {
+ case DTK_EPOCH:
+ epoch = SetEpochTimestamp();
+ /* (timestamp - epoch) / 1000000 */
+ if (retnumeric)
+ {
+ Numeric result;
+
+ if (timestamp < (PG_INT64_MAX + epoch))
+ result = int64_div_fast_to_numeric(timestamp - epoch, 6);
+ else
+ {
+ result = numeric_div_opt_error(numeric_sub_opt_error(int64_to_numeric(timestamp),
+ int64_to_numeric(epoch),
+ NULL),
+ int64_to_numeric(1000000),
+ NULL);
+ result = DatumGetNumeric(DirectFunctionCall2(numeric_round,
+ NumericGetDatum(result),
+ Int32GetDatum(6)));
+ }
+ PG_RETURN_NUMERIC(result);
+ }
+ else
+ {
+ float8 result;
+
+ /* try to avoid precision loss in subtraction */
+ if (timestamp < (PG_INT64_MAX + epoch))
+ result = (timestamp - epoch) / 1000000.0;
+ else
+ result = ((float8) timestamp - epoch) / 1000000.0;
+ PG_RETURN_FLOAT8(result);
+ }
+ break;
+
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("unit \"%s\" not supported for type %s",
+ lowunits, format_type_be(TIMESTAMPTZOID))));
+ intresult = 0;
+ }
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unit \"%s\" not recognized for type %s",
+ lowunits, format_type_be(TIMESTAMPTZOID))));
+
+ intresult = 0;
+ }
+
+ if (retnumeric)
+ PG_RETURN_NUMERIC(int64_to_numeric(intresult));
+ else
+ PG_RETURN_FLOAT8(intresult);
+}
+
+Datum
+timestamptz_part(PG_FUNCTION_ARGS)
+{
+ return timestamptz_part_common(fcinfo, false);
+}
+
+Datum
+extract_timestamptz(PG_FUNCTION_ARGS)
+{
+ return timestamptz_part_common(fcinfo, true);
+}
+
+
+/* interval_part() and extract_interval()
+ * Extract specified field from interval.
+ */
+static Datum
+interval_part_common(PG_FUNCTION_ARGS, bool retnumeric)
+{
+ text *units = PG_GETARG_TEXT_PP(0);
+ Interval *interval = PG_GETARG_INTERVAL_P(1);
+ int64 intresult;
+ int type,
+ val;
+ char *lowunits;
+ struct pg_itm tt,
+ *tm = &tt;
+
+ lowunits = downcase_truncate_identifier(VARDATA_ANY(units),
+ VARSIZE_ANY_EXHDR(units),
+ false);
+
+ type = DecodeUnits(0, lowunits, &val);
+ if (type == UNKNOWN_FIELD)
+ type = DecodeSpecial(0, lowunits, &val);
+
+ if (type == UNITS)
+ {
+ interval2itm(*interval, tm);
+ switch (val)
+ {
+ case DTK_MICROSEC:
+ intresult = tm->tm_sec * INT64CONST(1000000) + tm->tm_usec;
+ break;
+
+ case DTK_MILLISEC:
+ if (retnumeric)
+ /*---
+ * tm->tm_sec * 1000 + fsec / 1000
+ * = (tm->tm_sec * 1'000'000 + fsec) / 1000
+ */
+ PG_RETURN_NUMERIC(int64_div_fast_to_numeric(tm->tm_sec * INT64CONST(1000000) + tm->tm_usec, 3));
+ else
+ PG_RETURN_FLOAT8(tm->tm_sec * 1000.0 + tm->tm_usec / 1000.0);
+ break;
+
+ case DTK_SECOND:
+ if (retnumeric)
+ /*---
+ * tm->tm_sec + fsec / 1'000'000
+ * = (tm->tm_sec * 1'000'000 + fsec) / 1'000'000
+ */
+ PG_RETURN_NUMERIC(int64_div_fast_to_numeric(tm->tm_sec * INT64CONST(1000000) + tm->tm_usec, 6));
+ else
+ PG_RETURN_FLOAT8(tm->tm_sec + tm->tm_usec / 1000000.0);
+ break;
+
+ case DTK_MINUTE:
+ intresult = tm->tm_min;
+ break;
+
+ case DTK_HOUR:
+ intresult = tm->tm_hour;
+ break;
+
+ case DTK_DAY:
+ intresult = tm->tm_mday;
+ break;
+
+ case DTK_MONTH:
+ intresult = tm->tm_mon;
+ break;
+
+ case DTK_QUARTER:
+ intresult = (tm->tm_mon / 3) + 1;
+ break;
+
+ case DTK_YEAR:
+ intresult = tm->tm_year;
+ break;
+
+ case DTK_DECADE:
+ /* caution: C division may have negative remainder */
+ intresult = tm->tm_year / 10;
+ break;
+
+ case DTK_CENTURY:
+ /* caution: C division may have negative remainder */
+ intresult = tm->tm_year / 100;
+ break;
+
+ case DTK_MILLENNIUM:
+ /* caution: C division may have negative remainder */
+ intresult = tm->tm_year / 1000;
+ break;
+
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("unit \"%s\" not supported for type %s",
+ lowunits, format_type_be(INTERVALOID))));
+ intresult = 0;
+ }
+ }
+ else if (type == RESERV && val == DTK_EPOCH)
+ {
+ if (retnumeric)
+ {
+ Numeric result;
+ int64 secs_from_day_month;
+ int64 val;
+
+ /*
+ * To do this calculation in integer arithmetic even though
+ * DAYS_PER_YEAR is fractional, multiply everything by 4 and then
+ * divide by 4 again at the end. This relies on DAYS_PER_YEAR
+ * being a multiple of 0.25 and on SECS_PER_DAY being a multiple
+ * of 4.
+ */
+ secs_from_day_month = ((int64) (4 * DAYS_PER_YEAR) * (interval->month / MONTHS_PER_YEAR) +
+ (int64) (4 * DAYS_PER_MONTH) * (interval->month % MONTHS_PER_YEAR) +
+ (int64) 4 * interval->day) * (SECS_PER_DAY / 4);
+
+ /*---
+ * result = secs_from_day_month + interval->time / 1'000'000
+ * = (secs_from_day_month * 1'000'000 + interval->time) / 1'000'000
+ */
+
+ /*
+ * Try the computation inside int64; if it overflows, do it in
+ * numeric (slower). This overflow happens around 10^9 days, so
+ * not common in practice.
+ */
+ if (!pg_mul_s64_overflow(secs_from_day_month, 1000000, &val) &&
+ !pg_add_s64_overflow(val, interval->time, &val))
+ result = int64_div_fast_to_numeric(val, 6);
+ else
+ result =
+ numeric_add_opt_error(int64_div_fast_to_numeric(interval->time, 6),
+ int64_to_numeric(secs_from_day_month),
+ NULL);
+
+ PG_RETURN_NUMERIC(result);
+ }
+ else
+ {
+ float8 result;
+
+ result = interval->time / 1000000.0;
+ result += ((double) DAYS_PER_YEAR * SECS_PER_DAY) * (interval->month / MONTHS_PER_YEAR);
+ result += ((double) DAYS_PER_MONTH * SECS_PER_DAY) * (interval->month % MONTHS_PER_YEAR);
+ result += ((double) SECS_PER_DAY) * interval->day;
+
+ PG_RETURN_FLOAT8(result);
+ }
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unit \"%s\" not recognized for type %s",
+ lowunits, format_type_be(INTERVALOID))));
+ intresult = 0;
+ }
+
+ if (retnumeric)
+ PG_RETURN_NUMERIC(int64_to_numeric(intresult));
+ else
+ PG_RETURN_FLOAT8(intresult);
+}
+
+Datum
+interval_part(PG_FUNCTION_ARGS)
+{
+ return interval_part_common(fcinfo, false);
+}
+
+Datum
+extract_interval(PG_FUNCTION_ARGS)
+{
+ return interval_part_common(fcinfo, true);
+}
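+
+/*
+ * Illustrative example (not part of the original source):
+ *   SELECT extract(epoch from INTERVAL '5 days 3 hours');
+ * returns 442800 (5 * 86400 + 3 * 3600 seconds).
+ */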
+
+
+/* timestamp_zone()
+ * Encode timestamp type with specified time zone.
+ * This function is just timestamp2timestamptz() except instead of
+ * shifting to the global timezone, we shift to the specified timezone.
+ * This is different from the other AT TIME ZONE cases because instead
+ * of shifting _to_ a new time zone, it sets the time to _be_ the
+ * specified timezone.
+ */
+Datum
+timestamp_zone(PG_FUNCTION_ARGS)
+{
+ text *zone = PG_GETARG_TEXT_PP(0);
+ Timestamp timestamp = PG_GETARG_TIMESTAMP(1);
+ TimestampTz result;
+ int tz;
+ char tzname[TZ_STRLEN_MAX + 1];
+ char *lowzone;
+ int type,
+ val;
+ pg_tz *tzp;
+ struct pg_tm tm;
+ fsec_t fsec;
+
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ PG_RETURN_TIMESTAMPTZ(timestamp);
+
+ /*
+ * Look up the requested timezone. First we look in the timezone
+ * abbreviation table (to handle cases like "EST"), and if that fails, we
+ * look in the timezone database (to handle cases like
+ * "America/New_York"). (This matches the order in which timestamp input
+ * checks the cases; it's important because the timezone database unwisely
+ * uses a few zone names that are identical to offset abbreviations.)
+ */
+ text_to_cstring_buffer(zone, tzname, sizeof(tzname));
+
+ /* DecodeTimezoneAbbrev requires lowercase input */
+ lowzone = downcase_truncate_identifier(tzname,
+ strlen(tzname),
+ false);
+
+ type = DecodeTimezoneAbbrev(0, lowzone, &val, &tzp);
+
+ if (type == TZ || type == DTZ)
+ {
+ /* fixed-offset abbreviation */
+ tz = val;
+ result = dt2local(timestamp, tz);
+ }
+ else if (type == DYNTZ)
+ {
+ /* dynamic-offset abbreviation, resolve using specified time */
+ if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, tzp) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ tz = -DetermineTimeZoneAbbrevOffset(&tm, tzname, tzp);
+ result = dt2local(timestamp, tz);
+ }
+ else
+ {
+ /* try it as a full zone name */
+ tzp = pg_tzset(tzname);
+ if (tzp)
+ {
+ /* Apply the timezone change */
+ if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, tzp) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ tz = DetermineTimeZoneOffset(&tm, tzp);
+ if (tm2timestamp(&tm, fsec, &tz, &result) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("time zone \"%s\" not recognized", tzname)));
+ result = 0; /* keep compiler quiet */
+ }
+ }
+
+ if (!IS_VALID_TIMESTAMP(result))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ PG_RETURN_TIMESTAMPTZ(result);
+}
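+
+/*
+ * Illustrative example (not part of the original source), assuming the
+ * session TimeZone is UTC:
+ *   SELECT TIMESTAMP '2001-02-16 20:38:40' AT TIME ZONE 'America/Denver';
+ * returns 2001-02-17 03:38:40+00.
+ */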
+
+/* timestamp_izone()
+ * Encode timestamp type with specified time interval as time zone.
+ */
+Datum
+timestamp_izone(PG_FUNCTION_ARGS)
+{
+ Interval *zone = PG_GETARG_INTERVAL_P(0);
+ Timestamp timestamp = PG_GETARG_TIMESTAMP(1);
+ TimestampTz result;
+ int tz;
+
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ PG_RETURN_TIMESTAMPTZ(timestamp);
+
+ if (zone->month != 0 || zone->day != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("interval time zone \"%s\" must not include months or days",
+ DatumGetCString(DirectFunctionCall1(interval_out,
+ PointerGetDatum(zone))))));
+
+ tz = zone->time / USECS_PER_SEC;
+
+ result = dt2local(timestamp, tz);
+
+ if (!IS_VALID_TIMESTAMP(result))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ PG_RETURN_TIMESTAMPTZ(result);
+} /* timestamp_izone() */
+
+/* TimestampTimestampTzRequiresRewrite()
+ *
+ * Returns false if the TimeZone GUC setting causes timestamp_timestamptz and
+ * timestamptz_timestamp to be no-ops, where the return value has the same
+ * bits as the argument. Since project convention is to assume a GUC changes
+ * no more often than STABLE functions change, the answer is valid that long.
+ */
+bool
+TimestampTimestampTzRequiresRewrite(void)
+{
+ long offset;
+
+ if (pg_get_timezone_offset(session_timezone, &offset) && offset == 0)
+ return false;
+ return true;
+}
+
+/* timestamp_timestamptz()
+ * Convert local timestamp to timestamp at GMT
+ */
+Datum
+timestamp_timestamptz(PG_FUNCTION_ARGS)
+{
+ Timestamp timestamp = PG_GETARG_TIMESTAMP(0);
+
+ PG_RETURN_TIMESTAMPTZ(timestamp2timestamptz(timestamp));
+}
+
+/*
+ * Convert timestamp to timestamp with time zone.
+ *
+ * On successful conversion, *overflow is set to zero if it's not NULL.
+ *
+ * If the timestamp is finite but out of the valid range for timestamptz, then:
+ * if overflow is NULL, we throw an out-of-range error.
+ * if overflow is not NULL, we store +1 or -1 there to indicate the sign
+ * of the overflow, and return the appropriate timestamptz infinity.
+ */
+TimestampTz
+timestamp2timestamptz_opt_overflow(Timestamp timestamp, int *overflow)
+{
+ TimestampTz result;
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+ int tz;
+
+ if (overflow)
+ *overflow = 0;
+
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ return timestamp;
+
+ /* We don't expect this to fail, but check it pro forma */
+ if (timestamp2tm(timestamp, NULL, tm, &fsec, NULL, NULL) == 0)
+ {
+ tz = DetermineTimeZoneOffset(tm, session_timezone);
+
+ result = dt2local(timestamp, -tz);
+
+ if (IS_VALID_TIMESTAMP(result))
+ {
+ return result;
+ }
+ else if (overflow)
+ {
+ if (result < MIN_TIMESTAMP)
+ {
+ *overflow = -1;
+ TIMESTAMP_NOBEGIN(result);
+ }
+ else
+ {
+ *overflow = 1;
+ TIMESTAMP_NOEND(result);
+ }
+ return result;
+ }
+ }
+
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ return 0;
+}
+
+/*
+ * Promote timestamp to timestamptz, throwing error for overflow.
+ */
+static TimestampTz
+timestamp2timestamptz(Timestamp timestamp)
+{
+ return timestamp2timestamptz_opt_overflow(timestamp, NULL);
+}
+
+/* timestamptz_timestamp()
+ * Convert timestamp at GMT to local timestamp
+ */
+Datum
+timestamptz_timestamp(PG_FUNCTION_ARGS)
+{
+ TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(0);
+
+ PG_RETURN_TIMESTAMP(timestamptz2timestamp(timestamp));
+}
+
+static Timestamp
+timestamptz2timestamp(TimestampTz timestamp)
+{
+ Timestamp result;
+ struct pg_tm tt,
+ *tm = &tt;
+ fsec_t fsec;
+ int tz;
+
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ result = timestamp;
+ else
+ {
+ if (timestamp2tm(timestamp, &tz, tm, &fsec, NULL, NULL) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ if (tm2timestamp(tm, fsec, NULL, &result) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ }
+ return result;
+}
+
+/* timestamptz_zone()
+ * Evaluate timestamp with time zone type at the specified time zone.
+ * Returns a timestamp without time zone.
+ */
+Datum
+timestamptz_zone(PG_FUNCTION_ARGS)
+{
+ text *zone = PG_GETARG_TEXT_PP(0);
+ TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(1);
+ Timestamp result;
+ int tz;
+ char tzname[TZ_STRLEN_MAX + 1];
+ char *lowzone;
+ int type,
+ val;
+ pg_tz *tzp;
+
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ PG_RETURN_TIMESTAMP(timestamp);
+
+ /*
+ * Look up the requested timezone. First we look in the timezone
+ * abbreviation table (to handle cases like "EST"), and if that fails, we
+ * look in the timezone database (to handle cases like
+ * "America/New_York"). (This matches the order in which timestamp input
+ * checks the cases; it's important because the timezone database unwisely
+ * uses a few zone names that are identical to offset abbreviations.)
+ */
+ text_to_cstring_buffer(zone, tzname, sizeof(tzname));
+
+ /* DecodeTimezoneAbbrev requires lowercase input */
+ lowzone = downcase_truncate_identifier(tzname,
+ strlen(tzname),
+ false);
+
+ type = DecodeTimezoneAbbrev(0, lowzone, &val, &tzp);
+
+ if (type == TZ || type == DTZ)
+ {
+ /* fixed-offset abbreviation */
+ tz = -val;
+ result = dt2local(timestamp, tz);
+ }
+ else if (type == DYNTZ)
+ {
+ /* dynamic-offset abbreviation, resolve using specified time */
+ int isdst;
+
+ tz = DetermineTimeZoneAbbrevOffsetTS(timestamp, tzname, tzp, &isdst);
+ result = dt2local(timestamp, tz);
+ }
+ else
+ {
+ /* try it as a full zone name */
+ tzp = pg_tzset(tzname);
+ if (tzp)
+ {
+ /* Apply the timezone change */
+ struct pg_tm tm;
+ fsec_t fsec;
+
+ if (timestamp2tm(timestamp, &tz, &tm, &fsec, NULL, tzp) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ if (tm2timestamp(&tm, fsec, NULL, &result) != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+ }
+ else
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("time zone \"%s\" not recognized", tzname)));
+ result = 0; /* keep compiler quiet */
+ }
+ }
+
+ if (!IS_VALID_TIMESTAMP(result))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ PG_RETURN_TIMESTAMP(result);
+}
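+
+/*
+ * Illustrative example (not part of the original source):
+ *   SELECT TIMESTAMPTZ '2001-02-16 20:38:40-05' AT TIME ZONE 'America/Denver';
+ * returns the local time 2001-02-16 18:38:40 (a timestamp without time zone).
+ */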
+
+/* timestamptz_izone()
+ * Encode timestamp with time zone type with specified time interval as time zone.
+ * Returns a timestamp without time zone.
+ */
+Datum
+timestamptz_izone(PG_FUNCTION_ARGS)
+{
+ Interval *zone = PG_GETARG_INTERVAL_P(0);
+ TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(1);
+ Timestamp result;
+ int tz;
+
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ PG_RETURN_TIMESTAMP(timestamp);
+
+ if (zone->month != 0 || zone->day != 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("interval time zone \"%s\" must not include months or days",
+ DatumGetCString(DirectFunctionCall1(interval_out,
+ PointerGetDatum(zone))))));
+
+ tz = -(zone->time / USECS_PER_SEC);
+
+ result = dt2local(timestamp, tz);
+
+ if (!IS_VALID_TIMESTAMP(result))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ PG_RETURN_TIMESTAMP(result);
+}
+
+/* generate_series_timestamp()
+ * Generate the set of timestamps from start to finish by step
+ */
+Datum
+generate_series_timestamp(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ generate_series_timestamp_fctx *fctx;
+ Timestamp result;
+
+ /* stuff done only on the first call of the function */
+ if (SRF_IS_FIRSTCALL())
+ {
+ Timestamp start = PG_GETARG_TIMESTAMP(0);
+ Timestamp finish = PG_GETARG_TIMESTAMP(1);
+ Interval *step = PG_GETARG_INTERVAL_P(2);
+ MemoryContext oldcontext;
+ Interval interval_zero;
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ /*
+ * switch to memory context appropriate for multiple function calls
+ */
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ /* allocate memory for user context */
+ fctx = (generate_series_timestamp_fctx *)
+ palloc(sizeof(generate_series_timestamp_fctx));
+
+ /*
+ * Use fctx to keep state from call to call. Seed current with the
+ * original start value
+ */
+ fctx->current = start;
+ fctx->finish = finish;
+ fctx->step = *step;
+
+ /* Determine sign of the interval */
+ MemSet(&interval_zero, 0, sizeof(Interval));
+ fctx->step_sign = interval_cmp_internal(&fctx->step, &interval_zero);
+
+ if (fctx->step_sign == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("step size cannot equal zero")));
+
+ funcctx->user_fctx = fctx;
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ /* stuff done on every call of the function */
+ funcctx = SRF_PERCALL_SETUP();
+
+ /*
+ * get the saved state and use current as the result for this iteration
+ */
+ fctx = funcctx->user_fctx;
+ result = fctx->current;
+
+ if (fctx->step_sign > 0 ?
+ timestamp_cmp_internal(result, fctx->finish) <= 0 :
+ timestamp_cmp_internal(result, fctx->finish) >= 0)
+ {
+ /* increment current in preparation for next iteration */
+ fctx->current = DatumGetTimestamp(DirectFunctionCall2(timestamp_pl_interval,
+ TimestampGetDatum(fctx->current),
+ PointerGetDatum(&fctx->step)));
+
+ /* do when there is more left to send */
+ SRF_RETURN_NEXT(funcctx, TimestampGetDatum(result));
+ }
+ else
+ {
+ /* do when there is no more left */
+ SRF_RETURN_DONE(funcctx);
+ }
+}
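+
+/*
+ * Illustrative example (not part of the original source):
+ *   SELECT * FROM generate_series(TIMESTAMP '2008-03-01',
+ *                                 TIMESTAMP '2008-03-04', '10 hours');
+ * returns 8 rows, from 2008-03-01 00:00:00 through 2008-03-03 22:00:00.
+ */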
+
+/* generate_series_timestamptz()
+ * Generate the set of timestamps from start to finish by step
+ */
+Datum
+generate_series_timestamptz(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ generate_series_timestamptz_fctx *fctx;
+ TimestampTz result;
+
+ /* stuff done only on the first call of the function */
+ if (SRF_IS_FIRSTCALL())
+ {
+ TimestampTz start = PG_GETARG_TIMESTAMPTZ(0);
+ TimestampTz finish = PG_GETARG_TIMESTAMPTZ(1);
+ Interval *step = PG_GETARG_INTERVAL_P(2);
+ MemoryContext oldcontext;
+ Interval interval_zero;
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ /*
+ * switch to memory context appropriate for multiple function calls
+ */
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ /* allocate memory for user context */
+ fctx = (generate_series_timestamptz_fctx *)
+ palloc(sizeof(generate_series_timestamptz_fctx));
+
+ /*
+ * Use fctx to keep state from call to call. Seed current with the
+ * original start value
+ */
+ fctx->current = start;
+ fctx->finish = finish;
+ fctx->step = *step;
+
+ /* Determine sign of the interval */
+ MemSet(&interval_zero, 0, sizeof(Interval));
+ fctx->step_sign = interval_cmp_internal(&fctx->step, &interval_zero);
+
+ if (fctx->step_sign == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("step size cannot equal zero")));
+
+ funcctx->user_fctx = fctx;
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ /* stuff done on every call of the function */
+ funcctx = SRF_PERCALL_SETUP();
+
+ /*
+ * get the saved state and use current as the result for this iteration
+ */
+ fctx = funcctx->user_fctx;
+ result = fctx->current;
+
+ if (fctx->step_sign > 0 ?
+ timestamp_cmp_internal(result, fctx->finish) <= 0 :
+ timestamp_cmp_internal(result, fctx->finish) >= 0)
+ {
+ /* increment current in preparation for next iteration */
+ fctx->current = DatumGetTimestampTz(DirectFunctionCall2(timestamptz_pl_interval,
+ TimestampTzGetDatum(fctx->current),
+ PointerGetDatum(&fctx->step)));
+
+ /* do when there is more left to send */
+ SRF_RETURN_NEXT(funcctx, TimestampTzGetDatum(result));
+ }
+ else
+ {
+ /* do when there is no more left */
+ SRF_RETURN_DONE(funcctx);
+ }
+}
diff --git a/src/backend/utils/adt/trigfuncs.c b/src/backend/utils/adt/trigfuncs.c
new file mode 100644
index 0000000..71b16ca
--- /dev/null
+++ b/src/backend/utils/adt/trigfuncs.c
@@ -0,0 +1,85 @@
+/*-------------------------------------------------------------------------
+ *
+ * trigfuncs.c
+ * Builtin functions for useful trigger support.
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/utils/adt/trigfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "commands/trigger.h"
+#include "utils/builtins.h"
+#include "utils/rel.h"
+
+
+/*
+ * suppress_redundant_updates_trigger
+ *
+ * This trigger function will inhibit an update from being done
+ * if the OLD and NEW records are identical.
+ */
+Datum
+suppress_redundant_updates_trigger(PG_FUNCTION_ARGS)
+{
+ TriggerData *trigdata = (TriggerData *) fcinfo->context;
+ HeapTuple newtuple,
+ oldtuple,
+ rettuple;
+ HeapTupleHeader newheader,
+ oldheader;
+
+ /* make sure it's called as a trigger */
+ if (!CALLED_AS_TRIGGER(fcinfo))
+ ereport(ERROR,
+ (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
+ errmsg("suppress_redundant_updates_trigger: must be called as trigger")));
+
+ /* and that it's called on update */
+ if (!TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+ ereport(ERROR,
+ (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
+ errmsg("suppress_redundant_updates_trigger: must be called on update")));
+
+ /* and that it's called before update */
+ if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
+ ereport(ERROR,
+ (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
+ errmsg("suppress_redundant_updates_trigger: must be called before update")));
+
+ /* and that it's called for each row */
+ if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
+ ereport(ERROR,
+ (errcode(ERRCODE_E_R_I_E_TRIGGER_PROTOCOL_VIOLATED),
+ errmsg("suppress_redundant_updates_trigger: must be called for each row")));
+
+ /* get tuple data, set default result */
+ rettuple = newtuple = trigdata->tg_newtuple;
+ oldtuple = trigdata->tg_trigtuple;
+
+ newheader = newtuple->t_data;
+ oldheader = oldtuple->t_data;
+
+ /* if the tuple payload is the same ... */
+ if (newtuple->t_len == oldtuple->t_len &&
+ newheader->t_hoff == oldheader->t_hoff &&
+ (HeapTupleHeaderGetNatts(newheader) ==
+ HeapTupleHeaderGetNatts(oldheader)) &&
+ ((newheader->t_infomask & ~HEAP_XACT_MASK) ==
+ (oldheader->t_infomask & ~HEAP_XACT_MASK)) &&
+ memcmp(((char *) newheader) + SizeofHeapTupleHeader,
+ ((char *) oldheader) + SizeofHeapTupleHeader,
+ newtuple->t_len - SizeofHeapTupleHeader) == 0)
+ {
+ /* ... then suppress the update */
+ rettuple = NULL;
+ }
+
+ return PointerGetDatum(rettuple);
+}
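+
+/*
+ * Illustrative usage (not part of the original source):
+ *   CREATE TRIGGER z_min_update
+ *     BEFORE UPDATE ON tablename
+ *     FOR EACH ROW EXECUTE FUNCTION suppress_redundant_updates_trigger();
+ * Naming the trigger so that it sorts last ensures other BEFORE UPDATE row
+ * triggers have already run before OLD and NEW are compared.
+ */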
diff --git a/src/backend/utils/adt/tsginidx.c b/src/backend/utils/adt/tsginidx.c
new file mode 100644
index 0000000..e272fca
--- /dev/null
+++ b/src/backend/utils/adt/tsginidx.c
@@ -0,0 +1,356 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsginidx.c
+ * GIN support functions for tsvector_ops
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/tsginidx.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/gin.h"
+#include "access/stratnum.h"
+#include "miscadmin.h"
+#include "tsearch/ts_type.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+
+
+Datum
+gin_cmp_tslexeme(PG_FUNCTION_ARGS)
+{
+ text *a = PG_GETARG_TEXT_PP(0);
+ text *b = PG_GETARG_TEXT_PP(1);
+ int cmp;
+
+ cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
+ VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
+ false);
+
+ PG_FREE_IF_COPY(a, 0);
+ PG_FREE_IF_COPY(b, 1);
+ PG_RETURN_INT32(cmp);
+}
+
+Datum
+gin_cmp_prefix(PG_FUNCTION_ARGS)
+{
+ text *a = PG_GETARG_TEXT_PP(0);
+ text *b = PG_GETARG_TEXT_PP(1);
+
+#ifdef NOT_USED
+ StrategyNumber strategy = PG_GETARG_UINT16(2);
+ Pointer extra_data = PG_GETARG_POINTER(3);
+#endif
+ int cmp;
+
+ cmp = tsCompareString(VARDATA_ANY(a), VARSIZE_ANY_EXHDR(a),
+ VARDATA_ANY(b), VARSIZE_ANY_EXHDR(b),
+ true);
+
+ if (cmp < 0)
+ cmp = 1; /* prevent continuing the scan */
+
+ PG_FREE_IF_COPY(a, 0);
+ PG_FREE_IF_COPY(b, 1);
+ PG_RETURN_INT32(cmp);
+}
+
+Datum
+gin_extract_tsvector(PG_FUNCTION_ARGS)
+{
+ TSVector vector = PG_GETARG_TSVECTOR(0);
+ int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
+ Datum *entries = NULL;
+
+ *nentries = vector->size;
+ if (vector->size > 0)
+ {
+ int i;
+ WordEntry *we = ARRPTR(vector);
+
+ entries = (Datum *) palloc(sizeof(Datum) * vector->size);
+
+ for (i = 0; i < vector->size; i++)
+ {
+ text *txt;
+
+ txt = cstring_to_text_with_len(STRPTR(vector) + we->pos, we->len);
+ entries[i] = PointerGetDatum(txt);
+
+ we++;
+ }
+ }
+
+ PG_FREE_IF_COPY(vector, 0);
+ PG_RETURN_POINTER(entries);
+}
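+
+/*
+ * Illustrative note (not part of the original source): each lexeme in the
+ * tsvector becomes one text Datum in the returned array, so GIN indexes the
+ * individual lexemes as its keys.
+ */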
+
+Datum
+gin_extract_tsquery(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY(0);
+ int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
+
+ /* StrategyNumber strategy = PG_GETARG_UINT16(2); */
+ bool **ptr_partialmatch = (bool **) PG_GETARG_POINTER(3);
+ Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
+
+ /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
+ int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
+ Datum *entries = NULL;
+
+ *nentries = 0;
+
+ if (query->size > 0)
+ {
+ QueryItem *item = GETQUERY(query);
+ int32 i,
+ j;
+ bool *partialmatch;
+ int *map_item_operand;
+
+ /*
+ * If the query doesn't have any required positive matches (for
+ * instance, it's something like '! foo'), we have to do a full index
+ * scan.
+ */
+ if (tsquery_requires_match(item))
+ *searchMode = GIN_SEARCH_MODE_DEFAULT;
+ else
+ *searchMode = GIN_SEARCH_MODE_ALL;
+
+ /* count number of VAL items */
+ j = 0;
+ for (i = 0; i < query->size; i++)
+ {
+ if (item[i].type == QI_VAL)
+ j++;
+ }
+ *nentries = j;
+
+ entries = (Datum *) palloc(sizeof(Datum) * j);
+ partialmatch = *ptr_partialmatch = (bool *) palloc(sizeof(bool) * j);
+
+ /*
+ * Make map to convert item's number to corresponding operand's (the
+ * same, entry's) number. Entry's number is used in check array in
+ * consistent method. We use the same map for each entry.
+ */
+ *extra_data = (Pointer *) palloc(sizeof(Pointer) * j);
+ map_item_operand = (int *) palloc0(sizeof(int) * query->size);
+
+ /* Now rescan the VAL items and fill in the arrays */
+ j = 0;
+ for (i = 0; i < query->size; i++)
+ {
+ if (item[i].type == QI_VAL)
+ {
+ QueryOperand *val = &item[i].qoperand;
+ text *txt;
+
+ txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance,
+ val->length);
+ entries[j] = PointerGetDatum(txt);
+ partialmatch[j] = val->prefix;
+ (*extra_data)[j] = (Pointer) map_item_operand;
+ map_item_operand[i] = j;
+ j++;
+ }
+ }
+ }
+
+ PG_FREE_IF_COPY(query, 0);
+
+ PG_RETURN_POINTER(entries);
+}
+
+typedef struct
+{
+ QueryItem *first_item;
+ GinTernaryValue *check;
+ int *map_item_operand;
+} GinChkVal;
+
+/*
+ * TS_execute callback for matching a tsquery operand to GIN index data
+ */
+static TSTernaryValue
+checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
+{
+ GinChkVal *gcv = (GinChkVal *) checkval;
+ int j;
+ GinTernaryValue result;
+
+ /* convert item's number to corresponding entry's (operand's) number */
+ j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
+
+ /* determine presence of current entry in indexed value */
+ result = gcv->check[j];
+
+ /*
+ * If any val requiring a weight is used or caller needs position
+ * information then we must recheck, so replace TRUE with MAYBE.
+ */
+ if (result == GIN_TRUE)
+ {
+ if (val->weight != 0 || data != NULL)
+ result = GIN_MAYBE;
+ }
+
+ /*
+ * We rely on GinTernaryValue and TSTernaryValue using equivalent value
+ * assignments. We could use a switch statement to map the values if that
+ * ever stops being true, but it seems unlikely to happen.
+ */
+ return (TSTernaryValue) result;
+}
+
+Datum
+gin_tsquery_consistent(PG_FUNCTION_ARGS)
+{
+ bool *check = (bool *) PG_GETARG_POINTER(0);
+
+ /* StrategyNumber strategy = PG_GETARG_UINT16(1); */
+ TSQuery query = PG_GETARG_TSQUERY(2);
+
+ /* int32 nkeys = PG_GETARG_INT32(3); */
+ Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
+ bool *recheck = (bool *) PG_GETARG_POINTER(5);
+ bool res = false;
+
+ /* Initially assume query doesn't require recheck */
+ *recheck = false;
+
+ if (query->size > 0)
+ {
+ GinChkVal gcv;
+
+ /*
+ * check-parameter array has one entry for each value (operand) in the
+ * query.
+ */
+ gcv.first_item = GETQUERY(query);
+ StaticAssertStmt(sizeof(GinTernaryValue) == sizeof(bool),
+ "sizes of GinTernaryValue and bool are not equal");
+ gcv.check = (GinTernaryValue *) check;
+ gcv.map_item_operand = (int *) (extra_data[0]);
+
+ switch (TS_execute_ternary(GETQUERY(query),
+ &gcv,
+ TS_EXEC_PHRASE_NO_POS,
+ checkcondition_gin))
+ {
+ case TS_NO:
+ res = false;
+ break;
+ case TS_YES:
+ res = true;
+ break;
+ case TS_MAYBE:
+ res = true;
+ *recheck = true;
+ break;
+ }
+ }
+
+ PG_RETURN_BOOL(res);
+}
+
+Datum
+gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
+{
+ GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
+
+ /* StrategyNumber strategy = PG_GETARG_UINT16(1); */
+ TSQuery query = PG_GETARG_TSQUERY(2);
+
+ /* int32 nkeys = PG_GETARG_INT32(3); */
+ Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
+ GinTernaryValue res = GIN_FALSE;
+
+ if (query->size > 0)
+ {
+ GinChkVal gcv;
+
+ /*
+ * check-parameter array has one entry for each value (operand) in the
+ * query.
+ */
+ gcv.first_item = GETQUERY(query);
+ gcv.check = check;
+ gcv.map_item_operand = (int *) (extra_data[0]);
+
+ res = TS_execute_ternary(GETQUERY(query),
+ &gcv,
+ TS_EXEC_PHRASE_NO_POS,
+ checkcondition_gin);
+ }
+
+ PG_RETURN_GIN_TERNARY_VALUE(res);
+}
+
+/*
+ * Formerly, gin_extract_tsvector had only two arguments. Now it has three,
+ * but we still need a pg_proc entry with two args to support reloading
+ * pre-9.1 contrib/tsearch2 opclass declarations. This compatibility
+ * function should go away eventually. (Note: you might say "hey, but the
+ * code above is only *using* two args, so let's just declare it that way".
+ * If you try that you'll find the opr_sanity regression test complains.)
+ */
+Datum
+gin_extract_tsvector_2args(PG_FUNCTION_ARGS)
+{
+ if (PG_NARGS() < 3) /* should not happen */
+ elog(ERROR, "gin_extract_tsvector requires three arguments");
+ return gin_extract_tsvector(fcinfo);
+}
+
+/*
+ * Likewise, we need a stub version of gin_extract_tsquery declared with
+ * only five arguments.
+ */
+Datum
+gin_extract_tsquery_5args(PG_FUNCTION_ARGS)
+{
+ if (PG_NARGS() < 7) /* should not happen */
+ elog(ERROR, "gin_extract_tsquery requires seven arguments");
+ return gin_extract_tsquery(fcinfo);
+}
+
+/*
+ * Likewise, we need a stub version of gin_tsquery_consistent declared with
+ * only six arguments.
+ */
+Datum
+gin_tsquery_consistent_6args(PG_FUNCTION_ARGS)
+{
+ if (PG_NARGS() < 8) /* should not happen */
+ elog(ERROR, "gin_tsquery_consistent requires eight arguments");
+ return gin_tsquery_consistent(fcinfo);
+}
+
+/*
+ * Likewise, a stub version of gin_extract_tsquery declared with argument
+ * types that are no longer considered appropriate.
+ */
+Datum
+gin_extract_tsquery_oldsig(PG_FUNCTION_ARGS)
+{
+ return gin_extract_tsquery(fcinfo);
+}
+
+/*
+ * Likewise, a stub version of gin_tsquery_consistent declared with argument
+ * types that are no longer considered appropriate.
+ */
+Datum
+gin_tsquery_consistent_oldsig(PG_FUNCTION_ARGS)
+{
+ return gin_tsquery_consistent(fcinfo);
+}
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c
new file mode 100644
index 0000000..2c5617a
--- /dev/null
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -0,0 +1,816 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsgistidx.c
+ * GiST support functions for tsvector_ops
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/tsgistidx.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/heaptoast.h"
+#include "access/reloptions.h"
+#include "lib/qunique.h"
+#include "port/pg_bitutils.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+#include "utils/pg_crc.h"
+
+
+/* tsvector_ops opclass options */
+typedef struct
+{
+ int32 vl_len_; /* varlena header (do not touch directly!) */
+ int siglen; /* signature length */
+} GistTsVectorOptions;
+
+#define SIGLEN_DEFAULT (31 * 4)
+#define SIGLEN_MAX GISTMaxIndexKeySize
+#define GET_SIGLEN() (PG_HAS_OPCLASS_OPTIONS() ? \
+ ((GistTsVectorOptions *) PG_GET_OPCLASS_OPTIONS())->siglen : \
+ SIGLEN_DEFAULT)
+
+#define SIGLENBIT(siglen) ((siglen) * BITS_PER_BYTE)
+
+typedef char *BITVECP;
+
+#define LOOPBYTE(siglen) \
+ for (i = 0; i < siglen; i++)
+
+#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITS_PER_BYTE ) ) )
+#define GETBITBYTE(x,i) ( ((char)(x)) >> (i) & 0x01 )
+#define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITS_PER_BYTE ) )
+#define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITS_PER_BYTE ) )
+#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITS_PER_BYTE )) & 0x01 )
+
+#define HASHVAL(val, siglen) (((unsigned int)(val)) % SIGLENBIT(siglen))
+#define HASH(sign, val, siglen) SETBIT((sign), HASHVAL(val, siglen))
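+
+/*
+ * For illustration: with the default siglen of 124 bytes (992 bits), a
+ * lexeme whose CRC is 123456 sets bit 123456 % 992 = 448 of the signature.
+ */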
+
+#define GETENTRY(vec,pos) ((SignTSVector *) DatumGetPointer((vec)->vector[(pos)].key))
+
+/*
+ * type of GiST index key
+ */
+
+typedef struct
+{
+ int32 vl_len_; /* varlena header (do not touch directly!) */
+ int32 flag;
+ char data[FLEXIBLE_ARRAY_MEMBER];
+} SignTSVector;
+
+#define ARRKEY 0x01
+#define SIGNKEY 0x02
+#define ALLISTRUE 0x04
+
+#define ISARRKEY(x) ( ((SignTSVector*)(x))->flag & ARRKEY )
+#define ISSIGNKEY(x) ( ((SignTSVector*)(x))->flag & SIGNKEY )
+#define ISALLTRUE(x) ( ((SignTSVector*)(x))->flag & ALLISTRUE )
+
+#define GTHDRSIZE ( VARHDRSZ + sizeof(int32) )
+#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int32)) : (((flag) & ALLISTRUE) ? 0 : (len)) ) )
+
+#define GETSIGN(x) ( (BITVECP)( (char*)(x)+GTHDRSIZE ) )
+#define GETSIGLEN(x)( VARSIZE(x) - GTHDRSIZE )
+#define GETARR(x) ( (int32*)( (char*)(x)+GTHDRSIZE ) )
+#define ARRNELEM(x) ( ( VARSIZE(x) - GTHDRSIZE )/sizeof(int32) )
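+
+/*
+ * A SignTSVector key comes in two forms: an ARRKEY is a sorted array of
+ * 32-bit lexeme CRCs (used for leaf entries that are small enough), while a
+ * SIGNKEY is a fixed-length bit signature into which those CRCs are hashed.
+ * A SIGNKEY with ALLISTRUE set represents a signature with every bit set and
+ * stores no bitmap at all.
+ */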
+
+static int32 sizebitvec(BITVECP sign, int siglen);
+
+Datum
+gtsvectorin(PG_FUNCTION_ARGS)
+{
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("gtsvector_in not implemented")));
+ PG_RETURN_DATUM(0);
+}
+
+#define SINGOUTSTR "%d true bits, %d false bits"
+#define ARROUTSTR "%d unique words"
+#define EXTRALEN ( 2*13 )
+
+static int outbuf_maxlen = 0;
+
+Datum
+gtsvectorout(PG_FUNCTION_ARGS)
+{
+ SignTSVector *key = (SignTSVector *) PG_DETOAST_DATUM(PG_GETARG_POINTER(0));
+ char *outbuf;
+
+ if (outbuf_maxlen == 0)
+ outbuf_maxlen = 2 * EXTRALEN + Max(strlen(SINGOUTSTR), strlen(ARROUTSTR)) + 1;
+ outbuf = palloc(outbuf_maxlen);
+
+ if (ISARRKEY(key))
+ sprintf(outbuf, ARROUTSTR, (int) ARRNELEM(key));
+ else
+ {
+ int siglen = GETSIGLEN(key);
+ int cnttrue = (ISALLTRUE(key)) ? SIGLENBIT(siglen) : sizebitvec(GETSIGN(key), siglen);
+
+ sprintf(outbuf, SINGOUTSTR, cnttrue, (int) SIGLENBIT(siglen) - cnttrue);
+ }
+
+ PG_FREE_IF_COPY(key, 0);
+ PG_RETURN_POINTER(outbuf);
+}
+
+static int
+compareint(const void *va, const void *vb)
+{
+ int32 a = *((const int32 *) va);
+ int32 b = *((const int32 *) vb);
+
+ if (a == b)
+ return 0;
+ return (a > b) ? 1 : -1;
+}
+
+static void
+makesign(BITVECP sign, SignTSVector *a, int siglen)
+{
+ int32 k,
+ len = ARRNELEM(a);
+ int32 *ptr = GETARR(a);
+
+ MemSet((void *) sign, 0, siglen);
+ for (k = 0; k < len; k++)
+ HASH(sign, ptr[k], siglen);
+}
+
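+/*
+ * Allocate a SignTSVector with the given flag bits; 'len' is the number of
+ * CRC entries for an ARRKEY, or the signature length in bytes for a SIGNKEY.
+ * For a plain (not ALLISTRUE) SIGNKEY, the supplied signature, if any, is
+ * copied into the result.
+ */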
+static SignTSVector *
+gtsvector_alloc(int flag, int len, BITVECP sign)
+{
+ int size = CALCGTSIZE(flag, len);
+ SignTSVector *res = palloc(size);
+
+ SET_VARSIZE(res, size);
+ res->flag = flag;
+
+ if ((flag & (SIGNKEY | ALLISTRUE)) == SIGNKEY && sign)
+ memcpy(GETSIGN(res), sign, len);
+
+ return res;
+}
+
+
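+/*
+ * GiST compress method.  A leaf tsvector is converted to an ARRKEY holding
+ * the sorted, de-duplicated CRCs of its lexemes; if that array would exceed
+ * TOAST_INDEX_TARGET it is folded into a SIGNKEY signature instead.  For
+ * internal entries, a SIGNKEY whose bits are all set is replaced by the
+ * compact ALLISTRUE form.
+ */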
+Datum
+gtsvector_compress(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+ int siglen = GET_SIGLEN();
+ GISTENTRY *retval = entry;
+
+ if (entry->leafkey)
+ { /* tsvector */
+ TSVector val = DatumGetTSVector(entry->key);
+ SignTSVector *res = gtsvector_alloc(ARRKEY, val->size, NULL);
+ int32 len;
+ int32 *arr;
+ WordEntry *ptr = ARRPTR(val);
+ char *words = STRPTR(val);
+
+ arr = GETARR(res);
+ len = val->size;
+ while (len--)
+ {
+ pg_crc32 c;
+
+ INIT_LEGACY_CRC32(c);
+ COMP_LEGACY_CRC32(c, words + ptr->pos, ptr->len);
+ FIN_LEGACY_CRC32(c);
+
+ *arr = *(int32 *) &c;
+ arr++;
+ ptr++;
+ }
+
+ qsort(GETARR(res), val->size, sizeof(int), compareint);
+ len = qunique(GETARR(res), val->size, sizeof(int), compareint);
+ if (len != val->size)
+ {
+ /*
+ * There was a hash-function collision, so the array shrank; len is
+ * always less than val->size here.
+ */
+ len = CALCGTSIZE(ARRKEY, len);
+ res = (SignTSVector *) repalloc((void *) res, len);
+ SET_VARSIZE(res, len);
+ }
+
+ /* make a signature if the array is too long */
+ if (VARSIZE(res) > TOAST_INDEX_TARGET)
+ {
+ SignTSVector *ressign = gtsvector_alloc(SIGNKEY, siglen, NULL);
+
+ makesign(GETSIGN(ressign), res, siglen);
+ res = ressign;
+ }
+
+ retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+ gistentryinit(*retval, PointerGetDatum(res),
+ entry->rel, entry->page,
+ entry->offset, false);
+ }
+ else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
+ !ISALLTRUE(DatumGetPointer(entry->key)))
+ {
+ int32 i;
+ SignTSVector *res;
+ BITVECP sign = GETSIGN(DatumGetPointer(entry->key));
+
+ LOOPBYTE(siglen)
+ {
+ if ((sign[i] & 0xff) != 0xff)
+ PG_RETURN_POINTER(retval);
+ }
+
+ res = gtsvector_alloc(SIGNKEY | ALLISTRUE, siglen, sign);
+ retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+ gistentryinit(*retval, PointerGetDatum(res),
+ entry->rel, entry->page,
+ entry->offset, false);
+ }
+ PG_RETURN_POINTER(retval);
+}
+
+Datum
+gtsvector_decompress(PG_FUNCTION_ARGS)
+{
+ /*
+ * We need to detoast the stored value, because the other gtsvector
+ * support functions don't cope with toasted values.
+ */
+ GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+ SignTSVector *key = (SignTSVector *) PG_DETOAST_DATUM(entry->key);
+
+ if (key != (SignTSVector *) DatumGetPointer(entry->key))
+ {
+ GISTENTRY *retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+
+ gistentryinit(*retval, PointerGetDatum(key),
+ entry->rel, entry->page,
+ entry->offset, false);
+
+ PG_RETURN_POINTER(retval);
+ }
+
+ PG_RETURN_POINTER(entry);
+}
+
+typedef struct
+{
+ int32 *arrb;
+ int32 *arre;
+} CHKVAL;
+
+/*
+ * TS_execute callback for matching a tsquery operand to GIST leaf-page data
+ */
+static TSTernaryValue
+checkcondition_arr(void *checkval, QueryOperand *val, ExecPhraseData *data)
+{
+ int32 *StopLow = ((CHKVAL *) checkval)->arrb;
+ int32 *StopHigh = ((CHKVAL *) checkval)->arre;
+ int32 *StopMiddle;
+
+ /* Loop invariant: StopLow <= val < StopHigh */
+
+ /*
+ * We have no way to evaluate a prefix match against hashed lexeme
+ * values, so report MAYBE.
+ */
+ if (val->prefix)
+ return TS_MAYBE;
+
+ while (StopLow < StopHigh)
+ {
+ StopMiddle = StopLow + (StopHigh - StopLow) / 2;
+ if (*StopMiddle == val->valcrc)
+ return TS_MAYBE;
+ else if (*StopMiddle < val->valcrc)
+ StopLow = StopMiddle + 1;
+ else
+ StopHigh = StopMiddle;
+ }
+
+ return TS_NO;
+}
+
+/*
+ * TS_execute callback for matching a tsquery operand to GIST non-leaf data
+ */
+static TSTernaryValue
+checkcondition_bit(void *checkval, QueryOperand *val, ExecPhraseData *data)
+{
+ void *key = (SignTSVector *) checkval;
+
+ /*
+ * We have no way to evaluate a prefix match against a bit signature,
+ * so report MAYBE.
+ */
+ if (val->prefix)
+ return TS_MAYBE;
+
+ if (GETBIT(GETSIGN(key), HASHVAL(val->valcrc, GETSIGLEN(key))))
+ return TS_MAYBE;
+ else
+ return TS_NO;
+}
+
+Datum
+gtsvector_consistent(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+ TSQuery query = PG_GETARG_TSQUERY(1);
+
+ /* StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); */
+ /* Oid subtype = PG_GETARG_OID(3); */
+ bool *recheck = (bool *) PG_GETARG_POINTER(4);
+ SignTSVector *key = (SignTSVector *) DatumGetPointer(entry->key);
+
+ /* All cases served by this function are inexact */
+ *recheck = true;
+
+ if (!query->size)
+ PG_RETURN_BOOL(false);
+
+ if (ISSIGNKEY(key))
+ {
+ if (ISALLTRUE(key))
+ PG_RETURN_BOOL(true);
+
+ PG_RETURN_BOOL(TS_execute(GETQUERY(query),
+ key,
+ TS_EXEC_PHRASE_NO_POS,
+ checkcondition_bit));
+ }
+ else
+ { /* only leaf pages */
+ CHKVAL chkval;
+
+ chkval.arrb = GETARR(key);
+ chkval.arre = chkval.arrb + ARRNELEM(key);
+ PG_RETURN_BOOL(TS_execute(GETQUERY(query),
+ (void *) &chkval,
+ TS_EXEC_PHRASE_NO_POS,
+ checkcondition_arr));
+ }
+}
+
+static int32
+unionkey(BITVECP sbase, SignTSVector *add, int siglen)
+{
+ int32 i;
+
+ if (ISSIGNKEY(add))
+ {
+ BITVECP sadd = GETSIGN(add);
+
+ if (ISALLTRUE(add))
+ return 1;
+
+ Assert(GETSIGLEN(add) == siglen);
+
+ LOOPBYTE(siglen)
+ sbase[i] |= sadd[i];
+ }
+ else
+ {
+ int32 *ptr = GETARR(add);
+
+ for (i = 0; i < ARRNELEM(add); i++)
+ HASH(sbase, ptr[i], siglen);
+ }
+ return 0;
+}
+
+
+Datum
+gtsvector_union(PG_FUNCTION_ARGS)
+{
+ GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+ int *size = (int *) PG_GETARG_POINTER(1);
+ int siglen = GET_SIGLEN();
+ SignTSVector *result = gtsvector_alloc(SIGNKEY, siglen, NULL);
+ BITVECP base = GETSIGN(result);
+ int32 i;
+
+ memset(base, 0, siglen);
+
+ for (i = 0; i < entryvec->n; i++)
+ {
+ if (unionkey(base, GETENTRY(entryvec, i), siglen))
+ {
+ result->flag |= ALLISTRUE;
+ SET_VARSIZE(result, CALCGTSIZE(result->flag, siglen));
+ break;
+ }
+ }
+
+ *size = VARSIZE(result);
+
+ PG_RETURN_POINTER(result);
+}
+
+Datum
+gtsvector_same(PG_FUNCTION_ARGS)
+{
+ SignTSVector *a = (SignTSVector *) PG_GETARG_POINTER(0);
+ SignTSVector *b = (SignTSVector *) PG_GETARG_POINTER(1);
+ bool *result = (bool *) PG_GETARG_POINTER(2);
+ int siglen = GET_SIGLEN();
+
+ if (ISSIGNKEY(a))
+ { /* then b also ISSIGNKEY */
+ if (ISALLTRUE(a) && ISALLTRUE(b))
+ *result = true;
+ else if (ISALLTRUE(a))
+ *result = false;
+ else if (ISALLTRUE(b))
+ *result = false;
+ else
+ {
+ int32 i;
+ BITVECP sa = GETSIGN(a),
+ sb = GETSIGN(b);
+
+ Assert(GETSIGLEN(a) == siglen && GETSIGLEN(b) == siglen);
+
+ *result = true;
+ LOOPBYTE(siglen)
+ {
+ if (sa[i] != sb[i])
+ {
+ *result = false;
+ break;
+ }
+ }
+ }
+ }
+ else
+ { /* a and b ISARRKEY */
+ int32 lena = ARRNELEM(a),
+ lenb = ARRNELEM(b);
+
+ if (lena != lenb)
+ *result = false;
+ else
+ {
+ int32 *ptra = GETARR(a),
+ *ptrb = GETARR(b);
+ int32 i;
+
+ *result = true;
+ for (i = 0; i < lena; i++)
+ if (ptra[i] != ptrb[i])
+ {
+ *result = false;
+ break;
+ }
+ }
+ }
+
+ PG_RETURN_POINTER(result);
+}
+
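+/*
+ * Count the set bits in a signature.
+ */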
+static int32
+sizebitvec(BITVECP sign, int siglen)
+{
+ return pg_popcount(sign, siglen);
+}
+
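+/*
+ * Hamming distance between two signatures, i.e. the number of differing bits.
+ */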
+static int
+hemdistsign(BITVECP a, BITVECP b, int siglen)
+{
+ int i,
+ diff,
+ dist = 0;
+
+ LOOPBYTE(siglen)
+ {
+ diff = (unsigned char) (a[i] ^ b[i]);
+ /* Using the popcount functions here isn't likely to win */
+ dist += pg_number_of_ones[diff];
+ }
+ return dist;
+}
+
+static int
+hemdist(SignTSVector *a, SignTSVector *b)
+{
+ int siglena = GETSIGLEN(a);
+ int siglenb = GETSIGLEN(b);
+
+ if (ISALLTRUE(a))
+ {
+ if (ISALLTRUE(b))
+ return 0;
+ else
+ return SIGLENBIT(siglenb) - sizebitvec(GETSIGN(b), siglenb);
+ }
+ else if (ISALLTRUE(b))
+ return SIGLENBIT(siglena) - sizebitvec(GETSIGN(a), siglena);
+
+ Assert(siglena == siglenb);
+
+ return hemdistsign(GETSIGN(a), GETSIGN(b), siglena);
+}
+
+Datum
+gtsvector_penalty(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
+ GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
+ float *penalty = (float *) PG_GETARG_POINTER(2);
+ int siglen = GET_SIGLEN();
+ SignTSVector *origval = (SignTSVector *) DatumGetPointer(origentry->key);
+ SignTSVector *newval = (SignTSVector *) DatumGetPointer(newentry->key);
+ BITVECP orig = GETSIGN(origval);
+
+ *penalty = 0.0;
+
+ if (ISARRKEY(newval))
+ {
+ BITVECP sign = palloc(siglen);
+
+ makesign(sign, newval, siglen);
+
+ if (ISALLTRUE(origval))
+ {
+ int siglenbit = SIGLENBIT(siglen);
+
+ *penalty =
+ (float) (siglenbit - sizebitvec(sign, siglen)) /
+ (float) (siglenbit + 1);
+ }
+ else
+ *penalty = hemdistsign(sign, orig, siglen);
+
+ pfree(sign);
+ }
+ else
+ *penalty = hemdist(origval, newval);
+ PG_RETURN_POINTER(penalty);
+}
+
+typedef struct
+{
+ bool allistrue;
+ BITVECP sign;
+} CACHESIGN;
+
+static void
+fillcache(CACHESIGN *item, SignTSVector *key, int siglen)
+{
+ item->allistrue = false;
+ if (ISARRKEY(key))
+ makesign(item->sign, key, siglen);
+ else if (ISALLTRUE(key))
+ item->allistrue = true;
+ else
+ memcpy((void *) item->sign, (void *) GETSIGN(key), siglen);
+}
+
+#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
+typedef struct
+{
+ OffsetNumber pos;
+ int32 cost;
+} SPLITCOST;
+
+static int
+comparecost(const void *va, const void *vb)
+{
+ const SPLITCOST *a = (const SPLITCOST *) va;
+ const SPLITCOST *b = (const SPLITCOST *) vb;
+
+ if (a->cost == b->cost)
+ return 0;
+ else
+ return (a->cost > b->cost) ? 1 : -1;
+}
+
+
+static int
+hemdistcache(CACHESIGN *a, CACHESIGN *b, int siglen)
+{
+ if (a->allistrue)
+ {
+ if (b->allistrue)
+ return 0;
+ else
+ return SIGLENBIT(siglen) - sizebitvec(b->sign, siglen);
+ }
+ else if (b->allistrue)
+ return SIGLENBIT(siglen) - sizebitvec(a->sign, siglen);
+
+ return hemdistsign(a->sign, b->sign, siglen);
+}
+
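+/*
+ * GiST picksplit method.  The two entries whose signatures are farthest
+ * apart (in Hamming distance) become the seeds of the left and right pages;
+ * the remaining entries are processed in ascending order of the difference
+ * between their distances to the two seeds, and each is placed on the side
+ * whose union signature it is closer to, with WISH_F nudging the choice
+ * toward a balanced split.
+ */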
+Datum
+gtsvector_picksplit(PG_FUNCTION_ARGS)
+{
+ GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+ GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+ int siglen = GET_SIGLEN();
+ OffsetNumber k,
+ j;
+ SignTSVector *datum_l,
+ *datum_r;
+ BITVECP union_l,
+ union_r;
+ int32 size_alpha,
+ size_beta;
+ int32 size_waste,
+ waste = -1;
+ int32 nbytes;
+ OffsetNumber seed_1 = 0,
+ seed_2 = 0;
+ OffsetNumber *left,
+ *right;
+ OffsetNumber maxoff;
+ BITVECP ptr;
+ int i;
+ CACHESIGN *cache;
+ char *cache_sign;
+ SPLITCOST *costvector;
+
+ maxoff = entryvec->n - 2;
+ nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+ v->spl_left = (OffsetNumber *) palloc(nbytes);
+ v->spl_right = (OffsetNumber *) palloc(nbytes);
+
+ cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
+ cache_sign = palloc(siglen * (maxoff + 2));
+
+ for (j = 0; j < maxoff + 2; j++)
+ cache[j].sign = &cache_sign[siglen * j];
+
+ fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber),
+ siglen);
+
+ for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
+ {
+ for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
+ {
+ if (k == FirstOffsetNumber)
+ fillcache(&cache[j], GETENTRY(entryvec, j), siglen);
+
+ size_waste = hemdistcache(&(cache[j]), &(cache[k]), siglen);
+ if (size_waste > waste)
+ {
+ waste = size_waste;
+ seed_1 = k;
+ seed_2 = j;
+ }
+ }
+ }
+
+ left = v->spl_left;
+ v->spl_nleft = 0;
+ right = v->spl_right;
+ v->spl_nright = 0;
+
+ if (seed_1 == 0 || seed_2 == 0)
+ {
+ seed_1 = 1;
+ seed_2 = 2;
+ }
+
+ /* form the initial split datums from the two seeds */
+ datum_l = gtsvector_alloc(SIGNKEY | (cache[seed_1].allistrue ? ALLISTRUE : 0),
+ siglen, cache[seed_1].sign);
+ datum_r = gtsvector_alloc(SIGNKEY | (cache[seed_2].allistrue ? ALLISTRUE : 0),
+ siglen, cache[seed_2].sign);
+ union_l = GETSIGN(datum_l);
+ union_r = GETSIGN(datum_r);
+ maxoff = OffsetNumberNext(maxoff);
+ fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff), siglen);
+ /* sort entries before distributing them between the two pages */
+ costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
+ for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
+ {
+ costvector[j - 1].pos = j;
+ size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]), siglen);
+ size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]), siglen);
+ costvector[j - 1].cost = Abs(size_alpha - size_beta);
+ }
+ qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
+
+ for (k = 0; k < maxoff; k++)
+ {
+ j = costvector[k].pos;
+ if (j == seed_1)
+ {
+ *left++ = j;
+ v->spl_nleft++;
+ continue;
+ }
+ else if (j == seed_2)
+ {
+ *right++ = j;
+ v->spl_nright++;
+ continue;
+ }
+
+ if (ISALLTRUE(datum_l) || cache[j].allistrue)
+ {
+ if (ISALLTRUE(datum_l) && cache[j].allistrue)
+ size_alpha = 0;
+ else
+ size_alpha = SIGLENBIT(siglen) -
+ sizebitvec((cache[j].allistrue) ?
+ GETSIGN(datum_l) :
+ cache[j].sign,
+ siglen);
+ }
+ else
+ size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l), siglen);
+
+ if (ISALLTRUE(datum_r) || cache[j].allistrue)
+ {
+ if (ISALLTRUE(datum_r) && cache[j].allistrue)
+ size_beta = 0;
+ else
+ size_beta = SIGLENBIT(siglen) -
+ sizebitvec((cache[j].allistrue) ?
+ GETSIGN(datum_r) :
+ cache[j].sign,
+ siglen);
+ }
+ else
+ size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r), siglen);
+
+ if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
+ {
+ if (ISALLTRUE(datum_l) || cache[j].allistrue)
+ {
+ if (!ISALLTRUE(datum_l))
+ MemSet((void *) GETSIGN(datum_l), 0xff, siglen);
+ }
+ else
+ {
+ ptr = cache[j].sign;
+ LOOPBYTE(siglen)
+ union_l[i] |= ptr[i];
+ }
+ *left++ = j;
+ v->spl_nleft++;
+ }
+ else
+ {
+ if (ISALLTRUE(datum_r) || cache[j].allistrue)
+ {
+ if (!ISALLTRUE(datum_r))
+ MemSet((void *) GETSIGN(datum_r), 0xff, siglen);
+ }
+ else
+ {
+ ptr = cache[j].sign;
+ LOOPBYTE(siglen)
+ union_r[i] |= ptr[i];
+ }
+ *right++ = j;
+ v->spl_nright++;
+ }
+ }
+
+ *right = *left = FirstOffsetNumber;
+ v->spl_ldatum = PointerGetDatum(datum_l);
+ v->spl_rdatum = PointerGetDatum(datum_r);
+
+ PG_RETURN_POINTER(v);
+}
+
+/*
+ * Formerly, gtsvector_consistent was declared in pg_proc.h with arguments
+ * that did not match the documented conventions for GiST support functions.
+ * We fixed that, but we still need a pg_proc entry with the old signature
+ * to support reloading pre-9.6 contrib/tsearch2 opclass declarations.
+ * This compatibility function should go away eventually.
+ */
+Datum
+gtsvector_consistent_oldsig(PG_FUNCTION_ARGS)
+{
+ return gtsvector_consistent(fcinfo);
+}
+
+Datum
+gtsvector_options(PG_FUNCTION_ARGS)
+{
+ local_relopts *relopts = (local_relopts *) PG_GETARG_POINTER(0);
+
+ init_local_reloptions(relopts, sizeof(GistTsVectorOptions));
+ add_local_int_reloption(relopts, "siglen", "signature length",
+ SIGLEN_DEFAULT, 1, SIGLEN_MAX,
+ offsetof(GistTsVectorOptions, siglen));
+
+ PG_RETURN_VOID();
+}
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
new file mode 100644
index 0000000..f54f298
--- /dev/null
+++ b/src/backend/utils/adt/tsquery.c
@@ -0,0 +1,1349 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsquery.c
+ * I/O functions for tsquery
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/tsquery.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_type.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/pg_crc.h"
+
+/* FTS operator priorities, see ts_type.h */
+const int tsearch_op_priority[OP_COUNT] =
+{
+ 4, /* OP_NOT */
+ 2, /* OP_AND */
+ 1, /* OP_OR */
+ 3 /* OP_PHRASE */
+};
+
+/*
+ * parser's states
+ */
+typedef enum
+{
+ WAITOPERAND = 1,
+ WAITOPERATOR = 2,
+ WAITFIRSTOPERAND = 3
+} ts_parserstate;
+
+/*
+ * token types for parsing
+ */
+typedef enum
+{
+ PT_END = 0,
+ PT_ERR = 1,
+ PT_VAL = 2,
+ PT_OPR = 3,
+ PT_OPEN = 4,
+ PT_CLOSE = 5
+} ts_tokentype;
+
+/*
+ * Get the next token from the query string.
+ *
+ * *operator is filled in with an OP_* value when the return value is PT_OPR;
+ * for the phrase operator, *weight carries the distance instead.
+ * *strval, *lenval and *weight are filled in when the return value is PT_VAL.
+ */
+typedef ts_tokentype (*ts_tokenizer) (TSQueryParserState state, int8 *operator,
+ int *lenval, char **strval,
+ int16 *weight, bool *prefix);
+
+struct TSQueryParserStateData
+{
+ /* Tokenizer used for parsing tsquery */
+ ts_tokenizer gettoken;
+
+ /* State of tokenizer function */
+ char *buffer; /* entire string we are scanning */
+ char *buf; /* current scan point */
+ int count; /* nesting count, incremented by (,
+ * decremented by ) */
+ ts_parserstate state;
+
+ /* polish (prefix) notation in list, filled in by push* functions */
+ List *polstr;
+
+ /*
+ * Strings from operands are collected in op. curop is a pointer to the
+ * end of used space of op.
+ */
+ char *op;
+ char *curop;
+ int lenop; /* allocated size of op */
+ int sumlen; /* used size of op */
+
+ /* state for value's parser */
+ TSVectorParseState valstate;
+};
+
+/*
+ * subroutine to parse the modifiers (weight and prefix flag currently)
+ * part, like ':AB*' of a query.
+ */
+static char *
+get_modifiers(char *buf, int16 *weight, bool *prefix)
+{
+ *weight = 0;
+ *prefix = false;
+
+ if (!t_iseq(buf, ':'))
+ return buf;
+
+ buf++;
+ while (*buf && pg_mblen(buf) == 1)
+ {
+ switch (*buf)
+ {
+ case 'a':
+ case 'A':
+ *weight |= 1 << 3;
+ break;
+ case 'b':
+ case 'B':
+ *weight |= 1 << 2;
+ break;
+ case 'c':
+ case 'C':
+ *weight |= 1 << 1;
+ break;
+ case 'd':
+ case 'D':
+ *weight |= 1;
+ break;
+ case '*':
+ *prefix = true;
+ break;
+ default:
+ return buf;
+ }
+ buf++;
+ }
+
+ return buf;
+}
+
+/*
+ * Parse phrase operator. The operator
+ * may take the following forms:
+ *
+ * a <N> b (distance is exactly N lexemes)
+ * a <-> b (default distance = 1)
+ *
+ * The buffer should begin with '<' char
+ */
+static bool
+parse_phrase_operator(TSQueryParserState pstate, int16 *distance)
+{
+ enum
+ {
+ PHRASE_OPEN = 0,
+ PHRASE_DIST,
+ PHRASE_CLOSE,
+ PHRASE_FINISH
+ } state = PHRASE_OPEN;
+ char *ptr = pstate->buf;
+ char *endptr;
+ long l = 1; /* default distance */
+
+ while (*ptr)
+ {
+ switch (state)
+ {
+ case PHRASE_OPEN:
+ if (t_iseq(ptr, '<'))
+ {
+ state = PHRASE_DIST;
+ ptr++;
+ }
+ else
+ return false;
+ break;
+
+ case PHRASE_DIST:
+ if (t_iseq(ptr, '-'))
+ {
+ state = PHRASE_CLOSE;
+ ptr++;
+ continue;
+ }
+
+ if (!t_isdigit(ptr))
+ return false;
+
+ errno = 0;
+ l = strtol(ptr, &endptr, 10);
+ if (ptr == endptr)
+ return false;
+ else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("distance in phrase operator must be an integer value between zero and %d inclusive",
+ MAXENTRYPOS)));
+ else
+ {
+ state = PHRASE_CLOSE;
+ ptr = endptr;
+ }
+ break;
+
+ case PHRASE_CLOSE:
+ if (t_iseq(ptr, '>'))
+ {
+ state = PHRASE_FINISH;
+ ptr++;
+ }
+ else
+ return false;
+ break;
+
+ case PHRASE_FINISH:
+ *distance = (int16) l;
+ pstate->buf = ptr;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*
+ * Parse the OR operator used in websearch_to_tsquery().  Returns true if we
+ * believe the "OR" literal is being used as the OR operator.
+ */
+static bool
+parse_or_operator(TSQueryParserState pstate)
+{
+ char *ptr = pstate->buf;
+
+ /* it should begin with "OR" literal */
+ if (pg_strncasecmp(ptr, "or", 2) != 0)
+ return false;
+
+ ptr += 2;
+
+ /*
+ * "OR" must not be part of a longer word, and some operand must appear
+ * later in the string.
+ */
+ if (*ptr == '\0') /* no operand */
+ return false;
+
+ /* it shouldn't be a part of any word */
+ if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalpha(ptr) || t_isdigit(ptr))
+ return false;
+
+ for (;;)
+ {
+ ptr += pg_mblen(ptr);
+
+ if (*ptr == '\0') /* got end of string without operand */
+ return false;
+
+ /*
+ * We found something that looks like an operand, though it might not
+ * be a valid one.  Still treat the OR literal as an operator rather
+ * than searching for it as a lexeme.
+ */
+ if (!t_isspace(ptr))
+ break;
+ }
+
+ pstate->buf += 2;
+ return true;
+}
+
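+/*
+ * Tokenizer for the standard tsquery syntax: lexemes (parsed with the
+ * tsvector parser) with optional ':ABCD*' weight/prefix modifiers, the
+ * operators ! & | <-> <N>, and parentheses.
+ */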
+static ts_tokentype
+gettoken_query_standard(TSQueryParserState state, int8 *operator,
+ int *lenval, char **strval,
+ int16 *weight, bool *prefix)
+{
+ *weight = 0;
+ *prefix = false;
+
+ while (true)
+ {
+ switch (state->state)
+ {
+ case WAITFIRSTOPERAND:
+ case WAITOPERAND:
+ if (t_iseq(state->buf, '!'))
+ {
+ state->buf++;
+ state->state = WAITOPERAND;
+ *operator = OP_NOT;
+ return PT_OPR;
+ }
+ else if (t_iseq(state->buf, '('))
+ {
+ state->buf++;
+ state->state = WAITOPERAND;
+ state->count++;
+ return PT_OPEN;
+ }
+ else if (t_iseq(state->buf, ':'))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsquery: \"%s\"",
+ state->buffer)));
+ }
+ else if (!t_isspace(state->buf))
+ {
+ /*
+ * We rely on the tsvector parser to parse the value for
+ * us
+ */
+ reset_tsvector_parser(state->valstate, state->buf);
+ if (gettoken_tsvector(state->valstate, strval, lenval,
+ NULL, NULL, &state->buf))
+ {
+ state->buf = get_modifiers(state->buf, weight, prefix);
+ state->state = WAITOPERATOR;
+ return PT_VAL;
+ }
+ else if (state->state == WAITFIRSTOPERAND)
+ {
+ return PT_END;
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("no operand in tsquery: \"%s\"",
+ state->buffer)));
+ }
+ break;
+
+ case WAITOPERATOR:
+ if (t_iseq(state->buf, '&'))
+ {
+ state->buf++;
+ state->state = WAITOPERAND;
+ *operator = OP_AND;
+ return PT_OPR;
+ }
+ else if (t_iseq(state->buf, '|'))
+ {
+ state->buf++;
+ state->state = WAITOPERAND;
+ *operator = OP_OR;
+ return PT_OPR;
+ }
+ else if (parse_phrase_operator(state, weight))
+ {
+ /* weight var is used as storage for distance */
+ state->state = WAITOPERAND;
+ *operator = OP_PHRASE;
+ return PT_OPR;
+ }
+ else if (t_iseq(state->buf, ')'))
+ {
+ state->buf++;
+ state->count--;
+ return (state->count < 0) ? PT_ERR : PT_CLOSE;
+ }
+ else if (*state->buf == '\0')
+ {
+ return (state->count) ? PT_ERR : PT_END;
+ }
+ else if (!t_isspace(state->buf))
+ {
+ return PT_ERR;
+ }
+ break;
+ }
+
+ state->buf += pg_mblen(state->buf);
+ }
+}
+
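+/*
+ * Tokenizer for websearch_to_tsquery-style input: unquoted words are
+ * implicitly AND-ed, text inside double quotes is returned as one value
+ * token, a leading '-' negates the following value, the bare word OR (when
+ * parse_or_operator accepts it) acts as the | operator, and any other
+ * tsquery operator characters are simply skipped.
+ */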
+static ts_tokentype
+gettoken_query_websearch(TSQueryParserState state, int8 *operator,
+ int *lenval, char **strval,
+ int16 *weight, bool *prefix)
+{
+ *weight = 0;
+ *prefix = false;
+
+ while (true)
+ {
+ switch (state->state)
+ {
+ case WAITFIRSTOPERAND:
+ case WAITOPERAND:
+ if (t_iseq(state->buf, '-'))
+ {
+ state->buf++;
+ state->state = WAITOPERAND;
+
+ *operator = OP_NOT;
+ return PT_OPR;
+ }
+ else if (t_iseq(state->buf, '"'))
+ {
+ /* Everything in quotes is processed as a single token */
+
+ /* skip opening quote */
+ state->buf++;
+ *strval = state->buf;
+
+ /* iterate to the closing quote or end of the string */
+ while (*state->buf != '\0' && !t_iseq(state->buf, '"'))
+ state->buf++;
+ *lenval = state->buf - *strval;
+
+ /* skip closing quote if not end of the string */
+ if (*state->buf != '\0')
+ state->buf++;
+
+ state->state = WAITOPERATOR;
+ state->count++;
+ return PT_VAL;
+ }
+ else if (ISOPERATOR(state->buf))
+ {
+ /* or else gettoken_tsvector() will raise an error */
+ state->buf++;
+ state->state = WAITOPERAND;
+ continue;
+ }
+ else if (!t_isspace(state->buf))
+ {
+ /*
+ * We rely on the tsvector parser to parse the value for
+ * us
+ */
+ reset_tsvector_parser(state->valstate, state->buf);
+ if (gettoken_tsvector(state->valstate, strval, lenval,
+ NULL, NULL, &state->buf))
+ {
+ state->state = WAITOPERATOR;
+ return PT_VAL;
+ }
+ else if (state->state == WAITFIRSTOPERAND)
+ {
+ return PT_END;
+ }
+ else
+ {
+ /* finally, we have to provide an operand */
+ pushStop(state);
+ return PT_END;
+ }
+ }
+ break;
+
+ case WAITOPERATOR:
+ if (t_iseq(state->buf, '"'))
+ {
+ /*
+ * put implicit AND after an operand and handle this quote
+ * in WAITOPERAND
+ */
+ state->state = WAITOPERAND;
+ *operator = OP_AND;
+ return PT_OPR;
+ }
+ else if (parse_or_operator(state))
+ {
+ state->state = WAITOPERAND;
+ *operator = OP_OR;
+ return PT_OPR;
+ }
+ else if (*state->buf == '\0')
+ {
+ return PT_END;
+ }
+ else if (!t_isspace(state->buf))
+ {
+ /* put implicit AND after an operand */
+ *operator = OP_AND;
+ state->state = WAITOPERAND;
+ return PT_OPR;
+ }
+ break;
+ }
+
+ state->buf += pg_mblen(state->buf);
+ }
+}
+
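+/*
+ * Tokenizer used for P_TSQ_PLAIN: the entire remaining input is returned as
+ * a single value token.
+ */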
+static ts_tokentype
+gettoken_query_plain(TSQueryParserState state, int8 *operator,
+ int *lenval, char **strval,
+ int16 *weight, bool *prefix)
+{
+ *weight = 0;
+ *prefix = false;
+
+ if (*state->buf == '\0')
+ return PT_END;
+
+ *strval = state->buf;
+ *lenval = strlen(state->buf);
+ state->buf += *lenval;
+ state->count++;
+ return PT_VAL;
+}
+
+/*
+ * Push an operator to state->polstr
+ */
+void
+pushOperator(TSQueryParserState state, int8 oper, int16 distance)
+{
+ QueryOperator *tmp;
+
+ Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR || oper == OP_PHRASE);
+
+ tmp = (QueryOperator *) palloc0(sizeof(QueryOperator));
+ tmp->type = QI_OPR;
+ tmp->oper = oper;
+ tmp->distance = (oper == OP_PHRASE) ? distance : 0;
+ /* left is filled in later with findoprnd */
+
+ state->polstr = lcons(tmp, state->polstr);
+}
+
+static void
+pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix)
+{
+ QueryOperand *tmp;
+
+ if (distance >= MAXSTRPOS)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("value is too big in tsquery: \"%s\"",
+ state->buffer)));
+ if (lenval >= MAXSTRLEN)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("operand is too long in tsquery: \"%s\"",
+ state->buffer)));
+
+ tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
+ tmp->type = QI_VAL;
+ tmp->weight = weight;
+ tmp->prefix = prefix;
+ tmp->valcrc = (int32) valcrc;
+ tmp->length = lenval;
+ tmp->distance = distance;
+
+ state->polstr = lcons(tmp, state->polstr);
+}
+
+/*
+ * Push an operand to state->polstr.
+ *
+ * strval must point to a string equal to state->curop. lenval is the length
+ * of the string.
+ */
+void
+pushValue(TSQueryParserState state, char *strval, int lenval, int16 weight, bool prefix)
+{
+ pg_crc32 valcrc;
+
+ if (lenval >= MAXSTRLEN)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("word is too long in tsquery: \"%s\"",
+ state->buffer)));
+
+ INIT_LEGACY_CRC32(valcrc);
+ COMP_LEGACY_CRC32(valcrc, strval, lenval);
+ FIN_LEGACY_CRC32(valcrc);
+ pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix);
+
+ /* append the value string to state.op, enlarging buffer if needed first */
+ while (state->curop - state->op + lenval + 1 >= state->lenop)
+ {
+ int used = state->curop - state->op;
+
+ state->lenop *= 2;
+ state->op = (char *) repalloc((void *) state->op, state->lenop);
+ state->curop = state->op + used;
+ }
+ memcpy((void *) state->curop, (void *) strval, lenval);
+ state->curop += lenval;
+ *(state->curop) = '\0';
+ state->curop++;
+ state->sumlen += lenval + 1 /* \0 */ ;
+}
+
+
+/*
+ * Push a stopword placeholder to state->polstr
+ */
+void
+pushStop(TSQueryParserState state)
+{
+ QueryOperand *tmp;
+
+ tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
+ tmp->type = QI_VALSTOP;
+
+ state->polstr = lcons(tmp, state->polstr);
+}
+
+
+#define STACKDEPTH 32
+
+typedef struct OperatorElement
+{
+ int8 op;
+ int16 distance;
+} OperatorElement;
+
+static void
+pushOpStack(OperatorElement *stack, int *lenstack, int8 op, int16 distance)
+{
+ if (*lenstack == STACKDEPTH) /* internal error */
+ elog(ERROR, "tsquery stack too small");
+
+ stack[*lenstack].op = op;
+ stack[*lenstack].distance = distance;
+
+ (*lenstack)++;
+}
+
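+/*
+ * Pop pending operators off the stack (emitting them with pushOperator)
+ * until the stack top has strictly lower priority than the incoming
+ * operator 'op'; for the right-associative OP_NOT, stop already at equal
+ * priority.
+ */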
+static void
+cleanOpStack(TSQueryParserState state,
+ OperatorElement *stack, int *lenstack, int8 op)
+{
+ int opPriority = OP_PRIORITY(op);
+
+ while (*lenstack)
+ {
+ /* NOT is right-associative, unlike the other operators */
+ if ((op != OP_NOT && opPriority > OP_PRIORITY(stack[*lenstack - 1].op)) ||
+ (op == OP_NOT && opPriority >= OP_PRIORITY(stack[*lenstack - 1].op)))
+ break;
+
+ (*lenstack)--;
+ pushOperator(state, stack[*lenstack].op,
+ stack[*lenstack].distance);
+ }
+}
+
+/*
+ * Make polish (prefix) notation of query.
+ *
+ * See parse_tsquery for explanation of pushval.
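+ *
+ * For example, the query "a & (b | c)" is emitted as the item sequence
+ * "&, |, c, b, a": each operator is followed first by its right operand's
+ * subtree and then by its left operand's subtree.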
+ */
+static void
+makepol(TSQueryParserState state,
+ PushFunction pushval,
+ Datum opaque)
+{
+ int8 operator = 0;
+ ts_tokentype type;
+ int lenval = 0;
+ char *strval = NULL;
+ OperatorElement opstack[STACKDEPTH];
+ int lenstack = 0;
+ int16 weight = 0;
+ bool prefix;
+
+ /* since this function recurses, it could be driven to stack overflow */
+ check_stack_depth();
+
+ while ((type = state->gettoken(state, &operator,
+ &lenval, &strval,
+ &weight, &prefix)) != PT_END)
+ {
+ switch (type)
+ {
+ case PT_VAL:
+ pushval(opaque, state, strval, lenval, weight, prefix);
+ break;
+ case PT_OPR:
+ cleanOpStack(state, opstack, &lenstack, operator);
+ pushOpStack(opstack, &lenstack, operator, weight);
+ break;
+ case PT_OPEN:
+ makepol(state, pushval, opaque);
+ break;
+ case PT_CLOSE:
+ cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
+ return;
+ case PT_ERR:
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("syntax error in tsquery: \"%s\"",
+ state->buffer)));
+ }
+ }
+
+ cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */ );
+}
+
+static void
+findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup)
+{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ if (*pos >= nnodes)
+ elog(ERROR, "malformed tsquery: operand not found");
+
+ if (ptr[*pos].type == QI_VAL)
+ {
+ (*pos)++;
+ }
+ else if (ptr[*pos].type == QI_VALSTOP)
+ {
+ *needcleanup = true; /* we'll have to remove stop words */
+ (*pos)++;
+ }
+ else
+ {
+ Assert(ptr[*pos].type == QI_OPR);
+
+ if (ptr[*pos].qoperator.oper == OP_NOT)
+ {
+ ptr[*pos].qoperator.left = 1; /* fixed offset */
+ (*pos)++;
+
+ /* process the only argument */
+ findoprnd_recurse(ptr, pos, nnodes, needcleanup);
+ }
+ else
+ {
+ QueryOperator *curitem = &ptr[*pos].qoperator;
+ int tmp = *pos; /* save current position */
+
+ Assert(curitem->oper == OP_AND ||
+ curitem->oper == OP_OR ||
+ curitem->oper == OP_PHRASE);
+
+ (*pos)++;
+
+ /* process RIGHT argument */
+ findoprnd_recurse(ptr, pos, nnodes, needcleanup);
+
+ curitem->left = *pos - tmp; /* set LEFT arg's offset */
+
+ /* process LEFT argument */
+ findoprnd_recurse(ptr, pos, nnodes, needcleanup);
+ }
+ }
+}
+
+
+/*
+ * Fill in the left-fields previously left unfilled.
+ * The input QueryItems must be in polish (prefix) notation.
+ * Also, set *needcleanup to true if there are any QI_VALSTOP nodes.
+ */
+static void
+findoprnd(QueryItem *ptr, int size, bool *needcleanup)
+{
+ uint32 pos;
+
+ *needcleanup = false;
+ pos = 0;
+ findoprnd_recurse(ptr, &pos, size, needcleanup);
+
+ if (pos != size)
+ elog(ERROR, "malformed tsquery: extra nodes");
+}
+
+
+/*
+ * Each value (operand) in the query is passed to pushval. pushval can
+ * transform the simple value to an arbitrarily complex expression using
+ * pushValue and pushOperator. It must push a single value with pushValue,
+ * a complete expression with all operands, or a stopword placeholder
+ * with pushStop, otherwise the prefix notation representation will be broken,
+ * having an operator with no operand.
+ *
+ * opaque is passed on to pushval as is, pushval can use it to store its
+ * private state.
+ */
+TSQuery
+parse_tsquery(char *buf,
+ PushFunction pushval,
+ Datum opaque,
+ int flags)
+{
+ struct TSQueryParserStateData state;
+ int i;
+ TSQuery query;
+ int commonlen;
+ QueryItem *ptr;
+ ListCell *cell;
+ bool needcleanup;
+ int tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY;
+
+ /* plain should not be used with web */
+ Assert((flags & (P_TSQ_PLAIN | P_TSQ_WEB)) != (P_TSQ_PLAIN | P_TSQ_WEB));
+
+ /* select suitable tokenizer */
+ if (flags & P_TSQ_PLAIN)
+ state.gettoken = gettoken_query_plain;
+ else if (flags & P_TSQ_WEB)
+ {
+ state.gettoken = gettoken_query_websearch;
+ tsv_flags |= P_TSV_IS_WEB;
+ }
+ else
+ state.gettoken = gettoken_query_standard;
+
+ /* init state */
+ state.buffer = buf;
+ state.buf = buf;
+ state.count = 0;
+ state.state = WAITFIRSTOPERAND;
+ state.polstr = NIL;
+
+ /* init value parser's state */
+ state.valstate = init_tsvector_parser(state.buffer, tsv_flags);
+
+ /* init buffer for operand strings */
+ state.sumlen = 0;
+ state.lenop = 64;
+ state.curop = state.op = (char *) palloc(state.lenop);
+ *(state.curop) = '\0';
+
+ /* parse query & make polish notation (postfix, but in reverse order) */
+ makepol(&state, pushval, opaque);
+
+ close_tsvector_parser(state.valstate);
+
+ if (list_length(state.polstr) == 0)
+ {
+ ereport(NOTICE,
+ (errmsg("text-search query doesn't contain lexemes: \"%s\"",
+ state.buffer)));
+ query = (TSQuery) palloc(HDRSIZETQ);
+ SET_VARSIZE(query, HDRSIZETQ);
+ query->size = 0;
+ return query;
+ }
+
+ if (TSQUERY_TOO_BIG(list_length(state.polstr), state.sumlen))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("tsquery is too large")));
+ commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
+
+ /* Pack the QueryItems in the final TSQuery struct to return to caller */
+ query = (TSQuery) palloc0(commonlen);
+ SET_VARSIZE(query, commonlen);
+ query->size = list_length(state.polstr);
+ ptr = GETQUERY(query);
+
+ /* Copy QueryItems to TSQuery */
+ i = 0;
+ foreach(cell, state.polstr)
+ {
+ QueryItem *item = (QueryItem *) lfirst(cell);
+
+ switch (item->type)
+ {
+ case QI_VAL:
+ memcpy(&ptr[i], item, sizeof(QueryOperand));
+ break;
+ case QI_VALSTOP:
+ ptr[i].type = QI_VALSTOP;
+ break;
+ case QI_OPR:
+ memcpy(&ptr[i], item, sizeof(QueryOperator));
+ break;
+ default:
+ elog(ERROR, "unrecognized QueryItem type: %d", item->type);
+ }
+ i++;
+ }
+
+ /* Copy all the operand strings to TSQuery */
+ memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
+ pfree(state.op);
+
+ /*
+ * Set left operand pointers for every operator. While we're at it,
+ * detect whether there are any QI_VALSTOP nodes.
+ */
+ findoprnd(ptr, query->size, &needcleanup);
+
+ /*
+ * If there are QI_VALSTOP nodes, delete them and simplify the tree.
+ */
+ if (needcleanup)
+ query = cleanup_tsquery_stopwords(query);
+
+ return query;
+}
+
+static void
+pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
+ int16 weight, bool prefix)
+{
+ pushValue(state, strval, lenval, weight, prefix);
+}
+
+/*
+ * tsquery input function; does no morphological processing
+ */
+Datum
+tsqueryin(PG_FUNCTION_ARGS)
+{
+ char *in = PG_GETARG_CSTRING(0);
+
+ PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), 0));
+}
+
+/*
+ * tsquery output function
+ */
+typedef struct
+{
+ QueryItem *curpol;
+ char *buf;
+ char *cur;
+ char *op;
+ int buflen;
+} INFIX;
+
+/* Makes sure inf->buf is large enough for adding 'addsize' bytes */
+#define RESIZEBUF(inf, addsize) \
+while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
+{ \
+ int len = (inf)->cur - (inf)->buf; \
+ (inf)->buflen *= 2; \
+ (inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
+ (inf)->cur = (inf)->buf + len; \
+}
+
+/*
+ * recursively traverse the tree and
+ * print it in infix (human-readable) form
+ */
+static void
+infix(INFIX *in, int parentPriority, bool rightPhraseOp)
+{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ if (in->curpol->type == QI_VAL)
+ {
+ QueryOperand *curpol = &in->curpol->qoperand;
+ char *op = in->op + curpol->distance;
+ int clen;
+
+ RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6);
+ *(in->cur) = '\'';
+ in->cur++;
+ while (*op)
+ {
+ if (t_iseq(op, '\''))
+ {
+ *(in->cur) = '\'';
+ in->cur++;
+ }
+ else if (t_iseq(op, '\\'))
+ {
+ *(in->cur) = '\\';
+ in->cur++;
+ }
+ COPYCHAR(in->cur, op);
+
+ clen = pg_mblen(op);
+ op += clen;
+ in->cur += clen;
+ }
+ *(in->cur) = '\'';
+ in->cur++;
+ if (curpol->weight || curpol->prefix)
+ {
+ *(in->cur) = ':';
+ in->cur++;
+ if (curpol->prefix)
+ {
+ *(in->cur) = '*';
+ in->cur++;
+ }
+ if (curpol->weight & (1 << 3))
+ {
+ *(in->cur) = 'A';
+ in->cur++;
+ }
+ if (curpol->weight & (1 << 2))
+ {
+ *(in->cur) = 'B';
+ in->cur++;
+ }
+ if (curpol->weight & (1 << 1))
+ {
+ *(in->cur) = 'C';
+ in->cur++;
+ }
+ if (curpol->weight & 1)
+ {
+ *(in->cur) = 'D';
+ in->cur++;
+ }
+ }
+ *(in->cur) = '\0';
+ in->curpol++;
+ }
+ else if (in->curpol->qoperator.oper == OP_NOT)
+ {
+ int priority = QO_PRIORITY(in->curpol);
+
+ if (priority < parentPriority)
+ {
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, "( ");
+ in->cur = strchr(in->cur, '\0');
+ }
+ RESIZEBUF(in, 1);
+ *(in->cur) = '!';
+ in->cur++;
+ *(in->cur) = '\0';
+ in->curpol++;
+
+ infix(in, priority, false);
+ if (priority < parentPriority)
+ {
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, " )");
+ in->cur = strchr(in->cur, '\0');
+ }
+ }
+ else
+ {
+ int8 op = in->curpol->qoperator.oper;
+ int priority = QO_PRIORITY(in->curpol);
+ int16 distance = in->curpol->qoperator.distance;
+ INFIX nrm;
+ bool needParenthesis = false;
+
+ in->curpol++;
+ if (priority < parentPriority ||
+ /* phrase operator depends on order */
+ (op == OP_PHRASE && rightPhraseOp))
+ {
+ needParenthesis = true;
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, "( ");
+ in->cur = strchr(in->cur, '\0');
+ }
+
+ nrm.curpol = in->curpol;
+ nrm.op = in->op;
+ nrm.buflen = 16;
+ nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+
+ /* get right operand */
+ infix(&nrm, priority, (op == OP_PHRASE));
+
+ /* get & print left operand */
+ in->curpol = nrm.curpol;
+ infix(in, priority, false);
+
+ /* print operator & right operand */
+ RESIZEBUF(in, 3 + (2 + 10 /* distance */ ) + (nrm.cur - nrm.buf));
+ switch (op)
+ {
+ case OP_OR:
+ sprintf(in->cur, " | %s", nrm.buf);
+ break;
+ case OP_AND:
+ sprintf(in->cur, " & %s", nrm.buf);
+ break;
+ case OP_PHRASE:
+ if (distance != 1)
+ sprintf(in->cur, " <%d> %s", distance, nrm.buf);
+ else
+ sprintf(in->cur, " <-> %s", nrm.buf);
+ break;
+ default:
+ /* OP_NOT is handled in above if-branch */
+ elog(ERROR, "unrecognized operator type: %d", op);
+ }
+ in->cur = strchr(in->cur, '\0');
+ pfree(nrm.buf);
+
+ if (needParenthesis)
+ {
+ RESIZEBUF(in, 2);
+ sprintf(in->cur, " )");
+ in->cur = strchr(in->cur, '\0');
+ }
+ }
+}
+
+Datum
+tsqueryout(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY(0);
+ INFIX nrm;
+
+ if (query->size == 0)
+ {
+ char *b = palloc(1);
+
+ *b = '\0';
+ PG_RETURN_POINTER(b);
+ }
+ nrm.curpol = GETQUERY(query);
+ nrm.buflen = 32;
+ nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+ *(nrm.cur) = '\0';
+ nrm.op = GETOPERAND(query);
+ infix(&nrm, -1 /* lowest priority */ , false);
+
+ PG_FREE_IF_COPY(query, 0);
+ PG_RETURN_CSTRING(nrm.buf);
+}
+
+/*
+ * Binary Input / Output functions. The binary format is as follows:
+ *
+ * uint32 number of operators/operands in the query
+ *
+ * Followed by the operators and operands, in prefix notation. For each
+ * operand:
+ *
+ * uint8 type, QI_VAL
+ * uint8 weight
+ * uint8 prefix
+ * operand text in client encoding, null-terminated
+ *
+ * For each operator:
+ * uint8 type, QI_OPR
+ * uint8 operator, one of OP_AND, OP_PHRASE, OP_OR, OP_NOT.
+ * uint16 distance (only for OP_PHRASE)
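+ *
+ * For example, the query 'fat & rat' consists of three items and is sent as
+ * the item count 3, the operator item for '&', and then the operand items
+ * for 'rat' and 'fat' (right operand first, matching the stored prefix
+ * order).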
+ */
+Datum
+tsquerysend(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY(0);
+ StringInfoData buf;
+ int i;
+ QueryItem *item = GETQUERY(query);
+
+ pq_begintypsend(&buf);
+
+ pq_sendint32(&buf, query->size);
+ for (i = 0; i < query->size; i++)
+ {
+ pq_sendint8(&buf, item->type);
+
+ switch (item->type)
+ {
+ case QI_VAL:
+ pq_sendint8(&buf, item->qoperand.weight);
+ pq_sendint8(&buf, item->qoperand.prefix);
+ pq_sendstring(&buf, GETOPERAND(query) + item->qoperand.distance);
+ break;
+ case QI_OPR:
+ pq_sendint8(&buf, item->qoperator.oper);
+ if (item->qoperator.oper == OP_PHRASE)
+ pq_sendint16(&buf, item->qoperator.distance);
+ break;
+ default:
+ elog(ERROR, "unrecognized tsquery node type: %d", item->type);
+ }
+ item++;
+ }
+
+ PG_FREE_IF_COPY(query, 0);
+
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+Datum
+tsqueryrecv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ TSQuery query;
+ int i,
+ len;
+ QueryItem *item;
+ int datalen;
+ char *ptr;
+ uint32 size;
+ const char **operands;
+ bool needcleanup;
+
+ size = pq_getmsgint(buf, sizeof(uint32));
+ if (size > (MaxAllocSize / sizeof(QueryItem)))
+ elog(ERROR, "invalid size of tsquery");
+
+ /* Allocate space to temporarily hold operand strings */
+ operands = palloc(size * sizeof(char *));
+
+ /* Allocate space for all the QueryItems. */
+ len = HDRSIZETQ + sizeof(QueryItem) * size;
+ query = (TSQuery) palloc0(len);
+ query->size = size;
+ item = GETQUERY(query);
+
+ datalen = 0;
+ for (i = 0; i < size; i++)
+ {
+ item->type = (int8) pq_getmsgint(buf, sizeof(int8));
+
+ if (item->type == QI_VAL)
+ {
+ size_t val_len; /* length after recoding to server
+ * encoding */
+ uint8 weight;
+ uint8 prefix;
+ const char *val;
+ pg_crc32 valcrc;
+
+ weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
+ prefix = (uint8) pq_getmsgint(buf, sizeof(uint8));
+ val = pq_getmsgstring(buf);
+ val_len = strlen(val);
+
+ /* Sanity checks */
+
+ if (weight > 0xF)
+ elog(ERROR, "invalid tsquery: invalid weight bitmap");
+
+ if (val_len > MAXSTRLEN)
+ elog(ERROR, "invalid tsquery: operand too long");
+
+ if (datalen > MAXSTRPOS)
+ elog(ERROR, "invalid tsquery: total operand length exceeded");
+
+ /* Looks valid. */
+
+ INIT_LEGACY_CRC32(valcrc);
+ COMP_LEGACY_CRC32(valcrc, val, val_len);
+ FIN_LEGACY_CRC32(valcrc);
+
+ item->qoperand.weight = weight;
+ item->qoperand.prefix = (prefix) ? true : false;
+ item->qoperand.valcrc = (int32) valcrc;
+ item->qoperand.length = val_len;
+ item->qoperand.distance = datalen;
+
+ /*
+ * Operand strings are copied to the final struct after this loop;
+ * here we just collect them to an array
+ */
+ operands[i] = val;
+
+ datalen += val_len + 1; /* + 1 for the '\0' terminator */
+ }
+ else if (item->type == QI_OPR)
+ {
+ int8 oper;
+
+ oper = (int8) pq_getmsgint(buf, sizeof(int8));
+ if (oper != OP_NOT && oper != OP_OR && oper != OP_AND && oper != OP_PHRASE)
+ elog(ERROR, "invalid tsquery: unrecognized operator type %d",
+ (int) oper);
+ if (i == size - 1)
+ elog(ERROR, "invalid pointer to right operand");
+
+ item->qoperator.oper = oper;
+ if (oper == OP_PHRASE)
+ item->qoperator.distance = (int16) pq_getmsgint(buf, sizeof(int16));
+ }
+ else
+ elog(ERROR, "unrecognized tsquery node type: %d", item->type);
+
+ item++;
+ }
+
+ /* Enlarge buffer to make room for the operand values. */
+ query = (TSQuery) repalloc(query, len + datalen);
+ item = GETQUERY(query);
+ ptr = GETOPERAND(query);
+
+ /*
+ * Fill in the left-pointers. Checks that the tree is well-formed as a
+ * side-effect.
+ */
+ findoprnd(item, size, &needcleanup);
+
+ /* Can't have found any QI_VALSTOP nodes */
+ Assert(!needcleanup);
+
+ /* Copy operands to output struct */
+ for (i = 0; i < size; i++)
+ {
+ if (item->type == QI_VAL)
+ {
+ memcpy(ptr, operands[i], item->qoperand.length + 1);
+ ptr += item->qoperand.length + 1;
+ }
+ item++;
+ }
+
+ pfree(operands);
+
+ Assert(ptr - GETOPERAND(query) == datalen);
+
+ SET_VARSIZE(query, len + datalen);
+
+ PG_RETURN_TSQUERY(query);
+}
+
+/*
+ * Debug function: shows the form of the query that would be executed
+ * against non-leaf index pages (i.e. with NOT subexpressions stripped out).
+ */
+Datum
+tsquerytree(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY(0);
+ INFIX nrm;
+ text *res;
+ QueryItem *q;
+ int len;
+
+ if (query->size == 0)
+ {
+ res = (text *) palloc(VARHDRSZ);
+ SET_VARSIZE(res, VARHDRSZ);
+ PG_RETURN_POINTER(res);
+ }
+
+ q = clean_NOT(GETQUERY(query), &len);
+
+ if (!q)
+ {
+ res = cstring_to_text("T");
+ }
+ else
+ {
+ nrm.curpol = q;
+ nrm.buflen = 32;
+ nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
+ *(nrm.cur) = '\0';
+ nrm.op = GETOPERAND(query);
+ infix(&nrm, -1, false);
+ res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf);
+ pfree(q);
+ }
+
+ PG_FREE_IF_COPY(query, 0);
+
+ PG_RETURN_TEXT_P(res);
+}
diff --git a/src/backend/utils/adt/tsquery_cleanup.c b/src/backend/utils/adt/tsquery_cleanup.c
new file mode 100644
index 0000000..b77a787
--- /dev/null
+++ b/src/backend/utils/adt/tsquery_cleanup.c
@@ -0,0 +1,444 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsquery_cleanup.c
+ * Clean up a tsquery by removing NOT nodes and/or stopword placeholders,
+ * plus the utility functions needed to do so.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/tsquery_cleanup.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "miscadmin.h"
+#include "tsearch/ts_utils.h"
+
+typedef struct NODE
+{
+ struct NODE *left;
+ struct NODE *right;
+ QueryItem *valnode;
+} NODE;
+
+/*
+ * make query tree from plain view of query
+ */
+static NODE *
+maketree(QueryItem *in)
+{
+ NODE *node = (NODE *) palloc(sizeof(NODE));
+
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ node->valnode = in;
+ node->right = node->left = NULL;
+ if (in->type == QI_OPR)
+ {
+ node->right = maketree(in + 1);
+ if (in->qoperator.oper != OP_NOT)
+ node->left = maketree(in + in->qoperator.left);
+ }
+ return node;
+}
+
+/*
+ * Internal state for plaintree and plainnode
+ */
+typedef struct
+{
+ QueryItem *ptr;
+ int len; /* allocated size of ptr */
+ int cur; /* number of elements in ptr */
+} PLAINTREE;
+
+static void
+plainnode(PLAINTREE *state, NODE *node)
+{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ if (state->cur == state->len)
+ {
+ state->len *= 2;
+ state->ptr = (QueryItem *) repalloc((void *) state->ptr, state->len * sizeof(QueryItem));
+ }
+ memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(QueryItem));
+ if (node->valnode->type == QI_VAL)
+ state->cur++;
+ else if (node->valnode->qoperator.oper == OP_NOT)
+ {
+ state->ptr[state->cur].qoperator.left = 1;
+ state->cur++;
+ plainnode(state, node->right);
+ }
+ else
+ {
+ int cur = state->cur;
+
+ state->cur++;
+ plainnode(state, node->right);
+ state->ptr[cur].qoperator.left = state->cur - cur;
+ plainnode(state, node->left);
+ }
+ pfree(node);
+}
+
+/*
+ * make plain view of tree from a NODE-tree representation
+ */
+static QueryItem *
+plaintree(NODE *root, int *len)
+{
+ PLAINTREE pl;
+
+ pl.cur = 0;
+ pl.len = 16;
+ if (root && (root->valnode->type == QI_VAL || root->valnode->type == QI_OPR))
+ {
+ pl.ptr = (QueryItem *) palloc(pl.len * sizeof(QueryItem));
+ plainnode(&pl, root);
+ }
+ else
+ pl.ptr = NULL;
+ *len = pl.cur;
+ return pl.ptr;
+}
+
+static void
+freetree(NODE *node)
+{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ if (!node)
+ return;
+ if (node->left)
+ freetree(node->left);
+ if (node->right)
+ freetree(node->right);
+ pfree(node);
+}
+
+/*
+ * Strip subtrees under ! (NOT) operators from the tree.  This view is
+ * useful for debugging; it is also the form used when searching an index,
+ * where the ! operator is treated as always returning TRUE.
+ */
+static NODE *
+clean_NOT_intree(NODE *node)
+{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ if (node->valnode->type == QI_VAL)
+ return node;
+
+ if (node->valnode->qoperator.oper == OP_NOT)
+ {
+ freetree(node);
+ return NULL;
+ }
+
+ /* operator & or | */
+ if (node->valnode->qoperator.oper == OP_OR)
+ {
+ if ((node->left = clean_NOT_intree(node->left)) == NULL ||
+ (node->right = clean_NOT_intree(node->right)) == NULL)
+ {
+ freetree(node);
+ return NULL;
+ }
+ }
+ else
+ {
+ NODE *res = node;
+
+ Assert(node->valnode->qoperator.oper == OP_AND ||
+ node->valnode->qoperator.oper == OP_PHRASE);
+
+ node->left = clean_NOT_intree(node->left);
+ node->right = clean_NOT_intree(node->right);
+ if (node->left == NULL && node->right == NULL)
+ {
+ pfree(node);
+ res = NULL;
+ }
+ else if (node->left == NULL)
+ {
+ res = node->right;
+ pfree(node);
+ }
+ else if (node->right == NULL)
+ {
+ res = node->left;
+ pfree(node);
+ }
+ return res;
+ }
+ return node;
+}
+
+QueryItem *
+clean_NOT(QueryItem *ptr, int *len)
+{
+ NODE *root = maketree(ptr);
+
+ return plaintree(clean_NOT_intree(root), len);
+}
+
+
+/*
+ * Remove QI_VALSTOP (stopword) nodes from query tree.
+ *
+ * Returns NULL if the query degenerates to nothing. Input must not be NULL.
+ *
+ * When we remove a phrase operator due to removing one or both of its
+ * arguments, we might need to adjust the distance of a parent phrase
+ * operator. For example, 'a' is a stopword, so:
+ * (b <-> a) <-> c should become b <2> c
+ * b <-> (a <-> c) should become b <2> c
+ * (b <-> (a <-> a)) <-> c should become b <3> c
+ * b <-> ((a <-> a) <-> c) should become b <3> c
+ * To handle that, we define two output parameters:
+ * ladd: amount to add to a phrase distance to the left of this node
+ * radd: amount to add to a phrase distance to the right of this node
+ * We need two outputs because we could need to bubble up adjustments to two
+ * different parent phrase operators. Consider
+ * w <-> (((a <-> x) <2> (y <3> a)) <-> z)
+ * After we've removed the two a's and are considering the <2> node (which is
+ * now just x <2> y), we have an ladd distance of 1 that needs to propagate
+ * up to the topmost (leftmost) <->, and an radd distance of 3 that needs to
+ * propagate to the rightmost <->, so that we'll end up with
+ * w <2> ((x <2> y) <4> z)
+ * Near the bottom of the tree, we may have subtrees consisting only of
+ * stopwords. The distances of any phrase operators within such a subtree are
+ * summed and propagated to both ladd and radd, since we don't know which side
+ * of the lowest surviving phrase operator we are in. The rule is that any
+ * subtree that degenerates to NULL must return equal values of ladd and radd,
+ * and the parent node dealing with it should incorporate only one of those.
+ *
+ * Currently, we only implement this adjustment for adjacent phrase operators.
+ * Thus for example 'x <-> ((a <-> y) | z)' will become 'x <-> (y | z)', which
+ * isn't ideal, but there is no way to represent the really desired semantics
+ * without some redesign of the tsquery structure. Certainly it would not be
+ * any better to convert that to 'x <2> (y | z)'. Since this is such a weird
+ * corner case, let it go for now. But we can fix it in cases where the
+ * intervening non-phrase operator also gets removed, for example
+ * '((x <-> a) | a) <-> y' will become 'x <2> y'.
+ */
+static NODE *
+clean_stopword_intree(NODE *node, int *ladd, int *radd)
+{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ /* default output parameters indicate no change in parent distance */
+ *ladd = *radd = 0;
+
+ if (node->valnode->type == QI_VAL)
+ return node;
+ else if (node->valnode->type == QI_VALSTOP)
+ {
+ pfree(node);
+ return NULL;
+ }
+
+ Assert(node->valnode->type == QI_OPR);
+
+ if (node->valnode->qoperator.oper == OP_NOT)
+ {
+ /* NOT doesn't change pattern width, so just report child distances */
+ node->right = clean_stopword_intree(node->right, ladd, radd);
+ if (!node->right)
+ {
+ freetree(node);
+ return NULL;
+ }
+ }
+ else
+ {
+ NODE *res = node;
+ bool isphrase;
+ int ndistance,
+ lladd,
+ lradd,
+ rladd,
+ rradd;
+
+ /* First, recurse */
+ node->left = clean_stopword_intree(node->left, &lladd, &lradd);
+ node->right = clean_stopword_intree(node->right, &rladd, &rradd);
+
+ /* Check if current node is OP_PHRASE, get its distance */
+ isphrase = (node->valnode->qoperator.oper == OP_PHRASE);
+ ndistance = isphrase ? node->valnode->qoperator.distance : 0;
+
+ if (node->left == NULL && node->right == NULL)
+ {
+ /*
+ * When we collapse out a phrase node entirely, propagate its own
+ * distance into both *ladd and *radd; it is the responsibility of
+ * the parent node to count it only once. Also, for a phrase
+ * node, distances coming from children are summed and propagated
+ * up to parent (we assume lladd == lradd and rladd == rradd, else
+ * rule was broken at a lower level). But if this isn't a phrase
+ * node, take the larger of the two child distances; that
+ * corresponds to what TS_execute will do in non-stopword cases.
+ */
+ if (isphrase)
+ *ladd = *radd = lladd + ndistance + rladd;
+ else
+ *ladd = *radd = Max(lladd, rladd);
+ freetree(node);
+ return NULL;
+ }
+ else if (node->left == NULL)
+ {
+ /* Removing this operator and left subnode */
+ /* lladd and lradd are equal/redundant, don't count both */
+ if (isphrase)
+ {
+ /* operator's own distance must propagate to left */
+ *ladd = lladd + ndistance + rladd;
+ *radd = rradd;
+ }
+ else
+ {
+ /* at non-phrase op, just forget the left subnode entirely */
+ *ladd = rladd;
+ *radd = rradd;
+ }
+ res = node->right;
+ pfree(node);
+ }
+ else if (node->right == NULL)
+ {
+ /* Removing this operator and right subnode */
+ /* rladd and rradd are equal/redundant, don't count both */
+ if (isphrase)
+ {
+ /* operator's own distance must propagate to right */
+ *ladd = lladd;
+ *radd = lradd + ndistance + rradd;
+ }
+ else
+ {
+ /* at non-phrase op, just forget the right subnode entirely */
+ *ladd = lladd;
+ *radd = lradd;
+ }
+ res = node->left;
+ pfree(node);
+ }
+ else if (isphrase)
+ {
+ /* Absorb appropriate corrections at this level */
+ node->valnode->qoperator.distance += lradd + rladd;
+ /* Propagate up any unaccounted-for corrections */
+ *ladd = lladd;
+ *radd = rradd;
+ }
+ else
+ {
+ /* We're keeping a non-phrase operator, so ladd/radd remain 0 */
+ }
+
+ return res;
+ }
+ return node;
+}
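
To make the ladd/radd bookkeeping concrete, the following standalone C sketch (an editorial illustration, not part of this patch) traces the arithmetic by hand for '(b <-> (a <-> a)) <-> c' from the header comment, taking 'a' as the stopword; it prints the b <3> c result the comment predicts.

    #include <stdio.h>

    int
    main(void)
    {
        /* innermost 'a <-> a': both children are stopwords, the node collapses */
        int inner_ladd = 0 + 1 + 0;          /* lladd + own distance + rladd */
        int inner_radd = inner_ladd;         /* a NULL subtree reports equal ladd/radd */

        /* 'b <-> (a <-> a)': right subtree collapsed, keep the left child 'b' */
        int mid_ladd = 0;                    /* nothing to add on the left */
        int mid_radd = 0 + 1 + inner_radd;   /* lradd + own distance + rradd */

        /* '(...) <-> c': both children survive, so corrections are absorbed here */
        int top_distance = 1 + mid_radd + 0; /* own distance + lradd + rladd */

        printf("b <%d> c\n", top_distance);  /* prints: b <3> c */
        return 0;
    }
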
+
+/*
+ * Number of elements in query tree
+ */
+static int32
+calcstrlen(NODE *node)
+{
+ int32 size = 0;
+
+ if (node->valnode->type == QI_VAL)
+ {
+ size = node->valnode->qoperand.length + 1;
+ }
+ else
+ {
+ Assert(node->valnode->type == QI_OPR);
+
+ size = calcstrlen(node->right);
+ if (node->valnode->qoperator.oper != OP_NOT)
+ size += calcstrlen(node->left);
+ }
+
+ return size;
+}
+
+/*
+ * Remove QI_VALSTOP (stopword) nodes from TSQuery.
+ */
+TSQuery
+cleanup_tsquery_stopwords(TSQuery in)
+{
+ int32 len,
+ lenstr,
+ commonlen,
+ i;
+ NODE *root;
+ int ladd,
+ radd;
+ TSQuery out;
+ QueryItem *items;
+ char *operands;
+
+ if (in->size == 0)
+ return in;
+
+ /* eliminate stop words */
+ root = clean_stopword_intree(maketree(GETQUERY(in)), &ladd, &radd);
+ if (root == NULL)
+ {
+ ereport(NOTICE,
+ (errmsg("text-search query contains only stop words or doesn't contain lexemes, ignored")));
+ out = palloc(HDRSIZETQ);
+ out->size = 0;
+ SET_VARSIZE(out, HDRSIZETQ);
+ return out;
+ }
+
+ /*
+ * Build TSQuery from plain view
+ */
+
+ lenstr = calcstrlen(root);
+ items = plaintree(root, &len);
+ commonlen = COMPUTESIZE(len, lenstr);
+
+ out = palloc(commonlen);
+ SET_VARSIZE(out, commonlen);
+ out->size = len;
+
+ memcpy(GETQUERY(out), items, len * sizeof(QueryItem));
+
+ items = GETQUERY(out);
+ operands = GETOPERAND(out);
+ for (i = 0; i < out->size; i++)
+ {
+ QueryOperand *op = (QueryOperand *) &items[i];
+
+ if (op->type != QI_VAL)
+ continue;
+
+ memcpy(operands, GETOPERAND(in) + op->distance, op->length);
+ operands[op->length] = '\0';
+ op->distance = operands - GETOPERAND(out);
+ operands += op->length + 1;
+ }
+
+ return out;
+}
diff --git a/src/backend/utils/adt/tsquery_gist.c b/src/backend/utils/adt/tsquery_gist.c
new file mode 100644
index 0000000..801469b
--- /dev/null
+++ b/src/backend/utils/adt/tsquery_gist.c
@@ -0,0 +1,277 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsquery_gist.c
+ * GiST index support for tsquery
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/tsquery_gist.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/gist.h"
+#include "access/stratnum.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+
+#define GETENTRY(vec,pos) DatumGetTSQuerySign((vec)->vector[pos].key)
+
+
+Datum
+gtsquery_compress(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+ GISTENTRY *retval = entry;
+
+ if (entry->leafkey)
+ {
+ TSQuerySign sign;
+
+ retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
+ sign = makeTSQuerySign(DatumGetTSQuery(entry->key));
+
+ gistentryinit(*retval, TSQuerySignGetDatum(sign),
+ entry->rel, entry->page,
+ entry->offset, false);
+ }
+
+ PG_RETURN_POINTER(retval);
+}
+
+/*
+ * We do not need a decompress function, because the other gtsquery
+ * support functions work with the compressed representation.
+ */
+
+Datum
+gtsquery_consistent(PG_FUNCTION_ARGS)
+{
+ GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
+ TSQuery query = PG_GETARG_TSQUERY(1);
+ StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
+
+ /* Oid subtype = PG_GETARG_OID(3); */
+ bool *recheck = (bool *) PG_GETARG_POINTER(4);
+ TSQuerySign key = DatumGetTSQuerySign(entry->key);
+ TSQuerySign sq = makeTSQuerySign(query);
+ bool retval;
+
+ /* All cases served by this function are inexact */
+ *recheck = true;
+
+ switch (strategy)
+ {
+ case RTContainsStrategyNumber:
+ if (GIST_LEAF(entry))
+ retval = (key & sq) == sq;
+ else
+ retval = (key & sq) != 0;
+ break;
+ case RTContainedByStrategyNumber:
+ if (GIST_LEAF(entry))
+ retval = (key & sq) == key;
+ else
+ retval = (key & sq) != 0;
+ break;
+ default:
+ retval = false;
+ }
+ PG_RETURN_BOOL(retval);
+}
+
+Datum
+gtsquery_union(PG_FUNCTION_ARGS)
+{
+ GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+ int *size = (int *) PG_GETARG_POINTER(1);
+ TSQuerySign sign;
+ int i;
+
+ sign = 0;
+
+ for (i = 0; i < entryvec->n; i++)
+ sign |= GETENTRY(entryvec, i);
+
+ *size = sizeof(TSQuerySign);
+
+ PG_RETURN_TSQUERYSIGN(sign);
+}
+
+Datum
+gtsquery_same(PG_FUNCTION_ARGS)
+{
+ TSQuerySign a = PG_GETARG_TSQUERYSIGN(0);
+ TSQuerySign b = PG_GETARG_TSQUERYSIGN(1);
+ bool *result = (bool *) PG_GETARG_POINTER(2);
+
+ *result = (a == b);
+
+ PG_RETURN_POINTER(result);
+}
+
+static int
+sizebitvec(TSQuerySign sign)
+{
+ int size = 0,
+ i;
+
+ for (i = 0; i < TSQS_SIGLEN; i++)
+ size += 0x01 & (sign >> i);
+
+ return size;
+}
+
+static int
+hemdist(TSQuerySign a, TSQuerySign b)
+{
+ TSQuerySign res = a ^ b;
+
+ return sizebitvec(res);
+}
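
gtsquery_union, gtsquery_penalty, and gtsquery_picksplit all operate on fixed-width bit signatures: union is bitwise OR, and the penalty is the Hamming distance computed by sizebitvec/hemdist above. The standalone sketch below replays that arithmetic (not part of the patch; the 64-bit width is an assumption standing in for TSQS_SIGLEN).

    #include <stdint.h>
    #include <stdio.h>

    static int
    popcount64(uint64_t x)
    {
        int n = 0;

        while (x)
        {
            n += (int) (x & 1);
            x >>= 1;
        }
        return n;
    }

    int
    main(void)
    {
        uint64_t sig_a = (UINT64_C(1) << 3) | (UINT64_C(1) << 17);  /* two hashed terms */
        uint64_t sig_b = (UINT64_C(1) << 3) | (UINT64_C(1) << 42);
        uint64_t unioned = sig_a | sig_b;                /* what gtsquery_union does */
        int      penalty = popcount64(sig_a ^ sig_b);    /* what hemdist computes */

        printf("union=%#llx penalty=%d\n",
               (unsigned long long) unioned, penalty);
        return 0;
    }
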
+
+Datum
+gtsquery_penalty(PG_FUNCTION_ARGS)
+{
+ TSQuerySign origval = DatumGetTSQuerySign(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
+ TSQuerySign newval = DatumGetTSQuerySign(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
+ float *penalty = (float *) PG_GETARG_POINTER(2);
+
+ *penalty = hemdist(origval, newval);
+
+ PG_RETURN_POINTER(penalty);
+}
+
+
+typedef struct
+{
+ OffsetNumber pos;
+ int32 cost;
+} SPLITCOST;
+
+static int
+comparecost(const void *a, const void *b)
+{
+ if (((const SPLITCOST *) a)->cost == ((const SPLITCOST *) b)->cost)
+ return 0;
+ else
+ return (((const SPLITCOST *) a)->cost > ((const SPLITCOST *) b)->cost) ? 1 : -1;
+}
+
+#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
+
+Datum
+gtsquery_picksplit(PG_FUNCTION_ARGS)
+{
+ GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
+ GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
+ OffsetNumber maxoff = entryvec->n - 2;
+ OffsetNumber k,
+ j;
+ TSQuerySign datum_l,
+ datum_r;
+ int32 size_alpha,
+ size_beta;
+ int32 size_waste,
+ waste = -1;
+ int32 nbytes;
+ OffsetNumber seed_1 = 0,
+ seed_2 = 0;
+ OffsetNumber *left,
+ *right;
+
+ SPLITCOST *costvector;
+
+ nbytes = (maxoff + 2) * sizeof(OffsetNumber);
+ left = v->spl_left = (OffsetNumber *) palloc(nbytes);
+ right = v->spl_right = (OffsetNumber *) palloc(nbytes);
+ v->spl_nleft = v->spl_nright = 0;
+
+ for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
+ for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
+ {
+ size_waste = hemdist(GETENTRY(entryvec, j), GETENTRY(entryvec, k));
+ if (size_waste > waste)
+ {
+ waste = size_waste;
+ seed_1 = k;
+ seed_2 = j;
+ }
+ }
+
+
+ if (seed_1 == 0 || seed_2 == 0)
+ {
+ seed_1 = 1;
+ seed_2 = 2;
+ }
+
+ datum_l = GETENTRY(entryvec, seed_1);
+ datum_r = GETENTRY(entryvec, seed_2);
+
+ maxoff = OffsetNumberNext(maxoff);
+ costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
+ for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
+ {
+ costvector[j - 1].pos = j;
+ size_alpha = hemdist(GETENTRY(entryvec, seed_1), GETENTRY(entryvec, j));
+ size_beta = hemdist(GETENTRY(entryvec, seed_2), GETENTRY(entryvec, j));
+ costvector[j - 1].cost = abs(size_alpha - size_beta);
+ }
+ qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
+
+ for (k = 0; k < maxoff; k++)
+ {
+ j = costvector[k].pos;
+ if (j == seed_1)
+ {
+ *left++ = j;
+ v->spl_nleft++;
+ continue;
+ }
+ else if (j == seed_2)
+ {
+ *right++ = j;
+ v->spl_nright++;
+ continue;
+ }
+ size_alpha = hemdist(datum_l, GETENTRY(entryvec, j));
+ size_beta = hemdist(datum_r, GETENTRY(entryvec, j));
+
+ if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.05))
+ {
+ datum_l |= GETENTRY(entryvec, j);
+ *left++ = j;
+ v->spl_nleft++;
+ }
+ else
+ {
+ datum_r |= GETENTRY(entryvec, j);
+ *right++ = j;
+ v->spl_nright++;
+ }
+ }
+
+ *right = *left = FirstOffsetNumber;
+ v->spl_ldatum = TSQuerySignGetDatum(datum_l);
+ v->spl_rdatum = TSQuerySignGetDatum(datum_r);
+
+ PG_RETURN_POINTER(v);
+}
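
The WISH_F term used above evaluates to -(nleft - nright)^3 scaled by a small constant, so it discourages adding an entry to the side of the split that is already larger and nudges the split toward balance. A standalone sketch of the bias values (editorial illustration only):

    #include <stdio.h>

    /* same shape as the WISH_F macro above: -((a - b)^3) * c */
    static double
    wish_f(int a, int b, double c)
    {
        double d = (double) (a - b);

        return -(d * d * d) * c;
    }

    int
    main(void)
    {
        int diffs[] = {-4, -2, 0, 2, 4};

        for (int i = 0; i < 5; i++)
        {
            int nleft = 10 + diffs[i];
            int nright = 10;

            printf("nleft=%d nright=%d bias=%g\n",
                   nleft, nright, wish_f(nleft, nright, 0.05));
        }
        return 0;
    }
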
+
+/*
+ * Formerly, gtsquery_consistent was declared in pg_proc.h with arguments
+ * that did not match the documented conventions for GiST support functions.
+ * We fixed that, but we still need a pg_proc entry with the old signature
+ * to support reloading pre-9.6 contrib/tsearch2 opclass declarations.
+ * This compatibility function should go away eventually.
+ */
+Datum
+gtsquery_consistent_oldsig(PG_FUNCTION_ARGS)
+{
+ return gtsquery_consistent(fcinfo);
+}
diff --git a/src/backend/utils/adt/tsquery_op.c b/src/backend/utils/adt/tsquery_op.c
new file mode 100644
index 0000000..a784157
--- /dev/null
+++ b/src/backend/utils/adt/tsquery_op.c
@@ -0,0 +1,358 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsquery_op.c
+ * Various operations with tsquery
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/tsquery_op.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "lib/qunique.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+
+Datum
+tsquery_numnode(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY(0);
+ int nnode = query->size;
+
+ PG_FREE_IF_COPY(query, 0);
+ PG_RETURN_INT32(nnode);
+}
+
+static QTNode *
+join_tsqueries(TSQuery a, TSQuery b, int8 operator, uint16 distance)
+{
+ QTNode *res = (QTNode *) palloc0(sizeof(QTNode));
+
+ res->flags |= QTN_NEEDFREE;
+
+ res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
+ res->valnode->type = QI_OPR;
+ res->valnode->qoperator.oper = operator;
+ if (operator == OP_PHRASE)
+ res->valnode->qoperator.distance = distance;
+
+ res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
+ res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b));
+ res->child[1] = QT2QTN(GETQUERY(a), GETOPERAND(a));
+ res->nchild = 2;
+
+ return res;
+}
+
+Datum
+tsquery_and(PG_FUNCTION_ARGS)
+{
+ TSQuery a = PG_GETARG_TSQUERY_COPY(0);
+ TSQuery b = PG_GETARG_TSQUERY_COPY(1);
+ QTNode *res;
+ TSQuery query;
+
+ if (a->size == 0)
+ {
+ PG_FREE_IF_COPY(a, 1);
+ PG_RETURN_POINTER(b);
+ }
+ else if (b->size == 0)
+ {
+ PG_FREE_IF_COPY(b, 1);
+ PG_RETURN_POINTER(a);
+ }
+
+ res = join_tsqueries(a, b, OP_AND, 0);
+
+ query = QTN2QT(res);
+
+ QTNFree(res);
+ PG_FREE_IF_COPY(a, 0);
+ PG_FREE_IF_COPY(b, 1);
+
+ PG_RETURN_TSQUERY(query);
+}
+
+Datum
+tsquery_or(PG_FUNCTION_ARGS)
+{
+ TSQuery a = PG_GETARG_TSQUERY_COPY(0);
+ TSQuery b = PG_GETARG_TSQUERY_COPY(1);
+ QTNode *res;
+ TSQuery query;
+
+ if (a->size == 0)
+ {
+ PG_FREE_IF_COPY(a, 1);
+ PG_RETURN_POINTER(b);
+ }
+ else if (b->size == 0)
+ {
+ PG_FREE_IF_COPY(b, 1);
+ PG_RETURN_POINTER(a);
+ }
+
+ res = join_tsqueries(a, b, OP_OR, 0);
+
+ query = QTN2QT(res);
+
+ QTNFree(res);
+ PG_FREE_IF_COPY(a, 0);
+ PG_FREE_IF_COPY(b, 1);
+
+ PG_RETURN_TSQUERY(query);
+}
+
+Datum
+tsquery_phrase_distance(PG_FUNCTION_ARGS)
+{
+ TSQuery a = PG_GETARG_TSQUERY_COPY(0);
+ TSQuery b = PG_GETARG_TSQUERY_COPY(1);
+ QTNode *res;
+ TSQuery query;
+ int32 distance = PG_GETARG_INT32(2);
+
+ if (distance < 0 || distance > MAXENTRYPOS)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("distance in phrase operator must be an integer value between zero and %d inclusive",
+ MAXENTRYPOS)));
+ if (a->size == 0)
+ {
+ PG_FREE_IF_COPY(a, 1);
+ PG_RETURN_POINTER(b);
+ }
+ else if (b->size == 0)
+ {
+ PG_FREE_IF_COPY(b, 1);
+ PG_RETURN_POINTER(a);
+ }
+
+ res = join_tsqueries(a, b, OP_PHRASE, (uint16) distance);
+
+ query = QTN2QT(res);
+
+ QTNFree(res);
+ PG_FREE_IF_COPY(a, 0);
+ PG_FREE_IF_COPY(b, 1);
+
+ PG_RETURN_TSQUERY(query);
+}
+
+Datum
+tsquery_phrase(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_POINTER(DirectFunctionCall3(tsquery_phrase_distance,
+ PG_GETARG_DATUM(0),
+ PG_GETARG_DATUM(1),
+ Int32GetDatum(1)));
+}
+
+Datum
+tsquery_not(PG_FUNCTION_ARGS)
+{
+ TSQuery a = PG_GETARG_TSQUERY_COPY(0);
+ QTNode *res;
+ TSQuery query;
+
+ if (a->size == 0)
+ PG_RETURN_POINTER(a);
+
+ res = (QTNode *) palloc0(sizeof(QTNode));
+
+ res->flags |= QTN_NEEDFREE;
+
+ res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
+ res->valnode->type = QI_OPR;
+ res->valnode->qoperator.oper = OP_NOT;
+
+ res->child = (QTNode **) palloc0(sizeof(QTNode *));
+ res->child[0] = QT2QTN(GETQUERY(a), GETOPERAND(a));
+ res->nchild = 1;
+
+ query = QTN2QT(res);
+
+ QTNFree(res);
+ PG_FREE_IF_COPY(a, 0);
+
+ PG_RETURN_POINTER(query);
+}
+
+static int
+CompareTSQ(TSQuery a, TSQuery b)
+{
+ if (a->size != b->size)
+ {
+ return (a->size < b->size) ? -1 : 1;
+ }
+ else if (VARSIZE(a) != VARSIZE(b))
+ {
+ return (VARSIZE(a) < VARSIZE(b)) ? -1 : 1;
+ }
+ else if (a->size != 0)
+ {
+ QTNode *an = QT2QTN(GETQUERY(a), GETOPERAND(a));
+ QTNode *bn = QT2QTN(GETQUERY(b), GETOPERAND(b));
+ int res = QTNodeCompare(an, bn);
+
+ QTNFree(an);
+ QTNFree(bn);
+
+ return res;
+ }
+
+ return 0;
+}
+
+Datum
+tsquery_cmp(PG_FUNCTION_ARGS)
+{
+ TSQuery a = PG_GETARG_TSQUERY_COPY(0);
+ TSQuery b = PG_GETARG_TSQUERY_COPY(1);
+ int res = CompareTSQ(a, b);
+
+ PG_FREE_IF_COPY(a, 0);
+ PG_FREE_IF_COPY(b, 1);
+
+ PG_RETURN_INT32(res);
+}
+
+#define CMPFUNC( NAME, CONDITION ) \
+Datum \
+NAME(PG_FUNCTION_ARGS) { \
+ TSQuery a = PG_GETARG_TSQUERY_COPY(0); \
+ TSQuery b = PG_GETARG_TSQUERY_COPY(1); \
+ int res = CompareTSQ(a,b); \
+ \
+ PG_FREE_IF_COPY(a,0); \
+ PG_FREE_IF_COPY(b,1); \
+ \
+ PG_RETURN_BOOL( CONDITION ); \
+} \
+/* keep compiler quiet - no extra ; */ \
+extern int no_such_variable
+
+CMPFUNC(tsquery_lt, res < 0);
+CMPFUNC(tsquery_le, res <= 0);
+CMPFUNC(tsquery_eq, res == 0);
+CMPFUNC(tsquery_ge, res >= 0);
+CMPFUNC(tsquery_gt, res > 0);
+CMPFUNC(tsquery_ne, res != 0);
+
+TSQuerySign
+makeTSQuerySign(TSQuery a)
+{
+ int i;
+ QueryItem *ptr = GETQUERY(a);
+ TSQuerySign sign = 0;
+
+ for (i = 0; i < a->size; i++)
+ {
+ if (ptr->type == QI_VAL)
+ sign |= ((TSQuerySign) 1) << (((unsigned int) ptr->qoperand.valcrc) % TSQS_SIGLEN);
+ ptr++;
+ }
+
+ return sign;
+}
+
+static char **
+collectTSQueryValues(TSQuery a, int *nvalues_p)
+{
+ QueryItem *ptr = GETQUERY(a);
+ char *operand = GETOPERAND(a);
+ char **values;
+ int nvalues = 0;
+ int i;
+
+ values = (char **) palloc(sizeof(char *) * a->size);
+
+ for (i = 0; i < a->size; i++)
+ {
+ if (ptr->type == QI_VAL)
+ {
+ int len = ptr->qoperand.length;
+ char *val;
+
+ val = palloc(len + 1);
+ memcpy(val, operand + ptr->qoperand.distance, len);
+ val[len] = '\0';
+
+ values[nvalues++] = val;
+ }
+ ptr++;
+ }
+
+ *nvalues_p = nvalues;
+ return values;
+}
+
+static int
+cmp_string(const void *a, const void *b)
+{
+ const char *sa = *((char *const *) a);
+ const char *sb = *((char *const *) b);
+
+ return strcmp(sa, sb);
+}
+
+Datum
+tsq_mcontains(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY(0);
+ TSQuery ex = PG_GETARG_TSQUERY(1);
+ char **query_values;
+ int query_nvalues;
+ char **ex_values;
+ int ex_nvalues;
+ bool result = true;
+
+ /* Extract the query terms into arrays */
+ query_values = collectTSQueryValues(query, &query_nvalues);
+ ex_values = collectTSQueryValues(ex, &ex_nvalues);
+
+ /* Sort and remove duplicates from both arrays */
+ qsort(query_values, query_nvalues, sizeof(char *), cmp_string);
+ query_nvalues = qunique(query_values, query_nvalues, sizeof(char *),
+ cmp_string);
+ qsort(ex_values, ex_nvalues, sizeof(char *), cmp_string);
+ ex_nvalues = qunique(ex_values, ex_nvalues, sizeof(char *), cmp_string);
+
+ if (ex_nvalues > query_nvalues)
+ result = false;
+ else
+ {
+ int i;
+ int j = 0;
+
+ for (i = 0; i < ex_nvalues; i++)
+ {
+ for (; j < query_nvalues; j++)
+ {
+ if (strcmp(ex_values[i], query_values[j]) == 0)
+ break;
+ }
+ if (j == query_nvalues)
+ {
+ result = false;
+ break;
+ }
+ }
+ }
+
+ PG_RETURN_BOOL(result);
+}
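
tsq_mcontains thus reduces containment to a merge over two sorted, de-duplicated term lists: every term of the right-hand query must appear among the left-hand query's terms. The standalone sketch below replays that merge on invented terms (not part of the patch).

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static int
    cmp_cstr(const void *a, const void *b)
    {
        return strcmp(*(const char *const *) a, *(const char *const *) b);
    }

    /* drop adjacent duplicates from a sorted array, returning the new length */
    static int
    uniq(const char **v, int n)
    {
        int j = 0;

        for (int i = 0; i < n; i++)
            if (j == 0 || strcmp(v[i], v[j - 1]) != 0)
                v[j++] = v[i];
        return j;
    }

    /* same walk as tsq_mcontains: both arrays sorted and unique */
    static bool
    contains(const char **q, int nq, const char **e, int ne)
    {
        int j = 0;

        if (ne > nq)
            return false;
        for (int i = 0; i < ne; i++)
        {
            for (; j < nq; j++)
                if (strcmp(e[i], q[j]) == 0)
                    break;
            if (j == nq)
                return false;
        }
        return true;
    }

    int
    main(void)
    {
        const char *query[] = {"rat", "cat", "fat", "cat"};
        const char *ex[] = {"cat", "rat"};
        int nq = 4, ne = 2;

        qsort(query, nq, sizeof(char *), cmp_cstr);
        qsort(ex, ne, sizeof(char *), cmp_cstr);
        nq = uniq(query, nq);
        ne = uniq(ex, ne);

        printf("%s\n", contains(query, nq, ex, ne) ? "contained" : "not contained");
        return 0;
    }
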
+
+Datum
+tsq_mcontained(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall2(tsq_mcontains,
+ PG_GETARG_DATUM(1),
+ PG_GETARG_DATUM(0)));
+}
diff --git a/src/backend/utils/adt/tsquery_rewrite.c b/src/backend/utils/adt/tsquery_rewrite.c
new file mode 100644
index 0000000..3bef5d7
--- /dev/null
+++ b/src/backend/utils/adt/tsquery_rewrite.c
@@ -0,0 +1,462 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsquery_rewrite.c
+ * Utilities for reconstructing tsquery
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/tsquery_rewrite.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "catalog/pg_type.h"
+#include "executor/spi.h"
+#include "miscadmin.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+
+
+/*
+ * If "node" is equal to "ex", return a copy of "subs" instead.
+ * If "ex" matches a subset of node's children, return a modified version
+ * of "node" in which those children are replaced with a copy of "subs".
+ * Otherwise return "node" unmodified.
+ *
+ * The QTN_NOCHANGE bit is set in successfully modified nodes, so that
+ * we won't uselessly recurse into them.
+ * Also, set *isfind true if we make a replacement.
+ */
+static QTNode *
+findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
+{
+ /* Can't match unless signature matches and node type matches. */
+ if ((node->sign & ex->sign) != ex->sign ||
+ node->valnode->type != ex->valnode->type)
+ return node;
+
+ /* Ignore nodes marked NOCHANGE, too. */
+ if (node->flags & QTN_NOCHANGE)
+ return node;
+
+ if (node->valnode->type == QI_OPR)
+ {
+ /* Must be same operator. */
+ if (node->valnode->qoperator.oper != ex->valnode->qoperator.oper)
+ return node;
+
+ if (node->nchild == ex->nchild)
+ {
+ /*
+ * Simple case: when same number of children, match if equal.
+ * (This is reliable when the children were sorted earlier.)
+ */
+ if (QTNEq(node, ex))
+ {
+ /* Match; delete node and return a copy of subs instead. */
+ QTNFree(node);
+ if (subs)
+ {
+ node = QTNCopy(subs);
+ node->flags |= QTN_NOCHANGE;
+ }
+ else
+ node = NULL;
+ *isfind = true;
+ }
+ }
+ else if (node->nchild > ex->nchild && ex->nchild > 0)
+ {
+ /*
+ * AND and OR are commutative/associative, so we should check if a
+ * subset of the children match. For example, if node is A|B|C,
+ * and ex is B|C, we have a match after we notionally convert node
+ * to A|(B|C). This does not work for NOT or PHRASE nodes, but we
+ * can't get here for those node types because they have a fixed
+ * number of children.
+ *
+ * Because we expect that the children are sorted, it suffices to
+ * make one pass through the two lists to find the matches.
+ */
+ bool *matched;
+ int nmatched;
+ int i,
+ j;
+
+ /* Assert that the subset rule is OK */
+ Assert(node->valnode->qoperator.oper == OP_AND ||
+ node->valnode->qoperator.oper == OP_OR);
+
+ /* matched[] will record which children of node matched */
+ matched = (bool *) palloc0(node->nchild * sizeof(bool));
+ nmatched = 0;
+ i = j = 0;
+ while (i < node->nchild && j < ex->nchild)
+ {
+ int cmp = QTNodeCompare(node->child[i], ex->child[j]);
+
+ if (cmp == 0)
+ {
+ /* match! */
+ matched[i] = true;
+ nmatched++;
+ i++, j++;
+ }
+ else if (cmp < 0)
+ {
+ /* node->child[i] has no match, ignore it */
+ i++;
+ }
+ else
+ {
+ /* ex->child[j] has no match; we can give up immediately */
+ break;
+ }
+ }
+
+ if (nmatched == ex->nchild)
+ {
+ /* collapse out the matched children of node */
+ j = 0;
+ for (i = 0; i < node->nchild; i++)
+ {
+ if (matched[i])
+ QTNFree(node->child[i]);
+ else
+ node->child[j++] = node->child[i];
+ }
+
+ /* and instead insert a copy of subs */
+ if (subs)
+ {
+ subs = QTNCopy(subs);
+ subs->flags |= QTN_NOCHANGE;
+ node->child[j++] = subs;
+ }
+
+ node->nchild = j;
+
+ /*
+ * At this point we might have a node with zero or one child,
+ * which should be simplified. But we leave it to our caller
+ * (dofindsubquery) to take care of that.
+ */
+
+ /*
+ * Re-sort the node to put new child in the right place. This
+ * is a bit bogus, because it won't matter for findsubquery's
+ * remaining processing, and it's insufficient to prepare the
+ * tree for another search (we would need to re-flatten as
+ * well, and we don't want to do that because we'd lose the
+ * QTN_NOCHANGE marking on the new child). But it's needed to
+ * keep the results the same as the regression tests expect.
+ */
+ QTNSort(node);
+
+ *isfind = true;
+ }
+
+ pfree(matched);
+ }
+ }
+ else
+ {
+ Assert(node->valnode->type == QI_VAL);
+
+ if (node->valnode->qoperand.valcrc != ex->valnode->qoperand.valcrc)
+ return node;
+ else if (QTNEq(node, ex))
+ {
+ QTNFree(node);
+ if (subs)
+ {
+ node = QTNCopy(subs);
+ node->flags |= QTN_NOCHANGE;
+ }
+ else
+ {
+ node = NULL;
+ }
+ *isfind = true;
+ }
+ }
+
+ return node;
+}
+
+/*
+ * Recursive guts of findsubquery(): attempt to replace "ex" with "subs"
+ * at the root node, and if we failed to do so, recursively match against
+ * child nodes.
+ *
+ * Delete any void subtrees resulting from the replacement.
+ * In the following example '5' is replaced by empty operand:
+ *
+ *        AND            ->  6
+ *      /     \
+ *     5       OR
+ *            /    \
+ *           6      5

+ */
+static QTNode *
+dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
+{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ /* also, since it's a bit expensive, let's check for query cancel. */
+ CHECK_FOR_INTERRUPTS();
+
+ /* match at the node itself */
+ root = findeq(root, ex, subs, isfind);
+
+ /* unless we matched here, consider matches at child nodes */
+ if (root && (root->flags & QTN_NOCHANGE) == 0 &&
+ root->valnode->type == QI_OPR)
+ {
+ int i,
+ j = 0;
+
+ /*
+ * Any subtrees that are replaced by NULL must be dropped from the
+ * tree.
+ */
+ for (i = 0; i < root->nchild; i++)
+ {
+ root->child[j] = dofindsubquery(root->child[i], ex, subs, isfind);
+ if (root->child[j])
+ j++;
+ }
+
+ root->nchild = j;
+
+ /*
+ * If we have just zero or one remaining child node, simplify out this
+ * operator node.
+ */
+ if (root->nchild == 0)
+ {
+ QTNFree(root);
+ root = NULL;
+ }
+ else if (root->nchild == 1 && root->valnode->qoperator.oper != OP_NOT)
+ {
+ QTNode *nroot = root->child[0];
+
+ pfree(root);
+ root = nroot;
+ }
+ }
+
+ return root;
+}
+
+/*
+ * Substitute "subs" for "ex" throughout the QTNode tree at root.
+ *
+ * If isfind isn't NULL, set *isfind to show whether we made any substitution.
+ *
+ * Both "root" and "ex" must have been through QTNTernary and QTNSort
+ * to ensure reliable matching.
+ */
+QTNode *
+findsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
+{
+ bool DidFind = false;
+
+ root = dofindsubquery(root, ex, subs, &DidFind);
+
+ if (isfind)
+ *isfind = DidFind;
+
+ return root;
+}
+
+Datum
+tsquery_rewrite_query(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY_COPY(0);
+ text *in = PG_GETARG_TEXT_PP(1);
+ TSQuery rewrited = query;
+ MemoryContext outercontext = CurrentMemoryContext;
+ MemoryContext oldcontext;
+ QTNode *tree;
+ char *buf;
+ SPIPlanPtr plan;
+ Portal portal;
+ bool isnull;
+
+ if (query->size == 0)
+ {
+ PG_FREE_IF_COPY(in, 1);
+ PG_RETURN_POINTER(rewrited);
+ }
+
+ tree = QT2QTN(GETQUERY(query), GETOPERAND(query));
+ QTNTernary(tree);
+ QTNSort(tree);
+
+ buf = text_to_cstring(in);
+
+ SPI_connect();
+
+ if ((plan = SPI_prepare(buf, 0, NULL)) == NULL)
+ elog(ERROR, "SPI_prepare(\"%s\") failed", buf);
+
+ if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
+ elog(ERROR, "SPI_cursor_open(\"%s\") failed", buf);
+
+ SPI_cursor_fetch(portal, true, 100);
+
+ if (SPI_tuptable == NULL ||
+ SPI_tuptable->tupdesc->natts != 2 ||
+ SPI_gettypeid(SPI_tuptable->tupdesc, 1) != TSQUERYOID ||
+ SPI_gettypeid(SPI_tuptable->tupdesc, 2) != TSQUERYOID)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("ts_rewrite query must return two tsquery columns")));
+
+ while (SPI_processed > 0 && tree)
+ {
+ uint64 i;
+
+ for (i = 0; i < SPI_processed && tree; i++)
+ {
+ Datum qdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+ Datum sdata;
+
+ if (isnull)
+ continue;
+
+ sdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull);
+
+ if (!isnull)
+ {
+ TSQuery qtex = DatumGetTSQuery(qdata);
+ TSQuery qtsubs = DatumGetTSQuery(sdata);
+ QTNode *qex,
+ *qsubs = NULL;
+
+ if (qtex->size == 0)
+ {
+ if (qtex != (TSQuery) DatumGetPointer(qdata))
+ pfree(qtex);
+ if (qtsubs != (TSQuery) DatumGetPointer(sdata))
+ pfree(qtsubs);
+ continue;
+ }
+
+ qex = QT2QTN(GETQUERY(qtex), GETOPERAND(qtex));
+
+ QTNTernary(qex);
+ QTNSort(qex);
+
+ if (qtsubs->size)
+ qsubs = QT2QTN(GETQUERY(qtsubs), GETOPERAND(qtsubs));
+
+ oldcontext = MemoryContextSwitchTo(outercontext);
+ tree = findsubquery(tree, qex, qsubs, NULL);
+ MemoryContextSwitchTo(oldcontext);
+
+ QTNFree(qex);
+ if (qtex != (TSQuery) DatumGetPointer(qdata))
+ pfree(qtex);
+ QTNFree(qsubs);
+ if (qtsubs != (TSQuery) DatumGetPointer(sdata))
+ pfree(qtsubs);
+
+ if (tree)
+ {
+ /* ready the tree for another pass */
+ QTNClearFlags(tree, QTN_NOCHANGE);
+ QTNTernary(tree);
+ QTNSort(tree);
+ }
+ }
+ }
+
+ SPI_freetuptable(SPI_tuptable);
+ SPI_cursor_fetch(portal, true, 100);
+ }
+
+ SPI_freetuptable(SPI_tuptable);
+ SPI_cursor_close(portal);
+ SPI_freeplan(plan);
+ SPI_finish();
+
+ if (tree)
+ {
+ QTNBinary(tree);
+ rewrited = QTN2QT(tree);
+ QTNFree(tree);
+ PG_FREE_IF_COPY(query, 0);
+ }
+ else
+ {
+ SET_VARSIZE(rewrited, HDRSIZETQ);
+ rewrited->size = 0;
+ }
+
+ pfree(buf);
+ PG_FREE_IF_COPY(in, 1);
+ PG_RETURN_POINTER(rewrited);
+}
+
+Datum
+tsquery_rewrite(PG_FUNCTION_ARGS)
+{
+ TSQuery query = PG_GETARG_TSQUERY_COPY(0);
+ TSQuery ex = PG_GETARG_TSQUERY(1);
+ TSQuery subst = PG_GETARG_TSQUERY(2);
+ TSQuery rewrited = query;
+ QTNode *tree,
+ *qex,
+ *subs = NULL;
+
+ if (query->size == 0 || ex->size == 0)
+ {
+ PG_FREE_IF_COPY(ex, 1);
+ PG_FREE_IF_COPY(subst, 2);
+ PG_RETURN_POINTER(rewrited);
+ }
+
+ tree = QT2QTN(GETQUERY(query), GETOPERAND(query));
+ QTNTernary(tree);
+ QTNSort(tree);
+
+ qex = QT2QTN(GETQUERY(ex), GETOPERAND(ex));
+ QTNTernary(qex);
+ QTNSort(qex);
+
+ if (subst->size)
+ subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst));
+
+ tree = findsubquery(tree, qex, subs, NULL);
+
+ QTNFree(qex);
+ QTNFree(subs);
+
+ if (!tree)
+ {
+ SET_VARSIZE(rewrited, HDRSIZETQ);
+ rewrited->size = 0;
+ PG_FREE_IF_COPY(ex, 1);
+ PG_FREE_IF_COPY(subst, 2);
+ PG_RETURN_POINTER(rewrited);
+ }
+ else
+ {
+ QTNBinary(tree);
+ rewrited = QTN2QT(tree);
+ QTNFree(tree);
+ }
+
+ PG_FREE_IF_COPY(query, 0);
+ PG_FREE_IF_COPY(ex, 1);
+ PG_FREE_IF_COPY(subst, 2);
+ PG_RETURN_POINTER(rewrited);
+}
diff --git a/src/backend/utils/adt/tsquery_util.c b/src/backend/utils/adt/tsquery_util.c
new file mode 100644
index 0000000..191b58e
--- /dev/null
+++ b/src/backend/utils/adt/tsquery_util.c
@@ -0,0 +1,447 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsquery_util.c
+ * Utilities for tsquery datatype
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/tsquery_util.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "miscadmin.h"
+#include "tsearch/ts_utils.h"
+
+/*
+ * Build QTNode tree for a tsquery given in QueryItem array format.
+ */
+QTNode *
+QT2QTN(QueryItem *in, char *operand)
+{
+ QTNode *node = (QTNode *) palloc0(sizeof(QTNode));
+
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ node->valnode = in;
+
+ if (in->type == QI_OPR)
+ {
+ node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
+ node->child[0] = QT2QTN(in + 1, operand);
+ node->sign = node->child[0]->sign;
+ if (in->qoperator.oper == OP_NOT)
+ node->nchild = 1;
+ else
+ {
+ node->nchild = 2;
+ node->child[1] = QT2QTN(in + in->qoperator.left, operand);
+ node->sign |= node->child[1]->sign;
+ }
+ }
+ else if (operand)
+ {
+ node->word = operand + in->qoperand.distance;
+ node->sign = ((uint32) 1) << (((unsigned int) in->qoperand.valcrc) % 32);
+ }
+
+ return node;
+}
+
+/*
+ * Free a QTNode tree.
+ *
+ * Referenced "word" and "valnode" items are freed if marked as transient
+ * by flags.
+ */
+void
+QTNFree(QTNode *in)
+{
+ if (!in)
+ return;
+
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ if (in->valnode->type == QI_VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
+ pfree(in->word);
+
+ if (in->valnode->type == QI_OPR)
+ {
+ int i;
+
+ for (i = 0; i < in->nchild; i++)
+ QTNFree(in->child[i]);
+ }
+ if (in->child)
+ pfree(in->child);
+
+ if (in->flags & QTN_NEEDFREE)
+ pfree(in->valnode);
+
+ pfree(in);
+}
+
+/*
+ * Sort comparator for QTNodes.
+ *
+ * The sort order is somewhat arbitrary.
+ */
+int
+QTNodeCompare(QTNode *an, QTNode *bn)
+{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ if (an->valnode->type != bn->valnode->type)
+ return (an->valnode->type > bn->valnode->type) ? -1 : 1;
+
+ if (an->valnode->type == QI_OPR)
+ {
+ QueryOperator *ao = &an->valnode->qoperator;
+ QueryOperator *bo = &bn->valnode->qoperator;
+
+ if (ao->oper != bo->oper)
+ return (ao->oper > bo->oper) ? -1 : 1;
+
+ if (an->nchild != bn->nchild)
+ return (an->nchild > bn->nchild) ? -1 : 1;
+
+ {
+ int i,
+ res;
+
+ for (i = 0; i < an->nchild; i++)
+ if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
+ return res;
+ }
+
+ if (ao->oper == OP_PHRASE && ao->distance != bo->distance)
+ return (ao->distance > bo->distance) ? -1 : 1;
+
+ return 0;
+ }
+ else if (an->valnode->type == QI_VAL)
+ {
+ QueryOperand *ao = &an->valnode->qoperand;
+ QueryOperand *bo = &bn->valnode->qoperand;
+
+ if (ao->valcrc != bo->valcrc)
+ {
+ return (ao->valcrc > bo->valcrc) ? -1 : 1;
+ }
+
+ return tsCompareString(an->word, ao->length, bn->word, bo->length, false);
+ }
+ else
+ {
+ elog(ERROR, "unrecognized QueryItem type: %d", an->valnode->type);
+ return 0; /* keep compiler quiet */
+ }
+}
+
+/*
+ * qsort comparator for QTNode pointers.
+ */
+static int
+cmpQTN(const void *a, const void *b)
+{
+ return QTNodeCompare(*(QTNode *const *) a, *(QTNode *const *) b);
+}
+
+/*
+ * Canonicalize a QTNode tree by sorting the children of AND/OR nodes
+ * into an arbitrary but well-defined order.
+ */
+void
+QTNSort(QTNode *in)
+{
+ int i;
+
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ if (in->valnode->type != QI_OPR)
+ return;
+
+ for (i = 0; i < in->nchild; i++)
+ QTNSort(in->child[i]);
+ if (in->nchild > 1 && in->valnode->qoperator.oper != OP_PHRASE)
+ qsort((void *) in->child, in->nchild, sizeof(QTNode *), cmpQTN);
+}
+
+/*
+ * Are two QTNode trees equal according to QTNodeCompare?
+ */
+bool
+QTNEq(QTNode *a, QTNode *b)
+{
+ uint32 sign = a->sign & b->sign;
+
+ if (!(sign == a->sign && sign == b->sign))
+ return false;
+
+ return (QTNodeCompare(a, b) == 0);
+}
+
+/*
+ * Remove unnecessary intermediate nodes. For example:
+ *
+ *    OR            OR
+ *    a  OR    ->   a  b  c
+ *       b  c
+ */
+void
+QTNTernary(QTNode *in)
+{
+ int i;
+
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ if (in->valnode->type != QI_OPR)
+ return;
+
+ for (i = 0; i < in->nchild; i++)
+ QTNTernary(in->child[i]);
+
+ /* Only AND and OR are associative, so don't flatten other node types */
+ if (in->valnode->qoperator.oper != OP_AND &&
+ in->valnode->qoperator.oper != OP_OR)
+ return;
+
+ for (i = 0; i < in->nchild; i++)
+ {
+ QTNode *cc = in->child[i];
+
+ if (cc->valnode->type == QI_OPR &&
+ in->valnode->qoperator.oper == cc->valnode->qoperator.oper)
+ {
+ int oldnchild = in->nchild;
+
+ in->nchild += cc->nchild - 1;
+ in->child = (QTNode **) repalloc(in->child, in->nchild * sizeof(QTNode *));
+
+ if (i + 1 != oldnchild)
+ memmove(in->child + i + cc->nchild, in->child + i + 1,
+ (oldnchild - i - 1) * sizeof(QTNode *));
+
+ memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *));
+ i += cc->nchild - 1;
+
+ if (cc->flags & QTN_NEEDFREE)
+ pfree(cc->valnode);
+ pfree(cc);
+ }
+ }
+}
+
+/*
+ * Convert a tree to binary tree by inserting intermediate nodes.
+ * (Opposite of QTNTernary)
+ */
+void
+QTNBinary(QTNode *in)
+{
+ int i;
+
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ if (in->valnode->type != QI_OPR)
+ return;
+
+ for (i = 0; i < in->nchild; i++)
+ QTNBinary(in->child[i]);
+
+ while (in->nchild > 2)
+ {
+ QTNode *nn = (QTNode *) palloc0(sizeof(QTNode));
+
+ nn->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
+ nn->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
+
+ nn->nchild = 2;
+ nn->flags = QTN_NEEDFREE;
+
+ nn->child[0] = in->child[0];
+ nn->child[1] = in->child[1];
+ nn->sign = nn->child[0]->sign | nn->child[1]->sign;
+
+ nn->valnode->type = in->valnode->type;
+ nn->valnode->qoperator.oper = in->valnode->qoperator.oper;
+
+ in->child[0] = nn;
+ in->child[1] = in->child[in->nchild - 1];
+ in->nchild--;
+ }
+}
+
+/*
+ * Count the total length of operand strings in tree (including '\0'-
+ * terminators) and the total number of nodes.
+ * Caller must initialize *sumlen and *nnode to zeroes.
+ */
+static void
+cntsize(QTNode *in, int *sumlen, int *nnode)
+{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ *nnode += 1;
+ if (in->valnode->type == QI_OPR)
+ {
+ int i;
+
+ for (i = 0; i < in->nchild; i++)
+ cntsize(in->child[i], sumlen, nnode);
+ }
+ else
+ {
+ *sumlen += in->valnode->qoperand.length + 1;
+ }
+}
+
+typedef struct
+{
+ QueryItem *curitem;
+ char *operand;
+ char *curoperand;
+} QTN2QTState;
+
+/*
+ * Recursively convert a QTNode tree into flat tsquery format.
+ * Caller must have allocated arrays of the correct size.
+ */
+static void
+fillQT(QTN2QTState *state, QTNode *in)
+{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ if (in->valnode->type == QI_VAL)
+ {
+ memcpy(state->curitem, in->valnode, sizeof(QueryOperand));
+
+ memcpy(state->curoperand, in->word, in->valnode->qoperand.length);
+ state->curitem->qoperand.distance = state->curoperand - state->operand;
+ state->curoperand[in->valnode->qoperand.length] = '\0';
+ state->curoperand += in->valnode->qoperand.length + 1;
+ state->curitem++;
+ }
+ else
+ {
+ QueryItem *curitem = state->curitem;
+
+ Assert(in->valnode->type == QI_OPR);
+
+ memcpy(state->curitem, in->valnode, sizeof(QueryOperator));
+
+ Assert(in->nchild <= 2);
+ state->curitem++;
+
+ fillQT(state, in->child[0]);
+
+ if (in->nchild == 2)
+ {
+ curitem->qoperator.left = state->curitem - curitem;
+ fillQT(state, in->child[1]);
+ }
+ }
+}
+
+/*
+ * Build flat tsquery from a QTNode tree.
+ */
+TSQuery
+QTN2QT(QTNode *in)
+{
+ TSQuery out;
+ int len;
+ int sumlen = 0,
+ nnode = 0;
+ QTN2QTState state;
+
+ cntsize(in, &sumlen, &nnode);
+
+ if (TSQUERY_TOO_BIG(nnode, sumlen))
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("tsquery is too large")));
+ len = COMPUTESIZE(nnode, sumlen);
+
+ out = (TSQuery) palloc0(len);
+ SET_VARSIZE(out, len);
+ out->size = nnode;
+
+ state.curitem = GETQUERY(out);
+ state.operand = state.curoperand = GETOPERAND(out);
+
+ fillQT(&state, in);
+ return out;
+}
+
+/*
+ * Copy a QTNode tree.
+ *
+ * Modifiable copies of the words and valnodes are made, too.
+ */
+QTNode *
+QTNCopy(QTNode *in)
+{
+ QTNode *out;
+
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ out = (QTNode *) palloc(sizeof(QTNode));
+
+ *out = *in;
+ out->valnode = (QueryItem *) palloc(sizeof(QueryItem));
+ *(out->valnode) = *(in->valnode);
+ out->flags |= QTN_NEEDFREE;
+
+ if (in->valnode->type == QI_VAL)
+ {
+ out->word = palloc(in->valnode->qoperand.length + 1);
+ memcpy(out->word, in->word, in->valnode->qoperand.length);
+ out->word[in->valnode->qoperand.length] = '\0';
+ out->flags |= QTN_WORDFREE;
+ }
+ else
+ {
+ int i;
+
+ out->child = (QTNode **) palloc(sizeof(QTNode *) * in->nchild);
+
+ for (i = 0; i < in->nchild; i++)
+ out->child[i] = QTNCopy(in->child[i]);
+ }
+
+ return out;
+}
+
+/*
+ * Clear the specified flag bit(s) in all nodes of a QTNode tree.
+ */
+void
+QTNClearFlags(QTNode *in, uint32 flags)
+{
+ /* since this function recurses, it could be driven to stack overflow. */
+ check_stack_depth();
+
+ in->flags &= ~flags;
+
+ if (in->valnode->type != QI_VAL)
+ {
+ int i;
+
+ for (i = 0; i < in->nchild; i++)
+ QTNClearFlags(in->child[i], flags);
+ }
+}
diff --git a/src/backend/utils/adt/tsrank.c b/src/backend/utils/adt/tsrank.c
new file mode 100644
index 0000000..3858fc5
--- /dev/null
+++ b/src/backend/utils/adt/tsrank.c
@@ -0,0 +1,1012 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsrank.c
+ * rank tsvector by tsquery
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/tsrank.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <limits.h>
+#include <math.h>
+
+#include "miscadmin.h"
+#include "tsearch/ts_utils.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+
+static const float weights[] = {0.1f, 0.2f, 0.4f, 1.0f};
+
+#define wpos(wep) ( w[ WEP_GETWEIGHT(wep) ] )
+
+#define RANK_NO_NORM 0x00
+#define RANK_NORM_LOGLENGTH 0x01
+#define RANK_NORM_LENGTH 0x02
+#define RANK_NORM_EXTDIST 0x04
+#define RANK_NORM_UNIQ 0x08
+#define RANK_NORM_LOGUNIQ 0x10
+#define RANK_NORM_RDIVRPLUS1 0x20
+#define DEF_NORM_METHOD RANK_NO_NORM
+
+static float calc_rank_or(const float *w, TSVector t, TSQuery q);
+static float calc_rank_and(const float *w, TSVector t, TSQuery q);
+
+/*
+ * Returns a weight of a word collocation
+ */
+static float4
+word_distance(int32 w)
+{
+ if (w > 100)
+ return 1e-30f;
+
+ return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
+}
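
word_distance turns the positional gap between two matched lexemes into a proximity weight that is close to 1 for adjacent lexemes, falls off as the gap grows, and is clamped to essentially zero past 100. A standalone evaluation of the same curve (editorial illustration, not part of the patch):

    #include <math.h>
    #include <stdio.h>

    /* same formula as word_distance above */
    static double
    proximity_weight(int w)
    {
        if (w > 100)
            return 1e-30;
        return 1.0 / (1.005 + 0.05 * exp(((double) w) / 1.5 - 2));
    }

    int
    main(void)
    {
        int gaps[] = {1, 2, 5, 10, 50, 101};

        for (int i = 0; i < 6; i++)
            printf("gap %3d -> weight %g\n", gaps[i], proximity_weight(gaps[i]));
        return 0;
    }
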
+
+static int
+cnt_length(TSVector t)
+{
+ WordEntry *ptr = ARRPTR(t),
+ *end = (WordEntry *) STRPTR(t);
+ int len = 0;
+
+ while (ptr < end)
+ {
+ int clen = POSDATALEN(t, ptr);
+
+ if (clen == 0)
+ len += 1;
+ else
+ len += clen;
+
+ ptr++;
+ }
+
+ return len;
+}
+
+
+#define WordECompareQueryItem(e,q,p,i,m) \
+ tsCompareString((q) + (i)->distance, (i)->length, \
+ (e) + (p)->pos, (p)->len, (m))
+
+
+/*
+ * Returns a pointer to a WordEntry's array corresponding to 'item' from
+ * tsvector 't'. 'q' is the TSQuery containing 'item'.
+ * Returns NULL if not found.
+ */
+static WordEntry *
+find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
+{
+ WordEntry *StopLow = ARRPTR(t);
+ WordEntry *StopHigh = (WordEntry *) STRPTR(t);
+ WordEntry *StopMiddle = StopHigh;
+ int difference;
+
+ *nitem = 0;
+
+ /* Loop invariant: StopLow <= item < StopHigh */
+ while (StopLow < StopHigh)
+ {
+ StopMiddle = StopLow + (StopHigh - StopLow) / 2;
+ difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false);
+ if (difference == 0)
+ {
+ StopHigh = StopMiddle;
+ *nitem = 1;
+ break;
+ }
+ else if (difference > 0)
+ StopLow = StopMiddle + 1;
+ else
+ StopHigh = StopMiddle;
+ }
+
+ if (item->prefix)
+ {
+ if (StopLow >= StopHigh)
+ StopMiddle = StopHigh;
+
+ *nitem = 0;
+
+ while (StopMiddle < (WordEntry *) STRPTR(t) &&
+ WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0)
+ {
+ (*nitem)++;
+ StopMiddle++;
+ }
+ }
+
+ return (*nitem > 0) ? StopHigh : NULL;
+}
+
+
+/*
+ * sort QueryOperands by (length, word)
+ */
+static int
+compareQueryOperand(const void *a, const void *b, void *arg)
+{
+ char *operand = (char *) arg;
+ QueryOperand *qa = (*(QueryOperand *const *) a);
+ QueryOperand *qb = (*(QueryOperand *const *) b);
+
+ return tsCompareString(operand + qa->distance, qa->length,
+ operand + qb->distance, qb->length,
+ false);
+}
+
+/*
+ * Returns a sorted, de-duplicated array of QueryOperands in a query.
+ * The returned QueryOperands are pointers to the original QueryOperands
+ * in the query.
+ *
+ * Length of the returned array is stored in *size
+ */
+static QueryOperand **
+SortAndUniqItems(TSQuery q, int *size)
+{
+ char *operand = GETOPERAND(q);
+ QueryItem *item = GETQUERY(q);
+ QueryOperand **res,
+ **ptr,
+ **prevptr;
+
+ ptr = res = (QueryOperand **) palloc(sizeof(QueryOperand *) * *size);
+
+ /* Collect all operands from the tree to res */
+ while ((*size)--)
+ {
+ if (item->type == QI_VAL)
+ {
+ *ptr = (QueryOperand *) item;
+ ptr++;
+ }
+ item++;
+ }
+
+ *size = ptr - res;
+ if (*size < 2)
+ return res;
+
+ qsort_arg(res, *size, sizeof(QueryOperand *), compareQueryOperand, (void *) operand);
+
+ ptr = res + 1;
+ prevptr = res;
+
+ /* remove duplicates */
+ while (ptr - res < *size)
+ {
+ if (compareQueryOperand((void *) ptr, (void *) prevptr, (void *) operand) != 0)
+ {
+ prevptr++;
+ *prevptr = *ptr;
+ }
+ ptr++;
+ }
+
+ *size = prevptr + 1 - res;
+ return res;
+}
+
+static float
+calc_rank_and(const float *w, TSVector t, TSQuery q)
+{
+ WordEntryPosVector **pos;
+ WordEntryPosVector1 posnull;
+ WordEntryPosVector *POSNULL;
+ int i,
+ k,
+ l,
+ p;
+ WordEntry *entry,
+ *firstentry;
+ WordEntryPos *post,
+ *ct;
+ int32 dimt,
+ lenct,
+ dist,
+ nitem;
+ float res = -1.0;
+ QueryOperand **item;
+ int size = q->size;
+
+ item = SortAndUniqItems(q, &size);
+ if (size < 2)
+ {
+ pfree(item);
+ return calc_rank_or(w, t, q);
+ }
+ pos = (WordEntryPosVector **) palloc0(sizeof(WordEntryPosVector *) * q->size);
+
+ /* A dummy WordEntryPos array to use when haspos is false */
+ posnull.npos = 1;
+ posnull.pos[0] = 0;
+ WEP_SETPOS(posnull.pos[0], MAXENTRYPOS - 1);
+ POSNULL = (WordEntryPosVector *) &posnull;
+
+ for (i = 0; i < size; i++)
+ {
+ firstentry = entry = find_wordentry(t, q, item[i], &nitem);
+ if (!entry)
+ continue;
+
+ while (entry - firstentry < nitem)
+ {
+ if (entry->haspos)
+ pos[i] = _POSVECPTR(t, entry);
+ else
+ pos[i] = POSNULL;
+
+ dimt = pos[i]->npos;
+ post = pos[i]->pos;
+ for (k = 0; k < i; k++)
+ {
+ if (!pos[k])
+ continue;
+ lenct = pos[k]->npos;
+ ct = pos[k]->pos;
+ for (l = 0; l < dimt; l++)
+ {
+ for (p = 0; p < lenct; p++)
+ {
+ dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
+ if (dist || (dist == 0 && (pos[i] == POSNULL || pos[k] == POSNULL)))
+ {
+ float curw;
+
+ if (!dist)
+ dist = MAXENTRYPOS;
+ curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
+ res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
+ }
+ }
+ }
+ }
+
+ entry++;
+ }
+ }
+ pfree(pos);
+ pfree(item);
+ return res;
+}
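
calc_rank_and folds each matching word pair into the rank like independent probabilities: a pair contributes sqrt of the two lexeme weights times word_distance(gap), and successive pairs combine as res = 1 - (1 - res) * (1 - curw), so the result approaches but never exceeds 1. The sketch below runs that accumulation on invented lexeme weights and gaps (not part of the patch).

    #include <math.h>
    #include <stdio.h>

    static double
    proximity_weight(int w)
    {
        if (w > 100)
            return 1e-30;
        return 1.0 / (1.005 + 0.05 * exp(((double) w) / 1.5 - 2));
    }

    int
    main(void)
    {
        /* pretend three word pairs matched at these gaps, all carrying the
         * default 'D' lexeme weight 0.1 (made-up inputs) */
        double w1 = 0.1, w2 = 0.1;
        int    gaps[] = {1, 3, 20};
        double res = -1.0;

        for (int i = 0; i < 3; i++)
        {
            double curw = sqrt(w1 * w2 * proximity_weight(gaps[i]));

            res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
            printf("after gap %2d: rank %.4f\n", gaps[i], res);
        }
        return 0;
    }
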
+
+static float
+calc_rank_or(const float *w, TSVector t, TSQuery q)
+{
+ WordEntry *entry,
+ *firstentry;
+ WordEntryPosVector1 posnull;
+ WordEntryPos *post;
+ int32 dimt,
+ j,
+ i,
+ nitem;
+ float res = 0.0;
+ QueryOperand **item;
+ int size = q->size;
+
+ /* A dummy WordEntryPos array to use when haspos is false */
+ posnull.npos = 1;
+ posnull.pos[0] = 0;
+
+ item = SortAndUniqItems(q, &size);
+
+ for (i = 0; i < size; i++)
+ {
+ float resj,
+ wjm;
+ int32 jm;
+
+ firstentry = entry = find_wordentry(t, q, item[i], &nitem);
+ if (!entry)
+ continue;
+
+ while (entry - firstentry < nitem)
+ {
+ if (entry->haspos)
+ {
+ dimt = POSDATALEN(t, entry);
+ post = POSDATAPTR(t, entry);
+ }
+ else
+ {
+ dimt = posnull.npos;
+ post = posnull.pos;
+ }
+
+ resj = 0.0;
+ wjm = -1.0;
+ jm = 0;
+ for (j = 0; j < dimt; j++)
+ {
+ resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
+ if (wpos(post[j]) > wjm)
+ {
+ wjm = wpos(post[j]);
+ jm = j;
+ }
+ }
+/*
+ limit (sum(1/i^2),i=1,inf) = pi^2/6
+ resj = sum(wi/i^2),i=1,noccurrence,
+ wi - should be sorted desc,
+ don't sort for now, just choose maximum weight. This should be corrected
+ Oleg Bartunov
+*/
+ res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
+
+ entry++;
+ }
+ }
+ if (size > 0)
+ res = res / size;
+ pfree(item);
+ return res;
+}
+
+static float
+calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
+{
+ QueryItem *item = GETQUERY(q);
+ float res = 0.0;
+ int len;
+
+ if (!t->size || !q->size)
+ return 0.0;
+
+ /* XXX: What about NOT? */
+ res = (item->type == QI_OPR && (item->qoperator.oper == OP_AND ||
+ item->qoperator.oper == OP_PHRASE)) ?
+ calc_rank_and(w, t, q) :
+ calc_rank_or(w, t, q);
+
+ if (res < 0)
+ res = 1e-20f;
+
+ if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
+ res /= log((double) (cnt_length(t) + 1)) / log(2.0);
+
+ if (method & RANK_NORM_LENGTH)
+ {
+ len = cnt_length(t);
+ if (len > 0)
+ res /= (float) len;
+ }
+
+ /* RANK_NORM_EXTDIST not applicable */
+
+ if ((method & RANK_NORM_UNIQ) && t->size > 0)
+ res /= (float) (t->size);
+
+ if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
+ res /= log((double) (t->size + 1)) / log(2.0);
+
+ if (method & RANK_NORM_RDIVRPLUS1)
+ res /= (res + 1);
+
+ return res;
+}
+
+static const float *
+getWeights(ArrayType *win)
+{
+ static float ws[lengthof(weights)];
+ int i;
+ float4 *arrdata;
+
+ if (win == NULL)
+ return weights;
+
+ if (ARR_NDIM(win) != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("array of weight must be one-dimensional")));
+
+ if (ArrayGetNItems(ARR_NDIM(win), ARR_DIMS(win)) < lengthof(weights))
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("array of weight is too short")));
+
+ if (array_contains_nulls(win))
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("array of weight must not contain nulls")));
+
+ arrdata = (float4 *) ARR_DATA_PTR(win);
+ for (i = 0; i < lengthof(weights); i++)
+ {
+ ws[i] = (arrdata[i] >= 0) ? arrdata[i] : weights[i];
+ if (ws[i] > 1.0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("weight out of range")));
+ }
+
+ return ws;
+}
+
+Datum
+ts_rank_wttf(PG_FUNCTION_ARGS)
+{
+ ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+ TSVector txt = PG_GETARG_TSVECTOR(1);
+ TSQuery query = PG_GETARG_TSQUERY(2);
+ int method = PG_GETARG_INT32(3);
+ float res;
+
+ res = calc_rank(getWeights(win), txt, query, method);
+
+ PG_FREE_IF_COPY(win, 0);
+ PG_FREE_IF_COPY(txt, 1);
+ PG_FREE_IF_COPY(query, 2);
+ PG_RETURN_FLOAT4(res);
+}
+
+Datum
+ts_rank_wtt(PG_FUNCTION_ARGS)
+{
+ ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+ TSVector txt = PG_GETARG_TSVECTOR(1);
+ TSQuery query = PG_GETARG_TSQUERY(2);
+ float res;
+
+ res = calc_rank(getWeights(win), txt, query, DEF_NORM_METHOD);
+
+ PG_FREE_IF_COPY(win, 0);
+ PG_FREE_IF_COPY(txt, 1);
+ PG_FREE_IF_COPY(query, 2);
+ PG_RETURN_FLOAT4(res);
+}
+
+Datum
+ts_rank_ttf(PG_FUNCTION_ARGS)
+{
+ TSVector txt = PG_GETARG_TSVECTOR(0);
+ TSQuery query = PG_GETARG_TSQUERY(1);
+ int method = PG_GETARG_INT32(2);
+ float res;
+
+ res = calc_rank(getWeights(NULL), txt, query, method);
+
+ PG_FREE_IF_COPY(txt, 0);
+ PG_FREE_IF_COPY(query, 1);
+ PG_RETURN_FLOAT4(res);
+}
+
+Datum
+ts_rank_tt(PG_FUNCTION_ARGS)
+{
+ TSVector txt = PG_GETARG_TSVECTOR(0);
+ TSQuery query = PG_GETARG_TSQUERY(1);
+ float res;
+
+ res = calc_rank(getWeights(NULL), txt, query, DEF_NORM_METHOD);
+
+ PG_FREE_IF_COPY(txt, 0);
+ PG_FREE_IF_COPY(query, 1);
+ PG_RETURN_FLOAT4(res);
+}
+
+typedef struct
+{
+ union
+ {
+ struct
+ { /* compiled doc representation */
+ QueryItem **items;
+ int16 nitem;
+ } query;
+ struct
+ { /* struct is used for preparing doc
+ * representation */
+ QueryItem *item;
+ WordEntry *entry;
+ } map;
+ } data;
+ WordEntryPos pos;
+} DocRepresentation;
+
+static int
+compareDocR(const void *va, const void *vb)
+{
+ const DocRepresentation *a = (const DocRepresentation *) va;
+ const DocRepresentation *b = (const DocRepresentation *) vb;
+
+ if (WEP_GETPOS(a->pos) == WEP_GETPOS(b->pos))
+ {
+ if (WEP_GETWEIGHT(a->pos) == WEP_GETWEIGHT(b->pos))
+ {
+ if (a->data.map.entry == b->data.map.entry)
+ return 0;
+
+ return (a->data.map.entry > b->data.map.entry) ? 1 : -1;
+ }
+
+ return (WEP_GETWEIGHT(a->pos) > WEP_GETWEIGHT(b->pos)) ? 1 : -1;
+ }
+
+ return (WEP_GETPOS(a->pos) > WEP_GETPOS(b->pos)) ? 1 : -1;
+}
+
+#define MAXQROPOS MAXENTRYPOS
+typedef struct
+{
+ bool operandexists;
+ bool reverseinsert; /* indicates insert order, true means
+ * descending order */
+ uint32 npos;
+ WordEntryPos pos[MAXQROPOS];
+} QueryRepresentationOperand;
+
+typedef struct
+{
+ TSQuery query;
+ QueryRepresentationOperand *operandData;
+} QueryRepresentation;
+
+#define QR_GET_OPERAND_DATA(q, v) \
+ ( (q)->operandData + (((QueryItem*)(v)) - GETQUERY((q)->query)) )
+
+/*
+ * TS_execute callback for matching a tsquery operand to QueryRepresentation
+ */
+static TSTernaryValue
+checkcondition_QueryOperand(void *checkval, QueryOperand *val,
+ ExecPhraseData *data)
+{
+ QueryRepresentation *qr = (QueryRepresentation *) checkval;
+ QueryRepresentationOperand *opData = QR_GET_OPERAND_DATA(qr, val);
+
+ if (!opData->operandexists)
+ return TS_NO;
+
+ if (data)
+ {
+ data->npos = opData->npos;
+ data->pos = opData->pos;
+ if (opData->reverseinsert)
+ data->pos += MAXQROPOS - opData->npos;
+ }
+
+ return TS_YES;
+}
+
+typedef struct
+{
+ int pos;
+ int p;
+ int q;
+ DocRepresentation *begin;
+ DocRepresentation *end;
+} CoverExt;
+
+static void
+resetQueryRepresentation(QueryRepresentation *qr, bool reverseinsert)
+{
+ int i;
+
+ for (i = 0; i < qr->query->size; i++)
+ {
+ qr->operandData[i].operandexists = false;
+ qr->operandData[i].reverseinsert = reverseinsert;
+ qr->operandData[i].npos = 0;
+ }
+}
+
+static void
+fillQueryRepresentationData(QueryRepresentation *qr, DocRepresentation *entry)
+{
+ int i;
+ int lastPos;
+ QueryRepresentationOperand *opData;
+
+ for (i = 0; i < entry->data.query.nitem; i++)
+ {
+ if (entry->data.query.items[i]->type != QI_VAL)
+ continue;
+
+ opData = QR_GET_OPERAND_DATA(qr, entry->data.query.items[i]);
+
+ opData->operandexists = true;
+
+ if (opData->npos == 0)
+ {
+ lastPos = (opData->reverseinsert) ? (MAXQROPOS - 1) : 0;
+ opData->pos[lastPos] = entry->pos;
+ opData->npos++;
+ continue;
+ }
+
+ lastPos = opData->reverseinsert ?
+ (MAXQROPOS - opData->npos) :
+ (opData->npos - 1);
+
+ if (WEP_GETPOS(opData->pos[lastPos]) != WEP_GETPOS(entry->pos))
+ {
+ lastPos = opData->reverseinsert ?
+ (MAXQROPOS - 1 - opData->npos) :
+ (opData->npos);
+
+ opData->pos[lastPos] = entry->pos;
+ opData->npos++;
+ }
+ }
+}
+
+static bool
+Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
+{
+ DocRepresentation *ptr;
+ int lastpos = ext->pos;
+ bool found = false;
+
+ /*
+ * since this function recurses, it could be driven to stack overflow.
+ * (though any decent compiler will optimize away the tail-recursion.)
+ */
+ check_stack_depth();
+
+ resetQueryRepresentation(qr, false);
+
+ ext->p = INT_MAX;
+ ext->q = 0;
+ ptr = doc + ext->pos;
+
+ /* find upper bound of cover from current position, move up */
+ while (ptr - doc < len)
+ {
+ fillQueryRepresentationData(qr, ptr);
+
+ if (TS_execute(GETQUERY(qr->query), (void *) qr,
+ TS_EXEC_EMPTY, checkcondition_QueryOperand))
+ {
+ if (WEP_GETPOS(ptr->pos) > ext->q)
+ {
+ ext->q = WEP_GETPOS(ptr->pos);
+ ext->end = ptr;
+ lastpos = ptr - doc;
+ found = true;
+ }
+ break;
+ }
+ ptr++;
+ }
+
+ if (!found)
+ return false;
+
+ resetQueryRepresentation(qr, true);
+
+ ptr = doc + lastpos;
+
+ /* find lower bound of cover from found upper bound, move down */
+ while (ptr >= doc + ext->pos)
+ {
+ /*
+ * we scan doc from right to left, so pos info in reverse order!
+ */
+ fillQueryRepresentationData(qr, ptr);
+
+ if (TS_execute(GETQUERY(qr->query), (void *) qr,
+ TS_EXEC_EMPTY, checkcondition_QueryOperand))
+ {
+ if (WEP_GETPOS(ptr->pos) < ext->p)
+ {
+ ext->begin = ptr;
+ ext->p = WEP_GETPOS(ptr->pos);
+ }
+ break;
+ }
+ ptr--;
+ }
+
+ if (ext->p <= ext->q)
+ {
+ /*
+ * set position for next try to next lexeme after beginning of found
+ * cover
+ */
+ ext->pos = (ptr - doc) + 1;
+ return true;
+ }
+
+ ext->pos++;
+ return Cover(doc, len, qr, ext);
+}
+
+static DocRepresentation *
+get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
+{
+ QueryItem *item = GETQUERY(qr->query);
+ WordEntry *entry,
+ *firstentry;
+ WordEntryPos *post;
+ int32 dimt, /* number of 'post' items */
+ j,
+ i,
+ nitem;
+ int len = qr->query->size * 4,
+ cur = 0;
+ DocRepresentation *doc;
+
+ doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
+
+ /*
+ * Iterate through the query to build DocRepresentation entries for the
+ * words (and their tsvector positions) that the query matches
+ */
+ for (i = 0; i < qr->query->size; i++)
+ {
+ QueryOperand *curoperand;
+
+ if (item[i].type != QI_VAL)
+ continue;
+
+ curoperand = &item[i].qoperand;
+
+ firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
+ if (!entry)
+ continue;
+
+ /* iterations over entries in tsvector */
+ while (entry - firstentry < nitem)
+ {
+ if (entry->haspos)
+ {
+ dimt = POSDATALEN(txt, entry);
+ post = POSDATAPTR(txt, entry);
+ }
+ else
+ {
+ /* ignore words without positions */
+ entry++;
+ continue;
+ }
+
+ while (cur + dimt >= len)
+ {
+ len *= 2;
+ doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
+ }
+
+ /* iterations over entry's positions */
+ for (j = 0; j < dimt; j++)
+ {
+ if (curoperand->weight == 0 ||
+ curoperand->weight & (1 << WEP_GETWEIGHT(post[j])))
+ {
+ doc[cur].pos = post[j];
+ doc[cur].data.map.entry = entry;
+ doc[cur].data.map.item = (QueryItem *) curoperand;
+ cur++;
+ }
+ }
+
+ entry++;
+ }
+ }
+
+ if (cur > 0)
+ {
+ DocRepresentation *rptr = doc + 1,
+ *wptr = doc,
+ storage;
+
+ /*
+ * Sort representation in ascending order by pos and entry
+ */
+ qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
+
+ /*
+ * Merge QueryItems that refer to the same WordEntry and position
+ */
+ storage.pos = doc->pos;
+ storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
+ storage.data.query.items[0] = doc->data.map.item;
+ storage.data.query.nitem = 1;
+
+ while (rptr - doc < cur)
+ {
+ if (rptr->pos == (rptr - 1)->pos &&
+ rptr->data.map.entry == (rptr - 1)->data.map.entry)
+ {
+ storage.data.query.items[storage.data.query.nitem] = rptr->data.map.item;
+ storage.data.query.nitem++;
+ }
+ else
+ {
+ *wptr = storage;
+ wptr++;
+ storage.pos = rptr->pos;
+ storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
+ storage.data.query.items[0] = rptr->data.map.item;
+ storage.data.query.nitem = 1;
+ }
+
+ rptr++;
+ }
+
+ *wptr = storage;
+ wptr++;
+
+ *doclen = wptr - doc;
+ return doc;
+ }
+
+ pfree(doc);
+ return NULL;
+}
+
+static float4
+calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
+{
+ DocRepresentation *doc;
+ int len,
+ i,
+ doclen = 0;
+ CoverExt ext;
+ double Wdoc = 0.0;
+ double invws[lengthof(weights)];
+ double SumDist = 0.0,
+ PrevExtPos = 0.0;
+ int NExtent = 0;
+ QueryRepresentation qr;
+
+
+ for (i = 0; i < lengthof(weights); i++)
+ {
+ invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i]));
+ if (invws[i] > 1.0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("weight out of range")));
+ invws[i] = 1.0 / invws[i];
+ }
+
+ qr.query = query;
+ qr.operandData = (QueryRepresentationOperand *)
+ palloc0(sizeof(QueryRepresentationOperand) * query->size);
+
+ doc = get_docrep(txt, &qr, &doclen);
+ if (!doc)
+ {
+ pfree(qr.operandData);
+ return 0.0;
+ }
+
+ MemSet(&ext, 0, sizeof(CoverExt));
+ while (Cover(doc, doclen, &qr, &ext))
+ {
+ double Cpos = 0.0;
+ double InvSum = 0.0;
+ double CurExtPos;
+ int nNoise;
+ DocRepresentation *ptr = ext.begin;
+
+ while (ptr <= ext.end)
+ {
+ InvSum += invws[WEP_GETWEIGHT(ptr->pos)];
+ ptr++;
+ }
+
+ Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
+
+ /*
+ * If the document is big enough, ext.q may be equal to ext.p due to the
+ * limit on positional information. In this case we approximate the
+ * number of noise words as half the cover's length
+ */
+ nNoise = (ext.q - ext.p) - (ext.end - ext.begin);
+ if (nNoise < 0)
+ nNoise = (ext.end - ext.begin) / 2;
+ Wdoc += Cpos / ((double) (1 + nNoise));
+
+ CurExtPos = ((double) (ext.q + ext.p)) / 2.0;
+ if (NExtent > 0 && CurExtPos > PrevExtPos /* prevent division by
+ * zero when multiple
+ * lexemes share a position */ )
+ SumDist += 1.0 / (CurExtPos - PrevExtPos);
+
+ PrevExtPos = CurExtPos;
+ NExtent++;
+ }
+
+ if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
+ Wdoc /= log((double) (cnt_length(txt) + 1));
+
+ if (method & RANK_NORM_LENGTH)
+ {
+ len = cnt_length(txt);
+ if (len > 0)
+ Wdoc /= (double) len;
+ }
+
+ if ((method & RANK_NORM_EXTDIST) && NExtent > 0 && SumDist > 0)
+ Wdoc /= ((double) NExtent) / SumDist;
+
+ if ((method & RANK_NORM_UNIQ) && txt->size > 0)
+ Wdoc /= (double) (txt->size);
+
+ if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
+ Wdoc /= log((double) (txt->size + 1)) / log(2.0);
+
+ if (method & RANK_NORM_RDIVRPLUS1)
+ Wdoc /= (Wdoc + 1);
+
+ pfree(doc);
+
+ pfree(qr.operandData);
+
+ return (float4) Wdoc;
+}
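+
+/*
+ * For example (illustrative; the exact value depends on the input), at the
+ * SQL level this computation is reached via:
+ *
+ *   SELECT ts_rank_cd(to_tsvector('english', 'The quick brown fox'),
+ *                     to_tsquery('english', 'quick & fox'),
+ *                     2 | 4);
+ *
+ * where the third argument is the "method" bitmask checked above; 2 | 4
+ * requests the length and extent-distance normalizations, assuming the
+ * usual RANK_NORM_LENGTH (2) and RANK_NORM_EXTDIST (4) bit values defined
+ * earlier in this file.
+ */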
+
+Datum
+ts_rankcd_wttf(PG_FUNCTION_ARGS)
+{
+ ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+ TSVector txt = PG_GETARG_TSVECTOR(1);
+ TSQuery query = PG_GETARG_TSQUERY(2);
+ int method = PG_GETARG_INT32(3);
+ float res;
+
+ res = calc_rank_cd(getWeights(win), txt, query, method);
+
+ PG_FREE_IF_COPY(win, 0);
+ PG_FREE_IF_COPY(txt, 1);
+ PG_FREE_IF_COPY(query, 2);
+ PG_RETURN_FLOAT4(res);
+}
+
+Datum
+ts_rankcd_wtt(PG_FUNCTION_ARGS)
+{
+ ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
+ TSVector txt = PG_GETARG_TSVECTOR(1);
+ TSQuery query = PG_GETARG_TSQUERY(2);
+ float res;
+
+ res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD);
+
+ PG_FREE_IF_COPY(win, 0);
+ PG_FREE_IF_COPY(txt, 1);
+ PG_FREE_IF_COPY(query, 2);
+ PG_RETURN_FLOAT4(res);
+}
+
+Datum
+ts_rankcd_ttf(PG_FUNCTION_ARGS)
+{
+ TSVector txt = PG_GETARG_TSVECTOR(0);
+ TSQuery query = PG_GETARG_TSQUERY(1);
+ int method = PG_GETARG_INT32(2);
+ float res;
+
+ res = calc_rank_cd(getWeights(NULL), txt, query, method);
+
+ PG_FREE_IF_COPY(txt, 0);
+ PG_FREE_IF_COPY(query, 1);
+ PG_RETURN_FLOAT4(res);
+}
+
+Datum
+ts_rankcd_tt(PG_FUNCTION_ARGS)
+{
+ TSVector txt = PG_GETARG_TSVECTOR(0);
+ TSQuery query = PG_GETARG_TSQUERY(1);
+ float res;
+
+ res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD);
+
+ PG_FREE_IF_COPY(txt, 0);
+ PG_FREE_IF_COPY(query, 1);
+ PG_RETURN_FLOAT4(res);
+}
diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c
new file mode 100644
index 0000000..cb36893
--- /dev/null
+++ b/src/backend/utils/adt/tsvector.c
@@ -0,0 +1,551 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsvector.c
+ * I/O functions for tsvector
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/tsvector.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "libpq/pqformat.h"
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_utils.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+
+typedef struct
+{
+ WordEntry entry; /* must be first! */
+ WordEntryPos *pos;
+ int poslen; /* number of elements in pos */
+} WordEntryIN;
+
+
+/* Compare two WordEntryPos values for qsort */
+int
+compareWordEntryPos(const void *a, const void *b)
+{
+ int apos = WEP_GETPOS(*(const WordEntryPos *) a);
+ int bpos = WEP_GETPOS(*(const WordEntryPos *) b);
+
+ if (apos == bpos)
+ return 0;
+ return (apos > bpos) ? 1 : -1;
+}
+
+/*
+ * Removes duplicate pos entries. If there are two entries with the same
+ * pos but different weights, the higher weight is retained, so we can't
+ * use qunique here.
+ *
+ * Returns new length.
+ */
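+/*
+ * For example (illustrative): the position list {1B, 1A, 2} collapses to
+ * {1A, 2}; the duplicate at position 1 keeps the higher weight (A > B).
+ */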
+static int
+uniquePos(WordEntryPos *a, int l)
+{
+ WordEntryPos *ptr,
+ *res;
+
+ if (l <= 1)
+ return l;
+
+ qsort((void *) a, l, sizeof(WordEntryPos), compareWordEntryPos);
+
+ res = a;
+ ptr = a + 1;
+ while (ptr - a < l)
+ {
+ if (WEP_GETPOS(*ptr) != WEP_GETPOS(*res))
+ {
+ res++;
+ *res = *ptr;
+ if (res - a >= MAXNUMPOS - 1 ||
+ WEP_GETPOS(*res) == MAXENTRYPOS - 1)
+ break;
+ }
+ else if (WEP_GETWEIGHT(*ptr) > WEP_GETWEIGHT(*res))
+ WEP_SETWEIGHT(*res, WEP_GETWEIGHT(*ptr));
+ ptr++;
+ }
+
+ return res + 1 - a;
+}
+
+/* Compare two WordEntryIN values for qsort */
+static int
+compareentry(const void *va, const void *vb, void *arg)
+{
+ const WordEntryIN *a = (const WordEntryIN *) va;
+ const WordEntryIN *b = (const WordEntryIN *) vb;
+ char *BufferStr = (char *) arg;
+
+ return tsCompareString(&BufferStr[a->entry.pos], a->entry.len,
+ &BufferStr[b->entry.pos], b->entry.len,
+ false);
+}
+
+/*
+ * Sort an array of WordEntryIN, remove duplicates.
+ * *outbuflen receives the amount of space needed for strings and positions.
+ */
+static int
+uniqueentry(WordEntryIN *a, int l, char *buf, int *outbuflen)
+{
+ int buflen;
+ WordEntryIN *ptr,
+ *res;
+
+ Assert(l >= 1);
+
+ if (l > 1)
+ qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry,
+ (void *) buf);
+
+ buflen = 0;
+ res = a;
+ ptr = a + 1;
+ while (ptr - a < l)
+ {
+ if (!(ptr->entry.len == res->entry.len &&
+ strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos],
+ res->entry.len) == 0))
+ {
+ /* done accumulating data into *res, count space needed */
+ buflen += res->entry.len;
+ if (res->entry.haspos)
+ {
+ res->poslen = uniquePos(res->pos, res->poslen);
+ buflen = SHORTALIGN(buflen);
+ buflen += res->poslen * sizeof(WordEntryPos) + sizeof(uint16);
+ }
+ res++;
+ if (res != ptr)
+ memcpy(res, ptr, sizeof(WordEntryIN));
+ }
+ else if (ptr->entry.haspos)
+ {
+ if (res->entry.haspos)
+ {
+ /* append ptr's positions to res's positions */
+ int newlen = ptr->poslen + res->poslen;
+
+ res->pos = (WordEntryPos *)
+ repalloc(res->pos, newlen * sizeof(WordEntryPos));
+ memcpy(&res->pos[res->poslen], ptr->pos,
+ ptr->poslen * sizeof(WordEntryPos));
+ res->poslen = newlen;
+ pfree(ptr->pos);
+ }
+ else
+ {
+ /* just give ptr's positions to res */
+ res->entry.haspos = 1;
+ res->pos = ptr->pos;
+ res->poslen = ptr->poslen;
+ }
+ }
+ ptr++;
+ }
+
+ /* count space needed for last item */
+ buflen += res->entry.len;
+ if (res->entry.haspos)
+ {
+ res->poslen = uniquePos(res->pos, res->poslen);
+ buflen = SHORTALIGN(buflen);
+ buflen += res->poslen * sizeof(WordEntryPos) + sizeof(uint16);
+ }
+
+ *outbuflen = buflen;
+ return res + 1 - a;
+}
+
+static int
+WordEntryCMP(WordEntry *a, WordEntry *b, char *buf)
+{
+ return compareentry(a, b, buf);
+}
+
+
+Datum
+tsvectorin(PG_FUNCTION_ARGS)
+{
+ char *buf = PG_GETARG_CSTRING(0);
+ TSVectorParseState state;
+ WordEntryIN *arr;
+ int totallen;
+ int arrlen; /* allocated size of arr */
+ WordEntry *inarr;
+ int len = 0;
+ TSVector in;
+ int i;
+ char *token;
+ int toklen;
+ WordEntryPos *pos;
+ int poslen;
+ char *strbuf;
+ int stroff;
+
+ /*
+ * Tokens are appended to tmpbuf, cur is a pointer to the end of used
+ * space in tmpbuf.
+ */
+ char *tmpbuf;
+ char *cur;
+ int buflen = 256; /* allocated size of tmpbuf */
+
+ state = init_tsvector_parser(buf, 0);
+
+ arrlen = 64;
+ arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
+ cur = tmpbuf = (char *) palloc(buflen);
+
+ while (gettoken_tsvector(state, &token, &toklen, &pos, &poslen, NULL))
+ {
+ if (toklen >= MAXSTRLEN)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("word is too long (%ld bytes, max %ld bytes)",
+ (long) toklen,
+ (long) (MAXSTRLEN - 1))));
+
+ if (cur - tmpbuf > MAXSTRPOS)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("string is too long for tsvector (%ld bytes, max %ld bytes)",
+ (long) (cur - tmpbuf), (long) MAXSTRPOS)));
+
+ /*
+ * Enlarge buffers if needed
+ */
+ if (len >= arrlen)
+ {
+ arrlen *= 2;
+ arr = (WordEntryIN *)
+ repalloc((void *) arr, sizeof(WordEntryIN) * arrlen);
+ }
+ while ((cur - tmpbuf) + toklen >= buflen)
+ {
+ int dist = cur - tmpbuf;
+
+ buflen *= 2;
+ tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
+ cur = tmpbuf + dist;
+ }
+ arr[len].entry.len = toklen;
+ arr[len].entry.pos = cur - tmpbuf;
+ memcpy((void *) cur, (void *) token, toklen);
+ cur += toklen;
+
+ if (poslen != 0)
+ {
+ arr[len].entry.haspos = 1;
+ arr[len].pos = pos;
+ arr[len].poslen = poslen;
+ }
+ else
+ {
+ arr[len].entry.haspos = 0;
+ arr[len].pos = NULL;
+ arr[len].poslen = 0;
+ }
+ len++;
+ }
+
+ close_tsvector_parser(state);
+
+ if (len > 0)
+ len = uniqueentry(arr, len, tmpbuf, &buflen);
+ else
+ buflen = 0;
+
+ if (buflen > MAXSTRPOS)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("string is too long for tsvector (%d bytes, max %d bytes)", buflen, MAXSTRPOS)));
+
+ totallen = CALCDATASIZE(len, buflen);
+ in = (TSVector) palloc0(totallen);
+ SET_VARSIZE(in, totallen);
+ in->size = len;
+ inarr = ARRPTR(in);
+ strbuf = STRPTR(in);
+ stroff = 0;
+ for (i = 0; i < len; i++)
+ {
+ memcpy(strbuf + stroff, &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
+ arr[i].entry.pos = stroff;
+ stroff += arr[i].entry.len;
+ if (arr[i].entry.haspos)
+ {
+ if (arr[i].poslen > 0xFFFF)
+ elog(ERROR, "positions array too long");
+
+ /* Copy number of positions */
+ stroff = SHORTALIGN(stroff);
+ *(uint16 *) (strbuf + stroff) = (uint16) arr[i].poslen;
+ stroff += sizeof(uint16);
+
+ /* Copy positions */
+ memcpy(strbuf + stroff, arr[i].pos, arr[i].poslen * sizeof(WordEntryPos));
+ stroff += arr[i].poslen * sizeof(WordEntryPos);
+
+ pfree(arr[i].pos);
+ }
+ inarr[i] = arr[i].entry;
+ }
+
+ Assert((strbuf + stroff - (char *) in) == totallen);
+
+ PG_RETURN_TSVECTOR(in);
+}
+
+Datum
+tsvectorout(PG_FUNCTION_ARGS)
+{
+ TSVector out = PG_GETARG_TSVECTOR(0);
+ char *outbuf;
+ int32 i,
+ lenbuf = 0,
+ pp;
+ WordEntry *ptr = ARRPTR(out);
+ char *curbegin,
+ *curin,
+ *curout;
+
+ lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
+ for (i = 0; i < out->size; i++)
+ {
+ lenbuf += ptr[i].len * 2 * pg_database_encoding_max_length() /* for escape */ ;
+ if (ptr[i].haspos)
+ lenbuf += 1 /* : */ + 7 /* int2 + , + weight */ * POSDATALEN(out, &(ptr[i]));
+ }
+
+ curout = outbuf = (char *) palloc(lenbuf);
+ for (i = 0; i < out->size; i++)
+ {
+ curbegin = curin = STRPTR(out) + ptr->pos;
+ if (i != 0)
+ *curout++ = ' ';
+ *curout++ = '\'';
+ while (curin - curbegin < ptr->len)
+ {
+ int len = pg_mblen(curin);
+
+ if (t_iseq(curin, '\''))
+ *curout++ = '\'';
+ else if (t_iseq(curin, '\\'))
+ *curout++ = '\\';
+
+ while (len--)
+ *curout++ = *curin++;
+ }
+
+ *curout++ = '\'';
+ if ((pp = POSDATALEN(out, ptr)) != 0)
+ {
+ WordEntryPos *wptr;
+
+ *curout++ = ':';
+ wptr = POSDATAPTR(out, ptr);
+ while (pp)
+ {
+ curout += sprintf(curout, "%d", WEP_GETPOS(*wptr));
+ switch (WEP_GETWEIGHT(*wptr))
+ {
+ case 3:
+ *curout++ = 'A';
+ break;
+ case 2:
+ *curout++ = 'B';
+ break;
+ case 1:
+ *curout++ = 'C';
+ break;
+ case 0:
+ default:
+ break;
+ }
+
+ if (pp > 1)
+ *curout++ = ',';
+ pp--;
+ wptr++;
+ }
+ }
+ ptr++;
+ }
+
+ *curout = '\0';
+ PG_FREE_IF_COPY(out, 0);
+ PG_RETURN_CSTRING(outbuf);
+}
+
+/*
+ * Binary Input / Output functions. The binary format is as follows:
+ *
+ * uint32 number of lexemes
+ *
+ * for each lexeme:
+ * lexeme text in client encoding, null-terminated
+ * uint16 number of positions
+ * for each position:
+ * uint16 WordEntryPos
+ */
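+/*
+ * For example (an illustrative sketch, assuming weight A is encoded as 3 in
+ * the two high bits of WordEntryPos, as elsewhere in this file), the
+ * tsvector 'cat':3A would be sent as:
+ *
+ *   uint32  1                  -- one lexeme
+ *   bytes   'c' 'a' 't' '\0'   -- lexeme text plus terminator
+ *   uint16  1                  -- one position
+ *   uint16  0xC003             -- weight A (3) << 14 | position 3
+ */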
+
+Datum
+tsvectorsend(PG_FUNCTION_ARGS)
+{
+ TSVector vec = PG_GETARG_TSVECTOR(0);
+ StringInfoData buf;
+ int i,
+ j;
+ WordEntry *weptr = ARRPTR(vec);
+
+ pq_begintypsend(&buf);
+
+ pq_sendint32(&buf, vec->size);
+ for (i = 0; i < vec->size; i++)
+ {
+ uint16 npos;
+
+ /*
+ * the strings in the TSVector array are not null-terminated, so we
+ * have to send the null-terminator separately
+ */
+ pq_sendtext(&buf, STRPTR(vec) + weptr->pos, weptr->len);
+ pq_sendbyte(&buf, '\0');
+
+ npos = POSDATALEN(vec, weptr);
+ pq_sendint16(&buf, npos);
+
+ if (npos > 0)
+ {
+ WordEntryPos *wepptr = POSDATAPTR(vec, weptr);
+
+ for (j = 0; j < npos; j++)
+ pq_sendint16(&buf, wepptr[j]);
+ }
+ weptr++;
+ }
+
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+Datum
+tsvectorrecv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ TSVector vec;
+ int i;
+ int32 nentries;
+ int datalen; /* number of bytes used in the variable size
+ * area after fixed size TSVector header and
+ * WordEntries */
+ Size hdrlen;
+ Size len; /* allocated size of vec */
+ bool needSort = false;
+
+ nentries = pq_getmsgint(buf, sizeof(int32));
+ if (nentries < 0 || nentries > (MaxAllocSize / sizeof(WordEntry)))
+ elog(ERROR, "invalid size of tsvector");
+
+ hdrlen = DATAHDRSIZE + sizeof(WordEntry) * nentries;
+
+ len = hdrlen * 2; /* times two to make room for lexemes */
+ vec = (TSVector) palloc0(len);
+ vec->size = nentries;
+
+ datalen = 0;
+ for (i = 0; i < nentries; i++)
+ {
+ const char *lexeme;
+ uint16 npos;
+ size_t lex_len;
+
+ lexeme = pq_getmsgstring(buf);
+ npos = (uint16) pq_getmsgint(buf, sizeof(uint16));
+
+ /* sanity checks */
+
+ lex_len = strlen(lexeme);
+ if (lex_len > MAXSTRLEN)
+ elog(ERROR, "invalid tsvector: lexeme too long");
+
+ if (datalen > MAXSTRPOS)
+ elog(ERROR, "invalid tsvector: maximum total lexeme length exceeded");
+
+ if (npos > MAXNUMPOS)
+ elog(ERROR, "unexpected number of tsvector positions");
+
+ /*
+ * Looks valid. Fill the WordEntry struct, and copy lexeme.
+ *
+ * But make sure the buffer is large enough first.
+ */
+ while (hdrlen + SHORTALIGN(datalen + lex_len) +
+ sizeof(uint16) + npos * sizeof(WordEntryPos) >= len)
+ {
+ len *= 2;
+ vec = (TSVector) repalloc(vec, len);
+ }
+
+ vec->entries[i].haspos = (npos > 0) ? 1 : 0;
+ vec->entries[i].len = lex_len;
+ vec->entries[i].pos = datalen;
+
+ memcpy(STRPTR(vec) + datalen, lexeme, lex_len);
+
+ datalen += lex_len;
+
+ if (i > 0 && WordEntryCMP(&vec->entries[i],
+ &vec->entries[i - 1],
+ STRPTR(vec)) <= 0)
+ needSort = true;
+
+ /* Receive positions */
+ if (npos > 0)
+ {
+ uint16 j;
+ WordEntryPos *wepptr;
+
+ /*
+ * Pad to 2-byte alignment if necessary. Though we used palloc0
+ * for the initial allocation, subsequent repalloc'd memory areas
+ * are not initialized to zero.
+ */
+ if (datalen != SHORTALIGN(datalen))
+ {
+ *(STRPTR(vec) + datalen) = '\0';
+ datalen = SHORTALIGN(datalen);
+ }
+
+ memcpy(STRPTR(vec) + datalen, &npos, sizeof(uint16));
+
+ wepptr = POSDATAPTR(vec, &vec->entries[i]);
+ for (j = 0; j < npos; j++)
+ {
+ wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(WordEntryPos));
+ if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1]))
+ elog(ERROR, "position information is misordered");
+ }
+
+ datalen += sizeof(uint16) + npos * sizeof(WordEntryPos);
+ }
+ }
+
+ SET_VARSIZE(vec, hdrlen + datalen);
+
+ if (needSort)
+ qsort_arg((void *) ARRPTR(vec), vec->size, sizeof(WordEntry),
+ compareentry, (void *) STRPTR(vec));
+
+ PG_RETURN_TSVECTOR(vec);
+}
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
new file mode 100644
index 0000000..2ccd3bd
--- /dev/null
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -0,0 +1,2726 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsvector_op.c
+ * operations over tsvector
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/tsvector_op.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <limits.h>
+
+#include "access/htup_details.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_type.h"
+#include "commands/trigger.h"
+#include "executor/spi.h"
+#include "funcapi.h"
+#include "lib/qunique.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "parser/parse_coerce.h"
+#include "tsearch/ts_utils.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/regproc.h"
+#include "utils/rel.h"
+
+
+typedef struct
+{
+ WordEntry *arrb;
+ WordEntry *arre;
+ char *values;
+ char *operand;
+} CHKVAL;
+
+
+typedef struct StatEntry
+{
+ uint32 ndoc; /* zero indicates that we were already here
+ * while walking through the tree */
+ uint32 nentry;
+ struct StatEntry *left;
+ struct StatEntry *right;
+ uint32 lenlexeme;
+ char lexeme[FLEXIBLE_ARRAY_MEMBER];
+} StatEntry;
+
+#define STATENTRYHDRSZ (offsetof(StatEntry, lexeme))
+
+typedef struct
+{
+ int32 weight;
+
+ uint32 maxdepth;
+
+ StatEntry **stack;
+ uint32 stackpos;
+
+ StatEntry *root;
+} TSVectorStat;
+
+
+static TSTernaryValue TS_execute_recurse(QueryItem *curitem, void *arg,
+ uint32 flags,
+ TSExecuteCallback chkcond);
+static int tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len);
+static Datum tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column);
+
+
+/*
+ * Order: haspos, len, word, for all positions (pos, weight)
+ */
+static int
+silly_cmp_tsvector(const TSVector a, const TSVector b)
+{
+ if (VARSIZE(a) < VARSIZE(b))
+ return -1;
+ else if (VARSIZE(a) > VARSIZE(b))
+ return 1;
+ else if (a->size < b->size)
+ return -1;
+ else if (a->size > b->size)
+ return 1;
+ else
+ {
+ WordEntry *aptr = ARRPTR(a);
+ WordEntry *bptr = ARRPTR(b);
+ int i = 0;
+ int res;
+
+
+ for (i = 0; i < a->size; i++)
+ {
+ if (aptr->haspos != bptr->haspos)
+ {
+ return (aptr->haspos > bptr->haspos) ? -1 : 1;
+ }
+ else if ((res = tsCompareString(STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) != 0)
+ {
+ return res;
+ }
+ else if (aptr->haspos)
+ {
+ WordEntryPos *ap = POSDATAPTR(a, aptr);
+ WordEntryPos *bp = POSDATAPTR(b, bptr);
+ int j;
+
+ if (POSDATALEN(a, aptr) != POSDATALEN(b, bptr))
+ return (POSDATALEN(a, aptr) > POSDATALEN(b, bptr)) ? -1 : 1;
+
+ for (j = 0; j < POSDATALEN(a, aptr); j++)
+ {
+ if (WEP_GETPOS(*ap) != WEP_GETPOS(*bp))
+ {
+ return (WEP_GETPOS(*ap) > WEP_GETPOS(*bp)) ? -1 : 1;
+ }
+ else if (WEP_GETWEIGHT(*ap) != WEP_GETWEIGHT(*bp))
+ {
+ return (WEP_GETWEIGHT(*ap) > WEP_GETWEIGHT(*bp)) ? -1 : 1;
+ }
+ ap++, bp++;
+ }
+ }
+
+ aptr++;
+ bptr++;
+ }
+ }
+
+ return 0;
+}
+
+#define TSVECTORCMPFUNC( type, action, ret ) \
+Datum \
+tsvector_##type(PG_FUNCTION_ARGS) \
+{ \
+ TSVector a = PG_GETARG_TSVECTOR(0); \
+ TSVector b = PG_GETARG_TSVECTOR(1); \
+ int res = silly_cmp_tsvector(a, b); \
+ PG_FREE_IF_COPY(a,0); \
+ PG_FREE_IF_COPY(b,1); \
+ PG_RETURN_##ret( res action 0 ); \
+} \
+/* keep compiler quiet - no extra ; */ \
+extern int no_such_variable
+
+TSVECTORCMPFUNC(lt, <, BOOL);
+TSVECTORCMPFUNC(le, <=, BOOL);
+TSVECTORCMPFUNC(eq, ==, BOOL);
+TSVECTORCMPFUNC(ge, >=, BOOL);
+TSVECTORCMPFUNC(gt, >, BOOL);
+TSVECTORCMPFUNC(ne, !=, BOOL);
+TSVECTORCMPFUNC(cmp, +, INT32);
+
+Datum
+tsvector_strip(PG_FUNCTION_ARGS)
+{
+ TSVector in = PG_GETARG_TSVECTOR(0);
+ TSVector out;
+ int i,
+ len = 0;
+ WordEntry *arrin = ARRPTR(in),
+ *arrout;
+ char *cur;
+
+ for (i = 0; i < in->size; i++)
+ len += arrin[i].len;
+
+ len = CALCDATASIZE(in->size, len);
+ out = (TSVector) palloc0(len);
+ SET_VARSIZE(out, len);
+ out->size = in->size;
+ arrout = ARRPTR(out);
+ cur = STRPTR(out);
+ for (i = 0; i < in->size; i++)
+ {
+ memcpy(cur, STRPTR(in) + arrin[i].pos, arrin[i].len);
+ arrout[i].haspos = 0;
+ arrout[i].len = arrin[i].len;
+ arrout[i].pos = cur - STRPTR(out);
+ cur += arrout[i].len;
+ }
+
+ PG_FREE_IF_COPY(in, 0);
+ PG_RETURN_POINTER(out);
+}
+
+Datum
+tsvector_length(PG_FUNCTION_ARGS)
+{
+ TSVector in = PG_GETARG_TSVECTOR(0);
+ int32 ret = in->size;
+
+ PG_FREE_IF_COPY(in, 0);
+ PG_RETURN_INT32(ret);
+}
+
+Datum
+tsvector_setweight(PG_FUNCTION_ARGS)
+{
+ TSVector in = PG_GETARG_TSVECTOR(0);
+ char cw = PG_GETARG_CHAR(1);
+ TSVector out;
+ int i,
+ j;
+ WordEntry *entry;
+ WordEntryPos *p;
+ int w = 0;
+
+ switch (cw)
+ {
+ case 'A':
+ case 'a':
+ w = 3;
+ break;
+ case 'B':
+ case 'b':
+ w = 2;
+ break;
+ case 'C':
+ case 'c':
+ w = 1;
+ break;
+ case 'D':
+ case 'd':
+ w = 0;
+ break;
+ default:
+ /* internal error */
+ elog(ERROR, "unrecognized weight: %d", cw);
+ }
+
+ out = (TSVector) palloc(VARSIZE(in));
+ memcpy(out, in, VARSIZE(in));
+ entry = ARRPTR(out);
+ i = out->size;
+ while (i--)
+ {
+ if ((j = POSDATALEN(out, entry)) != 0)
+ {
+ p = POSDATAPTR(out, entry);
+ while (j--)
+ {
+ WEP_SETWEIGHT(*p, w);
+ p++;
+ }
+ }
+ entry++;
+ }
+
+ PG_FREE_IF_COPY(in, 0);
+ PG_RETURN_POINTER(out);
+}
+
+/*
+ * setweight(tsin tsvector, char_weight "char", lexemes "text"[])
+ *
+ * Assign the given weight to the elements of tsin that are listed in lexemes.
+ */
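+/*
+ * For example (illustrative):
+ *
+ *   SELECT setweight('fat:2,4 cat:3 rat:5B'::tsvector, 'A', '{cat,rat}');
+ *
+ * should yield 'cat':3A 'fat':2,4 'rat':5A -- only the listed lexemes are
+ * reweighted.
+ */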
+Datum
+tsvector_setweight_by_filter(PG_FUNCTION_ARGS)
+{
+ TSVector tsin = PG_GETARG_TSVECTOR(0);
+ char char_weight = PG_GETARG_CHAR(1);
+ ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(2);
+
+ TSVector tsout;
+ int i,
+ j,
+ nlexemes,
+ weight;
+ WordEntry *entry;
+ Datum *dlexemes;
+ bool *nulls;
+
+ switch (char_weight)
+ {
+ case 'A':
+ case 'a':
+ weight = 3;
+ break;
+ case 'B':
+ case 'b':
+ weight = 2;
+ break;
+ case 'C':
+ case 'c':
+ weight = 1;
+ break;
+ case 'D':
+ case 'd':
+ weight = 0;
+ break;
+ default:
+ /* internal error */
+ elog(ERROR, "unrecognized weight: %c", char_weight);
+ }
+
+ tsout = (TSVector) palloc(VARSIZE(tsin));
+ memcpy(tsout, tsin, VARSIZE(tsin));
+ entry = ARRPTR(tsout);
+
+ deconstruct_array(lexemes, TEXTOID, -1, false, TYPALIGN_INT,
+ &dlexemes, &nulls, &nlexemes);
+
+ /*
+ * Assuming that the lexemes array is significantly shorter than the
+ * tsvector, we iterate through the lexemes, doing a binary search for
+ * each one in the tsvector.
+ */
+ for (i = 0; i < nlexemes; i++)
+ {
+ char *lex;
+ int lex_len,
+ lex_pos;
+
+ /* Ignore null array elements, they surely don't match */
+ if (nulls[i])
+ continue;
+
+ lex = VARDATA(dlexemes[i]);
+ lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
+ lex_pos = tsvector_bsearch(tsout, lex, lex_len);
+
+ if (lex_pos >= 0 && (j = POSDATALEN(tsout, entry + lex_pos)) != 0)
+ {
+ WordEntryPos *p = POSDATAPTR(tsout, entry + lex_pos);
+
+ while (j--)
+ {
+ WEP_SETWEIGHT(*p, weight);
+ p++;
+ }
+ }
+ }
+
+ PG_FREE_IF_COPY(tsin, 0);
+ PG_FREE_IF_COPY(lexemes, 2);
+
+ PG_RETURN_POINTER(tsout);
+}
+
+#define compareEntry(pa, a, pb, b) \
+ tsCompareString((pa) + (a)->pos, (a)->len, \
+ (pb) + (b)->pos, (b)->len, \
+ false)
+
+/*
+ * Add positions from src to dest after offsetting them by maxpos.
+ * Return the number added (might be less than expected due to overflow)
+ */
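+/*
+ * For example (illustrative): if dest already holds positions {1,2}, src
+ * holds {1,3}, and maxpos is 2, dest ends up with {1,2,3,5} and the
+ * function returns 2.
+ */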
+static int32
+add_pos(TSVector src, WordEntry *srcptr,
+ TSVector dest, WordEntry *destptr,
+ int32 maxpos)
+{
+ uint16 *clen = &_POSVECPTR(dest, destptr)->npos;
+ int i;
+ uint16 slen = POSDATALEN(src, srcptr),
+ startlen;
+ WordEntryPos *spos = POSDATAPTR(src, srcptr),
+ *dpos = POSDATAPTR(dest, destptr);
+
+ if (!destptr->haspos)
+ *clen = 0;
+
+ startlen = *clen;
+ for (i = 0;
+ i < slen && *clen < MAXNUMPOS &&
+ (*clen == 0 || WEP_GETPOS(dpos[*clen - 1]) != MAXENTRYPOS - 1);
+ i++)
+ {
+ WEP_SETWEIGHT(dpos[*clen], WEP_GETWEIGHT(spos[i]));
+ WEP_SETPOS(dpos[*clen], LIMITPOS(WEP_GETPOS(spos[i]) + maxpos));
+ (*clen)++;
+ }
+
+ if (*clen != startlen)
+ destptr->haspos = 1;
+ return *clen - startlen;
+}
+
+/*
+ * Perform a binary search for the given lexeme in a TSVector.
+ * Returns the lexeme's index in the TSVector's entry array, or -1 if the
+ * lexeme wasn't found.
+ */
+static int
+tsvector_bsearch(const TSVector tsv, char *lexeme, int lexeme_len)
+{
+ WordEntry *arrin = ARRPTR(tsv);
+ int StopLow = 0,
+ StopHigh = tsv->size,
+ StopMiddle,
+ cmp;
+
+ while (StopLow < StopHigh)
+ {
+ StopMiddle = (StopLow + StopHigh) / 2;
+
+ cmp = tsCompareString(lexeme, lexeme_len,
+ STRPTR(tsv) + arrin[StopMiddle].pos,
+ arrin[StopMiddle].len,
+ false);
+
+ if (cmp < 0)
+ StopHigh = StopMiddle;
+ else if (cmp > 0)
+ StopLow = StopMiddle + 1;
+ else /* found it */
+ return StopMiddle;
+ }
+
+ return -1;
+}
+
+/*
+ * qsort comparator functions
+ */
+
+static int
+compare_int(const void *va, const void *vb)
+{
+ int a = *((const int *) va);
+ int b = *((const int *) vb);
+
+ if (a == b)
+ return 0;
+ return (a > b) ? 1 : -1;
+}
+
+static int
+compare_text_lexemes(const void *va, const void *vb)
+{
+ Datum a = *((const Datum *) va);
+ Datum b = *((const Datum *) vb);
+ char *alex = VARDATA_ANY(a);
+ int alex_len = VARSIZE_ANY_EXHDR(a);
+ char *blex = VARDATA_ANY(b);
+ int blex_len = VARSIZE_ANY_EXHDR(b);
+
+ return tsCompareString(alex, alex_len, blex, blex_len, false);
+}
+
+/*
+ * Internal routine to delete lexemes from TSVector by array of offsets.
+ *
+ * int *indices_to_delete -- array of lexeme offsets to delete (modified here!)
+ * int indices_count -- size of that array
+ *
+ * Returns a new TSVector with the given lexemes removed, along with their
+ * positions and weights.
+ */
+static TSVector
+tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
+ int indices_count)
+{
+ TSVector tsout;
+ WordEntry *arrin = ARRPTR(tsv),
+ *arrout;
+ char *data = STRPTR(tsv),
+ *dataout;
+ int i, /* index in arrin */
+ j, /* index in arrout */
+ k, /* index in indices_to_delete */
+ curoff; /* index in dataout area */
+
+ /*
+ * Sort the filter array to simplify membership checks below. Also, get
+ * rid of any duplicate entries, so that we can assume that indices_count
+ * is exactly equal to the number of lexemes that will be removed.
+ */
+ if (indices_count > 1)
+ {
+ qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
+ indices_count = qunique(indices_to_delete, indices_count, sizeof(int),
+ compare_int);
+ }
+
+ /*
+ * Here we overestimate tsout size, since we don't know how much space is
+ * used by the deleted lexeme(s). We will set exact size below.
+ */
+ tsout = (TSVector) palloc0(VARSIZE(tsv));
+
+ /* This count must be correct because STRPTR(tsout) relies on it. */
+ tsout->size = tsv->size - indices_count;
+
+ /*
+ * Copy tsv to tsout, skipping lexemes listed in indices_to_delete.
+ */
+ arrout = ARRPTR(tsout);
+ dataout = STRPTR(tsout);
+ curoff = 0;
+ for (i = j = k = 0; i < tsv->size; i++)
+ {
+ /*
+ * If current i is present in indices_to_delete, skip this lexeme.
+ * Since indices_to_delete is already sorted, we only need to check
+ * the current (k'th) entry.
+ */
+ if (k < indices_count && i == indices_to_delete[k])
+ {
+ k++;
+ continue;
+ }
+
+ /* Copy lexeme and its positions and weights */
+ memcpy(dataout + curoff, data + arrin[i].pos, arrin[i].len);
+ arrout[j].haspos = arrin[i].haspos;
+ arrout[j].len = arrin[i].len;
+ arrout[j].pos = curoff;
+ curoff += arrin[i].len;
+ if (arrin[i].haspos)
+ {
+ int len = POSDATALEN(tsv, arrin + i) * sizeof(WordEntryPos)
+ + sizeof(uint16);
+
+ curoff = SHORTALIGN(curoff);
+ memcpy(dataout + curoff,
+ STRPTR(tsv) + SHORTALIGN(arrin[i].pos + arrin[i].len),
+ len);
+ curoff += len;
+ }
+
+ j++;
+ }
+
+ /*
+ * k should now be exactly equal to indices_count. If it isn't, the
+ * caller provided us with indices outside of the [0, tsv->size) range,
+ * and our estimate of tsout's size is wrong.
+ */
+ Assert(k == indices_count);
+
+ SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, curoff));
+ return tsout;
+}
+
+/*
+ * Delete given lexeme from tsvector.
+ * Implementation of user-level ts_delete(tsvector, text).
+ */
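+/*
+ * For example (illustrative):
+ *
+ *   SELECT ts_delete('fat:2,4 cat:3 rat:5A'::tsvector, 'fat');
+ *
+ * should yield 'cat':3 'rat':5A.
+ */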
+Datum
+tsvector_delete_str(PG_FUNCTION_ARGS)
+{
+ TSVector tsin = PG_GETARG_TSVECTOR(0),
+ tsout;
+ text *tlexeme = PG_GETARG_TEXT_PP(1);
+ char *lexeme = VARDATA_ANY(tlexeme);
+ int lexeme_len = VARSIZE_ANY_EXHDR(tlexeme),
+ skip_index;
+
+ if ((skip_index = tsvector_bsearch(tsin, lexeme, lexeme_len)) == -1)
+ PG_RETURN_POINTER(tsin);
+
+ tsout = tsvector_delete_by_indices(tsin, &skip_index, 1);
+
+ PG_FREE_IF_COPY(tsin, 0);
+ PG_FREE_IF_COPY(tlexeme, 1);
+ PG_RETURN_POINTER(tsout);
+}
+
+/*
+ * Delete given array of lexemes from tsvector.
+ * Implementation of user-level ts_delete(tsvector, text[]).
+ */
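+/*
+ * For example (illustrative):
+ *
+ *   SELECT ts_delete('fat:2,4 cat:3 rat:5A'::tsvector, ARRAY['fat','rat']);
+ *
+ * should yield 'cat':3.
+ */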
+Datum
+tsvector_delete_arr(PG_FUNCTION_ARGS)
+{
+ TSVector tsin = PG_GETARG_TSVECTOR(0),
+ tsout;
+ ArrayType *lexemes = PG_GETARG_ARRAYTYPE_P(1);
+ int i,
+ nlex,
+ skip_count,
+ *skip_indices;
+ Datum *dlexemes;
+ bool *nulls;
+
+ deconstruct_array(lexemes, TEXTOID, -1, false, TYPALIGN_INT,
+ &dlexemes, &nulls, &nlex);
+
+ /*
+ * In the typical use case the array of lexemes to delete is relatively
+ * small, so we optimize for that scenario: iterate through the lexemes
+ * array, doing a binary search for each lexeme in the tsvector.
+ */
+ skip_indices = palloc0(nlex * sizeof(int));
+ for (i = skip_count = 0; i < nlex; i++)
+ {
+ char *lex;
+ int lex_len,
+ lex_pos;
+
+ /* Ignore null array elements, they surely don't match */
+ if (nulls[i])
+ continue;
+
+ lex = VARDATA(dlexemes[i]);
+ lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
+ lex_pos = tsvector_bsearch(tsin, lex, lex_len);
+
+ if (lex_pos >= 0)
+ skip_indices[skip_count++] = lex_pos;
+ }
+
+ tsout = tsvector_delete_by_indices(tsin, skip_indices, skip_count);
+
+ pfree(skip_indices);
+ PG_FREE_IF_COPY(tsin, 0);
+ PG_FREE_IF_COPY(lexemes, 1);
+
+ PG_RETURN_POINTER(tsout);
+}
+
+/*
+ * Expand a tsvector into a table with the following columns:
+ * lexeme: lexeme text
+ * positions: integer array of lexeme positions
+ * weights: char array of weights corresponding to positions
+ */
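+/*
+ * For example (illustrative):
+ *
+ *   SELECT * FROM unnest('cat:3 fat:2,4 rat:5A'::tsvector);
+ *
+ * should yield the rows (cat, {3}, {D}), (fat, {2,4}, {D,D}) and
+ * (rat, {5}, {A}).
+ */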
+Datum
+tsvector_unnest(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ TSVector tsin;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ MemoryContext oldcontext;
+ TupleDesc tupdesc;
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ tupdesc = CreateTemplateTupleDesc(3);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "lexeme",
+ TEXTOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 2, "positions",
+ INT2ARRAYOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 3, "weights",
+ TEXTARRAYOID, -1, 0);
+ funcctx->tuple_desc = BlessTupleDesc(tupdesc);
+
+ funcctx->user_fctx = PG_GETARG_TSVECTOR_COPY(0);
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ tsin = (TSVector) funcctx->user_fctx;
+
+ if (funcctx->call_cntr < tsin->size)
+ {
+ WordEntry *arrin = ARRPTR(tsin);
+ char *data = STRPTR(tsin);
+ HeapTuple tuple;
+ int j,
+ i = funcctx->call_cntr;
+ bool nulls[] = {false, false, false};
+ Datum values[3];
+
+ values[0] = PointerGetDatum(cstring_to_text_with_len(data + arrin[i].pos, arrin[i].len));
+
+ if (arrin[i].haspos)
+ {
+ WordEntryPosVector *posv;
+ Datum *positions;
+ Datum *weights;
+ char weight;
+
+ /*
+ * Internally tsvector stores position and weight in the same
+ * uint16 (2 bits for weight, 14 for position). Here we extract
+ * them into two separate arrays.
+ */
+ posv = _POSVECPTR(tsin, arrin + i);
+ positions = palloc(posv->npos * sizeof(Datum));
+ weights = palloc(posv->npos * sizeof(Datum));
+ for (j = 0; j < posv->npos; j++)
+ {
+ positions[j] = Int16GetDatum(WEP_GETPOS(posv->pos[j]));
+ weight = 'D' - WEP_GETWEIGHT(posv->pos[j]);
+ weights[j] = PointerGetDatum(cstring_to_text_with_len(&weight,
+ 1));
+ }
+
+ values[1] = PointerGetDatum(construct_array(positions, posv->npos,
+ INT2OID, 2, true, TYPALIGN_SHORT));
+ values[2] = PointerGetDatum(construct_array(weights, posv->npos,
+ TEXTOID, -1, false, TYPALIGN_INT));
+ }
+ else
+ {
+ nulls[1] = nulls[2] = true;
+ }
+
+ tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+ SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+ }
+ else
+ {
+ SRF_RETURN_DONE(funcctx);
+ }
+}
+
+/*
+ * Convert tsvector to array of lexemes.
+ */
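+/*
+ * For example (illustrative):
+ *
+ *   SELECT tsvector_to_array('fat:2,4 cat:3 rat:5A'::tsvector);
+ *
+ * should yield {cat,fat,rat} -- positions and weights are discarded.
+ */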
+Datum
+tsvector_to_array(PG_FUNCTION_ARGS)
+{
+ TSVector tsin = PG_GETARG_TSVECTOR(0);
+ WordEntry *arrin = ARRPTR(tsin);
+ Datum *elements;
+ int i;
+ ArrayType *array;
+
+ elements = palloc(tsin->size * sizeof(Datum));
+
+ for (i = 0; i < tsin->size; i++)
+ {
+ elements[i] = PointerGetDatum(cstring_to_text_with_len(STRPTR(tsin) + arrin[i].pos,
+ arrin[i].len));
+ }
+
+ array = construct_array(elements, tsin->size, TEXTOID, -1, false, TYPALIGN_INT);
+
+ pfree(elements);
+ PG_FREE_IF_COPY(tsin, 0);
+ PG_RETURN_POINTER(array);
+}
+
+/*
+ * Build tsvector from array of lexemes.
+ */
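+/*
+ * For example (illustrative):
+ *
+ *   SELECT array_to_tsvector(ARRAY['fat', 'cat', 'rat']);
+ *
+ * should yield 'cat' 'fat' 'rat' -- sorted, de-duplicated, and without
+ * positions.
+ */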
+Datum
+array_to_tsvector(PG_FUNCTION_ARGS)
+{
+ ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
+ TSVector tsout;
+ Datum *dlexemes;
+ WordEntry *arrout;
+ bool *nulls;
+ int nitems,
+ i,
+ tslen,
+ datalen = 0;
+ char *cur;
+
+ deconstruct_array(v, TEXTOID, -1, false, TYPALIGN_INT, &dlexemes, &nulls, &nitems);
+
+ /*
+ * Reject nulls and zero-length strings (maybe we should just ignore them
+ * instead?)
+ */
+ for (i = 0; i < nitems; i++)
+ {
+ if (nulls[i])
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("lexeme array may not contain nulls")));
+
+ if (VARSIZE(dlexemes[i]) - VARHDRSZ == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_ZERO_LENGTH_CHARACTER_STRING),
+ errmsg("lexeme array may not contain empty strings")));
+ }
+
+ /* Sort and de-dup, because this is required for a valid tsvector. */
+ if (nitems > 1)
+ {
+ qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
+ nitems = qunique(dlexemes, nitems, sizeof(Datum),
+ compare_text_lexemes);
+ }
+
+ /* Calculate space needed for surviving lexemes. */
+ for (i = 0; i < nitems; i++)
+ datalen += VARSIZE(dlexemes[i]) - VARHDRSZ;
+ tslen = CALCDATASIZE(nitems, datalen);
+
+ /* Allocate and fill tsvector. */
+ tsout = (TSVector) palloc0(tslen);
+ SET_VARSIZE(tsout, tslen);
+ tsout->size = nitems;
+
+ arrout = ARRPTR(tsout);
+ cur = STRPTR(tsout);
+ for (i = 0; i < nitems; i++)
+ {
+ char *lex = VARDATA(dlexemes[i]);
+ int lex_len = VARSIZE(dlexemes[i]) - VARHDRSZ;
+
+ memcpy(cur, lex, lex_len);
+ arrout[i].haspos = 0;
+ arrout[i].len = lex_len;
+ arrout[i].pos = cur - STRPTR(tsout);
+ cur += lex_len;
+ }
+
+ PG_FREE_IF_COPY(v, 0);
+ PG_RETURN_POINTER(tsout);
+}
+
+/*
+ * ts_filter(): keep only lexemes with given weights in tsvector.
+ */
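+/*
+ * For example (illustrative):
+ *
+ *   SELECT ts_filter('fat:2,4 cat:3b,7c rat:5A'::tsvector, '{a,b}');
+ *
+ * should yield 'cat':3B 'rat':5A -- lexemes left with no matching-weight
+ * positions are dropped.
+ */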
+Datum
+tsvector_filter(PG_FUNCTION_ARGS)
+{
+ TSVector tsin = PG_GETARG_TSVECTOR(0),
+ tsout;
+ ArrayType *weights = PG_GETARG_ARRAYTYPE_P(1);
+ WordEntry *arrin = ARRPTR(tsin),
+ *arrout;
+ char *datain = STRPTR(tsin),
+ *dataout;
+ Datum *dweights;
+ bool *nulls;
+ int nweights;
+ int i,
+ j;
+ int cur_pos = 0;
+ char mask = 0;
+
+ deconstruct_array(weights, CHAROID, 1, true, TYPALIGN_CHAR,
+ &dweights, &nulls, &nweights);
+
+ for (i = 0; i < nweights; i++)
+ {
+ char char_weight;
+
+ if (nulls[i])
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("weight array may not contain nulls")));
+
+ char_weight = DatumGetChar(dweights[i]);
+ switch (char_weight)
+ {
+ case 'A':
+ case 'a':
+ mask = mask | 8;
+ break;
+ case 'B':
+ case 'b':
+ mask = mask | 4;
+ break;
+ case 'C':
+ case 'c':
+ mask = mask | 2;
+ break;
+ case 'D':
+ case 'd':
+ mask = mask | 1;
+ break;
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unrecognized weight: \"%c\"", char_weight)));
+ }
+ }
+
+ tsout = (TSVector) palloc0(VARSIZE(tsin));
+ tsout->size = tsin->size;
+ arrout = ARRPTR(tsout);
+ dataout = STRPTR(tsout);
+
+ for (i = j = 0; i < tsin->size; i++)
+ {
+ WordEntryPosVector *posvin,
+ *posvout;
+ int npos = 0;
+ int k;
+
+ if (!arrin[i].haspos)
+ continue;
+
+ posvin = _POSVECPTR(tsin, arrin + i);
+ posvout = (WordEntryPosVector *)
+ (dataout + SHORTALIGN(cur_pos + arrin[i].len));
+
+ for (k = 0; k < posvin->npos; k++)
+ {
+ if (mask & (1 << WEP_GETWEIGHT(posvin->pos[k])))
+ posvout->pos[npos++] = posvin->pos[k];
+ }
+
+ /* if no satisfactory positions found, skip lexeme */
+ if (!npos)
+ continue;
+
+ arrout[j].haspos = true;
+ arrout[j].len = arrin[i].len;
+ arrout[j].pos = cur_pos;
+
+ memcpy(dataout + cur_pos, datain + arrin[i].pos, arrin[i].len);
+ posvout->npos = npos;
+ cur_pos += SHORTALIGN(arrin[i].len);
+ cur_pos += POSDATALEN(tsout, arrout + j) * sizeof(WordEntryPos) +
+ sizeof(uint16);
+ j++;
+ }
+
+ tsout->size = j;
+ if (dataout != STRPTR(tsout))
+ memmove(STRPTR(tsout), dataout, cur_pos);
+
+ SET_VARSIZE(tsout, CALCDATASIZE(tsout->size, cur_pos));
+
+ PG_FREE_IF_COPY(tsin, 0);
+ PG_RETURN_POINTER(tsout);
+}
+
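+/*
+ * Concatenate two tsvectors, offsetting the positions of the second input
+ * by the largest position in the first. For example (illustrative):
+ *
+ *   SELECT 'a:1 b:2'::tsvector || 'c:1 d:2 b:3'::tsvector;
+ *
+ * should yield 'a':1 'b':2,5 'c':3 'd':4.
+ */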
+Datum
+tsvector_concat(PG_FUNCTION_ARGS)
+{
+ TSVector in1 = PG_GETARG_TSVECTOR(0);
+ TSVector in2 = PG_GETARG_TSVECTOR(1);
+ TSVector out;
+ WordEntry *ptr;
+ WordEntry *ptr1,
+ *ptr2;
+ WordEntryPos *p;
+ int maxpos = 0,
+ i,
+ j,
+ i1,
+ i2,
+ dataoff,
+ output_bytes,
+ output_size;
+ char *data,
+ *data1,
+ *data2;
+
+ /* Get max position in in1; we'll need this to offset in2's positions */
+ ptr = ARRPTR(in1);
+ i = in1->size;
+ while (i--)
+ {
+ if ((j = POSDATALEN(in1, ptr)) != 0)
+ {
+ p = POSDATAPTR(in1, ptr);
+ while (j--)
+ {
+ if (WEP_GETPOS(*p) > maxpos)
+ maxpos = WEP_GETPOS(*p);
+ p++;
+ }
+ }
+ ptr++;
+ }
+
+ ptr1 = ARRPTR(in1);
+ ptr2 = ARRPTR(in2);
+ data1 = STRPTR(in1);
+ data2 = STRPTR(in2);
+ i1 = in1->size;
+ i2 = in2->size;
+
+ /*
+ * Conservative estimate of space needed. We might need all the data in
+ * both inputs, and conceivably add a pad byte before position data for
+ * each item where there was none before.
+ */
+ output_bytes = VARSIZE(in1) + VARSIZE(in2) + i1 + i2;
+
+ out = (TSVector) palloc0(output_bytes);
+ SET_VARSIZE(out, output_bytes);
+
+ /*
+ * We must make out->size valid so that STRPTR(out) is sensible. We'll
+ * collapse out any unused space at the end.
+ */
+ out->size = in1->size + in2->size;
+
+ ptr = ARRPTR(out);
+ data = STRPTR(out);
+ dataoff = 0;
+ while (i1 && i2)
+ {
+ int cmp = compareEntry(data1, ptr1, data2, ptr2);
+
+ if (cmp < 0)
+ { /* in1 first */
+ ptr->haspos = ptr1->haspos;
+ ptr->len = ptr1->len;
+ memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
+ ptr->pos = dataoff;
+ dataoff += ptr1->len;
+ if (ptr->haspos)
+ {
+ dataoff = SHORTALIGN(dataoff);
+ memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
+ dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
+ }
+
+ ptr++;
+ ptr1++;
+ i1--;
+ }
+ else if (cmp > 0)
+ { /* in2 first */
+ ptr->haspos = ptr2->haspos;
+ ptr->len = ptr2->len;
+ memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
+ ptr->pos = dataoff;
+ dataoff += ptr2->len;
+ if (ptr->haspos)
+ {
+ int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
+
+ if (addlen == 0)
+ ptr->haspos = 0;
+ else
+ {
+ dataoff = SHORTALIGN(dataoff);
+ dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
+ }
+ }
+
+ ptr++;
+ ptr2++;
+ i2--;
+ }
+ else
+ {
+ ptr->haspos = ptr1->haspos | ptr2->haspos;
+ ptr->len = ptr1->len;
+ memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
+ ptr->pos = dataoff;
+ dataoff += ptr1->len;
+ if (ptr->haspos)
+ {
+ if (ptr1->haspos)
+ {
+ dataoff = SHORTALIGN(dataoff);
+ memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
+ dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
+ if (ptr2->haspos)
+ dataoff += add_pos(in2, ptr2, out, ptr, maxpos) * sizeof(WordEntryPos);
+ }
+ else /* must have ptr2->haspos */
+ {
+ int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
+
+ if (addlen == 0)
+ ptr->haspos = 0;
+ else
+ {
+ dataoff = SHORTALIGN(dataoff);
+ dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
+ }
+ }
+ }
+
+ ptr++;
+ ptr1++;
+ ptr2++;
+ i1--;
+ i2--;
+ }
+ }
+
+ while (i1)
+ {
+ ptr->haspos = ptr1->haspos;
+ ptr->len = ptr1->len;
+ memcpy(data + dataoff, data1 + ptr1->pos, ptr1->len);
+ ptr->pos = dataoff;
+ dataoff += ptr1->len;
+ if (ptr->haspos)
+ {
+ dataoff = SHORTALIGN(dataoff);
+ memcpy(data + dataoff, _POSVECPTR(in1, ptr1), POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16));
+ dataoff += POSDATALEN(in1, ptr1) * sizeof(WordEntryPos) + sizeof(uint16);
+ }
+
+ ptr++;
+ ptr1++;
+ i1--;
+ }
+
+ while (i2)
+ {
+ ptr->haspos = ptr2->haspos;
+ ptr->len = ptr2->len;
+ memcpy(data + dataoff, data2 + ptr2->pos, ptr2->len);
+ ptr->pos = dataoff;
+ dataoff += ptr2->len;
+ if (ptr->haspos)
+ {
+ int addlen = add_pos(in2, ptr2, out, ptr, maxpos);
+
+ if (addlen == 0)
+ ptr->haspos = 0;
+ else
+ {
+ dataoff = SHORTALIGN(dataoff);
+ dataoff += addlen * sizeof(WordEntryPos) + sizeof(uint16);
+ }
+ }
+
+ ptr++;
+ ptr2++;
+ i2--;
+ }
+
+ /*
+ * Instead of checking each offset individually, we check for overflow of
+ * pos fields once at the end.
+ */
+ if (dataoff > MAXSTRPOS)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("string is too long for tsvector (%d bytes, max %d bytes)", dataoff, MAXSTRPOS)));
+
+ /*
+ * Adjust sizes (asserting that we didn't overrun the original estimates)
+ * and collapse out any unused array entries.
+ */
+ output_size = ptr - ARRPTR(out);
+ Assert(output_size <= out->size);
+ out->size = output_size;
+ if (data != STRPTR(out))
+ memmove(STRPTR(out), data, dataoff);
+ output_bytes = CALCDATASIZE(out->size, dataoff);
+ Assert(output_bytes <= VARSIZE(out));
+ SET_VARSIZE(out, output_bytes);
+
+ PG_FREE_IF_COPY(in1, 0);
+ PG_FREE_IF_COPY(in2, 1);
+ PG_RETURN_POINTER(out);
+}
+
+/*
+ * Compare two strings by tsvector rules.
+ *
+ * If prefix is true, returns zero iff a is a prefix of b
+ */
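+/*
+ * For example (illustrative): tsCompareString("foo", 3, "foobar", 6, true)
+ * returns 0 (prefix match), while the same call with prefix = false returns
+ * a negative value, since "foo" sorts before "foobar".
+ */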
+int32
+tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
+{
+ int cmp;
+
+ if (lena == 0)
+ {
+ if (prefix)
+ cmp = 0; /* empty string is prefix of anything */
+ else
+ cmp = (lenb > 0) ? -1 : 0;
+ }
+ else if (lenb == 0)
+ {
+ cmp = (lena > 0) ? 1 : 0;
+ }
+ else
+ {
+ cmp = memcmp(a, b, Min((unsigned int) lena, (unsigned int) lenb));
+
+ if (prefix)
+ {
+ if (cmp == 0 && lena > lenb)
+ cmp = 1; /* a is longer, so not a prefix of b */
+ }
+ else if (cmp == 0 && lena != lenb)
+ {
+ cmp = (lena < lenb) ? -1 : 1;
+ }
+ }
+
+ return cmp;
+}
+
+/*
+ * Check weight info and/or fill 'data' with the required positions
+ */
+static TSTernaryValue
+checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
+ ExecPhraseData *data)
+{
+ TSTernaryValue result = TS_NO;
+
+ Assert(data == NULL || data->npos == 0);
+
+ if (entry->haspos)
+ {
+ WordEntryPosVector *posvec;
+
+ /*
+ * We can't use the _POSVECPTR macro here because the pointer to the
+ * tsvector's lexeme storage is already contained in chkval->values.
+ */
+ posvec = (WordEntryPosVector *)
+ (chkval->values + SHORTALIGN(entry->pos + entry->len));
+
+ if (val->weight && data)
+ {
+ WordEntryPos *posvec_iter = posvec->pos;
+ WordEntryPos *dptr;
+
+ /*
+ * Filter position information by weights
+ */
+ dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
+ data->allocated = true;
+
+ /* Is there a position with a matching weight? */
+ while (posvec_iter < posvec->pos + posvec->npos)
+ {
+ /* If true, append this position to the data->pos */
+ if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
+ {
+ *dptr = WEP_GETPOS(*posvec_iter);
+ dptr++;
+ }
+
+ posvec_iter++;
+ }
+
+ data->npos = dptr - data->pos;
+
+ if (data->npos > 0)
+ result = TS_YES;
+ else
+ {
+ pfree(data->pos);
+ data->pos = NULL;
+ data->allocated = false;
+ }
+ }
+ else if (val->weight)
+ {
+ WordEntryPos *posvec_iter = posvec->pos;
+
+ /* Is there a position with a matching weight? */
+ while (posvec_iter < posvec->pos + posvec->npos)
+ {
+ if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
+ {
+ result = TS_YES;
+ break; /* no need to go further */
+ }
+
+ posvec_iter++;
+ }
+ }
+ else if (data)
+ {
+ data->npos = posvec->npos;
+ data->pos = posvec->pos;
+ data->allocated = false;
+ result = TS_YES;
+ }
+ else
+ {
+ /* simplest case: no weight check, positions not needed */
+ result = TS_YES;
+ }
+ }
+ else
+ {
+ /*
+ * Position info is lacking, so if the caller requires it, we can only
+ * say that maybe there is a match.
+ *
+ * Notice, however, that we *don't* check val->weight here.
+ * Historically, stripped tsvectors are considered to match queries
+ * whether or not the query has a weight restriction; that's a little
+ * dubious but we'll preserve the behavior.
+ */
+ if (data)
+ result = TS_MAYBE;
+ else
+ result = TS_YES;
+ }
+
+ return result;
+}
+
+/*
+ * TS_execute callback for matching a tsquery operand to plain tsvector data
+ */
+static TSTernaryValue
+checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
+{
+ CHKVAL *chkval = (CHKVAL *) checkval;
+ WordEntry *StopLow = chkval->arrb;
+ WordEntry *StopHigh = chkval->arre;
+ WordEntry *StopMiddle = StopHigh;
+ TSTernaryValue res = TS_NO;
+
+ /* Loop invariant: StopLow <= val < StopHigh */
+ while (StopLow < StopHigh)
+ {
+ int difference;
+
+ StopMiddle = StopLow + (StopHigh - StopLow) / 2;
+ difference = tsCompareString(chkval->operand + val->distance,
+ val->length,
+ chkval->values + StopMiddle->pos,
+ StopMiddle->len,
+ false);
+
+ if (difference == 0)
+ {
+ /* Check weight info & fill 'data' with positions */
+ res = checkclass_str(chkval, StopMiddle, val, data);
+ break;
+ }
+ else if (difference > 0)
+ StopLow = StopMiddle + 1;
+ else
+ StopHigh = StopMiddle;
+ }
+
+ /*
+ * If it's a prefix search, we should also consider lexemes that the
+ * search term is a prefix of (which will necessarily immediately follow
+ * the place we found in the above loop). But we can skip them if there
+ * was a definite match on the exact term AND the caller doesn't need
+ * position info.
+ */
+ if (val->prefix && (res != TS_YES || data))
+ {
+ WordEntryPos *allpos = NULL;
+ int npos = 0,
+ totalpos = 0;
+
+ /* adjust start position for corner case */
+ if (StopLow >= StopHigh)
+ StopMiddle = StopHigh;
+
+ /* we don't try to re-use any data from the initial match */
+ if (data)
+ {
+ if (data->allocated)
+ pfree(data->pos);
+ data->pos = NULL;
+ data->allocated = false;
+ data->npos = 0;
+ }
+ res = TS_NO;
+
+ while ((res != TS_YES || data) &&
+ StopMiddle < chkval->arre &&
+ tsCompareString(chkval->operand + val->distance,
+ val->length,
+ chkval->values + StopMiddle->pos,
+ StopMiddle->len,
+ true) == 0)
+ {
+ TSTernaryValue subres;
+
+ subres = checkclass_str(chkval, StopMiddle, val, data);
+
+ if (subres != TS_NO)
+ {
+ if (data)
+ {
+ /*
+ * We need to join position information
+ */
+ if (subres == TS_MAYBE)
+ {
+ /*
+ * No position info for this match, so we must report
+ * MAYBE overall.
+ */
+ res = TS_MAYBE;
+ /* forget any previous positions */
+ npos = 0;
+ /* don't leak storage */
+ if (allpos)
+ pfree(allpos);
+ break;
+ }
+
+ while (npos + data->npos > totalpos)
+ {
+ if (totalpos == 0)
+ {
+ totalpos = 256;
+ allpos = palloc(sizeof(WordEntryPos) * totalpos);
+ }
+ else
+ {
+ totalpos *= 2;
+ allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos);
+ }
+ }
+
+ memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
+ npos += data->npos;
+
+ /* don't leak storage from individual matches */
+ if (data->allocated)
+ pfree(data->pos);
+ data->pos = NULL;
+ data->allocated = false;
+ /* it's important to reset data->npos before next loop */
+ data->npos = 0;
+ }
+ else
+ {
+ /* Don't need positions, just handle YES/MAYBE */
+ if (subres == TS_YES || res == TS_NO)
+ res = subres;
+ }
+ }
+
+ StopMiddle++;
+ }
+
+ if (data && npos > 0)
+ {
+ /* Sort and make unique array of found positions */
+ data->pos = allpos;
+ qsort(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
+ data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
+ compareWordEntryPos);
+ data->allocated = true;
+ res = TS_YES;
+ }
+ }
+
+ return res;
+}
+
+/*
+ * Compute output position list for a tsquery operator in phrase mode.
+ *
+ * Merge the position lists in Ldata and Rdata as specified by "emit",
+ * returning the result list into *data. The input position lists must be
+ * sorted and unique, and the output will be as well.
+ *
+ * data: pointer to initially-all-zeroes output struct, or NULL
+ * Ldata, Rdata: input position lists
+ * emit: bitmask of TSPO_XXX flags
+ * Loffset: offset to be added to Ldata positions before comparing/outputting
+ * Roffset: offset to be added to Rdata positions before comparing/outputting
+ * max_npos: maximum possible required size of output position array
+ *
+ * Loffset and Roffset should not be negative, else we risk trying to output
+ * negative positions, which won't fit into WordEntryPos.
+ *
+ * The result is boolean (TS_YES or TS_NO), but for the caller's convenience
+ * we return it as TSTernaryValue.
+ *
+ * Returns TS_YES if any positions were emitted to *data; or if data is NULL,
+ * returns TS_YES if any positions would have been emitted.
+ */
+#define TSPO_L_ONLY 0x01 /* emit positions appearing only in L */
+#define TSPO_R_ONLY 0x02 /* emit positions appearing only in R */
+#define TSPO_BOTH 0x04 /* emit positions appearing in both L&R */
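+
+/*
+ * For example (illustrative, with zero offsets): if Ldata holds positions
+ * {1,3,5} and Rdata holds {3,6}, emit = TSPO_BOTH outputs {3}, while
+ * emit = TSPO_L_ONLY | TSPO_R_ONLY | TSPO_BOTH outputs {1,3,5,6}.
+ */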
+
+static TSTernaryValue
+TS_phrase_output(ExecPhraseData *data,
+ ExecPhraseData *Ldata,
+ ExecPhraseData *Rdata,
+ int emit,
+ int Loffset,
+ int Roffset,
+ int max_npos)
+{
+ int Lindex,
+ Rindex;
+
+ /* Loop until both inputs are exhausted */
+ Lindex = Rindex = 0;
+ while (Lindex < Ldata->npos || Rindex < Rdata->npos)
+ {
+ int Lpos,
+ Rpos;
+ int output_pos = 0;
+
+ /*
+ * Fetch current values to compare. WEP_GETPOS() is needed because
+ * ExecPhraseData->data can point to a tsvector's WordEntryPosVector.
+ */
+ if (Lindex < Ldata->npos)
+ Lpos = WEP_GETPOS(Ldata->pos[Lindex]) + Loffset;
+ else
+ {
+ /* L array exhausted, so we're done if R_ONLY isn't set */
+ if (!(emit & TSPO_R_ONLY))
+ break;
+ Lpos = INT_MAX;
+ }
+ if (Rindex < Rdata->npos)
+ Rpos = WEP_GETPOS(Rdata->pos[Rindex]) + Roffset;
+ else
+ {
+ /* R array exhausted, so we're done if L_ONLY isn't set */
+ if (!(emit & TSPO_L_ONLY))
+ break;
+ Rpos = INT_MAX;
+ }
+
+ /* Merge-join the two input lists */
+ if (Lpos < Rpos)
+ {
+ /* Lpos is not matched in Rdata, should we output it? */
+ if (emit & TSPO_L_ONLY)
+ output_pos = Lpos;
+ Lindex++;
+ }
+ else if (Lpos == Rpos)
+ {
+ /* Lpos and Rpos match ... should we output it? */
+ if (emit & TSPO_BOTH)
+ output_pos = Rpos;
+ Lindex++;
+ Rindex++;
+ }
+ else /* Lpos > Rpos */
+ {
+ /* Rpos is not matched in Ldata, should we output it? */
+ if (emit & TSPO_R_ONLY)
+ output_pos = Rpos;
+ Rindex++;
+ }
+
+ if (output_pos > 0)
+ {
+ if (data)
+ {
+ /* Store position, first allocating output array if needed */
+ if (data->pos == NULL)
+ {
+ data->pos = (WordEntryPos *)
+ palloc(max_npos * sizeof(WordEntryPos));
+ data->allocated = true;
+ }
+ data->pos[data->npos++] = output_pos;
+ }
+ else
+ {
+ /*
+ * Exact positions not needed, so return TS_YES as soon as we
+ * know there is at least one.
+ */
+ return TS_YES;
+ }
+ }
+ }
+
+ if (data && data->npos > 0)
+ {
+ /* Let's assert we didn't overrun the array */
+ Assert(data->npos <= max_npos);
+ return TS_YES;
+ }
+ return TS_NO;
+}
+
+/*
+ * Execute tsquery at or below an OP_PHRASE operator.
+ *
+ * This handles tsquery execution at recursion levels where we need to care
+ * about match locations.
+ *
+ * In addition to the same arguments used for TS_execute, the caller may pass
+ * a preinitialized-to-zeroes ExecPhraseData struct, to be filled with lexeme
+ * match position info on success. data == NULL if no position data need be
+ * returned. (In practice, outside callers pass NULL, and only the internal
+ * recursion cases pass a data pointer.)
+ * Note: the function assumes data != NULL for operators other than OP_PHRASE.
+ * This is OK because an outside call always starts from an OP_PHRASE node.
+ *
+ * The detailed semantics of the match data, given that the function returned
+ * TS_YES (successful match), are:
+ *
+ * npos > 0, negate = false:
+ * query is matched at specified position(s) (and only those positions)
+ * npos > 0, negate = true:
+ * query is matched at all positions *except* specified position(s)
+ * npos = 0, negate = true:
+ * query is matched at all positions
+ * npos = 0, negate = false:
+ * disallowed (this should result in TS_NO or TS_MAYBE, as appropriate)
+ *
+ * Successful matches also return a "width" value which is the match width in
+ * lexemes, less one. Hence, "width" is zero for simple one-lexeme matches,
+ * and is the sum of the phrase operator distances for phrase matches. Note
+ * that when width > 0, the listed positions represent the ends of matches not
+ * the starts. (This unintuitive rule is needed to avoid possibly generating
+ * negative positions, which wouldn't fit into the WordEntryPos arrays.)
+ *
+ * If the TSExecuteCallback function reports that an operand is present
+ * but fails to provide position(s) for it, we will return TS_MAYBE when
+ * it is possible but not certain that the query is matched.
+ *
+ * When the function returns TS_NO or TS_MAYBE, it must return npos = 0,
+ * negate = false (which is the state initialized by the caller); but the
+ * "width" output in such cases is undefined.
+ */
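+/*
+ * Worked example (illustrative): for the query 'a <-> b' (distance 1)
+ * against a document with 'a' at position 1 and 'b' at position 2, the
+ * OP_PHRASE branch below shifts the left-hand positions by
+ * Loffset = distance + Rdata.width = 1, giving {2}, which intersects the
+ * right-hand positions {2}; the match is therefore reported at position 2
+ * with width 1, i.e. at the end of the phrase, per the rule above.
+ */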
+static TSTernaryValue
+TS_phrase_execute(QueryItem *curitem, void *arg, uint32 flags,
+ TSExecuteCallback chkcond,
+ ExecPhraseData *data)
+{
+ ExecPhraseData Ldata,
+ Rdata;
+ TSTernaryValue lmatch,
+ rmatch;
+ int Loffset,
+ Roffset,
+ maxwidth;
+
+ /* since this function recurses, it could be driven to stack overflow */
+ check_stack_depth();
+
+ /* ... and let's check for query cancel while we're at it */
+ CHECK_FOR_INTERRUPTS();
+
+ if (curitem->type == QI_VAL)
+ return chkcond(arg, (QueryOperand *) curitem, data);
+
+ switch (curitem->qoperator.oper)
+ {
+ case OP_NOT:
+
+ /*
+ * We need not touch data->width, since a NOT operation does not
+ * change the match width.
+ */
+ if (flags & TS_EXEC_SKIP_NOT)
+ {
+ /* with SKIP_NOT, report NOT as "match everywhere" */
+ Assert(data->npos == 0 && !data->negate);
+ data->negate = true;
+ return TS_YES;
+ }
+ switch (TS_phrase_execute(curitem + 1, arg, flags, chkcond, data))
+ {
+ case TS_NO:
+ /* change "match nowhere" to "match everywhere" */
+ Assert(data->npos == 0 && !data->negate);
+ data->negate = true;
+ return TS_YES;
+ case TS_YES:
+ if (data->npos > 0)
+ {
+ /* we have some positions, invert negate flag */
+ data->negate = !data->negate;
+ return TS_YES;
+ }
+ else if (data->negate)
+ {
+ /* change "match everywhere" to "match nowhere" */
+ data->negate = false;
+ return TS_NO;
+ }
+ /* Should not get here if result was TS_YES */
+ Assert(false);
+ break;
+ case TS_MAYBE:
+ /* match positions are, and remain, uncertain */
+ return TS_MAYBE;
+ }
+ break;
+
+ case OP_PHRASE:
+ case OP_AND:
+ memset(&Ldata, 0, sizeof(Ldata));
+ memset(&Rdata, 0, sizeof(Rdata));
+
+ lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
+ arg, flags, chkcond, &Ldata);
+ if (lmatch == TS_NO)
+ return TS_NO;
+
+ rmatch = TS_phrase_execute(curitem + 1,
+ arg, flags, chkcond, &Rdata);
+ if (rmatch == TS_NO)
+ return TS_NO;
+
+ /*
+ * If either operand has no position information, then we can't
+ * return reliable position data, only a MAYBE result.
+ */
+ if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
+ return TS_MAYBE;
+
+ if (curitem->qoperator.oper == OP_PHRASE)
+ {
+ /*
+ * Compute Loffset and Roffset suitable for phrase match, and
+ * compute overall width of whole phrase match.
+ */
+ Loffset = curitem->qoperator.distance + Rdata.width;
+ Roffset = 0;
+ if (data)
+ data->width = curitem->qoperator.distance +
+ Ldata.width + Rdata.width;
+ }
+ else
+ {
+ /*
+ * For OP_AND, set output width and alignment like OP_OR (see
+ * comment below)
+ */
+ maxwidth = Max(Ldata.width, Rdata.width);
+ Loffset = maxwidth - Ldata.width;
+ Roffset = maxwidth - Rdata.width;
+ if (data)
+ data->width = maxwidth;
+ }
+
+ if (Ldata.negate && Rdata.negate)
+ {
+ /* !L & !R: treat as !(L | R) */
+ (void) TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
+ Loffset, Roffset,
+ Ldata.npos + Rdata.npos);
+ if (data)
+ data->negate = true;
+ return TS_YES;
+ }
+ else if (Ldata.negate)
+ {
+ /* !L & R */
+ return TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_R_ONLY,
+ Loffset, Roffset,
+ Rdata.npos);
+ }
+ else if (Rdata.negate)
+ {
+ /* L & !R */
+ return TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_L_ONLY,
+ Loffset, Roffset,
+ Ldata.npos);
+ }
+ else
+ {
+ /* straight AND */
+ return TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_BOTH,
+ Loffset, Roffset,
+ Min(Ldata.npos, Rdata.npos));
+ }
+
+ case OP_OR:
+ memset(&Ldata, 0, sizeof(Ldata));
+ memset(&Rdata, 0, sizeof(Rdata));
+
+ lmatch = TS_phrase_execute(curitem + curitem->qoperator.left,
+ arg, flags, chkcond, &Ldata);
+ rmatch = TS_phrase_execute(curitem + 1,
+ arg, flags, chkcond, &Rdata);
+
+ if (lmatch == TS_NO && rmatch == TS_NO)
+ return TS_NO;
+
+ /*
+ * If either operand has no position information, then we can't
+ * return reliable position data, only a MAYBE result.
+ */
+ if (lmatch == TS_MAYBE || rmatch == TS_MAYBE)
+ return TS_MAYBE;
+
+ /*
+ * Cope with undefined output width from failed submatch. (This
+ * takes less code than trying to ensure that all failure returns
+ * set data->width to zero.)
+ */
+ if (lmatch == TS_NO)
+ Ldata.width = 0;
+ if (rmatch == TS_NO)
+ Rdata.width = 0;
+
+ /*
+ * For OP_AND and OP_OR, report the width of the wider of the two
+ * inputs, and align the narrower input's positions to the right
+ * end of that width. This rule deals at least somewhat
+ * reasonably with cases like "x <-> (y | z <-> q)".
+ */
+ maxwidth = Max(Ldata.width, Rdata.width);
+ Loffset = maxwidth - Ldata.width;
+ Roffset = maxwidth - Rdata.width;
+ data->width = maxwidth;
+
+ if (Ldata.negate && Rdata.negate)
+ {
+ /* !L | !R: treat as !(L & R) */
+ (void) TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_BOTH,
+ Loffset, Roffset,
+ Min(Ldata.npos, Rdata.npos));
+ data->negate = true;
+ return TS_YES;
+ }
+ else if (Ldata.negate)
+ {
+ /* !L | R: treat as !(L & !R) */
+ (void) TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_L_ONLY,
+ Loffset, Roffset,
+ Ldata.npos);
+ data->negate = true;
+ return TS_YES;
+ }
+ else if (Rdata.negate)
+ {
+ /* L | !R: treat as !(!L & R) */
+ (void) TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_R_ONLY,
+ Loffset, Roffset,
+ Rdata.npos);
+ data->negate = true;
+ return TS_YES;
+ }
+ else
+ {
+ /* straight OR */
+ return TS_phrase_output(data, &Ldata, &Rdata,
+ TSPO_BOTH | TSPO_L_ONLY | TSPO_R_ONLY,
+ Loffset, Roffset,
+ Ldata.npos + Rdata.npos);
+ }
+
+ default:
+ elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
+ }
+
+ /* not reachable, but keep compiler quiet */
+ return TS_NO;
+}
+
+
+/*
+ * Evaluate tsquery boolean expression.
+ *
+ * curitem: current tsquery item (initially, the first one)
+ * arg: opaque value to pass through to callback function
+ * flags: bitmask of flag bits shown in ts_utils.h
+ * chkcond: callback function to check whether a primitive value is present
+ */
+bool
+TS_execute(QueryItem *curitem, void *arg, uint32 flags,
+ TSExecuteCallback chkcond)
+{
+ /*
+ * If we get TS_MAYBE from the recursion, return true. We could only see
+ * that result if the caller passed TS_EXEC_PHRASE_NO_POS, so there's no
+ * need to check again.
+ */
+ return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO;
+}
+
+/*
+ * Evaluate tsquery boolean expression.
+ *
+ * This is the same as TS_execute except that TS_MAYBE is returned as-is.
+ */
+TSTernaryValue
+TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags,
+ TSExecuteCallback chkcond)
+{
+ return TS_execute_recurse(curitem, arg, flags, chkcond);
+}
+
+/*
+ * TS_execute recursion for operators above any phrase operator. Here we do
+ * not need to worry about lexeme positions. As soon as we hit an OP_PHRASE
+ * operator, we pass it off to TS_phrase_execute which does worry.
+ */
+static TSTernaryValue
+TS_execute_recurse(QueryItem *curitem, void *arg, uint32 flags,
+ TSExecuteCallback chkcond)
+{
+ TSTernaryValue lmatch;
+
+ /* since this function recurses, it could be driven to stack overflow */
+ check_stack_depth();
+
+ /* ... and let's check for query cancel while we're at it */
+ CHECK_FOR_INTERRUPTS();
+
+ if (curitem->type == QI_VAL)
+ return chkcond(arg, (QueryOperand *) curitem,
+ NULL /* don't need position info */ );
+
+ switch (curitem->qoperator.oper)
+ {
+ case OP_NOT:
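+			/* with TS_EXEC_SKIP_NOT, just assume the NOT matches */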
+ if (flags & TS_EXEC_SKIP_NOT)
+ return TS_YES;
+ switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
+ {
+ case TS_NO:
+ return TS_YES;
+ case TS_YES:
+ return TS_NO;
+ case TS_MAYBE:
+ return TS_MAYBE;
+ }
+ break;
+
+ case OP_AND:
+ lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
+ flags, chkcond);
+ if (lmatch == TS_NO)
+ return TS_NO;
+ switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
+ {
+ case TS_NO:
+ return TS_NO;
+ case TS_YES:
+ return lmatch;
+ case TS_MAYBE:
+ return TS_MAYBE;
+ }
+ break;
+
+ case OP_OR:
+ lmatch = TS_execute_recurse(curitem + curitem->qoperator.left, arg,
+ flags, chkcond);
+ if (lmatch == TS_YES)
+ return TS_YES;
+ switch (TS_execute_recurse(curitem + 1, arg, flags, chkcond))
+ {
+ case TS_NO:
+ return lmatch;
+ case TS_YES:
+ return TS_YES;
+ case TS_MAYBE:
+ return TS_MAYBE;
+ }
+ break;
+
+ case OP_PHRASE:
+
+ /*
+ * If we get a MAYBE result, and the caller doesn't want that,
+ * convert it to NO. It would be more consistent, perhaps, to
+ * return the result of TS_phrase_execute() verbatim and then
+ * convert MAYBE results at the top of the recursion. But
+ * converting at the topmost phrase operator gives results that
+ * are bug-compatible with the old implementation, so do it like
+ * this for now.
+ */
+ switch (TS_phrase_execute(curitem, arg, flags, chkcond, NULL))
+ {
+ case TS_NO:
+ return TS_NO;
+ case TS_YES:
+ return TS_YES;
+ case TS_MAYBE:
+ return (flags & TS_EXEC_PHRASE_NO_POS) ? TS_MAYBE : TS_NO;
+ }
+ break;
+
+ default:
+ elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
+ }
+
+ /* not reachable, but keep compiler quiet */
+ return TS_NO;
+}
+
+/*
+ * Detect whether a tsquery boolean expression requires any positive matches
+ * to values shown in the tsquery.
+ *
+ * This is needed to know whether a GIN index search requires full index scan.
+ * For example, 'x & !y' requires a match of x, so it's sufficient to scan
+ * entries for x; but 'x | !y' could match rows containing neither x nor y.
+ */
+bool
+tsquery_requires_match(QueryItem *curitem)
+{
+ /* since this function recurses, it could be driven to stack overflow */
+ check_stack_depth();
+
+ if (curitem->type == QI_VAL)
+ return true;
+
+ switch (curitem->qoperator.oper)
+ {
+ case OP_NOT:
+
+ /*
+ * Assume there are no required matches underneath a NOT. For
+ * some cases with nested NOTs, we could prove there's a required
+ * match, but it seems unlikely to be worth the trouble.
+ */
+ return false;
+
+ case OP_PHRASE:
+
+ /*
+ * Treat OP_PHRASE as OP_AND here
+ */
+ case OP_AND:
+ /* If either side requires a match, we're good */
+ if (tsquery_requires_match(curitem + curitem->qoperator.left))
+ return true;
+ else
+ return tsquery_requires_match(curitem + 1);
+
+ case OP_OR:
+ /* Both sides must require a match */
+ if (tsquery_requires_match(curitem + curitem->qoperator.left))
+ return tsquery_requires_match(curitem + 1);
+ else
+ return false;
+
+ default:
+ elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
+ }
+
+ /* not reachable, but keep compiler quiet */
+ return false;
+}
+
+/*
+ * boolean operations
+ */
+Datum
+ts_match_qv(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_DATUM(DirectFunctionCall2(ts_match_vq,
+ PG_GETARG_DATUM(1),
+ PG_GETARG_DATUM(0)));
+}
+
+Datum
+ts_match_vq(PG_FUNCTION_ARGS)
+{
+ TSVector val = PG_GETARG_TSVECTOR(0);
+ TSQuery query = PG_GETARG_TSQUERY(1);
+ CHKVAL chkval;
+ bool result;
+
+ /* empty query matches nothing */
+ if (!query->size)
+ {
+ PG_FREE_IF_COPY(val, 0);
+ PG_FREE_IF_COPY(query, 1);
+ PG_RETURN_BOOL(false);
+ }
+
+ chkval.arrb = ARRPTR(val);
+ chkval.arre = chkval.arrb + val->size;
+ chkval.values = STRPTR(val);
+ chkval.operand = GETOPERAND(query);
+ result = TS_execute(GETQUERY(query),
+ &chkval,
+ TS_EXEC_EMPTY,
+ checkcondition_str);
+
+ PG_FREE_IF_COPY(val, 0);
+ PG_FREE_IF_COPY(query, 1);
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+ts_match_tt(PG_FUNCTION_ARGS)
+{
+ TSVector vector;
+ TSQuery query;
+ bool res;
+
+ vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
+ PG_GETARG_DATUM(0)));
+ query = DatumGetTSQuery(DirectFunctionCall1(plainto_tsquery,
+ PG_GETARG_DATUM(1)));
+
+ res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
+ TSVectorGetDatum(vector),
+ TSQueryGetDatum(query)));
+
+ pfree(vector);
+ pfree(query);
+
+ PG_RETURN_BOOL(res);
+}
+
+Datum
+ts_match_tq(PG_FUNCTION_ARGS)
+{
+ TSVector vector;
+ TSQuery query = PG_GETARG_TSQUERY(1);
+ bool res;
+
+ vector = DatumGetTSVector(DirectFunctionCall1(to_tsvector,
+ PG_GETARG_DATUM(0)));
+
+ res = DatumGetBool(DirectFunctionCall2(ts_match_vq,
+ TSVectorGetDatum(vector),
+ TSQueryGetDatum(query)));
+
+ pfree(vector);
+ PG_FREE_IF_COPY(query, 1);
+
+ PG_RETURN_BOOL(res);
+}
+
+/*
+ * ts_stat statistic function support
+ */
+
+
+/*
+ * Returns the number of positions in value 'wptr' within tsvector 'txt'
+ * that have a weight equal to one of the weights in the 'weight' bitmask.
+ */
+static int
+check_weight(TSVector txt, WordEntry *wptr, int8 weight)
+{
+ int len = POSDATALEN(txt, wptr);
+ int num = 0;
+ WordEntryPos *ptr = POSDATAPTR(txt, wptr);
+
+ while (len--)
+ {
+ if (weight & (1 << WEP_GETWEIGHT(*ptr)))
+ num++;
+ ptr++;
+ }
+ return num;
+}
+
+#define compareStatWord(a,e,t) \
+ tsCompareString((a)->lexeme, (a)->lenlexeme, \
+ STRPTR(t) + (e)->pos, (e)->len, \
+ false)
+
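+/*
+ * Insert or update the statistics entry for the lexeme at offset 'off' of
+ * tsvector 'txt'.  Entries live in an unbalanced binary search tree keyed by
+ * lexeme and allocated in 'persistentContext'; ndoc counts documents and
+ * nentry counts (possibly weight-filtered) occurrences.
+ */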
+static void
+insertStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt, uint32 off)
+{
+ WordEntry *we = ARRPTR(txt) + off;
+ StatEntry *node = stat->root,
+ *pnode = NULL;
+ int n,
+ res = 0;
+ uint32 depth = 1;
+
+ if (stat->weight == 0)
+ n = (we->haspos) ? POSDATALEN(txt, we) : 1;
+ else
+ n = (we->haspos) ? check_weight(txt, we, stat->weight) : 0;
+
+ if (n == 0)
+ return; /* nothing to insert */
+
+ while (node)
+ {
+ res = compareStatWord(node, we, txt);
+
+ if (res == 0)
+ {
+ break;
+ }
+ else
+ {
+ pnode = node;
+ node = (res < 0) ? node->left : node->right;
+ }
+ depth++;
+ }
+
+ if (depth > stat->maxdepth)
+ stat->maxdepth = depth;
+
+ if (node == NULL)
+ {
+ node = MemoryContextAlloc(persistentContext, STATENTRYHDRSZ + we->len);
+ node->left = node->right = NULL;
+ node->ndoc = 1;
+ node->nentry = n;
+ node->lenlexeme = we->len;
+ memcpy(node->lexeme, STRPTR(txt) + we->pos, node->lenlexeme);
+
+ if (pnode == NULL)
+ {
+ stat->root = node;
+ }
+ else
+ {
+ if (res < 0)
+ pnode->left = node;
+ else
+ pnode->right = node;
+ }
+ }
+ else
+ {
+ node->ndoc++;
+ node->nentry += n;
+ }
+}
+
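+/*
+ * Recursively insert the entries of 'txt' in midpoint-first order, so that
+ * the binary tree built by insertStatEntry stays reasonably balanced even
+ * though a tsvector's lexemes arrive in sorted order.
+ */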
+static void
+chooseNextStatEntry(MemoryContext persistentContext, TSVectorStat *stat, TSVector txt,
+ uint32 low, uint32 high, uint32 offset)
+{
+ uint32 pos;
+ uint32 middle = (low + high) >> 1;
+
+ pos = (low + middle) >> 1;
+ if (low != middle && pos >= offset && pos - offset < txt->size)
+ insertStatEntry(persistentContext, stat, txt, pos - offset);
+ pos = (high + middle + 1) >> 1;
+ if (middle + 1 != high && pos >= offset && pos - offset < txt->size)
+ insertStatEntry(persistentContext, stat, txt, pos - offset);
+
+ if (low != middle)
+ chooseNextStatEntry(persistentContext, stat, txt, low, middle, offset);
+ if (high != middle + 1)
+ chooseNextStatEntry(persistentContext, stat, txt, middle + 1, high, offset);
+}
+
+/*
+ * This is written like a custom aggregate function, because the
+ * original plan was to do just that. Unfortunately, an aggregate function
+ * can't return a set, so that plan was abandoned. If that limitation is
+ * lifted in the future, ts_stat could be a real aggregate function so that
+ * you could use it like this:
+ *
+ * SELECT ts_stat(vector_column) FROM vector_table;
+ *
+ * where vector_column is a tsvector-type column in vector_table.
+ */
+
+static TSVectorStat *
+ts_accum(MemoryContext persistentContext, TSVectorStat *stat, Datum data)
+{
+ TSVector txt = DatumGetTSVector(data);
+ uint32 i,
+ nbit = 0,
+ offset;
+
+ if (stat == NULL)
+	{							/* initialize on first call */
+ stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
+ stat->maxdepth = 1;
+ }
+
+ /* simple check of correctness */
+ if (txt == NULL || txt->size == 0)
+ {
+ if (txt && txt != (TSVector) DatumGetPointer(data))
+ pfree(txt);
+ return stat;
+ }
+
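+	/*
+	 * Round the entry count up to a power of two and center the range, so
+	 * that the midpoint-first insertion order below yields a reasonably
+	 * balanced search tree.
+	 */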
+ i = txt->size - 1;
+ for (; i > 0; i >>= 1)
+ nbit++;
+
+ nbit = 1 << nbit;
+ offset = (nbit - txt->size) / 2;
+
+ insertStatEntry(persistentContext, stat, txt, (nbit >> 1) - offset);
+ chooseNextStatEntry(persistentContext, stat, txt, 0, nbit, offset);
+
+ return stat;
+}
+
+static void
+ts_setup_firstcall(FunctionCallInfo fcinfo, FuncCallContext *funcctx,
+ TSVectorStat *stat)
+{
+ TupleDesc tupdesc;
+ MemoryContext oldcontext;
+ StatEntry *node;
+
+ funcctx->user_fctx = (void *) stat;
+
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ stat->stack = palloc0(sizeof(StatEntry *) * (stat->maxdepth + 1));
+ stat->stackpos = 0;
+
+ node = stat->root;
+ /* find leftmost value */
+ if (node == NULL)
+ stat->stack[stat->stackpos] = NULL;
+ else
+ for (;;)
+ {
+ stat->stack[stat->stackpos] = node;
+ if (node->left)
+ {
+ stat->stackpos++;
+ node = node->left;
+ }
+ else
+ break;
+ }
+ Assert(stat->stackpos <= stat->maxdepth);
+
+ tupdesc = CreateTemplateTupleDesc(3);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "word",
+ TEXTOID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 2, "ndoc",
+ INT4OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 3, "nentry",
+ INT4OID, -1, 0);
+ funcctx->tuple_desc = BlessTupleDesc(tupdesc);
+ funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
+
+ MemoryContextSwitchTo(oldcontext);
+}
+
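+/*
+ * Return the next not-yet-emitted entry in an in-order walk of the stat tree,
+ * driven by the explicit stack set up in ts_setup_firstcall.  Entries already
+ * emitted are marked by ndoc == 0 (see ts_process_call).
+ */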
+static StatEntry *
+walkStatEntryTree(TSVectorStat *stat)
+{
+ StatEntry *node = stat->stack[stat->stackpos];
+
+ if (node == NULL)
+ return NULL;
+
+ if (node->ndoc != 0)
+ {
+		/* return the entry itself: we have already visited its left subtree */
+ return node;
+ }
+ else if (node->right && node->right != stat->stack[stat->stackpos + 1])
+ {
+		/* descend into the right subtree */
+ stat->stackpos++;
+ node = node->right;
+
+		/* find leftmost value */
+ for (;;)
+ {
+ stat->stack[stat->stackpos] = node;
+ if (node->left)
+ {
+ stat->stackpos++;
+ node = node->left;
+ }
+ else
+ break;
+ }
+ Assert(stat->stackpos <= stat->maxdepth);
+ }
+ else
+ {
+		/* we have already returned the left subtree, the node itself, and the right subtree */
+ if (stat->stackpos == 0)
+ return NULL;
+
+ stat->stackpos--;
+ return walkStatEntryTree(stat);
+ }
+
+ return node;
+}
+
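+/*
+ * Emit one (word, ndoc, nentry) tuple per call, or return (Datum) 0 once the
+ * stat tree has been fully walked.
+ */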
+static Datum
+ts_process_call(FuncCallContext *funcctx)
+{
+ TSVectorStat *st;
+ StatEntry *entry;
+
+ st = (TSVectorStat *) funcctx->user_fctx;
+
+ entry = walkStatEntryTree(st);
+
+ if (entry != NULL)
+ {
+ Datum result;
+ char *values[3];
+ char ndoc[16];
+ char nentry[16];
+ HeapTuple tuple;
+
+ values[0] = palloc(entry->lenlexeme + 1);
+ memcpy(values[0], entry->lexeme, entry->lenlexeme);
+ (values[0])[entry->lenlexeme] = '\0';
+ sprintf(ndoc, "%d", entry->ndoc);
+ values[1] = ndoc;
+ sprintf(nentry, "%d", entry->nentry);
+ values[2] = nentry;
+
+ tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
+ result = HeapTupleGetDatum(tuple);
+
+ pfree(values[0]);
+
+ /* mark entry as already visited */
+ entry->ndoc = 0;
+
+ return result;
+ }
+
+ return (Datum) 0;
+}
+
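+/*
+ * Execute the given query via SPI and accumulate lexeme statistics over every
+ * non-null tsvector it returns.  'ws' is an optional string of weight letters
+ * restricting which positions are counted.
+ */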
+static TSVectorStat *
+ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws)
+{
+ char *query = text_to_cstring(txt);
+ TSVectorStat *stat;
+ bool isnull;
+ Portal portal;
+ SPIPlanPtr plan;
+
+ if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
+ /* internal error */
+ elog(ERROR, "SPI_prepare(\"%s\") failed", query);
+
+ if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
+ /* internal error */
+ elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
+
+ SPI_cursor_fetch(portal, true, 100);
+
+ if (SPI_tuptable == NULL ||
+ SPI_tuptable->tupdesc->natts != 1 ||
+ !IsBinaryCoercible(SPI_gettypeid(SPI_tuptable->tupdesc, 1),
+ TSVECTOROID))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("ts_stat query must return one tsvector column")));
+
+ stat = MemoryContextAllocZero(persistentContext, sizeof(TSVectorStat));
+ stat->maxdepth = 1;
+
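+	/* Parse the optional weight filter into a bitmask (bit 0 = D ... bit 3 = A) */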
+ if (ws)
+ {
+ char *buf;
+
+ buf = VARDATA_ANY(ws);
+ while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws))
+ {
+ if (pg_mblen(buf) == 1)
+ {
+ switch (*buf)
+ {
+ case 'A':
+ case 'a':
+ stat->weight |= 1 << 3;
+ break;
+ case 'B':
+ case 'b':
+ stat->weight |= 1 << 2;
+ break;
+ case 'C':
+ case 'c':
+ stat->weight |= 1 << 1;
+ break;
+ case 'D':
+ case 'd':
+ stat->weight |= 1;
+ break;
+ default:
+ stat->weight |= 0;
+ }
+ }
+ buf += pg_mblen(buf);
+ }
+ }
+
+ while (SPI_processed > 0)
+ {
+ uint64 i;
+
+ for (i = 0; i < SPI_processed; i++)
+ {
+ Datum data = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
+
+ if (!isnull)
+ stat = ts_accum(persistentContext, stat, data);
+ }
+
+ SPI_freetuptable(SPI_tuptable);
+ SPI_cursor_fetch(portal, true, 100);
+ }
+
+ SPI_freetuptable(SPI_tuptable);
+ SPI_cursor_close(portal);
+ SPI_freeplan(plan);
+ pfree(query);
+
+ return stat;
+}
+
+Datum
+ts_stat1(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ Datum result;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ TSVectorStat *stat;
+ text *txt = PG_GETARG_TEXT_PP(0);
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ SPI_connect();
+ stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, NULL);
+ PG_FREE_IF_COPY(txt, 0);
+ ts_setup_firstcall(fcinfo, funcctx, stat);
+ SPI_finish();
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ if ((result = ts_process_call(funcctx)) != (Datum) 0)
+ SRF_RETURN_NEXT(funcctx, result);
+ SRF_RETURN_DONE(funcctx);
+}
+
+Datum
+ts_stat2(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ Datum result;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ TSVectorStat *stat;
+ text *txt = PG_GETARG_TEXT_PP(0);
+ text *ws = PG_GETARG_TEXT_PP(1);
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ SPI_connect();
+ stat = ts_stat_sql(funcctx->multi_call_memory_ctx, txt, ws);
+ PG_FREE_IF_COPY(txt, 0);
+ PG_FREE_IF_COPY(ws, 1);
+ ts_setup_firstcall(fcinfo, funcctx, stat);
+ SPI_finish();
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ if ((result = ts_process_call(funcctx)) != (Datum) 0)
+ SRF_RETURN_NEXT(funcctx, result);
+ SRF_RETURN_DONE(funcctx);
+}
+
+
+/*
+ * Triggers for automatic update of a tsvector column from text column(s)
+ *
+ * Trigger arguments are either
+ * name of tsvector col, name of tsconfig to use, name(s) of text col(s)
+ * name of tsvector col, name of regconfig col, name(s) of text col(s)
+ * ie, tsconfig can either be specified by name, or indirectly as the
+ * contents of a regconfig field in the row. If the name is used, it must
+ * be explicitly schema-qualified.
+ */
+Datum
+tsvector_update_trigger_byid(PG_FUNCTION_ARGS)
+{
+ return tsvector_update_trigger(fcinfo, false);
+}
+
+Datum
+tsvector_update_trigger_bycolumn(PG_FUNCTION_ARGS)
+{
+ return tsvector_update_trigger(fcinfo, true);
+}
+
+static Datum
+tsvector_update_trigger(PG_FUNCTION_ARGS, bool config_column)
+{
+ TriggerData *trigdata;
+ Trigger *trigger;
+ Relation rel;
+ HeapTuple rettuple = NULL;
+ int tsvector_attr_num,
+ i;
+ ParsedText prs;
+ Datum datum;
+ bool isnull;
+ text *txt;
+ Oid cfgId;
+ bool update_needed;
+
+ /* Check call context */
+ if (!CALLED_AS_TRIGGER(fcinfo)) /* internal error */
+ elog(ERROR, "tsvector_update_trigger: not fired by trigger manager");
+
+ trigdata = (TriggerData *) fcinfo->context;
+ if (!TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
+ elog(ERROR, "tsvector_update_trigger: must be fired for row");
+ if (!TRIGGER_FIRED_BEFORE(trigdata->tg_event))
+ elog(ERROR, "tsvector_update_trigger: must be fired BEFORE event");
+
+ if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
+ {
+ rettuple = trigdata->tg_trigtuple;
+ update_needed = true;
+ }
+ else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
+ {
+ rettuple = trigdata->tg_newtuple;
+ update_needed = false; /* computed below */
+ }
+ else
+ elog(ERROR, "tsvector_update_trigger: must be fired for INSERT or UPDATE");
+
+ trigger = trigdata->tg_trigger;
+ rel = trigdata->tg_relation;
+
+ if (trigger->tgnargs < 3)
+		elog(ERROR, "tsvector_update_trigger: arguments must be tsvector_field, ts_config, text_field1, ...");
+
+ /* Find the target tsvector column */
+ tsvector_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
+ if (tsvector_attr_num == SPI_ERROR_NOATTRIBUTE)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("tsvector column \"%s\" does not exist",
+ trigger->tgargs[0])));
+ /* This will effectively reject system columns, so no separate test: */
+ if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, tsvector_attr_num),
+ TSVECTOROID))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("column \"%s\" is not of tsvector type",
+ trigger->tgargs[0])));
+
+ /* Find the configuration to use */
+ if (config_column)
+ {
+ int config_attr_num;
+
+ config_attr_num = SPI_fnumber(rel->rd_att, trigger->tgargs[1]);
+ if (config_attr_num == SPI_ERROR_NOATTRIBUTE)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("configuration column \"%s\" does not exist",
+ trigger->tgargs[1])));
+ if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, config_attr_num),
+ REGCONFIGOID))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("column \"%s\" is not of regconfig type",
+ trigger->tgargs[1])));
+
+ datum = SPI_getbinval(rettuple, rel->rd_att, config_attr_num, &isnull);
+ if (isnull)
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("configuration column \"%s\" must not be null",
+ trigger->tgargs[1])));
+ cfgId = DatumGetObjectId(datum);
+ }
+ else
+ {
+ List *names;
+
+ names = stringToQualifiedNameList(trigger->tgargs[1]);
+ /* require a schema so that results are not search path dependent */
+ if (list_length(names) < 2)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("text search configuration name \"%s\" must be schema-qualified",
+ trigger->tgargs[1])));
+ cfgId = get_ts_config_oid(names, false);
+ }
+
+ /* initialize parse state */
+ prs.lenwords = 32;
+ prs.curwords = 0;
+ prs.pos = 0;
+ prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
+
+ /* find all words in indexable column(s) */
+ for (i = 2; i < trigger->tgnargs; i++)
+ {
+ int numattr;
+
+ numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
+ if (numattr == SPI_ERROR_NOATTRIBUTE)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_COLUMN),
+ errmsg("column \"%s\" does not exist",
+ trigger->tgargs[i])));
+ if (!IsBinaryCoercible(SPI_gettypeid(rel->rd_att, numattr), TEXTOID))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATATYPE_MISMATCH),
+ errmsg("column \"%s\" is not of a character type",
+ trigger->tgargs[i])));
+
+ if (bms_is_member(numattr - FirstLowInvalidHeapAttributeNumber, trigdata->tg_updatedcols))
+ update_needed = true;
+
+ datum = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
+ if (isnull)
+ continue;
+
+ txt = DatumGetTextPP(datum);
+
+ parsetext(cfgId, &prs, VARDATA_ANY(txt), VARSIZE_ANY_EXHDR(txt));
+
+ if (txt != (text *) DatumGetPointer(datum))
+ pfree(txt);
+ }
+
+ if (update_needed)
+ {
+ /* make tsvector value */
+ datum = TSVectorGetDatum(make_tsvector(&prs));
+ isnull = false;
+
+ /* and insert it into tuple */
+ rettuple = heap_modify_tuple_by_cols(rettuple, rel->rd_att,
+ 1, &tsvector_attr_num,
+ &datum, &isnull);
+
+ pfree(DatumGetPointer(datum));
+ }
+
+ return PointerGetDatum(rettuple);
+}
diff --git a/src/backend/utils/adt/tsvector_parser.c b/src/backend/utils/adt/tsvector_parser.c
new file mode 100644
index 0000000..e2460d3
--- /dev/null
+++ b/src/backend/utils/adt/tsvector_parser.c
@@ -0,0 +1,367 @@
+/*-------------------------------------------------------------------------
+ *
+ * tsvector_parser.c
+ * Parser for tsvector
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/tsvector_parser.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "tsearch/ts_locale.h"
+#include "tsearch/ts_utils.h"
+
+
+/*
+ * Private state of tsvector parser. Note that tsquery also uses this code to
+ * parse its input, hence the boolean flags. The two flags are both true or
+ * both false in current usage, but we keep them separate for clarity.
+ * is_tsquery affects *only* the content of error messages.
+ */
+struct TSVectorParseStateData
+{
+ char *prsbuf; /* next input character */
+ char *bufstart; /* whole string (used only for errors) */
+ char *word; /* buffer to hold the current word */
+ int len; /* size in bytes allocated for 'word' */
+ int eml; /* max bytes per character */
+	bool		oprisdelim;		/* treat ! | & ( ) as delimiters? */
+ bool is_tsquery; /* say "tsquery" not "tsvector" in errors? */
+ bool is_web; /* we're in websearch_to_tsquery() */
+};
+
+
+/*
+ * Initializes parser for the input string. If oprisdelim is set, the
+ * following characters are treated as delimiters in addition to whitespace:
+ * ! | & ( )
+ */
+TSVectorParseState
+init_tsvector_parser(char *input, int flags)
+{
+ TSVectorParseState state;
+
+ state = (TSVectorParseState) palloc(sizeof(struct TSVectorParseStateData));
+ state->prsbuf = input;
+ state->bufstart = input;
+ state->len = 32;
+ state->word = (char *) palloc(state->len);
+ state->eml = pg_database_encoding_max_length();
+ state->oprisdelim = (flags & P_TSV_OPR_IS_DELIM) != 0;
+ state->is_tsquery = (flags & P_TSV_IS_TSQUERY) != 0;
+ state->is_web = (flags & P_TSV_IS_WEB) != 0;
+
+ return state;
+}
+
+/*
+ * Reinitializes parser to parse 'input', instead of previous input.
+ */
+void
+reset_tsvector_parser(TSVectorParseState state, char *input)
+{
+ state->prsbuf = input;
+}
+
+/*
+ * Shuts down a tsvector parser.
+ */
+void
+close_tsvector_parser(TSVectorParseState state)
+{
+ pfree(state->word);
+ pfree(state);
+}
+
+/* increase the size of 'word' if needed to hold one more character */
+#define RESIZEPRSBUF \
+do { \
+ int clen = curpos - state->word; \
+ if ( clen + state->eml >= state->len ) \
+ { \
+ state->len *= 2; \
+ state->word = (char *) repalloc(state->word, state->len); \
+ curpos = state->word + clen; \
+ } \
+} while (0)
+
+/* Fills gettoken_tsvector's output parameters, and returns true */
+#define RETURN_TOKEN \
+do { \
+ if (pos_ptr != NULL) \
+ { \
+ *pos_ptr = pos; \
+ *poslen = npos; \
+ } \
+ else if (pos != NULL) \
+ pfree(pos); \
+ \
+ if (strval != NULL) \
+ *strval = state->word; \
+ if (lenval != NULL) \
+ *lenval = curpos - state->word; \
+ if (endptr != NULL) \
+ *endptr = state->prsbuf; \
+ return true; \
+} while(0)
+
+
+/* State codes used in gettoken_tsvector */
+#define WAITWORD		1	/* scanning for the start of a word */
+#define WAITENDWORD		2	/* collecting an unquoted word */
+#define WAITNEXTCHAR	3	/* expecting the character after a backslash */
+#define WAITENDCMPLX	4	/* collecting a quoted ('...') word */
+#define WAITPOSINFO		5	/* quoted word done; ':' would start positions */
+#define INPOSINFO		6	/* expecting a position number */
+#define WAITPOSDELIM	7	/* expecting ',', a weight letter, or the end */
+#define WAITCHARCMPLX	8	/* saw ' in a quoted word; maybe a doubled quote */
+
+#define PRSSYNTAXERROR prssyntaxerror(state)
+
+static void
+prssyntaxerror(TSVectorParseState state)
+{
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ state->is_tsquery ?
+ errmsg("syntax error in tsquery: \"%s\"", state->bufstart) :
+ errmsg("syntax error in tsvector: \"%s\"", state->bufstart)));
+}
+
+
+/*
+ * Get next token from string being parsed. Returns true if successful,
+ * false if end of input string is reached. On success, these output
+ * parameters are filled in:
+ *
+ * *strval pointer to token
+ * *lenval length of *strval
+ * *pos_ptr pointer to a palloc'd array of positions and weights
+ * associated with the token. If the caller is not interested
+ * in the information, NULL can be supplied. Otherwise
+ * the caller is responsible for pfreeing the array.
+ * *poslen number of elements in *pos_ptr
+ * *endptr scan resumption point
+ *
+ * Pass NULL for unwanted output parameters.
+ */
+bool
+gettoken_tsvector(TSVectorParseState state,
+ char **strval, int *lenval,
+ WordEntryPos **pos_ptr, int *poslen,
+ char **endptr)
+{
+ int oldstate = 0;
+ char *curpos = state->word;
+ int statecode = WAITWORD;
+
+ /*
+	 * pos is for collecting the comma-delimited list of positions that
+	 * follows the actual token.
+ */
+ WordEntryPos *pos = NULL;
+ int npos = 0; /* elements of pos used */
+ int posalen = 0; /* allocated size of pos */
+
+ while (1)
+ {
+ if (statecode == WAITWORD)
+ {
+ if (*(state->prsbuf) == '\0')
+ return false;
+ else if (!state->is_web && t_iseq(state->prsbuf, '\''))
+ statecode = WAITENDCMPLX;
+ else if (!state->is_web && t_iseq(state->prsbuf, '\\'))
+ {
+ statecode = WAITNEXTCHAR;
+ oldstate = WAITENDWORD;
+ }
+ else if ((state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
+ (state->is_web && t_iseq(state->prsbuf, '"')))
+ PRSSYNTAXERROR;
+ else if (!t_isspace(state->prsbuf))
+ {
+ COPYCHAR(curpos, state->prsbuf);
+ curpos += pg_mblen(state->prsbuf);
+ statecode = WAITENDWORD;
+ }
+ }
+ else if (statecode == WAITNEXTCHAR)
+ {
+ if (*(state->prsbuf) == '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("there is no escaped character: \"%s\"",
+ state->bufstart)));
+ else
+ {
+ RESIZEPRSBUF;
+ COPYCHAR(curpos, state->prsbuf);
+ curpos += pg_mblen(state->prsbuf);
+ Assert(oldstate != 0);
+ statecode = oldstate;
+ }
+ }
+ else if (statecode == WAITENDWORD)
+ {
+ if (!state->is_web && t_iseq(state->prsbuf, '\\'))
+ {
+ statecode = WAITNEXTCHAR;
+ oldstate = WAITENDWORD;
+ }
+ else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
+ (state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
+ (state->is_web && t_iseq(state->prsbuf, '"')))
+ {
+ RESIZEPRSBUF;
+ if (curpos == state->word)
+ PRSSYNTAXERROR;
+ *(curpos) = '\0';
+ RETURN_TOKEN;
+ }
+ else if (t_iseq(state->prsbuf, ':'))
+ {
+ if (curpos == state->word)
+ PRSSYNTAXERROR;
+ *(curpos) = '\0';
+ if (state->oprisdelim)
+ RETURN_TOKEN;
+ else
+ statecode = INPOSINFO;
+ }
+ else
+ {
+ RESIZEPRSBUF;
+ COPYCHAR(curpos, state->prsbuf);
+ curpos += pg_mblen(state->prsbuf);
+ }
+ }
+ else if (statecode == WAITENDCMPLX)
+ {
+ if (!state->is_web && t_iseq(state->prsbuf, '\''))
+ {
+ statecode = WAITCHARCMPLX;
+ }
+ else if (!state->is_web && t_iseq(state->prsbuf, '\\'))
+ {
+ statecode = WAITNEXTCHAR;
+ oldstate = WAITENDCMPLX;
+ }
+ else if (*(state->prsbuf) == '\0')
+ PRSSYNTAXERROR;
+ else
+ {
+ RESIZEPRSBUF;
+ COPYCHAR(curpos, state->prsbuf);
+ curpos += pg_mblen(state->prsbuf);
+ }
+ }
+ else if (statecode == WAITCHARCMPLX)
+ {
+ if (!state->is_web && t_iseq(state->prsbuf, '\''))
+ {
+ RESIZEPRSBUF;
+ COPYCHAR(curpos, state->prsbuf);
+ curpos += pg_mblen(state->prsbuf);
+ statecode = WAITENDCMPLX;
+ }
+ else
+ {
+ RESIZEPRSBUF;
+ *(curpos) = '\0';
+ if (curpos == state->word)
+ PRSSYNTAXERROR;
+ if (state->oprisdelim)
+ {
+ /* state->prsbuf+=pg_mblen(state->prsbuf); */
+ RETURN_TOKEN;
+ }
+ else
+ statecode = WAITPOSINFO;
+ continue; /* recheck current character */
+ }
+ }
+ else if (statecode == WAITPOSINFO)
+ {
+ if (t_iseq(state->prsbuf, ':'))
+ statecode = INPOSINFO;
+ else
+ RETURN_TOKEN;
+ }
+ else if (statecode == INPOSINFO)
+ {
+ if (t_isdigit(state->prsbuf))
+ {
+ if (posalen == 0)
+ {
+ posalen = 4;
+ pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * posalen);
+ npos = 0;
+ }
+ else if (npos + 1 >= posalen)
+ {
+ posalen *= 2;
+ pos = (WordEntryPos *) repalloc(pos, sizeof(WordEntryPos) * posalen);
+ }
+ npos++;
+ WEP_SETPOS(pos[npos - 1], LIMITPOS(atoi(state->prsbuf)));
+ /* we cannot get here in tsquery, so no need for 2 errmsgs */
+ if (WEP_GETPOS(pos[npos - 1]) == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("wrong position info in tsvector: \"%s\"",
+ state->bufstart)));
+ WEP_SETWEIGHT(pos[npos - 1], 0);
+ statecode = WAITPOSDELIM;
+ }
+ else
+ PRSSYNTAXERROR;
+ }
+ else if (statecode == WAITPOSDELIM)
+ {
+ if (t_iseq(state->prsbuf, ','))
+ statecode = INPOSINFO;
+ else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
+ {
+ if (WEP_GETWEIGHT(pos[npos - 1]))
+ PRSSYNTAXERROR;
+ WEP_SETWEIGHT(pos[npos - 1], 3);
+ }
+ else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
+ {
+ if (WEP_GETWEIGHT(pos[npos - 1]))
+ PRSSYNTAXERROR;
+ WEP_SETWEIGHT(pos[npos - 1], 2);
+ }
+ else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
+ {
+ if (WEP_GETWEIGHT(pos[npos - 1]))
+ PRSSYNTAXERROR;
+ WEP_SETWEIGHT(pos[npos - 1], 1);
+ }
+ else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
+ {
+ if (WEP_GETWEIGHT(pos[npos - 1]))
+ PRSSYNTAXERROR;
+ WEP_SETWEIGHT(pos[npos - 1], 0);
+ }
+ else if (t_isspace(state->prsbuf) ||
+ *(state->prsbuf) == '\0')
+ RETURN_TOKEN;
+ else if (!t_isdigit(state->prsbuf))
+ PRSSYNTAXERROR;
+ }
+ else /* internal error */
+ elog(ERROR, "unrecognized state in gettoken_tsvector: %d",
+ statecode);
+
+ /* get next char */
+ state->prsbuf += pg_mblen(state->prsbuf);
+ }
+}
diff --git a/src/backend/utils/adt/uuid.c b/src/backend/utils/adt/uuid.c
new file mode 100644
index 0000000..7cec937
--- /dev/null
+++ b/src/backend/utils/adt/uuid.c
@@ -0,0 +1,423 @@
+/*-------------------------------------------------------------------------
+ *
+ * uuid.c
+ * Functions for the built-in type "uuid".
+ *
+ * Copyright (c) 2007-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/uuid.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "common/hashfn.h"
+#include "lib/hyperloglog.h"
+#include "libpq/pqformat.h"
+#include "port/pg_bswap.h"
+#include "utils/builtins.h"
+#include "utils/guc.h"
+#include "utils/sortsupport.h"
+#include "utils/uuid.h"
+
+/* sortsupport for uuid */
+typedef struct
+{
+ int64 input_count; /* number of non-null values seen */
+ bool estimating; /* true if estimating cardinality */
+
+ hyperLogLogState abbr_card; /* cardinality estimator */
+} uuid_sortsupport_state;
+
+static void string_to_uuid(const char *source, pg_uuid_t *uuid);
+static int uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2);
+static int uuid_fast_cmp(Datum x, Datum y, SortSupport ssup);
+static bool uuid_abbrev_abort(int memtupcount, SortSupport ssup);
+static Datum uuid_abbrev_convert(Datum original, SortSupport ssup);
+
+Datum
+uuid_in(PG_FUNCTION_ARGS)
+{
+ char *uuid_str = PG_GETARG_CSTRING(0);
+ pg_uuid_t *uuid;
+
+ uuid = (pg_uuid_t *) palloc(sizeof(*uuid));
+ string_to_uuid(uuid_str, uuid);
+ PG_RETURN_UUID_P(uuid);
+}
+
+Datum
+uuid_out(PG_FUNCTION_ARGS)
+{
+ pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
+ static const char hex_chars[] = "0123456789abcdef";
+ StringInfoData buf;
+ int i;
+
+ initStringInfo(&buf);
+ for (i = 0; i < UUID_LEN; i++)
+ {
+ int hi;
+ int lo;
+
+ /*
+ * We print uuid values as a string of 8, 4, 4, 4, and then 12
+		 * hexadecimal characters, with each group separated by a hyphen
+ * ("-"). Therefore, add the hyphens at the appropriate places here.
+ */
+ if (i == 4 || i == 6 || i == 8 || i == 10)
+ appendStringInfoChar(&buf, '-');
+
+ hi = uuid->data[i] >> 4;
+ lo = uuid->data[i] & 0x0F;
+
+ appendStringInfoChar(&buf, hex_chars[hi]);
+ appendStringInfoChar(&buf, hex_chars[lo]);
+ }
+
+ PG_RETURN_CSTRING(buf.data);
+}
+
+/*
+ * We allow UUIDs as a series of 32 hexadecimal digits with an optional dash
+ * after each group of 4 hexadecimal digits, and optionally surrounded by {}.
+ * (The canonical format 8x-4x-4x-4x-12x, where "nx" means n hexadecimal
+ * digits, is the only one used for output.)
+ */
+static void
+string_to_uuid(const char *source, pg_uuid_t *uuid)
+{
+ const char *src = source;
+ bool braces = false;
+ int i;
+
+ if (src[0] == '{')
+ {
+ src++;
+ braces = true;
+ }
+
+ for (i = 0; i < UUID_LEN; i++)
+ {
+ char str_buf[3];
+
+ if (src[0] == '\0' || src[1] == '\0')
+ goto syntax_error;
+ memcpy(str_buf, src, 2);
+ if (!isxdigit((unsigned char) str_buf[0]) ||
+ !isxdigit((unsigned char) str_buf[1]))
+ goto syntax_error;
+
+ str_buf[2] = '\0';
+ uuid->data[i] = (unsigned char) strtoul(str_buf, NULL, 16);
+ src += 2;
+ if (src[0] == '-' && (i % 2) == 1 && i < UUID_LEN - 1)
+ src++;
+ }
+
+ if (braces)
+ {
+ if (*src != '}')
+ goto syntax_error;
+ src++;
+ }
+
+ if (*src != '\0')
+ goto syntax_error;
+
+ return;
+
+syntax_error:
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "uuid", source)));
+}
+
+Datum
+uuid_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buffer = (StringInfo) PG_GETARG_POINTER(0);
+ pg_uuid_t *uuid;
+
+ uuid = (pg_uuid_t *) palloc(UUID_LEN);
+ memcpy(uuid->data, pq_getmsgbytes(buffer, UUID_LEN), UUID_LEN);
+ PG_RETURN_POINTER(uuid);
+}
+
+Datum
+uuid_send(PG_FUNCTION_ARGS)
+{
+ pg_uuid_t *uuid = PG_GETARG_UUID_P(0);
+ StringInfoData buffer;
+
+ pq_begintypsend(&buffer);
+ pq_sendbytes(&buffer, (char *) uuid->data, UUID_LEN);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buffer));
+}
+
+/* internal uuid compare function */
+static int
+uuid_internal_cmp(const pg_uuid_t *arg1, const pg_uuid_t *arg2)
+{
+ return memcmp(arg1->data, arg2->data, UUID_LEN);
+}
+
+Datum
+uuid_lt(PG_FUNCTION_ARGS)
+{
+ pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
+ pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
+
+ PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) < 0);
+}
+
+Datum
+uuid_le(PG_FUNCTION_ARGS)
+{
+ pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
+ pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
+
+ PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) <= 0);
+}
+
+Datum
+uuid_eq(PG_FUNCTION_ARGS)
+{
+ pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
+ pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
+
+ PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) == 0);
+}
+
+Datum
+uuid_ge(PG_FUNCTION_ARGS)
+{
+ pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
+ pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
+
+ PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) >= 0);
+}
+
+Datum
+uuid_gt(PG_FUNCTION_ARGS)
+{
+ pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
+ pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
+
+ PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) > 0);
+}
+
+Datum
+uuid_ne(PG_FUNCTION_ARGS)
+{
+ pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
+ pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
+
+ PG_RETURN_BOOL(uuid_internal_cmp(arg1, arg2) != 0);
+}
+
+/* handler for btree index operator */
+Datum
+uuid_cmp(PG_FUNCTION_ARGS)
+{
+ pg_uuid_t *arg1 = PG_GETARG_UUID_P(0);
+ pg_uuid_t *arg2 = PG_GETARG_UUID_P(1);
+
+ PG_RETURN_INT32(uuid_internal_cmp(arg1, arg2));
+}
+
+/*
+ * Sort support strategy routine
+ */
+Datum
+uuid_sortsupport(PG_FUNCTION_ARGS)
+{
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+
+ ssup->comparator = uuid_fast_cmp;
+ ssup->ssup_extra = NULL;
+
+ if (ssup->abbreviate)
+ {
+ uuid_sortsupport_state *uss;
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+ uss = palloc(sizeof(uuid_sortsupport_state));
+ uss->input_count = 0;
+ uss->estimating = true;
+ initHyperLogLog(&uss->abbr_card, 10);
+
+ ssup->ssup_extra = uss;
+
+ ssup->comparator = ssup_datum_unsigned_cmp;
+ ssup->abbrev_converter = uuid_abbrev_convert;
+ ssup->abbrev_abort = uuid_abbrev_abort;
+ ssup->abbrev_full_comparator = uuid_fast_cmp;
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * SortSupport comparison func
+ */
+static int
+uuid_fast_cmp(Datum x, Datum y, SortSupport ssup)
+{
+ pg_uuid_t *arg1 = DatumGetUUIDP(x);
+ pg_uuid_t *arg2 = DatumGetUUIDP(y);
+
+ return uuid_internal_cmp(arg1, arg2);
+}
+
+/*
+ * Callback for estimating effectiveness of abbreviated key optimization.
+ *
+ * We pay no attention to the cardinality of the non-abbreviated data, because
+ * there is no equality fast-path within the authoritative uuid comparator.
+ */
+static bool
+uuid_abbrev_abort(int memtupcount, SortSupport ssup)
+{
+ uuid_sortsupport_state *uss = ssup->ssup_extra;
+ double abbr_card;
+
+ if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating)
+ return false;
+
+ abbr_card = estimateHyperLogLog(&uss->abbr_card);
+
+ /*
+ * If we have >100k distinct values, then even if we were sorting many
+	 * billions of rows we'd likely still break even, and the penalty of undoing
+ * that many rows of abbrevs would probably not be worth it. Stop even
+ * counting at that point.
+ */
+ if (abbr_card > 100000.0)
+ {
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG,
+ "uuid_abbrev: estimation ends at cardinality %f"
+ " after " INT64_FORMAT " values (%d rows)",
+ abbr_card, uss->input_count, memtupcount);
+#endif
+ uss->estimating = false;
+ return false;
+ }
+
+ /*
+ * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row
+ * fudge factor allows us to abort earlier on genuinely pathological data
+ * where we've had exactly one abbreviated value in the first 2k
+ * (non-null) rows.
+ */
+ if (abbr_card < uss->input_count / 2000.0 + 0.5)
+ {
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG,
+ "uuid_abbrev: aborting abbreviation at cardinality %f"
+ " below threshold %f after " INT64_FORMAT " values (%d rows)",
+ abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count,
+ memtupcount);
+#endif
+ return true;
+ }
+
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG,
+ "uuid_abbrev: cardinality %f after " INT64_FORMAT
+ " values (%d rows)", abbr_card, uss->input_count, memtupcount);
+#endif
+
+ return false;
+}
+
+/*
+ * Conversion routine for sortsupport. Converts original uuid representation
+ * to abbreviated key representation. Our encoding strategy is simple -- pack
+ * the first `sizeof(Datum)` bytes of uuid data into a Datum (on little-endian
+ * machines, the bytes are stored in reverse order), and treat it as an
+ * unsigned integer.
+ */
+static Datum
+uuid_abbrev_convert(Datum original, SortSupport ssup)
+{
+ uuid_sortsupport_state *uss = ssup->ssup_extra;
+ pg_uuid_t *authoritative = DatumGetUUIDP(original);
+ Datum res;
+
+ memcpy(&res, authoritative->data, sizeof(Datum));
+ uss->input_count += 1;
+
+ if (uss->estimating)
+ {
+ uint32 tmp;
+
+#if SIZEOF_DATUM == 8
+ tmp = (uint32) res ^ (uint32) ((uint64) res >> 32);
+#else /* SIZEOF_DATUM != 8 */
+ tmp = (uint32) res;
+#endif
+
+ addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp)));
+ }
+
+ /*
+ * Byteswap on little-endian machines.
+ *
+ * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer
+ * 3-way comparator) works correctly on all platforms. If we didn't do
+ * this, the comparator would have to call memcmp() with a pair of
+ * pointers to the first byte of each abbreviated key, which is slower.
+ */
+ res = DatumBigEndianToNative(res);
+
+ return res;
+}
+
+/* hash index support */
+Datum
+uuid_hash(PG_FUNCTION_ARGS)
+{
+ pg_uuid_t *key = PG_GETARG_UUID_P(0);
+
+ return hash_any(key->data, UUID_LEN);
+}
+
+Datum
+uuid_hash_extended(PG_FUNCTION_ARGS)
+{
+ pg_uuid_t *key = PG_GETARG_UUID_P(0);
+
+ return hash_any_extended(key->data, UUID_LEN, PG_GETARG_INT64(1));
+}
+
+Datum
+gen_random_uuid(PG_FUNCTION_ARGS)
+{
+ pg_uuid_t *uuid = palloc(UUID_LEN);
+
+ if (!pg_strong_random(uuid, UUID_LEN))
+ ereport(ERROR,
+ (errcode(ERRCODE_INTERNAL_ERROR),
+ errmsg("could not generate random values")));
+
+ /*
+ * Set magic numbers for a "version 4" (pseudorandom) UUID, see
+ * http://tools.ietf.org/html/rfc4122#section-4.4
+ */
+ uuid->data[6] = (uuid->data[6] & 0x0f) | 0x40; /* time_hi_and_version */
+ uuid->data[8] = (uuid->data[8] & 0x3f) | 0x80; /* clock_seq_hi_and_reserved */
+
+ PG_RETURN_UUID_P(uuid);
+}
diff --git a/src/backend/utils/adt/varbit.c b/src/backend/utils/adt/varbit.c
new file mode 100644
index 0000000..73e41e0
--- /dev/null
+++ b/src/backend/utils/adt/varbit.c
@@ -0,0 +1,1894 @@
+/*-------------------------------------------------------------------------
+ *
+ * varbit.c
+ * Functions for the SQL datatypes BIT() and BIT VARYING().
+ *
+ * The data structure contains the following elements:
+ * header -- length of the whole data structure (incl header)
+ * in bytes (as with all varying length datatypes)
+ * data section -- private data section for the bits data structures
+ * bitlength -- length of the bit string in bits
+ * bitdata -- bit string, most significant byte first
+ *
+ * The length of the bitdata vector should always be exactly as many
+ * bytes as are needed for the given bitlength. If the bitlength is
+ * not a multiple of 8, the extra low-order padding bits of the last
+ * byte must be zeroes.
+ *
+ * attypmod is defined as the length of the bit string in bits, or for
+ * varying bits the maximum length.
+ *
+ * Code originally contributed by Adriaan Joubert.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/varbit.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "common/int.h"
+#include "libpq/pqformat.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "port/pg_bitutils.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/varbit.h"
+
+#define HEXDIG(z) ((z)<10 ? ((z)+'0') : ((z)-10+'A'))
+
+/* Mask off any bits that should be zero in the last byte of a bitstring */
+#define VARBIT_PAD(vb) \
+ do { \
+ int32 pad_ = VARBITPAD(vb); \
+ Assert(pad_ >= 0 && pad_ < BITS_PER_BYTE); \
+ if (pad_ > 0) \
+ *(VARBITS(vb) + VARBITBYTES(vb) - 1) &= BITMASK << pad_; \
+ } while (0)
+
+/*
+ * Many functions work byte-by-byte, so they have a pointer handy to the
+ * last-plus-one byte, which saves a cycle or two.
+ */
+#define VARBIT_PAD_LAST(vb, ptr) \
+ do { \
+ int32 pad_ = VARBITPAD(vb); \
+ Assert(pad_ >= 0 && pad_ < BITS_PER_BYTE); \
+ if (pad_ > 0) \
+ *((ptr) - 1) &= BITMASK << pad_; \
+ } while (0)
+
+/* Assert proper padding of a bitstring */
+#ifdef USE_ASSERT_CHECKING
+#define VARBIT_CORRECTLY_PADDED(vb) \
+ do { \
+ int32 pad_ = VARBITPAD(vb); \
+ Assert(pad_ >= 0 && pad_ < BITS_PER_BYTE); \
+ Assert(pad_ == 0 || \
+ (*(VARBITS(vb) + VARBITBYTES(vb) - 1) & ~(BITMASK << pad_)) == 0); \
+ } while (0)
+#else
+#define VARBIT_CORRECTLY_PADDED(vb) ((void) 0)
+#endif
+
+static VarBit *bit_catenate(VarBit *arg1, VarBit *arg2);
+static VarBit *bitsubstring(VarBit *arg, int32 s, int32 l,
+ bool length_not_specified);
+static VarBit *bit_overlay(VarBit *t1, VarBit *t2, int sp, int sl);
+
+
+/*
+ * common code for bittypmodin and varbittypmodin
+ */
+static int32
+anybit_typmodin(ArrayType *ta, const char *typename)
+{
+ int32 typmod;
+ int32 *tl;
+ int n;
+
+ tl = ArrayGetIntegerTypmods(ta, &n);
+
+ /*
+	 * We're not too fussy about the error message here, because the grammar
+	 * shouldn't allow a wrong number of modifiers for BIT.
+ */
+ if (n != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid type modifier")));
+
+ if (*tl < 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("length for type %s must be at least 1",
+ typename)));
+ if (*tl > (MaxAttrSize * BITS_PER_BYTE))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("length for type %s cannot exceed %d",
+ typename, MaxAttrSize * BITS_PER_BYTE)));
+
+ typmod = *tl;
+
+ return typmod;
+}
+
+/*
+ * common code for bittypmodout and varbittypmodout
+ */
+static char *
+anybit_typmodout(int32 typmod)
+{
+ char *res = (char *) palloc(64);
+
+ if (typmod >= 0)
+ snprintf(res, 64, "(%d)", typmod);
+ else
+ *res = '\0';
+
+ return res;
+}
+
+
+/*
+ * bit_in -
+ * converts a char string to the internal representation of a bitstring.
+ * The length is determined by the number of bits required plus
+ * VARHDRSZ bytes or from atttypmod.
+ */
+Datum
+bit_in(PG_FUNCTION_ARGS)
+{
+ char *input_string = PG_GETARG_CSTRING(0);
+
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 atttypmod = PG_GETARG_INT32(2);
+ VarBit *result; /* The resulting bit string */
+ char *sp; /* pointer into the character string */
+ bits8 *r; /* pointer into the result */
+ int len, /* Length of the whole data structure */
+ bitlen, /* Number of bits in the bit string */
+ slen; /* Length of the input string */
+	bool		bit_not_hex;	/* false = hex string, true = bit string */
+ int bc;
+ bits8 x = 0;
+
+ /* Check that the first character is a b or an x */
+ if (input_string[0] == 'b' || input_string[0] == 'B')
+ {
+ bit_not_hex = true;
+ sp = input_string + 1;
+ }
+ else if (input_string[0] == 'x' || input_string[0] == 'X')
+ {
+ bit_not_hex = false;
+ sp = input_string + 1;
+ }
+ else
+ {
+ /*
+ * Otherwise it's binary. This allows things like cast('1001' as bit)
+ * to work transparently.
+ */
+ bit_not_hex = true;
+ sp = input_string;
+ }
+
+ /*
+ * Determine bitlength from input string. MaxAllocSize ensures a regular
+ * input is small enough, but we must check hex input.
+ */
+ slen = strlen(sp);
+ if (bit_not_hex)
+ bitlen = slen;
+ else
+ {
+ if (slen > VARBITMAXLEN / 4)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("bit string length exceeds the maximum allowed (%d)",
+ VARBITMAXLEN)));
+ bitlen = slen * 4;
+ }
+
+ /*
+ * Sometimes atttypmod is not supplied. If it is supplied we need to make
+ * sure that the bitstring fits.
+ */
+ if (atttypmod <= 0)
+ atttypmod = bitlen;
+ else if (bitlen != atttypmod)
+ ereport(ERROR,
+ (errcode(ERRCODE_STRING_DATA_LENGTH_MISMATCH),
+ errmsg("bit string length %d does not match type bit(%d)",
+ bitlen, atttypmod)));
+
+ len = VARBITTOTALLEN(atttypmod);
+ /* set to 0 so that *r is always initialised and string is zero-padded */
+ result = (VarBit *) palloc0(len);
+ SET_VARSIZE(result, len);
+ VARBITLEN(result) = atttypmod;
+
+ r = VARBITS(result);
+ if (bit_not_hex)
+ {
+ /* Parse the bit representation of the string */
+ /* We know it fits, as bitlen was compared to atttypmod */
+ x = HIGHBIT;
+ for (; *sp; sp++)
+ {
+ if (*sp == '1')
+ *r |= x;
+ else if (*sp != '0')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("\"%.*s\" is not a valid binary digit",
+ pg_mblen(sp), sp)));
+
+ x >>= 1;
+ if (x == 0)
+ {
+ x = HIGHBIT;
+ r++;
+ }
+ }
+ }
+ else
+ {
+ /* Parse the hex representation of the string */
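+		/* bc toggles between the high and low nibble of each output byte */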
+ for (bc = 0; *sp; sp++)
+ {
+ if (*sp >= '0' && *sp <= '9')
+ x = (bits8) (*sp - '0');
+ else if (*sp >= 'A' && *sp <= 'F')
+ x = (bits8) (*sp - 'A') + 10;
+ else if (*sp >= 'a' && *sp <= 'f')
+ x = (bits8) (*sp - 'a') + 10;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("\"%.*s\" is not a valid hexadecimal digit",
+ pg_mblen(sp), sp)));
+
+ if (bc)
+ {
+ *r++ |= x;
+ bc = 0;
+ }
+ else
+ {
+ *r = x << 4;
+ bc = 1;
+ }
+ }
+ }
+
+ PG_RETURN_VARBIT_P(result);
+}
+
+
+Datum
+bit_out(PG_FUNCTION_ARGS)
+{
+#if 1
+ /* same as varbit output */
+ return varbit_out(fcinfo);
+#else
+
+ /*
+ * This is how one would print a hex string, in case someone wants to
+ * write a formatting function.
+ */
+ VarBit *s = PG_GETARG_VARBIT_P(0);
+ char *result,
+ *r;
+ bits8 *sp;
+ int i,
+ len,
+ bitlen;
+
+ /* Assertion to help catch any bit functions that don't pad correctly */
+ VARBIT_CORRECTLY_PADDED(s);
+
+ bitlen = VARBITLEN(s);
+ len = (bitlen + 3) / 4;
+ result = (char *) palloc(len + 2);
+ sp = VARBITS(s);
+ r = result;
+ *r++ = 'X';
+ /* we cheat by knowing that we store full bytes zero padded */
+ for (i = 0; i < len; i += 2, sp++)
+ {
+ *r++ = HEXDIG((*sp) >> 4);
+ *r++ = HEXDIG((*sp) & 0xF);
+ }
+
+ /*
+	 * Back up one step if we printed a hex digit that was not actually part
+	 * of the bitstring.
+ */
+ if (i > len)
+ r--;
+ *r = '\0';
+
+ PG_RETURN_CSTRING(result);
+#endif
+}
+
+/*
+ * bit_recv - converts external binary format to bit
+ */
+Datum
+bit_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 atttypmod = PG_GETARG_INT32(2);
+ VarBit *result;
+ int len,
+ bitlen;
+
+ bitlen = pq_getmsgint(buf, sizeof(int32));
+ if (bitlen < 0 || bitlen > VARBITMAXLEN)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("invalid length in external bit string")));
+
+ /*
+ * Sometimes atttypmod is not supplied. If it is supplied we need to make
+ * sure that the bitstring fits.
+ */
+ if (atttypmod > 0 && bitlen != atttypmod)
+ ereport(ERROR,
+ (errcode(ERRCODE_STRING_DATA_LENGTH_MISMATCH),
+ errmsg("bit string length %d does not match type bit(%d)",
+ bitlen, atttypmod)));
+
+ len = VARBITTOTALLEN(bitlen);
+ result = (VarBit *) palloc(len);
+ SET_VARSIZE(result, len);
+ VARBITLEN(result) = bitlen;
+
+ pq_copymsgbytes(buf, (char *) VARBITS(result), VARBITBYTES(result));
+
+ /* Make sure last byte is correctly zero-padded */
+ VARBIT_PAD(result);
+
+ PG_RETURN_VARBIT_P(result);
+}
+
+/*
+ * bit_send - converts bit to binary format
+ */
+Datum
+bit_send(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as varbit_send, so share code */
+ return varbit_send(fcinfo);
+}
+
+/*
+ * bit()
+ * Converts a bit() type to a specific internal length.
+ * len is the bitlength specified in the column definition.
+ *
+ * If doing implicit cast, raise error when source data is wrong length.
+ * If doing explicit cast, silently truncate or zero-pad to specified length.
+ */
+Datum
+bit(PG_FUNCTION_ARGS)
+{
+ VarBit *arg = PG_GETARG_VARBIT_P(0);
+ int32 len = PG_GETARG_INT32(1);
+ bool isExplicit = PG_GETARG_BOOL(2);
+ VarBit *result;
+ int rlen;
+
+ /* No work if typmod is invalid or supplied data matches it already */
+ if (len <= 0 || len > VARBITMAXLEN || len == VARBITLEN(arg))
+ PG_RETURN_VARBIT_P(arg);
+
+ if (!isExplicit)
+ ereport(ERROR,
+ (errcode(ERRCODE_STRING_DATA_LENGTH_MISMATCH),
+ errmsg("bit string length %d does not match type bit(%d)",
+ VARBITLEN(arg), len)));
+
+ rlen = VARBITTOTALLEN(len);
+ /* set to 0 so that string is zero-padded */
+ result = (VarBit *) palloc0(rlen);
+ SET_VARSIZE(result, rlen);
+ VARBITLEN(result) = len;
+
+ memcpy(VARBITS(result), VARBITS(arg),
+ Min(VARBITBYTES(result), VARBITBYTES(arg)));
+
+ /*
+ * Make sure last byte is zero-padded if needed. This is useless but safe
+ * if source data was shorter than target length (we assume the last byte
+ * of the source data was itself correctly zero-padded).
+ */
+ VARBIT_PAD(result);
+
+ PG_RETURN_VARBIT_P(result);
+}
+
+Datum
+bittypmodin(PG_FUNCTION_ARGS)
+{
+ ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
+
+ PG_RETURN_INT32(anybit_typmodin(ta, "bit"));
+}
+
+Datum
+bittypmodout(PG_FUNCTION_ARGS)
+{
+ int32 typmod = PG_GETARG_INT32(0);
+
+ PG_RETURN_CSTRING(anybit_typmodout(typmod));
+}
+
+
+/*
+ * varbit_in -
+ * converts a string to the internal representation of a bitstring.
+ * This is the same as bit_in except that atttypmod is taken as
+ * the maximum length, not the exact length to force the bitstring to.
+ */
+Datum
+varbit_in(PG_FUNCTION_ARGS)
+{
+ char *input_string = PG_GETARG_CSTRING(0);
+
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 atttypmod = PG_GETARG_INT32(2);
+ VarBit *result; /* The resulting bit string */
+ char *sp; /* pointer into the character string */
+ bits8 *r; /* pointer into the result */
+ int len, /* Length of the whole data structure */
+ bitlen, /* Number of bits in the bit string */
+ slen; /* Length of the input string */
+	bool		bit_not_hex;	/* false = hex string, true = bit string */
+ int bc;
+ bits8 x = 0;
+
+ /* Check that the first character is a b or an x */
+ if (input_string[0] == 'b' || input_string[0] == 'B')
+ {
+ bit_not_hex = true;
+ sp = input_string + 1;
+ }
+ else if (input_string[0] == 'x' || input_string[0] == 'X')
+ {
+ bit_not_hex = false;
+ sp = input_string + 1;
+ }
+ else
+ {
+ bit_not_hex = true;
+ sp = input_string;
+ }
+
+ /*
+ * Determine bitlength from input string. MaxAllocSize ensures a regular
+ * input is small enough, but we must check hex input.
+ */
+ slen = strlen(sp);
+ if (bit_not_hex)
+ bitlen = slen;
+ else
+ {
+ if (slen > VARBITMAXLEN / 4)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("bit string length exceeds the maximum allowed (%d)",
+ VARBITMAXLEN)));
+ bitlen = slen * 4;
+ }
+
+ /*
+ * Sometimes atttypmod is not supplied. If it is supplied we need to make
+ * sure that the bitstring fits.
+ */
+ if (atttypmod <= 0)
+ atttypmod = bitlen;
+ else if (bitlen > atttypmod)
+ ereport(ERROR,
+ (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
+ errmsg("bit string too long for type bit varying(%d)",
+ atttypmod)));
+
+ len = VARBITTOTALLEN(bitlen);
+ /* set to 0 so that *r is always initialised and string is zero-padded */
+ result = (VarBit *) palloc0(len);
+ SET_VARSIZE(result, len);
+ VARBITLEN(result) = Min(bitlen, atttypmod);
+
+ r = VARBITS(result);
+ if (bit_not_hex)
+ {
+ /* Parse the bit representation of the string */
+ /* We know it fits, as bitlen was compared to atttypmod */
+ x = HIGHBIT;
+ for (; *sp; sp++)
+ {
+ if (*sp == '1')
+ *r |= x;
+ else if (*sp != '0')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("\"%.*s\" is not a valid binary digit",
+ pg_mblen(sp), sp)));
+
+ x >>= 1;
+ if (x == 0)
+ {
+ x = HIGHBIT;
+ r++;
+ }
+ }
+ }
+ else
+ {
+ /* Parse the hex representation of the string */
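+		/* bc toggles between the high and low nibble of each output byte */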
+ for (bc = 0; *sp; sp++)
+ {
+ if (*sp >= '0' && *sp <= '9')
+ x = (bits8) (*sp - '0');
+ else if (*sp >= 'A' && *sp <= 'F')
+ x = (bits8) (*sp - 'A') + 10;
+ else if (*sp >= 'a' && *sp <= 'f')
+ x = (bits8) (*sp - 'a') + 10;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("\"%.*s\" is not a valid hexadecimal digit",
+ pg_mblen(sp), sp)));
+
+ if (bc)
+ {
+ *r++ |= x;
+ bc = 0;
+ }
+ else
+ {
+ *r = x << 4;
+ bc = 1;
+ }
+ }
+ }
+
+ PG_RETURN_VARBIT_P(result);
+}
+
+/*
+ * varbit_out -
+ * Prints the string as bits to preserve length accurately
+ *
+ * XXX varbit_recv() and hex input to varbit_in() can load a value that this
+ * cannot emit. Consider using hex output for such values.
+ */
+Datum
+varbit_out(PG_FUNCTION_ARGS)
+{
+ VarBit *s = PG_GETARG_VARBIT_P(0);
+ char *result,
+ *r;
+ bits8 *sp;
+ bits8 x;
+ int i,
+ k,
+ len;
+
+ /* Assertion to help catch any bit functions that don't pad correctly */
+ VARBIT_CORRECTLY_PADDED(s);
+
+ len = VARBITLEN(s);
+ result = (char *) palloc(len + 1);
+ sp = VARBITS(s);
+ r = result;
+ for (i = 0; i <= len - BITS_PER_BYTE; i += BITS_PER_BYTE, sp++)
+ {
+ /* print full bytes */
+ x = *sp;
+ for (k = 0; k < BITS_PER_BYTE; k++)
+ {
+ *r++ = IS_HIGHBIT_SET(x) ? '1' : '0';
+ x <<= 1;
+ }
+ }
+ if (i < len)
+ {
+ /* print the last partial byte */
+ x = *sp;
+ for (k = i; k < len; k++)
+ {
+ *r++ = IS_HIGHBIT_SET(x) ? '1' : '0';
+ x <<= 1;
+ }
+ }
+ *r = '\0';
+
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * varbit_recv - converts external binary format to varbit
+ *
+ * External format is the bitlen as an int32, then the byte array.
+ */
+Datum
+varbit_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 atttypmod = PG_GETARG_INT32(2);
+ VarBit *result;
+ int len,
+ bitlen;
+
+ bitlen = pq_getmsgint(buf, sizeof(int32));
+ if (bitlen < 0 || bitlen > VARBITMAXLEN)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("invalid length in external bit string")));
+
+ /*
+ * Sometimes atttypmod is not supplied. If it is supplied, we need to make
+ * sure that the bit string fits.
+ */
+ if (atttypmod > 0 && bitlen > atttypmod)
+ ereport(ERROR,
+ (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
+ errmsg("bit string too long for type bit varying(%d)",
+ atttypmod)));
+
+ len = VARBITTOTALLEN(bitlen);
+ result = (VarBit *) palloc(len);
+ SET_VARSIZE(result, len);
+ VARBITLEN(result) = bitlen;
+
+ pq_copymsgbytes(buf, (char *) VARBITS(result), VARBITBYTES(result));
+
+ /* Make sure last byte is correctly zero-padded */
+ VARBIT_PAD(result);
+
+ PG_RETURN_VARBIT_P(result);
+}
+
+/*
+ * varbit_send - converts varbit to binary format
+ */
+Datum
+varbit_send(PG_FUNCTION_ARGS)
+{
+ VarBit *s = PG_GETARG_VARBIT_P(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendint32(&buf, VARBITLEN(s));
+ pq_sendbytes(&buf, (char *) VARBITS(s), VARBITBYTES(s));
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/*
+ * varbit_support()
+ *
+ * Planner support function for the varbit() length coercion function.
+ *
+ * Currently, the only interesting thing we can do is flatten calls that set
+ * the new maximum length >= the previous maximum length. We can ignore the
+ * isExplicit argument, since that only affects truncation cases.
+ */
+Datum
+varbit_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+ Node *ret = NULL;
+
+ if (IsA(rawreq, SupportRequestSimplify))
+ {
+ SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq;
+ FuncExpr *expr = req->fcall;
+ Node *typmod;
+
+ Assert(list_length(expr->args) >= 2);
+
+ typmod = (Node *) lsecond(expr->args);
+
+ if (IsA(typmod, Const) && !((Const *) typmod)->constisnull)
+ {
+ Node *source = (Node *) linitial(expr->args);
+ int32 new_typmod = DatumGetInt32(((Const *) typmod)->constvalue);
+ int32 old_max = exprTypmod(source);
+ int32 new_max = new_typmod;
+
+ /* Note: varbit() treats typmod 0 as invalid, so we do too */
+ if (new_max <= 0 || (old_max > 0 && old_max <= new_max))
+ ret = relabel_to_typmod(source, new_typmod);
+ }
+ }
+
+ PG_RETURN_POINTER(ret);
+}
+
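+/*
+ * For illustration of the simplification rule above: a length coercion that
+ * cannot lose bits, such as casting a varbit(4) expression to varbit(8) or to
+ * unconstrained varbit, is flattened to a simple relabeling of the input,
+ * while a potentially-truncating cast such as varbit(8) to varbit(4) keeps
+ * the varbit() call so the length check still happens at run time.
+ */
+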
+/*
+ * varbit()
+ * Converts a varbit() type to a specific internal length.
+ * len is the maximum bitlength specified in the column definition.
+ *
+ * If doing implicit cast, raise error when source data is too long.
+ * If doing explicit cast, silently truncate to max length.
+ */
+Datum
+varbit(PG_FUNCTION_ARGS)
+{
+ VarBit *arg = PG_GETARG_VARBIT_P(0);
+ int32 len = PG_GETARG_INT32(1);
+ bool isExplicit = PG_GETARG_BOOL(2);
+ VarBit *result;
+ int rlen;
+
+ /* No work if typmod is invalid or supplied data matches it already */
+ if (len <= 0 || len >= VARBITLEN(arg))
+ PG_RETURN_VARBIT_P(arg);
+
+ if (!isExplicit)
+ ereport(ERROR,
+ (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
+ errmsg("bit string too long for type bit varying(%d)",
+ len)));
+
+ rlen = VARBITTOTALLEN(len);
+ result = (VarBit *) palloc(rlen);
+ SET_VARSIZE(result, rlen);
+ VARBITLEN(result) = len;
+
+ memcpy(VARBITS(result), VARBITS(arg), VARBITBYTES(result));
+
+ /* Make sure last byte is correctly zero-padded */
+ VARBIT_PAD(result);
+
+ PG_RETURN_VARBIT_P(result);
+}
+
+Datum
+varbittypmodin(PG_FUNCTION_ARGS)
+{
+ ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
+
+ PG_RETURN_INT32(anybit_typmodin(ta, "varbit"));
+}
+
+Datum
+varbittypmodout(PG_FUNCTION_ARGS)
+{
+ int32 typmod = PG_GETARG_INT32(0);
+
+ PG_RETURN_CSTRING(anybit_typmodout(typmod));
+}
+
+
+/*
+ * Comparison operators
+ *
+ * We only need one set of comparison operators for bitstrings, as the lengths
+ * are stored in the same way for zero-padded and varying bit strings.
+ *
+ * Note that the SQL standard does not unambiguously define the comparison
+ * between zero-padded and varying bit strings. If the same value is written
+ * into a zero-padded bit string and into a varying bit string, but the
+ * zero-padded one has the greater length, the zero-padded value compares as
+ * larger (its extra trailing zero bits make it the longer string).
+ *
+ * Leading zeros cannot simply be ignored: they are part of the bit string
+ * and are significant in comparisons.
+ *
+ * Note: btree indexes need these routines not to leak memory; therefore,
+ * be careful to free working copies of toasted datums. Most places don't
+ * need to be so careful.
+ */
+
+/*
+ * bit_cmp
+ *
+ * Compares two bitstrings and returns <0, 0, >0 depending on whether the first
+ * string is smaller, equal, or bigger than the second. All bits are considered
+ * and additional zero bits may make one string smaller/larger than the other,
+ * even if their zero-padded values would be the same.
+ */
+static int32
+bit_cmp(VarBit *arg1, VarBit *arg2)
+{
+ int bitlen1,
+ bytelen1,
+ bitlen2,
+ bytelen2;
+ int32 cmp;
+
+ bytelen1 = VARBITBYTES(arg1);
+ bytelen2 = VARBITBYTES(arg2);
+
+ cmp = memcmp(VARBITS(arg1), VARBITS(arg2), Min(bytelen1, bytelen2));
+ if (cmp == 0)
+ {
+ bitlen1 = VARBITLEN(arg1);
+ bitlen2 = VARBITLEN(arg2);
+ if (bitlen1 != bitlen2)
+ cmp = (bitlen1 < bitlen2) ? -1 : 1;
+ }
+ return cmp;
+}
+
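+/*
+ * Two consequences of this rule, worked out by hand as examples:
+ *
+ *   B'10'  < B'100'   equal leading bytes, so the shorter string sorts first
+ *   B'011' < B'1'     first data bytes are 01100000 vs 10000000
+ */
+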
+Datum
+biteq(PG_FUNCTION_ARGS)
+{
+ VarBit *arg1 = PG_GETARG_VARBIT_P(0);
+ VarBit *arg2 = PG_GETARG_VARBIT_P(1);
+ bool result;
+ int bitlen1,
+ bitlen2;
+
+ bitlen1 = VARBITLEN(arg1);
+ bitlen2 = VARBITLEN(arg2);
+
+ /* fast path for different-length inputs */
+ if (bitlen1 != bitlen2)
+ result = false;
+ else
+ result = (bit_cmp(arg1, arg2) == 0);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+bitne(PG_FUNCTION_ARGS)
+{
+ VarBit *arg1 = PG_GETARG_VARBIT_P(0);
+ VarBit *arg2 = PG_GETARG_VARBIT_P(1);
+ bool result;
+ int bitlen1,
+ bitlen2;
+
+ bitlen1 = VARBITLEN(arg1);
+ bitlen2 = VARBITLEN(arg2);
+
+ /* fast path for different-length inputs */
+ if (bitlen1 != bitlen2)
+ result = true;
+ else
+ result = (bit_cmp(arg1, arg2) != 0);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+bitlt(PG_FUNCTION_ARGS)
+{
+ VarBit *arg1 = PG_GETARG_VARBIT_P(0);
+ VarBit *arg2 = PG_GETARG_VARBIT_P(1);
+ bool result;
+
+ result = (bit_cmp(arg1, arg2) < 0);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+bitle(PG_FUNCTION_ARGS)
+{
+ VarBit *arg1 = PG_GETARG_VARBIT_P(0);
+ VarBit *arg2 = PG_GETARG_VARBIT_P(1);
+ bool result;
+
+ result = (bit_cmp(arg1, arg2) <= 0);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+bitgt(PG_FUNCTION_ARGS)
+{
+ VarBit *arg1 = PG_GETARG_VARBIT_P(0);
+ VarBit *arg2 = PG_GETARG_VARBIT_P(1);
+ bool result;
+
+ result = (bit_cmp(arg1, arg2) > 0);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+bitge(PG_FUNCTION_ARGS)
+{
+ VarBit *arg1 = PG_GETARG_VARBIT_P(0);
+ VarBit *arg2 = PG_GETARG_VARBIT_P(1);
+ bool result;
+
+ result = (bit_cmp(arg1, arg2) >= 0);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+bitcmp(PG_FUNCTION_ARGS)
+{
+ VarBit *arg1 = PG_GETARG_VARBIT_P(0);
+ VarBit *arg2 = PG_GETARG_VARBIT_P(1);
+ int32 result;
+
+ result = bit_cmp(arg1, arg2);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_INT32(result);
+}
+
+/*
+ * bitcat
+ * Concatenation of bit strings
+ */
+Datum
+bitcat(PG_FUNCTION_ARGS)
+{
+ VarBit *arg1 = PG_GETARG_VARBIT_P(0);
+ VarBit *arg2 = PG_GETARG_VARBIT_P(1);
+
+ PG_RETURN_VARBIT_P(bit_catenate(arg1, arg2));
+}
+
+static VarBit *
+bit_catenate(VarBit *arg1, VarBit *arg2)
+{
+ VarBit *result;
+ int bitlen1,
+ bitlen2,
+ bytelen,
+ bit1pad,
+ bit2shift;
+ bits8 *pr,
+ *pa;
+
+ bitlen1 = VARBITLEN(arg1);
+ bitlen2 = VARBITLEN(arg2);
+
+ if (bitlen1 > VARBITMAXLEN - bitlen2)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg("bit string length exceeds the maximum allowed (%d)",
+ VARBITMAXLEN)));
+ bytelen = VARBITTOTALLEN(bitlen1 + bitlen2);
+
+ result = (VarBit *) palloc(bytelen);
+ SET_VARSIZE(result, bytelen);
+ VARBITLEN(result) = bitlen1 + bitlen2;
+
+ /* Copy the first bitstring in */
+ memcpy(VARBITS(result), VARBITS(arg1), VARBITBYTES(arg1));
+
+ /* Copy the second bit string */
+ bit1pad = VARBITPAD(arg1);
+ if (bit1pad == 0)
+ {
+ memcpy(VARBITS(result) + VARBITBYTES(arg1), VARBITS(arg2),
+ VARBITBYTES(arg2));
+ }
+ else if (bitlen2 > 0)
+ {
+ /* We need to shift all the bits to fit */
+ bit2shift = BITS_PER_BYTE - bit1pad;
+ pr = VARBITS(result) + VARBITBYTES(arg1) - 1;
+ for (pa = VARBITS(arg2); pa < VARBITEND(arg2); pa++)
+ {
+ *pr |= ((*pa >> bit2shift) & BITMASK);
+ pr++;
+ if (pr < VARBITEND(result))
+ *pr = (*pa << bit1pad) & BITMASK;
+ }
+ }
+
+ /* The pad bits should be already zero at this point */
+
+ return result;
+}
+
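+/*
+ * For example, B'10' || B'101' yields B'10101': the first argument fills only
+ * the top two bits of its byte, so the second argument's bits are shifted so
+ * that they follow immediately after the first argument's last bit.
+ */
+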
+/*
+ * bitsubstr
+ * retrieve a substring from the bit string.
+ * Note, s is 1-based.
+ * SQL draft 6.10 9)
+ */
+Datum
+bitsubstr(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_VARBIT_P(bitsubstring(PG_GETARG_VARBIT_P(0),
+ PG_GETARG_INT32(1),
+ PG_GETARG_INT32(2),
+ false));
+}
+
+Datum
+bitsubstr_no_len(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_VARBIT_P(bitsubstring(PG_GETARG_VARBIT_P(0),
+ PG_GETARG_INT32(1),
+ -1, true));
+}
+
+static VarBit *
+bitsubstring(VarBit *arg, int32 s, int32 l, bool length_not_specified)
+{
+ VarBit *result;
+ int bitlen,
+ rbitlen,
+ len,
+ ishift,
+ i;
+ int32 e,
+ s1,
+ e1;
+ bits8 *r,
+ *ps;
+
+ bitlen = VARBITLEN(arg);
+ s1 = Max(s, 1);
+ /* If we do not have an upper bound, use end of string */
+ if (length_not_specified)
+ {
+ e1 = bitlen + 1;
+ }
+ else if (l < 0)
+ {
+ /* SQL99 says to throw an error for E < S, i.e., negative length */
+ ereport(ERROR,
+ (errcode(ERRCODE_SUBSTRING_ERROR),
+ errmsg("negative substring length not allowed")));
+ e1 = -1; /* silence stupider compilers */
+ }
+ else if (pg_add_s32_overflow(s, l, &e))
+ {
+ /*
+ * L could be large enough for S + L to overflow, in which case the
+ * substring must run to end of string.
+ */
+ e1 = bitlen + 1;
+ }
+ else
+ {
+ e1 = Min(e, bitlen + 1);
+ }
+ if (s1 > bitlen || e1 <= s1)
+ {
+ /* Need to return a zero-length bitstring */
+ len = VARBITTOTALLEN(0);
+ result = (VarBit *) palloc(len);
+ SET_VARSIZE(result, len);
+ VARBITLEN(result) = 0;
+ }
+ else
+ {
+ /*
+ * OK, we've got a true substring starting at position s1-1 and ending
+ * at position e1-1
+ */
+ rbitlen = e1 - s1;
+ len = VARBITTOTALLEN(rbitlen);
+ result = (VarBit *) palloc(len);
+ SET_VARSIZE(result, len);
+ VARBITLEN(result) = rbitlen;
+ len -= VARHDRSZ + VARBITHDRSZ;
+ /* Are we copying from a byte boundary? */
+ if ((s1 - 1) % BITS_PER_BYTE == 0)
+ {
+ /* Yep, we are copying bytes */
+ memcpy(VARBITS(result), VARBITS(arg) + (s1 - 1) / BITS_PER_BYTE,
+ len);
+ }
+ else
+ {
+ /* Figure out how much we need to shift the sequence by */
+ ishift = (s1 - 1) % BITS_PER_BYTE;
+ r = VARBITS(result);
+ ps = VARBITS(arg) + (s1 - 1) / BITS_PER_BYTE;
+ for (i = 0; i < len; i++)
+ {
+ *r = (*ps << ishift) & BITMASK;
+ if ((++ps) < VARBITEND(arg))
+ *r |= *ps >> (BITS_PER_BYTE - ishift);
+ r++;
+ }
+ }
+
+ /* Make sure last byte is correctly zero-padded */
+ VARBIT_PAD(result);
+ }
+
+ return result;
+}
+
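+/*
+ * A worked example of the clamping above: substring(B'110010' from 2 for 3)
+ * gives s1 = 2 and e1 = 5, so bits 2..4 are returned, i.e. B'100'. A start
+ * before the beginning of the string is clamped to 1, and an end past the
+ * last bit is clamped to bitlen + 1.
+ */
+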
+/*
+ * bitoverlay
+ * Replace specified substring of first string with second
+ *
+ * The SQL standard defines OVERLAY() in terms of substring and concatenation.
+ * This code is a direct implementation of what the standard says.
+ */
+Datum
+bitoverlay(PG_FUNCTION_ARGS)
+{
+ VarBit *t1 = PG_GETARG_VARBIT_P(0);
+ VarBit *t2 = PG_GETARG_VARBIT_P(1);
+ int sp = PG_GETARG_INT32(2); /* substring start position */
+ int sl = PG_GETARG_INT32(3); /* substring length */
+
+ PG_RETURN_VARBIT_P(bit_overlay(t1, t2, sp, sl));
+}
+
+Datum
+bitoverlay_no_len(PG_FUNCTION_ARGS)
+{
+ VarBit *t1 = PG_GETARG_VARBIT_P(0);
+ VarBit *t2 = PG_GETARG_VARBIT_P(1);
+ int sp = PG_GETARG_INT32(2); /* substring start position */
+ int sl;
+
+ sl = VARBITLEN(t2); /* defaults to length(t2) */
+ PG_RETURN_VARBIT_P(bit_overlay(t1, t2, sp, sl));
+}
+
+static VarBit *
+bit_overlay(VarBit *t1, VarBit *t2, int sp, int sl)
+{
+ VarBit *result;
+ VarBit *s1;
+ VarBit *s2;
+ int sp_pl_sl;
+
+ /*
+ * Check for possible integer-overflow cases. For negative sp, throw a
+ * "substring length" error because that's what should be expected
+ * according to the spec's definition of OVERLAY().
+ */
+ if (sp <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_SUBSTRING_ERROR),
+ errmsg("negative substring length not allowed")));
+ if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+
+ s1 = bitsubstring(t1, 1, sp - 1, false);
+ s2 = bitsubstring(t1, sp_pl_sl, -1, true);
+ result = bit_catenate(s1, t2);
+ result = bit_catenate(result, s2);
+
+ return result;
+}
+
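+/*
+ * For example, overlay(B'1111' placing B'00' from 2) is computed as
+ * substring(B'1111', 1, 1) || B'00' || substring(B'1111' from 4), which is
+ * B'1' || B'00' || B'1' = B'1001'.
+ */
+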
+/*
+ * bit_bit_count
+ *
+ * Implements the SQL function bit_count(): returns the number of bits set
+ * in a bit string.
+ */
+Datum
+bit_bit_count(PG_FUNCTION_ARGS)
+{
+ VarBit *arg = PG_GETARG_VARBIT_P(0);
+
+ PG_RETURN_INT64(pg_popcount((char *) VARBITS(arg), VARBITBYTES(arg)));
+}
+
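+/*
+ * For example, bit_count(B'1101') returns 3. Counting whole bytes is safe
+ * because the pad bits in the last byte are guaranteed to be zero.
+ */
+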
+/*
+ * bitlength, bitoctetlength
+ * Return the length of a bit string
+ */
+Datum
+bitlength(PG_FUNCTION_ARGS)
+{
+ VarBit *arg = PG_GETARG_VARBIT_P(0);
+
+ PG_RETURN_INT32(VARBITLEN(arg));
+}
+
+Datum
+bitoctetlength(PG_FUNCTION_ARGS)
+{
+ VarBit *arg = PG_GETARG_VARBIT_P(0);
+
+ PG_RETURN_INT32(VARBITBYTES(arg));
+}
+
+/*
+ * bit_and
+ * perform a logical AND on two bit strings.
+ */
+Datum
+bit_and(PG_FUNCTION_ARGS)
+{
+ VarBit *arg1 = PG_GETARG_VARBIT_P(0);
+ VarBit *arg2 = PG_GETARG_VARBIT_P(1);
+ VarBit *result;
+ int len,
+ bitlen1,
+ bitlen2,
+ i;
+ bits8 *p1,
+ *p2,
+ *r;
+
+ bitlen1 = VARBITLEN(arg1);
+ bitlen2 = VARBITLEN(arg2);
+ if (bitlen1 != bitlen2)
+ ereport(ERROR,
+ (errcode(ERRCODE_STRING_DATA_LENGTH_MISMATCH),
+ errmsg("cannot AND bit strings of different sizes")));
+
+ len = VARSIZE(arg1);
+ result = (VarBit *) palloc(len);
+ SET_VARSIZE(result, len);
+ VARBITLEN(result) = bitlen1;
+
+ p1 = VARBITS(arg1);
+ p2 = VARBITS(arg2);
+ r = VARBITS(result);
+ for (i = 0; i < VARBITBYTES(arg1); i++)
+ *r++ = *p1++ & *p2++;
+
+ /* Padding is not needed as & of 0 pads is 0 */
+
+ PG_RETURN_VARBIT_P(result);
+}
+
+/*
+ * bit_or
+ * perform a logical OR on two bit strings.
+ */
+Datum
+bit_or(PG_FUNCTION_ARGS)
+{
+ VarBit *arg1 = PG_GETARG_VARBIT_P(0);
+ VarBit *arg2 = PG_GETARG_VARBIT_P(1);
+ VarBit *result;
+ int len,
+ bitlen1,
+ bitlen2,
+ i;
+ bits8 *p1,
+ *p2,
+ *r;
+
+ bitlen1 = VARBITLEN(arg1);
+ bitlen2 = VARBITLEN(arg2);
+ if (bitlen1 != bitlen2)
+ ereport(ERROR,
+ (errcode(ERRCODE_STRING_DATA_LENGTH_MISMATCH),
+ errmsg("cannot OR bit strings of different sizes")));
+ len = VARSIZE(arg1);
+ result = (VarBit *) palloc(len);
+ SET_VARSIZE(result, len);
+ VARBITLEN(result) = bitlen1;
+
+ p1 = VARBITS(arg1);
+ p2 = VARBITS(arg2);
+ r = VARBITS(result);
+ for (i = 0; i < VARBITBYTES(arg1); i++)
+ *r++ = *p1++ | *p2++;
+
+ /* Padding is not needed as | of 0 pads is 0 */
+
+ PG_RETURN_VARBIT_P(result);
+}
+
+/*
+ * bitxor
+ * perform a logical XOR on two bit strings.
+ */
+Datum
+bitxor(PG_FUNCTION_ARGS)
+{
+ VarBit *arg1 = PG_GETARG_VARBIT_P(0);
+ VarBit *arg2 = PG_GETARG_VARBIT_P(1);
+ VarBit *result;
+ int len,
+ bitlen1,
+ bitlen2,
+ i;
+ bits8 *p1,
+ *p2,
+ *r;
+
+ bitlen1 = VARBITLEN(arg1);
+ bitlen2 = VARBITLEN(arg2);
+ if (bitlen1 != bitlen2)
+ ereport(ERROR,
+ (errcode(ERRCODE_STRING_DATA_LENGTH_MISMATCH),
+ errmsg("cannot XOR bit strings of different sizes")));
+
+ len = VARSIZE(arg1);
+ result = (VarBit *) palloc(len);
+ SET_VARSIZE(result, len);
+ VARBITLEN(result) = bitlen1;
+
+ p1 = VARBITS(arg1);
+ p2 = VARBITS(arg2);
+ r = VARBITS(result);
+ for (i = 0; i < VARBITBYTES(arg1); i++)
+ *r++ = *p1++ ^ *p2++;
+
+ /* Padding is not needed as ^ of 0 pads is 0 */
+
+ PG_RETURN_VARBIT_P(result);
+}
+
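+/*
+ * A few worked examples of the bitwise operators above, for equal-length
+ * inputs (unequal lengths raise an error):
+ *
+ *   B'1100' & B'1010'  ->  B'1000'
+ *   B'1100' | B'1010'  ->  B'1110'
+ *   B'1100' # B'1010'  ->  B'0110'
+ */
+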
+/*
+ * bitnot
+ * perform a logical NOT on a bit string.
+ */
+Datum
+bitnot(PG_FUNCTION_ARGS)
+{
+ VarBit *arg = PG_GETARG_VARBIT_P(0);
+ VarBit *result;
+ bits8 *p,
+ *r;
+
+ result = (VarBit *) palloc(VARSIZE(arg));
+ SET_VARSIZE(result, VARSIZE(arg));
+ VARBITLEN(result) = VARBITLEN(arg);
+
+ p = VARBITS(arg);
+ r = VARBITS(result);
+ for (; p < VARBITEND(arg); p++)
+ *r++ = ~*p;
+
+ /* Must zero-pad the result, because extra bits are surely 1's here */
+ VARBIT_PAD_LAST(result, r);
+
+ PG_RETURN_VARBIT_P(result);
+}
+
+/*
+ * bitshiftleft
+ * do a left shift (i.e. towards the beginning of the string)
+ */
+Datum
+bitshiftleft(PG_FUNCTION_ARGS)
+{
+ VarBit *arg = PG_GETARG_VARBIT_P(0);
+ int32 shft = PG_GETARG_INT32(1);
+ VarBit *result;
+ int byte_shift,
+ ishift,
+ len;
+ bits8 *p,
+ *r;
+
+ /* Negative shift is a shift to the right */
+ if (shft < 0)
+ {
+ /* Prevent integer overflow in negation */
+ if (shft < -VARBITMAXLEN)
+ shft = -VARBITMAXLEN;
+ PG_RETURN_DATUM(DirectFunctionCall2(bitshiftright,
+ VarBitPGetDatum(arg),
+ Int32GetDatum(-shft)));
+ }
+
+ result = (VarBit *) palloc(VARSIZE(arg));
+ SET_VARSIZE(result, VARSIZE(arg));
+ VARBITLEN(result) = VARBITLEN(arg);
+ r = VARBITS(result);
+
+ /* If we shifted all the bits out, return an all-zero string */
+ if (shft >= VARBITLEN(arg))
+ {
+ MemSet(r, 0, VARBITBYTES(arg));
+ PG_RETURN_VARBIT_P(result);
+ }
+
+ byte_shift = shft / BITS_PER_BYTE;
+ ishift = shft % BITS_PER_BYTE;
+ p = VARBITS(arg) + byte_shift;
+
+ if (ishift == 0)
+ {
+ /* Special case: we can do a memcpy */
+ len = VARBITBYTES(arg) - byte_shift;
+ memcpy(r, p, len);
+ MemSet(r + len, 0, byte_shift);
+ }
+ else
+ {
+ for (; p < VARBITEND(arg); r++)
+ {
+ *r = *p << ishift;
+ if ((++p) < VARBITEND(arg))
+ *r |= *p >> (BITS_PER_BYTE - ishift);
+ }
+ for (; r < VARBITEND(result); r++)
+ *r = 0;
+ }
+
+ /* The pad bits should be already zero at this point */
+
+ PG_RETURN_VARBIT_P(result);
+}
+
+/*
+ * bitshiftright
+ * do a right shift (i.e. towards the end of the string)
+ */
+Datum
+bitshiftright(PG_FUNCTION_ARGS)
+{
+ VarBit *arg = PG_GETARG_VARBIT_P(0);
+ int32 shft = PG_GETARG_INT32(1);
+ VarBit *result;
+ int byte_shift,
+ ishift,
+ len;
+ bits8 *p,
+ *r;
+
+ /* Negative shift is a shift to the left */
+ if (shft < 0)
+ {
+ /* Prevent integer overflow in negation */
+ if (shft < -VARBITMAXLEN)
+ shft = -VARBITMAXLEN;
+ PG_RETURN_DATUM(DirectFunctionCall2(bitshiftleft,
+ VarBitPGetDatum(arg),
+ Int32GetDatum(-shft)));
+ }
+
+ result = (VarBit *) palloc(VARSIZE(arg));
+ SET_VARSIZE(result, VARSIZE(arg));
+ VARBITLEN(result) = VARBITLEN(arg);
+ r = VARBITS(result);
+
+ /* If we shifted all the bits out, return an all-zero string */
+ if (shft >= VARBITLEN(arg))
+ {
+ MemSet(r, 0, VARBITBYTES(arg));
+ PG_RETURN_VARBIT_P(result);
+ }
+
+ byte_shift = shft / BITS_PER_BYTE;
+ ishift = shft % BITS_PER_BYTE;
+ p = VARBITS(arg);
+
+ /* Set the first part of the result to 0 */
+ MemSet(r, 0, byte_shift);
+ r += byte_shift;
+
+ if (ishift == 0)
+ {
+ /* Special case: we can do a memcpy */
+ len = VARBITBYTES(arg) - byte_shift;
+ memcpy(r, p, len);
+ r += len;
+ }
+ else
+ {
+ if (r < VARBITEND(result))
+ *r = 0; /* initialize first byte */
+ for (; r < VARBITEND(result); p++)
+ {
+ *r |= *p >> ishift;
+ if ((++r) < VARBITEND(result))
+ *r = (*p << (BITS_PER_BYTE - ishift)) & BITMASK;
+ }
+ }
+
+ /* We may have shifted 1's into the pad bits, so fix that */
+ VARBIT_PAD_LAST(result, r);
+
+ PG_RETURN_VARBIT_P(result);
+}
+
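+/*
+ * Shifting never changes the string's length; vacated positions are filled
+ * with zeros, and bits shifted past either end are simply lost. For example:
+ *
+ *   B'10001' << 3  ->  B'01000'
+ *   B'10001' >> 2  ->  B'00100'
+ */
+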
+/*
+ * Conversions between integers and bit strings are not defined by any
+ * standard. We retain the natural ordering of bits here (as in ordinary
+ * binary notation), since that seems the most intuitive choice.
+ */
+Datum
+bitfromint4(PG_FUNCTION_ARGS)
+{
+ int32 a = PG_GETARG_INT32(0);
+ int32 typmod = PG_GETARG_INT32(1);
+ VarBit *result;
+ bits8 *r;
+ int rlen;
+ int destbitsleft,
+ srcbitsleft;
+
+ if (typmod <= 0 || typmod > VARBITMAXLEN)
+ typmod = 1; /* default bit length */
+
+ rlen = VARBITTOTALLEN(typmod);
+ result = (VarBit *) palloc(rlen);
+ SET_VARSIZE(result, rlen);
+ VARBITLEN(result) = typmod;
+
+ r = VARBITS(result);
+ destbitsleft = typmod;
+ srcbitsleft = 32;
+ /* drop any input bits that don't fit */
+ srcbitsleft = Min(srcbitsleft, destbitsleft);
+ /* sign-fill any excess bytes in output */
+ while (destbitsleft >= srcbitsleft + 8)
+ {
+ *r++ = (bits8) ((a < 0) ? BITMASK : 0);
+ destbitsleft -= 8;
+ }
+ /* store first fractional byte */
+ if (destbitsleft > srcbitsleft)
+ {
+ unsigned int val = (unsigned int) (a >> (destbitsleft - 8));
+
+ /* Force sign-fill in case the compiler implements >> as zero-fill */
+ if (a < 0)
+ val |= ((unsigned int) -1) << (srcbitsleft + 8 - destbitsleft);
+ *r++ = (bits8) (val & BITMASK);
+ destbitsleft -= 8;
+ }
+ /* Now srcbitsleft and destbitsleft are the same, need not track both */
+ /* store whole bytes */
+ while (destbitsleft >= 8)
+ {
+ *r++ = (bits8) ((a >> (destbitsleft - 8)) & BITMASK);
+ destbitsleft -= 8;
+ }
+ /* store last fractional byte */
+ if (destbitsleft > 0)
+ *r = (bits8) ((a << (8 - destbitsleft)) & BITMASK);
+
+ PG_RETURN_VARBIT_P(result);
+}
+
+Datum
+bittoint4(PG_FUNCTION_ARGS)
+{
+ VarBit *arg = PG_GETARG_VARBIT_P(0);
+ uint32 result;
+ bits8 *r;
+
+ /* Check that the bit string is not too long */
+ if (VARBITLEN(arg) > sizeof(result) * BITS_PER_BYTE)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+
+ result = 0;
+ for (r = VARBITS(arg); r < VARBITEND(arg); r++)
+ {
+ result <<= BITS_PER_BYTE;
+ result |= *r;
+ }
+ /* Now shift the result to take account of the padding at the end */
+ result >>= VARBITPAD(arg);
+
+ PG_RETURN_INT32(result);
+}
+
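+/*
+ * A few illustrative casts implied by the code above (the int8 versions
+ * below behave the same way):
+ *
+ *   44::bit(10)    ->  B'0000101100'   (sign-fill on the left when widening)
+ *   44::bit(3)     ->  B'100'          (only the rightmost bits are kept)
+ *   B'1101'::int4  ->  13
+ */
+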
+Datum
+bitfromint8(PG_FUNCTION_ARGS)
+{
+ int64 a = PG_GETARG_INT64(0);
+ int32 typmod = PG_GETARG_INT32(1);
+ VarBit *result;
+ bits8 *r;
+ int rlen;
+ int destbitsleft,
+ srcbitsleft;
+
+ if (typmod <= 0 || typmod > VARBITMAXLEN)
+ typmod = 1; /* default bit length */
+
+ rlen = VARBITTOTALLEN(typmod);
+ result = (VarBit *) palloc(rlen);
+ SET_VARSIZE(result, rlen);
+ VARBITLEN(result) = typmod;
+
+ r = VARBITS(result);
+ destbitsleft = typmod;
+ srcbitsleft = 64;
+ /* drop any input bits that don't fit */
+ srcbitsleft = Min(srcbitsleft, destbitsleft);
+ /* sign-fill any excess bytes in output */
+ while (destbitsleft >= srcbitsleft + 8)
+ {
+ *r++ = (bits8) ((a < 0) ? BITMASK : 0);
+ destbitsleft -= 8;
+ }
+ /* store first fractional byte */
+ if (destbitsleft > srcbitsleft)
+ {
+ unsigned int val = (unsigned int) (a >> (destbitsleft - 8));
+
+ /* Force sign-fill in case the compiler implements >> as zero-fill */
+ if (a < 0)
+ val |= ((unsigned int) -1) << (srcbitsleft + 8 - destbitsleft);
+ *r++ = (bits8) (val & BITMASK);
+ destbitsleft -= 8;
+ }
+ /* Now srcbitsleft and destbitsleft are the same, need not track both */
+ /* store whole bytes */
+ while (destbitsleft >= 8)
+ {
+ *r++ = (bits8) ((a >> (destbitsleft - 8)) & BITMASK);
+ destbitsleft -= 8;
+ }
+ /* store last fractional byte */
+ if (destbitsleft > 0)
+ *r = (bits8) ((a << (8 - destbitsleft)) & BITMASK);
+
+ PG_RETURN_VARBIT_P(result);
+}
+
+Datum
+bittoint8(PG_FUNCTION_ARGS)
+{
+ VarBit *arg = PG_GETARG_VARBIT_P(0);
+ uint64 result;
+ bits8 *r;
+
+ /* Check that the bit string is not too long */
+ if (VARBITLEN(arg) > sizeof(result) * BITS_PER_BYTE)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("bigint out of range")));
+
+ result = 0;
+ for (r = VARBITS(arg); r < VARBITEND(arg); r++)
+ {
+ result <<= BITS_PER_BYTE;
+ result |= *r;
+ }
+ /* Now shift the result to take account of the padding at the end */
+ result >>= VARBITPAD(arg);
+
+ PG_RETURN_INT64(result);
+}
+
+
+/*
+ * Determines the position of S2 in the bit string S1 (positions are 1-based).
+ * If S2 does not appear in S1, this function returns 0.
+ * If S2 is of length 0, this function returns 1.
+ * Compatible in usage with the POSITION() functions for other data types.
+ */
+Datum
+bitposition(PG_FUNCTION_ARGS)
+{
+ VarBit *str = PG_GETARG_VARBIT_P(0);
+ VarBit *substr = PG_GETARG_VARBIT_P(1);
+ int substr_length,
+ str_length,
+ i,
+ is;
+ bits8 *s, /* pointer into substring */
+ *p; /* pointer into str */
+ bits8 cmp, /* shifted substring byte to compare */
+ mask1, /* mask for substring byte shifted right */
+ mask2, /* mask for substring byte shifted left */
+ end_mask, /* pad mask for last substring byte */
+ str_mask; /* pad mask for last string byte */
+ bool is_match;
+
+ /* Get the substring length */
+ substr_length = VARBITLEN(substr);
+ str_length = VARBITLEN(str);
+
+ /* String has zero length or substring longer than string, return 0 */
+ if ((str_length == 0) || (substr_length > str_length))
+ PG_RETURN_INT32(0);
+
+ /* zero-length substring means return 1 */
+ if (substr_length == 0)
+ PG_RETURN_INT32(1);
+
+ /* Initialise the padding masks */
+ end_mask = BITMASK << VARBITPAD(substr);
+ str_mask = BITMASK << VARBITPAD(str);
+ for (i = 0; i < VARBITBYTES(str) - VARBITBYTES(substr) + 1; i++)
+ {
+ for (is = 0; is < BITS_PER_BYTE; is++)
+ {
+ is_match = true;
+ p = VARBITS(str) + i;
+ mask1 = BITMASK >> is;
+ mask2 = ~mask1;
+ for (s = VARBITS(substr);
+ is_match && s < VARBITEND(substr); s++)
+ {
+ cmp = *s >> is;
+ if (s == VARBITEND(substr) - 1)
+ {
+ mask1 &= end_mask >> is;
+ if (p == VARBITEND(str) - 1)
+ {
+ /* Check that there is enough of str left */
+ if (mask1 & ~str_mask)
+ {
+ is_match = false;
+ break;
+ }
+ mask1 &= str_mask;
+ }
+ }
+ is_match = ((cmp ^ *p) & mask1) == 0;
+ if (!is_match)
+ break;
+ /* Move on to the next byte */
+ p++;
+ if (p == VARBITEND(str))
+ {
+ mask2 = end_mask << (BITS_PER_BYTE - is);
+ is_match = mask2 == 0;
+#if 0
+ elog(DEBUG4, "S. %d %d em=%2x sm=%2x r=%d",
+ i, is, end_mask, mask2, is_match);
+#endif
+ break;
+ }
+ cmp = *s << (BITS_PER_BYTE - is);
+ if (s == VARBITEND(substr) - 1)
+ {
+ mask2 &= end_mask << (BITS_PER_BYTE - is);
+ if (p == VARBITEND(str) - 1)
+ {
+ if (mask2 & ~str_mask)
+ {
+ is_match = false;
+ break;
+ }
+ mask2 &= str_mask;
+ }
+ }
+ is_match = ((cmp ^ *p) & mask2) == 0;
+ }
+ /* Have we found a match? */
+ if (is_match)
+ PG_RETURN_INT32(i * BITS_PER_BYTE + is + 1);
+ }
+ }
+ PG_RETURN_INT32(0);
+}
+
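+/*
+ * For example, position(B'010' in B'110010') returns 4: the first three
+ * candidate alignments fail, and the match starting at bit 4 succeeds.
+ */
+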
+
+/*
+ * bitsetbit
+ *
+ * Given an instance of type 'bit', creates a new one with
+ * the Nth bit set to the given value.
+ *
+ * The bit location is specified left-to-right in a zero-based fashion,
+ * consistent with the other get_bit and set_bit functions, but
+ * inconsistent with the SQL-standard substring, position, and overlay
+ * functions, which are one-based.
+ */
+Datum
+bitsetbit(PG_FUNCTION_ARGS)
+{
+ VarBit *arg1 = PG_GETARG_VARBIT_P(0);
+ int32 n = PG_GETARG_INT32(1);
+ int32 newBit = PG_GETARG_INT32(2);
+ VarBit *result;
+ int len,
+ bitlen;
+ bits8 *r,
+ *p;
+ int byteNo,
+ bitNo;
+
+ bitlen = VARBITLEN(arg1);
+ if (n < 0 || n >= bitlen)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("bit index %d out of valid range (0..%d)",
+ n, bitlen - 1)));
+
+ /*
+ * sanity check!
+ */
+ if (newBit != 0 && newBit != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("new bit must be 0 or 1")));
+
+ len = VARSIZE(arg1);
+ result = (VarBit *) palloc(len);
+ SET_VARSIZE(result, len);
+ VARBITLEN(result) = bitlen;
+
+ p = VARBITS(arg1);
+ r = VARBITS(result);
+
+ memcpy(r, p, VARBITBYTES(arg1));
+
+ byteNo = n / BITS_PER_BYTE;
+ bitNo = BITS_PER_BYTE - 1 - (n % BITS_PER_BYTE);
+
+ /*
+ * Update the byte.
+ */
+ if (newBit == 0)
+ r[byteNo] &= (~(1 << bitNo));
+ else
+ r[byteNo] |= (1 << bitNo);
+
+ PG_RETURN_VARBIT_P(result);
+}
+
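+/*
+ * For example, with the zero-based, left-to-right bit numbering used by
+ * bitsetbit above and bitgetbit below, set_bit(B'1101', 2, 1) returns
+ * B'1111', and get_bit(B'1101', 2) returns 0.
+ */
+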
+/*
+ * bitgetbit
+ *
+ * Returns the value (0 or 1) of the Nth bit of a bit string.
+ *
+ * The bit location is specified left-to-right in a zero-based fashion,
+ * consistent with the other get_bit and set_bit functions, but
+ * inconsistent with the SQL-standard substring, position, and overlay
+ * functions, which are one-based.
+ */
+Datum
+bitgetbit(PG_FUNCTION_ARGS)
+{
+ VarBit *arg1 = PG_GETARG_VARBIT_P(0);
+ int32 n = PG_GETARG_INT32(1);
+ int bitlen;
+ bits8 *p;
+ int byteNo,
+ bitNo;
+
+ bitlen = VARBITLEN(arg1);
+ if (n < 0 || n >= bitlen)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("bit index %d out of valid range (0..%d)",
+ n, bitlen - 1)));
+
+ p = VARBITS(arg1);
+
+ byteNo = n / BITS_PER_BYTE;
+ bitNo = BITS_PER_BYTE - 1 - (n % BITS_PER_BYTE);
+
+ if (p[byteNo] & (1 << bitNo))
+ PG_RETURN_INT32(1);
+ else
+ PG_RETURN_INT32(0);
+}
diff --git a/src/backend/utils/adt/varchar.c b/src/backend/utils/adt/varchar.c
new file mode 100644
index 0000000..63a2073
--- /dev/null
+++ b/src/backend/utils/adt/varchar.c
@@ -0,0 +1,1236 @@
+/*-------------------------------------------------------------------------
+ *
+ * varchar.c
+ * Functions for the built-in types char(n) and varchar(n).
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/varchar.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/detoast.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_type.h"
+#include "common/hashfn.h"
+#include "libpq/pqformat.h"
+#include "mb/pg_wchar.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/supportnodes.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/lsyscache.h"
+#include "utils/pg_locale.h"
+#include "utils/varlena.h"
+
+/* common code for bpchartypmodin and varchartypmodin */
+static int32
+anychar_typmodin(ArrayType *ta, const char *typename)
+{
+ int32 typmod;
+ int32 *tl;
+ int n;
+
+ tl = ArrayGetIntegerTypmods(ta, &n);
+
+ /*
+ * We're not too concerned about a good error message here, because the
+ * grammar shouldn't allow a wrong number of modifiers for CHAR.
+ */
+ if (n != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid type modifier")));
+
+ if (*tl < 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("length for type %s must be at least 1", typename)));
+ if (*tl > MaxAttrSize)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("length for type %s cannot exceed %d",
+ typename, MaxAttrSize)));
+
+ /*
+ * For largely historical reasons, the typmod is VARHDRSZ plus the number
+ * of characters; there is enough client-side code that knows about that
+ * that we'd better not change it.
+ */
+ typmod = VARHDRSZ + *tl;
+
+ return typmod;
+}
+
+/* common code for bpchartypmodout and varchartypmodout */
+static char *
+anychar_typmodout(int32 typmod)
+{
+ char *res = (char *) palloc(64);
+
+ if (typmod > VARHDRSZ)
+ snprintf(res, 64, "(%d)", (int) (typmod - VARHDRSZ));
+ else
+ *res = '\0';
+
+ return res;
+}
+
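+/*
+ * For example (with VARHDRSZ being 4 bytes), varchar(10) is stored with
+ * typmod 14, and anychar_typmodout(14) prints "(10)"; a typmod of -1 means
+ * "no limit" and prints as an empty string.
+ */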
+
+/*
+ * CHAR() and VARCHAR() types are part of the SQL standard. CHAR()
+ * is for blank-padded strings whose length is specified in CREATE TABLE.
+ * VARCHAR is for storing strings whose length is at most the length specified
+ * at CREATE TABLE time.
+ *
+ * It's hard to implement these types because we cannot figure out
+ * the length of the type from the type itself. I changed (hopefully all) the
+ * fmgr calls that invoke input functions of a data type to supply the
+ * length also. (eg. in INSERTs, we have the tupleDescriptor which contains
+ * the length of the attributes and hence the exact length of the char() or
+ * varchar(). We pass this to bpcharin() or varcharin().) In the case where
+ * we cannot determine the length, we pass in -1 instead and the input
+ * converter does not enforce any length check.
+ *
+ * We actually implement this as a varlena so that we don't have to pass in
+ * the length for the comparison functions. (The difference between these
+ * types and "text" is that we truncate and possibly blank-pad the string
+ * at insertion time.)
+ *
+ * - ay 6/95
+ */
+
+
+/*****************************************************************************
+ * bpchar - char() *
+ *****************************************************************************/
+
+/*
+ * bpchar_input -- common guts of bpcharin and bpcharrecv
+ *
+ * s is the input text of length len (may not be null-terminated)
+ * atttypmod is the typmod value to apply
+ *
+ * Note that atttypmod is measured in characters, which
+ * is not necessarily the same as the number of bytes.
+ *
+ * If the input string is too long, raise an error, unless the extra
+ * characters are spaces, in which case they're truncated. (per SQL)
+ */
+static BpChar *
+bpchar_input(const char *s, size_t len, int32 atttypmod)
+{
+ BpChar *result;
+ char *r;
+ size_t maxlen;
+
+ /* If typmod is -1 (or invalid), use the actual string length */
+ if (atttypmod < (int32) VARHDRSZ)
+ maxlen = len;
+ else
+ {
+ size_t charlen; /* number of CHARACTERS in the input */
+
+ maxlen = atttypmod - VARHDRSZ;
+ charlen = pg_mbstrlen_with_len(s, len);
+ if (charlen > maxlen)
+ {
+ /* Verify that extra characters are spaces, and clip them off */
+ size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
+ size_t j;
+
+ /*
+ * at this point, len is the actual BYTE length of the input
+ * string, maxlen is the max number of CHARACTERS allowed for this
+ * bpchar type, mbmaxlen is the length in BYTES of those chars.
+ */
+ for (j = mbmaxlen; j < len; j++)
+ {
+ if (s[j] != ' ')
+ ereport(ERROR,
+ (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
+ errmsg("value too long for type character(%d)",
+ (int) maxlen)));
+ }
+
+ /*
+ * Now we set maxlen to the necessary byte length, not the number
+ * of CHARACTERS!
+ */
+ maxlen = len = mbmaxlen;
+ }
+ else
+ {
+ /*
+ * Now we set maxlen to the necessary byte length, not the number
+ * of CHARACTERS!
+ */
+ maxlen = len + (maxlen - charlen);
+ }
+ }
+
+ result = (BpChar *) palloc(maxlen + VARHDRSZ);
+ SET_VARSIZE(result, maxlen + VARHDRSZ);
+ r = VARDATA(result);
+ memcpy(r, s, len);
+
+ /* blank pad the string if necessary */
+ if (maxlen > len)
+ memset(r + len, ' ', maxlen - len);
+
+ return result;
+}
+
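+/*
+ * A few cases implied by the rules above, assuming a single-byte encoding:
+ *
+ *   'ok' into char(5)      ->  'ok   '  (blank-padded to the declared length)
+ *   'okay  ' into char(4)  ->  'okay'   (trailing spaces clipped silently)
+ *   'okays' into char(4)   ->  error    (non-space characters would be lost)
+ */
+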
+/*
+ * Convert a C string to CHARACTER internal representation. atttypmod
+ * is the declared length of the type plus VARHDRSZ.
+ */
+Datum
+bpcharin(PG_FUNCTION_ARGS)
+{
+ char *s = PG_GETARG_CSTRING(0);
+
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 atttypmod = PG_GETARG_INT32(2);
+ BpChar *result;
+
+ result = bpchar_input(s, strlen(s), atttypmod);
+ PG_RETURN_BPCHAR_P(result);
+}
+
+
+/*
+ * Convert a CHARACTER value to a C string.
+ *
+ * Uses the text conversion functions, which is only appropriate if BpChar
+ * and text are equivalent types.
+ */
+Datum
+bpcharout(PG_FUNCTION_ARGS)
+{
+ Datum txt = PG_GETARG_DATUM(0);
+
+ PG_RETURN_CSTRING(TextDatumGetCString(txt));
+}
+
+/*
+ * bpcharrecv - converts external binary format to bpchar
+ */
+Datum
+bpcharrecv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 atttypmod = PG_GETARG_INT32(2);
+ BpChar *result;
+ char *str;
+ int nbytes;
+
+ str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
+ result = bpchar_input(str, nbytes, atttypmod);
+ pfree(str);
+ PG_RETURN_BPCHAR_P(result);
+}
+
+/*
+ * bpcharsend - converts bpchar to binary format
+ */
+Datum
+bpcharsend(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as textsend, so share code */
+ return textsend(fcinfo);
+}
+
+
+/*
+ * Converts a CHARACTER type to the specified size.
+ *
+ * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
+ * isExplicit is true if this is for an explicit cast to char(N).
+ *
+ * Truncation rules: for an explicit cast, silently truncate to the given
+ * length; for an implicit cast, raise error unless extra characters are
+ * all spaces. (This is sort-of per SQL: the spec would actually have us
+ * raise a "completion condition" for the explicit cast case, but Postgres
+ * hasn't got such a concept.)
+ */
+Datum
+bpchar(PG_FUNCTION_ARGS)
+{
+ BpChar *source = PG_GETARG_BPCHAR_PP(0);
+ int32 maxlen = PG_GETARG_INT32(1);
+ bool isExplicit = PG_GETARG_BOOL(2);
+ BpChar *result;
+ int32 len;
+ char *r;
+ char *s;
+ int i;
+ int charlen; /* number of CHARACTERS in the input string */
+
+ /* No work if typmod is invalid */
+ if (maxlen < (int32) VARHDRSZ)
+ PG_RETURN_BPCHAR_P(source);
+
+ maxlen -= VARHDRSZ;
+
+ len = VARSIZE_ANY_EXHDR(source);
+ s = VARDATA_ANY(source);
+
+ charlen = pg_mbstrlen_with_len(s, len);
+
+ /* No work if supplied data matches typmod already */
+ if (charlen == maxlen)
+ PG_RETURN_BPCHAR_P(source);
+
+ if (charlen > maxlen)
+ {
+ /* Verify that extra characters are spaces, and clip them off */
+ size_t maxmblen;
+
+ maxmblen = pg_mbcharcliplen(s, len, maxlen);
+
+ if (!isExplicit)
+ {
+ for (i = maxmblen; i < len; i++)
+ if (s[i] != ' ')
+ ereport(ERROR,
+ (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
+ errmsg("value too long for type character(%d)",
+ maxlen)));
+ }
+
+ len = maxmblen;
+
+ /*
+ * At this point, maxlen is the necessary byte length, not the number
+ * of CHARACTERS!
+ */
+ maxlen = len;
+ }
+ else
+ {
+ /*
+ * At this point, maxlen is the necessary byte length, not the number
+ * of CHARACTERS!
+ */
+ maxlen = len + (maxlen - charlen);
+ }
+
+ Assert(maxlen >= len);
+
+ result = palloc(maxlen + VARHDRSZ);
+ SET_VARSIZE(result, maxlen + VARHDRSZ);
+ r = VARDATA(result);
+
+ memcpy(r, s, len);
+
+ /* blank pad the string if necessary */
+ if (maxlen > len)
+ memset(r + len, ' ', maxlen - len);
+
+ PG_RETURN_BPCHAR_P(result);
+}
+
+
+/* char_bpchar()
+ * Convert char to bpchar(1).
+ */
+Datum
+char_bpchar(PG_FUNCTION_ARGS)
+{
+ char c = PG_GETARG_CHAR(0);
+ BpChar *result;
+
+ result = (BpChar *) palloc(VARHDRSZ + 1);
+
+ SET_VARSIZE(result, VARHDRSZ + 1);
+ *(VARDATA(result)) = c;
+
+ PG_RETURN_BPCHAR_P(result);
+}
+
+
+/* bpchar_name()
+ * Converts a bpchar() type to a NameData type.
+ */
+Datum
+bpchar_name(PG_FUNCTION_ARGS)
+{
+ BpChar *s = PG_GETARG_BPCHAR_PP(0);
+ char *s_data;
+ Name result;
+ int len;
+
+ len = VARSIZE_ANY_EXHDR(s);
+ s_data = VARDATA_ANY(s);
+
+ /* Truncate oversize input */
+ if (len >= NAMEDATALEN)
+ len = pg_mbcliplen(s_data, len, NAMEDATALEN - 1);
+
+ /* Remove trailing blanks */
+ while (len > 0)
+ {
+ if (s_data[len - 1] != ' ')
+ break;
+ len--;
+ }
+
+ /* We use palloc0 here to ensure result is zero-padded */
+ result = (Name) palloc0(NAMEDATALEN);
+ memcpy(NameStr(*result), s_data, len);
+
+ PG_RETURN_NAME(result);
+}
+
+/* name_bpchar()
+ * Converts a NameData type to a bpchar type.
+ *
+ * Uses the text conversion functions, which is only appropriate if BpChar
+ * and text are equivalent types.
+ */
+Datum
+name_bpchar(PG_FUNCTION_ARGS)
+{
+ Name s = PG_GETARG_NAME(0);
+ BpChar *result;
+
+ result = (BpChar *) cstring_to_text(NameStr(*s));
+ PG_RETURN_BPCHAR_P(result);
+}
+
+Datum
+bpchartypmodin(PG_FUNCTION_ARGS)
+{
+ ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
+
+ PG_RETURN_INT32(anychar_typmodin(ta, "char"));
+}
+
+Datum
+bpchartypmodout(PG_FUNCTION_ARGS)
+{
+ int32 typmod = PG_GETARG_INT32(0);
+
+ PG_RETURN_CSTRING(anychar_typmodout(typmod));
+}
+
+
+/*****************************************************************************
+ * varchar - varchar(n)
+ *
+ * Note: varchar piggybacks on type text for most operations, and so has no
+ * C-coded functions except for I/O and typmod checking.
+ *****************************************************************************/
+
+/*
+ * varchar_input -- common guts of varcharin and varcharrecv
+ *
+ * s is the input text of length len (may not be null-terminated)
+ * atttypmod is the typmod value to apply
+ *
+ * Note that atttypmod is measured in characters, which
+ * is not necessarily the same as the number of bytes.
+ *
+ * If the input string is too long, raise an error, unless the extra
+ * characters are spaces, in which case they're truncated. (per SQL)
+ *
+ * Uses the C string to text conversion function, which is only appropriate
+ * if VarChar and text are equivalent types.
+ */
+static VarChar *
+varchar_input(const char *s, size_t len, int32 atttypmod)
+{
+ VarChar *result;
+ size_t maxlen;
+
+ maxlen = atttypmod - VARHDRSZ;
+
+ if (atttypmod >= (int32) VARHDRSZ && len > maxlen)
+ {
+ /* Verify that extra characters are spaces, and clip them off */
+ size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
+ size_t j;
+
+ for (j = mbmaxlen; j < len; j++)
+ {
+ if (s[j] != ' ')
+ ereport(ERROR,
+ (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
+ errmsg("value too long for type character varying(%d)",
+ (int) maxlen)));
+ }
+
+ len = mbmaxlen;
+ }
+
+ result = (VarChar *) cstring_to_text_with_len(s, len);
+ return result;
+}
+
+/*
+ * Convert a C string to VARCHAR internal representation. atttypmod
+ * is the declared length of the type plus VARHDRSZ.
+ */
+Datum
+varcharin(PG_FUNCTION_ARGS)
+{
+ char *s = PG_GETARG_CSTRING(0);
+
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 atttypmod = PG_GETARG_INT32(2);
+ VarChar *result;
+
+ result = varchar_input(s, strlen(s), atttypmod);
+ PG_RETURN_VARCHAR_P(result);
+}
+
+
+/*
+ * Convert a VARCHAR value to a C string.
+ *
+ * Uses the text to C string conversion function, which is only appropriate
+ * if VarChar and text are equivalent types.
+ */
+Datum
+varcharout(PG_FUNCTION_ARGS)
+{
+ Datum txt = PG_GETARG_DATUM(0);
+
+ PG_RETURN_CSTRING(TextDatumGetCString(txt));
+}
+
+/*
+ * varcharrecv - converts external binary format to varchar
+ */
+Datum
+varcharrecv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+
+#ifdef NOT_USED
+ Oid typelem = PG_GETARG_OID(1);
+#endif
+ int32 atttypmod = PG_GETARG_INT32(2);
+ VarChar *result;
+ char *str;
+ int nbytes;
+
+ str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
+ result = varchar_input(str, nbytes, atttypmod);
+ pfree(str);
+ PG_RETURN_VARCHAR_P(result);
+}
+
+/*
+ * varcharsend - converts varchar to binary format
+ */
+Datum
+varcharsend(PG_FUNCTION_ARGS)
+{
+ /* Exactly the same as textsend, so share code */
+ return textsend(fcinfo);
+}
+
+
+/*
+ * varchar_support()
+ *
+ * Planner support function for the varchar() length coercion function.
+ *
+ * Currently, the only interesting thing we can do is flatten calls that set
+ * the new maximum length >= the previous maximum length. We can ignore the
+ * isExplicit argument, since that only affects truncation cases.
+ */
+Datum
+varchar_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+ Node *ret = NULL;
+
+ if (IsA(rawreq, SupportRequestSimplify))
+ {
+ SupportRequestSimplify *req = (SupportRequestSimplify *) rawreq;
+ FuncExpr *expr = req->fcall;
+ Node *typmod;
+
+ Assert(list_length(expr->args) >= 2);
+
+ typmod = (Node *) lsecond(expr->args);
+
+ if (IsA(typmod, Const) && !((Const *) typmod)->constisnull)
+ {
+ Node *source = (Node *) linitial(expr->args);
+ int32 old_typmod = exprTypmod(source);
+ int32 new_typmod = DatumGetInt32(((Const *) typmod)->constvalue);
+ int32 old_max = old_typmod - VARHDRSZ;
+ int32 new_max = new_typmod - VARHDRSZ;
+
+ if (new_typmod < 0 || (old_typmod >= 0 && old_max <= new_max))
+ ret = relabel_to_typmod(source, new_typmod);
+ }
+ }
+
+ PG_RETURN_POINTER(ret);
+}
+
+/*
+ * Converts a VARCHAR type to the specified size.
+ *
+ * maxlen is the typmod, ie, declared length plus VARHDRSZ bytes.
+ * isExplicit is true if this is for an explicit cast to varchar(N).
+ *
+ * Truncation rules: for an explicit cast, silently truncate to the given
+ * length; for an implicit cast, raise error unless extra characters are
+ * all spaces. (This is sort-of per SQL: the spec would actually have us
+ * raise a "completion condition" for the explicit cast case, but Postgres
+ * hasn't got such a concept.)
+ */
+Datum
+varchar(PG_FUNCTION_ARGS)
+{
+ VarChar *source = PG_GETARG_VARCHAR_PP(0);
+ int32 typmod = PG_GETARG_INT32(1);
+ bool isExplicit = PG_GETARG_BOOL(2);
+ int32 len,
+ maxlen;
+ size_t maxmblen;
+ int i;
+ char *s_data;
+
+ len = VARSIZE_ANY_EXHDR(source);
+ s_data = VARDATA_ANY(source);
+ maxlen = typmod - VARHDRSZ;
+
+ /* No work if typmod is invalid or supplied data fits it already */
+ if (maxlen < 0 || len <= maxlen)
+ PG_RETURN_VARCHAR_P(source);
+
+ /* only reach here if string is too long... */
+
+ /* truncate multibyte string preserving multibyte boundary */
+ maxmblen = pg_mbcharcliplen(s_data, len, maxlen);
+
+ if (!isExplicit)
+ {
+ for (i = maxmblen; i < len; i++)
+ if (s_data[i] != ' ')
+ ereport(ERROR,
+ (errcode(ERRCODE_STRING_DATA_RIGHT_TRUNCATION),
+ errmsg("value too long for type character varying(%d)",
+ maxlen)));
+ }
+
+ PG_RETURN_VARCHAR_P((VarChar *) cstring_to_text_with_len(s_data,
+ maxmblen));
+}
+
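+/*
+ * For example, with the rules above, 'spaces  '::varchar(6) succeeds in both
+ * cast forms because only blanks are dropped, while 'toolong'::varchar(4)
+ * yields 'tool' for an explicit cast but raises an error for an implicit
+ * (assignment) cast.
+ */
+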
+Datum
+varchartypmodin(PG_FUNCTION_ARGS)
+{
+ ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
+
+ PG_RETURN_INT32(anychar_typmodin(ta, "varchar"));
+}
+
+Datum
+varchartypmodout(PG_FUNCTION_ARGS)
+{
+ int32 typmod = PG_GETARG_INT32(0);
+
+ PG_RETURN_CSTRING(anychar_typmodout(typmod));
+}
+
+
+/*****************************************************************************
+ * Exported functions
+ *****************************************************************************/
+
+/* "True" length (not counting trailing blanks) of a BpChar */
+static inline int
+bcTruelen(BpChar *arg)
+{
+ return bpchartruelen(VARDATA_ANY(arg), VARSIZE_ANY_EXHDR(arg));
+}
+
+int
+bpchartruelen(char *s, int len)
+{
+ int i;
+
+ /*
+ * Note that we rely on the assumption that ' ' is a singleton unit on
+ * every supported multibyte server encoding.
+ */
+ for (i = len - 1; i >= 0; i--)
+ {
+ if (s[i] != ' ')
+ break;
+ }
+ return i + 1;
+}
+
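+/*
+ * For example, bpchartruelen("abc  ", 5) returns 3. Because the bpchar
+ * comparison and hashing functions below all go through bcTruelen, trailing
+ * blanks are insignificant: 'ab'::char(4) and 'ab '::char(3) compare equal
+ * and hash identically.
+ */
+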
+Datum
+bpcharlen(PG_FUNCTION_ARGS)
+{
+ BpChar *arg = PG_GETARG_BPCHAR_PP(0);
+ int len;
+
+ /* get number of bytes, ignoring trailing spaces */
+ len = bcTruelen(arg);
+
+ /* in multibyte encoding, convert to number of characters */
+ if (pg_database_encoding_max_length() != 1)
+ len = pg_mbstrlen_with_len(VARDATA_ANY(arg), len);
+
+ PG_RETURN_INT32(len);
+}
+
+Datum
+bpcharoctetlen(PG_FUNCTION_ARGS)
+{
+ Datum arg = PG_GETARG_DATUM(0);
+
+ /* We need not detoast the input at all */
+ PG_RETURN_INT32(toast_raw_datum_size(arg) - VARHDRSZ);
+}
+
+
+/*****************************************************************************
+ * Comparison Functions used for bpchar
+ *
+ * Note: btree indexes need these routines not to leak memory; therefore,
+ * be careful to free working copies of toasted datums. Most places don't
+ * need to be so careful.
+ *****************************************************************************/
+
+static void
+check_collation_set(Oid collid)
+{
+ if (!OidIsValid(collid))
+ {
+ /*
+ * This typically means that the parser could not resolve a conflict
+ * of implicit collations, so report it that way.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for string comparison"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+ }
+}
+
+Datum
+bpchareq(PG_FUNCTION_ARGS)
+{
+ BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
+ BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
+ int len1,
+ len2;
+ bool result;
+ Oid collid = PG_GET_COLLATION();
+ bool locale_is_c = false;
+ pg_locale_t mylocale = 0;
+
+ check_collation_set(collid);
+
+ len1 = bcTruelen(arg1);
+ len2 = bcTruelen(arg2);
+
+ if (lc_collate_is_c(collid))
+ locale_is_c = true;
+ else
+ mylocale = pg_newlocale_from_collation(collid);
+
+ if (locale_is_c || !mylocale || mylocale->deterministic)
+ {
+ /*
+ * Since we only care about equality or not-equality, we can avoid all
+ * the expense of strcoll() here, and just do bitwise comparison.
+ */
+ if (len1 != len2)
+ result = false;
+ else
+ result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
+ }
+ else
+ {
+ result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
+ collid) == 0);
+ }
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+bpcharne(PG_FUNCTION_ARGS)
+{
+ BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
+ BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
+ int len1,
+ len2;
+ bool result;
+ Oid collid = PG_GET_COLLATION();
+ bool locale_is_c = false;
+ pg_locale_t mylocale = 0;
+
+ check_collation_set(collid);
+
+ len1 = bcTruelen(arg1);
+ len2 = bcTruelen(arg2);
+
+ if (lc_collate_is_c(collid))
+ locale_is_c = true;
+ else
+ mylocale = pg_newlocale_from_collation(collid);
+
+ if (locale_is_c || !mylocale || mylocale->deterministic)
+ {
+ /*
+ * Since we only care about equality or not-equality, we can avoid all
+ * the expense of strcoll() here, and just do bitwise comparison.
+ */
+ if (len1 != len2)
+ result = true;
+ else
+ result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
+ }
+ else
+ {
+ result = (varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
+ collid) != 0);
+ }
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+bpcharlt(PG_FUNCTION_ARGS)
+{
+ BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
+ BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = bcTruelen(arg1);
+ len2 = bcTruelen(arg2);
+
+ cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
+ PG_GET_COLLATION());
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(cmp < 0);
+}
+
+Datum
+bpcharle(PG_FUNCTION_ARGS)
+{
+ BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
+ BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = bcTruelen(arg1);
+ len2 = bcTruelen(arg2);
+
+ cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
+ PG_GET_COLLATION());
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(cmp <= 0);
+}
+
+Datum
+bpchargt(PG_FUNCTION_ARGS)
+{
+ BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
+ BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = bcTruelen(arg1);
+ len2 = bcTruelen(arg2);
+
+ cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
+ PG_GET_COLLATION());
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(cmp > 0);
+}
+
+Datum
+bpcharge(PG_FUNCTION_ARGS)
+{
+ BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
+ BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = bcTruelen(arg1);
+ len2 = bcTruelen(arg2);
+
+ cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
+ PG_GET_COLLATION());
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(cmp >= 0);
+}
+
+Datum
+bpcharcmp(PG_FUNCTION_ARGS)
+{
+ BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
+ BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = bcTruelen(arg1);
+ len2 = bcTruelen(arg2);
+
+ cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
+ PG_GET_COLLATION());
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_INT32(cmp);
+}
+
+Datum
+bpchar_sortsupport(PG_FUNCTION_ARGS)
+{
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+ Oid collid = ssup->ssup_collation;
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+ /* Use generic string SortSupport */
+ varstr_sortsupport(ssup, BPCHAROID, collid);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ PG_RETURN_VOID();
+}
+
+Datum
+bpchar_larger(PG_FUNCTION_ARGS)
+{
+ BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
+ BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = bcTruelen(arg1);
+ len2 = bcTruelen(arg2);
+
+ cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
+ PG_GET_COLLATION());
+
+ PG_RETURN_BPCHAR_P((cmp >= 0) ? arg1 : arg2);
+}
+
+Datum
+bpchar_smaller(PG_FUNCTION_ARGS)
+{
+ BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
+ BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = bcTruelen(arg1);
+ len2 = bcTruelen(arg2);
+
+ cmp = varstr_cmp(VARDATA_ANY(arg1), len1, VARDATA_ANY(arg2), len2,
+ PG_GET_COLLATION());
+
+ PG_RETURN_BPCHAR_P((cmp <= 0) ? arg1 : arg2);
+}
+
+
+/*
+ * bpchar needs a specialized hash function because we want to ignore
+ * trailing blanks in comparisons.
+ */
+Datum
+hashbpchar(PG_FUNCTION_ARGS)
+{
+ BpChar *key = PG_GETARG_BPCHAR_PP(0);
+ Oid collid = PG_GET_COLLATION();
+ char *keydata;
+ int keylen;
+ pg_locale_t mylocale = 0;
+ Datum result;
+
+ if (!collid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for string hashing"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+
+ keydata = VARDATA_ANY(key);
+ keylen = bcTruelen(key);
+
+ if (!lc_collate_is_c(collid))
+ mylocale = pg_newlocale_from_collation(collid);
+
+ if (!mylocale || mylocale->deterministic)
+ {
+ result = hash_any((unsigned char *) keydata, keylen);
+ }
+ else
+ {
+#ifdef USE_ICU
+ if (mylocale->provider == COLLPROVIDER_ICU)
+ {
+ int32_t ulen = -1;
+ UChar *uchar = NULL;
+ Size bsize;
+ uint8_t *buf;
+
+ ulen = icu_to_uchar(&uchar, keydata, keylen);
+
+ bsize = ucol_getSortKey(mylocale->info.icu.ucol,
+ uchar, ulen, NULL, 0);
+ buf = palloc(bsize);
+ ucol_getSortKey(mylocale->info.icu.ucol,
+ uchar, ulen, buf, bsize);
+ pfree(uchar);
+
+ result = hash_any(buf, bsize);
+
+ pfree(buf);
+ }
+ else
+#endif
+ /* shouldn't happen */
+ elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
+ }
+
+ /* Avoid leaking memory for toasted inputs */
+ PG_FREE_IF_COPY(key, 0);
+
+ return result;
+}
+
+Datum
+hashbpcharextended(PG_FUNCTION_ARGS)
+{
+ BpChar *key = PG_GETARG_BPCHAR_PP(0);
+ Oid collid = PG_GET_COLLATION();
+ char *keydata;
+ int keylen;
+ pg_locale_t mylocale = 0;
+ Datum result;
+
+ if (!collid)
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for string hashing"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+
+ keydata = VARDATA_ANY(key);
+ keylen = bcTruelen(key);
+
+ if (!lc_collate_is_c(collid))
+ mylocale = pg_newlocale_from_collation(collid);
+
+ if (!mylocale || mylocale->deterministic)
+ {
+ result = hash_any_extended((unsigned char *) keydata, keylen,
+ PG_GETARG_INT64(1));
+ }
+ else
+ {
+#ifdef USE_ICU
+ if (mylocale->provider == COLLPROVIDER_ICU)
+ {
+ int32_t ulen = -1;
+ UChar *uchar = NULL;
+ Size bsize;
+ uint8_t *buf;
+
+ ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
+
+ bsize = ucol_getSortKey(mylocale->info.icu.ucol,
+ uchar, ulen, NULL, 0);
+ buf = palloc(bsize);
+ ucol_getSortKey(mylocale->info.icu.ucol,
+ uchar, ulen, buf, bsize);
+ pfree(uchar);
+
+ result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
+
+ pfree(buf);
+ }
+ else
+#endif
+ /* shouldn't happen */
+ elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
+ }
+
+ PG_FREE_IF_COPY(key, 0);
+
+ return result;
+}
+
+/*
+ * The following operators support character-by-character comparison
+ * of bpchar datums, to allow building indexes suitable for LIKE clauses.
+ * Note that the regular bpchareq/bpcharne comparison operators, and
+ * regular support functions 1 and 2 with "C" collation are assumed to be
+ * compatible with these!
+ */
+
+static int
+internal_bpchar_pattern_compare(BpChar *arg1, BpChar *arg2)
+{
+ int result;
+ int len1,
+ len2;
+
+ len1 = bcTruelen(arg1);
+ len2 = bcTruelen(arg2);
+
+ result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+ if (result != 0)
+ return result;
+ else if (len1 < len2)
+ return -1;
+ else if (len1 > len2)
+ return 1;
+ else
+ return 0;
+}
+
+
+Datum
+bpchar_pattern_lt(PG_FUNCTION_ARGS)
+{
+ BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
+ BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
+ int result;
+
+ result = internal_bpchar_pattern_compare(arg1, arg2);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(result < 0);
+}
+
+
+Datum
+bpchar_pattern_le(PG_FUNCTION_ARGS)
+{
+ BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
+ BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
+ int result;
+
+ result = internal_bpchar_pattern_compare(arg1, arg2);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(result <= 0);
+}
+
+
+Datum
+bpchar_pattern_ge(PG_FUNCTION_ARGS)
+{
+ BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
+ BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
+ int result;
+
+ result = internal_bpchar_pattern_compare(arg1, arg2);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(result >= 0);
+}
+
+
+Datum
+bpchar_pattern_gt(PG_FUNCTION_ARGS)
+{
+ BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
+ BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
+ int result;
+
+ result = internal_bpchar_pattern_compare(arg1, arg2);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(result > 0);
+}
+
+
+Datum
+btbpchar_pattern_cmp(PG_FUNCTION_ARGS)
+{
+ BpChar *arg1 = PG_GETARG_BPCHAR_PP(0);
+ BpChar *arg2 = PG_GETARG_BPCHAR_PP(1);
+ int result;
+
+ result = internal_bpchar_pattern_compare(arg1, arg2);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_INT32(result);
+}
+
+
+Datum
+btbpchar_pattern_sortsupport(PG_FUNCTION_ARGS)
+{
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+ /* Use generic string SortSupport, forcing "C" collation */
+ varstr_sortsupport(ssup, BPCHAROID, C_COLLATION_OID);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ PG_RETURN_VOID();
+}
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
new file mode 100644
index 0000000..3732b79
--- /dev/null
+++ b/src/backend/utils/adt/varlena.c
@@ -0,0 +1,6556 @@
+/*-------------------------------------------------------------------------
+ *
+ * varlena.c
+ * Functions for the variable-length built-in types.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/varlena.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <ctype.h>
+#include <limits.h>
+
+#include "access/detoast.h"
+#include "access/toast_compression.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_type.h"
+#include "common/hashfn.h"
+#include "common/int.h"
+#include "common/unicode_norm.h"
+#include "funcapi.h"
+#include "lib/hyperloglog.h"
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "nodes/execnodes.h"
+#include "parser/scansup.h"
+#include "port/pg_bswap.h"
+#include "regex/regex.h"
+#include "utils/builtins.h"
+#include "utils/bytea.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/pg_locale.h"
+#include "utils/sortsupport.h"
+#include "utils/varlena.h"
+
+
+/* GUC variable */
+int bytea_output = BYTEA_OUTPUT_HEX;
+
+typedef struct varlena unknown;
+typedef struct varlena VarString;
+
+/*
+ * State for text_position_* functions.
+ */
+typedef struct
+{
+ bool is_multibyte_char_in_char; /* need to check char boundaries? */
+
+ char *str1; /* haystack string */
+ char *str2; /* needle string */
+ int len1; /* string lengths in bytes */
+ int len2;
+
+ /* Skip table for Boyer-Moore-Horspool search algorithm: */
+ int skiptablemask; /* mask for ANDing with skiptable subscripts */
+ int skiptable[256]; /* skip distance for given mismatched char */
+
+ char *last_match; /* pointer to last match in 'str1' */
+
+ /*
+ * Sometimes we need to convert the byte position of a match to a
+ * character position. These store the last position that was converted,
+ * so that on the next call, we can continue from that point, rather than
+ * count characters from the very beginning.
+ */
+ char *refpoint; /* pointer within original haystack string */
+ int refpos; /* 0-based character offset of the same point */
+} TextPositionState;
+
+typedef struct
+{
+ char *buf1; /* 1st string, or abbreviation original string
+ * buf */
+ char *buf2; /* 2nd string, or abbreviation strxfrm() buf */
+ int buflen1; /* Allocated length of buf1 */
+ int buflen2; /* Allocated length of buf2 */
+ int last_len1; /* Length of last buf1 string/strxfrm() input */
+ int last_len2; /* Length of last buf2 string/strxfrm() blob */
+ int last_returned; /* Last comparison result (cache) */
+ bool cache_blob; /* Does buf2 contain strxfrm() blob, etc? */
+ bool collate_c;
+ Oid typid; /* Actual datatype (text/bpchar/bytea/name) */
+ hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
+ hyperLogLogState full_card; /* Full key cardinality state */
+ double prop_card; /* Required cardinality proportion */
+ pg_locale_t locale;
+} VarStringSortSupport;
+
+/*
+ * Output data for split_text(): we output either to an array or a table.
+ * tupstore and tupdesc must be set up in advance to output to a table.
+ */
+typedef struct
+{
+ ArrayBuildState *astate;
+ Tuplestorestate *tupstore;
+ TupleDesc tupdesc;
+} SplitTextOutputData;
+
+/*
+ * This should be large enough that most strings will fit, but small enough
+ * that we feel comfortable putting it on the stack
+ */
+#define TEXTBUFLEN 1024
+
+#define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
+#define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
+#define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
+#define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
+#define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
+
+#define DatumGetVarStringP(X) ((VarString *) PG_DETOAST_DATUM(X))
+#define DatumGetVarStringPP(X) ((VarString *) PG_DETOAST_DATUM_PACKED(X))
+
+static int varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
+static int bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
+static int namefastcmp_c(Datum x, Datum y, SortSupport ssup);
+static int varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup);
+static int namefastcmp_locale(Datum x, Datum y, SortSupport ssup);
+static int varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup);
+static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
+static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
+static int32 text_length(Datum str);
+static text *text_catenate(text *t1, text *t2);
+static text *text_substring(Datum str,
+ int32 start,
+ int32 length,
+ bool length_not_specified);
+static text *text_overlay(text *t1, text *t2, int sp, int sl);
+static int text_position(text *t1, text *t2, Oid collid);
+static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state);
+static bool text_position_next(TextPositionState *state);
+static char *text_position_next_internal(char *start_ptr, TextPositionState *state);
+static char *text_position_get_match_ptr(TextPositionState *state);
+static int text_position_get_match_pos(TextPositionState *state);
+static void text_position_cleanup(TextPositionState *state);
+static void check_collation_set(Oid collid);
+static int text_cmp(text *arg1, text *arg2, Oid collid);
+static bytea *bytea_catenate(bytea *t1, bytea *t2);
+static bytea *bytea_substring(Datum str,
+ int S,
+ int L,
+ bool length_not_specified);
+static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl);
+static void appendStringInfoText(StringInfo str, const text *t);
+static bool split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate);
+static void split_text_accum_result(SplitTextOutputData *tstate,
+ text *field_value,
+ text *null_string,
+ Oid collation);
+static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
+ const char *fldsep, const char *null_string);
+static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
+static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
+ int *value);
+static const char *text_format_parse_format(const char *start_ptr,
+ const char *end_ptr,
+ int *argpos, int *widthpos,
+ int *flags, int *width);
+static void text_format_string_conversion(StringInfo buf, char conversion,
+ FmgrInfo *typOutputInfo,
+ Datum value, bool isNull,
+ int flags, int width);
+static void text_format_append_string(StringInfo buf, const char *str,
+ int flags, int width);
+
+
+/*****************************************************************************
+ * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
+ *****************************************************************************/
+
+/*
+ * cstring_to_text
+ *
+ * Create a text value from a null-terminated C string.
+ *
+ * The new text value is freshly palloc'd with a full-size VARHDR.
+ */
+text *
+cstring_to_text(const char *s)
+{
+ return cstring_to_text_with_len(s, strlen(s));
+}
+
+/*
+ * cstring_to_text_with_len
+ *
+ * Same as cstring_to_text except the caller specifies the string length;
+ * the string need not be null-terminated.
+ */
+text *
+cstring_to_text_with_len(const char *s, int len)
+{
+ text *result = (text *) palloc(len + VARHDRSZ);
+
+ SET_VARSIZE(result, len + VARHDRSZ);
+ memcpy(VARDATA(result), s, len);
+
+ return result;
+}
+
+/*
+ * text_to_cstring
+ *
+ * Create a palloc'd, null-terminated C string from a text value.
+ *
+ * We support being passed a compressed or toasted text value.
+ * This is a bit bogus since such values shouldn't really be referred to as
+ * "text *", but it seems useful for robustness. If we didn't handle that
+ * case here, we'd need another routine that did, anyway.
+ */
+char *
+text_to_cstring(const text *t)
+{
+ /* must cast away the const, unfortunately */
+ text *tunpacked = pg_detoast_datum_packed(unconstify(text *, t));
+ int len = VARSIZE_ANY_EXHDR(tunpacked);
+ char *result;
+
+ result = (char *) palloc(len + 1);
+ memcpy(result, VARDATA_ANY(tunpacked), len);
+ result[len] = '\0';
+
+ if (tunpacked != t)
+ pfree(tunpacked);
+
+ return result;
+}
+
+/*
+ * text_to_cstring_buffer
+ *
+ * Copy a text value into a caller-supplied buffer of size dst_len.
+ *
+ * The text string is truncated if necessary to fit. The result is
+ * guaranteed null-terminated (unless dst_len == 0).
+ *
+ * We support being passed a compressed or toasted text value.
+ * This is a bit bogus since such values shouldn't really be referred to as
+ * "text *", but it seems useful for robustness. If we didn't handle that
+ * case here, we'd need another routine that did, anyway.
+ */
+void
+text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
+{
+ /* must cast away the const, unfortunately */
+ text *srcunpacked = pg_detoast_datum_packed(unconstify(text *, src));
+ size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked);
+
+ if (dst_len > 0)
+ {
+ dst_len--;
+ if (dst_len >= src_len)
+ dst_len = src_len;
+ else /* ensure truncation is encoding-safe */
+ dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
+ memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
+ dst[dst_len] = '\0';
+ }
+
+ if (srcunpacked != src)
+ pfree(srcunpacked);
+}
+
+
+/*****************************************************************************
+ * USER I/O ROUTINES *
+ *****************************************************************************/
+
+
+#define VAL(CH) ((CH) - '0')
+#define DIG(VAL) ((VAL) + '0')
+
+/*
+ * byteain - converts from printable representation of byte array
+ *
+ * Non-printable characters must be passed as '\nnn' (octal) and are
+ * converted to internal form. '\' must be passed as '\\'.
+ * ereport(ERROR, ...) if bad form.
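+ *
+ * For example, the escaped input \012 (backslash plus three octal digits)
+ * decodes to a single newline byte, 0x0a; the same value can be written
+ * in hex form as \x0a.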
+ *
+ * BUGS:
+ * The input is scanned twice.
+ * The error checking of input is minimal.
+ */
+Datum
+byteain(PG_FUNCTION_ARGS)
+{
+ char *inputText = PG_GETARG_CSTRING(0);
+ char *tp;
+ char *rp;
+ int bc;
+ bytea *result;
+
+ /* Recognize hex input */
+ if (inputText[0] == '\\' && inputText[1] == 'x')
+ {
+ size_t len = strlen(inputText);
+
+ bc = (len - 2) / 2 + VARHDRSZ; /* maximum possible length */
+ result = palloc(bc);
+ bc = hex_decode(inputText + 2, len - 2, VARDATA(result));
+ SET_VARSIZE(result, bc + VARHDRSZ); /* actual length */
+
+ PG_RETURN_BYTEA_P(result);
+ }
+
+ /* Else, it's the traditional escaped style */
+ for (bc = 0, tp = inputText; *tp != '\0'; bc++)
+ {
+ if (tp[0] != '\\')
+ tp++;
+ else if ((tp[0] == '\\') &&
+ (tp[1] >= '0' && tp[1] <= '3') &&
+ (tp[2] >= '0' && tp[2] <= '7') &&
+ (tp[3] >= '0' && tp[3] <= '7'))
+ tp += 4;
+ else if ((tp[0] == '\\') &&
+ (tp[1] == '\\'))
+ tp += 2;
+ else
+ {
+ /*
+			 * one backslash, not followed by another backslash or three valid octal digits
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s", "bytea")));
+ }
+ }
+
+ bc += VARHDRSZ;
+
+ result = (bytea *) palloc(bc);
+ SET_VARSIZE(result, bc);
+
+ tp = inputText;
+ rp = VARDATA(result);
+ while (*tp != '\0')
+ {
+ if (tp[0] != '\\')
+ *rp++ = *tp++;
+ else if ((tp[0] == '\\') &&
+ (tp[1] >= '0' && tp[1] <= '3') &&
+ (tp[2] >= '0' && tp[2] <= '7') &&
+ (tp[3] >= '0' && tp[3] <= '7'))
+ {
+ bc = VAL(tp[1]);
+ bc <<= 3;
+ bc += VAL(tp[2]);
+ bc <<= 3;
+ *rp++ = bc + VAL(tp[3]);
+
+ tp += 4;
+ }
+ else if ((tp[0] == '\\') &&
+ (tp[1] == '\\'))
+ {
+ *rp++ = '\\';
+ tp += 2;
+ }
+ else
+ {
+ /*
+ * We should never get here. The first pass should not allow it.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s", "bytea")));
+ }
+ }
+
+ PG_RETURN_BYTEA_P(result);
+}
+
+/*
+ * byteaout - converts to printable representation of byte array
+ *
+ * In the traditional escaped format, non-printable characters are
+ * printed as '\nnn' (octal) and '\' as '\\'.
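+ * For example, the three bytes 0x01 0x5c 0x41 print as \x015c41 in hex
+ * mode and as \001\\A in escape mode.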
+ */
+Datum
+byteaout(PG_FUNCTION_ARGS)
+{
+ bytea *vlena = PG_GETARG_BYTEA_PP(0);
+ char *result;
+ char *rp;
+
+ if (bytea_output == BYTEA_OUTPUT_HEX)
+ {
+ /* Print hex format */
+ rp = result = palloc(VARSIZE_ANY_EXHDR(vlena) * 2 + 2 + 1);
+ *rp++ = '\\';
+ *rp++ = 'x';
+ rp += hex_encode(VARDATA_ANY(vlena), VARSIZE_ANY_EXHDR(vlena), rp);
+ }
+ else if (bytea_output == BYTEA_OUTPUT_ESCAPE)
+ {
+ /* Print traditional escaped format */
+ char *vp;
+ uint64 len;
+ int i;
+
+		len = 1;				/* allow for terminating '\0' */
+ vp = VARDATA_ANY(vlena);
+ for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
+ {
+ if (*vp == '\\')
+ len += 2;
+ else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
+ len += 4;
+ else
+ len++;
+ }
+
+ /*
+ * In principle len can't overflow uint32 if the input fit in 1GB, but
+ * for safety let's check rather than relying on palloc's internal
+ * check.
+ */
+ if (len > MaxAllocSize)
+ ereport(ERROR,
+ (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+ errmsg_internal("result of bytea output conversion is too large")));
+ rp = result = (char *) palloc(len);
+
+ vp = VARDATA_ANY(vlena);
+ for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
+ {
+ if (*vp == '\\')
+ {
+ *rp++ = '\\';
+ *rp++ = '\\';
+ }
+ else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
+ {
+ int val; /* holds unprintable chars */
+
+ val = *vp;
+ rp[0] = '\\';
+ rp[3] = DIG(val & 07);
+ val >>= 3;
+ rp[2] = DIG(val & 07);
+ val >>= 3;
+ rp[1] = DIG(val & 03);
+ rp += 4;
+ }
+ else
+ *rp++ = *vp;
+ }
+ }
+ else
+ {
+ elog(ERROR, "unrecognized bytea_output setting: %d",
+ bytea_output);
+ rp = result = NULL; /* keep compiler quiet */
+ }
+ *rp = '\0';
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * bytearecv - converts external binary format to bytea
+ */
+Datum
+bytearecv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ bytea *result;
+ int nbytes;
+
+ nbytes = buf->len - buf->cursor;
+ result = (bytea *) palloc(nbytes + VARHDRSZ);
+ SET_VARSIZE(result, nbytes + VARHDRSZ);
+ pq_copymsgbytes(buf, VARDATA(result), nbytes);
+ PG_RETURN_BYTEA_P(result);
+}
+
+/*
+ * byteasend - converts bytea to binary format
+ *
+ * This is a special case: just copy the input...
+ */
+Datum
+byteasend(PG_FUNCTION_ARGS)
+{
+ bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
+
+ PG_RETURN_BYTEA_P(vlena);
+}
+
+Datum
+bytea_string_agg_transfn(PG_FUNCTION_ARGS)
+{
+ StringInfo state;
+
+ state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
+
+ /* Append the value unless null. */
+ if (!PG_ARGISNULL(1))
+ {
+ bytea *value = PG_GETARG_BYTEA_PP(1);
+
+ /* On the first time through, we ignore the delimiter. */
+ if (state == NULL)
+ state = makeStringAggState(fcinfo);
+ else if (!PG_ARGISNULL(2))
+ {
+ bytea *delim = PG_GETARG_BYTEA_PP(2);
+
+ appendBinaryStringInfo(state, VARDATA_ANY(delim), VARSIZE_ANY_EXHDR(delim));
+ }
+
+ appendBinaryStringInfo(state, VARDATA_ANY(value), VARSIZE_ANY_EXHDR(value));
+ }
+
+ /*
+ * The transition type for string_agg() is declared to be "internal",
+ * which is a pass-by-value type the same size as a pointer.
+ */
+ PG_RETURN_POINTER(state);
+}
+
+Datum
+bytea_string_agg_finalfn(PG_FUNCTION_ARGS)
+{
+ StringInfo state;
+
+ /* cannot be called directly because of internal-type argument */
+ Assert(AggCheckCallContext(fcinfo, NULL));
+
+ state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
+
+ if (state != NULL)
+ {
+ bytea *result;
+
+ result = (bytea *) palloc(state->len + VARHDRSZ);
+ SET_VARSIZE(result, state->len + VARHDRSZ);
+ memcpy(VARDATA(result), state->data, state->len);
+ PG_RETURN_BYTEA_P(result);
+ }
+ else
+ PG_RETURN_NULL();
+}
+
+/*
+ * textin - converts "..." to internal representation
+ */
+Datum
+textin(PG_FUNCTION_ARGS)
+{
+ char *inputText = PG_GETARG_CSTRING(0);
+
+ PG_RETURN_TEXT_P(cstring_to_text(inputText));
+}
+
+/*
+ * textout - converts internal representation to "..."
+ */
+Datum
+textout(PG_FUNCTION_ARGS)
+{
+ Datum txt = PG_GETARG_DATUM(0);
+
+ PG_RETURN_CSTRING(TextDatumGetCString(txt));
+}
+
+/*
+ * textrecv - converts external binary format to text
+ */
+Datum
+textrecv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ text *result;
+ char *str;
+ int nbytes;
+
+ str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
+
+ result = cstring_to_text_with_len(str, nbytes);
+ pfree(str);
+ PG_RETURN_TEXT_P(result);
+}
+
+/*
+ * textsend - converts text to binary format
+ */
+Datum
+textsend(PG_FUNCTION_ARGS)
+{
+ text *t = PG_GETARG_TEXT_PP(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+
+/*
+ * unknownin - converts "..." to internal representation
+ */
+Datum
+unknownin(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+
+ /* representation is same as cstring */
+ PG_RETURN_CSTRING(pstrdup(str));
+}
+
+/*
+ * unknownout - converts internal representation to "..."
+ */
+Datum
+unknownout(PG_FUNCTION_ARGS)
+{
+ /* representation is same as cstring */
+ char *str = PG_GETARG_CSTRING(0);
+
+ PG_RETURN_CSTRING(pstrdup(str));
+}
+
+/*
+ * unknownrecv - converts external binary format to unknown
+ */
+Datum
+unknownrecv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ char *str;
+ int nbytes;
+
+ str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
+ /* representation is same as cstring */
+ PG_RETURN_CSTRING(str);
+}
+
+/*
+ * unknownsend - converts unknown to binary format
+ */
+Datum
+unknownsend(PG_FUNCTION_ARGS)
+{
+ /* representation is same as cstring */
+ char *str = PG_GETARG_CSTRING(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendtext(&buf, str, strlen(str));
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+
+/* ========== PUBLIC ROUTINES ========== */
+
+/*
+ * textlen -
+ * returns the logical length of a text*
+ * (which is less than the VARSIZE of the text*)
+ */
+Datum
+textlen(PG_FUNCTION_ARGS)
+{
+ Datum str = PG_GETARG_DATUM(0);
+
+ /* try to avoid decompressing argument */
+ PG_RETURN_INT32(text_length(str));
+}
+
+/*
+ * text_length -
+ * Does the real work for textlen()
+ *
+ * This is broken out so it can be called directly by other string processing
+ * functions. Note that the argument is passed as a Datum, to indicate that
+ * it may still be in compressed form. We can avoid decompressing it at all
+ * in some cases.
+ */
+static int32
+text_length(Datum str)
+{
+ /* fastpath when max encoding length is one */
+ if (pg_database_encoding_max_length() == 1)
+ PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
+ else
+ {
+ text *t = DatumGetTextPP(str);
+
+ PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(t),
+ VARSIZE_ANY_EXHDR(t)));
+ }
+}
+
+/*
+ * textoctetlen -
+ * returns the physical length of a text*
+ * (which is less than the VARSIZE of the text*)
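+ *
+ * For example, in a UTF-8 database length('héllo') is 5 (characters)
+ * while octet_length('héllo') is 6 (bytes).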
+ */
+Datum
+textoctetlen(PG_FUNCTION_ARGS)
+{
+ Datum str = PG_GETARG_DATUM(0);
+
+ /* We need not detoast the input at all */
+ PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
+}
+
+/*
+ * textcat -
+ * takes two text* and returns a text* that is the concatenation of
+ * the two.
+ *
+ * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
+ * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
+ * Allocate space for output in all cases.
+ * XXX - thomas 1997-07-10
+ */
+Datum
+textcat(PG_FUNCTION_ARGS)
+{
+ text *t1 = PG_GETARG_TEXT_PP(0);
+ text *t2 = PG_GETARG_TEXT_PP(1);
+
+ PG_RETURN_TEXT_P(text_catenate(t1, t2));
+}
+
+/*
+ * text_catenate
+ * Guts of textcat(), broken out so it can be used by other functions
+ *
+ * Arguments can be in short-header form, but not compressed or out-of-line
+ */
+static text *
+text_catenate(text *t1, text *t2)
+{
+ text *result;
+ int len1,
+ len2,
+ len;
+ char *ptr;
+
+ len1 = VARSIZE_ANY_EXHDR(t1);
+ len2 = VARSIZE_ANY_EXHDR(t2);
+
+ /* paranoia ... probably should throw error instead? */
+ if (len1 < 0)
+ len1 = 0;
+ if (len2 < 0)
+ len2 = 0;
+
+ len = len1 + len2 + VARHDRSZ;
+ result = (text *) palloc(len);
+
+ /* Set size of result string... */
+ SET_VARSIZE(result, len);
+
+ /* Fill data field of result string... */
+ ptr = VARDATA(result);
+ if (len1 > 0)
+ memcpy(ptr, VARDATA_ANY(t1), len1);
+ if (len2 > 0)
+ memcpy(ptr + len1, VARDATA_ANY(t2), len2);
+
+ return result;
+}
+
+/*
+ * charlen_to_bytelen()
+ * Compute the number of bytes occupied by n characters starting at *p
+ *
+ * It is caller's responsibility that there actually are n characters;
+ * the string need not be null-terminated.
+ */
+static int
+charlen_to_bytelen(const char *p, int n)
+{
+ if (pg_database_encoding_max_length() == 1)
+ {
+ /* Optimization for single-byte encodings */
+ return n;
+ }
+ else
+ {
+ const char *s;
+
+ for (s = p; n > 0; n--)
+ s += pg_mblen(s);
+
+ return s - p;
+ }
+}
+
+/*
+ * text_substr()
+ * Return a substring starting at the specified position.
+ * - thomas 1997-12-31
+ *
+ * Input:
+ * - string
+ * - starting position (is one-based)
+ * - string length
+ *
+ * If the starting position is zero or less, then return from the start of the string
+ * adjusting the length to be consistent with the "negative start" per SQL.
+ * If the length is less than zero, an error is thrown per SQL99 (formerly
+ *	the remaining string was returned; see the notes below).
+ *
+ * Added multibyte support.
+ * - Tatsuo Ishii 1998-4-21
+ * Changed behavior if starting position is less than one to conform to SQL behavior.
+ * Formerly returned the entire string; now returns a portion.
+ * - Thomas Lockhart 1998-12-10
+ * Now uses faster TOAST-slicing interface
+ * - John Gray 2002-02-22
+ * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
+ * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
+ * error; if E < 1, return '', not entire string). Fixed MB related bug when
+ * S > LC and < LC + 4 sometimes garbage characters are returned.
+ * - Joe Conway 2002-08-10
+ */
+Datum
+text_substr(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
+ PG_GETARG_INT32(1),
+ PG_GETARG_INT32(2),
+ false));
+}
+
+/*
+ * text_substr_no_len -
+ * Wrapper to avoid opr_sanity failure due to
+ * one function accepting a different number of args.
+ */
+Datum
+text_substr_no_len(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
+ PG_GETARG_INT32(1),
+ -1, true));
+}
+
+/*
+ * text_substring -
+ * Does the real work for text_substr() and text_substr_no_len()
+ *
+ * This is broken out so it can be called directly by other string processing
+ * functions. Note that the argument is passed as a Datum, to indicate that
+ * it may still be in compressed/toasted form. We can avoid detoasting all
+ * of it in some cases.
+ *
+ * The result is always a freshly palloc'd datum.
+ */
+static text *
+text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
+{
+ int32 eml = pg_database_encoding_max_length();
+ int32 S = start; /* start position */
+ int32 S1; /* adjusted start position */
+ int32 L1; /* adjusted substring length */
+ int32 E; /* end position */
+
+ /*
+ * SQL99 says S can be zero or negative, but we still must fetch from the
+ * start of the string.
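+	 * (For example, substring('hello' from -2 for 5) has S = -2 and L = 5,
+	 * so E = 3 and the result is 'he', characters 1 and 2 only.)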
+ */
+ S1 = Max(S, 1);
+
+ /* life is easy if the encoding max length is 1 */
+ if (eml == 1)
+ {
+ if (length_not_specified) /* special case - get length to end of
+ * string */
+ L1 = -1;
+ else if (length < 0)
+ {
+ /* SQL99 says to throw an error for E < S, i.e., negative length */
+ ereport(ERROR,
+ (errcode(ERRCODE_SUBSTRING_ERROR),
+ errmsg("negative substring length not allowed")));
+ L1 = -1; /* silence stupider compilers */
+ }
+ else if (pg_add_s32_overflow(S, length, &E))
+ {
+ /*
+ * L could be large enough for S + L to overflow, in which case
+ * the substring must run to end of string.
+ */
+ L1 = -1;
+ }
+ else
+ {
+ /*
+			 * A zero or negative value for the end position can happen if the
+			 * start was zero or negative. SQL99 says to return a zero-length
+ * string.
+ */
+ if (E < 1)
+ return cstring_to_text("");
+
+ L1 = E - S1;
+ }
+
+ /*
+ * If the start position is past the end of the string, SQL99 says to
+ * return a zero-length string -- DatumGetTextPSlice() will do that
+ * for us. We need only convert S1 to zero-based starting position.
+ */
+ return DatumGetTextPSlice(str, S1 - 1, L1);
+ }
+ else if (eml > 1)
+ {
+ /*
+		 * When the encoding max length is > 1, we can't get the character
+		 * length without detoasting, so we'll grab a conservatively large
+		 * slice now and go back later to do the right thing
+ */
+ int32 slice_start;
+ int32 slice_size;
+ int32 slice_strlen;
+ text *slice;
+ int32 E1;
+ int32 i;
+ char *p;
+ char *s;
+ text *ret;
+
+ /*
+ * We need to start at position zero because there is no way to know
+ * in advance which byte offset corresponds to the supplied start
+ * position.
+ */
+ slice_start = 0;
+
+ if (length_not_specified) /* special case - get length to end of
+ * string */
+ slice_size = L1 = -1;
+ else if (length < 0)
+ {
+ /* SQL99 says to throw an error for E < S, i.e., negative length */
+ ereport(ERROR,
+ (errcode(ERRCODE_SUBSTRING_ERROR),
+ errmsg("negative substring length not allowed")));
+ slice_size = L1 = -1; /* silence stupider compilers */
+ }
+ else if (pg_add_s32_overflow(S, length, &E))
+ {
+ /*
+ * L could be large enough for S + L to overflow, in which case
+ * the substring must run to end of string.
+ */
+ slice_size = L1 = -1;
+ }
+ else
+ {
+ /*
+			 * A zero or negative value for the end position can happen if the
+			 * start was zero or negative. SQL99 says to return a zero-length
+ * string.
+ */
+ if (E < 1)
+ return cstring_to_text("");
+
+ /*
+ * if E is past the end of the string, the tuple toaster will
+ * truncate the length for us
+ */
+ L1 = E - S1;
+
+ /*
+ * Total slice size in bytes can't be any longer than the start
+ * position plus substring length times the encoding max length.
+ * If that overflows, we can just use -1.
+ */
+ if (pg_mul_s32_overflow(E, eml, &slice_size))
+ slice_size = -1;
+ }
+
+ /*
+ * If we're working with an untoasted source, no need to do an extra
+ * copying step.
+ */
+ if (VARATT_IS_COMPRESSED(DatumGetPointer(str)) ||
+ VARATT_IS_EXTERNAL(DatumGetPointer(str)))
+ slice = DatumGetTextPSlice(str, slice_start, slice_size);
+ else
+ slice = (text *) DatumGetPointer(str);
+
+ /* see if we got back an empty string */
+ if (VARSIZE_ANY_EXHDR(slice) == 0)
+ {
+ if (slice != (text *) DatumGetPointer(str))
+ pfree(slice);
+ return cstring_to_text("");
+ }
+
+ /* Now we can get the actual length of the slice in MB characters */
+ slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
+ VARSIZE_ANY_EXHDR(slice));
+
+ /*
+ * Check that the start position wasn't > slice_strlen. If so, SQL99
+ * says to return a zero-length string.
+ */
+ if (S1 > slice_strlen)
+ {
+ if (slice != (text *) DatumGetPointer(str))
+ pfree(slice);
+ return cstring_to_text("");
+ }
+
+ /*
+ * Adjust L1 and E1 now that we know the slice string length. Again
+ * remember that S1 is one based, and slice_start is zero based.
+ */
+ if (L1 > -1)
+ E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
+ else
+ E1 = slice_start + 1 + slice_strlen;
+
+ /*
+ * Find the start position in the slice; remember S1 is not zero based
+ */
+ p = VARDATA_ANY(slice);
+ for (i = 0; i < S1 - 1; i++)
+ p += pg_mblen(p);
+
+ /* hang onto a pointer to our start position */
+ s = p;
+
+ /*
+ * Count the actual bytes used by the substring of the requested
+ * length.
+ */
+ for (i = S1; i < E1; i++)
+ p += pg_mblen(p);
+
+ ret = (text *) palloc(VARHDRSZ + (p - s));
+ SET_VARSIZE(ret, VARHDRSZ + (p - s));
+ memcpy(VARDATA(ret), s, (p - s));
+
+ if (slice != (text *) DatumGetPointer(str))
+ pfree(slice);
+
+ return ret;
+ }
+ else
+ elog(ERROR, "invalid backend encoding: encoding max length < 1");
+
+ /* not reached: suppress compiler warning */
+ return NULL;
+}
+
+/*
+ * textoverlay
+ * Replace specified substring of first string with second
+ *
+ * The SQL standard defines OVERLAY() in terms of substring and concatenation.
+ * This code is a direct implementation of what the standard says.
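+ * For example, overlay('Txxxxas' placing 'hom' from 2 for 4) is computed
+ * as substring 1..1 ('T') || 'hom' || substring from 6 ('as') = 'Thomas'.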
+ */
+Datum
+textoverlay(PG_FUNCTION_ARGS)
+{
+ text *t1 = PG_GETARG_TEXT_PP(0);
+ text *t2 = PG_GETARG_TEXT_PP(1);
+ int sp = PG_GETARG_INT32(2); /* substring start position */
+ int sl = PG_GETARG_INT32(3); /* substring length */
+
+ PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
+}
+
+Datum
+textoverlay_no_len(PG_FUNCTION_ARGS)
+{
+ text *t1 = PG_GETARG_TEXT_PP(0);
+ text *t2 = PG_GETARG_TEXT_PP(1);
+ int sp = PG_GETARG_INT32(2); /* substring start position */
+ int sl;
+
+ sl = text_length(PointerGetDatum(t2)); /* defaults to length(t2) */
+ PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
+}
+
+static text *
+text_overlay(text *t1, text *t2, int sp, int sl)
+{
+ text *result;
+ text *s1;
+ text *s2;
+ int sp_pl_sl;
+
+ /*
+ * Check for possible integer-overflow cases. For negative sp, throw a
+ * "substring length" error because that's what should be expected
+ * according to the spec's definition of OVERLAY().
+ */
+ if (sp <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_SUBSTRING_ERROR),
+ errmsg("negative substring length not allowed")));
+ if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+
+ s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
+ s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
+ result = text_catenate(s1, t2);
+ result = text_catenate(result, s2);
+
+ return result;
+}
+
+/*
+ * textpos -
+ * Return the position of the specified substring.
+ * Implements the SQL POSITION() function.
+ * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
+ * - thomas 1997-07-27
+ */
+Datum
+textpos(PG_FUNCTION_ARGS)
+{
+ text *str = PG_GETARG_TEXT_PP(0);
+ text *search_str = PG_GETARG_TEXT_PP(1);
+
+ PG_RETURN_INT32((int32) text_position(str, search_str, PG_GET_COLLATION()));
+}
+
+/*
+ * text_position -
+ * Does the real work for textpos()
+ *
+ * Inputs:
+ * t1 - string to be searched
+ * t2 - pattern to match within t1
+ * Result:
+ * Character index of the first matched char, starting from 1,
+ * or 0 if no match.
+ *
+ * This is broken out so it can be called directly by other string processing
+ * functions.
+ */
+static int
+text_position(text *t1, text *t2, Oid collid)
+{
+ TextPositionState state;
+ int result;
+
+ /* Empty needle always matches at position 1 */
+ if (VARSIZE_ANY_EXHDR(t2) < 1)
+ return 1;
+
+ /* Otherwise, can't match if haystack is shorter than needle */
+ if (VARSIZE_ANY_EXHDR(t1) < VARSIZE_ANY_EXHDR(t2))
+ return 0;
+
+ text_position_setup(t1, t2, collid, &state);
+ if (!text_position_next(&state))
+ result = 0;
+ else
+ result = text_position_get_match_pos(&state);
+ text_position_cleanup(&state);
+ return result;
+}
+
+
+/*
+ * text_position_setup, text_position_next, text_position_cleanup -
+ * Component steps of text_position()
+ *
+ * These are broken out so that a string can be efficiently searched for
+ * multiple occurrences of the same pattern. text_position_next may be
+ * called multiple times, and it advances to the next match on each call.
+ * text_position_get_match_ptr() and text_position_get_match_pos() return
+ * a pointer or 1-based character position of the last match, respectively.
+ *
+ * The "state" variable is normally just a local variable in the caller.
+ *
+ * NOTE: text_position_next skips over the matched portion. For example,
+ * searching for "xx" in "xxx" returns only one match, not two.
+ */
+
+static void
+text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
+{
+ int len1 = VARSIZE_ANY_EXHDR(t1);
+ int len2 = VARSIZE_ANY_EXHDR(t2);
+ pg_locale_t mylocale = 0;
+
+ check_collation_set(collid);
+
+ if (!lc_collate_is_c(collid))
+ mylocale = pg_newlocale_from_collation(collid);
+
+ if (mylocale && !mylocale->deterministic)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("nondeterministic collations are not supported for substring searches")));
+
+ Assert(len1 > 0);
+ Assert(len2 > 0);
+
+ /*
+ * Even with a multi-byte encoding, we perform the search using the raw
+ * byte sequence, ignoring multibyte issues. For UTF-8, that works fine,
+ * because in UTF-8 the byte sequence of one character cannot contain
+ * another character. For other multi-byte encodings, we do the search
+ * initially as a simple byte search, ignoring multibyte issues, but
+ * verify afterwards that the match we found is at a character boundary,
+ * and continue the search if it was a false match.
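+	 * (In some legacy encodings, for instance, the trailing byte of a
+	 * two-byte character can fall in the ASCII range, so a byte-wise match
+	 * can start in the middle of a character.)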
+ */
+ if (pg_database_encoding_max_length() == 1)
+ state->is_multibyte_char_in_char = false;
+ else if (GetDatabaseEncoding() == PG_UTF8)
+ state->is_multibyte_char_in_char = false;
+ else
+ state->is_multibyte_char_in_char = true;
+
+ state->str1 = VARDATA_ANY(t1);
+ state->str2 = VARDATA_ANY(t2);
+ state->len1 = len1;
+ state->len2 = len2;
+ state->last_match = NULL;
+ state->refpoint = state->str1;
+ state->refpos = 0;
+
+ /*
+ * Prepare the skip table for Boyer-Moore-Horspool searching. In these
+ * notes we use the terminology that the "haystack" is the string to be
+ * searched (t1) and the "needle" is the pattern being sought (t2).
+ *
+ * If the needle is empty or bigger than the haystack then there is no
+ * point in wasting cycles initializing the table. We also choose not to
+ * use B-M-H for needles of length 1, since the skip table can't possibly
+ * save anything in that case.
+ */
+ if (len1 >= len2 && len2 > 1)
+ {
+ int searchlength = len1 - len2;
+ int skiptablemask;
+ int last;
+ int i;
+ const char *str2 = state->str2;
+
+ /*
+ * First we must determine how much of the skip table to use. The
+ * declaration of TextPositionState allows up to 256 elements, but for
+ * short search problems we don't really want to have to initialize so
+ * many elements --- it would take too long in comparison to the
+ * actual search time. So we choose a useful skip table size based on
+ * the haystack length minus the needle length. The closer the needle
+ * length is to the haystack length the less useful skipping becomes.
+ *
+ * Note: since we use bit-masking to select table elements, the skip
+ * table size MUST be a power of 2, and so the mask must be 2^N-1.
+ */
+ if (searchlength < 16)
+ skiptablemask = 3;
+ else if (searchlength < 64)
+ skiptablemask = 7;
+ else if (searchlength < 128)
+ skiptablemask = 15;
+ else if (searchlength < 512)
+ skiptablemask = 31;
+ else if (searchlength < 2048)
+ skiptablemask = 63;
+ else if (searchlength < 4096)
+ skiptablemask = 127;
+ else
+ skiptablemask = 255;
+ state->skiptablemask = skiptablemask;
+
+ /*
+ * Initialize the skip table. We set all elements to the needle
+ * length, since this is the correct skip distance for any character
+ * not found in the needle.
+ */
+ for (i = 0; i <= skiptablemask; i++)
+ state->skiptable[i] = len2;
+
+ /*
+ * Now examine the needle. For each character except the last one,
+ * set the corresponding table element to the appropriate skip
+ * distance. Note that when two characters share the same skip table
+ * entry, the one later in the needle must determine the skip
+ * distance.
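+		 *
+		 * Illustration (assuming a full 256-entry table, so no masking
+		 * collisions): for the needle "abc", skiptable['a'] = 2 and
+		 * skiptable['b'] = 1, while every other entry, including 'c',
+		 * stays at 3, the full needle length.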
+ */
+ last = len2 - 1;
+
+ for (i = 0; i < last; i++)
+ state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
+ }
+}
+
+/*
+ * Advance to the next match, starting from the end of the previous match
+ * (or the beginning of the string, on first call). Returns true if a match
+ * is found.
+ *
+ * Note that this refuses to match an empty-string needle. Most callers
+ * will have handled that case specially and we'll never see it here.
+ */
+static bool
+text_position_next(TextPositionState *state)
+{
+ int needle_len = state->len2;
+ char *start_ptr;
+ char *matchptr;
+
+ if (needle_len <= 0)
+ return false; /* result for empty pattern */
+
+ /* Start from the point right after the previous match. */
+ if (state->last_match)
+ start_ptr = state->last_match + needle_len;
+ else
+ start_ptr = state->str1;
+
+retry:
+ matchptr = text_position_next_internal(start_ptr, state);
+
+ if (!matchptr)
+ return false;
+
+ /*
+ * Found a match for the byte sequence. If this is a multibyte encoding,
+ * where one character's byte sequence can appear inside a longer
+ * multi-byte character, we need to verify that the match was at a
+ * character boundary, not in the middle of a multi-byte character.
+ */
+ if (state->is_multibyte_char_in_char)
+ {
+ /* Walk one character at a time, until we reach the match. */
+
+ /* the search should never move backwards. */
+ Assert(state->refpoint <= matchptr);
+
+ while (state->refpoint < matchptr)
+ {
+ /* step to next character. */
+ state->refpoint += pg_mblen(state->refpoint);
+ state->refpos++;
+
+ /*
+ * If we stepped over the match's start position, then it was a
+ * false positive, where the byte sequence appeared in the middle
+ * of a multi-byte character. Skip it, and continue the search at
+ * the next character boundary.
+ */
+ if (state->refpoint > matchptr)
+ {
+ start_ptr = state->refpoint;
+ goto retry;
+ }
+ }
+ }
+
+ state->last_match = matchptr;
+ return true;
+}
+
+/*
+ * Subroutine of text_position_next(). This searches for the raw byte
+ * sequence, ignoring any multi-byte encoding issues. Returns the first
+ * match starting at 'start_ptr', or NULL if no match is found.
+ */
+static char *
+text_position_next_internal(char *start_ptr, TextPositionState *state)
+{
+ int haystack_len = state->len1;
+ int needle_len = state->len2;
+ int skiptablemask = state->skiptablemask;
+ const char *haystack = state->str1;
+ const char *needle = state->str2;
+ const char *haystack_end = &haystack[haystack_len];
+ const char *hptr;
+
+ Assert(start_ptr >= haystack && start_ptr <= haystack_end);
+
+ if (needle_len == 1)
+ {
+ /* No point in using B-M-H for a one-character needle */
+ char nchar = *needle;
+
+ hptr = start_ptr;
+ while (hptr < haystack_end)
+ {
+ if (*hptr == nchar)
+ return (char *) hptr;
+ hptr++;
+ }
+ }
+ else
+ {
+ const char *needle_last = &needle[needle_len - 1];
+
+ /* Start at startpos plus the length of the needle */
+ hptr = start_ptr + needle_len - 1;
+ while (hptr < haystack_end)
+ {
+ /* Match the needle scanning *backward* */
+ const char *nptr;
+ const char *p;
+
+ nptr = needle_last;
+ p = hptr;
+ while (*nptr == *p)
+ {
+				/* Matched it all? If so, return a pointer to the match */
+ if (nptr == needle)
+ return (char *) p;
+ nptr--, p--;
+ }
+
+ /*
+ * No match, so use the haystack char at hptr to decide how far to
+ * advance. If the needle had any occurrence of that character
+ * (or more precisely, one sharing the same skiptable entry)
+ * before its last character, then we advance far enough to align
+ * the last such needle character with that haystack position.
+ * Otherwise we can advance by the whole needle length.
+ */
+ hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
+ }
+ }
+
+	return NULL;				/* not found */
+}
+
+/*
+ * Return a pointer to the current match.
+ *
+ * The returned pointer points into the original haystack string.
+ */
+static char *
+text_position_get_match_ptr(TextPositionState *state)
+{
+ return state->last_match;
+}
+
+/*
+ * Return the offset of the current match.
+ *
+ * The offset is in characters, 1-based.
+ */
+static int
+text_position_get_match_pos(TextPositionState *state)
+{
+ /* Convert the byte position to char position. */
+ state->refpos += pg_mbstrlen_with_len(state->refpoint,
+ state->last_match - state->refpoint);
+ state->refpoint = state->last_match;
+ return state->refpos + 1;
+}
+
+/*
+ * Reset search state to the initial state installed by text_position_setup.
+ *
+ * The next call to text_position_next will search from the beginning
+ * of the string.
+ */
+static void
+text_position_reset(TextPositionState *state)
+{
+ state->last_match = NULL;
+ state->refpoint = state->str1;
+ state->refpos = 0;
+}
+
+static void
+text_position_cleanup(TextPositionState *state)
+{
+ /* no cleanup needed */
+}
+
+
+static void
+check_collation_set(Oid collid)
+{
+ if (!OidIsValid(collid))
+ {
+ /*
+ * This typically means that the parser could not resolve a conflict
+ * of implicit collations, so report it that way.
+ */
+ ereport(ERROR,
+ (errcode(ERRCODE_INDETERMINATE_COLLATION),
+ errmsg("could not determine which collation to use for string comparison"),
+ errhint("Use the COLLATE clause to set the collation explicitly.")));
+ }
+}
+
+/* varstr_cmp()
+ * Comparison function for text strings with given lengths.
+ * Includes locale support, but must copy strings to temporary memory
+ * to allow null-termination for inputs to strcoll().
+ * Returns an integer less than, equal to, or greater than zero, indicating
+ * whether arg1 is less than, equal to, or greater than arg2.
+ *
+ * Note: many functions that depend on this are marked leakproof; therefore,
+ * avoid reporting the actual contents of the input when throwing errors.
+ * All errors herein should be things that can't happen except on corrupt
+ * data, anyway; otherwise we will have trouble with indexing strings that
+ * would cause them.
+ */
+int
+varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
+{
+ int result;
+
+ check_collation_set(collid);
+
+ /*
+ * Unfortunately, there is no strncoll(), so in the non-C locale case we
+ * have to do some memory copying. This turns out to be significantly
+ * slower, so we optimize the case where LC_COLLATE is C. We also try to
+ * optimize relatively-short strings by avoiding palloc/pfree overhead.
+ */
+ if (lc_collate_is_c(collid))
+ {
+ result = memcmp(arg1, arg2, Min(len1, len2));
+ if ((result == 0) && (len1 != len2))
+ result = (len1 < len2) ? -1 : 1;
+ }
+ else
+ {
+ char a1buf[TEXTBUFLEN];
+ char a2buf[TEXTBUFLEN];
+ char *a1p,
+ *a2p;
+ pg_locale_t mylocale;
+
+ mylocale = pg_newlocale_from_collation(collid);
+
+ /*
+ * memcmp() can't tell us which of two unequal strings sorts first,
+ * but it's a cheap way to tell if they're equal. Testing shows that
+ * memcmp() followed by strcoll() is only trivially slower than
+ * strcoll() by itself, so we don't lose much if this doesn't work out
+ * very often, and if it does - for example, because there are many
+ * equal strings in the input - then we win big by avoiding expensive
+ * collation-aware comparisons.
+ */
+ if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
+ return 0;
+
+#ifdef WIN32
+ /* Win32 does not have UTF-8, so we need to map to UTF-16 */
+ if (GetDatabaseEncoding() == PG_UTF8
+ && (!mylocale || mylocale->provider == COLLPROVIDER_LIBC))
+ {
+ int a1len;
+ int a2len;
+ int r;
+
+ if (len1 >= TEXTBUFLEN / 2)
+ {
+ a1len = len1 * 2 + 2;
+ a1p = palloc(a1len);
+ }
+ else
+ {
+ a1len = TEXTBUFLEN;
+ a1p = a1buf;
+ }
+ if (len2 >= TEXTBUFLEN / 2)
+ {
+ a2len = len2 * 2 + 2;
+ a2p = palloc(a2len);
+ }
+ else
+ {
+ a2len = TEXTBUFLEN;
+ a2p = a2buf;
+ }
+
+ /* stupid Microsloth API does not work for zero-length input */
+ if (len1 == 0)
+ r = 0;
+ else
+ {
+ r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
+ (LPWSTR) a1p, a1len / 2);
+ if (!r)
+ ereport(ERROR,
+ (errmsg("could not convert string to UTF-16: error code %lu",
+ GetLastError())));
+ }
+ ((LPWSTR) a1p)[r] = 0;
+
+ if (len2 == 0)
+ r = 0;
+ else
+ {
+ r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
+ (LPWSTR) a2p, a2len / 2);
+ if (!r)
+ ereport(ERROR,
+ (errmsg("could not convert string to UTF-16: error code %lu",
+ GetLastError())));
+ }
+ ((LPWSTR) a2p)[r] = 0;
+
+ errno = 0;
+#ifdef HAVE_LOCALE_T
+ if (mylocale)
+ result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, mylocale->info.lt);
+ else
+#endif
+ result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
+ if (result == 2147483647) /* _NLSCMPERROR; missing from mingw
+ * headers */
+ ereport(ERROR,
+ (errmsg("could not compare Unicode strings: %m")));
+
+ /* Break tie if necessary. */
+ if (result == 0 &&
+ (!mylocale || mylocale->deterministic))
+ {
+ result = memcmp(arg1, arg2, Min(len1, len2));
+ if ((result == 0) && (len1 != len2))
+ result = (len1 < len2) ? -1 : 1;
+ }
+
+ if (a1p != a1buf)
+ pfree(a1p);
+ if (a2p != a2buf)
+ pfree(a2p);
+
+ return result;
+ }
+#endif /* WIN32 */
+
+ if (len1 >= TEXTBUFLEN)
+ a1p = (char *) palloc(len1 + 1);
+ else
+ a1p = a1buf;
+ if (len2 >= TEXTBUFLEN)
+ a2p = (char *) palloc(len2 + 1);
+ else
+ a2p = a2buf;
+
+ memcpy(a1p, arg1, len1);
+ a1p[len1] = '\0';
+ memcpy(a2p, arg2, len2);
+ a2p[len2] = '\0';
+
+ if (mylocale)
+ {
+ if (mylocale->provider == COLLPROVIDER_ICU)
+ {
+#ifdef USE_ICU
+#ifdef HAVE_UCOL_STRCOLLUTF8
+ if (GetDatabaseEncoding() == PG_UTF8)
+ {
+ UErrorCode status;
+
+ status = U_ZERO_ERROR;
+ result = ucol_strcollUTF8(mylocale->info.icu.ucol,
+ arg1, len1,
+ arg2, len2,
+ &status);
+ if (U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("collation failed: %s", u_errorName(status))));
+ }
+ else
+#endif
+ {
+ int32_t ulen1,
+ ulen2;
+ UChar *uchar1,
+ *uchar2;
+
+ ulen1 = icu_to_uchar(&uchar1, arg1, len1);
+ ulen2 = icu_to_uchar(&uchar2, arg2, len2);
+
+ result = ucol_strcoll(mylocale->info.icu.ucol,
+ uchar1, ulen1,
+ uchar2, ulen2);
+
+ pfree(uchar1);
+ pfree(uchar2);
+ }
+#else /* not USE_ICU */
+ /* shouldn't happen */
+ elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
+#endif /* not USE_ICU */
+ }
+ else
+ {
+#ifdef HAVE_LOCALE_T
+ result = strcoll_l(a1p, a2p, mylocale->info.lt);
+#else
+ /* shouldn't happen */
+ elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
+#endif
+ }
+ }
+ else
+ result = strcoll(a1p, a2p);
+
+ /* Break tie if necessary. */
+ if (result == 0 &&
+ (!mylocale || mylocale->deterministic))
+ result = strcmp(a1p, a2p);
+
+ if (a1p != a1buf)
+ pfree(a1p);
+ if (a2p != a2buf)
+ pfree(a2p);
+ }
+
+ return result;
+}
+
+/* text_cmp()
+ * Internal comparison function for text strings.
+ * Returns -1, 0 or 1
+ */
+static int
+text_cmp(text *arg1, text *arg2, Oid collid)
+{
+ char *a1p,
+ *a2p;
+ int len1,
+ len2;
+
+ a1p = VARDATA_ANY(arg1);
+ a2p = VARDATA_ANY(arg2);
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ return varstr_cmp(a1p, len1, a2p, len2, collid);
+}
+
+/*
+ * Comparison functions for text strings.
+ *
+ * Note: btree indexes need these routines not to leak memory; therefore,
+ * be careful to free working copies of toasted datums. Most places don't
+ * need to be so careful.
+ */
+
+Datum
+texteq(PG_FUNCTION_ARGS)
+{
+ Oid collid = PG_GET_COLLATION();
+ bool locale_is_c = false;
+ pg_locale_t mylocale = 0;
+ bool result;
+
+ check_collation_set(collid);
+
+ if (lc_collate_is_c(collid))
+ locale_is_c = true;
+ else
+ mylocale = pg_newlocale_from_collation(collid);
+
+ if (locale_is_c || !mylocale || mylocale->deterministic)
+ {
+ Datum arg1 = PG_GETARG_DATUM(0);
+ Datum arg2 = PG_GETARG_DATUM(1);
+ Size len1,
+ len2;
+
+ /*
+ * Since we only care about equality or not-equality, we can avoid all
+ * the expense of strcoll() here, and just do bitwise comparison. In
+ * fact, we don't even have to do a bitwise comparison if we can show
+ * the lengths of the strings are unequal; which might save us from
+ * having to detoast one or both values.
+ */
+ len1 = toast_raw_datum_size(arg1);
+ len2 = toast_raw_datum_size(arg2);
+ if (len1 != len2)
+ result = false;
+ else
+ {
+ text *targ1 = DatumGetTextPP(arg1);
+ text *targ2 = DatumGetTextPP(arg2);
+
+ result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
+ len1 - VARHDRSZ) == 0);
+
+ PG_FREE_IF_COPY(targ1, 0);
+ PG_FREE_IF_COPY(targ2, 1);
+ }
+ }
+ else
+ {
+ text *arg1 = PG_GETARG_TEXT_PP(0);
+ text *arg2 = PG_GETARG_TEXT_PP(1);
+
+ result = (text_cmp(arg1, arg2, collid) == 0);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+ }
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+textne(PG_FUNCTION_ARGS)
+{
+ Oid collid = PG_GET_COLLATION();
+ bool locale_is_c = false;
+ pg_locale_t mylocale = 0;
+ bool result;
+
+ check_collation_set(collid);
+
+ if (lc_collate_is_c(collid))
+ locale_is_c = true;
+ else
+ mylocale = pg_newlocale_from_collation(collid);
+
+ if (locale_is_c || !mylocale || mylocale->deterministic)
+ {
+ Datum arg1 = PG_GETARG_DATUM(0);
+ Datum arg2 = PG_GETARG_DATUM(1);
+ Size len1,
+ len2;
+
+ /* See comment in texteq() */
+ len1 = toast_raw_datum_size(arg1);
+ len2 = toast_raw_datum_size(arg2);
+ if (len1 != len2)
+ result = true;
+ else
+ {
+ text *targ1 = DatumGetTextPP(arg1);
+ text *targ2 = DatumGetTextPP(arg2);
+
+ result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
+ len1 - VARHDRSZ) != 0);
+
+ PG_FREE_IF_COPY(targ1, 0);
+ PG_FREE_IF_COPY(targ2, 1);
+ }
+ }
+ else
+ {
+ text *arg1 = PG_GETARG_TEXT_PP(0);
+ text *arg2 = PG_GETARG_TEXT_PP(1);
+
+ result = (text_cmp(arg1, arg2, collid) != 0);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+ }
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+text_lt(PG_FUNCTION_ARGS)
+{
+ text *arg1 = PG_GETARG_TEXT_PP(0);
+ text *arg2 = PG_GETARG_TEXT_PP(1);
+ bool result;
+
+ result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+text_le(PG_FUNCTION_ARGS)
+{
+ text *arg1 = PG_GETARG_TEXT_PP(0);
+ text *arg2 = PG_GETARG_TEXT_PP(1);
+ bool result;
+
+ result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+text_gt(PG_FUNCTION_ARGS)
+{
+ text *arg1 = PG_GETARG_TEXT_PP(0);
+ text *arg2 = PG_GETARG_TEXT_PP(1);
+ bool result;
+
+ result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+text_ge(PG_FUNCTION_ARGS)
+{
+ text *arg1 = PG_GETARG_TEXT_PP(0);
+ text *arg2 = PG_GETARG_TEXT_PP(1);
+ bool result;
+
+ result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+text_starts_with(PG_FUNCTION_ARGS)
+{
+ Datum arg1 = PG_GETARG_DATUM(0);
+ Datum arg2 = PG_GETARG_DATUM(1);
+ Oid collid = PG_GET_COLLATION();
+ pg_locale_t mylocale = 0;
+ bool result;
+ Size len1,
+ len2;
+
+ check_collation_set(collid);
+
+ if (!lc_collate_is_c(collid))
+ mylocale = pg_newlocale_from_collation(collid);
+
+ if (mylocale && !mylocale->deterministic)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("nondeterministic collations are not supported for substring searches")));
+
+ len1 = toast_raw_datum_size(arg1);
+ len2 = toast_raw_datum_size(arg2);
+ if (len2 > len1)
+ result = false;
+ else
+ {
+ text *targ1 = text_substring(arg1, 1, len2, false);
+ text *targ2 = DatumGetTextPP(arg2);
+
+ result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
+ VARSIZE_ANY_EXHDR(targ2)) == 0);
+
+ PG_FREE_IF_COPY(targ1, 0);
+ PG_FREE_IF_COPY(targ2, 1);
+ }
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+bttextcmp(PG_FUNCTION_ARGS)
+{
+ text *arg1 = PG_GETARG_TEXT_PP(0);
+ text *arg2 = PG_GETARG_TEXT_PP(1);
+ int32 result;
+
+ result = text_cmp(arg1, arg2, PG_GET_COLLATION());
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_INT32(result);
+}
+
+Datum
+bttextsortsupport(PG_FUNCTION_ARGS)
+{
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+ Oid collid = ssup->ssup_collation;
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+ /* Use generic string SortSupport */
+ varstr_sortsupport(ssup, TEXTOID, collid);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Generic sortsupport interface for character type's operator classes.
+ * Includes locale support, and support for BpChar semantics (i.e. removing
+ * trailing spaces before comparison).
+ *
+ * Relies on the assumption that text, VarChar, BpChar, and bytea all have the
+ * same representation. Callers that always use the C collation (e.g.
+ * non-collatable type callers like bytea) may have NUL bytes in their strings;
+ * this will not work with any other collation, though.
+ */
+void
+varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
+{
+ bool abbreviate = ssup->abbreviate;
+ bool collate_c = false;
+ VarStringSortSupport *sss;
+ pg_locale_t locale = 0;
+
+ check_collation_set(collid);
+
+ /*
+ * If possible, set ssup->comparator to a function which can be used to
+ * directly compare two datums. If we can do this, we'll avoid the
+ * overhead of a trip through the fmgr layer for every comparison, which
+ * can be substantial.
+ *
+ * Most typically, we'll set the comparator to varlenafastcmp_locale,
+ * which uses strcoll() to perform comparisons. We use that for the
+ * BpChar case too, but type NAME uses namefastcmp_locale. However, if
+ * LC_COLLATE = C, we can make things quite a bit faster with
+ * varstrfastcmp_c, bpcharfastcmp_c, or namefastcmp_c, all of which use
+ * memcmp() rather than strcoll().
+ */
+ if (lc_collate_is_c(collid))
+ {
+ if (typid == BPCHAROID)
+ ssup->comparator = bpcharfastcmp_c;
+ else if (typid == NAMEOID)
+ {
+ ssup->comparator = namefastcmp_c;
+ /* Not supporting abbreviation with type NAME, for now */
+ abbreviate = false;
+ }
+ else
+ ssup->comparator = varstrfastcmp_c;
+
+ collate_c = true;
+ }
+ else
+ {
+ /*
+ * We need a collation-sensitive comparison. To make things faster,
+ * we'll figure out the collation based on the locale id and cache the
+ * result.
+ */
+ locale = pg_newlocale_from_collation(collid);
+
+ /*
+ * There is a further exception on Windows. When the database
+ * encoding is UTF-8 and we are not using the C collation, complex
+ * hacks are required. We don't currently have a comparator that
+ * handles that case, so we fall back on the slow method of having the
+ * sort code invoke bttextcmp() (in the case of text) via the fmgr
+ * trampoline. ICU locales work just the same on Windows, however.
+ */
+#ifdef WIN32
+ if (GetDatabaseEncoding() == PG_UTF8 &&
+ !(locale && locale->provider == COLLPROVIDER_ICU))
+ return;
+#endif
+
+ /*
+ * We use varlenafastcmp_locale except for type NAME.
+ */
+ if (typid == NAMEOID)
+ {
+ ssup->comparator = namefastcmp_locale;
+ /* Not supporting abbreviation with type NAME, for now */
+ abbreviate = false;
+ }
+ else
+ ssup->comparator = varlenafastcmp_locale;
+ }
+
+ /*
+ * Unfortunately, it seems that abbreviation for non-C collations is
+ * broken on many common platforms; testing of multiple versions of glibc
+ * reveals that, for many locales, strcoll() and strxfrm() do not return
+	 * consistent results, which is fatal to this optimization. While
+	 * Cygwin is the only other libc so far shown to have a problem, we take
+	 * the conservative course of action for now and disable
+ * this categorically. (Users who are certain this isn't a problem on
+ * their system can define TRUST_STRXFRM.)
+ *
+ * Even apart from the risk of broken locales, it's possible that there
+ * are platforms where the use of abbreviated keys should be disabled at
+ * compile time. Having only 4 byte datums could make worst-case
+ * performance drastically more likely, for example. Moreover, macOS's
+ * strxfrm() implementation is known to not effectively concentrate a
+ * significant amount of entropy from the original string in earlier
+ * transformed blobs. It's possible that other supported platforms are
+ * similarly encumbered. So, if we ever get past disabling this
+ * categorically, we may still want or need to disable it for particular
+ * platforms.
+ */
+#ifndef TRUST_STRXFRM
+ if (!collate_c && !(locale && locale->provider == COLLPROVIDER_ICU))
+ abbreviate = false;
+#endif
+
+ /*
+ * If we're using abbreviated keys, or if we're using a locale-aware
+ * comparison, we need to initialize a VarStringSortSupport object. Both
+ * cases will make use of the temporary buffers we initialize here for
+ * scratch space (and to detect requirement for BpChar semantics from
+ * caller), and the abbreviation case requires additional state.
+ */
+ if (abbreviate || !collate_c)
+ {
+ sss = palloc(sizeof(VarStringSortSupport));
+ sss->buf1 = palloc(TEXTBUFLEN);
+ sss->buflen1 = TEXTBUFLEN;
+ sss->buf2 = palloc(TEXTBUFLEN);
+ sss->buflen2 = TEXTBUFLEN;
+ /* Start with invalid values */
+ sss->last_len1 = -1;
+ sss->last_len2 = -1;
+ /* Initialize */
+ sss->last_returned = 0;
+ sss->locale = locale;
+
+ /*
+ * To avoid somehow confusing a strxfrm() blob and an original string,
+ * constantly keep track of the variety of data that buf1 and buf2
+ * currently contain.
+ *
+ * Comparisons may be interleaved with conversion calls. Frequently,
+ * conversions and comparisons are batched into two distinct phases,
+ * but the correctness of caching cannot hinge upon this. For
+ * comparison caching, buffer state is only trusted if cache_blob is
+ * found set to false, whereas strxfrm() caching only trusts the state
+ * when cache_blob is found set to true.
+ *
+ * Arbitrarily initialize cache_blob to true.
+ */
+ sss->cache_blob = true;
+ sss->collate_c = collate_c;
+ sss->typid = typid;
+ ssup->ssup_extra = sss;
+
+ /*
+ * If possible, plan to use the abbreviated keys optimization. The
+ * core code may switch back to authoritative comparator should
+ * abbreviation be aborted.
+ */
+ if (abbreviate)
+ {
+ sss->prop_card = 0.20;
+ initHyperLogLog(&sss->abbr_card, 10);
+ initHyperLogLog(&sss->full_card, 10);
+ ssup->abbrev_full_comparator = ssup->comparator;
+ ssup->comparator = ssup_datum_unsigned_cmp;
+ ssup->abbrev_converter = varstr_abbrev_convert;
+ ssup->abbrev_abort = varstr_abbrev_abort;
+ }
+ }
+}
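
varstr_sortsupport() boils down to choosing one comparator function pointer up front, so that every subsequent per-tuple comparison skips both the fmgr layer and the collation dispatch. The sketch below shows the same idea in isolation; the struct, the setup() helper, and the comparator names are illustrative stand-ins, not PostgreSQL's actual SortSupport API.

#include <locale.h>
#include <stdio.h>
#include <string.h>

/* hypothetical stripped-down "sort support" state */
typedef int (*cmp_fn) (const char *, const char *);

struct sortsupport
{
    cmp_fn      comparator;     /* chosen once, used for every comparison */
};

static int
cmp_bytes(const char *a, const char *b)
{
    return strcmp(a, b);        /* byte-wise, "C" locale semantics */
}

static int
cmp_coll(const char *a, const char *b)
{
    return strcoll(a, b);       /* locale-aware comparison */
}

static void
setup(struct sortsupport *ssup, int collate_is_c)
{
    /* pick the cheapest comparator that still gives correct semantics */
    ssup->comparator = collate_is_c ? cmp_bytes : cmp_coll;
}

int
main(void)
{
    struct sortsupport ssup;

    setlocale(LC_COLLATE, "");
    setup(&ssup, 0);
    printf("%d\n", ssup.comparator("abc", "abd") < 0);  /* prints 1 */
    return 0;
}
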
+
+/*
+ * sortsupport comparison func (for C locale case)
+ */
+static int
+varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
+{
+ VarString *arg1 = DatumGetVarStringPP(x);
+ VarString *arg2 = DatumGetVarStringPP(y);
+ char *a1p,
+ *a2p;
+ int len1,
+ len2,
+ result;
+
+ a1p = VARDATA_ANY(arg1);
+ a2p = VARDATA_ANY(arg2);
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ result = memcmp(a1p, a2p, Min(len1, len2));
+ if ((result == 0) && (len1 != len2))
+ result = (len1 < len2) ? -1 : 1;
+
+ /* We can't afford to leak memory here. */
+ if (PointerGetDatum(arg1) != x)
+ pfree(arg1);
+ if (PointerGetDatum(arg2) != y)
+ pfree(arg2);
+
+ return result;
+}
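
For the C-locale path the comparison is the usual pattern for counted (non-NUL-terminated) strings: memcmp() the common prefix, then break ties on length. A minimal standalone illustration, assuming both lengths are already known and without the varlena headers:

#include <stdio.h>
#include <string.h>

/* 3-way compare of two counted byte strings */
static int
counted_cmp(const char *a, size_t lena, const char *b, size_t lenb)
{
    size_t      minlen = lena < lenb ? lena : lenb;
    int         r = memcmp(a, b, minlen);

    if (r == 0 && lena != lenb)
        r = (lena < lenb) ? -1 : 1;     /* shorter string sorts first */
    return r;
}

int
main(void)
{
    printf("%d\n", counted_cmp("abc", 3, "abcd", 4));   /* negative */
    printf("%d\n", counted_cmp("abd", 3, "abc", 3));    /* positive */
    return 0;
}
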
+
+/*
+ * sortsupport comparison func (for BpChar C locale case)
+ *
+ * BpChar outsources its sortsupport to this module. Specialization for the
+ * varstr_sortsupport BpChar case, modeled on
+ * internal_bpchar_pattern_compare().
+ */
+static int
+bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
+{
+ BpChar *arg1 = DatumGetBpCharPP(x);
+ BpChar *arg2 = DatumGetBpCharPP(y);
+ char *a1p,
+ *a2p;
+ int len1,
+ len2,
+ result;
+
+ a1p = VARDATA_ANY(arg1);
+ a2p = VARDATA_ANY(arg2);
+
+ len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
+ len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
+
+ result = memcmp(a1p, a2p, Min(len1, len2));
+ if ((result == 0) && (len1 != len2))
+ result = (len1 < len2) ? -1 : 1;
+
+ /* We can't afford to leak memory here. */
+ if (PointerGetDatum(arg1) != x)
+ pfree(arg1);
+ if (PointerGetDatum(arg2) != y)
+ pfree(arg2);
+
+ return result;
+}
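
The only BpChar-specific step is ignoring trailing spaces before comparing; bpchartruelen() (defined elsewhere in this file) simply walks back over spaces. A standalone sketch of that behavior:

#include <stdio.h>

/* length of s ignoring trailing ASCII spaces, in the spirit of bpchartruelen() */
static int
true_len(const char *s, int len)
{
    while (len > 0 && s[len - 1] == ' ')
        len--;
    return len;
}

int
main(void)
{
    printf("%d\n", true_len("abc   ", 6));      /* prints 3 */
    return 0;
}
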
+
+/*
+ * sortsupport comparison func (for NAME C locale case)
+ */
+static int
+namefastcmp_c(Datum x, Datum y, SortSupport ssup)
+{
+ Name arg1 = DatumGetName(x);
+ Name arg2 = DatumGetName(y);
+
+ return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN);
+}
+
+/*
+ * sortsupport comparison func (for locale case with all varlena types)
+ */
+static int
+varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup)
+{
+ VarString *arg1 = DatumGetVarStringPP(x);
+ VarString *arg2 = DatumGetVarStringPP(y);
+ char *a1p,
+ *a2p;
+ int len1,
+ len2,
+ result;
+
+ a1p = VARDATA_ANY(arg1);
+ a2p = VARDATA_ANY(arg2);
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ result = varstrfastcmp_locale(a1p, len1, a2p, len2, ssup);
+
+ /* We can't afford to leak memory here. */
+ if (PointerGetDatum(arg1) != x)
+ pfree(arg1);
+ if (PointerGetDatum(arg2) != y)
+ pfree(arg2);
+
+ return result;
+}
+
+/*
+ * sortsupport comparison func (for locale case with NAME type)
+ */
+static int
+namefastcmp_locale(Datum x, Datum y, SortSupport ssup)
+{
+ Name arg1 = DatumGetName(x);
+ Name arg2 = DatumGetName(y);
+
+ return varstrfastcmp_locale(NameStr(*arg1), strlen(NameStr(*arg1)),
+ NameStr(*arg2), strlen(NameStr(*arg2)),
+ ssup);
+}
+
+/*
+ * sortsupport comparison func for locale cases
+ */
+static int
+varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
+{
+ VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
+ int result;
+ bool arg1_match;
+
+ /* Fast pre-check for equality, as discussed in varstr_cmp() */
+ if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
+ {
+ /*
+ * No change in buf1 or buf2 contents, so avoid changing last_len1 or
+ * last_len2. Existing contents of buffers might still be used by
+ * next call.
+ *
+ * It's fine to allow the comparison of BpChar padding bytes here,
+ * even though that implies that the memcmp() will usually be
+ * performed for BpChar callers (though multibyte characters could
+ * still prevent that from occurring). The memcmp() is still very
+ * cheap, and BpChar's funny semantics have us remove trailing spaces
+ * (not limited to padding), so we need make no distinction between
+ * padding space characters and "real" space characters.
+ */
+ return 0;
+ }
+
+ if (sss->typid == BPCHAROID)
+ {
+ /* Get true number of bytes, ignoring trailing spaces */
+ len1 = bpchartruelen(a1p, len1);
+ len2 = bpchartruelen(a2p, len2);
+ }
+
+ if (len1 >= sss->buflen1)
+ {
+ sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
+ sss->buf1 = repalloc(sss->buf1, sss->buflen1);
+ }
+ if (len2 >= sss->buflen2)
+ {
+ sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
+ sss->buf2 = repalloc(sss->buf2, sss->buflen2);
+ }
+
+ /*
+ * We're likely to be asked to compare the same strings repeatedly, and
+ * memcmp() is so much cheaper than strcoll() that it pays to try to cache
+ * comparisons, even though in general there is no reason to think that
+ * that will work out (every string datum may be unique). Caching does
+ * not slow things down measurably when it doesn't work out, and can speed
+ * things up by rather a lot when it does. In part, this is because the
+ * memcmp() compares data from cachelines that are needed in L1 cache even
+ * when the last comparison's result cannot be reused.
+ */
+ arg1_match = true;
+ if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
+ {
+ arg1_match = false;
+ memcpy(sss->buf1, a1p, len1);
+ sss->buf1[len1] = '\0';
+ sss->last_len1 = len1;
+ }
+
+ /*
+ * If we're comparing the same two strings as last time, we can return the
+ * same answer without calling strcoll() again. This is more likely than
+ * it seems (at least with moderate to low cardinality sets), because
+ * quicksort compares the same pivot against many values.
+ */
+ if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
+ {
+ memcpy(sss->buf2, a2p, len2);
+ sss->buf2[len2] = '\0';
+ sss->last_len2 = len2;
+ }
+ else if (arg1_match && !sss->cache_blob)
+ {
+ /* Use result cached following last actual strcoll() call */
+ return sss->last_returned;
+ }
+
+ if (sss->locale)
+ {
+ if (sss->locale->provider == COLLPROVIDER_ICU)
+ {
+#ifdef USE_ICU
+#ifdef HAVE_UCOL_STRCOLLUTF8
+ if (GetDatabaseEncoding() == PG_UTF8)
+ {
+ UErrorCode status;
+
+ status = U_ZERO_ERROR;
+ result = ucol_strcollUTF8(sss->locale->info.icu.ucol,
+ a1p, len1,
+ a2p, len2,
+ &status);
+ if (U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("collation failed: %s", u_errorName(status))));
+ }
+ else
+#endif
+ {
+ int32_t ulen1,
+ ulen2;
+ UChar *uchar1,
+ *uchar2;
+
+ ulen1 = icu_to_uchar(&uchar1, a1p, len1);
+ ulen2 = icu_to_uchar(&uchar2, a2p, len2);
+
+ result = ucol_strcoll(sss->locale->info.icu.ucol,
+ uchar1, ulen1,
+ uchar2, ulen2);
+
+ pfree(uchar1);
+ pfree(uchar2);
+ }
+#else /* not USE_ICU */
+ /* shouldn't happen */
+ elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
+#endif /* not USE_ICU */
+ }
+ else
+ {
+#ifdef HAVE_LOCALE_T
+ result = strcoll_l(sss->buf1, sss->buf2, sss->locale->info.lt);
+#else
+ /* shouldn't happen */
+ elog(ERROR, "unsupported collprovider: %c", sss->locale->provider);
+#endif
+ }
+ }
+ else
+ result = strcoll(sss->buf1, sss->buf2);
+
+ /* Break tie if necessary. */
+ if (result == 0 &&
+ (!sss->locale || sss->locale->deterministic))
+ result = strcmp(sss->buf1, sss->buf2);
+
+ /* Cache result, perhaps saving an expensive strcoll() call next time */
+ sss->cache_blob = false;
+ sss->last_returned = result;
+ return result;
+}
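
Most of varstrfastcmp_locale()'s speed comes from two cheap tricks: a memcmp() equality pre-check before paying for strcoll(), and a one-entry cache of the last pair compared, which pays off because quicksort compares the same pivot against many values. A standalone sketch of that caching pattern, with fixed-size illustrative buffers instead of the repalloc'd ones above:

#include <locale.h>
#include <stdio.h>
#include <string.h>

#define BUFSZ 1024

static char last_a[BUFSZ], last_b[BUFSZ];
static int  have_last = 0, last_result = 0;

/* locale-aware 3-way compare with a one-entry result cache */
static int
cached_coll_cmp(const char *a, const char *b)
{
    int         result;

    /* bytewise-equal strings collate equal under any locale: cheap fast path */
    if (strcmp(a, b) == 0)
        return 0;

    /* same pair as last time?  reuse the expensive strcoll() answer */
    if (have_last && strcmp(a, last_a) == 0 && strcmp(b, last_b) == 0)
        return last_result;

    result = strcoll(a, b);

    /* cache the pair for next time (only if it fits the fixed buffers) */
    if (strlen(a) < BUFSZ && strlen(b) < BUFSZ)
    {
        strcpy(last_a, a);
        strcpy(last_b, b);
        last_result = result;
        have_last = 1;
    }
    else
        have_last = 0;

    return result;
}

int
main(void)
{
    setlocale(LC_COLLATE, "");
    printf("%d\n", cached_coll_cmp("apple", "banana"));
    printf("%d\n", cached_coll_cmp("apple", "banana"));    /* served from cache */
    return 0;
}
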
+
+/*
+ * Conversion routine for sortsupport. Converts original to abbreviated key
+ * representation. Our encoding strategy is simple -- pack the first 8 bytes
+ * of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are
+ * stored in reverse order), and treat it as an unsigned integer. When the "C"
+ * locale is used, or in case of bytea, just memcpy() from original instead.
+ */
+static Datum
+varstr_abbrev_convert(Datum original, SortSupport ssup)
+{
+ VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
+ VarString *authoritative = DatumGetVarStringPP(original);
+ char *authoritative_data = VARDATA_ANY(authoritative);
+
+ /* working state */
+ Datum res;
+ char *pres;
+ int len;
+ uint32 hash;
+
+ pres = (char *) &res;
+ /* memset(), so any non-overwritten bytes are NUL */
+ memset(pres, 0, sizeof(Datum));
+ len = VARSIZE_ANY_EXHDR(authoritative);
+
+ /* Get number of bytes, ignoring trailing spaces */
+ if (sss->typid == BPCHAROID)
+ len = bpchartruelen(authoritative_data, len);
+
+ /*
+ * If we're using the C collation, use memcpy(), rather than strxfrm(), to
+ * abbreviate keys. The full comparator for the C locale is always
+ * memcmp(). It would be incorrect to allow bytea callers (callers that
+ * always force the C collation -- bytea isn't a collatable type, but this
+ * approach is convenient) to use strxfrm(). This is because bytea
+ * strings may contain NUL bytes. Besides, this should be faster, too.
+ *
+ * More generally, it's okay that bytea callers can have NUL bytes in
+ * strings because abbreviated cmp need not make a distinction between
+ * terminating NUL bytes, and NUL bytes representing actual NULs in the
+ * authoritative representation. Hopefully a comparison at or past one
+ * abbreviated key's terminating NUL byte will resolve the comparison
+ * without consulting the authoritative representation; specifically, some
+ * later non-NUL byte in the longer string can resolve the comparison
+ * against a subsequent terminating NUL in the shorter string. There will
+ * usually be what is effectively a "length-wise" resolution there and
+ * then.
+ *
+ * If that doesn't work out -- if all bytes in the longer string
+ * positioned at or past the offset of the smaller string's (first)
+ * terminating NUL are actually representative of NUL bytes in the
+ * authoritative binary string (perhaps with some *terminating* NUL bytes
+ * towards the end of the longer string iff it happens to still be small)
+ * -- then an authoritative tie-breaker will happen, and do the right
+ * thing: explicitly consider string length.
+ */
+ if (sss->collate_c)
+ memcpy(pres, authoritative_data, Min(len, sizeof(Datum)));
+ else
+ {
+ Size bsize;
+#ifdef USE_ICU
+ int32_t ulen = -1;
+ UChar *uchar = NULL;
+#endif
+
+ /*
+ * We're not using the C collation, so fall back on strxfrm or ICU
+ * analogs.
+ */
+
+ /* By convention, we use buffer 1 to store and NUL-terminate */
+ if (len >= sss->buflen1)
+ {
+ sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
+ sss->buf1 = repalloc(sss->buf1, sss->buflen1);
+ }
+
+ /* Might be able to reuse strxfrm() blob from last call */
+ if (sss->last_len1 == len && sss->cache_blob &&
+ memcmp(sss->buf1, authoritative_data, len) == 0)
+ {
+ memcpy(pres, sss->buf2, Min(sizeof(Datum), sss->last_len2));
+ /* No change affecting cardinality, so no hashing required */
+ goto done;
+ }
+
+ memcpy(sss->buf1, authoritative_data, len);
+
+ /*
+ * Just like strcoll(), strxfrm() expects a NUL-terminated string. Not
+ * necessary for ICU, but doesn't hurt.
+ */
+ sss->buf1[len] = '\0';
+ sss->last_len1 = len;
+
+#ifdef USE_ICU
+ /* When using ICU and not UTF8, convert string to UChar. */
+ if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU &&
+ GetDatabaseEncoding() != PG_UTF8)
+ ulen = icu_to_uchar(&uchar, sss->buf1, len);
+#endif
+
+ /*
+ * Loop: Call strxfrm() or ucol_getSortKey(), possibly enlarge buffer,
+ * and try again. Both of these functions have the result buffer
+ * content undefined if the result did not fit, so we need to retry
+ * until everything fits, even though we only need the first few bytes
+ * in the end. When using ucol_nextSortKeyPart(), however, we only
+ * ask for as many bytes as we actually need.
+ */
+ for (;;)
+ {
+#ifdef USE_ICU
+ if (sss->locale && sss->locale->provider == COLLPROVIDER_ICU)
+ {
+ /*
+ * When using UTF8, use the iteration interface so we only
+ * need to produce as many bytes as we actually need.
+ */
+ if (GetDatabaseEncoding() == PG_UTF8)
+ {
+ UCharIterator iter;
+ uint32_t state[2];
+ UErrorCode status;
+
+ uiter_setUTF8(&iter, sss->buf1, len);
+ state[0] = state[1] = 0; /* won't need that again */
+ status = U_ZERO_ERROR;
+ bsize = ucol_nextSortKeyPart(sss->locale->info.icu.ucol,
+ &iter,
+ state,
+ (uint8_t *) sss->buf2,
+ Min(sizeof(Datum), sss->buflen2),
+ &status);
+ if (U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("sort key generation failed: %s",
+ u_errorName(status))));
+ }
+ else
+ bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
+ uchar, ulen,
+ (uint8_t *) sss->buf2, sss->buflen2);
+ }
+ else
+#endif
+#ifdef HAVE_LOCALE_T
+ if (sss->locale && sss->locale->provider == COLLPROVIDER_LIBC)
+ bsize = strxfrm_l(sss->buf2, sss->buf1,
+ sss->buflen2, sss->locale->info.lt);
+ else
+#endif
+ bsize = strxfrm(sss->buf2, sss->buf1, sss->buflen2);
+
+ sss->last_len2 = bsize;
+ if (bsize < sss->buflen2)
+ break;
+
+ /*
+ * Grow buffer and retry.
+ */
+ sss->buflen2 = Max(bsize + 1,
+ Min(sss->buflen2 * 2, MaxAllocSize));
+ sss->buf2 = repalloc(sss->buf2, sss->buflen2);
+ }
+
+ /*
+ * Every Datum byte is always compared. This is safe because the
+ * strxfrm() blob is itself NUL terminated, leaving no danger of
+ * misinterpreting any NUL bytes not intended to be interpreted as
+ * logically representing termination.
+ *
+ * (Actually, even if there were NUL bytes in the blob it would be
+ * okay. See remarks on bytea case above.)
+ */
+ memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));
+
+#ifdef USE_ICU
+ if (uchar)
+ pfree(uchar);
+#endif
+ }
+
+ /*
+ * Maintain approximate cardinality of both abbreviated keys and original,
+ * authoritative keys using HyperLogLog. Used as cheap insurance against
+ * the worst case, where we do many string transformations for no saving
+ * in full strcoll()-based comparisons. These statistics are used by
+ * varstr_abbrev_abort().
+ *
+ * First, hash the key proper, or a significant fraction of it. Mix in length
+ * in order to compensate for cases where differences are past
+ * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
+ */
+ hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
+ Min(len, PG_CACHE_LINE_SIZE)));
+
+ if (len > PG_CACHE_LINE_SIZE)
+ hash ^= DatumGetUInt32(hash_uint32((uint32) len));
+
+ addHyperLogLog(&sss->full_card, hash);
+
+ /* Hash abbreviated key */
+#if SIZEOF_DATUM == 8
+ {
+ uint32 lohalf,
+ hihalf;
+
+ lohalf = (uint32) res;
+ hihalf = (uint32) (res >> 32);
+ hash = DatumGetUInt32(hash_uint32(lohalf ^ hihalf));
+ }
+#else /* SIZEOF_DATUM != 8 */
+ hash = DatumGetUInt32(hash_uint32((uint32) res));
+#endif
+
+ addHyperLogLog(&sss->abbr_card, hash);
+
+ /* Cache result, perhaps saving an expensive strxfrm() call next time */
+ sss->cache_blob = true;
+done:
+
+ /*
+ * Byteswap on little-endian machines.
+ *
+ * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer
+ * 3-way comparator) works correctly on all platforms. If we didn't do
+ * this, the comparator would have to call memcmp() with a pair of
+ * pointers to the first byte of each abbreviated key, which is slower.
+ */
+ res = DatumBigEndianToNative(res);
+
+ /* Don't leak memory here */
+ if (PointerGetDatum(authoritative) != original)
+ pfree(authoritative);
+
+ return res;
+}
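
The conversion amounts to: run strxfrm() (retrying with a larger buffer until the result fits), then pack the first eight transformed bytes into an unsigned integer so that plain integer comparison matches memcmp() order on the blob prefix. The sketch below builds the key most-significant-byte-first directly instead of memcpy-plus-byteswap, uses a fixed buffer, and omits ICU, buffer reuse, and the HyperLogLog bookkeeping; equal keys must still be resolved by the authoritative comparator.

#include <locale.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Build an 8-byte abbreviated key from the prefix of strxfrm() output. */
static uint64_t
abbrev_key(const char *s)
{
    unsigned char buf[256];
    size_t      needed = strxfrm((char *) buf, s, sizeof(buf));
    uint64_t    key = 0;
    size_t      i;

    if (needed >= sizeof(buf))
        return 0;               /* the real code grows the buffer and retries */

    /* pack most-significant-byte-first so unsigned compare matches memcmp order */
    for (i = 0; i < 8; i++)
        key = (key << 8) | (i < needed ? buf[i] : 0);
    return key;
}

int
main(void)
{
    setlocale(LC_COLLATE, "");
    /* unsigned comparison of keys approximates strcoll() order on the prefix */
    printf("%d\n", abbrev_key("apple") < abbrev_key("banana"));
    return 0;
}
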
+
+/*
+ * Callback for estimating effectiveness of abbreviated key optimization, using
+ * heuristic rules. Returns value indicating if the abbreviation optimization
+ * should be aborted, based on its projected effectiveness.
+ */
+static bool
+varstr_abbrev_abort(int memtupcount, SortSupport ssup)
+{
+ VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
+ double abbrev_distinct,
+ key_distinct;
+
+ Assert(ssup->abbreviate);
+
+ /* Have a little patience */
+ if (memtupcount < 100)
+ return false;
+
+ abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
+ key_distinct = estimateHyperLogLog(&sss->full_card);
+
+ /*
+ * Clamp cardinality estimates to at least one distinct value. While
+ * NULLs are generally disregarded, if only NULL values were seen so far,
+ * that might misrepresent costs if we failed to clamp.
+ */
+ if (abbrev_distinct <= 1.0)
+ abbrev_distinct = 1.0;
+
+ if (key_distinct <= 1.0)
+ key_distinct = 1.0;
+
+ /*
+ * In the worst case all abbreviated keys are identical, while at the same
+ * time there are differences within full key strings not captured in
+ * abbreviations.
+ */
+#ifdef TRACE_SORT
+ if (trace_sort)
+ {
+ double norm_abbrev_card = abbrev_distinct / (double) memtupcount;
+
+ elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
+ "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
+ memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
+ sss->prop_card);
+ }
+#endif
+
+ /*
+ * If the number of distinct abbreviated keys approximately matches the
+ * number of distinct authoritative original keys, that's reason enough to
+ * proceed. We can win even with a very low cardinality set if most
+ * tie-breakers only memcmp(). This is by far the most important
+ * consideration.
+ *
+ * While comparisons that are resolved at the abbreviated key level are
+ * considerably cheaper than tie-breakers resolved with memcmp(), both of
+ * those two outcomes are so much cheaper than a full strcoll() once
+ * sorting is underway that it doesn't seem worth it to weigh abbreviated
+ * cardinality against the overall size of the set in order to more
+ * accurately model costs. Assume that an abbreviated comparison, and an
+ * abbreviated comparison with a cheap memcmp()-based authoritative
+ * resolution are equivalent.
+ */
+ if (abbrev_distinct > key_distinct * sss->prop_card)
+ {
+ /*
+ * When we have exceeded 10,000 tuples, decay required cardinality
+ * aggressively for next call.
+ *
+ * This is useful because the number of comparisons required on
+ * average increases at a linearithmic rate, and at roughly 10,000
+ * tuples that factor will start to dominate over the linear costs of
+ * string transformation (this is a conservative estimate). The decay
+ * rate is chosen to be a little less aggressive than halving -- which
+ * (since we're called at points at which memtupcount has doubled)
+ * would never see the cost model actually abort past the first call
+ * following a decay. This decay rate is mostly a precaution against
+ * a sudden, violent swing in how well abbreviated cardinality tracks
+ * full key cardinality. The decay also serves to prevent a marginal
+ * case from being aborted too late, when too much has already been
+ * invested in string transformation.
+ *
+ * It's possible for sets of several million distinct strings with
+ * mere tens of thousands of distinct abbreviated keys to still
+ * benefit very significantly. This will generally occur provided
+ * each abbreviated key is a proxy for a roughly uniform number of the
+ * set's full keys. If it isn't so, we hope to catch that early and
+ * abort. If it isn't caught early, by the time the problem is
+ * apparent it's probably not worth aborting.
+ */
+ if (memtupcount > 10000)
+ sss->prop_card *= 0.65;
+
+ return false;
+ }
+
+ /*
+ * Abort abbreviation strategy.
+ *
+ * The worst case, where all abbreviated keys are identical while all
+ * original strings differ, will typically only see a regression of about
+ * 10% in execution time for small to medium sized lists of strings.
+ * Whereas on modern CPUs where cache stalls are the dominant cost, we can
+ * often expect very large improvements, particularly with sets of strings
+ * of moderately high to high abbreviated cardinality. There is little to
+ * lose but much to gain, which our strategy reflects.
+ */
+#ifdef TRACE_SORT
+ if (trace_sort)
+ elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
+ "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
+ memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
+#endif
+
+ return true;
+}
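
The abort decision reduces to comparing two cardinality estimates against a required ratio that decays once the input gets large. Below is a sketch of the same heuristic with exact distinct counts standing in for the HyperLogLog estimates; the 100-tuple patience threshold, 0.20 starting ratio, 10000-tuple cutoff, and 0.65 decay mirror the constants used above.

#include <stdbool.h>
#include <stdio.h>

static double prop_card = 0.20;     /* required abbrev/full cardinality ratio */

/* Decide whether to abandon abbreviated keys, given cardinality estimates. */
static bool
should_abort(int memtupcount, double abbrev_distinct, double full_distinct)
{
    if (memtupcount < 100)
        return false;           /* have a little patience */

    if (abbrev_distinct < 1.0)
        abbrev_distinct = 1.0;
    if (full_distinct < 1.0)
        full_distinct = 1.0;

    if (abbrev_distinct > full_distinct * prop_card)
    {
        /* still looks effective; demand less next time once the set is big */
        if (memtupcount > 10000)
            prop_card *= 0.65;
        return false;
    }
    return true;                /* abbreviated keys aren't discriminating */
}

int
main(void)
{
    printf("%d\n", should_abort(20000, 15000.0, 18000.0));  /* 0: keep going */
    printf("%d\n", should_abort(20000, 3.0, 18000.0));      /* 1: abort */
    return 0;
}
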
+
+/*
+ * Generic equalimage support function for character type's operator classes.
+ * Disables the use of deduplication with nondeterministic collations.
+ */
+Datum
+btvarstrequalimage(PG_FUNCTION_ARGS)
+{
+ /* Oid opcintype = PG_GETARG_OID(0); */
+ Oid collid = PG_GET_COLLATION();
+
+ check_collation_set(collid);
+
+ if (lc_collate_is_c(collid) ||
+ collid == DEFAULT_COLLATION_OID ||
+ get_collation_isdeterministic(collid))
+ PG_RETURN_BOOL(true);
+ else
+ PG_RETURN_BOOL(false);
+}
+
+Datum
+text_larger(PG_FUNCTION_ARGS)
+{
+ text *arg1 = PG_GETARG_TEXT_PP(0);
+ text *arg2 = PG_GETARG_TEXT_PP(1);
+ text *result;
+
+ result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
+
+ PG_RETURN_TEXT_P(result);
+}
+
+Datum
+text_smaller(PG_FUNCTION_ARGS)
+{
+ text *arg1 = PG_GETARG_TEXT_PP(0);
+ text *arg2 = PG_GETARG_TEXT_PP(1);
+ text *result;
+
+ result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
+
+ PG_RETURN_TEXT_P(result);
+}
+
+
+/*
+ * Cross-type comparison functions for types text and name.
+ */
+
+Datum
+nameeqtext(PG_FUNCTION_ARGS)
+{
+ Name arg1 = PG_GETARG_NAME(0);
+ text *arg2 = PG_GETARG_TEXT_PP(1);
+ size_t len1 = strlen(NameStr(*arg1));
+ size_t len2 = VARSIZE_ANY_EXHDR(arg2);
+ Oid collid = PG_GET_COLLATION();
+ bool result;
+
+ check_collation_set(collid);
+
+ if (collid == C_COLLATION_OID)
+ result = (len1 == len2 &&
+ memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
+ else
+ result = (varstr_cmp(NameStr(*arg1), len1,
+ VARDATA_ANY(arg2), len2,
+ collid) == 0);
+
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+texteqname(PG_FUNCTION_ARGS)
+{
+ text *arg1 = PG_GETARG_TEXT_PP(0);
+ Name arg2 = PG_GETARG_NAME(1);
+ size_t len1 = VARSIZE_ANY_EXHDR(arg1);
+ size_t len2 = strlen(NameStr(*arg2));
+ Oid collid = PG_GET_COLLATION();
+ bool result;
+
+ check_collation_set(collid);
+
+ if (collid == C_COLLATION_OID)
+ result = (len1 == len2 &&
+ memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
+ else
+ result = (varstr_cmp(VARDATA_ANY(arg1), len1,
+ NameStr(*arg2), len2,
+ collid) == 0);
+
+ PG_FREE_IF_COPY(arg1, 0);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+namenetext(PG_FUNCTION_ARGS)
+{
+ Name arg1 = PG_GETARG_NAME(0);
+ text *arg2 = PG_GETARG_TEXT_PP(1);
+ size_t len1 = strlen(NameStr(*arg1));
+ size_t len2 = VARSIZE_ANY_EXHDR(arg2);
+ Oid collid = PG_GET_COLLATION();
+ bool result;
+
+ check_collation_set(collid);
+
+ if (collid == C_COLLATION_OID)
+ result = !(len1 == len2 &&
+ memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
+ else
+ result = !(varstr_cmp(NameStr(*arg1), len1,
+ VARDATA_ANY(arg2), len2,
+ collid) == 0);
+
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+textnename(PG_FUNCTION_ARGS)
+{
+ text *arg1 = PG_GETARG_TEXT_PP(0);
+ Name arg2 = PG_GETARG_NAME(1);
+ size_t len1 = VARSIZE_ANY_EXHDR(arg1);
+ size_t len2 = strlen(NameStr(*arg2));
+ Oid collid = PG_GET_COLLATION();
+ bool result;
+
+ check_collation_set(collid);
+
+ if (collid == C_COLLATION_OID)
+ result = !(len1 == len2 &&
+ memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
+ else
+ result = !(varstr_cmp(VARDATA_ANY(arg1), len1,
+ NameStr(*arg2), len2,
+ collid) == 0);
+
+ PG_FREE_IF_COPY(arg1, 0);
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+btnametextcmp(PG_FUNCTION_ARGS)
+{
+ Name arg1 = PG_GETARG_NAME(0);
+ text *arg2 = PG_GETARG_TEXT_PP(1);
+ int32 result;
+
+ result = varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)),
+ VARDATA_ANY(arg2), VARSIZE_ANY_EXHDR(arg2),
+ PG_GET_COLLATION());
+
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_INT32(result);
+}
+
+Datum
+bttextnamecmp(PG_FUNCTION_ARGS)
+{
+ text *arg1 = PG_GETARG_TEXT_PP(0);
+ Name arg2 = PG_GETARG_NAME(1);
+ int32 result;
+
+ result = varstr_cmp(VARDATA_ANY(arg1), VARSIZE_ANY_EXHDR(arg1),
+ NameStr(*arg2), strlen(NameStr(*arg2)),
+ PG_GET_COLLATION());
+
+ PG_FREE_IF_COPY(arg1, 0);
+
+ PG_RETURN_INT32(result);
+}
+
+#define CmpCall(cmpfunc) \
+ DatumGetInt32(DirectFunctionCall2Coll(cmpfunc, \
+ PG_GET_COLLATION(), \
+ PG_GETARG_DATUM(0), \
+ PG_GETARG_DATUM(1)))
+
+Datum
+namelttext(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(CmpCall(btnametextcmp) < 0);
+}
+
+Datum
+nameletext(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(CmpCall(btnametextcmp) <= 0);
+}
+
+Datum
+namegttext(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(CmpCall(btnametextcmp) > 0);
+}
+
+Datum
+namegetext(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(CmpCall(btnametextcmp) >= 0);
+}
+
+Datum
+textltname(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(CmpCall(bttextnamecmp) < 0);
+}
+
+Datum
+textlename(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(CmpCall(bttextnamecmp) <= 0);
+}
+
+Datum
+textgtname(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(CmpCall(bttextnamecmp) > 0);
+}
+
+Datum
+textgename(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BOOL(CmpCall(bttextnamecmp) >= 0);
+}
+
+#undef CmpCall
+
+
+/*
+ * The following operators support character-by-character comparison
+ * of text datums, to allow building indexes suitable for LIKE clauses.
+ * Note that the regular texteq/textne comparison operators, and regular
+ * support functions 1 and 2 with "C" collation are assumed to be
+ * compatible with these!
+ */
+
+static int
+internal_text_pattern_compare(text *arg1, text *arg2)
+{
+ int result;
+ int len1,
+ len2;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+ if (result != 0)
+ return result;
+ else if (len1 < len2)
+ return -1;
+ else if (len1 > len2)
+ return 1;
+ else
+ return 0;
+}
+
+
+Datum
+text_pattern_lt(PG_FUNCTION_ARGS)
+{
+ text *arg1 = PG_GETARG_TEXT_PP(0);
+ text *arg2 = PG_GETARG_TEXT_PP(1);
+ int result;
+
+ result = internal_text_pattern_compare(arg1, arg2);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(result < 0);
+}
+
+
+Datum
+text_pattern_le(PG_FUNCTION_ARGS)
+{
+ text *arg1 = PG_GETARG_TEXT_PP(0);
+ text *arg2 = PG_GETARG_TEXT_PP(1);
+ int result;
+
+ result = internal_text_pattern_compare(arg1, arg2);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(result <= 0);
+}
+
+
+Datum
+text_pattern_ge(PG_FUNCTION_ARGS)
+{
+ text *arg1 = PG_GETARG_TEXT_PP(0);
+ text *arg2 = PG_GETARG_TEXT_PP(1);
+ int result;
+
+ result = internal_text_pattern_compare(arg1, arg2);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(result >= 0);
+}
+
+
+Datum
+text_pattern_gt(PG_FUNCTION_ARGS)
+{
+ text *arg1 = PG_GETARG_TEXT_PP(0);
+ text *arg2 = PG_GETARG_TEXT_PP(1);
+ int result;
+
+ result = internal_text_pattern_compare(arg1, arg2);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL(result > 0);
+}
+
+
+Datum
+bttext_pattern_cmp(PG_FUNCTION_ARGS)
+{
+ text *arg1 = PG_GETARG_TEXT_PP(0);
+ text *arg2 = PG_GETARG_TEXT_PP(1);
+ int result;
+
+ result = internal_text_pattern_compare(arg1, arg2);
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_INT32(result);
+}
+
+
+Datum
+bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
+{
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+ /* Use generic string SortSupport, forcing "C" collation */
+ varstr_sortsupport(ssup, TEXTOID, C_COLLATION_OID);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ PG_RETURN_VOID();
+}
+
+
+/*-------------------------------------------------------------
+ * byteaoctetlen
+ *
+ * get the number of bytes contained in an instance of type 'bytea'
+ *-------------------------------------------------------------
+ */
+Datum
+byteaoctetlen(PG_FUNCTION_ARGS)
+{
+ Datum str = PG_GETARG_DATUM(0);
+
+ /* We need not detoast the input at all */
+ PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
+}
+
+/*
+ * byteacat -
+ * takes two bytea* and returns a bytea* that is the concatenation of
+ * the two.
+ *
+ * Cloned from textcat and modified as required.
+ */
+Datum
+byteacat(PG_FUNCTION_ARGS)
+{
+ bytea *t1 = PG_GETARG_BYTEA_PP(0);
+ bytea *t2 = PG_GETARG_BYTEA_PP(1);
+
+ PG_RETURN_BYTEA_P(bytea_catenate(t1, t2));
+}
+
+/*
+ * bytea_catenate
+ * Guts of byteacat(), broken out so it can be used by other functions
+ *
+ * Arguments can be in short-header form, but not compressed or out-of-line
+ */
+static bytea *
+bytea_catenate(bytea *t1, bytea *t2)
+{
+ bytea *result;
+ int len1,
+ len2,
+ len;
+ char *ptr;
+
+ len1 = VARSIZE_ANY_EXHDR(t1);
+ len2 = VARSIZE_ANY_EXHDR(t2);
+
+ /* paranoia ... probably should throw error instead? */
+ if (len1 < 0)
+ len1 = 0;
+ if (len2 < 0)
+ len2 = 0;
+
+ len = len1 + len2 + VARHDRSZ;
+ result = (bytea *) palloc(len);
+
+ /* Set size of result string... */
+ SET_VARSIZE(result, len);
+
+ /* Fill data field of result string... */
+ ptr = VARDATA(result);
+ if (len1 > 0)
+ memcpy(ptr, VARDATA_ANY(t1), len1);
+ if (len2 > 0)
+ memcpy(ptr + len1, VARDATA_ANY(t2), len2);
+
+ return result;
+}
+
+#define PG_STR_GET_BYTEA(str_) \
+ DatumGetByteaPP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
+
+/*
+ * bytea_substr()
+ * Return a substring starting at the specified position.
+ * Cloned from text_substr and modified as required.
+ *
+ * Input:
+ * - string
+ * - starting position (is one-based)
+ * - string length (optional)
+ *
+ * If the starting position is zero or less, then return from the start of the string,
+ * adjusting the length to be consistent with the "negative start" per SQL.
+ * If the length is less than zero, an ERROR is thrown. If no third argument
+ * (length) is provided, the length to the end of the string is assumed.
+ */
+Datum
+bytea_substr(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
+ PG_GETARG_INT32(1),
+ PG_GETARG_INT32(2),
+ false));
+}
+
+/*
+ * bytea_substr_no_len -
+ * Wrapper to avoid opr_sanity failure due to
+ * one function accepting a different number of args.
+ */
+Datum
+bytea_substr_no_len(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_BYTEA_P(bytea_substring(PG_GETARG_DATUM(0),
+ PG_GETARG_INT32(1),
+ -1,
+ true));
+}
+
+static bytea *
+bytea_substring(Datum str,
+ int S,
+ int L,
+ bool length_not_specified)
+{
+ int32 S1; /* adjusted start position */
+ int32 L1; /* adjusted substring length */
+ int32 E; /* end position */
+
+ /*
+ * The logic here should generally match text_substring().
+ */
+ S1 = Max(S, 1);
+
+ if (length_not_specified)
+ {
+ /*
+ * Not passed a length - DatumGetByteaPSlice() grabs everything to the
+ * end of the string if we pass it a negative value for length.
+ */
+ L1 = -1;
+ }
+ else if (L < 0)
+ {
+ /* SQL99 says to throw an error for E < S, i.e., negative length */
+ ereport(ERROR,
+ (errcode(ERRCODE_SUBSTRING_ERROR),
+ errmsg("negative substring length not allowed")));
+ L1 = -1; /* silence stupider compilers */
+ }
+ else if (pg_add_s32_overflow(S, L, &E))
+ {
+ /*
+ * L could be large enough for S + L to overflow, in which case the
+ * substring must run to end of string.
+ */
+ L1 = -1;
+ }
+ else
+ {
+ /*
+ * A zero or negative value for the end position can happen if the
+ * start was negative or one. SQL99 says to return a zero-length
+ * string.
+ */
+ if (E < 1)
+ return PG_STR_GET_BYTEA("");
+
+ L1 = E - S1;
+ }
+
+ /*
+ * If the start position is past the end of the string, SQL99 says to
+ * return a zero-length string -- DatumGetByteaPSlice() will do that for
+ * us. We need only convert S1 to zero-based starting position.
+ */
+ return DatumGetByteaPSlice(str, S1 - 1, L1);
+}
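
The arithmetic above implements the SQL substring rules: clamp the start to 1, compute the exclusive end as start + length with overflow protection, and convert to a zero-based slice. A standalone sketch of that arithmetic on an ordinary byte buffer, with a plain comparison standing in for pg_add_s32_overflow():

#include <limits.h>
#include <stdio.h>
#include <string.h>

/*
 * Compute the zero-based offset and byte count for substring(str FROM S FOR L)
 * on a buffer of srclen bytes.  Returns the resulting length, or -1 for a
 * negative L, which the caller should report as an error.
 */
static int
sql_substring_bounds(int srclen, int S, int L, int *offset)
{
    int         S1 = (S > 1) ? S : 1;   /* adjusted one-based start */
    int         E;                      /* one-based exclusive end */

    if (L < 0)
        return -1;              /* "negative substring length not allowed" */

    if (S > INT_MAX - L)
        E = srclen + 1;         /* S + L would overflow: run to end of string */
    else
        E = S + L;

    if (E > srclen + 1)
        E = srclen + 1;         /* clamp to just past the last character */

    *offset = (S1 - 1 < srclen) ? S1 - 1 : srclen;
    return (E > S1) ? E - S1 : 0;       /* zero-length result when E <= S1 */
}

int
main(void)
{
    const char *s = "abcdef";
    int         off;
    int         len = sql_substring_bounds((int) strlen(s), -2, 5, &off);

    printf("%.*s\n", len, s + off);     /* prints "ab": characters 1..2 */
    return 0;
}
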
+
+/*
+ * byteaoverlay
+ * Replace specified substring of first string with second
+ *
+ * The SQL standard defines OVERLAY() in terms of substring and concatenation.
+ * This code is a direct implementation of what the standard says.
+ */
+Datum
+byteaoverlay(PG_FUNCTION_ARGS)
+{
+ bytea *t1 = PG_GETARG_BYTEA_PP(0);
+ bytea *t2 = PG_GETARG_BYTEA_PP(1);
+ int sp = PG_GETARG_INT32(2); /* substring start position */
+ int sl = PG_GETARG_INT32(3); /* substring length */
+
+ PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
+}
+
+Datum
+byteaoverlay_no_len(PG_FUNCTION_ARGS)
+{
+ bytea *t1 = PG_GETARG_BYTEA_PP(0);
+ bytea *t2 = PG_GETARG_BYTEA_PP(1);
+ int sp = PG_GETARG_INT32(2); /* substring start position */
+ int sl;
+
+ sl = VARSIZE_ANY_EXHDR(t2); /* defaults to length(t2) */
+ PG_RETURN_BYTEA_P(bytea_overlay(t1, t2, sp, sl));
+}
+
+static bytea *
+bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
+{
+ bytea *result;
+ bytea *s1;
+ bytea *s2;
+ int sp_pl_sl;
+
+ /*
+ * Check for possible integer-overflow cases. For negative sp, throw a
+ * "substring length" error because that's what should be expected
+ * according to the spec's definition of OVERLAY().
+ */
+ if (sp <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_SUBSTRING_ERROR),
+ errmsg("negative substring length not allowed")));
+ if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("integer out of range")));
+
+ s1 = bytea_substring(PointerGetDatum(t1), 1, sp - 1, false);
+ s2 = bytea_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
+ result = bytea_catenate(s1, t2);
+ result = bytea_catenate(result, s2);
+
+ return result;
+}
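
Per the standard, OVERLAY(t1 PLACING t2 FROM sp FOR sl) is just substring(t1, 1, sp-1) || t2 || substring(t1 FROM sp+sl), which is exactly how bytea_overlay() is composed above. A hedged sketch of the same composition on NUL-terminated strings:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* OVERLAY(t1 PLACING t2 FROM sp FOR sl) for a 1-based sp, on plain strings. */
static char *
overlay(const char *t1, const char *t2, int sp, int sl)
{
    size_t      len1 = strlen(t1);
    size_t      prefix = (size_t) (sp - 1) < len1 ? (size_t) (sp - 1) : len1;
    size_t      tail_at = prefix + (size_t) sl;
    const char *tail = tail_at < len1 ? t1 + tail_at : t1 + len1;
    char       *result = malloc(len1 + strlen(t2) + 1);

    if (result == NULL || sp <= 0 || sl < 0)
    {
        free(result);
        return NULL;            /* the real code reports a substring error */
    }
    /* prefix before sp, then the replacement, then everything from sp+sl on */
    sprintf(result, "%.*s%s%s", (int) prefix, t1, t2, tail);
    return result;
}

int
main(void)
{
    char       *s = overlay("Txxxxas", "hom", 2, 4);

    printf("%s\n", s);          /* prints "Thomas" */
    free(s);
    return 0;
}
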
+
+/*
+ * bit_count
+ */
+Datum
+bytea_bit_count(PG_FUNCTION_ARGS)
+{
+ bytea *t1 = PG_GETARG_BYTEA_PP(0);
+
+ PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1)));
+}
+
+/*
+ * byteapos -
+ * Return the position of the specified substring.
+ * Implements the SQL POSITION() function.
+ * Cloned from textpos and modified as required.
+ */
+Datum
+byteapos(PG_FUNCTION_ARGS)
+{
+ bytea *t1 = PG_GETARG_BYTEA_PP(0);
+ bytea *t2 = PG_GETARG_BYTEA_PP(1);
+ int pos;
+ int px,
+ p;
+ int len1,
+ len2;
+ char *p1,
+ *p2;
+
+ len1 = VARSIZE_ANY_EXHDR(t1);
+ len2 = VARSIZE_ANY_EXHDR(t2);
+
+ if (len2 <= 0)
+ PG_RETURN_INT32(1); /* result for empty pattern */
+
+ p1 = VARDATA_ANY(t1);
+ p2 = VARDATA_ANY(t2);
+
+ pos = 0;
+ px = (len1 - len2);
+ for (p = 0; p <= px; p++)
+ {
+ if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
+ {
+ pos = p + 1;
+ break;
+ }
+ p1++;
+ }
+
+ PG_RETURN_INT32(pos);
+}
+
+/*-------------------------------------------------------------
+ * byteaGetByte
+ *
+ * this routine treats "bytea" as an array of bytes.
+ * It returns the Nth byte (a number between 0 and 255).
+ *-------------------------------------------------------------
+ */
+Datum
+byteaGetByte(PG_FUNCTION_ARGS)
+{
+ bytea *v = PG_GETARG_BYTEA_PP(0);
+ int32 n = PG_GETARG_INT32(1);
+ int len;
+ int byte;
+
+ len = VARSIZE_ANY_EXHDR(v);
+
+ if (n < 0 || n >= len)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("index %d out of valid range, 0..%d",
+ n, len - 1)));
+
+ byte = ((unsigned char *) VARDATA_ANY(v))[n];
+
+ PG_RETURN_INT32(byte);
+}
+
+/*-------------------------------------------------------------
+ * byteaGetBit
+ *
+ * This routine treats a "bytea" type like an array of bits.
+ * It returns the value of the Nth bit (0 or 1).
+ *
+ *-------------------------------------------------------------
+ */
+Datum
+byteaGetBit(PG_FUNCTION_ARGS)
+{
+ bytea *v = PG_GETARG_BYTEA_PP(0);
+ int64 n = PG_GETARG_INT64(1);
+ int byteNo,
+ bitNo;
+ int len;
+ int byte;
+
+ len = VARSIZE_ANY_EXHDR(v);
+
+ if (n < 0 || n >= (int64) len * 8)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("index %lld out of valid range, 0..%lld",
+ (long long) n, (long long) len * 8 - 1)));
+
+ /* n/8 is now known < len, so safe to cast to int */
+ byteNo = (int) (n / 8);
+ bitNo = (int) (n % 8);
+
+ byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
+
+ if (byte & (1 << bitNo))
+ PG_RETURN_INT32(1);
+ else
+ PG_RETURN_INT32(0);
+}
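
The bit addressing is plain integer division and remainder over the byte array. A tiny standalone illustration of the same indexing, with bounds checking left to the caller:

#include <stdio.h>

/* Read bit n (0-based, least-significant-bit-first within each byte). */
static int
get_bit(const unsigned char *data, long n)
{
    long        byteNo = n / 8;
    int         bitNo = (int) (n % 8);

    return (data[byteNo] >> bitNo) & 1;
}

int
main(void)
{
    unsigned char buf[2] = {0x01, 0x80};

    printf("%d %d\n", get_bit(buf, 0), get_bit(buf, 15));   /* prints "1 1" */
    return 0;
}
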
+
+/*-------------------------------------------------------------
+ * byteaSetByte
+ *
+ * Given an instance of type 'bytea' creates a new one with
+ * the Nth byte set to the given value.
+ *
+ *-------------------------------------------------------------
+ */
+Datum
+byteaSetByte(PG_FUNCTION_ARGS)
+{
+ bytea *res = PG_GETARG_BYTEA_P_COPY(0);
+ int32 n = PG_GETARG_INT32(1);
+ int32 newByte = PG_GETARG_INT32(2);
+ int len;
+
+ len = VARSIZE(res) - VARHDRSZ;
+
+ if (n < 0 || n >= len)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("index %d out of valid range, 0..%d",
+ n, len - 1)));
+
+ /*
+ * Now set the byte.
+ */
+ ((unsigned char *) VARDATA(res))[n] = newByte;
+
+ PG_RETURN_BYTEA_P(res);
+}
+
+/*-------------------------------------------------------------
+ * byteaSetBit
+ *
+ * Given an instance of type 'bytea' creates a new one with
+ * the Nth bit set to the given value.
+ *
+ *-------------------------------------------------------------
+ */
+Datum
+byteaSetBit(PG_FUNCTION_ARGS)
+{
+ bytea *res = PG_GETARG_BYTEA_P_COPY(0);
+ int64 n = PG_GETARG_INT64(1);
+ int32 newBit = PG_GETARG_INT32(2);
+ int len;
+ int oldByte,
+ newByte;
+ int byteNo,
+ bitNo;
+
+ len = VARSIZE(res) - VARHDRSZ;
+
+ if (n < 0 || n >= (int64) len * 8)
+ ereport(ERROR,
+ (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
+ errmsg("index %lld out of valid range, 0..%lld",
+ (long long) n, (long long) len * 8 - 1)));
+
+ /* n/8 is now known < len, so safe to cast to int */
+ byteNo = (int) (n / 8);
+ bitNo = (int) (n % 8);
+
+ /*
+ * sanity check!
+ */
+ if (newBit != 0 && newBit != 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("new bit must be 0 or 1")));
+
+ /*
+ * Update the byte.
+ */
+ oldByte = ((unsigned char *) VARDATA(res))[byteNo];
+
+ if (newBit == 0)
+ newByte = oldByte & (~(1 << bitNo));
+ else
+ newByte = oldByte | (1 << bitNo);
+
+ ((unsigned char *) VARDATA(res))[byteNo] = newByte;
+
+ PG_RETURN_BYTEA_P(res);
+}
+
+
+/* text_name()
+ * Converts a text type to a Name type.
+ */
+Datum
+text_name(PG_FUNCTION_ARGS)
+{
+ text *s = PG_GETARG_TEXT_PP(0);
+ Name result;
+ int len;
+
+ len = VARSIZE_ANY_EXHDR(s);
+
+ /* Truncate oversize input */
+ if (len >= NAMEDATALEN)
+ len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
+
+ /* We use palloc0 here to ensure result is zero-padded */
+ result = (Name) palloc0(NAMEDATALEN);
+ memcpy(NameStr(*result), VARDATA_ANY(s), len);
+
+ PG_RETURN_NAME(result);
+}
+
+/* name_text()
+ * Converts a Name type to a text type.
+ */
+Datum
+name_text(PG_FUNCTION_ARGS)
+{
+ Name s = PG_GETARG_NAME(0);
+
+ PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s)));
+}
+
+
+/*
+ * textToQualifiedNameList - convert a text object to list of names
+ *
+ * This implements the input parsing needed by nextval() and other
+ * functions that take a text parameter representing a qualified name.
+ * We split the name at dots, downcase if not double-quoted, and
+ * truncate names if they're too long.
+ */
+List *
+textToQualifiedNameList(text *textval)
+{
+ char *rawname;
+ List *result = NIL;
+ List *namelist;
+ ListCell *l;
+
+ /* Convert to C string (handles possible detoasting). */
+ /* Note we rely on being able to modify rawname below. */
+ rawname = text_to_cstring(textval);
+
+ if (!SplitIdentifierString(rawname, '.', &namelist))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_NAME),
+ errmsg("invalid name syntax")));
+
+ if (namelist == NIL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_NAME),
+ errmsg("invalid name syntax")));
+
+ foreach(l, namelist)
+ {
+ char *curname = (char *) lfirst(l);
+
+ result = lappend(result, makeString(pstrdup(curname)));
+ }
+
+ pfree(rawname);
+ list_free(namelist);
+
+ return result;
+}
+
+/*
+ * SplitIdentifierString --- parse a string containing identifiers
+ *
+ * This is the guts of textToQualifiedNameList, and is exported for use in
+ * other situations such as parsing GUC variables. In the GUC case, it's
+ * important to avoid memory leaks, so the API is designed to minimize the
+ * amount of stuff that needs to be allocated and freed.
+ *
+ * Inputs:
+ * rawstring: the input string; must be overwritable! On return, it's
+ * been modified to contain the separated identifiers.
+ * separator: the separator punctuation expected between identifiers
+ * (typically '.' or ','). Whitespace may also appear around
+ * identifiers.
+ * Outputs:
+ * namelist: filled with a palloc'd list of pointers to identifiers within
+ * rawstring. Caller should list_free() this even on error return.
+ *
+ * Returns true if okay, false if there is a syntax error in the string.
+ *
+ * Note that an empty string is considered okay here, though not in
+ * textToQualifiedNameList.
+ */
+bool
+SplitIdentifierString(char *rawstring, char separator,
+ List **namelist)
+{
+ char *nextp = rawstring;
+ bool done = false;
+
+ *namelist = NIL;
+
+ while (scanner_isspace(*nextp))
+ nextp++; /* skip leading whitespace */
+
+ if (*nextp == '\0')
+ return true; /* allow empty string */
+
+ /* At the top of the loop, we are at start of a new identifier. */
+ do
+ {
+ char *curname;
+ char *endp;
+
+ if (*nextp == '"')
+ {
+ /* Quoted name --- collapse quote-quote pairs, no downcasing */
+ curname = nextp + 1;
+ for (;;)
+ {
+ endp = strchr(nextp + 1, '"');
+ if (endp == NULL)
+ return false; /* mismatched quotes */
+ if (endp[1] != '"')
+ break; /* found end of quoted name */
+ /* Collapse adjacent quotes into one quote, and look again */
+ memmove(endp, endp + 1, strlen(endp));
+ nextp = endp;
+ }
+ /* endp now points at the terminating quote */
+ nextp = endp + 1;
+ }
+ else
+ {
+ /* Unquoted name --- extends to separator or whitespace */
+ char *downname;
+ int len;
+
+ curname = nextp;
+ while (*nextp && *nextp != separator &&
+ !scanner_isspace(*nextp))
+ nextp++;
+ endp = nextp;
+ if (curname == nextp)
+ return false; /* empty unquoted name not allowed */
+
+ /*
+ * Downcase the identifier, using same code as main lexer does.
+ *
+ * XXX because we want to overwrite the input in-place, we cannot
+ * support a downcasing transformation that increases the string
+ * length. This is not a problem given the current implementation
+ * of downcase_truncate_identifier, but we'll probably have to do
+ * something about this someday.
+ */
+ len = endp - curname;
+ downname = downcase_truncate_identifier(curname, len, false);
+ Assert(strlen(downname) <= len);
+ strncpy(curname, downname, len); /* strncpy is required here */
+ pfree(downname);
+ }
+
+ while (scanner_isspace(*nextp))
+ nextp++; /* skip trailing whitespace */
+
+ if (*nextp == separator)
+ {
+ nextp++;
+ while (scanner_isspace(*nextp))
+ nextp++; /* skip leading whitespace for next */
+ /* we expect another name, so done remains false */
+ }
+ else if (*nextp == '\0')
+ done = true;
+ else
+ return false; /* invalid syntax */
+
+ /* Now safe to overwrite separator with a null */
+ *endp = '\0';
+
+ /* Truncate name if it's overlength */
+ truncate_identifier(curname, strlen(curname), false);
+
+ /*
+ * Finished isolating current name --- add it to list
+ */
+ *namelist = lappend(*namelist, curname);
+
+ /* Loop back if we didn't reach end of string */
+ } while (!done);
+
+ return true;
+}
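
The quoted-name branch hinges on one idiom: each doubled quote inside a quoted identifier is collapsed in place with memmove(), and the scan resumes at the collapsed position. A standalone sketch of just that step, omitting the separator and whitespace handling above:

#include <stdio.h>
#include <string.h>

/*
 * Given p pointing at the opening '"' of a quoted identifier, collapse any
 * doubled quotes in place and return a pointer just past the closing quote,
 * or NULL if the quotes are mismatched.
 */
static char *
scan_quoted(char *p)
{
    char       *endp;

    for (;;)
    {
        endp = strchr(p + 1, '"');
        if (endp == NULL)
            return NULL;        /* mismatched quotes */
        if (endp[1] != '"')
            break;              /* found the terminating quote */
        /* "" inside the name becomes a single ", then keep scanning */
        memmove(endp, endp + 1, strlen(endp));
        p = endp;
    }
    return endp + 1;
}

int
main(void)
{
    char        buf[] = "\"say \"\"hi\"\"\" rest";
    char       *after = scan_quoted(buf);

    printf("%s\n", after);      /* prints " rest" */
    printf("%s\n", buf);        /* doubled quotes collapsed in place */
    return 0;
}
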
+
+
+/*
+ * SplitDirectoriesString --- parse a string containing file/directory names
+ *
+ * This works fine on file names too; the function name is historical.
+ *
+ * This is similar to SplitIdentifierString, except that the parsing
+ * rules are meant to handle pathnames instead of identifiers: there is
+ * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
+ * and we apply canonicalize_path() to each extracted string. Because of the
+ * last, the returned strings are separately palloc'd rather than being
+ * pointers into rawstring --- but we still scribble on rawstring.
+ *
+ * Inputs:
+ * rawstring: the input string; must be modifiable!
+ * separator: the separator punctuation expected between directories
+ * (typically ',' or ';'). Whitespace may also appear around
+ * directories.
+ * Outputs:
+ * namelist: filled with a palloc'd list of directory names.
+ * Caller should list_free_deep() this even on error return.
+ *
+ * Returns true if okay, false if there is a syntax error in the string.
+ *
+ * Note that an empty string is considered okay here.
+ */
+bool
+SplitDirectoriesString(char *rawstring, char separator,
+ List **namelist)
+{
+ char *nextp = rawstring;
+ bool done = false;
+
+ *namelist = NIL;
+
+ while (scanner_isspace(*nextp))
+ nextp++; /* skip leading whitespace */
+
+ if (*nextp == '\0')
+ return true; /* allow empty string */
+
+ /* At the top of the loop, we are at start of a new directory. */
+ do
+ {
+ char *curname;
+ char *endp;
+
+ if (*nextp == '"')
+ {
+ /* Quoted name --- collapse quote-quote pairs */
+ curname = nextp + 1;
+ for (;;)
+ {
+ endp = strchr(nextp + 1, '"');
+ if (endp == NULL)
+ return false; /* mismatched quotes */
+ if (endp[1] != '"')
+ break; /* found end of quoted name */
+ /* Collapse adjacent quotes into one quote, and look again */
+ memmove(endp, endp + 1, strlen(endp));
+ nextp = endp;
+ }
+ /* endp now points at the terminating quote */
+ nextp = endp + 1;
+ }
+ else
+ {
+ /* Unquoted name --- extends to separator or end of string */
+ curname = endp = nextp;
+ while (*nextp && *nextp != separator)
+ {
+ /* trailing whitespace should not be included in name */
+ if (!scanner_isspace(*nextp))
+ endp = nextp + 1;
+ nextp++;
+ }
+ if (curname == endp)
+ return false; /* empty unquoted name not allowed */
+ }
+
+ while (scanner_isspace(*nextp))
+ nextp++; /* skip trailing whitespace */
+
+ if (*nextp == separator)
+ {
+ nextp++;
+ while (scanner_isspace(*nextp))
+ nextp++; /* skip leading whitespace for next */
+ /* we expect another name, so done remains false */
+ }
+ else if (*nextp == '\0')
+ done = true;
+ else
+ return false; /* invalid syntax */
+
+ /* Now safe to overwrite separator with a null */
+ *endp = '\0';
+
+ /* Truncate path if it's overlength */
+ if (strlen(curname) >= MAXPGPATH)
+ curname[MAXPGPATH - 1] = '\0';
+
+ /*
+ * Finished isolating current name --- add it to list
+ */
+ curname = pstrdup(curname);
+ canonicalize_path(curname);
+ *namelist = lappend(*namelist, curname);
+
+ /* Loop back if we didn't reach end of string */
+ } while (!done);
+
+ return true;
+}
+
+
+/*
+ * SplitGUCList --- parse a string containing identifiers or file names
+ *
+ * This is used to split the value of a GUC_LIST_QUOTE GUC variable, without
+ * presuming whether the elements will be taken as identifiers or file names.
+ * We assume the input has already been through flatten_set_variable_args(),
+ * so that we need never downcase (if appropriate, that was done already).
+ * Nor do we ever truncate, since we don't know the correct max length.
+ * We disallow embedded whitespace for simplicity (it shouldn't matter,
+ * because any embedded whitespace should have led to double-quoting).
+ * Otherwise the API is identical to SplitIdentifierString.
+ *
+ * XXX it's annoying to have so many copies of this string-splitting logic.
+ * However, it's not clear that having one function with a bunch of option
+ * flags would be much better.
+ *
+ * XXX there is a version of this function in src/bin/pg_dump/dumputils.c.
+ * Be sure to update that if you have to change this.
+ *
+ * Inputs:
+ * rawstring: the input string; must be overwritable! On return, it's
+ * been modified to contain the separated identifiers.
+ * separator: the separator punctuation expected between identifiers
+ * (typically '.' or ','). Whitespace may also appear around
+ * identifiers.
+ * Outputs:
+ * namelist: filled with a palloc'd list of pointers to identifiers within
+ * rawstring. Caller should list_free() this even on error return.
+ *
+ * Returns true if okay, false if there is a syntax error in the string.
+ */
+bool
+SplitGUCList(char *rawstring, char separator,
+ List **namelist)
+{
+ char *nextp = rawstring;
+ bool done = false;
+
+ *namelist = NIL;
+
+ while (scanner_isspace(*nextp))
+ nextp++; /* skip leading whitespace */
+
+ if (*nextp == '\0')
+ return true; /* allow empty string */
+
+ /* At the top of the loop, we are at start of a new identifier. */
+ do
+ {
+ char *curname;
+ char *endp;
+
+ if (*nextp == '"')
+ {
+ /* Quoted name --- collapse quote-quote pairs */
+ curname = nextp + 1;
+ for (;;)
+ {
+ endp = strchr(nextp + 1, '"');
+ if (endp == NULL)
+ return false; /* mismatched quotes */
+ if (endp[1] != '"')
+ break; /* found end of quoted name */
+ /* Collapse adjacent quotes into one quote, and look again */
+ memmove(endp, endp + 1, strlen(endp));
+ nextp = endp;
+ }
+ /* endp now points at the terminating quote */
+ nextp = endp + 1;
+ }
+ else
+ {
+ /* Unquoted name --- extends to separator or whitespace */
+ curname = nextp;
+ while (*nextp && *nextp != separator &&
+ !scanner_isspace(*nextp))
+ nextp++;
+ endp = nextp;
+ if (curname == nextp)
+ return false; /* empty unquoted name not allowed */
+ }
+
+ while (scanner_isspace(*nextp))
+ nextp++; /* skip trailing whitespace */
+
+ if (*nextp == separator)
+ {
+ nextp++;
+ while (scanner_isspace(*nextp))
+ nextp++; /* skip leading whitespace for next */
+ /* we expect another name, so done remains false */
+ }
+ else if (*nextp == '\0')
+ done = true;
+ else
+ return false; /* invalid syntax */
+
+ /* Now safe to overwrite separator with a null */
+ *endp = '\0';
+
+ /*
+ * Finished isolating current name --- add it to list
+ */
+ *namelist = lappend(*namelist, curname);
+
+ /* Loop back if we didn't reach end of string */
+ } while (!done);
+
+ return true;
+}
+
+
+/*****************************************************************************
+ * Comparison Functions used for bytea
+ *
+ * Note: btree indexes need these routines not to leak memory; therefore,
+ * be careful to free working copies of toasted datums. Most places don't
+ * need to be so careful.
+ *****************************************************************************/
+
+Datum
+byteaeq(PG_FUNCTION_ARGS)
+{
+ Datum arg1 = PG_GETARG_DATUM(0);
+ Datum arg2 = PG_GETARG_DATUM(1);
+ bool result;
+ Size len1,
+ len2;
+
+ /*
+ * We can use a fast path for unequal lengths, which might save us from
+ * having to detoast one or both values.
+ */
+ len1 = toast_raw_datum_size(arg1);
+ len2 = toast_raw_datum_size(arg2);
+ if (len1 != len2)
+ result = false;
+ else
+ {
+ bytea *barg1 = DatumGetByteaPP(arg1);
+ bytea *barg2 = DatumGetByteaPP(arg2);
+
+ result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
+ len1 - VARHDRSZ) == 0);
+
+ PG_FREE_IF_COPY(barg1, 0);
+ PG_FREE_IF_COPY(barg2, 1);
+ }
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+byteane(PG_FUNCTION_ARGS)
+{
+ Datum arg1 = PG_GETARG_DATUM(0);
+ Datum arg2 = PG_GETARG_DATUM(1);
+ bool result;
+ Size len1,
+ len2;
+
+ /*
+ * We can use a fast path for unequal lengths, which might save us from
+ * having to detoast one or both values.
+ */
+ len1 = toast_raw_datum_size(arg1);
+ len2 = toast_raw_datum_size(arg2);
+ if (len1 != len2)
+ result = true;
+ else
+ {
+ bytea *barg1 = DatumGetByteaPP(arg1);
+ bytea *barg2 = DatumGetByteaPP(arg2);
+
+ result = (memcmp(VARDATA_ANY(barg1), VARDATA_ANY(barg2),
+ len1 - VARHDRSZ) != 0);
+
+ PG_FREE_IF_COPY(barg1, 0);
+ PG_FREE_IF_COPY(barg2, 1);
+ }
+
+ PG_RETURN_BOOL(result);
+}
+
+Datum
+bytealt(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
+}
+
+Datum
+byteale(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
+}
+
+Datum
+byteagt(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
+}
+
+Datum
+byteage(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
+}
+
+Datum
+byteacmp(PG_FUNCTION_ARGS)
+{
+ bytea *arg1 = PG_GETARG_BYTEA_PP(0);
+ bytea *arg2 = PG_GETARG_BYTEA_PP(1);
+ int len1,
+ len2;
+ int cmp;
+
+ len1 = VARSIZE_ANY_EXHDR(arg1);
+ len2 = VARSIZE_ANY_EXHDR(arg2);
+
+ cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
+ if ((cmp == 0) && (len1 != len2))
+ cmp = (len1 < len2) ? -1 : 1;
+
+ PG_FREE_IF_COPY(arg1, 0);
+ PG_FREE_IF_COPY(arg2, 1);
+
+ PG_RETURN_INT32(cmp);
+}
+
+Datum
+bytea_sortsupport(PG_FUNCTION_ARGS)
+{
+ SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
+ MemoryContext oldcontext;
+
+ oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
+
+ /* Use generic string SortSupport, forcing "C" collation */
+ varstr_sortsupport(ssup, BYTEAOID, C_COLLATION_OID);
+
+ MemoryContextSwitchTo(oldcontext);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * appendStringInfoText
+ *
+ * Append a text to str.
+ * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
+ */
+static void
+appendStringInfoText(StringInfo str, const text *t)
+{
+ appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
+}
+
+/*
+ * replace_text
+ * replace all occurrences of 'old_sub_str' in 'orig_str'
+ * with 'new_sub_str' to form 'new_str'
+ *
+ * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
+ * otherwise returns 'new_str'
+ */
+Datum
+replace_text(PG_FUNCTION_ARGS)
+{
+ text *src_text = PG_GETARG_TEXT_PP(0);
+ text *from_sub_text = PG_GETARG_TEXT_PP(1);
+ text *to_sub_text = PG_GETARG_TEXT_PP(2);
+ int src_text_len;
+ int from_sub_text_len;
+ TextPositionState state;
+ text *ret_text;
+ int chunk_len;
+ char *curr_ptr;
+ char *start_ptr;
+ StringInfoData str;
+ bool found;
+
+ src_text_len = VARSIZE_ANY_EXHDR(src_text);
+ from_sub_text_len = VARSIZE_ANY_EXHDR(from_sub_text);
+
+ /* Return unmodified source string if empty source or pattern */
+ if (src_text_len < 1 || from_sub_text_len < 1)
+ {
+ PG_RETURN_TEXT_P(src_text);
+ }
+
+ text_position_setup(src_text, from_sub_text, PG_GET_COLLATION(), &state);
+
+ found = text_position_next(&state);
+
+ /* When the from_sub_text is not found, there is nothing to do. */
+ if (!found)
+ {
+ text_position_cleanup(&state);
+ PG_RETURN_TEXT_P(src_text);
+ }
+ curr_ptr = text_position_get_match_ptr(&state);
+ start_ptr = VARDATA_ANY(src_text);
+
+ initStringInfo(&str);
+
+ do
+ {
+ CHECK_FOR_INTERRUPTS();
+
+ /* copy the data skipped over by last text_position_next() */
+ chunk_len = curr_ptr - start_ptr;
+ appendBinaryStringInfo(&str, start_ptr, chunk_len);
+
+ appendStringInfoText(&str, to_sub_text);
+
+ start_ptr = curr_ptr + from_sub_text_len;
+
+ found = text_position_next(&state);
+ if (found)
+ curr_ptr = text_position_get_match_ptr(&state);
+ }
+ while (found);
+
+ /* copy trailing data */
+ chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
+ appendBinaryStringInfo(&str, start_ptr, chunk_len);
+
+ text_position_cleanup(&state);
+
+ ret_text = cstring_to_text_with_len(str.data, str.len);
+ pfree(str.data);
+
+ PG_RETURN_TEXT_P(ret_text);
+}
+
+/*
+ * check_replace_text_has_escape
+ *
+ * Returns 0 if text contains no backslashes that need processing.
+ * Returns 1 if text contains backslashes, but not regexp submatch specifiers.
+ * Returns 2 if text contains regexp submatch specifiers (\1 .. \9).
+ */
+static int
+check_replace_text_has_escape(const text *replace_text)
+{
+ int result = 0;
+ const char *p = VARDATA_ANY(replace_text);
+ const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
+
+ while (p < p_end)
+ {
+ /* Find next escape char, if any. */
+ p = memchr(p, '\\', p_end - p);
+ if (p == NULL)
+ break;
+ p++;
+ /* Note: a backslash at the end doesn't require extra processing. */
+ if (p < p_end)
+ {
+ if (*p >= '1' && *p <= '9')
+ return 2; /* Found a submatch specifier, so done */
+ result = 1; /* Found some other sequence, keep looking */
+ p++;
+ }
+ }
+ return result;
+}
+
+/*
+ * appendStringInfoRegexpSubstr
+ *
+ * Append replace_text to str, substituting regexp back references for
+ * \n escapes. start_ptr is the start of the match in the source string,
+ * at logical character position data_pos.
+ */
+static void
+appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
+ regmatch_t *pmatch,
+ char *start_ptr, int data_pos)
+{
+ const char *p = VARDATA_ANY(replace_text);
+ const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
+
+ while (p < p_end)
+ {
+ const char *chunk_start = p;
+ int so;
+ int eo;
+
+ /* Find next escape char, if any. */
+ p = memchr(p, '\\', p_end - p);
+ if (p == NULL)
+ p = p_end;
+
+ /* Copy the text we just scanned over, if any. */
+ if (p > chunk_start)
+ appendBinaryStringInfo(str, chunk_start, p - chunk_start);
+
+ /* Done if at end of string, else advance over escape char. */
+ if (p >= p_end)
+ break;
+ p++;
+
+ if (p >= p_end)
+ {
+ /* Escape at very end of input. Treat same as unexpected char */
+ appendStringInfoChar(str, '\\');
+ break;
+ }
+
+ if (*p >= '1' && *p <= '9')
+ {
+ /* Use the back reference of regexp. */
+ int idx = *p - '0';
+
+ so = pmatch[idx].rm_so;
+ eo = pmatch[idx].rm_eo;
+ p++;
+ }
+ else if (*p == '&')
+ {
+ /* Use the entire matched string. */
+ so = pmatch[0].rm_so;
+ eo = pmatch[0].rm_eo;
+ p++;
+ }
+ else if (*p == '\\')
+ {
+ /* \\ means transfer one \ to output. */
+ appendStringInfoChar(str, '\\');
+ p++;
+ continue;
+ }
+ else
+ {
+ /*
+ * If escape char is not followed by any expected char, just treat
+ * it as ordinary data to copy. (XXX would it be better to throw
+ * an error?)
+ */
+ appendStringInfoChar(str, '\\');
+ continue;
+ }
+
+ if (so >= 0 && eo >= 0)
+ {
+ /*
+ * Copy the text that is back reference of regexp. Note so and eo
+ * are counted in characters not bytes.
+ */
+ char *chunk_start;
+ int chunk_len;
+
+ Assert(so >= data_pos);
+ chunk_start = start_ptr;
+ chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
+ chunk_len = charlen_to_bytelen(chunk_start, eo - so);
+ appendBinaryStringInfo(str, chunk_start, chunk_len);
+ }
+ }
+}
+
+/*
+ * replace_text_regexp
+ *
+ * replace substring(s) in src_text that match pattern with replace_text.
+ * The replace_text can contain backslash markers to substitute
+ * (parts of) the matched text.
+ *
+ * cflags: regexp compile flags.
+ * collation: collation to use.
+ * search_start: the character (not byte) offset in src_text at which to
+ * begin searching.
+ * n: if 0, replace all matches; if > 0, replace only the N'th match.
+ */
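+/*
+ * For example, regexp_replace() is built on this:
+ *   regexp_replace('Thomas', '.[mN]a.', 'M')           ->  'ThM'
+ *   regexp_replace('foobarbaz', 'b(..)', 'X\1Y', 'g')  ->  'fooXarYXazY'
+ * (the 'g' flag corresponds to n = 0, i.e. replace all matches)
+ */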
+text *
+replace_text_regexp(text *src_text, text *pattern_text,
+ text *replace_text,
+ int cflags, Oid collation,
+ int search_start, int n)
+{
+ text *ret_text;
+ regex_t *re;
+ int src_text_len = VARSIZE_ANY_EXHDR(src_text);
+ int nmatches = 0;
+ StringInfoData buf;
+ regmatch_t pmatch[10]; /* main match, plus \1 to \9 */
+ int nmatch = lengthof(pmatch);
+ pg_wchar *data;
+ size_t data_len;
+ int data_pos;
+ char *start_ptr;
+ int escape_status;
+
+ initStringInfo(&buf);
+
+ /* Convert data string to wide characters. */
+ data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
+ data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
+
+ /* Check whether replace_text has escapes, especially regexp submatches. */
+ escape_status = check_replace_text_has_escape(replace_text);
+
+ /* If no regexp submatches, we can use REG_NOSUB. */
+ if (escape_status < 2)
+ {
+ cflags |= REG_NOSUB;
+ /* Also tell pg_regexec we only want the whole-match location. */
+ nmatch = 1;
+ }
+
+ /* Prepare the regexp. */
+ re = RE_compile_and_cache(pattern_text, cflags, collation);
+
+ /* start_ptr points to the data_pos'th character of src_text */
+ start_ptr = (char *) VARDATA_ANY(src_text);
+ data_pos = 0;
+
+ while (search_start <= data_len)
+ {
+ int regexec_result;
+
+ CHECK_FOR_INTERRUPTS();
+
+ regexec_result = pg_regexec(re,
+ data,
+ data_len,
+ search_start,
+ NULL, /* no details */
+ nmatch,
+ pmatch,
+ 0);
+
+ if (regexec_result == REG_NOMATCH)
+ break;
+
+ if (regexec_result != REG_OKAY)
+ {
+ char errMsg[100];
+
+ CHECK_FOR_INTERRUPTS();
+ pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+ errmsg("regular expression failed: %s", errMsg)));
+ }
+
+ /*
+ * Count matches, and decide whether to replace this match.
+ */
+ nmatches++;
+ if (n > 0 && nmatches != n)
+ {
+ /*
+ * No, so advance search_start, but not start_ptr/data_pos. (Thus,
+ * we treat the matched text as if it weren't matched, and copy it
+ * to the output later.)
+ */
+ search_start = pmatch[0].rm_eo;
+ if (pmatch[0].rm_so == pmatch[0].rm_eo)
+ search_start++;
+ continue;
+ }
+
+ /*
+ * Copy the text to the left of the match position. Note we are given
+ * character not byte indexes.
+ */
+ if (pmatch[0].rm_so - data_pos > 0)
+ {
+ int chunk_len;
+
+ chunk_len = charlen_to_bytelen(start_ptr,
+ pmatch[0].rm_so - data_pos);
+ appendBinaryStringInfo(&buf, start_ptr, chunk_len);
+
+ /*
+ * Advance start_ptr over that text, to avoid multiple rescans of
+ * it if the replace_text contains multiple back-references.
+ */
+ start_ptr += chunk_len;
+ data_pos = pmatch[0].rm_so;
+ }
+
+ /*
+ * Copy the replace_text, processing escapes if any are present.
+ */
+ if (escape_status > 0)
+ appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
+ start_ptr, data_pos);
+ else
+ appendStringInfoText(&buf, replace_text);
+
+ /* Advance start_ptr and data_pos over the matched text. */
+ start_ptr += charlen_to_bytelen(start_ptr,
+ pmatch[0].rm_eo - data_pos);
+ data_pos = pmatch[0].rm_eo;
+
+ /*
+ * If we only want to replace one occurrence, we're done.
+ */
+ if (n > 0)
+ break;
+
+ /*
+ * Advance search position. Normally we start the next search at the
+ * end of the previous match; but if the match was of zero length, we
+ * have to advance by one character, or we'd just find the same match
+ * again.
+ */
+ search_start = data_pos;
+ if (pmatch[0].rm_so == pmatch[0].rm_eo)
+ search_start++;
+ }
+
+ /*
+ * Copy the text to the right of the last match.
+ */
+ if (data_pos < data_len)
+ {
+ int chunk_len;
+
+ chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
+ appendBinaryStringInfo(&buf, start_ptr, chunk_len);
+ }
+
+ ret_text = cstring_to_text_with_len(buf.data, buf.len);
+ pfree(buf.data);
+ pfree(data);
+
+ return ret_text;
+}
+
+/*
+ * split_part
+ * parse input string based on provided field separator
+ * return N'th item (1 based, negative counts from end)
+ */
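+/*
+ * For example:
+ *   split_part('abc~@~def~@~ghi', '~@~', 2)  ->  'def'
+ *   split_part('abc,def,ghi,jkl', ',', -2)   ->  'ghi'
+ */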
+Datum
+split_part(PG_FUNCTION_ARGS)
+{
+ text *inputstring = PG_GETARG_TEXT_PP(0);
+ text *fldsep = PG_GETARG_TEXT_PP(1);
+ int fldnum = PG_GETARG_INT32(2);
+ int inputstring_len;
+ int fldsep_len;
+ TextPositionState state;
+ char *start_ptr;
+ char *end_ptr;
+ text *result_text;
+ bool found;
+
+ /* field number is 1 based */
+ if (fldnum == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("field position must not be zero")));
+
+ inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
+ fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
+
+ /* return empty string for empty input string */
+ if (inputstring_len < 1)
+ PG_RETURN_TEXT_P(cstring_to_text(""));
+
+ /* handle empty field separator */
+ if (fldsep_len < 1)
+ {
+ /* if first or last field, return input string, else empty string */
+ if (fldnum == 1 || fldnum == -1)
+ PG_RETURN_TEXT_P(inputstring);
+ else
+ PG_RETURN_TEXT_P(cstring_to_text(""));
+ }
+
+ /* find the first field separator */
+ text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
+
+ found = text_position_next(&state);
+
+ /* special case if fldsep not found at all */
+ if (!found)
+ {
+ text_position_cleanup(&state);
+ /* if first or last field, return input string, else empty string */
+ if (fldnum == 1 || fldnum == -1)
+ PG_RETURN_TEXT_P(inputstring);
+ else
+ PG_RETURN_TEXT_P(cstring_to_text(""));
+ }
+
+ /*
+ * take care of a negative field number (i.e. count from the right) by
+ * converting to a positive field number; we need total number of fields
+ */
+ if (fldnum < 0)
+ {
+ /* we found a fldsep, so there are at least two fields */
+ int numfields = 2;
+
+ while (text_position_next(&state))
+ numfields++;
+
+ /* special case of last field does not require an extra pass */
+ if (fldnum == -1)
+ {
+ start_ptr = text_position_get_match_ptr(&state) + fldsep_len;
+ end_ptr = VARDATA_ANY(inputstring) + inputstring_len;
+ text_position_cleanup(&state);
+ PG_RETURN_TEXT_P(cstring_to_text_with_len(start_ptr,
+ end_ptr - start_ptr));
+ }
+
+ /* else, convert fldnum to positive notation */
+ fldnum += numfields + 1;
+
+ /* if nonexistent field, return empty string */
+ if (fldnum <= 0)
+ {
+ text_position_cleanup(&state);
+ PG_RETURN_TEXT_P(cstring_to_text(""));
+ }
+
+ /* reset to pointing at first match, but now with positive fldnum */
+ text_position_reset(&state);
+ found = text_position_next(&state);
+ Assert(found);
+ }
+
+ /* identify bounds of first field */
+ start_ptr = VARDATA_ANY(inputstring);
+ end_ptr = text_position_get_match_ptr(&state);
+
+ while (found && --fldnum > 0)
+ {
+ /* identify bounds of next field */
+ start_ptr = end_ptr + fldsep_len;
+ found = text_position_next(&state);
+ if (found)
+ end_ptr = text_position_get_match_ptr(&state);
+ }
+
+ text_position_cleanup(&state);
+
+ if (fldnum > 0)
+ {
+ /* N'th field separator not found */
+ /* if last field requested, return it, else empty string */
+ if (fldnum == 1)
+ {
+ int last_len = start_ptr - VARDATA_ANY(inputstring);
+
+ result_text = cstring_to_text_with_len(start_ptr,
+ inputstring_len - last_len);
+ }
+ else
+ result_text = cstring_to_text("");
+ }
+ else
+ {
+ /* non-last field requested */
+ result_text = cstring_to_text_with_len(start_ptr, end_ptr - start_ptr);
+ }
+
+ PG_RETURN_TEXT_P(result_text);
+}
+
+/*
+ * Convenience function to return true when two text params are equal.
+ */
+static bool
+text_isequal(text *txt1, text *txt2, Oid collid)
+{
+ return DatumGetBool(DirectFunctionCall2Coll(texteq,
+ collid,
+ PointerGetDatum(txt1),
+ PointerGetDatum(txt2)));
+}
+
+/*
+ * text_to_array
+ * parse input string and return text array of elements,
+ * based on provided field separator
+ */
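+/*
+ * For illustration, the SQL-level string_to_array() built on this:
+ *   string_to_array('xx~^~yy~^~zz', '~^~', 'yy')  ->  {xx,NULL,zz}
+ *   string_to_array('abc', NULL)                  ->  {a,b,c}
+ */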
+Datum
+text_to_array(PG_FUNCTION_ARGS)
+{
+ SplitTextOutputData tstate;
+
+ /* For array output, tstate should start as all zeroes */
+ memset(&tstate, 0, sizeof(tstate));
+
+ if (!split_text(fcinfo, &tstate))
+ PG_RETURN_NULL();
+
+ if (tstate.astate == NULL)
+ PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
+
+ PG_RETURN_ARRAYTYPE_P(makeArrayResult(tstate.astate,
+ CurrentMemoryContext));
+}
+
+/*
+ * text_to_array_null
+ * parse input string and return text array of elements,
+ * based on provided field separator and null string
+ *
+ * This is a separate entry point only to prevent the regression tests from
+ * complaining about different argument sets for the same internal function.
+ */
+Datum
+text_to_array_null(PG_FUNCTION_ARGS)
+{
+ return text_to_array(fcinfo);
+}
+
+/*
+ * text_to_table
+ * parse input string and return table of elements,
+ * based on provided field separator
+ */
+Datum
+text_to_table(PG_FUNCTION_ARGS)
+{
+ ReturnSetInfo *rsi = (ReturnSetInfo *) fcinfo->resultinfo;
+ SplitTextOutputData tstate;
+
+ tstate.astate = NULL;
+ InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC);
+ tstate.tupstore = rsi->setResult;
+ tstate.tupdesc = rsi->setDesc;
+
+ (void) split_text(fcinfo, &tstate);
+
+ return (Datum) 0;
+}
+
+/*
+ * text_to_table_null
+ * parse input string and return table of elements,
+ * based on provided field separator and null string
+ *
+ * This is a separate entry point only to prevent the regression tests from
+ * complaining about different argument sets for the same internal function.
+ */
+Datum
+text_to_table_null(PG_FUNCTION_ARGS)
+{
+ return text_to_table(fcinfo);
+}
+
+/*
+ * Common code for text_to_array, text_to_array_null, text_to_table
+ * and text_to_table_null functions.
+ *
+ * These are not strict so we have to test for null inputs explicitly.
+ * Returns false if result is to be null, else returns true.
+ *
+ * Note that if the result is valid but empty (zero elements), we return
+ * without changing *tstate --- caller must handle that case, too.
+ */
+static bool
+split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate)
+{
+ text *inputstring;
+ text *fldsep;
+ text *null_string;
+ Oid collation = PG_GET_COLLATION();
+ int inputstring_len;
+ int fldsep_len;
+ char *start_ptr;
+ text *result_text;
+
+ /* when input string is NULL, then result is NULL too */
+ if (PG_ARGISNULL(0))
+ return false;
+
+ inputstring = PG_GETARG_TEXT_PP(0);
+
+ /* fldsep can be NULL */
+ if (!PG_ARGISNULL(1))
+ fldsep = PG_GETARG_TEXT_PP(1);
+ else
+ fldsep = NULL;
+
+ /* null_string can be NULL or omitted */
+ if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
+ null_string = PG_GETARG_TEXT_PP(2);
+ else
+ null_string = NULL;
+
+ if (fldsep != NULL)
+ {
+ /*
+ * Normal case with non-null fldsep. Use the text_position machinery
+ * to search for occurrences of fldsep.
+ */
+ TextPositionState state;
+
+ inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
+ fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
+
+ /* return empty set for empty input string */
+ if (inputstring_len < 1)
+ return true;
+
+ /* empty field separator: return input string as a one-element set */
+ if (fldsep_len < 1)
+ {
+ split_text_accum_result(tstate, inputstring,
+ null_string, collation);
+ return true;
+ }
+
+ text_position_setup(inputstring, fldsep, collation, &state);
+
+ start_ptr = VARDATA_ANY(inputstring);
+
+ for (;;)
+ {
+ bool found;
+ char *end_ptr;
+ int chunk_len;
+
+ CHECK_FOR_INTERRUPTS();
+
+ found = text_position_next(&state);
+ if (!found)
+ {
+ /* fetch last field */
+ chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
+ end_ptr = NULL; /* not used, but some compilers complain */
+ }
+ else
+ {
+ /* fetch non-last field */
+ end_ptr = text_position_get_match_ptr(&state);
+ chunk_len = end_ptr - start_ptr;
+ }
+
+ /* build a temp text datum to pass to split_text_accum_result */
+ result_text = cstring_to_text_with_len(start_ptr, chunk_len);
+
+ /* stash away this field */
+ split_text_accum_result(tstate, result_text,
+ null_string, collation);
+
+ pfree(result_text);
+
+ if (!found)
+ break;
+
+ start_ptr = end_ptr + fldsep_len;
+ }
+
+ text_position_cleanup(&state);
+ }
+ else
+ {
+ /*
+ * When fldsep is NULL, each character in the input string becomes a
+ * separate element in the result set. The separator is effectively
+ * the space between characters.
+ */
+ inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
+
+ start_ptr = VARDATA_ANY(inputstring);
+
+ while (inputstring_len > 0)
+ {
+ int chunk_len = pg_mblen(start_ptr);
+
+ CHECK_FOR_INTERRUPTS();
+
+ /* build a temp text datum to pass to split_text_accum_result */
+ result_text = cstring_to_text_with_len(start_ptr, chunk_len);
+
+ /* stash away this field */
+ split_text_accum_result(tstate, result_text,
+ null_string, collation);
+
+ pfree(result_text);
+
+ start_ptr += chunk_len;
+ inputstring_len -= chunk_len;
+ }
+ }
+
+ return true;
+}
+
+/*
+ * Add text item to result set (table or array).
+ *
+ * This is also responsible for checking to see if the item matches
+ * the null_string, in which case we should emit NULL instead.
+ */
+static void
+split_text_accum_result(SplitTextOutputData *tstate,
+ text *field_value,
+ text *null_string,
+ Oid collation)
+{
+ bool is_null = false;
+
+ if (null_string && text_isequal(field_value, null_string, collation))
+ is_null = true;
+
+ if (tstate->tupstore)
+ {
+ Datum values[1];
+ bool nulls[1];
+
+ values[0] = PointerGetDatum(field_value);
+ nulls[0] = is_null;
+
+ tuplestore_putvalues(tstate->tupstore,
+ tstate->tupdesc,
+ values,
+ nulls);
+ }
+ else
+ {
+ tstate->astate = accumArrayResult(tstate->astate,
+ PointerGetDatum(field_value),
+ is_null,
+ TEXTOID,
+ CurrentMemoryContext);
+ }
+}
+
+/*
+ * array_to_text
+ * concatenate Cstring representation of input array elements
+ * using provided field separator
+ */
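+/*
+ * For illustration, the SQL-level array_to_string() built on this:
+ *   array_to_string(ARRAY[1, 2, 3, NULL, 5], ',', '*')  ->  '1,2,3,*,5'
+ *   array_to_string(ARRAY[1, NULL, 2], ',')             ->  '1,2'
+ */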
+Datum
+array_to_text(PG_FUNCTION_ARGS)
+{
+ ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
+ char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
+
+ PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
+}
+
+/*
+ * array_to_text_null
+ * concatenate Cstring representation of input array elements
+ * using provided field separator and null string
+ *
+ * This version is not strict so we have to test for null inputs explicitly.
+ */
+Datum
+array_to_text_null(PG_FUNCTION_ARGS)
+{
+ ArrayType *v;
+ char *fldsep;
+ char *null_string;
+
+ /* returns NULL when first or second parameter is NULL */
+ if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
+ PG_RETURN_NULL();
+
+ v = PG_GETARG_ARRAYTYPE_P(0);
+ fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
+
+ /* NULL null string is passed through as a null pointer */
+ if (!PG_ARGISNULL(2))
+ null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
+ else
+ null_string = NULL;
+
+ PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
+}
+
+/*
+ * common code for array_to_text and array_to_text_null functions
+ */
+static text *
+array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
+ const char *fldsep, const char *null_string)
+{
+ text *result;
+ int nitems,
+ *dims,
+ ndims;
+ Oid element_type;
+ int typlen;
+ bool typbyval;
+ char typalign;
+ StringInfoData buf;
+ bool printed = false;
+ char *p;
+ bits8 *bitmap;
+ int bitmask;
+ int i;
+ ArrayMetaState *my_extra;
+
+ ndims = ARR_NDIM(v);
+ dims = ARR_DIMS(v);
+ nitems = ArrayGetNItems(ndims, dims);
+
+ /* if there are no elements, return an empty string */
+ if (nitems == 0)
+ return cstring_to_text_with_len("", 0);
+
+ element_type = ARR_ELEMTYPE(v);
+ initStringInfo(&buf);
+
+ /*
+ * We arrange to look up info about element type, including its output
+ * conversion proc, only once per series of calls, assuming the element
+ * type doesn't change underneath us.
+ */
+ my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
+ if (my_extra == NULL)
+ {
+ fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(ArrayMetaState));
+ my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
+ my_extra->element_type = ~element_type;
+ }
+
+ if (my_extra->element_type != element_type)
+ {
+ /*
+ * Get info about element type, including its output conversion proc
+ */
+ get_type_io_data(element_type, IOFunc_output,
+ &my_extra->typlen, &my_extra->typbyval,
+ &my_extra->typalign, &my_extra->typdelim,
+ &my_extra->typioparam, &my_extra->typiofunc);
+ fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
+ fcinfo->flinfo->fn_mcxt);
+ my_extra->element_type = element_type;
+ }
+ typlen = my_extra->typlen;
+ typbyval = my_extra->typbyval;
+ typalign = my_extra->typalign;
+
+ p = ARR_DATA_PTR(v);
+ bitmap = ARR_NULLBITMAP(v);
+ bitmask = 1;
+
+ for (i = 0; i < nitems; i++)
+ {
+ Datum itemvalue;
+ char *value;
+
+ /* Get source element, checking for NULL */
+ if (bitmap && (*bitmap & bitmask) == 0)
+ {
+ /* if null_string is NULL, we just ignore null elements */
+ if (null_string != NULL)
+ {
+ if (printed)
+ appendStringInfo(&buf, "%s%s", fldsep, null_string);
+ else
+ appendStringInfoString(&buf, null_string);
+ printed = true;
+ }
+ }
+ else
+ {
+ itemvalue = fetch_att(p, typbyval, typlen);
+
+ value = OutputFunctionCall(&my_extra->proc, itemvalue);
+
+ if (printed)
+ appendStringInfo(&buf, "%s%s", fldsep, value);
+ else
+ appendStringInfoString(&buf, value);
+ printed = true;
+
+ p = att_addlength_pointer(p, typlen, p);
+ p = (char *) att_align_nominal(p, typalign);
+ }
+
+ /* advance bitmap pointer if any */
+ if (bitmap)
+ {
+ bitmask <<= 1;
+ if (bitmask == 0x100)
+ {
+ bitmap++;
+ bitmask = 1;
+ }
+ }
+ }
+
+ result = cstring_to_text_with_len(buf.data, buf.len);
+ pfree(buf.data);
+
+ return result;
+}
+
+#define HEXBASE 16
+/*
+ * Convert an int32 to a string containing a base 16 (hex) representation of
+ * the number.
+ */
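+/*
+ * For example:
+ *   to_hex(2147483647)  ->  '7fffffff'
+ *   to_hex(-1)          ->  'ffffffff'   (the int4 bit pattern)
+ */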
+Datum
+to_hex32(PG_FUNCTION_ARGS)
+{
+ uint32 value = (uint32) PG_GETARG_INT32(0);
+ char *ptr;
+ const char *digits = "0123456789abcdef";
+ char buf[32]; /* bigger than needed, but reasonable */
+
+ ptr = buf + sizeof(buf) - 1;
+ *ptr = '\0';
+
+ do
+ {
+ *--ptr = digits[value % HEXBASE];
+ value /= HEXBASE;
+ } while (ptr > buf && value);
+
+ PG_RETURN_TEXT_P(cstring_to_text(ptr));
+}
+
+/*
+ * Convert an int64 to a string containing a base 16 (hex) representation of
+ * the number.
+ */
+Datum
+to_hex64(PG_FUNCTION_ARGS)
+{
+ uint64 value = (uint64) PG_GETARG_INT64(0);
+ char *ptr;
+ const char *digits = "0123456789abcdef";
+ char buf[32]; /* bigger than needed, but reasonable */
+
+ ptr = buf + sizeof(buf) - 1;
+ *ptr = '\0';
+
+ do
+ {
+ *--ptr = digits[value % HEXBASE];
+ value /= HEXBASE;
+ } while (ptr > buf && value);
+
+ PG_RETURN_TEXT_P(cstring_to_text(ptr));
+}
+
+/*
+ * Return the size of a datum, possibly compressed
+ *
+ * Works on any data type
+ */
+Datum
+pg_column_size(PG_FUNCTION_ARGS)
+{
+ Datum value = PG_GETARG_DATUM(0);
+ int32 result;
+ int typlen;
+
+ /* On first call, get the input type's typlen, and save at *fn_extra */
+ if (fcinfo->flinfo->fn_extra == NULL)
+ {
+ /* Lookup the datatype of the supplied argument */
+ Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
+
+ typlen = get_typlen(argtypeid);
+ if (typlen == 0) /* should not happen */
+ elog(ERROR, "cache lookup failed for type %u", argtypeid);
+
+ fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(int));
+ *((int *) fcinfo->flinfo->fn_extra) = typlen;
+ }
+ else
+ typlen = *((int *) fcinfo->flinfo->fn_extra);
+
+ if (typlen == -1)
+ {
+ /* varlena type, possibly toasted */
+ result = toast_datum_size(value);
+ }
+ else if (typlen == -2)
+ {
+ /* cstring */
+ result = strlen(DatumGetCString(value)) + 1;
+ }
+ else
+ {
+ /* ordinary fixed-width type */
+ result = typlen;
+ }
+
+ PG_RETURN_INT32(result);
+}
+
+/*
+ * Return the compression method stored in the compressed attribute. Return
+ * NULL for non varlena type or uncompressed data.
+ */
+Datum
+pg_column_compression(PG_FUNCTION_ARGS)
+{
+ int typlen;
+ char *result;
+ ToastCompressionId cmid;
+
+ /* On first call, get the input type's typlen, and save at *fn_extra */
+ if (fcinfo->flinfo->fn_extra == NULL)
+ {
+ /* Lookup the datatype of the supplied argument */
+ Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
+
+ typlen = get_typlen(argtypeid);
+ if (typlen == 0) /* should not happen */
+ elog(ERROR, "cache lookup failed for type %u", argtypeid);
+
+ fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ sizeof(int));
+ *((int *) fcinfo->flinfo->fn_extra) = typlen;
+ }
+ else
+ typlen = *((int *) fcinfo->flinfo->fn_extra);
+
+ if (typlen != -1)
+ PG_RETURN_NULL();
+
+ /* get the compression method id stored in the compressed varlena */
+ cmid = toast_get_compression_id((struct varlena *)
+ DatumGetPointer(PG_GETARG_DATUM(0)));
+ if (cmid == TOAST_INVALID_COMPRESSION_ID)
+ PG_RETURN_NULL();
+
+ /* convert compression method id to compression method name */
+ switch (cmid)
+ {
+ case TOAST_PGLZ_COMPRESSION_ID:
+ result = "pglz";
+ break;
+ case TOAST_LZ4_COMPRESSION_ID:
+ result = "lz4";
+ break;
+ default:
+ elog(ERROR, "invalid compression method id %d", cmid);
+ }
+
+ PG_RETURN_TEXT_P(cstring_to_text(result));
+}
+
+/*
+ * string_agg - Concatenates values and returns string.
+ *
+ * Syntax: string_agg(value text, delimiter text) RETURNS text
+ *
+ * Note: Any NULL values are ignored. The first-call delimiter isn't
+ * actually used at all, and on subsequent calls the delimiter precedes
+ * the associated value.
+ */
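+/*
+ * For illustration:
+ *   string_agg(v, ', ' ORDER BY v) over the values 'b', NULL, 'a', 'c'
+ *   produces 'a, b, c' -- NULLs contribute nothing, and the delimiter
+ *   appears only between retained values.
+ */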
+
+/* subroutine to initialize state */
+static StringInfo
+makeStringAggState(FunctionCallInfo fcinfo)
+{
+ StringInfo state;
+ MemoryContext aggcontext;
+ MemoryContext oldcontext;
+
+ if (!AggCheckCallContext(fcinfo, &aggcontext))
+ {
+ /* cannot be called directly because of internal-type argument */
+ elog(ERROR, "string_agg_transfn called in non-aggregate context");
+ }
+
+ /*
+ * Create state in aggregate context. It'll stay there across subsequent
+ * calls.
+ */
+ oldcontext = MemoryContextSwitchTo(aggcontext);
+ state = makeStringInfo();
+ MemoryContextSwitchTo(oldcontext);
+
+ return state;
+}
+
+Datum
+string_agg_transfn(PG_FUNCTION_ARGS)
+{
+ StringInfo state;
+
+ state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
+
+ /* Append the value unless null. */
+ if (!PG_ARGISNULL(1))
+ {
+ /* On the first time through, we ignore the delimiter. */
+ if (state == NULL)
+ state = makeStringAggState(fcinfo);
+ else if (!PG_ARGISNULL(2))
+ appendStringInfoText(state, PG_GETARG_TEXT_PP(2)); /* delimiter */
+
+ appendStringInfoText(state, PG_GETARG_TEXT_PP(1)); /* value */
+ }
+
+ /*
+ * The transition type for string_agg() is declared to be "internal",
+ * which is a pass-by-value type the same size as a pointer.
+ */
+ PG_RETURN_POINTER(state);
+}
+
+Datum
+string_agg_finalfn(PG_FUNCTION_ARGS)
+{
+ StringInfo state;
+
+ /* cannot be called directly because of internal-type argument */
+ Assert(AggCheckCallContext(fcinfo, NULL));
+
+ state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
+
+ if (state != NULL)
+ PG_RETURN_TEXT_P(cstring_to_text_with_len(state->data, state->len));
+ else
+ PG_RETURN_NULL();
+}
+
+/*
+ * Prepare cache with fmgr info for the output functions of the datatypes of
+ * the arguments of a concat-like function, beginning with argument "argidx".
+ * (Arguments before that will have corresponding slots in the resulting
+ * FmgrInfo array, but we don't fill those slots.)
+ */
+static FmgrInfo *
+build_concat_foutcache(FunctionCallInfo fcinfo, int argidx)
+{
+ FmgrInfo *foutcache;
+ int i;
+
+ /* We keep the info in fn_mcxt so it survives across calls */
+ foutcache = (FmgrInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+ PG_NARGS() * sizeof(FmgrInfo));
+
+ for (i = argidx; i < PG_NARGS(); i++)
+ {
+ Oid valtype;
+ Oid typOutput;
+ bool typIsVarlena;
+
+ valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
+ if (!OidIsValid(valtype))
+ elog(ERROR, "could not determine data type of concat() input");
+
+ getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
+ fmgr_info_cxt(typOutput, &foutcache[i], fcinfo->flinfo->fn_mcxt);
+ }
+
+ fcinfo->flinfo->fn_extra = foutcache;
+
+ return foutcache;
+}
+
+/*
+ * Implementation of both concat() and concat_ws().
+ *
+ * sepstr is the separator string to place between values.
+ * argidx identifies the first argument to concatenate (counting from zero);
+ * note that this must be constant across any one series of calls.
+ *
+ * Returns NULL if result should be NULL, else text value.
+ */
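+/*
+ * For example:
+ *   concat('abcde', 2, NULL, 22)          ->  'abcde222'
+ *   concat_ws(',', 'abcde', 2, NULL, 22)  ->  'abcde,2,22'
+ */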
+static text *
+concat_internal(const char *sepstr, int argidx,
+ FunctionCallInfo fcinfo)
+{
+ text *result;
+ StringInfoData str;
+ FmgrInfo *foutcache;
+ bool first_arg = true;
+ int i;
+
+ /*
+ * concat(VARIADIC some-array) is essentially equivalent to
+ * array_to_text(), ie concat the array elements with the given separator.
+ * So we just pass the case off to that code.
+ */
+ if (get_fn_expr_variadic(fcinfo->flinfo))
+ {
+ ArrayType *arr;
+
+ /* Should have just the one argument */
+ Assert(argidx == PG_NARGS() - 1);
+
+ /* concat(VARIADIC NULL) is defined as NULL */
+ if (PG_ARGISNULL(argidx))
+ return NULL;
+
+ /*
+ * Non-null argument had better be an array. We assume that any call
+ * context that could let get_fn_expr_variadic return true will have
+ * checked that a VARIADIC-labeled parameter actually is an array. So
+ * it should be okay to just Assert that it's an array rather than
+ * doing a full-fledged error check.
+ */
+ Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, argidx))));
+
+ /* OK, safe to fetch the array value */
+ arr = PG_GETARG_ARRAYTYPE_P(argidx);
+
+ /*
+ * And serialize the array. We tell array_to_text to ignore null
+ * elements, which matches the behavior of the loop below.
+ */
+ return array_to_text_internal(fcinfo, arr, sepstr, NULL);
+ }
+
+ /* Normal case without explicit VARIADIC marker */
+ initStringInfo(&str);
+
+ /* Get output function info, building it if first time through */
+ foutcache = (FmgrInfo *) fcinfo->flinfo->fn_extra;
+ if (foutcache == NULL)
+ foutcache = build_concat_foutcache(fcinfo, argidx);
+
+ for (i = argidx; i < PG_NARGS(); i++)
+ {
+ if (!PG_ARGISNULL(i))
+ {
+ Datum value = PG_GETARG_DATUM(i);
+
+ /* add separator if appropriate */
+ if (first_arg)
+ first_arg = false;
+ else
+ appendStringInfoString(&str, sepstr);
+
+ /* call the appropriate type output function, append the result */
+ appendStringInfoString(&str,
+ OutputFunctionCall(&foutcache[i], value));
+ }
+ }
+
+ result = cstring_to_text_with_len(str.data, str.len);
+ pfree(str.data);
+
+ return result;
+}
+
+/*
+ * Concatenate all arguments. NULL arguments are ignored.
+ */
+Datum
+text_concat(PG_FUNCTION_ARGS)
+{
+ text *result;
+
+ result = concat_internal("", 0, fcinfo);
+ if (result == NULL)
+ PG_RETURN_NULL();
+ PG_RETURN_TEXT_P(result);
+}
+
+/*
+ * Concatenate all but first argument value with separators. The first
+ * parameter is used as the separator. NULL arguments are ignored.
+ */
+Datum
+text_concat_ws(PG_FUNCTION_ARGS)
+{
+ char *sep;
+ text *result;
+
+ /* return NULL when separator is NULL */
+ if (PG_ARGISNULL(0))
+ PG_RETURN_NULL();
+ sep = text_to_cstring(PG_GETARG_TEXT_PP(0));
+
+ result = concat_internal(sep, 1, fcinfo);
+ if (result == NULL)
+ PG_RETURN_NULL();
+ PG_RETURN_TEXT_P(result);
+}
+
+/*
+ * Return first n characters in the string. When n is negative,
+ * return all but last |n| characters.
+ */
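+/*
+ * For example:
+ *   left('abcde', 2)   ->  'ab'
+ *   left('abcde', -2)  ->  'abc'
+ */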
+Datum
+text_left(PG_FUNCTION_ARGS)
+{
+ int n = PG_GETARG_INT32(1);
+
+ if (n < 0)
+ {
+ text *str = PG_GETARG_TEXT_PP(0);
+ const char *p = VARDATA_ANY(str);
+ int len = VARSIZE_ANY_EXHDR(str);
+ int rlen;
+
+ n = pg_mbstrlen_with_len(p, len) + n;
+ rlen = pg_mbcharcliplen(p, len, n);
+ PG_RETURN_TEXT_P(cstring_to_text_with_len(p, rlen));
+ }
+ else
+ PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0), 1, n, false));
+}
+
+/*
+ * Return last n characters in the string. When n is negative,
+ * return all but first |n| characters.
+ */
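+/*
+ * For example:
+ *   right('abcde', 2)   ->  'de'
+ *   right('abcde', -2)  ->  'cde'
+ */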
+Datum
+text_right(PG_FUNCTION_ARGS)
+{
+ text *str = PG_GETARG_TEXT_PP(0);
+ const char *p = VARDATA_ANY(str);
+ int len = VARSIZE_ANY_EXHDR(str);
+ int n = PG_GETARG_INT32(1);
+ int off;
+
+ if (n < 0)
+ n = -n;
+ else
+ n = pg_mbstrlen_with_len(p, len) - n;
+ off = pg_mbcharcliplen(p, len, n);
+
+ PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
+}
+
+/*
+ * Return reversed string
+ */
+Datum
+text_reverse(PG_FUNCTION_ARGS)
+{
+ text *str = PG_GETARG_TEXT_PP(0);
+ const char *p = VARDATA_ANY(str);
+ int len = VARSIZE_ANY_EXHDR(str);
+ const char *endp = p + len;
+ text *result;
+ char *dst;
+
+ result = palloc(len + VARHDRSZ);
+ dst = (char *) VARDATA(result) + len;
+ SET_VARSIZE(result, len + VARHDRSZ);
+
+ if (pg_database_encoding_max_length() > 1)
+ {
+ /* multibyte version */
+ while (p < endp)
+ {
+ int sz;
+
+ sz = pg_mblen(p);
+ dst -= sz;
+ memcpy(dst, p, sz);
+ p += sz;
+ }
+ }
+ else
+ {
+ /* single byte version */
+ while (p < endp)
+ *(--dst) = *p++;
+ }
+
+ PG_RETURN_TEXT_P(result);
+}
+
+
+/*
+ * Support macros for text_format()
+ */
+#define TEXT_FORMAT_FLAG_MINUS 0x0001 /* is minus flag present? */
+
+#define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
+ do { \
+ if (++(ptr) >= (end_ptr)) \
+ ereport(ERROR, \
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
+ errmsg("unterminated format() type specifier"), \
+ errhint("For a single \"%%\" use \"%%%%\"."))); \
+ } while (0)
+
+/*
+ * Returns a formatted string
+ */
+Datum
+text_format(PG_FUNCTION_ARGS)
+{
+ text *fmt;
+ StringInfoData str;
+ const char *cp;
+ const char *start_ptr;
+ const char *end_ptr;
+ text *result;
+ int arg;
+ bool funcvariadic;
+ int nargs;
+ Datum *elements = NULL;
+ bool *nulls = NULL;
+ Oid element_type = InvalidOid;
+ Oid prev_type = InvalidOid;
+ Oid prev_width_type = InvalidOid;
+ FmgrInfo typoutputfinfo;
+ FmgrInfo typoutputinfo_width;
+
+ /* When format string is null, immediately return null */
+ if (PG_ARGISNULL(0))
+ PG_RETURN_NULL();
+
+ /* If argument is marked VARIADIC, expand array into elements */
+ if (get_fn_expr_variadic(fcinfo->flinfo))
+ {
+ ArrayType *arr;
+ int16 elmlen;
+ bool elmbyval;
+ char elmalign;
+ int nitems;
+
+ /* Should have just the one argument */
+ Assert(PG_NARGS() == 2);
+
+ /* If argument is NULL, we treat it as zero-length array */
+ if (PG_ARGISNULL(1))
+ nitems = 0;
+ else
+ {
+ /*
+ * Non-null argument had better be an array. We assume that any
+ * call context that could let get_fn_expr_variadic return true
+ * will have checked that a VARIADIC-labeled parameter actually is
+ * an array. So it should be okay to just Assert that it's an
+ * array rather than doing a full-fledged error check.
+ */
+ Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, 1))));
+
+ /* OK, safe to fetch the array value */
+ arr = PG_GETARG_ARRAYTYPE_P(1);
+
+ /* Get info about array element type */
+ element_type = ARR_ELEMTYPE(arr);
+ get_typlenbyvalalign(element_type,
+ &elmlen, &elmbyval, &elmalign);
+
+ /* Extract all array elements */
+ deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
+ &elements, &nulls, &nitems);
+ }
+
+ nargs = nitems + 1;
+ funcvariadic = true;
+ }
+ else
+ {
+ /* Non-variadic case, we'll process the arguments individually */
+ nargs = PG_NARGS();
+ funcvariadic = false;
+ }
+
+ /* Setup for main loop. */
+ fmt = PG_GETARG_TEXT_PP(0);
+ start_ptr = VARDATA_ANY(fmt);
+ end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
+ initStringInfo(&str);
+ arg = 1; /* next argument position to print */
+
+ /* Scan format string, looking for conversion specifiers. */
+ for (cp = start_ptr; cp < end_ptr; cp++)
+ {
+ int argpos;
+ int widthpos;
+ int flags;
+ int width;
+ Datum value;
+ bool isNull;
+ Oid typid;
+
+ /*
+ * If it's not the start of a conversion specifier, just copy it to
+ * the output buffer.
+ */
+ if (*cp != '%')
+ {
+ appendStringInfoCharMacro(&str, *cp);
+ continue;
+ }
+
+ ADVANCE_PARSE_POINTER(cp, end_ptr);
+
+ /* Easy case: %% outputs a single % */
+ if (*cp == '%')
+ {
+ appendStringInfoCharMacro(&str, *cp);
+ continue;
+ }
+
+ /* Parse the optional portions of the format specifier */
+ cp = text_format_parse_format(cp, end_ptr,
+ &argpos, &widthpos,
+ &flags, &width);
+
+ /*
+ * Next we should see the main conversion specifier. Whether or not
+ * an argument position was present, it's known that at least one
+ * character remains in the string at this point. Experience suggests
+ * that it's worth checking that that character is one of the expected
+ * ones before we try to fetch arguments, so as to produce the least
+ * confusing response to a mis-formatted specifier.
+ */
+ if (strchr("sIL", *cp) == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unrecognized format() type specifier \"%.*s\"",
+ pg_mblen(cp), cp),
+ errhint("For a single \"%%\" use \"%%%%\".")));
+
+ /* If indirect width was specified, get its value */
+ if (widthpos >= 0)
+ {
+ /* Collect the specified or next argument position */
+ if (widthpos > 0)
+ arg = widthpos;
+ if (arg >= nargs)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("too few arguments for format()")));
+
+ /* Get the value and type of the selected argument */
+ if (!funcvariadic)
+ {
+ value = PG_GETARG_DATUM(arg);
+ isNull = PG_ARGISNULL(arg);
+ typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
+ }
+ else
+ {
+ value = elements[arg - 1];
+ isNull = nulls[arg - 1];
+ typid = element_type;
+ }
+ if (!OidIsValid(typid))
+ elog(ERROR, "could not determine data type of format() input");
+
+ arg++;
+
+ /* We can treat NULL width the same as zero */
+ if (isNull)
+ width = 0;
+ else if (typid == INT4OID)
+ width = DatumGetInt32(value);
+ else if (typid == INT2OID)
+ width = DatumGetInt16(value);
+ else
+ {
+ /* For less-usual datatypes, convert to text then to int */
+ char *str;
+
+ if (typid != prev_width_type)
+ {
+ Oid typoutputfunc;
+ bool typIsVarlena;
+
+ getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
+ fmgr_info(typoutputfunc, &typoutputinfo_width);
+ prev_width_type = typid;
+ }
+
+ str = OutputFunctionCall(&typoutputinfo_width, value);
+
+ /* pg_strtoint32 will complain about bad data or overflow */
+ width = pg_strtoint32(str);
+
+ pfree(str);
+ }
+ }
+
+ /* Collect the specified or next argument position */
+ if (argpos > 0)
+ arg = argpos;
+ if (arg >= nargs)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("too few arguments for format()")));
+
+ /* Get the value and type of the selected argument */
+ if (!funcvariadic)
+ {
+ value = PG_GETARG_DATUM(arg);
+ isNull = PG_ARGISNULL(arg);
+ typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
+ }
+ else
+ {
+ value = elements[arg - 1];
+ isNull = nulls[arg - 1];
+ typid = element_type;
+ }
+ if (!OidIsValid(typid))
+ elog(ERROR, "could not determine data type of format() input");
+
+ arg++;
+
+ /*
+ * Get the appropriate typOutput function, reusing previous one if
+ * same type as previous argument. That's particularly useful in the
+ * variadic-array case, but often saves work even for ordinary calls.
+ */
+ if (typid != prev_type)
+ {
+ Oid typoutputfunc;
+ bool typIsVarlena;
+
+ getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
+ fmgr_info(typoutputfunc, &typoutputfinfo);
+ prev_type = typid;
+ }
+
+ /*
+ * And now we can format the value.
+ */
+ switch (*cp)
+ {
+ case 's':
+ case 'I':
+ case 'L':
+ text_format_string_conversion(&str, *cp, &typoutputfinfo,
+ value, isNull,
+ flags, width);
+ break;
+ default:
+ /* should not get here, because of previous check */
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("unrecognized format() type specifier \"%.*s\"",
+ pg_mblen(cp), cp),
+ errhint("For a single \"%%\" use \"%%%%\".")));
+ break;
+ }
+ }
+
+ /* Don't need deconstruct_array results anymore. */
+ if (elements != NULL)
+ pfree(elements);
+ if (nulls != NULL)
+ pfree(nulls);
+
+ /* Generate results. */
+ result = cstring_to_text_with_len(str.data, str.len);
+ pfree(str.data);
+
+ PG_RETURN_TEXT_P(result);
+}
+
+/*
+ * Parse contiguous digits as a decimal number.
+ *
+ * Returns true if some digits could be parsed.
+ * The value is returned into *value, and *ptr is advanced to the next
+ * character to be parsed.
+ *
+ * Note parsing invariant: at least one character is known available before
+ * string end (end_ptr) at entry, and this is still true at exit.
+ */
+static bool
+text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
+{
+ bool found = false;
+ const char *cp = *ptr;
+ int val = 0;
+
+ while (*cp >= '0' && *cp <= '9')
+ {
+ int8 digit = (*cp - '0');
+
+ if (unlikely(pg_mul_s32_overflow(val, 10, &val)) ||
+ unlikely(pg_add_s32_overflow(val, digit, &val)))
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("number is out of range")));
+ ADVANCE_PARSE_POINTER(cp, end_ptr);
+ found = true;
+ }
+
+ *ptr = cp;
+ *value = val;
+
+ return found;
+}
+
+/*
+ * Parse a format specifier (generally following the SUS printf spec).
+ *
+ * We have already advanced over the initial '%', and we are looking for
+ * [argpos][flags][width]type (but the type character is not consumed here).
+ *
+ * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
+ * Output parameters:
+ * argpos: argument position for value to be printed. -1 means unspecified.
+ * widthpos: argument position for width. Zero means the argument position
+ * was unspecified (ie, take the next arg) and -1 means no width
+ * argument (width was omitted or specified as a constant).
+ * flags: bitmask of flags.
+ * width: directly-specified width value. Zero means the width was omitted
+ * (note it's not necessary to distinguish this case from an explicit
+ * zero width value).
+ *
+ * The function result is the next character position to be parsed, ie, the
+ * location where the type character is/should be.
+ *
+ * Note parsing invariant: at least one character is known available before
+ * string end (end_ptr) at entry, and this is still true at exit.
+ */
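+/*
+ * For illustration of the specifier syntax handled here:
+ *   format('Testing %3$s, %2$s, %1$s', 'one', 'two', 'three')
+ *                                   ->  'Testing three, two, one'
+ *   format('|%10s|', 'foo')         ->  '|       foo|'
+ *   format('|%-10s|', 'foo')        ->  '|foo       |'
+ *   format('|%*s|', 10, 'foo')      ->  '|       foo|'
+ */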
+static const char *
+text_format_parse_format(const char *start_ptr, const char *end_ptr,
+ int *argpos, int *widthpos,
+ int *flags, int *width)
+{
+ const char *cp = start_ptr;
+ int n;
+
+ /* set defaults for output parameters */
+ *argpos = -1;
+ *widthpos = -1;
+ *flags = 0;
+ *width = 0;
+
+ /* try to identify first number */
+ if (text_format_parse_digits(&cp, end_ptr, &n))
+ {
+ if (*cp != '$')
+ {
+ /* Must be just a width and a type, so we're done */
+ *width = n;
+ return cp;
+ }
+ /* The number was argument position */
+ *argpos = n;
+ /* Explicit 0 for argument index is immediately refused */
+ if (n == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("format specifies argument 0, but arguments are numbered from 1")));
+ ADVANCE_PARSE_POINTER(cp, end_ptr);
+ }
+
+ /* Handle flags (only minus is supported now) */
+ while (*cp == '-')
+ {
+ *flags |= TEXT_FORMAT_FLAG_MINUS;
+ ADVANCE_PARSE_POINTER(cp, end_ptr);
+ }
+
+ if (*cp == '*')
+ {
+ /* Handle indirect width */
+ ADVANCE_PARSE_POINTER(cp, end_ptr);
+ if (text_format_parse_digits(&cp, end_ptr, &n))
+ {
+ /* number in this position must be closed by $ */
+ if (*cp != '$')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("width argument position must be ended by \"$\"")));
+ /* The number was width argument position */
+ *widthpos = n;
+ /* Explicit 0 for argument index is immediately refused */
+ if (n == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("format specifies argument 0, but arguments are numbered from 1")));
+ ADVANCE_PARSE_POINTER(cp, end_ptr);
+ }
+ else
+ *widthpos = 0; /* width's argument position is unspecified */
+ }
+ else
+ {
+ /* Check for direct width specification */
+ if (text_format_parse_digits(&cp, end_ptr, &n))
+ *width = n;
+ }
+
+ /* cp should now be pointing at type character */
+ return cp;
+}
+
+/*
+ * Format a %s, %I, or %L conversion
+ */
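+/*
+ * For example:
+ *   format('INSERT INTO %I VALUES(%L)', 'Foo bar', E'O\'Reilly')
+ *       ->  INSERT INTO "Foo bar" VALUES('O''Reilly')
+ * %L renders a NULL argument as the keyword NULL; %I raises an error.
+ */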
+static void
+text_format_string_conversion(StringInfo buf, char conversion,
+ FmgrInfo *typOutputInfo,
+ Datum value, bool isNull,
+ int flags, int width)
+{
+ char *str;
+
+ /* Handle NULL arguments before trying to stringify the value. */
+ if (isNull)
+ {
+ if (conversion == 's')
+ text_format_append_string(buf, "", flags, width);
+ else if (conversion == 'L')
+ text_format_append_string(buf, "NULL", flags, width);
+ else if (conversion == 'I')
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("null values cannot be formatted as an SQL identifier")));
+ return;
+ }
+
+ /* Stringify. */
+ str = OutputFunctionCall(typOutputInfo, value);
+
+ /* Escape. */
+ if (conversion == 'I')
+ {
+ /* quote_identifier may or may not allocate a new string. */
+ text_format_append_string(buf, quote_identifier(str), flags, width);
+ }
+ else if (conversion == 'L')
+ {
+ char *qstr = quote_literal_cstr(str);
+
+ text_format_append_string(buf, qstr, flags, width);
+ /* quote_literal_cstr() always allocates a new string */
+ pfree(qstr);
+ }
+ else
+ text_format_append_string(buf, str, flags, width);
+
+ /* Cleanup. */
+ pfree(str);
+}
+
+/*
+ * Append str to buf, padding as directed by flags/width
+ */
+static void
+text_format_append_string(StringInfo buf, const char *str,
+ int flags, int width)
+{
+ bool align_to_left = false;
+ int len;
+
+ /* fast path for typical easy case */
+ if (width == 0)
+ {
+ appendStringInfoString(buf, str);
+ return;
+ }
+
+ if (width < 0)
+ {
+ /* Negative width: implicit '-' flag, then take absolute value */
+ align_to_left = true;
+ /* -INT_MIN is undefined */
+ if (width <= INT_MIN)
+ ereport(ERROR,
+ (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+ errmsg("number is out of range")));
+ width = -width;
+ }
+ else if (flags & TEXT_FORMAT_FLAG_MINUS)
+ align_to_left = true;
+
+ len = pg_mbstrlen(str);
+ if (align_to_left)
+ {
+ /* left justify */
+ appendStringInfoString(buf, str);
+ if (len < width)
+ appendStringInfoSpaces(buf, width - len);
+ }
+ else
+ {
+ /* right justify */
+ if (len < width)
+ appendStringInfoSpaces(buf, width - len);
+ appendStringInfoString(buf, str);
+ }
+}
+
+/*
+ * text_format_nv - nonvariadic wrapper for text_format function.
+ *
+ * note: this wrapper is necessary to pass the sanity check in opr_sanity,
+ * which checks that all built-in functions that share the implementing C
+ * function take the same number of arguments.
+ */
+Datum
+text_format_nv(PG_FUNCTION_ARGS)
+{
+ return text_format(fcinfo);
+}
+
+/*
+ * Helper function for Levenshtein distance functions. Faster than memcmp(),
+ * for this use case.
+ */
+static inline bool
+rest_of_char_same(const char *s1, const char *s2, int len)
+{
+ while (len > 0)
+ {
+ len--;
+ if (s1[len] != s2[len])
+ return false;
+ }
+ return true;
+}
+
+/* Expand each Levenshtein distance variant */
+#include "levenshtein.c"
+#define LEVENSHTEIN_LESS_EQUAL
+#include "levenshtein.c"
+
+
+/*
+ * Unicode support
+ */
+
+static UnicodeNormalizationForm
+unicode_norm_form_from_string(const char *formstr)
+{
+ UnicodeNormalizationForm form = -1;
+
+ /*
+ * Might as well check this while we're here.
+ */
+ if (GetDatabaseEncoding() != PG_UTF8)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("Unicode normalization can only be performed if server encoding is UTF8")));
+
+ if (pg_strcasecmp(formstr, "NFC") == 0)
+ form = UNICODE_NFC;
+ else if (pg_strcasecmp(formstr, "NFD") == 0)
+ form = UNICODE_NFD;
+ else if (pg_strcasecmp(formstr, "NFKC") == 0)
+ form = UNICODE_NFKC;
+ else if (pg_strcasecmp(formstr, "NFKD") == 0)
+ form = UNICODE_NFKD;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid normalization form: %s", formstr)));
+
+ return form;
+}
+
+Datum
+unicode_normalize_func(PG_FUNCTION_ARGS)
+{
+ text *input = PG_GETARG_TEXT_PP(0);
+ char *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
+ UnicodeNormalizationForm form;
+ int size;
+ pg_wchar *input_chars;
+ pg_wchar *output_chars;
+ unsigned char *p;
+ text *result;
+ int i;
+
+ form = unicode_norm_form_from_string(formstr);
+
+ /* convert to pg_wchar */
+ size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
+ input_chars = palloc((size + 1) * sizeof(pg_wchar));
+ p = (unsigned char *) VARDATA_ANY(input);
+ for (i = 0; i < size; i++)
+ {
+ input_chars[i] = utf8_to_unicode(p);
+ p += pg_utf_mblen(p);
+ }
+ input_chars[i] = (pg_wchar) '\0';
+ Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
+
+ /* action */
+ output_chars = unicode_normalize(form, input_chars);
+
+ /* convert back to UTF-8 string */
+ size = 0;
+ for (pg_wchar *wp = output_chars; *wp; wp++)
+ {
+ unsigned char buf[4];
+
+ unicode_to_utf8(*wp, buf);
+ size += pg_utf_mblen(buf);
+ }
+
+ result = palloc(size + VARHDRSZ);
+ SET_VARSIZE(result, size + VARHDRSZ);
+
+ p = (unsigned char *) VARDATA_ANY(result);
+ for (pg_wchar *wp = output_chars; *wp; wp++)
+ {
+ unicode_to_utf8(*wp, p);
+ p += pg_utf_mblen(p);
+ }
+ Assert((char *) p == (char *) result + size + VARHDRSZ);
+
+ PG_RETURN_TEXT_P(result);
+}
+
+/*
+ * Check whether the string is in the specified Unicode normalization form.
+ *
+ * This is done by converting the string to the specified normal form and then
+ * comparing that to the original string. To speed that up, we also apply the
+ * "quick check" algorithm specified in UAX #15, which can give a yes or no
+ * answer for many strings by just scanning the string once.
+ *
+ * This function should generally be optimized for the case where the string
+ * is in fact normalized. In that case, we'll end up looking at the entire
+ * string, so it's probably not worth doing any incremental conversion etc.
+ */
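+/*
+ * For illustration (server encoding UTF8):
+ *   U&'\0061\0308bc' IS NFD NORMALIZED  ->  true
+ *   U&'\0061\0308bc' IS NFC NORMALIZED  ->  false
+ */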
+Datum
+unicode_is_normalized(PG_FUNCTION_ARGS)
+{
+ text *input = PG_GETARG_TEXT_PP(0);
+ char *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
+ UnicodeNormalizationForm form;
+ int size;
+ pg_wchar *input_chars;
+ pg_wchar *output_chars;
+ unsigned char *p;
+ int i;
+ UnicodeNormalizationQC quickcheck;
+ int output_size;
+ bool result;
+
+ form = unicode_norm_form_from_string(formstr);
+
+ /* convert to pg_wchar */
+ size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
+ input_chars = palloc((size + 1) * sizeof(pg_wchar));
+ p = (unsigned char *) VARDATA_ANY(input);
+ for (i = 0; i < size; i++)
+ {
+ input_chars[i] = utf8_to_unicode(p);
+ p += pg_utf_mblen(p);
+ }
+ input_chars[i] = (pg_wchar) '\0';
+ Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
+
+ /* quick check (see UAX #15) */
+ quickcheck = unicode_is_normalized_quickcheck(form, input_chars);
+ if (quickcheck == UNICODE_NORM_QC_YES)
+ PG_RETURN_BOOL(true);
+ else if (quickcheck == UNICODE_NORM_QC_NO)
+ PG_RETURN_BOOL(false);
+
+ /* normalize and compare with original */
+ output_chars = unicode_normalize(form, input_chars);
+
+ output_size = 0;
+ for (pg_wchar *wp = output_chars; *wp; wp++)
+ output_size++;
+
+ result = (size == output_size) &&
+ (memcmp(input_chars, output_chars, size * sizeof(pg_wchar)) == 0);
+
+ PG_RETURN_BOOL(result);
+}
+
+/*
+ * Check if first n chars are hexadecimal digits
+ */
+static bool
+isxdigits_n(const char *instr, size_t n)
+{
+ for (size_t i = 0; i < n; i++)
+ if (!isxdigit((unsigned char) instr[i]))
+ return false;
+
+ return true;
+}
+
+static unsigned int
+hexval(unsigned char c)
+{
+ if (c >= '0' && c <= '9')
+ return c - '0';
+ if (c >= 'a' && c <= 'f')
+ return c - 'a' + 0xA;
+ if (c >= 'A' && c <= 'F')
+ return c - 'A' + 0xA;
+ elog(ERROR, "invalid hexadecimal digit");
+ return 0; /* not reached */
+}
+
+/*
+ * Translate string with hexadecimal digits to number
+ */
+static unsigned int
+hexval_n(const char *instr, size_t n)
+{
+ unsigned int result = 0;
+
+ for (size_t i = 0; i < n; i++)
+ result += hexval(instr[i]) << (4 * (n - i - 1));
+
+ return result;
+}
+
+/*
+ * Replaces Unicode escape sequences by Unicode characters
+ */
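+/*
+ * For example:
+ *   unistr('d\0061t\+000061')  ->  'data'
+ * i.e. \XXXX, \+XXXXXX, \uXXXX and \UXXXXXXXX escapes (and \\ for a
+ * literal backslash) are decoded into characters.
+ */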
+Datum
+unistr(PG_FUNCTION_ARGS)
+{
+ text *input_text = PG_GETARG_TEXT_PP(0);
+ char *instr;
+ int len;
+ StringInfoData str;
+ text *result;
+ pg_wchar pair_first = 0;
+ char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
+
+ instr = VARDATA_ANY(input_text);
+ len = VARSIZE_ANY_EXHDR(input_text);
+
+ initStringInfo(&str);
+
+ while (len > 0)
+ {
+ if (instr[0] == '\\')
+ {
+ if (len >= 2 &&
+ instr[1] == '\\')
+ {
+ if (pair_first)
+ goto invalid_pair;
+ appendStringInfoChar(&str, '\\');
+ instr += 2;
+ len -= 2;
+ }
+ else if ((len >= 5 && isxdigits_n(instr + 1, 4)) ||
+ (len >= 6 && instr[1] == 'u' && isxdigits_n(instr + 2, 4)))
+ {
+ pg_wchar unicode;
+ int offset = instr[1] == 'u' ? 2 : 1;
+
+ unicode = hexval_n(instr + offset, 4);
+
+ if (!is_valid_unicode_codepoint(unicode))
+ ereport(ERROR,
+ errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid Unicode code point: %04X", unicode));
+
+ if (pair_first)
+ {
+ if (is_utf16_surrogate_second(unicode))
+ {
+ unicode = surrogate_pair_to_codepoint(pair_first, unicode);
+ pair_first = 0;
+ }
+ else
+ goto invalid_pair;
+ }
+ else if (is_utf16_surrogate_second(unicode))
+ goto invalid_pair;
+
+ if (is_utf16_surrogate_first(unicode))
+ pair_first = unicode;
+ else
+ {
+ pg_unicode_to_server(unicode, (unsigned char *) cbuf);
+ appendStringInfoString(&str, cbuf);
+ }
+
+ instr += 4 + offset;
+ len -= 4 + offset;
+ }
+ else if (len >= 8 && instr[1] == '+' && isxdigits_n(instr + 2, 6))
+ {
+ pg_wchar unicode;
+
+ unicode = hexval_n(instr + 2, 6);
+
+ if (!is_valid_unicode_codepoint(unicode))
+ ereport(ERROR,
+ errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid Unicode code point: %04X", unicode));
+
+ if (pair_first)
+ {
+ if (is_utf16_surrogate_second(unicode))
+ {
+ unicode = surrogate_pair_to_codepoint(pair_first, unicode);
+ pair_first = 0;
+ }
+ else
+ goto invalid_pair;
+ }
+ else if (is_utf16_surrogate_second(unicode))
+ goto invalid_pair;
+
+ if (is_utf16_surrogate_first(unicode))
+ pair_first = unicode;
+ else
+ {
+ pg_unicode_to_server(unicode, (unsigned char *) cbuf);
+ appendStringInfoString(&str, cbuf);
+ }
+
+ instr += 8;
+ len -= 8;
+ }
+ else if (len >= 10 && instr[1] == 'U' && isxdigits_n(instr + 2, 8))
+ {
+ pg_wchar unicode;
+
+ unicode = hexval_n(instr + 2, 8);
+
+ if (!is_valid_unicode_codepoint(unicode))
+ ereport(ERROR,
+ errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid Unicode code point: %04X", unicode));
+
+ if (pair_first)
+ {
+ if (is_utf16_surrogate_second(unicode))
+ {
+ unicode = surrogate_pair_to_codepoint(pair_first, unicode);
+ pair_first = 0;
+ }
+ else
+ goto invalid_pair;
+ }
+ else if (is_utf16_surrogate_second(unicode))
+ goto invalid_pair;
+
+ if (is_utf16_surrogate_first(unicode))
+ pair_first = unicode;
+ else
+ {
+ pg_unicode_to_server(unicode, (unsigned char *) cbuf);
+ appendStringInfoString(&str, cbuf);
+ }
+
+ instr += 10;
+ len -= 10;
+ }
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("invalid Unicode escape"),
+ errhint("Unicode escapes must be \\XXXX, \\+XXXXXX, \\uXXXX, or \\UXXXXXXXX.")));
+ }
+ else
+ {
+ if (pair_first)
+ goto invalid_pair;
+
+ appendStringInfoChar(&str, *instr++);
+ len--;
+ }
+ }
+
+ /* unfinished surrogate pair? */
+ if (pair_first)
+ goto invalid_pair;
+
+ result = cstring_to_text_with_len(str.data, str.len);
+ pfree(str.data);
+
+ PG_RETURN_TEXT_P(result);
+
+invalid_pair:
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("invalid Unicode surrogate pair")));
+ PG_RETURN_NULL(); /* keep compiler quiet */
+}
diff --git a/src/backend/utils/adt/version.c b/src/backend/utils/adt/version.c
new file mode 100644
index 0000000..044ffc9
--- /dev/null
+++ b/src/backend/utils/adt/version.c
@@ -0,0 +1,24 @@
+/*-------------------------------------------------------------------------
+ *
+ * version.c
+ * Returns the PostgreSQL version string
+ *
+ * Copyright (c) 1998-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *
+ * src/backend/utils/adt/version.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "utils/builtins.h"
+
+
+Datum
+pgsql_version(PG_FUNCTION_ARGS)
+{
+ PG_RETURN_TEXT_P(cstring_to_text(PG_VERSION_STR));
+}
diff --git a/src/backend/utils/adt/windowfuncs.c b/src/backend/utils/adt/windowfuncs.c
new file mode 100644
index 0000000..596564f
--- /dev/null
+++ b/src/backend/utils/adt/windowfuncs.c
@@ -0,0 +1,537 @@
+/*-------------------------------------------------------------------------
+ *
+ * windowfuncs.c
+ * Standard window functions defined in SQL spec.
+ *
+ * Portions Copyright (c) 2000-2022, PostgreSQL Global Development Group
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/windowfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "nodes/supportnodes.h"
+#include "utils/builtins.h"
+#include "windowapi.h"
+
+/*
+ * ranking process information
+ */
+typedef struct rank_context
+{
+ int64 rank; /* current rank */
+} rank_context;
+
+/*
+ * ntile process information
+ */
+typedef struct
+{
+ int32 ntile; /* current result */
+ int64 rows_per_bucket; /* row number of current bucket */
+ int64 boundary; /* how many rows should be in the bucket */
+ int64 remainder; /* (total rows) % (bucket num) */
+} ntile_context;
+
+static bool rank_up(WindowObject winobj);
+static Datum leadlag_common(FunctionCallInfo fcinfo,
+ bool forward, bool withoffset, bool withdefault);
+
+
+/*
+ * utility routine for *_rank functions.
+ */
+static bool
+rank_up(WindowObject winobj)
+{
+ bool up = false; /* should rank increase? */
+ int64 curpos = WinGetCurrentPosition(winobj);
+ rank_context *context;
+
+ context = (rank_context *)
+ WinGetPartitionLocalMemory(winobj, sizeof(rank_context));
+
+ if (context->rank == 0)
+ {
+ /* first call: rank of first row is always 1 */
+ Assert(curpos == 0);
+ context->rank = 1;
+ }
+ else
+ {
+ Assert(curpos > 0);
+ /* do current and prior tuples match by ORDER BY clause? */
+ if (!WinRowsArePeers(winobj, curpos - 1, curpos))
+ up = true;
+ }
+
+ /* We can advance the mark, but only *after* access to prior row */
+ WinSetMarkPosition(winobj, curpos);
+
+ return up;
+}
+
+
+/*
+ * row_number
+ * just increment up from 1 until current partition finishes.
+ */
+Datum
+window_row_number(PG_FUNCTION_ARGS)
+{
+ WindowObject winobj = PG_WINDOW_OBJECT();
+ int64 curpos = WinGetCurrentPosition(winobj);
+
+ WinSetMarkPosition(winobj, curpos);
+ PG_RETURN_INT64(curpos + 1);
+}
+
+/*
+ * window_row_number_support
+ * prosupport function for window_row_number()
+ */
+Datum
+window_row_number_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+ if (IsA(rawreq, SupportRequestWFuncMonotonic))
+ {
+ SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq;
+
+ /* row_number() is monotonically increasing */
+ req->monotonic = MONOTONICFUNC_INCREASING;
+ PG_RETURN_POINTER(req);
+ }
+
+ PG_RETURN_POINTER(NULL);
+}
+
+/*
+ * rank
+ * Rank changes when key columns change.
+ * The new rank number is the current row number.
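+ * For example, with ORDER BY values 10, 20, 20, 30 the results are
+ * 1, 2, 2, 4: the two tied rows share rank 2 and a gap is left before 4.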
+ */
+Datum
+window_rank(PG_FUNCTION_ARGS)
+{
+ WindowObject winobj = PG_WINDOW_OBJECT();
+ rank_context *context;
+ bool up;
+
+ up = rank_up(winobj);
+ context = (rank_context *)
+ WinGetPartitionLocalMemory(winobj, sizeof(rank_context));
+ if (up)
+ context->rank = WinGetCurrentPosition(winobj) + 1;
+
+ PG_RETURN_INT64(context->rank);
+}
+
+/*
+ * window_rank_support
+ * prosupport function for window_rank()
+ */
+Datum
+window_rank_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+ if (IsA(rawreq, SupportRequestWFuncMonotonic))
+ {
+ SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq;
+
+ /* rank() is monotonically increasing */
+ req->monotonic = MONOTONICFUNC_INCREASING;
+ PG_RETURN_POINTER(req);
+ }
+
+ PG_RETURN_POINTER(NULL);
+}
+
+/*
+ * dense_rank
+ * Rank increases by 1 when key columns change.
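+ * For example, with ORDER BY values 10, 20, 20, 30 the results are
+ * 1, 2, 2, 3: unlike rank(), no gap is left after the tied rows.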
+ */
+Datum
+window_dense_rank(PG_FUNCTION_ARGS)
+{
+ WindowObject winobj = PG_WINDOW_OBJECT();
+ rank_context *context;
+ bool up;
+
+ up = rank_up(winobj);
+ context = (rank_context *)
+ WinGetPartitionLocalMemory(winobj, sizeof(rank_context));
+ if (up)
+ context->rank++;
+
+ PG_RETURN_INT64(context->rank);
+}
+
+/*
+ * window_dense_rank_support
+ * prosupport function for window_dense_rank()
+ */
+Datum
+window_dense_rank_support(PG_FUNCTION_ARGS)
+{
+ Node *rawreq = (Node *) PG_GETARG_POINTER(0);
+
+ if (IsA(rawreq, SupportRequestWFuncMonotonic))
+ {
+ SupportRequestWFuncMonotonic *req = (SupportRequestWFuncMonotonic *) rawreq;
+
+ /* dense_rank() is monotonically increasing */
+ req->monotonic = MONOTONICFUNC_INCREASING;
+ PG_RETURN_POINTER(req);
+ }
+
+ PG_RETURN_POINTER(NULL);
+}
+
+/*
+ * percent_rank
+ * return fraction between 0 and 1 inclusive,
+ * which is described as (RK - 1) / (NR - 1), where RK is the current row's
+ * rank and NR is the total number of rows, per spec.
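+ * For example, ranks 1, 2, 2, 4 over a 4-row partition give
+ * 0, 1/3, 1/3 and 1.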
+ */
+Datum
+window_percent_rank(PG_FUNCTION_ARGS)
+{
+ WindowObject winobj = PG_WINDOW_OBJECT();
+ rank_context *context;
+ bool up;
+ int64 totalrows = WinGetPartitionRowCount(winobj);
+
+ Assert(totalrows > 0);
+
+ up = rank_up(winobj);
+ context = (rank_context *)
+ WinGetPartitionLocalMemory(winobj, sizeof(rank_context));
+ if (up)
+ context->rank = WinGetCurrentPosition(winobj) + 1;
+
+ /* return zero if there's only one row, per spec */
+ if (totalrows <= 1)
+ PG_RETURN_FLOAT8(0.0);
+
+ PG_RETURN_FLOAT8((float8) (context->rank - 1) / (float8) (totalrows - 1));
+}
+
+/*
+ * cume_dist
+ * return fraction between 0 and 1 inclusive,
+ * which is described as NP / NR, where NP is the number of rows preceding or
+ * peers to the current row, and NR is the total number of rows, per spec.
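+ * For example, over the 4-row partition 10, 20, 20, 30 the results are
+ * 0.25, 0.75, 0.75 and 1.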
+ */
+Datum
+window_cume_dist(PG_FUNCTION_ARGS)
+{
+ WindowObject winobj = PG_WINDOW_OBJECT();
+ rank_context *context;
+ bool up;
+ int64 totalrows = WinGetPartitionRowCount(winobj);
+
+ Assert(totalrows > 0);
+
+ up = rank_up(winobj);
+ context = (rank_context *)
+ WinGetPartitionLocalMemory(winobj, sizeof(rank_context));
+ if (up || context->rank == 1)
+ {
+ /*
+ * The current row is not a peer of the prior row, or is simply the
+ * first row, so count up the number of rows that are peers of the
+ * current one.
+ */
+ int64 row;
+
+ context->rank = WinGetCurrentPosition(winobj) + 1;
+
+ /*
+ * start from current + 1
+ */
+ for (row = context->rank; row < totalrows; row++)
+ {
+ if (!WinRowsArePeers(winobj, row - 1, row))
+ break;
+ context->rank++;
+ }
+ }
+
+ PG_RETURN_FLOAT8((float8) context->rank / (float8) totalrows);
+}
+
+/*
+ * ntile
+ * compute an exact numeric value with scale 0 (zero),
+ * ranging from 1 (one) to n, per spec.
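+ * For example, ntile(3) over a 10-row partition assigns 4 rows to the
+ * first bucket and 3 rows to each of the other two; leading buckets
+ * absorb the remainder.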
+ */
+Datum
+window_ntile(PG_FUNCTION_ARGS)
+{
+ WindowObject winobj = PG_WINDOW_OBJECT();
+ ntile_context *context;
+
+ context = (ntile_context *)
+ WinGetPartitionLocalMemory(winobj, sizeof(ntile_context));
+
+ if (context->ntile == 0)
+ {
+ /* first call */
+ int64 total;
+ int32 nbuckets;
+ bool isnull;
+
+ total = WinGetPartitionRowCount(winobj);
+ nbuckets = DatumGetInt32(WinGetFuncArgCurrent(winobj, 0, &isnull));
+
+ /*
+ * per spec: If NT is the null value, then the result is the null
+ * value.
+ */
+ if (isnull)
+ PG_RETURN_NULL();
+
+ /*
+ * per spec: If NT is less than or equal to 0 (zero), then an
+ * exception condition is raised.
+ */
+ if (nbuckets <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_NTILE),
+ errmsg("argument of ntile must be greater than zero")));
+
+ context->ntile = 1;
+ context->rows_per_bucket = 0;
+ context->boundary = total / nbuckets;
+ if (context->boundary <= 0)
+ context->boundary = 1;
+ else
+ {
+ /*
+ * If the total number is not divisible, add 1 row to leading
+ * buckets.
+ */
+ context->remainder = total % nbuckets;
+ if (context->remainder != 0)
+ context->boundary++;
+ }
+ }
+
+ context->rows_per_bucket++;
+ if (context->boundary < context->rows_per_bucket)
+ {
+ /* ntile up */
+ if (context->remainder != 0 && context->ntile == context->remainder)
+ {
+ context->remainder = 0;
+ context->boundary -= 1;
+ }
+ context->ntile += 1;
+ context->rows_per_bucket = 1;
+ }
+
+ PG_RETURN_INT32(context->ntile);
+}
+
+/*
+ * leadlag_common
+ * common operation of lead() and lag()
+ * For lead() forward is true, whereas for lag() it is false.
+ * withoffset indicates we have an offset second argument.
+ * withdefault indicates we have a default third argument.
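+ * At the SQL level, for example, lag(x, 2, 0) reads the value of x from
+ * the row two positions earlier in the partition and falls back to the
+ * default 0 when that row does not exist.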
+ */
+static Datum
+leadlag_common(FunctionCallInfo fcinfo,
+ bool forward, bool withoffset, bool withdefault)
+{
+ WindowObject winobj = PG_WINDOW_OBJECT();
+ int32 offset;
+ bool const_offset;
+ Datum result;
+ bool isnull;
+ bool isout;
+
+ if (withoffset)
+ {
+ offset = DatumGetInt32(WinGetFuncArgCurrent(winobj, 1, &isnull));
+ if (isnull)
+ PG_RETURN_NULL();
+ const_offset = get_fn_expr_arg_stable(fcinfo->flinfo, 1);
+ }
+ else
+ {
+ offset = 1;
+ const_offset = true;
+ }
+
+ result = WinGetFuncArgInPartition(winobj, 0,
+ (forward ? offset : -offset),
+ WINDOW_SEEK_CURRENT,
+ const_offset,
+ &isnull, &isout);
+
+ if (isout)
+ {
+ /*
+ * target row is out of the partition; supply the default value if
+ * provided, otherwise the result stays NULL
+ */
+ if (withdefault)
+ result = WinGetFuncArgCurrent(winobj, 2, &isnull);
+ }
+
+ if (isnull)
+ PG_RETURN_NULL();
+
+ PG_RETURN_DATUM(result);
+}
+
+/*
+ * lag
+ * returns the value of VE evaluated on a row that is 1
+ * row before the current row within a partition,
+ * per spec.
+ */
+Datum
+window_lag(PG_FUNCTION_ARGS)
+{
+ return leadlag_common(fcinfo, false, false, false);
+}
+
+/*
+ * lag_with_offset
+ * returns the value of VE evaluated on a row that is OFFSET
+ * rows before the current row within a partition,
+ * per spec.
+ */
+Datum
+window_lag_with_offset(PG_FUNCTION_ARGS)
+{
+ return leadlag_common(fcinfo, false, true, false);
+}
+
+/*
+ * lag_with_offset_and_default
+ * same as lag_with_offset but accepts default value
+ * as its third argument.
+ */
+Datum
+window_lag_with_offset_and_default(PG_FUNCTION_ARGS)
+{
+ return leadlag_common(fcinfo, false, true, true);
+}
+
+/*
+ * lead
+ * returns the value of VE evaluated on a row that is 1
+ * row after the current row within a partition,
+ * per spec.
+ */
+Datum
+window_lead(PG_FUNCTION_ARGS)
+{
+ return leadlag_common(fcinfo, true, false, false);
+}
+
+/*
+ * lead_with_offset
+ * returns the value of VE evaluated on a row that is OFFSET
+ * number of rows after the current row within a partition,
+ * per spec.
+ */
+Datum
+window_lead_with_offset(PG_FUNCTION_ARGS)
+{
+ return leadlag_common(fcinfo, true, true, false);
+}
+
+/*
+ * lead_with_offset_and_default
+ * same as lead_with_offset but accepts default value
+ * as its third argument.
+ */
+Datum
+window_lead_with_offset_and_default(PG_FUNCTION_ARGS)
+{
+ return leadlag_common(fcinfo, true, true, true);
+}
+
+/*
+ * first_value
+ * return the value of VE evaluated on the first row of the
+ * window frame, per spec.
+ */
+Datum
+window_first_value(PG_FUNCTION_ARGS)
+{
+ WindowObject winobj = PG_WINDOW_OBJECT();
+ Datum result;
+ bool isnull;
+
+ result = WinGetFuncArgInFrame(winobj, 0,
+ 0, WINDOW_SEEK_HEAD, true,
+ &isnull, NULL);
+ if (isnull)
+ PG_RETURN_NULL();
+
+ PG_RETURN_DATUM(result);
+}
+
+/*
+ * last_value
+ * return the value of VE evaluated on the last row of the
+ * window frame, per spec.
+ */
+Datum
+window_last_value(PG_FUNCTION_ARGS)
+{
+ WindowObject winobj = PG_WINDOW_OBJECT();
+ Datum result;
+ bool isnull;
+
+ result = WinGetFuncArgInFrame(winobj, 0,
+ 0, WINDOW_SEEK_TAIL, true,
+ &isnull, NULL);
+ if (isnull)
+ PG_RETURN_NULL();
+
+ PG_RETURN_DATUM(result);
+}
+
+/*
+ * nth_value
+ * return the value of VE evaluated on the n-th row from the first
+ * row of the window frame, per spec.
+ */
+Datum
+window_nth_value(PG_FUNCTION_ARGS)
+{
+ WindowObject winobj = PG_WINDOW_OBJECT();
+ bool const_offset;
+ Datum result;
+ bool isnull;
+ int32 nth;
+
+ nth = DatumGetInt32(WinGetFuncArgCurrent(winobj, 1, &isnull));
+ if (isnull)
+ PG_RETURN_NULL();
+ const_offset = get_fn_expr_arg_stable(fcinfo->flinfo, 1);
+
+ if (nth <= 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_ARGUMENT_FOR_NTH_VALUE),
+ errmsg("argument of nth_value must be greater than zero")));
+
+ result = WinGetFuncArgInFrame(winobj, 0,
+ nth - 1, WINDOW_SEEK_HEAD, const_offset,
+ &isnull, NULL);
+ if (isnull)
+ PG_RETURN_NULL();
+
+ PG_RETURN_DATUM(result);
+}
diff --git a/src/backend/utils/adt/xid.c b/src/backend/utils/adt/xid.c
new file mode 100644
index 0000000..e4b4952
--- /dev/null
+++ b/src/backend/utils/adt/xid.c
@@ -0,0 +1,373 @@
+/*-------------------------------------------------------------------------
+ *
+ * xid.c
+ * POSTGRES transaction identifier and command identifier datatypes.
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/xid.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include <limits.h>
+
+#include "access/multixact.h"
+#include "access/transam.h"
+#include "access/xact.h"
+#include "libpq/pqformat.h"
+#include "utils/builtins.h"
+#include "utils/xid8.h"
+
+#define PG_GETARG_COMMANDID(n) DatumGetCommandId(PG_GETARG_DATUM(n))
+#define PG_RETURN_COMMANDID(x) return CommandIdGetDatum(x)
+
+
+Datum
+xidin(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+
+ PG_RETURN_TRANSACTIONID((TransactionId) strtoul(str, NULL, 0));
+}
+
+Datum
+xidout(PG_FUNCTION_ARGS)
+{
+ TransactionId transactionId = PG_GETARG_TRANSACTIONID(0);
+ char *result = (char *) palloc(16);
+
+ snprintf(result, 16, "%lu", (unsigned long) transactionId);
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * xidrecv - converts external binary format to xid
+ */
+Datum
+xidrecv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+
+ PG_RETURN_TRANSACTIONID((TransactionId) pq_getmsgint(buf, sizeof(TransactionId)));
+}
+
+/*
+ * xidsend - converts xid to binary format
+ */
+Datum
+xidsend(PG_FUNCTION_ARGS)
+{
+ TransactionId arg1 = PG_GETARG_TRANSACTIONID(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendint32(&buf, arg1);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/*
+ * xideq - are two xids equal?
+ */
+Datum
+xideq(PG_FUNCTION_ARGS)
+{
+ TransactionId xid1 = PG_GETARG_TRANSACTIONID(0);
+ TransactionId xid2 = PG_GETARG_TRANSACTIONID(1);
+
+ PG_RETURN_BOOL(TransactionIdEquals(xid1, xid2));
+}
+
+/*
+ * xidneq - are two xids different?
+ */
+Datum
+xidneq(PG_FUNCTION_ARGS)
+{
+ TransactionId xid1 = PG_GETARG_TRANSACTIONID(0);
+ TransactionId xid2 = PG_GETARG_TRANSACTIONID(1);
+
+ PG_RETURN_BOOL(!TransactionIdEquals(xid1, xid2));
+}
+
+/*
+ * xid_age - compute age of an XID (relative to latest stable xid)
+ */
+Datum
+xid_age(PG_FUNCTION_ARGS)
+{
+ TransactionId xid = PG_GETARG_TRANSACTIONID(0);
+ TransactionId now = GetStableLatestTransactionId();
+
+ /* Permanent XIDs are always infinitely old */
+ if (!TransactionIdIsNormal(xid))
+ PG_RETURN_INT32(INT_MAX);
+
+ PG_RETURN_INT32((int32) (now - xid));
+}
+
+/*
+ * mxid_age - compute age of a multi XID (relative to latest stable mxid)
+ */
+Datum
+mxid_age(PG_FUNCTION_ARGS)
+{
+ TransactionId xid = PG_GETARG_TRANSACTIONID(0);
+ MultiXactId now = ReadNextMultiXactId();
+
+ if (!MultiXactIdIsValid(xid))
+ PG_RETURN_INT32(INT_MAX);
+
+ PG_RETURN_INT32((int32) (now - xid));
+}
+
+/*
+ * xidComparator
+ * qsort comparison function for XIDs
+ *
+ * We can't use wraparound comparison for XIDs because that does not respect
+ * the triangle inequality! Any old sort order will do.
+ */
+int
+xidComparator(const void *arg1, const void *arg2)
+{
+ TransactionId xid1 = *(const TransactionId *) arg1;
+ TransactionId xid2 = *(const TransactionId *) arg2;
+
+ if (xid1 > xid2)
+ return 1;
+ if (xid1 < xid2)
+ return -1;
+ return 0;
+}
+
+/*
+ * xidLogicalComparator
+ * qsort comparison function for XIDs
+ *
+ * This is used to compare only XIDs from the same epoch (e.g. for backends
+ * running at the same time), so only normal XIDs are expected and there is
+ * no issue with the triangle inequality.
+ */
+int
+xidLogicalComparator(const void *arg1, const void *arg2)
+{
+ TransactionId xid1 = *(const TransactionId *) arg1;
+ TransactionId xid2 = *(const TransactionId *) arg2;
+
+ Assert(TransactionIdIsNormal(xid1));
+ Assert(TransactionIdIsNormal(xid2));
+
+ if (TransactionIdPrecedes(xid1, xid2))
+ return -1;
+
+ if (TransactionIdPrecedes(xid2, xid1))
+ return 1;
+
+ return 0;
+}
+
+Datum
+xid8toxid(PG_FUNCTION_ARGS)
+{
+ FullTransactionId fxid = PG_GETARG_FULLTRANSACTIONID(0);
+
+ PG_RETURN_TRANSACTIONID(XidFromFullTransactionId(fxid));
+}
+
+Datum
+xid8in(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+
+ PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromU64(strtou64(str, NULL, 0)));
+}
+
+Datum
+xid8out(PG_FUNCTION_ARGS)
+{
+ FullTransactionId fxid = PG_GETARG_FULLTRANSACTIONID(0);
+ char *result = (char *) palloc(21);
+
+ snprintf(result, 21, UINT64_FORMAT, U64FromFullTransactionId(fxid));
+ PG_RETURN_CSTRING(result);
+}
+
+Datum
+xid8recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ uint64 value;
+
+ value = (uint64) pq_getmsgint64(buf);
+ PG_RETURN_FULLTRANSACTIONID(FullTransactionIdFromU64(value));
+}
+
+Datum
+xid8send(PG_FUNCTION_ARGS)
+{
+ FullTransactionId arg1 = PG_GETARG_FULLTRANSACTIONID(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendint64(&buf, (uint64) U64FromFullTransactionId(arg1));
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+Datum
+xid8eq(PG_FUNCTION_ARGS)
+{
+ FullTransactionId fxid1 = PG_GETARG_FULLTRANSACTIONID(0);
+ FullTransactionId fxid2 = PG_GETARG_FULLTRANSACTIONID(1);
+
+ PG_RETURN_BOOL(FullTransactionIdEquals(fxid1, fxid2));
+}
+
+Datum
+xid8ne(PG_FUNCTION_ARGS)
+{
+ FullTransactionId fxid1 = PG_GETARG_FULLTRANSACTIONID(0);
+ FullTransactionId fxid2 = PG_GETARG_FULLTRANSACTIONID(1);
+
+ PG_RETURN_BOOL(!FullTransactionIdEquals(fxid1, fxid2));
+}
+
+Datum
+xid8lt(PG_FUNCTION_ARGS)
+{
+ FullTransactionId fxid1 = PG_GETARG_FULLTRANSACTIONID(0);
+ FullTransactionId fxid2 = PG_GETARG_FULLTRANSACTIONID(1);
+
+ PG_RETURN_BOOL(FullTransactionIdPrecedes(fxid1, fxid2));
+}
+
+Datum
+xid8gt(PG_FUNCTION_ARGS)
+{
+ FullTransactionId fxid1 = PG_GETARG_FULLTRANSACTIONID(0);
+ FullTransactionId fxid2 = PG_GETARG_FULLTRANSACTIONID(1);
+
+ PG_RETURN_BOOL(FullTransactionIdFollows(fxid1, fxid2));
+}
+
+Datum
+xid8le(PG_FUNCTION_ARGS)
+{
+ FullTransactionId fxid1 = PG_GETARG_FULLTRANSACTIONID(0);
+ FullTransactionId fxid2 = PG_GETARG_FULLTRANSACTIONID(1);
+
+ PG_RETURN_BOOL(FullTransactionIdPrecedesOrEquals(fxid1, fxid2));
+}
+
+Datum
+xid8ge(PG_FUNCTION_ARGS)
+{
+ FullTransactionId fxid1 = PG_GETARG_FULLTRANSACTIONID(0);
+ FullTransactionId fxid2 = PG_GETARG_FULLTRANSACTIONID(1);
+
+ PG_RETURN_BOOL(FullTransactionIdFollowsOrEquals(fxid1, fxid2));
+}
+
+Datum
+xid8cmp(PG_FUNCTION_ARGS)
+{
+ FullTransactionId fxid1 = PG_GETARG_FULLTRANSACTIONID(0);
+ FullTransactionId fxid2 = PG_GETARG_FULLTRANSACTIONID(1);
+
+ if (FullTransactionIdFollows(fxid1, fxid2))
+ PG_RETURN_INT32(1);
+ else if (FullTransactionIdEquals(fxid1, fxid2))
+ PG_RETURN_INT32(0);
+ else
+ PG_RETURN_INT32(-1);
+}
+
+Datum
+xid8_larger(PG_FUNCTION_ARGS)
+{
+ FullTransactionId fxid1 = PG_GETARG_FULLTRANSACTIONID(0);
+ FullTransactionId fxid2 = PG_GETARG_FULLTRANSACTIONID(1);
+
+ if (FullTransactionIdFollows(fxid1, fxid2))
+ PG_RETURN_FULLTRANSACTIONID(fxid1);
+ else
+ PG_RETURN_FULLTRANSACTIONID(fxid2);
+}
+
+Datum
+xid8_smaller(PG_FUNCTION_ARGS)
+{
+ FullTransactionId fxid1 = PG_GETARG_FULLTRANSACTIONID(0);
+ FullTransactionId fxid2 = PG_GETARG_FULLTRANSACTIONID(1);
+
+ if (FullTransactionIdPrecedes(fxid1, fxid2))
+ PG_RETURN_FULLTRANSACTIONID(fxid1);
+ else
+ PG_RETURN_FULLTRANSACTIONID(fxid2);
+}
+
+/*****************************************************************************
+ * COMMAND IDENTIFIER ROUTINES *
+ *****************************************************************************/
+
+/*
+ * cidin - converts an external cid string to its internal representation.
+ */
+Datum
+cidin(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+
+ PG_RETURN_COMMANDID((CommandId) strtoul(str, NULL, 0));
+}
+
+/*
+ * cidout - converts a cid to external representation.
+ */
+Datum
+cidout(PG_FUNCTION_ARGS)
+{
+ CommandId c = PG_GETARG_COMMANDID(0);
+ char *result = (char *) palloc(16);
+
+ snprintf(result, 16, "%lu", (unsigned long) c);
+ PG_RETURN_CSTRING(result);
+}
+
+/*
+ * cidrecv - converts external binary format to cid
+ */
+Datum
+cidrecv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+
+ PG_RETURN_COMMANDID((CommandId) pq_getmsgint(buf, sizeof(CommandId)));
+}
+
+/*
+ * cidsend - converts cid to binary format
+ */
+Datum
+cidsend(PG_FUNCTION_ARGS)
+{
+ CommandId arg1 = PG_GETARG_COMMANDID(0);
+ StringInfoData buf;
+
+ pq_begintypsend(&buf);
+ pq_sendint32(&buf, arg1);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+Datum
+cideq(PG_FUNCTION_ARGS)
+{
+ CommandId arg1 = PG_GETARG_COMMANDID(0);
+ CommandId arg2 = PG_GETARG_COMMANDID(1);
+
+ PG_RETURN_BOOL(arg1 == arg2);
+}
diff --git a/src/backend/utils/adt/xid8funcs.c b/src/backend/utils/adt/xid8funcs.c
new file mode 100644
index 0000000..d8e40b3
--- /dev/null
+++ b/src/backend/utils/adt/xid8funcs.c
@@ -0,0 +1,708 @@
+/*-------------------------------------------------------------------------
+ * xid8funcs.c
+ *
+ * Export internal transaction IDs to user level.
+ *
+ * Note that only top-level transaction IDs are exposed to user sessions.
+ * This is important because xid8s frequently persist beyond the global
+ * xmin horizon, or may even be shipped to other machines, so we cannot
+ * rely on being able to correlate subtransaction IDs with their parents
+ * via functions such as SubTransGetTopmostTransaction().
+ *
+ * These functions are used to support the txid_XXX functions and the newer
+ * pg_current_xact, pg_current_snapshot and related fmgr functions, since the
+ * only difference between them is whether they expose xid8 or int8 values to
+ * users. The txid_XXX variants should eventually be dropped.
+ *
+ *
+ * Copyright (c) 2003-2022, PostgreSQL Global Development Group
+ * Author: Jan Wieck, Afilias USA INC.
+ * 64-bit txids: Marko Kreen, Skype Technologies
+ *
+ * src/backend/utils/adt/xid8funcs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/clog.h"
+#include "access/transam.h"
+#include "access/xact.h"
+#include "access/xlog.h"
+#include "funcapi.h"
+#include "lib/qunique.h"
+#include "libpq/pqformat.h"
+#include "miscadmin.h"
+#include "postmaster/postmaster.h"
+#include "storage/lwlock.h"
+#include "storage/procarray.h"
+#include "utils/builtins.h"
+#include "utils/memutils.h"
+#include "utils/snapmgr.h"
+#include "utils/xid8.h"
+
+
+/*
+ * If defined, use bsearch() function for searching for xid8s in snapshots
+ * that have more than the specified number of values.
+ */
+#define USE_BSEARCH_IF_NXIP_GREATER 30
+
+
+/*
+ * Snapshot containing FullTransactionIds.
+ */
+typedef struct
+{
+ /*
+ * 4-byte length hdr, should not be touched directly.
+ *
+ * Explicit embedding is ok, as we always want correct alignment anyway.
+ */
+ int32 __varsz;
+
+ uint32 nxip; /* number of fxids in xip array */
+ FullTransactionId xmin;
+ FullTransactionId xmax;
+ /* in-progress fxids, xmin <= xip[i] < xmax: */
+ FullTransactionId xip[FLEXIBLE_ARRAY_MEMBER];
+} pg_snapshot;
+
+#define PG_SNAPSHOT_SIZE(nxip) \
+ (offsetof(pg_snapshot, xip) + sizeof(FullTransactionId) * (nxip))
+#define PG_SNAPSHOT_MAX_NXIP \
+ ((MaxAllocSize - offsetof(pg_snapshot, xip)) / sizeof(FullTransactionId))
+
+/*
+ * Helper to get a TransactionId from a 64-bit xid with wraparound detection.
+ *
+ * It is an ERROR if the xid is in the future. Otherwise, returns true if
+ * the transaction is still new enough that we can determine whether it
+ * committed and false otherwise. If *extracted_xid is not NULL, it is set
+ * to the low 32 bits of the transaction ID (i.e. the actual XID, without the
+ * epoch).
+ *
+ * The caller must hold XactTruncationLock since it's dealing with arbitrary
+ * XIDs, and must continue to hold it until it's done with any clog lookups
+ * relating to those XIDs.
+ */
+static bool
+TransactionIdInRecentPast(FullTransactionId fxid, TransactionId *extracted_xid)
+{
+ uint32 xid_epoch = EpochFromFullTransactionId(fxid);
+ TransactionId xid = XidFromFullTransactionId(fxid);
+ uint32 now_epoch;
+ TransactionId now_epoch_next_xid;
+ FullTransactionId now_fullxid;
+
+ now_fullxid = ReadNextFullTransactionId();
+ now_epoch_next_xid = XidFromFullTransactionId(now_fullxid);
+ now_epoch = EpochFromFullTransactionId(now_fullxid);
+
+ if (extracted_xid != NULL)
+ *extracted_xid = xid;
+
+ if (!TransactionIdIsValid(xid))
+ return false;
+
+ /* For non-normal transaction IDs, we can ignore the epoch. */
+ if (!TransactionIdIsNormal(xid))
+ return true;
+
+ /* If the transaction ID is in the future, throw an error. */
+ if (!FullTransactionIdPrecedes(fxid, now_fullxid))
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("transaction ID %llu is in the future",
+ (unsigned long long) U64FromFullTransactionId(fxid))));
+
+ /*
+ * ShmemVariableCache->oldestClogXid is protected by XactTruncationLock,
+ * but we don't acquire that lock here. Instead, we require the caller to
+ * acquire it, because the caller is presumably going to look up the
+ * returned XID. If we took and released the lock within this function, a
+ * CLOG truncation could occur before the caller finished with the XID.
+ */
+ Assert(LWLockHeldByMe(XactTruncationLock));
+
+ /*
+ * If the transaction ID has wrapped around, it's definitely too old to
+ * determine the commit status. Otherwise, we can compare it to
+ * ShmemVariableCache->oldestClogXid to determine whether the relevant
+ * CLOG entry is guaranteed to still exist.
+ */
+ if (xid_epoch + 1 < now_epoch
+ || (xid_epoch + 1 == now_epoch && xid < now_epoch_next_xid)
+ || TransactionIdPrecedes(xid, ShmemVariableCache->oldestClogXid))
+ return false;
+
+ return true;
+}
+
+/*
+ * Convert a TransactionId obtained from a snapshot held by the caller to a
+ * FullTransactionId. Use next_fxid as a reference FullTransactionId, so that
+ * we can compute the high order bits. It must have been obtained by the
+ * caller with ReadNextFullTransactionId() after the snapshot was created.
+ */
+static FullTransactionId
+widen_snapshot_xid(TransactionId xid, FullTransactionId next_fxid)
+{
+ TransactionId next_xid = XidFromFullTransactionId(next_fxid);
+ uint32 epoch = EpochFromFullTransactionId(next_fxid);
+
+ /* Special transaction ID. */
+ if (!TransactionIdIsNormal(xid))
+ return FullTransactionIdFromEpochAndXid(0, xid);
+
+ /*
+ * The 64 bit result must be <= next_fxid, since next_fxid hadn't been
+ * issued yet when the snapshot was created. Every TransactionId in the
+ * snapshot must therefore be from the same epoch as next_fxid, or the
+ * epoch before. We know this because next_fxid is never allowed to get
+ * more than one epoch ahead of the TransactionIds in any snapshot.
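+ *
+ * For example, if next_fxid is xid 100 of epoch 5, a snapshot xid of 200
+ * must belong to epoch 4, since xid 200 of epoch 5 cannot have been issued
+ * yet.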
+ */
+ if (xid > next_xid)
+ epoch--;
+
+ return FullTransactionIdFromEpochAndXid(epoch, xid);
+}
+
+/*
+ * txid comparator for qsort/bsearch
+ */
+static int
+cmp_fxid(const void *aa, const void *bb)
+{
+ FullTransactionId a = *(const FullTransactionId *) aa;
+ FullTransactionId b = *(const FullTransactionId *) bb;
+
+ if (FullTransactionIdPrecedes(a, b))
+ return -1;
+ if (FullTransactionIdPrecedes(b, a))
+ return 1;
+ return 0;
+}
+
+/*
+ * Sort a snapshot's txids, so we can use bsearch() later. Also remove
+ * any duplicates.
+ *
+ * For consistency of on-disk representation, we always sort even if bsearch
+ * will not be used.
+ */
+static void
+sort_snapshot(pg_snapshot *snap)
+{
+ if (snap->nxip > 1)
+ {
+ qsort(snap->xip, snap->nxip, sizeof(FullTransactionId), cmp_fxid);
+ snap->nxip = qunique(snap->xip, snap->nxip, sizeof(FullTransactionId),
+ cmp_fxid);
+ }
+}
+
+/*
+ * check fxid visibility.
+ */
+static bool
+is_visible_fxid(FullTransactionId value, const pg_snapshot *snap)
+{
+ if (FullTransactionIdPrecedes(value, snap->xmin))
+ return true;
+ else if (!FullTransactionIdPrecedes(value, snap->xmax))
+ return false;
+#ifdef USE_BSEARCH_IF_NXIP_GREATER
+ else if (snap->nxip > USE_BSEARCH_IF_NXIP_GREATER)
+ {
+ void *res;
+
+ res = bsearch(&value, snap->xip, snap->nxip, sizeof(FullTransactionId),
+ cmp_fxid);
+ /* if found, transaction is still in progress */
+ return (res) ? false : true;
+ }
+#endif
+ else
+ {
+ uint32 i;
+
+ for (i = 0; i < snap->nxip; i++)
+ {
+ if (FullTransactionIdEquals(value, snap->xip[i]))
+ return false;
+ }
+ return true;
+ }
+}
+
+/*
+ * helper functions to use StringInfo for pg_snapshot creation.
+ */
+
+static StringInfo
+buf_init(FullTransactionId xmin, FullTransactionId xmax)
+{
+ pg_snapshot snap;
+ StringInfo buf;
+
+ snap.xmin = xmin;
+ snap.xmax = xmax;
+ snap.nxip = 0;
+
+ buf = makeStringInfo();
+ appendBinaryStringInfo(buf, (char *) &snap, PG_SNAPSHOT_SIZE(0));
+ return buf;
+}
+
+static void
+buf_add_txid(StringInfo buf, FullTransactionId fxid)
+{
+ pg_snapshot *snap = (pg_snapshot *) buf->data;
+
+ /* do this before possible realloc */
+ snap->nxip++;
+
+ appendBinaryStringInfo(buf, (char *) &fxid, sizeof(fxid));
+}
+
+static pg_snapshot *
+buf_finalize(StringInfo buf)
+{
+ pg_snapshot *snap = (pg_snapshot *) buf->data;
+
+ SET_VARSIZE(snap, buf->len);
+
+ /* buf is not needed anymore */
+ buf->data = NULL;
+ pfree(buf);
+
+ return snap;
+}
+
+/*
+ * parse snapshot from cstring
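+ *
+ * The expected format is xmin:xmax:xip1,xip2,..., e.g. "10:20:10,14,15".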
+ */
+static pg_snapshot *
+parse_snapshot(const char *str)
+{
+ FullTransactionId xmin;
+ FullTransactionId xmax;
+ FullTransactionId last_val = InvalidFullTransactionId;
+ FullTransactionId val;
+ const char *str_start = str;
+ char *endp;
+ StringInfo buf;
+
+ xmin = FullTransactionIdFromU64(strtou64(str, &endp, 10));
+ if (*endp != ':')
+ goto bad_format;
+ str = endp + 1;
+
+ xmax = FullTransactionIdFromU64(strtou64(str, &endp, 10));
+ if (*endp != ':')
+ goto bad_format;
+ str = endp + 1;
+
+ /* it should look sane */
+ if (!FullTransactionIdIsValid(xmin) ||
+ !FullTransactionIdIsValid(xmax) ||
+ FullTransactionIdPrecedes(xmax, xmin))
+ goto bad_format;
+
+ /* allocate buffer */
+ buf = buf_init(xmin, xmax);
+
+ /* loop over values */
+ while (*str != '\0')
+ {
+ /* read next value */
+ val = FullTransactionIdFromU64(strtou64(str, &endp, 10));
+ str = endp;
+
+ /* require the input to be in order */
+ if (FullTransactionIdPrecedes(val, xmin) ||
+ FullTransactionIdFollowsOrEquals(val, xmax) ||
+ FullTransactionIdPrecedes(val, last_val))
+ goto bad_format;
+
+ /* skip duplicates */
+ if (!FullTransactionIdEquals(val, last_val))
+ buf_add_txid(buf, val);
+ last_val = val;
+
+ if (*str == ',')
+ str++;
+ else if (*str != '\0')
+ goto bad_format;
+ }
+
+ return buf_finalize(buf);
+
+bad_format:
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+ errmsg("invalid input syntax for type %s: \"%s\"",
+ "pg_snapshot", str_start)));
+ return NULL; /* keep compiler quiet */
+}
+
+/*
+ * pg_current_xact_id() returns xid8
+ *
+ * Return the current toplevel full transaction ID.
+ * If the current transaction does not have one, one is assigned.
+ */
+Datum
+pg_current_xact_id(PG_FUNCTION_ARGS)
+{
+ /*
+ * Must prevent this during recovery because, if an xid is not assigned, we
+ * try to assign one, which would fail. Programs already rely on this
+ * function to always return a valid current xid, so we should not change
+ * this to return NULL or a similarly invalid xid.
+ */
+ PreventCommandDuringRecovery("pg_current_xact_id()");
+
+ PG_RETURN_FULLTRANSACTIONID(GetTopFullTransactionId());
+}
+
+/*
+ * Same as pg_current_xact_id() but doesn't assign a new xid if there
+ * isn't one yet.
+ */
+Datum
+pg_current_xact_id_if_assigned(PG_FUNCTION_ARGS)
+{
+ FullTransactionId topfxid = GetTopFullTransactionIdIfAny();
+
+ if (!FullTransactionIdIsValid(topfxid))
+ PG_RETURN_NULL();
+
+ PG_RETURN_FULLTRANSACTIONID(topfxid);
+}
+
+/*
+ * pg_current_snapshot() returns pg_snapshot
+ *
+ * Return current snapshot
+ *
+ * Note that only top-transaction XIDs are included in the snapshot.
+ */
+Datum
+pg_current_snapshot(PG_FUNCTION_ARGS)
+{
+ pg_snapshot *snap;
+ uint32 nxip,
+ i;
+ Snapshot cur;
+ FullTransactionId next_fxid = ReadNextFullTransactionId();
+
+ cur = GetActiveSnapshot();
+ if (cur == NULL)
+ elog(ERROR, "no active snapshot set");
+
+ /*
+ * Compile-time limits on the procarray (MAX_BACKENDS processes plus
+ * MAX_BACKENDS prepared transactions) guarantee nxip won't be too large.
+ */
+ StaticAssertStmt(MAX_BACKENDS * 2 <= PG_SNAPSHOT_MAX_NXIP,
+ "possible overflow in pg_current_snapshot()");
+
+ /* allocate */
+ nxip = cur->xcnt;
+ snap = palloc(PG_SNAPSHOT_SIZE(nxip));
+
+ /* fill */
+ snap->xmin = widen_snapshot_xid(cur->xmin, next_fxid);
+ snap->xmax = widen_snapshot_xid(cur->xmax, next_fxid);
+ snap->nxip = nxip;
+ for (i = 0; i < nxip; i++)
+ snap->xip[i] = widen_snapshot_xid(cur->xip[i], next_fxid);
+
+ /*
+ * We want them guaranteed to be in ascending order. This also removes
+ * any duplicate xids. Normally, an XID can only be assigned to one
+ * backend, but when preparing a transaction for two-phase commit, there
+ * is a transient state when both the original backend and the dummy
+ * PGPROC entry reserved for the prepared transaction hold the same XID.
+ */
+ sort_snapshot(snap);
+
+ /* set size after sorting, because it may have removed duplicate xips */
+ SET_VARSIZE(snap, PG_SNAPSHOT_SIZE(snap->nxip));
+
+ PG_RETURN_POINTER(snap);
+}
+
+/*
+ * pg_snapshot_in(cstring) returns pg_snapshot
+ *
+ * input function for type pg_snapshot
+ */
+Datum
+pg_snapshot_in(PG_FUNCTION_ARGS)
+{
+ char *str = PG_GETARG_CSTRING(0);
+ pg_snapshot *snap;
+
+ snap = parse_snapshot(str);
+
+ PG_RETURN_POINTER(snap);
+}
+
+/*
+ * pg_snapshot_out(pg_snapshot) returns cstring
+ *
+ * output function for type pg_snapshot
+ */
+Datum
+pg_snapshot_out(PG_FUNCTION_ARGS)
+{
+ pg_snapshot *snap = (pg_snapshot *) PG_GETARG_VARLENA_P(0);
+ StringInfoData str;
+ uint32 i;
+
+ initStringInfo(&str);
+
+ appendStringInfo(&str, UINT64_FORMAT ":",
+ U64FromFullTransactionId(snap->xmin));
+ appendStringInfo(&str, UINT64_FORMAT ":",
+ U64FromFullTransactionId(snap->xmax));
+
+ for (i = 0; i < snap->nxip; i++)
+ {
+ if (i > 0)
+ appendStringInfoChar(&str, ',');
+ appendStringInfo(&str, UINT64_FORMAT,
+ U64FromFullTransactionId(snap->xip[i]));
+ }
+
+ PG_RETURN_CSTRING(str.data);
+}
+
+/*
+ * pg_snapshot_recv(internal) returns pg_snapshot
+ *
+ * binary input function for type pg_snapshot
+ *
+ * format: int4 nxip, int8 xmin, int8 xmax, int8 xip
+ */
+Datum
+pg_snapshot_recv(PG_FUNCTION_ARGS)
+{
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ pg_snapshot *snap;
+ FullTransactionId last = InvalidFullTransactionId;
+ int nxip;
+ int i;
+ FullTransactionId xmin;
+ FullTransactionId xmax;
+
+ /* load and validate nxip */
+ nxip = pq_getmsgint(buf, 4);
+ if (nxip < 0 || nxip > PG_SNAPSHOT_MAX_NXIP)
+ goto bad_format;
+
+ xmin = FullTransactionIdFromU64((uint64) pq_getmsgint64(buf));
+ xmax = FullTransactionIdFromU64((uint64) pq_getmsgint64(buf));
+ if (!FullTransactionIdIsValid(xmin) ||
+ !FullTransactionIdIsValid(xmax) ||
+ FullTransactionIdPrecedes(xmax, xmin))
+ goto bad_format;
+
+ snap = palloc(PG_SNAPSHOT_SIZE(nxip));
+ snap->xmin = xmin;
+ snap->xmax = xmax;
+
+ for (i = 0; i < nxip; i++)
+ {
+ FullTransactionId cur =
+ FullTransactionIdFromU64((uint64) pq_getmsgint64(buf));
+
+ if (FullTransactionIdPrecedes(cur, last) ||
+ FullTransactionIdPrecedes(cur, xmin) ||
+ FullTransactionIdPrecedes(xmax, cur))
+ goto bad_format;
+
+ /* skip duplicate xips */
+ if (FullTransactionIdEquals(cur, last))
+ {
+ i--;
+ nxip--;
+ continue;
+ }
+
+ snap->xip[i] = cur;
+ last = cur;
+ }
+ snap->nxip = nxip;
+ SET_VARSIZE(snap, PG_SNAPSHOT_SIZE(nxip));
+ PG_RETURN_POINTER(snap);
+
+bad_format:
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
+ errmsg("invalid external pg_snapshot data")));
+ PG_RETURN_POINTER(NULL); /* keep compiler quiet */
+}
+
+/*
+ * pg_snapshot_send(pg_snapshot) returns bytea
+ *
+ * binary output function for type pg_snapshot
+ *
+ * format: int4 nxip, u64 xmin, u64 xmax, u64 xip...
+ */
+Datum
+pg_snapshot_send(PG_FUNCTION_ARGS)
+{
+ pg_snapshot *snap = (pg_snapshot *) PG_GETARG_VARLENA_P(0);
+ StringInfoData buf;
+ uint32 i;
+
+ pq_begintypsend(&buf);
+ pq_sendint32(&buf, snap->nxip);
+ pq_sendint64(&buf, (int64) U64FromFullTransactionId(snap->xmin));
+ pq_sendint64(&buf, (int64) U64FromFullTransactionId(snap->xmax));
+ for (i = 0; i < snap->nxip; i++)
+ pq_sendint64(&buf, (int64) U64FromFullTransactionId(snap->xip[i]));
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+/*
+ * pg_visible_in_snapshot(xid8, pg_snapshot) returns bool
+ *
+ * is the given xid8 visible in the snapshot?
+ */
+Datum
+pg_visible_in_snapshot(PG_FUNCTION_ARGS)
+{
+ FullTransactionId value = PG_GETARG_FULLTRANSACTIONID(0);
+ pg_snapshot *snap = (pg_snapshot *) PG_GETARG_VARLENA_P(1);
+
+ PG_RETURN_BOOL(is_visible_fxid(value, snap));
+}
+
+/*
+ * pg_snapshot_xmin(pg_snapshot) returns xid8
+ *
+ * return snapshot's xmin
+ */
+Datum
+pg_snapshot_xmin(PG_FUNCTION_ARGS)
+{
+ pg_snapshot *snap = (pg_snapshot *) PG_GETARG_VARLENA_P(0);
+
+ PG_RETURN_FULLTRANSACTIONID(snap->xmin);
+}
+
+/*
+ * pg_snapshot_xmax(pg_snapshot) returns xid8
+ *
+ * return snapshot's xmax
+ */
+Datum
+pg_snapshot_xmax(PG_FUNCTION_ARGS)
+{
+ pg_snapshot *snap = (pg_snapshot *) PG_GETARG_VARLENA_P(0);
+
+ PG_RETURN_FULLTRANSACTIONID(snap->xmax);
+}
+
+/*
+ * pg_snapshot_xip(pg_snapshot) returns setof xid8
+ *
+ * return in-progress xid8s in snapshot.
+ */
+Datum
+pg_snapshot_xip(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *fctx;
+ pg_snapshot *snap;
+ FullTransactionId value;
+
+ /* on first call initialize fctx and get copy of snapshot */
+ if (SRF_IS_FIRSTCALL())
+ {
+ pg_snapshot *arg = (pg_snapshot *) PG_GETARG_VARLENA_P(0);
+
+ fctx = SRF_FIRSTCALL_INIT();
+
+ /* make a copy of user snapshot */
+ snap = MemoryContextAlloc(fctx->multi_call_memory_ctx, VARSIZE(arg));
+ memcpy(snap, arg, VARSIZE(arg));
+
+ fctx->user_fctx = snap;
+ }
+
+ /* return values one-by-one */
+ fctx = SRF_PERCALL_SETUP();
+ snap = fctx->user_fctx;
+ if (fctx->call_cntr < snap->nxip)
+ {
+ value = snap->xip[fctx->call_cntr];
+ SRF_RETURN_NEXT(fctx, FullTransactionIdGetDatum(value));
+ }
+ else
+ {
+ SRF_RETURN_DONE(fctx);
+ }
+}
+
+/*
+ * Report the status of a recent transaction ID, or null for wrapped,
+ * truncated away or otherwise too old XIDs.
+ *
+ * The passed epoch-qualified xid is treated as a normal xid, not a
+ * multixact id.
+ *
+ * If it points to a committed subxact the result is the subxact status even
+ * though the parent xact may still be in progress or may have aborted.
+ */
+Datum
+pg_xact_status(PG_FUNCTION_ARGS)
+{
+ const char *status;
+ FullTransactionId fxid = PG_GETARG_FULLTRANSACTIONID(0);
+ TransactionId xid;
+
+ /*
+ * We must protect against concurrent truncation of clog entries to avoid
+ * an I/O error on SLRU lookup.
+ */
+ LWLockAcquire(XactTruncationLock, LW_SHARED);
+ if (TransactionIdInRecentPast(fxid, &xid))
+ {
+ Assert(TransactionIdIsValid(xid));
+
+ /*
+ * Like when doing visibility checks on a row, check whether the
+ * transaction is still in progress before looking into the CLOG.
+ * Otherwise we would incorrectly return "committed" for a transaction
+ * that is committing and has already updated the CLOG, but hasn't
+ * removed its XID from the proc array yet. (See comment on that race
+ * condition at the top of heapam_visibility.c)
+ */
+ if (TransactionIdIsInProgress(xid))
+ status = "in progress";
+ else if (TransactionIdDidCommit(xid))
+ status = "committed";
+ else
+ {
+ /* it must have aborted or crashed */
+ status = "aborted";
+ }
+ }
+ else
+ {
+ status = NULL;
+ }
+ LWLockRelease(XactTruncationLock);
+
+ if (status == NULL)
+ PG_RETURN_NULL();
+ else
+ PG_RETURN_TEXT_P(cstring_to_text(status));
+}
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
new file mode 100644
index 0000000..6411f56
--- /dev/null
+++ b/src/backend/utils/adt/xml.c
@@ -0,0 +1,4769 @@
+/*-------------------------------------------------------------------------
+ *
+ * xml.c
+ * XML data type support.
+ *
+ *
+ * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/backend/utils/adt/xml.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * Generally, XML type support is only available when libxml use was
+ * configured during the build. But even if that is not done, the
+ * type and all the functions are available, but most of them will
+ * fail. For one thing, this avoids having to manage variant catalog
+ * installations. But it also has nice effects such as that you can
+ * dump a database containing XML type data even if the server is not
+ * linked with libxml. Thus, make sure xml_out() works even if nothing
+ * else does.
+ */
+
+/*
+ * Notes on memory management:
+ *
+ * Sometimes libxml allocates global structures in the hope that it can reuse
+ * them later on. This makes it impractical to change the xmlMemSetup
+ * functions on-the-fly; that is likely to lead to trying to pfree() chunks
+ * allocated with malloc() or vice versa. Since libxml might be used by
+ * loadable modules, eg libperl, our only safe choices are to change the
+ * functions at postmaster/backend launch or not at all. Since we'd rather
+ * not activate libxml in sessions that might never use it, the latter choice
+ * is the preferred one. However, for debugging purposes it can be awfully
+ * handy to constrain libxml's allocations to be done in a specific palloc
+ * context, where they're easy to track. Therefore there is code here that
+ * can be enabled in debug builds to redirect libxml's allocations into a
+ * special context LibxmlContext. It's not recommended to turn this on in
+ * a production build because of the possibility of bad interactions with
+ * external modules.
+ */
+/* #define USE_LIBXMLCONTEXT */
+
+#include "postgres.h"
+
+#ifdef USE_LIBXML
+#include <libxml/chvalid.h>
+#include <libxml/parser.h>
+#include <libxml/parserInternals.h>
+#include <libxml/tree.h>
+#include <libxml/uri.h>
+#include <libxml/xmlerror.h>
+#include <libxml/xmlversion.h>
+#include <libxml/xmlwriter.h>
+#include <libxml/xpath.h>
+#include <libxml/xpathInternals.h>
+
+/*
+ * We used to check for xmlStructuredErrorContext via a configure test; but
+ * that doesn't work on Windows, so instead use this grottier method of
+ * testing the library version number.
+ */
+#if LIBXML_VERSION >= 20704
+#define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
+#endif
+#endif /* USE_LIBXML */
+
+#include "access/htup_details.h"
+#include "access/table.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_class.h"
+#include "catalog/pg_type.h"
+#include "commands/dbcommands.h"
+#include "executor/spi.h"
+#include "executor/tablefunc.h"
+#include "fmgr.h"
+#include "lib/stringinfo.h"
+#include "libpq/pqformat.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "nodes/execnodes.h"
+#include "nodes/nodeFuncs.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/date.h"
+#include "utils/datetime.h"
+#include "utils/lsyscache.h"
+#include "utils/memutils.h"
+#include "utils/rel.h"
+#include "utils/syscache.h"
+#include "utils/xml.h"
+
+
+/* GUC variables */
+int xmlbinary;
+int xmloption;
+
+#ifdef USE_LIBXML
+
+/* random number to identify PgXmlErrorContext */
+#define ERRCXT_MAGIC 68275028
+
+struct PgXmlErrorContext
+{
+ int magic;
+ /* strictness argument passed to pg_xml_init */
+ PgXmlStrictness strictness;
+ /* current error status and accumulated message, if any */
+ bool err_occurred;
+ StringInfoData err_buf;
+ /* previous libxml error handling state (saved by pg_xml_init) */
+ xmlStructuredErrorFunc saved_errfunc;
+ void *saved_errcxt;
+ /* previous libxml entity handler (saved by pg_xml_init) */
+ xmlExternalEntityLoader saved_entityfunc;
+};
+
+static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
+ xmlParserCtxtPtr ctxt);
+static void xml_errorHandler(void *data, xmlErrorPtr error);
+static void xml_ereport_by_code(int level, int sqlcode,
+ const char *msg, int errcode);
+static void chopStringInfoNewlines(StringInfo str);
+static void appendStringInfoLineSeparator(StringInfo str);
+
+#ifdef USE_LIBXMLCONTEXT
+
+static MemoryContext LibxmlContext = NULL;
+
+static void xml_memory_init(void);
+static void *xml_palloc(size_t size);
+static void *xml_repalloc(void *ptr, size_t size);
+static void xml_pfree(void *ptr);
+static char *xml_pstrdup(const char *string);
+#endif /* USE_LIBXMLCONTEXT */
+
+static xmlChar *xml_text2xmlChar(text *in);
+static int parse_xml_decl(const xmlChar *str, size_t *lenp,
+ xmlChar **version, xmlChar **encoding, int *standalone);
+static bool print_xml_decl(StringInfo buf, const xmlChar *version,
+ pg_enc encoding, int standalone);
+static bool xml_doctype_in_content(const xmlChar *str);
+static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
+ bool preserve_whitespace, int encoding);
+static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
+static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
+ ArrayBuildState *astate,
+ PgXmlErrorContext *xmlerrcxt);
+static xmlChar *pg_xmlCharStrndup(const char *str, size_t len);
+#endif /* USE_LIBXML */
+
+static void xmldata_root_element_start(StringInfo result, const char *eltname,
+ const char *xmlschema, const char *targetns,
+ bool top_level);
+static void xmldata_root_element_end(StringInfo result, const char *eltname);
+static StringInfo query_to_xml_internal(const char *query, char *tablename,
+ const char *xmlschema, bool nulls, bool tableforest,
+ const char *targetns, bool top_level);
+static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
+ bool nulls, bool tableforest, const char *targetns);
+static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
+ List *relid_list, bool nulls,
+ bool tableforest, const char *targetns);
+static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
+ bool nulls, bool tableforest,
+ const char *targetns);
+static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
+static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
+static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
+static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result,
+ char *tablename, bool nulls, bool tableforest,
+ const char *targetns, bool top_level);
+
+/* XMLTABLE support */
+#ifdef USE_LIBXML
+/* random number to identify XmlTableContext */
+#define XMLTABLE_CONTEXT_MAGIC 46922182
+typedef struct XmlTableBuilderData
+{
+ int magic;
+ int natts;
+ long int row_count;
+ PgXmlErrorContext *xmlerrcxt;
+ xmlParserCtxtPtr ctxt;
+ xmlDocPtr doc;
+ xmlXPathContextPtr xpathcxt;
+ xmlXPathCompExprPtr xpathcomp;
+ xmlXPathObjectPtr xpathobj;
+ xmlXPathCompExprPtr *xpathscomp;
+} XmlTableBuilderData;
+#endif
+
+static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts);
+static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value);
+static void XmlTableSetNamespace(struct TableFuncScanState *state, const char *name,
+ const char *uri);
+static void XmlTableSetRowFilter(struct TableFuncScanState *state, const char *path);
+static void XmlTableSetColumnFilter(struct TableFuncScanState *state,
+ const char *path, int colnum);
+static bool XmlTableFetchRow(struct TableFuncScanState *state);
+static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum,
+ Oid typid, int32 typmod, bool *isnull);
+static void XmlTableDestroyOpaque(struct TableFuncScanState *state);
+
+const TableFuncRoutine XmlTableRoutine =
+{
+ XmlTableInitOpaque,
+ XmlTableSetDocument,
+ XmlTableSetNamespace,
+ XmlTableSetRowFilter,
+ XmlTableSetColumnFilter,
+ XmlTableFetchRow,
+ XmlTableGetValue,
+ XmlTableDestroyOpaque
+};
+
+#define NO_XML_SUPPORT() \
+ ereport(ERROR, \
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
+ errmsg("unsupported XML feature"), \
+ errdetail("This functionality requires the server to be built with libxml support.")))
+
+
+/* from SQL/XML:2008 section 4.9 */
+#define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema"
+#define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
+#define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"
+
+
+#ifdef USE_LIBXML
+
+static int
+xmlChar_to_encoding(const xmlChar *encoding_name)
+{
+ int encoding = pg_char_to_encoding((const char *) encoding_name);
+
+ if (encoding < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid encoding name \"%s\"",
+ (const char *) encoding_name)));
+ return encoding;
+}
+#endif
+
+
+/*
+ * xml_in uses a plain C string to VARDATA conversion, so for the time being
+ * we use the conversion function for the text datatype.
+ *
+ * This is only acceptable so long as xmltype and text use the same
+ * representation.
+ */
+Datum
+xml_in(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+ char *s = PG_GETARG_CSTRING(0);
+ xmltype *vardata;
+ xmlDocPtr doc;
+
+ vardata = (xmltype *) cstring_to_text(s);
+
+ /*
+ * Parse the data to check if it is well-formed XML data. Assume that
+ * xml_parse will throw ERROR if not.
+ */
+ doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding());
+ xmlFreeDoc(doc);
+
+ PG_RETURN_XML_P(vardata);
+#else
+ NO_XML_SUPPORT();
+ return 0;
+#endif
+}
+
+
+#define PG_XML_DEFAULT_VERSION "1.0"
+
+
+/*
+ * xml_out_internal uses a plain VARDATA to C string conversion, so for the
+ * time being we use the conversion function for the text datatype.
+ *
+ * This is only acceptable so long as xmltype and text use the same
+ * representation.
+ */
+static char *
+xml_out_internal(xmltype *x, pg_enc target_encoding)
+{
+ char *str = text_to_cstring((text *) x);
+
+#ifdef USE_LIBXML
+ size_t len = strlen(str);
+ xmlChar *version;
+ int standalone;
+ int res_code;
+
+ if ((res_code = parse_xml_decl((xmlChar *) str,
+ &len, &version, NULL, &standalone)) == 0)
+ {
+ StringInfoData buf;
+
+ initStringInfo(&buf);
+
+ if (!print_xml_decl(&buf, version, target_encoding, standalone))
+ {
+ /*
+ * If we are not going to produce an XML declaration, eat a single
+ * newline in the original string to prevent empty first lines in
+ * the output.
+ */
+ if (*(str + len) == '\n')
+ len += 1;
+ }
+ appendStringInfoString(&buf, str + len);
+
+ pfree(str);
+
+ return buf.data;
+ }
+
+ xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR,
+ "could not parse XML declaration in stored value",
+ res_code);
+#endif
+ return str;
+}
+
+
+Datum
+xml_out(PG_FUNCTION_ARGS)
+{
+ xmltype *x = PG_GETARG_XML_P(0);
+
+ /*
+ * xml_out removes the encoding property in all cases. This is because we
+ * cannot control from here whether the datum will be converted to a
+ * different client encoding, so we'd do more harm than good by including
+ * it.
+ */
+ PG_RETURN_CSTRING(xml_out_internal(x, 0));
+}
+
+
+Datum
+xml_recv(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+ StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
+ xmltype *result;
+ char *str;
+ char *newstr;
+ int nbytes;
+ xmlDocPtr doc;
+ xmlChar *encodingStr = NULL;
+ int encoding;
+
+ /*
+ * Read the data in raw format. We don't know yet what the encoding is, as
+ * that information is embedded in the xml declaration; so we have to
+ * parse that before converting to server encoding.
+ */
+ nbytes = buf->len - buf->cursor;
+ str = (char *) pq_getmsgbytes(buf, nbytes);
+
+ /*
+ * We need a null-terminated string to pass to parse_xml_decl(). Rather
+ * than make a separate copy, make the temporary result one byte bigger
+ * than it needs to be.
+ */
+ result = palloc(nbytes + 1 + VARHDRSZ);
+ SET_VARSIZE(result, nbytes + VARHDRSZ);
+ memcpy(VARDATA(result), str, nbytes);
+ str = VARDATA(result);
+ str[nbytes] = '\0';
+
+ parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
+
+ /*
+ * If encoding wasn't explicitly specified in the XML header, treat it as
+ * UTF-8, as that's the default in XML. This is different from xml_in(),
+ * where the input has to go through the normal client to server encoding
+ * conversion.
+ */
+ encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
+
+ /*
+ * Parse the data to check if it is well-formed XML data. Assume that
+ * xml_parse will throw ERROR if not.
+ */
+ doc = xml_parse(result, xmloption, true, encoding);
+ xmlFreeDoc(doc);
+
+ /* Now that we know what we're dealing with, convert to server encoding */
+ newstr = pg_any_to_server(str, nbytes, encoding);
+
+ if (newstr != str)
+ {
+ pfree(result);
+ result = (xmltype *) cstring_to_text(newstr);
+ pfree(newstr);
+ }
+
+ PG_RETURN_XML_P(result);
+#else
+ NO_XML_SUPPORT();
+ return 0;
+#endif
+}
+
+
+Datum
+xml_send(PG_FUNCTION_ARGS)
+{
+ xmltype *x = PG_GETARG_XML_P(0);
+ char *outval;
+ StringInfoData buf;
+
+ /*
+ * xml_out_internal doesn't convert the encoding, it just prints the right
+ * declaration. pq_sendtext will do the conversion.
+ */
+ outval = xml_out_internal(x, pg_get_client_encoding());
+
+ pq_begintypsend(&buf);
+ pq_sendtext(&buf, outval, strlen(outval));
+ pfree(outval);
+ PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
+
+
+#ifdef USE_LIBXML
+static void
+appendStringInfoText(StringInfo str, const text *t)
+{
+ appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
+}
+#endif
+
+
+static xmltype *
+stringinfo_to_xmltype(StringInfo buf)
+{
+ return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
+}
+
+
+static xmltype *
+cstring_to_xmltype(const char *string)
+{
+ return (xmltype *) cstring_to_text(string);
+}
+
+
+#ifdef USE_LIBXML
+static xmltype *
+xmlBuffer_to_xmltype(xmlBufferPtr buf)
+{
+ return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf),
+ xmlBufferLength(buf));
+}
+#endif
+
+
+Datum
+xmlcomment(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+ text *arg = PG_GETARG_TEXT_PP(0);
+ char *argdata = VARDATA_ANY(arg);
+ int len = VARSIZE_ANY_EXHDR(arg);
+ StringInfoData buf;
+ int i;
+
+ /* check for "--" in string or "-" at the end */
+ for (i = 1; i < len; i++)
+ {
+ if (argdata[i] == '-' && argdata[i - 1] == '-')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_XML_COMMENT),
+ errmsg("invalid XML comment")));
+ }
+ if (len > 0 && argdata[len - 1] == '-')
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_XML_COMMENT),
+ errmsg("invalid XML comment")));
+
+ initStringInfo(&buf);
+ appendStringInfoString(&buf, "<!--");
+ appendStringInfoText(&buf, arg);
+ appendStringInfoString(&buf, "-->");
+
+ PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
+#else
+ NO_XML_SUPPORT();
+ return 0;
+#endif
+}
+
+
+
+/*
+ * TODO: xmlconcat needs to merge the notations and unparsed entities
+ * of the argument values. Not very important in practice, though.
+ */
+xmltype *
+xmlconcat(List *args)
+{
+#ifdef USE_LIBXML
+ int global_standalone = 1;
+ xmlChar *global_version = NULL;
+ bool global_version_no_value = false;
+ StringInfoData buf;
+ ListCell *v;
+
+ initStringInfo(&buf);
+ foreach(v, args)
+ {
+ xmltype *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
+ size_t len;
+ xmlChar *version;
+ int standalone;
+ char *str;
+
+ len = VARSIZE(x) - VARHDRSZ;
+ str = text_to_cstring((text *) x);
+
+ parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
+
+ if (standalone == 0 && global_standalone == 1)
+ global_standalone = 0;
+ if (standalone < 0)
+ global_standalone = -1;
+
+ if (!version)
+ global_version_no_value = true;
+ else if (!global_version)
+ global_version = version;
+ else if (xmlStrcmp(version, global_version) != 0)
+ global_version_no_value = true;
+
+ appendStringInfoString(&buf, str + len);
+ pfree(str);
+ }
+
+ if (!global_version_no_value || global_standalone >= 0)
+ {
+ StringInfoData buf2;
+
+ initStringInfo(&buf2);
+
+ print_xml_decl(&buf2,
+ (!global_version_no_value) ? global_version : NULL,
+ 0,
+ global_standalone);
+
+ appendBinaryStringInfo(&buf2, buf.data, buf.len);
+ buf = buf2;
+ }
+
+ return stringinfo_to_xmltype(&buf);
+#else
+ NO_XML_SUPPORT();
+ return NULL;
+#endif
+}
+
+
+/*
+ * XMLAGG support
+ */
+Datum
+xmlconcat2(PG_FUNCTION_ARGS)
+{
+ if (PG_ARGISNULL(0))
+ {
+ if (PG_ARGISNULL(1))
+ PG_RETURN_NULL();
+ else
+ PG_RETURN_XML_P(PG_GETARG_XML_P(1));
+ }
+ else if (PG_ARGISNULL(1))
+ PG_RETURN_XML_P(PG_GETARG_XML_P(0));
+ else
+ PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
+ PG_GETARG_XML_P(1))));
+}
+
+
+Datum
+texttoxml(PG_FUNCTION_ARGS)
+{
+ text *data = PG_GETARG_TEXT_PP(0);
+
+ PG_RETURN_XML_P(xmlparse(data, xmloption, true));
+}
+
+
+Datum
+xmltotext(PG_FUNCTION_ARGS)
+{
+ xmltype *data = PG_GETARG_XML_P(0);
+
+ /* It's actually binary compatible. */
+ PG_RETURN_TEXT_P((text *) data);
+}
+
+
+text *
+xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg)
+{
+ if (xmloption_arg == XMLOPTION_DOCUMENT && !xml_is_document(data))
+ ereport(ERROR,
+ (errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
+ errmsg("not an XML document")));
+
+ /* It's actually binary compatible, save for the above check. */
+ return (text *) data;
+}
+
+
+xmltype *
+xmlelement(XmlExpr *xexpr,
+ Datum *named_argvalue, bool *named_argnull,
+ Datum *argvalue, bool *argnull)
+{
+#ifdef USE_LIBXML
+ xmltype *result;
+ List *named_arg_strings;
+ List *arg_strings;
+ int i;
+ ListCell *arg;
+ ListCell *narg;
+ PgXmlErrorContext *xmlerrcxt;
+ volatile xmlBufferPtr buf = NULL;
+ volatile xmlTextWriterPtr writer = NULL;
+
+ /*
+ * All arguments are already evaluated, and their values are passed in the
+ * named_argvalue/named_argnull or argvalue/argnull arrays. This avoids
+ * issues if one of the arguments involves a call to some other function
+ * or subsystem that wants to use libxml on its own terms. We examine the
+ * original XmlExpr to identify the numbers and types of the arguments.
+ */
+ named_arg_strings = NIL;
+ i = 0;
+ foreach(arg, xexpr->named_args)
+ {
+ Expr *e = (Expr *) lfirst(arg);
+ char *str;
+
+ if (named_argnull[i])
+ str = NULL;
+ else
+ str = map_sql_value_to_xml_value(named_argvalue[i],
+ exprType((Node *) e),
+ false);
+ named_arg_strings = lappend(named_arg_strings, str);
+ i++;
+ }
+
+ arg_strings = NIL;
+ i = 0;
+ foreach(arg, xexpr->args)
+ {
+ Expr *e = (Expr *) lfirst(arg);
+ char *str;
+
+ /* here we can just forget NULL elements immediately */
+ if (!argnull[i])
+ {
+ str = map_sql_value_to_xml_value(argvalue[i],
+ exprType((Node *) e),
+ true);
+ arg_strings = lappend(arg_strings, str);
+ }
+ i++;
+ }
+
+ xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
+
+ PG_TRY();
+ {
+ buf = xmlBufferCreate();
+ if (buf == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate xmlBuffer");
+ writer = xmlNewTextWriterMemory(buf, 0);
+ if (writer == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate xmlTextWriter");
+
+ xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
+
+ forboth(arg, named_arg_strings, narg, xexpr->arg_names)
+ {
+ char *str = (char *) lfirst(arg);
+ char *argname = strVal(lfirst(narg));
+
+ if (str)
+ xmlTextWriterWriteAttribute(writer,
+ (xmlChar *) argname,
+ (xmlChar *) str);
+ }
+
+ foreach(arg, arg_strings)
+ {
+ char *str = (char *) lfirst(arg);
+
+ xmlTextWriterWriteRaw(writer, (xmlChar *) str);
+ }
+
+ xmlTextWriterEndElement(writer);
+
+ /* we MUST do this now to flush data out to the buffer ... */
+ xmlFreeTextWriter(writer);
+ writer = NULL;
+
+ result = xmlBuffer_to_xmltype(buf);
+ }
+ PG_CATCH();
+ {
+ if (writer)
+ xmlFreeTextWriter(writer);
+ if (buf)
+ xmlBufferFree(buf);
+
+ pg_xml_done(xmlerrcxt, true);
+
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+
+ xmlBufferFree(buf);
+
+ pg_xml_done(xmlerrcxt, false);
+
+ return result;
+#else
+ NO_XML_SUPPORT();
+ return NULL;
+#endif
+}
+
+
+xmltype *
+xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
+{
+#ifdef USE_LIBXML
+ xmlDocPtr doc;
+
+ doc = xml_parse(data, xmloption_arg, preserve_whitespace,
+ GetDatabaseEncoding());
+ xmlFreeDoc(doc);
+
+ return (xmltype *) data;
+#else
+ NO_XML_SUPPORT();
+ return NULL;
+#endif
+}
+
+
+xmltype *
+xmlpi(const char *target, text *arg, bool arg_is_null, bool *result_is_null)
+{
+#ifdef USE_LIBXML
+ xmltype *result;
+ StringInfoData buf;
+
+ if (pg_strcasecmp(target, "xml") == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR), /* really */
+ errmsg("invalid XML processing instruction"),
+ errdetail("XML processing instruction target name cannot be \"%s\".", target)));
+
+ /*
+ * Following the SQL standard, the null check comes after the syntax check
+ * above.
+ */
+ *result_is_null = arg_is_null;
+ if (*result_is_null)
+ return NULL;
+
+ initStringInfo(&buf);
+
+ appendStringInfo(&buf, "<?%s", target);
+
+ if (arg != NULL)
+ {
+ char *string;
+
+ string = text_to_cstring(arg);
+ if (strstr(string, "?>") != NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
+ errmsg("invalid XML processing instruction"),
+ errdetail("XML processing instruction cannot contain \"?>\".")));
+
+ appendStringInfoChar(&buf, ' ');
+ appendStringInfoString(&buf, string + strspn(string, " "));
+ pfree(string);
+ }
+ appendStringInfoString(&buf, "?>");
+
+ result = stringinfo_to_xmltype(&buf);
+ pfree(buf.data);
+ return result;
+#else
+ NO_XML_SUPPORT();
+ return NULL;
+#endif
+}
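+
+/*
+ * For example, XMLPI(NAME php, 'echo "hello world";') comes out as
+ *   <?php echo "hello world";?>
+ * with any leading spaces of the argument stripped and no space added
+ * before the closing "?>".
+ */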
+
+
+xmltype *
+xmlroot(xmltype *data, text *version, int standalone)
+{
+#ifdef USE_LIBXML
+ char *str;
+ size_t len;
+ xmlChar *orig_version;
+ int orig_standalone;
+ StringInfoData buf;
+
+ len = VARSIZE(data) - VARHDRSZ;
+ str = text_to_cstring((text *) data);
+
+ parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
+
+ if (version)
+ orig_version = xml_text2xmlChar(version);
+ else
+ orig_version = NULL;
+
+ switch (standalone)
+ {
+ case XML_STANDALONE_YES:
+ orig_standalone = 1;
+ break;
+ case XML_STANDALONE_NO:
+ orig_standalone = 0;
+ break;
+ case XML_STANDALONE_NO_VALUE:
+ orig_standalone = -1;
+ break;
+ case XML_STANDALONE_OMITTED:
+ /* leave original value */
+ break;
+ }
+
+ initStringInfo(&buf);
+ print_xml_decl(&buf, orig_version, 0, orig_standalone);
+ appendStringInfoString(&buf, str + len);
+
+ return stringinfo_to_xmltype(&buf);
+#else
+ NO_XML_SUPPORT();
+ return NULL;
+#endif
+}
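+
+/*
+ * For example, applying XMLROOT with version '1.1' and STANDALONE YES to
+ * xml '<content>abc</content>' replaces whatever declaration the value
+ * had (none, here) and yields
+ *   <?xml version="1.1" standalone="yes"?><content>abc</content>
+ */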
+
+
+/*
+ * Validate document (given as string) against DTD (given as external link)
+ *
+ * This has been removed because it is a security hole: unprivileged users
+ * should not be able to use Postgres to fetch arbitrary external files,
+ * which unfortunately is exactly what libxml is willing to do with the DTD
+ * parameter.
+ */
+Datum
+xmlvalidate(PG_FUNCTION_ARGS)
+{
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("xmlvalidate is not implemented")));
+ return 0;
+}
+
+
+bool
+xml_is_document(xmltype *arg)
+{
+#ifdef USE_LIBXML
+ bool result;
+ volatile xmlDocPtr doc = NULL;
+ MemoryContext ccxt = CurrentMemoryContext;
+
+ /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
+ PG_TRY();
+ {
+ doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
+ GetDatabaseEncoding());
+ result = true;
+ }
+ PG_CATCH();
+ {
+ ErrorData *errdata;
+ MemoryContext ecxt;
+
+ ecxt = MemoryContextSwitchTo(ccxt);
+ errdata = CopyErrorData();
+ if (errdata->sqlerrcode == ERRCODE_INVALID_XML_DOCUMENT)
+ {
+ FlushErrorState();
+ result = false;
+ }
+ else
+ {
+ MemoryContextSwitchTo(ecxt);
+ PG_RE_THROW();
+ }
+ }
+ PG_END_TRY();
+
+ if (doc)
+ xmlFreeDoc(doc);
+
+ return result;
+#else /* not USE_LIBXML */
+ NO_XML_SUPPORT();
+ return false;
+#endif /* not USE_LIBXML */
+}
+
+
+#ifdef USE_LIBXML
+
+/*
+ * pg_xml_init_library --- set up for use of libxml
+ *
+ * This should be called by each function that is about to use libxml
+ * facilities but doesn't require error handling. It initializes libxml
+ * and verifies compatibility with the loaded libxml version. These are
+ * once-per-session activities.
+ *
+ * TODO: xmlChar is a UTF-8 char; do proper tuning for that (test with
+ * initdb encoding != UTF8 and check).
+ */
+void
+pg_xml_init_library(void)
+{
+ static bool first_time = true;
+
+ if (first_time)
+ {
+ /* Stuff we need do only once per session */
+
+ /*
+ * Currently we have no pure UTF-8 support for internals, so check
+ * that char and xmlChar are the same size and we can work at all.
+ */
+ if (sizeof(char) != sizeof(xmlChar))
+ ereport(ERROR,
+ (errmsg("could not initialize XML library"),
+ errdetail("libxml2 has incompatible char type: sizeof(char)=%zu, sizeof(xmlChar)=%zu.",
+ sizeof(char), sizeof(xmlChar))));
+
+#ifdef USE_LIBXMLCONTEXT
+ /* Set up libxml's memory allocation our way */
+ xml_memory_init();
+#endif
+
+ /* Check library compatibility */
+ LIBXML_TEST_VERSION;
+
+ first_time = false;
+ }
+}
+
+/*
+ * pg_xml_init --- set up for use of libxml and register an error handler
+ *
+ * This should be called by each function that is about to use libxml
+ * facilities and requires error handling. It initializes libxml with
+ * pg_xml_init_library() and establishes our libxml error handler.
+ *
+ * strictness determines which errors are reported and which are ignored.
+ *
+ * Calls to this function MUST be followed by a PG_TRY block that guarantees
+ * that pg_xml_done() is called during either normal or error exit.
+ *
+ * This is exported for use by contrib/xml2, as well as other code that might
+ * wish to share use of this module's libxml error handler.
+ */
+PgXmlErrorContext *
+pg_xml_init(PgXmlStrictness strictness)
+{
+ PgXmlErrorContext *errcxt;
+ void *new_errcxt;
+
+ /* Do one-time setup if needed */
+ pg_xml_init_library();
+
+ /* Create error handling context structure */
+ errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
+ errcxt->magic = ERRCXT_MAGIC;
+ errcxt->strictness = strictness;
+ errcxt->err_occurred = false;
+ initStringInfo(&errcxt->err_buf);
+
+ /*
+ * Save original error handler and install ours. libxml originally didn't
+ * distinguish between the contexts for generic and for structured error
+ * handlers. If we're using an old libxml version, we must thus save the
+ * generic error context, even though we're using a structured error
+ * handler.
+ */
+ errcxt->saved_errfunc = xmlStructuredError;
+
+#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
+ errcxt->saved_errcxt = xmlStructuredErrorContext;
+#else
+ errcxt->saved_errcxt = xmlGenericErrorContext;
+#endif
+
+ xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler);
+
+ /*
+ * Verify that xmlSetStructuredErrorFunc set the context variable we
+ * expected it to. If not, the error context pointer we just saved is not
+ * the correct thing to restore, and since that leaves us without a way to
+ * restore the context in pg_xml_done, we must fail.
+ *
+ * The only known situation in which this test fails is if we compile with
+ * headers from a libxml2 that doesn't track the structured error context
+ * separately (< 2.7.4), but at runtime use a version that does, or vice
+ * versa. The libxml2 authors did not treat that change as constituting
+ * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
+ * fails to protect us from this.
+ */
+
+#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
+ new_errcxt = xmlStructuredErrorContext;
+#else
+ new_errcxt = xmlGenericErrorContext;
+#endif
+
+ if (new_errcxt != (void *) errcxt)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("could not set up XML error handler"),
+ errhint("This probably indicates that the version of libxml2"
+ " being used is not compatible with the libxml2"
+ " header files that PostgreSQL was built with.")));
+
+ /*
+ * Also, install an entity loader to prevent unwanted fetches of external
+ * files and URLs.
+ */
+ errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
+ xmlSetExternalEntityLoader(xmlPgEntityLoader);
+
+ return errcxt;
+}
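+
+/*
+ * The typical calling pattern, as used by xmlelement() above and
+ * xml_parse() below, is:
+ *
+ *   xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
+ *   PG_TRY();
+ *   {
+ *       ... libxml calls, with xml_ereport() on failure ...
+ *   }
+ *   PG_CATCH();
+ *   {
+ *       ... free any libxml objects ...
+ *       pg_xml_done(xmlerrcxt, true);
+ *       PG_RE_THROW();
+ *   }
+ *   PG_END_TRY();
+ *   pg_xml_done(xmlerrcxt, false);
+ */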
+
+
+/*
+ * pg_xml_done --- restore previous libxml error handling
+ *
+ * Resets libxml's global error-handling state to what it was before
+ * pg_xml_init() was called.
+ *
+ * This routine verifies that all pending errors have been dealt with
+ * (in assert-enabled builds, anyway).
+ */
+void
+pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
+{
+ void *cur_errcxt;
+
+ /* An assert seems like enough protection here */
+ Assert(errcxt->magic == ERRCXT_MAGIC);
+
+ /*
+ * In a normal exit, there should be no un-handled libxml errors. But we
+ * shouldn't try to enforce this during error recovery, since the longjmp
+ * could have been thrown before xml_ereport had a chance to run.
+ */
+ Assert(!errcxt->err_occurred || isError);
+
+ /*
+ * Check that libxml's global state is correct, warn if not. This is a
+ * real test and not an Assert because it has a higher probability of
+ * happening.
+ */
+#ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
+ cur_errcxt = xmlStructuredErrorContext;
+#else
+ cur_errcxt = xmlGenericErrorContext;
+#endif
+
+ if (cur_errcxt != (void *) errcxt)
+ elog(WARNING, "libxml error handling state is out of sync with xml.c");
+
+ /* Restore the saved handlers */
+ xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
+ xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
+
+ /*
+ * Mark the struct as invalid, just in case somebody somehow manages to
+ * call xml_errorHandler or xml_ereport with it.
+ */
+ errcxt->magic = 0;
+
+ /* Release memory */
+ pfree(errcxt->err_buf.data);
+ pfree(errcxt);
+}
+
+
+/*
+ * pg_xml_error_occurred() --- test the error flag
+ */
+bool
+pg_xml_error_occurred(PgXmlErrorContext *errcxt)
+{
+ return errcxt->err_occurred;
+}
+
+
+/*
+ * SQL/XML allows storing "XML documents" or "XML content". "XML
+ * documents" are specified by the XML specification and are parsed
+ * easily by libxml. "XML content" is specified by SQL/XML as the
+ * production "XMLDecl? content". But libxml can only parse the
+ * "content" part, so we have to parse the XML declaration ourselves
+ * to complete this.
+ */
+
+#define CHECK_XML_SPACE(p) \
+ do { \
+ if (!xmlIsBlank_ch(*(p))) \
+ return XML_ERR_SPACE_REQUIRED; \
+ } while (0)
+
+#define SKIP_XML_SPACE(p) \
+ while (xmlIsBlank_ch(*(p))) (p)++
+
+/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
+/* Beware of multiple evaluations of argument! */
+#define PG_XMLISNAMECHAR(c) \
+ (xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
+ || xmlIsDigit_ch(c) \
+ || c == '.' || c == '-' || c == '_' || c == ':' \
+ || xmlIsCombiningQ(c) \
+ || xmlIsExtender_ch(c))
+
+/* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
+static xmlChar *
+xml_pnstrdup(const xmlChar *str, size_t len)
+{
+ xmlChar *result;
+
+ result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
+ memcpy(result, str, len * sizeof(xmlChar));
+ result[len] = 0;
+ return result;
+}
+
+/* Ditto, except input is char* */
+static xmlChar *
+pg_xmlCharStrndup(const char *str, size_t len)
+{
+ xmlChar *result;
+
+ result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
+ memcpy(result, str, len);
+ result[len] = '\0';
+
+ return result;
+}
+
+/*
+ * Copy xmlChar string to PostgreSQL-owned memory, freeing the input.
+ *
+ * The input xmlChar is freed regardless of success of the copy.
+ */
+static char *
+xml_pstrdup_and_free(xmlChar *str)
+{
+ char *result;
+
+ if (str)
+ {
+ PG_TRY();
+ {
+ result = pstrdup((char *) str);
+ }
+ PG_FINALLY();
+ {
+ xmlFree(str);
+ }
+ PG_END_TRY();
+ }
+ else
+ result = NULL;
+
+ return result;
+}
+
+/*
+ * str is the null-terminated input string. Remaining arguments are
+ * output arguments; each can be NULL if its value is not wanted.
+ * version and encoding are returned as locally-palloc'd strings.
+ * Result is 0 if OK, an error code if not.
+ */
+static int
+parse_xml_decl(const xmlChar *str, size_t *lenp,
+ xmlChar **version, xmlChar **encoding, int *standalone)
+{
+ const xmlChar *p;
+ const xmlChar *save_p;
+ size_t len;
+ int utf8char;
+ int utf8len;
+
+ /*
+ * Only initialize libxml. We don't need error handling here, but we do
+ * need to make sure libxml is initialized before calling any of its
+ * functions. Note that this is safe (and a no-op) if caller has already
+ * done pg_xml_init().
+ */
+ pg_xml_init_library();
+
+ /* Initialize output arguments to "not present" */
+ if (version)
+ *version = NULL;
+ if (encoding)
+ *encoding = NULL;
+ if (standalone)
+ *standalone = -1;
+
+ p = str;
+
+ if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
+ goto finished;
+
+ /*
+ * If next char is a name char, it's a PI like <?xml-stylesheet ...?>
+ * rather than an XMLDecl, so we have done what we came to do and found no
+ * XMLDecl.
+ *
+ * We need an input length value for xmlGetUTF8Char, but there's no need
+ * to count the whole document size, so use strnlen not strlen.
+ */
+ utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN);
+ utf8char = xmlGetUTF8Char(p + 5, &utf8len);
+ if (PG_XMLISNAMECHAR(utf8char))
+ goto finished;
+
+ p += 5;
+
+ /* version */
+ CHECK_XML_SPACE(p);
+ SKIP_XML_SPACE(p);
+ if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
+ return XML_ERR_VERSION_MISSING;
+ p += 7;
+ SKIP_XML_SPACE(p);
+ if (*p != '=')
+ return XML_ERR_VERSION_MISSING;
+ p += 1;
+ SKIP_XML_SPACE(p);
+
+ if (*p == '\'' || *p == '"')
+ {
+ const xmlChar *q;
+
+ q = xmlStrchr(p + 1, *p);
+ if (!q)
+ return XML_ERR_VERSION_MISSING;
+
+ if (version)
+ *version = xml_pnstrdup(p + 1, q - p - 1);
+ p = q + 1;
+ }
+ else
+ return XML_ERR_VERSION_MISSING;
+
+ /* encoding */
+ save_p = p;
+ SKIP_XML_SPACE(p);
+ if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
+ {
+ CHECK_XML_SPACE(save_p);
+ p += 8;
+ SKIP_XML_SPACE(p);
+ if (*p != '=')
+ return XML_ERR_MISSING_ENCODING;
+ p += 1;
+ SKIP_XML_SPACE(p);
+
+ if (*p == '\'' || *p == '"')
+ {
+ const xmlChar *q;
+
+ q = xmlStrchr(p + 1, *p);
+ if (!q)
+ return XML_ERR_MISSING_ENCODING;
+
+ if (encoding)
+ *encoding = xml_pnstrdup(p + 1, q - p - 1);
+ p = q + 1;
+ }
+ else
+ return XML_ERR_MISSING_ENCODING;
+ }
+ else
+ {
+ p = save_p;
+ }
+
+ /* standalone */
+ save_p = p;
+ SKIP_XML_SPACE(p);
+ if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
+ {
+ CHECK_XML_SPACE(save_p);
+ p += 10;
+ SKIP_XML_SPACE(p);
+ if (*p != '=')
+ return XML_ERR_STANDALONE_VALUE;
+ p += 1;
+ SKIP_XML_SPACE(p);
+ if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
+ xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
+ {
+ if (standalone)
+ *standalone = 1;
+ p += 5;
+ }
+ else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
+ xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
+ {
+ if (standalone)
+ *standalone = 0;
+ p += 4;
+ }
+ else
+ return XML_ERR_STANDALONE_VALUE;
+ }
+ else
+ {
+ p = save_p;
+ }
+
+ SKIP_XML_SPACE(p);
+ if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
+ return XML_ERR_XMLDECL_NOT_FINISHED;
+ p += 2;
+
+finished:
+ len = p - str;
+
+ for (p = str; p < str + len; p++)
+ if (*p > 127)
+ return XML_ERR_INVALID_CHAR;
+
+ if (lenp)
+ *lenp = len;
+
+ return XML_ERR_OK;
+}
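+
+/*
+ * For example, given the input
+ *   <?xml version="1.0" encoding="UTF-8" standalone="yes"?><foo/>
+ * this returns XML_ERR_OK with version = "1.0", encoding = "UTF-8",
+ * standalone = 1, and *lenp set just past the "?>", so str + *lenp points
+ * at "<foo/>".  Input with no declaration returns XML_ERR_OK with
+ * *lenp = 0 and the "not present" values initialized above.
+ */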
+
+
+/*
+ * Write an XML declaration. On output, we adjust the XML declaration
+ * as follows. (These rules are the moral equivalent of the clause
+ * "Serialization of an XML value" in the SQL standard.)
+ *
+ * We try to avoid generating an XML declaration if possible. This is
+ * so that you don't get trivial things like xml '<foo/>' resulting in
+ * '<?xml version="1.0"?><foo/>', which would surely be annoying. We
+ * must provide a declaration if the standalone property is specified
+ * or if we include an encoding declaration. If we have a
+ * declaration, we must specify a version (XML requires this).
+ * Otherwise we only make a declaration if the version is not "1.0",
+ * which is the default version specified in SQL:2003.
+ */
+static bool
+print_xml_decl(StringInfo buf, const xmlChar *version,
+ pg_enc encoding, int standalone)
+{
+ if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
+ || (encoding && encoding != PG_UTF8)
+ || standalone != -1)
+ {
+ appendStringInfoString(buf, "<?xml");
+
+ if (version)
+ appendStringInfo(buf, " version=\"%s\"", version);
+ else
+ appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
+
+ if (encoding && encoding != PG_UTF8)
+ {
+ /*
+ * XXX might be useful to convert this to IANA names (ISO-8859-1
+ * instead of LATIN1 etc.); needs field experience
+ */
+ appendStringInfo(buf, " encoding=\"%s\"",
+ pg_encoding_to_char(encoding));
+ }
+
+ if (standalone == 1)
+ appendStringInfoString(buf, " standalone=\"yes\"");
+ else if (standalone == 0)
+ appendStringInfoString(buf, " standalone=\"no\"");
+ appendStringInfoString(buf, "?>");
+
+ return true;
+ }
+ else
+ return false;
+}
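+
+/*
+ * For example, version "1.0" with no encoding and standalone omitted (-1)
+ * produces nothing and returns false, whereas version "1.0" with
+ * standalone = 1 produces
+ *   <?xml version="1.0" standalone="yes"?>
+ * and returns true.
+ */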
+
+/*
+ * Test whether an input that is to be parsed as CONTENT contains a DTD.
+ *
+ * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not
+ * satisfied by a document with a DTD, which is a bit of a wart, as it means
+ * the CONTENT type is not a proper superset of DOCUMENT. SQL/XML:2006 and
+ * later fix that, by redefining content with reference to the "more
+ * permissive" Document Node of the XQuery/XPath Data Model, such that any
+ * DOCUMENT value is indeed also a CONTENT value. That definition is more
+ * useful, as CONTENT becomes usable for parsing input of unknown form (think
+ * pg_restore).
+ *
+ * As used below in parse_xml when parsing for CONTENT, libxml does not give
+ * us the 2006+ behavior, but only the 2003; it will choke if the input has
+ * a DTD. But we can provide the 2006+ definition of CONTENT easily enough,
+ * by detecting this case first and simply doing the parse as DOCUMENT.
+ *
+ * A DTD can be found arbitrarily far in, but that would be a contrived case;
+ * it will ordinarily start within a few dozen characters. The only things
+ * that can precede it are an XMLDecl (here, the caller will have called
+ * parse_xml_decl already), whitespace, comments, and processing instructions.
+ * This function need only return true if it sees a valid sequence of such
+ * things leading to <!DOCTYPE. It can simply return false in any other
+ * cases, including malformed input; that will mean the input gets parsed as
+ * CONTENT as originally planned, with libxml reporting any errors.
+ *
+ * This is only to be called from xml_parse, when pg_xml_init has already
+ * been called. The input is already in UTF8 encoding.
+ */
+static bool
+xml_doctype_in_content(const xmlChar *str)
+{
+ const xmlChar *p = str;
+
+ for (;;)
+ {
+ const xmlChar *e;
+
+ SKIP_XML_SPACE(p);
+ if (*p != '<')
+ return false;
+ p++;
+
+ if (*p == '!')
+ {
+ p++;
+
+ /* if we see <!DOCTYPE, we can return true */
+ if (xmlStrncmp(p, (xmlChar *) "DOCTYPE", 7) == 0)
+ return true;
+
+ /* otherwise, if it's not a comment, fail */
+ if (xmlStrncmp(p, (xmlChar *) "--", 2) != 0)
+ return false;
+ /* find end of comment: find -- and a > must follow */
+ p = xmlStrstr(p + 2, (xmlChar *) "--");
+ if (!p || p[2] != '>')
+ return false;
+ /* advance over comment, and keep scanning */
+ p += 3;
+ continue;
+ }
+
+ /* otherwise, if it's not a PI <?target something?>, fail */
+ if (*p != '?')
+ return false;
+ p++;
+
+ /* find end of PI (the string ?> is forbidden within a PI) */
+ e = xmlStrstr(p, (xmlChar *) "?>");
+ if (!e)
+ return false;
+
+ /* advance over PI, keep scanning */
+ p = e + 2;
+ }
+}
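+
+/*
+ * For example, once the caller has skipped over any XMLDecl, the input
+ *   <!-- a comment --> <?pi stuff?> <!DOCTYPE foo> <foo/>
+ * makes this return true (so xml_parse will treat it as DOCUMENT), while
+ * ordinary content such as "abc<foo/>" returns false.
+ */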
+
+
+/*
+ * Convert a text datum to XML internal (xmlDocPtr) representation
+ *
+ * Note: it is caller's responsibility to xmlFreeDoc() the result,
+ * else a permanent memory leak will ensue!
+ *
+ * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
+ * yet do not use SAX - see xmlreader.c)
+ */
+static xmlDocPtr
+xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
+ int encoding)
+{
+ int32 len;
+ xmlChar *string;
+ xmlChar *utf8string;
+ PgXmlErrorContext *xmlerrcxt;
+ volatile xmlParserCtxtPtr ctxt = NULL;
+ volatile xmlDocPtr doc = NULL;
+
+ len = VARSIZE_ANY_EXHDR(data); /* will be useful later */
+ string = xml_text2xmlChar(data);
+
+ utf8string = pg_do_encoding_conversion(string,
+ len,
+ encoding,
+ PG_UTF8);
+
+ /* Start up libxml and its parser */
+ xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
+
+ /* Use a TRY block to ensure we clean up correctly */
+ PG_TRY();
+ {
+ bool parse_as_document = false;
+ int res_code;
+ size_t count = 0;
+ xmlChar *version = NULL;
+ int standalone = 0;
+
+ xmlInitParser();
+
+ ctxt = xmlNewParserCtxt();
+ if (ctxt == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate parser context");
+
+ /* Decide whether to parse as document or content */
+ if (xmloption_arg == XMLOPTION_DOCUMENT)
+ parse_as_document = true;
+ else
+ {
+ /* Parse and skip over the XML declaration, if any */
+ res_code = parse_xml_decl(utf8string,
+ &count, &version, NULL, &standalone);
+ if (res_code != 0)
+ xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
+ "invalid XML content: invalid XML declaration",
+ res_code);
+
+ /* Is there a DOCTYPE element? */
+ if (xml_doctype_in_content(utf8string + count))
+ parse_as_document = true;
+ }
+
+ if (parse_as_document)
+ {
+ /*
+ * Note that here we try to apply DTD defaults
+ * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d:
+ * 'Default values defined by internal DTD are applied'. As for
+ * external DTDs, we try to support them too (see SQL/XML:2008 GR
+ * 10.16.7.e).
+ */
+ doc = xmlCtxtReadDoc(ctxt, utf8string,
+ NULL,
+ "UTF-8",
+ XML_PARSE_NOENT | XML_PARSE_DTDATTR
+ | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
+ if (doc == NULL || xmlerrcxt->err_occurred)
+ {
+ /* Use original option to decide which error code to throw */
+ if (xmloption_arg == XMLOPTION_DOCUMENT)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
+ "invalid XML document");
+ else
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
+ "invalid XML content");
+ }
+ }
+ else
+ {
+ doc = xmlNewDoc(version);
+ Assert(doc->encoding == NULL);
+ doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
+ doc->standalone = standalone;
+
+ /* allow empty content */
+ if (*(utf8string + count))
+ {
+ res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
+ utf8string + count, NULL);
+ if (res_code != 0 || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
+ "invalid XML content");
+ }
+ }
+ }
+ PG_CATCH();
+ {
+ if (doc != NULL)
+ xmlFreeDoc(doc);
+ if (ctxt != NULL)
+ xmlFreeParserCtxt(ctxt);
+
+ pg_xml_done(xmlerrcxt, true);
+
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+
+ xmlFreeParserCtxt(ctxt);
+
+ pg_xml_done(xmlerrcxt, false);
+
+ return doc;
+}
+
+
+/*
+ * xmlChar<->text conversions
+ */
+static xmlChar *
+xml_text2xmlChar(text *in)
+{
+ return (xmlChar *) text_to_cstring(in);
+}
+
+
+#ifdef USE_LIBXMLCONTEXT
+
+/*
+ * Manage the special context used for all libxml allocations (but only
+ * in special debug builds; see notes at top of file)
+ */
+static void
+xml_memory_init(void)
+{
+ /* Create memory context if not there already */
+ if (LibxmlContext == NULL)
+ LibxmlContext = AllocSetContextCreate(TopMemoryContext,
+ "Libxml context",
+ ALLOCSET_DEFAULT_SIZES);
+
+ /* Re-establish the callbacks even if already set */
+ xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
+}
+
+/*
+ * Wrappers for memory management functions
+ */
+static void *
+xml_palloc(size_t size)
+{
+ return MemoryContextAlloc(LibxmlContext, size);
+}
+
+
+static void *
+xml_repalloc(void *ptr, size_t size)
+{
+ return repalloc(ptr, size);
+}
+
+
+static void
+xml_pfree(void *ptr)
+{
+ /* At least some parts of libxml assume xmlFree(NULL) is allowed */
+ if (ptr)
+ pfree(ptr);
+}
+
+
+static char *
+xml_pstrdup(const char *string)
+{
+ return MemoryContextStrdup(LibxmlContext, string);
+}
+#endif /* USE_LIBXMLCONTEXT */
+
+
+/*
+ * xmlPgEntityLoader --- entity loader callback function
+ *
+ * Silently prevent any external entity URL from being loaded. We don't want
+ * to throw an error, so instead make the entity appear to expand to an empty
+ * string.
+ *
+ * We would prefer to allow loading entities that exist in the system's
+ * global XML catalog; but the available libxml2 APIs make that a complex
+ * and fragile task. For now, just shut down all external access.
+ */
+static xmlParserInputPtr
+xmlPgEntityLoader(const char *URL, const char *ID,
+ xmlParserCtxtPtr ctxt)
+{
+ return xmlNewStringInputStream(ctxt, (const xmlChar *) "");
+}
+
+
+/*
+ * xml_ereport --- report an XML-related error
+ *
+ * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
+ * standard. This function adds libxml's native error message, if any, as
+ * detail.
+ *
+ * This is exported for modules that want to share the core libxml error
+ * handler. Note that pg_xml_init() *must* have been called previously.
+ */
+void
+xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
+{
+ char *detail;
+
+ /* Defend against someone passing us a bogus context struct */
+ if (errcxt->magic != ERRCXT_MAGIC)
+ elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
+
+ /* Flag that the current libxml error has been reported */
+ errcxt->err_occurred = false;
+
+ /* Include detail only if we have some text from libxml */
+ if (errcxt->err_buf.len > 0)
+ detail = errcxt->err_buf.data;
+ else
+ detail = NULL;
+
+ ereport(level,
+ (errcode(sqlcode),
+ errmsg_internal("%s", msg),
+ detail ? errdetail_internal("%s", detail) : 0));
+}
+
+
+/*
+ * Error handler for libxml errors and warnings
+ */
+static void
+xml_errorHandler(void *data, xmlErrorPtr error)
+{
+ PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
+ xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
+ xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
+ xmlNodePtr node = error->node;
+ const xmlChar *name = (node != NULL &&
+ node->type == XML_ELEMENT_NODE) ? node->name : NULL;
+ int domain = error->domain;
+ int level = error->level;
+ StringInfo errorBuf;
+
+ /*
+ * Defend against someone passing us a bogus context struct.
+ *
+ * We force a backend exit if this check fails because longjmp'ing out of
+ * libxml would likely render it unsafe to use further.
+ */
+ if (xmlerrcxt->magic != ERRCXT_MAGIC)
+ elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");
+
+ /*----------
+ * Older libxml versions report some errors differently.
+ * First, some errors were previously reported as coming from the parser
+ * domain but are now reported as coming from the namespace domain.
+ * Second, some warnings were upgraded to errors.
+ * We attempt to compensate for that here.
+ *----------
+ */
+ switch (error->code)
+ {
+ case XML_WAR_NS_URI:
+ level = XML_ERR_ERROR;
+ domain = XML_FROM_NAMESPACE;
+ break;
+
+ case XML_ERR_NS_DECL_ERROR:
+ case XML_WAR_NS_URI_RELATIVE:
+ case XML_WAR_NS_COLUMN:
+ case XML_NS_ERR_XML_NAMESPACE:
+ case XML_NS_ERR_UNDEFINED_NAMESPACE:
+ case XML_NS_ERR_QNAME:
+ case XML_NS_ERR_ATTRIBUTE_REDEFINED:
+ case XML_NS_ERR_EMPTY:
+ domain = XML_FROM_NAMESPACE;
+ break;
+ }
+
+ /* Decide whether to act on the error or not */
+ switch (domain)
+ {
+ case XML_FROM_PARSER:
+ case XML_FROM_NONE:
+ case XML_FROM_MEMORY:
+ case XML_FROM_IO:
+
+ /*
+ * Suppress warnings about undeclared entities. We need to do
+ * this to avoid problems due to not loading DTD definitions.
+ */
+ if (error->code == XML_WAR_UNDECLARED_ENTITY)
+ return;
+
+ /* Otherwise, accept error regardless of the parsing purpose */
+ break;
+
+ default:
+ /* Ignore error if only doing well-formedness check */
+ if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
+ return;
+ break;
+ }
+
+ /* Prepare error message in errorBuf */
+ errorBuf = makeStringInfo();
+
+ if (error->line > 0)
+ appendStringInfo(errorBuf, "line %d: ", error->line);
+ if (name != NULL)
+ appendStringInfo(errorBuf, "element %s: ", name);
+ if (error->message != NULL)
+ appendStringInfoString(errorBuf, error->message);
+ else
+ appendStringInfoString(errorBuf, "(no message provided)");
+
+ /*
+ * Append context information to errorBuf.
+ *
+ * xmlParserPrintFileContext() uses libxml's "generic" error handler to
+ * write the context. Since we don't want to duplicate libxml
+ * functionality here, we set up a generic error handler temporarily.
+ *
+ * We use appendStringInfo() directly as libxml's generic error handler.
+ * This should work because it has essentially the same signature as
+ * libxml expects, namely (void *ptr, const char *msg, ...).
+ */
+ if (input != NULL)
+ {
+ xmlGenericErrorFunc errFuncSaved = xmlGenericError;
+ void *errCtxSaved = xmlGenericErrorContext;
+
+ xmlSetGenericErrorFunc((void *) errorBuf,
+ (xmlGenericErrorFunc) appendStringInfo);
+
+ /* Add context information to errorBuf */
+ appendStringInfoLineSeparator(errorBuf);
+
+ xmlParserPrintFileContext(input);
+
+ /* Restore generic error func */
+ xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
+ }
+
+ /* Get rid of any trailing newlines in errorBuf */
+ chopStringInfoNewlines(errorBuf);
+
+ /*
+ * Legacy error handling mode. err_occurred is never set, we just add the
+ * message to err_buf. This mode exists because the xml2 contrib module
+ * uses our error-handling infrastructure, but we don't want to change its
+ * behaviour since it's deprecated anyway. This is also why we don't
+ * distinguish between notices, warnings and errors here --- the old-style
+ * generic error handler wouldn't have done that either.
+ */
+ if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
+ {
+ appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
+ appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
+ errorBuf->len);
+
+ pfree(errorBuf->data);
+ pfree(errorBuf);
+ return;
+ }
+
+ /*
+ * We don't want to ereport() here because that'd probably leave libxml in
+ * an inconsistent state. Instead, we remember the error and ereport()
+ * from xml_ereport().
+ *
+ * Warnings and notices can be reported immediately since they won't cause
+ * a longjmp() out of libxml.
+ */
+ if (level >= XML_ERR_ERROR)
+ {
+ appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
+ appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
+ errorBuf->len);
+
+ xmlerrcxt->err_occurred = true;
+ }
+ else if (level >= XML_ERR_WARNING)
+ {
+ ereport(WARNING,
+ (errmsg_internal("%s", errorBuf->data)));
+ }
+ else
+ {
+ ereport(NOTICE,
+ (errmsg_internal("%s", errorBuf->data)));
+ }
+
+ pfree(errorBuf->data);
+ pfree(errorBuf);
+}
+
+
+/*
+ * Wrapper for "ereport" function for XML-related errors. The "msg"
+ * is the SQL-level message; some can be adopted from the SQL/XML
+ * standard. This function uses "code" to create a textual detail
+ * message. At the moment, we only need to cover those codes that we
+ * may raise in this file.
+ */
+static void
+xml_ereport_by_code(int level, int sqlcode,
+ const char *msg, int code)
+{
+ const char *det;
+
+ switch (code)
+ {
+ case XML_ERR_INVALID_CHAR:
+ det = gettext_noop("Invalid character value.");
+ break;
+ case XML_ERR_SPACE_REQUIRED:
+ det = gettext_noop("Space required.");
+ break;
+ case XML_ERR_STANDALONE_VALUE:
+ det = gettext_noop("standalone accepts only 'yes' or 'no'.");
+ break;
+ case XML_ERR_VERSION_MISSING:
+ det = gettext_noop("Malformed declaration: missing version.");
+ break;
+ case XML_ERR_MISSING_ENCODING:
+ det = gettext_noop("Missing encoding in text declaration.");
+ break;
+ case XML_ERR_XMLDECL_NOT_FINISHED:
+ det = gettext_noop("Parsing XML declaration: '?>' expected.");
+ break;
+ default:
+ det = gettext_noop("Unrecognized libxml error code: %d.");
+ break;
+ }
+
+ ereport(level,
+ (errcode(sqlcode),
+ errmsg_internal("%s", msg),
+ errdetail(det, code)));
+}
+
+
+/*
+ * Remove all trailing newlines from a StringInfo string
+ */
+static void
+chopStringInfoNewlines(StringInfo str)
+{
+ while (str->len > 0 && str->data[str->len - 1] == '\n')
+ str->data[--str->len] = '\0';
+}
+
+
+/*
+ * Append a newline after removing any existing trailing newlines
+ */
+static void
+appendStringInfoLineSeparator(StringInfo str)
+{
+ chopStringInfoNewlines(str);
+ if (str->len > 0)
+ appendStringInfoChar(str, '\n');
+}
+
+
+/*
+ * Convert one char in the current server encoding to a Unicode codepoint.
+ */
+static pg_wchar
+sqlchar_to_unicode(const char *s)
+{
+ char *utf8string;
+ pg_wchar ret[2]; /* need space for trailing zero */
+
+ /* note we're not assuming s is null-terminated */
+ utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
+
+ pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
+ pg_encoding_mblen(PG_UTF8, utf8string));
+
+ if (utf8string != s)
+ pfree(utf8string);
+
+ return ret[0];
+}
+
+
+static bool
+is_valid_xml_namefirst(pg_wchar c)
+{
+ /* (Letter | '_' | ':') */
+ return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
+ || c == '_' || c == ':');
+}
+
+
+static bool
+is_valid_xml_namechar(pg_wchar c)
+{
+ /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
+ return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
+ || xmlIsDigitQ(c)
+ || c == '.' || c == '-' || c == '_' || c == ':'
+ || xmlIsCombiningQ(c)
+ || xmlIsExtenderQ(c));
+}
+#endif /* USE_LIBXML */
+
+
+/*
+ * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1.
+ */
+char *
+map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
+ bool escape_period)
+{
+#ifdef USE_LIBXML
+ StringInfoData buf;
+ const char *p;
+
+ /*
+ * SQL/XML doesn't make use of this case anywhere, so it's probably a
+ * mistake.
+ */
+ Assert(fully_escaped || !escape_period);
+
+ initStringInfo(&buf);
+
+ for (p = ident; *p; p += pg_mblen(p))
+ {
+ if (*p == ':' && (p == ident || fully_escaped))
+ appendStringInfoString(&buf, "_x003A_");
+ else if (*p == '_' && *(p + 1) == 'x')
+ appendStringInfoString(&buf, "_x005F_");
+ else if (fully_escaped && p == ident &&
+ pg_strncasecmp(p, "xml", 3) == 0)
+ {
+ if (*p == 'x')
+ appendStringInfoString(&buf, "_x0078_");
+ else
+ appendStringInfoString(&buf, "_x0058_");
+ }
+ else if (escape_period && *p == '.')
+ appendStringInfoString(&buf, "_x002E_");
+ else
+ {
+ pg_wchar u = sqlchar_to_unicode(p);
+
+ if ((p == ident)
+ ? !is_valid_xml_namefirst(u)
+ : !is_valid_xml_namechar(u))
+ appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
+ else
+ appendBinaryStringInfo(&buf, p, pg_mblen(p));
+ }
+ }
+
+ return buf.data;
+#else /* not USE_LIBXML */
+ NO_XML_SUPPORT();
+ return NULL;
+#endif /* not USE_LIBXML */
+}
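+
+/*
+ * Some examples of the escaping above, with fully_escaped = true and
+ * escape_period = false:
+ *   "foo bar"  ->  "foo_x0020_bar"   (space is not a valid XML name char)
+ *   "foo_xml"  ->  "foo_x005F_xml"   (any "_x" sequence gets escaped)
+ *   "xmldata"  ->  "_x0078_mldata"   (leading "xml" is reserved in XML)
+ */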
+
+
+/*
+ * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
+ */
+char *
+map_xml_name_to_sql_identifier(const char *name)
+{
+ StringInfoData buf;
+ const char *p;
+
+ initStringInfo(&buf);
+
+ for (p = name; *p; p += pg_mblen(p))
+ {
+ if (*p == '_' && *(p + 1) == 'x'
+ && isxdigit((unsigned char) *(p + 2))
+ && isxdigit((unsigned char) *(p + 3))
+ && isxdigit((unsigned char) *(p + 4))
+ && isxdigit((unsigned char) *(p + 5))
+ && *(p + 6) == '_')
+ {
+ char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
+ unsigned int u;
+
+ sscanf(p + 2, "%X", &u);
+ pg_unicode_to_server(u, (unsigned char *) cbuf);
+ appendStringInfoString(&buf, cbuf);
+ p += 6;
+ }
+ else
+ appendBinaryStringInfo(&buf, p, pg_mblen(p));
+ }
+
+ return buf.data;
+}
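+
+/*
+ * This undoes the escaping above; for example "foo_x0020_bar" maps back
+ * to "foo bar" and "_x0078_mldata" back to "xmldata", provided the
+ * escaped code points are representable in the server encoding.
+ */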
+
+/*
+ * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
+ *
+ * When xml_escape_strings is true, then certain characters in string
+ * values are replaced by entity references (&lt; etc.), as specified
+ * in SQL/XML:2008 section 9.8 GR 9) a) iii). This is normally what is
+ * wanted. The false case is mainly useful when the resulting value
+ * is used with xmlTextWriterWriteAttribute() to write out an
+ * attribute, because that function does the escaping itself.
+ */
+char *
+map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
+{
+ if (type_is_array_domain(type))
+ {
+ ArrayType *array;
+ Oid elmtype;
+ int16 elmlen;
+ bool elmbyval;
+ char elmalign;
+ int num_elems;
+ Datum *elem_values;
+ bool *elem_nulls;
+ StringInfoData buf;
+ int i;
+
+ array = DatumGetArrayTypeP(value);
+ elmtype = ARR_ELEMTYPE(array);
+ get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
+
+ deconstruct_array(array, elmtype,
+ elmlen, elmbyval, elmalign,
+ &elem_values, &elem_nulls,
+ &num_elems);
+
+ initStringInfo(&buf);
+
+ for (i = 0; i < num_elems; i++)
+ {
+ if (elem_nulls[i])
+ continue;
+ appendStringInfoString(&buf, "<element>");
+ appendStringInfoString(&buf,
+ map_sql_value_to_xml_value(elem_values[i],
+ elmtype, true));
+ appendStringInfoString(&buf, "</element>");
+ }
+
+ pfree(elem_values);
+ pfree(elem_nulls);
+
+ return buf.data;
+ }
+ else
+ {
+ Oid typeOut;
+ bool isvarlena;
+ char *str;
+
+ /*
+ * Flatten domains; the special-case treatments below should apply to,
+ * e.g., domains over boolean, not just boolean itself.
+ */
+ type = getBaseType(type);
+
+ /*
+ * Special XSD formatting for some data types
+ */
+ switch (type)
+ {
+ case BOOLOID:
+ if (DatumGetBool(value))
+ return "true";
+ else
+ return "false";
+
+ case DATEOID:
+ {
+ DateADT date;
+ struct pg_tm tm;
+ char buf[MAXDATELEN + 1];
+
+ date = DatumGetDateADT(value);
+ /* XSD doesn't support infinite values */
+ if (DATE_NOT_FINITE(date))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("date out of range"),
+ errdetail("XML does not support infinite date values.")));
+ j2date(date + POSTGRES_EPOCH_JDATE,
+ &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
+ EncodeDateOnly(&tm, USE_XSD_DATES, buf);
+
+ return pstrdup(buf);
+ }
+
+ case TIMESTAMPOID:
+ {
+ Timestamp timestamp;
+ struct pg_tm tm;
+ fsec_t fsec;
+ char buf[MAXDATELEN + 1];
+
+ timestamp = DatumGetTimestamp(value);
+
+ /* XSD doesn't support infinite values */
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range"),
+ errdetail("XML does not support infinite timestamp values.")));
+ else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
+ EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ return pstrdup(buf);
+ }
+
+ case TIMESTAMPTZOID:
+ {
+ TimestampTz timestamp;
+ struct pg_tm tm;
+ int tz;
+ fsec_t fsec;
+ const char *tzn = NULL;
+ char buf[MAXDATELEN + 1];
+
+ timestamp = DatumGetTimestamp(value);
+
+ /* XSD doesn't support infinite values */
+ if (TIMESTAMP_NOT_FINITE(timestamp))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range"),
+ errdetail("XML does not support infinite timestamp values.")));
+ else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
+ EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
+ errmsg("timestamp out of range")));
+
+ return pstrdup(buf);
+ }
+
+#ifdef USE_LIBXML
+ case BYTEAOID:
+ {
+ bytea *bstr = DatumGetByteaPP(value);
+ PgXmlErrorContext *xmlerrcxt;
+ volatile xmlBufferPtr buf = NULL;
+ volatile xmlTextWriterPtr writer = NULL;
+ char *result;
+
+ xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
+
+ PG_TRY();
+ {
+ buf = xmlBufferCreate();
+ if (buf == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate xmlBuffer");
+ writer = xmlNewTextWriterMemory(buf, 0);
+ if (writer == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate xmlTextWriter");
+
+ if (xmlbinary == XMLBINARY_BASE64)
+ xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
+ 0, VARSIZE_ANY_EXHDR(bstr));
+ else
+ xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
+ 0, VARSIZE_ANY_EXHDR(bstr));
+
+ /* we MUST do this now to flush data out to the buffer */
+ xmlFreeTextWriter(writer);
+ writer = NULL;
+
+ result = pstrdup((const char *) xmlBufferContent(buf));
+ }
+ PG_CATCH();
+ {
+ if (writer)
+ xmlFreeTextWriter(writer);
+ if (buf)
+ xmlBufferFree(buf);
+
+ pg_xml_done(xmlerrcxt, true);
+
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+
+ xmlBufferFree(buf);
+
+ pg_xml_done(xmlerrcxt, false);
+
+ return result;
+ }
+#endif /* USE_LIBXML */
+
+ }
+
+ /*
+ * otherwise, just use the type's native text representation
+ */
+ getTypeOutputInfo(type, &typeOut, &isvarlena);
+ str = OidOutputFunctionCall(typeOut, value);
+
+ /* ... exactly as-is for XML, and when escaping is not wanted */
+ if (type == XMLOID || !xml_escape_strings)
+ return str;
+
+ /* otherwise, translate special characters as needed */
+ return escape_xml(str);
+ }
+}
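+
+/*
+ * For instance, a boolean true maps to the string "true", a date such as
+ * 2009-06-18 is rendered in its XSD form "2009-06-18", and (with
+ * xmlbinary set to base64) a bytea value is emitted as its base64
+ * encoding.
+ */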
+
+
+/*
+ * Escape characters in text that have special meanings in XML.
+ *
+ * Returns a palloc'd string.
+ *
+ * NB: this is intentionally not dependent on libxml.
+ */
+char *
+escape_xml(const char *str)
+{
+ StringInfoData buf;
+ const char *p;
+
+ initStringInfo(&buf);
+ for (p = str; *p; p++)
+ {
+ switch (*p)
+ {
+ case '&':
+ appendStringInfoString(&buf, "&amp;");
+ break;
+ case '<':
+ appendStringInfoString(&buf, "&lt;");
+ break;
+ case '>':
+ appendStringInfoString(&buf, "&gt;");
+ break;
+ case '\r':
+ appendStringInfoString(&buf, "&#x0d;");
+ break;
+ default:
+ appendStringInfoCharMacro(&buf, *p);
+ break;
+ }
+ }
+ return buf.data;
+}
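+
+/*
+ * For example, escape_xml("a < b & c > d\r") returns
+ * "a &lt; b &amp; c &gt; d&#x0d;"; all other characters, including
+ * newlines, are copied through unchanged.
+ */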
+
+
+static char *
+_SPI_strdup(const char *s)
+{
+ size_t len = strlen(s) + 1;
+ char *ret = SPI_palloc(len);
+
+ memcpy(ret, s, len);
+ return ret;
+}
+
+
+/*
+ * SQL to XML mapping functions
+ *
+ * What follows below was at one point intentionally organized so that
+ * you can read along in the SQL/XML standard. The functions are
+ * mostly split up the way the clauses lay out in the standards
+ * document, and the identifiers are also aligned with the standard
+ * text. Unfortunately, SQL/XML:2006 reordered the clauses
+ * differently than SQL/XML:2003, so the order below doesn't make much
+ * sense anymore.
+ *
+ * There are many things going on there:
+ *
+ * There are two kinds of mappings: Mapping SQL data (table contents)
+ * to XML documents, and mapping SQL structure (the "schema") to XML
+ * Schema. And there are functions that do both at the same time.
+ *
+ * Then you can map a database, a schema, or a table, each in both
+ * ways. This breaks down recursively: Mapping a database invokes
+ * mapping schemas, which invokes mapping tables, which invokes
+ * mapping rows, which invokes mapping columns, although you can't
+ * call the last two from the outside. Because of this, there are a
+ * number of xyz_internal() functions which are to be called both from
+ * the function manager wrapper and from some upper layer in a
+ * recursive call.
+ *
+ * See the documentation about what the common function arguments
+ * nulls, tableforest, and targetns mean.
+ *
+ * Some style guidelines for XML output: Use double quotes for quoting
+ * XML attributes. Indent XML elements by two spaces, but remember
+ * that a lot of code is called recursively at different levels, so
+ * it's better not to indent rather than create output that indents
+ * and outdents weirdly. Add newlines to make the output look nice.
+ */
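+
+/*
+ * As a rough sketch (using a hypothetical two-column table "emp"), the
+ * data mapping produced by table_to_xml with tableforest = false is
+ * shaped like
+ *
+ *   <emp xmlns:xsi="...">
+ *   <row>
+ *     <name>Sharon</name>
+ *     <salary>1000</salary>
+ *   </row>
+ *   ...
+ *   </emp>
+ *
+ * With tableforest = true there is no <emp> wrapper; each row instead
+ * becomes its own top-level element named after the table.
+ */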
+
+
+/*
+ * Visibility of objects for XML mappings; see SQL/XML:2008 section
+ * 4.10.8.
+ */
+
+/*
+ * Given a query that must return type oid as its first column, produce
+ * a list of Oids from the query results.
+ */
+static List *
+query_to_oid_list(const char *query)
+{
+ uint64 i;
+ List *list = NIL;
+ int spi_result;
+
+ spi_result = SPI_execute(query, true, 0);
+ if (spi_result != SPI_OK_SELECT)
+ elog(ERROR, "SPI_execute returned %s for %s",
+ SPI_result_code_string(spi_result), query);
+
+ for (i = 0; i < SPI_processed; i++)
+ {
+ Datum oid;
+ bool isnull;
+
+ oid = SPI_getbinval(SPI_tuptable->vals[i],
+ SPI_tuptable->tupdesc,
+ 1,
+ &isnull);
+ if (!isnull)
+ list = lappend_oid(list, DatumGetObjectId(oid));
+ }
+
+ return list;
+}
+
+
+static List *
+schema_get_xml_visible_tables(Oid nspid)
+{
+ StringInfoData query;
+
+ initStringInfo(&query);
+ appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class"
+ " WHERE relnamespace = %u AND relkind IN ("
+ CppAsString2(RELKIND_RELATION) ","
+ CppAsString2(RELKIND_MATVIEW) ","
+ CppAsString2(RELKIND_VIEW) ")"
+ " AND pg_catalog.has_table_privilege (oid, 'SELECT')"
+ " ORDER BY relname;", nspid);
+
+ return query_to_oid_list(query.data);
+}
+
+
+/*
+ * Including the system schemas is probably not useful for a database
+ * mapping.
+ */
+#define XML_VISIBLE_SCHEMAS_EXCLUDE "(nspname ~ '^pg_' OR nspname = 'information_schema')"
+
+#define XML_VISIBLE_SCHEMAS "SELECT oid FROM pg_catalog.pg_namespace WHERE pg_catalog.has_schema_privilege (oid, 'USAGE') AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
+
+
+static List *
+database_get_xml_visible_schemas(void)
+{
+ return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
+}
+
+
+static List *
+database_get_xml_visible_tables(void)
+{
+ /* At the moment there is no order required here. */
+ return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class"
+ " WHERE relkind IN ("
+ CppAsString2(RELKIND_RELATION) ","
+ CppAsString2(RELKIND_MATVIEW) ","
+ CppAsString2(RELKIND_VIEW) ")"
+ " AND pg_catalog.has_table_privilege(pg_class.oid, 'SELECT')"
+ " AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
+}
+
+
+/*
+ * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
+ * section 9.11.
+ */
+
+static StringInfo
+table_to_xml_internal(Oid relid,
+ const char *xmlschema, bool nulls, bool tableforest,
+ const char *targetns, bool top_level)
+{
+ StringInfoData query;
+
+ initStringInfo(&query);
+ appendStringInfo(&query, "SELECT * FROM %s",
+ DatumGetCString(DirectFunctionCall1(regclassout,
+ ObjectIdGetDatum(relid))));
+ return query_to_xml_internal(query.data, get_rel_name(relid),
+ xmlschema, nulls, tableforest,
+ targetns, top_level);
+}
+
+
+Datum
+table_to_xml(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ bool nulls = PG_GETARG_BOOL(1);
+ bool tableforest = PG_GETARG_BOOL(2);
+ const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
+
+ PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL,
+ nulls, tableforest,
+ targetns, true)));
+}
+
+
+Datum
+query_to_xml(PG_FUNCTION_ARGS)
+{
+ char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
+ bool nulls = PG_GETARG_BOOL(1);
+ bool tableforest = PG_GETARG_BOOL(2);
+ const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
+
+ PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
+ NULL, nulls, tableforest,
+ targetns, true)));
+}
+
+
+Datum
+cursor_to_xml(PG_FUNCTION_ARGS)
+{
+ char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
+ int32 count = PG_GETARG_INT32(1);
+ bool nulls = PG_GETARG_BOOL(2);
+ bool tableforest = PG_GETARG_BOOL(3);
+ const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
+
+ StringInfoData result;
+ Portal portal;
+ uint64 i;
+
+ initStringInfo(&result);
+
+ if (!tableforest)
+ {
+ xmldata_root_element_start(&result, "table", NULL, targetns, true);
+ appendStringInfoChar(&result, '\n');
+ }
+
+ SPI_connect();
+ portal = SPI_cursor_find(name);
+ if (portal == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_CURSOR),
+ errmsg("cursor \"%s\" does not exist", name)));
+
+ SPI_cursor_fetch(portal, true, count);
+ for (i = 0; i < SPI_processed; i++)
+ SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
+ tableforest, targetns, true);
+
+ SPI_finish();
+
+ if (!tableforest)
+ xmldata_root_element_end(&result, "table");
+
+ PG_RETURN_XML_P(stringinfo_to_xmltype(&result));
+}
+
+
+/*
+ * Write the start tag of the root element of a data mapping.
+ *
+ * top_level means that this is the very top level of the eventual
+ * output. For example, when the user calls table_to_xml, then a call
+ * with a table name to this function is the top level. When the user
+ * calls database_to_xml, then a call with a schema name to this
+ * function is not the top level. If top_level is false, then the XML
+ * namespace declarations are omitted, because they supposedly already
+ * appeared earlier in the output. Repeating them is not wrong, but
+ * it looks ugly.
+ */
+static void
+xmldata_root_element_start(StringInfo result, const char *eltname,
+ const char *xmlschema, const char *targetns,
+ bool top_level)
+{
+ /* This isn't really wrong but currently makes no sense. */
+ Assert(top_level || !xmlschema);
+
+ appendStringInfo(result, "<%s", eltname);
+ if (top_level)
+ {
+ appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
+ if (strlen(targetns) > 0)
+ appendStringInfo(result, " xmlns=\"%s\"", targetns);
+ }
+ if (xmlschema)
+ {
+ /* FIXME: better targets */
+ if (strlen(targetns) > 0)
+ appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
+ else
+ appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
+ }
+ appendStringInfoString(result, ">\n");
+}
+
+
+static void
+xmldata_root_element_end(StringInfo result, const char *eltname)
+{
+ appendStringInfo(result, "</%s>\n", eltname);
+}
+
+
+static StringInfo
+query_to_xml_internal(const char *query, char *tablename,
+ const char *xmlschema, bool nulls, bool tableforest,
+ const char *targetns, bool top_level)
+{
+ StringInfo result;
+ char *xmltn;
+ uint64 i;
+
+ if (tablename)
+ xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
+ else
+ xmltn = "table";
+
+ result = makeStringInfo();
+
+ SPI_connect();
+ if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_EXCEPTION),
+ errmsg("invalid query")));
+
+ if (!tableforest)
+ {
+ xmldata_root_element_start(result, xmltn, xmlschema,
+ targetns, top_level);
+ appendStringInfoChar(result, '\n');
+ }
+
+ if (xmlschema)
+ appendStringInfo(result, "%s\n\n", xmlschema);
+
+ for (i = 0; i < SPI_processed; i++)
+ SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
+ tableforest, targetns, top_level);
+
+ if (!tableforest)
+ xmldata_root_element_end(result, xmltn);
+
+ SPI_finish();
+
+ return result;
+}
+
+
+Datum
+table_to_xmlschema(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ bool nulls = PG_GETARG_BOOL(1);
+ bool tableforest = PG_GETARG_BOOL(2);
+ const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
+ const char *result;
+ Relation rel;
+
+ rel = table_open(relid, AccessShareLock);
+ result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
+ tableforest, targetns);
+ table_close(rel, NoLock);
+
+ PG_RETURN_XML_P(cstring_to_xmltype(result));
+}
+
+
+Datum
+query_to_xmlschema(PG_FUNCTION_ARGS)
+{
+ char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
+ bool nulls = PG_GETARG_BOOL(1);
+ bool tableforest = PG_GETARG_BOOL(2);
+ const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
+ const char *result;
+ SPIPlanPtr plan;
+ Portal portal;
+
+ SPI_connect();
+
+ if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
+ elog(ERROR, "SPI_prepare(\"%s\") failed", query);
+
+ if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
+ elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
+
+ result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
+ InvalidOid, nulls,
+ tableforest, targetns));
+ SPI_cursor_close(portal);
+ SPI_finish();
+
+ PG_RETURN_XML_P(cstring_to_xmltype(result));
+}
+
+
+Datum
+cursor_to_xmlschema(PG_FUNCTION_ARGS)
+{
+ char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
+ bool nulls = PG_GETARG_BOOL(1);
+ bool tableforest = PG_GETARG_BOOL(2);
+ const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
+ const char *xmlschema;
+ Portal portal;
+
+ SPI_connect();
+ portal = SPI_cursor_find(name);
+ if (portal == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_UNDEFINED_CURSOR),
+ errmsg("cursor \"%s\" does not exist", name)));
+ if (portal->tupDesc == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_CURSOR_STATE),
+ errmsg("portal \"%s\" does not return tuples", name)));
+
+ xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
+ InvalidOid, nulls,
+ tableforest, targetns));
+ SPI_finish();
+
+ PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
+}
+
+
+Datum
+table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ bool nulls = PG_GETARG_BOOL(1);
+ bool tableforest = PG_GETARG_BOOL(2);
+ const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
+ Relation rel;
+ const char *xmlschema;
+
+ rel = table_open(relid, AccessShareLock);
+ xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
+ tableforest, targetns);
+ table_close(rel, NoLock);
+
+ PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid,
+ xmlschema, nulls, tableforest,
+ targetns, true)));
+}
+
+
+Datum
+query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
+{
+ char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
+ bool nulls = PG_GETARG_BOOL(1);
+ bool tableforest = PG_GETARG_BOOL(2);
+ const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
+
+ const char *xmlschema;
+ SPIPlanPtr plan;
+ Portal portal;
+
+ SPI_connect();
+
+ if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
+ elog(ERROR, "SPI_prepare(\"%s\") failed", query);
+
+ if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
+ elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
+
+ xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
+ InvalidOid, nulls, tableforest, targetns));
+ SPI_cursor_close(portal);
+ SPI_finish();
+
+ PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
+ xmlschema, nulls, tableforest,
+ targetns, true)));
+}
+
+
+/*
+ * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
+ * sections 9.13, 9.14.
+ */
+
+static StringInfo
+schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
+ bool tableforest, const char *targetns, bool top_level)
+{
+ StringInfo result;
+ char *xmlsn;
+ List *relid_list;
+ ListCell *cell;
+
+ xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid),
+ true, false);
+ result = makeStringInfo();
+
+ xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
+ appendStringInfoChar(result, '\n');
+
+ if (xmlschema)
+ appendStringInfo(result, "%s\n\n", xmlschema);
+
+ SPI_connect();
+
+ relid_list = schema_get_xml_visible_tables(nspid);
+
+ foreach(cell, relid_list)
+ {
+ Oid relid = lfirst_oid(cell);
+ StringInfo subres;
+
+ subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
+ targetns, false);
+
+ appendBinaryStringInfo(result, subres->data, subres->len);
+ appendStringInfoChar(result, '\n');
+ }
+
+ SPI_finish();
+
+ xmldata_root_element_end(result, xmlsn);
+
+ return result;
+}
+
+
+Datum
+schema_to_xml(PG_FUNCTION_ARGS)
+{
+ Name name = PG_GETARG_NAME(0);
+ bool nulls = PG_GETARG_BOOL(1);
+ bool tableforest = PG_GETARG_BOOL(2);
+ const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
+
+ char *schemaname;
+ Oid nspid;
+
+ schemaname = NameStr(*name);
+ nspid = LookupExplicitNamespace(schemaname, false);
+
+ PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
+ nulls, tableforest, targetns, true)));
+}
+
+
+/*
+ * Write the start element of the root element of an XML Schema mapping.
+ */
+static void
+xsd_schema_element_start(StringInfo result, const char *targetns)
+{
+ appendStringInfoString(result,
+ "<xsd:schema\n"
+ " xmlns:xsd=\"" NAMESPACE_XSD "\"");
+ if (strlen(targetns) > 0)
+ appendStringInfo(result,
+ "\n"
+ " targetNamespace=\"%s\"\n"
+ " elementFormDefault=\"qualified\"",
+ targetns);
+ appendStringInfoString(result,
+ ">\n\n");
+}
+
+
+static void
+xsd_schema_element_end(StringInfo result)
+{
+ appendStringInfoString(result, "</xsd:schema>");
+}
+
+
+static StringInfo
+schema_to_xmlschema_internal(const char *schemaname, bool nulls,
+ bool tableforest, const char *targetns)
+{
+ Oid nspid;
+ List *relid_list;
+ List *tupdesc_list;
+ ListCell *cell;
+ StringInfo result;
+
+ result = makeStringInfo();
+
+ nspid = LookupExplicitNamespace(schemaname, false);
+
+ xsd_schema_element_start(result, targetns);
+
+ SPI_connect();
+
+ relid_list = schema_get_xml_visible_tables(nspid);
+
+ tupdesc_list = NIL;
+ foreach(cell, relid_list)
+ {
+ Relation rel;
+
+ rel = table_open(lfirst_oid(cell), AccessShareLock);
+ tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
+ table_close(rel, NoLock);
+ }
+
+ appendStringInfoString(result,
+ map_sql_typecoll_to_xmlschema_types(tupdesc_list));
+
+ appendStringInfoString(result,
+ map_sql_schema_to_xmlschema_types(nspid, relid_list,
+ nulls, tableforest, targetns));
+
+ xsd_schema_element_end(result);
+
+ SPI_finish();
+
+ return result;
+}
+
+
+Datum
+schema_to_xmlschema(PG_FUNCTION_ARGS)
+{
+ Name name = PG_GETARG_NAME(0);
+ bool nulls = PG_GETARG_BOOL(1);
+ bool tableforest = PG_GETARG_BOOL(2);
+ const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
+
+ PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name),
+ nulls, tableforest, targetns)));
+}
+
+
+Datum
+schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
+{
+ Name name = PG_GETARG_NAME(0);
+ bool nulls = PG_GETARG_BOOL(1);
+ bool tableforest = PG_GETARG_BOOL(2);
+ const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
+ char *schemaname;
+ Oid nspid;
+ StringInfo xmlschema;
+
+ schemaname = NameStr(*name);
+ nspid = LookupExplicitNamespace(schemaname, false);
+
+ xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
+ tableforest, targetns);
+
+ PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid,
+ xmlschema->data, nulls,
+ tableforest, targetns, true)));
+}
+
+
+/*
+ * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
+ * sections 9.16, 9.17.
+ */
+
+static StringInfo
+database_to_xml_internal(const char *xmlschema, bool nulls,
+ bool tableforest, const char *targetns)
+{
+ StringInfo result;
+ List *nspid_list;
+ ListCell *cell;
+ char *xmlcn;
+
+ xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId),
+ true, false);
+ result = makeStringInfo();
+
+ xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
+ appendStringInfoChar(result, '\n');
+
+ if (xmlschema)
+ appendStringInfo(result, "%s\n\n", xmlschema);
+
+ SPI_connect();
+
+ nspid_list = database_get_xml_visible_schemas();
+
+ foreach(cell, nspid_list)
+ {
+ Oid nspid = lfirst_oid(cell);
+ StringInfo subres;
+
+ subres = schema_to_xml_internal(nspid, NULL, nulls,
+ tableforest, targetns, false);
+
+ appendBinaryStringInfo(result, subres->data, subres->len);
+ appendStringInfoChar(result, '\n');
+ }
+
+ SPI_finish();
+
+ xmldata_root_element_end(result, xmlcn);
+
+ return result;
+}
+
+
+Datum
+database_to_xml(PG_FUNCTION_ARGS)
+{
+ bool nulls = PG_GETARG_BOOL(0);
+ bool tableforest = PG_GETARG_BOOL(1);
+ const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
+
+ PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls,
+ tableforest, targetns)));
+}
+
+
+static StringInfo
+database_to_xmlschema_internal(bool nulls, bool tableforest,
+ const char *targetns)
+{
+ List *relid_list;
+ List *nspid_list;
+ List *tupdesc_list;
+ ListCell *cell;
+ StringInfo result;
+
+ result = makeStringInfo();
+
+ xsd_schema_element_start(result, targetns);
+
+ SPI_connect();
+
+ relid_list = database_get_xml_visible_tables();
+ nspid_list = database_get_xml_visible_schemas();
+
+ tupdesc_list = NIL;
+ foreach(cell, relid_list)
+ {
+ Relation rel;
+
+ rel = table_open(lfirst_oid(cell), AccessShareLock);
+ tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
+ table_close(rel, NoLock);
+ }
+
+ appendStringInfoString(result,
+ map_sql_typecoll_to_xmlschema_types(tupdesc_list));
+
+ appendStringInfoString(result,
+ map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
+
+ xsd_schema_element_end(result);
+
+ SPI_finish();
+
+ return result;
+}
+
+
+Datum
+database_to_xmlschema(PG_FUNCTION_ARGS)
+{
+ bool nulls = PG_GETARG_BOOL(0);
+ bool tableforest = PG_GETARG_BOOL(1);
+ const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
+
+ PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls,
+ tableforest, targetns)));
+}
+
+
+Datum
+database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
+{
+ bool nulls = PG_GETARG_BOOL(0);
+ bool tableforest = PG_GETARG_BOOL(1);
+ const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
+ StringInfo xmlschema;
+
+ xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
+
+ PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data,
+ nulls, tableforest, targetns)));
+}
+
+
+/*
+ * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
+ * 9.2.
+ */
+static char *
+map_multipart_sql_identifier_to_xml_name(const char *a, const char *b, const char *c, const char *d)
+{
+ StringInfoData result;
+
+ initStringInfo(&result);
+
+ if (a)
+ appendStringInfoString(&result,
+ map_sql_identifier_to_xml_name(a, true, true));
+ if (b)
+ appendStringInfo(&result, ".%s",
+ map_sql_identifier_to_xml_name(b, true, true));
+ if (c)
+ appendStringInfo(&result, ".%s",
+ map_sql_identifier_to_xml_name(c, true, true));
+ if (d)
+ appendStringInfo(&result, ".%s",
+ map_sql_identifier_to_xml_name(d, true, true));
+
+ return result.data;
+}
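+
+/*
+ * For example (hypothetical names), mapping ("RowType", "mydb", "public",
+ * "my table") yields "RowType.mydb.public.my_x0020_table": each non-NULL
+ * part is escaped individually by map_sql_identifier_to_xml_name() and the
+ * parts are then joined with periods.
+ */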
+
+
+/*
+ * Map an SQL table to an XML Schema document; see SQL/XML:2008
+ * section 9.11.
+ *
+ * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
+ * 9.9.
+ */
+static const char *
+map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
+ bool tableforest, const char *targetns)
+{
+ int i;
+ char *xmltn;
+ char *tabletypename;
+ char *rowtypename;
+ StringInfoData result;
+
+ initStringInfo(&result);
+
+ if (OidIsValid(relid))
+ {
+ HeapTuple tuple;
+ Form_pg_class reltuple;
+
+ tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for relation %u", relid);
+ reltuple = (Form_pg_class) GETSTRUCT(tuple);
+
+ xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
+ true, false);
+
+ tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
+ get_database_name(MyDatabaseId),
+ get_namespace_name(reltuple->relnamespace),
+ NameStr(reltuple->relname));
+
+ rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
+ get_database_name(MyDatabaseId),
+ get_namespace_name(reltuple->relnamespace),
+ NameStr(reltuple->relname));
+
+ ReleaseSysCache(tuple);
+ }
+ else
+ {
+ if (tableforest)
+ xmltn = "row";
+ else
+ xmltn = "table";
+
+ tabletypename = "TableType";
+ rowtypename = "RowType";
+ }
+
+ xsd_schema_element_start(&result, targetns);
+
+ appendStringInfoString(&result,
+ map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc)));
+
+ appendStringInfo(&result,
+ "<xsd:complexType name=\"%s\">\n"
+ " <xsd:sequence>\n",
+ rowtypename);
+
+ for (i = 0; i < tupdesc->natts; i++)
+ {
+ Form_pg_attribute att = TupleDescAttr(tupdesc, i);
+
+ if (att->attisdropped)
+ continue;
+ appendStringInfo(&result,
+ " <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
+ map_sql_identifier_to_xml_name(NameStr(att->attname),
+ true, false),
+ map_sql_type_to_xml_name(att->atttypid, -1),
+ nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
+ }
+
+ appendStringInfoString(&result,
+ " </xsd:sequence>\n"
+ "</xsd:complexType>\n\n");
+
+ if (!tableforest)
+ {
+ appendStringInfo(&result,
+ "<xsd:complexType name=\"%s\">\n"
+ " <xsd:sequence>\n"
+ " <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
+ " </xsd:sequence>\n"
+ "</xsd:complexType>\n\n",
+ tabletypename, rowtypename);
+
+ appendStringInfo(&result,
+ "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
+ xmltn, tabletypename);
+ }
+ else
+ appendStringInfo(&result,
+ "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
+ xmltn, rowtypename);
+
+ xsd_schema_element_end(&result);
+
+ return result.data;
+}
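+
+/*
+ * As an illustration, a hypothetical integer column "id" with nulls = false
+ * contributes a row-type member of the form
+ *
+ *   <xsd:element name="id" type="INTEGER" minOccurs="0"></xsd:element>
+ *
+ * whereas nulls = true emits nillable="true" instead of minOccurs="0".
+ */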
+
+
+/*
+ * Map an SQL schema to XML Schema data types; see SQL/XML:2008
+ * section 9.12.
+ */
+static const char *
+map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
+ bool tableforest, const char *targetns)
+{
+ char *dbname;
+ char *nspname;
+ char *xmlsn;
+ char *schematypename;
+ StringInfoData result;
+ ListCell *cell;
+
+ dbname = get_database_name(MyDatabaseId);
+ nspname = get_namespace_name(nspid);
+
+ initStringInfo(&result);
+
+ xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
+
+ schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
+ dbname,
+ nspname,
+ NULL);
+
+ appendStringInfo(&result,
+ "<xsd:complexType name=\"%s\">\n", schematypename);
+ if (!tableforest)
+ appendStringInfoString(&result,
+ " <xsd:all>\n");
+ else
+ appendStringInfoString(&result,
+ " <xsd:sequence>\n");
+
+ foreach(cell, relid_list)
+ {
+ Oid relid = lfirst_oid(cell);
+ char *relname = get_rel_name(relid);
+ char *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
+ char *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
+ dbname,
+ nspname,
+ relname);
+
+ if (!tableforest)
+ appendStringInfo(&result,
+ " <xsd:element name=\"%s\" type=\"%s\"/>\n",
+ xmltn, tabletypename);
+ else
+ appendStringInfo(&result,
+ " <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
+ xmltn, tabletypename);
+ }
+
+ if (!tableforest)
+ appendStringInfoString(&result,
+ " </xsd:all>\n");
+ else
+ appendStringInfoString(&result,
+ " </xsd:sequence>\n");
+ appendStringInfoString(&result,
+ "</xsd:complexType>\n\n");
+
+ appendStringInfo(&result,
+ "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
+ xmlsn, schematypename);
+
+ return result.data;
+}
+
+
+/*
+ * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
+ * section 9.15.
+ */
+static const char *
+map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
+ bool tableforest, const char *targetns)
+{
+ char *dbname;
+ char *xmlcn;
+ char *catalogtypename;
+ StringInfoData result;
+ ListCell *cell;
+
+ dbname = get_database_name(MyDatabaseId);
+
+ initStringInfo(&result);
+
+ xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
+
+ catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
+ dbname,
+ NULL,
+ NULL);
+
+ appendStringInfo(&result,
+ "<xsd:complexType name=\"%s\">\n", catalogtypename);
+ appendStringInfoString(&result,
+ " <xsd:all>\n");
+
+ foreach(cell, nspid_list)
+ {
+ Oid nspid = lfirst_oid(cell);
+ char *nspname = get_namespace_name(nspid);
+ char *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
+ char *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
+ dbname,
+ nspname,
+ NULL);
+
+ appendStringInfo(&result,
+ " <xsd:element name=\"%s\" type=\"%s\"/>\n",
+ xmlsn, schematypename);
+ }
+
+ appendStringInfoString(&result,
+ " </xsd:all>\n");
+ appendStringInfoString(&result,
+ "</xsd:complexType>\n\n");
+
+ appendStringInfo(&result,
+ "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
+ xmlcn, catalogtypename);
+
+ return result.data;
+}
+
+
+/*
+ * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
+ */
+static const char *
+map_sql_type_to_xml_name(Oid typeoid, int typmod)
+{
+ StringInfoData result;
+
+ initStringInfo(&result);
+
+ switch (typeoid)
+ {
+ case BPCHAROID:
+ if (typmod == -1)
+ appendStringInfoString(&result, "CHAR");
+ else
+ appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
+ break;
+ case VARCHAROID:
+ if (typmod == -1)
+ appendStringInfoString(&result, "VARCHAR");
+ else
+ appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
+ break;
+ case NUMERICOID:
+ if (typmod == -1)
+ appendStringInfoString(&result, "NUMERIC");
+ else
+ appendStringInfo(&result, "NUMERIC_%d_%d",
+ ((typmod - VARHDRSZ) >> 16) & 0xffff,
+ (typmod - VARHDRSZ) & 0xffff);
+ break;
+ case INT4OID:
+ appendStringInfoString(&result, "INTEGER");
+ break;
+ case INT2OID:
+ appendStringInfoString(&result, "SMALLINT");
+ break;
+ case INT8OID:
+ appendStringInfoString(&result, "BIGINT");
+ break;
+ case FLOAT4OID:
+ appendStringInfoString(&result, "REAL");
+ break;
+ case FLOAT8OID:
+ appendStringInfoString(&result, "DOUBLE");
+ break;
+ case BOOLOID:
+ appendStringInfoString(&result, "BOOLEAN");
+ break;
+ case TIMEOID:
+ if (typmod == -1)
+ appendStringInfoString(&result, "TIME");
+ else
+ appendStringInfo(&result, "TIME_%d", typmod);
+ break;
+ case TIMETZOID:
+ if (typmod == -1)
+ appendStringInfoString(&result, "TIME_WTZ");
+ else
+ appendStringInfo(&result, "TIME_WTZ_%d", typmod);
+ break;
+ case TIMESTAMPOID:
+ if (typmod == -1)
+ appendStringInfoString(&result, "TIMESTAMP");
+ else
+ appendStringInfo(&result, "TIMESTAMP_%d", typmod);
+ break;
+ case TIMESTAMPTZOID:
+ if (typmod == -1)
+ appendStringInfoString(&result, "TIMESTAMP_WTZ");
+ else
+ appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
+ break;
+ case DATEOID:
+ appendStringInfoString(&result, "DATE");
+ break;
+ case XMLOID:
+ appendStringInfoString(&result, "XML");
+ break;
+ default:
+ {
+ HeapTuple tuple;
+ Form_pg_type typtuple;
+
+ tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
+ if (!HeapTupleIsValid(tuple))
+ elog(ERROR, "cache lookup failed for type %u", typeoid);
+ typtuple = (Form_pg_type) GETSTRUCT(tuple);
+
+ appendStringInfoString(&result,
+ map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
+ get_database_name(MyDatabaseId),
+ get_namespace_name(typtuple->typnamespace),
+ NameStr(typtuple->typname)));
+
+ ReleaseSysCache(tuple);
+ }
+ }
+
+ return result.data;
+}
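+
+/*
+ * Worked examples: varchar(10) maps to "VARCHAR_10" and numeric(8,2) to
+ * "NUMERIC_8_2", the precision and scale being unpacked from the high and
+ * low 16 bits of (typmod - VARHDRSZ).
+ */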
+
+
+/*
+ * Map a collection of SQL data types to XML Schema data types; see
+ * SQL/XML:2008 section 9.7.
+ */
+static const char *
+map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
+{
+ List *uniquetypes = NIL;
+ int i;
+ StringInfoData result;
+ ListCell *cell0;
+
+ /* extract all column types used in the set of TupleDescs */
+ foreach(cell0, tupdesc_list)
+ {
+ TupleDesc tupdesc = (TupleDesc) lfirst(cell0);
+
+ for (i = 0; i < tupdesc->natts; i++)
+ {
+ Form_pg_attribute att = TupleDescAttr(tupdesc, i);
+
+ if (att->attisdropped)
+ continue;
+ uniquetypes = list_append_unique_oid(uniquetypes, att->atttypid);
+ }
+ }
+
+ /* add base types of domains */
+ foreach(cell0, uniquetypes)
+ {
+ Oid typid = lfirst_oid(cell0);
+ Oid basetypid = getBaseType(typid);
+
+ if (basetypid != typid)
+ uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
+ }
+
+ /* Convert to textual form */
+ initStringInfo(&result);
+
+ foreach(cell0, uniquetypes)
+ {
+ appendStringInfo(&result, "%s\n",
+ map_sql_type_to_xmlschema_type(lfirst_oid(cell0),
+ -1));
+ }
+
+ return result.data;
+}
+
+
+/*
+ * Map an SQL data type to a named XML Schema data type; see
+ * SQL/XML:2008 sections 9.5 and 9.6.
+ *
+ * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
+ * a name attribute, which is what this function produces. The name-less
+ * version 9.5 doesn't appear to be required anywhere.)
+ */
+static const char *
+map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
+{
+ StringInfoData result;
+ const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
+
+ initStringInfo(&result);
+
+ if (typeoid == XMLOID)
+ {
+ appendStringInfoString(&result,
+ "<xsd:complexType mixed=\"true\">\n"
+ " <xsd:sequence>\n"
+ " <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
+ " </xsd:sequence>\n"
+ "</xsd:complexType>\n");
+ }
+ else
+ {
+ appendStringInfo(&result,
+ "<xsd:simpleType name=\"%s\">\n", typename);
+
+ switch (typeoid)
+ {
+ case BPCHAROID:
+ case VARCHAROID:
+ case TEXTOID:
+ appendStringInfoString(&result,
+ " <xsd:restriction base=\"xsd:string\">\n");
+ if (typmod != -1)
+ appendStringInfo(&result,
+ " <xsd:maxLength value=\"%d\"/>\n",
+ typmod - VARHDRSZ);
+ appendStringInfoString(&result, " </xsd:restriction>\n");
+ break;
+
+ case BYTEAOID:
+ appendStringInfo(&result,
+ " <xsd:restriction base=\"xsd:%s\">\n"
+ " </xsd:restriction>\n",
+ xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
+ break;
+
+ case NUMERICOID:
+ if (typmod != -1)
+ appendStringInfo(&result,
+ " <xsd:restriction base=\"xsd:decimal\">\n"
+ " <xsd:totalDigits value=\"%d\"/>\n"
+ " <xsd:fractionDigits value=\"%d\"/>\n"
+ " </xsd:restriction>\n",
+ ((typmod - VARHDRSZ) >> 16) & 0xffff,
+ (typmod - VARHDRSZ) & 0xffff);
+ break;
+
+ case INT2OID:
+ appendStringInfo(&result,
+ " <xsd:restriction base=\"xsd:short\">\n"
+ " <xsd:maxInclusive value=\"%d\"/>\n"
+ " <xsd:minInclusive value=\"%d\"/>\n"
+ " </xsd:restriction>\n",
+ SHRT_MAX, SHRT_MIN);
+ break;
+
+ case INT4OID:
+ appendStringInfo(&result,
+ " <xsd:restriction base=\"xsd:int\">\n"
+ " <xsd:maxInclusive value=\"%d\"/>\n"
+ " <xsd:minInclusive value=\"%d\"/>\n"
+ " </xsd:restriction>\n",
+ INT_MAX, INT_MIN);
+ break;
+
+ case INT8OID:
+ appendStringInfo(&result,
+ " <xsd:restriction base=\"xsd:long\">\n"
+ " <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
+ " <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
+ " </xsd:restriction>\n",
+ PG_INT64_MAX,
+ PG_INT64_MIN);
+ break;
+
+ case FLOAT4OID:
+ appendStringInfoString(&result,
+ " <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
+ break;
+
+ case FLOAT8OID:
+ appendStringInfoString(&result,
+ " <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
+ break;
+
+ case BOOLOID:
+ appendStringInfoString(&result,
+ " <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
+ break;
+
+ case TIMEOID:
+ case TIMETZOID:
+ {
+ const char *tz = (typeoid == TIMETZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
+
+ if (typmod == -1)
+ appendStringInfo(&result,
+ " <xsd:restriction base=\"xsd:time\">\n"
+ " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
+ " </xsd:restriction>\n", tz);
+ else if (typmod == 0)
+ appendStringInfo(&result,
+ " <xsd:restriction base=\"xsd:time\">\n"
+ " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
+ " </xsd:restriction>\n", tz);
+ else
+ appendStringInfo(&result,
+ " <xsd:restriction base=\"xsd:time\">\n"
+ " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
+ " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
+ break;
+ }
+
+ case TIMESTAMPOID:
+ case TIMESTAMPTZOID:
+ {
+ const char *tz = (typeoid == TIMESTAMPTZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
+
+ if (typmod == -1)
+ appendStringInfo(&result,
+ " <xsd:restriction base=\"xsd:dateTime\">\n"
+ " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
+ " </xsd:restriction>\n", tz);
+ else if (typmod == 0)
+ appendStringInfo(&result,
+ " <xsd:restriction base=\"xsd:dateTime\">\n"
+ " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
+ " </xsd:restriction>\n", tz);
+ else
+ appendStringInfo(&result,
+ " <xsd:restriction base=\"xsd:dateTime\">\n"
+ " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
+ " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
+ break;
+ }
+
+ case DATEOID:
+ appendStringInfoString(&result,
+ " <xsd:restriction base=\"xsd:date\">\n"
+ " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
+ " </xsd:restriction>\n");
+ break;
+
+ default:
+ if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
+ {
+ Oid base_typeoid;
+ int32 base_typmod = -1;
+
+ base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
+
+ appendStringInfo(&result,
+ " <xsd:restriction base=\"%s\"/>\n",
+ map_sql_type_to_xml_name(base_typeoid, base_typmod));
+ }
+ break;
+ }
+ appendStringInfoString(&result, "</xsd:simpleType>\n");
+ }
+
+ return result.data;
+}
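+
+/*
+ * For instance, varchar(10) yields the named simple type (whitespace shown
+ * approximately):
+ *
+ *   <xsd:simpleType name="VARCHAR_10">
+ *     <xsd:restriction base="xsd:string">
+ *       <xsd:maxLength value="10"/>
+ *     </xsd:restriction>
+ *   </xsd:simpleType>
+ */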
+
+
+/*
+ * Map an SQL row to an XML element, taking the row from the active
+ * SPI cursor. See also SQL/XML:2008 section 9.10.
+ */
+static void
+SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
+ bool nulls, bool tableforest,
+ const char *targetns, bool top_level)
+{
+ int i;
+ char *xmltn;
+
+ if (tablename)
+ xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
+ else
+ {
+ if (tableforest)
+ xmltn = "row";
+ else
+ xmltn = "table";
+ }
+
+ if (tableforest)
+ xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
+ else
+ appendStringInfoString(result, "<row>\n");
+
+ for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
+ {
+ char *colname;
+ Datum colval;
+ bool isnull;
+
+ colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i),
+ true, false);
+ colval = SPI_getbinval(SPI_tuptable->vals[rownum],
+ SPI_tuptable->tupdesc,
+ i,
+ &isnull);
+ if (isnull)
+ {
+ if (nulls)
+ appendStringInfo(result, " <%s xsi:nil=\"true\"/>\n", colname);
+ }
+ else
+ appendStringInfo(result, " <%s>%s</%s>\n",
+ colname,
+ map_sql_value_to_xml_value(colval,
+ SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
+ colname);
+ }
+
+ if (tableforest)
+ {
+ xmldata_root_element_end(result, xmltn);
+ appendStringInfoChar(result, '\n');
+ }
+ else
+ appendStringInfoString(result, "</row>\n\n");
+}
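+
+/*
+ * For a hypothetical row (id = 1, name = 'foo'), the non-tableforest case
+ * produces output of roughly the form
+ *
+ *   <row>
+ *     <id>1</id>
+ *     <name>foo</name>
+ *   </row>
+ *
+ * With tableforest = true the enclosing element is named after the table,
+ * and a NULL column is emitted as <id xsi:nil="true"/> when nulls = true
+ * (and omitted otherwise).
+ */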
+
+
+/*
+ * XPath related functions
+ */
+
+#ifdef USE_LIBXML
+
+/*
+ * Convert XML node to text.
+ *
+ * For attribute and text nodes, return the escaped text. For anything else,
+ * dump the whole subtree.
+ */
+static text *
+xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
+{
+ xmltype *result = NULL;
+
+ if (cur->type != XML_ATTRIBUTE_NODE && cur->type != XML_TEXT_NODE)
+ {
+ void (*volatile nodefree) (xmlNodePtr) = NULL;
+ volatile xmlBufferPtr buf = NULL;
+ volatile xmlNodePtr cur_copy = NULL;
+
+ PG_TRY();
+ {
+ int bytes;
+
+ buf = xmlBufferCreate();
+ if (buf == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate xmlBuffer");
+
+ /*
+ * Produce a dump of the node that we can serialize. xmlNodeDump
+ * does that, but the result of that function won't contain
+ * namespace definitions from ancestor nodes, so we first do an
+ * xmlCopyNode(), which duplicates the node along with its required
+ * namespace definitions.
+ *
+ * Some old libxml2 versions such as 2.7.6 produce partially
+ * broken XML_DOCUMENT_NODE nodes (unset content field) when
+ * copying them. xmlNodeDump of such a node works fine, but
+ * xmlFreeNode crashes; set us up to call xmlFreeDoc instead.
+ */
+ cur_copy = xmlCopyNode(cur, 1);
+ if (cur_copy == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not copy node");
+ nodefree = (cur_copy->type == XML_DOCUMENT_NODE) ?
+ (void (*) (xmlNodePtr)) xmlFreeDoc : xmlFreeNode;
+
+ bytes = xmlNodeDump(buf, NULL, cur_copy, 0, 0);
+ if (bytes == -1 || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not dump node");
+
+ result = xmlBuffer_to_xmltype(buf);
+ }
+ PG_FINALLY();
+ {
+ if (nodefree)
+ nodefree(cur_copy);
+ if (buf)
+ xmlBufferFree(buf);
+ }
+ PG_END_TRY();
+ }
+ else
+ {
+ xmlChar *str;
+
+ str = xmlXPathCastNodeToString(cur);
+ PG_TRY();
+ {
+ /* Here we rely on XML having the same representation as TEXT */
+ char *escaped = escape_xml((char *) str);
+
+ result = (xmltype *) cstring_to_text(escaped);
+ pfree(escaped);
+ }
+ PG_FINALLY();
+ {
+ xmlFree(str);
+ }
+ PG_END_TRY();
+ }
+
+ return result;
+}
+
+/*
+ * Convert an XML XPath object (the result of evaluating an XPath expression)
+ * to an array of xml values, which are appended to astate. The function
+ * result value is the number of elements in the array.
+ *
+ * If "astate" is NULL then we don't generate the array value, but we still
+ * return the number of elements it would have had.
+ *
+ * Nodesets are converted to an array containing the nodes' textual
+ * representations. Primitive values (float, double, string) are converted
+ * to a single-element array containing the value's string representation.
+ */
+static int
+xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
+ ArrayBuildState *astate,
+ PgXmlErrorContext *xmlerrcxt)
+{
+ int result = 0;
+ Datum datum;
+ Oid datumtype;
+ char *result_str;
+
+ switch (xpathobj->type)
+ {
+ case XPATH_NODESET:
+ if (xpathobj->nodesetval != NULL)
+ {
+ result = xpathobj->nodesetval->nodeNr;
+ if (astate != NULL)
+ {
+ int i;
+
+ for (i = 0; i < result; i++)
+ {
+ datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
+ xmlerrcxt));
+ (void) accumArrayResult(astate, datum, false,
+ XMLOID, CurrentMemoryContext);
+ }
+ }
+ }
+ return result;
+
+ case XPATH_BOOLEAN:
+ if (astate == NULL)
+ return 1;
+ datum = BoolGetDatum(xpathobj->boolval);
+ datumtype = BOOLOID;
+ break;
+
+ case XPATH_NUMBER:
+ if (astate == NULL)
+ return 1;
+ datum = Float8GetDatum(xpathobj->floatval);
+ datumtype = FLOAT8OID;
+ break;
+
+ case XPATH_STRING:
+ if (astate == NULL)
+ return 1;
+ datum = CStringGetDatum((char *) xpathobj->stringval);
+ datumtype = CSTRINGOID;
+ break;
+
+ default:
+ elog(ERROR, "xpath expression result type %d is unsupported",
+ xpathobj->type);
+ return 0; /* keep compiler quiet */
+ }
+
+ /* Common code for scalar-value cases */
+ result_str = map_sql_value_to_xml_value(datum, datumtype, true);
+ datum = PointerGetDatum(cstring_to_xmltype(result_str));
+ (void) accumArrayResult(astate, datum, false,
+ XMLOID, CurrentMemoryContext);
+ return 1;
+}
+
+
+/*
+ * Common code for xpath() and xmlexists()
+ *
+ * Evaluate XPath expression and return number of nodes in res_nitems
+ * and array of XML values in astate. Either of those pointers can be
+ * NULL if the corresponding result isn't wanted.
+ *
+ * It is up to the user to ensure that the XML passed is in fact
+ * an XML document - XPath doesn't work easily on fragments without
+ * a context node being known.
+ */
+static void
+xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
+ int *res_nitems, ArrayBuildState *astate)
+{
+ PgXmlErrorContext *xmlerrcxt;
+ volatile xmlParserCtxtPtr ctxt = NULL;
+ volatile xmlDocPtr doc = NULL;
+ volatile xmlXPathContextPtr xpathctx = NULL;
+ volatile xmlXPathCompExprPtr xpathcomp = NULL;
+ volatile xmlXPathObjectPtr xpathobj = NULL;
+ char *datastr;
+ int32 len;
+ int32 xpath_len;
+ xmlChar *string;
+ xmlChar *xpath_expr;
+ size_t xmldecl_len = 0;
+ int i;
+ int ndim;
+ Datum *ns_names_uris;
+ bool *ns_names_uris_nulls;
+ int ns_count;
+
+ /*
+ * Namespace mappings are passed as text[]. If an empty array is passed
+ * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
+ * Else, a 2-dimensional array with length of the second axis being equal
+ * to 2 should be passed, i.e., every subarray contains 2 elements, the
+ * first element defining the name, the second one the URI. Example:
+ * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
+ * 'http://example2.com']].
+ */
+ ndim = namespaces ? ARR_NDIM(namespaces) : 0;
+ if (ndim != 0)
+ {
+ int *dims;
+
+ dims = ARR_DIMS(namespaces);
+
+ if (ndim != 2 || dims[1] != 2)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_EXCEPTION),
+ errmsg("invalid array for XML namespace mapping"),
+ errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
+
+ Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
+
+ deconstruct_array(namespaces, TEXTOID, -1, false, TYPALIGN_INT,
+ &ns_names_uris, &ns_names_uris_nulls,
+ &ns_count);
+
+ Assert((ns_count % 2) == 0); /* checked above */
+ ns_count /= 2; /* count pairs only */
+ }
+ else
+ {
+ ns_names_uris = NULL;
+ ns_names_uris_nulls = NULL;
+ ns_count = 0;
+ }
+
+ datastr = VARDATA(data);
+ len = VARSIZE(data) - VARHDRSZ;
+ xpath_len = VARSIZE_ANY_EXHDR(xpath_expr_text);
+ if (xpath_len == 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_EXCEPTION),
+ errmsg("empty XPath expression")));
+
+ string = pg_xmlCharStrndup(datastr, len);
+ xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);
+
+ /*
+ * In a UTF8 database, skip any xml declaration, which might assert
+ * another encoding. Ignore parse_xml_decl() failure, letting
+ * xmlCtxtReadMemory() report parse errors. Documentation disclaims
+ * xpath() support for non-ASCII data in non-UTF8 databases, so leave
+ * those scenarios bug-compatible with historical behavior.
+ */
+ if (GetDatabaseEncoding() == PG_UTF8)
+ parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
+
+ xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
+
+ PG_TRY();
+ {
+ xmlInitParser();
+
+ /*
+ * Note that this XML parsing is redundant: the same value may be parsed
+ * twice during the execution of a single command.
+ */
+ ctxt = xmlNewParserCtxt();
+ if (ctxt == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate parser context");
+ doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
+ len - xmldecl_len, NULL, NULL, 0);
+ if (doc == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
+ "could not parse XML document");
+ xpathctx = xmlXPathNewContext(doc);
+ if (xpathctx == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate XPath context");
+ xpathctx->node = (xmlNodePtr) doc;
+
+ /* register namespaces, if any */
+ if (ns_count > 0)
+ {
+ for (i = 0; i < ns_count; i++)
+ {
+ char *ns_name;
+ char *ns_uri;
+
+ if (ns_names_uris_nulls[i * 2] ||
+ ns_names_uris_nulls[i * 2 + 1])
+ ereport(ERROR,
+ (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
+ errmsg("neither namespace name nor URI may be null")));
+ ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
+ ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
+ if (xmlXPathRegisterNs(xpathctx,
+ (xmlChar *) ns_name,
+ (xmlChar *) ns_uri) != 0)
+ ereport(ERROR, /* is this an internal error??? */
+ (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
+ ns_name, ns_uri)));
+ }
+ }
+
+ xpathcomp = xmlXPathCompile(xpath_expr);
+ if (xpathcomp == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+ "invalid XPath expression");
+
+ /*
+ * libxml2 2.6.27 introduces a function named
+ * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
+ * but we can derive the existence from whether any nodes are returned,
+ * thereby avoiding a dependency on a newer library version and keeping
+ * the code the same.
+ */
+ xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
+ if (xpathobj == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+ "could not create XPath object");
+
+ /*
+ * Extract the results as requested.
+ */
+ if (res_nitems != NULL)
+ *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
+ else
+ (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
+ }
+ PG_CATCH();
+ {
+ if (xpathobj)
+ xmlXPathFreeObject(xpathobj);
+ if (xpathcomp)
+ xmlXPathFreeCompExpr(xpathcomp);
+ if (xpathctx)
+ xmlXPathFreeContext(xpathctx);
+ if (doc)
+ xmlFreeDoc(doc);
+ if (ctxt)
+ xmlFreeParserCtxt(ctxt);
+
+ pg_xml_done(xmlerrcxt, true);
+
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+
+ xmlXPathFreeObject(xpathobj);
+ xmlXPathFreeCompExpr(xpathcomp);
+ xmlXPathFreeContext(xpathctx);
+ xmlFreeDoc(doc);
+ xmlFreeParserCtxt(ctxt);
+
+ pg_xml_done(xmlerrcxt, false);
+}
+#endif /* USE_LIBXML */
+
+/*
+ * Evaluate an XPath expression and return an array of XML values.
+ *
+ * As we have no support for XQuery sequences yet, this function seems
+ * to be the most useful one (an array of XML values serves as a rough
+ * substitute for an XQuery sequence).
+ */
+Datum
+xpath(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+ text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
+ xmltype *data = PG_GETARG_XML_P(1);
+ ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
+ ArrayBuildState *astate;
+
+ astate = initArrayResult(XMLOID, CurrentMemoryContext, true);
+ xpath_internal(xpath_expr_text, data, namespaces,
+ NULL, astate);
+ PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
+#else
+ NO_XML_SUPPORT();
+ return 0;
+#endif
+}
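+
+/*
+ * Illustrative SQL usage (hypothetical document): the node-set result is
+ * returned as an array of XML values, e.g.
+ *
+ *   SELECT xpath('/a/b/text()', '<a><b>one</b><b>two</b></a>');
+ *
+ * returns {one,two}.
+ */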
+
+/*
+ * Determines if the node specified by the supplied XPath exists
+ * in a given XML document, returning a boolean.
+ */
+Datum
+xmlexists(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+ text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
+ xmltype *data = PG_GETARG_XML_P(1);
+ int res_nitems;
+
+ xpath_internal(xpath_expr_text, data, NULL,
+ &res_nitems, NULL);
+
+ PG_RETURN_BOOL(res_nitems > 0);
+#else
+ NO_XML_SUPPORT();
+ return 0;
+#endif
+}
+
+/*
+ * Determines if the node specified by the supplied XPath exists
+ * in a given XML document, returning a boolean. Differs from
+ * xmlexists in that it supports namespaces and is not defined in SQL/XML.
+ */
+Datum
+xpath_exists(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+ text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
+ xmltype *data = PG_GETARG_XML_P(1);
+ ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
+ int res_nitems;
+
+ xpath_internal(xpath_expr_text, data, namespaces,
+ &res_nitems, NULL);
+
+ PG_RETURN_BOOL(res_nitems > 0);
+#else
+ NO_XML_SUPPORT();
+ return 0;
+#endif
+}
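+
+/*
+ * Illustrative SQL usage (hypothetical document and namespace):
+ *
+ *   SELECT xpath_exists('/my:a/text()',
+ *                       '<my:a xmlns:my="http://example.com">test</my:a>',
+ *                       ARRAY[ARRAY['my', 'http://example.com']]);
+ *
+ * returns true.
+ */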
+
+/*
+ * Functions for checking well-formed-ness
+ */
+
+#ifdef USE_LIBXML
+static bool
+wellformed_xml(text *data, XmlOptionType xmloption_arg)
+{
+ bool result;
+ volatile xmlDocPtr doc = NULL;
+
+ /* We want to catch any exceptions and return false */
+ PG_TRY();
+ {
+ doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding());
+ result = true;
+ }
+ PG_CATCH();
+ {
+ FlushErrorState();
+ result = false;
+ }
+ PG_END_TRY();
+
+ if (doc)
+ xmlFreeDoc(doc);
+
+ return result;
+}
+#endif
+
+Datum
+xml_is_well_formed(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+ text *data = PG_GETARG_TEXT_PP(0);
+
+ PG_RETURN_BOOL(wellformed_xml(data, xmloption));
+#else
+ NO_XML_SUPPORT();
+ return 0;
+#endif /* not USE_LIBXML */
+}
+
+Datum
+xml_is_well_formed_document(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+ text *data = PG_GETARG_TEXT_PP(0);
+
+ PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
+#else
+ NO_XML_SUPPORT();
+ return 0;
+#endif /* not USE_LIBXML */
+}
+
+Datum
+xml_is_well_formed_content(PG_FUNCTION_ARGS)
+{
+#ifdef USE_LIBXML
+ text *data = PG_GETARG_TEXT_PP(0);
+
+ PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
+#else
+ NO_XML_SUPPORT();
+ return 0;
+#endif /* not USE_LIBXML */
+}
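+
+/*
+ * Illustrative SQL usage:
+ *
+ *   SELECT xml_is_well_formed_document('<a>x</a>');   -- true
+ *   SELECT xml_is_well_formed_content('abc');         -- true
+ *   SELECT xml_is_well_formed_document('abc');        -- false
+ *
+ * xml_is_well_formed() itself applies whichever check the current
+ * xmloption setting selects.
+ */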
+
+/*
+ * Support functions for XMLTABLE
+ */
+#ifdef USE_LIBXML
+
+/*
+ * Return the private data stored in the executor state, validating it by
+ * checking the magic number.
+ */
+static inline XmlTableBuilderData *
+GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname)
+{
+ XmlTableBuilderData *result;
+
+ if (!IsA(state, TableFuncScanState))
+ elog(ERROR, "%s called with invalid TableFuncScanState", fname);
+ result = (XmlTableBuilderData *) state->opaque;
+ if (result->magic != XMLTABLE_CONTEXT_MAGIC)
+ elog(ERROR, "%s called with invalid TableFuncScanState", fname);
+
+ return result;
+}
+#endif
+
+/*
+ * XmlTableInitOpaque
+ * Fill in TableFuncScanState->opaque for XmlTable processor; initialize
+ * the XML parser.
+ *
+ * Note: Because we call pg_xml_init() here and pg_xml_done() in
+ * XmlTableDestroyOpaque, it is critical for robustness that no other
+ * executor nodes run until this node is processed to completion. Caller
+ * must execute this to completion (probably filling a tuplestore to exhaust
+ * this node in a single pass) instead of using row-per-call mode.
+ */
+static void
+XmlTableInitOpaque(TableFuncScanState *state, int natts)
+{
+#ifdef USE_LIBXML
+ volatile xmlParserCtxtPtr ctxt = NULL;
+ XmlTableBuilderData *xtCxt;
+ PgXmlErrorContext *xmlerrcxt;
+
+ xtCxt = palloc0(sizeof(XmlTableBuilderData));
+ xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
+ xtCxt->natts = natts;
+ xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts);
+
+ xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
+
+ PG_TRY();
+ {
+ xmlInitParser();
+
+ ctxt = xmlNewParserCtxt();
+ if (ctxt == NULL || xmlerrcxt->err_occurred)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate parser context");
+ }
+ PG_CATCH();
+ {
+ if (ctxt != NULL)
+ xmlFreeParserCtxt(ctxt);
+
+ pg_xml_done(xmlerrcxt, true);
+
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+
+ xtCxt->xmlerrcxt = xmlerrcxt;
+ xtCxt->ctxt = ctxt;
+
+ state->opaque = xtCxt;
+#else
+ NO_XML_SUPPORT();
+#endif /* not USE_LIBXML */
+}
+
+/*
+ * XmlTableSetDocument
+ * Install the input document
+ */
+static void
+XmlTableSetDocument(TableFuncScanState *state, Datum value)
+{
+#ifdef USE_LIBXML
+ XmlTableBuilderData *xtCxt;
+ xmltype *xmlval = DatumGetXmlP(value);
+ char *str;
+ xmlChar *xstr;
+ int length;
+ volatile xmlDocPtr doc = NULL;
+ volatile xmlXPathContextPtr xpathcxt = NULL;
+
+ xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument");
+
+ /*
+ * Use the output function to cast to a string (this removes the encoding
+ * property). See the comment in xml_out.
+ */
+ str = xml_out_internal(xmlval, 0);
+
+ length = strlen(str);
+ xstr = pg_xmlCharStrndup(str, length);
+
+ PG_TRY();
+ {
+ doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
+ if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
+ xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
+ "could not parse XML document");
+ xpathcxt = xmlXPathNewContext(doc);
+ if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred)
+ xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
+ "could not allocate XPath context");
+ xpathcxt->node = (xmlNodePtr) doc;
+ }
+ PG_CATCH();
+ {
+ if (xpathcxt != NULL)
+ xmlXPathFreeContext(xpathcxt);
+ if (doc != NULL)
+ xmlFreeDoc(doc);
+
+ PG_RE_THROW();
+ }
+ PG_END_TRY();
+
+ xtCxt->doc = doc;
+ xtCxt->xpathcxt = xpathcxt;
+#else
+ NO_XML_SUPPORT();
+#endif /* not USE_LIBXML */
+}
+
+/*
+ * XmlTableSetNamespace
+ * Add a namespace declaration
+ */
+static void
+XmlTableSetNamespace(TableFuncScanState *state, const char *name, const char *uri)
+{
+#ifdef USE_LIBXML
+ XmlTableBuilderData *xtCxt;
+
+ if (name == NULL)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("DEFAULT namespace is not supported")));
+ xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
+
+ if (xmlXPathRegisterNs(xtCxt->xpathcxt,
+ pg_xmlCharStrndup(name, strlen(name)),
+ pg_xmlCharStrndup(uri, strlen(uri))))
+ xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
+ "could not set XML namespace");
+#else
+ NO_XML_SUPPORT();
+#endif /* not USE_LIBXML */
+}
+
+/*
+ * XmlTableSetRowFilter
+ * Install the row-filter Xpath expression.
+ */
+static void
+XmlTableSetRowFilter(TableFuncScanState *state, const char *path)
+{
+#ifdef USE_LIBXML
+ XmlTableBuilderData *xtCxt;
+ xmlChar *xstr;
+
+ xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter");
+
+ if (*path == '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_EXCEPTION),
+ errmsg("row path filter must not be empty string")));
+
+ xstr = pg_xmlCharStrndup(path, strlen(path));
+
+ xtCxt->xpathcomp = xmlXPathCompile(xstr);
+ if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
+ xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_SYNTAX_ERROR,
+ "invalid XPath expression");
+#else
+ NO_XML_SUPPORT();
+#endif /* not USE_LIBXML */
+}
+
+/*
+ * XmlTableSetColumnFilter
+ * Install the column-filter Xpath expression, for the given column.
+ */
+static void
+XmlTableSetColumnFilter(TableFuncScanState *state, const char *path, int colnum)
+{
+#ifdef USE_LIBXML
+ XmlTableBuilderData *xtCxt;
+ xmlChar *xstr;
+
+ AssertArg(PointerIsValid(path));
+
+ xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter");
+
+ if (*path == '\0')
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_EXCEPTION),
+ errmsg("column path filter must not be empty string")));
+
+ xstr = pg_xmlCharStrndup(path, strlen(path));
+
+ xtCxt->xpathscomp[colnum] = xmlXPathCompile(xstr);
+ if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
+ xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
+ "invalid XPath expression");
+#else
+ NO_XML_SUPPORT();
+#endif /* not USE_LIBXML */
+}
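+
+/*
+ * In SQL terms (hypothetical data), the row filter and column filters
+ * installed above correspond to the row_expression and the per-column PATH
+ * clauses of an XMLTABLE call such as
+ *
+ *   SELECT t.*
+ *   FROM XMLTABLE('/rows/row'
+ *                 PASSING '<rows><row id="1"><name>foo</name></row></rows>'::xml
+ *                 COLUMNS id int PATH '@id',
+ *                         name text PATH 'name') AS t;
+ */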
+
+/*
+ * XmlTableFetchRow
+ * Prepare the next "current" tuple for upcoming GetValue calls.
+ * Returns false if the row-filter expression returned no more rows.
+ */
+static bool
+XmlTableFetchRow(TableFuncScanState *state)
+{
+#ifdef USE_LIBXML
+ XmlTableBuilderData *xtCxt;
+
+ xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow");
+
+ /* Propagate our own error context to libxml2 */
+ xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
+
+ if (xtCxt->xpathobj == NULL)
+ {
+ xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt);
+ if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
+ xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+ "could not create XPath object");
+
+ xtCxt->row_count = 0;
+ }
+
+ if (xtCxt->xpathobj->type == XPATH_NODESET)
+ {
+ if (xtCxt->xpathobj->nodesetval != NULL)
+ {
+ if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr)
+ return true;
+ }
+ }
+
+ return false;
+#else
+ NO_XML_SUPPORT();
+ return false;
+#endif /* not USE_LIBXML */
+}
+
+/*
+ * XmlTableGetValue
+ * Return the value for column number 'colnum' for the current row. If
+ * column -1 is requested, return a representation of the whole row.
+ *
+ * This leaks memory, so the memory context in which it is called should
+ * be reset frequently.
+ */
+static Datum
+XmlTableGetValue(TableFuncScanState *state, int colnum,
+ Oid typid, int32 typmod, bool *isnull)
+{
+#ifdef USE_LIBXML
+ XmlTableBuilderData *xtCxt;
+ Datum result = (Datum) 0;
+ xmlNodePtr cur;
+ char *cstr = NULL;
+ volatile xmlXPathObjectPtr xpathobj = NULL;
+
+ xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue");
+
+ Assert(xtCxt->xpathobj &&
+ xtCxt->xpathobj->type == XPATH_NODESET &&
+ xtCxt->xpathobj->nodesetval != NULL);
+
+ /* Propagate our own error context to libxml2 */
+ xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
+
+ *isnull = false;
+
+ cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1];
+
+ Assert(xtCxt->xpathscomp[colnum] != NULL);
+
+ PG_TRY();
+ {
+ /* Set current node as entry point for XPath evaluation */
+ xtCxt->xpathcxt->node = cur;
+
+ /* Evaluate column path */
+ xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt);
+ if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
+ xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
+ "could not create XPath object");
+
+ /*
+ * There are four possible cases, depending on the number of nodes
+ * returned by the XPath expression and the type of the target column:
+ * a) XPath returns no nodes. b) The target type is XML (return all
+ * as XML). For non-XML return types: c) One node (return content).
+ * d) Multiple nodes (error).
+ */
+ if (xpathobj->type == XPATH_NODESET)
+ {
+ int count = 0;
+
+ if (xpathobj->nodesetval != NULL)
+ count = xpathobj->nodesetval->nodeNr;
+
+ if (xpathobj->nodesetval == NULL || count == 0)
+ {
+ *isnull = true;
+ }
+ else
+ {
+ if (typid == XMLOID)
+ {
+ text *textstr;
+ StringInfoData str;
+
+ /* Concatenate serialized values */
+ initStringInfo(&str);
+ for (int i = 0; i < count; i++)
+ {
+ textstr =
+ xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
+ xtCxt->xmlerrcxt);
+
+ appendStringInfoText(&str, textstr);
+ }
+ cstr = str.data;
+ }
+ else
+ {
+ xmlChar *str;
+
+ if (count > 1)
+ ereport(ERROR,
+ (errcode(ERRCODE_CARDINALITY_VIOLATION),
+ errmsg("more than one value returned by column XPath expression")));
+
+ str = xmlXPathCastNodeSetToString(xpathobj->nodesetval);
+ cstr = str ? xml_pstrdup_and_free(str) : "";
+ }
+ }
+ }
+ else if (xpathobj->type == XPATH_STRING)
+ {
+ /* Content should be escaped when the target type is XML */
+ if (typid == XMLOID)
+ cstr = escape_xml((char *) xpathobj->stringval);
+ else
+ cstr = (char *) xpathobj->stringval;
+ }
+ else if (xpathobj->type == XPATH_BOOLEAN)
+ {
+ char typcategory;
+ bool typispreferred;
+ xmlChar *str;
+
+ /* Allow implicit casting from boolean to numbers */
+ get_type_category_preferred(typid, &typcategory, &typispreferred);
+
+ if (typcategory != TYPCATEGORY_NUMERIC)
+ str = xmlXPathCastBooleanToString(xpathobj->boolval);
+ else
+ str = xmlXPathCastNumberToString(xmlXPathCastBooleanToNumber(xpathobj->boolval));
+
+ cstr = xml_pstrdup_and_free(str);
+ }
+ else if (xpathobj->type == XPATH_NUMBER)
+ {
+ xmlChar *str;
+
+ str = xmlXPathCastNumberToString(xpathobj->floatval);
+ cstr = xml_pstrdup_and_free(str);
+ }
+ else
+ elog(ERROR, "unexpected XPath object type %u", xpathobj->type);
+
+ /*
+ * By here, either cstr contains the result value, or the isnull flag
+ * has been set.
+ */
+ Assert(cstr || *isnull);
+
+ if (!*isnull)
+ result = InputFunctionCall(&state->in_functions[colnum],
+ cstr,
+ state->typioparams[colnum],
+ typmod);
+ }
+ PG_FINALLY();
+ {
+ if (xpathobj != NULL)
+ xmlXPathFreeObject(xpathobj);
+ }
+ PG_END_TRY();
+
+ return result;
+#else
+ NO_XML_SUPPORT();
+ return 0;
+#endif /* not USE_LIBXML */
+}
+
+/*
+ * XmlTableDestroyOpaque
+ * Release all libxml2 resources
+ */
+static void
+XmlTableDestroyOpaque(TableFuncScanState *state)
+{
+#ifdef USE_LIBXML
+ XmlTableBuilderData *xtCxt;
+
+ xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque");
+
+ /* Propagate our own error context to libxml2 */
+ xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
+
+ if (xtCxt->xpathscomp != NULL)
+ {
+ int i;
+
+ for (i = 0; i < xtCxt->natts; i++)
+ if (xtCxt->xpathscomp[i] != NULL)
+ xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]);
+ }
+
+ if (xtCxt->xpathobj != NULL)
+ xmlXPathFreeObject(xtCxt->xpathobj);
+ if (xtCxt->xpathcomp != NULL)
+ xmlXPathFreeCompExpr(xtCxt->xpathcomp);
+ if (xtCxt->xpathcxt != NULL)
+ xmlXPathFreeContext(xtCxt->xpathcxt);
+ if (xtCxt->doc != NULL)
+ xmlFreeDoc(xtCxt->doc);
+ if (xtCxt->ctxt != NULL)
+ xmlFreeParserCtxt(xtCxt->ctxt);
+
+ pg_xml_done(xtCxt->xmlerrcxt, true);
+
+ /* not valid anymore */
+ xtCxt->magic = 0;
+ state->opaque = NULL;
+
+#else
+ NO_XML_SUPPORT();
+#endif /* not USE_LIBXML */
+}