diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-13 13:44:03 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-13 13:44:03 +0000 |
commit | 293913568e6a7a86fd1479e1cff8e2ecb58d6568 (patch) | |
tree | fc3b469a3ec5ab71b36ea97cc7aaddb838423a0c /src/backend/commands/collationcmds.c | |
parent | Initial commit. (diff) | |
download | postgresql-16-293913568e6a7a86fd1479e1cff8e2ecb58d6568.tar.xz postgresql-16-293913568e6a7a86fd1479e1cff8e2ecb58d6568.zip |
Adding upstream version 16.2.upstream/16.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/commands/collationcmds.c')
-rw-r--r-- | src/backend/commands/collationcmds.c | 1018 |
1 files changed, 1018 insertions, 0 deletions
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c new file mode 100644 index 0000000..4088481 --- /dev/null +++ b/src/backend/commands/collationcmds.c @@ -0,0 +1,1018 @@ +/*------------------------------------------------------------------------- + * + * collationcmds.c + * collation-related commands support code + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/commands/collationcmds.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/table.h" +#include "access/xact.h" +#include "catalog/dependency.h" +#include "catalog/indexing.h" +#include "catalog/namespace.h" +#include "catalog/objectaccess.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_database.h" +#include "catalog/pg_namespace.h" +#include "commands/alter.h" +#include "commands/collationcmds.h" +#include "commands/comment.h" +#include "commands/dbcommands.h" +#include "commands/defrem.h" +#include "common/string.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/pg_locale.h" +#include "utils/rel.h" +#include "utils/syscache.h" + + +typedef struct +{ + char *localename; /* name of locale, as per "locale -a" */ + char *alias; /* shortened alias for same */ + int enc; /* encoding */ +} CollAliasData; + + +/* + * CREATE COLLATION + */ +ObjectAddress +DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_exists) +{ + char *collName; + Oid collNamespace; + AclResult aclresult; + ListCell *pl; + DefElem *fromEl = NULL; + DefElem *localeEl = NULL; + DefElem *lccollateEl = NULL; + DefElem *lcctypeEl = NULL; + DefElem *providerEl = NULL; + DefElem *deterministicEl = NULL; + DefElem *rulesEl = NULL; + DefElem *versionEl = NULL; + char *collcollate; + char *collctype; + char *colliculocale; + char *collicurules; + bool collisdeterministic; + int collencoding; + char collprovider; + char *collversion = NULL; + Oid newoid; + ObjectAddress address; + + collNamespace = QualifiedNameGetCreationNamespace(names, &collName); + + aclresult = object_aclcheck(NamespaceRelationId, collNamespace, GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(collNamespace)); + + foreach(pl, parameters) + { + DefElem *defel = lfirst_node(DefElem, pl); + DefElem **defelp; + + if (strcmp(defel->defname, "from") == 0) + defelp = &fromEl; + else if (strcmp(defel->defname, "locale") == 0) + defelp = &localeEl; + else if (strcmp(defel->defname, "lc_collate") == 0) + defelp = &lccollateEl; + else if (strcmp(defel->defname, "lc_ctype") == 0) + defelp = &lcctypeEl; + else if (strcmp(defel->defname, "provider") == 0) + defelp = &providerEl; + else if (strcmp(defel->defname, "deterministic") == 0) + defelp = &deterministicEl; + else if (strcmp(defel->defname, "rules") == 0) + defelp = &rulesEl; + else if (strcmp(defel->defname, "version") == 0) + defelp = &versionEl; + else + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("collation attribute \"%s\" not recognized", + defel->defname), + parser_errposition(pstate, defel->location))); + break; + } + if (*defelp != NULL) + errorConflictingDefElem(defel, pstate); + *defelp = defel; + } + + if (localeEl && (lccollateEl || lcctypeEl)) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"), + errdetail("LOCALE cannot be specified together with LC_COLLATE or LC_CTYPE.")); + + if (fromEl && list_length(parameters) != 1) + ereport(ERROR, + errcode(ERRCODE_SYNTAX_ERROR), + errmsg("conflicting or redundant options"), + errdetail("FROM cannot be specified together with any other options.")); + + if (fromEl) + { + Oid collid; + HeapTuple tp; + Datum datum; + bool isnull; + + collid = get_collation_oid(defGetQualifiedName(fromEl), false); + tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for collation %u", collid); + + collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider; + collisdeterministic = ((Form_pg_collation) GETSTRUCT(tp))->collisdeterministic; + collencoding = ((Form_pg_collation) GETSTRUCT(tp))->collencoding; + + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collcollate, &isnull); + if (!isnull) + collcollate = TextDatumGetCString(datum); + else + collcollate = NULL; + + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collctype, &isnull); + if (!isnull) + collctype = TextDatumGetCString(datum); + else + collctype = NULL; + + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_colliculocale, &isnull); + if (!isnull) + colliculocale = TextDatumGetCString(datum); + else + colliculocale = NULL; + + /* + * When the ICU locale comes from an existing collation, do not + * canonicalize to a language tag. + */ + + datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull); + if (!isnull) + collicurules = TextDatumGetCString(datum); + else + collicurules = NULL; + + ReleaseSysCache(tp); + + /* + * Copying the "default" collation is not allowed because most code + * checks for DEFAULT_COLLATION_OID instead of COLLPROVIDER_DEFAULT, + * and so having a second collation with COLLPROVIDER_DEFAULT would + * not work and potentially confuse or crash some code. This could be + * fixed with some legwork. + */ + if (collprovider == COLLPROVIDER_DEFAULT) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("collation \"default\" cannot be copied"))); + } + else + { + char *collproviderstr = NULL; + + collcollate = NULL; + collctype = NULL; + colliculocale = NULL; + collicurules = NULL; + + if (providerEl) + collproviderstr = defGetString(providerEl); + + if (deterministicEl) + collisdeterministic = defGetBoolean(deterministicEl); + else + collisdeterministic = true; + + if (rulesEl) + collicurules = defGetString(rulesEl); + + if (versionEl) + collversion = defGetString(versionEl); + + if (collproviderstr) + { + if (pg_strcasecmp(collproviderstr, "icu") == 0) + collprovider = COLLPROVIDER_ICU; + else if (pg_strcasecmp(collproviderstr, "libc") == 0) + collprovider = COLLPROVIDER_LIBC; + else + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("unrecognized collation provider: %s", + collproviderstr))); + } + else + collprovider = COLLPROVIDER_LIBC; + + if (localeEl) + { + if (collprovider == COLLPROVIDER_LIBC) + { + collcollate = defGetString(localeEl); + collctype = defGetString(localeEl); + } + else + colliculocale = defGetString(localeEl); + } + + if (lccollateEl) + collcollate = defGetString(lccollateEl); + + if (lcctypeEl) + collctype = defGetString(lcctypeEl); + + if (collprovider == COLLPROVIDER_LIBC) + { + if (!collcollate) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("parameter \"%s\" must be specified", + "lc_collate"))); + + if (!collctype) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("parameter \"%s\" must be specified", + "lc_ctype"))); + } + else if (collprovider == COLLPROVIDER_ICU) + { + if (!colliculocale) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("parameter \"%s\" must be specified", + "locale"))); + + /* + * During binary upgrade, preserve the locale string. Otherwise, + * canonicalize to a language tag. + */ + if (!IsBinaryUpgrade) + { + char *langtag = icu_language_tag(colliculocale, + icu_validation_level); + + if (langtag && strcmp(colliculocale, langtag) != 0) + { + ereport(NOTICE, + (errmsg("using standard form \"%s\" for ICU locale \"%s\"", + langtag, colliculocale))); + + colliculocale = langtag; + } + } + + icu_validate_locale(colliculocale); + } + + /* + * Nondeterministic collations are currently only supported with ICU + * because that's the only case where it can actually make a + * difference. So we can save writing the code for the other + * providers. + */ + if (!collisdeterministic && collprovider != COLLPROVIDER_ICU) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("nondeterministic collations not supported with this provider"))); + + if (collicurules && collprovider != COLLPROVIDER_ICU) + ereport(ERROR, + (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), + errmsg("ICU rules cannot be specified unless locale provider is ICU"))); + + if (collprovider == COLLPROVIDER_ICU) + { +#ifdef USE_ICU + /* + * We could create ICU collations with collencoding == database + * encoding, but it seems better to use -1 so that it matches the + * way initdb would create ICU collations. However, only allow + * one to be created when the current database's encoding is + * supported. Otherwise the collation is useless, plus we get + * surprising behaviors like not being able to drop the collation. + * + * Skip this test when !USE_ICU, because the error we want to + * throw for that isn't thrown till later. + */ + if (!is_encoding_supported_by_icu(GetDatabaseEncoding())) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("current database's encoding is not supported with this provider"))); +#endif + collencoding = -1; + } + else + { + collencoding = GetDatabaseEncoding(); + check_encoding_locale_matches(collencoding, collcollate, collctype); + } + } + + if (!collversion) + collversion = get_collation_actual_version(collprovider, collprovider == COLLPROVIDER_ICU ? colliculocale : collcollate); + + newoid = CollationCreate(collName, + collNamespace, + GetUserId(), + collprovider, + collisdeterministic, + collencoding, + collcollate, + collctype, + colliculocale, + collicurules, + collversion, + if_not_exists, + false); /* not quiet */ + + if (!OidIsValid(newoid)) + return InvalidObjectAddress; + + /* + * Check that the locales can be loaded. NB: pg_newlocale_from_collation + * is only supposed to be called on non-C-equivalent locales. + */ + CommandCounterIncrement(); + if (!lc_collate_is_c(newoid) || !lc_ctype_is_c(newoid)) + (void) pg_newlocale_from_collation(newoid); + + ObjectAddressSet(address, CollationRelationId, newoid); + + return address; +} + +/* + * Subroutine for ALTER COLLATION SET SCHEMA and RENAME + * + * Is there a collation with the same name of the given collation already in + * the given namespace? If so, raise an appropriate error message. + */ +void +IsThereCollationInNamespace(const char *collname, Oid nspOid) +{ + /* make sure the name doesn't already exist in new schema */ + if (SearchSysCacheExists3(COLLNAMEENCNSP, + CStringGetDatum(collname), + Int32GetDatum(GetDatabaseEncoding()), + ObjectIdGetDatum(nspOid))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("collation \"%s\" for encoding \"%s\" already exists in schema \"%s\"", + collname, GetDatabaseEncodingName(), + get_namespace_name(nspOid)))); + + /* mustn't match an any-encoding entry, either */ + if (SearchSysCacheExists3(COLLNAMEENCNSP, + CStringGetDatum(collname), + Int32GetDatum(-1), + ObjectIdGetDatum(nspOid))) + ereport(ERROR, + (errcode(ERRCODE_DUPLICATE_OBJECT), + errmsg("collation \"%s\" already exists in schema \"%s\"", + collname, get_namespace_name(nspOid)))); +} + +/* + * ALTER COLLATION + */ +ObjectAddress +AlterCollation(AlterCollationStmt *stmt) +{ + Relation rel; + Oid collOid; + HeapTuple tup; + Form_pg_collation collForm; + Datum datum; + bool isnull; + char *oldversion; + char *newversion; + ObjectAddress address; + + rel = table_open(CollationRelationId, RowExclusiveLock); + collOid = get_collation_oid(stmt->collname, false); + + if (collOid == DEFAULT_COLLATION_OID) + ereport(ERROR, + (errmsg("cannot refresh version of default collation"), + /* translator: %s is an SQL command */ + errhint("Use %s instead.", + "ALTER DATABASE ... REFRESH COLLATION VERSION"))); + + if (!object_ownercheck(CollationRelationId, collOid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_COLLATION, + NameListToString(stmt->collname)); + + tup = SearchSysCacheCopy1(COLLOID, ObjectIdGetDatum(collOid)); + if (!HeapTupleIsValid(tup)) + elog(ERROR, "cache lookup failed for collation %u", collOid); + + collForm = (Form_pg_collation) GETSTRUCT(tup); + datum = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion, &isnull); + oldversion = isnull ? NULL : TextDatumGetCString(datum); + + datum = SysCacheGetAttrNotNull(COLLOID, tup, collForm->collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colliculocale : Anum_pg_collation_collcollate); + newversion = get_collation_actual_version(collForm->collprovider, TextDatumGetCString(datum)); + + /* cannot change from NULL to non-NULL or vice versa */ + if ((!oldversion && newversion) || (oldversion && !newversion)) + elog(ERROR, "invalid collation version change"); + else if (oldversion && newversion && strcmp(newversion, oldversion) != 0) + { + bool nulls[Natts_pg_collation]; + bool replaces[Natts_pg_collation]; + Datum values[Natts_pg_collation]; + + ereport(NOTICE, + (errmsg("changing version from %s to %s", + oldversion, newversion))); + + memset(values, 0, sizeof(values)); + memset(nulls, false, sizeof(nulls)); + memset(replaces, false, sizeof(replaces)); + + values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(newversion); + replaces[Anum_pg_collation_collversion - 1] = true; + + tup = heap_modify_tuple(tup, RelationGetDescr(rel), + values, nulls, replaces); + } + else + ereport(NOTICE, + (errmsg("version has not changed"))); + + CatalogTupleUpdate(rel, &tup->t_self, tup); + + InvokeObjectPostAlterHook(CollationRelationId, collOid, 0); + + ObjectAddressSet(address, CollationRelationId, collOid); + + heap_freetuple(tup); + table_close(rel, NoLock); + + return address; +} + + +Datum +pg_collation_actual_version(PG_FUNCTION_ARGS) +{ + Oid collid = PG_GETARG_OID(0); + char provider; + char *locale; + char *version; + Datum datum; + + if (collid == DEFAULT_COLLATION_OID) + { + /* retrieve from pg_database */ + + HeapTuple dbtup = SearchSysCache1(DATABASEOID, ObjectIdGetDatum(MyDatabaseId)); + + if (!HeapTupleIsValid(dbtup)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("database with OID %u does not exist", MyDatabaseId))); + + provider = ((Form_pg_database) GETSTRUCT(dbtup))->datlocprovider; + + datum = SysCacheGetAttrNotNull(DATABASEOID, dbtup, + provider == COLLPROVIDER_ICU ? + Anum_pg_database_daticulocale : Anum_pg_database_datcollate); + + locale = TextDatumGetCString(datum); + + ReleaseSysCache(dbtup); + } + else + { + /* retrieve from pg_collation */ + + HeapTuple colltp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid)); + + if (!HeapTupleIsValid(colltp)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_OBJECT), + errmsg("collation with OID %u does not exist", collid))); + + provider = ((Form_pg_collation) GETSTRUCT(colltp))->collprovider; + Assert(provider != COLLPROVIDER_DEFAULT); + datum = SysCacheGetAttrNotNull(COLLOID, colltp, + provider == COLLPROVIDER_ICU ? + Anum_pg_collation_colliculocale : Anum_pg_collation_collcollate); + + locale = TextDatumGetCString(datum); + + ReleaseSysCache(colltp); + } + + version = get_collation_actual_version(provider, locale); + if (version) + PG_RETURN_TEXT_P(cstring_to_text(version)); + else + PG_RETURN_NULL(); +} + + +/* will we use "locale -a" in pg_import_system_collations? */ +#if defined(HAVE_LOCALE_T) && !defined(WIN32) +#define READ_LOCALE_A_OUTPUT +#endif + +/* will we use EnumSystemLocalesEx in pg_import_system_collations? */ +#ifdef WIN32 +#define ENUM_SYSTEM_LOCALE +#endif + + +#ifdef READ_LOCALE_A_OUTPUT +/* + * "Normalize" a libc locale name, stripping off encoding tags such as + * ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro" + * -> "br_FR@euro"). Return true if a new, different name was + * generated. + */ +static bool +normalize_libc_locale_name(char *new, const char *old) +{ + char *n = new; + const char *o = old; + bool changed = false; + + while (*o) + { + if (*o == '.') + { + /* skip over encoding tag such as ".utf8" or ".UTF-8" */ + o++; + while ((*o >= 'A' && *o <= 'Z') + || (*o >= 'a' && *o <= 'z') + || (*o >= '0' && *o <= '9') + || (*o == '-')) + o++; + changed = true; + } + else + *n++ = *o++; + } + *n = '\0'; + + return changed; +} + +/* + * qsort comparator for CollAliasData items + */ +static int +cmpaliases(const void *a, const void *b) +{ + const CollAliasData *ca = (const CollAliasData *) a; + const CollAliasData *cb = (const CollAliasData *) b; + + /* comparing localename is enough because other fields are derived */ + return strcmp(ca->localename, cb->localename); +} +#endif /* READ_LOCALE_A_OUTPUT */ + + +#ifdef USE_ICU +/* + * Get a comment (specifically, the display name) for an ICU locale. + * The result is a palloc'd string, or NULL if we can't get a comment + * or find that it's not all ASCII. (We can *not* accept non-ASCII + * comments, because the contents of template0 must be encoding-agnostic.) + */ +static char * +get_icu_locale_comment(const char *localename) +{ + UErrorCode status; + UChar displayname[128]; + int32 len_uchar; + int32 i; + char *result; + + status = U_ZERO_ERROR; + len_uchar = uloc_getDisplayName(localename, "en", + displayname, lengthof(displayname), + &status); + if (U_FAILURE(status)) + return NULL; /* no good reason to raise an error */ + + /* Check for non-ASCII comment (can't use pg_is_ascii for this) */ + for (i = 0; i < len_uchar; i++) + { + if (displayname[i] > 127) + return NULL; + } + + /* OK, transcribe */ + result = palloc(len_uchar + 1); + for (i = 0; i < len_uchar; i++) + result[i] = displayname[i]; + result[len_uchar] = '\0'; + + return result; +} +#endif /* USE_ICU */ + + +/* + * Create a new collation using the input locale 'locale'. (subroutine for + * pg_import_system_collations()) + * + * 'nspid' is the namespace id where the collation will be created. + * + * 'nvalidp' is incremented if the locale has a valid encoding. + * + * 'ncreatedp' is incremented if the collation is actually created. If the + * collation already exists it will quietly do nothing. + * + * The returned value is the encoding of the locale, -1 if the locale is not + * valid for creating a collation. + * + */ +pg_attribute_unused() +static int +create_collation_from_locale(const char *locale, int nspid, + int *nvalidp, int *ncreatedp) +{ + int enc; + Oid collid; + + /* + * Some systems have locale names that don't consist entirely of ASCII + * letters (such as "bokmål" or "français"). This is pretty + * silly, since we need the locale itself to interpret the non-ASCII + * characters. We can't do much with those, so we filter them out. + */ + if (!pg_is_ascii(locale)) + { + elog(DEBUG1, "skipping locale with non-ASCII name: \"%s\"", locale); + return -1; + } + + enc = pg_get_encoding_from_locale(locale, false); + if (enc < 0) + { + elog(DEBUG1, "skipping locale with unrecognized encoding: \"%s\"", locale); + return -1; + } + if (!PG_VALID_BE_ENCODING(enc)) + { + elog(DEBUG1, "skipping locale with client-only encoding: \"%s\"", locale); + return -1; + } + if (enc == PG_SQL_ASCII) + return -1; /* C/POSIX are already in the catalog */ + + /* count valid locales found in operating system */ + (*nvalidp)++; + + /* + * Create a collation named the same as the locale, but quietly doing + * nothing if it already exists. This is the behavior we need even at + * initdb time, because some versions of "locale -a" can report the same + * locale name more than once. And it's convenient for later import runs, + * too, since you just about always want to add on new locales without a + * lot of chatter about existing ones. + */ + collid = CollationCreate(locale, nspid, GetUserId(), + COLLPROVIDER_LIBC, true, enc, + locale, locale, NULL, NULL, + get_collation_actual_version(COLLPROVIDER_LIBC, locale), + true, true); + if (OidIsValid(collid)) + { + (*ncreatedp)++; + + /* Must do CCI between inserts to handle duplicates correctly */ + CommandCounterIncrement(); + } + + return enc; +} + + +#ifdef ENUM_SYSTEM_LOCALE +/* parameter to be passed to the callback function win32_read_locale() */ +typedef struct +{ + Oid nspid; + int *ncreatedp; + int *nvalidp; +} CollParam; + +/* + * Callback function for EnumSystemLocalesEx() in + * pg_import_system_collations(). Creates a collation for every valid locale + * and a POSIX alias collation. + * + * The callback contract is to return TRUE to continue enumerating and FALSE + * to stop enumerating. We always want to continue. + */ +static BOOL CALLBACK +win32_read_locale(LPWSTR pStr, DWORD dwFlags, LPARAM lparam) +{ + CollParam *param = (CollParam *) lparam; + char localebuf[NAMEDATALEN]; + int result; + int enc; + + (void) dwFlags; + + result = WideCharToMultiByte(CP_ACP, 0, pStr, -1, localebuf, NAMEDATALEN, + NULL, NULL); + + if (result == 0) + { + if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) + elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf); + return TRUE; + } + if (localebuf[0] == '\0') + return TRUE; + + enc = create_collation_from_locale(localebuf, param->nspid, + param->nvalidp, param->ncreatedp); + if (enc < 0) + return TRUE; + + /* + * Windows will use hyphens between language and territory, where POSIX + * uses an underscore. Simply create a POSIX alias. + */ + if (strchr(localebuf, '-')) + { + char alias[NAMEDATALEN]; + Oid collid; + + strcpy(alias, localebuf); + for (char *p = alias; *p; p++) + if (*p == '-') + *p = '_'; + + collid = CollationCreate(alias, param->nspid, GetUserId(), + COLLPROVIDER_LIBC, true, enc, + localebuf, localebuf, NULL, NULL, + get_collation_actual_version(COLLPROVIDER_LIBC, localebuf), + true, true); + if (OidIsValid(collid)) + { + (*param->ncreatedp)++; + + CommandCounterIncrement(); + } + } + + return TRUE; +} +#endif /* ENUM_SYSTEM_LOCALE */ + + +/* + * pg_import_system_collations: add known system collations to pg_collation + */ +Datum +pg_import_system_collations(PG_FUNCTION_ARGS) +{ + Oid nspid = PG_GETARG_OID(0); + int ncreated = 0; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to import system collations"))); + + if (!SearchSysCacheExists1(NAMESPACEOID, ObjectIdGetDatum(nspid))) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_SCHEMA), + errmsg("schema with OID %u does not exist", nspid))); + + /* Load collations known to libc, using "locale -a" to enumerate them */ +#ifdef READ_LOCALE_A_OUTPUT + { + FILE *locale_a_handle; + char localebuf[LOCALE_NAME_BUFLEN]; + int nvalid = 0; + Oid collid; + CollAliasData *aliases; + int naliases, + maxaliases, + i; + + /* expansible array of aliases */ + maxaliases = 100; + aliases = (CollAliasData *) palloc(maxaliases * sizeof(CollAliasData)); + naliases = 0; + + locale_a_handle = OpenPipeStream("locale -a", "r"); + if (locale_a_handle == NULL) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not execute command \"%s\": %m", + "locale -a"))); + + while (fgets(localebuf, sizeof(localebuf), locale_a_handle)) + { + size_t len; + int enc; + char alias[LOCALE_NAME_BUFLEN]; + + len = strlen(localebuf); + + if (len == 0 || localebuf[len - 1] != '\n') + { + elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf); + continue; + } + localebuf[len - 1] = '\0'; + + enc = create_collation_from_locale(localebuf, nspid, &nvalid, &ncreated); + if (enc < 0) + continue; + + /* + * Generate aliases such as "en_US" in addition to "en_US.utf8" + * for ease of use. Note that collation names are unique per + * encoding only, so this doesn't clash with "en_US" for LATIN1, + * say. + * + * However, it might conflict with a name we'll see later in the + * "locale -a" output. So save up the aliases and try to add them + * after we've read all the output. + */ + if (normalize_libc_locale_name(alias, localebuf)) + { + if (naliases >= maxaliases) + { + maxaliases *= 2; + aliases = (CollAliasData *) + repalloc(aliases, maxaliases * sizeof(CollAliasData)); + } + aliases[naliases].localename = pstrdup(localebuf); + aliases[naliases].alias = pstrdup(alias); + aliases[naliases].enc = enc; + naliases++; + } + } + + /* + * We don't check the return value of this, because we want to support + * the case where there "locale" command does not exist. (This is + * unusual but can happen on minimalized Linux distributions, for + * example.) We will warn below if no locales could be found. + */ + ClosePipeStream(locale_a_handle); + + /* + * Before processing the aliases, sort them by locale name. The point + * here is that if "locale -a" gives us multiple locale names with the + * same encoding and base name, say "en_US.utf8" and "en_US.utf-8", we + * want to pick a deterministic one of them. First in ASCII sort + * order is a good enough rule. (Before PG 10, the code corresponding + * to this logic in initdb.c had an additional ordering rule, to + * prefer the locale name exactly matching the alias, if any. We + * don't need to consider that here, because we would have already + * created such a pg_collation entry above, and that one will win.) + */ + if (naliases > 1) + qsort(aliases, naliases, sizeof(CollAliasData), cmpaliases); + + /* Now add aliases, ignoring any that match pre-existing entries */ + for (i = 0; i < naliases; i++) + { + char *locale = aliases[i].localename; + char *alias = aliases[i].alias; + int enc = aliases[i].enc; + + collid = CollationCreate(alias, nspid, GetUserId(), + COLLPROVIDER_LIBC, true, enc, + locale, locale, NULL, NULL, + get_collation_actual_version(COLLPROVIDER_LIBC, locale), + true, true); + if (OidIsValid(collid)) + { + ncreated++; + + CommandCounterIncrement(); + } + } + + /* Give a warning if "locale -a" seems to be malfunctioning */ + if (nvalid == 0) + ereport(WARNING, + (errmsg("no usable system locales were found"))); + } +#endif /* READ_LOCALE_A_OUTPUT */ + + /* + * Load collations known to ICU + * + * We use uloc_countAvailable()/uloc_getAvailable() rather than + * ucol_countAvailable()/ucol_getAvailable(). The former returns a full + * set of language+region combinations, whereas the latter only returns + * language+region combinations if they are distinct from the language's + * base collation. So there might not be a de-DE or en-GB, which would be + * confusing. + */ +#ifdef USE_ICU + { + int i; + + /* + * Start the loop at -1 to sneak in the root locale without too much + * code duplication. + */ + for (i = -1; i < uloc_countAvailable(); i++) + { + const char *name; + char *langtag; + char *icucomment; + Oid collid; + + if (i == -1) + name = ""; /* ICU root locale */ + else + name = uloc_getAvailable(i); + + langtag = icu_language_tag(name, ERROR); + + /* + * Be paranoid about not allowing any non-ASCII strings into + * pg_collation + */ + if (!pg_is_ascii(langtag)) + continue; + + collid = CollationCreate(psprintf("%s-x-icu", langtag), + nspid, GetUserId(), + COLLPROVIDER_ICU, true, -1, + NULL, NULL, langtag, NULL, + get_collation_actual_version(COLLPROVIDER_ICU, langtag), + true, true); + if (OidIsValid(collid)) + { + ncreated++; + + CommandCounterIncrement(); + + icucomment = get_icu_locale_comment(name); + if (icucomment) + CreateComments(collid, CollationRelationId, 0, + icucomment); + } + } + } +#endif /* USE_ICU */ + + /* Load collations known to WIN32 */ +#ifdef ENUM_SYSTEM_LOCALE + { + int nvalid = 0; + CollParam param; + + param.nspid = nspid; + param.ncreatedp = &ncreated; + param.nvalidp = &nvalid; + + /* + * Enumerate the locales that are either installed on or supported by + * the OS. + */ + if (!EnumSystemLocalesEx(win32_read_locale, LOCALE_ALL, + (LPARAM) ¶m, NULL)) + _dosmaperr(GetLastError()); + + /* Give a warning if EnumSystemLocalesEx seems to be malfunctioning */ + if (nvalid == 0) + ereport(WARNING, + (errmsg("no usable system locales were found"))); + } +#endif /* ENUM_SYSTEM_LOCALE */ + + PG_RETURN_INT32(ncreated); +} |