diff options
Diffstat (limited to 'locale.c')
-rw-r--r-- | locale.c | 645 |
1 files changed, 645 insertions, 0 deletions
diff --git a/locale.c b/locale.c new file mode 100644 index 0000000..fabf7b1 --- /dev/null +++ b/locale.c @@ -0,0 +1,645 @@ +/* locale.c - Miscellaneous internationalization functions. */ + +/* Copyright (C) 1996-2009,2012,2016-2021 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne Again SHell. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "config.h" + +#include "bashtypes.h" + +#if defined (HAVE_UNISTD_H) +# include <unistd.h> +#endif + +#if HAVE_LANGINFO_CODESET +# include <langinfo.h> +#endif + +#include "bashintl.h" +#include "bashansi.h" +#include <stdio.h> +#include "chartypes.h" +#include <errno.h> + +#include "shell.h" +#include "input.h" /* For bash_input */ + +#ifndef errno +extern int errno; +#endif + +int locale_utf8locale; +int locale_mb_cur_max; /* value of MB_CUR_MAX for current locale (LC_CTYPE) */ +int locale_shiftstates = 0; + +int singlequote_translations = 0; /* single-quote output of $"..." */ + +extern int dump_translatable_strings, dump_po_strings; + +/* The current locale when the program begins */ +static char *default_locale; + +/* The current domain for textdomain(3). */ +static char *default_domain; +static char *default_dir; + +/* tracks the value of LC_ALL; used to override values for other locale + categories */ +static char *lc_all; + +/* tracks the value of LC_ALL; used to provide defaults for locale + categories */ +static char *lang; + +/* Called to reset all of the locale variables to their appropriate values + if (and only if) LC_ALL has not been assigned a value. */ +static int reset_locale_vars PARAMS((void)); + +static void locale_setblanks PARAMS((void)); +static int locale_isutf8 PARAMS((char *)); + +/* Set the value of default_locale and make the current locale the + system default locale. This should be called very early in main(). */ +void +set_default_locale () +{ +#if defined (HAVE_SETLOCALE) + default_locale = setlocale (LC_ALL, ""); + if (default_locale) + default_locale = savestring (default_locale); +#else + default_locale = savestring ("C"); +#endif /* HAVE_SETLOCALE */ + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + + locale_mb_cur_max = MB_CUR_MAX; + locale_utf8locale = locale_isutf8 (default_locale); +#if defined (HANDLE_MULTIBYTE) + locale_shiftstates = mblen ((char *)NULL, 0); +#else + locale_shiftstates = 0; +#endif +} + +/* Set default values for LC_CTYPE, LC_COLLATE, LC_MESSAGES, LC_NUMERIC and + LC_TIME if they are not specified in the environment, but LC_ALL is. This + should be called from main() after parsing the environment. */ +void +set_default_locale_vars () +{ + char *val; + +#if defined (HAVE_SETLOCALE) + +# if defined (LC_CTYPE) + val = get_string_value ("LC_CTYPE"); + if (val == 0 && lc_all && *lc_all) + { + setlocale (LC_CTYPE, lc_all); + locale_setblanks (); + locale_mb_cur_max = MB_CUR_MAX; + locale_utf8locale = locale_isutf8 (lc_all); + +# if defined (HANDLE_MULTIBYTE) + locale_shiftstates = mblen ((char *)NULL, 0); +# else + locale_shiftstates = 0; +# endif + + u32reset (); + } +# endif + +# if defined (LC_COLLATE) + val = get_string_value ("LC_COLLATE"); + if (val == 0 && lc_all && *lc_all) + setlocale (LC_COLLATE, lc_all); +# endif /* LC_COLLATE */ + +# if defined (LC_MESSAGES) + val = get_string_value ("LC_MESSAGES"); + if (val == 0 && lc_all && *lc_all) + setlocale (LC_MESSAGES, lc_all); +# endif /* LC_MESSAGES */ + +# if defined (LC_NUMERIC) + val = get_string_value ("LC_NUMERIC"); + if (val == 0 && lc_all && *lc_all) + setlocale (LC_NUMERIC, lc_all); +# endif /* LC_NUMERIC */ + +# if defined (LC_TIME) + val = get_string_value ("LC_TIME"); + if (val == 0 && lc_all && *lc_all) + setlocale (LC_TIME, lc_all); +# endif /* LC_TIME */ + +#endif /* HAVE_SETLOCALE */ + + val = get_string_value ("TEXTDOMAIN"); + if (val && *val) + { + FREE (default_domain); + default_domain = savestring (val); + if (default_dir && *default_dir) + bindtextdomain (default_domain, default_dir); + } + + val = get_string_value ("TEXTDOMAINDIR"); + if (val && *val) + { + FREE (default_dir); + default_dir = savestring (val); + if (default_domain && *default_domain) + bindtextdomain (default_domain, default_dir); + } +} + +/* Set one of the locale categories (specified by VAR) to VALUE. Returns 1 + if successful, 0 otherwise. */ +int +set_locale_var (var, value) + char *var, *value; +{ + int r; + char *x; + + x = ""; + errno = 0; + if (var[0] == 'T' && var[10] == 0) /* TEXTDOMAIN */ + { + FREE (default_domain); + default_domain = value ? savestring (value) : (char *)NULL; + if (default_dir && *default_dir) + bindtextdomain (default_domain, default_dir); + return (1); + } + else if (var[0] == 'T') /* TEXTDOMAINDIR */ + { + FREE (default_dir); + default_dir = value ? savestring (value) : (char *)NULL; + if (default_domain && *default_domain) + bindtextdomain (default_domain, default_dir); + return (1); + } + + /* var[0] == 'L' && var[1] == 'C' && var[2] == '_' */ + + else if (var[3] == 'A') /* LC_ALL */ + { + FREE (lc_all); + if (value) + lc_all = savestring (value); + else + { + lc_all = (char *)xmalloc (1); + lc_all[0] = '\0'; + } +#if defined (HAVE_SETLOCALE) + r = *lc_all ? ((x = setlocale (LC_ALL, lc_all)) != 0) : reset_locale_vars (); + if (x == 0) + { + if (errno == 0) + internal_warning(_("setlocale: LC_ALL: cannot change locale (%s)"), lc_all); + else + internal_warning(_("setlocale: LC_ALL: cannot change locale (%s): %s"), lc_all, strerror (errno)); + } + locale_setblanks (); + locale_mb_cur_max = MB_CUR_MAX; + /* if LC_ALL == "", reset_locale_vars has already called this */ + if (*lc_all && x) + locale_utf8locale = locale_isutf8 (lc_all); +# if defined (HANDLE_MULTIBYTE) + locale_shiftstates = mblen ((char *)NULL, 0); +# else + locale_shiftstates = 0; +# endif + u32reset (); + return r; +#else + return (1); +#endif + } + +#if defined (HAVE_SETLOCALE) + else if (var[3] == 'C' && var[4] == 'T') /* LC_CTYPE */ + { +# if defined (LC_CTYPE) + if (lc_all == 0 || *lc_all == '\0') + { + x = setlocale (LC_CTYPE, get_locale_var ("LC_CTYPE")); + locale_setblanks (); + locale_mb_cur_max = MB_CUR_MAX; + /* if setlocale() returns NULL, the locale is not changed */ + if (x) + locale_utf8locale = locale_isutf8 (x); +#if defined (HANDLE_MULTIBYTE) + locale_shiftstates = mblen ((char *)NULL, 0); +#else + locale_shiftstates = 0; +#endif + u32reset (); + } +# endif + } + else if (var[3] == 'C' && var[4] == 'O') /* LC_COLLATE */ + { +# if defined (LC_COLLATE) + if (lc_all == 0 || *lc_all == '\0') + x = setlocale (LC_COLLATE, get_locale_var ("LC_COLLATE")); +# endif /* LC_COLLATE */ + } + else if (var[3] == 'M' && var[4] == 'E') /* LC_MESSAGES */ + { +# if defined (LC_MESSAGES) + if (lc_all == 0 || *lc_all == '\0') + x = setlocale (LC_MESSAGES, get_locale_var ("LC_MESSAGES")); +# endif /* LC_MESSAGES */ + } + else if (var[3] == 'N' && var[4] == 'U') /* LC_NUMERIC */ + { +# if defined (LC_NUMERIC) + if (lc_all == 0 || *lc_all == '\0') + x = setlocale (LC_NUMERIC, get_locale_var ("LC_NUMERIC")); +# endif /* LC_NUMERIC */ + } + else if (var[3] == 'T' && var[4] == 'I') /* LC_TIME */ + { +# if defined (LC_TIME) + if (lc_all == 0 || *lc_all == '\0') + x = setlocale (LC_TIME, get_locale_var ("LC_TIME")); +# endif /* LC_TIME */ + } +#endif /* HAVE_SETLOCALE */ + + if (x == 0) + { + if (errno == 0) + internal_warning(_("setlocale: %s: cannot change locale (%s)"), var, get_locale_var (var)); + else + internal_warning(_("setlocale: %s: cannot change locale (%s): %s"), var, get_locale_var (var), strerror (errno)); + } + + return (x != 0); +} + +/* Called when LANG is assigned a value. Tracks value in `lang'. Calls + reset_locale_vars() to reset any default values if LC_ALL is unset or + null. */ +int +set_lang (var, value) + char *var, *value; +{ + FREE (lang); + if (value) + lang = savestring (value); + else + { + lang = (char *)xmalloc (1); + lang[0] = '\0'; + } + + return ((lc_all == 0 || *lc_all == 0) ? reset_locale_vars () : 0); +} + +/* Set default values for LANG and LC_ALL. Default values for all other + locale-related variables depend on these. */ +void +set_default_lang () +{ + char *v; + + v = get_string_value ("LC_ALL"); + set_locale_var ("LC_ALL", v); + + v = get_string_value ("LANG"); + set_lang ("LANG", v); +} + +/* Get the value of one of the locale variables (LC_MESSAGES, LC_CTYPE). + The precedence is as POSIX.2 specifies: LC_ALL has precedence over + the specific locale variables, and LANG, if set, is used as the default. */ +char * +get_locale_var (var) + char *var; +{ + char *locale; + + locale = lc_all; + + if (locale == 0 || *locale == 0) + locale = get_string_value (var); /* XXX - no mem leak */ + if (locale == 0 || *locale == 0) + locale = lang; + if (locale == 0 || *locale == 0) +#if 0 + locale = default_locale; /* system-dependent; not really portable. should it be "C"? */ +#else + locale = ""; +#endif + return (locale); +} + +/* Called to reset all of the locale variables to their appropriate values + if (and only if) LC_ALL has not been assigned a value. DO NOT CALL THIS + IF LC_ALL HAS BEEN ASSIGNED A VALUE. */ +static int +reset_locale_vars () +{ + char *t, *x; +#if defined (HAVE_SETLOCALE) + if (lang == 0 || *lang == '\0') + maybe_make_export_env (); /* trust that this will change environment for setlocale */ + if (setlocale (LC_ALL, lang ? lang : "") == 0) + return 0; + + x = 0; +# if defined (LC_CTYPE) + x = setlocale (LC_CTYPE, get_locale_var ("LC_CTYPE")); +# endif +# if defined (LC_COLLATE) + t = setlocale (LC_COLLATE, get_locale_var ("LC_COLLATE")); +# endif +# if defined (LC_MESSAGES) + t = setlocale (LC_MESSAGES, get_locale_var ("LC_MESSAGES")); +# endif +# if defined (LC_NUMERIC) + t = setlocale (LC_NUMERIC, get_locale_var ("LC_NUMERIC")); +# endif +# if defined (LC_TIME) + t = setlocale (LC_TIME, get_locale_var ("LC_TIME")); +# endif + + locale_setblanks (); + locale_mb_cur_max = MB_CUR_MAX; + if (x) + locale_utf8locale = locale_isutf8 (x); +# if defined (HANDLE_MULTIBYTE) + locale_shiftstates = mblen ((char *)NULL, 0); +# else + locale_shiftstates = 0; +# endif + u32reset (); +#endif + return 1; +} + +#if defined (TRANSLATABLE_STRINGS) +/* Translate the contents of STRING, a $"..." quoted string, according + to the current locale. In the `C' or `POSIX' locale, or if gettext() + is not available, the passed string is returned unchanged. The + length of the translated string is returned in LENP, if non-null. */ +char * +localetrans (string, len, lenp) + char *string; + int len, *lenp; +{ + char *locale, *t; + char *translated; + int tlen; + + /* Don't try to translate null strings. */ + if (string == 0 || *string == 0) + { + if (lenp) + *lenp = 0; + return ((char *)NULL); + } + + locale = get_locale_var ("LC_MESSAGES"); + + /* If we don't have setlocale() or the current locale is `C' or `POSIX', + just return the string. If we don't have gettext(), there's no use + doing anything else. */ + if (locale == 0 || locale[0] == '\0' || + (locale[0] == 'C' && locale[1] == '\0') || STREQ (locale, "POSIX")) + { + t = (char *)xmalloc (len + 1); + strcpy (t, string); + if (lenp) + *lenp = len; + return (t); + } + + /* Now try to translate it. */ + if (default_domain && *default_domain) + translated = dgettext (default_domain, string); + else + translated = string; + + if (translated == string) /* gettext returns its argument if untranslatable */ + { + t = (char *)xmalloc (len + 1); + strcpy (t, string); + if (lenp) + *lenp = len; + } + else + { + tlen = strlen (translated); + t = (char *)xmalloc (tlen + 1); + strcpy (t, translated); + if (lenp) + *lenp = tlen; + } + return (t); +} + +/* Change a bash string into a string suitable for inclusion in a `po' file. + This backslash-escapes `"' and `\' and changes newlines into \\\n"\n". */ +char * +mk_msgstr (string, foundnlp) + char *string; + int *foundnlp; +{ + register int c, len; + char *result, *r, *s; + + for (len = 0, s = string; s && *s; s++) + { + len++; + if (*s == '"' || *s == '\\') + len++; + else if (*s == '\n') + len += 5; + } + + r = result = (char *)xmalloc (len + 3); + *r++ = '"'; + + for (s = string; s && (c = *s); s++) + { + if (c == '\n') /* <NL> -> \n"<NL>" */ + { + *r++ = '\\'; + *r++ = 'n'; + *r++ = '"'; + *r++ = '\n'; + *r++ = '"'; + if (foundnlp) + *foundnlp = 1; + continue; + } + if (c == '"' || c == '\\') + *r++ = '\\'; + *r++ = c; + } + + *r++ = '"'; + *r++ = '\0'; + + return result; +} + +/* $"..." -- Translate the portion of STRING between START and END + according to current locale using gettext (if available) and return + the result. The caller will take care of leaving the quotes intact. + The string will be left without the leading `$' by the caller. + If translation is performed, the translated string will be double-quoted + by the caller. The length of the translated string is returned in LENP, + if non-null. */ +char * +locale_expand (string, start, end, lineno, lenp) + char *string; + int start, end, lineno, *lenp; +{ + int len, tlen, foundnl; + char *temp, *t, *t2; + + temp = (char *)xmalloc (end - start + 1); + for (tlen = 0, len = start; len < end; ) + temp[tlen++] = string[len++]; + temp[tlen] = '\0'; + + /* If we're just dumping translatable strings, don't do anything with the + string itself, but if we're dumping in `po' file format, convert it into + a form more palatable to gettext(3) and friends by quoting `"' and `\' + with backslashes and converting <NL> into `\n"<NL>"'. If we find a + newline in TEMP, we first output a `msgid ""' line and then the + translated string; otherwise we output the `msgid' and translated + string all on one line. */ + if (dump_translatable_strings) + { + if (dump_po_strings) + { + foundnl = 0; + t = mk_msgstr (temp, &foundnl); + t2 = foundnl ? "\"\"\n" : ""; + + printf ("#: %s:%d\nmsgid %s%s\nmsgstr \"\"\n", + yy_input_name (), lineno, t2, t); + free (t); + } + else + printf ("\"%s\"\n", temp); + + if (lenp) + *lenp = tlen; + return (temp); + } + else if (*temp) + { + t = localetrans (temp, tlen, &len); + free (temp); + if (lenp) + *lenp = len; + return (t); + } + else + { + if (lenp) + *lenp = 0; + return (temp); + } +} +#endif + +/* Set every character in the <blank> character class to be a shell break + character for the lexical analyzer when the locale changes. */ +static void +locale_setblanks () +{ + int x; + + for (x = 0; x < sh_syntabsiz; x++) + { + if (isblank ((unsigned char)x)) + sh_syntaxtab[x] |= CSHBRK|CBLANK; + else if (member (x, shell_break_chars)) + { + sh_syntaxtab[x] |= CSHBRK; + sh_syntaxtab[x] &= ~CBLANK; + } + else + sh_syntaxtab[x] &= ~(CSHBRK|CBLANK); + } +} + +/* Parse a locale specification + language[_territory][.codeset][@modifier][+special][,[sponsor][_revision]] + and return TRUE if the codeset is UTF-8 or utf8 */ +static int +locale_isutf8 (lspec) + char *lspec; +{ + char *cp, *encoding; + +#if HAVE_LANGINFO_CODESET + cp = nl_langinfo (CODESET); + return (STREQ (cp, "UTF-8") || STREQ (cp, "utf8")); +#elif HAVE_LOCALE_CHARSET + cp = locale_charset (); + return (STREQ (cp, "UTF-8") || STREQ (cp, "utf8")); +#else + /* Take a shot */ + for (cp = lspec; *cp && *cp != '@' && *cp != '+' && *cp != ','; cp++) + { + if (*cp == '.') + { + for (encoding = ++cp; *cp && *cp != '@' && *cp != '+' && *cp != ','; cp++) + ; + /* The encoding (codeset) is the substring between encoding and cp */ + if ((cp - encoding == 5 && STREQN (encoding, "UTF-8", 5)) || + (cp - encoding == 4 && STREQN (encoding, "utf8", 4))) + return 1; + else + return 0; + } + } + return 0; +#endif +} + +#if defined (HAVE_LOCALECONV) +int +locale_decpoint () +{ + struct lconv *lv; + + lv = localeconv (); + return (lv && lv->decimal_point && lv->decimal_point[0]) ? lv->decimal_point[0] : '.'; +} +#else +# undef locale_decpoint +int +locale_decpoint () +{ + return '.'; +} +#endif |