1 files changed, 318 insertions, 0 deletions
diff --git a/lib/propername.c b/lib/propername.c
new file mode 100644
index 0000000..eaef4f2
--- /dev/null
+++ b/lib/propername.c
@@ -0,0 +1,318 @@
+/* Localization of proper names.
+   Copyright (C) 2006-2022 Free Software Foundation, Inc.
+   Written by Bruno Haible <bruno@clisp.org>, 2006.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Without this pragma, gcc 4.7.0 20111124 mistakenly suggests that
+   the proper_name function might be a candidate for attribute 'const'.  */
+#if (__GNUC__ == 4 && 6 <= __GNUC_MINOR__) || 4 < __GNUC__
+# pragma GCC diagnostic ignored "-Wsuggest-attribute=const"
+#endif
+
+#include <config.h>
+
+/* Specification.  */
+#include "propername.h"
+
+#include <ctype.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#if HAVE_ICONV
+# include <iconv.h>
+#endif
+
+#include "trim.h"
+#include "mbchar.h"
+#include "mbuiter.h"
+#include "localcharset.h"
+#include "c-strcase.h"
+#include "xstriconv.h"
+#include "xalloc.h"
+#include "gettext.h"
+
+
+/* Tests whether STRING contains trim (SUB), starting and ending at word
+   boundaries.  Returns true if such an occurrence exists, false otherwise.
+   Here, instead of implementing Unicode Standard Annex #29 for determining
+   word boundaries, we assume that trim (SUB) starts and ends with words and
+   only test whether the part before it ends with a non-word and the part
+   after it starts with a non-word.  */
+static bool
+mbsstr_trimmed_wordbounded (const char *string, const char *sub)
+{
+  /* The scan resumes just past each rejected match, so keep the true start:
+     a match found at a resume point still has a character before it.  */
+  const char *string_start = string;
+  char *tsub = trim (sub);
+  bool found = false;
+
+  for (; *string != '\0';)
+    {
+      const char *tsub_in_string = mbsstr (string, tsub);
+      if (tsub_in_string == NULL)
+        break;
+      else
+        {
+          if (MB_CUR_MAX > 1)
+            {
+              mbui_iterator_t string_iter;
+              bool word_boundary_before = true;
+              bool word_boundary_after = true;
+
+              /* Find the character just before the occurrence, if any.  */
+              mbui_init (string_iter, string_start);
+              if (mbui_cur_ptr (string_iter) < tsub_in_string)
+                {
+                  mbchar_t last_char_before_tsub;
+                  do
+                    {
+                      if (!mbui_avail (string_iter))
+                        abort ();
+                      last_char_before_tsub = mbui_cur (string_iter);
+                      mbui_advance (string_iter);
+                    }
+                  while (mbui_cur_ptr (string_iter) < tsub_in_string);
+                  if (mb_isalnum (last_char_before_tsub))
+                    word_boundary_before = false;
+                }
+
+              /* Step over the occurrence, character by character.  */
+              mbui_init (string_iter, tsub_in_string);
+              {
+                mbui_iterator_t tsub_iter;
+
+                for (mbui_init (tsub_iter, tsub);
+                     mbui_avail (tsub_iter);
+                     mbui_advance (tsub_iter))
+                  {
+                    if (!mbui_avail (string_iter))
+                      abort ();
+                    mbui_advance (string_iter);
+                  }
+              }
+              if (mbui_avail (string_iter))
+                {
+                  mbchar_t first_char_after_tsub = mbui_cur (string_iter);
+                  if (mb_isalnum (first_char_after_tsub))
+                    word_boundary_after = false;
+                }
+
+              if (word_boundary_before && word_boundary_after)
+                {
+                  found = true;
+                  break;
+                }
+
+              /* Rejected: resume one character past its start.  */
+              mbui_init (string_iter, tsub_in_string);
+              if (!mbui_avail (string_iter))
+                break;
+              string = tsub_in_string + mb_len (mbui_cur (string_iter));
+            }
+          else
+            {
+              const char *p = tsub_in_string + strlen (tsub);
+              bool word_boundary_before = true;
+              bool word_boundary_after = true;
+
+              if (string_start < tsub_in_string)
+                if (isalnum ((unsigned char) tsub_in_string[-1]))
+                  word_boundary_before = false;
+
+              if (*p != '\0' && isalnum ((unsigned char) *p))
+                word_boundary_after = false;
+
+              if (word_boundary_before && word_boundary_after)
+                {
+                  found = true;
+                  break;
+                }
+
+              if (*tsub_in_string == '\0')
+                break;
+              string = tsub_in_string + 1;
+            }
+        }
+    }
+  free (tsub);
+  return found;
+}
+
+/* Return the localization of NAME.  NAME is written in ASCII.
+   The result is NAME itself when no translation is found, the translation
+   when it contains NAME at word boundaries, or else a newly allocated
+   string of the form "TRANSLATION (NAME)".  */
+
+const char *
+proper_name (const char *name)
+{
+  const char *localized = gettext (name);
+  char *combined;
+
+  /* Getting NAME itself back means that there is no translation.  */
+  if (localized == name)
+    return name;
+
+  /* A translation that already contains the original name is used as is.  */
+  if (mbsstr_trimmed_wordbounded (localized, name))
+    return localized;
+
+  /* Otherwise build "TRANSLATION (NAME)".  The buffer holds both strings
+     plus " (" (2 bytes), ")" (1 byte) and the terminating NUL (1 byte).  */
+  combined =
+    XNMALLOC (strlen (localized) + 2 + strlen (name) + 1 + 1, char);
+  sprintf (combined, "%s (%s)", localized, name);
+  return combined;
+}
+
+/* Return the localization of a name whose original writing is not ASCII.
+   NAME_UTF8 is the real name, written in UTF-8 with octal or hexadecimal
+   escape sequences.  NAME_ASCII is a fallback written only with ASCII
+   characters.
+   The result is the name (in locale encoding if convertible, else
+   NAME_ASCII) when the translation is a no-op, the translation when it
+   contains the name, or else a newly allocated "TRANSLATION (NAME)".  */
+
+const char *
+proper_name_utf8 (const char *name_ascii, const char *name_utf8)
+{
+  /* The translation is looked up under the ASCII spelling.  */
+  const char *translation = gettext (name_ascii);
+
+  /* NAME_UTF8 gets converted to the encoding of the current locale.  */
+  const char *locale_code = locale_charset ();
+  char *converted_buf = NULL;   /* owned result of the plain conversion */
+  char *translit_buf = NULL;    /* owned result of the transliteration */
+  const char *converted = NULL; /* NAME_UTF8 converted, or NULL */
+  const char *translit = NULL;  /* NAME_UTF8 transliterated, or NULL */
+  const char *name;
+  const char *result;
+
+  if (c_strcasecmp (locale_code, "UTF-8") == 0)
+    {
+      /* The locale encoding is UTF-8: NAME_UTF8 can be used unchanged.  */
+      converted = name_utf8;
+      translit = name_utf8;
+    }
+  else
+    {
+      /* Without HAVE_ICONV no conversion is attempted; both stay NULL.  */
+#if HAVE_ICONV
+      converted = converted_buf =
+        xstr_iconv (name_utf8, "UTF-8", locale_code);
+
+      /* "//TRANSLIT" is only tried with the iconv implementations below.  */
+# if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) \
+      && !defined __UCLIBC__) \
+     || _LIBICONV_VERSION >= 0x0105
+      {
+        size_t code_len = strlen (locale_code);
+        char *translit_code = XNMALLOC (code_len + 10 + 1, char);
+        char *attempt;
+
+        /* The target encoding is LOCALE_CODE followed by "//TRANSLIT".  */
+        memcpy (translit_code, locale_code, code_len);
+        memcpy (translit_code + code_len, "//TRANSLIT", 10 + 1);
+        attempt = xstr_iconv (name_utf8, "UTF-8", translit_code);
+        free (translit_code);
+
+        if (attempt != NULL)
+          {
+#  if !_LIBICONV_VERSION
+            /* Don't use the transliteration if it added question marks.
+               glibc's transliteration falls back to question marks; libiconv's
+               transliteration does not.
+               mbschr is equivalent to strchr in this case.  */
+            if (strchr (attempt, '?') != NULL)
+              free (attempt);
+            else
+#  endif
+              translit = translit_buf = attempt;
+          }
+      }
+# endif
+#endif
+    }
+
+  /* The name in locale encoding: the plain conversion if there is one, else
+     the transliteration, else the ASCII fallback.  */
+  if (converted != NULL)
+    name = converted;
+  else if (translit != NULL)
+    name = translit;
+  else
+    name = name_ascii;
+
+  /* See whether we have a translation.  Some translators have not understood
+     that they should use the UTF-8 form of the name, if possible.  So if the
+     translator provided a no-op translation, we ignore it.  */
+  if (strcmp (translation, name_ascii) == 0)
+    {
+      /* Release the conversion buffers, except the one being returned.  */
+      if (converted_buf != name)
+        free (converted_buf);
+      if (translit_buf != name)
+        free (translit_buf);
+      return name;
+    }
+
+  /* See whether the translation contains the original name.  */
+  if (mbsstr_trimmed_wordbounded (translation, name_ascii)
+      || (converted != NULL
+          && mbsstr_trimmed_wordbounded (translation, converted))
+      || (translit != NULL
+          && mbsstr_trimmed_wordbounded (translation, translit)))
+    result = translation;
+  else
+    {
+      /* Return "TRANSLATION (NAME)".  */
+      char *combined =
+        XNMALLOC (strlen (translation) + 2 + strlen (name) + 1 + 1, char);
+
+      sprintf (combined, "%s (%s)", translation, name);
+      result = combined;
+    }
+
+  /* NAME has been copied or is not needed; the buffers can go.  */
+  free (converted_buf);
+  free (translit_buf);
+  return result;
+}
+
+#ifdef TEST1 /* Driver: prints "found" if argv[1] contains trim (argv[2]).  */
+# include <locale.h>
+int
+main (int argc, char *argv[])
+{
+  setlocale (LC_ALL, "");
+  if (argc >= 3 && mbsstr_trimmed_wordbounded (argv[1], argv[2]))
+    printf ("found\n");
+  return argc >= 3 ? EXIT_SUCCESS : EXIT_FAILURE; /* fail if args missing */
+}
+#endif
+
+#ifdef TEST2 /* Test driver: prints the localization of one sample name.  */
+# include <locale.h>
+# include <stdio.h>
+int
+main (int argc, char *argv[])
+{
+  setlocale (LC_ALL, ""); /* Adopt the locale of the environment.  */
+  printf ("%s\n", proper_name_utf8 ("Franc,ois Pinard", "Fran\303\247ois Pinard"));
+  return 0;
+}
+#endif