From a848231ae0f346dc7cc000973fbeb65b0894ee92 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 10 Apr 2024 21:59:03 +0200 Subject: Adding upstream version 3.8.5. Signed-off-by: Daniel Baumann --- src/util/strcasecmp_utf8.c | 216 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 216 insertions(+) create mode 100644 src/util/strcasecmp_utf8.c (limited to 'src/util/strcasecmp_utf8.c') diff --git a/src/util/strcasecmp_utf8.c b/src/util/strcasecmp_utf8.c new file mode 100644 index 0000000..e3f20df --- /dev/null +++ b/src/util/strcasecmp_utf8.c @@ -0,0 +1,216 @@ +/*++ +/* NAME +/* strcasecmp_utf8 3 +/* SUMMARY +/* caseless string comparison +/* SYNOPSIS +/* #include +/* +/* int strcasecmp_utf8( +/* const char *s1, +/* const char *s2) +/* +/* int strncasecmp_utf8( +/* const char *s1, +/* const char *s2, +/* ssize_t len) +/* AUXILIARY FUNCTIONS +/* int strcasecmp_utf8x( +/* int flags, +/* const char *s1, +/* const char *s2) +/* +/* int strncasecmp_utf8x( +/* int flags, +/* const char *s1, +/* const char *s2, +/* ssize_t len) +/* DESCRIPTION +/* strcasecmp_utf8() implements caseless string comparison for +/* UTF-8 text, with an API similar to strcasecmp(). Only ASCII +/* characters are casefolded when the code is compiled without +/* EAI support or when util_utf8_enable is zero. +/* +/* strncasecmp_utf8() implements caseless string comparison +/* for UTF-8 text, with an API similar to strncasecmp(). Only +/* ASCII characters are casefolded when the code is compiled +/* without EAI support or when util_utf8_enable is zero. +/* +/* strcasecmp_utf8x() and strncasecmp_utf8x() implement a more +/* complex API that provides the above functionality and more. +/* +/* Arguments: +/* .IP "s1, s2" +/* Null-terminated strings to be compared. +/* .IP len +/* String length before casefolding. +/* .IP flags +/* Zero or CASEF_FLAG_UTF8. The latter flag enables UTF-8 case +/* folding instead of folding only ASCII characters. This flag +/* is ignored when compiled without EAI support. +/* SEE ALSO +/* casefold(), casefold text for caseless comparison. +/* LICENSE +/* .ad +/* .fi +/* The Secure Mailer license must be distributed with this software. +/* AUTHOR(S) +/* Wietse Venema +/* IBM T.J. Watson Research +/* P.O. Box 704 +/* Yorktown Heights, NY 10598, USA +/* +/* Wietse Venema +/* Google, Inc. +/* 111 8th Avenue +/* New York, NY 10011, USA +/*--*/ + + /* + * System library. + */ +#include +#include + +#ifdef STRCASECMP_IN_STRINGS_H +#include +#endif + + /* + * Utility library. + */ +#include + +#define STR(x) vstring_str(x) + +static VSTRING *f1; /* casefold result for s1 */ +static VSTRING *f2; /* casefold result for s2 */ + +/* strcasecmp_utf8_init - initialize */ + +static void strcasecmp_utf8_init(void) +{ + f1 = vstring_alloc(100); + f2 = vstring_alloc(100); +} + +/* strcasecmp_utf8x - caseless string comparison */ + +int strcasecmp_utf8x(int flags, const char *s1, const char *s2) +{ + + /* + * Short-circuit optimization for ASCII-only text. This may be slower + * than using a cache for all results. We must not expose strcasecmp(3) + * to non-ASCII text. + */ + if (allascii(s1) && allascii(s2)) + return (strcasecmp(s1, s2)); + + if (f1 == 0) + strcasecmp_utf8_init(); + + /* + * Cross our fingers and hope that strcmp() remains agnostic of + * charactersets and locales. + */ + flags &= CASEF_FLAG_UTF8; + casefoldx(flags, f1, s1, -1); + casefoldx(flags, f2, s2, -1); + return (strcmp(STR(f1), STR(f2))); +} + +/* strncasecmp_utf8x - caseless string comparison */ + +int strncasecmp_utf8x(int flags, const char *s1, const char *s2, + ssize_t len) +{ + + /* + * Consider using a cache for all results. + */ + if (f1 == 0) + strcasecmp_utf8_init(); + + /* + * Short-circuit optimization for ASCII-only text. This may be slower + * than using a cache for all results. See comments above for limitations + * of strcasecmp(). + */ + if (allascii_len(s1, len) && allascii_len(s2, len)) + return (strncasecmp(s1, s2, len)); + + /* + * Caution: casefolding may change the number of bytes. See comments + * above for concerns about strcmp(). + */ + flags &= CASEF_FLAG_UTF8; + casefoldx(flags, f1, s1, len); + casefoldx(flags, f2, s2, len); + return (strcmp(STR(f1), STR(f2))); +} + +#ifdef TEST +#include +#include +#include +#include +#include +#include + +int main(int argc, char **argv) +{ + VSTRING *buffer = vstring_alloc(1); + ARGV *cmd; + char **args; + int len; + int flags; + int res; + + msg_vstream_init(argv[0], VSTREAM_ERR); + flags = CASEF_FLAG_UTF8; + util_utf8_enable = 1; + while (vstring_fgets_nonl(buffer, VSTREAM_IN)) { + vstream_printf("> %s\n", STR(buffer)); + cmd = argv_split(STR(buffer), CHARS_SPACE); + if (cmd->argc == 0 || cmd->argv[0][0] == '#') + continue; + args = cmd->argv; + + /* + * Compare two strings. + */ + if (strcmp(args[0], "compare") == 0 && cmd->argc == 3) { + res = strcasecmp_utf8x(flags, args[1], args[2]); + vstream_printf("\"%s\" %s \"%s\"\n", + args[1], + res < 0 ? "<" : res == 0 ? "==" : ">", + args[2]); + } + + /* + * Compare two substrings. + */ + else if (strcmp(args[0], "compare-len") == 0 && cmd->argc == 4 + && sscanf(args[3], "%d", &len) == 1 && len >= 0) { + res = strncasecmp_utf8x(flags, args[1], args[2], len); + vstream_printf("\"%.*s\" %s \"%.*s\"\n", + len, args[1], + res < 0 ? "<" : res == 0 ? "==" : ">", + len, args[2]); + } + + /* + * Usage. + */ + else { + vstream_printf("Usage: %s compare | compare-len \n", + argv[0]); + } + vstream_fflush(VSTREAM_OUT); + argv_free(cmd); + } + exit(0); +} + +#endif /* TEST */ -- cgit v1.2.3