/*++ /* NAME /* midna_domain 3 /* SUMMARY /* ASCII/UTF-8 domain name conversion /* SYNOPSIS /* #include /* /* int midna_domain_cache_size; /* int midna_domain_transitional; /* /* const char *midna_domain_to_ascii( /* const char *name) /* /* const char *midna_domain_to_utf8( /* const char *name) /* /* const char *midna_domain_suffix_to_ascii( /* const char *name) /* /* const char *midna_domain_suffix_to_utf8( /* const char *name) /* AUXILIARY FUNCTIONS /* void midna_domain_pre_chroot(void) /* DESCRIPTION /* The functions in this module transform domain names from/to /* ASCII and UTF-8 form. The result is cached to avoid repeated /* conversion. /* /* This module builds on the ICU library implementation of the /* UTS #46 specification, using default ICU library options /* because those are likely best tested: with transitional /* processing, with case mapping, with normalization, with /* limited IDNA2003 compatibility, without STD3 ASCII rules. /* /* midna_domain_to_ascii() converts an UTF-8 or ASCII domain /* name to ASCII. The result is a null pointer in case of /* error. This function verifies that the result passes /* valid_hostname(). /* /* midna_domain_to_utf8() converts an UTF-8 or ASCII domain /* name to UTF-8. The result is a null pointer in case of /* error. This function verifies that the result, after /* conversion to ASCII, passes valid_hostname(). /* /* midna_domain_suffix_to_ascii() and midna_domain_suffix_to_utf8() /* take a name that starts with '.' and otherwise perform the /* same operations as midna_domain_to_ascii() and /* midna_domain_to_utf8(). /* /* midna_domain_cache_size specifies the size of the conversion /* result cache. This value is used only once, upon the first /* lookup request. /* /* midna_domain_transitional enables transitional conversion /* between UTF8 and ASCII labels. /* /* midna_domain_pre_chroot() does some pre-chroot initialization. /* SEE ALSO /* http://unicode.org/reports/tr46/ Unicode IDNA Compatibility processing /* msg(3) diagnostics interface /* DIAGNOSTICS /* Fatal errors: memory allocation problem. /* Warnings: conversion error or result validation error. /* LICENSE /* .ad /* .fi /* The Secure Mailer license must be distributed with this software. /* AUTHOR(S) /* Arnt Gulbrandsen /* /* Wietse Venema /* IBM T.J. Watson Research /* P.O. Box 704 /* Yorktown Heights, NY 10598, USA /* /* Wietse Venema /* Google, Inc. /* 111 8th Avenue /* New York, NY 10011, USA /*--*/ /* * System library. */ #include #include #include #ifndef NO_EAI #include /* * Utility library. */ #include #include #include #include #include #include #include /* * Application-specific. */ #define DEF_MIDNA_CACHE_SIZE 256 int midna_domain_cache_size = DEF_MIDNA_CACHE_SIZE; int midna_domain_transitional = 0; static VSTRING *midna_domain_buf; /* x.suffix */ #define STR(x) vstring_str(x) /* midna_domain_strerror - pick one for error reporting */ static const char *midna_domain_strerror(UErrorCode error, int info_errors) { /* * XXX The UIDNA_ERROR_EMPTY_LABEL etc. names are defined in an ENUM, so * we can't use #ifdef to dynamically determine which names exist. */ static LONG_NAME_MASK uidna_errors[] = { "UIDNA_ERROR_EMPTY_LABEL", UIDNA_ERROR_EMPTY_LABEL, "UIDNA_ERROR_LABEL_TOO_LONG", UIDNA_ERROR_LABEL_TOO_LONG, "UIDNA_ERROR_DOMAIN_NAME_TOO_LONG", UIDNA_ERROR_DOMAIN_NAME_TOO_LONG, "UIDNA_ERROR_LEADING_HYPHEN", UIDNA_ERROR_LEADING_HYPHEN, "UIDNA_ERROR_TRAILING_HYPHEN", UIDNA_ERROR_TRAILING_HYPHEN, "UIDNA_ERROR_HYPHEN_3_4", UIDNA_ERROR_HYPHEN_3_4, "UIDNA_ERROR_LEADING_COMBINING_MARK", UIDNA_ERROR_LEADING_COMBINING_MARK, "UIDNA_ERROR_DISALLOWED", UIDNA_ERROR_DISALLOWED, "UIDNA_ERROR_PUNYCODE", UIDNA_ERROR_PUNYCODE, "UIDNA_ERROR_LABEL_HAS_DOT", UIDNA_ERROR_LABEL_HAS_DOT, "UIDNA_ERROR_INVALID_ACE_LABEL", UIDNA_ERROR_INVALID_ACE_LABEL, "UIDNA_ERROR_BIDI", UIDNA_ERROR_BIDI, "UIDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ, /* The above errors are defined with ICU 46 and later. */ 0, }; if (info_errors) { return (str_long_name_mask_opt((VSTRING *) 0, "idna error", uidna_errors, info_errors, NAME_MASK_NUMBER | NAME_MASK_COMMA)); } else { return u_errorName(error); } } /* midna_domain_pre_chroot - pre-chroot initialization */ void midna_domain_pre_chroot(void) { UErrorCode error = U_ZERO_ERROR; UIDNAInfo info = UIDNA_INFO_INITIALIZER; UIDNA *idna; idna = uidna_openUTS46(midna_domain_transitional ? UIDNA_DEFAULT : UIDNA_NONTRANSITIONAL_TO_ASCII, &error); if (U_FAILURE(error)) msg_warn("ICU library initialization failed: %s", midna_domain_strerror(error, info.errors)); uidna_close(idna); } /* midna_domain_to_ascii_create - convert domain to ASCII */ static void *midna_domain_to_ascii_create(const char *name, void *unused_context) { static const char myname[] = "midna_domain_to_ascii_create"; char buf[1024]; /* XXX */ UErrorCode error = U_ZERO_ERROR; UIDNAInfo info = UIDNA_INFO_INITIALIZER; UIDNA *idna; int anl; /* * Paranoia: do not expose uidna_*() to unfiltered network data. */ if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) { msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s", myname, name, "malformed UTF-8"); return (0); } /* * Perform the requested conversion. */ idna = uidna_openUTS46(midna_domain_transitional ? UIDNA_DEFAULT : UIDNA_NONTRANSITIONAL_TO_ASCII, &error); anl = uidna_nameToASCII_UTF8(idna, name, strlen(name), buf, sizeof(buf) - 1, &info, &error); uidna_close(idna); /* * Paranoia: verify that the result passes valid_hostname(). A quick * check shows that UTS46 ToASCII by default rejects inputs with labels * that start or end in '-', with names or labels that are over-long, or * "fake" A-labels, as required by UTS 46 section 4.1, but we rely on * valid_hostname() on the output side just to be sure. */ if (U_SUCCESS(error) && info.errors == 0 && anl > 0) { buf[anl] = 0; /* XXX */ if (!valid_hostname(buf, DONT_GRIPE)) { msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s", myname, name, "malformed ASCII label(s)"); return (0); } return (mystrndup(buf, anl)); } else { msg_warn("%s: Problem translating domain \"%.100s\" to ASCII form: %s", myname, name, midna_domain_strerror(error, info.errors)); return (0); } } /* midna_domain_to_utf8_create - convert domain to UTF8 */ static void *midna_domain_to_utf8_create(const char *name, void *unused_context) { static const char myname[] = "midna_domain_to_utf8_create"; char buf[1024]; /* XXX */ UErrorCode error = U_ZERO_ERROR; UIDNAInfo info = UIDNA_INFO_INITIALIZER; UIDNA *idna; int anl; /* * Paranoia: do not expose uidna_*() to unfiltered network data. */ if (allascii(name) == 0 && valid_utf8_string(name, strlen(name)) == 0) { msg_warn("%s: Problem translating domain \"%.100s\" to UTF-8 form: %s", myname, name, "malformed UTF-8"); return (0); } /* * Perform the requested conversion. */ idna = uidna_openUTS46(midna_domain_transitional ? UIDNA_DEFAULT : UIDNA_NONTRANSITIONAL_TO_UNICODE, &error); anl = uidna_nameToUnicodeUTF8(idna, name, strlen(name), buf, sizeof(buf) - 1, &info, &error); uidna_close(idna); /* * Paranoia: UTS46 toUTF8 by default accepts and produces an over-long * name or a name that contains an over-long NR-LDH label (and perhaps * other invalid forms that are not covered in UTS 46, section 4.1). We * rely on midna_domain_to_ascii() to validate the output. */ if (U_SUCCESS(error) && info.errors == 0 && anl > 0) { buf[anl] = 0; /* XXX */ if (midna_domain_to_ascii(buf) == 0) return (0); return (mystrndup(buf, anl)); } else { msg_warn("%s: Problem translating domain \"%.100s\" to UTF8 form: %s", myname, name, midna_domain_strerror(error, info.errors)); return (0); } } /* midna_domain_cache_free - cache element destructor */ static void midna_domain_cache_free(void *value, void *unused_context) { if (value) myfree(value); } /* midna_domain_to_ascii - convert name to ASCII */ const char *midna_domain_to_ascii(const char *name) { static CTABLE *midna_domain_to_ascii_cache = 0; if (midna_domain_to_ascii_cache == 0) midna_domain_to_ascii_cache = ctable_create(midna_domain_cache_size, midna_domain_to_ascii_create, midna_domain_cache_free, (void *) 0); return (ctable_locate(midna_domain_to_ascii_cache, name)); } /* midna_domain_to_utf8 - convert name to UTF8 */ const char *midna_domain_to_utf8(const char *name) { static CTABLE *midna_domain_to_utf8_cache = 0; if (midna_domain_to_utf8_cache == 0) midna_domain_to_utf8_cache = ctable_create(midna_domain_cache_size, midna_domain_to_utf8_create, midna_domain_cache_free, (void *) 0); return (ctable_locate(midna_domain_to_utf8_cache, name)); } /* midna_domain_suffix_to_ascii - convert .name to ASCII */ const char *midna_domain_suffix_to_ascii(const char *suffix) { const char *cache_res; /* * If prepending x to .name causes the result to become too long, then * the suffix is bad. */ if (midna_domain_buf == 0) midna_domain_buf = vstring_alloc(100); vstring_sprintf(midna_domain_buf, "x%s", suffix); if ((cache_res = midna_domain_to_ascii(STR(midna_domain_buf))) == 0) return (0); else return (cache_res + 1); } /* midna_domain_suffix_to_utf8 - convert .name to UTF8 */ const char *midna_domain_suffix_to_utf8(const char *name) { const char *cache_res; /* * If prepending x to .name causes the result to become too long, then * the suffix is bad. */ if (midna_domain_buf == 0) midna_domain_buf = vstring_alloc(100); vstring_sprintf(midna_domain_buf, "x%s", name); if ((cache_res = midna_domain_to_utf8(STR(midna_domain_buf))) == 0) return (0); else return (cache_res + 1); } #ifdef TEST /* * Test program - reads names from stdin, reports invalid names to stderr. */ #include #include #include #include /* XXX util_utf8_enable */ #include #include #include #include int main(int argc, char **argv) { VSTRING *buffer = vstring_alloc(1); const char *bp; const char *ascii; const char *utf8; if (setlocale(LC_ALL, "C") == 0) msg_fatal("setlocale(LC_ALL, C) failed: %m"); msg_vstream_init(argv[0], VSTREAM_ERR); /* msg_verbose = 1; */ util_utf8_enable = 1; if (geteuid() == 0) { midna_domain_pre_chroot(); if (chroot(".") != 0) msg_fatal("chroot(\".\"): %m"); } while (vstring_fgets_nonl(buffer, VSTREAM_IN)) { bp = STR(buffer); msg_info("> %s", bp); while (ISSPACE(*bp)) bp++; if (*bp == '#' || *bp == 0) continue; msg_info("unconditional conversions:"); utf8 = midna_domain_to_utf8(bp); msg_info("\"%s\" ->utf8 \"%s\"", bp, utf8 ? utf8 : "(error)"); ascii = midna_domain_to_ascii(bp); msg_info("\"%s\" ->ascii \"%s\"", bp, ascii ? ascii : "(error)"); msg_info("conditional conversions:"); if (!allascii(bp)) { if (ascii != 0) { utf8 = midna_domain_to_utf8(ascii); msg_info("\"%s\" ->ascii \"%s\" ->utf8 \"%s\"", bp, ascii, utf8 ? utf8 : "(error)"); if (utf8 != 0) { if (strcmp(utf8, bp) != 0) msg_warn("\"%s\" != \"%s\"", bp, utf8); } } } else { if (utf8 != 0) { ascii = midna_domain_to_ascii(utf8); msg_info("\"%s\" ->utf8 \"%s\" ->ascii \"%s\"", bp, utf8, ascii ? ascii : "(error)"); if (ascii != 0) { if (strcmp(ascii, bp) != 0) msg_warn("\"%s\" != \"%s\"", bp, ascii); } } } } exit(0); } #endif /* TEST */ #endif /* NO_EAI */