/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /* * utf8.c * * This file contains some additional utility routines required for * handling UTF8 strings. */ #ifndef BASE_H #include "base.h" #endif /* BASE_H */ #include "plstr.h" /* * NOTES: * * There's an "is hex string" function in pki1/atav.c. If we need * it in more places, pull that one out. */ /* * nssUTF8_CaseIgnoreMatch * * Returns true if the two UTF8-encoded strings pointed to by the * two specified NSSUTF8 pointers differ only in typcase. * * The error may be one of the following values: * NSS_ERROR_INVALID_POINTER * * Return value: * PR_TRUE if the strings match, ignoring case * PR_FALSE if they don't * PR_FALSE upon error */ NSS_IMPLEMENT PRBool nssUTF8_CaseIgnoreMatch(const NSSUTF8 *a, const NSSUTF8 *b, PRStatus *statusOpt) { #ifdef NSSDEBUG if (((const NSSUTF8 *)NULL == a) || ((const NSSUTF8 *)NULL == b)) { nss_SetError(NSS_ERROR_INVALID_POINTER); if ((PRStatus *)NULL != statusOpt) { *statusOpt = PR_FAILURE; } return PR_FALSE; } #endif /* NSSDEBUG */ if ((PRStatus *)NULL != statusOpt) { *statusOpt = PR_SUCCESS; } /* * XXX fgmr * * This is, like, so wrong! */ if (0 == PL_strcasecmp((const char *)a, (const char *)b)) { return PR_TRUE; } else { return PR_FALSE; } } /* * nssUTF8_PrintableMatch * * Returns true if the two Printable strings pointed to by the * two specified NSSUTF8 pointers match when compared with the * rules for Printable String (leading and trailing spaces are * disregarded, extents of whitespace match irregardless of length, * and case is not significant), then PR_TRUE will be returned. * Otherwise, PR_FALSE will be returned. Upon failure, PR_FALSE * will be returned. If the optional statusOpt argument is not * NULL, then PR_SUCCESS or PR_FAILURE will be stored in that * location. * * The error may be one of the following values: * NSS_ERROR_INVALID_POINTER * * Return value: * PR_TRUE if the strings match, ignoring case * PR_FALSE if they don't * PR_FALSE upon error */ NSS_IMPLEMENT PRBool nssUTF8_PrintableMatch(const NSSUTF8 *a, const NSSUTF8 *b, PRStatus *statusOpt) { PRUint8 *c; PRUint8 *d; #ifdef NSSDEBUG if (((const NSSUTF8 *)NULL == a) || ((const NSSUTF8 *)NULL == b)) { nss_SetError(NSS_ERROR_INVALID_POINTER); if ((PRStatus *)NULL != statusOpt) { *statusOpt = PR_FAILURE; } return PR_FALSE; } #endif /* NSSDEBUG */ if ((PRStatus *)NULL != statusOpt) { *statusOpt = PR_SUCCESS; } c = (PRUint8 *)a; d = (PRUint8 *)b; while (' ' == *c) { c++; } while (' ' == *d) { d++; } while (('\0' != *c) && ('\0' != *d)) { PRUint8 e, f; e = *c; f = *d; if (('a' <= e) && (e <= 'z')) { e -= ('a' - 'A'); } if (('a' <= f) && (f <= 'z')) { f -= ('a' - 'A'); } if (e != f) { return PR_FALSE; } c++; d++; if (' ' == *c) { while (' ' == *c) { c++; } c--; } if (' ' == *d) { while (' ' == *d) { d++; } d--; } } while (' ' == *c) { c++; } while (' ' == *d) { d++; } if (*c == *d) { /* And both '\0', btw */ return PR_TRUE; } else { return PR_FALSE; } } /* * nssUTF8_Duplicate * * This routine duplicates the UTF8-encoded string pointed to by the * specified NSSUTF8 pointer. If the optional arenaOpt argument is * not null, the memory required will be obtained from that arena; * otherwise, the memory required will be obtained from the heap. * A pointer to the new string will be returned. In case of error, * an error will be placed on the error stack and NULL will be * returned. * * The error may be one of the following values: * NSS_ERROR_INVALID_POINTER * NSS_ERROR_INVALID_ARENA * NSS_ERROR_NO_MEMORY */ NSS_IMPLEMENT NSSUTF8 * nssUTF8_Duplicate(const NSSUTF8 *s, NSSArena *arenaOpt) { NSSUTF8 *rv; PRUint32 len; #ifdef NSSDEBUG if ((const NSSUTF8 *)NULL == s) { nss_SetError(NSS_ERROR_INVALID_POINTER); return (NSSUTF8 *)NULL; } if ((NSSArena *)NULL != arenaOpt) { if (PR_SUCCESS != nssArena_verifyPointer(arenaOpt)) { return (NSSUTF8 *)NULL; } } #endif /* NSSDEBUG */ len = PL_strlen((const char *)s); #ifdef PEDANTIC if ('\0' != ((const char *)s)[len]) { /* must have wrapped, e.g., too big for PRUint32 */ nss_SetError(NSS_ERROR_NO_MEMORY); return (NSSUTF8 *)NULL; } #endif /* PEDANTIC */ len++; /* zero termination */ rv = nss_ZAlloc(arenaOpt, len); if ((void *)NULL == rv) { return (NSSUTF8 *)NULL; } (void)nsslibc_memcpy(rv, s, len); return rv; } /* * nssUTF8_Size * * This routine returns the length in bytes (including the terminating * null) of the UTF8-encoded string pointed to by the specified * NSSUTF8 pointer. Zero is returned on error. * * The error may be one of the following values: * NSS_ERROR_INVALID_POINTER * NSS_ERROR_VALUE_TOO_LARGE * * Return value: * 0 on error * nonzero length of the string. */ NSS_IMPLEMENT PRUint32 nssUTF8_Size(const NSSUTF8 *s, PRStatus *statusOpt) { PRUint32 sv; #ifdef NSSDEBUG if ((const NSSUTF8 *)NULL == s) { nss_SetError(NSS_ERROR_INVALID_POINTER); if ((PRStatus *)NULL != statusOpt) { *statusOpt = PR_FAILURE; } return 0; } #endif /* NSSDEBUG */ sv = PL_strlen((const char *)s) + 1; #ifdef PEDANTIC if ('\0' != ((const char *)s)[sv - 1]) { /* wrapped */ nss_SetError(NSS_ERROR_VALUE_TOO_LARGE); if ((PRStatus *)NULL != statusOpt) { *statusOpt = PR_FAILURE; } return 0; } #endif /* PEDANTIC */ if ((PRStatus *)NULL != statusOpt) { *statusOpt = PR_SUCCESS; } return sv; } /* * nssUTF8_Length * * This routine returns the length in characters (not including the * terminating null) of the UTF8-encoded string pointed to by the * specified NSSUTF8 pointer. * * The error may be one of the following values: * NSS_ERROR_INVALID_POINTER * NSS_ERROR_VALUE_TOO_LARGE * NSS_ERROR_INVALID_STRING * * Return value: * length of the string (which may be zero) * 0 on error */ NSS_IMPLEMENT PRUint32 nssUTF8_Length(const NSSUTF8 *s, PRStatus *statusOpt) { PRUint32 l = 0; const PRUint8 *c = (const PRUint8 *)s; #ifdef NSSDEBUG if ((const NSSUTF8 *)NULL == s) { nss_SetError(NSS_ERROR_INVALID_POINTER); goto loser; } #endif /* NSSDEBUG */ /* * From RFC 3629: * * UTF8-octets = *( UTF8-char ) * UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4 * UTF8-1 = %x00-7F * UTF8-2 = %xC2-DF UTF8-tail * UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / * %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) * UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / * %xF4 %x80-8F 2( UTF8-tail ) * UTF8-tail = %x80-BF */ while (0 != *c) { PRUint32 incr; if (*c < 0x80) { incr = 1; } else if (*c < 0xC2) { nss_SetError(NSS_ERROR_INVALID_STRING); goto loser; } else if (*c < 0xE0) { incr = 2; } else if (*c == 0xE0) { if (c[1] < 0xA0) { nss_SetError(NSS_ERROR_INVALID_STRING); goto loser; } incr = 3; } else if (*c < 0xF0) { if (*c == 0xED && c[1] > 0x9F) { nss_SetError(NSS_ERROR_INVALID_STRING); goto loser; } incr = 3; } else if (*c == 0xF0) { if (c[1] < 0x90) { nss_SetError(NSS_ERROR_INVALID_STRING); goto loser; } incr = 4; } else if (*c < 0xF4) { incr = 4; } else if (*c == 0xF4) { if (c[1] > 0x8F) { nss_SetError(NSS_ERROR_INVALID_STRING); goto loser; } incr = 4; } else { nss_SetError(NSS_ERROR_INVALID_STRING); goto loser; } l += incr; #ifdef PEDANTIC if (l < incr) { /* Wrapped-- too big */ nss_SetError(NSS_ERROR_VALUE_TOO_LARGE); goto loser; } #endif /* PEDANTIC */ { const PRUint8 *d; for (d = &c[1]; d < &c[incr]; d++) { if ((*d & 0xC0) != 0x80) { nss_SetError(NSS_ERROR_INVALID_STRING); goto loser; } } } c += incr; } if ((PRStatus *)NULL != statusOpt) { *statusOpt = PR_SUCCESS; } return l; loser: if ((PRStatus *)NULL != statusOpt) { *statusOpt = PR_FAILURE; } return 0; } /* * nssUTF8_Create * * This routine creates a UTF8 string from a string in some other * format. Some types of string may include embedded null characters, * so for them the length parameter must be used. For string types * that are null-terminated, the length parameter is optional; if it * is zero, it will be ignored. If the optional arena argument is * non-null, the memory used for the new string will be obtained from * that arena, otherwise it will be obtained from the heap. This * routine may return NULL upon error, in which case it will have * placed an error on the error stack. * * The error may be one of the following: * NSS_ERROR_INVALID_POINTER * NSS_ERROR_NO_MEMORY * NSS_ERROR_UNSUPPORTED_TYPE * * Return value: * NULL upon error * A non-null pointer to a new UTF8 string otherwise */ extern const NSSError NSS_ERROR_INTERNAL_ERROR; /* XXX fgmr */ NSS_IMPLEMENT NSSUTF8 * nssUTF8_Create(NSSArena *arenaOpt, nssStringType type, const void *inputString, PRUint32 size /* in bytes, not characters */ ) { NSSUTF8 *rv = NULL; #ifdef NSSDEBUG if ((NSSArena *)NULL != arenaOpt) { if (PR_SUCCESS != nssArena_verifyPointer(arenaOpt)) { return (NSSUTF8 *)NULL; } } if ((const void *)NULL == inputString) { nss_SetError(NSS_ERROR_INVALID_POINTER); return (NSSUTF8 *)NULL; } #endif /* NSSDEBUG */ switch (type) { case nssStringType_DirectoryString: /* This is a composite type requiring BER */ nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE); break; case nssStringType_TeletexString: /* * draft-ietf-pkix-ipki-part1-11 says in part: * * In addition, many legacy implementations support names encoded * in the ISO 8859-1 character set (Latin1String) but tag them as * TeletexString. The Latin1String includes characters used in * Western European countries which are not part of the * TeletexString charcter set. Implementations that process * TeletexString SHOULD be prepared to handle the entire ISO * 8859-1 character set.[ISO 8859-1]. */ nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ break; case nssStringType_PrintableString: /* * PrintableString consists of A-Za-z0-9 ,()+,-./:=? * This is a subset of ASCII, which is a subset of UTF8. * So we can just duplicate the string over. */ if (0 == size) { rv = nssUTF8_Duplicate((const NSSUTF8 *)inputString, arenaOpt); } else { rv = nss_ZAlloc(arenaOpt, size + 1); if ((NSSUTF8 *)NULL == rv) { return (NSSUTF8 *)NULL; } (void)nsslibc_memcpy(rv, inputString, size); } break; case nssStringType_UniversalString: /* 4-byte unicode */ nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ break; case nssStringType_BMPString: /* Base Multilingual Plane of Unicode */ nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ break; case nssStringType_UTF8String: if (0 == size) { rv = nssUTF8_Duplicate((const NSSUTF8 *)inputString, arenaOpt); } else { rv = nss_ZAlloc(arenaOpt, size + 1); if ((NSSUTF8 *)NULL == rv) { return (NSSUTF8 *)NULL; } (void)nsslibc_memcpy(rv, inputString, size); } break; case nssStringType_PHGString: /* * PHGString is an IA5String (with case-insensitive comparisons). * IA5 is ~almost~ ascii; ascii has dollar-sign where IA5 has * currency symbol. */ nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ break; case nssStringType_GeneralString: nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ break; default: nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE); break; } return rv; } NSS_IMPLEMENT NSSItem * nssUTF8_GetEncoding(NSSArena *arenaOpt, NSSItem *rvOpt, nssStringType type, NSSUTF8 *string) { NSSItem *rv = (NSSItem *)NULL; PRStatus status = PR_SUCCESS; #ifdef NSSDEBUG if ((NSSArena *)NULL != arenaOpt) { if (PR_SUCCESS != nssArena_verifyPointer(arenaOpt)) { return (NSSItem *)NULL; } } if ((NSSUTF8 *)NULL == string) { nss_SetError(NSS_ERROR_INVALID_POINTER); return (NSSItem *)NULL; } #endif /* NSSDEBUG */ switch (type) { case nssStringType_DirectoryString: nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ break; case nssStringType_TeletexString: nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ break; case nssStringType_PrintableString: nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ break; case nssStringType_UniversalString: nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ break; case nssStringType_BMPString: nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ break; case nssStringType_UTF8String: { NSSUTF8 *dup = nssUTF8_Duplicate(string, arenaOpt); if ((NSSUTF8 *)NULL == dup) { return (NSSItem *)NULL; } if ((NSSItem *)NULL == rvOpt) { rv = nss_ZNEW(arenaOpt, NSSItem); if ((NSSItem *)NULL == rv) { (void)nss_ZFreeIf(dup); return (NSSItem *)NULL; } } else { rv = rvOpt; } rv->data = dup; dup = (NSSUTF8 *)NULL; rv->size = nssUTF8_Size(rv->data, &status); if ((0 == rv->size) && (PR_SUCCESS != status)) { if ((NSSItem *)NULL == rvOpt) { (void)nss_ZFreeIf(rv); } return (NSSItem *)NULL; } } break; case nssStringType_PHGString: nss_SetError(NSS_ERROR_INTERNAL_ERROR); /* unimplemented */ break; default: nss_SetError(NSS_ERROR_UNSUPPORTED_TYPE); break; } return rv; } /* * nssUTF8_CopyIntoFixedBuffer * * This will copy a UTF8 string into a fixed-length buffer, making * sure that the all characters are valid. Any remaining space will * be padded with the specified ASCII character, typically either * null or space. * * Blah, blah, blah. */ NSS_IMPLEMENT PRStatus nssUTF8_CopyIntoFixedBuffer(NSSUTF8 *string, char *buffer, PRUint32 bufferSize, char pad) { PRUint32 stringSize = 0; #ifdef NSSDEBUG if ((char *)NULL == buffer) { nss_SetError(NSS_ERROR_INVALID_POINTER); return PR_FALSE; } if (0 == bufferSize) { nss_SetError(NSS_ERROR_INVALID_ARGUMENT); return PR_FALSE; } if ((pad & 0x80) != 0x00) { nss_SetError(NSS_ERROR_INVALID_ARGUMENT); return PR_FALSE; } #endif /* NSSDEBUG */ if ((NSSUTF8 *)NULL == string) { string = (NSSUTF8 *)""; } stringSize = nssUTF8_Size(string, (PRStatus *)NULL); stringSize--; /* don't count the trailing null */ if (stringSize > bufferSize) { PRUint32 bs = bufferSize; (void)nsslibc_memcpy(buffer, string, bufferSize); if ((((buffer[bs - 1] & 0x80) == 0x00)) || ((bs > 1) && ((buffer[bs - 2] & 0xE0) == 0xC0)) || ((bs > 2) && ((buffer[bs - 3] & 0xF0) == 0xE0)) || ((bs > 3) && ((buffer[bs - 4] & 0xF8) == 0xF0)) || ((bs > 4) && ((buffer[bs - 5] & 0xFC) == 0xF8)) || ((bs > 5) && ((buffer[bs - 6] & 0xFE) == 0xFC))) { /* It fit exactly */ return PR_SUCCESS; } /* Too long. We have to trim the last character */ for (/*bs*/; bs != 0; bs--) { if ((buffer[bs - 1] & 0xC0) != 0x80) { buffer[bs - 1] = pad; break; } else { buffer[bs - 1] = pad; } } } else { (void)nsslibc_memset(buffer, pad, bufferSize); (void)nsslibc_memcpy(buffer, string, stringSize); } return PR_SUCCESS; } /* * nssUTF8_Equal * */ NSS_IMPLEMENT PRBool nssUTF8_Equal(const NSSUTF8 *a, const NSSUTF8 *b, PRStatus *statusOpt) { PRUint32 la, lb; #ifdef NSSDEBUG if (((const NSSUTF8 *)NULL == a) || ((const NSSUTF8 *)NULL == b)) { nss_SetError(NSS_ERROR_INVALID_POINTER); if ((PRStatus *)NULL != statusOpt) { *statusOpt = PR_FAILURE; } return PR_FALSE; } #endif /* NSSDEBUG */ la = nssUTF8_Size(a, statusOpt); if (0 == la) { return PR_FALSE; } lb = nssUTF8_Size(b, statusOpt); if (0 == lb) { return PR_FALSE; } if (la != lb) { return PR_FALSE; } return nsslibc_memequal(a, b, la, statusOpt); }