diff options
Diffstat (limited to 'fs/hfsplus/unicode.c')
-rw-r--r-- | fs/hfsplus/unicode.c | 520 |
1 files changed, 520 insertions, 0 deletions
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c new file mode 100644 index 000000000..c8d1b2be7 --- /dev/null +++ b/fs/hfsplus/unicode.c @@ -0,0 +1,520 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/fs/hfsplus/unicode.c + * + * Copyright (C) 2001 + * Brad Boyer (flar@allandria.com) + * (C) 2003 Ardis Technologies <roman@ardistech.com> + * + * Handler routines for unicode strings + */ + +#include <linux/types.h> +#include <linux/nls.h> +#include "hfsplus_fs.h" +#include "hfsplus_raw.h" + +/* Fold the case of a unicode char, given the 16 bit value */ +/* Returns folded char, or 0 if ignorable */ +static inline u16 case_fold(u16 c) +{ + u16 tmp; + + tmp = hfsplus_case_fold_table[c >> 8]; + if (tmp) + tmp = hfsplus_case_fold_table[tmp + (c & 0xff)]; + else + tmp = c; + return tmp; +} + +/* Compare unicode strings, return values like normal strcmp */ +int hfsplus_strcasecmp(const struct hfsplus_unistr *s1, + const struct hfsplus_unistr *s2) +{ + u16 len1, len2, c1, c2; + const hfsplus_unichr *p1, *p2; + + len1 = be16_to_cpu(s1->length); + len2 = be16_to_cpu(s2->length); + p1 = s1->unicode; + p2 = s2->unicode; + + while (1) { + c1 = c2 = 0; + + while (len1 && !c1) { + c1 = case_fold(be16_to_cpu(*p1)); + p1++; + len1--; + } + while (len2 && !c2) { + c2 = case_fold(be16_to_cpu(*p2)); + p2++; + len2--; + } + + if (c1 != c2) + return (c1 < c2) ? -1 : 1; + if (!c1 && !c2) + return 0; + } +} + +/* Compare names as a sequence of 16-bit unsigned integers */ +int hfsplus_strcmp(const struct hfsplus_unistr *s1, + const struct hfsplus_unistr *s2) +{ + u16 len1, len2, c1, c2; + const hfsplus_unichr *p1, *p2; + int len; + + len1 = be16_to_cpu(s1->length); + len2 = be16_to_cpu(s2->length); + p1 = s1->unicode; + p2 = s2->unicode; + + for (len = min(len1, len2); len > 0; len--) { + c1 = be16_to_cpu(*p1); + c2 = be16_to_cpu(*p2); + if (c1 != c2) + return c1 < c2 ? -1 : 1; + p1++; + p2++; + } + + return len1 < len2 ? -1 : + len1 > len2 ? 1 : 0; +} + + +#define Hangul_SBase 0xac00 +#define Hangul_LBase 0x1100 +#define Hangul_VBase 0x1161 +#define Hangul_TBase 0x11a7 +#define Hangul_SCount 11172 +#define Hangul_LCount 19 +#define Hangul_VCount 21 +#define Hangul_TCount 28 +#define Hangul_NCount (Hangul_VCount * Hangul_TCount) + + +static u16 *hfsplus_compose_lookup(u16 *p, u16 cc) +{ + int i, s, e; + + s = 1; + e = p[1]; + if (!e || cc < p[s * 2] || cc > p[e * 2]) + return NULL; + do { + i = (s + e) / 2; + if (cc > p[i * 2]) + s = i + 1; + else if (cc < p[i * 2]) + e = i - 1; + else + return hfsplus_compose_table + p[i * 2 + 1]; + } while (s <= e); + return NULL; +} + +int hfsplus_uni2asc(struct super_block *sb, + const struct hfsplus_unistr *ustr, + char *astr, int *len_p) +{ + const hfsplus_unichr *ip; + struct nls_table *nls = HFSPLUS_SB(sb)->nls; + u8 *op; + u16 cc, c0, c1; + u16 *ce1, *ce2; + int i, len, ustrlen, res, compose; + + op = astr; + ip = ustr->unicode; + ustrlen = be16_to_cpu(ustr->length); + len = *len_p; + ce1 = NULL; + compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); + + while (ustrlen > 0) { + c0 = be16_to_cpu(*ip++); + ustrlen--; + /* search for single decomposed char */ + if (likely(compose)) + ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0); + if (ce1) + cc = ce1[0]; + else + cc = 0; + if (cc) { + /* start of a possibly decomposed Hangul char */ + if (cc != 0xffff) + goto done; + if (!ustrlen) + goto same; + c1 = be16_to_cpu(*ip) - Hangul_VBase; + if (c1 < Hangul_VCount) { + /* compose the Hangul char */ + cc = (c0 - Hangul_LBase) * Hangul_VCount; + cc = (cc + c1) * Hangul_TCount; + cc += Hangul_SBase; + ip++; + ustrlen--; + if (!ustrlen) + goto done; + c1 = be16_to_cpu(*ip) - Hangul_TBase; + if (c1 > 0 && c1 < Hangul_TCount) { + cc += c1; + ip++; + ustrlen--; + } + goto done; + } + } + while (1) { + /* main loop for common case of not composed chars */ + if (!ustrlen) + goto same; + c1 = be16_to_cpu(*ip); + if (likely(compose)) + ce1 = hfsplus_compose_lookup( + hfsplus_compose_table, c1); + if (ce1) + break; + switch (c0) { + case 0: + c0 = 0x2400; + break; + case '/': + c0 = ':'; + break; + } + res = nls->uni2char(c0, op, len); + if (res < 0) { + if (res == -ENAMETOOLONG) + goto out; + *op = '?'; + res = 1; + } + op += res; + len -= res; + c0 = c1; + ip++; + ustrlen--; + } + ce2 = hfsplus_compose_lookup(ce1, c0); + if (ce2) { + i = 1; + while (i < ustrlen) { + ce1 = hfsplus_compose_lookup(ce2, + be16_to_cpu(ip[i])); + if (!ce1) + break; + i++; + ce2 = ce1; + } + cc = ce2[0]; + if (cc) { + ip += i; + ustrlen -= i; + goto done; + } + } +same: + switch (c0) { + case 0: + cc = 0x2400; + break; + case '/': + cc = ':'; + break; + default: + cc = c0; + } +done: + res = nls->uni2char(cc, op, len); + if (res < 0) { + if (res == -ENAMETOOLONG) + goto out; + *op = '?'; + res = 1; + } + op += res; + len -= res; + } + res = 0; +out: + *len_p = (char *)op - astr; + return res; +} + +/* + * Convert one or more ASCII characters into a single unicode character. + * Returns the number of ASCII characters corresponding to the unicode char. + */ +static inline int asc2unichar(struct super_block *sb, const char *astr, int len, + wchar_t *uc) +{ + int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc); + if (size <= 0) { + *uc = '?'; + size = 1; + } + switch (*uc) { + case 0x2400: + *uc = 0; + break; + case ':': + *uc = '/'; + break; + } + return size; +} + +/* Decomposes a non-Hangul unicode character. */ +static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size) +{ + int off; + + off = hfsplus_decompose_table[(uc >> 12) & 0xf]; + if (off == 0 || off == 0xffff) + return NULL; + + off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)]; + if (!off) + return NULL; + + off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)]; + if (!off) + return NULL; + + off = hfsplus_decompose_table[off + (uc & 0xf)]; + *size = off & 3; + if (*size == 0) + return NULL; + return hfsplus_decompose_table + (off / 4); +} + +/* + * Try to decompose a unicode character as Hangul. Return 0 if @uc is not + * precomposed Hangul, otherwise return the length of the decomposition. + * + * This function was adapted from sample code from the Unicode Standard + * Annex #15: Unicode Normalization Forms, version 3.2.0. + * + * Copyright (C) 1991-2018 Unicode, Inc. All rights reserved. Distributed + * under the Terms of Use in http://www.unicode.org/copyright.html. + */ +static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result) +{ + int index; + int l, v, t; + + index = uc - Hangul_SBase; + if (index < 0 || index >= Hangul_SCount) + return 0; + + l = Hangul_LBase + index / Hangul_NCount; + v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount; + t = Hangul_TBase + index % Hangul_TCount; + + result[0] = l; + result[1] = v; + if (t != Hangul_TBase) { + result[2] = t; + return 3; + } + return 2; +} + +/* Decomposes a single unicode character. */ +static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer) +{ + u16 *result; + + /* Hangul is handled separately */ + result = hangul_buffer; + *size = hfsplus_try_decompose_hangul(uc, result); + if (*size == 0) + result = hfsplus_decompose_nonhangul(uc, size); + return result; +} + +int hfsplus_asc2uni(struct super_block *sb, + struct hfsplus_unistr *ustr, int max_unistr_len, + const char *astr, int len) +{ + int size, dsize, decompose; + u16 *dstr, outlen = 0; + wchar_t c; + u16 dhangul[3]; + + decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); + while (outlen < max_unistr_len && len > 0) { + size = asc2unichar(sb, astr, len, &c); + + if (decompose) + dstr = decompose_unichar(c, &dsize, dhangul); + else + dstr = NULL; + if (dstr) { + if (outlen + dsize > max_unistr_len) + break; + do { + ustr->unicode[outlen++] = cpu_to_be16(*dstr++); + } while (--dsize > 0); + } else + ustr->unicode[outlen++] = cpu_to_be16(c); + + astr += size; + len -= size; + } + ustr->length = cpu_to_be16(outlen); + if (len > 0) + return -ENAMETOOLONG; + return 0; +} + +/* + * Hash a string to an integer as appropriate for the HFS+ filesystem. + * Composed unicode characters are decomposed and case-folding is performed + * if the appropriate bits are (un)set on the superblock. + */ +int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str) +{ + struct super_block *sb = dentry->d_sb; + const char *astr; + const u16 *dstr; + int casefold, decompose, size, len; + unsigned long hash; + wchar_t c; + u16 c2; + u16 dhangul[3]; + + casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); + decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); + hash = init_name_hash(dentry); + astr = str->name; + len = str->len; + while (len > 0) { + int uninitialized_var(dsize); + size = asc2unichar(sb, astr, len, &c); + astr += size; + len -= size; + + if (decompose) + dstr = decompose_unichar(c, &dsize, dhangul); + else + dstr = NULL; + if (dstr) { + do { + c2 = *dstr++; + if (casefold) + c2 = case_fold(c2); + if (!casefold || c2) + hash = partial_name_hash(c2, hash); + } while (--dsize > 0); + } else { + c2 = c; + if (casefold) + c2 = case_fold(c2); + if (!casefold || c2) + hash = partial_name_hash(c2, hash); + } + } + str->hash = end_name_hash(hash); + + return 0; +} + +/* + * Compare strings with HFS+ filename ordering. + * Composed unicode characters are decomposed and case-folding is performed + * if the appropriate bits are (un)set on the superblock. + */ +int hfsplus_compare_dentry(const struct dentry *dentry, + unsigned int len, const char *str, const struct qstr *name) +{ + struct super_block *sb = dentry->d_sb; + int casefold, decompose, size; + int dsize1, dsize2, len1, len2; + const u16 *dstr1, *dstr2; + const char *astr1, *astr2; + u16 c1, c2; + wchar_t c; + u16 dhangul_1[3], dhangul_2[3]; + + casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); + decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); + astr1 = str; + len1 = len; + astr2 = name->name; + len2 = name->len; + dsize1 = dsize2 = 0; + dstr1 = dstr2 = NULL; + + while (len1 > 0 && len2 > 0) { + if (!dsize1) { + size = asc2unichar(sb, astr1, len1, &c); + astr1 += size; + len1 -= size; + + if (decompose) + dstr1 = decompose_unichar(c, &dsize1, + dhangul_1); + if (!decompose || !dstr1) { + c1 = c; + dstr1 = &c1; + dsize1 = 1; + } + } + + if (!dsize2) { + size = asc2unichar(sb, astr2, len2, &c); + astr2 += size; + len2 -= size; + + if (decompose) + dstr2 = decompose_unichar(c, &dsize2, + dhangul_2); + if (!decompose || !dstr2) { + c2 = c; + dstr2 = &c2; + dsize2 = 1; + } + } + + c1 = *dstr1; + c2 = *dstr2; + if (casefold) { + c1 = case_fold(c1); + if (!c1) { + dstr1++; + dsize1--; + continue; + } + c2 = case_fold(c2); + if (!c2) { + dstr2++; + dsize2--; + continue; + } + } + if (c1 < c2) + return -1; + else if (c1 > c2) + return 1; + + dstr1++; + dsize1--; + dstr2++; + dsize2--; + } + + if (len1 < len2) + return -1; + if (len1 > len2) + return 1; + return 0; +} |