diff options
Diffstat (limited to 'src/regexp.c')
-rw-r--r-- | src/regexp.c | 58 |
1 files changed, 46 insertions, 12 deletions
diff --git a/src/regexp.c b/src/regexp.c index ff201d9..b020a43 100644 --- a/src/regexp.c +++ b/src/regexp.c @@ -1729,7 +1729,9 @@ mb_decompose(int c, int *c1, int *c2, int *c3) /* * Compare two strings, ignore case if rex.reg_ic set. * Return 0 if strings match, non-zero otherwise. - * Correct the length "*n" when composing characters are ignored. + * Correct the length "*n" when composing characters are ignored + * or for utf8 when both utf codepoints are considered equal because of + * case-folding but have different length (e.g. 's' and 'ſ') */ static int cstrncmp(char_u *s1, char_u *s2, int *n) @@ -1738,6 +1740,29 @@ cstrncmp(char_u *s1, char_u *s2, int *n) if (!rex.reg_ic) result = STRNCMP(s1, s2, *n); + else if (enc_utf8) + { + char_u *p = s1; + int n2 = 0; + int n1 = *n; + // count the number of characters for byte-length of s1 + while (n1 > 0 && *p != NUL) + { + n1 -= mb_ptr2len(s1); + MB_PTR_ADV(p); + n2++; + } + // count the number of bytes to advance the same number of chars for s2 + p = s2; + while (n2-- > 0 && *p != NUL) + MB_PTR_ADV(p); + + n2 = p - s2; + + result = MB_STRNICMP2(s1, s2, *n, n2); + if (result == 0 && n2 < *n) + *n = n2; + } else result = MB_STRNICMP(s1, s2, *n); @@ -1787,7 +1812,7 @@ cstrncmp(char_u *s1, char_u *s2, int *n) cstrchr(char_u *s, int c) { char_u *p; - int cc; + int cc, lc; if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1)) return vim_strchr(s, c); @@ -1796,26 +1821,35 @@ cstrchr(char_u *s, int c) // faster (esp. when using MS Visual C++!). // For UTF-8 need to use folded case. if (enc_utf8 && c > 0x80) + { cc = utf_fold(c); + lc = cc; + } else - if (MB_ISUPPER(c)) - cc = MB_TOLOWER(c); - else if (MB_ISLOWER(c)) - cc = MB_TOUPPER(c); - else - return vim_strchr(s, c); + if (MB_ISUPPER(c)) + { + cc = MB_TOLOWER(c); + lc = cc; + } + else if (MB_ISLOWER(c)) + { + cc = MB_TOUPPER(c); + lc = c; + } + else + return vim_strchr(s, c); if (has_mbyte) { for (p = s; *p != NUL; p += (*mb_ptr2len)(p)) { - if (enc_utf8 && c > 0x80) + int uc = utf_ptr2char(p); + if (enc_utf8 && (c > 0x80 || uc > 0x80)) { - int uc = utf_ptr2char(p); - // Do not match an illegal byte. E.g. 0xff matches 0xc3 0xbf, // not 0xff. - if ((uc < 0x80 || uc != *p) && utf_fold(uc) == cc) + // compare with lower case of the character + if ((uc < 0x80 || uc != *p) && utf_fold(uc) == lc) return p; } else if (*p == c || *p == cc) |