summaryrefslogtreecommitdiffstats
path: root/src/regexp.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/regexp.c')
-rw-r--r--src/regexp.c58
1 files changed, 46 insertions, 12 deletions
diff --git a/src/regexp.c b/src/regexp.c
index ff201d9..b020a43 100644
--- a/src/regexp.c
+++ b/src/regexp.c
@@ -1729,7 +1729,9 @@ mb_decompose(int c, int *c1, int *c2, int *c3)
/*
* Compare two strings, ignore case if rex.reg_ic set.
* Return 0 if strings match, non-zero otherwise.
- * Correct the length "*n" when composing characters are ignored.
+ * Correct the length "*n" when composing characters are ignored
+ * or for utf8 when both utf codepoints are considered equal because of
+ * case-folding but have different length (e.g. 's' and 'ſ')
*/
static int
cstrncmp(char_u *s1, char_u *s2, int *n)
@@ -1738,6 +1740,29 @@ cstrncmp(char_u *s1, char_u *s2, int *n)
if (!rex.reg_ic)
result = STRNCMP(s1, s2, *n);
+ else if (enc_utf8)
+ {
+ char_u *p = s1;
+ int n2 = 0;
+ int n1 = *n;
+ // count the number of characters for byte-length of s1
+ while (n1 > 0 && *p != NUL)
+ {
+ n1 -= mb_ptr2len(s1);
+ MB_PTR_ADV(p);
+ n2++;
+ }
+ // count the number of bytes to advance the same number of chars for s2
+ p = s2;
+ while (n2-- > 0 && *p != NUL)
+ MB_PTR_ADV(p);
+
+ n2 = p - s2;
+
+ result = MB_STRNICMP2(s1, s2, *n, n2);
+ if (result == 0 && n2 < *n)
+ *n = n2;
+ }
else
result = MB_STRNICMP(s1, s2, *n);
@@ -1787,7 +1812,7 @@ cstrncmp(char_u *s1, char_u *s2, int *n)
cstrchr(char_u *s, int c)
{
char_u *p;
- int cc;
+ int cc, lc;
if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
return vim_strchr(s, c);
@@ -1796,26 +1821,35 @@ cstrchr(char_u *s, int c)
// faster (esp. when using MS Visual C++!).
// For UTF-8 need to use folded case.
if (enc_utf8 && c > 0x80)
+ {
cc = utf_fold(c);
+ lc = cc;
+ }
else
- if (MB_ISUPPER(c))
- cc = MB_TOLOWER(c);
- else if (MB_ISLOWER(c))
- cc = MB_TOUPPER(c);
- else
- return vim_strchr(s, c);
+ if (MB_ISUPPER(c))
+ {
+ cc = MB_TOLOWER(c);
+ lc = cc;
+ }
+ else if (MB_ISLOWER(c))
+ {
+ cc = MB_TOUPPER(c);
+ lc = c;
+ }
+ else
+ return vim_strchr(s, c);
if (has_mbyte)
{
for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
{
- if (enc_utf8 && c > 0x80)
+ int uc = utf_ptr2char(p);
+ if (enc_utf8 && (c > 0x80 || uc > 0x80))
{
- int uc = utf_ptr2char(p);
-
// Do not match an illegal byte. E.g. 0xff matches 0xc3 0xbf,
// not 0xff.
- if ((uc < 0x80 || uc != *p) && utf_fold(uc) == cc)
+ // compare with lower case of the character
+ if ((uc < 0x80 || uc != *p) && utf_fold(uc) == lc)
return p;
}
else if (*p == c || *p == cc)