summaryrefslogtreecommitdiffstats
path: root/src/testdir/test_regexp_utf8.vim
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-06 02:44:24 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-06 02:44:24 +0000
commit8baab3c8d7a6f22888bd581cd5c6098fd2e4b5a8 (patch)
tree3537e168b860f2742f6029d70501b5ed7d15d345 /src/testdir/test_regexp_utf8.vim
parentInitial commit. (diff)
downloadvim-8baab3c8d7a6f22888bd581cd5c6098fd2e4b5a8.tar.xz
vim-8baab3c8d7a6f22888bd581cd5c6098fd2e4b5a8.zip
Adding upstream version 2:8.1.0875.upstream/2%8.1.0875upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/testdir/test_regexp_utf8.vim')
-rw-r--r--src/testdir/test_regexp_utf8.vim208
1 files changed, 208 insertions, 0 deletions
diff --git a/src/testdir/test_regexp_utf8.vim b/src/testdir/test_regexp_utf8.vim
new file mode 100644
index 0000000..98b9e73
--- /dev/null
+++ b/src/testdir/test_regexp_utf8.vim
@@ -0,0 +1,208 @@
+" Tests for regexp in utf8 encoding
+
+func s:equivalence_test()
+ let str = "AÀÁÂÃÄÅĀĂĄǍǞǠẢ BḂḆ CÇĆĈĊČ DĎĐḊḎḐ EÈÉÊËĒĔĖĘĚẺẼ FḞ GĜĞĠĢǤǦǴḠ HĤĦḢḦḨ IÌÍÎÏĨĪĬĮİǏỈ JĴ KĶǨḰḴ LĹĻĽĿŁḺ MḾṀ NÑŃŅŇṄṈ OÒÓÔÕÖØŌŎŐƠǑǪǬỎ PṔṖ Q RŔŖŘṘṞ SŚŜŞŠṠ TŢŤŦṪṮ UÙÚÛÜŨŪŬŮŰŲƯǓỦ VṼ WŴẀẂẄẆ XẊẌ YÝŶŸẎỲỶỸ ZŹŻŽƵẐẔ aàáâãäåāăąǎǟǡả bḃḇ cçćĉċč dďđḋḏḑ eèéêëēĕėęěẻẽ fḟ gĝğġģǥǧǵḡ hĥħḣḧḩẖ iìíîïĩīĭįǐỉ jĵǰ kķǩḱḵ lĺļľŀłḻ mḿṁ nñńņňʼnṅṉ oòóôõöøōŏőơǒǫǭỏ pṕṗ q rŕŗřṙṟ sśŝşšṡ tţťŧṫṯẗ uùúûüũūŭůűųưǔủ vṽ wŵẁẃẅẇẘ xẋẍ yýÿŷẏẙỳỷỹ zźżžƶẑẕ"
+ let groups = split(str)
+ for group1 in groups
+ for c in split(group1, '\zs')
+ " next statement confirms that equivalence class matches every
+ " character in group
+ call assert_match('^[[=' . c . '=]]*$', group1)
+ for group2 in groups
+ if group2 != group1
+ " next statement converts that equivalence class doesn't match
+ " character in any other group
+ call assert_equal(-1, match(group2, '[[=' . c . '=]]'))
+ endif
+ endfor
+ endfor
+ endfor
+endfunc
+
+func Test_equivalence_re1()
+ set re=1
+ call s:equivalence_test()
+ set re=0
+endfunc
+
+func Test_equivalence_re2()
+ set re=2
+ call s:equivalence_test()
+ set re=0
+endfunc
+
+func s:classes_test()
+ set isprint=@,161-255
+ call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+'))
+
+ let alnumchars = ''
+ let alphachars = ''
+ let backspacechar = ''
+ let blankchars = ''
+ let cntrlchars = ''
+ let digitchars = ''
+ let escapechar = ''
+ let graphchars = ''
+ let lowerchars = ''
+ let printchars = ''
+ let punctchars = ''
+ let returnchar = ''
+ let spacechars = ''
+ let tabchar = ''
+ let upperchars = ''
+ let xdigitchars = ''
+ let identchars = ''
+ let identchars1 = ''
+ let kwordchars = ''
+ let kwordchars1 = ''
+ let fnamechars = ''
+ let fnamechars1 = ''
+ let i = 1
+ while i <= 255
+ let c = nr2char(i)
+ if c =~ '[[:alpha:]]'
+ let alphachars .= c
+ endif
+ if c =~ '[[:alnum:]]'
+ let alnumchars .= c
+ endif
+ if c =~ '[[:backspace:]]'
+ let backspacechar .= c
+ endif
+ if c =~ '[[:blank:]]'
+ let blankchars .= c
+ endif
+ if c =~ '[[:cntrl:]]'
+ let cntrlchars .= c
+ endif
+ if c =~ '[[:digit:]]'
+ let digitchars .= c
+ endif
+ if c =~ '[[:escape:]]'
+ let escapechar .= c
+ endif
+ if c =~ '[[:graph:]]'
+ let graphchars .= c
+ endif
+ if c =~ '[[:lower:]]'
+ let lowerchars .= c
+ endif
+ if c =~ '[[:print:]]'
+ let printchars .= c
+ endif
+ if c =~ '[[:punct:]]'
+ let punctchars .= c
+ endif
+ if c =~ '[[:return:]]'
+ let returnchar .= c
+ endif
+ if c =~ '[[:space:]]'
+ let spacechars .= c
+ endif
+ if c =~ '[[:tab:]]'
+ let tabchar .= c
+ endif
+ if c =~ '[[:upper:]]'
+ let upperchars .= c
+ endif
+ if c =~ '[[:xdigit:]]'
+ let xdigitchars .= c
+ endif
+ if c =~ '[[:ident:]]'
+ let identchars .= c
+ endif
+ if c =~ '\i'
+ let identchars1 .= c
+ endif
+ if c =~ '[[:keyword:]]'
+ let kwordchars .= c
+ endif
+ if c =~ '\k'
+ let kwordchars1 .= c
+ endif
+ if c =~ '[[:fname:]]'
+ let fnamechars .= c
+ endif
+ if c =~ '\f'
+ let fnamechars1 .= c
+ endif
+ let i += 1
+ endwhile
+
+ call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alphachars)
+ call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alnumchars)
+ call assert_equal("\b", backspacechar)
+ call assert_equal("\t ", blankchars)
+ call assert_equal("\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0b\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\e\x1c\x1d\x1e\x1f\x7f", cntrlchars)
+ call assert_equal("0123456789", digitchars)
+ call assert_equal("\<Esc>", escapechar)
+ call assert_equal('!"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~', graphchars)
+ call assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', lowerchars)
+ call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ', printchars)
+ call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', punctchars)
+ call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', upperchars)
+ call assert_equal("\r", returnchar)
+ call assert_equal("\t\n\x0b\f\r ", spacechars)
+ call assert_equal("\t", tabchar)
+ call assert_equal('0123456789ABCDEFabcdef', xdigitchars)
+
+ if has('win32')
+ let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§µÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
+ let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
+ elseif has('ebcdic')
+ let identchars_ok = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz€ŒŽœž¬®µº¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
+ let kwordchars_ok = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz€ŒŽœž¬®µº¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
+ else
+ let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
+ let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
+ endif
+
+ if has('win32')
+ let fnamechars_ok = '!#$%+,-./0123456789:=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]_abcdefghijklmnopqrstuvwxyz{}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
+ elseif has('amiga')
+ let fnamechars_ok = '$+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
+ elseif has('vms')
+ let fnamechars_ok = '#$%+,-./0123456789:;<>ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
+ elseif has('ebcdic')
+ let fnamechars_ok = '#$%+,-./=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
+ else
+ let fnamechars_ok = '#$%+,-./0123456789=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
+ endif
+
+ call assert_equal(identchars_ok, identchars)
+ call assert_equal(kwordchars_ok, kwordchars)
+ call assert_equal(fnamechars_ok, fnamechars)
+
+ call assert_equal(identchars1, identchars)
+ call assert_equal(kwordchars1, kwordchars)
+ call assert_equal(fnamechars1, fnamechars)
+endfunc
+
+func Test_classes_re1()
+ set re=1
+ call s:classes_test()
+ set re=0
+endfunc
+
+func Test_classes_re2()
+ set re=2
+ call s:classes_test()
+ set re=0
+endfunc
+
+func Test_reversed_range()
+ for re in range(0, 2)
+ exe 'set re=' . re
+ call assert_fails('call match("abc def", "[c-a]")', 'E944:')
+ endfor
+ set re=0
+endfunc
+
+func Test_large_class()
+ set re=1
+ call assert_fails('call match("abc def", "[\u3000-\u4000]")', 'E945:')
+ set re=2
+ call assert_equal(0, 'abc def' =~# '[\u3000-\u4000]')
+ call assert_equal(1, "\u3042" =~# '[\u3000-\u4000]')
+ set re=0
+endfunc