diff options
Diffstat (limited to '')
-rw-r--r-- | src/syscall/wtf8_windows_test.go | 200 |
1 files changed, 200 insertions, 0 deletions
diff --git a/src/syscall/wtf8_windows_test.go b/src/syscall/wtf8_windows_test.go new file mode 100644 index 0000000..077f718 --- /dev/null +++ b/src/syscall/wtf8_windows_test.go @@ -0,0 +1,200 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package syscall_test + +import ( + "fmt" + "slices" + "syscall" + "testing" + "unicode/utf16" + "unicode/utf8" + "unsafe" +) + +var wtf8tests = []struct { + str string + wstr []uint16 +}{ + { + str: "\x00", + wstr: []uint16{0x00}, + }, + { + str: "\x5C", + wstr: []uint16{0x5C}, + }, + { + str: "\x7F", + wstr: []uint16{0x7F}, + }, + + // 2-byte + { + str: "\xC2\x80", + wstr: []uint16{0x80}, + }, + { + str: "\xD7\x8A", + wstr: []uint16{0x05CA}, + }, + { + str: "\xDF\xBF", + wstr: []uint16{0x07FF}, + }, + + // 3-byte + { + str: "\xE0\xA0\x80", + wstr: []uint16{0x0800}, + }, + { + str: "\xE2\xB0\xBC", + wstr: []uint16{0x2C3C}, + }, + { + str: "\xEF\xBF\xBF", + wstr: []uint16{0xFFFF}, + }, + // unmatched surrogate halves + // high surrogates: 0xD800 to 0xDBFF + { + str: "\xED\xA0\x80", + wstr: []uint16{0xD800}, + }, + { + // "High surrogate followed by another high surrogate" + str: "\xED\xA0\x80\xED\xA0\x80", + wstr: []uint16{0xD800, 0xD800}, + }, + { + // "High surrogate followed by a symbol that is not a surrogate" + str: string([]byte{0xED, 0xA0, 0x80, 0xA}), + wstr: []uint16{0xD800, 0xA}, + }, + { + // "Unmatched high surrogate, followed by a surrogate pair, followed by an unmatched high surrogate" + str: string([]byte{0xED, 0xA0, 0x80, 0xF0, 0x9D, 0x8C, 0x86, 0xED, 0xA0, 0x80}), + wstr: []uint16{0xD800, 0xD834, 0xDF06, 0xD800}, + }, + { + str: "\xED\xA6\xAF", + wstr: []uint16{0xD9AF}, + }, + { + str: "\xED\xAF\xBF", + wstr: []uint16{0xDBFF}, + }, + // low surrogates: 0xDC00 to 0xDFFF + { + str: "\xED\xB0\x80", + wstr: []uint16{0xDC00}, + }, + { + // "Low surrogate followed by another low surrogate" + str: "\xED\xB0\x80\xED\xB0\x80", + wstr: []uint16{0xDC00, 0xDC00}, + }, + { + // "Low surrogate followed by a symbol that is not a surrogate" + str: string([]byte{0xED, 0xB0, 0x80, 0xA}), + wstr: []uint16{0xDC00, 0xA}, + }, + { + // "Unmatched low surrogate, followed by a surrogate pair, followed by an unmatched low surrogate" + str: string([]byte{0xED, 0xB0, 0x80, 0xF0, 0x9D, 0x8C, 0x86, 0xED, 0xB0, 0x80}), + wstr: []uint16{0xDC00, 0xD834, 0xDF06, 0xDC00}, + }, + { + str: "\xED\xBB\xAE", + wstr: []uint16{0xDEEE}, + }, + { + str: "\xED\xBF\xBF", + wstr: []uint16{0xDFFF}, + }, + + // 4-byte + { + str: "\xF0\x90\x80\x80", + wstr: []uint16{0xD800, 0xDC00}, + }, + { + str: "\xF0\x9D\x8C\x86", + wstr: []uint16{0xD834, 0xDF06}, + }, + { + str: "\xF4\x8F\xBF\xBF", + wstr: []uint16{0xDBFF, 0xDFFF}, + }, +} + +func TestWTF16Rountrip(t *testing.T) { + for _, tt := range wtf8tests { + t.Run(fmt.Sprintf("%X", tt.str), func(t *testing.T) { + got := syscall.EncodeWTF16(tt.str, nil) + got2 := string(syscall.DecodeWTF16(got, nil)) + if got2 != tt.str { + t.Errorf("got:\n%s\nwant:\n%s", got2, tt.str) + } + }) + } +} + +func TestWTF16Golden(t *testing.T) { + for _, tt := range wtf8tests { + t.Run(fmt.Sprintf("%X", tt.str), func(t *testing.T) { + got := syscall.EncodeWTF16(tt.str, nil) + if !slices.Equal(got, tt.wstr) { + t.Errorf("got:\n%v\nwant:\n%v", got, tt.wstr) + } + }) + } +} + +func FuzzEncodeWTF16(f *testing.F) { + for _, tt := range wtf8tests { + f.Add(tt.str) + } + f.Fuzz(func(t *testing.T, b string) { + // test that there are no panics + got := syscall.EncodeWTF16(b, nil) + syscall.DecodeWTF16(got, nil) + if utf8.ValidString(b) { + // if the input is a valid UTF-8 string, then + // test that syscall.EncodeWTF16 behaves as + // utf16.Encode + want := utf16.Encode([]rune(b)) + if !slices.Equal(got, want) { + t.Errorf("got:\n%v\nwant:\n%v", got, want) + } + } + }) +} + +func FuzzDecodeWTF16(f *testing.F) { + for _, tt := range wtf8tests { + b := unsafe.Slice((*uint8)(unsafe.Pointer(unsafe.SliceData(tt.wstr))), len(tt.wstr)*2) + f.Add(b) + } + f.Fuzz(func(t *testing.T, b []byte) { + u16 := unsafe.Slice((*uint16)(unsafe.Pointer(unsafe.SliceData(b))), len(b)/2) + got := syscall.DecodeWTF16(u16, nil) + if utf8.Valid(got) { + // if the input is a valid UTF-8 string, then + // test that syscall.DecodeWTF16 behaves as + // utf16.Decode + want := utf16.Decode(u16) + if string(got) != string(want) { + t.Errorf("got:\n%s\nwant:\n%s", string(got), string(want)) + } + } + // WTF-8 should always roundtrip + got2 := syscall.EncodeWTF16(string(got), nil) + if !slices.Equal(got2, u16) { + t.Errorf("got:\n%v\nwant:\n%v", got2, u16) + } + }) +} |