summaryrefslogtreecommitdiffstats
path: root/src/syscall/wtf8_windows_test.go
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/syscall/wtf8_windows_test.go200
1 files changed, 200 insertions, 0 deletions
diff --git a/src/syscall/wtf8_windows_test.go b/src/syscall/wtf8_windows_test.go
new file mode 100644
index 0000000..077f718
--- /dev/null
+++ b/src/syscall/wtf8_windows_test.go
@@ -0,0 +1,200 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syscall_test
+
+import (
+ "fmt"
+ "slices"
+ "syscall"
+ "testing"
+ "unicode/utf16"
+ "unicode/utf8"
+ "unsafe"
+)
+
+// wtf8tests pairs a Go string (str) with the sequence of 16-bit code units
+// (wstr) that TestWTF16Golden expects syscall.EncodeWTF16 to produce for it.
+// The same entries are reused by the round-trip test and as seed inputs for
+// the fuzz targets in this file. Entries are grouped by the byte length of
+// the encoded code point; the 3-byte group also covers surrogate halves that
+// appear without a matching partner.
+var wtf8tests = []struct {
+ str string
+ wstr []uint16
+}{
+ // 1-byte
+ {
+ str: "\x00",
+ wstr: []uint16{0x00},
+ },
+ {
+ str: "\x5C",
+ wstr: []uint16{0x5C},
+ },
+ {
+ str: "\x7F",
+ wstr: []uint16{0x7F},
+ },
+
+ // 2-byte
+ {
+ str: "\xC2\x80",
+ wstr: []uint16{0x80},
+ },
+ {
+ str: "\xD7\x8A",
+ wstr: []uint16{0x05CA},
+ },
+ {
+ str: "\xDF\xBF",
+ wstr: []uint16{0x07FF},
+ },
+
+ // 3-byte
+ {
+ str: "\xE0\xA0\x80",
+ wstr: []uint16{0x0800},
+ },
+ {
+ str: "\xE2\xB0\xBC",
+ wstr: []uint16{0x2C3C},
+ },
+ {
+ str: "\xEF\xBF\xBF",
+ wstr: []uint16{0xFFFF},
+ },
+ // unmatched surrogate halves
+ // high surrogates: 0xD800 to 0xDBFF
+ {
+ str: "\xED\xA0\x80",
+ wstr: []uint16{0xD800},
+ },
+ {
+ // "High surrogate followed by another high surrogate"
+ str: "\xED\xA0\x80\xED\xA0\x80",
+ wstr: []uint16{0xD800, 0xD800},
+ },
+ {
+ // "High surrogate followed by a symbol that is not a surrogate"
+ str: string([]byte{0xED, 0xA0, 0x80, 0xA}),
+ wstr: []uint16{0xD800, 0xA},
+ },
+ {
+ // "Unmatched high surrogate, followed by a surrogate pair, followed by an unmatched high surrogate"
+ str: string([]byte{0xED, 0xA0, 0x80, 0xF0, 0x9D, 0x8C, 0x86, 0xED, 0xA0, 0x80}),
+ wstr: []uint16{0xD800, 0xD834, 0xDF06, 0xD800},
+ },
+ {
+ str: "\xED\xA6\xAF",
+ wstr: []uint16{0xD9AF},
+ },
+ {
+ str: "\xED\xAF\xBF",
+ wstr: []uint16{0xDBFF},
+ },
+ // low surrogates: 0xDC00 to 0xDFFF
+ {
+ str: "\xED\xB0\x80",
+ wstr: []uint16{0xDC00},
+ },
+ {
+ // "Low surrogate followed by another low surrogate"
+ str: "\xED\xB0\x80\xED\xB0\x80",
+ wstr: []uint16{0xDC00, 0xDC00},
+ },
+ {
+ // "Low surrogate followed by a symbol that is not a surrogate"
+ str: string([]byte{0xED, 0xB0, 0x80, 0xA}),
+ wstr: []uint16{0xDC00, 0xA},
+ },
+ {
+ // "Unmatched low surrogate, followed by a surrogate pair, followed by an unmatched low surrogate"
+ str: string([]byte{0xED, 0xB0, 0x80, 0xF0, 0x9D, 0x8C, 0x86, 0xED, 0xB0, 0x80}),
+ wstr: []uint16{0xDC00, 0xD834, 0xDF06, 0xDC00},
+ },
+ {
+ str: "\xED\xBB\xAE",
+ wstr: []uint16{0xDEEE},
+ },
+ {
+ str: "\xED\xBF\xBF",
+ wstr: []uint16{0xDFFF},
+ },
+
+ // 4-byte
+ {
+ str: "\xF0\x90\x80\x80",
+ wstr: []uint16{0xD800, 0xDC00},
+ },
+ {
+ str: "\xF0\x9D\x8C\x86",
+ wstr: []uint16{0xD834, 0xDF06},
+ },
+ {
+ str: "\xF4\x8F\xBF\xBF",
+ wstr: []uint16{0xDBFF, 0xDFFF},
+ },
+}
+
+// TestWTF16Rountrip checks, for every entry of wtf8tests, that passing the
+// string through syscall.EncodeWTF16 and then syscall.DecodeWTF16 yields the
+// original string again. Each entry runs as a subtest named after the
+// upper-case hex dump of its input.
+func TestWTF16Rountrip(t *testing.T) {
+	for _, tt := range wtf8tests {
+		t.Run(fmt.Sprintf("%X", tt.str), func(t *testing.T) {
+			got := syscall.EncodeWTF16(tt.str, nil)
+			got2 := string(syscall.DecodeWTF16(got, nil))
+			if got2 != tt.str {
+				// Report with %q rather than %s: the table contains NUL,
+				// newline and bytes that are not valid UTF-8, which %s
+				// would write to the test log verbatim and ambiguously.
+				t.Errorf("got:\n%q\nwant:\n%q", got2, tt.str)
+			}
+		})
+	}
+}
+
+// TestWTF16Golden verifies that syscall.EncodeWTF16 turns each str in
+// wtf8tests into exactly the code units recorded in the matching wstr.
+// Subtests are named after the upper-case hex dump of the input string.
+func TestWTF16Golden(t *testing.T) {
+	for _, tc := range wtf8tests {
+		label := fmt.Sprintf("%X", tc.str)
+		t.Run(label, func(t *testing.T) {
+			encoded := syscall.EncodeWTF16(tc.str, nil)
+			if slices.Equal(encoded, tc.wstr) {
+				return
+			}
+			t.Errorf("got:\n%v\nwant:\n%v", encoded, tc.wstr)
+		})
+	}
+}
+
+// FuzzEncodeWTF16 fuzzes syscall.EncodeWTF16 with arbitrary strings, seeded
+// from the str field of every wtf8tests entry. Each input is encoded and the
+// result decoded again so that a panic in either direction fails the target.
+// For inputs that are valid UTF-8 the encoding must additionally match
+// utf16.Encode.
+func FuzzEncodeWTF16(f *testing.F) {
+	for _, tc := range wtf8tests {
+		f.Add(tc.str)
+	}
+	f.Fuzz(func(t *testing.T, in string) {
+		// Exercise both directions; the decode result is deliberately
+		// discarded because only the absence of a panic matters here.
+		encoded := syscall.EncodeWTF16(in, nil)
+		syscall.DecodeWTF16(encoded, nil)
+
+		if !utf8.ValidString(in) {
+			return
+		}
+		// Valid UTF-8 input: compare against the standard UTF-16 encoder.
+		if want := utf16.Encode([]rune(in)); !slices.Equal(encoded, want) {
+			t.Errorf("got:\n%v\nwant:\n%v", encoded, want)
+		}
+	})
+}
+
+// FuzzDecodeWTF16 fuzzes syscall.DecodeWTF16 with arbitrary 16-bit code unit
+// sequences. The fuzzer supplies a []byte, which is reinterpreted in place as
+// a []uint16; the seed corpus is built the other way round from the wstr
+// field of every wtf8tests entry. Two properties are checked: whenever the
+// decoded bytes are valid UTF-8 they must equal what utf16.Decode produces,
+// and re-encoding the decoded bytes must always give back the input.
+func FuzzDecodeWTF16(f *testing.F) {
+	for _, tt := range wtf8tests {
+		// View the golden code units as raw bytes (two per uint16).
+		b := unsafe.Slice((*uint8)(unsafe.Pointer(unsafe.SliceData(tt.wstr))), len(tt.wstr)*2)
+		f.Add(b)
+	}
+	f.Fuzz(func(t *testing.T, b []byte) {
+		// View the fuzz input as uint16 values; len(b)/2 drops a trailing
+		// odd byte.
+		u16 := unsafe.Slice((*uint16)(unsafe.Pointer(unsafe.SliceData(b))), len(b)/2)
+		got := syscall.DecodeWTF16(u16, nil)
+		if utf8.Valid(got) {
+			// if the decoded output is valid UTF-8, then
+			// test that syscall.DecodeWTF16 behaves as
+			// utf16.Decode
+			want := utf16.Decode(u16)
+			if string(got) != string(want) {
+				// %q keeps control characters in fuzz-derived text
+				// visible and unambiguous in the failure log.
+				t.Errorf("got:\n%q\nwant:\n%q", string(got), string(want))
+			}
+		}
+		// WTF-8 should always roundtrip
+		got2 := syscall.EncodeWTF16(string(got), nil)
+		if !slices.Equal(got2, u16) {
+			t.Errorf("got:\n%v\nwant:\n%v", got2, u16)
+		}
+	})
+}