summary refs log tree commit diff stats
path: root/third_party/wasm2c/src/test-utf8.cc
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/wasm2c/src/test-utf8.cc')
-rw-r--r-- third_party/wasm2c/src/test-utf8.cc 167
1 files changed, 167 insertions, 0 deletions
diff --git a/third_party/wasm2c/src/test-utf8.cc b/third_party/wasm2c/src/test-utf8.cc
new file mode 100644
index 0000000000..d19077e951
--- /dev/null
+++ b/third_party/wasm2c/src/test-utf8.cc
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2017 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gtest/gtest.h"
+
+#include "wabt/utf8.h"
+
+using namespace wabt;
+
+namespace {
+
+// Checks that IsValidUtf8 returns `expected` for the first `length` bytes of
+// the sequence {cu0, cu1, cu2, cu3}. Each code unit is narrowed to `char`
+// before being stored in the buffer.
+//
+// When `expected` is true, every non-empty proper prefix of the sequence is
+// additionally required to be rejected. That extra check only makes sense
+// when the bytes form a single encoded character, since a prefix of several
+// concatenated characters could itself be valid.
+//
+// On failure the four code units are appended to the gtest message. A fatal
+// gtest failure returns from this helper only, not from the calling test.
+void assert_is_valid_utf8(bool expected,
+                          int length,
+                          int cu0 = 0,
+                          int cu1 = 0,
+                          int cu2 = 0,
+                          int cu3 = 0) {
+  // gtest assertions are used instead of assert(): they stay active when
+  // NDEBUG is defined and need no <cassert>. The lower bound also rejects a
+  // negative length, so IsValidUtf8 is never asked to read past `buf`.
+  ASSERT_GE(length, 0);
+  ASSERT_LE(length, 4);
+  char buf[4] = {static_cast<char>(cu0), static_cast<char>(cu1),
+                 static_cast<char>(cu2), static_cast<char>(cu3)};
+  if (expected) {
+    // Make sure it fails if there are continuation bytes past the end of the
+    // string, i.e. every truncated form of a valid sequence is rejected.
+    for (int bad_length = 1; bad_length < length; ++bad_length) {
+      ASSERT_FALSE(IsValidUtf8(buf, bad_length))
+          << cu0 << ", " << cu1 << ", " << cu2 << ", " << cu3;
+    }
+  }
+
+  // ASSERT_EQ reports both the expected and the actual value on mismatch.
+  ASSERT_EQ(expected, IsValidUtf8(buf, length))
+      << cu0 << ", " << cu1 << ", " << cu2 << ", " << cu3;
+}
+
+// Returns true when `x` lies in the half-open interval [low, high).
+bool is_in_range(int x, int low, int high) {
+  if (x < low) {
+    return false;
+  }
+  return x < high;
+}
+
+} // end anonymous namespace
+
+// FOR_RANGE declares an int named `var` and iterates it over [low, high).
+// The bounds are parenthesized so that expression arguments (for example
+// `cond ? a : b`) expand with the intended precedence.
+#define FOR_RANGE(var, low, high) \
+  for (int var = (low); var < (high); var++)
+// FOR_EACH_BYTE iterates `var` over every byte value, 0x00 through 0xff.
+#define FOR_EACH_BYTE(var) FOR_RANGE(var, 0, 0x100)
+
+// A zero-length string is expected to be reported as valid.
+TEST(utf8, valid_empty) {
+  const bool expect_valid = true;
+  const int byte_count = 0;
+  assert_is_valid_utf8(expect_valid, byte_count);
+}
+
+// Every byte from 0x00 through 0x7f is expected to be valid on its own.
+TEST(utf8, valid_1_byte) {
+  for (int single = 0; single < 0x80; single++) {
+    assert_is_valid_utf8(true, 1, single);
+  }
+}
+
+// A byte from 0x80 through 0xbf (the continuation range) appearing on its own
+// is expected to be rejected.
+TEST(utf8, invalid_continuation_bytes) {
+  for (int stray = 0x80; stray < 0xc0; stray++) {
+    assert_is_valid_utf8(false, 1, stray);
+  }
+}
+
+// 0xc0 and 0xc1 could only introduce overlong two-byte encodings of code
+// points below 0x80, so they must be rejected both on their own and when
+// followed by any second byte.
+TEST(utf8, invalid_2_byte) {
+  FOR_RANGE(cu0, 0xc0, 0xc2) {
+    assert_is_valid_utf8(false, 1, cu0);
+    // A lone lead byte is rejected for every two-byte lead (the sequence is
+    // truncated), so also exercise the complete two-byte forms: that is where
+    // 0xc0/0xc1 differ from the valid leads 0xc2 through 0xdf.
+    FOR_EACH_BYTE(cu1) { assert_is_valid_utf8(false, 2, cu0, cu1); }
+  }
+}
+
+// For each lead byte 0xc2 through 0xdf, a two-byte string is expected to be
+// valid exactly when the second byte is in [0x80, 0xc0).
+TEST(utf8, valid_2_bytes) {
+  for (int lead = 0xc2; lead < 0xe0; lead++) {
+    for (int second = 0; second < 0x100; second++) {
+      const bool expect_valid = is_in_range(second, 0x80, 0xc0);
+      assert_is_valid_utf8(expect_valid, 2, lead, second);
+    }
+  }
+}
+
+// With lead byte 0xe0 the second byte is restricted to [0xa0, 0xc0); the
+// third byte may be anything in [0x80, 0xc0). All 256 * 256 combinations of
+// the trailing bytes are checked.
+TEST(utf8, valid_3_bytes_e0) {
+  const int lead = 0xe0;
+  FOR_EACH_BYTE(second) {
+    const bool second_ok = is_in_range(second, 0xa0, 0xc0);
+    FOR_EACH_BYTE(third) {
+      const bool third_ok = is_in_range(third, 0x80, 0xc0);
+      assert_is_valid_utf8(second_ok && third_ok, 3, lead, second, third);
+    }
+  }
+}
+
+// For lead bytes 0xe1 through 0xef other than 0xed, a three-byte string is
+// expected to be valid exactly when both trailing bytes are in [0x80, 0xc0).
+TEST(utf8, valid_3_bytes) {
+  for (int lead = 0xe1; lead < 0xf0; lead++) {
+    // 0xed uses a different second-byte range; valid_3_bytes_ed covers it.
+    if (lead != 0xed) {
+      for (int second = 0; second < 0x100; second++) {
+        const bool second_ok = is_in_range(second, 0x80, 0xc0);
+        for (int third = 0; third < 0x100; third++) {
+          const bool third_ok = is_in_range(third, 0x80, 0xc0);
+          assert_is_valid_utf8(second_ok && third_ok, 3, lead, second, third);
+        }
+      }
+    }
+  }
+}
+
+// With lead byte 0xed the second byte is restricted to [0x80, 0xa0); the
+// third byte may be anything in [0x80, 0xc0).
+TEST(utf8, valid_3_bytes_ed) {
+  const int lead = 0xed;
+  for (int second = 0; second < 0x100; second++) {
+    const bool second_ok = is_in_range(second, 0x80, 0xa0);
+    for (int third = 0; third < 0x100; third++) {
+      const bool third_ok = is_in_range(third, 0x80, 0xc0);
+      assert_is_valid_utf8(second_ok && third_ok, 3, lead, second, third);
+    }
+  }
+}
+
+// With lead byte 0xf0 the second byte is restricted to [0x90, 0xc0); the
+// third and fourth bytes may be anything in [0x80, 0xc0). Every combination
+// of the three trailing bytes is checked.
+TEST(utf8, valid_4_bytes_f0) {
+  const int lead = 0xf0;
+  FOR_EACH_BYTE(second) {
+    const bool second_ok = is_in_range(second, 0x90, 0xc0);
+    FOR_EACH_BYTE(third) {
+      const bool prefix_ok = second_ok && is_in_range(third, 0x80, 0xc0);
+      FOR_EACH_BYTE(fourth) {
+        const bool expect_valid = prefix_ok && is_in_range(fourth, 0x80, 0xc0);
+        assert_is_valid_utf8(expect_valid, 4, lead, second, third, fourth);
+      }
+    }
+  }
+}
+
+// For lead bytes 0xf1 through 0xf3, a four-byte string is expected to be
+// valid exactly when all three trailing bytes are in [0x80, 0xc0).
+TEST(utf8, valid_4_bytes) {
+  for (int lead = 0xf1; lead < 0xf4; lead++) {
+    for (int second = 0; second < 0x100; second++) {
+      const bool second_ok = is_in_range(second, 0x80, 0xc0);
+      for (int third = 0; third < 0x100; third++) {
+        const bool prefix_ok = second_ok && is_in_range(third, 0x80, 0xc0);
+        for (int fourth = 0; fourth < 0x100; fourth++) {
+          const bool expect_valid =
+              prefix_ok && is_in_range(fourth, 0x80, 0xc0);
+          assert_is_valid_utf8(expect_valid, 4, lead, second, third, fourth);
+        }
+      }
+    }
+  }
+}
+
+// With lead byte 0xf4 the second byte is restricted to [0x80, 0x90); the
+// third and fourth bytes may be anything in [0x80, 0xc0).
+TEST(utf8, valid_4_bytes_f4) {
+  const int lead = 0xf4;
+  FOR_EACH_BYTE(second) {
+    const bool second_ok = is_in_range(second, 0x80, 0x90);
+    FOR_EACH_BYTE(third) {
+      const bool prefix_ok = second_ok && is_in_range(third, 0x80, 0xc0);
+      FOR_EACH_BYTE(fourth) {
+        const bool expect_valid = prefix_ok && is_in_range(fourth, 0x80, 0xc0);
+        assert_is_valid_utf8(expect_valid, 4, lead, second, third, fourth);
+      }
+    }
+  }
+}
+
+// Lead bytes 0xf5 through 0xff are expected to be rejected even when they are
+// followed by three bytes taken from the continuation range.
+TEST(utf8, invalid_4_bytes) {
+  const int trailing = 0x80;
+  for (int lead = 0xf5; lead < 0x100; lead++) {
+    assert_is_valid_utf8(false, 4, lead, trailing, trailing, trailing);
+  }
+}