diff options
Diffstat (limited to 'src/lib/util/tests/str_unittests.cc')
-rw-r--r-- | src/lib/util/tests/str_unittests.cc | 529 |
1 files changed, 529 insertions, 0 deletions
diff --git a/src/lib/util/tests/str_unittests.cc b/src/lib/util/tests/str_unittests.cc new file mode 100644 index 0000000..78d28ce --- /dev/null +++ b/src/lib/util/tests/str_unittests.cc @@ -0,0 +1,529 @@ +// Copyright (C) 2011-2024 Internet Systems Consortium, Inc. ("ISC") +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include <config.h> + +#include <testutils/gtest_utils.h> +#include <util/encode/encode.h> +#include <util/str.h> + +#include <cstdint> +#include <exception> +#include <sstream> +#include <string> +#include <unordered_set> +#include <vector> + +#include <gtest/gtest.h> + +using namespace isc; +using namespace isc::util; +using namespace isc::util::encode; +using namespace isc::util::str; +using namespace std; + +namespace { + +/// @brief Fixture used to test StringSanitizer. +struct StringUtilTest : ::testing::Test { + /// @brief Pass string through scrub and check the result. + /// + /// @param original The string to sanitize. + /// @param char_set The regular expression string describing invalid characters. + /// @param char_replacement - character(s) which replace invalid + /// characters + /// @param expected - expected sanitized string + void checkScrub(const string& original, + const string& char_set, + const string& char_replacement, + const string& expected) { + StringSanitizerPtr ss; + string sanitized; + + try { + ss.reset(new StringSanitizer(char_set, char_replacement)); + } catch (const exception& ex) { + ADD_FAILURE() << "Could not construct sanitizer:" << ex.what(); + return; + } + + try { + sanitized = ss->scrub(original); + } catch (const exception& ex) { + ADD_FAILURE() << "Could not scrub string:" << ex.what(); + return; + } + + EXPECT_EQ(sanitized, expected); + } + + /// @brief Check that hex strings with colons can be decoded. + /// + /// @param input Input string to be decoded. + /// @param reference The expected result. + void checkColonSeparated(const string& input, const string& reference) { + // Create a reference vector. + vector<uint8_t> reference_vector; + ASSERT_NO_THROW_LOG(decodeHex(reference, reference_vector)); + + // Fill the output vector with some garbage to make sure that + // the data is erased when a string is decoded successfully. + vector<uint8_t> decoded(1, 10); + ASSERT_NO_THROW_LOG(decodeColonSeparatedHexString(input, decoded)); + + // Get the string representation of the decoded data for logging + // purposes. + string encoded; + ASSERT_NO_THROW_LOG(encoded = encodeHex(decoded)); + + // Check if the decoded data matches the reference. + EXPECT_EQ(decoded, reference_vector) << "decoded data don't match the reference, input='" + << input << "', reference='" << reference + << "'" + ", decoded='" + << encoded << "'"; + } + + /// @brief Check that formatted hex strings can be decoded. + /// + /// @param input Input string to be decoded. + /// @param reference The expected result. + void checkFormatted(const string& input, const string& reference) { + // Create a reference vector. + vector<uint8_t> reference_vector; + ASSERT_NO_THROW_LOG(decodeHex(reference, reference_vector)); + + // Fill the output vector with some garbage to make sure that + // the data is erased when a string is decoded successfully. + vector<uint8_t> decoded(1, 10); + ASSERT_NO_THROW_LOG(decodeFormattedHexString(input, decoded)); + + // Get the string representation of the decoded data for logging + // purposes. + string encoded; + ASSERT_NO_THROW_LOG(encoded = encodeHex(decoded)); + + // Check if the decoded data matches the reference. + EXPECT_EQ(decoded, reference_vector) + << "decoded data don't match the reference, input='" << input << "', reference='" + << reference << "', decoded='" << encoded << "'"; + } + + /// @brief Convenience function which calls quotedStringToBinary + /// and converts returned vector back to string. + /// + /// @param s Input string. + /// + /// @return String holding a copy of a vector returned by the + /// quotedStringToBinary. + string checkQuoted(const string& s) { + vector<uint8_t> vec = quotedStringToBinary(s); + string s2(vec.begin(), vec.end()); + return (s2); + } +}; + +// Check that leading and trailing space trimming works. +TEST_F(StringUtilTest, Trim) { + // Empty and full string. + EXPECT_EQ("", trim("")); + EXPECT_EQ("abcxyz", trim("abcxyz")); + + // Trim right-most blanks + EXPECT_EQ("ABC", trim("ABC ")); + EXPECT_EQ("ABC", trim("ABC\t\t \n\t")); + + // Left-most blank trimming + EXPECT_EQ("XYZ", trim(" XYZ")); + EXPECT_EQ("XYZ", trim("\t\t \tXYZ")); + + // Right and left, with embedded spaces + EXPECT_EQ("MN \t OP", trim("\t\tMN \t OP \t")); +} + +// Check tokenization. +TEST_F(StringUtilTest, Tokens) { + vector<string> result; + + // Default delimiters + + // Degenerate cases + result = tokens(""); // Empty string + EXPECT_EQ(0, result.size()); + + result = tokens(" \n "); // String is all delimiters + EXPECT_EQ(0, result.size()); + + result = tokens("abc"); // String has no delimiters + ASSERT_EQ(1, result.size()); + EXPECT_EQ(string("abc"), result[0]); + + // String containing leading and/or trailing delimiters, no embedded ones. + result = tokens("\txyz"); // One leading delimiter + ASSERT_EQ(1, result.size()); + EXPECT_EQ(string("xyz"), result[0]); + + result = tokens("\t \nxyz"); // Multiple leading delimiters + ASSERT_EQ(1, result.size()); + EXPECT_EQ(string("xyz"), result[0]); + + result = tokens("xyz\n"); // One trailing delimiter + ASSERT_EQ(1, result.size()); + EXPECT_EQ(string("xyz"), result[0]); + + result = tokens("xyz \t"); // Multiple trailing + ASSERT_EQ(1, result.size()); + EXPECT_EQ(string("xyz"), result[0]); + + result = tokens("\t xyz \n"); // Leading and trailing + ASSERT_EQ(1, result.size()); + EXPECT_EQ(string("xyz"), result[0]); + + // Embedded delimiters + result = tokens("abc\ndef"); // 2 tokens, one separator + ASSERT_EQ(2, result.size()); + EXPECT_EQ(string("abc"), result[0]); + EXPECT_EQ(string("def"), result[1]); + + result = tokens("abc\t\t\ndef"); // 2 tokens, 3 separators + ASSERT_EQ(2, result.size()); + EXPECT_EQ(string("abc"), result[0]); + EXPECT_EQ(string("def"), result[1]); + + result = tokens("abc\n \tdef\t\tghi"); + ASSERT_EQ(3, result.size()); // Multiple tokens, many delims + EXPECT_EQ(string("abc"), result[0]); + EXPECT_EQ(string("def"), result[1]); + EXPECT_EQ(string("ghi"), result[2]); + + // Embedded and non-embedded delimiters + + result = tokens("\t\t \nabc\n \tdef\t\tghi \n\n"); + ASSERT_EQ(3, result.size()); // Multiple tokens, many delims + EXPECT_EQ(string("abc"), result[0]); + EXPECT_EQ(string("def"), result[1]); + EXPECT_EQ(string("ghi"), result[2]); + + // Non-default delimiter + result = tokens("alpha/beta/ /gamma//delta/epsilon/", "/"); + ASSERT_EQ(6, result.size()); + EXPECT_EQ(string("alpha"), result[0]); + EXPECT_EQ(string("beta"), result[1]); + EXPECT_EQ(string(" "), result[2]); + EXPECT_EQ(string("gamma"), result[3]); + EXPECT_EQ(string("delta"), result[4]); + EXPECT_EQ(string("epsilon"), result[5]); + + // Non-default delimiters (plural) + result = tokens("+*--alpha*beta+ -gamma**delta+epsilon-+**", "*+-"); + ASSERT_EQ(6, result.size()); + EXPECT_EQ(string("alpha"), result[0]); + EXPECT_EQ(string("beta"), result[1]); + EXPECT_EQ(string(" "), result[2]); + EXPECT_EQ(string("gamma"), result[3]); + EXPECT_EQ(string("delta"), result[4]); + EXPECT_EQ(string("epsilon"), result[5]); + + // Escaped delimiter + result = tokens("foo\\,bar", ",", true); + EXPECT_EQ(1, result.size()); + EXPECT_EQ(string("foo,bar"), result[0]); + + // Escaped escape + result = tokens("foo\\\\,bar", ",", true); + ASSERT_EQ(2, result.size()); + EXPECT_EQ(string("foo\\"), result[0]); + EXPECT_EQ(string("bar"), result[1]); + + // Double escapes + result = tokens("foo\\\\\\\\,\\bar", ",", true); + ASSERT_EQ(2, result.size()); + EXPECT_EQ(string("foo\\\\"), result[0]); + EXPECT_EQ(string("\\bar"), result[1]); + + // Escaped standard character + result = tokens("fo\\o,bar", ",", true); + ASSERT_EQ(2, result.size()); + EXPECT_EQ(string("fo\\o"), result[0]); + EXPECT_EQ(string("bar"), result[1]); + + // Escape at the end + result = tokens("foo,bar\\", ",", true); + ASSERT_EQ(2, result.size()); + EXPECT_EQ(string("foo"), result[0]); + EXPECT_EQ(string("bar\\"), result[1]); + + // Escape opening a token + result = tokens("foo,\\,,bar", ",", true); + ASSERT_EQ(3, result.size()); + EXPECT_EQ(string("foo"), result[0]); + EXPECT_EQ(string(","), result[1]); + EXPECT_EQ(string("bar"), result[2]); +} + +// Check changing of case. +TEST_F(StringUtilTest, ChangeCase) { + string mixed("abcDEFghiJKLmno123[]{=+--+]}"); + string upper("ABCDEFGHIJKLMNO123[]{=+--+]}"); + string lower("abcdefghijklmno123[]{=+--+]}"); + + string test = mixed; + lowercase(test); + EXPECT_EQ(lower, test); + + test = mixed; + uppercase(test); + EXPECT_EQ(upper, test); +} + +TEST_F(StringUtilTest, quotedStringToBinary) { + // No opening or closing quote should result in empty string. + EXPECT_TRUE(quotedStringToBinary("'").empty()); + EXPECT_TRUE(quotedStringToBinary("").empty()); + EXPECT_TRUE(quotedStringToBinary(" ").empty()); + EXPECT_TRUE(quotedStringToBinary("'circuit id").empty()); + EXPECT_TRUE(quotedStringToBinary("circuit id'").empty()); + + // If there is only opening and closing quote, an empty + // vector should be returned. + EXPECT_TRUE(quotedStringToBinary("''").empty()); + + // Both opening and ending quote is present. + EXPECT_EQ("circuit id", checkQuoted("'circuit id'")); + EXPECT_EQ("remote id", checkQuoted(" ' remote id'")); + EXPECT_EQ("duid", checkQuoted(" ' duid'")); + EXPECT_EQ("duid", checkQuoted("'duid ' ")); + EXPECT_EQ("remote'id", checkQuoted(" ' remote'id '")); + EXPECT_EQ("remote id'", checkQuoted("'remote id''")); + EXPECT_EQ("'remote id", checkQuoted("''remote id'")); + + // Multiple quotes. + EXPECT_EQ("'", checkQuoted("'''")); + EXPECT_EQ("''", checkQuoted("''''")); +} + +TEST_F(StringUtilTest, decodeColonSeparatedHexString) { + // Test valid strings. + checkColonSeparated("A1:02:C3:d4:e5:F6", "A102C3D4E5F6"); + checkColonSeparated("A:02:3:d:E5:F6", "0A02030DE5F6"); + checkColonSeparated("A:B:C:D", "0A0B0C0D"); + checkColonSeparated("1", "01"); + checkColonSeparated("1e", "1E"); + checkColonSeparated("", ""); + + // Test invalid strings. + vector<uint8_t> decoded; + // Whitespaces. + EXPECT_THROW_MSG(decodeColonSeparatedHexString(" ", decoded), BadValue, + "invalid format of the decoded string ' '"); + // Whitespace before digits. + EXPECT_THROW_MSG(decodeColonSeparatedHexString(" A1", decoded), BadValue, + "invalid format of the decoded string ' A1'"); + // Two consecutive colons. + EXPECT_THROW_MSG(decodeColonSeparatedHexString("A::01", decoded), BadValue, + "two consecutive separators (':') specified in a decoded string 'A::01'"); + // Three consecutive colons. + EXPECT_THROW_MSG(decodeColonSeparatedHexString("A:::01", decoded), BadValue, + "two consecutive separators (':') specified in a decoded string 'A:::01'"); + // Whitespace within a string. + EXPECT_THROW_MSG(decodeColonSeparatedHexString("A :01", decoded), BadValue, + "' ' is not a valid hexadecimal digit in decoded string 'A :01'"); + // Terminating colon. + EXPECT_THROW_MSG(decodeColonSeparatedHexString("0A:01:", decoded), BadValue, + "two consecutive separators (':') specified in a decoded string '0A:01:'"); + // Opening colon. + EXPECT_THROW_MSG(decodeColonSeparatedHexString(":0A:01", decoded), BadValue, + "two consecutive separators (':') specified in a decoded string ':0A:01'"); + // Three digits before the colon. + EXPECT_THROW_MSG(decodeColonSeparatedHexString("0A1:B1", decoded), BadValue, + "invalid format of the decoded string '0A1:B1'"); +} + +TEST_F(StringUtilTest, decodeFormattedHexString) { + // Colon separated. + checkFormatted("1:A7:B5:4:23", "01A7B50423"); + // Space separated. + checkFormatted("1 A7 B5 4 23", "01A7B50423"); + // No colons, even number of digits. + checkFormatted("17a534", "17A534"); + // Odd number of digits. + checkFormatted("A3A6f78", "0A3A6F78"); + // '0x' prefix. + checkFormatted("0xA3A6f78", "0A3A6F78"); + // '0x' prefix with a special value of 0. + checkFormatted("0x0", "00"); + // Empty string. + checkFormatted("", ""); + + vector<uint8_t> decoded; + // Dangling colon. + EXPECT_THROW_MSG(decodeFormattedHexString("0a:", decoded), BadValue, + "two consecutive separators (':') specified in a decoded string '0a:'"); + // Dangling space. + EXPECT_THROW_MSG(decodeFormattedHexString("0a ", decoded), BadValue, + "two consecutive separators (' ') specified in a decoded string '0a '"); + // '0x' prefix and spaces. + EXPECT_THROW_MSG(decodeFormattedHexString("0x01 02", decoded), BadValue, + "invalid format of the decoded string '0x01 02'"); + // '0x' prefix and colons. + EXPECT_THROW_MSG(decodeFormattedHexString("0x01:02", decoded), BadValue, + "invalid format of the decoded string '0x01:02'"); + // colon and spaces mixed + EXPECT_THROW_MSG(decodeFormattedHexString("01:02 03", decoded), BadValue, + "invalid format of the decoded string '01:02 03'"); + // Missing colon. + EXPECT_THROW_MSG(decodeFormattedHexString("01:0203", decoded), BadValue, + "invalid format of the decoded string '01:0203'"); + // Missing space. + EXPECT_THROW_MSG(decodeFormattedHexString("01 0203", decoded), BadValue, + "invalid format of the decoded string '01 0203'"); + // Invalid prefix. + EXPECT_THROW_MSG(decodeFormattedHexString("x0102", decoded), BadValue, + "'x0102' is not a valid string of hexadecimal digits"); + // Invalid prefix again. + EXPECT_THROW_MSG(decodeFormattedHexString("1x0102", decoded), BadValue, + "'1x0102' is not a valid string of hexadecimal digits"); +} + +// Verifies StringSantizer class +TEST_F(StringUtilTest, stringSanitizer) { + // Bad regular expression should throw. + StringSanitizerPtr ss; + + try { + ss.reset(new StringSanitizer("[bogus-regex", "")); + } catch (BadValue const& ex) { + unordered_set<string> expected{ + // BSD + "invalid regex: '[bogus-regex', The expression contained mismatched [ and ].", + // Linux + "invalid regex: '[bogus-regex', Invalid range in bracket expression.", + }; + if (!expected.count(ex.what())) { + FAIL() << "unexpected BadValue exception message: " << ex.what(); + } + } catch (exception const& ex) { + FAIL() << "unexpected exception: " << ex.what(); + } + + string good_data(StringSanitizer::MAX_DATA_SIZE, '0'); + string bad_data(StringSanitizer::MAX_DATA_SIZE + 1, '0'); + + ASSERT_NO_THROW_LOG(ss.reset(new StringSanitizer(good_data, good_data))); + + ASSERT_THROW_MSG(ss.reset(new StringSanitizer(bad_data, "")), BadValue, + "char set size: '4097' exceeds max size: '4096'"); + ASSERT_THROW_MSG(ss.reset(new StringSanitizer("", bad_data)), BadValue, + "char replacement size: '4097' exceeds max size: '4096'"); + + // List of invalid chars should work: (b,c,2 are invalid) + checkScrub("abc.123", "[b-c2]", "*", "a**.1*3"); + // Inverted list of valid chars should work: (b,c,2 are valid) + checkScrub("abc.123", "[^b-c2]", "*", "*bc**2*"); + + // A string of all valid chars should return an identical string. + checkScrub("-_A--B__Cabc34567_-", "[^A-Ca-c3-7_-]", "x", "-_A--B__Cabc34567_-"); + + // Replacing with a character should work. + checkScrub("A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]", "*", "A*b*c*JoE3*_x*B*Y*e"); + + // Removing (i.e.replacing with an "empty" string) should work. + checkScrub("A[b]c\12JoE3-_x!B$Y#e", "[^A-Za-z0-9_]", "", "AbcJoE3_xBYe"); + + // More than one non-matching in a row should work. + checkScrub("%%A%%B%%C%%", "[^A-Za-z0-9_]", "x", "xxAxxBxxCxx"); + + // Removing more than one non-matching in a row should work. + checkScrub("%%A%%B%%C%%", "[^A-Za-z0-9_]", "", "ABC"); + + // Replacing with a string should work. + checkScrub("%%A%%B%%C%%", "[^A-Za-z0-9_]", "xyz", "xyzxyzAxyzxyzBxyzxyzCxyzxyz"); + + // Dots as valid chars work. + checkScrub("abc.123", "[^A-Za-z0-9_.]", "*", "abc.123"); + + string withNulls("\000ab\000c.12\0003", 10); + checkScrub(withNulls, "[^A-Za-z0-9_.]", "*", "*ab*c.12*3"); +} + +// Verifies templated buffer iterator seekTrimmed() function +TEST_F(StringUtilTest, seekTrimmed) { + // Empty buffer should be fine. + vector<uint8_t> buffer; + auto begin = buffer.end(); + auto end = buffer.end(); + ASSERT_NO_THROW_LOG(end = seekTrimmed(begin, end, 0)); + EXPECT_EQ(0, distance(begin, end)); + + // Buffer of only trim values, should be fine. + buffer = {1, 1}; + begin = buffer.begin(); + end = buffer.end(); + ASSERT_NO_THROW_LOG(end = seekTrimmed(begin, end, 1)); + EXPECT_EQ(0, distance(begin, end)); + + // One trailing null should trim off. + buffer = {'o', 'n', 'e', 0}; + begin = buffer.begin(); + end = buffer.end(); + ASSERT_NO_THROW_LOG(end = seekTrimmed(begin, end, 0)); + EXPECT_EQ(3, distance(begin, end)); + + // More than one trailing null should trim off. + buffer = {'t', 'h', 'r', 'e', 'e', 0, 0, 0}; + begin = buffer.begin(); + end = buffer.end(); + ASSERT_NO_THROW_LOG(end = seekTrimmed(begin, end, 0)); + EXPECT_EQ(5, distance(begin, end)); + + // Embedded null should be left in place. + buffer = {'e', 'm', 0, 'b', 'e', 'd'}; + begin = buffer.begin(); + end = buffer.end(); + ASSERT_NO_THROW_LOG(end = seekTrimmed(begin, end, 0)); + EXPECT_EQ(6, distance(begin, end)); + + // Leading null should be left in place. + buffer = {0, 'l', 'e', 'a', 'd', 'i', 'n', 'g'}; + begin = buffer.begin(); + end = buffer.end(); + ASSERT_NO_THROW_LOG(end = seekTrimmed(begin, end, 0)); + EXPECT_EQ(8, distance(begin, end)); +} + +// Verifies isPrintable predicate on strings. +TEST_F(StringUtilTest, stringIsPrintable) { + string content; + + // Empty is printable. + EXPECT_TRUE(isPrintable(content)); + + // Check Abcd. + content = "Abcd"; + EXPECT_TRUE(isPrintable(content)); + + // Add a control character (not printable). + content += "\a"; + EXPECT_FALSE(isPrintable(content)); +} + +// Verifies isPrintable predicate on byte vectors. +TEST_F(StringUtilTest, vectorIsPrintable) { + vector<uint8_t> content; + + // Empty is printable. + EXPECT_TRUE(isPrintable(content)); + + // Check Abcd. + content = {0x41, 0x62, 0x63, 0x64}; + EXPECT_TRUE(isPrintable(content)); + + // Add a control character (not printable). + content.push_back('\a'); + EXPECT_FALSE(isPrintable(content)); +} + +} // namespace |