summaryrefslogtreecommitdiffstats
path: root/src/lib/util/str.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/util/str.cc')
-rw-r--r--src/lib/util/str.cc345
1 files changed, 345 insertions, 0 deletions
diff --git a/src/lib/util/str.cc b/src/lib/util/str.cc
new file mode 100644
index 0000000..093cd05
--- /dev/null
+++ b/src/lib/util/str.cc
@@ -0,0 +1,345 @@
+// Copyright (C) 2011-2024 Internet Systems Consortium, Inc. ("ISC")
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include <config.h>
+
+#include <util/encode/encode.h>
+#include <util/str.h>
+
+#include <cstddef>
+#include <cstdint>
+#include <exception>
+#include <iomanip>
+#include <regex>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include <boost/algorithm/string/classification.hpp>
+#include <boost/algorithm/string/constants.hpp>
+#include <boost/algorithm/string/split.hpp>
+
+using namespace std;
+
+namespace isc {
+namespace util {
+namespace str {
+
+string
+trim(const string& input) {
+ if (input.empty()) {
+ return (string());
+ }
+ static const char* blanks = " \t\n";
+
+ // Search for first non-blank character in the string.
+ size_t const first(input.find_first_not_of(blanks));
+ if (first == string::npos) {
+ return (string());
+ }
+
+ // String not all blanks, so look for last character.
+ size_t const last(input.find_last_not_of(blanks));
+
+ // Extract the trimmed substring.
+ return (input.substr(first, (last - first + 1)));
+}
+
+vector<string>
+tokens(const string& text, const string& delim, bool escape) {
+ vector<string> result;
+ string token;
+ bool in_token = false;
+ bool escaped = false;
+ for (auto const& c : text) {
+ if (delim.find(c) != string::npos) {
+ // Current character is a delimiter
+ if (!in_token) {
+ // Two or more delimiters, eat them
+ } else if (escaped) {
+ // Escaped delimiter in a token: reset escaped and keep it
+ escaped = false;
+ token.push_back(c);
+ } else {
+ // End of the current token: save it if not empty
+ if (!token.empty()) {
+ result.push_back(token);
+ }
+ // Reset state
+ in_token = false;
+ token.clear();
+ }
+ } else if (escape && (c == '\\')) {
+ // Current character is the escape character
+ if (!in_token) {
+ // The escape character is the first character of a new token
+ in_token = true;
+ }
+ if (escaped) {
+ // Escaped escape: reset escaped and keep one character
+ escaped = false;
+ token.push_back(c);
+ } else {
+ // Remember to keep the next character
+ escaped = true;
+ }
+ } else {
+ // Not a delimiter nor an escape
+ if (!in_token) {
+ // First character of a new token
+ in_token = true;
+ }
+ if (escaped) {
+ // Escaped common character: as escape was false
+ escaped = false;
+ token.push_back('\\');
+ token.push_back(c);
+ } else {
+ // The common case: keep it
+ token.push_back(c);
+ }
+ }
+ }
+ // End of input: close and save the current token if not empty
+ if (escaped) {
+ // Pending escape
+ token.push_back('\\');
+ }
+ if (!token.empty()) {
+ result.push_back(token);
+ }
+
+ return (result);
+}
+
+char
+toUpper(char const chr) {
+ return (toupper(chr));
+}
+
+void
+uppercase(string& text) {
+ transform(text.begin(), text.end(), text.begin(), toUpper);
+}
+
+char
+toLower(char const chr) {
+ return (tolower(static_cast<int>(chr)));
+}
+
+void
+lowercase(string& text) {
+ transform(text.begin(), text.end(), text.begin(), toLower);
+}
+
+vector<uint8_t>
+quotedStringToBinary(const string& quoted_string) {
+ vector<uint8_t> binary;
+ // Remove whitespace before and after the quotes.
+ string trimmed_string = trim(quoted_string);
+
+ // We require two quote characters, so the length of the string must be
+ // equal to 2 at minimum, and it must start and end with quotes.
+ if ((trimmed_string.length() > 1) &&
+ ((trimmed_string[0] == '\'') && (trimmed_string[trimmed_string.length() - 1] == '\''))) {
+ // Remove quotes and trim the text inside the quotes.
+ trimmed_string = trim(trimmed_string.substr(1, trimmed_string.length() - 2));
+ // Copy string contents into the vector.
+ binary.assign(trimmed_string.begin(), trimmed_string.end());
+ }
+ // Return resulting vector or empty vector.
+ return (binary);
+}
+
+void
+decodeColonSeparatedHexString(const string& hex_string, vector<uint8_t>& binary) {
+ decodeSeparatedHexString(hex_string, ":", binary);
+}
+
+void
+decodeSeparatedHexString(const string& hex_string, const string& sep, vector<uint8_t>& binary) {
+ vector<string> split_text;
+ boost::split(split_text, hex_string, boost::is_any_of(sep),
+ boost::algorithm::token_compress_off);
+
+ vector<uint8_t> binary_vec;
+ for (size_t i = 0; i < split_text.size(); ++i) {
+ // If there are multiple tokens and the current one is empty, it
+ // means that two consecutive colons were specified. This is not
+ // allowed.
+ if ((split_text.size() > 1) && split_text[i].empty()) {
+ isc_throw(BadValue, "two consecutive separators ('"
+ << sep << "') specified in a decoded string '" << hex_string
+ << "'");
+
+ // Between a colon we expect at most two characters.
+ } else if (split_text[i].size() > 2) {
+ isc_throw(BadValue, "invalid format of the decoded string"
+ << " '" << hex_string << "'");
+
+ } else if (!split_text[i].empty()) {
+ stringstream s;
+ s << "0x";
+
+ for (unsigned int j = 0; j < split_text[i].length(); ++j) {
+ // Check if we're dealing with hexadecimal digit.
+ if (!isxdigit(split_text[i][j])) {
+ isc_throw(BadValue, "'" << split_text[i][j]
+ << "' is not a valid hexadecimal digit in"
+ << " decoded string '" << hex_string << "'");
+ }
+ s << split_text[i][j];
+ }
+
+ // The stream should now have one or two hexadecimal digits.
+ // Let's convert it to a number and store in a temporary
+ // vector.
+ unsigned int binary_value;
+ s >> hex >> binary_value;
+
+ binary_vec.push_back(static_cast<uint8_t>(binary_value));
+ }
+ }
+
+ // All ok, replace the data in the output vector with a result.
+ binary.swap(binary_vec);
+}
+
+void
+decodeFormattedHexString(const string& hex_string, vector<uint8_t>& binary) {
+ // If there is at least one colon we assume that the string
+ // comprises octets separated by colons (e.g. MAC address notation).
+ if (hex_string.find(':') != string::npos) {
+ decodeSeparatedHexString(hex_string, ":", binary);
+ } else if (hex_string.find(' ') != string::npos) {
+ decodeSeparatedHexString(hex_string, " ", binary);
+ } else {
+ ostringstream s;
+
+ // If we have odd number of digits we'll have to prepend '0'.
+ if (hex_string.length() % 2 != 0) {
+ s << "0";
+ }
+
+ // It is ok to use '0x' prefix in a string.
+ if ((hex_string.length() > 2) && (hex_string.substr(0, 2) == "0x")) {
+ // Exclude '0x' from the decoded string.
+ s << hex_string.substr(2);
+
+ } else {
+ // No '0x', so decode the whole string.
+ s << hex_string;
+ }
+
+ try {
+ // Decode the hex string.
+ encode::decodeHex(s.str(), binary);
+
+ } catch (...) {
+ isc_throw(BadValue, "'" << hex_string
+ << "' is not a valid"
+ " string of hexadecimal digits");
+ }
+ }
+}
+
+class StringSanitizerImpl {
+public:
+ /// @brief Constructor.
+ StringSanitizerImpl(const string& char_set, const string& char_replacement)
+ : char_set_(char_set), char_replacement_(char_replacement) {
+ if (char_set.size() > StringSanitizer::MAX_DATA_SIZE) {
+ isc_throw(BadValue, "char set size: '" << char_set.size() << "' exceeds max size: '"
+ << StringSanitizer::MAX_DATA_SIZE << "'");
+ }
+
+ if (char_replacement.size() > StringSanitizer::MAX_DATA_SIZE) {
+ isc_throw(BadValue, "char replacement size: '"
+ << char_replacement.size() << "' exceeds max size: '"
+ << StringSanitizer::MAX_DATA_SIZE << "'");
+ }
+ try {
+ scrub_exp_ = regex(char_set, regex::extended);
+ } catch (const exception& ex) {
+ isc_throw(BadValue, "invalid regex: '" << char_set_ << "', " << ex.what());
+ }
+ }
+
+ string scrub(const string& original) {
+ stringstream result;
+ try {
+ regex_replace(ostream_iterator<char>(result), original.begin(), original.end(),
+ scrub_exp_, char_replacement_);
+ } catch (const exception& ex) {
+ isc_throw(BadValue, "replacing '" << char_set_ << "' with '" << char_replacement_
+ << "' in '" << original << "' failed: ,"
+ << ex.what());
+ }
+
+ return (result.str());
+ }
+
+private:
+ /// @brief The char set data for regex.
+ string char_set_;
+
+ /// @brief The char replacement data for regex.
+ string char_replacement_;
+
+ regex scrub_exp_;
+};
+
+// @note The regex engine is implemented using recursion and can cause
+// stack overflow if the input data is too large. An arbitrary size of
+// 4096 should be enough for all cases.
+const uint32_t StringSanitizer::MAX_DATA_SIZE = 4096;
+
+StringSanitizer::StringSanitizer(const string& char_set, const string& char_replacement)
+ : impl_(new StringSanitizerImpl(char_set, char_replacement)) {
+}
+
+string
+StringSanitizer::scrub(const string& original) {
+ return (impl_->scrub(original));
+}
+
+bool
+isPrintable(const string& content) {
+ for (char const ch : content) {
+ if (isprint(ch) == 0) {
+ return (false);
+ }
+ }
+ return (true);
+}
+
+bool
+isPrintable(const vector<uint8_t>& content) {
+ for (uint8_t const ch : content) {
+ if (isprint(ch) == 0) {
+ return (false);
+ }
+ }
+ return (true);
+}
+
+string
+dumpAsHex(const uint8_t* data, size_t length) {
+ stringstream output;
+ for (size_t i = 0; i < length; ++i) {
+ if (i) {
+ output << ":";
+ }
+
+ output << setfill('0') << setw(2) << hex << static_cast<unsigned short>(data[i]);
+ }
+
+ return (output.str());
+}
+
+} // namespace str
+} // namespace util
+} // namespace isc