diff options
Diffstat (limited to 'src/lib/util/str.cc')
-rw-r--r-- | src/lib/util/str.cc | 345 |
1 files changed, 345 insertions, 0 deletions
diff --git a/src/lib/util/str.cc b/src/lib/util/str.cc new file mode 100644 index 0000000..093cd05 --- /dev/null +++ b/src/lib/util/str.cc @@ -0,0 +1,345 @@ +// Copyright (C) 2011-2024 Internet Systems Consortium, Inc. ("ISC") +// +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#include <config.h> + +#include <util/encode/encode.h> +#include <util/str.h> + +#include <cstddef> +#include <cstdint> +#include <exception> +#include <iomanip> +#include <regex> +#include <sstream> +#include <string> +#include <vector> + +#include <boost/algorithm/string/classification.hpp> +#include <boost/algorithm/string/constants.hpp> +#include <boost/algorithm/string/split.hpp> + +using namespace std; + +namespace isc { +namespace util { +namespace str { + +string +trim(const string& input) { + if (input.empty()) { + return (string()); + } + static const char* blanks = " \t\n"; + + // Search for first non-blank character in the string. + size_t const first(input.find_first_not_of(blanks)); + if (first == string::npos) { + return (string()); + } + + // String not all blanks, so look for last character. + size_t const last(input.find_last_not_of(blanks)); + + // Extract the trimmed substring. + return (input.substr(first, (last - first + 1))); +} + +vector<string> +tokens(const string& text, const string& delim, bool escape) { + vector<string> result; + string token; + bool in_token = false; + bool escaped = false; + for (auto const& c : text) { + if (delim.find(c) != string::npos) { + // Current character is a delimiter + if (!in_token) { + // Two or more delimiters, eat them + } else if (escaped) { + // Escaped delimiter in a token: reset escaped and keep it + escaped = false; + token.push_back(c); + } else { + // End of the current token: save it if not empty + if (!token.empty()) { + result.push_back(token); + } + // Reset state + in_token = false; + token.clear(); + } + } else if (escape && (c == '\\')) { + // Current character is the escape character + if (!in_token) { + // The escape character is the first character of a new token + in_token = true; + } + if (escaped) { + // Escaped escape: reset escaped and keep one character + escaped = false; + token.push_back(c); + } else { + // Remember to keep the next character + escaped = true; + } + } else { + // Not a delimiter nor an escape + if (!in_token) { + // First character of a new token + in_token = true; + } + if (escaped) { + // Escaped common character: as escape was false + escaped = false; + token.push_back('\\'); + token.push_back(c); + } else { + // The common case: keep it + token.push_back(c); + } + } + } + // End of input: close and save the current token if not empty + if (escaped) { + // Pending escape + token.push_back('\\'); + } + if (!token.empty()) { + result.push_back(token); + } + + return (result); +} + +char +toUpper(char const chr) { + return (toupper(chr)); +} + +void +uppercase(string& text) { + transform(text.begin(), text.end(), text.begin(), toUpper); +} + +char +toLower(char const chr) { + return (tolower(static_cast<int>(chr))); +} + +void +lowercase(string& text) { + transform(text.begin(), text.end(), text.begin(), toLower); +} + +vector<uint8_t> +quotedStringToBinary(const string& quoted_string) { + vector<uint8_t> binary; + // Remove whitespace before and after the quotes. + string trimmed_string = trim(quoted_string); + + // We require two quote characters, so the length of the string must be + // equal to 2 at minimum, and it must start and end with quotes. + if ((trimmed_string.length() > 1) && + ((trimmed_string[0] == '\'') && (trimmed_string[trimmed_string.length() - 1] == '\''))) { + // Remove quotes and trim the text inside the quotes. + trimmed_string = trim(trimmed_string.substr(1, trimmed_string.length() - 2)); + // Copy string contents into the vector. + binary.assign(trimmed_string.begin(), trimmed_string.end()); + } + // Return resulting vector or empty vector. + return (binary); +} + +void +decodeColonSeparatedHexString(const string& hex_string, vector<uint8_t>& binary) { + decodeSeparatedHexString(hex_string, ":", binary); +} + +void +decodeSeparatedHexString(const string& hex_string, const string& sep, vector<uint8_t>& binary) { + vector<string> split_text; + boost::split(split_text, hex_string, boost::is_any_of(sep), + boost::algorithm::token_compress_off); + + vector<uint8_t> binary_vec; + for (size_t i = 0; i < split_text.size(); ++i) { + // If there are multiple tokens and the current one is empty, it + // means that two consecutive colons were specified. This is not + // allowed. + if ((split_text.size() > 1) && split_text[i].empty()) { + isc_throw(BadValue, "two consecutive separators ('" + << sep << "') specified in a decoded string '" << hex_string + << "'"); + + // Between a colon we expect at most two characters. + } else if (split_text[i].size() > 2) { + isc_throw(BadValue, "invalid format of the decoded string" + << " '" << hex_string << "'"); + + } else if (!split_text[i].empty()) { + stringstream s; + s << "0x"; + + for (unsigned int j = 0; j < split_text[i].length(); ++j) { + // Check if we're dealing with hexadecimal digit. + if (!isxdigit(split_text[i][j])) { + isc_throw(BadValue, "'" << split_text[i][j] + << "' is not a valid hexadecimal digit in" + << " decoded string '" << hex_string << "'"); + } + s << split_text[i][j]; + } + + // The stream should now have one or two hexadecimal digits. + // Let's convert it to a number and store in a temporary + // vector. + unsigned int binary_value; + s >> hex >> binary_value; + + binary_vec.push_back(static_cast<uint8_t>(binary_value)); + } + } + + // All ok, replace the data in the output vector with a result. + binary.swap(binary_vec); +} + +void +decodeFormattedHexString(const string& hex_string, vector<uint8_t>& binary) { + // If there is at least one colon we assume that the string + // comprises octets separated by colons (e.g. MAC address notation). + if (hex_string.find(':') != string::npos) { + decodeSeparatedHexString(hex_string, ":", binary); + } else if (hex_string.find(' ') != string::npos) { + decodeSeparatedHexString(hex_string, " ", binary); + } else { + ostringstream s; + + // If we have odd number of digits we'll have to prepend '0'. + if (hex_string.length() % 2 != 0) { + s << "0"; + } + + // It is ok to use '0x' prefix in a string. + if ((hex_string.length() > 2) && (hex_string.substr(0, 2) == "0x")) { + // Exclude '0x' from the decoded string. + s << hex_string.substr(2); + + } else { + // No '0x', so decode the whole string. + s << hex_string; + } + + try { + // Decode the hex string. + encode::decodeHex(s.str(), binary); + + } catch (...) { + isc_throw(BadValue, "'" << hex_string + << "' is not a valid" + " string of hexadecimal digits"); + } + } +} + +class StringSanitizerImpl { +public: + /// @brief Constructor. + StringSanitizerImpl(const string& char_set, const string& char_replacement) + : char_set_(char_set), char_replacement_(char_replacement) { + if (char_set.size() > StringSanitizer::MAX_DATA_SIZE) { + isc_throw(BadValue, "char set size: '" << char_set.size() << "' exceeds max size: '" + << StringSanitizer::MAX_DATA_SIZE << "'"); + } + + if (char_replacement.size() > StringSanitizer::MAX_DATA_SIZE) { + isc_throw(BadValue, "char replacement size: '" + << char_replacement.size() << "' exceeds max size: '" + << StringSanitizer::MAX_DATA_SIZE << "'"); + } + try { + scrub_exp_ = regex(char_set, regex::extended); + } catch (const exception& ex) { + isc_throw(BadValue, "invalid regex: '" << char_set_ << "', " << ex.what()); + } + } + + string scrub(const string& original) { + stringstream result; + try { + regex_replace(ostream_iterator<char>(result), original.begin(), original.end(), + scrub_exp_, char_replacement_); + } catch (const exception& ex) { + isc_throw(BadValue, "replacing '" << char_set_ << "' with '" << char_replacement_ + << "' in '" << original << "' failed: ," + << ex.what()); + } + + return (result.str()); + } + +private: + /// @brief The char set data for regex. + string char_set_; + + /// @brief The char replacement data for regex. + string char_replacement_; + + regex scrub_exp_; +}; + +// @note The regex engine is implemented using recursion and can cause +// stack overflow if the input data is too large. An arbitrary size of +// 4096 should be enough for all cases. +const uint32_t StringSanitizer::MAX_DATA_SIZE = 4096; + +StringSanitizer::StringSanitizer(const string& char_set, const string& char_replacement) + : impl_(new StringSanitizerImpl(char_set, char_replacement)) { +} + +string +StringSanitizer::scrub(const string& original) { + return (impl_->scrub(original)); +} + +bool +isPrintable(const string& content) { + for (char const ch : content) { + if (isprint(ch) == 0) { + return (false); + } + } + return (true); +} + +bool +isPrintable(const vector<uint8_t>& content) { + for (uint8_t const ch : content) { + if (isprint(ch) == 0) { + return (false); + } + } + return (true); +} + +string +dumpAsHex(const uint8_t* data, size_t length) { + stringstream output; + for (size_t i = 0; i < length; ++i) { + if (i) { + output << ":"; + } + + output << setfill('0') << setw(2) << hex << static_cast<unsigned short>(data[i]); + } + + return (output.str()); +} + +} // namespace str +} // namespace util +} // namespace isc |