diff options
Diffstat (limited to '')
-rw-r--r-- | src/lib/util/str.h | 292 |
1 files changed, 292 insertions, 0 deletions
// Copyright (C) 2011-2024 Internet Systems Consortium, Inc. ("ISC")
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef KEA_UTIL_STR_H
#define KEA_UTIL_STR_H

#include <exceptions/exceptions.h>

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include <boost/lexical_cast.hpp>

namespace isc {
namespace util {
namespace str {

// A set of C++ utilities for manipulating strings.

/// @brief A standard string util exception.
///
/// Documented as being thrown if getToken or numToToken are called with
/// bad input data.
///
/// @note NOTE(review): neither getToken nor numToToken is declared in this
/// header - confirm that the reference above is still current.
class StringTokenError : public Exception {
public:
    /// @brief Constructor.
    ///
    /// All three arguments are forwarded unchanged to the @c isc::Exception
    /// constructor.
    ///
    /// @param file Passed to the base class as its first argument.
    /// @param line Passed to the base class as its second argument.
    /// @param what Passed to the base class as its third argument.
    StringTokenError(const char* file, size_t line, const char* what)
        : isc::Exception(file, line, what) {
    }
};

/// @brief Trim leading and trailing spaces.
///
/// Returns a copy of the input string but with any leading or trailing spaces
/// or tabs removed.
///
/// @param input String to trim. It is taken by const reference, so the
/// caller's string is left untouched.
///
/// @return String with leading and trailing spaces removed.
std::string
trim(const std::string& input);

/// @brief Finds the "trimmed" end of a buffer
///
/// Works backward from the end of the buffer, looking for the first
/// element not equal to the trim value, and returns an iterator positioned
/// one past that element (that is, the new end of the buffer).
///
/// @param begin - Iterator pointing to the beginning of the buffer to trim.
/// The iterator type must support stepping backward; a plain forward
/// iterator is not sufficient.
/// @param end - Iterator pointing to the untrimmed end of the buffer to
/// trim.
/// @param trim_val - byte value to trim off
///
/// @return Iterator positioned one past the last element that is not equal
/// to the trim value, i.e. the new (trimmed) end of the buffer. It equals
/// @c begin when the buffer is empty or holds nothing but trim values.
template <typename Iterator>
Iterator
seekTrimmed(Iterator const& begin, Iterator end, uint8_t const trim_val) {
    // Only decrement and dereference are used, so any bidirectional
    // iterator (not just a random-access one) is accepted.
    while (end != begin) {
        Iterator last = end;
        --last;
        if (!(*last == trim_val)) {
            // Found the last element to keep; "end" already sits just
            // past it.
            break;
        }
        end = last;
    }
    return (end);
}

/// @brief Split string into tokens.
///
/// Splits a string into tokens (the tokens being delimited by one or more of
/// the delimiter characters) and returns the tokens in a vector.
/// Adjacent delimiters are considered to be a single delimiter.
///
/// Special cases are:
/// -# The empty string is considered to be zero tokens.
/// -# A string comprising nothing but delimiters is considered to be zero
///    tokens.
///
/// The reasoning behind this is that the string can be thought of as having
/// invisible leading and trailing delimiter characters. Therefore both cases
/// reduce to a set of contiguous delimiters, which are considered a single
/// delimiter (so getting rid of the string).
/// The optional escape mode allows delimiter characters (and *only* them
/// and the escape character itself) to be escaped using a backslash.
///
/// We could use Boost for this, but this (simple) function eliminates one
/// dependency in the code.
///
/// @param text String to be split. It is taken by const reference, so the
/// caller's string is not modified.
/// @param delim Delimiter characters
/// @param escape Use backslash to escape delimiter characters
///
/// @return Vector of tokens.
std::vector<std::string>
tokens(const std::string& text, const std::string& delim = " \t\n", bool escape = false);

/// @brief Convert character to uppercase.
///
/// Used in uppercase() to pass as a parameter to std::transform().
/// The function std::toupper() can't be used as it takes an "int" as its
/// parameter; this confuses the template expansion mechanism because
/// dereferencing a string::iterator returns a char.
///
/// @param chr Character to be upper-cased.
///
/// @return Uppercase version of the input character.
char
toUpper(char const chr);

/// @brief Convert string to uppercase.
///
/// The string is taken by non-const reference and nothing is returned, so
/// the result is delivered through @c text itself.
///
/// @param text String to be upper-cased.
void
uppercase(std::string& text);

/// @brief Convert character to lowercase.
///
/// Used in lowercase() to pass as a parameter to std::transform(). The
/// function std::tolower() can't be used as it takes an "int" as its parameter;
/// this confuses the template expansion mechanism because dereferencing a
/// string::iterator returns a char.
///
/// @param chr Character to be lower-cased.
///
/// @return Lowercase version of the input character.
char
toLower(char const chr);

/// @brief Convert string to lowercase.
///
/// The string is taken by non-const reference and nothing is returned, so
/// the result is delivered through @c text itself.
///
/// @param text String to be lower-cased.
void
lowercase(std::string& text);

/// @brief Converts a string in quotes into vector.
///
/// A converted string is first trimmed. If a trimmed string is in
/// quotes, the quotes are removed and the resulting string is copied
/// into a vector, which is returned. If the string is not in quotes, an
/// empty vector is returned.
///
/// This function is intended to be used by the server configuration
/// parsers to convert string values surrounded with quotes into
/// binary form.
///
/// @param quoted_string String to be converted.
///
/// @return Vector containing the converted string, or an empty vector if
/// the input string didn't contain the expected quote characters.
std::vector<uint8_t>
quotedStringToBinary(const std::string& quoted_string);

/// @brief Converts a string of separated hexadecimal digits
/// into a vector.
///
/// Octets may contain 1 or 2 digits.
/// For example, using a colon for a separator, all of the following are
/// valid:
///
/// - yy:yy:yy:yy:yy
/// - y:y:y:y:y
/// - y:yy:yy:y:y
///
/// If the decoded string doesn't match any of the supported formats,
/// an exception is thrown.
///
/// @param hex_string Input string.
/// @param sep Separator placed between octets. It is passed as a string;
/// the examples above use a single character (a colon).
/// @param binary Vector receiving converted string into binary.
///
/// @throw isc::BadValue if the format of the input string is invalid.
void
decodeSeparatedHexString(const std::string& hex_string,
                         const std::string& sep,
                         std::vector<uint8_t>& binary);

/// @brief Converts a string of hexadecimal digits with colons into
/// a vector.
///
/// @param hex_string Input string.
/// @param binary Vector receiving converted string into binary.
///
/// @throw isc::BadValue if the format of the input string is invalid.
void
decodeColonSeparatedHexString(const std::string& hex_string, std::vector<uint8_t>& binary);

/// @brief Converts a formatted string of hexadecimal digits into
/// a vector.
///
/// This function supports the following formats:
///
/// - yy:yy:yy:yy or yy yy yy yy - octets delimited by colons or
///   spaces, see @c decodeSeparatedHexString
///
/// - yyyyyyyyyy
/// - 0xyyyyyyyyyy
///
/// If there is an odd number of hexadecimal digits in the input
/// string, a '0' is prepended to the string before decoding.
///
/// @param hex_string Input string.
/// @param binary Vector receiving converted string into binary.
///
/// @throw isc::BadValue if the format of the input string is invalid.
void
decodeFormattedHexString(const std::string& hex_string, std::vector<uint8_t>& binary);

/// @brief Forward declaration to the @c StringSanitizer implementation.
class StringSanitizerImpl;

/// @brief Type representing the pointer to the @c StringSanitizerImpl.
using StringSanitizerImplPtr = std::shared_ptr<StringSanitizerImpl>;

/// @brief Implements a regular expression based string scrubber.
class StringSanitizer {
public:
    /// @brief Constructor.
    ///
    /// Compiles the given character set into a regular expression, and
    /// retains the given character replacement. Thereafter, the instance
    /// may be used to scrub an arbitrary number of strings.
    ///
    /// @param char_set string containing a regular expression (POSIX
    /// extended syntax) that describes the characters to replace. If you
    /// wanted to sanitize hostnames for example, you could specify the
    /// inversion of valid characters "[^A-Za-z0-9_-]".
    /// @param char_replacement string of one or more characters to use as the
    /// replacement for invalid characters.
    ///
    /// @throw BadValue if given an invalid regular expression.
    StringSanitizer(const std::string& char_set, const std::string& char_replacement);

    /// @brief Returns a scrubbed copy of a given string.
    ///
    /// Replaces all occurrences of characters described by the regular
    /// expression with the character replacement.
    ///
    /// @param original The string to be scrubbed.
    ///
    /// @return The scrubbed copy of @c original.
    ///
    /// @throw Unexpected if an error occurs during scrubbing.
    std::string scrub(const std::string& original);

    /// @brief The maximum size for regex parameters.
    ///
    /// @note The regex engine is implemented using recursion and can cause
    /// stack overflow if the input data is too large. An arbitrary size of
    /// 4096 should be enough for all cases.
    static const uint32_t MAX_DATA_SIZE;

private:
    /// @brief Pointer to the @c StringSanitizerImpl.
    StringSanitizerImplPtr impl_;
};

/// @brief Type representing the pointer to the @c StringSanitizer.
using StringSanitizerPtr = std::unique_ptr<StringSanitizer>;

/// @brief Check if a string is printable.
///
/// @param content String to check for printable characters.
///
/// @return True if empty or contains only printable characters, False otherwise.
bool
isPrintable(const std::string& content);

/// @brief Check if a byte vector is printable.
///
/// @param content Vector to check for printable characters.
///
/// @return True if empty or contains only printable characters, False otherwise.
bool
isPrintable(const std::vector<uint8_t>& content);

/// @brief Dumps a buffer of bytes as a string of hexadecimal digits.
///
/// @param data Pointer to the data to dump.
/// @param length Number of bytes to dump. Caller should ensure the length
/// does not exceed the buffer.
///
/// @return String of hexadecimal digits representing the buffer.
std::string
dumpAsHex(const uint8_t* data, size_t length);

} // namespace str
} // namespace util
} // namespace isc

#endif // KEA_UTIL_STR_H