// Copyright (C) 2011-2024 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef KEA_UTIL_STR_H #define KEA_UTIL_STR_H #include #include #include #include #include #include #include #include #include namespace isc { namespace util { namespace str { /// @brief A Set of C++ Utilities for Manipulating Strings /// /// @brief A standard string util exception that is thrown if getToken or /// numToToken are called with bad input data class StringTokenError : public Exception { public: StringTokenError(const char* file, size_t line, const char* what) : isc::Exception(file, line, what) { } }; /// @brief Trim leading and trailing spaces. /// /// Returns a copy of the input string but with any leading or trailing spaces /// or tabs removed. /// /// @param input Input string to modify. /// /// @return String with leading and trailing spaces removed. std::string trim(const std::string& input); /// @brief Finds the "trimmed" end of a buffer /// /// Works backward from the end of the buffer, looking for the first /// character not equal to the trim value, and returns an iterator /// pointing to that position. /// /// @param begin - Forward iterator pointing to the beginning of the /// buffer to trim. /// @param end - Forward iterator pointing to the untrimmed end of /// the buffer to trim. /// @param trim_val - byte value to trim off /// /// @return Iterator pointing the first character from the end of the /// buffer not equal to the trim value. template Iterator seekTrimmed(Iterator const& begin, Iterator end, uint8_t const trim_val) { while (end != begin && *(end - 1) == trim_val) { --end; } return (end); } /// @brief Split string into tokens. /// /// Splits a string into tokens (the tokens being delimited by one or more of /// the delimiter characters) and returns the tokens in a vector. /// Adjacent delimiters are considered to be a single delimiter. /// /// Special cases are: /// -# The empty string is considered to be zero tokens. /// -# A string comprising nothing but delimiters is considered to be zero /// tokens. /// /// The reasoning behind this is that the string can be thought of as having /// invisible leading and trailing delimiter characters. Therefore both cases /// reduce to a set of contiguous delimiters, which are considered a single /// delimiter (so getting rid of the string). /// Optional escape allows to escape delimiter characters (and *only* them /// and the escape character itself) using backslash. /// /// We could use Boost for this, but this (simple) function eliminates one /// dependency in the code. /// /// @param text String to be split. Passed by value as the internal copy is /// altered during the processing. /// @param delim Delimiter characters /// @param escape Use backslash to escape delimiter characters /// /// @return Vector of tokens. std::vector tokens(const std::string& text, const std::string& delim = " \t\n", bool escape = false); /// @brief Convert character to uppercase. /// /// Used in uppercase() to pass as a parameter to std::transform(). The /// function std::toupper() can't be used as it takes an "int" as its parameter; /// this confuses the template expansion mechanism because dereferencing a /// string::iterator returns a char. /// /// @param chr Character to be upper-cased. /// /// @return Uppercase version of the input character. char toUpper(char const chr); /// @brief Convert string to uppercase. /// /// @param text String to be upper-cased. void uppercase(std::string& text); /// @brief Convert character to lowercase. /// /// Used in lowercase() to pass as a parameter to std::transform(). The /// function std::tolower() can't be used as it takes an "int" as its parameter; /// this confuses the template expansion mechanism because dereferencing a /// string::iterator returns a char. /// /// @param chr Character to be lower-cased. /// /// @return Lowercase version of the input character. char toLower(char const chr); /// @brief Convert string to lowercase. /// /// @param text String to be lower-cased. void lowercase(std::string& text); /// @brief Converts a string in quotes into vector. /// /// A converted string is first trimmed. If a trimmed string is in /// quotes, the quotes are removed and the resulting string is copied /// into a vector. If the string is not in quotes, an empty vector is /// returned. /// /// The resulting string is copied to a vector and returned. /// /// This function is intended to be used by the server configuration /// parsers to convert string values surrounded with quotes into /// binary form. /// /// @param quoted_string String to be converted. /// /// @return Vector containing converted string or empty string if /// input string didn't contain expected quote characters. std::vector quotedStringToBinary(const std::string& quoted_string); /// @brief Converts a string of separated hexadecimal digits /// into a vector. /// /// Octets may contain 1 or 2 digits. For example, using a colon /// for a separator all of the following are valid: /// /// - yy:yy:yy:yy:yy /// - y:y:y:y:y /// - y:yy:yy:y:y /// /// If the decoded string doesn't match any of the supported formats, /// an exception is thrown. /// /// @param hex_string Input string. /// @param sep character to use as a separator. /// @param binary Vector receiving converted string into binary. /// /// @throw isc::BadValue if the format of the input string is invalid. void decodeSeparatedHexString(const std::string& hex_string, const std::string& sep, std::vector& binary); /// @brief Converts a string of hexadecimal digits with colons into /// a vector. /// /// @param hex_string Input string. /// @param binary Vector receiving converted string into binary. /// /// @throw isc::BadValue if the format of the input string is invalid. void decodeColonSeparatedHexString(const std::string& hex_string, std::vector& binary); /// @brief Converts a formatted string of hexadecimal digits into /// a vector. /// /// This function supports the following formats: /// /// - yy:yy:yy:yy or yy yy yy yy - octets delimited by colons or /// spaces, see @c decodeSeparatedHexString /// /// - yyyyyyyyyy /// - 0xyyyyyyyyyy /// /// If there is an odd number of hexadecimal digits in the input /// string, the '0' is prepended to the string before decoding. /// /// @param hex_string Input string. /// @param binary Vector receiving converted string into binary. /// /// @throw isc::BadValue if the format of the input string is invalid. void decodeFormattedHexString(const std::string& hex_string, std::vector& binary); /// @brief Forward declaration to the @c StringSanitizer implementation. class StringSanitizerImpl; /// @brief Type representing the pointer to the @c StringSanitizerImpl. using StringSanitizerImplPtr = std::shared_ptr; /// @brief Implements a regular expression based string scrubber. class StringSanitizer { public: /// @brief Constructor. /// /// Compiles the given character set into a regular expression, and /// retains the given character replacement. Thereafter, the instance /// may be used to scrub an arbitrary number of strings. /// /// @param char_set string containing a regular expression (POSIX /// extended syntax) that describes the characters to replace. If you /// wanted to sanitize hostnames for example, you could specify the /// inversion of valid characters "[^A-Za-z0-9_-]". /// @param char_replacement string of one or more characters to use as the /// replacement for invalid characters. /// /// @throw BadValue if given an invalid regular expression. StringSanitizer(const std::string& char_set, const std::string& char_replacement); /// @brief Returns a scrubbed copy of a given string. /// /// Replaces all occurrences of characters described by the regular /// expression with the character replacement. /// /// @param original The string to be scrubbed. /// /// @throw Unexpected if an error occurs during scrubbing. std::string scrub(const std::string& original); /// @brief The maximum size for regex parameters. /// /// @note The regex engine is implemented using recursion and can cause /// stack overflow if the input data is too large. An arbitrary size of /// 4096 should be enough for all cases. static const uint32_t MAX_DATA_SIZE; private: /// @brief Pointer to the @c StringSanitizerImpl. StringSanitizerImplPtr impl_; }; /// @brief Type representing the pointer to the @c StringSanitizer. using StringSanitizerPtr = std::unique_ptr; /// @brief Check if a string is printable. /// /// @param content String to check for printable characters. /// /// @return True if empty or contains only printable characters, False otherwise. bool isPrintable(const std::string& content); /// @brief Check if a byte vector is printable. /// /// @param content Vector to check for printable characters. /// /// @return True if empty or contains only printable characters, False otherwise. bool isPrintable(const std::vector& content); /// @brief Dumps a buffer of bytes as a string of hexadecimal digits. /// /// @param data Pointer to the data to dump. /// @param length Number of bytes to dump. Caller should ensure the length /// does not exceed the buffer. std::string dumpAsHex(const uint8_t* data, size_t length); } // namespace str } // namespace util } // namespace isc #endif // KEA_UTIL_STR_H