// Copyright (C) 2011-2022 Internet Systems Consortium, Inc. ("ISC") // // This Source Code Form is subject to the terms of the Mozilla Public // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. #ifndef STRUTIL_H #define STRUTIL_H #include #include #include #include #include #include #include #include #include #include namespace isc { namespace util { namespace str { /// @brief A Set of C++ Utilities for Manipulating Strings /// /// @brief A standard string util exception that is thrown if getToken or /// numToToken are called with bad input data /// class StringTokenError : public Exception { public: StringTokenError(const char* file, size_t line, const char* what) : isc::Exception(file, line, what) {} }; /// @brief Normalize Backslash /// /// Only relevant to Windows, this replaces all "\" in a string with "/" /// and returns the result. On other systems it is a no-op. Note /// that Windows does recognize file names with the "\" replaced by "/" /// (at least in system calls, if not the command line). /// /// @param name Name to be substituted void normalizeSlash(std::string& name); /// @brief Trim Leading and Trailing Spaces /// /// Returns a copy of the input string but with any leading or trailing spaces /// or tabs removed. /// /// @param instring Input string to modify /// /// @return String with leading and trailing spaces removed std::string trim(const std::string& instring); /// @brief Finds the "trimmed" end of a buffer /// /// Works backward from the end of the buffer, looking for the first /// character not equal to the trim value, and returns an iterator /// pointing that that position. /// /// @param begin - Forward iterator pointing to the beginning of the /// buffer to trim /// @param end - Forward iterator pointing to the untrimmed end of /// the buffer to trim /// @param trim_val - byte value to trim off /// /// @return Iterator pointing the first character from the end of the /// buffer not equal to the trim value template Iterator seekTrimmed(Iterator begin, Iterator end, uint8_t trim_val) { for ( ; end != begin && *(end - 1) == trim_val; --end); return(end); } /// @brief Split String into Tokens /// /// Splits a string into tokens (the tokens being delimited by one or more of /// the delimiter characters) and returns the tokens in a vector array. Note /// that adjacent delimiters are considered to be a single delimiter. /// /// Special cases are: /// -# The empty string is considered to be zero tokens. /// -# A string comprising nothing but delimiters is considered to be zero /// tokens. /// /// The reasoning behind this is that the string can be thought of as having /// invisible leading and trailing delimiter characters. Therefore both cases /// reduce to a set of contiguous delimiters, which are considered a single /// delimiter (so getting rid of the string). /// Optional escape allows to escape delimiter characters (and *only* them /// and the escape character itself) using backslash. /// /// We could use Boost for this, but this (simple) function eliminates one /// dependency in the code. /// /// @param text String to be split. Passed by value as the internal copy is /// altered during the processing. /// @param delim Delimiter characters /// @param escape Use backslash to escape delimiter characters /// /// @return Vector of tokens. std::vector tokens(const std::string& text, const std::string& delim = std::string(" \t\n"), bool escape = false); /// @brief Uppercase Character /// /// Used in uppercase() to pass as an argument to std::transform(). The /// function std::toupper() can't be used as it takes an "int" as its argument; /// this confuses the template expansion mechanism because dereferencing a /// string::iterator returns a char. /// /// @param chr Character to be upper-cased. /// /// @return Uppercase version of the argument inline char toUpper(char chr) { return (static_cast(std::toupper(static_cast(chr)))); } /// @brief Uppercase String /// /// A convenience function to uppercase a string. /// /// @param text String to be upper-cased. inline void uppercase(std::string& text) { std::transform(text.begin(), text.end(), text.begin(), isc::util::str::toUpper); } /// @brief Lowercase Character /// /// Used in lowercase() to pass as an argument to std::transform(). The /// function std::tolower() can't be used as it takes an "int" as its argument; /// this confuses the template expansion mechanism because dereferencing a /// string::iterator returns a char. /// /// @param chr Character to be lower-cased. /// /// @return Lowercase version of the argument inline char toLower(char chr) { return (static_cast(std::tolower(static_cast(chr)))); } /// @brief Lowercase String /// /// A convenience function to lowercase a string /// /// @param text String to be lower-cased. inline void lowercase(std::string& text) { std::transform(text.begin(), text.end(), text.begin(), isc::util::str::toLower); } /// @brief Apply Formatting /// /// Given a printf-style format string containing only "%s" place holders /// (others are ignored) and a vector of strings, this produces a single string /// with the placeholders replaced. /// /// @param format Format string /// @param args Vector of argument strings /// /// @return Resultant string std::string format(const std::string& format, const std::vector& args); /// @brief Returns one token from the given stringstream /// /// Using the >> operator, with basic error checking /// /// @throw StringTokenError if the token cannot be read from the stream /// /// @param iss stringstream to read one token from /// /// @return the first token read from the stringstream std::string getToken(std::istringstream& iss); /// @brief Converts a string token to an *unsigned* integer. /// /// The value is converted using a lexical cast, with error and bounds /// checking. /// /// NumType is a *signed* integral type (e.g. int32_t) that is sufficiently /// wide to store resulting integers. /// /// BitSize is the maximum number of bits that the resulting integer can take. /// This function first checks whether the given token can be converted to /// an integer of NumType type. It then confirms the conversion result is /// within the valid range, i.e., [0, 2^BitSize - 1]. The second check is /// necessary because lexical_cast where T is an unsigned integer type /// doesn't correctly reject negative numbers when compiled with SunStudio. /// /// @throw StringTokenError if the value is out of range, or if it /// could not be converted /// /// @param num_token the string token to convert /// /// @return the converted value, of type NumType template NumType tokenToNum(const std::string& num_token) { NumType num; try { num = boost::lexical_cast(num_token); } catch (const boost::bad_lexical_cast&) { isc_throw(StringTokenError, "Invalid SRV numeric parameter: " << num_token); } if (num < 0 || num >= (static_cast(1) << BitSize)) { isc_throw(StringTokenError, "Numeric SRV parameter out of range: " << num); } return (num); } /// @brief Converts a string in quotes into vector. /// /// A converted string is first trimmed. If a trimmed string is in /// quotes, the quotes are removed and the resulting string is copied /// into a vector. If the string is not in quotes, an empty vector is /// returned. /// /// The resulting string is copied to a vector and returned. /// /// This function is intended to be used by the server configuration /// parsers to convert string values surrounded with quotes into /// binary form. /// /// @param quoted_string String to be converted. /// /// @return Vector containing converted string or empty string if /// input string didn't contain expected quote characters. std::vector quotedStringToBinary(const std::string& quoted_string); /// @brief Converts a string of separated hexadecimal digits /// into a vector. /// /// Octets may contain 1 or 2 digits. For example, using a colon /// for a separator all of the following are valid: /// /// - yy:yy:yy:yy:yy /// - y:y:y:y:y /// - y:yy:yy:y:y /// /// If the decoded string doesn't match any of the supported formats, /// an exception is thrown. /// /// @param hex_string Input string. /// @param sep character to use as a separator. /// @param binary Vector receiving converted string into binary. /// /// @throw isc::BadValue if the format of the input string is invalid. void decodeSeparatedHexString(const std::string& hex_string, const std::string& sep, std::vector& binary); /// @brief Converts a string of hexadecimal digits with colons into /// a vector. /// /// Convenience method which calls @c decodeSeparatedHexString() passing /// in a colon for the separator. /// @param hex_string Input string. /// @param binary Vector receiving converted string into binary. /// /// @throw isc::BadValue if the format of the input string is invalid. void decodeColonSeparatedHexString(const std::string& hex_string, std::vector& binary); /// @brief Converts a formatted string of hexadecimal digits into /// a vector. /// /// This function supports the following formats: /// /// - yy:yy:yy:yy or yy yy yy yy - octets delimited by colons or /// spaces, see @c decodeSeparatedHexString /// /// - yyyyyyyyyy /// - 0xyyyyyyyyyy /// /// If there is an odd number of hexadecimal digits in the input /// string, the '0' is prepended to the string before decoding. /// /// @param hex_string Input string. /// @param binary Vector receiving converted string into binary. /// /// @throw isc::BadValue if the format of the input string is invalid. void decodeFormattedHexString(const std::string& hex_string, std::vector& binary); /// @brief Forward declaration to the @c StringSanitizer implementation. class StringSanitizerImpl; /// @brief Type representing the pointer to the @c StringSanitizerImpl. typedef boost::shared_ptr StringSanitizerImplPtr; /// @brief Implements a regular expression based string scrubber /// /// The implementation uses C++11 regex IF the environment supports it /// (tested in configure.ac). If not it falls back to C lib regcomp/regexec. /// Older compilers, such as pre Gnu g++ 4.9.0, provided only experimental /// implementations of regex which are recognized as buggy. class StringSanitizer { public: /// @brief Constructor. /// /// Compiles the given character set into a regular expression, and /// retains the given character replacement. Thereafter, the instance /// may be used to scrub an arbitrary number of strings. /// /// @param char_set string containing a regular expression (POSIX /// extended syntax) that describes the characters to replace. If you /// wanted to sanitize hostnames for example, you could specify the /// inversion of valid characters "[^A-Za-z0-9_-]". /// @param char_replacement string of one or more characters to use as the /// replacement for invalid characters. /// /// @throw BadValue if given an invalid regular expression StringSanitizer(const std::string& char_set, const std::string& char_replacement); /// @brief Destructor. /// /// Destroys the implementation instance. ~StringSanitizer(); /// Returns a scrubbed copy of a given string /// /// Replaces all occurrences of characters described by the regular /// expression with the character replacement. /// /// @param original the string to scrub /// /// @throw Unexpected if an error occurs during scrubbing std::string scrub(const std::string& original); /// @brief The maximum size for regex parameters. /// /// @note The regex engine is implemented using recursion and can cause /// stack overflow if the input data is too large. An arbitrary size of /// 4096 should be enough for all cases. static const uint32_t MAX_DATA_SIZE; private: /// @brief Pointer to the @c StringSanitizerImpl. StringSanitizerImplPtr impl_; }; /// @brief Type representing the pointer to the @c StringSanitizer. typedef boost::shared_ptr StringSanitizerPtr; /// @brief Check if a string is printable /// /// @param content String to check for printable characters /// /// @return True if empty or contains only printable characters, False otherwise inline bool isPrintable(const std::string& content) { for (const auto& ch : content) { if (isprint(static_cast(ch)) == 0) { return (false); } } return (true); } /// @brief Check if a byte vector is printable /// /// @param content Vector to check for printable characters /// /// @return True if empty or contains only printable characters, False otherwise inline bool isPrintable(const std::vector& content) { for (const auto& ch : content) { if (isprint(static_cast(ch)) == 0) { return (false); } } return (true); } /// @brief Dumps a buffer of bytes as a string of hexadecimal digits /// /// @param data pointer to the data to dump /// @param length number of bytes to dump. Caller should ensure the length /// does not exceed the buffer. std::string dumpAsHex(const uint8_t* data, size_t length); } // namespace str } // namespace util } // namespace isc #endif // STRUTIL_H