summaryrefslogtreecommitdiffstats
path: root/src/lib/util/strutil.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/lib/util/strutil.h403
1 files changed, 403 insertions, 0 deletions
diff --git a/src/lib/util/strutil.h b/src/lib/util/strutil.h
new file mode 100644
index 0000000..e5d2496
--- /dev/null
+++ b/src/lib/util/strutil.h
@@ -0,0 +1,403 @@
+// Copyright (C) 2011-2022 Internet Systems Consortium, Inc. ("ISC")
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef STRUTIL_H
+#define STRUTIL_H
+
+#include <algorithm>
+#include <cctype>
+#include <stdint.h>
+#include <string>
+#include <iomanip>
+#include <sstream>
+#include <vector>
+#include <exceptions/exceptions.h>
+#include <boost/lexical_cast.hpp>
+#include <boost/shared_ptr.hpp>
+
+namespace isc {
+namespace util {
+namespace str {
+
+/// @brief A Set of C++ Utilities for Manipulating Strings
+
+/// @brief Exception thrown by the string utilities when getToken() or
+/// tokenToNum() are called with bad input data.
+///
+/// The constructor forwards the source location and message to the base.
+class StringTokenError : public isc::Exception {
+public:
+    StringTokenError(const char* file, size_t line, const char* what)
+        : Exception(file, line, what) {}
+};
+
+/// @brief Normalize Backslash
+///
+/// Only relevant to Windows, this replaces all "\" in a string with "/",
+/// modifying the string in place. On other systems it is a no-op. Note
+/// that Windows does recognize file names with the "\" replaced by "/"
+/// (at least in system calls, if not the command line).
+///
+/// @param name Name to be substituted
+void normalizeSlash(std::string& name);
+
+/// @brief Trim Leading and Trailing Spaces
+///
+/// Returns a copy of the input string but with any leading or trailing spaces
+/// or tabs removed.
+///
+/// @param instring Input string to modify
+///
+/// @return String with leading and trailing spaces removed
+std::string trim(const std::string& instring);
+
+/// @brief Finds the "trimmed" end of a buffer
+///
+/// Walks backward from the end of the buffer while the preceding element
+/// equals the trim value and returns the resulting end position.
+///
+/// @param begin - Iterator pointing to the beginning of the buffer to trim
+/// @param end - Iterator pointing to the untrimmed end of the buffer to
+/// trim. The implementation evaluates "end - 1" and "--end" on it.
+/// @param trim_val - byte value to trim off
+///
+/// @return Iterator just past the last element not equal to the trim
+/// value, or begin if every element equals it
+template<typename Iterator>
+Iterator
+seekTrimmed(Iterator begin, Iterator end, uint8_t trim_val) {
+    while ((end != begin) && (*(end - 1) == trim_val)) {
+        --end;
+    }
+    return (end);
+}
+
+/// @brief Split String into Tokens
+///
+/// Splits a string into tokens (the tokens being delimited by one or more of
+/// the delimiter characters) and returns the tokens in a vector array. Note
+/// that adjacent delimiters are considered to be a single delimiter.
+///
+/// Special cases are:
+/// -# The empty string is considered to be zero tokens.
+/// -# A string comprising nothing but delimiters is considered to be zero
+/// tokens.
+///
+/// The reasoning behind this is that the string can be thought of as having
+/// invisible leading and trailing delimiter characters. Therefore both cases
+/// reduce to a set of contiguous delimiters, which are considered a single
+/// delimiter (so getting rid of the string).
+/// Optional escape allows to escape delimiter characters (and *only* them
+/// and the escape character itself) using backslash.
+///
+/// We could use Boost for this, but this (simple) function eliminates one
+/// dependency in the code.
+///
+/// @param text String to be split. Passed by const reference; the input
+/// string itself is not modified.
+/// @param delim Delimiter characters
+/// @param escape Use backslash to escape delimiter characters
+///
+/// @return Vector of tokens.
+std::vector<std::string> tokens(const std::string& text,
+ const std::string& delim = std::string(" \t\n"),
+ bool escape = false);
+
+/// @brief Uppercase Character
+///
+/// Used in uppercase() to pass as an argument to std::transform(). The
+/// function std::toupper() can't be used directly as it takes an "int"; the
+/// character is first converted to unsigned char because passing a negative
+/// value (other than EOF) to std::toupper() is undefined behavior.
+///
+/// @param chr Character to be upper-cased.
+///
+/// @return Uppercase version of the argument
+inline char toUpper(char chr) {
+    return (static_cast<char>(std::toupper(static_cast<unsigned char>(chr))));
+}
+
+/// @brief Uppercase String
+///
+/// Converts every character of the string to upper case, in place.
+///
+/// @param text String to be upper-cased (modified in place).
+inline void uppercase(std::string& text) {
+    std::for_each(text.begin(), text.end(),
+                  [](char& chr) { chr = isc::util::str::toUpper(chr); });
+}
+
+/// @brief Lowercase Character
+///
+/// Used in lowercase() to pass as an argument to std::transform(). The
+/// function std::tolower() can't be used directly as it takes an "int"; the
+/// character is first converted to unsigned char because passing a negative
+/// value (other than EOF) to std::tolower() is undefined behavior.
+///
+/// @param chr Character to be lower-cased.
+///
+/// @return Lowercase version of the argument
+inline char toLower(char chr) {
+    return (static_cast<char>(std::tolower(static_cast<unsigned char>(chr))));
+}
+
+/// @brief Lowercase String
+///
+/// Converts every character of the string to lower case, in place.
+///
+/// @param text String to be lower-cased (modified in place).
+inline void lowercase(std::string& text) {
+    std::for_each(text.begin(), text.end(),
+                  [](char& chr) { chr = isc::util::str::toLower(chr); });
+}
+
+/// @brief Apply Formatting
+///
+/// Given a printf-style format string containing only "%s" place holders
+/// (others are ignored) and a vector of strings, this produces a single string
+/// with the placeholders replaced.
+///
+/// @param format Format string
+/// @param args Vector of argument strings
+///
+/// @return Resultant string
+std::string format(const std::string& format,
+ const std::vector<std::string>& args);
+
+
+/// @brief Returns one token from the given stringstream
+///
+/// Using the >> operator, with basic error checking
+///
+/// @throw StringTokenError if the token cannot be read from the stream
+///
+/// @param iss stringstream to read one token from
+///
+/// @return the first token read from the stringstream
+std::string getToken(std::istringstream& iss);
+
+/// @brief Converts a string token to an *unsigned* integer.
+///
+/// The value is converted using a lexical cast, with error and bounds
+/// checking.
+///
+/// NumType is a *signed* integral type (e.g. int32_t) that is sufficiently
+/// wide to store resulting integers.
+///
+/// BitSize is the maximum number of bits that the resulting integer can take.
+/// This function first checks whether the given token can be converted to
+/// an integer of NumType type. It then confirms the conversion result is
+/// within the valid range, i.e., [0, 2^BitSize - 1]. The second check is
+/// necessary because lexical_cast<T> where T is an unsigned integer type
+/// doesn't correctly reject negative numbers when compiled with SunStudio.
+///
+/// @throw StringTokenError if the value is out of range, or if it
+/// could not be converted
+///
+/// @param num_token the string token to convert
+///
+/// @return the converted value, of type NumType
+template <typename NumType, int BitSize>
+NumType tokenToNum(const std::string& num_token) {
+    NumType value;
+    try {
+        value = boost::lexical_cast<NumType>(num_token);
+    } catch (const boost::bad_lexical_cast&) {
+        isc_throw(StringTokenError, "Invalid SRV numeric parameter: " <<
+                  num_token);
+    }
+    // The valid range is [0, 2^BitSize - 1]; anything else is rejected.
+    if (!((value >= 0) && (value < (static_cast<NumType>(1) << BitSize)))) {
+        isc_throw(StringTokenError, "Numeric SRV parameter out of range: " <<
+                  value);
+    }
+    return (value);
+}
+
+/// @brief Converts a string in quotes into vector.
+///
+/// A converted string is first trimmed. If a trimmed string is in
+/// quotes, the quotes are removed and the resulting string is copied
+/// into a vector. If the string is not in quotes, an empty vector is
+/// returned.
+///
+/// The resulting string is copied to a vector and returned.
+///
+/// This function is intended to be used by the server configuration
+/// parsers to convert string values surrounded with quotes into
+/// binary form.
+///
+/// @param quoted_string String to be converted.
+///
+/// @return Vector containing converted string or empty vector if
+/// input string didn't contain expected quote characters.
+std::vector<uint8_t>
+quotedStringToBinary(const std::string& quoted_string);
+
+/// @brief Converts a string of separated hexadecimal digits
+/// into a vector.
+///
+/// Octets may contain 1 or 2 digits. For example, using a colon
+/// for a separator all of the following are valid:
+///
+/// - yy:yy:yy:yy:yy
+/// - y:y:y:y:y
+/// - y:yy:yy:y:y
+///
+/// If the decoded string doesn't match any of the supported formats,
+/// an exception is thrown.
+///
+/// @param hex_string Input string.
+/// @param sep character to use as a separator.
+/// @param binary Vector receiving converted string into binary.
+///
+/// @throw isc::BadValue if the format of the input string is invalid.
+void
+decodeSeparatedHexString(const std::string& hex_string,
+ const std::string& sep,
+ std::vector<uint8_t>& binary);
+
+/// @brief Converts a string of hexadecimal digits with colons into
+/// a vector.
+///
+/// Convenience method which calls @c decodeSeparatedHexString() passing
+/// in a colon for the separator.
+///
+/// @param hex_string Input string.
+/// @param binary Vector receiving converted string into binary.
+///
+/// @throw isc::BadValue if the format of the input string is invalid.
+void
+decodeColonSeparatedHexString(const std::string& hex_string,
+ std::vector<uint8_t>& binary);
+
+/// @brief Converts a formatted string of hexadecimal digits into
+/// a vector.
+///
+/// This function supports the following formats:
+///
+/// - yy:yy:yy:yy or yy yy yy yy - octets delimited by colons or
+/// spaces, see @c decodeSeparatedHexString
+///
+/// - yyyyyyyyyy
+/// - 0xyyyyyyyyyy
+///
+/// If there is an odd number of hexadecimal digits in the input
+/// string, the '0' is prepended to the string before decoding.
+///
+/// @param hex_string Input string.
+/// @param binary Vector receiving converted string into binary.
+///
+/// @throw isc::BadValue if the format of the input string is invalid.
+void
+decodeFormattedHexString(const std::string& hex_string,
+ std::vector<uint8_t>& binary);
+
+/// @brief Forward declaration to the @c StringSanitizer implementation.
+class StringSanitizerImpl;
+
+/// @brief Type representing the pointer to the @c StringSanitizerImpl.
+typedef boost::shared_ptr<StringSanitizerImpl> StringSanitizerImplPtr;
+
+/// @brief Implements a regular expression based string scrubber
+///
+/// The implementation uses C++11 regex IF the environment supports it
+/// (tested in configure.ac). If not it falls back to C lib regcomp/regexec.
+/// Older compilers, such as pre Gnu g++ 4.9.0, provided only experimental
+/// implementations of regex which are recognized as buggy.
+class StringSanitizer {
+public:
+
+ /// @brief Constructor.
+ ///
+ /// Compiles the given character set into a regular expression, and
+ /// retains the given character replacement. Thereafter, the instance
+ /// may be used to scrub an arbitrary number of strings.
+ ///
+ /// @param char_set string containing a regular expression (POSIX
+ /// extended syntax) that describes the characters to replace. If you
+ /// wanted to sanitize hostnames for example, you could specify the
+ /// inversion of valid characters "[^A-Za-z0-9_-]".
+ /// @param char_replacement string of one or more characters to use as the
+ /// replacement for invalid characters.
+ ///
+ /// @throw BadValue if given an invalid regular expression
+ StringSanitizer(const std::string& char_set,
+ const std::string& char_replacement);
+
+ /// @brief Destructor.
+ ///
+ /// Destroys the implementation instance.
+ ~StringSanitizer();
+
+ /// @brief Returns a scrubbed copy of a given string
+ ///
+ /// Replaces all occurrences of characters described by the regular
+ /// expression with the character replacement.
+ ///
+ /// @param original the string to scrub
+ /// @return scrubbed copy of the original string
+ /// @throw Unexpected if an error occurs during scrubbing
+ std::string scrub(const std::string& original);
+
+ /// @brief The maximum size for regex parameters.
+ ///
+ /// @note The regex engine is implemented using recursion and can cause
+ /// stack overflow if the input data is too large. An arbitrary size of
+ /// 4096 should be enough for all cases.
+ static const uint32_t MAX_DATA_SIZE;
+
+private:
+ /// @brief Pointer to the @c StringSanitizerImpl.
+ StringSanitizerImplPtr impl_;
+};
+
+/// @brief Type representing the pointer to the @c StringSanitizer.
+typedef boost::shared_ptr<StringSanitizer> StringSanitizerPtr;
+
+/// @brief Check if a string is printable
+///
+/// Each character is classified with std::isprint() (current C locale).
+///
+/// @param content String to check for printable characters
+/// @return True if empty or contains only printable characters, False otherwise
+inline bool
+isPrintable(const std::string& content) {
+    // Convert through unsigned char: std::isprint() has undefined behavior
+    // for negative values other than EOF (plain char may be signed).
+    return (std::all_of(content.begin(), content.end(), [](char ch) {
+        return (std::isprint(static_cast<unsigned char>(ch)) != 0);
+    }));
+}
+
+/// @brief Check if a byte vector is printable
+///
+/// Each byte is classified with std::isprint(); the scan stops at the
+/// first byte that is not printable.
+///
+/// @param content Vector to check for printable characters
+///
+/// @return True if empty or contains only printable characters, False otherwise
+inline bool
+isPrintable(const std::vector<uint8_t>& content) {
+    return (std::none_of(content.begin(), content.end(), [](uint8_t byte) {
+        return (std::isprint(static_cast<int>(byte)) == 0);
+    }));
+}
+
+
+/// @brief Dumps a buffer of bytes as a string of hexadecimal digits
+///
+/// @param data pointer to the data to dump
+/// @param length number of bytes to dump. Caller should ensure the length
+/// does not exceed the buffer.
+std::string dumpAsHex(const uint8_t* data, size_t length);
+
+} // namespace str
+} // namespace util
+} // namespace isc
+
+#endif // STRUTIL_H