summaryrefslogtreecommitdiffstats
path: root/src/lib/util/encode
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/util/encode')
-rw-r--r--src/lib/util/encode/base16_from_binary.h103
-rw-r--r--src/lib/util/encode/base32hex.h56
-rw-r--r--src/lib/util/encode/base32hex_from_binary.h105
-rw-r--r--src/lib/util/encode/base64.h71
-rw-r--r--src/lib/util/encode/base_n.cc494
-rw-r--r--src/lib/util/encode/binary_from_base16.h112
-rw-r--r--src/lib/util/encode/binary_from_base32hex.h115
-rw-r--r--src/lib/util/encode/hex.h66
-rw-r--r--src/lib/util/encode/utf8.cc35
-rw-r--r--src/lib/util/encode/utf8.h27
10 files changed, 1184 insertions, 0 deletions
diff --git a/src/lib/util/encode/base16_from_binary.h b/src/lib/util/encode/base16_from_binary.h
new file mode 100644
index 0000000..3eb697d
--- /dev/null
+++ b/src/lib/util/encode/base16_from_binary.h
@@ -0,0 +1,103 @@
+#ifndef BOOST_ARCHIVE_ITERATORS_BASE16_FROM_BINARY_HPP
+#define BOOST_ARCHIVE_ITERATORS_BASE16_FROM_BINARY_HPP
+
+/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
+// base16_from_binary.h (derived from boost base64_from_binary.hpp)
+
+// (C) Copyright 2002 Robert Ramey - http://www.rrsd.com .
+// Use, modification and distribution is subject to the Boost Software
+// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for updates, documentation, and revision history.
+
+#include <exceptions/isc_assert.h>
+
+#include <cstddef> // size_t
+#include <boost/config.hpp> // for BOOST_DEDUCED_TYPENAME
+#if defined(BOOST_NO_STDC_NAMESPACE)
+namespace std{
+ using ::size_t;
+} // namespace std
+#endif
+
+// See base32hex_from_binary.h for why we need base64_from...hpp here.
+#include <boost/archive/iterators/base64_from_binary.hpp>
+
+namespace boost {
+namespace archive {
+namespace iterators {
+
+/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
+// convert binary integers to base16 characters
+
+namespace detail {
+
+template<class CharType>
+struct from_4_bit {
+ typedef CharType result_type;
+ CharType operator()(CharType t) const{
+ const char * lookup_table =
+ "0123456789"
+ "ABCDEF";
+ isc_throw_assert(t < 16);
+ return (lookup_table[static_cast<size_t>(t)]);
+ }
+};
+
+} // namespace detail
+
+// note: what we would like to do is
+// template<class Base, class CharType = BOOST_DEDUCED_TYPENAME Base::value_type>
+// typedef transform_iterator<
+// from_4_bit<CharType>,
+// transform_width<Base, 4, sizeof(Base::value_type) * 8, CharType>
+// > base16_from_binary;
+// but C++ won't accept this. Rather than using a "type generator" and
+// using a different syntax, make a derivation which should be equivalent.
+//
+// Another issue addressed here is that the transform_iterator doesn't have
+// a templated constructor. This makes it incompatible with the dataflow
+// ideal. This is also addressed here.
+
+//template<class Base, class CharType = BOOST_DEDUCED_TYPENAME Base::value_type>
+template<
+ class Base,
+ class CharType = BOOST_DEDUCED_TYPENAME boost::iterator_value<Base>::type
+>
+class base16_from_binary :
+ public transform_iterator<
+ detail::from_4_bit<CharType>,
+ Base
+ >
+{
+ friend class boost::iterator_core_access;
+ typedef transform_iterator<
+ BOOST_DEDUCED_TYPENAME detail::from_4_bit<CharType>,
+ Base
+ > super_t;
+
+public:
+ // make composable by using templated constructor
+ template<class T>
+ base16_from_binary(T start) :
+ super_t(
+ Base(static_cast<T>(start)),
+ detail::from_4_bit<CharType>()
+ )
+ {}
+ // intel 7.1 doesn't like default copy constructor
+ base16_from_binary(const base16_from_binary & rhs) :
+ super_t(
+ Base(rhs.base_reference()),
+ detail::from_4_bit<CharType>()
+ )
+ {}
+// base16_from_binary(){};
+};
+
+} // namespace iterators
+} // namespace archive
+} // namespace boost
+
+#endif // BOOST_ARCHIVE_ITERATORS_BASE16_FROM_BINARY_HPP
diff --git a/src/lib/util/encode/base32hex.h b/src/lib/util/encode/base32hex.h
new file mode 100644
index 0000000..0a85b36
--- /dev/null
+++ b/src/lib/util/encode/base32hex.h
@@ -0,0 +1,56 @@
+// Copyright (C) 2009-2015 Internet Systems Consortium, Inc. ("ISC")
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef BASE32HEX_H
+#define BASE32HEX_H 1
+
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+//
+// Note: this helper module isn't specific to the DNS protocol per se.
+// We should probably move this to somewhere else, possibly in some common
+// utility area.
+//
+
+namespace isc {
+namespace util {
+namespace encode {
+
+/// \brief Encode binary data in the base32hex format.
+///
+/// The underlying implementation is shared with \c encodeBase64, and all
+/// description except the format (base32hex) equally applies.
+///
+/// Note: the encoding format is base32hex, not base32.
+///
+/// \param binary A vector object storing the data to be encoded.
+/// \return A newly created string that stores base32hex encoded value for
+/// binary.
+std::string encodeBase32Hex(const std::vector<uint8_t>& binary);
+
+/// \brief Decode a text encoded in the base32hex format into the
+/// original %data.
+///
+/// The underlying implementation is shared with \c decodeBase64, and all
+/// description except the format (base32hex) equally applies.
+///
+/// Note: the encoding format is base32hex, not base32.
+///
+/// \param input A text encoded in the base32hex format.
+/// \param result A vector in which the decoded %data is to be stored.
+void decodeBase32Hex(const std::string& input, std::vector<uint8_t>& result);
+
+} // namespace encode
+} // namespace util
+} // namespace isc
+
+#endif // BASE32HEX_H
+
+// Local Variables:
+// mode: c++
+// End:
diff --git a/src/lib/util/encode/base32hex_from_binary.h b/src/lib/util/encode/base32hex_from_binary.h
new file mode 100644
index 0000000..84f2b69
--- /dev/null
+++ b/src/lib/util/encode/base32hex_from_binary.h
@@ -0,0 +1,105 @@
+#ifndef BOOST_ARCHIVE_ITERATORS_BASE32HEX_FROM_BINARY_HPP
+#define BOOST_ARCHIVE_ITERATORS_BASE32HEX_FROM_BINARY_HPP
+
+/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
+// base32hex_from_binary.h (derived from boost base64_from_binary.hpp)
+
+// (C) Copyright 2002 Robert Ramey - http://www.rrsd.com .
+// Use, modification and distribution is subject to the Boost Software
+// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for updates, documentation, and revision history.
+
+#include <exceptions/isc_assert.h>
+
+#include <cstddef> // size_t
+#include <boost/config.hpp> // for BOOST_DEDUCED_TYPENAME
+#if defined(BOOST_NO_STDC_NAMESPACE)
+namespace std{
+ using ::size_t;
+} // namespace std
+#endif
+
+// We use the same boost header files used in "base64_from_". Since the
+// precise path to these headers may vary depending on the boost version we
+// simply include the base64 header here.
+#include <boost/archive/iterators/base64_from_binary.hpp>
+
+namespace boost {
+namespace archive {
+namespace iterators {
+
+/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
+// convert binary integers to base32hex characters
+
+namespace detail {
+
+template<class CharType>
+struct from_5_bit {
+ typedef CharType result_type;
+ CharType operator()(CharType t) const{
+ const char * lookup_table =
+ "0123456789"
+ "ABCDEFGHIJKLMNOPQRSTUV";
+ isc_throw_assert(t < 32);
+ return (lookup_table[static_cast<size_t>(t)]);
+ }
+};
+
+} // namespace detail
+
+// note: what we would like to do is
+// template<class Base, class CharType = BOOST_DEDUCED_TYPENAME Base::value_type>
+// typedef transform_iterator<
+// from_5_bit<CharType>,
+// transform_width<Base, 5, sizeof(Base::value_type) * 8, CharType>
+// > base32hex_from_binary;
+// but C++ won't accept this. Rather than using a "type generator" and
+// using a different syntax, make a derivation which should be equivalent.
+//
+// Another issue addressed here is that the transform_iterator doesn't have
+// a templated constructor. This makes it incompatible with the dataflow
+// ideal. This is also addressed here.
+
+//template<class Base, class CharType = BOOST_DEDUCED_TYPENAME Base::value_type>
+template<
+ class Base,
+ class CharType = BOOST_DEDUCED_TYPENAME boost::iterator_value<Base>::type
+>
+class base32hex_from_binary :
+ public transform_iterator<
+ detail::from_5_bit<CharType>,
+ Base
+ >
+{
+ friend class boost::iterator_core_access;
+ typedef transform_iterator<
+ BOOST_DEDUCED_TYPENAME detail::from_5_bit<CharType>,
+ Base
+ > super_t;
+
+public:
+ // make composable by using templated constructor
+ template<class T>
+ base32hex_from_binary(T start) :
+ super_t(
+ Base(static_cast<T>(start)),
+ detail::from_5_bit<CharType>()
+ )
+ {}
+ // intel 7.1 doesn't like default copy constructor
+ base32hex_from_binary(const base32hex_from_binary & rhs) :
+ super_t(
+ Base(rhs.base_reference()),
+ detail::from_5_bit<CharType>()
+ )
+ {}
+// base32hex_from_binary(){};
+};
+
+} // namespace iterators
+} // namespace archive
+} // namespace boost
+
+#endif // BOOST_ARCHIVE_ITERATORS_BASE32HEX_FROM_BINARY_HPP
diff --git a/src/lib/util/encode/base64.h b/src/lib/util/encode/base64.h
new file mode 100644
index 0000000..84280ec
--- /dev/null
+++ b/src/lib/util/encode/base64.h
@@ -0,0 +1,71 @@
+// Copyright (C) 2009-2015 Internet Systems Consortium, Inc. ("ISC")
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef BASE64_H
+#define BASE64_H 1
+
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+//
+// Note: this helper module isn't specific to the DNS protocol per se.
+// We should probably move this to somewhere else, possibly in some common
+// utility area.
+//
+
+namespace isc {
+namespace util {
+namespace encode {
+
+/// \brief Encode binary data in the base64 format.
+///
+/// This function returns a new \c std::string object that stores a text
+/// encoded in the base64 format for the given \c binary %data.
+/// The resulting string will be a valid, canonical form of base64
+/// representation as specified in RFC4648.
+///
+/// If memory allocation for the returned string fails, a corresponding
+/// standard exception will be thrown. This function never throws exceptions
+/// otherwise.
+///
+/// \param binary A vector object storing the data to be encoded.
+/// \return A newly created string that stores base64 encoded value for binary.
+std::string encodeBase64(const std::vector<uint8_t>& binary);
+
+/// \brief Decode a text encoded in the base64 format into the original %data.
+///
+/// The \c input argument must be a valid string represented in the base64
+/// format as specified in RFC4648. Space characters (spaces, tabs, newlines)
+/// can be included in \c input and will be ignored. Without spaces, the
+/// length of string must be a multiple of 4 bytes with necessary paddings.
+/// Also it must be encoded using the canonical encoding (see RFC4648).
+/// If any of these conditions is not met, an exception of class
+/// \c isc::BadValue will be thrown.
+///
+/// If \c result doesn't have sufficient capacity to store all decoded %data
+/// and memory allocation fails, a corresponding standard exception will be
+/// thrown. If the caller knows the necessary length (which can in theory
+/// be calculated from the input string), this situation can be avoided by
+/// reserving sufficient space for \c result beforehand.
+///
+/// Any existing %data in \c result will be removed. This is the case in some
+/// of the cases where an exception is thrown; that is, this function only
+/// provides the basic exception guarantee.
+///
+/// \param input A text encoded in the base64 format.
+/// \param result A vector in which the decoded %data is to be stored.
+void decodeBase64(const std::string& input, std::vector<uint8_t>& result);
+
+} // namespace encode
+} // namespace util
+} // namespace isc
+
+#endif // BASE64_H
+
+// Local Variables:
+// mode: c++
+// End:
diff --git a/src/lib/util/encode/base_n.cc b/src/lib/util/encode/base_n.cc
new file mode 100644
index 0000000..e0c37e5
--- /dev/null
+++ b/src/lib/util/encode/base_n.cc
@@ -0,0 +1,494 @@
+// Copyright (C) 2010-2022 Internet Systems Consortium, Inc. ("ISC")
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include <config.h>
+
+#include <util/encode/base32hex_from_binary.h>
+#include <util/encode/binary_from_base32hex.h>
+#include <util/encode/base16_from_binary.h>
+#include <util/encode/binary_from_base16.h>
+#include <util/encode/base32hex.h>
+#include <util/encode/base64.h>
+
+#include <exceptions/exceptions.h>
+#include <exceptions/isc_assert.h>
+
+#include <boost/archive/iterators/base64_from_binary.hpp>
+#include <boost/archive/iterators/binary_from_base64.hpp>
+#include <boost/archive/iterators/transform_width.hpp>
+#ifdef HAVE_BOOST_INTEGER_COMMON_FACTOR_HPP
+#include <boost/integer/common_factor.hpp>
+#else
+#include <boost/math/common_factor.hpp>
+#endif
+
+#include <stdint.h>
+#include <stdexcept>
+#include <iterator>
+#include <string>
+#include <vector>
+
+using namespace std;
+using namespace boost::archive::iterators;
+
+namespace isc {
+namespace util {
+namespace encode {
+
+// Some versions of clang cannot handle exceptions in unnamed namespaces
+// so this exception is defined in an 'internal' namespace
+namespace clang_unnamed_namespace_workaround {
+// An internally caught exception to unify a few possible cases of the same
+// error.
+class IncompleteBaseInput : public std::exception {
+};
+} // end namespace internal
+
+// In the following anonymous namespace, we provide a generic framework
+// to encode/decode baseN format. We use the following tools:
+// - boost base64_from_binary/binary_from_base64: provide mapping table for
+// base64.
+// These classes take another iterator (Base) as a template argument, and
+// their dereference operator (operator*()) first retrieves an input value
+// from Base via Base::operator* and converts the value using their mapping
+// table. The converted value is returned as their own operator*.
+// - base{32hex,16}_from_binary/binary_from_base{32hex,16}: provide mapping
+// table for base32hex and base16. A straightforward variation of their
+// base64 counterparts.
+// - EncodeNormalizer/DecodeNormalizer: supplemental filter handling baseN
+// padding characters (=)
+// - boost transform_width: an iterator framework for handling data stream
+// per bit-group. It takes another iterator (Base) and output/input bit
+// numbers (BitsOut/BitsIn) template arguments. A transform_width object
+// internally maintains a bit stream, which can be retrieved per BitsOut
+// bits via its dereference operator (operator*()). It builds the stream
+// by internally iterating over the Base object via Base::operator++ and
+// Base::operator*, using the least BitsIn bits of the result of
+// Base::operator*. In our usage BitsIn for encoding and BitsOut for
+// decoding are always 8 (# of bits for one byte).
+//
+// Its dereference operator
+// retrieves BitsIn bits from the result of "*Base" (if necessary it
+// internally calls ++Base)
+//
+// A conceptual description of how the encoding and decoding work is as
+// follows:
+// Encoding:
+// input binary data => Normalizer (append sufficient number of 0 bits)
+// => transform_width (extract bit groups from the original
+// stream)
+// => baseXX_from_binary (convert each bit group to an
+// encoded byte using the mapping)
+// Decoding:
+// input baseXX text => Normalizer (convert '='s to the encoded characters
+// corresponding to 0, e.g. 'A's in base64)
+// => binary_from_baseXX (convert each encoded byte into
+// the original group bit)
+// => transform_width (build original byte stream by
+// concatenating the decoded bit
+// stream)
+//
+// Below, we define a set of templated classes to handle different parameters
+// for different encoding algorithms.
+namespace {
+// Common constants used for all baseN encoding.
+const char BASE_PADDING_CHAR = '=';
+const uint8_t BINARY_ZERO_CODE = 0;
+
+// EncodeNormalizer is an input iterator intended to be used as a filter
+// between the binary stream and baseXX_from_binary translator (via
+// transform_width). An EncodeNormalizer object is configured with two
+// iterators (base and base_end), specifying the head and end of the input
+// stream. It internally iterators over the original stream, and return
+// each byte of the stream intact via its dereference operator until it
+// reaches the end of the stream. After that the EncodeNormalizer object
+// will return 0 no matter how many times it is subsequently incremented.
+// This is necessary because the input binary stream may not contain
+// sufficient bits for a full encoded text while baseXX_from_binary expects
+// a sufficient length of input.
+// Note: this class is intended to be used within this implementation file,
+// and assumes "base < base_end" on construction without validating the
+// arguments. The behavior is undefined if this assumption doesn't hold.
+class EncodeNormalizer {
+public:
+ // Aliases used to enable iterator behavior on this class
+ using iterator_category = input_iterator_tag;
+ using value_type = uint8_t;
+ using difference_type = ptrdiff_t;
+ using pointer = uint8_t*;
+ using reference = uint8_t&;
+
+ EncodeNormalizer(const vector<uint8_t>::const_iterator& base,
+ const vector<uint8_t>::const_iterator& base_end) :
+ base_(base), base_end_(base_end), in_pad_(false)
+ {}
+ EncodeNormalizer& operator++() { // prefix version
+ increment();
+ return (*this);
+ }
+ EncodeNormalizer operator++(int) { // postfix version
+ const EncodeNormalizer copy = *this;
+ increment();
+ return (copy);
+ }
+ const uint8_t& operator*() const {
+ if (in_pad_) {
+ return (BINARY_ZERO_CODE);
+ } else {
+ return (*base_);
+ }
+ }
+ bool operator==(const EncodeNormalizer& other) const {
+ return (base_ == other.base_);
+ }
+private:
+ void increment() {
+ if (!in_pad_) {
+ ++base_;
+ }
+ if (base_ == base_end_) {
+ in_pad_ = true;
+ }
+ }
+ vector<uint8_t>::const_iterator base_;
+ const vector<uint8_t>::const_iterator base_end_;
+ bool in_pad_;
+};
+
+// DecodeNormalizer is an input iterator intended to be used as a filter
+// between the encoded baseX stream and binary_from_baseXX.
+// A DecodeNormalizer object is configured with three string iterators
+// (base, base_beginpad, and base_end), specifying the head of the string,
+// the beginning position of baseX padding (when there's padding), and
+// end of the string, respectively. It internally iterators over the original
+// stream, and return each character of the encoded string via its dereference
+// operator until it reaches base_beginpad. After that the DecodeNormalizer
+// will return the encoding character corresponding to the all-0 value
+// (which is specified on construction via base_zero_code. see also
+// BaseZeroCode below). This translation is necessary because
+// binary_from_baseXX doesn't accept the padding character (i.e. '=').
+// Note: this class is intended to be used within this implementation file,
+// and for simplicity assumes "base < base_beginpad <= base_end" on
+// construction without validating the arguments. The behavior is undefined
+// if this assumption doesn't hold.
+class DecodeNormalizer {
+public:
+ // Aliases used to enable iterator behavior on this class
+ using iterator_category = input_iterator_tag;
+ using value_type = char;
+ using difference_type = ptrdiff_t;
+ using pointer = char*;
+ using reference = char&;
+
+ DecodeNormalizer(const char base_zero_code,
+ const string::const_iterator& base,
+ const string::const_iterator& base_beginpad,
+ const string::const_iterator& base_end,
+ size_t* char_count) :
+ base_zero_code_(base_zero_code),
+ base_(base), base_beginpad_(base_beginpad), base_end_(base_end),
+ in_pad_(false), char_count_(char_count)
+ {
+ // Skip beginning spaces, if any. We need do it here because
+ // otherwise the first call to operator*() would be confused.
+ skipSpaces();
+ }
+ DecodeNormalizer& operator++() {
+ if (base_ < base_end_) {
+ ++*char_count_;
+ }
+ ++base_;
+ skipSpaces();
+ if (base_ == base_beginpad_) {
+ in_pad_ = true;
+ }
+ return (*this);
+ }
+ void skipSpaces() {
+ // If (char is signed and) *base_ < 0, on Windows platform with Visual
+ // Studio compiler it may trigger _ASSERTE((unsigned)(c + 1) <= 256);
+ // so make sure that the parameter of isspace() is larger than 0.
+ // We don't simply cast it to unsigned char to avoid confusing the
+ // isspace() implementation with a possible extension for values
+ // larger than 127. Also note the check is not ">= 0"; for systems
+ // where char is unsigned that would always be true and would possibly
+ // trigger a compiler warning that could stop the build.
+ while (base_ != base_end_ && *base_ > 0 && isspace(*base_)) {
+ ++base_;
+ }
+ }
+ const char& operator*() const {
+ if (base_ == base_end_) {
+ // binary_from_baseX can call this operator when it needs more bits
+ // even if the internal iterator (base_) has reached its end
+ // (if that happens it means the input is an incomplete baseX
+ // string and should be rejected). So this is the only point
+ // we can catch and reject this type of invalid input.
+ //
+ // More recent versions of Boost fixed the behavior and the
+ // out-of-range call to this operator doesn't happen. It's good,
+ // but in that case we need to catch incomplete baseX input in
+ // a different way. It's done via char_count_ and after the
+ // completion of decoding.
+
+ // throw this now and convert it
+ throw clang_unnamed_namespace_workaround::IncompleteBaseInput();
+ }
+ if (*base_ == BASE_PADDING_CHAR) {
+ // Padding can only happen at the end of the input string. We can
+ // detect any violation of this by checking in_pad_, which is
+ // true iff we are on or after the first valid sequence of padding
+ // characters.
+ if (in_pad_) {
+ return (base_zero_code_);
+ } else {
+ isc_throw(BadValue, "Intermediate padding found");
+ }
+ } else {
+ return (*base_);
+ }
+ }
+ bool operator==(const DecodeNormalizer& other) const {
+ return (base_ == other.base_);
+ }
+private:
+ const char base_zero_code_;
+ string::const_iterator base_;
+ const string::const_iterator base_beginpad_;
+ const string::const_iterator base_end_;
+ bool in_pad_;
+ // Store number of non-space decoded characters (incl. pad) here. Define
+ // it as a pointer so we can carry it over to any copied objects.
+ size_t* char_count_;
+};
+
+// BitsPerChunk: number of bits to be converted using the baseN mapping table.
+// e.g. 6 for base64.
+// BaseZeroCode: the byte character that represents a value of 0 in
+// the corresponding encoding. e.g. 'A' for base64.
+// Encoder: baseX_from_binary<transform_width<EncodeNormalizer,
+// BitsPerChunk, 8> >
+// Decoder: transform_width<binary_from_baseX<DecodeNormalizer>,
+// 8, BitsPerChunk>
+template <int BitsPerChunk, char BaseZeroCode,
+ typename Encoder, typename Decoder>
+struct BaseNTransformer {
+ static string encode(const vector<uint8_t>& binary);
+ static void decode(const char* algorithm,
+ const string& base64, vector<uint8_t>& result);
+
+ // BITS_PER_GROUP is the number of bits for the smallest possible (non
+ // empty) bit string that can be converted to a valid baseN encoded text
+ // without padding. It's the least common multiple of 8 and BitsPerChunk,
+ // e.g. 24 for base64.
+ static const int BITS_PER_GROUP =
+#ifdef HAVE_BOOST_INTEGER_COMMON_FACTOR_HPP
+ boost::integer::static_lcm<BitsPerChunk, 8>::value;
+#else
+ boost::math::static_lcm<BitsPerChunk, 8>::value;
+#endif
+
+ // MAX_PADDING_CHARS is the maximum number of padding characters
+ // that can appear in a valid baseN encoded text.
+ // It's group_len - chars_for_byte, where group_len is the number of
+ // encoded characters to represent BITS_PER_GROUP bits, and
+ // chars_for_byte is the number of encoded character that is needed to
+ // represent a single byte, which is ceil(8 / BitsPerChunk).
+ // For example, for base64 we need two encoded characters to represent a
+ // byte, and each group consists of 4 encoded characters, so
+ // MAX_PADDING_CHARS is 4 - 2 = 2.
+ static const int MAX_PADDING_CHARS =
+ BITS_PER_GROUP / BitsPerChunk -
+ (8 / BitsPerChunk + ((8 % BitsPerChunk) == 0 ? 0 : 1));
+};
+
+template <int BitsPerChunk, char BaseZeroCode,
+ typename Encoder, typename Decoder>
+string
+BaseNTransformer<BitsPerChunk, BaseZeroCode, Encoder, Decoder>::encode(
+ const vector<uint8_t>& binary)
+{
+ // calculate the resulting length.
+ size_t bits = binary.size() * 8;
+ if (bits % BITS_PER_GROUP > 0) {
+ bits += (BITS_PER_GROUP - (bits % BITS_PER_GROUP));
+ }
+ const size_t len = bits / BitsPerChunk;
+
+ string result;
+ result.reserve(len);
+ result.assign(Encoder(EncodeNormalizer(binary.begin(), binary.end())),
+ Encoder(EncodeNormalizer(binary.end(), binary.end())));
+ isc_throw_assert(len >= result.length());
+ result.append(len - result.length(), BASE_PADDING_CHAR);
+ return (result);
+}
+
+template <int BitsPerChunk, char BaseZeroCode,
+ typename Encoder, typename Decoder>
+void
+BaseNTransformer<BitsPerChunk, BaseZeroCode, Encoder, Decoder>::decode(
+ const char* const algorithm,
+ const string& input,
+ vector<uint8_t>& result)
+{
+ // enumerate the number of trailing padding characters (=), ignoring
+ // white spaces. since baseN_from_binary doesn't accept padding,
+ // we handle it explicitly.
+ size_t padchars = 0;
+ string::const_reverse_iterator srit = input.rbegin();
+ string::const_reverse_iterator srit_end = input.rend();
+ while (srit != srit_end) {
+ char ch = *srit;
+ if (ch == BASE_PADDING_CHAR) {
+ if (++padchars > MAX_PADDING_CHARS) {
+ isc_throw(BadValue, "Too many " << algorithm
+ << " padding characters: " << input);
+ }
+ } else if (!(ch > 0 && isspace(ch))) {
+ // see the note for DecodeNormalizer::skipSpaces() above for ch > 0
+ break;
+ }
+ ++srit;
+ }
+ // then calculate the number of padding bits corresponding to the padding
+ // characters. In general, the padding bits consist of all-zero
+ // trailing bits of the last encoded character followed by zero bits
+ // represented by the padding characters:
+ // 1st pad 2nd pad 3rd pad...
+ // +++===== ======= ===... (+: from encoded chars, =: from pad chars)
+ // 0000...0 0......0 000...
+ // 0 7 8 15 16.... (bits)
+ // The number of bits for the '==...' part is padchars * BitsPerChunk.
+ // So the total number of padding bits is the smallest multiple of 8
+ // that is >= padchars * BitsPerChunk.
+ // (Below, note the common idiom of the bitwise AND with ~7. It clears the
+ // lowest three bits, so has the effect of rounding the result down to the
+ // nearest multiple of 8)
+ const size_t padbits = (padchars * BitsPerChunk + 7) & ~7;
+
+ // In some encoding algorithm, it could happen that a padding byte would
+ // contain a full set of encoded bits, which is not allowed by definition
+ // of padding. For example, if BitsPerChunk is 5, the following
+ // representation could happen:
+ // ++00000= (+: from encoded chars, 0: encoded char for '0', =: pad chars)
+ // 0 7 (bits)
+ // This must actually be encoded as follows:
+ // ++======
+ // 0 7 (bits)
+ // The following check rejects this type of invalid encoding.
+ if (padbits > BitsPerChunk * (padchars + 1)) {
+ isc_throw(BadValue, "Invalid " << algorithm << " padding: " << input);
+ }
+
+ // convert the number of bits in bytes for convenience.
+ const size_t padbytes = padbits / 8;
+
+ try {
+ size_t char_count = 0;
+ result.assign(Decoder(DecodeNormalizer(BaseZeroCode, input.begin(),
+ srit.base(), input.end(),
+ &char_count)),
+ Decoder(DecodeNormalizer(BaseZeroCode, input.end(),
+ input.end(), input.end(),
+ NULL)));
+
+ // Number of bits of the conversion result including padding must be
+ // a multiple of 8; otherwise the decoder reaches the end of input
+ // with some incomplete bits of data, which is invalid.
+ if (((char_count * BitsPerChunk) % 8) != 0) {
+ // catch this immediately below
+ throw clang_unnamed_namespace_workaround::IncompleteBaseInput();
+ }
+ } catch (const clang_unnamed_namespace_workaround::IncompleteBaseInput&) {
+ // we unify error handling for incomplete input here.
+ isc_throw(BadValue, "Incomplete input for " << algorithm
+ << ": " << input);
+ } catch (const dataflow_exception& ex) {
+ // convert any boost exceptions into our local one.
+ isc_throw(BadValue, ex.what());
+ }
+
+ // Confirm the original BaseX text is the canonical encoding of the
+ // data, that is, that the first byte of padding is indeed 0.
+ // (DecodeNormalizer and binary_from_baseXX ensure that the rest of the
+ // padding is all zero).
+ isc_throw_assert(result.size() >= padbytes);
+ if (padbytes > 0 && *(result.end() - padbytes) != 0) {
+ isc_throw(BadValue, "Non 0 bits included in " << algorithm
+ << " padding: " << input);
+ }
+
+ // strip the padded zero-bit fields
+ result.resize(result.size() - padbytes);
+}
+
+//
+// Instantiation for BASE-64
+//
+typedef
+base64_from_binary<transform_width<EncodeNormalizer, 6, 8> > base64_encoder;
+typedef
+transform_width<binary_from_base64<DecodeNormalizer>, 8, 6> base64_decoder;
+typedef BaseNTransformer<6, 'A', base64_encoder, base64_decoder>
+Base64Transformer;
+
+//
+// Instantiation for BASE-32HEX
+//
+typedef
+base32hex_from_binary<transform_width<EncodeNormalizer, 5, 8> >
+base32hex_encoder;
+typedef
+transform_width<binary_from_base32hex<DecodeNormalizer>, 8, 5>
+base32hex_decoder;
+typedef BaseNTransformer<5, '0', base32hex_encoder, base32hex_decoder>
+Base32HexTransformer;
+
+//
+// Instantiation for BASE-16 (HEX)
+//
+typedef
+base16_from_binary<transform_width<EncodeNormalizer, 4, 8> > base16_encoder;
+typedef
+transform_width<binary_from_base16<DecodeNormalizer>, 8, 4> base16_decoder;
+typedef BaseNTransformer<4, '0', base16_encoder, base16_decoder>
+Base16Transformer;
+}
+
+string
+encodeBase64(const vector<uint8_t>& binary) {
+ return (Base64Transformer::encode(binary));
+}
+
+void
+decodeBase64(const string& input, vector<uint8_t>& result) {
+ Base64Transformer::decode("base64", input, result);
+}
+
+string
+encodeBase32Hex(const vector<uint8_t>& binary) {
+ return (Base32HexTransformer::encode(binary));
+}
+
+void
+decodeBase32Hex(const string& input, vector<uint8_t>& result) {
+ Base32HexTransformer::decode("base32hex", input, result);
+}
+
+string
+encodeHex(const vector<uint8_t>& binary) {
+ return (Base16Transformer::encode(binary));
+}
+
+void
+decodeHex(const string& input, vector<uint8_t>& result) {
+ Base16Transformer::decode("base16", input, result);
+}
+
+} // namespace encode
+} // namespace util
+} // namespace isc
diff --git a/src/lib/util/encode/binary_from_base16.h b/src/lib/util/encode/binary_from_base16.h
new file mode 100644
index 0000000..f913dd0
--- /dev/null
+++ b/src/lib/util/encode/binary_from_base16.h
@@ -0,0 +1,112 @@
+#ifndef BOOST_ARCHIVE_ITERATORS_BINARY_FROM_BASE16_HPP
+#define BOOST_ARCHIVE_ITERATORS_BINARY_FROM_BASE16_HPP
+
+/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
+// binary_from_base16.h (derived from boost binary_from_base64.hpp)
+
+// (C) Copyright 2002 Robert Ramey - http://www.rrsd.com .
+// Use, modification and distribution is subject to the Boost Software
+// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for updates, documentation, and revision history.
+
+#include <cassert>
+
+// See binary_from_base32hex.h for why we need _from_base64.hpp here.
+#include <boost/archive/iterators/binary_from_base64.hpp>
+
+#include <exceptions/exceptions.h>
+
+namespace boost {
+namespace archive {
+namespace iterators {
+
+/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
+// convert base16 characters to binary data
+
+namespace detail {
+
+template<class CharType>
+struct to_4_bit {
+ typedef CharType result_type;
+ CharType operator()(CharType t) const{
+ const signed char lookup_table[] = {
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 00-0f
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 10-1f
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 20-2f
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1, // 30-3f
+ -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 40-4f
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 50-5f
+ -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 60-6f
+ };
+ BOOST_STATIC_ASSERT(0x70 == sizeof(lookup_table));
+ signed char value = -1;
+ if((unsigned)t < sizeof(lookup_table))
+ value = lookup_table[(unsigned)t];
+ if(-1 == value) {
+ isc_throw(isc::BadValue,
+ "attempt to decode a value not in base16 char set");
+ }
+ return (value);
+ }
+};
+
+} // namespace detail
+
+// note: what we would like to do is
+// template<class Base, class CharType = BOOST_DEDUCED_TYPENAME Base::value_type>
+// typedef transform_iterator<
+// from_4_bit<CharType>,
+// transform_width<Base, 4, sizeof(Base::value_type) * 8, CharType>
+// > base16_from_binary;
+// but C++ won't accept this. Rather than using a "type generator" and
+// using a different syntax, make a derivation which should be equivalent.
+//
+// Another issue addressed here is that the transform_iterator doesn't have
+// a templated constructor. This makes it incompatible with the dataflow
+// ideal. This is also addressed here.
+
+template<
+ class Base,
+ class CharType = BOOST_DEDUCED_TYPENAME boost::iterator_value<Base>::type
+>
+class binary_from_base16 : public
+ transform_iterator<
+ detail::to_4_bit<CharType>,
+ Base
+ >
+{
+ friend class boost::iterator_core_access;
+ typedef transform_iterator<
+ detail::to_4_bit<CharType>,
+ Base
+ > super_t;
+public:
+ // make composable by using templated constructor
+ template<class T>
+ binary_from_base16(T start) :
+ super_t(
+ Base(static_cast<T>(start)),
+ detail::to_4_bit<CharType>()
+ )
+ {}
+ // intel 7.1 doesn't like default copy constructor
+ binary_from_base16(const binary_from_base16 & rhs) :
+ super_t(
+ Base(rhs.base_reference()),
+ detail::to_4_bit<CharType>()
+ )
+ {}
+// binary_from_base16(){};
+};
+
+} // namespace iterators
+} // namespace archive
+} // namespace boost
+
+#endif // BOOST_ARCHIVE_ITERATORS_BINARY_FROM_BASE16_HPP
+
+// Local Variables:
+// mode: c++
+// End:
diff --git a/src/lib/util/encode/binary_from_base32hex.h b/src/lib/util/encode/binary_from_base32hex.h
new file mode 100644
index 0000000..2911789
--- /dev/null
+++ b/src/lib/util/encode/binary_from_base32hex.h
@@ -0,0 +1,115 @@
+#ifndef BOOST_ARCHIVE_ITERATORS_BINARY_FROM_BASE32HEX_HPP
+#define BOOST_ARCHIVE_ITERATORS_BINARY_FROM_BASE32HEX_HPP
+
+/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
+// binary_from_base32hex.h (derived from boost binary_from_base64.hpp)
+
+// (C) Copyright 2002 Robert Ramey - http://www.rrsd.com .
+// Use, modification and distribution is subject to the Boost Software
+// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+// See http://www.boost.org for updates, documentation, and revision history.
+
+#include <cassert>
+
+// We use the same boost header files used in "_from_base64". Since the
+// precise path to these headers may vary depending on the boost version we
+// simply include the base64 header here.
+#include <boost/archive/iterators/binary_from_base64.hpp>
+
+#include <exceptions/exceptions.h>
+
+namespace boost {
+namespace archive {
+namespace iterators {
+
+/////////1/////////2/////////3/////////4/////////5/////////6/////////7/////////8
+// convert base32hex characters to binary data
+
+namespace detail {
+
+template<class CharType>
+struct to_5_bit {
+ typedef CharType result_type;
+ CharType operator()(CharType t) const{
+ const signed char lookup_table[] = {
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 00-0f
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 10-1f
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 20-2f
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1, // 30-3f
+ -1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24, // 40-4f
+ 25,26,27,28,29,30,31,-1,-1,-1,-1,-1,-1,-1,-1,-1, // 50-5f
+ -1,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24, // 60-6f
+ 25,26,27,28,29,30,31,-1,-1,-1,-1,-1,-1,-1,-1,-1 // 70-7f
+ };
+ BOOST_STATIC_ASSERT(0x80 == sizeof(lookup_table));
+ signed char value = -1;
+ if((unsigned)t < sizeof(lookup_table))
+ value = lookup_table[(unsigned)t];
+ if(-1 == value) {
+ isc_throw(isc::BadValue,
+ "attempt to decode a value not in base32hex char set");
+ }
+ return (value);
+ }
+};
+
+} // namespace detail
+
+// note: what we would like to do is
+// template<class Base, class CharType = BOOST_DEDUCED_TYPENAME Base::value_type>
+// typedef transform_iterator<
+// from_5_bit<CharType>,
+// transform_width<Base, 5, sizeof(Base::value_type) * 8, CharType>
+// > base32hex_from_binary;
+// but C++ won't accept this. Rather than using a "type generator" and
+// using a different syntax, make a derivation which should be equivalent.
+//
+// Another issue addressed here is that the transform_iterator doesn't have
+// a templated constructor. This makes it incompatible with the dataflow
+// ideal. This is also addressed here.
+
+template<
+ class Base,
+ class CharType = BOOST_DEDUCED_TYPENAME boost::iterator_value<Base>::type
+>
+class binary_from_base32hex : public
+ transform_iterator<
+ detail::to_5_bit<CharType>,
+ Base
+ >
+{
+ friend class boost::iterator_core_access;
+ typedef transform_iterator<
+ detail::to_5_bit<CharType>,
+ Base
+ > super_t;
+public:
+ // make composable by using templated constructor
+ template<class T>
+ binary_from_base32hex(T start) :
+ super_t(
+ Base(static_cast<T>(start)),
+ detail::to_5_bit<CharType>()
+ )
+ {}
+ // intel 7.1 doesn't like default copy constructor
+ binary_from_base32hex(const binary_from_base32hex & rhs) :
+ super_t(
+ Base(rhs.base_reference()),
+ detail::to_5_bit<CharType>()
+ )
+ {}
+// binary_from_base32hex(){};
+};
+
+} // namespace iterators
+} // namespace archive
+} // namespace boost
+
+#endif // BOOST_ARCHIVE_ITERATORS_BINARY_FROM_BASE32HEX_HPP
+
+// Local Variables:
+// mode: c++
+// End:
diff --git a/src/lib/util/encode/hex.h b/src/lib/util/encode/hex.h
new file mode 100644
index 0000000..de3ac21
--- /dev/null
+++ b/src/lib/util/encode/hex.h
@@ -0,0 +1,66 @@
+// Copyright (C) 2009-2016 Internet Systems Consortium, Inc. ("ISC")
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef HEX_H
+#define HEX_H 1
+
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+//
+// Note: this helper module isn't specific to the DNS protocol per se.
+// We should probably move this to somewhere else, possibly in some common
+// utility area.
+//
+
+namespace isc {
+namespace util {
+namespace encode {
+/// \brief Encode binary data in the base16 ('hex') format.
+///
+/// The underlying implementation is shared with \c encodeBase64, and most of
+/// the description except the format (base16) equally applies.
+/// Another notable exception is that the base16 encoding doesn't require
+/// padding, so padding related considerations and the notion of canonical
+/// encoding don't apply.
+///
+/// \param binary A vector object storing the data to be encoded.
+/// \return A newly created string that stores base16 encoded value for
+/// binary.
+std::string encodeHex(const std::vector<uint8_t>& binary);
+
+/// \brief Decode a text encoded in the base16 ('hex') format into the
+/// original %data.
+///
+/// The underlying implementation is shared with \c decodeBase64, and most
+/// of the description except the format (base16) equally applies.
+/// Another notable exception is that the base16 encoding doesn't require
+/// padding, so padding related considerations and the notion of canonical
+/// encoding don't apply.
+///
+/// \param input A text encoded in the base16 format.
+/// \param result A vector in which the decoded %data is to be stored.
+void decodeHex(const std::string& input, std::vector<uint8_t>& result);
+
+/// \brief Encode in hexadecimal inline
+///
+/// \param value the value to encode
+/// \return 0x followed by the value encoded in hexa
+inline std::string toHex(std::string value) {
+ std::vector<uint8_t> bin(value.begin(), value.end());
+ return ("0x" + encodeHex(bin));
+}
+
+} // namespace encode
+} // namespace util
+} // namespace isc
+
+#endif // HEX_H
+
+// Local Variables:
+// mode: c++
+// End:
diff --git a/src/lib/util/encode/utf8.cc b/src/lib/util/encode/utf8.cc
new file mode 100644
index 0000000..ac9e0d0
--- /dev/null
+++ b/src/lib/util/encode/utf8.cc
@@ -0,0 +1,35 @@
+// Copyright (C) 2020 Internet Systems Consortium, Inc. ("ISC")
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include <config.h>
+
+#include <util/encode/utf8.h>
+
+namespace isc {
+namespace util {
+namespace encode {
+
+std::vector<uint8_t> encodeUtf8(const std::string& value) {
+ std::vector<uint8_t> result;
+ if (value.empty()) {
+ return (result);
+ }
+ const uint8_t* start = reinterpret_cast<const uint8_t*>(value.c_str());
+ std::vector<uint8_t> binary(start, start + value.size());
+ for (uint8_t ch : binary) {
+ if (ch < 0x80) {
+ result.push_back(ch);
+ } else {
+ result.push_back(0xc0 | (ch >> 6));
+ result.push_back(0x80 | (ch & 0x3f));
+ }
+ }
+ return (result);
+}
+
+} // namespace encode
+} // namespace util
+} // namespace isc
diff --git a/src/lib/util/encode/utf8.h b/src/lib/util/encode/utf8.h
new file mode 100644
index 0000000..9eda471
--- /dev/null
+++ b/src/lib/util/encode/utf8.h
@@ -0,0 +1,27 @@
+// Copyright (C) 2020 Internet Systems Consortium, Inc. ("ISC")
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef UTF8_H
+#define UTF8_H 1
+
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+namespace isc {
+namespace util {
+namespace encode {
+/// @brief Encode value string into UTF-8.
+///
+/// @param value A string in latin1 i.e. no encoding.
+/// @return A vector object storing the data encoded in UTF-8.
+std::vector<uint8_t> encodeUtf8(const std::string& value);
+
+} // namespace encode
+} // namespace util
+} // namespace isc
+
+#endif // UTF8_H