summaryrefslogtreecommitdiffstats
path: root/src/lib/util/encode
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/util/encode')
-rw-r--r--src/lib/util/encode/encode.cc374
-rw-r--r--src/lib/util/encode/encode.h302
-rw-r--r--src/lib/util/encode/utf8.cc35
-rw-r--r--src/lib/util/encode/utf8.h27
4 files changed, 738 insertions, 0 deletions
diff --git a/src/lib/util/encode/encode.cc b/src/lib/util/encode/encode.cc
new file mode 100644
index 0000000..01b91cc
--- /dev/null
+++ b/src/lib/util/encode/encode.cc
@@ -0,0 +1,374 @@
+// Copyright (C) 2024 Internet Systems Consortium, Inc. ("ISC")
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include <config.h>
+
+#include <exceptions/exceptions.h>
+#include <exceptions/isc_assert.h>
+#include <util/encode/encode.h>
+
+#include <iostream>
+#include <stdint.h>
+#include <stdexcept>
+#include <string>
+#include <cstring>
+#include <vector>
+
+using namespace std;
+
+namespace isc {
+namespace util {
+namespace encode {
+
+BaseNEncoder::BaseNEncoder(const std::string& algorithm,
+                           const char* digit_set,
+                           const std::vector<uint8_t>& bits_table,
+                           size_t bits_per_digit,
+                           size_t digits_per_group,
+                           const char pad_char,
+                           size_t max_pad,
+                           bool case_sensitive)
+    : algorithm_(algorithm), digit_set_(digit_set), bits_table_(bits_table),
+      bits_per_digit_(bits_per_digit), digits_per_group_(digits_per_group),
+      pad_char_(pad_char), max_pad_(max_pad), case_sensitive_(case_sensitive),
+      max_bits_to_digit_(0), max_digit_to_bits_(0) {
+    // The two limits are derived from the supplied tables rather than
+    // passed in: they are the highest valid index into the digit set and
+    // into the digit-to-bits lookup table respectively.
+    max_bits_to_digit_ = std::strlen(digit_set_) - 1;
+    max_digit_to_bits_ = bits_table_.size() - 1;
+}
+
+char
+BaseNEncoder::bitsToDigit(uint8_t bits) {
+    // The value is used directly as an index into the digit set, so it
+    // must not exceed the last valid index.
+    const bool in_range = (bits <= max_bits_to_digit_);
+    if (!in_range) {
+        isc_throw(BadValue, "Digit bits : "
+                  << static_cast<uint16_t>(bits) << " invalid for " << algorithm_);
+    }
+
+    const char digit = digit_set_[bits];
+    return (digit);
+}
+
+uint8_t
+BaseNEncoder::digitToBits(uint8_t digit) {
+    // The digit's character code is the index into the lookup table;
+    // reject anything past the end of the table.
+    const bool in_table = (digit <= max_digit_to_bits_);
+    if (!in_table) {
+        isc_throw(BadValue, "Digit exceeds look up table: "
+                  << static_cast<uint16_t>(digit) << " for " << algorithm_);
+    }
+
+    const uint8_t bits = bits_table_[digit];
+    return (bits);
+}
+
+std::string
+BaseNEncoder::encode(const std::vector<uint8_t>& input) {
+    std::string digits;
+    if (input.empty()) {
+        return (digits);
+    }
+
+    // The input is treated as one long bit stream, consumed from the most
+    // significant bit of each byte. Bits are pushed into the right side of
+    // an index value; every time the index holds bits_per_digit_ bits it is
+    // translated into a digit, appended to the output, and restarted.
+    int index = 0;          // Digit set index currently being assembled.
+    size_t index_bits = 0;  // Number of bits accumulated in the index.
+
+    for (const uint8_t byte : input) {
+        // Walk the byte from its left-most bit to its right-most bit.
+        for (uint8_t mask = 0x80; mask; mask >>= 1) {
+            if (index_bits < bits_per_digit_) {
+                // Still room in the index: make space for the next bit.
+                index <<= 1;
+            } else {
+                // The index is complete: emit its digit and start over.
+                digits.push_back(bitsToDigit(index));
+                index = 0;
+                index_bits = 0;
+            }
+
+            // Copy the selected input bit into the right-most index bit.
+            if (byte & mask) {
+                index |= 1;
+            }
+
+            ++index_bits;
+        }
+    }
+
+    // The input is exhausted but the last index still holds bits. Any
+    // missing low-order bits are zero pad bits, so shift them in and emit
+    // the final digit.
+    index <<= (bits_per_digit_ - index_bits);
+    digits.push_back(bitsToDigit(index));
+
+    // Pad the output to a whole number of groups when grouping is in use.
+    if (digits_per_group_) {
+        const auto partial = digits.size() % digits_per_group_;
+        if (partial) {
+            digits.append(digits_per_group_ - partial, pad_char_);
+        }
+    }
+
+    return (digits);
+}
+
+void
+BaseNEncoder::decode(const std::string& encoded_str, std::vector<uint8_t>& output) {
+
+    // Mechanics are essentially the same as encode(). We iterate over the encoded
+    // string's digits, discarding whitespace. We look up each digit's binary value
+    // in the lookup table, keeping only the value's right-most bits_per_digit bits.
+    // Those bits are then shifted out from the left of the binary value into the
+    // right of the currently accumulating output byte until the byte is complete
+    // (8 bits) or the value's bits are exhausted. Completed bytes are added to the
+    // output buffer. We continue building bytes until we've exhausted the encoded
+    // string.
+    //
+    // Throws BadValue on characters outside the digit set, pad characters
+    // followed by digits, non-zero left-over bits, too many or inconsistent
+    // pad characters, or an incomplete final group. The output vector is
+    // cleared first and may hold partial data if an exception is thrown.
+
+    output.clear();
+    size_t dig_cnt = 0;          // Tracks how many encoded digits we see.
+    size_t pad_cnt = 0;          // Tracks how many pad characters we see.
+    size_t shift_bits = 8 - bits_per_digit_;  // Number of unused bits in digit data values.
+    uint8_t cur_byte = 0;        // Current output byte.
+    size_t cur_bit_cnt = 0;      // How many bits we have added to the current byte.
+
+    for (const auto enc_digit : encoded_str) {
+        // If it's a pad char, count it and go on.
+        if (pad_char_ && enc_digit == pad_char_) {
+            pad_cnt++;
+            continue;
+        }
+
+        // Translate the encoded digit to its binary bits.
+        uint8_t dig_bits = digitToBits(enc_digit);
+
+        // Skip whitespace. The choice of 0xee to signify white-space was arbitrary.
+        if (dig_bits == 0xee) {
+            continue;
+        }
+
+        // Error on invalid characters.
+        if (dig_bits == 0xff) {
+            isc_throw(isc::BadValue, "attempt to decode a value not in "
+                      << algorithm_ << " char set" << ": " << encoded_str);
+        }
+
+        // Error if pad characters occur in the middle.
+        if (pad_cnt) {
+            isc_throw(isc::BadValue, "pad mixed with digits in "
+                      << algorithm_ << ": " << encoded_str);
+        }
+
+        // Bump the valid character count.
+        dig_cnt++;
+
+        // Shift off the unused bits so the first data bit sits in bit 7.
+        dig_bits <<= shift_bits;
+
+        // Add digit's decoded bits to current byte.
+        for (size_t i = 0; i < bits_per_digit_; ++i) {
+            if (cur_bit_cnt < 8) {
+                // Shift contents over one to make room for next bit.
+                cur_byte <<= 1;
+            } else {
+                // Add the completed byte to the output.
+                output.push_back(cur_byte);
+                cur_byte = 0;
+                cur_bit_cnt = 0;
+            }
+
+            // Add the next bit if it's set.
+            if (dig_bits & 0x80) {
+                cur_byte |= 1;
+            }
+
+            // Shift the decoded bits over.
+            dig_bits <<= 1;
+
+            // Update the current byte bit count.
+            ++cur_bit_cnt;
+        }
+    }
+
+    if (cur_bit_cnt == 8) {
+        // Whole one left to add.
+        output.push_back(cur_byte);
+    } else if (cur_bit_cnt && cur_byte) {
+        // Left over bits that are not zero.
+        isc_throw(BadValue, "non-zero bits left over " << encoded_str);
+    }
+
+    if (pad_char_) {
+        // Check for too many pad characters.
+        if (pad_cnt > max_pad_) {
+            isc_throw(isc::BadValue, "too many pad characters for "
+                      << algorithm_ << ": " << encoded_str);
+        }
+
+        // Check for an invalid number of pad bits.
+        // Calculate the number of pad bits corresponding to the pad
+        // characters. In general, the pad bits consist of all-zero
+        // trailing bits of the last encoded character plus the zero bits
+        // represented by each pad character.
+        // 1st pad  2nd pad  3rd pad...
+        // +++===== =======  ===... (+: from encoded chars, =: from pad chars)
+        // 0000...0 0......0 000...
+        // 0      7 8     15 16.... (bits)
+        // The number of bits for the '==...' part is padchars * BitsPerChunk.
+        // So the total number of pad bits is the smallest multiple of 8
+        // that is >= padchars * BitsPerChunk.
+        // (Below, note the common idiom of adding 7 and then applying a
+        // bitwise AND with ~0x7. The AND clears the lowest three bits, so
+        // together with the +7 it rounds the value up to the nearest
+        // multiple of 8.)
+        const size_t padbits = ((pad_cnt * bits_per_digit_) + 7) & ~0x7;
+        if (padbits > bits_per_digit_ * (pad_cnt + 1)) {
+            isc_throw(isc::BadValue, "Invalid padding for "
+                      << algorithm_ << ": " << encoded_str);
+        }
+    }
+
+    // Check for an invalid total of encoded characters. A group size of
+    // zero means the algorithm does not group its digits (encode() treats
+    // it the same way), so there is nothing to verify and the modulo,
+    // which would be undefined for a zero divisor, must be skipped.
+    if (digits_per_group_ && ((pad_cnt + dig_cnt) % digits_per_group_)) {
+        isc_throw (isc::BadValue, "Incomplete input for "
+                   << algorithm_ << ": " << encoded_str);
+    }
+}
+
+// Base64 alphabet: the character at index N is the digit for 6-bit value N.
+const char* Base64Encoder::DIGIT_SET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz"
+ "0123456789"
+ "+/";
+
+// Reverse lookup for Base64, indexed by character code (256 entries, one
+// row of 16 per line). 0xee marks whitespace (0x09-0x0d and 0x20), 0xff
+// marks an invalid character, anything else is the digit's 6-bit value.
+// Note that '=' (0x3d) maps to 0 here; decode() counts pad characters
+// before it consults this table.
+const std::vector<uint8_t> Base64Encoder::BITS_TABLE = {
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xee,0xee,0xee,0xee,0xee,0xff,0xff, // 00-0f
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 10-1f
+ 0xee,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,62,0xff,0xff,0xff,63, // 20-2f
+ 52,53,54,55,56,57,58,59,60,61,0xff,0xff,0xff, 0,0xff,0xff, // 30-3f
+ 0xff, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, // 40-4f
+ 15,16,17,18,19,20,21,22,23,24,25,0xff,0xff,0xff,0xff,0xff, // 50-5f
+ 0xff,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40, // 60-6f
+ 41,42,43,44,45,46,47,48,49,50,51,0xff,0xff,0xff,0xff,0xff, // 70-7f
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 80-8f
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 90-9f
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // a0-af
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // b0-bf
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // c0-cf
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // d0-df
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // e0-ef
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff // f0-ff
+};
+
+// Base32Hex alphabet: the character at index N is the digit for 5-bit value N.
+const char* Base32HexEncoder::DIGIT_SET = "0123456789ABCDEFGHIJKLMNOPQRSTUV";
+
+// Reverse lookup for Base32Hex, indexed by character code (256 entries, one
+// row of 16 per line). 0xee marks whitespace (0x09-0x0d and 0x20), 0xff
+// marks an invalid character, anything else is the digit's 5-bit value.
+// Rows 40-5f and 60-7f are identical, so upper and lower case letters
+// decode to the same values.
+const std::vector<uint8_t> Base32HexEncoder::BITS_TABLE = {
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xee,0xee,0xee,0xee,0xee,0xff,0xff, // 00-0f
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 10-1f
+ 0xee,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 20-2f
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,0xff,0xff,0xff,0xff,0xff,0xff, // 30-3f
+ 0xff,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24, // 40-4f
+ 25,26,27,28,29,30,31,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 50-5f
+ 0xff,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24, // 60-6f
+ 25,26,27,28,29,30,31,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 70-7f
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 80-8f
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 90-9f
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // a0-af
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // b0-bf
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // c0-cf
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // d0-df
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // e0-ef
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff // f0-ff
+};
+
+// Base16 alphabet: the character at index N is the digit for 4-bit value N.
+const char* Base16Encoder::DIGIT_SET = "0123456789ABCDEF";
+
+// Reverse lookup for Base16, indexed by character code (256 entries, one
+// row of 16 per line). 0xee marks whitespace (0x09-0x0d and 0x20), 0xff
+// marks an invalid character, anything else is the digit's 4-bit value.
+// Both 'A'-'F' (row 40-4f) and 'a'-'f' (row 60-6f) map to 10-15.
+const std::vector<uint8_t> Base16Encoder::BITS_TABLE = {
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xee,0xee,0xee,0xee,0xee,0xff,0xff, // 00-0f
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 10-1f
+ 0xee,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 20-2f
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,0xff,0xff,0xff,0xff,0xff,0xff, // 30-3f
+ 0xff,10,11,12,13,14,15,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 40-4f
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 50-5f
+ 0xff,10,11,12,13,14,15,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 60-6f
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 70-7f
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 80-8f
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // 90-9f
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // a0-af
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // b0-bf
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // c0-cf
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // d0-df
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff, // e0-ef
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff // f0-ff
+};
+
+std::string
+encodeBase64(const std::vector<uint8_t>& binary) {
+    // One encoder instance is created on first use and reused afterwards.
+    static Base64Encoder base64;
+    return (base64.encode(binary));
+}
+
+void
+decodeBase64(const std::string& encoded_str, std::vector<uint8_t>& output) {
+    // One encoder instance is created on first use and reused afterwards.
+    static Base64Encoder base64;
+    base64.decode(encoded_str, output);
+}
+
+std::string
+encodeBase32Hex(const std::vector<uint8_t>& binary) {
+    // One encoder instance is created on first use and reused afterwards.
+    static Base32HexEncoder base32hex;
+    return (base32hex.encode(binary));
+}
+
+void
+decodeBase32Hex(const std::string& encoded_str, std::vector<uint8_t>& output) {
+    // One encoder instance is created on first use and reused afterwards.
+    static Base32HexEncoder base32hex;
+    base32hex.decode(encoded_str, output);
+}
+
+std::string
+encodeHex(const std::vector<uint8_t>& binary) {
+    // One encoder instance is created on first use and reused afterwards.
+    static Base16Encoder base16;
+    return (base16.encode(binary));
+}
+
+void
+decodeHex(const std::string& encoded_str, std::vector<uint8_t>& output) {
+    // One encoder instance is created on first use and reused afterwards.
+    static Base16Encoder base16;
+    base16.decode(encoded_str, output);
+}
+
+} // namespace encode
+} // namespace util
+} // namespace isc
diff --git a/src/lib/util/encode/encode.h b/src/lib/util/encode/encode.h
new file mode 100644
index 0000000..5365f42
--- /dev/null
+++ b/src/lib/util/encode/encode.h
@@ -0,0 +1,302 @@
+// Copyright (C) 2024 Internet Systems Consortium, Inc. ("ISC")
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef ENCODE_H
+#define ENCODE_H
+
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+namespace isc {
+namespace util {
+namespace encode {
+
+/// @brief Class for encoding and decoding binary data using an algorithm
+/// described in RFC 4648.
+class BaseNEncoder {
+public:
+
+    /// @brief Constructor
+    ///
+    /// @param algorithm name of the algorithm, used for logging
+    /// @param digit_set set of digits (i.e. alphabet) used for encoding
+    /// @param bits_table table to translate digits to data used during decoding
+    /// @param bits_per_digit number of data bits represented by a digit
+    /// @param digits_per_group number of digits contained in a group
+    /// @param pad_char character used for padding out to group size (0 means no
+    /// padding)
+    /// @param max_pad maximum number of pad characters in a group
+    /// @param case_sensitive indicates if the algorithm's digit set is
+    /// case sensitive
+    BaseNEncoder(const std::string& algorithm,
+                 const char* digit_set,
+                 const std::vector<uint8_t>& bits_table,
+                 size_t bits_per_digit,
+                 size_t digits_per_group,
+                 const char pad_char,
+                 size_t max_pad,
+                 bool case_sensitive);
+
+    /// @brief Destructor
+    virtual ~BaseNEncoder() = default;
+
+    /// @brief Encodes binary data using the encoder's algorithm
+    ///
+    /// @param input binary data to encode
+    ///
+    /// @return resultant encoded data string
+    /// @throw BadValue if an error occurs during encoding
+    std::string encode(const std::vector<uint8_t>& input);
+
+    /// @brief Decodes an encoded string using the encoder's algorithm
+    ///
+    /// @param encoded_str encoded string to decode
+    /// @param[out] output vector into which the decoded data is stored
+    ///
+    /// @throw BadValue if an error occurs during decoding
+    void decode(const std::string& encoded_str, std::vector<uint8_t>& output);
+
+    /// @brief Translate a byte of binary data into the appropriate algorithm digit
+    ///
+    /// @param bits binary value to translate
+    ///
+    /// @return char containing the digit corresponding to the binary value
+    /// @throw BadValue if the bits value is out of range
+    char bitsToDigit(uint8_t bits);
+
+    /// @brief Translate a digit into the appropriate algorithm bit value
+    ///
+    /// Function maps all 256 ASCII chars to their corresponding algorithm-specific
+    /// data value. A data value of 0xee marks a char as whitespace, 0xff marks a
+    /// char as invalid.
+    ///
+    /// @param digit the algorithm digit to translate
+    ///
+    /// @return byte containing the binary value corresponding to the digit
+    /// @throw BadValue if the digit lies beyond the end of the lookup table
+    uint8_t digitToBits(uint8_t digit);
+
+    /// @brief Get the algorithm name
+    ///
+    /// @return string containing the algorithm name
+    std::string getAlgorithm() const {
+        return (algorithm_);
+    }
+
+    /// @brief Get the digit set
+    ///
+    /// @return string containing the set of digits
+    const char* getDigitSet() const {
+        return (digit_set_);
+    }
+
+    /// @brief Get the digit lookup table
+    ///
+    /// @return vector containing the lookup table
+    const std::vector<uint8_t>& getBitsTable() const {
+        return (bits_table_);
+    }
+
+    /// @brief Get the number of data bits represented by a digit
+    ///
+    /// @return number of data bits per digit
+    size_t getBitsPerDigit() const {
+        return (bits_per_digit_);
+    }
+
+    /// @brief Get the number of digits contained in a group
+    ///
+    /// @return number of digits per group
+    size_t getDigitsPerGroup() const {
+        return (digits_per_group_);
+    }
+
+    /// @brief Get the character used for padding out to group size (0 means no padding)
+    ///
+    /// @return Character used as a pad byte
+    uint8_t getPadChar() const {
+        return (pad_char_);
+    }
+
+    /// @brief Get the maximum number of pad characters in a group
+    ///
+    /// @return Maximum number of pad characters
+    size_t getMaxPad() const {
+        return (max_pad_);
+    }
+
+    /// @brief Get the maximum index value of the digit set
+    ///
+    /// @return Maximum index value of the digit set
+    size_t getMaxBitsToDigit() const {
+        return (max_bits_to_digit_);
+    }
+
+    /// @brief Get the maximum index value of the algorithm bit table
+    ///
+    /// @return Maximum index value of the algorithm bit table
+    size_t getMaxDigitToBits() const {
+        return (max_digit_to_bits_);
+    }
+
+    /// @brief Indicates whether or not the algorithm's digit set
+    /// is case-sensitive.
+    ///
+    /// @return true if the digit set is case-sensitive, false otherwise
+    bool isCaseSensitive() const {
+        return (case_sensitive_);
+    }
+
+protected:
+    /// @brief Name of the algorithm, used for logging
+    std::string algorithm_;
+
+    /// @brief Set of digits (i.e. alphabet) used for encoding
+    const char* digit_set_;
+
+    /// @brief Table to translate digits to data used during decoding
+    ///
+    /// The table must map all 256 ASCII chars to their corresponding
+    /// algorithm-specific data value. A data value of 0xee marks
+    /// a char as whitespace, 0xff marks a char as invalid
+    std::vector<uint8_t> bits_table_;
+
+    /// @brief Number of data bits represented by a digit
+    size_t bits_per_digit_;
+
+    /// @brief Number of digits contained in a group
+    size_t digits_per_group_;
+
+    /// @brief Character used for padding out to group size (0 means no padding)
+    const char pad_char_;
+
+    /// @brief Maximum number of pad characters in a group
+    size_t max_pad_;
+
+    /// @brief Indicates whether or not the algorithm's digit set is case-sensitive
+    bool case_sensitive_;
+
+    /// @brief Maximum index value of the digit set
+    size_t max_bits_to_digit_;
+
+    /// @brief Maximum index value of the algorithm bit table
+    size_t max_digit_to_bits_;
+};
+
+/// @brief Class for encoding and decoding binary data using Base64
+/// as described in RFC 4648.
+class Base64Encoder : public BaseNEncoder {
+public:
+    /// @brief Base64 alphabet used when encoding
+    static const char* DIGIT_SET;
+
+    /// @brief Lookup table translating Base64 digits back to binary values
+    static const std::vector<uint8_t> BITS_TABLE;
+
+    /// @brief Constructor
+    ///
+    /// Configures the base encoder with 6 bits per digit, groups of 4
+    /// digits, '=' as the pad character with at most 2 pad characters,
+    /// and a case-sensitive digit set.
+    Base64Encoder()
+        : BaseNEncoder("base64", DIGIT_SET, BITS_TABLE,
+                       6, 4, '=', 2, true) {
+    }
+
+    /// @brief Destructor
+    ~Base64Encoder() override = default;
+};
+
+/// @brief Class for encoding and decoding binary data using Base32Hex
+/// as described in RFC 4648.
+class Base32HexEncoder : public BaseNEncoder {
+public:
+    /// @brief Base32Hex alphabet used when encoding
+    static const char* DIGIT_SET;
+
+    /// @brief Lookup table translating Base32Hex digits back to binary values
+    static const std::vector<uint8_t> BITS_TABLE;
+
+    /// @brief Constructor
+    ///
+    /// Configures the base encoder with 5 bits per digit, groups of 8
+    /// digits, '=' as the pad character with at most 6 pad characters,
+    /// and a case-insensitive digit set.
+    Base32HexEncoder()
+        : BaseNEncoder("base32Hex", DIGIT_SET, BITS_TABLE,
+                       5, 8, '=', 6, false) {
+    }
+
+    /// @brief Destructor
+    ~Base32HexEncoder() override = default;
+};
+
+/// @brief Class for encoding and decoding binary data using Base16 (aka Hex)
+/// as described in RFC 4648.
+class Base16Encoder : public BaseNEncoder {
+public:
+    /// @brief Base16 alphabet used when encoding
+    static const char* DIGIT_SET;
+
+    /// @brief Lookup table translating Base16 digits back to binary values
+    static const std::vector<uint8_t> BITS_TABLE;
+
+    /// @brief Constructor
+    ///
+    /// Configures the base encoder with 4 bits per digit, groups of 2
+    /// digits, '=' as the pad character with a maximum of 0 pad
+    /// characters, and a case-insensitive digit set.
+    Base16Encoder()
+        : BaseNEncoder("base16", DIGIT_SET, BITS_TABLE,
+                       4, 2, '=', 0, false) {
+    }
+
+    /// @brief Destructor
+    ~Base16Encoder() override = default;
+};
+
+/// @brief Encode binary data in the base32-hex format.
+///
+/// @param binary vector object storing the data to be encoded.
+/// @return string containing the base32-hex encoded value.
+std::string encodeBase32Hex(const std::vector<uint8_t>& binary);
+
+/// @brief Decode a base32-hex encoded string into binary data.
+///
+/// @param encoded_str string containing a base32-hex encoded value.
+/// @param[out] output vector into which the decoded binary data is stored.
+///
+/// @throw BadValue if the input string is invalid.
+void decodeBase32Hex(const std::string& encoded_str, std::vector<uint8_t>& output);
+
+/// @brief Encode binary data in the base64 format.
+///
+/// @param binary vector object storing the data to be encoded.
+/// @return string containing the base64 encoded value.
+std::string encodeBase64(const std::vector<uint8_t>& binary);
+
+/// @brief Decode a base64 encoded string into binary data.
+///
+/// @param encoded_str string containing a base64 encoded value.
+/// @param[out] output vector into which the decoded binary data is stored.
+///
+/// @throw BadValue if the input string is invalid.
+void decodeBase64(const std::string& encoded_str, std::vector<uint8_t>& output);
+
+/// @brief Encode binary data in the base16 format.
+///
+/// @param binary vector object containing the data to be encoded.
+/// @return string containing the base16 encoded value.
+std::string encodeHex(const std::vector<uint8_t>& binary);
+
+/// @brief Decode a base16 encoded string into binary data.
+///
+/// @param encoded_str string containing a base16 encoded value.
+/// @param[out] output vector into which the decoded binary data is stored.
+///
+/// @throw BadValue if the input string is invalid.
+void decodeHex(const std::string& encoded_str, std::vector<uint8_t>& output);
+
+/// @brief Encode in hexadecimal inline.
+///
+/// @param value the value to encode.
+///
+/// @return 0x followed by the value encoded in hex.
+inline std::string toHex(const std::string& value) {
+    // The argument is only read, so it is taken by const reference to
+    // avoid copying the string before its bytes are copied into the
+    // vector handed to encodeHex().
+    const std::vector<uint8_t> bin(value.begin(), value.end());
+    return ("0x" + encodeHex(bin));
+}
+
+} // namespace encode
+} // namespace util
+} // namespace isc
+
+#endif // ENCODE_H
diff --git a/src/lib/util/encode/utf8.cc b/src/lib/util/encode/utf8.cc
new file mode 100644
index 0000000..ac9e0d0
--- /dev/null
+++ b/src/lib/util/encode/utf8.cc
@@ -0,0 +1,35 @@
+// Copyright (C) 2020 Internet Systems Consortium, Inc. ("ISC")
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#include <config.h>
+
+#include <util/encode/utf8.h>
+
+namespace isc {
+namespace util {
+namespace encode {
+
+std::vector<uint8_t> encodeUtf8(const std::string& value) {
+    // Each input byte is treated as a code point in the range 0x00-0xff.
+    // Values below 0x80 are copied unchanged; the others are written as a
+    // two-byte sequence: 110000xx 10xxxxxx.
+    std::vector<uint8_t> result;
+
+    // Every input byte produces at least one output byte, so reserving the
+    // input size up front removes most reallocations. An empty input
+    // simply falls through and returns the empty vector.
+    result.reserve(value.size());
+
+    for (const char raw : value) {
+        // Work on the unsigned value so bytes >= 0x80 are not seen as
+        // negative where char is signed.
+        const uint8_t ch = static_cast<uint8_t>(raw);
+        if (ch < 0x80) {
+            result.push_back(ch);
+        } else {
+            // Lead byte carries the top two bits, continuation byte the
+            // low six bits.
+            result.push_back(static_cast<uint8_t>(0xc0 | (ch >> 6)));
+            result.push_back(static_cast<uint8_t>(0x80 | (ch & 0x3f)));
+        }
+    }
+    return (result);
+}
+
+} // namespace encode
+} // namespace util
+} // namespace isc
diff --git a/src/lib/util/encode/utf8.h b/src/lib/util/encode/utf8.h
new file mode 100644
index 0000000..9eda471
--- /dev/null
+++ b/src/lib/util/encode/utf8.h
@@ -0,0 +1,27 @@
+// Copyright (C) 2020 Internet Systems Consortium, Inc. ("ISC")
+//
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifndef UTF8_H
+#define UTF8_H 1
+
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+namespace isc {
+namespace util {
+namespace encode {
+/// @brief Encode value string into UTF-8.
+///
+/// @param value A string in latin1 i.e. no encoding.
+/// @return A vector object storing the data encoded in UTF-8.
+std::vector<uint8_t> encodeUtf8(const std::string& value);
+
+} // namespace encode
+} // namespace util
+} // namespace isc
+
+#endif // UTF8_H