diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-13 21:11:59 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-13 21:11:59 +0000 |
commit | 3cd01b932e1c85394272ae64fae67ebeda92fb00 (patch) | |
tree | c5a3115d710afc1879ddea5349362a2bc651733c /dnsname.hh | |
parent | Initial commit. (diff) | |
download | dnsdist-3cd01b932e1c85394272ae64fae67ebeda92fb00.tar.xz dnsdist-3cd01b932e1c85394272ae64fae67ebeda92fb00.zip |
Adding upstream version 1.8.3.upstream/1.8.3
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'dnsname.hh')
-rw-r--r-- | dnsname.hh | 667 |
1 files changed, 667 insertions, 0 deletions
diff --git a/dnsname.hh b/dnsname.hh new file mode 100644 index 0000000..29c8325 --- /dev/null +++ b/dnsname.hh @@ -0,0 +1,667 @@ +/* + * This file is part of PowerDNS or dnsdist. + * Copyright -- PowerDNS.COM B.V. and its contributors + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * In addition, for the avoidance of any doubt, permission is granted to + * link this program with OpenSSL and to (re)distribute the binaries + * produced as the result of such linking. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#pragma once +#include <array> +#include <cstring> +#include <optional> +#include <string> +#include <vector> +#include <set> +#include <strings.h> +#include <stdexcept> +#include <sstream> +#include <iterator> +#include <unordered_set> +#include <string_view> + +#include <boost/version.hpp> + +#if BOOST_VERSION >= 105300 +#include <boost/container/string.hpp> +#endif + +inline bool dns_isspace(char c) +{ + return c == ' ' || c == '\t' || c == '\r' || c == '\n'; +} + +extern const unsigned char dns_toupper_table[256], dns_tolower_table[256]; + +inline unsigned char dns_toupper(unsigned char c) +{ + return dns_toupper_table[c]; +} + +inline unsigned char dns_tolower(unsigned char c) +{ + return dns_tolower_table[c]; +} + +#include "burtle.hh" + +// #include "dns.hh" +// #include "logger.hh" + +//#include <ext/vstring.h> + +/* Quest in life: + accept escaped ascii presentations of DNS names and store them "natively" + accept a DNS packet with an offset, and extract a DNS name from it + build up DNSNames with prepend and append of 'raw' unescaped labels + + Be able to turn them into ASCII and "DNS name in a packet" again on request + + Provide some common operators for comparison, detection of being part of another domain + + NOTE: For now, everything MUST be . terminated, otherwise it is an error +*/ + +class DNSName +{ +public: + static const size_t s_maxDNSNameLength = 255; + + DNSName() {} //!< Constructs an *empty* DNSName, NOT the root! + // Work around assertion in some boost versions that do not like self-assignment of boost::container::string + DNSName& operator=(const DNSName& rhs) + { + if (this != &rhs) { + d_storage = rhs.d_storage; + } + return *this; + } + DNSName& operator=(DNSName&& rhs) + { + if (this != &rhs) { + d_storage = std::move(rhs.d_storage); + } + return *this; + } + DNSName(const DNSName& a) = default; + DNSName(DNSName&& a) = default; + + explicit DNSName(std::string_view sw); //!< Constructs from a human formatted, escaped presentation + DNSName(const char* p, int len, int offset, bool uncompress, uint16_t* qtype=nullptr, uint16_t* qclass=nullptr, unsigned int* consumed=nullptr, uint16_t minOffset=0); //!< Construct from a DNS Packet, taking the first question if offset=12. If supplied, consumed is set to the number of bytes consumed from the packet, which will not be equal to the wire length of the resulting name in case of compression. + + bool isPartOf(const DNSName& rhs) const; //!< Are we part of the rhs name? Note that name.isPartOf(name). + inline bool operator==(const DNSName& rhs) const; //!< DNS-native comparison (case insensitive) - empty compares to empty + bool operator!=(const DNSName& other) const { return !(*this == other); } + + std::string toString(const std::string& separator=".", const bool trailing=true) const; //!< Our human-friendly, escaped, representation + void toString(std::string& output, const std::string& separator=".", const bool trailing=true) const; + std::string toLogString() const; //!< like plain toString, but returns (empty) on empty names + std::string toStringNoDot() const { return toString(".", false); } + std::string toStringRootDot() const { if(isRoot()) return "."; else return toString(".", false); } + std::string toDNSString() const; //!< Our representation in DNS native format + std::string toDNSStringLC() const; //!< Our representation in DNS native format, lower cased + void appendRawLabel(const std::string& str); //!< Append this unescaped label + void appendRawLabel(const char* start, unsigned int length); //!< Append this unescaped label + void prependRawLabel(const std::string& str); //!< Prepend this unescaped label + std::vector<std::string> getRawLabels() const; //!< Individual raw unescaped labels + std::string getRawLabel(unsigned int pos) const; //!< Get the specified raw unescaped label + DNSName getLastLabel() const; //!< Get the DNSName of the last label + bool chopOff(); //!< Turn www.powerdns.com. into powerdns.com., returns false for . + DNSName makeRelative(const DNSName& zone) const; + DNSName makeLowerCase() const + { + DNSName ret(*this); + ret.makeUsLowerCase(); + return ret; + } + void makeUsLowerCase() + { + for(auto & c : d_storage) { + c=dns_tolower(c); + } + } + void makeUsRelative(const DNSName& zone); + DNSName getCommonLabels(const DNSName& other) const; //!< Return the list of common labels from the top, for example 'c.d' for 'a.b.c.d' and 'x.y.c.d' + DNSName labelReverse() const; + bool isWildcard() const; + bool isHostname() const; + unsigned int countLabels() const; + size_t wirelength() const; //!< Number of total bytes in the name + bool empty() const { return d_storage.empty(); } + bool isRoot() const { return d_storage.size()==1 && d_storage[0]==0; } + void clear() { d_storage.clear(); } + void trimToLabels(unsigned int); + size_t hash(size_t init=0) const + { + return burtleCI((const unsigned char*)d_storage.c_str(), d_storage.size(), init); + } + DNSName& operator+=(const DNSName& rhs) + { + if(d_storage.size() + rhs.d_storage.size() > s_maxDNSNameLength + 1) // one extra byte for the second root label + throwSafeRangeError("resulting name too long", rhs.d_storage.data(), rhs.d_storage.size()); + if(rhs.empty()) + return *this; + + if(d_storage.empty()) + d_storage+=rhs.d_storage; + else + d_storage.replace(d_storage.length()-1, rhs.d_storage.length(), rhs.d_storage); + + return *this; + } + + bool operator<(const DNSName& rhs) const // this delivers _some_ kind of ordering, but not one useful in a DNS context. Really fast though. + { + return std::lexicographical_compare(d_storage.rbegin(), d_storage.rend(), + rhs.d_storage.rbegin(), rhs.d_storage.rend(), + [](const unsigned char& a, const unsigned char& b) { + return dns_tolower(a) < dns_tolower(b); + }); // note that this is case insensitive, including on the label lengths + } + + inline bool canonCompare(const DNSName& rhs) const; + bool slowCanonCompare(const DNSName& rhs) const; + +#if BOOST_VERSION >= 105300 + typedef boost::container::string string_t; +#else + typedef std::string string_t; +#endif + const string_t& getStorage() const { + return d_storage; + } + + bool has8bitBytes() const; /* returns true if at least one byte of the labels forming the name is not included in [A-Za-z0-9_*./@ \\:-] */ + + class RawLabelsVisitor + { + public: + /* Zero-copy, zero-allocation raw labels visitor. + The general idea is that we walk the labels in the constructor, + filling up our array of labels position and setting the initial + value of d_position at the number of labels. + We then can easily provide string_view into the first and last label. + pop_back() moves d_position one label closer to the start, so we + can also easily walk back the labels in reverse order. + There is no copy because we use a reference into the DNSName storage, + so it is absolutely forbidden to alter the DNSName for as long as we + exist, and no allocation because we use a static array (there cannot + be more than 128 labels in a DNSName). + */ + RawLabelsVisitor(const string_t& storage); + std::string_view front() const; + std::string_view back() const; + bool pop_back(); + bool empty() const; + private: + std::array<uint8_t, 128> d_labelPositions; + const string_t& d_storage; + size_t d_position{0}; + }; + RawLabelsVisitor getRawLabelsVisitor() const; + +private: + string_t d_storage; + + void packetParser(const char* p, int len, int offset, bool uncompress, uint16_t* qtype, uint16_t* qclass, unsigned int* consumed, int depth, uint16_t minOffset); + static void appendEscapedLabel(std::string& appendTo, const char* orig, size_t len); + static std::string unescapeLabel(const std::string& orig); + static void throwSafeRangeError(const std::string& msg, const char* buf, size_t length); +}; + +size_t hash_value(DNSName const& d); + + +inline bool DNSName::canonCompare(const DNSName& rhs) const +{ + // 01234567890abcd + // us: 1a3www4ds9a2nl + // rhs: 3www6online3com + // to compare, we start at the back, is nl < com? no -> done + // + // 0,2,6,a + // 0,4,a + + uint8_t ourpos[64], rhspos[64]; + uint8_t ourcount=0, rhscount=0; + //cout<<"Asked to compare "<<toString()<<" to "<<rhs.toString()<<endl; + for(const unsigned char* p = (const unsigned char*)d_storage.c_str(); p < (const unsigned char*)d_storage.c_str() + d_storage.size() && *p && ourcount < sizeof(ourpos); p+=*p+1) + ourpos[ourcount++]=(p-(const unsigned char*)d_storage.c_str()); + for(const unsigned char* p = (const unsigned char*)rhs.d_storage.c_str(); p < (const unsigned char*)rhs.d_storage.c_str() + rhs.d_storage.size() && *p && rhscount < sizeof(rhspos); p+=*p+1) + rhspos[rhscount++]=(p-(const unsigned char*)rhs.d_storage.c_str()); + + if(ourcount == sizeof(ourpos) || rhscount==sizeof(rhspos)) { + return slowCanonCompare(rhs); + } + + for(;;) { + if(ourcount == 0 && rhscount != 0) + return true; + if(rhscount == 0) + return false; + ourcount--; + rhscount--; + + bool res=std::lexicographical_compare( + d_storage.c_str() + ourpos[ourcount] + 1, + d_storage.c_str() + ourpos[ourcount] + 1 + *(d_storage.c_str() + ourpos[ourcount]), + rhs.d_storage.c_str() + rhspos[rhscount] + 1, + rhs.d_storage.c_str() + rhspos[rhscount] + 1 + *(rhs.d_storage.c_str() + rhspos[rhscount]), + [](const unsigned char& a, const unsigned char& b) { + return dns_tolower(a) < dns_tolower(b); + }); + + // cout<<"Forward: "<<res<<endl; + if(res) + return true; + + res=std::lexicographical_compare( rhs.d_storage.c_str() + rhspos[rhscount] + 1, + rhs.d_storage.c_str() + rhspos[rhscount] + 1 + *(rhs.d_storage.c_str() + rhspos[rhscount]), + d_storage.c_str() + ourpos[ourcount] + 1, + d_storage.c_str() + ourpos[ourcount] + 1 + *(d_storage.c_str() + ourpos[ourcount]), + [](const unsigned char& a, const unsigned char& b) { + return dns_tolower(a) < dns_tolower(b); + }); + // cout<<"Reverse: "<<res<<endl; + if(res) + return false; + } + return false; +} + + +struct CanonDNSNameCompare +{ + bool operator()(const DNSName&a, const DNSName& b) const + { + return a.canonCompare(b); + } +}; + +inline DNSName operator+(const DNSName& lhs, const DNSName& rhs) +{ + DNSName ret=lhs; + ret += rhs; + return ret; +} + +extern const DNSName g_rootdnsname, g_wildcarddnsname; + +template<typename T> +struct SuffixMatchTree +{ + SuffixMatchTree(const std::string& name="", bool endNode_=false) : d_name(name), endNode(endNode_) + {} + + SuffixMatchTree(const SuffixMatchTree& rhs): d_name(rhs.d_name), children(rhs.children), endNode(rhs.endNode) + { + if (endNode) { + d_value = rhs.d_value; + } + } + SuffixMatchTree & operator=(const SuffixMatchTree &rhs) + { + d_name = rhs.d_name; + children = rhs.children; + endNode = rhs.endNode; + if (endNode) { + d_value = rhs.d_value; + } + return *this; + } + bool operator<(const SuffixMatchTree& rhs) const + { + return strcasecmp(d_name.c_str(), rhs.d_name.c_str()) < 0; + } + + std::string d_name; + mutable std::set<SuffixMatchTree, std::less<>> children; + mutable bool endNode; + mutable T d_value; + + /* this structure is used to do a lookup without allocating and + copying a string, using C++14's heterogeneous lookups in ordered + containers */ + struct LightKey + { + std::string_view d_name; + bool operator<(const SuffixMatchTree& smt) const + { + auto compareUpTo = std::min(this->d_name.size(), smt.d_name.size()); + auto ret = strncasecmp(this->d_name.data(), smt.d_name.data(), compareUpTo); + if (ret != 0) { + return ret < 0; + } + if (this->d_name.size() == smt.d_name.size()) { + return ret < 0; + } + return this->d_name.size() < smt.d_name.size(); + } + }; + + bool operator<(const LightKey& lk) const + { + auto compareUpTo = std::min(this->d_name.size(), lk.d_name.size()); + auto ret = strncasecmp(this->d_name.data(), lk.d_name.data(), compareUpTo); + if (ret != 0) { + return ret < 0; + } + if (this->d_name.size() == lk.d_name.size()) { + return ret < 0; + } + return this->d_name.size() < lk.d_name.size(); + } + + template<typename V> + void visit(const V& v) const { + for(const auto& c : children) { + c.visit(v); + } + + if (endNode) { + v(*this); + } + } + + void add(const DNSName& name, T&& t) + { + auto labels = name.getRawLabels(); + add(labels, std::move(t)); + } + + void add(std::vector<std::string>& labels, T&& value) const + { + if (labels.empty()) { // this allows insertion of the root + endNode = true; + d_value = std::move(value); + } + else if(labels.size()==1) { + auto res = children.emplace(*labels.begin(), true); + if (!res.second) { + // we might already have had the node as an + // intermediary one, but it's now an end node + if (!res.first->endNode) { + res.first->endNode = true; + } + } + res.first->d_value = std::move(value); + } + else { + auto res = children.emplace(*labels.rbegin(), false); + labels.pop_back(); + res.first->add(labels, std::move(value)); + } + } + + void remove(const DNSName &name, bool subtree=false) const + { + auto labels = name.getRawLabels(); + remove(labels, subtree); + } + + /* Removes the node at `labels`, also make sure that no empty + * children will be left behind in memory + */ + void remove(std::vector<std::string>& labels, bool subtree = false) const + { + if (labels.empty()) { // this allows removal of the root + endNode = false; + if (subtree) { + children.clear(); + } + return; + } + + SuffixMatchTree smt(*labels.rbegin()); + auto child = children.find(smt); + if (child == children.end()) { + // No subnode found, we're done + return; + } + + // We have found a child + labels.pop_back(); + if (labels.empty()) { + // The child is no longer an endnode + child->endNode = false; + + if (subtree) { + child->children.clear(); + } + + // If the child has no further children, just remove it from the set. + if (child->children.empty()) { + children.erase(child); + } + return; + } + + // We are not at the end, let the child figure out what to do + child->remove(labels); + } + + T* lookup(const DNSName& name) const + { + auto bestNode = getBestNode(name); + if (bestNode) { + return &bestNode->d_value; + } + return nullptr; + } + + std::optional<DNSName> getBestMatch(const DNSName& name) const + { + if (children.empty()) { // speed up empty set + return endNode ? std::optional<DNSName>(g_rootdnsname) : std::nullopt; + } + + auto visitor = name.getRawLabelsVisitor(); + return getBestMatch(visitor); + } + + // Returns all end-nodes, fully qualified (not as separate labels) + std::vector<DNSName> getNodes() const { + std::vector<DNSName> ret; + if (endNode) { + ret.push_back(DNSName(d_name)); + } + for (const auto& child : children) { + auto nodes = child.getNodes(); + ret.reserve(ret.size() + nodes.size()); + for (const auto &node: nodes) { + ret.push_back(node + DNSName(d_name)); + } + } + return ret; + } + +private: + const SuffixMatchTree* getBestNode(const DNSName& name) const + { + if (children.empty()) { // speed up empty set + if (endNode) { + return this; + } + return nullptr; + } + + auto visitor = name.getRawLabelsVisitor(); + return getBestNode(visitor); + } + + const SuffixMatchTree* getBestNode(DNSName::RawLabelsVisitor& visitor) const + { + if (visitor.empty()) { // optimization + if (endNode) { + return this; + } + return nullptr; + } + + const LightKey lk{visitor.back()}; + auto child = children.find(lk); + if (child == children.end()) { + if (endNode) { + return this; + } + return nullptr; + } + visitor.pop_back(); + auto result = child->getBestNode(visitor); + if (result) { + return result; + } + return endNode ? this : nullptr; + } + + std::optional<DNSName> getBestMatch(DNSName::RawLabelsVisitor& visitor) const + { + if (visitor.empty()) { // optimization + if (endNode) { + return std::optional<DNSName>(d_name); + } + return std::nullopt; + } + + const LightKey lk{visitor.back()}; + auto child = children.find(lk); + if (child == children.end()) { + if (endNode) { + return std::optional<DNSName>(d_name); + } + return std::nullopt; + } + visitor.pop_back(); + auto result = child->getBestMatch(visitor); + if (result) { + if (!d_name.empty()) { + result->appendRawLabel(d_name); + } + return result; + } + return endNode ? std::optional<DNSName>(d_name) : std::nullopt; + } +}; + +/* Quest in life: serve as a rapid block list. If you add a DNSName to a root SuffixMatchNode, + anything part of that domain will return 'true' in check */ +struct SuffixMatchNode +{ + public: + SuffixMatchNode() + {} + SuffixMatchTree<bool> d_tree; + + void add(const DNSName& dnsname) + { + d_tree.add(dnsname, true); + d_nodes.insert(dnsname); + } + + void add(const std::string& name) + { + add(DNSName(name)); + } + + void add(std::vector<std::string> labels) + { + d_tree.add(labels, true); + DNSName tmp; + while (!labels.empty()) { + tmp.appendRawLabel(labels.back()); + labels.pop_back(); // This is safe because we have a copy of labels + } + d_nodes.insert(tmp); + } + + void remove(const DNSName& name) + { + d_tree.remove(name); + d_nodes.erase(name); + } + + void remove(std::vector<std::string> labels) + { + d_tree.remove(labels); + DNSName tmp; + while (!labels.empty()) { + tmp.appendRawLabel(labels.back()); + labels.pop_back(); // This is safe because we have a copy of labels + } + d_nodes.erase(tmp); + } + + bool check(const DNSName& dnsname) const + { + return d_tree.lookup(dnsname) != nullptr; + } + + std::optional<DNSName> getBestMatch(const DNSName& name) const + { + return d_tree.getBestMatch(name); + } + + std::string toString() const + { + std::string ret; + bool first = true; + for (const auto& n : d_nodes) { + if (!first) { + ret += ", "; + } + first = false; + ret += n.toString(); + } + return ret; + } + + private: + mutable std::set<DNSName> d_nodes; // Only used for string generation +}; + +std::ostream & operator<<(std::ostream &os, const DNSName& d); +namespace std { + template <> + struct hash<DNSName> { + size_t operator () (const DNSName& dn) const { return dn.hash(0); } + }; +} + +DNSName::string_t segmentDNSNameRaw(const char* input, size_t inputlen); // from ragel + +bool DNSName::operator==(const DNSName& rhs) const +{ + if (rhs.empty() != empty() || rhs.d_storage.size() != d_storage.size()) { + return false; + } + + const auto* us = d_storage.cbegin(); + const auto* p = rhs.d_storage.cbegin(); + for (; us != d_storage.cend() && p != rhs.d_storage.cend(); ++us, ++p) { + if (dns_tolower(*p) != dns_tolower(*us)) { + return false; + } + } + return true; +} + +struct DNSNameSet: public std::unordered_set<DNSName> { + std::string toString() const { + std::ostringstream oss; + std::copy(begin(), end(), std::ostream_iterator<DNSName>(oss, "\n")); + return oss.str(); + } +}; |