summaryrefslogtreecommitdiffstats
path: root/netwerk/dns/nsEffectiveTLDService.cpp
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:44:51 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:44:51 +0000
commit9e3c08db40b8916968b9f30096c7be3f00ce9647 (patch)
treea68f146d7fa01f0134297619fbe7e33db084e0aa /netwerk/dns/nsEffectiveTLDService.cpp
parentInitial commit. (diff)
downloadthunderbird-9e3c08db40b8916968b9f30096c7be3f00ce9647.tar.xz
thunderbird-9e3c08db40b8916968b9f30096c7be3f00ce9647.zip
Adding upstream version 1:115.7.0.upstream/1%115.7.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--netwerk/dns/nsEffectiveTLDService.cpp517
1 files changed, 517 insertions, 0 deletions
diff --git a/netwerk/dns/nsEffectiveTLDService.cpp b/netwerk/dns/nsEffectiveTLDService.cpp
new file mode 100644
index 0000000000..a43552b1b3
--- /dev/null
+++ b/netwerk/dns/nsEffectiveTLDService.cpp
@@ -0,0 +1,517 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* vim: set ts=8 sts=2 et sw=2 tw=80: */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// This service reads a file of rules describing TLD-like domain names. For a
+// complete description of the expected file format and parsing rules, see
+// http://wiki.mozilla.org/Gecko:Effective_TLD_Service
+
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/HashFunctions.h"
+#include "mozilla/MemoryReporting.h"
+#include "mozilla/ResultExtensions.h"
+#include "mozilla/TextUtils.h"
+
+#include "MainThreadUtils.h"
+#include "nsContentUtils.h"
+#include "nsCRT.h"
+#include "nsEffectiveTLDService.h"
+#include "nsIFile.h"
+#include "nsIIDNService.h"
+#include "nsIObserverService.h"
+#include "nsIURI.h"
+#include "nsNetCID.h"
+#include "nsNetUtil.h"
+#include "nsServiceManagerUtils.h"
+#include "mozilla/net/DNS.h"
+
+namespace etld_dafsa {
+
+// Generated file that includes kDafsa
+#include "etld_data.inc"
+
+} // namespace etld_dafsa
+
+using namespace mozilla;
+
+NS_IMPL_ISUPPORTS(nsEffectiveTLDService, nsIEffectiveTLDService,
+ nsIMemoryReporter, nsIObserver)
+
+// ----------------------------------------------------------------------
+
+static nsEffectiveTLDService* gService = nullptr;
+
+nsEffectiveTLDService::nsEffectiveTLDService()
+ : mIDNService(), mGraphLock("nsEffectiveTLDService::mGraph") {
+ mGraph.emplace(etld_dafsa::kDafsa);
+}
+
+nsresult nsEffectiveTLDService::Init() {
+ MOZ_ASSERT(NS_IsMainThread());
+ nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService();
+ obs->AddObserver(this, "public-suffix-list-updated", false);
+
+ if (gService) {
+ return NS_ERROR_ALREADY_INITIALIZED;
+ }
+
+ nsresult rv;
+ mIDNService = do_GetService(NS_IDNSERVICE_CONTRACTID, &rv);
+ if (NS_FAILED(rv)) {
+ return rv;
+ }
+
+ gService = this;
+ RegisterWeakMemoryReporter(this);
+
+ return NS_OK;
+}
+
+NS_IMETHODIMP nsEffectiveTLDService::Observe(nsISupports* aSubject,
+ const char* aTopic,
+ const char16_t* aData) {
+ /**
+ * Signal sent from netwerk/dns/PublicSuffixList.jsm
+ * aSubject is the nsIFile object for dafsa.bin
+ * aData is the absolute path to the dafsa.bin file (not used)
+ */
+ if (aSubject && (nsCRT::strcmp(aTopic, "public-suffix-list-updated") == 0)) {
+ nsCOMPtr<nsIFile> mDafsaBinFile(do_QueryInterface(aSubject));
+ NS_ENSURE_TRUE(mDafsaBinFile, NS_ERROR_ILLEGAL_VALUE);
+
+ AutoWriteLock lock(mGraphLock);
+ // Reset mGraph with kDafsa in case reassigning to mDafsaMap fails
+ mGraph.reset();
+ mGraph.emplace(etld_dafsa::kDafsa);
+
+ mDafsaMap.reset();
+ mMruTable.Clear();
+
+ MOZ_TRY(mDafsaMap.init(mDafsaBinFile));
+
+ size_t size = mDafsaMap.size();
+ const uint8_t* remoteDafsaPtr = mDafsaMap.get<uint8_t>().get();
+
+ auto remoteDafsa = mozilla::Span(remoteDafsaPtr, size);
+
+ mGraph.reset();
+ mGraph.emplace(remoteDafsa);
+ }
+ return NS_OK;
+}
+
+nsEffectiveTLDService::~nsEffectiveTLDService() {
+ UnregisterWeakMemoryReporter(this);
+ if (mIDNService) {
+ // Only clear gService if Init() finished successfully.
+ gService = nullptr;
+ }
+}
+
+// static
+nsEffectiveTLDService* nsEffectiveTLDService::GetInstance() {
+ if (gService) {
+ return gService;
+ }
+ nsCOMPtr<nsIEffectiveTLDService> tldService =
+ do_GetService(NS_EFFECTIVETLDSERVICE_CONTRACTID);
+ if (!tldService) {
+ return nullptr;
+ }
+ MOZ_ASSERT(
+ gService,
+ "gService must have been initialized in nsEffectiveTLDService::Init");
+ return gService;
+}
+
+MOZ_DEFINE_MALLOC_SIZE_OF(EffectiveTLDServiceMallocSizeOf)
+
+// The amount of heap memory measured here is tiny. It used to be bigger when
+// nsEffectiveTLDService used a separate hash table instead of binary search.
+// Nonetheless, we keep this code here in anticipation of bug 1083971 which will
+// change ETLDEntries::entries to a heap-allocated array modifiable at runtime.
+NS_IMETHODIMP
+nsEffectiveTLDService::CollectReports(nsIHandleReportCallback* aHandleReport,
+ nsISupports* aData, bool aAnonymize) {
+ MOZ_COLLECT_REPORT("explicit/network/effective-TLD-service", KIND_HEAP,
+ UNITS_BYTES,
+ SizeOfIncludingThis(EffectiveTLDServiceMallocSizeOf),
+ "Memory used by the effective TLD service.");
+
+ return NS_OK;
+}
+
+size_t nsEffectiveTLDService::SizeOfIncludingThis(
+ mozilla::MallocSizeOf aMallocSizeOf) {
+ size_t n = aMallocSizeOf(this);
+
+ // Measurement of the following members may be added later if DMD finds it is
+ // worthwhile:
+ // - mIDNService
+
+ return n;
+}
+
+// External function for dealing with URI's correctly.
+// Pulls out the host portion from an nsIURI, and calls through to
+// GetPublicSuffixFromHost().
+NS_IMETHODIMP
+nsEffectiveTLDService::GetPublicSuffix(nsIURI* aURI,
+ nsACString& aPublicSuffix) {
+ NS_ENSURE_ARG_POINTER(aURI);
+
+ nsAutoCString host;
+ nsresult rv = NS_GetInnermostURIHost(aURI, host);
+ if (NS_FAILED(rv)) {
+ return rv;
+ }
+
+ return GetBaseDomainInternal(host, 0, false, aPublicSuffix);
+}
+
+NS_IMETHODIMP
+nsEffectiveTLDService::GetKnownPublicSuffix(nsIURI* aURI,
+ nsACString& aPublicSuffix) {
+ NS_ENSURE_ARG_POINTER(aURI);
+
+ nsAutoCString host;
+ nsresult rv = NS_GetInnermostURIHost(aURI, host);
+ if (NS_FAILED(rv)) {
+ return rv;
+ }
+
+ return GetBaseDomainInternal(host, 0, true, aPublicSuffix);
+}
+
+// External function for dealing with URI's correctly.
+// Pulls out the host portion from an nsIURI, and calls through to
+// GetBaseDomainFromHost().
+NS_IMETHODIMP
+nsEffectiveTLDService::GetBaseDomain(nsIURI* aURI, uint32_t aAdditionalParts,
+ nsACString& aBaseDomain) {
+ NS_ENSURE_ARG_POINTER(aURI);
+ NS_ENSURE_TRUE(((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG);
+
+ nsAutoCString host;
+ nsresult rv = NS_GetInnermostURIHost(aURI, host);
+ if (NS_FAILED(rv)) {
+ return rv;
+ }
+
+ return GetBaseDomainInternal(host, aAdditionalParts + 1, false, aBaseDomain);
+}
+
+// External function for dealing with URIs to get a schemeless site.
+// Calls through to GetBaseDomain(), handling IP addresses and aliases by
+// just returning their serialized host.
+NS_IMETHODIMP
+nsEffectiveTLDService::GetSchemelessSite(nsIURI* aURI, nsACString& aSite) {
+ NS_ENSURE_ARG_POINTER(aURI);
+
+ nsresult rv = GetBaseDomain(aURI, 0, aSite);
+ if (rv == NS_ERROR_HOST_IS_IP_ADDRESS ||
+ rv == NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS) {
+ rv = nsContentUtils::GetHostOrIPv6WithBrackets(aURI, aSite);
+ }
+ return rv;
+}
+
+// External function for dealing with URIs to get site correctly.
+// Calls through to GetSchemelessSite(), and serializes with the scheme and
+// "://" prepended.
+NS_IMETHODIMP
+nsEffectiveTLDService::GetSite(nsIURI* aURI, nsACString& aSite) {
+ NS_ENSURE_ARG_POINTER(aURI);
+
+ nsAutoCString scheme;
+ nsresult rv = aURI->GetScheme(scheme);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ nsAutoCString schemeless;
+ rv = GetSchemelessSite(aURI, schemeless);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ // aURI (and thus BaseDomain) may be the string '.'. If so, fail.
+ if (schemeless.Length() == 1 && schemeless.Last() == '.') {
+ return NS_ERROR_INVALID_ARG;
+ }
+
+ // Reject any URIs without a host that aren't file:// URIs.
+ if (schemeless.IsEmpty() && !aURI->SchemeIs("file")) {
+ return NS_ERROR_INVALID_ARG;
+ }
+
+ aSite.SetCapacity(scheme.Length() + 3 + schemeless.Length());
+ aSite.Append(scheme);
+ aSite.Append("://"_ns);
+ aSite.Append(schemeless);
+
+ return NS_OK;
+}
+
+// External function for dealing with a host string directly: finds the public
+// suffix (e.g. co.uk) for the given hostname. See GetBaseDomainInternal().
+NS_IMETHODIMP
+nsEffectiveTLDService::GetPublicSuffixFromHost(const nsACString& aHostname,
+ nsACString& aPublicSuffix) {
+ // Create a mutable copy of the hostname and normalize it to ACE.
+ // This will fail if the hostname includes invalid characters.
+ nsAutoCString normHostname(aHostname);
+ nsresult rv = NormalizeHostname(normHostname);
+ if (NS_FAILED(rv)) {
+ return rv;
+ }
+
+ return GetBaseDomainInternal(normHostname, 0, false, aPublicSuffix);
+}
+
+NS_IMETHODIMP
+nsEffectiveTLDService::GetKnownPublicSuffixFromHost(const nsACString& aHostname,
+ nsACString& aPublicSuffix) {
+ // Create a mutable copy of the hostname and normalize it to ACE.
+ // This will fail if the hostname includes invalid characters.
+ nsAutoCString normHostname(aHostname);
+ nsresult rv = NormalizeHostname(normHostname);
+ if (NS_FAILED(rv)) {
+ return rv;
+ }
+
+ return GetBaseDomainInternal(normHostname, 0, true, aPublicSuffix);
+}
+
+// External function for dealing with a host string directly: finds the base
+// domain (e.g. www.co.uk) for the given hostname and number of subdomain parts
+// requested. See GetBaseDomainInternal().
+NS_IMETHODIMP
+nsEffectiveTLDService::GetBaseDomainFromHost(const nsACString& aHostname,
+ uint32_t aAdditionalParts,
+ nsACString& aBaseDomain) {
+ NS_ENSURE_TRUE(((int32_t)aAdditionalParts) >= 0, NS_ERROR_INVALID_ARG);
+
+ // Create a mutable copy of the hostname and normalize it to ACE.
+ // This will fail if the hostname includes invalid characters.
+ nsAutoCString normHostname(aHostname);
+ nsresult rv = NormalizeHostname(normHostname);
+ if (NS_FAILED(rv)) {
+ return rv;
+ }
+
+ return GetBaseDomainInternal(normHostname, aAdditionalParts + 1, false,
+ aBaseDomain);
+}
+
+NS_IMETHODIMP
+nsEffectiveTLDService::GetNextSubDomain(const nsACString& aHostname,
+ nsACString& aBaseDomain) {
+ // Create a mutable copy of the hostname and normalize it to ACE.
+ // This will fail if the hostname includes invalid characters.
+ nsAutoCString normHostname(aHostname);
+ nsresult rv = NormalizeHostname(normHostname);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ return GetBaseDomainInternal(normHostname, -1, false, aBaseDomain);
+}
+
+// Finds the base domain for a host, with requested number of additional parts.
+// This will fail, generating an error, if the host is an IPv4/IPv6 address,
+// if more subdomain parts are requested than are available, or if the hostname
+// includes characters that are not valid in a URL. Normalization is performed
+// on the host string and the result will be in UTF8.
+nsresult nsEffectiveTLDService::GetBaseDomainInternal(
+ nsCString& aHostname, int32_t aAdditionalParts, bool aOnlyKnownPublicSuffix,
+ nsACString& aBaseDomain) {
+ const int kExceptionRule = 1;
+ const int kWildcardRule = 2;
+
+ if (aHostname.IsEmpty()) {
+ return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
+ }
+
+ // chomp any trailing dot, and keep track of it for later
+ bool trailingDot = aHostname.Last() == '.';
+ if (trailingDot) {
+ aHostname.Truncate(aHostname.Length() - 1);
+ }
+
+ // check the edge cases of the host being '.' or having a second trailing '.',
+ // since subsequent checks won't catch it.
+ if (aHostname.IsEmpty() || aHostname.Last() == '.') {
+ return NS_ERROR_INVALID_ARG;
+ }
+
+ // Lookup in the cache if this is a normal query. This is restricted to
+ // main thread-only as the cache is not thread-safe.
+ Maybe<TldCache::Entry> entry;
+ if (aAdditionalParts == 1 && NS_IsMainThread()) {
+ auto p = mMruTable.Lookup(aHostname);
+ if (p) {
+ if (NS_FAILED(p.Data().mResult)) {
+ return p.Data().mResult;
+ }
+
+ // There was a match, just return the cached value.
+ aBaseDomain = p.Data().mBaseDomain;
+ if (trailingDot) {
+ aBaseDomain.Append('.');
+ }
+
+ return NS_OK;
+ }
+
+ entry = Some(p);
+ }
+
+ // Check if we're dealing with an IPv4/IPv6 hostname, and return
+ if (mozilla::net::HostIsIPLiteral(aHostname)) {
+ // Update the MRU table if in use.
+ if (entry) {
+ entry->Set(TLDCacheEntry{aHostname, ""_ns, NS_ERROR_HOST_IS_IP_ADDRESS});
+ }
+
+ return NS_ERROR_HOST_IS_IP_ADDRESS;
+ }
+
+ // Walk up the domain tree, most specific to least specific,
+ // looking for matches at each level. Note that a given level may
+ // have multiple attributes (e.g. IsWild() and IsNormal()).
+ const char* prevDomain = nullptr;
+ const char* currDomain = aHostname.get();
+ const char* nextDot = strchr(currDomain, '.');
+ const char* end = currDomain + aHostname.Length();
+ // Default value of *eTLD is currDomain as set in the while loop below
+ const char* eTLD = nullptr;
+ bool hasKnownPublicSuffix = false;
+ while (true) {
+ // sanity check the string we're about to look up: it should not begin
+ // with a '.'; this would mean the hostname began with a '.' or had an
+ // embedded '..' sequence.
+ if (*currDomain == '.') {
+ // Update the MRU table if in use.
+ if (entry) {
+ entry->Set(TLDCacheEntry{aHostname, ""_ns, NS_ERROR_INVALID_ARG});
+ }
+
+ return NS_ERROR_INVALID_ARG;
+ }
+
+ int result;
+ {
+ AutoReadLock lock(mGraphLock);
+ // Perform the lookup.
+ result = mGraph->Lookup(Substring(currDomain, end));
+ }
+ if (result != Dafsa::kKeyNotFound) {
+ hasKnownPublicSuffix = true;
+ if (result == kWildcardRule && prevDomain) {
+ // wildcard rules imply an eTLD one level inferior to the match.
+ eTLD = prevDomain;
+ break;
+ }
+ if (result != kExceptionRule || !nextDot) {
+ // specific match, or we've hit the top domain level
+ eTLD = currDomain;
+ break;
+ }
+ if (result == kExceptionRule) {
+ // exception rules imply an eTLD one level superior to the match.
+ eTLD = nextDot + 1;
+ break;
+ }
+ }
+
+ if (!nextDot) {
+ // we've hit the top domain level; use it by default.
+ eTLD = currDomain;
+ break;
+ }
+
+ prevDomain = currDomain;
+ currDomain = nextDot + 1;
+ nextDot = strchr(currDomain, '.');
+ }
+
+ if (aOnlyKnownPublicSuffix && !hasKnownPublicSuffix) {
+ aBaseDomain.Truncate();
+ return NS_OK;
+ }
+
+ const char *begin, *iter;
+ if (aAdditionalParts < 0) {
+ NS_ASSERTION(aAdditionalParts == -1,
+ "aAdditionalParts can't be negative and different from -1");
+
+ for (iter = aHostname.get(); iter != eTLD && *iter != '.'; iter++) {
+ ;
+ }
+
+ if (iter != eTLD) {
+ iter++;
+ }
+ if (iter != eTLD) {
+ aAdditionalParts = 0;
+ }
+ } else {
+ // count off the number of requested domains.
+ begin = aHostname.get();
+ iter = eTLD;
+
+ while (true) {
+ if (iter == begin) {
+ break;
+ }
+
+ if (*(--iter) == '.' && aAdditionalParts-- == 0) {
+ ++iter;
+ ++aAdditionalParts;
+ break;
+ }
+ }
+ }
+
+ if (aAdditionalParts != 0) {
+ // Update the MRU table if in use.
+ if (entry) {
+ entry->Set(
+ TLDCacheEntry{aHostname, ""_ns, NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS});
+ }
+
+ return NS_ERROR_INSUFFICIENT_DOMAIN_LEVELS;
+ }
+
+ aBaseDomain = Substring(iter, end);
+
+ // Update the MRU table if in use.
+ if (entry) {
+ entry->Set(TLDCacheEntry{aHostname, nsCString(aBaseDomain), NS_OK});
+ }
+
+ // add on the trailing dot, if applicable
+ if (trailingDot) {
+ aBaseDomain.Append('.');
+ }
+
+ return NS_OK;
+}
+
+// Normalizes the given hostname, component by component. ASCII/ACE
+// components are lower-cased, and UTF-8 components are normalized per
+// RFC 3454 and converted to ACE.
+nsresult nsEffectiveTLDService::NormalizeHostname(nsCString& aHostname) {
+ if (!IsAscii(aHostname)) {
+ nsresult rv = mIDNService->ConvertUTF8toACE(aHostname, aHostname);
+ if (NS_FAILED(rv)) {
+ return rv;
+ }
+ }
+
+ ToLowerCase(aHostname);
+ return NS_OK;
+}
+
+NS_IMETHODIMP
+nsEffectiveTLDService::HasRootDomain(const nsACString& aInput,
+ const nsACString& aHost, bool* aResult) {
+ return net::HasRootDomain(aInput, aHost, aResult);
+}