/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||
|
||
#include "MainThreadUtils.h"
|
||
#include "mozilla/ClearOnShutdown.h"
|
||
#include "mozilla/Preferences.h"
|
||
#include "nsIDNService.h"
|
||
#include "nsReadableUtils.h"
|
||
#include "nsCRT.h"
|
||
#include "nsServiceManagerUtils.h"
|
||
#include "nsString.h"
|
||
#include "nsStringFwd.h"
|
||
#include "nsUnicharUtils.h"
|
||
#include "nsUnicodeProperties.h"
|
||
#include "harfbuzz/hb.h"
|
||
#include "mozilla/ArrayUtils.h"
|
||
#include "mozilla/Casting.h"
|
||
#include "mozilla/StaticPrefs_network.h"
|
||
#include "mozilla/TextUtils.h"
|
||
#include "mozilla/Utf8.h"
|
||
#include "mozilla/intl/UnicodeProperties.h"
|
||
#include "mozilla/intl/UnicodeScriptCodes.h"
|
||
#include "nsNetUtil.h"
|
||
#include "nsStandardURL.h"
|
||
|
||
using namespace mozilla;
|
||
using namespace mozilla::intl;
|
||
using namespace mozilla::unicode;
|
||
using namespace mozilla::net;
|
||
using mozilla::Preferences;
|
||
|
||
//-----------------------------------------------------------------------------
|
||
|
||
// True when `c` lies in the ASCII range '0'..'9'. The argument is expanded
// twice, so do not pass an expression with side effects.
#define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
|
||
|
||
template <int N>
|
||
static inline bool TLDEqualsLiteral(mozilla::Span<const char32_t> aTLD,
|
||
const char (&aStr)[N]) {
|
||
if (aTLD.Length() != N - 1) {
|
||
return false;
|
||
}
|
||
const char* a = aStr;
|
||
for (const char32_t c : aTLD) {
|
||
if (c != char32_t(*a)) {
|
||
return false;
|
||
}
|
||
++a;
|
||
}
|
||
return true;
|
||
}
|
||
|
||
template <int N>
|
||
static inline bool TLDStartsWith(mozilla::Span<const char32_t> aTLD,
|
||
const char (&aStr)[N]) {
|
||
// Ensure the span is long enough to contain the prefix
|
||
if (aTLD.Length() < N - 1) {
|
||
return false;
|
||
}
|
||
|
||
for (size_t i = 0; i < N - 1; ++i) {
|
||
if (aTLD[i] != char32_t(aStr[i])) {
|
||
return false;
|
||
}
|
||
}
|
||
|
||
return true;
|
||
}
|
||
|
||
// Returns false as soon as |aLabel| contains a character that
// CharInBlocklist() reports for |aBlocklist|; returns true otherwise
// (including when the blocklist is empty).
static inline bool isOnlySafeChars(mozilla::Span<const char32_t> aLabel,
                                   const nsTArray<BlocklistRange>& aBlocklist) {
  if (aBlocklist.IsEmpty()) {
    return true;
  }

  for (const char32_t codePoint : aLabel) {
    // The blocklist only supports the BMP, so characters above U+FFFF are
    // never looked up.
    const bool inBMP = codePoint <= 0xFFFF;
    if (inBMP && CharInBlocklist(char16_t(codePoint), aBlocklist)) {
      return false;
    }
  }
  return true;
}
|
||
|
||
// Returns true when |aTLD| is exactly one of the literals "bg", "by", "kz",
// "pyc", "ru", "su", "ua" or "uz". The span is taken by value, like the other
// TLD helpers in this file; it is only read.
static bool isCyrillicDomain(mozilla::Span<const char32_t> aTLD) {
  return TLDEqualsLiteral(aTLD, "bg") || TLDEqualsLiteral(aTLD, "by") ||
         TLDEqualsLiteral(aTLD, "kz") || TLDEqualsLiteral(aTLD, "pyc") ||
         TLDEqualsLiteral(aTLD, "ru") || TLDEqualsLiteral(aTLD, "su") ||
         TLDEqualsLiteral(aTLD, "ua") || TLDEqualsLiteral(aTLD, "uz");
}
|
||
|
||
//-----------------------------------------------------------------------------
|
||
// nsIDNService
|
||
//-----------------------------------------------------------------------------
|
||
|
||
/* Implementation file */
|
||
// NOTE(review): NS_IMPL_ISUPPORTS is defined outside this file; presumably it
// generates the nsISupports plumbing for nsIDNService with nsIIDNService as
// its only exposed interface -- confirm against the XPCOM headers.
NS_IMPL_ISUPPORTS(nsIDNService, nsIIDNService)
|
||
|
||
// One-time setup: hands mIDNBlocklist to InitializeBlocklist() and fills every
// confusable-character set. Debug-asserts that it runs on the main thread and
// always returns NS_OK.
nsresult nsIDNService::Init() {
  MOZ_ASSERT(NS_IsMainThread());

  // Character blocklist consulted by isOnlySafeChars().
  InitializeBlocklist(mIDNBlocklist);

  // Hash sets consulted by IsLabelSafe().
  InitCJKSlashConfusables();
  InitCJKIdeographs();
  InitDigitConfusables();
  InitCyrillicLatinConfusables();
  InitThaiLatinConfusables();

  return NS_OK;
}
|
||
|
||
void nsIDNService::InitCJKSlashConfusables() {
|
||
mCJKSlashConfusables.Insert(0x30CE); // ノ
|
||
mCJKSlashConfusables.Insert(0x30BD); // ソ
|
||
mCJKSlashConfusables.Insert(0x30BE); // ゾ
|
||
mCJKSlashConfusables.Insert(0x30F3); // ン
|
||
mCJKSlashConfusables.Insert(0x4E36); // 丶
|
||
mCJKSlashConfusables.Insert(0x4E40); // 乀
|
||
mCJKSlashConfusables.Insert(0x4E41); // 乁
|
||
mCJKSlashConfusables.Insert(0x4E3F); // 丿
|
||
}
|
||
|
||
void nsIDNService::InitCJKIdeographs() {
|
||
mCJKIdeographs.Insert(0x4E00); // 一
|
||
mCJKIdeographs.Insert(0x3127); // ㄧ
|
||
mCJKIdeographs.Insert(0x4E28); // 丨
|
||
mCJKIdeographs.Insert(0x4E5B); // 乛
|
||
mCJKIdeographs.Insert(0x4E03); // 七
|
||
mCJKIdeographs.Insert(0x4E05); // 丅
|
||
mCJKIdeographs.Insert(0x5341); // 十
|
||
mCJKIdeographs.Insert(0x3007); // 〇
|
||
mCJKIdeographs.Insert(0x3112); // ㄒ
|
||
mCJKIdeographs.Insert(0x311A); // ㄚ
|
||
mCJKIdeographs.Insert(0x311F); // ㄟ
|
||
mCJKIdeographs.Insert(0x3128); // ㄨ
|
||
mCJKIdeographs.Insert(0x3129); // ㄩ
|
||
mCJKIdeographs.Insert(0x3108); // ㄈ
|
||
mCJKIdeographs.Insert(0x31BA); // ㆺ
|
||
mCJKIdeographs.Insert(0x31B3); // ㆳ
|
||
mCJKIdeographs.Insert(0x5DE5); // 工
|
||
mCJKIdeographs.Insert(0x31B2); // ㆲ
|
||
mCJKIdeographs.Insert(0x8BA0); // 讠
|
||
mCJKIdeographs.Insert(0x4E01); // 丁
|
||
}
|
||
|
||
void nsIDNService::InitDigitConfusables() {
|
||
mDigitConfusables.Insert(0x03B8); // θ
|
||
mDigitConfusables.Insert(0x0968); // २
|
||
mDigitConfusables.Insert(0x09E8); // ২
|
||
mDigitConfusables.Insert(0x0A68); // ੨
|
||
mDigitConfusables.Insert(0x0AE8); // ૨
|
||
mDigitConfusables.Insert(0x0CE9); // ೩
|
||
mDigitConfusables.Insert(0x0577); // շ
|
||
mDigitConfusables.Insert(0x0437); // з
|
||
mDigitConfusables.Insert(0x0499); // ҙ
|
||
mDigitConfusables.Insert(0x04E1); // ӡ
|
||
mDigitConfusables.Insert(0x0909); // उ
|
||
mDigitConfusables.Insert(0x0993); // ও
|
||
mDigitConfusables.Insert(0x0A24); // ਤ
|
||
mDigitConfusables.Insert(0x0A69); // ੩
|
||
mDigitConfusables.Insert(0x0AE9); // ૩
|
||
mDigitConfusables.Insert(0x0C69); // ౩
|
||
mDigitConfusables.Insert(0x1012); // ဒ
|
||
mDigitConfusables.Insert(0x10D5); // ვ
|
||
mDigitConfusables.Insert(0x10DE); // პ
|
||
mDigitConfusables.Insert(0x0A5C); // ੜ
|
||
mDigitConfusables.Insert(0x10D9); // კ
|
||
mDigitConfusables.Insert(0x0A6B); // ੫
|
||
mDigitConfusables.Insert(0x4E29); // 丩
|
||
mDigitConfusables.Insert(0x3110); // ㄐ
|
||
mDigitConfusables.Insert(0x0573); // ճ
|
||
mDigitConfusables.Insert(0x09EA); // ৪
|
||
mDigitConfusables.Insert(0x0A6A); // ੪
|
||
mDigitConfusables.Insert(0x0B6B); // ୫
|
||
mDigitConfusables.Insert(0x0AED); // ૭
|
||
mDigitConfusables.Insert(0x0B68); // ୨
|
||
mDigitConfusables.Insert(0x0C68); // ౨
|
||
}
|
||
|
||
void nsIDNService::InitCyrillicLatinConfusables() {
|
||
mCyrillicLatinConfusables.Insert(0x0430); // а CYRILLIC SMALL LETTER A
|
||
mCyrillicLatinConfusables.Insert(0x044B); // ы CYRILLIC SMALL LETTER YERU
|
||
mCyrillicLatinConfusables.Insert(0x0441); // с CYRILLIC SMALL LETTER ES
|
||
mCyrillicLatinConfusables.Insert(0x0501); // ԁ CYRILLIC SMALL LETTER KOMI DE
|
||
mCyrillicLatinConfusables.Insert(0x0435); // е CYRILLIC SMALL LETTER IE
|
||
mCyrillicLatinConfusables.Insert(0x050D); // ԍ CYRILLIC SMALL LETTER KOMI SJE
|
||
mCyrillicLatinConfusables.Insert(0x04BB); // һ CYRILLIC SMALL LETTER SHHA
|
||
mCyrillicLatinConfusables.Insert(
|
||
0x0456); // і CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I {Old
|
||
// Cyrillic i}
|
||
mCyrillicLatinConfusables.Insert(0x044E); // ю CYRILLIC SMALL LETTER YU
|
||
mCyrillicLatinConfusables.Insert(0x043A); // к CYRILLIC SMALL LETTER KA
|
||
mCyrillicLatinConfusables.Insert(0x0458); // ј CYRILLIC SMALL LETTER JE
|
||
mCyrillicLatinConfusables.Insert(0x04CF); // ӏ CYRILLIC SMALL LETTER PALOCHKA
|
||
mCyrillicLatinConfusables.Insert(0x043C); // м CYRILLIC SMALL LETTER EM
|
||
mCyrillicLatinConfusables.Insert(0x043E); // о CYRILLIC SMALL LETTER O
|
||
mCyrillicLatinConfusables.Insert(0x0440); // р CYRILLIC SMALL LETTER ER
|
||
mCyrillicLatinConfusables.Insert(
|
||
0x0517); // ԗ CYRILLIC SMALL LETTER RHA {voiceless r}
|
||
mCyrillicLatinConfusables.Insert(0x051B); // ԛ CYRILLIC SMALL LETTER QA
|
||
mCyrillicLatinConfusables.Insert(0x0455); // ѕ CYRILLIC SMALL LETTER DZE
|
||
mCyrillicLatinConfusables.Insert(0x051D); // ԝ CYRILLIC SMALL LETTER WE
|
||
mCyrillicLatinConfusables.Insert(0x0445); // х CYRILLIC SMALL LETTER HA
|
||
mCyrillicLatinConfusables.Insert(0x0443); // у CYRILLIC SMALL LETTER U
|
||
mCyrillicLatinConfusables.Insert(
|
||
0x044A); // ъ CYRILLIC SMALL LETTER HARD SIGN
|
||
mCyrillicLatinConfusables.Insert(
|
||
0x044C); // ь CYRILLIC SMALL LETTER SOFT SIGN
|
||
mCyrillicLatinConfusables.Insert(
|
||
0x04BD); // ҽ CYRILLIC SMALL LETTER ABKHASIAN CHE
|
||
mCyrillicLatinConfusables.Insert(0x043F); // п CYRILLIC SMALL LETTER PE
|
||
mCyrillicLatinConfusables.Insert(0x0433); // г CYRILLIC SMALL LETTER GHE
|
||
mCyrillicLatinConfusables.Insert(0x0475); // ѵ CYRILLIC SMALL LETTER IZHITSA
|
||
mCyrillicLatinConfusables.Insert(0x0461); // ѡ CYRILLIC SMALL LETTER OMEGA
|
||
}
|
||
|
||
// Fills mThaiLatinConfusables, the set handed to the Thai
// LookalikeStatusChecker in IsLabelSafe().
void nsIDNService::InitThaiLatinConfusables() {
  // Some of the Thai characters are only confusable on Linux.
#if defined(XP_LINUX) && !defined(ANDROID)
  static constexpr char32_t kLinuxOnlyConfusables[] = {
      0x0E14,  // ด
      0x0E17,  // ท
      0x0E19,  // น
      0x0E1B,  // ป
      0x0E21,  // ม
      0x0E25,  // ล
      0x0E2B,  // ห
  };
  for (const char32_t codePoint : kLinuxOnlyConfusables) {
    mThaiLatinConfusables.Insert(codePoint);
  }
#endif

  // Inserted on every platform.
  static constexpr char32_t kCommonConfusables[] = {
      0x0E1A,  // บ
      0x0E1E,  // พ
      0x0E1F,  // ฟ
      0x0E23,  // ร
      0x0E40,  // เ
      0x0E41,  // แ
      0x0E50,  // ๐
  };
  for (const char32_t codePoint : kCommonConfusables) {
    mThaiLatinConfusables.Insert(codePoint);
  }
}
|
||
|
||
// Construction is debug-asserted to happen on the main thread; the sets are
// filled later by Init().
nsIDNService::nsIDNService() {
  MOZ_ASSERT(NS_IsMainThread());
}
|
||
|
||
// Uses the compiler-generated destructor.
nsIDNService::~nsIDNService() = default;
|
||
|
||
// nsIIDNService entry point: forwards |input| to NS_DomainToASCII(), which
// writes its result to |ace|, and returns that call's status unchanged.
NS_IMETHODIMP nsIDNService::DomainToASCII(const nsACString& input,
                                          nsACString& ace) {
  return NS_DomainToASCII(input, ace);
}
|
||
|
||
// nsIIDNService entry point: forwards |input| to
// NS_DomainToASCIIAllowAnyGlyphfulASCII(), which writes its result to |ace|,
// and returns that call's status unchanged.
NS_IMETHODIMP nsIDNService::ConvertUTF8toACE(const nsACString& input,
                                             nsACString& ace) {
  return NS_DomainToASCIIAllowAnyGlyphfulASCII(input, ace);
}
|
||
|
||
// nsIIDNService entry point: forwards |input| to
// NS_DomainToUnicodeAllowAnyGlyphfulASCII(), which writes its result to
// |_retval|, and returns that call's status unchanged.
NS_IMETHODIMP nsIDNService::ConvertACEtoUTF8(const nsACString& input,
                                             nsACString& _retval) {
  return NS_DomainToUnicodeAllowAnyGlyphfulASCII(input, _retval);
}
|
||
|
||
// nsIIDNService entry point: forwards |input| to NS_DomainToDisplay(), which
// writes its result to |_retval|, and returns that call's status unchanged.
NS_IMETHODIMP nsIDNService::DomainToDisplay(const nsACString& input,
                                            nsACString& _retval) {
  return NS_DomainToDisplay(input, _retval);
}
|
||
|
||
// nsIIDNService entry point: forwards |input| to
// NS_DomainToDisplayAllowAnyGlyphfulASCII(), which writes its result to
// |_retval|, and returns that call's status unchanged.
NS_IMETHODIMP nsIDNService::ConvertToDisplayIDN(const nsACString& input,
                                                nsACString& _retval) {
  return NS_DomainToDisplayAllowAnyGlyphfulASCII(input, _retval);
}
|
||
|
||
//-----------------------------------------------------------------------------
|
||
|
||
namespace mozilla::net {
|
||
|
||
// Script (and script-combination) state tracked while scanning a label.
//
// The numeric values matter: BOPO..OTHR (0-8) are the column indices of
// scriptComboTable and BOPO..HNLT (0-12) are its row indices. UNSET marks
// "no script seen yet" and FAIL has no row in the table.
enum ScriptCombo : int32_t {
  UNSET = -1,
  BOPO = 0,
  CYRL = 1,
  GREK = 2,
  HANG = 3,
  HANI = 4,
  HIRA = 5,
  KATA = 6,
  LATN = 7,
  OTHR = 8,
  JPAN = 9,   // Latin + Han + Hiragana + Katakana
  CHNA = 10,  // Latin + Han + Bopomofo
  KORE = 11,  // Latin + Han + Hangul
  HNLT = 12,  // Latin + Han (could be any of the above combinations)
  FAIL = 13,
};
|
||
|
||
// Outcome tracked by the lookalike checkers:
//   Ignore - set if the label contains a character that makes it
//            obvious it's not a lookalike.
//   Safe   - set if the label contains no lookalike characters.
//   Block  - set if the label contains lookalike characters.
enum class LookalikeStatus { Ignore, Safe, Block };
|
||
|
||
class MOZ_STACK_CLASS LookalikeStatusChecker {
|
||
public:
|
||
// Constructor for Script Confusable Checkers (Cyrillic, Thai, etc)
|
||
LookalikeStatusChecker(nsTHashSet<char32_t>& aConfusables,
|
||
mozilla::Span<const char32_t>& aTLD, Script aTLDScript,
|
||
bool aValidTLD)
|
||
: mConfusables(aConfusables),
|
||
mStatus(aValidTLD ? LookalikeStatus::Ignore : LookalikeStatus::Safe),
|
||
mTLDMatchesScript(doesTLDScriptMatch(aTLD, aTLDScript)),
|
||
mTLDScript(aTLDScript) {}
|
||
|
||
// Constructor that DigitLookalikeStatusChecker inherits
|
||
explicit LookalikeStatusChecker(nsTHashSet<char32_t>& aConfusables)
|
||
: mConfusables(aConfusables), mStatus(LookalikeStatus::Safe) {}
|
||
|
||
// For the Script Confusable Checkers
|
||
virtual void CheckCharacter(char32_t aChar, Script aScript) {
|
||
if (mStatus != LookalikeStatus::Ignore && !mTLDMatchesScript &&
|
||
aScript == mTLDScript) {
|
||
mStatus = mConfusables.Contains(aChar) ? LookalikeStatus::Block
|
||
: LookalikeStatus::Ignore;
|
||
}
|
||
}
|
||
|
||
virtual LookalikeStatus Status() { return mStatus; }
|
||
|
||
protected:
|
||
// A hash set containing confusable characters
|
||
nsTHashSet<char32_t>& mConfusables;
|
||
|
||
// The current lookalike status
|
||
LookalikeStatus mStatus;
|
||
|
||
bool doesTLDScriptMatch(mozilla::Span<const char32_t>& aTLD, Script aScript) {
|
||
mozilla::Span<const char32_t>::const_iterator current = aTLD.cbegin();
|
||
mozilla::Span<const char32_t>::const_iterator end = aTLD.cend();
|
||
|
||
while (current != end) {
|
||
char32_t ch = *current++;
|
||
if (UnicodeProperties::GetScriptCode(ch) == aScript) {
|
||
return true;
|
||
}
|
||
}
|
||
|
||
return false;
|
||
}
|
||
|
||
private:
|
||
// Indicates whether the TLD matches the given script
|
||
bool mTLDMatchesScript{false};
|
||
|
||
// The script associated with the TLD to be matched
|
||
Script mTLDScript{Script::INVALID};
|
||
};
|
||
|
||
// Overrides the CheckCharacter method to validate digits
|
||
class DigitLookalikeStatusChecker : public LookalikeStatusChecker {
|
||
public:
|
||
explicit DigitLookalikeStatusChecker(nsTHashSet<char32_t>& aConfusables)
|
||
: LookalikeStatusChecker(aConfusables) {}
|
||
|
||
// Note: aScript is not used in this override.
|
||
void CheckCharacter(char32_t aChar, Script aScript) override {
|
||
if (mStatus == LookalikeStatus::Ignore) {
|
||
return;
|
||
}
|
||
|
||
// If the character is not a numeric digit, check whether it is confusable
|
||
// or not.
|
||
if (!ISDIGIT(aChar)) {
|
||
mStatus = mConfusables.Contains(aChar) ? LookalikeStatus::Block
|
||
: LookalikeStatus::Ignore;
|
||
}
|
||
}
|
||
};
|
||
|
||
} // namespace mozilla::net
|
||
|
||
// Runs the per-label safety checks over |aLabel|, using |aTLD| for the
// TLD-dependent rules. Returns false as soon as any check fails, and true only
// when the whole label passes and none of the consulted lookalike checkers
// ended in Block.
// NOTE(review): presumably a false result makes the caller show the label as
// punycode (the first check ties it to the show_punycode pref) -- confirm
// against the caller.
bool nsIDNService::IsLabelSafe(mozilla::Span<const char32_t> aLabel,
                               mozilla::Span<const char32_t> aTLD) {
  if (StaticPrefs::network_IDN_show_punycode()) {
    return false;
  }

  if (!isOnlySafeChars(aLabel, mIDNBlocklist)) {
    return false;
  }

  // Bug 1917119 - Avoid bypassing the doesTLDScriptMatch check
  // aTLD should be a decoded label, but in the case of invalid labels such as
  // `xn--xn--d--fg4n` we might end up with something that starts with `xn--`.
  // Treat those as unsafe just in case.
  if (TLDStartsWith(aTLD, "xn--")) {
    return false;
  }

  mozilla::Span<const char32_t>::const_iterator current = aLabel.cbegin();
  mozilla::Span<const char32_t>::const_iterator end = aLabel.cend();

  // Script of the last character that was neither COMMON nor INHERITED.
  Script lastScript = Script::INVALID;
  // Character handled by the previous iteration (0 before the first one).
  char32_t previousChar = 0;
  char32_t baseChar = 0;  // last non-diacritic seen (base char for marks)
  // Zero character of the first decimal numbering system seen (0 = none yet).
  char32_t savedNumberingSystem = 0;

  // The digit checker ends in Block only when every non-digit character of the
  // label is a digit confusable (and there is at least one such character).
  // Any character that is neither an ASCII digit nor a digit confusable
  // switches it to Ignore for good.
  DigitLookalikeStatusChecker digitStatusChecker(mDigitConfusables);
  // Check if all the cyrillic letters in the label are confusables
  LookalikeStatusChecker cyrillicStatusChecker(mCyrillicLatinConfusables, aTLD,
                                               Script::CYRILLIC,
                                               isCyrillicDomain(aTLD));
  // Check if all the Thai letters in the label are confusables
  LookalikeStatusChecker thaiStatusChecker(
      mThaiLatinConfusables, aTLD, Script::THAI, TLDEqualsLiteral(aTLD, "th"));

  // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
#if 0
  HanVariantType savedHanVariant = HVT_NotHan;
#endif

  ScriptCombo savedScript = ScriptCombo::UNSET;

  while (current != end) {
    // From here on |current| points at the character *after* |ch|.
    char32_t ch = *current++;

    IdentifierType idType = GetIdentifierType(ch);
    if (idType == IDTYPE_RESTRICTED) {
      return false;
    }
    MOZ_ASSERT(idType == IDTYPE_ALLOWED);

    // Check for mixed script
    Script script = UnicodeProperties::GetScriptCode(ch);
    if (script != Script::COMMON && script != Script::INHERITED &&
        script != lastScript) {
      if (illegalScriptCombo(script, savedScript)) {
        return false;
      }
    }

#ifdef XP_MACOSX
    // U+0620, U+0f8c, U+0f8d, U+0f8e and U+0f8f are blocked due to a font
    // issue on macOS
    if (ch == 0x620 || ch == 0xf8c || ch == 0xf8d || ch == 0xf8e ||
        ch == 0xf8f) {
      return false;
    }
#endif

    // U+30FC should be preceded by a Hiragana/Katakana.
    if (ch == 0x30fc && lastScript != Script::HIRAGANA &&
        lastScript != Script::KATAKANA) {
      return false;
    }

    // Script of the following character; stays INVALID at the end of the
    // label.
    Script nextScript = Script::INVALID;
    if (current != end) {
      nextScript = UnicodeProperties::GetScriptCode(*current);
    }

    // U+3078 to U+307A (へ, べ, ぺ) in Hiragana mixed with Katakana should be
    // unsafe
    if (ch >= 0x3078 && ch <= 0x307A &&
        (lastScript == Script::KATAKANA || nextScript == Script::KATAKANA)) {
      return false;
    }
    // U+30D8 to U+30DA (ヘ, ベ, ペ) in Katakana mixed with Hiragana should be
    // unsafe
    if (ch >= 0x30D8 && ch <= 0x30DA &&
        (lastScript == Script::HIRAGANA || nextScript == Script::HIRAGANA)) {
      return false;
    }
    // U+30FD and U+30FE are allowed only after Katakana
    if ((ch == 0x30FD || ch == 0x30FE) && lastScript != Script::KATAKANA) {
      return false;
    }

    // Slash confusables not enclosed by {Han,Hiragana,Katakana} should be
    // unsafe but by itself should be allowed.
    if (isCJKSlashConfusable(ch) && aLabel.Length() > 1 &&
        lastScript != Script::HAN && lastScript != Script::HIRAGANA &&
        lastScript != Script::KATAKANA && nextScript != Script::HAN &&
        nextScript != Script::HIRAGANA && nextScript != Script::KATAKANA) {
      return false;
    }

    // U+30FB is unsafe when the script on either side is Latin.
    if (ch == 0x30FB &&
        (lastScript == Script::LATIN || nextScript == Script::LATIN)) {
      return false;
    }

    // Combining Diacritic marks (U+0300-U+0339) after a script other than
    // Latin-Greek-Cyrillic is unsafe
    if (ch >= 0x300 && ch <= 0x339 && lastScript != Script::LATIN &&
        lastScript != Script::GREEK && lastScript != Script::CYRILLIC) {
      return false;
    }

    // U+0307 directly after 'i', 'j' or 'l' is unsafe.
    if (ch == 0x307 &&
        (previousChar == 'i' || previousChar == 'j' || previousChar == 'l')) {
      return false;
    }

    // U+00B7 is only allowed on Catalan domains between two l's.
    if (ch == 0xB7 && (!TLDEqualsLiteral(aTLD, "cat") || previousChar != 'l' ||
                       current == end || *current != 'l')) {
      return false;
    }

    // Disallow Icelandic confusables for domains outside Icelandic and Faroese
    // ccTLD (.is, .fo)
    if ((ch == 0xFE || ch == 0xF0) && !TLDEqualsLiteral(aTLD, "is") &&
        !TLDEqualsLiteral(aTLD, "fo")) {
      return false;
    }

    // Disallow U+0259 for domains outside Azerbaijani ccTLD (.az)
    if (ch == 0x259 && !TLDEqualsLiteral(aTLD, "az")) {
      return false;
    }

    // Block single/double-quote-like characters.
    if (ch == 0x2BB || ch == 0x2BC) {
      return false;
    }

    // Update the status based on whether the current character is a confusable
    // or not and determine if it should be blocked or ignored.
    // Note: script is not used for digitStatusChecker
    digitStatusChecker.CheckCharacter(ch, script);
    cyrillicStatusChecker.CheckCharacter(ch, script);
    thaiStatusChecker.CheckCharacter(ch, script);

    // Block these CJK ideographs if they are adjacent to non-CJK characters.
    // These characters can be used to spoof Latin characters/punctuation marks.
    if (isCJKIdeograph(ch)) {
      // Check if there is a non-Bopomofo, non-Hiragana, non-Katakana, non-Han,
      // and non-Numeric character on the left. previousChar is 0 when ch is the
      // first character.
      if (lastScript != Script::BOPOMOFO && lastScript != Script::HIRAGANA &&
          lastScript != Script::KATAKANA && lastScript != Script::HAN &&
          previousChar && !ISDIGIT(previousChar)) {
        return false;
      }
      // Check if there is a non-Bopomofo, non-Hiragana, non-Katakana, non-Han,
      // and non-Numeric character on the right.
      if (nextScript != Script::BOPOMOFO && nextScript != Script::HIRAGANA &&
          nextScript != Script::KATAKANA && nextScript != Script::HAN &&
          current != end && !ISDIGIT(*current)) {
        return false;
      }
    }

    // Check for mixed numbering systems
    auto genCat = GetGeneralCategory(ch);
    if (genCat == HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) {
      uint32_t zeroCharacter =
          ch - mozilla::intl::UnicodeProperties::GetNumericValue(ch);
      if (savedNumberingSystem == 0) {
        // If we encounter a decimal number, save the zero character from that
        // numbering system.
        savedNumberingSystem = zeroCharacter;
      } else if (zeroCharacter != savedNumberingSystem) {
        return false;
      }
    }

    if (genCat == HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) {
      // Reject a non-spacing mark that immediately repeats itself, i.e. the
      // previous character is the very same mark.
      if (previousChar != 0 && previousChar == ch) {
        return false;
      }
      // Check for marks whose expected script doesn't match the base script.
      if (lastScript != Script::INVALID) {
        UnicodeProperties::ScriptExtensionVector scripts;
        auto extResult = UnicodeProperties::GetExtensions(ch, scripts);
        MOZ_ASSERT(extResult.isOk());
        if (extResult.isErr()) {
          return false;
        }

        int nScripts = AssertedCast<int>(scripts.length());

        // nScripts will always be >= 1, because even for undefined characters
        // it will return Script::INVALID.
        // If the mark just has script=COMMON or INHERITED, we can't check any
        // more carefully, but if it has specific scriptExtension codes, then
        // assume those are the only valid scripts to use it with.
        if (nScripts > 1 || (Script(scripts[0]) != Script::COMMON &&
                             Script(scripts[0]) != Script::INHERITED)) {
          // Scan the extensions from the back; nScripts ends at -1 only when
          // none of them equals lastScript.
          while (--nScripts >= 0) {
            if (Script(scripts[nScripts]) == lastScript) {
              break;
            }
          }
          if (nScripts == -1) {
            return false;
          }
        }
      }
      // Check for diacritics on dotless-i, which would be indistinguishable
      // from normal accented letter i.
      if (baseChar == 0x0131 &&
          ((ch >= 0x0300 && ch <= 0x0314) || ch == 0x031a)) {
        return false;
      }
    } else {
      baseChar = ch;
    }

    if (script != Script::COMMON && script != Script::INHERITED) {
      lastScript = script;
    }

    // Simplified/Traditional Chinese check temporarily disabled -- bug 857481
#if 0

    // Check for both simplified-only and traditional-only Chinese characters
    HanVariantType hanVariant = GetHanVariant(ch);
    if (hanVariant == HVT_SimplifiedOnly || hanVariant == HVT_TraditionalOnly) {
      if (savedHanVariant == HVT_NotHan) {
        savedHanVariant = hanVariant;
      } else if (hanVariant != savedHanVariant) {
        return false;
      }
    }
#endif

    previousChar = ch;
  }

  // The Cyrillic verdict is only honoured when its pref is enabled; the digit
  // and Thai verdicts always apply.
  return digitStatusChecker.Status() != LookalikeStatus::Block &&
         (!StaticPrefs::network_idn_punycode_cyrillic_confusables() ||
          cyrillicStatusChecker.Status() != LookalikeStatus::Block) &&
         thaiStatusChecker.Status() != LookalikeStatus::Block;
}
|
||
|
||
// Scripts that we care about in illegalScriptCombo
|
||
static inline ScriptCombo findScriptIndex(Script aScript) {
|
||
switch (aScript) {
|
||
case Script::BOPOMOFO:
|
||
return ScriptCombo::BOPO;
|
||
case Script::CYRILLIC:
|
||
return ScriptCombo::CYRL;
|
||
case Script::GREEK:
|
||
return ScriptCombo::GREK;
|
||
case Script::HANGUL:
|
||
return ScriptCombo::HANG;
|
||
case Script::HAN:
|
||
return ScriptCombo::HANI;
|
||
case Script::HIRAGANA:
|
||
return ScriptCombo::HIRA;
|
||
case Script::KATAKANA:
|
||
return ScriptCombo::KATA;
|
||
case Script::LATIN:
|
||
return ScriptCombo::LATN;
|
||
default:
|
||
return ScriptCombo::OTHR;
|
||
}
|
||
}
|
||
|
||
// Transition table used by illegalScriptCombo: the row is the combination
// saved so far (BOPO..HNLT, 13 rows), the column is the script of the current
// character as returned by findScriptIndex (BOPO..OTHR, 9 columns), and the
// cell is the resulting combination.
static const ScriptCombo scriptComboTable[13][9] = {
    /* thisScript: BOPO  CYRL  GREK  HANG  HANI  HIRA  KATA  LATN  OTHR
     * savedScript */
    /* BOPO */ {BOPO, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL},
    /* CYRL */ {FAIL, CYRL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL},
    /* GREK */ {FAIL, FAIL, GREK, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL},
    /* HANG */ {FAIL, FAIL, FAIL, HANG, KORE, FAIL, FAIL, KORE, FAIL},
    /* HANI */ {CHNA, FAIL, FAIL, KORE, HANI, JPAN, JPAN, HNLT, FAIL},
    /* HIRA */ {FAIL, FAIL, FAIL, FAIL, JPAN, HIRA, JPAN, JPAN, FAIL},
    /* KATA */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, KATA, JPAN, FAIL},
    /* LATN */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, LATN, OTHR},
    /* OTHR */ {FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, OTHR, FAIL},
    /* JPAN */ {FAIL, FAIL, FAIL, FAIL, JPAN, JPAN, JPAN, JPAN, FAIL},
    /* CHNA */ {CHNA, FAIL, FAIL, FAIL, CHNA, FAIL, FAIL, CHNA, FAIL},
    /* KORE */ {FAIL, FAIL, FAIL, KORE, KORE, FAIL, FAIL, KORE, FAIL},
    /* HNLT */ {CHNA, FAIL, FAIL, KORE, HNLT, JPAN, JPAN, HNLT, FAIL}};
|
||
|
||
// Folds |script| into the running combination |savedScript| and reports
// whether the result is illegal.
//
// script      - script of the character being added.
// savedScript - in/out: the combination seen so far; updated to the new
//               combination (UNSET means no script has been seen yet).
// Returns true when the resulting combination is OTHR or FAIL. The very first
// script is always accepted.
bool nsIDNService::illegalScriptCombo(Script script, ScriptCombo& savedScript) {
  if (savedScript == ScriptCombo::UNSET) {
    savedScript = findScriptIndex(script);
    return false;
  }

  // scriptComboTable only has rows for BOPO..HNLT. A combination that already
  // reached FAIL stays illegal; bail out instead of indexing past the table.
  if (savedScript == ScriptCombo::FAIL) {
    return true;
  }

  savedScript = scriptComboTable[savedScript][findScriptIndex(script)];

  return savedScript == ScriptCombo::OTHR || savedScript == ScriptCombo::FAIL;
}
|
||
|
||
// C-linkage entry point: wraps the raw (pointer, length) pairs in spans and
// asks the nsIDNService obtained from nsStandardURL::GetIDNService() whether
// the label is safe.
extern "C" MOZ_EXPORT bool mozilla_net_is_label_safe(const char32_t* aLabel,
                                                     size_t aLabelLen,
                                                     const char32_t* aTld,
                                                     size_t aTldLen) {
  auto* idnService =
      static_cast<nsIDNService*>(nsStandardURL::GetIDNService());

  const mozilla::Span<const char32_t> label(aLabel, aLabelLen);
  const mozilla::Span<const char32_t> tld(aTld, aTldLen);

  return idnService->IsLabelSafe(label, tld);
}
|
||
|
||
bool nsIDNService::isCJKSlashConfusable(char32_t aChar) {
|
||
return mCJKSlashConfusables.Contains(aChar);
|
||
}
|
||
|
||
bool nsIDNService::isCJKIdeograph(char32_t aChar) {
|
||
return mCJKIdeographs.Contains(aChar);
|
||
}
|