1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
"use strict";
// IDN service instance shared by this module; `getUnicodeHostname` calls its
// `convertToDisplayIDN` method to turn hostnames into their display form.
// NOTE(review): `Cc` and `Ci` are not defined in this file; presumably they are
// the Components.classes / Components.interfaces globals provided by the
// privileged environment - confirm against the loader.
const idnService = Cc["@mozilla.org/network/idn-service;1"].getService(
Ci.nsIIDNService
);
/**
 * Converts a hostname into its readable Unicode form.
 *
 * The conversion is delegated to the IDN service's `convertToDisplayIDN`:
 *
 * - a hostname that is already readable ASCII, such as example.org, is
 *   expected to come back unchanged;
 * - a Punycode hostname that stands for a Unicode domain name, such as
 *   xn--g6w.xn--8pv, is expected to come back as the decoded Unicode domain
 *   name.
 *
 * @param {string} hostname
 *        The hostname to convert, such as example.org or xn--g6w.xn--8pv.
 * @return {string} The Unicode hostname. It may be identical to `hostname`
 *         when `hostname` is already a readable ASCII hostname or a Unicode
 *         hostname.
 */
function getUnicodeHostname(hostname) {
  const displayHostname = idnService.convertToDisplayIDN(hostname, {});
  return displayHostname;
}
/**
 * Converts a URL pathname into its readable Unicode form.
 *
 * - A pathname that is already readable ASCII, such as /a/b/c.js, is returned
 *   unchanged.
 * - A URI-encoded pathname, such as %E8%A9%A6/%E6%B8%AC.js, is returned with
 *   its escape sequences decoded by `decodeURIComponent`.
 * - A pathname that `decodeURIComponent` rejects as malformed (for example a
 *   truncated escape sequence) is returned unchanged.
 *
 * @param {string} urlPath
 *        The URL path to convert, such as /a/b/c.js or
 *        %E8%A9%A6/%E6%B8%AC.js.
 * @return {string} The Unicode URL path. It may be identical to `urlPath`
 *         when `urlPath` is already readable or cannot be decoded.
 */
function getUnicodeUrlPath(urlPath) {
  let readablePath = urlPath;
  try {
    readablePath = decodeURIComponent(urlPath);
  } catch (err) {
    // Malformed escape sequence: deliberately fall back to the raw path.
  }
  return readablePath;
}
/**
 * Converts a URL into its readable Unicode form.
 *
 * - A URL that is already readable ASCII, such as
 *   http://example.org/a/b/c.js, is returned unchanged.
 * - A URL containing a Punycode hostname and/or URI-encoded components, such
 *   as http://xn--g6w.xn--8pv/%E8%A9%A6/%E6%B8%AC.js, is returned with its
 *   escape sequences decoded and the first occurrence of the parsed hostname
 *   replaced by the result of `getUnicodeHostname`.
 * - A data: URI is returned unchanged.
 * - If the URL cannot be parsed, if the hostname conversion throws, or if the
 *   URL contains a malformed escape sequence, the original `url` is returned.
 *
 * NOTE: the whole URL string is passed through `decodeURIComponent`, so every
 * valid escape sequence is decoded, including those for reserved characters
 * such as `%23` (`#`).
 *
 * @param {string} url
 *        The full URL, or a data: URI, to convert, such as
 *        http://example.org/a/b/c.js or
 *        http://xn--g6w.xn--8pv/%E8%A9%A6/%E6%B8%AC.js.
 * @return {string} The readable URL. It may be identical to `url` when `url`
 *         is already readable or cannot be converted.
 */
function getUnicodeUrl(url) {
  let readableUrl = url;
  try {
    const parsedUrl = new URL(url);
    // Never convert a data: URI.
    if (parsedUrl.protocol !== "data:") {
      const asciiHostname = parsedUrl.hostname;
      const unicodeHostname = getUnicodeHostname(asciiHostname);

      /* `decodeURIComponent` is used rather than `decodeURI` because the
       * latter leaves some escape sequences untouched: it only decodes
       * characters that `encodeURI` would have encoded. See
       * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI#Description
       */
      readableUrl = decodeURIComponent(url);
      readableUrl = readableUrl.replace(asciiHostname, unicodeHostname);
    }
  } catch (err) {
    // Best effort: on any failure, return whatever has been produced so far
    // (the original URL unless decoding already succeeded).
  }
  return readableUrl;
}
// Public API of this module: the three Unicode conversion helpers defined
// above.
module.exports = {
getUnicodeHostname,
getUnicodeUrlPath,
getUnicodeUrl,
};
|