summaryrefslogtreecommitdiffstats
path: root/devtools/client/shared/unicode-url.js
blob: c13e1314c6d249aa5c9e2213ff4e0aa4f60959c0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
"use strict";

const { Cc, Ci } = require("chrome");
const idnService = Cc["@mozilla.org/network/idn-service;1"].getService(
  Ci.nsIIDNService
);

/**
 * Gets a readble Unicode hostname from a hostname.
 *
 * If the `hostname` is a readable ASCII hostname, such as example.org, then
 * this function will simply return the original `hostname`.
 *
 * If the `hostname` is a Punycode hostname representing a Unicode domain name,
 * such as xn--g6w.xn--8pv, then this function will return the readable Unicode
 * domain name by decoding the Punycode hostname.
 *
 * @param {string}  hostname
 *                  the hostname from which the Unicode hostname will be
 *                  parsed, such as example.org, xn--g6w.xn--8pv.
 * @return {string} The Unicode hostname. It may be the same as the `hostname`
 *                  passed to this function if the `hostname` itself is
 *                  a readable ASCII hostname or a Unicode hostname.
 */
function getUnicodeHostname(hostname) {
  return idnService.convertToDisplayIDN(hostname, {});
}

/**
 * Gets a readble Unicode URL pathname from a URL pathname.
 *
 * If the `urlPath` is a readable ASCII URL pathname, such as /a/b/c.js, then
 * this function will simply return the original `urlPath`.
 *
 * If the `urlPath` is a URI-encoded pathname, such as %E8%A9%A6/%E6%B8%AC.js,
 * then this function will return the readable Unicode pathname.
 *
 * If the `urlPath` is a malformed URL pathname, then this function will simply
 * return the original `urlPath`.
 *
 * @param {string}  urlPath
 *                  the URL path from which the Unicode URL path will be parsed,
 *                  such as /a/b/c.js, %E8%A9%A6/%E6%B8%AC.js.
 * @return {string} The Unicode URL Path. It may be the same as the `urlPath`
 *                  passed to this function if the `urlPath` itself is a readable
 *                  ASCII url or a Unicode url.
 */
function getUnicodeUrlPath(urlPath) {
  try {
    return decodeURIComponent(urlPath);
  } catch (err) {}
  return urlPath;
}

/**
 * Gets a readable Unicode URL from a URL.
 *
 * If the `url` is a readable ASCII URL, such as http://example.org/a/b/c.js,
 * then this function will simply return the original `url`.
 *
 * If the `url` includes either an unreadable Punycode domain name or an
 * unreadable URI-encoded pathname, such as
 * http://xn--g6w.xn--8pv/%E8%A9%A6/%E6%B8%AC.js, then this function will return
 * the readable URL by decoding all its unreadable URL components to Unicode
 * characters. The character `#` is not decoded from escape sequences.
 *
 * If the `url` is a malformed URL, then this function will return the original
 * `url`.
 *
 * If the `url` is a data: URI, then this function will return the original
 * `url`.
 *
 * @param {string}  url
 *                  the full URL, or a data: URI. from which the readable URL
 *                  will be parsed, such as, http://example.org/a/b/c.js,
 *                  http://xn--g6w.xn--8pv/%E8%A9%A6/%E6%B8%AC.js
 * @return {string} The readable URL. It may be the same as the `url` passed to
 *                  this function if the `url` itself is readable.
 */
function getUnicodeUrl(url) {
  try {
    const { protocol, hostname } = new URL(url);
    if (protocol === "data:") {
      // Never convert a data: URI.
      return url;
    }
    const readableHostname = getUnicodeHostname(hostname);

    /* We use `decodeURIComponent` instead of decodeURI as the
     * later does not decode some characters, it only can decode characters
     * previously encoded by the encodeURI. See
     * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI#Description
     */
    url = decodeURIComponent(url);
    return url.replace(hostname, readableHostname);
  } catch (err) {}
  return url;
}

module.exports = {
  getUnicodeHostname,
  getUnicodeUrlPath,
  getUnicodeUrl,
};