From 8dd16259287f58f9273002717ec4d27e97127719 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 12 Jun 2024 07:43:14 +0200 Subject: Merging upstream version 127.0. Signed-off-by: Daniel Baumann --- dom/media/webvtt/vtt.sys.mjs | 62 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 12 deletions(-) (limited to 'dom/media/webvtt/vtt.sys.mjs') diff --git a/dom/media/webvtt/vtt.sys.mjs b/dom/media/webvtt/vtt.sys.mjs index 9e8071c427..af400fbbfe 100644 --- a/dom/media/webvtt/vtt.sys.mjs +++ b/dom/media/webvtt/vtt.sys.mjs @@ -351,15 +351,44 @@ function parseContent(window, input, mode) { return consume(m[1] ? m[1] : m[2]); } - // Unescape a string 's'. - function unescape1(e) { - return ESCAPE[e]; - } - function unescape(s) { - let m; - while ((m = s.match(/&(amp|lt|gt|lrm|rlm|nbsp);/))) { - s = s.replace(m[0], unescape1); - } + const unescapeHelper = window.document.createElement("div"); + function unescapeEntities(s) { + let match; + + // Decimal numeric character reference + s = s.replace(/&#(\d+);?/g, (candidate, number) => { + try { + const codepoint = parseInt(number); + return String.fromCodePoint(codepoint); + } catch (_) { + return candidate; + } + }); + + // Hexadecimal numeric character reference + s = s.replace(/&#x([\dA-Fa-f]+);?/g, (candidate, number) => { + try { + const codepoint = parseInt(number, 16); + return String.fromCodePoint(codepoint); + } catch (_) { + return candidate; + } + }); + + // Named character references + s = s.replace(/&\w[\w\d]*;?/g, candidate => { + // The list of entities is huge, so we use innerHTML instead. + // We should probably use setHTML instead once that is available (bug 1650370). + // Ideally we would be able to use a faster/simpler variant of setHTML (bug 1731215). 
+ unescapeHelper.innerHTML = candidate; + const unescaped = unescapeHelper.innerText; + if (unescaped == candidate) { // not a valid entity + return candidate; + } + return unescaped; + }); + unescapeHelper.innerHTML = ""; + return s; } @@ -432,12 +461,21 @@ function parseContent(window, input, mode) { while ((t = nextToken()) !== null) { if (t[0] === '<') { if (t[1] === "/") { + const endTag = t.slice(2, -1); + const stackEnd = tagStack.at(-1); + // If the closing tag matches, move back up to the parent node. - if (tagStack.length && - tagStack[tagStack.length - 1] === t.substr(2).replace(">", "")) { + if (stackEnd == endTag) { tagStack.pop(); current = current.parentNode; + + // If the closing tag is </ruby> and we're at an <rt>, move back up to + // the <ruby>'s parent node. + } else if (endTag == "ruby" && current.nodeName == "RT") { + tagStack.pop(); + current = current.parentNode.parentNode; } + // Otherwise just ignore the end tag. continue; } @@ -477,7 +515,7 @@ function parseContent(window, input, mode) { } // Text nodes are leaf nodes. - current.appendChild(window.document.createTextNode(unescape(t))); + current.appendChild(window.document.createTextNode(unescapeEntities(t))); } return root; -- cgit v1.2.3