diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /browser/components/textrecognition | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'browser/components/textrecognition')
-rw-r--r-- | browser/components/textrecognition/jar.mn | 8 | ||||
-rw-r--r-- | browser/components/textrecognition/moz.build | 14 | ||||
-rw-r--r-- | browser/components/textrecognition/tests/browser/browser.toml | 12 | ||||
-rw-r--r-- | browser/components/textrecognition/tests/browser/browser_textrecognition.js | 140 | ||||
-rw-r--r-- | browser/components/textrecognition/tests/browser/browser_textrecognition_no_result.js | 99 | ||||
-rw-r--r-- | browser/components/textrecognition/tests/browser/head.js | 59 | ||||
-rw-r--r-- | browser/components/textrecognition/tests/browser/image.png | bin | 0 -> 7061 bytes | |||
-rw-r--r-- | browser/components/textrecognition/textrecognition.css | 143 | ||||
-rw-r--r-- | browser/components/textrecognition/textrecognition.html | 42 | ||||
-rw-r--r-- | browser/components/textrecognition/textrecognition.mjs | 458 |
10 files changed, 975 insertions, 0 deletions
diff --git a/browser/components/textrecognition/jar.mn b/browser/components/textrecognition/jar.mn new file mode 100644 index 0000000000..ceb4ee595a --- /dev/null +++ b/browser/components/textrecognition/jar.mn @@ -0,0 +1,8 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +browser.jar: + content/browser/textrecognition/textrecognition.html + content/browser/textrecognition/textrecognition.mjs + content/browser/textrecognition/textrecognition.css diff --git a/browser/components/textrecognition/moz.build b/browser/components/textrecognition/moz.build new file mode 100644 index 0000000000..c82a779e11 --- /dev/null +++ b/browser/components/textrecognition/moz.build @@ -0,0 +1,14 @@ +# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*- +# vim: set filetype=python: +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +JAR_MANIFESTS += ["jar.mn"] + +with Files("**"): + BUG_COMPONENT = ("Firefox", "General") + +BROWSER_CHROME_MANIFESTS += [ + "tests/browser/browser.toml", +] diff --git a/browser/components/textrecognition/tests/browser/browser.toml b/browser/components/textrecognition/tests/browser/browser.toml new file mode 100644 index 0000000000..12c79add6c --- /dev/null +++ b/browser/components/textrecognition/tests/browser/browser.toml @@ -0,0 +1,12 @@ +[DEFAULT] +support-files = [ + "head.js", + "image.png", + "!/toolkit/content/tests/browser/doggy.png", +] + +["browser_textrecognition.js"] +run-if = ["os == 'mac'"] # Mac only feature. + +["browser_textrecognition_no_result.js"] +run-if = ["os == 'mac'"] # Mac only feature. diff --git a/browser/components/textrecognition/tests/browser/browser_textrecognition.js b/browser/components/textrecognition/tests/browser/browser_textrecognition.js new file mode 100644 index 0000000000..c949902e67 --- /dev/null +++ b/browser/components/textrecognition/tests/browser/browser_textrecognition.js @@ -0,0 +1,140 @@ +/* Any copyright is dedicated to the Public Domain. + http://creativecommons.org/publicdomain/zero/1.0/ */ +"use strict"; + +add_task(async function () { + const URL_IMG = + "http://mochi.test:8888/browser/browser/components/textrecognition/tests/browser/image.png"; + + await SpecialPowers.pushPrefEnv({ + set: [["dom.text-recognition.enabled", true]], + }); + + clearTelemetry(); + + await BrowserTestUtils.withNewTab(URL_IMG, async function (browser) { + setClipboardText(""); + is(getTextFromClipboard(), "", "The copied text is empty."); + ok( + !getTelemetryScalars()["browser.ui.interaction.content_context"], + "No telemetry has been recorded yet." + ); + is( + Services.telemetry + .getHistogramById("TEXT_RECOGNITION_API_PERFORMANCE") + .snapshot().sum, + 0, + "No histogram timing was recorded." + ); + + info("Right click image to show context menu."); + let popupShownPromise = BrowserTestUtils.waitForEvent( + document, + "popupshown" + ); + await BrowserTestUtils.synthesizeMouseAtCenter( + "img", + { type: "contextmenu", button: 2 }, + browser + ); + await popupShownPromise; + + info("Click context menu to copy the image text."); + document.getElementById("context-imagetext").doCommand(); + + info("Close the context menu."); + let contextMenu = document.getElementById("contentAreaContextMenu"); + let popupHiddenPromise = BrowserTestUtils.waitForEvent( + contextMenu, + "popuphidden" + ); + contextMenu.hidePopup(); + await popupHiddenPromise; + + info("Waiting for the dialog browser to be shown."); + const { contentDocument } = await BrowserTestUtils.waitForCondition(() => + document.querySelector(".textRecognitionDialogFrame") + ); + + { + info("Check the scalar telemetry."); + const scalars = await BrowserTestUtils.waitForCondition(() => + getTelemetryScalars() + ); + const contentContext = scalars["browser.ui.interaction.content_context"]; + ok(contentContext, "Opening the context menu was recorded."); + + is(contentContext["context-imagetext"], 1, "Telemetry has been recorded"); + } + + info("Waiting for text results."); + const resultsHeader = contentDocument.querySelector( + "#text-recognition-header-results" + ); + await BrowserTestUtils.waitForCondition(() => { + return resultsHeader.style.display !== "none"; + }); + + const expectedResultText = "Mozilla\n\nFirefox"; + + { + info("Check the text results."); + const text = contentDocument.querySelector(".textRecognitionText"); + is(text.children.length, 2, "Two piece of text were found"); + const [p1, p2] = text.children; + is(p1.tagName, "P", "The children are paragraph tags."); + is(p2.tagName, "P", "The children are paragraph tags."); + is(p1.innerText, "Mozilla", "The first piece of text matches."); + is(p2.innerText, "Firefox", "The second piece of text matches."); + + const clipboardText = getTextFromClipboard(); + is(clipboardText, expectedResultText, "The copied text matches."); + + is( + clipboardText, + text.innerText, + "The copied text and the text elements innerText match." + ); + } + + Assert.greater( + Services.telemetry + .getHistogramById("TEXT_RECOGNITION_API_PERFORMANCE") + .snapshot().sum, + 0, + "Text recognition API performance was recorded." + ); + + info("Close the dialog box."); + const close = contentDocument.querySelector("#text-recognition-close"); + close.click(); + + is( + Services.telemetry + .getHistogramById("TEXT_RECOGNITION_TEXT_LENGTH") + .snapshot().sum, + expectedResultText.length, + "The length of the text was recorded." + ); + + info("Waiting for the dialog frame to close."); + await BrowserTestUtils.waitForCondition( + () => !document.querySelector(".textRecognitionDialogFrame") + ); + + info("Check for interaction telemetry."); + const timing = await BrowserTestUtils.waitForCondition(() => { + const { sum } = Services.telemetry + .getHistogramById("TEXT_RECOGNITION_INTERACTION_TIMING") + .snapshot(); + if (sum > 0) { + return sum; + } + return false; + }); + Assert.greater(timing, 0, "Interaction timing was measured."); + + setClipboardText(""); + clearTelemetry(); + }); +}); diff --git a/browser/components/textrecognition/tests/browser/browser_textrecognition_no_result.js b/browser/components/textrecognition/tests/browser/browser_textrecognition_no_result.js new file mode 100644 index 0000000000..2e7f1a5d49 --- /dev/null +++ b/browser/components/textrecognition/tests/browser/browser_textrecognition_no_result.js @@ -0,0 +1,99 @@ +/* Any copyright is dedicated to the Public Domain. + http://creativecommons.org/publicdomain/zero/1.0/ */ +"use strict"; + +add_task(async function () { + const url = + "http://mochi.test:8888/browser/toolkit/content/tests/browser/doggy.png"; + + await SpecialPowers.pushPrefEnv({ + set: [["dom.text-recognition.enabled", true]], + }); + + clearTelemetry(); + + await BrowserTestUtils.withNewTab(url, async function (browser) { + setClipboardText(""); + is(getTextFromClipboard(), "", "The copied text is empty."); + + info("Right click image to show context menu."); + let popupShownPromise = BrowserTestUtils.waitForEvent( + document, + "popupshown" + ); + await BrowserTestUtils.synthesizeMouseAtCenter( + "img", + { type: "contextmenu", button: 2 }, + browser + ); + await popupShownPromise; + + info("Click context menu to copy the image text."); + document.getElementById("context-imagetext").doCommand(); + + info("Close the context menu."); + let contextMenu = document.getElementById("contentAreaContextMenu"); + let popupHiddenPromise = BrowserTestUtils.waitForEvent( + contextMenu, + "popuphidden" + ); + contextMenu.hidePopup(); + await popupHiddenPromise; + + info("Waiting for the dialog browser to be shown."); + const { contentDocument } = await BrowserTestUtils.waitForCondition(() => + document.querySelector(".textRecognitionDialogFrame") + ); + + info("Waiting for no results message."); + const noResultsHeader = contentDocument.querySelector( + "#text-recognition-header-no-results" + ); + await BrowserTestUtils.waitForCondition(() => { + return noResultsHeader.style.display !== "none"; + }); + + { + info("Check the scalar telemetry."); + const scalars = await BrowserTestUtils.waitForCondition(() => + getTelemetryScalars() + ); + const contentContext = scalars["browser.ui.interaction.content_context"]; + ok(contentContext, "Opening the context menu was recorded."); + + is(contentContext["context-imagetext"], 1, "Telemetry has been recorded"); + } + + const text = contentDocument.querySelector(".textRecognitionText"); + is(text.children.length, 0, "No results are listed."); + + Assert.greater( + Services.telemetry + .getHistogramById("TEXT_RECOGNITION_API_PERFORMANCE") + .snapshot().sum, + 0, + "Histogram timing was recorded even though there were no results." + ); + + is( + Services.telemetry + .getHistogramById("TEXT_RECOGNITION_INTERACTION_TIMING") + .snapshot().sum, + 0, + "No interaction timing has been measured yet." + ); + + info("Close the dialog box."); + const close = contentDocument.querySelector("#text-recognition-close"); + close.click(); + + info("Waiting for the dialog frame to close."); + await BrowserTestUtils.waitForCondition( + () => !document.querySelector(".textRecognitionDialogFrame") + ); + + is(getTextFromClipboard(), "", "The copied text is still empty."); + }); + + clearTelemetry(); +}); diff --git a/browser/components/textrecognition/tests/browser/head.js b/browser/components/textrecognition/tests/browser/head.js new file mode 100644 index 0000000000..a765d86501 --- /dev/null +++ b/browser/components/textrecognition/tests/browser/head.js @@ -0,0 +1,59 @@ +/* Any copyright is dedicated to the Public Domain. + http://creativecommons.org/publicdomain/zero/1.0/ */ + +/** + * @param {string} text + */ +function setClipboardText(text) { + const ClipboardHelper = Cc[ + "@mozilla.org/widget/clipboardhelper;1" + ].getService(Ci.nsIClipboardHelper); + ClipboardHelper.copyString(text); +} + +/** + * @returns {string} + */ +function getTextFromClipboard() { + const transferable = Cc["@mozilla.org/widget/transferable;1"].createInstance( + Ci.nsITransferable + ); + transferable.init(window.docShell.QueryInterface(Ci.nsILoadContext)); + transferable.addDataFlavor("text/plain"); + Services.clipboard.getData( + transferable, + Services.clipboard.kGlobalClipboard, + SpecialPowers.wrap(window).browsingContext.currentWindowContext + ); + + const results = {}; + transferable.getTransferData("text/plain", results); + return results.value.QueryInterface(Ci.nsISupportsString)?.data ?? ""; +} + +/** + * Returns events specifically for text recognition. + */ +function getTelemetryScalars() { + const snapshot = Services.telemetry.getSnapshotForKeyedScalars( + "main", + true /* clear events */ + ); + + if (!snapshot.parent) { + return {}; + } + + return snapshot.parent; +} + +function clearTelemetry() { + Services.telemetry.clearScalars(); + Services.telemetry + .getHistogramById("TEXT_RECOGNITION_API_PERFORMANCE") + .clear(); + Services.telemetry + .getHistogramById("TEXT_RECOGNITION_INTERACTION_TIMING") + .clear(); + Services.telemetry.getHistogramById("TEXT_RECOGNITION_TEXT_LENGTH").clear(); +} diff --git a/browser/components/textrecognition/tests/browser/image.png b/browser/components/textrecognition/tests/browser/image.png Binary files differnew file mode 100644 index 0000000000..3faa11b221 --- /dev/null +++ b/browser/components/textrecognition/tests/browser/image.png diff --git a/browser/components/textrecognition/textrecognition.css b/browser/components/textrecognition/textrecognition.css new file mode 100644 index 0000000000..f1f378c8e2 --- /dev/null +++ b/browser/components/textrecognition/textrecognition.css @@ -0,0 +1,143 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +.textRecognitionText { + overflow-y: auto; + max-height: 300px; + flex: 1; + display: none; +} + +.textRecognitionText p:first-child { + margin-top: 0; +} + +.textRecognitionText p:last-child { + margin-bottom: 0; +} + +.textRecognition { + --gap: 16px; + display: flex; + flex-direction: column; + background-color: var(--in-content-page-background); + width: 412px; + gap: var(--gap) 0; + padding-block: var(--gap); +} + +.textRecognition > * { + padding-inline: var(--gap); +} + +.textRecognitionHeader { + font-weight: bold; + display: flex; + flex-direction: row; + align-items: center; +} + +.textRecognitionFooter { + text-align: end; +} + +.textRecognitionThrobber { + display: inline-block; + width: 16px; + height: 16px; + position: relative; + overflow: hidden; + margin-inline-end: 5.5px; +} + +@media (prefers-reduced-motion: no-preference) { + .textRecognitionThrobber::before { + content: ""; + position: absolute; + background-image: url("chrome://browser/skin/tabbrowser/loading.svg"); + background-position: left center; + background-repeat: no-repeat; + width: 480px; + height: 100%; + animation: throbber-animation-ltr 1.05s steps(30) infinite; + -moz-context-properties: fill; + fill: currentColor; + opacity: 0.7; + } + + .textRecognitionThrobber:dir(rtl)::before { + background-position-x: right; + animation: throbber-animation-rtl 1.05s steps(30) infinite; + } +} + +@media (prefers-reduced-motion: reduce) { + .textRecognitionThrobber { + background-image: url("chrome://browser/skin/tabbrowser/hourglass.svg"); + background-position: center; + background-repeat: no-repeat; + -moz-context-properties: fill; + fill: currentColor; + } +} + +.textRecognitionSuccessIcon { + display: inline-block; + background-color: #2AC3A2; + border: 3px solid #2AC3A2; + fill: var(--in-content-page-background); + -moz-context-properties: fill; + border-radius: 10px; + width: 12px; + height: 12px; + margin-inline-end: 6px; +} + +@media (prefers-reduced-motion: no-preference) { + .textRecognitionSuccessIcon { + animation: success-animation 0.3s cubic-bezier(.3,2,.48,.94); + } +} + +.textRecognitionNoResultIcon { + display: inline-block; + fill: #FFBF00; + -moz-context-properties: fill; + width: 18px; + height: 18px; + margin-inline-end: 8px; +} + +@media (prefers-contrast) { + .textRecognitionSuccessIcon { + background-color: currentColor; + border-color: currentColor; + fill: var(--in-content-page-background); + } + + .textRecognitionNoResultIcon { + fill: currentColor; + } +} + +@keyframes throbber-animation-ltr { + 0% { transform: translateX(0); } + 100% { transform: translateX(-100%); } +} + +@keyframes throbber-animation-rtl { + 0% { transform: translateX(0); } + 100% { transform: translateX(100%); } +} + +@keyframes success-animation { + 0% { + transform: scale(0); + opacity: 0; + } + 100% { + transform: scale(1); + opacity: 1; + } +} diff --git a/browser/components/textrecognition/textrecognition.html b/browser/components/textrecognition/textrecognition.html new file mode 100644 index 0000000000..2200baaeeb --- /dev/null +++ b/browser/components/textrecognition/textrecognition.html @@ -0,0 +1,42 @@ +<!-- This Source Code Form is subject to the terms of the Mozilla Public + - License, v. 2.0. If a copy of the MPL was not distributed with this + - file, You can obtain one at http://mozilla.org/MPL/2.0/. --> + +<!DOCTYPE html> +<html> +<head> + <meta charset="utf-8"> + <meta http-equiv="Content-Security-Policy" + content="default-src chrome:; object-src 'none';"> + <link rel="stylesheet" type="text/css" + href="chrome://global/skin/in-content/common.css"> + <link rel="stylesheet" type="text/css" + href="chrome://browser/content/textrecognition/textrecognition.css"> + <link rel="localization" href="browser/textRecognition.ftl"/> + <script type="module" src="chrome://browser/content/textrecognition/textrecognition.mjs"></script> + <!-- The title is available to screen readers only --> + <title data-l10n-id="text-recognition-modal-accessible-modal-title"></title> +</head> +<body> + <div class="textRecognition"> + <div class="textRecognitionHeader" id="text-recognition-header-loading""> + <span class="textRecognitionThrobber"></span> + <span data-l10n-id="text-recognition-modal-searching-title"></span> + </div> + <div class="textRecognitionHeader" id="text-recognition-header-results"> + <img class="textRecognitionSuccessIcon" src="chrome://global/skin/icons/check.svg" role="presentation"/> + <span data-l10n-id="text-recognition-modal-results-title"></span> + </div> + <div class="textRecognitionHeader" id="text-recognition-header-no-results"> + <img class="textRecognitionNoResultIcon" src="chrome://global/skin/icons/warning.svg" role="presentation"/> + <span data-l10n-id="text-recognition-modal-no-results-title"> + <a data-l10n-name="error-link" href="https://support.mozilla.org/1/firefox/%VERSION%/%OS%/%LOCALE%/text-recognition"></a> + </span> + </div> + <div class="textRecognitionText" role="document"></div> + <div class="textRecognitionFooter"> + <button data-l10n-id="text-recognition-modal-close-button" type="button" id="text-recognition-close" class="primary"></button> + </div> + </div> +</body> +</html> diff --git a/browser/components/textrecognition/textrecognition.mjs b/browser/components/textrecognition/textrecognition.mjs new file mode 100644 index 0000000000..e3da35691b --- /dev/null +++ b/browser/components/textrecognition/textrecognition.mjs @@ -0,0 +1,458 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +window.docShell.chromeEventHandler.classList.add("textRecognitionDialogFrame"); + +window.addEventListener("DOMContentLoaded", () => { + // The arguments are passed in as the final parameters to TabDialogBox.prototype.open. + new TextRecognitionModal(...window.arguments); +}); + +/** + * @typedef {object} TextRecognitionResult + * @property {number} confidence + * @property {string} string + * @property {DOMQuad} quad + */ + +class TextRecognitionModal { + /** + * @param {Promise<TextRecognitionResult[]>} resultsPromise + * @param {Function} resizeVertically + * @param {object} [openLinkIn] + * @param {string} openLinkIn.url + * @param {string} openLinkIn.where + * @param {object} openLinkIn.params + */ + constructor(resultsPromise, resizeVertically, openLinkIn) { + /** @type {HTMLElement} */ + this.textEl = document.querySelector(".textRecognitionText"); + + /** @type {NodeListOf<HTMLElement>} */ + this.headerEls = document.querySelectorAll(".textRecognitionHeader"); + + /** @type {HTMLAnchorElement} */ + this.linkEl = document.querySelector( + "#text-recognition-header-no-results a" + ); + + this.resizeVertically = resizeVertically; + this.openLinkIn = openLinkIn; + this.setupLink(); + this.setupCloseHandler(); + + this.showHeaderByID("text-recognition-header-loading"); + + resultsPromise.then( + ({ results, direction }) => { + if (results.length === 0) { + // Update the UI to indicate that there were no results. + this.showHeaderByID("text-recognition-header-no-results"); + // It's still worth recording telemetry times, as the API was still invoked. + TelemetryStopwatch.finish( + "TEXT_RECOGNITION_API_PERFORMANCE", + resultsPromise + ); + return; + } + + // There were results, cluster them into a nice presentation, and present + // the results to the UI. + this.runClusteringAndUpdateUI(results, direction); + this.showHeaderByID("text-recognition-header-results"); + TelemetryStopwatch.finish( + "TEXT_RECOGNITION_API_PERFORMANCE", + resultsPromise + ); + + TextRecognitionModal.recordInteractionTime(); + }, + error => { + // There was an error in the text recognition call. Treat this as the same + // as if there were no results, but report the error to the console and telemetry. + this.showHeaderByID("text-recognition-header-no-results"); + + console.error( + "There was an error recognizing the text from an image.", + error + ); + Services.telemetry.scalarAdd( + "browser.ui.interaction.textrecognition_error", + 1 + ); + TelemetryStopwatch.cancel( + "TEXT_RECOGNITION_API_PERFORMANCE", + resultsPromise + ); + } + ); + } + + /** + * After the results are shown, measure how long a user interacts with the modal. + */ + static recordInteractionTime() { + TelemetryStopwatch.start( + "TEXT_RECOGNITION_INTERACTION_TIMING", + // Pass the instance of the window in case multiple tabs are doing text recognition + // and there is a race condition. + window + ); + + const finish = () => { + TelemetryStopwatch.finish("TEXT_RECOGNITION_INTERACTION_TIMING", window); + window.removeEventListener("blur", finish); + window.removeEventListener("unload", finish); + }; + + // The user's focus went away, record this as the total interaction, even if they + // go back and interact with it more. This can be triggered by doing actions like + // clicking the URL bar, or by switching tabs. + window.addEventListener("blur", finish); + + // The modal is closed. + window.addEventListener("unload", finish); + } + + /** + * After the results are shown, measure how long a user interacts with the modal. + * + * @param {number} textLength + */ + static recordTextLengthTelemetry(textLength) { + const histogram = Services.telemetry.getHistogramById( + "TEXT_RECOGNITION_TEXT_LENGTH" + ); + histogram.add(textLength); + } + + setupCloseHandler() { + document + .querySelector("#text-recognition-close") + .addEventListener("click", () => { + window.close(); + }); + } + + /** + * Apply the variables for the support.mozilla.org URL. + */ + setupLink() { + this.linkEl.href = Services.urlFormatter.formatURL(this.linkEl.href); + this.linkEl.addEventListener("click", event => { + event.preventDefault(); + this.openLinkIn(this.linkEl.href, "tab", { + forceForeground: true, + triggeringPrincipal: + Services.scriptSecurityManager.getSystemPrincipal(), + }); + }); + } + + /** + * A helper to only show the appropriate header. + * + * @param {string} id + */ + showHeaderByID(id) { + for (const header of this.headerEls) { + header.style.display = "none"; + } + + document.getElementById(id).style.display = ""; + this.resizeVertically(); + } + + /** + * @param {string} text + */ + static copy(text) { + const clipboard = Cc["@mozilla.org/widget/clipboardhelper;1"].getService( + Ci.nsIClipboardHelper + ); + clipboard.copyString(text); + } + + /** + * Cluster the text based on its visual position. + * + * @param {TextRecognitionResult[]} results + * @param {"ltr" | "rtl"} direction + */ + runClusteringAndUpdateUI(results, direction) { + /** @type {Vec2[]} */ + const centers = []; + + for (const result of results) { + const p = result.quad; + + // Pick either the left-most or right-most edge. This optimizes for + // aligned text over centered text. + const minOrMax = direction === "ltr" ? Math.min : Math.max; + + centers.push([ + minOrMax(p.p1.x, p.p2.x, p.p3.x, p.p4.x), + (p.p1.y, p.p2.y, p.p3.y, p.p4.y) / 4, + ]); + } + + const distSq = new DistanceSquared(centers); + + // The values are ranged 0 - 1. This value might be able to be determined + // algorithmically. + const averageDistance = Math.sqrt(distSq.quantile(0.2)); + const clusters = densityCluster( + centers, + // Neighborhood radius: + averageDistance, + // Minimum points to form a cluster: + 2 + ); + + let text = ""; + for (const cluster of clusters) { + const pCluster = document.createElement("p"); + pCluster.className = "textRecognitionTextCluster"; + + for (let i = 0; i < cluster.length; i++) { + const index = cluster[i]; + const { string } = results[index]; + if (i + 1 === cluster.length) { + // Each cluster could be a paragraph, so add newlines to the end + // for better copying. + text += string + "\n\n"; + // The paragraph tag automatically uses two newlines. + pCluster.innerText += string; + } else { + // This text is assumed to be a newlines in a paragraph, so only needs + // to be separated by a space. + text += string + " "; + pCluster.innerText += string + " "; + } + } + this.textEl.appendChild(pCluster); + } + + this.textEl.style.display = "block"; + + text = text.trim(); + TextRecognitionModal.copy(text); + TextRecognitionModal.recordTextLengthTelemetry(text.length); + } +} + +/** + * A two dimensional vector. + * + * @typedef {number[]} Vec2 + */ + +/** + * @typedef {number} PointIndex + */ + +/** + * An implementation of the DBSCAN clustering algorithm. + * + * https://en.wikipedia.org/wiki/DBSCAN#Algorithm + * + * @param {Vec2[]} points + * @param {number} distance + * @param {number} minPoints + * @returns {Array<PointIndex[]>} + */ +function densityCluster(points, distance, minPoints) { + /** + * A flat of array of labels that match the index of the points array. The values have + * the following meaning: + * + * undefined := No label has been assigned + * "noise" := Noise is a point that hasn't been clustered. + * number := Cluster index + * + * @type {undefined | "noise" | Index} + */ + const labels = Array(points.length); + const noiseLabel = "noise"; + + let nextClusterIndex = 0; + + // Every point must be visited at least once. Often they will be visited earlier + // in the interior of the loop. + for (let pointIndex = 0; pointIndex < points.length; pointIndex++) { + if (labels[pointIndex] !== undefined) { + // This point is already labeled from the interior logic. + continue; + } + + // Get the neighbors that are within the range of the epsilon value, includes + // the current point. + const neighbors = getNeighborsWithinDistance(points, distance, pointIndex); + + if (neighbors.length < minPoints) { + labels[pointIndex] = noiseLabel; + continue; + } + + // Start a new cluster. + const clusterIndex = nextClusterIndex++; + labels[pointIndex] = clusterIndex; + + // Fill the cluster. The neighbors array grows. + for (let i = 0; i < neighbors.length; i++) { + const nextPointIndex = neighbors[i]; + if (typeof labels[nextPointIndex] === "number") { + // This point was already claimed, ignore it. + continue; + } + + if (labels[nextPointIndex] === noiseLabel) { + // Claim this point and move on since noise has no neighbors. + labels[nextPointIndex] = clusterIndex; + continue; + } + + // Claim this point as part of this cluster. + labels[nextPointIndex] = clusterIndex; + + const newNeighbors = getNeighborsWithinDistance( + points, + distance, + nextPointIndex + ); + + if (newNeighbors.length >= minPoints) { + // Add on to the neighbors if more are found. + for (const newNeighbor of newNeighbors) { + if (!neighbors.includes(newNeighbor)) { + neighbors.push(newNeighbor); + } + } + } + } + } + + const clusters = []; + + // Pre-populate the clusters. + for (let i = 0; i < nextClusterIndex; i++) { + clusters[i] = []; + } + + // Turn the labels into clusters, adding the noise to the end. + for (let pointIndex = 0; pointIndex < labels.length; pointIndex++) { + const label = labels[pointIndex]; + if (typeof label === "number") { + clusters[label].push(pointIndex); + } else if (label === noiseLabel) { + // Add a single cluster. + clusters.push([pointIndex]); + } else { + throw new Error("Logic error. Expected every point to have a label."); + } + } + + clusters.sort((a, b) => points[b[0]][1] - points[a[0]][1]); + + return clusters; +} + +/** + * @param {Vec2[]} points + * @param {number} distance + * @param {number} index + * @returns {Index[]} + */ +function getNeighborsWithinDistance(points, distance, index) { + let neighbors = [index]; + // There is no reason to compute the square root here if we square the + // original distance. + const distanceSquared = distance * distance; + + for (let otherIndex = 0; otherIndex < points.length; otherIndex++) { + if (otherIndex === index) { + continue; + } + const a = points[index]; + const b = points[otherIndex]; + const dx = a[0] - b[0]; + const dy = a[1] - b[1]; + + if (dx * dx + dy * dy < distanceSquared) { + neighbors.push(otherIndex); + } + } + + return neighbors; +} + +/** + * This class pre-computes the squared distances to allow for efficient distance lookups, + * and it provides a way to look up a distance quantile. + */ +class DistanceSquared { + /** @type {Map<number>} */ + #distances = new Map(); + #list; + #distancesSorted; + + /** + * @param {Vec2[]} list + */ + constructor(list) { + this.#list = list; + for (let aIndex = 0; aIndex < list.length; aIndex++) { + for (let bIndex = aIndex + 1; bIndex < list.length; bIndex++) { + const id = this.#getTupleID(aIndex, bIndex); + const a = this.#list[aIndex]; + const b = this.#list[bIndex]; + const dx = a[0] - b[0]; + const dy = a[1] - b[1]; + this.#distances.set(id, dx * dx + dy * dy); + } + } + } + + /** + * Returns a unique tuple ID to identify the relationship between two vectors. + */ + #getTupleID(aIndex, bIndex) { + return aIndex < bIndex + ? aIndex * this.#list.length + bIndex + : bIndex * this.#list.length + aIndex; + } + + /** + * Returns the distance squared between two vectors. + * + * @param {Index} aIndex + * @param {Index} bIndex + * @returns {number} The distance squared + */ + get(aIndex, bIndex) { + return this.#distances.get(this.#getTupleID(aIndex, bIndex)); + } + + /** + * Returns the quantile squared. + * + * @param {number} percentile - Ranged between 0 - 1 + * @returns {number} + */ + quantile(percentile) { + if (!this.#distancesSorted) { + this.#distancesSorted = [...this.#distances.values()].sort( + (a, b) => a - b + ); + } + const index = Math.max( + 0, + Math.min( + this.#distancesSorted.length - 1, + Math.round(this.#distancesSorted.length * percentile) + ) + ); + return this.#distancesSorted[index]; + } +} |