Adding upstream version 124.0.1. (tag: upstream/124.0.1)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 00:47:55 +0000
commit: 26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree: f435a8308119effd964b339f76abb83a57c29483 /toolkit/components/formautofill/shared/AddressParser.sys.mjs
parent: Initial commit. (diff)
download: firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz
firefox-26a029d407be480d791972afb5975cf62c9360a6.zip
1 files changed, 285 insertions, 0 deletions
diff --git a/toolkit/components/formautofill/shared/AddressParser.sys.mjs b/toolkit/components/formautofill/shared/AddressParser.sys.mjs
new file mode 100644
index 0000000000..5cb76934c1
--- /dev/null
+++ b/toolkit/components/formautofill/shared/AddressParser.sys.mjs
@@ -0,0 +1,285 @@
+/* eslint-disable no-useless-concat */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+// NamedCaptureGroup represents a named capturing group in a regular expression.
+class NamedCaptureGroup {
+  // The name of this capturing group
+  #name = null;
+
+  // The regular expression source text containing the capturing group
+  #capture = null;
+
+  // The matched result; stays null until setMatch() is called
+  #match = null;
+
+  constructor(name, capture) {
+    this.#name = name;
+    this.#capture = capture;
+  }
+
+  get name() {
+    return this.#name;
+  }
+
+  get capture() {
+    return this.#capture;
+  }
+
+  get match() {
+    return this.#match;
+  }
+
+  // Record the matched result by looking up this group's name in the match groups
+  setMatch(matchGroups) {
+    this.#match = matchGroups[this.#name];
+  }
+}
+
+// Base class for the different parts of a street address regular expression.
+// The regular expression is built from a prefix, pattern, suffix and
+// separator; only the pattern (the "value" part) is captured under `name`.
+// For example, when we write "apt 4." for an apartment number, its prefix is `apt`,
+// its suffix is `.` and the value representing the apartment number is `4`.
+class StreetAddressPartRegExp extends NamedCaptureGroup {
+  constructor(name, prefix, pattern, suffix, sep, optional = false) {
+    prefix = prefix ?? "";
+    suffix = suffix ?? "";
+    super(
+      name,
+      `((?:${prefix})(?<${name}>${pattern})(?:${suffix})(?:${sep})+)${
+        optional ? "?" : ""
+      }`
+    );
+  }
+}
+
+// Matches the street number of a street address: digits plus an optional trailing word character.
+class StreetNumberRegExp extends StreetAddressPartRegExp {
+  static PREFIX = "((no|°|º|number)(\\.|-|\\s)*)?"; // From chromium source
+
+  static PATTERN = "\\d+\\w?";
+
+  // TODO: possible suffix : (th\\.|\\.)?
+  static SUFFIX = null;
+
+  constructor(sep, optional) {
+    super(
+      StreetNumberRegExp.name, // the class name doubles as the capture group name
+      StreetNumberRegExp.PREFIX,
+      StreetNumberRegExp.PATTERN,
+      StreetNumberRegExp.SUFFIX,
+      sep,
+      optional
+    );
+  }
+}
+
+// Matches the street name of a street address: comma-free words separated by same-line whitespace.
+class StreetNameRegExp extends StreetAddressPartRegExp {
+  static PREFIX = null;
+
+  static PATTERN = "(?:[^\\s,]+(?:[^\\S\\r\\n]+[^\\s,]+)*?)"; // From chromium source
+
+  // TODO: Should we consider suffix like (ave|st)?
+  static SUFFIX = null;
+
+  constructor(sep, optional) {
+    super(
+      StreetNameRegExp.name, // the class name doubles as the capture group name
+      StreetNameRegExp.PREFIX,
+      StreetNameRegExp.PATTERN,
+      StreetNameRegExp.SUFFIX,
+      sep,
+      optional
+    );
+  }
+}
+
+// Matches the apartment number of a street address: a required keyword prefix followed by the value.
+class ApartmentNumberRegExp extends StreetAddressPartRegExp {
+  static keyword = "apt|apartment|wohnung|apto|-" + "|unit|suite|ste|#|room"; // First part: from chromium source; second part: Firefox specific
+  static PREFIX = `(${ApartmentNumberRegExp.keyword})(\\.|\\s|-)*`;
+
+  static PATTERN = "\\w*([-|\\/]\\w*)?";
+
+  static SUFFIX = "(\\.|\\s|-)*(ª)?"; // From chromium source
+
+  constructor(sep, optional) {
+    super(
+      ApartmentNumberRegExp.name, // the class name doubles as the capture group name
+      ApartmentNumberRegExp.PREFIX,
+      ApartmentNumberRegExp.PATTERN,
+      ApartmentNumberRegExp.SUFFIX,
+      sep,
+      optional
+    );
+  }
+}
+
+// Matches the floor number of a street address: 1-3 digits with an optional floor keyword before or after.
+class FloorNumberRegExp extends StreetAddressPartRegExp {
+  static keyword =
+    "floor|flur|fl|og|obergeschoss|ug|untergeschoss|geschoss|andar|piso|º" + // From chromium source
+    "|level|lvl"; // Firefox specific
+  static PREFIX = `(${FloorNumberRegExp.keyword})?(\\.|\\s|-)*`; // TODO
+  static PATTERN = "\\d{1,3}\\w?";
+  static SUFFIX = `(st|nd|rd|th)?(\\.|\\s|-)*(${FloorNumberRegExp.keyword})?`; // TODO
+
+  constructor(sep, optional) {
+    super(
+      FloorNumberRegExp.name, // the class name doubles as the capture group name
+      FloorNumberRegExp.PREFIX,
+      FloorNumberRegExp.PATTERN,
+      FloorNumberRegExp.SUFFIX,
+      sep,
+      optional
+    );
+  }
+}
+
+/**
+ * Represents a street address with the following read-only fields, each a string or undefined:
+ * - street number
+ * - street name
+ * - apartment number
+ * - floor number
+ */
+export class StructuredStreetAddress {
+  #street_number = null;
+  #street_name = null;
+  #apartment_number = null;
+  #floor_number = null;
+
+  constructor(street_number, street_name, apartment_number, floor_number) {
+    this.#street_number = street_number?.toString(); // null/undefined input is stored as undefined
+    this.#street_name = street_name?.toString();
+    this.#apartment_number = apartment_number?.toString();
+    this.#floor_number = floor_number?.toString();
+  }
+
+  get street_number() {
+    return this.#street_number;
+  }
+
+  get street_name() {
+    return this.#street_name;
+  }
+
+  get apartment_number() {
+    return this.#apartment_number;
+  }
+
+  get floor_number() {
+    return this.#floor_number;
+  }
+
+  toString() { // Multi-line text listing the four fields
+    return `
+      street number: ${this.#street_number}\n
+      street name: ${this.#street_name}\n
+      apartment number: ${this.#apartment_number}\n
+      floor number: ${this.#floor_number}\n
+    `;
+  }
+}
+
+export class AddressParser {
+  /**
+   * Parse a street address that follows this pattern:
+   * street number, street name, apartment number(optional), floor number(optional)
+   * For example, 2 Harrison St #175 floor 2
+   *
+   * @param {string} address The street address to be parsed.
+   * @returns {StructuredStreetAddress|null} null when address is empty or does not match.
+   */
+  static parseStreetAddress(address) {
+    if (!address) {
+      return null;
+    }
+
+    const separator = "(\\s|,|$)";
+
+    const regexpes = [
+      new StreetNumberRegExp(separator),
+      new StreetNameRegExp(separator),
+      new ApartmentNumberRegExp(separator, true),
+      new FloorNumberRegExp(separator, true),
+    ];
+
+    return AddressParser.parse(address, regexpes)
+      ? new StructuredStreetAddress(...regexpes.map(regexp => regexp.match))
+      : null;
+  }
+
+  static parse(address, regexpes) { // Returns { name: match } for every regexp, or null when there is no match
+    const options = {
+      trim: true,
+      merge_whitespace: true,
+      ignore_case: true,
+    };
+    address = AddressParser.normalizeString(address, options);
+
+    const match = address.match(
+      new RegExp(`^(${regexpes.map(regexp => regexp.capture).join("")})$`)
+    );
+    if (!match) {
+      return null;
+    }
+
+    regexpes.forEach(regexp => regexp.setMatch(match.groups));
+    return regexpes.reduce((acc, current) => {
+      return { ...acc, [current.name]: current.match };
+    }, {});
+  }
+
+  static normalizeString(s, options) {
+    if (typeof s != "string") {
+      return s;
+    }
+
+    if (options.ignore_case) {
+      s = s.toLowerCase();
+    }
+
+    // Process punctuation before whitespace: if punctuation is replaced
+    // with whitespace, we may want to merge that whitespace afterwards.
+    if (options.remove_punctuation) {
+      s = AddressParser.replacePunctuation(s, "");
+    } else if ("replace_punctuation" in options) {
+      const replace = options.replace_punctuation;
+      s = AddressParser.replacePunctuation(s, replace);
+    }
+
+    // Process whitespace: merging takes precedence over removal.
+    if (options.merge_whitespace) {
+      s = AddressParser.mergeWhitespace(s);
+    } else if (options.remove_whitespace) {
+      s = AddressParser.removeWhitespace(s);
+    }
+
+    return s.trim(); // NOTE: always trims; options.trim is never consulted
+  }
+
+  static replacePunctuation(s, replace) {
+    const regex = /\p{Punctuation}/gu;
+    return s?.replace(regex, replace);
+  }
+
+  static removePunctuation(s) {
+    return s?.replace(/[.,\/#!$%\^&\*;:{}=\-_~()]/g, "");
+  }
+
+  static replaceControlCharacters(s, replace) { // NOTE: `replace` is unused; a space is always substituted
+    return s?.replace(/[\t\n\r]/g, " ");
+  }
+
+  static removeWhitespace(s) {
+    return s?.replace(/[\s]/g, "");
+  }
+
+  static mergeWhitespace(s) {
+    return s?.replace(/\s{2,}/g, " ");
+  }
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 00:47:55 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-19 00:47:55 +0000
commit	26a029d407be480d791972afb5975cf62c9360a6 (patch)
tree	f435a8308119effd964b339f76abb83a57c29483 /toolkit/components/formautofill/shared/AddressParser.sys.mjs
parent	Initial commit. (diff)
download	firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip