1 files changed, 545 insertions, 0 deletions
diff --git a/comm/calendar/extract/CalExtractParser.jsm b/comm/calendar/extract/CalExtractParser.jsm
new file mode 100644
index 0000000000..355fa3a6da
--- /dev/null
+++ b/comm/calendar/extract/CalExtractParser.jsm
@@ -0,0 +1,545 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+const EXPORTED_SYMBOLS = [
+  "CalExtractToken",
+  "CalExtractParseNode",
+  "CalExtractParser",
+  "extendParseRule",
+  "prepareArguments",
+];
+
+/**
+ * CalExtractOptions holds configuration options used by CalExtractParser.
+ *
+ * @typedef {object} CalExtractOptions
+ * @property {RegExp} sentenceBoundary - A pattern used to split text at the
+ *                                       sentence boundary. This should capture
+ *                                       the boundary only and not any other
+ *                                       part of the sentence. Use lookaheads
+ *                                       if needed.
+ */
+
+/**
+ * @type {CalExtractOptions}
+ */
+const defaultOptions = {
+  sentenceBoundary: /(?<=\w)[.!?]+\s(?=[A-Z0-9])|[.!?]$/,
+};
+
+const FLAG_OPTIONAL = 1;
+const FLAG_MULTIPLE = 2;
+const FLAG_NONEMPTY = 4;
+
+const flagBits = new Map([
+  ["?", FLAG_OPTIONAL],
+  ["+", FLAG_MULTIPLE | FLAG_NONEMPTY],
+  ["*", FLAG_MULTIPLE | FLAG_OPTIONAL],
+]);
+
+/**
+ * CalExtractToken represents a lexical unit of valid text. These are produced
+ * during the tokenisation stage of CalExtractParser by matching regular
+ * expressions against a text sequence.
+ */
+class CalExtractToken {
+  /**
+   * Identifies the token. Should be in uppercase with no spaces for consistency.
+   *
+   * @type {string}
+   */
+  type = "";
+
+  /**
+   * The text captured by this token.
+   *
+   * @type {string[]}
+   */
+  text = [];
+
+  /**
+   * Indicates which sentence in the source text the token was found.
+   *
+   * @type {number}
+   */
+  sentence = -1;
+
+  /**
+   * Indicates the position with the sentence the token occurs.
+   *
+   * @type {number}
+   */
+  position = -1;
+
+  /**
+   * @param {string} type
+   * @param {string[]} text
+   * @param {number} sentence
+   * @param {number} position
+   */
+  constructor(type, text, sentence, position) {
+    this.type = type;
+    this.text = text;
+    this.sentence = sentence;
+    this.position = position;
+  }
+}
+
+/**
+ * Function used to produce a value when a CalExtractParseRule is matched.
+ *
+ * @callback CallExtractParseRuleAction
+ * @param {any[]} args - An array containing all the values produced from each
+ *                       pattern in the rule when they are matched or the
+ *                       CalExtractToken when lexical tokens are used instead.
+ */
+
+/**
+ * CalExtractParseRule specifies a named pattern that is looked for when parsing
+ * the tokenized source text. Patterns are a sequence of one or more CalExtactToken
+ * types or CalExtractParseRule names. Each pattern specified can optionally
+ * have one (and only one) of the following flags:
+ *
+ * 1) "?" - Optional flag, indicates a pattern may be skipped if not matched.
+ * 2) "*" - Multiple flag, indicates a pattern may match 0 or more times.
+ * 3) "+" - Non-empty multiple flag, indicates a pattern may match 1 or more times.
+ *
+ * Flags must be specified as the last character of the pattern name, example:
+ * ["subject", "text?", "MEET", "text*", "time+"]
+ *
+ * @typedef {object} CalExtractParseRule
+ *
+ * @property {string} name                      - The name of the rule that can
+ *                                                be used in other patterns.
+ *                                                Should be lowercase for
+ *                                                consistency.
+ * @property {string[]} patterns                - The pattern that will be
+ *                                                searched for on the tokenized
+ *                                                string. Can contain flags.
+ *
+ * @property {CalExtractParseRuleAction} action - Produces the result of the
+ *                                                rule being satisfied.
+ */
+
+/**
+ * CalExtractExtParseRule is derived from a CalExtractParseRule to include
+ * additional information needed during parsing.
+ *
+ * @typedef {CalExtractParseRule} CalExtractExtParseRule
+ *
+ * @property {string[]} patterns         - The patterns here are stripped of
+ *                                         any flags.
+ * @property {number[]} flags            - An array containing the flags
+ *                                         specified for each patterns element.
+ * @property {CalExtractParseNode} graph - A graph used to determine what parse
+ *                                         rule can be applied for an encountered
+ *                                         production.
+ */
+
+/**
+ * CalExtractParseNode is used to represent the patterns of a CalExtractParseRule
+ * as a graph. This graph is traversed during stack reduction until one of the
+ * following end conditions are met:
+ *
+ * 1) There are no more descendant nodes.
+ * 2) The only descendant node is the node itself (cyclic).
+ * 3) All of the descendant nodes are optional, there are no more tokens to
+ *    shift and we have traversed the entire stack.
+ */
+class CalExtractParseNode {
+  /**
+   * @type {string}
+   */
+  symbol = null;
+
+  /**
+   * @type {number}
+   */
+  flags = null;
+
+  /**
+   * Contains each possible descendant node of this node.
+   *
+   * @type {CalExtractParseNode[]}
+   */
+  descendants = null;
+
+  static FLAG_OPTIONAL = FLAG_OPTIONAL;
+  static FLAG_MULTIPLE = FLAG_MULTIPLE;
+  static FLAG_NONEMPTY = FLAG_NONEMPTY;
+
+  /**
+   * @param {string} symbol - The pattern this node represents.
+   * @param {number} flags - The computed flags assigned to the pattern.
+   * @param {CalExtractParseNode[]} descendants - Descendant nodes of this node.
+   */
+  constructor(symbol, flags, descendants = []) {
+    this.symbol = symbol;
+    this.flags = flags;
+    this.descendants = descendants;
+  }
+
+  /**
+   * Indicates this is the last node in its graph. This will always be false
+   * for cyclic nodes.
+   */
+  get isEnd() {
+    return !this.descendants.length;
+  }
+
+  /**
+   * Appends a new descendant to this node.
+   *
+   * @param {CalExtractParseNode} node - The node to append.
+   *
+   * @returns {CalExtractParseNode} The appended node.
+   */
+  append(node) {
+    this.descendants.push(node);
+    return node;
+  }
+
+  /**
+   * Provides the descendant CalExtractParseNode of this one given its symbol
+   * name. The result depends on the following rules:
+   * 1) If this node has a descendant that matches the name, return that node.
+   * 2) If the node does not have a matching descendant but has descendants
+   *    with the optional flag set, delegate to those nodes. This implements
+   *    the "?" and optional aspect of "*".
+   * 3) If none of the above produce a node, null is returned which means this
+   *    graph cannot be traversed any further.
+   *
+   * @returns {CalExtractParseNode|null}
+   */
+  getDescendant(name) {
+    // It is important the direct descendants are checked first.
+    let node = this.descendants.find(node => node.symbol == name);
+    if (node) {
+      return node;
+    }
+
+    // Now try any optional descendants.
+    for (let node of this.descendants) {
+      let hit = node.isOptional() && node != this && node.getDescendant(name);
+      if (hit) {
+        return hit;
+      }
+    }
+    return null;
+  }
+
+  /**
+   * Indicates this node can terminate the graph if so desired. This is acceptable
+   * if all of the descendants of this node are optional and there is nothing
+   * more to match on the stack.
+   *
+   * @returns {boolean}
+   */
+  canEnd() {
+    return this.descendants.filter(desc => desc != this).every(desc => desc.isOptional());
+  }
+
+  /**
+   * Indicates whether this node has the optional flag set.
+   *
+   * @returns {boolean}
+   */
+  isOptional() {
+    return Boolean(this.flags & FLAG_OPTIONAL);
+  }
+
+  /**
+   * Indicates whether this node is cyclic. A cyclic node is one whose only
+   * descendants is itself thus creating a loop. This occurs one a multiple
+   * flagged node is in the tail position of the graph.
+   */
+  isCyclic() {
+    return !this.isEnd && this.descendants.every(desc => desc == this);
+  }
+}
+
+/**
+ * CalExtractParser provides an API for detecting interesting information within
+ * a text sequence that can be used for event detection and creation. It is a
+ * naive implementation of a shift-reduce parser, naive in the sense that not
+ * too much attention has been paid to optimisation or semantics.
+ *
+ * This parser works by first splitting the source string into sentences, then
+ * tokenizing each using the token rules specified. The boundary for splitting
+ * into sentences can be specified in the options object.
+ *
+ * After tokenisation, the parser uses the parse rules to shift/reduce each
+ * sentence into a final result. The first parse rule is treated as the intended
+ * rule to reduce the tokens of each sentence to. If all of the tokens have been
+ * processed and the result is not the first rule, parsing is considered to have
+ * failed and null is returned for that sentence. For this reason, it is a good
+ * idea to specify parse rules that are robust but not too specific in their
+ * patterns.
+ */
+class CalExtractParser {
+  /**
+   * @type {[RegExp,string?][]}
+   */
+  tokenRules = [];
+
+  /**
+   * @type {CalExtractParseRule[]}
+   */
+  parseRules = [];
+
+  /**
+   * @type {CalExtractOptions}
+   */
+  options = null;
+
+  /**
+   * Use the static createInstance() method instead of this constructor directly.
+   *
+   * @param {[RegExp, string?][]} tokenRules
+   * @param {CalExtractExtParseRule[]} parseRules
+   * @param {CalExtractOptions} [options] - Configuration object.
+   *
+   * @private
+   */
+  constructor(tokenRules, parseRules, options = defaultOptions) {
+    this.tokenRules = tokenRules;
+    this.parseRules = parseRules;
+    this.options = options;
+  }
+
+  /**
+   * This method creates a new CalExtractParser instance using the simpler
+   * CalExtractParseRule interface instead of the extended one. It takes care
+   * of creating a graph for each rule and normalizing pattern names that may
+   * be using flags.
+   *
+   * @param {[RegExp, string?][]} tokenRules - A list of rules to apply during
+   *  tokenisation. The first element of each rule is a regular expression used
+   *  to detect lexical tokens and the second element is the type to assign to
+   *  the token. Order matters slightly here, in general, more complex but specific
+   *  rules should appear before simpler, more general ones.
+   *
+   *  When specifying token rules, they should be anchored to the start of the
+   *  string via "^" or tokenize() will produce unexpected results. Some regular
+   *  expressions should also include a word boundary to prevent matching within
+   *  a large string, example: "at" in "attachment". If a string is to be matched,
+   *  but no token is desired you can omit the token type from the rule and it
+   *  will be omitted completely.
+   *
+   * @param {CalExtractParseRule[]} parseRules - A list of CalExtractParseRules
+   *  that will be extended then used during parsing. Multiple parse rules can
+   *  share the same name and will all be considered the same when matching patterns.
+   *  Use this to specify variations of the same rule.
+   *
+   * @param {CalExtractOptions} [options] - Configuration object.
+   */
+  static createInstance(tokenRules, parseRules, options = defaultOptions) {
+    return new CalExtractParser(tokenRules, parseRules.map(extendParseRule), options);
+  }
+
+  /**
+   * Tokenizes a string to make it easier to match against the parse rule
+   * patterns. If text is encountered that cannot be tokenized, the result for
+   * that sentence is null.
+   *
+   * @param {string} str - The string to tokenize.
+   *
+   * @returns {CalExtractToken[][]} For each sentence encountered, a list of
+   *                                CalExtractTokens.
+   */
+  tokenize(str) {
+    let allTokens = [];
+    let sentences = str.split(this.options.sentenceBoundary).filter(Boolean);
+
+    for (let i = 0; i < sentences.length; i++) {
+      let sentence = sentences[i];
+      let pos = 0;
+      let tokens = [];
+      let buffer = "";
+
+      let matched;
+      while (pos < sentence.length) {
+        buffer = sentence.substr(pos);
+        for (let [pattern, type] of this.tokenRules) {
+          matched = pattern.exec(buffer);
+          if (matched) {
+            if (type) {
+              tokens.push(new CalExtractToken(type, matched[0], i, pos));
+            }
+            pos += matched[0].length;
+            break;
+          }
+        }
+
+        if (!matched) {
+          // No rules for the encountered text, bail out.
+          tokens = null;
+          break;
+        }
+      }
+      allTokens.push(tokens);
+    }
+    return allTokens;
+  }
+
+  /**
+   * Parses a string into an array of values representing the final result of
+   * parsing each sentence encountered. The elements of the resulting array
+   * are either the result of applying the action of the first (top) parse rule
+   * or null if we could not successfully parse the sentence.
+   *
+   * @param {string} str
+   *
+   * @returns {any[]}
+   */
+  parse(str) {
+    return this.tokenize(str).map(tokens => {
+      if (!this.parseRules.length || !tokens) {
+        return null;
+      }
+
+      let lookahead = null;
+      let stack = [];
+      while (true) {
+        if (tokens.length) {
+          let next = tokens.shift();
+          stack.push([next.type, next]);
+          lookahead = tokens[0] ? tokens[0].type : null;
+          while (this.reduceStack(stack, lookahead)) {
+            continue;
+          }
+        } else {
+          // Attempt to reduce anything still on the stack now that the
+          // tokens have all been pushed.
+          while (this.reduceStack(stack, lookahead)) {
+            continue;
+          }
+          break;
+        }
+      }
+      return stack.length == 1 && stack[0][0] == this.parseRules[0].name ? stack[0][1] : null;
+    });
+  }
+
+  /**
+   * Attempts to reduce the given stack exactly once using the internal parsing
+   * rules. If successful, the stack will be modified to contain the matched
+   * rule at the location it was found. This methods modifies the stack given.
+   *
+   * @returns {boolean} - True if the stack was reduced false if otherwise.
+   */
+  reduceStack(stack, lookahead) {
+    for (let i = 0; i < stack.length; i++) {
+      for (let rule of this.parseRules) {
+        let node = rule.graph;
+        let n = i;
+        let matchCount = 0;
+        while (n < stack.length && (node = node.getDescendant(stack[n][0]))) {
+          matchCount++;
+          if (
+            node.isEnd ||
+            (n == stack.length - 1 && !lookahead && (node.isCyclic() || node.canEnd()))
+          ) {
+            let result = [rule.name, null];
+            let matched = stack.splice(i, matchCount, result);
+            result[1] = rule.action(prepareArguments(rule, matched));
+            return true;
+          }
+          n++;
+        }
+      }
+    }
+    return false;
+  }
+}
+
+/**
+ * Converts a CalExtractParseRule to a CalExtractExtParseRule.
+ *
+ * @param {CalExtractParseRule} rule
+ *
+ * @returns {CalExtractExtParseRule}
+ */
+function extendParseRule(rule) {
+  let { name, action } = rule;
+  let flags = [];
+  let patterns = [];
+  let start = new CalExtractParseNode(null, null);
+  let graph = start;
+
+  for (let pattern of rule.patterns) {
+    let patternFlag = pattern[pattern.length - 1];
+    let bits = 0;
+
+    // Compute the flag value.
+    for (let [flag, value] of flagBits) {
+      if (patternFlag == flag) {
+        bits = bits | value;
+      }
+    }
+
+    // Removes the flag from patterns that have them.
+    pattern = bits ? pattern.substring(0, pattern.length - 1) : pattern;
+    patterns.push(pattern);
+    graph = graph.append(new CalExtractParseNode(pattern, bits));
+
+    // Create a loop node if this flag is set.
+    if (bits & FLAG_MULTIPLE) {
+      graph.append(graph);
+    }
+
+    flags.push(bits);
+  }
+
+  return {
+    name,
+    action,
+    patterns,
+    flags,
+    graph: start,
+  };
+}
+
+/**
+ * Normalizes the matched arguments to be passed to an CalExtractParseRuleAction
+ * by ensuring the number is the same as the patterns for the action. This takes
+ * care of converting multi matches into an array and providing "null" when
+ * an optional pattern is unmatched.
+ *
+ * @param {CalExtractExtRule} rule - The rule the action belongs to.
+ * @param {string[]} matched - An sub-array of the stack containing what
+ *                                   was actually matched. This array will be
+ *                                   modified to match the full rule (inclusive
+ *                                   of optional patterns).
+ *
+ *
+ * @returns {Array} Arguments for a CalExtractParseRuleAction.
+ */
+function prepareArguments(rule, matched) {
+  return rule.patterns.map((pattern, index) => {
+    if (rule.flags[index] & FLAG_MULTIPLE) {
+      let c = index;
+      let arrayArg = [];
+
+      while (c < matched.length && matched[c][0] == pattern) {
+        arrayArg.push(matched[c][1]);
+        c++;
+      }
+      if (!arrayArg.length) {
+        // This rule was not matched, make a blank space for it.
+        matched.splice(index, 0, null);
+      } else {
+        // Move all the matches into a single element so we match the pattern.
+        matched.splice(index, arrayArg.length, arrayArg);
+      }
+      return arrayArg;
+    } else if (matched[index] && matched[index][0] == pattern) {
+      return matched[index][1];
+    }
+
+    // The pattern was unmatched, it should be optional.
+    matched.splice(index, 0, null);
+    return null;
+  });
+}