/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

const EXPORTED_SYMBOLS = [
  "CalExtractToken",
  "CalExtractParseNode",
  "CalExtractParser",
  "extendParseRule",
  "prepareArguments",
];

/**
 * CalExtractOptions holds configuration options used by CalExtractParser.
 *
 * @typedef {object} CalExtractOptions
 * @property {RegExp} sentenceBoundary - A pattern used to split text at the
 *                                       sentence boundary. This should capture
 *                                       the boundary only and not any other
 *                                       part of the sentence. Use lookaheads
 *                                       if needed.
 */

/**
 * @type {CalExtractOptions}
 */
const defaultOptions = {
  sentenceBoundary: /(?<=\w)[.!?]+\s(?=[A-Z0-9])|[.!?]$/,
};

const FLAG_OPTIONAL = 1;
const FLAG_MULTIPLE = 2;
const FLAG_NONEMPTY = 4;

const flagBits = new Map([
  ["?", FLAG_OPTIONAL],
  ["+", FLAG_MULTIPLE | FLAG_NONEMPTY],
  ["*", FLAG_MULTIPLE | FLAG_OPTIONAL],
]);

/**
 * CalExtractToken represents a lexical unit of valid text. These are produced
 * during the tokenisation stage of CalExtractParser by matching regular
 * expressions against a text sequence.
 */
class CalExtractToken {
  /**
   * Identifies the token. Should be in uppercase with no spaces for consistency.
   *
   * @type {string}
   */
  type = "";

  /**
   * The text captured by this token.
   *
   * @type {string[]}
   */
  text = [];

  /**
   * Indicates which sentence in the source text the token was found.
   *
   * @type {number}
   */
  sentence = -1;

  /**
   * Indicates the position with the sentence the token occurs.
   *
   * @type {number}
   */
  position = -1;

  /**
   * @param {string} type
   * @param {string[]} text
   * @param {number} sentence
   * @param {number} position
   */
  constructor(type, text, sentence, position) {
    this.type = type;
    this.text = text;
    this.sentence = sentence;
    this.position = position;
  }
}

/**
 * Function used to produce a value when a CalExtractParseRule is matched.
 *
 * @callback CallExtractParseRuleAction
 * @param {any[]} args - An array containing all the values produced from each
 *                       pattern in the rule when they are matched or the
 *                       CalExtractToken when lexical tokens are used instead.
 */

/**
 * CalExtractParseRule specifies a named pattern that is looked for when parsing
 * the tokenized source text. Patterns are a sequence of one or more CalExtactToken
 * types or CalExtractParseRule names. Each pattern specified can optionally
 * have one (and only one) of the following flags:
 *
 * 1) "?" - Optional flag, indicates a pattern may be skipped if not matched.
 * 2) "*" - Multiple flag, indicates a pattern may match 0 or more times.
 * 3) "+" - Non-empty multiple flag, indicates a pattern may match 1 or more times.
 *
 * Flags must be specified as the last character of the pattern name, example:
 * ["subject", "text?", "MEET", "text*", "time+"]
 *
 * @typedef {object} CalExtractParseRule
 *
 * @property {string} name                      - The name of the rule that can
 *                                                be used in other patterns.
 *                                                Should be lowercase for
 *                                                consistency.
 * @property {string[]} patterns                - The pattern that will be
 *                                                searched for on the tokenized
 *                                                string. Can contain flags.
 *
 * @property {CalExtractParseRuleAction} action - Produces the result of the
 *                                                rule being satisfied.
 */

/**
 * CalExtractExtParseRule is derived from a CalExtractParseRule to include
 * additional information needed during parsing.
 *
 * @typedef {CalExtractParseRule} CalExtractExtParseRule
 *
 * @property {string[]} patterns         - The patterns here are stripped of
 *                                         any flags.
 * @property {number[]} flags            - An array containing the flags
 *                                         specified for each patterns element.
 * @property {CalExtractParseNode} graph - A graph used to determine what parse
 *                                         rule can be applied for an encountered
 *                                         production.
 */

/**
 * CalExtractParseNode is used to represent the patterns of a CalExtractParseRule
 * as a graph. This graph is traversed during stack reduction until one of the
 * following end conditions are met:
 *
 * 1) There are no more descendant nodes.
 * 2) The only descendant node is the node itself (cyclic).
 * 3) All of the descendant nodes are optional, there are no more tokens to
 *    shift and we have traversed the entire stack.
 */
class CalExtractParseNode {
  /**
   * @type {string}
   */
  symbol = null;

  /**
   * @type {number}
   */
  flags = null;

  /**
   * Contains each possible descendant node of this node.
   *
   * @type {CalExtractParseNode[]}
   */
  descendants = null;

  static FLAG_OPTIONAL = FLAG_OPTIONAL;
  static FLAG_MULTIPLE = FLAG_MULTIPLE;
  static FLAG_NONEMPTY = FLAG_NONEMPTY;

  /**
   * @param {string} symbol - The pattern this node represents.
   * @param {number} flags - The computed flags assigned to the pattern.
   * @param {CalExtractParseNode[]} descendants - Descendant nodes of this node.
   */
  constructor(symbol, flags, descendants = []) {
    this.symbol = symbol;
    this.flags = flags;
    this.descendants = descendants;
  }

  /**
   * Indicates this is the last node in its graph. This will always be false
   * for cyclic nodes.
   */
  get isEnd() {
    return !this.descendants.length;
  }

  /**
   * Appends a new descendant to this node.
   *
   * @param {CalExtractParseNode} node - The node to append.
   *
   * @returns {CalExtractParseNode} The appended node.
   */
  append(node) {
    this.descendants.push(node);
    return node;
  }

  /**
   * Provides the descendant CalExtractParseNode of this one given its symbol
   * name. The result depends on the following rules:
   * 1) If this node has a descendant that matches the name, return that node.
   * 2) If the node does not have a matching descendant but has descendants
   *    with the optional flag set, delegate to those nodes. This implements
   *    the "?" and optional aspect of "*".
   * 3) If none of the above produce a node, null is returned which means this
   *    graph cannot be traversed any further.
   *
   * @returns {CalExtractParseNode|null}
   */
  getDescendant(name) {
    // It is important the direct descendants are checked first.
    let node = this.descendants.find(node => node.symbol == name);
    if (node) {
      return node;
    }

    // Now try any optional descendants.
    for (let node of this.descendants) {
      let hit = node.isOptional() && node != this && node.getDescendant(name);
      if (hit) {
        return hit;
      }
    }
    return null;
  }

  /**
   * Indicates this node can terminate the graph if so desired. This is acceptable
   * if all of the descendants of this node are optional and there is nothing
   * more to match on the stack.
   *
   * @returns {boolean}
   */
  canEnd() {
    return this.descendants.filter(desc => desc != this).every(desc => desc.isOptional());
  }

  /**
   * Indicates whether this node has the optional flag set.
   *
   * @returns {boolean}
   */
  isOptional() {
    return Boolean(this.flags & FLAG_OPTIONAL);
  }

  /**
   * Indicates whether this node is cyclic. A cyclic node is one whose only
   * descendants is itself thus creating a loop. This occurs one a multiple
   * flagged node is in the tail position of the graph.
   */
  isCyclic() {
    return !this.isEnd && this.descendants.every(desc => desc == this);
  }
}

/**
 * CalExtractParser provides an API for detecting interesting information within
 * a text sequence that can be used for event detection and creation. It is a
 * naive implementation of a shift-reduce parser, naive in the sense that not
 * too much attention has been paid to optimisation or semantics.
 *
 * This parser works by first splitting the source string into sentences, then
 * tokenizing each using the token rules specified. The boundary for splitting
 * into sentences can be specified in the options object.
 *
 * After tokenisation, the parser uses the parse rules to shift/reduce each
 * sentence into a final result. The first parse rule is treated as the intended
 * rule to reduce the tokens of each sentence to. If all of the tokens have been
 * processed and the result is not the first rule, parsing is considered to have
 * failed and null is returned for that sentence. For this reason, it is a good
 * idea to specify parse rules that are robust but not too specific in their
 * patterns.
 */
class CalExtractParser {
  /**
   * @type {[RegExp,string?][]}
   */
  tokenRules = [];

  /**
   * @type {CalExtractParseRule[]}
   */
  parseRules = [];

  /**
   * @type {CalExtractOptions}
   */
  options = null;

  /**
   * Use the static createInstance() method instead of this constructor directly.
   *
   * @param {[RegExp, string?][]} tokenRules
   * @param {CalExtractExtParseRule[]} parseRules
   * @param {CalExtractOptions} [options] - Configuration object.
   *
   * @private
   */
  constructor(tokenRules, parseRules, options = defaultOptions) {
    this.tokenRules = tokenRules;
    this.parseRules = parseRules;
    this.options = options;
  }

  /**
   * This method creates a new CalExtractParser instance using the simpler
   * CalExtractParseRule interface instead of the extended one. It takes care
   * of creating a graph for each rule and normalizing pattern names that may
   * be using flags.
   *
   * @param {[RegExp, string?][]} tokenRules - A list of rules to apply during
   *  tokenisation. The first element of each rule is a regular expression used
   *  to detect lexical tokens and the second element is the type to assign to
   *  the token. Order matters slightly here, in general, more complex but specific
   *  rules should appear before simpler, more general ones.
   *
   *  When specifying token rules, they should be anchored to the start of the
   *  string via "^" or tokenize() will produce unexpected results. Some regular
   *  expressions should also include a word boundary to prevent matching within
   *  a large string, example: "at" in "attachment". If a string is to be matched,
   *  but no token is desired you can omit the token type from the rule and it
   *  will be omitted completely.
   *
   * @param {CalExtractParseRule[]} parseRules - A list of CalExtractParseRules
   *  that will be extended then used during parsing. Multiple parse rules can
   *  share the same name and will all be considered the same when matching patterns.
   *  Use this to specify variations of the same rule.
   *
   * @param {CalExtractOptions} [options] - Configuration object.
   */
  static createInstance(tokenRules, parseRules, options = defaultOptions) {
    return new CalExtractParser(tokenRules, parseRules.map(extendParseRule), options);
  }

  /**
   * Tokenizes a string to make it easier to match against the parse rule
   * patterns. If text is encountered that cannot be tokenized, the result for
   * that sentence is null.
   *
   * @param {string} str - The string to tokenize.
   *
   * @returns {CalExtractToken[][]} For each sentence encountered, a list of
   *                                CalExtractTokens.
   */
  tokenize(str) {
    let allTokens = [];
    let sentences = str.split(this.options.sentenceBoundary).filter(Boolean);

    for (let i = 0; i < sentences.length; i++) {
      let sentence = sentences[i];
      let pos = 0;
      let tokens = [];
      let buffer = "";

      let matched;
      while (pos < sentence.length) {
        buffer = sentence.substr(pos);
        for (let [pattern, type] of this.tokenRules) {
          matched = pattern.exec(buffer);
          if (matched) {
            if (type) {
              tokens.push(new CalExtractToken(type, matched[0], i, pos));
            }
            pos += matched[0].length;
            break;
          }
        }

        if (!matched) {
          // No rules for the encountered text, bail out.
          tokens = null;
          break;
        }
      }
      allTokens.push(tokens);
    }
    return allTokens;
  }

  /**
   * Parses a string into an array of values representing the final result of
   * parsing each sentence encountered. The elements of the resulting array
   * are either the result of applying the action of the first (top) parse rule
   * or null if we could not successfully parse the sentence.
   *
   * @param {string} str
   *
   * @returns {any[]}
   */
  parse(str) {
    return this.tokenize(str).map(tokens => {
      if (!this.parseRules.length || !tokens) {
        return null;
      }

      let lookahead = null;
      let stack = [];
      while (true) {
        if (tokens.length) {
          let next = tokens.shift();
          stack.push([next.type, next]);
          lookahead = tokens[0] ? tokens[0].type : null;
          while (this.reduceStack(stack, lookahead)) {
            continue;
          }
        } else {
          // Attempt to reduce anything still on the stack now that the
          // tokens have all been pushed.
          while (this.reduceStack(stack, lookahead)) {
            continue;
          }
          break;
        }
      }
      return stack.length == 1 && stack[0][0] == this.parseRules[0].name ? stack[0][1] : null;
    });
  }

  /**
   * Attempts to reduce the given stack exactly once using the internal parsing
   * rules. If successful, the stack will be modified to contain the matched
   * rule at the location it was found. This methods modifies the stack given.
   *
   * @returns {boolean} - True if the stack was reduced false if otherwise.
   */
  reduceStack(stack, lookahead) {
    for (let i = 0; i < stack.length; i++) {
      for (let rule of this.parseRules) {
        let node = rule.graph;
        let n = i;
        let matchCount = 0;
        while (n < stack.length && (node = node.getDescendant(stack[n][0]))) {
          matchCount++;
          if (
            node.isEnd ||
            (n == stack.length - 1 && !lookahead && (node.isCyclic() || node.canEnd()))
          ) {
            let result = [rule.name, null];
            let matched = stack.splice(i, matchCount, result);
            result[1] = rule.action(prepareArguments(rule, matched));
            return true;
          }
          n++;
        }
      }
    }
    return false;
  }
}

/**
 * Converts a CalExtractParseRule to a CalExtractExtParseRule.
 *
 * @param {CalExtractParseRule} rule
 *
 * @returns {CalExtractExtParseRule}
 */
function extendParseRule(rule) {
  let { name, action } = rule;
  let flags = [];
  let patterns = [];
  let start = new CalExtractParseNode(null, null);
  let graph = start;

  for (let pattern of rule.patterns) {
    let patternFlag = pattern[pattern.length - 1];
    let bits = 0;

    // Compute the flag value.
    for (let [flag, value] of flagBits) {
      if (patternFlag == flag) {
        bits = bits | value;
      }
    }

    // Removes the flag from patterns that have them.
    pattern = bits ? pattern.substring(0, pattern.length - 1) : pattern;
    patterns.push(pattern);
    graph = graph.append(new CalExtractParseNode(pattern, bits));

    // Create a loop node if this flag is set.
    if (bits & FLAG_MULTIPLE) {
      graph.append(graph);
    }

    flags.push(bits);
  }

  return {
    name,
    action,
    patterns,
    flags,
    graph: start,
  };
}

/**
 * Normalizes the matched arguments to be passed to an CalExtractParseRuleAction
 * by ensuring the number is the same as the patterns for the action. This takes
 * care of converting multi matches into an array and providing "null" when
 * an optional pattern is unmatched.
 *
 * @param {CalExtractExtRule} rule - The rule the action belongs to.
 * @param {string[]} matched - An sub-array of the stack containing what
 *                                   was actually matched. This array will be
 *                                   modified to match the full rule (inclusive
 *                                   of optional patterns).
 *
 *
 * @returns {Array} Arguments for a CalExtractParseRuleAction.
 */
function prepareArguments(rule, matched) {
  return rule.patterns.map((pattern, index) => {
    if (rule.flags[index] & FLAG_MULTIPLE) {
      let c = index;
      let arrayArg = [];

      while (c < matched.length && matched[c][0] == pattern) {
        arrayArg.push(matched[c][1]);
        c++;
      }
      if (!arrayArg.length) {
        // This rule was not matched, make a blank space for it.
        matched.splice(index, 0, null);
      } else {
        // Move all the matches into a single element so we match the pattern.
        matched.splice(index, arrayArg.length, arrayArg);
      }
      return arrayArg;
    } else if (matched[index] && matched[index][0] == pattern) {
      return matched[index][1];
    }

    // The pattern was unmatched, it should be optional.
    matched.splice(index, 0, null);
    return null;
  });
}