summaryrefslogtreecommitdiffstats
path: root/comm/calendar/base/modules/calExtract.jsm
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--comm/calendar/base/modules/calExtract.jsm1417
1 files changed, 1417 insertions, 0 deletions
diff --git a/comm/calendar/base/modules/calExtract.jsm b/comm/calendar/base/modules/calExtract.jsm
new file mode 100644
index 0000000000..4bb68cf77b
--- /dev/null
+++ b/comm/calendar/base/modules/calExtract.jsm
@@ -0,0 +1,1417 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+const EXPORTED_SYMBOLS = ["Extractor"];
+var { cal } = ChromeUtils.import("resource:///modules/calendar/calUtils.jsm");
+
+/**
+ * Initializes extraction
+ *
+ * @param fallbackLocale locale to use when others are not found or
+ * detection is disabled
+ * @param dayStart ambiguous hours earlier than this are considered to
+ * be in the afternoon, when null then by default
+ * set to 6
+ * @param fixedLang whether to use only fallbackLocale for extraction
+ */
+function Extractor(fallbackLocale, dayStart, fixedLang) {
+ this.bundleUrl = "resource:///chrome/LOCALE/locale/LOCALE/calendar/calendar-extract.properties";
+ this.fallbackLocale = fallbackLocale;
+ this.email = "";
+ this.marker = "--MARK--";
+ // this should never be found in an email
+ this.defPattern = "061dc19c-719f-47f3-b2b5-e767e6f02b7a";
+ this.collected = [];
+ this.numbers = [];
+ this.hourlyNumbers = [];
+ this.dailyNumbers = [];
+ this.allMonths = "";
+ this.months = [];
+ this.dayStart = 6;
+ this.now = new Date();
+ this.bundle = "";
+ this.overrides = {};
+ this.fixedLang = true;
+
+ if (dayStart != null) {
+ this.dayStart = dayStart;
+ }
+
+ if (fixedLang != null) {
+ this.fixedLang = fixedLang;
+ }
+
+ if (!this.checkBundle(fallbackLocale)) {
+ cal.WARN(
+ "Your installed Lightning only includes a single locale, extracting event info from other languages is likely inaccurate. You can install Lightning from addons.mozilla.org manually for multiple locale support."
+ );
+ }
+}
+
+Extractor.prototype = {
+ /**
+ * Removes confusing data like urls, timezones and phone numbers from email
+ * Also removes standard signatures and quoted content from previous emails
+ */
+ cleanup() {
+ // XXX remove earlier correspondence
+ // ideally this should be considered with lower certainty to fill in
+ // missing information
+
+ // remove last line preceding quoted message and first line of the quote
+ this.email = this.email.replace(/\r?\n[^>].*\r?\n>+.*$/m, "");
+ // remove the rest of quoted content
+ this.email = this.email.replace(/^>+.*$/gm, "");
+
+ // urls often contain dates dates that can confuse extraction
+ this.email = this.email.replace(/https?:\/\/[^\s]+\s/gm, "");
+ this.email = this.email.replace(/www\.[^\s]+\s/gm, "");
+
+ // remove phone numbers
+ // TODO allow locale specific configuration of formats
+ this.email = this.email.replace(/\d-\d\d\d-\d\d\d-\d\d\d\d/gm, "");
+
+ // remove standard signature
+ this.email = this.email.replace(/\r?\n-- \r?\n[\S\s]+$/, "");
+
+ // XXX remove timezone info, for now
+ this.email = this.email.replace(/gmt[+-]\d{2}:\d{2}/gi, "");
+ },
+
+ checkBundle(locale) {
+ let path = this.bundleUrl.replace(/LOCALE/g, locale);
+ let bundle = Services.strings.createBundle(path);
+
+ try {
+ bundle.GetStringFromName("from.today");
+ return true;
+ } catch (ex) {
+ return false;
+ }
+ },
+
+ avgNonAsciiCharCode() {
+ let sum = 0;
+ let cnt = 0;
+
+ for (let i = 0; i < this.email.length; i++) {
+ let char = this.email.charCodeAt(i);
+ if (char > 128) {
+ sum += char;
+ cnt++;
+ }
+ }
+
+ let nonAscii = sum / cnt || 0;
+ cal.LOG("[calExtract] Average non-ascii charcode: " + nonAscii);
+ return nonAscii;
+ },
+
+ setLanguage() {
+ let path;
+
+ if (this.fixedLang) {
+ if (this.checkBundle(this.fallbackLocale)) {
+ cal.LOG(
+ "[calExtract] Fixed locale was used to choose " + this.fallbackLocale + " patterns."
+ );
+ } else {
+ cal.LOG(
+ "[calExtract] " + this.fallbackLocale + " patterns were not found. Using en-US instead"
+ );
+ this.fallbackLocale = "en-US";
+ }
+
+ path = this.bundleUrl.replace(/LOCALE/g, this.fallbackLocale);
+
+ let pref = "calendar.patterns.last.used.languages";
+ let lastUsedLangs = Services.prefs.getStringPref(pref, "");
+ if (lastUsedLangs == "") {
+ Services.prefs.setStringPref(pref, this.fallbackLocale);
+ } else {
+ let langs = lastUsedLangs.split(",");
+ let idx = langs.indexOf(this.fallbackLocale);
+ if (idx == -1) {
+ Services.prefs.setStringPref(pref, this.fallbackLocale + "," + lastUsedLangs);
+ } else {
+ langs.splice(idx, 1);
+ Services.prefs.setStringPref(pref, this.fallbackLocale + "," + langs.join(","));
+ }
+ }
+ } else {
+ let spellchecker = Cc["@mozilla.org/spellchecker/engine;1"].getService(
+ Ci.mozISpellCheckingEngine
+ );
+
+ let dicts = spellchecker.getDictionaryList();
+
+ if (dicts.length == 0) {
+ cal.LOG(
+ "[calExtract] There are no dictionaries installed and " +
+ "enabled. You might want to add some if date and time " +
+ "extraction from emails seems inaccurate."
+ );
+ }
+
+ let patterns;
+ let words = this.email.split(/\s+/);
+ let most = 0;
+ let mostLocale;
+ for (let dict in dicts) {
+ // dictionary locale and patterns locale match
+ if (this.checkBundle(dicts[dict])) {
+ let time1 = new Date().getTime();
+ spellchecker.dictionaries = [dicts[dict]];
+ let dur = new Date().getTime() - time1;
+ cal.LOG("[calExtract] Loading " + dicts[dict] + " dictionary took " + dur + "ms");
+ patterns = dicts[dict];
+ // beginning of dictionary locale matches patterns locale
+ } else if (this.checkBundle(dicts[dict].substring(0, 2))) {
+ let time1 = new Date().getTime();
+ spellchecker.dictionaries = [dicts[dict]];
+ let dur = new Date().getTime() - time1;
+ cal.LOG("[calExtract] Loading " + dicts[dict] + " dictionary took " + dur + "ms");
+ patterns = dicts[dict].substring(0, 2);
+ // dictionary for which patterns aren't present
+ } else {
+ cal.LOG("[calExtract] Dictionary present, rules missing: " + dicts[dict]);
+ continue;
+ }
+
+ let correct = 0;
+ let total = 0;
+ for (let word in words) {
+ words[word] = words[word].replace(/[()\d,;:?!#.]/g, "");
+ if (words[word].length >= 2) {
+ total++;
+ if (spellchecker.check(words[word])) {
+ correct++;
+ }
+ }
+ }
+
+ let percentage = (correct / total) * 100.0;
+ cal.LOG("[calExtract] " + dicts[dict] + " dictionary matches " + percentage + "% of words");
+
+ if (percentage > 50.0 && percentage > most) {
+ mostLocale = patterns;
+ most = percentage;
+ }
+ }
+
+ let avgCharCode = this.avgNonAsciiCharCode();
+
+ // using dictionaries for language recognition with non-latin letters doesn't work
+ // very well, possibly because of bug 471799
+ if (avgCharCode > 48000 && avgCharCode < 50000) {
+ cal.LOG("[calExtract] Using ko patterns based on charcodes");
+ path = this.bundleUrl.replace(/LOCALE/g, "ko");
+ // is it possible to differentiate zh-TW and zh-CN?
+ } else if (avgCharCode > 24000 && avgCharCode < 32000) {
+ cal.LOG("[calExtract] Using zh-TW patterns based on charcodes");
+ path = this.bundleUrl.replace(/LOCALE/g, "zh-TW");
+ } else if (avgCharCode > 14000 && avgCharCode < 24000) {
+ cal.LOG("[calExtract] Using ja patterns based on charcodes");
+ path = this.bundleUrl.replace(/LOCALE/g, "ja");
+ // Bulgarian also looks like that
+ } else if (avgCharCode > 1000 && avgCharCode < 1200) {
+ cal.LOG("[calExtract] Using ru patterns based on charcodes");
+ path = this.bundleUrl.replace(/LOCALE/g, "ru");
+ // dictionary based
+ } else if (most > 0) {
+ cal.LOG("[calExtract] Using " + mostLocale + " patterns based on dictionary");
+ path = this.bundleUrl.replace(/LOCALE/g, mostLocale);
+ // fallbackLocale matches patterns exactly
+ } else if (this.checkBundle(this.fallbackLocale)) {
+ cal.LOG("[calExtract] Falling back to " + this.fallbackLocale);
+ path = this.bundleUrl.replace(/LOCALE/g, this.fallbackLocale);
+ // beginning of fallbackLocale matches patterns
+ } else if (this.checkBundle(this.fallbackLocale.substring(0, 2))) {
+ this.fallbackLocale = this.fallbackLocale.substring(0, 2);
+ cal.LOG("[calExtract] Falling back to " + this.fallbackLocale);
+ path = this.bundleUrl.replace(/LOCALE/g, this.fallbackLocale);
+ } else {
+ cal.LOG("[calExtract] Using en-US");
+ path = this.bundleUrl.replace(/LOCALE/g, "en-US");
+ }
+ }
+ this.bundle = Services.strings.createBundle(path);
+ },
+
+ /**
+ * Extracts dates, times and durations from email
+ *
+ * @param body email body
+ * @param now reference time against which relative times are interpreted,
+ * when null current time is used
+ * @param sel selection object of email content, when defined times
+ * outside selection are discarded
+ * @param title email title
+ * @returns sorted list of extracted datetime objects
+ */
+ extract(title, body, now, sel) {
+ let initial = {};
+ this.collected = [];
+ this.email = title + "\r\n" + body;
+ if (now != null) {
+ this.now = now;
+ }
+
+ initial.year = now.getFullYear();
+ initial.month = now.getMonth() + 1;
+ initial.day = now.getDate();
+ initial.hour = now.getHours();
+ initial.minute = now.getMinutes();
+
+ this.collected.push({
+ year: initial.year,
+ month: initial.month,
+ day: initial.day,
+ hour: initial.hour,
+ minute: initial.minute,
+ relation: "start",
+ });
+
+ this.cleanup();
+ cal.LOG("[calExtract] Email after processing for extraction: \n" + this.email);
+
+ this.overrides = JSON.parse(Services.prefs.getStringPref("calendar.patterns.override", "{}"));
+ this.setLanguage();
+
+ for (let i = 0; i <= 31; i++) {
+ this.numbers[i] = this.getPatterns("number." + i);
+ }
+ this.dailyNumbers = this.numbers.join(this.marker);
+
+ this.hourlyNumbers = this.numbers[0] + this.marker;
+ for (let i = 1; i <= 22; i++) {
+ this.hourlyNumbers += this.numbers[i] + this.marker;
+ }
+ this.hourlyNumbers += this.numbers[23];
+
+ this.hourlyNumbers = this.hourlyNumbers.replace(/\|/g, this.marker);
+ this.dailyNumbers = this.dailyNumbers.replace(/\|/g, this.marker);
+
+ for (let i = 0; i < 12; i++) {
+ this.months[i] = this.getPatterns("month." + (i + 1));
+ }
+ this.allMonths = this.months.join(this.marker).replace(/\|/g, this.marker);
+
+ // time
+ this.extractTime("from.noon", "start", 12, 0);
+ this.extractTime("until.noon", "end", 12, 0);
+
+ this.extractHour("from.hour", "start", "none");
+ this.extractHour("from.hour.am", "start", "ante");
+ this.extractHour("from.hour.pm", "start", "post");
+ this.extractHour("until.hour", "end", "none");
+ this.extractHour("until.hour.am", "end", "ante");
+ this.extractHour("until.hour.pm", "end", "post");
+
+ this.extractHalfHour("from.half.hour.before", "start", "ante");
+ this.extractHalfHour("until.half.hour.before", "end", "ante");
+ this.extractHalfHour("from.half.hour.after", "start", "post");
+ this.extractHalfHour("until.half.hour.after", "end", "post");
+
+ this.extractHourMinutes("from.hour.minutes", "start", "none");
+ this.extractHourMinutes("from.hour.minutes.am", "start", "ante");
+ this.extractHourMinutes("from.hour.minutes.pm", "start", "post");
+ this.extractHourMinutes("until.hour.minutes", "end", "none");
+ this.extractHourMinutes("until.hour.minutes.am", "end", "ante");
+ this.extractHourMinutes("until.hour.minutes.pm", "end", "post");
+
+ // date
+ this.extractRelativeDay("from.today", "start", 0);
+ this.extractRelativeDay("from.tomorrow", "start", 1);
+ this.extractRelativeDay("until.tomorrow", "end", 1);
+ this.extractWeekDay("from.weekday.", "start");
+ this.extractWeekDay("until.weekday.", "end");
+ this.extractDate("from.ordinal.date", "start");
+ this.extractDate("until.ordinal.date", "end");
+
+ this.extractDayMonth("from.month.day", "start");
+ this.extractDayMonthYear("from.year.month.day", "start");
+ this.extractDayMonth("until.month.day", "end");
+ this.extractDayMonthYear("until.year.month.day", "end");
+ this.extractDayMonthName("from.monthname.day", "start");
+ this.extractDayMonthNameYear("from.year.monthname.day", "start");
+ this.extractDayMonthName("until.monthname.day", "end");
+ this.extractDayMonthNameYear("until.year.monthname.day", "end");
+
+ // duration
+ this.extractDuration("duration.minutes", 1);
+ this.extractDuration("duration.hours", 60);
+ this.extractDuration("duration.days", 60 * 24);
+
+ if (sel !== undefined && sel !== null) {
+ this.markSelected(sel, title);
+ }
+ this.markContained();
+ this.collected = this.collected.sort(this.sort);
+
+ return this.collected;
+ },
+
+ extractDayMonthYear(pattern, relation) {
+ let alts = this.getRepPatterns(pattern, ["(\\d{1,2})", "(\\d{1,2})", "(\\d{2,4})"]);
+
+ let res;
+ for (let alt in alts) {
+ let positions = alts[alt].positions;
+ let re = new RegExp(alts[alt].pattern, "ig");
+
+ while ((res = re.exec(this.email)) != null) {
+ if (!this.limitNums(res, this.email) && !this.limitChars(res, this.email)) {
+ let day = parseInt(res[positions[1]], 10);
+ let month = parseInt(res[positions[2]], 10);
+ let year = parseInt(this.normalizeYear(res[positions[3]]), 10);
+
+ if (this.isValidDay(day) && this.isValidMonth(month) && this.isValidYear(year)) {
+ let rev = this.prefixSuffixStartEnd(res, relation, this.email);
+ this.guess(
+ year,
+ month,
+ day,
+ null,
+ null,
+ rev.start,
+ rev.end,
+ rev.pattern,
+ rev.relation,
+ pattern
+ );
+ }
+ }
+ }
+ }
+ },
+
+ extractDayMonthNameYear(pattern, relation) {
+ let alts = this.getRepPatterns(pattern, [
+ "(\\d{1,2})",
+ "(" + this.allMonths + ")",
+ "(\\d{2,4})",
+ ]);
+
+ let res;
+ for (let alt in alts) {
+ let exp = alts[alt].pattern.split(this.marker).join("|");
+ let positions = alts[alt].positions;
+ let re = new RegExp(exp, "ig");
+
+ while ((res = re.exec(this.email)) != null) {
+ if (!this.limitNums(res, this.email) && !this.limitChars(res, this.email)) {
+ let day = parseInt(res[positions[1]], 10);
+ let month = res[positions[2]];
+ let year = parseInt(this.normalizeYear(res[positions[3]]), 10);
+
+ if (this.isValidDay(day)) {
+ for (let i = 0; i < 12; i++) {
+ if (this.months[i].split("|").includes(month.toLowerCase())) {
+ let rev = this.prefixSuffixStartEnd(res, relation, this.email);
+ this.guess(
+ year,
+ i + 1,
+ day,
+ null,
+ null,
+ rev.start,
+ rev.end,
+ rev.pattern,
+ rev.relation,
+ pattern
+ );
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+
+ extractRelativeDay(pattern, relation, offset) {
+ let re = new RegExp(this.getPatterns(pattern), "ig");
+ let res;
+ if ((res = re.exec(this.email)) != null) {
+ if (!this.limitChars(res, this.email)) {
+ let item = new Date(this.now.getTime() + 60 * 60 * 24 * 1000 * offset);
+ let rev = this.prefixSuffixStartEnd(res, relation, this.email);
+ this.guess(
+ item.getFullYear(),
+ item.getMonth() + 1,
+ item.getDate(),
+ null,
+ null,
+ rev.start,
+ rev.end,
+ rev.pattern,
+ rev.relation,
+ pattern
+ );
+ }
+ }
+ },
+
+ extractDayMonthName(pattern, relation) {
+ let alts = this.getRepPatterns(pattern, [
+ "(\\d{1,2}" + this.marker + this.dailyNumbers + ")",
+ "(" + this.allMonths + ")",
+ ]);
+ let res;
+ for (let alt in alts) {
+ let exp = alts[alt].pattern.split(this.marker).join("|");
+ let positions = alts[alt].positions;
+ let re = new RegExp(exp, "ig");
+
+ while ((res = re.exec(this.email)) != null) {
+ if (!this.limitNums(res, this.email) && !this.limitChars(res, this.email)) {
+ let day = this.parseNumber(res[positions[1]], this.numbers);
+ let month = res[positions[2]];
+
+ if (this.isValidDay(day)) {
+ for (let i = 0; i < 12; i++) {
+ let months = this.unescape(this.months[i]).split("|");
+ if (months.includes(month.toLowerCase())) {
+ let date = { year: this.now.getFullYear(), month: i + 1, day };
+ if (this.isPastDate(date, this.now)) {
+ // find next such date
+ let item = new Date(this.now.getTime());
+ while (true) {
+ item.setDate(item.getDate() + 1);
+ if (item.getMonth() == date.month - 1 && item.getDate() == date.day) {
+ date.year = item.getFullYear();
+ break;
+ }
+ }
+ }
+
+ let rev = this.prefixSuffixStartEnd(res, relation, this.email);
+ this.guess(
+ date.year,
+ date.month,
+ date.day,
+ null,
+ null,
+ rev.start,
+ rev.end,
+ rev.pattern,
+ rev.relation,
+ pattern
+ );
+ break;
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+
+ extractDayMonth(pattern, relation) {
+ let alts = this.getRepPatterns(pattern, ["(\\d{1,2})", "(\\d{1,2})"]);
+ let res;
+ for (let alt in alts) {
+ let re = new RegExp(alts[alt].pattern, "ig");
+ let positions = alts[alt].positions;
+
+ while ((res = re.exec(this.email)) != null) {
+ if (!this.limitNums(res, this.email) && !this.limitChars(res, this.email)) {
+ let day = parseInt(res[positions[1]], 10);
+ let month = parseInt(res[positions[2]], 10);
+
+ if (this.isValidMonth(month) && this.isValidDay(day)) {
+ let date = { year: this.now.getFullYear(), month, day };
+
+ if (this.isPastDate(date, this.now)) {
+ // find next such date
+ let item = new Date(this.now.getTime());
+ while (true) {
+ item.setDate(item.getDate() + 1);
+ if (item.getMonth() == date.month - 1 && item.getDate() == date.day) {
+ date.year = item.getFullYear();
+ break;
+ }
+ }
+ }
+
+ let rev = this.prefixSuffixStartEnd(res, relation, this.email);
+ this.guess(
+ date.year,
+ date.month,
+ date.day,
+ null,
+ null,
+ rev.start,
+ rev.end,
+ rev.pattern,
+ rev.relation,
+ pattern
+ );
+ }
+ }
+ }
+ }
+ },
+
+ extractDate(pattern, relation) {
+ let alts = this.getRepPatterns(pattern, ["(\\d{1,2}" + this.marker + this.dailyNumbers + ")"]);
+ let res;
+ for (let alt in alts) {
+ let exp = alts[alt].pattern.split(this.marker).join("|");
+ let re = new RegExp(exp, "ig");
+
+ while ((res = re.exec(this.email)) != null) {
+ if (!this.limitNums(res, this.email) && !this.limitChars(res, this.email)) {
+ let day = this.parseNumber(res[1], this.numbers);
+ if (this.isValidDay(day)) {
+ let item = new Date(this.now.getTime());
+ if (this.now.getDate() != day) {
+ // find next nth date
+ while (true) {
+ item.setDate(item.getDate() + 1);
+ if (item.getDate() == day) {
+ break;
+ }
+ }
+ }
+
+ let rev = this.prefixSuffixStartEnd(res, relation, this.email);
+ this.guess(
+ item.getFullYear(),
+ item.getMonth() + 1,
+ day,
+ null,
+ null,
+ rev.start,
+ rev.end,
+ rev.pattern,
+ rev.relation,
+ pattern,
+ true
+ );
+ }
+ }
+ }
+ }
+ },
+
+ extractWeekDay(pattern, relation) {
+ let days = [];
+ for (let i = 0; i < 7; i++) {
+ days[i] = this.getPatterns(pattern + i);
+ let re = new RegExp(days[i], "ig");
+ let res = re.exec(this.email);
+ if (res) {
+ if (!this.limitChars(res, this.email)) {
+ let date = new Date();
+ date.setDate(this.now.getDate());
+ date.setMonth(this.now.getMonth());
+ date.setYear(this.now.getFullYear());
+
+ let diff = (i - date.getDay() + 7) % 7;
+ date.setDate(date.getDate() + diff);
+
+ let rev = this.prefixSuffixStartEnd(res, relation, this.email);
+ this.guess(
+ date.getFullYear(),
+ date.getMonth() + 1,
+ date.getDate(),
+ null,
+ null,
+ rev.start,
+ rev.end,
+ rev.pattern,
+ rev.relation,
+ pattern + i,
+ true
+ );
+ }
+ }
+ }
+ },
+
+ extractHour(pattern, relation, meridiem) {
+ let alts = this.getRepPatterns(pattern, ["(\\d{1,2}" + this.marker + this.hourlyNumbers + ")"]);
+ let res;
+ for (let alt in alts) {
+ let exp = alts[alt].pattern.split(this.marker).join("|");
+ let re = new RegExp(exp, "ig");
+
+ while ((res = re.exec(this.email)) != null) {
+ if (!this.limitNums(res, this.email) && !this.limitChars(res, this.email)) {
+ let hour = this.parseNumber(res[1], this.numbers);
+
+ if (meridiem == "ante" && hour == 12) {
+ hour = hour - 12;
+ } else if (meridiem == "post" && hour != 12) {
+ hour = hour + 12;
+ } else {
+ hour = this.normalizeHour(hour);
+ }
+
+ if (this.isValidHour(res[1])) {
+ let rev = this.prefixSuffixStartEnd(res, relation, this.email);
+ this.guess(
+ null,
+ null,
+ null,
+ hour,
+ 0,
+ rev.start,
+ rev.end,
+ rev.pattern,
+ rev.relation,
+ pattern,
+ true
+ );
+ }
+ }
+ }
+ }
+ },
+
+ extractHalfHour(pattern, relation, direction) {
+ let alts = this.getRepPatterns(pattern, ["(\\d{1,2}" + this.marker + this.hourlyNumbers + ")"]);
+ let res;
+ for (let alt in alts) {
+ let exp = alts[alt].pattern.split(this.marker).join("|");
+ let re = new RegExp(exp, "ig");
+
+ while ((res = re.exec(this.email)) != null) {
+ if (!this.limitNums(res, this.email) && !this.limitChars(res, this.email)) {
+ let hour = this.parseNumber(res[1], this.numbers);
+
+ hour = this.normalizeHour(hour);
+ if (direction == "ante") {
+ if (hour == 1) {
+ hour = 12;
+ } else {
+ hour = hour - 1;
+ }
+ }
+
+ if (this.isValidHour(hour)) {
+ let rev = this.prefixSuffixStartEnd(res, relation, this.email);
+ this.guess(
+ null,
+ null,
+ null,
+ hour,
+ 30,
+ rev.start,
+ rev.end,
+ rev.pattern,
+ rev.relation,
+ pattern,
+ true
+ );
+ }
+ }
+ }
+ }
+ },
+
+ extractHourMinutes(pattern, relation, meridiem) {
+ let alts = this.getRepPatterns(pattern, ["(\\d{1,2})", "(\\d{2})"]);
+ let res;
+ for (let alt in alts) {
+ let positions = alts[alt].positions;
+ let re = new RegExp(alts[alt].pattern, "ig");
+
+ while ((res = re.exec(this.email)) != null) {
+ if (!this.limitNums(res, this.email) && !this.limitChars(res, this.email)) {
+ let hour = parseInt(res[positions[1]], 10);
+ let minute = parseInt(res[positions[2]], 10);
+
+ if (meridiem == "ante" && hour == 12) {
+ hour = hour - 12;
+ } else if (meridiem == "post" && hour != 12) {
+ hour = hour + 12;
+ } else {
+ hour = this.normalizeHour(hour);
+ }
+
+ if (this.isValidHour(hour) && this.isValidMinute(hour)) {
+ let rev = this.prefixSuffixStartEnd(res, relation, this.email);
+ this.guess(
+ null,
+ null,
+ null,
+ hour,
+ minute,
+ rev.start,
+ rev.end,
+ rev.pattern,
+ rev.relation,
+ pattern
+ );
+ }
+ }
+ }
+ }
+ },
+
+ extractTime(pattern, relation, hour, minute) {
+ let re = new RegExp(this.getPatterns(pattern), "ig");
+ let res;
+ if ((res = re.exec(this.email)) != null) {
+ if (!this.limitChars(res, this.email)) {
+ let rev = this.prefixSuffixStartEnd(res, relation, this.email);
+ this.guess(
+ null,
+ null,
+ null,
+ hour,
+ minute,
+ rev.start,
+ rev.end,
+ rev.pattern,
+ rev.relation,
+ pattern
+ );
+ }
+ }
+ },
+
+ extractDuration(pattern, unit) {
+ let alts = this.getRepPatterns(pattern, ["(\\d{1,2}" + this.marker + this.dailyNumbers + ")"]);
+ let res;
+ for (let alt in alts) {
+ let exp = alts[alt].pattern.split(this.marker).join("|");
+ let re = new RegExp(exp, "ig");
+
+ while ((res = re.exec(this.email)) != null) {
+ if (!this.limitNums(res, this.email) && !this.limitChars(res, this.email)) {
+ let length = this.parseNumber(res[1], this.numbers);
+ let guess = {};
+ let rev = this.prefixSuffixStartEnd(res, "duration", this.email);
+ guess.duration = length * unit;
+ guess.start = rev.start;
+ guess.end = rev.end;
+ guess.str = rev.pattern;
+ guess.relation = rev.relation;
+ guess.pattern = pattern;
+ this.collected.push(guess);
+ }
+ }
+ }
+ },
+
+ markContained() {
+ for (let outer = 0; outer < this.collected.length; outer++) {
+ for (let inner = 0; inner < this.collected.length; inner++) {
+ // included but not exactly the same
+ if (
+ outer != inner &&
+ this.collected[outer].start &&
+ this.collected[outer].end &&
+ this.collected[inner].start &&
+ this.collected[inner].end &&
+ this.collected[inner].start >= this.collected[outer].start &&
+ this.collected[inner].end <= this.collected[outer].end &&
+ !(
+ this.collected[inner].start == this.collected[outer].start &&
+ this.collected[inner].end == this.collected[outer].end
+ )
+ ) {
+ cal.LOG(
+ "[calExtract] " +
+ this.collected[outer].str +
+ " found as well, disgarding " +
+ this.collected[inner].str
+ );
+ this.collected[inner].relation = "notadatetime";
+ }
+ }
+ }
+ },
+
+ markSelected(sel, title) {
+ if (sel.rangeCount > 0) {
+ // mark the ones to not use
+ for (let i = 0; i < sel.rangeCount; i++) {
+ cal.LOG("[calExtract] Selection " + i + " is " + sel);
+ for (let j = 0; j < this.collected.length; j++) {
+ let selection = sel.getRangeAt(i).toString();
+
+ if (
+ !selection.includes(this.collected[j].str) &&
+ !title.includes(this.collected[j].str) &&
+ this.collected[j].start != null
+ ) {
+ // always keep email date, needed for tasks
+ cal.LOG(
+ "[calExtract] Marking " + JSON.stringify(this.collected[j]) + " as notadatetime"
+ );
+ this.collected[j].relation = "notadatetime";
+ }
+ }
+ }
+ }
+ },
+
+ sort(one, two) {
+ let rc;
+ // sort the guess from email date as the last one
+ if (one.start == null && two.start != null) {
+ return 1;
+ } else if (one.start != null && two.start == null) {
+ return -1;
+ } else if (one.start == null && two.start == null) {
+ return 0;
+ // sort dates before times
+ } else if (one.year != null && two.year == null) {
+ return -1;
+ } else if (one.year == null && two.year != null) {
+ return 1;
+ } else if (one.year != null && two.year != null) {
+ rc = (one.year > two.year) - (one.year < two.year);
+ if (rc == 0) {
+ rc = (one.month > two.month) - (one.month < two.month);
+ if (rc == 0) {
+ rc = (one.day > two.day) - (one.day < two.day);
+ }
+ }
+ return rc;
+ }
+ rc = (one.hour > two.hour) - (one.hour < two.hour);
+ if (rc == 0) {
+ rc = (one.minute > two.minute) - (one.minute < two.minute);
+ }
+ return rc;
+ },
+
+ /**
+ * Guesses start time from list of guessed datetimes
+ *
+ * @param isTask whether start time should be guessed for task or event
+ * @returns datetime object for start time
+ */
+ guessStart(isTask) {
+ let startTimes = this.collected.filter(val => val.relation == "start");
+ if (startTimes.length == 0) {
+ return {};
+ }
+
+ for (let val in startTimes) {
+ cal.LOG("[calExtract] Start: " + JSON.stringify(startTimes[val]));
+ }
+
+ let guess = {};
+ let wDayInit = startTimes.filter(val => val.day != null && val.start === undefined);
+
+ // with tasks we don't try to guess start but assume email date
+ if (isTask) {
+ guess.year = wDayInit[0].year;
+ guess.month = wDayInit[0].month;
+ guess.day = wDayInit[0].day;
+ guess.hour = wDayInit[0].hour;
+ guess.minute = wDayInit[0].minute;
+ return guess;
+ }
+
+ let wDay = startTimes.filter(val => val.day != null && val.start !== undefined);
+ let wDayNA = wDay.filter(val => val.ambiguous === undefined);
+
+ let wMinute = startTimes.filter(val => val.minute != null && val.start !== undefined);
+ let wMinuteNA = wMinute.filter(val => val.ambiguous === undefined);
+
+ if (wMinuteNA.length != 0) {
+ guess.hour = wMinuteNA[0].hour;
+ guess.minute = wMinuteNA[0].minute;
+ } else if (wMinute.length != 0) {
+ guess.hour = wMinute[0].hour;
+ guess.minute = wMinute[0].minute;
+ }
+
+ // first use unambiguous guesses
+ if (wDayNA.length != 0) {
+ guess.year = wDayNA[0].year;
+ guess.month = wDayNA[0].month;
+ guess.day = wDayNA[0].day;
+ // then also ambiguous ones
+ } else if (wDay.length != 0) {
+ guess.year = wDay[0].year;
+ guess.month = wDay[0].month;
+ guess.day = wDay[0].day;
+ // next possible day considering time
+ } else if (
+ guess.hour != null &&
+ (wDayInit[0].hour > guess.hour ||
+ (wDayInit[0].hour == guess.hour && wDayInit[0].minute > guess.minute))
+ ) {
+ let nextDay = new Date(wDayInit[0].year, wDayInit[0].month - 1, wDayInit[0].day);
+ nextDay.setTime(nextDay.getTime() + 60 * 60 * 24 * 1000);
+ guess.year = nextDay.getFullYear();
+ guess.month = nextDay.getMonth() + 1;
+ guess.day = nextDay.getDate();
+ // and finally when nothing was found then use initial guess from send time
+ } else {
+ guess.year = wDayInit[0].year;
+ guess.month = wDayInit[0].month;
+ guess.day = wDayInit[0].day;
+ }
+
+ cal.LOG("[calExtract] Start picked: " + JSON.stringify(guess));
+ return guess;
+ },
+
+ /**
+ * Guesses end time from list of guessed datetimes relative to start time
+ *
+ * @param start start time to consider when guessing
+ * @param doGuessStart whether start time should be guessed for task or event
+ * @returns datetime object for end time
+ */
+ guessEnd(start, doGuessStart) {
+ let guess = {};
+ let endTimes = this.collected.filter(val => val.relation == "end");
+ let durations = this.collected.filter(val => val.relation == "duration");
+ if (endTimes.length == 0 && durations.length == 0) {
+ return {};
+ }
+ for (let val in endTimes) {
+ cal.LOG("[calExtract] End: " + JSON.stringify(endTimes[val]));
+ }
+
+ let wDay = endTimes.filter(val => val.day != null);
+ let wDayNA = wDay.filter(val => val.ambiguous === undefined);
+ let wMinute = endTimes.filter(val => val.minute != null);
+ let wMinuteNA = wMinute.filter(val => val.ambiguous === undefined);
+
+ // first set non-ambiguous dates
+ let pos = doGuessStart ? 0 : wDayNA.length - 1;
+ if (wDayNA.length != 0) {
+ guess.year = wDayNA[pos].year;
+ guess.month = wDayNA[pos].month;
+ guess.day = wDayNA[pos].day;
+ // then ambiguous dates
+ } else if (wDay.length != 0) {
+ pos = doGuessStart ? 0 : wDay.length - 1;
+ guess.year = wDay[pos].year;
+ guess.month = wDay[pos].month;
+ guess.day = wDay[pos].day;
+ }
+
+ // then non-ambiguous times
+ if (wMinuteNA.length != 0) {
+ pos = doGuessStart ? 0 : wMinuteNA.length - 1;
+ guess.hour = wMinuteNA[pos].hour;
+ guess.minute = wMinuteNA[pos].minute;
+ if (guess.day == null || guess.day == start.day) {
+ if (
+ wMinuteNA[pos].hour < start.hour ||
+ (wMinuteNA[pos].hour == start.hour && wMinuteNA[pos].minute < start.minute)
+ ) {
+ let nextDay = new Date(start.year, start.month - 1, start.day);
+ nextDay.setTime(nextDay.getTime() + 60 * 60 * 24 * 1000);
+ guess.year = nextDay.getFullYear();
+ guess.month = nextDay.getMonth() + 1;
+ guess.day = nextDay.getDate();
+ }
+ }
+ // and ambiguous times
+ } else if (wMinute.length != 0) {
+ pos = doGuessStart ? 0 : wMinute.length - 1;
+ guess.hour = wMinute[pos].hour;
+ guess.minute = wMinute[pos].minute;
+ if (guess.day == null || guess.day == start.day) {
+ if (
+ wMinute[pos].hour < start.hour ||
+ (wMinute[pos].hour == start.hour && wMinute[pos].minute < start.minute)
+ ) {
+ let nextDay = new Date(start.year, start.month - 1, start.day);
+ nextDay.setTime(nextDay.getTime() + 60 * 60 * 24 * 1000);
+ guess.year = nextDay.getFullYear();
+ guess.month = nextDay.getMonth() + 1;
+ guess.day = nextDay.getDate();
+ }
+ }
+ }
+
+ // fill in date when time was guessed
+ if (guess.minute != null && guess.day == null) {
+ guess.year = start.year;
+ guess.month = start.month;
+ guess.day = start.day;
+ }
+
+ // fill in end from total duration
+ if (guess.day == null && guess.hour == null) {
+ let duration = 0;
+
+ for (let val in durations) {
+ duration += durations[val].duration;
+ cal.LOG("[calExtract] Dur: " + JSON.stringify(durations[val]));
+ }
+
+ if (duration != 0) {
+ let startDate = new Date(start.year, start.month - 1, start.day);
+ if ("hour" in start) {
+ startDate.setHours(start.hour);
+ startDate.setMinutes(start.minute);
+ } else {
+ startDate.setHours(0);
+ startDate.setMinutes(0);
+ }
+
+ let endTime = new Date(startDate.getTime() + duration * 60 * 1000);
+ guess.year = endTime.getFullYear();
+ guess.month = endTime.getMonth() + 1;
+ guess.day = endTime.getDate();
+ if (!(endTime.getHours() == 0 && endTime.getMinutes() == 0)) {
+ guess.hour = endTime.getHours();
+ guess.minute = endTime.getMinutes();
+ }
+ }
+ }
+
+ // no zero or negative length events/tasks
+ let startTime = new Date(
+ start.year || 0,
+ start.month - 1 || 0,
+ start.day || 0,
+ start.hour || 0,
+ start.minute || 0
+ ).getTime();
+ let guessTime = new Date(
+ guess.year || 0,
+ guess.month - 1 || 0,
+ guess.day || 0,
+ guess.hour || 0,
+ guess.minute || 0
+ ).getTime();
+ if (guessTime <= startTime) {
+ guess.year = null;
+ guess.month = null;
+ guess.day = null;
+ guess.hour = null;
+ guess.minute = null;
+ }
+
+ if (guess.year != null && guess.minute == null && doGuessStart) {
+ guess.hour = 0;
+ guess.minute = 0;
+ }
+
+ cal.LOG("[calExtract] End picked: " + JSON.stringify(guess));
+ return guess;
+ },
+
+ getPatterns(name) {
+ let value;
+ try {
+ value = this.bundle.GetStringFromName(name);
+ if (value.trim() == "") {
+ cal.LOG("[calExtract] Pattern not found: " + name);
+ return this.defPattern;
+ }
+
+ let vals = this.cleanPatterns(value).split("|");
+ for (let idx = vals.length - 1; idx >= 0; idx--) {
+ if (vals[idx].trim() == "") {
+ vals.splice(idx, 1);
+ console.error("[calExtract] Faulty extraction pattern " + value + " for " + name);
+ }
+ }
+
+ if (this.overrides[name] !== undefined && this.overrides[name].add !== undefined) {
+ let additions = this.overrides[name].add;
+ additions = this.cleanPatterns(additions).split("|");
+ for (let pattern in additions) {
+ vals.push(additions[pattern]);
+ cal.LOG("[calExtract] Added " + additions[pattern] + " to " + name);
+ }
+ }
+
+ if (this.overrides[name] !== undefined && this.overrides[name].remove !== undefined) {
+ let removals = this.overrides[name].remove;
+ removals = this.cleanPatterns(removals).split("|");
+ for (let pattern in removals) {
+ let idx = vals.indexOf(removals[pattern]);
+ if (idx != -1) {
+ vals.splice(idx, 1);
+ cal.LOG("[calExtract] Removed " + removals[pattern] + " from " + name);
+ }
+ }
+ }
+
+ vals.sort((a, b) => b.length - a.length);
+ return vals.join("|");
+ } catch (ex) {
+ cal.LOG("[calExtract] Pattern not found: " + name);
+
+ // fake a value to avoid empty regexes creating endless loops
+ return this.defPattern;
+ }
+ },
+
+ getRepPatterns(name, replaceables) {
+ let alts = [];
+ let patterns = [];
+
+ try {
+ let value = this.bundle.GetStringFromName(name);
+ if (value.trim() == "") {
+ cal.LOG("[calExtract] Pattern empty: " + name);
+ return alts;
+ }
+
+ let vals = this.cleanPatterns(value).split("|");
+ for (let idx = vals.length - 1; idx >= 0; idx--) {
+ if (vals[idx].trim() == "") {
+ vals.splice(idx, 1);
+ console.error("[calExtract] Faulty extraction pattern " + value + " for " + name);
+ }
+ }
+
+ if (this.overrides[name] !== undefined && this.overrides[name].add !== undefined) {
+ let additions = this.overrides[name].add;
+ additions = this.cleanPatterns(additions).split("|");
+ for (let pattern in additions) {
+ vals.push(additions[pattern]);
+ cal.LOG("[calExtract] Added " + additions[pattern] + " to " + name);
+ }
+ }
+
+ if (this.overrides[name] !== undefined && this.overrides[name].remove !== undefined) {
+ let removals = this.overrides[name].remove;
+ removals = this.cleanPatterns(removals).split("|");
+ for (let pattern in removals) {
+ let idx = vals.indexOf(removals[pattern]);
+ if (idx != -1) {
+ vals.splice(idx, 1);
+ cal.LOG("[calExtract] Removed " + removals[pattern] + " from " + name);
+ }
+ }
+ }
+
+ vals.sort((a, b) => b.length - a.length);
+ for (let val in vals) {
+ let pattern = vals[val];
+ for (let cnt = 1; cnt <= replaceables.length; cnt++) {
+ pattern = pattern.split("#" + cnt).join(replaceables[cnt - 1]);
+ }
+ patterns.push(pattern);
+ }
+
+ for (let val in vals) {
+ let positions = [];
+ if (replaceables.length == 1) {
+ positions[1] = 1;
+ } else {
+ positions = this.getPositionsFor(vals[val], name, replaceables.length);
+ }
+ alts[val] = { pattern: patterns[val], positions };
+ }
+ } catch (ex) {
+ cal.LOG("[calExtract] Pattern not found: " + name);
+ }
+ return alts;
+ },
+
+ getPositionsFor(str, name, count) {
+ let positions = [];
+ let re = /#(\d)/g;
+ let match;
+ let i = 0;
+ while ((match = re.exec(str))) {
+ i++;
+ positions[parseInt(match[1], 10)] = i;
+ }
+
+ // correctness checking
+ for (i = 1; i <= count; i++) {
+ if (positions[i] === undefined) {
+ console.error(
+ "[calExtract] Faulty extraction pattern " + name + ", missing parameter #" + i
+ );
+ }
+ }
+ return positions;
+ },
+
+ cleanPatterns(pattern) {
+ // remove whitespace around | if present
+ let value = pattern.replace(/\s*\|\s*/g, "|");
+ // allow matching for patterns with missing or excessive whitespace
+ return this.sanitize(value).replace(/\s+/g, "\\s*");
+ },
+
+ isValidYear(year) {
+ return year >= 2000 && year <= 2050;
+ },
+
+ isValidMonth(month) {
+ return month >= 1 && month <= 12;
+ },
+
+ isValidDay(day) {
+ return day >= 1 && day <= 31;
+ },
+
+ isValidHour(hour) {
+ return hour >= 0 && hour <= 23;
+ },
+
+ isValidMinute(minute) {
+ return minute >= 0 && minute <= 59;
+ },
+
+ isPastDate(date, referenceDate) {
+ // avoid changing original refDate
+ let refDate = new Date(referenceDate.getTime());
+ refDate.setHours(0);
+ refDate.setMinutes(0);
+ refDate.setSeconds(0);
+ refDate.setMilliseconds(0);
+ let jsDate;
+ if (date.day != null) {
+ jsDate = new Date(date.year, date.month - 1, date.day);
+ }
+ return jsDate < refDate;
+ },
+
+ normalizeHour(hour) {
+ if (hour < this.dayStart && hour <= 11) {
+ return hour + 12;
+ }
+ return hour;
+ },
+
+ normalizeYear(year) {
+ return year.length == 2 ? "20" + year : year;
+ },
+
+ limitNums(res, email) {
+ let pattern = email.substring(res.index, res.index + res[0].length);
+ let before = email.charAt(res.index - 1);
+ let after = email.charAt(res.index + res[0].length);
+ let result =
+ (/\d/.exec(before) && /\d/.exec(pattern.charAt(0))) ||
+ (/\d/.exec(pattern.charAt(pattern.length - 1)) && /\d/.exec(after));
+ return result != null;
+ },
+
+ limitChars(res, email) {
+ let alphabet = this.getPatterns("alphabet");
+ // for languages without regular alphabet surrounding characters are ignored
+ if (alphabet == this.defPattern) {
+ return false;
+ }
+
+ let pattern = email.substring(res.index, res.index + res[0].length);
+ let before = email.charAt(res.index - 1);
+ let after = email.charAt(res.index + res[0].length);
+
+ let re = new RegExp("[" + alphabet + "]");
+ let result =
+ (re.exec(before) && re.exec(pattern.charAt(0))) ||
+ (re.exec(pattern.charAt(pattern.length - 1)) && re.exec(after));
+ return result != null;
+ },
+
+ prefixSuffixStartEnd(res, relation, email) {
+ let pattern = email.substring(res.index, res.index + res[0].length);
+ let prev = email.substring(0, res.index);
+ let next = email.substring(res.index + res[0].length);
+ let prefixSuffix = {
+ start: res.index,
+ end: res.index + res[0].length,
+ pattern,
+ relation,
+ };
+ let char = "\\s*";
+ let psres;
+
+ let re = new RegExp("(" + this.getPatterns("end.prefix") + ")" + char + "$", "ig");
+ if ((psres = re.exec(prev)) != null) {
+ prefixSuffix.relation = "end";
+ prefixSuffix.start = psres.index;
+ prefixSuffix.pattern = psres[0] + pattern;
+ }
+
+ re = new RegExp("^" + char + "(" + this.getPatterns("end.suffix") + ")", "ig");
+ if ((psres = re.exec(next)) != null) {
+ prefixSuffix.relation = "end";
+ prefixSuffix.end = prefixSuffix.end + psres[0].length;
+ prefixSuffix.pattern = pattern + psres[0];
+ }
+
+ re = new RegExp("(" + this.getPatterns("start.prefix") + ")" + char + "$", "ig");
+ if ((psres = re.exec(prev)) != null) {
+ prefixSuffix.relation = "start";
+ prefixSuffix.start = psres.index;
+ prefixSuffix.pattern = psres[0] + pattern;
+ }
+
+ re = new RegExp("^" + char + "(" + this.getPatterns("start.suffix") + ")", "ig");
+ if ((psres = re.exec(next)) != null) {
+ prefixSuffix.relation = "start";
+ prefixSuffix.end = prefixSuffix.end + psres[0].length;
+ prefixSuffix.pattern = pattern + psres[0];
+ }
+
+ re = new RegExp("\\s(" + this.getPatterns("no.datetime.prefix") + ")" + char + "$", "ig");
+
+ if ((psres = re.exec(prev)) != null) {
+ prefixSuffix.relation = "notadatetime";
+ }
+
+ re = new RegExp("^" + char + "(" + this.getPatterns("no.datetime.suffix") + ")", "ig");
+ if ((psres = re.exec(next)) != null) {
+ prefixSuffix.relation = "notadatetime";
+ }
+
+ return prefixSuffix;
+ },
+
+ parseNumber(numberString, numbers) {
+ let number = parseInt(numberString, 10);
+ // number comes in as plain text, numbers are already adjusted for usage
+ // in regular expression
+ let cleanNumberString = this.cleanPatterns(numberString);
+ if (isNaN(number)) {
+ for (let i = 0; i <= 31; i++) {
+ let numberparts = numbers[i].split("|");
+ if (numberparts.includes(cleanNumberString.toLowerCase())) {
+ return i;
+ }
+ }
+ return -1;
+ }
+ return number;
+ },
+
+ guess(year, month, day, hour, minute, start, end, str, relation, pattern, ambiguous) {
+ let dateGuess = {
+ year,
+ month,
+ day,
+ hour,
+ minute,
+ start,
+ end,
+ str,
+ relation,
+ pattern,
+ ambiguous,
+ };
+
+ // past dates are kept for containment checks
+ if (this.isPastDate(dateGuess, this.now)) {
+ dateGuess.relation = "notadatetime";
+ }
+ this.collected.push(dateGuess);
+ },
+
+ sanitize(str) {
+ return str.replace(/[-[\]{}()*+?.,\\^$]/g, "\\$&");
+ },
+
+ unescape(str) {
+ return str.replace(/\\([.])/g, "$1");
+ },
+};