summaryrefslogtreecommitdiffstats
path: root/comm/mailnews/base/src/converterWorker.js
diff options
context:
space:
mode:
Diffstat (limited to 'comm/mailnews/base/src/converterWorker.js')
-rw-r--r--comm/mailnews/base/src/converterWorker.js533
1 files changed, 533 insertions, 0 deletions
diff --git a/comm/mailnews/base/src/converterWorker.js b/comm/mailnews/base/src/converterWorker.js
new file mode 100644
index 0000000000..188476c1e1
--- /dev/null
+++ b/comm/mailnews/base/src/converterWorker.js
@@ -0,0 +1,533 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* eslint-env mozilla/chrome-worker, node */
+
+/**
+ * This worker will perform mbox<->maildir conversions on a tree of
+ * directories. It operates purely at the filesystem level.
+ *
+ * The initial message data should pass in these params to control
+ * the conversion:
+ *
+ * srcType - source mailstore type ('mbox' or 'maildir')
+ * destType - destination mailstore type ('maildir' or 'mbox')
+ * srcRoot - root path of source (eg ".../ImapMail/imap.example.com")
+ * destRoot - root path of destination (eg "/tmp/imap.example.com-maildir")
+ *
+ * The conversion is non-destructive - srcRoot will be left untouched.
+ *
+ * The worker will post progress messages back to the main thread of
+ * the form:
+ *
+ * {"msg": "progress", "val": val, "total": total}
+ *
+ * Where `val` is the current progress, out of `total`.
+ * The units used for val and total are undefined.
+ *
+ * When the conversion is complete, before exiting, the worker sends a
+ * message of the form:
+ *
+ * {"msg": "success"}
+ *
+ * Errors are posted back to the main thread via the standard
+ * "error" event.
+ *
+ */
+
+/**
+ * Merge all the messages in a maildir into a single mbox file.
+ *
+ * @param {string} maildir - Path to the source maildir.
+ * @param {string} mboxFilename - Path of the mbox file to create.
+ * @param {Function(number)} progressFn - Function to be invoked regularly with
+ * progress updates. Param is number of
+ * "units" processed since last update.
+ */
+async function maildirToMBox(maildir, mboxFilename, progressFn) {
+ // Helper to format dates
+ // eg "Thu Jan 18 12:34:56 2018"
+ let fmtUTC = function (d) {
+ const dayNames = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];
+ const monthNames = [
+ "Jan",
+ "Feb",
+ "Mar",
+ "Apr",
+ "May",
+ "Jun",
+ "Jul",
+ "Aug",
+ "Sep",
+ "Oct",
+ "Nov",
+ "Dec",
+ ];
+ return (
+ dayNames[d.getUTCDay()] +
+ " " +
+ monthNames[d.getUTCMonth()] +
+ " " +
+ d.getUTCDate().toString().padStart(2) +
+ " " +
+ d.getUTCHours().toString().padStart(2, "0") +
+ ":" +
+ d.getUTCMinutes().toString().padStart(2, "0") +
+ ":" +
+ d.getUTCSeconds().toString().padStart(2, "0") +
+ " " +
+ d.getUTCFullYear()
+ );
+ };
+
+ // Initialize mbox file
+ await IOUtils.write(mboxFilename, new Uint8Array(), {
+ mode: "create",
+ });
+
+ // Iterate over all the message files in "cur".
+ let curPath = PathUtils.join(maildir, "cur");
+ let paths = await IOUtils.getChildren(curPath);
+ let files = await Promise.all(
+ paths.map(async path => {
+ let stat = await IOUtils.stat(path);
+ return {
+ path,
+ creationDate: stat.creationTime,
+ };
+ })
+ );
+ // We write out the mbox messages ordered by creation time.
+ // Not ideal, but best we can do without parsing message.
+ files.sort(function (a, b) {
+ return a.creationDate - b.creationDate;
+ });
+
+ for (let ent of files) {
+ let raw = await IOUtils.read(ent.path);
+ // Old converter had a bug where maildir messages included the
+ // leading "From " marker, so we need to cope with any
+ // cases of this left in the wild.
+ if (String.fromCharCode.apply(null, raw.slice(0, 5)) != "From ") {
+ // Write the separator line.
+ // Technically, timestamp should be the reception time of the
+ // message, but we don't really want to have to parse the
+ // message here and nothing is likely to rely on it.
+ let sepLine = "From - " + fmtUTC(new Date()) + "\n";
+ await IOUtils.writeUTF8(mboxFilename, sepLine, {
+ mode: "append",
+ });
+ }
+
+ await IOUtils.write(mboxFilename, raw, {
+ mode: "append",
+ });
+ // Maildir progress is one per message.
+ progressFn(1);
+ }
+}
+
+/**
+ * Split an mbox file up into a maildir.
+ *
+ * @param {string} mboxPath - Path of the mbox file to split.
+ * @param {string} maildirPath - Path of the maildir to create.
+ * @param {Function(number)} progressFn - Function to be invoked regularly with
+ * progress updates. One parameter is
+ * passed - the number of "cost units"
+ * since the previous update.
+ */
+async function mboxToMaildir(mboxPath, maildirPath, progressFn) {
+ // Create the maildir structure.
+ await IOUtils.makeDirectory(maildirPath);
+ let curDirPath = PathUtils.join(maildirPath, "cur");
+ let tmpDirPath = PathUtils.join(maildirPath, "tmp");
+ await IOUtils.makeDirectory(curDirPath);
+ await IOUtils.makeDirectory(tmpDirPath);
+
+ const CHUNK_SIZE = 1000000;
+ // SAFE_MARGIN is how much to keep back between chunks in order to
+ // cope with separator lines which might span chunks.
+ const SAFE_MARGIN = 100;
+
+ // A regexp to match mbox separator lines. Separator lines in the wild can
+ // have all sorts of forms, for example:
+ //
+ // "From "
+ // "From MAILER-DAEMON Fri Jul 8 12:08:34 2011"
+ // "From - Mon Jul 11 12:08:34 2011"
+ // "From bob@example.com Fri Jul 8 12:08:34 2011"
+ //
+ // So we accept any line beginning with "From " and ignore the rest of it.
+ //
+ // We also require a message header on the next line, in order
+ // to better cope with unescaped "From " lines in the message body.
+ // note: the first subexpression matches the separator line, so
+ // that it can be removed from the input.
+ let sepRE = /^(From (?:.*?)\r?\n)[\x21-\x7E]+:/gm;
+
+ // Use timestamp as starting name for output messages, incrementing
+ // by one for each.
+ let ident = Date.now();
+
+ /**
+ * Helper. Convert a string into a Uint8Array, using no encoding. The low
+ * byte of each 16 bit character will be used, the high byte discarded.
+ *
+ * @param {string} str - Input string with chars in 0-255 range.
+ * @returns {Uint8Array} The output bytes.
+ */
+ let stringToBytes = function (str) {
+ var bytes = new Uint8Array(str.length);
+ for (let i = 0; i < str.length; i++) {
+ bytes[i] = str.charCodeAt(i);
+ }
+ return bytes;
+ };
+
+ /**
+ * Helper. Convert a Uint8Array directly into a string, using each byte
+ * directly as a character code. So all characters in the resulting string
+ * will range from 0 to 255, even though they are 16 bit values.
+ *
+ * @param {Uint8Array} bytes - The bytes to convert.
+ * @returns {string} The byte values in string form.
+ */
+ let bytesToString = function (bytes) {
+ return bytes.reduce(function (str, b) {
+ return str + String.fromCharCode(b);
+ }, "");
+ };
+
+ let outPath;
+
+ /**
+ * Helper. Write out a block of bytes to the current message file, starting
+ * a new file if required.
+ *
+ * @param {string} str - The bytes to append (as chars in range 0-255).
+ */
+ let writeToMsg = async function (str) {
+ let mode = "append";
+ if (!outPath) {
+ outPath = PathUtils.join(curDirPath, ident.toString() + ".eml");
+ ident += 1;
+ mode = "create";
+ }
+ // We know that str is really raw 8-bit data, not UTF-16. So we can
+ // discard the upper byte and just keep the low byte of each char.
+ let raw = stringToBytes(str);
+ await IOUtils.write(outPath, raw, { mode });
+ // For mbox->maildir conversion, progress is measured in bytes.
+ progressFn(raw.byteLength);
+ };
+
+ let buf = "";
+ let eof = false;
+ let offset = 0;
+ while (!eof) {
+ let rawBytes = await IOUtils.read(mboxPath, {
+ offset,
+ maxBytes: CHUNK_SIZE,
+ });
+ // We're using JavaScript strings (which hold 16bit characters) to store
+ // 8 bit data. This sucks, but is faster than trying to operate directly
+ // upon Uint8Arrays. A lot of work goes into optimising JavaScript strings.
+ buf += bytesToString(rawBytes);
+ offset += rawBytes.byteLength;
+ eof = rawBytes.byteLength < CHUNK_SIZE;
+
+ let pos = 0;
+ sepRE.lastIndex = 0; // start at beginning of buf
+ let m = null;
+ while ((m = sepRE.exec(buf)) !== null) {
+ // Output everything up to the line separator.
+ if (m.index > pos) {
+ await writeToMsg(buf.substring(pos, m.index));
+ }
+ pos = m.index;
+ pos += m[1].length; // skip the "From " line
+ // Reset the current message file path if any.
+ if (outPath) {
+ outPath = null;
+ }
+ }
+
+ // Deal with whatever is left in the buffer.
+ let endPos = buf.length;
+ if (!eof) {
+ // Keep back enough to cope with separator lines crossing
+ // chunk boundaries.
+ endPos -= SAFE_MARGIN;
+ if (endPos < pos) {
+ endPos = pos;
+ }
+ }
+
+ if (endPos > pos) {
+ await writeToMsg(buf.substring(pos, endPos));
+ }
+ buf = buf.substring(endPos);
+ }
+}
+
+/**
+ * Check if directory is a subfolder directory.
+ *
+ * @param {string} name - Name of directory to check.
+ * @returns {boolean} - true if subfolder.
+ */
+function isSBD(name) {
+ return name.substr(-4) == ".sbd";
+}
+
+/**
+ * Check if file is a type which should be copied verbatim as part of a
+ * conversion.
+ * See also: nsMsgLocalStoreUtils::nsShouldIgnoreFile().
+ *
+ * @param {string} name - Name of file to check.
+ * @returns {boolean} - true if file should be copied verbatim.
+ */
+function isFileToCopy(name) {
+ let ext4 = name.substr(-4);
+ // Database and config files.
+ if (ext4 == ".msf" || ext4 == ".dat") {
+ return true;
+ }
+ // Summary files.
+ if (ext4 == ".snm" || ext4 == ".toc") {
+ return true;
+ }
+ // A few files we know might be lurking there.
+ const SPECIAL_FILES = [
+ "filterlog.html",
+ "junklog.html",
+ "feeds.json",
+ "feeds.json.tmp",
+ "feeds.json.backup",
+ "feeds.json.corrupt",
+ "feeditems.json",
+ "feeditems.json.tmp",
+ "feeditems.json.backup",
+ "feeditems.json.corrupt",
+ "mailfilt.log",
+ "filters.js",
+ ];
+ if (SPECIAL_FILES.includes(name)) {
+ return true;
+ }
+ return false;
+}
+
+/**
+ * Check if file is an mbox.
+ * (actually we can't really tell if it's an mbox or not just from the name.
+ * we just assume it is, if it's not .msf or .dat).
+ *
+ * @param {string} name - Name of file to check.
+ * @returns {boolean} - true if file is an mbox
+ */
+function isMBoxName(name) {
+ // If it's not a "special" file, assume it's mbox.
+ return !isFileToCopy(name);
+}
+
+/**
+ * Check if directory is a maildir (by looking for a "cur" subdir).
+ *
+ * @param {string} dir - Path of directory to check.
+ * @returns {Promise<boolean>} - true if directory is a maildir.
+ */
+async function isMaildir(dir) {
+ try {
+ let cur = PathUtils.join(dir, "cur");
+ let fi = await IOUtils.stat(cur);
+ return fi.type === "directory";
+ } catch (ex) {
+ if (ex instanceof DOMException && ex.name === "NotFoundError") {
+ // "cur" does not exist - not a maildir.
+ return false;
+ }
+ throw ex; // Other error.
+ }
+}
+
+/**
+ * Count the number of messages in the "cur" dir of maildir.
+ *
+ * @param {string} maildir - Path of maildir.
+ * @returns {Promise<number>} - number of messages found.
+ */
+async function countMaildirMsgs(maildir) {
+ let cur = PathUtils.join(maildir, "cur");
+ let paths = await IOUtils.getChildren(cur);
+ return paths.length;
+}
+
+/**
+ * Recursively calculate the 'cost' of a hierarchy of maildir folders.
+ * This is the figure used for progress updates.
+ * For maildir, cost is 1 per message.
+ *
+ * @param {string} srcPath - Path of root dir containing maildirs.
+ * @returns {Promise<number>} - calculated conversion cost.
+ */
+async function calcMaildirCost(srcPath) {
+ let cost = 0;
+ for (let path of await IOUtils.getChildren(srcPath)) {
+ let stat = await IOUtils.stat(path);
+ if (stat.type === "directory") {
+ let name = PathUtils.filename(path);
+ if (isSBD(name)) {
+ // Recurse into subfolder.
+ cost += await calcMaildirCost(path);
+ } else if (await isMaildir(path)) {
+ // Looks like a maildir. Cost is number of messages.
+ cost += await countMaildirMsgs(path);
+ }
+ }
+ }
+ return cost;
+}
+
+/**
+ * Recursively calculate the 'cost' of a hierarchy of mbox folders.
+ * This is the figure used for progress updates.
+ * For mbox, cost is the total byte size of data. This avoids the need to
+ * parse the mbox files to count the number of messages.
+ * Note that this byte count cost is not 100% accurate because it includes
+ * the "From " lines which are not written into the maildir files. But it's
+ * definitely close enough to give good user feedback.
+ *
+ * @param {string} srcPath - Path of root dir containing maildirs.
+ * @returns {Promise<number>} - calculated conversion cost.
+ */
+async function calcMBoxCost(srcPath) {
+ let cost = 0;
+ for (const path of await IOUtils.getChildren(srcPath)) {
+ let stat = await IOUtils.stat(path);
+ let name = PathUtils.filename(path);
+ if (stat.type === "directory") {
+ if (isSBD(name)) {
+ // Recurse into .sbd subfolder.
+ cost += await calcMBoxCost(path);
+ }
+ } else if (isMBoxName(name)) {
+ cost += stat.size;
+ }
+ }
+ return cost;
+}
+
+/**
+ * Recursively convert a tree of mbox-based folders to maildirs.
+ *
+ * @param {string} srcPath - Root path containing mboxes.
+ * @param {string} destPath - Where to create destination root.
+ * @param {Function(number)} progressFn - Function to be invoked regularly with
+ * progress updates (called with number of
+ * cost "units" since last update)
+ */
+async function convertTreeMBoxToMaildir(srcPath, destPath, progressFn) {
+ await IOUtils.makeDirectory(destPath);
+
+ for (const path of await IOUtils.getChildren(srcPath)) {
+ let name = PathUtils.filename(path);
+ let dest = PathUtils.join(destPath, name);
+ let stat = await IOUtils.stat(path);
+ if (stat.type === "directory") {
+ if (isSBD(name)) {
+ // Recurse into .sbd subfolder.
+ await convertTreeMBoxToMaildir(path, dest, progressFn);
+ }
+ } else if (isFileToCopy(name)) {
+ await IOUtils.copy(path, dest);
+ } else if (isMBoxName(name)) {
+ // It's an mbox. Convert it.
+ await mboxToMaildir(path, dest, progressFn);
+ }
+ }
+}
+
+/**
+ * Recursively convert a tree of maildir-based folders to mbox.
+ *
+ * @param {string} srcPath - Root path containing maildirs.
+ * @param {string} destPath - Where to create destination root.
+ * @param {Function(number)} progressFn - Function to be invoked regularly with
+ * progress updates (called with number of
+ * cost "units" since last update)
+ */
+async function convertTreeMaildirToMBox(srcPath, destPath, progressFn) {
+ await IOUtils.makeDirectory(destPath);
+
+ for (let path of await IOUtils.getChildren(srcPath)) {
+ let name = PathUtils.filename(path);
+ let dest = PathUtils.join(destPath, name);
+ let stat = await IOUtils.stat(path);
+ if (stat.type === "directory") {
+ if (isSBD(name)) {
+ // Recurse into .sbd subfolder.
+ await convertTreeMaildirToMBox(path, dest, progressFn);
+ } else if (await isMaildir(path)) {
+ // It's a maildir - convert it.
+ await maildirToMBox(path, dest, progressFn);
+ }
+ } else if (isFileToCopy(name)) {
+ await IOUtils.copy(path, dest);
+ }
+ }
+}
+
+// propagate unhandled rejections to the error handler on the main thread
+self.addEventListener("unhandledrejection", function (error) {
+ throw error.reason;
+});
+
+self.addEventListener("message", function (e) {
+ // Unpack the request params from the main thread.
+ let srcType = e.data.srcType;
+ let destType = e.data.destType;
+ let srcRoot = e.data.srcRoot;
+ let destRoot = e.data.destRoot;
+ // destRoot will be a temporary dir, so if it all goes pear-shaped
+ // we can just bail out without cleaning up.
+
+ // Configure the conversion.
+ let costFn = null;
+ let convertFn = null;
+ if (srcType == "maildir" && destType == "mbox") {
+ costFn = calcMaildirCost;
+ convertFn = convertTreeMaildirToMBox;
+ } else if (srcType == "mbox" && destType == "maildir") {
+ costFn = calcMBoxCost;
+ convertFn = convertTreeMBoxToMaildir;
+ } else {
+ throw new Error(`Unsupported conversion: ${srcType} => ${destType}`);
+ }
+
+ // Go!
+ costFn(srcRoot).then(totalCost => {
+ let v = 0;
+ let progressFn = function (n) {
+ v += n;
+ self.postMessage({ msg: "progress", val: v, total: totalCost });
+ };
+ convertFn(srcRoot, destRoot, progressFn).then(() => {
+ // We fake a final progress update, with exactly 100% completed.
+ // Our byte-counting on mbox->maildir conversion will fall slightly short:
+ // The total is estimated from the mbox filesize, but progress is tracked
+ // by counting bytes as they are written out - and the mbox "From " lines
+ // are _not_ written out to the maildir files.
+ // This is still accurate enough to provide progress to the user, but we
+ // don't want the GUI left showing "progress 97% - conversion complete!"
+ // or anything silly like that.
+ self.postMessage({ msg: "progress", val: totalCost, total: totalCost });
+
+ // Let the main thread know we succeeded.
+ self.postMessage({ msg: "success" });
+ });
+ });
+});