/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* eslint-env mozilla/chrome-worker, node */

/**
 * This worker will perform mbox<->maildir conversions on a tree of
 * directories. It operates purely at the filesystem level.
 *
 * The initial message data should pass in these params to control
 * the conversion:
 *
 * srcType  - source mailstore type ('mbox' or 'maildir')
 * destType - destination mailstore type ('maildir' or 'mbox')
 * srcRoot  - root path of source (eg ".../ImapMail/imap.example.com")
 * destRoot - root path of destination (eg "/tmp/imap.example.com-maildir")
 *
 * The conversion is non-destructive - srcRoot will be left untouched.
 *
 * The worker will post progress messages back to the main thread of
 * the form:
 *
 *   {"msg": "progress", "val": val, "total": total}
 *
 * Where `val` is the current progress, out of `total`.
 * The units used for val and total are undefined.
 *
 * When the conversion is complete, before exiting, the worker sends a
 * message of the form:
 *
 *   {"msg": "success"}
 *
 * Errors are posted back to the main thread via the standard
 * "error" event.
 */

/**
 * Merge all the messages in a maildir into a single mbox file.
 *
 * Messages are written in order of file creation time. Where the platform
 * does not report a creation time, the last-modified time is used instead,
 * so that the output order does not depend on directory enumeration order.
 *
 * @param {string} maildir - Path to the source maildir.
 * @param {string} mboxFilename - Path of the mbox file to create.
 * @param {Function(number)} progressFn - Function to be invoked regularly with
 *                                        progress updates. Param is number of
 *                                        "units" processed since last update.
 */
async function maildirToMBox(maildir, mboxFilename, progressFn) {
  const DAY_NAMES = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];
  const MONTH_NAMES = [
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep",
    "Oct",
    "Nov",
    "Dec",
  ];

  // Helper to format dates
  // eg "Thu Jan 18 12:34:56 2018"
  // The day of month is space-padded, the time fields are zero-padded.
  const fmtUTC = function (d) {
    return (
      DAY_NAMES[d.getUTCDay()] +
      " " +
      MONTH_NAMES[d.getUTCMonth()] +
      " " +
      d.getUTCDate().toString().padStart(2, " ") +
      " " +
      d.getUTCHours().toString().padStart(2, "0") +
      ":" +
      d.getUTCMinutes().toString().padStart(2, "0") +
      ":" +
      d.getUTCSeconds().toString().padStart(2, "0") +
      " " +
      d.getUTCFullYear()
    );
  };

  // Initialize mbox file
  await IOUtils.write(mboxFilename, new Uint8Array(), {
    mode: "create",
  });

  // Iterate over all the message files in "cur".
  const curPath = PathUtils.join(maildir, "cur");
  const paths = await IOUtils.getChildren(curPath);
  const files = await Promise.all(
    paths.map(async path => {
      const stat = await IOUtils.stat(path);
      // creationTime is not reported on every platform. Without a fallback
      // the sort comparator would produce NaN and the order would be left
      // to chance.
      const creationDate =
        typeof stat.creationTime === "number"
          ? stat.creationTime
          : stat.lastModified;
      return { path, creationDate };
    })
  );

  // We write out the mbox messages ordered by creation time.
  // Not ideal, but best we can do without parsing message.
  files.sort(function (a, b) {
    return a.creationDate - b.creationDate;
  });

  const LF = 0x0a;
  for (const ent of files) {
    const raw = await IOUtils.read(ent.path);
    // Old converter had a bug where maildir messages included the
    // leading "From " marker, so we need to cope with any
    // cases of this left in the wild.
    if (String.fromCharCode.apply(null, raw.slice(0, 5)) != "From ") {
      // Write the separator line.
      // Technically, timestamp should be the reception time of the
      // message, but we don't really want to have to parse the
      // message here and nothing is likely to rely on it.
      const sepLine = "From - " + fmtUTC(new Date()) + "\n";
      await IOUtils.writeUTF8(mboxFilename, sepLine, {
        mode: "append",
      });
    }

    await IOUtils.write(mboxFilename, raw, {
      mode: "append",
    });

    // Make sure the message ends with a newline. Otherwise the separator
    // line of the next message would be glued onto the last line of this
    // one and the two messages would be merged when the mbox is read back.
    if (raw.length > 0 && raw[raw.length - 1] !== LF) {
      await IOUtils.writeUTF8(mboxFilename, "\n", {
        mode: "append",
      });
    }

    // Maildir progress is one per message.
    progressFn(1);
  }
}
/**
 * Split an mbox file up into a maildir.
 *
 * The mbox is read in fixed-size chunks. Everything between two separator
 * lines is written to its own file in the "cur" subdirectory; the separator
 * lines themselves are not written out.
 *
 * @param {string} mboxPath - Path of the mbox file to split.
 * @param {string} maildirPath - Path of the maildir to create.
 * @param {Function(number)} progressFn - Function to be invoked regularly with
 *                                        progress updates. One parameter is
 *                                        passed - the number of "cost units"
 *                                        since the previous update.
 */
async function mboxToMaildir(mboxPath, maildirPath, progressFn) {
  // Create the maildir structure.
  await IOUtils.makeDirectory(maildirPath);
  const curDirPath = PathUtils.join(maildirPath, "cur");
  const tmpDirPath = PathUtils.join(maildirPath, "tmp");
  await IOUtils.makeDirectory(curDirPath);
  await IOUtils.makeDirectory(tmpDirPath);

  const CHUNK_SIZE = 1000000;
  // SAFE_MARGIN is how much to keep back between chunks in order to
  // cope with separator lines which might span chunks.
  const SAFE_MARGIN = 100;

  // A regexp to match mbox separator lines. Separator lines in the wild can
  // have all sorts of forms, for example:
  //
  // "From "
  // "From MAILER-DAEMON Fri Jul  8 12:08:34 2011"
  // "From - Mon Jul 11 12:08:34 2011"
  // "From bob@example.com Fri Jul  8 12:08:34 2011"
  //
  // So we accept any line beginning with "From " and ignore the rest of it.
  //
  // We also require a message header on the next line, in order
  // to better cope with unescaped "From " lines in the message body.
  // note: the first subexpression matches the separator line, so
  // that it can be removed from the input.
  const sepRE = /^(From (?:.*?)\r?\n)[\x21-\x7E]+:/gm;

  // Use timestamp as starting name for output messages, incrementing
  // by one for each.
  let ident = Date.now();

  /**
   * Helper. Convert a string into a Uint8Array, using no encoding. The low
   * byte of each 16 bit character will be used, the high byte discarded.
   *
   * @param {string} str - Input string with chars in 0-255 range.
   * @returns {Uint8Array} The output bytes.
   */
  const stringToBytes = function (str) {
    const bytes = new Uint8Array(str.length);
    for (let i = 0; i < str.length; i++) {
      bytes[i] = str.charCodeAt(i);
    }
    return bytes;
  };

  /**
   * Helper. Convert a Uint8Array directly into a string, using each byte
   * directly as a character code. So all characters in the resulting string
   * will range from 0 to 255, even though they are 16 bit values.
   *
   * @param {Uint8Array} bytes - The bytes to convert.
   * @returns {string} The byte values in string form.
   */
  const bytesToString = function (bytes) {
    return bytes.reduce(function (str, b) {
      return str + String.fromCharCode(b);
    }, "");
  };

  // Path of the message file currently being written, or null if the next
  // write has to start a new message file.
  let outPath = null;

  /**
   * Helper. Write out a block of bytes to the current message file, starting
   * a new file if required.
   *
   * @param {string} str - The bytes to append (as chars in range 0-255).
   */
  const writeToMsg = async function (str) {
    let mode = "append";
    if (!outPath) {
      outPath = PathUtils.join(curDirPath, ident.toString() + ".eml");
      ident += 1;
      mode = "create";
    }
    // We know that str is really raw 8-bit data, not UTF-16. So we can
    // discard the upper byte and just keep the low byte of each char.
    const raw = stringToBytes(str);
    await IOUtils.write(outPath, raw, { mode });
    // For mbox->maildir conversion, progress is measured in bytes.
    progressFn(raw.byteLength);
  };

  let buf = "";
  // True when the first character of buf is the first character of a line.
  // After a chunk has been trimmed by SAFE_MARGIN the buffer may start in the
  // middle of a line, and the multiline "^" in sepRE would still match at
  // index 0 there.
  let bufAtLineStart = true;
  let eof = false;
  let offset = 0;
  while (!eof) {
    const rawBytes = await IOUtils.read(mboxPath, {
      offset,
      maxBytes: CHUNK_SIZE,
    });
    // We're using JavaScript strings (which hold 16bit characters) to store
    // 8 bit data. This sucks, but is faster than trying to operate directly
    // upon Uint8Arrays. A lot of work goes into optimising JavaScript strings.
    buf += bytesToString(rawBytes);
    offset += rawBytes.byteLength;
    eof = rawBytes.byteLength < CHUNK_SIZE;

    let pos = 0;
    sepRE.lastIndex = 0; // start at beginning of buf
    let m = null;
    while ((m = sepRE.exec(buf)) !== null) {
      if (m.index === 0 && !bufAtLineStart) {
        // The buffer starts mid-line, so this "From " is ordinary message
        // text which merely happens to sit at the chunk boundary.
        continue;
      }
      // Output everything up to the line separator.
      if (m.index > pos) {
        await writeToMsg(buf.substring(pos, m.index));
      }
      // Skip the "From " line.
      pos = m.index + m[1].length;
      // The next write has to start a new message file.
      outPath = null;
    }

    // Deal with whatever is left in the buffer.
    let endPos = buf.length;
    if (!eof) {
      // Keep back enough to cope with separator lines crossing
      // chunk boundaries.
      endPos = Math.max(endPos - SAFE_MARGIN, pos);
    }

    if (endPos > pos) {
      await writeToMsg(buf.substring(pos, endPos));
    }
    if (endPos > 0) {
      // Remember whether the retained part starts on a line boundary. The
      // same line terminators are used as the ones "^" recognises here.
      const prev = buf.charAt(endPos - 1);
      bufAtLineStart = prev === "\n" || prev === "\r";
    }
    buf = buf.substring(endPos);
  }
}

/**
 * Check if directory is a subfolder directory.
 *
 * @param {string} name - Name of directory to check.
 * @returns {boolean} - true if subfolder.
 */
function isSBD(name) {
  return name.endsWith(".sbd");
}

/**
 * Check if file is a type which should be copied verbatim as part of a
 * conversion.
 * See also: nsMsgLocalStoreUtils::nsShouldIgnoreFile().
 *
 * @param {string} name - Name of file to check.
 * @returns {boolean} - true if file should be copied verbatim.
 */
function isFileToCopy(name) {
  // Database and config files (.msf, .dat) and summary files (.snm, .toc).
  const VERBATIM_EXTENSIONS = [".msf", ".dat", ".snm", ".toc"];
  if (VERBATIM_EXTENSIONS.some(ext => name.endsWith(ext))) {
    return true;
  }

  // A few files we know might be lurking there.
  const SPECIAL_FILES = [
    "filterlog.html",
    "junklog.html",
    "feeds.json",
    "feeds.json.tmp",
    "feeds.json.backup",
    "feeds.json.corrupt",
    "feeditems.json",
    "feeditems.json.tmp",
    "feeditems.json.backup",
    "feeditems.json.corrupt",
    "mailfilt.log",
    "filters.js",
  ];
  return SPECIAL_FILES.includes(name);
}

/**
 * Check if file is an mbox.
 * (actually we can't really tell if it's an mbox or not just from the name.
 * we just assume it is, if it's not one of the files copied verbatim).
 *
 * @param {string} name - Name of file to check.
 * @returns {boolean} - true if file is an mbox
 */
function isMBoxName(name) {
  // If it's not a "special" file, assume it's mbox.
  return !isFileToCopy(name);
}
/**
 * Check if directory is a maildir (by looking for a "cur" subdir).
 *
 * @param {string} dir - Path of directory to check.
 * @returns {Promise} - true if directory is a maildir.
 */
async function isMaildir(dir) {
  let info;
  try {
    info = await IOUtils.stat(PathUtils.join(dir, "cur"));
  } catch (err) {
    const curIsMissing =
      err instanceof DOMException && err.name === "NotFoundError";
    if (!curIsMissing) {
      // Anything but a missing "cur" is a genuine failure.
      throw err;
    }
    // "cur" does not exist - not a maildir.
    return false;
  }
  return info.type === "directory";
}

/**
 * Count the number of messages in the "cur" dir of maildir.
 *
 * @param {string} maildir - Path of maildir.
 * @returns {Promise} - number of messages found.
 */
async function countMaildirMsgs(maildir) {
  const entries = await IOUtils.getChildren(PathUtils.join(maildir, "cur"));
  return entries.length;
}

/**
 * Recursively calculate the 'cost' of a hierarchy of maildir folders.
 * This is the figure used for progress updates.
 * For maildir, cost is 1 per message.
 *
 * @param {string} srcPath - Path of root dir containing maildirs.
 * @returns {Promise} - calculated conversion cost.
 */
async function calcMaildirCost(srcPath) {
  let total = 0;
  const children = await IOUtils.getChildren(srcPath);
  for (const child of children) {
    const info = await IOUtils.stat(child);
    if (info.type !== "directory") {
      // Plain files never contribute to the maildir cost.
      continue;
    }
    if (isSBD(PathUtils.filename(child))) {
      // Recurse into subfolder.
      total += await calcMaildirCost(child);
      continue;
    }
    if (await isMaildir(child)) {
      // Looks like a maildir. Cost is number of messages.
      total += await countMaildirMsgs(child);
    }
  }
  return total;
}
/**
 * Recursively calculate the 'cost' of a hierarchy of mbox folders.
 * This is the figure used for progress updates.
 * For mbox, cost is the total byte size of data. This avoids the need to
 * parse the mbox files to count the number of messages.
 * Note that this byte count cost is not 100% accurate because it includes
 * the "From " lines which are not written into the maildir files. But it's
 * definitely close enough to give good user feedback.
 *
 * @param {string} srcPath - Path of root dir containing mboxes.
 * @returns {Promise} - calculated conversion cost.
 */
async function calcMBoxCost(srcPath) {
  let total = 0;
  const entries = await IOUtils.getChildren(srcPath);
  for (const entry of entries) {
    const info = await IOUtils.stat(entry);
    const leaf = PathUtils.filename(entry);
    const isDir = info.type === "directory";
    if (isDir && isSBD(leaf)) {
      // Recurse into .sbd subfolder.
      total += await calcMBoxCost(entry);
    } else if (!isDir && isMBoxName(leaf)) {
      // An mbox file counts for its size in bytes.
      total += info.size;
    }
  }
  return total;
}

/**
 * Recursively convert a tree of mbox-based folders to maildirs.
 *
 * @param {string} srcPath - Root path containing mboxes.
 * @param {string} destPath - Where to create destination root.
 * @param {Function(number)} progressFn - Function to be invoked regularly with
 *                                        progress updates (called with number of
 *                                        cost "units" since last update)
 */
async function convertTreeMBoxToMaildir(srcPath, destPath, progressFn) {
  await IOUtils.makeDirectory(destPath);

  const entries = await IOUtils.getChildren(srcPath);
  for (const entry of entries) {
    const leaf = PathUtils.filename(entry);
    const target = PathUtils.join(destPath, leaf);
    const info = await IOUtils.stat(entry);

    if (info.type === "directory") {
      if (isSBD(leaf)) {
        // Recurse into .sbd subfolder.
        await convertTreeMBoxToMaildir(entry, target, progressFn);
      }
      // Any other directory is left alone.
      continue;
    }

    if (isFileToCopy(leaf)) {
      await IOUtils.copy(entry, target);
    } else if (isMBoxName(leaf)) {
      // It's an mbox. Convert it.
      await mboxToMaildir(entry, target, progressFn);
    }
  }
}
/**
 * Recursively convert a tree of maildir-based folders to mbox.
 *
 * @param {string} srcPath - Root path containing maildirs.
 * @param {string} destPath - Where to create destination root.
 * @param {Function(number)} progressFn - Function to be invoked regularly with
 *                                        progress updates (called with number of
 *                                        cost "units" since last update)
 */
async function convertTreeMaildirToMBox(srcPath, destPath, progressFn) {
  await IOUtils.makeDirectory(destPath);

  const entries = await IOUtils.getChildren(srcPath);
  for (const entry of entries) {
    const leaf = PathUtils.filename(entry);
    const target = PathUtils.join(destPath, leaf);
    const info = await IOUtils.stat(entry);

    if (info.type !== "directory") {
      if (isFileToCopy(leaf)) {
        await IOUtils.copy(entry, target);
      }
      continue;
    }

    if (isSBD(leaf)) {
      // Recurse into .sbd subfolder.
      await convertTreeMaildirToMBox(entry, target, progressFn);
    } else if (await isMaildir(entry)) {
      // It's a maildir - convert it.
      await maildirToMBox(entry, target, progressFn);
    }
  }
}

// propagate unhandled rejections to the error handler on the main thread
self.addEventListener("unhandledrejection", event => {
  throw event.reason;
});

self.addEventListener("message", function (e) {
  // Unpack the request params from the main thread.
  // destRoot will be a temporary dir, so if it all goes pear-shaped
  // we can just bail out without cleaning up.
  const { srcType, destType, srcRoot, destRoot } = e.data;

  // Configure the conversion.
  let costFn = null;
  let convertFn = null;
  if (srcType == "maildir" && destType == "mbox") {
    costFn = calcMaildirCost;
    convertFn = convertTreeMaildirToMBox;
  } else if (srcType == "mbox" && destType == "maildir") {
    costFn = calcMBoxCost;
    convertFn = convertTreeMBoxToMaildir;
  } else {
    throw new Error(`Unsupported conversion: ${srcType} => ${destType}`);
  }

  const runConversion = async () => {
    const totalCost = await costFn(srcRoot);

    let done = 0;
    const progressFn = n => {
      done += n;
      self.postMessage({ msg: "progress", val: done, total: totalCost });
    };

    await convertFn(srcRoot, destRoot, progressFn);

    // We fake a final progress update, with exactly 100% completed.
    // Our byte-counting on mbox->maildir conversion will fall slightly short:
    // The total is estimated from the mbox filesize, but progress is tracked
    // by counting bytes as they are written out - and the mbox "From " lines
    // are _not_ written out to the maildir files.
    // This is still accurate enough to provide progress to the user, but we
    // don't want the GUI left showing "progress 97% - conversion complete!"
    // or anything silly like that.
    self.postMessage({ msg: "progress", val: totalCost, total: totalCost });

    // Let the main thread know we succeeded.
    self.postMessage({ msg: "success" });
  };

  // Go!
  // Deliberately not awaited or caught: a failure surfaces through the
  // "unhandledrejection" listener registered on self.
  runConversion();
});