diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 17:32:43 +0000 |
commit | 6bf0a5cb5034a7e684dcc3500e841785237ce2dd (patch) | |
tree | a68f146d7fa01f0134297619fbe7e33db084e0aa /comm/mailnews/base/src/converterWorker.js | |
parent | Initial commit. (diff) | |
download | thunderbird-upstream.tar.xz thunderbird-upstream.zip |
Adding upstream version 1:115.7.0.upstream/1%115.7.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'comm/mailnews/base/src/converterWorker.js')
-rw-r--r-- | comm/mailnews/base/src/converterWorker.js | 533 |
1 files changed, 533 insertions, 0 deletions
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* eslint-env mozilla/chrome-worker, node */

/**
 * This worker will perform mbox<->maildir conversions on a tree of
 * directories. It operates purely at the filesystem level.
 *
 * The initial message data should pass in these params to control
 * the conversion:
 *
 * srcType  - source mailstore type ('mbox' or 'maildir')
 * destType - destination mailstore type ('maildir' or 'mbox')
 * srcRoot  - root path of source (eg ".../ImapMail/imap.example.com")
 * destRoot - root path of destination (eg "/tmp/imap.example.com-maildir")
 *
 * The conversion is non-destructive - srcRoot will be left untouched.
 *
 * The worker will post progress messages back to the main thread of
 * the form:
 *
 *   {"msg": "progress", "val": val, "total": total}
 *
 * Where `val` is the current progress, out of `total`.
 * The units used for val and total are undefined.
 *
 * When the conversion is complete, before exiting, the worker sends a
 * message of the form:
 *
 *   {"msg": "success"}
 *
 * Errors are posted back to the main thread via the standard
 * "error" event.
 */

// Names used when formatting the date in an mbox separator line.
const DAY_NAMES = ["Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"];
const MONTH_NAMES = [
  "Jan",
  "Feb",
  "Mar",
  "Apr",
  "May",
  "Jun",
  "Jul",
  "Aug",
  "Sep",
  "Oct",
  "Nov",
  "Dec",
];

// Byte/char code of "\n".
const LINE_FEED = 0x0a;

// Extensions of files which are copied verbatim rather than converted.
const COPY_EXTENSIONS = [
  // Database and config files.
  ".msf",
  ".dat",
  // Summary files.
  ".snm",
  ".toc",
];

// A few files we know might be lurking in a mail folder tree.
const SPECIAL_FILES = [
  "filterlog.html",
  "junklog.html",
  "feeds.json",
  "feeds.json.tmp",
  "feeds.json.backup",
  "feeds.json.corrupt",
  "feeditems.json",
  "feeditems.json.tmp",
  "feeditems.json.backup",
  "feeditems.json.corrupt",
  "mailfilt.log",
  "filters.js",
];

/**
 * Format a date (in UTC) the way it appears in an mbox separator line,
 * eg "Thu Jan 18 12:34:56 2018". The day of month is space-padded, the
 * time fields are zero-padded.
 *
 * @param {Date} d - The date to format.
 * @returns {string} The formatted date.
 */
function formatSeparatorDate(d) {
  const twoDigits = n => n.toString().padStart(2, "0");
  const day = d.getUTCDate().toString().padStart(2);
  const time = [d.getUTCHours(), d.getUTCMinutes(), d.getUTCSeconds()]
    .map(twoDigits)
    .join(":");
  return (
    `${DAY_NAMES[d.getUTCDay()]} ${MONTH_NAMES[d.getUTCMonth()]} ` +
    `${day} ${time} ${d.getUTCFullYear()}`
  );
}

/**
 * Merge all the messages in a maildir into a single mbox file.
 *
 * @param {string} maildir - Path to the source maildir.
 * @param {string} mboxFilename - Path of the mbox file to create.
 * @param {Function(number)} progressFn - Function to be invoked regularly with
 *                                        progress updates. Param is number of
 *                                        "units" processed since last update.
 */
async function maildirToMBox(maildir, mboxFilename, progressFn) {
  // Start with an empty mbox file.
  await IOUtils.write(mboxFilename, new Uint8Array(), {
    mode: "create",
  });

  // Collect all the message files in "cur", along with a timestamp to
  // order them by.
  const curPath = PathUtils.join(maildir, "cur");
  const paths = await IOUtils.getChildren(curPath);
  const files = await Promise.all(
    paths.map(async path => {
      const stat = await IOUtils.stat(path);
      // The creation time is not reported on every platform. Without a
      // fallback the comparator below would return NaN and the sort would
      // do nothing, so use the modification time instead.
      const created = stat.creationTime;
      return {
        path,
        orderKey: typeof created === "number" ? created : stat.lastModified,
      };
    })
  );
  // We write out the mbox messages ordered by creation time.
  // Not ideal, but best we can do without parsing message.
  files.sort((a, b) => a.orderKey - b.orderKey);

  for (const ent of files) {
    const raw = await IOUtils.read(ent.path);
    // Old converter had a bug where maildir messages included the
    // leading "From " marker, so we need to cope with any
    // cases of this left in the wild.
    const hasFromLine =
      String.fromCharCode(...raw.subarray(0, 5)) === "From ";
    if (!hasFromLine) {
      // Write the separator line.
      // Technically, timestamp should be the reception time of the
      // message, but we don't really want to have to parse the
      // message here and nothing is likely to rely on it.
      const sepLine = `From - ${formatSeparatorDate(new Date())}\n`;
      await IOUtils.writeUTF8(mboxFilename, sepLine, {
        mode: "append",
      });
    }

    await IOUtils.write(mboxFilename, raw, {
      mode: "append",
    });
    // If the message does not end with a newline, terminate it. Otherwise
    // the next separator would be glued onto the last line of this message
    // and the two messages would merge when the mbox is read back.
    if (raw.length > 0 && raw[raw.length - 1] !== LINE_FEED) {
      await IOUtils.writeUTF8(mboxFilename, "\n", {
        mode: "append",
      });
    }
    // Maildir progress is one per message.
    progressFn(1);
  }
}

/**
 * Split an mbox file up into a maildir.
 *
 * @param {string} mboxPath - Path of the mbox file to split.
 * @param {string} maildirPath - Path of the maildir to create.
 * @param {Function(number)} progressFn - Function to be invoked regularly with
 *                                        progress updates. One parameter is
 *                                        passed - the number of "cost units"
 *                                        since the previous update.
 */
async function mboxToMaildir(mboxPath, maildirPath, progressFn) {
  // Create the maildir structure.
  await IOUtils.makeDirectory(maildirPath);
  const curDirPath = PathUtils.join(maildirPath, "cur");
  const tmpDirPath = PathUtils.join(maildirPath, "tmp");
  await IOUtils.makeDirectory(curDirPath);
  await IOUtils.makeDirectory(tmpDirPath);

  const CHUNK_SIZE = 1000000;
  // SAFE_MARGIN is how much to keep back between chunks in order to
  // cope with separator lines which might span chunks.
  const SAFE_MARGIN = 100;
  // How many bytes to hand to String.fromCharCode() in one call.
  const DECODE_STEP = 8192;

  // A regexp to match mbox separator lines. Separator lines in the wild can
  // have all sorts of forms, for example:
  //
  // "From "
  // "From MAILER-DAEMON Fri Jul  8 12:08:34 2011"
  // "From - Mon Jul 11 12:08:34 2011"
  // "From bob@example.com Fri Jul  8 12:08:34 2011"
  //
  // So we accept any line beginning with "From " and ignore the rest of it.
  //
  // We also require a message header on the next line, in order
  // to better cope with unescaped "From " lines in the message body.
  // note: the first subexpression matches the separator line, so
  // that it can be removed from the input.
  const sepRE = /^(From (?:.*?)\r?\n)[\x21-\x7E]+:/gm;

  // Use timestamp as starting name for output messages, incrementing
  // by one for each.
  let ident = Date.now();

  /**
   * Helper. Convert a string into a Uint8Array, using no encoding. The low
   * byte of each 16 bit character will be used, the high byte discarded.
   *
   * @param {string} str - Input string with chars in 0-255 range.
   * @returns {Uint8Array} The output bytes.
   */
  const stringToBytes = function (str) {
    const bytes = new Uint8Array(str.length);
    for (let i = 0; i < str.length; i++) {
      bytes[i] = str.charCodeAt(i);
    }
    return bytes;
  };

  /**
   * Helper. Convert a Uint8Array directly into a string, using each byte
   * directly as a character code. So all characters in the resulting string
   * will range from 0 to 255, even though they are 16 bit values.
   *
   * @param {Uint8Array} bytes - The bytes to convert.
   * @returns {string} The byte values in string form.
   */
  const bytesToString = function (bytes) {
    // Convert a slice at a time: far fewer intermediate strings than
    // appending one character per byte, while keeping the argument list
    // passed to String.fromCharCode() small.
    const parts = [];
    for (let i = 0; i < bytes.length; i += DECODE_STEP) {
      parts.push(
        String.fromCharCode.apply(null, bytes.subarray(i, i + DECODE_STEP))
      );
    }
    return parts.join("");
  };

  // Path of the message file currently being written (null if none yet).
  let outPath = null;

  /**
   * Helper. Write out a block of bytes to the current message file, starting
   * a new file if required.
   *
   * @param {string} str - The bytes to append (as chars in range 0-255).
   */
  const writeToMsg = async function (str) {
    let mode = "append";
    if (!outPath) {
      outPath = PathUtils.join(curDirPath, ident.toString() + ".eml");
      ident += 1;
      mode = "create";
    }
    // We know that str is really raw 8-bit data, not UTF-16. So we can
    // discard the upper byte and just keep the low byte of each char.
    const raw = stringToBytes(str);
    await IOUtils.write(outPath, raw, { mode });
    // For mbox->maildir conversion, progress is measured in bytes.
    progressFn(raw.byteLength);
  };

  let buf = "";
  // True if the first char of buf really is the start of a line in the mbox.
  // After trimming, buf may begin in the middle of a line, but "^" in sepRE
  // still matches at index 0 - such a match must not count as a separator.
  let bufStartsLine = true;
  let eof = false;
  let offset = 0;
  while (!eof) {
    const rawBytes = await IOUtils.read(mboxPath, {
      offset,
      maxBytes: CHUNK_SIZE,
    });
    // We're using JavaScript strings (which hold 16bit characters) to store
    // 8 bit data. This sucks, but is faster than trying to operate directly
    // upon Uint8Arrays. A lot of work goes into optimising JavaScript strings.
    buf += bytesToString(rawBytes);
    offset += rawBytes.byteLength;
    eof = rawBytes.byteLength < CHUNK_SIZE;

    let pos = 0;
    sepRE.lastIndex = 0; // start at beginning of buf
    let m = null;
    while ((m = sepRE.exec(buf)) !== null) {
      if (m.index === 0 && !bufStartsLine) {
        // Not a real line start, just the start of our buffer: this is
        // message text which happens to read "From ...". Leave it in place.
        continue;
      }
      // Output everything up to the line separator.
      if (m.index > pos) {
        await writeToMsg(buf.substring(pos, m.index));
      }
      // Skip the "From " line itself.
      pos = m.index + m[1].length;
      // The next write starts a new message file.
      outPath = null;
    }

    // Deal with whatever is left in the buffer.
    let endPos = buf.length;
    if (!eof) {
      // Keep back enough to cope with separator lines crossing
      // chunk boundaries (but never go back before what we've consumed).
      endPos = Math.max(endPos - SAFE_MARGIN, pos);
    }

    if (endPos > pos) {
      await writeToMsg(buf.substring(pos, endPos));
    }
    // Remember whether the data we keep back begins on a line boundary.
    if (endPos > 0) {
      bufStartsLine = buf.charCodeAt(endPos - 1) === LINE_FEED;
    }
    buf = buf.substring(endPos);
  }
}

/**
 * Check if directory is a subfolder directory.
 *
 * @param {string} name - Name of directory to check.
 * @returns {boolean} - true if subfolder.
 */
function isSBD(name) {
  return name.endsWith(".sbd");
}

/**
 * Check if file is a type which should be copied verbatim as part of a
 * conversion.
 * See also: nsMsgLocalStoreUtils::nsShouldIgnoreFile().
 *
 * @param {string} name - Name of file to check.
 * @returns {boolean} - true if file should be copied verbatim.
 */
function isFileToCopy(name) {
  if (COPY_EXTENSIONS.some(ext => name.endsWith(ext))) {
    return true;
  }
  return SPECIAL_FILES.includes(name);
}

/**
 * Check if file is an mbox.
 * (actually we can't really tell if it's an mbox or not just from the name.
 * we just assume it is, if it's not one of the files copied verbatim).
 *
 * @param {string} name - Name of file to check.
 * @returns {boolean} - true if file is an mbox
 */
function isMBoxName(name) {
  // If it's not a "special" file, assume it's mbox.
  return !isFileToCopy(name);
}

/**
 * Check if directory is a maildir (by looking for a "cur" subdir).
 *
 * @param {string} dir - Path of directory to check.
 * @returns {Promise<boolean>} - true if directory is a maildir.
 */
async function isMaildir(dir) {
  try {
    const cur = PathUtils.join(dir, "cur");
    const fi = await IOUtils.stat(cur);
    return fi.type === "directory";
  } catch (ex) {
    if (ex instanceof DOMException && ex.name === "NotFoundError") {
      // "cur" does not exist - not a maildir.
      return false;
    }
    throw ex; // Other error.
  }
}

/**
 * Count the number of messages in the "cur" dir of maildir.
 *
 * @param {string} maildir - Path of maildir.
 * @returns {Promise<number>} - number of messages found.
 */
async function countMaildirMsgs(maildir) {
  const cur = PathUtils.join(maildir, "cur");
  const paths = await IOUtils.getChildren(cur);
  return paths.length;
}

/**
 * Recursively calculate the 'cost' of a hierarchy of maildir folders.
 * This is the figure used for progress updates.
 * For maildir, cost is 1 per message.
 *
 * @param {string} srcPath - Path of root dir containing maildirs.
 * @returns {Promise<number>} - calculated conversion cost.
 */
async function calcMaildirCost(srcPath) {
  let cost = 0;
  for (const path of await IOUtils.getChildren(srcPath)) {
    const stat = await IOUtils.stat(path);
    if (stat.type !== "directory") {
      continue;
    }
    if (isSBD(PathUtils.filename(path))) {
      // Recurse into subfolder.
      cost += await calcMaildirCost(path);
    } else if (await isMaildir(path)) {
      // Looks like a maildir. Cost is number of messages.
      cost += await countMaildirMsgs(path);
    }
  }
  return cost;
}

/**
 * Recursively calculate the 'cost' of a hierarchy of mbox folders.
 * This is the figure used for progress updates.
 * For mbox, cost is the total byte size of data. This avoids the need to
 * parse the mbox files to count the number of messages.
 * Note that this byte count cost is not 100% accurate because it includes
 * the "From " lines which are not written into the maildir files. But it's
 * definitely close enough to give good user feedback.
 *
 * @param {string} srcPath - Path of root dir containing mboxes.
 * @returns {Promise<number>} - calculated conversion cost.
 */
async function calcMBoxCost(srcPath) {
  let cost = 0;
  for (const path of await IOUtils.getChildren(srcPath)) {
    const stat = await IOUtils.stat(path);
    const name = PathUtils.filename(path);
    if (stat.type === "directory") {
      if (isSBD(name)) {
        // Recurse into .sbd subfolder.
        cost += await calcMBoxCost(path);
      }
    } else if (isMBoxName(name)) {
      cost += stat.size;
    }
  }
  return cost;
}

/**
 * Recursively convert a tree of mbox-based folders to maildirs.
 *
 * @param {string} srcPath - Root path containing mboxes.
 * @param {string} destPath - Where to create destination root.
 * @param {Function(number)} progressFn - Function to be invoked regularly with
 *                                        progress updates (called with number of
 *                                        cost "units" since last update)
 */
async function convertTreeMBoxToMaildir(srcPath, destPath, progressFn) {
  await IOUtils.makeDirectory(destPath);

  for (const path of await IOUtils.getChildren(srcPath)) {
    const name = PathUtils.filename(path);
    const dest = PathUtils.join(destPath, name);
    const stat = await IOUtils.stat(path);
    if (stat.type === "directory") {
      if (isSBD(name)) {
        // Recurse into .sbd subfolder.
        await convertTreeMBoxToMaildir(path, dest, progressFn);
      }
    } else if (isFileToCopy(name)) {
      await IOUtils.copy(path, dest);
    } else if (isMBoxName(name)) {
      // It's an mbox. Convert it.
      await mboxToMaildir(path, dest, progressFn);
    }
  }
}

/**
 * Recursively convert a tree of maildir-based folders to mbox.
 *
 * @param {string} srcPath - Root path containing maildirs.
 * @param {string} destPath - Where to create destination root.
 * @param {Function(number)} progressFn - Function to be invoked regularly with
 *                                        progress updates (called with number of
 *                                        cost "units" since last update)
 */
async function convertTreeMaildirToMBox(srcPath, destPath, progressFn) {
  await IOUtils.makeDirectory(destPath);

  for (const path of await IOUtils.getChildren(srcPath)) {
    const name = PathUtils.filename(path);
    const dest = PathUtils.join(destPath, name);
    const stat = await IOUtils.stat(path);
    if (stat.type === "directory") {
      if (isSBD(name)) {
        // Recurse into .sbd subfolder.
        await convertTreeMaildirToMBox(path, dest, progressFn);
      } else if (await isMaildir(path)) {
        // It's a maildir - convert it.
        await maildirToMBox(path, dest, progressFn);
      }
    } else if (isFileToCopy(name)) {
      await IOUtils.copy(path, dest);
    }
  }
}

/**
 * Run a whole conversion, posting progress and success messages to the main
 * thread as it goes.
 *
 * @param {Function(string)} costFn - Calculates the total cost of srcRoot.
 * @param {Function(string, string, Function)} convertFn - Performs the
 *   conversion of the tree at srcRoot into destRoot.
 * @param {string} srcRoot - Root path of the source mail store.
 * @param {string} destRoot - Root path of the destination mail store.
 */
async function runConversion(costFn, convertFn, srcRoot, destRoot) {
  const totalCost = await costFn(srcRoot);

  let done = 0;
  const progressFn = function (n) {
    done += n;
    self.postMessage({ msg: "progress", val: done, total: totalCost });
  };
  await convertFn(srcRoot, destRoot, progressFn);

  // We fake a final progress update, with exactly 100% completed.
  // Our byte-counting on mbox->maildir conversion will fall slightly short:
  // The total is estimated from the mbox filesize, but progress is tracked
  // by counting bytes as they are written out - and the mbox "From " lines
  // are _not_ written out to the maildir files.
  // This is still accurate enough to provide progress to the user, but we
  // don't want the GUI left showing "progress 97% - conversion complete!"
  // or anything silly like that.
  self.postMessage({ msg: "progress", val: totalCost, total: totalCost });

  // Let the main thread know we succeeded.
  self.postMessage({ msg: "success" });
}

// propagate unhandled rejections to the error handler on the main thread
self.addEventListener("unhandledrejection", function (error) {
  throw error.reason;
});

self.addEventListener("message", function (e) {
  // Unpack the request params from the main thread.
  // destRoot will be a temporary dir, so if it all goes pear-shaped
  // we can just bail out without cleaning up.
  const { srcType, destType, srcRoot, destRoot } = e.data;

  // Configure the conversion.
  let costFn = null;
  let convertFn = null;
  if (srcType === "maildir" && destType === "mbox") {
    costFn = calcMaildirCost;
    convertFn = convertTreeMaildirToMBox;
  } else if (srcType === "mbox" && destType === "maildir") {
    costFn = calcMBoxCost;
    convertFn = convertTreeMBoxToMaildir;
  } else {
    throw new Error(`Unsupported conversion: ${srcType} => ${destType}`);
  }

  // Go!
  // The promise is deliberately left unhandled here: a rejection is picked
  // up by the "unhandledrejection" listener above, which rethrows it so it
  // reaches the main thread as an "error" event.
  runConversion(costFn, convertFn, srcRoot, destRoot);
});