#!/usr/bin/env node /* * Extracts translatable strings from HTML files in the following forms: * * String * String * String * * * Supports the following angular-gettext compatible forms: * * String * Singular * * Note that some of the use of the translated may not support all the strings * depending on the code actually using these strings to translate the HTML. */ import fs from 'fs'; import path from 'path'; import htmlparser from 'htmlparser'; import { ArgumentParser } from 'argparse'; function fatal(message, code) { console.log((filename || "html2po") + ": " + message); process.exit(code || 1); } const parser = new ArgumentParser(); parser.add_argument('-d', '--directory', { help: "Base directory for input files" }); parser.add_argument('-o', '--output', { help: 'Output file', required: true }); parser.add_argument('files', { nargs: '+', help: "One or more input files", metavar: "FILE" }); const args = parser.parse_args(); const input = args.files; const entries = { }; /* Filename being parsed and offset of line number */ let filename = null; let offsets = 0; /* The HTML parser we're using */ const handler = new htmlparser.DefaultHandler(function(error, dom) { if (error) fatal(error); else walk(dom); }); /* Now process each file in turn */ step(); function step() { filename = input.shift(); if (filename === undefined) { finish(); return; } /* Qualify the filename if necessary */ let full = filename; if (args.directory) full = path.join(args.directory, filename); fs.readFile(full, { encoding: "utf-8" }, function(err, data) { if (err) fatal(err.message); const parser = new htmlparser.Parser(handler, { includeLocation: true }); parser.parseComplete(data); step(); }); } /* Process an array of nodes */ function walk(children) { if (!children) return; children.forEach(function(child) { const line = (child.location || { }).line || 0; const offset = line - 1; /* Scripts get their text processed as HTML */ if (child.type == 'script' && child.children) { const parser = new htmlparser.Parser(handler, { includeLocation: true }); /* Make note of how far into the outer HTML file we are */ offsets += offset; child.children.forEach(function(node) { parser.parseChunk(node.raw); }); parser.done(); offsets -= offset; /* Tags get extracted as usual */ } else if (child.type == 'tag') { tag(child); } }); } /* Process a single loaded tag */ function tag(node) { let tasks, line, entry; const attrs = node.attribs || { }; let nest = true; /* Extract translate strings */ if ("translate" in attrs || "translatable" in attrs) { tasks = (attrs.translate || attrs.translatable || "yes").split(" "); /* Calculate the line location taking into account nested parsing */ line = (node.location || { }).line || 0; line += offsets; entry = { msgctxt: attrs['translate-context'] || attrs.context, msgid_plural: attrs['translate-plural'], locations: [filename + ":" + line] }; /* For each thing listed */ tasks.forEach(function(task) { const copy = Object.assign({}, entry); /* The element text itself */ if (task == "yes" || task == "translate") { copy.msgid = extract(node.children); nest = false; /* An attribute */ } else if (task) { copy.msgid = attrs[task]; } if (copy.msgid) push(copy); }); } /* Walk through all the children */ if (nest) walk(node.children); } /* Push an entry onto the list */ function push(entry) { const key = entry.msgid + "\0" + entry.msgid_plural + "\0" + entry.msgctxt; const prev = entries[key]; if (prev) { prev.locations = prev.locations.concat(entry.locations); } else { entries[key] = entry; } } /* Extract the given text */ function extract(children) { if (!children) return null; const str = []; children.forEach(function(node) { if (node.type == 'tag' && node.children) str.push(extract(node.children)); else if (node.type == 'text' && node.data) str.push(node.data); }); const msgid = str.join(""); if (msgid != msgid.trim()) { console.error("FATAL: string contains leading or trailing whitespace:", msgid); process.exit(1); } return msgid; } /* Escape a string for inclusion in po file */ function escape(string) { const bs = string.split('\\') .join('\\\\') .split('"') .join('\\"'); return bs.split("\n").map(function(line) { return '"' + line + '"'; }).join("\n"); } /* Finish by writing out the strings */ function finish() { const result = [ 'msgid ""', 'msgstr ""', '"Project-Id-Version: PACKAGE_VERSION\\n"', '"MIME-Version: 1.0\\n"', '"Content-Type: text/plain; charset=UTF-8\\n"', '"Content-Transfer-Encoding: 8bit\\n"', '"X-Generator: Cockpit html2po\\n"', '', ]; for (const msgid in entries) { const entry = entries[msgid]; result.push('#: ' + entry.locations.join(" ")); if (entry.msgctxt) result.push('msgctxt ' + escape(entry.msgctxt)); result.push('msgid ' + escape(entry.msgid)); if (entry.msgid_plural) { result.push('msgid_plural ' + escape(entry.msgid_plural)); result.push('msgstr[0] ""'); result.push('msgstr[1] ""'); } else { result.push('msgstr ""'); } result.push(''); } const data = result.join('\n'); if (!args.output) { process.stdout.write(data); process.exit(0); } else { fs.writeFile(args.output, data, function(err) { if (err) fatal(err.message); process.exit(0); }); } }