#!/usr/bin/env vpython
# Copyright 2019 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Helper script used to manage locale-related files in Chromium.

This script is used to check, and potentially fix, many locale-related files
in your Chromium workspace, such as:

  - GRIT input files (.grd) and the corresponding translations (.xtb).

  - BUILD.gn files listing the Android localized string resource .xml files
    generated by GRIT for all supported Chrome locales. These correspond to
    <output> elements that use the type="android" attribute.

The --scan-dir <dir> option can be used to check for all files under a specific
directory, and the --fix-inplace option can be used to try fixing any file
that doesn't pass the check.

This can be very handy to avoid tedious and repetitive work when adding new
translations / locales to the Chrome code base, since this script can update
said input files for you.

Important note: checks and fixes may fail on some input files. For example,
remoting/resources/remoting_strings.grd contains an in-line comment element
inside its <outputs> section that breaks the script. The check will fail, and
so will any attempt to fix it, but at least the file will not be modified.
"""

from __future__ import print_function

import argparse
import json
import os
import re
import shutil
import subprocess
import sys
import unittest

# Assume this script is under build/
# Directory containing this script, as derived from __file__.
_SCRIPT_DIR = os.path.dirname(__file__)
# Path of this script, rebuilt from its directory and its base name.
_SCRIPT_NAME = os.path.join(_SCRIPT_DIR, os.path.basename(__file__))
# Parent of the script directory; given the assumption above, this is the
# top-level source directory.
_TOP_SRC_DIR = os.path.join(_SCRIPT_DIR, '..')

# Need to import android/gyp/util/resource_utils.py here.
sys.path.insert(0, os.path.join(_SCRIPT_DIR, 'android/gyp'))

from util import build_utils
from util import resource_utils


# This locale is the default and doesn't have translations.
_DEFAULT_LOCALE = 'en-US'

# Misc terminal codes to provide human friendly progress output.
_CONSOLE_CODE_MOVE_CURSOR_TO_COLUMN_0 = '\x1b[0G'
_CONSOLE_CODE_ERASE_LINE = '\x1b[K'
_CONSOLE_START_LINE = (
    _CONSOLE_CODE_MOVE_CURSOR_TO_COLUMN_0 + _CONSOLE_CODE_ERASE_LINE)

##########################################################################
##########################################################################
#####
#####    G E N E R I C   H E L P E R   F U N C T I O N S
#####
##########################################################################
##########################################################################

def _FixChromiumLangAttribute(lang):
  """Translate an XML "lang" attribute value into a Chromium locale name.

  Args:
    lang: Value of a "lang" attribute.
  Returns:
    The Chromium locale name for |lang|, or |lang| itself when it needs no
    special handling.
  """
  special_cases = {
      # For now, Chromium doesn't have an 'en' locale.
      'en': 'en-US',
      # 'iw' is the obsolete form of ISO 639-1 for Hebrew
      'iw': 'he',
      # 'no' is used by the Translation Console for Norwegian (nb).
      'no': 'nb',
  }
  try:
    return special_cases[lang]
  except KeyError:
    return lang


def _FixTranslationConsoleLocaleName(locale):
  """Map a locale name to the name used for it by the Translation Console.

  Args:
    locale: Input locale name.
  Returns:
    'no' for 'nb' (Norwegian), 'iw' for 'he' (Hebrew), and |locale| itself
    for any other value.
  """
  console_names = {
      'nb': 'no',  # Norwegian.
      'he': 'iw',  # Hebrew
  }
  try:
    return console_names[locale]
  except KeyError:
    return locale


def _CompareLocaleLists(list_a, list_expected, list_name):
  """Compare two lists of locale names. Print errors if they differ.

  Order and duplicates are ignored: both lists are compared as sets.

  Args:
    list_a: List of locales to verify.
    list_expected: List of locales that |list_a| is expected to contain.
    list_name: Name of list printed in error messages.
  Returns:
    On success, return False. On error, print error messages and return True.
  """
  actual = set(list_a)
  expected = set(list_expected)

  errors = []
  # Locales that are expected but absent from |list_a| are the missing ones.
  missing_locales = sorted(expected - actual)
  if missing_locales:
    errors.append('Missing locales: %s' % missing_locales)

  # Locales present in |list_a| that nobody expected are the extra ones.
  extra_locales = sorted(actual - expected)
  if extra_locales:
    errors.append('Unexpected locales: %s' % extra_locales)

  if errors:
    print('Errors in %s definition:' % list_name)
    for error in errors:
      print('  %s\n' % error)
    return True

  return False


def _BuildIntervalList(input_list, predicate):
  """Locate every maximal run of consecutive items accepted by a predicate.

  Args:
    input_list: An input list of items of any type.
    predicate: A function that takes a list item and returns True if the
      item passes a given test.
  Returns:
    A list of (start_pos, end_pos) tuples, in increasing order, such that
    all items in [start_pos, end_pos) pass the predicate.
  """
  intervals = []
  run_start = None  # Index where the current run began, or None outside one.
  for index, item in enumerate(input_list):
    if predicate(item):
      if run_start is None:
        run_start = index
    elif run_start is not None:
      # The first rejected item closes the run that was in progress.
      intervals.append((run_start, index))
      run_start = None

  # A run that reaches the end of the list is closed by the list size.
  if run_start is not None:
    intervals.append((run_start, len(input_list)))
  return intervals


def _SortListSubRange(input_list, start, end, key_func):
  """Sort an input list's sub-range according to a specific key function.

  Items whose keys compare equal are ordered by the items themselves.

  Args:
    input_list: An input list.
    start: Sub-range starting position in list.
    end: Sub-range limit position in list.
    key_func: A function that extracts a sort key from a line.
  Returns:
    A copy of |input_list|, with all items in [|start|, |end|) sorted
    according to |key_func|. |input_list| itself is not modified.
  """
  # Sorting on (key, line) keeps the tie-breaking rule described above while
  # relying on slicing instead of the Python-2-only xrange().
  sorted_range = sorted(input_list[start:end],
                        key=lambda line: (key_func(line), line))
  return input_list[:start] + sorted_range + input_list[end:]


def _SortElementsRanges(lines, element_predicate, element_key):
  """Sort each contiguous run of selected lines by a given key.

  Args:
    lines: input lines.
    element_predicate: predicate function to select elements to sort.
    element_key: lambda returning a comparison key for each element that
      passes the predicate.
  Returns:
    A list of lines in which every contiguous run of lines accepted by
    |element_predicate| has been sorted according to |element_key|. Lines
    outside of these runs keep their position.
  """
  sorted_lines = lines
  # Sorting a run never changes its length, so the positions computed from
  # the original lines stay valid for every run.
  for first, limit in _BuildIntervalList(lines, element_predicate):
    sorted_lines = _SortListSubRange(sorted_lines, first, limit, element_key)
  return sorted_lines


def _ProcessFile(input_file, locales, check_func, fix_func):
  """Check a given input file, and rewrite it in place when a fix is possible.

  Args:
    input_file: Input file path.
    locales: List of Chrome locales to consider / expect.
    check_func: A lambda called to check the input file lines with
      (input_file, input_lines, locales) arguments. It must return a list of
      error messages, or None on success.
    fix_func: None, or a lambda called to fix the input file lines with
      (input_file, input_lines, locales) arguments. It must return the new
      list of lines for the input file, and may raise an Exception in case
      of error, in which case the file is reported as skipped.
  Returns:
    True at the moment.
  """
  print('%sProcessing %s...' % (_CONSOLE_START_LINE, input_file), end=' ')
  sys.stdout.flush()

  with open(input_file) as source:
    original_lines = source.readlines()

  problems = check_func(input_file, original_lines, locales)
  if not problems:
    return True

  print('\n%s%s' % (_CONSOLE_START_LINE, '\n'.join(problems)))
  if not fix_func:
    return True

  try:
    fixed_lines = fix_func(input_file, original_lines, locales)
    # Build the whole content before opening the file for writing, so that
    # a failure at this point cannot truncate the input file.
    new_content = ''.join(fixed_lines)
    with open(input_file, 'wt') as destination:
      destination.write(new_content)
    print('Fixed %s.' % input_file)
  except Exception as e:  # pylint: disable=broad-except
    print('Skipped %s: %s' % (input_file, e))

  return True


def _ScanDirectoriesForFiles(scan_dirs, file_predicate):
  """Recursively collect the files under some directories that pass a test.

  Args:
    scan_dirs: A list of top-level directories to start the scan in.
    file_predicate: lambda function which is passed a file's base name
      and returns True if the file's path should be part of the result.
  Returns:
    A list of file paths, each one made of the path of the directory that
    contains the file (as reported by os.walk()) joined with its base name.
  """
  matches = []
  for top_dir in scan_dirs:
    for parent_dir, _, file_names in os.walk(top_dir):
      for file_name in file_names:
        if file_predicate(file_name):
          matches.append(os.path.join(parent_dir, file_name))
  return matches


def _WriteFile(file_path, file_data):
  """Create or overwrite |file_path| with |file_data| as its content.

  Args:
    file_path: Path of the file to write.
    file_data: String to write into the file.
  """
  with open(file_path, 'w') as output_file:
    output_file.write(file_data)


def _FindGnExecutable():
  """Locate the real GN executable used by this Chromium checkout.

  This is needed because the depot_tools 'gn' wrapper script will look
  for .gclient and other things we really don't need here.

  Returns:
    Path of real host GN executable from current Chromium src/ checkout,
    or None if no sub-directory of buildtools/ contains a 'gn' entry.
  """
  # Probe every buildtools/*/gn and keep the first hit, which avoids having
  # to guess the platform-specific sub-directory name (e.g. 'linux64' for
  # 64-bit Linux machines).
  buildtools_dir = os.path.join(_TOP_SRC_DIR, 'buildtools')
  for entry in os.listdir(buildtools_dir):
    candidate_dir = os.path.join(buildtools_dir, entry)
    if os.path.isdir(candidate_dir):
      candidate = os.path.join(candidate_dir, 'gn')
      if os.path.exists(candidate):
        return candidate
  return None


def _PrettyPrintListAsLines(input_list, available_width, trailing_comma=False):
  """Format a list of strings as comma-separated lines of limited width.

  The items are joined with ', ' and the result is broken after a comma
  whenever it does not fit in |available_width| characters.

  Args:
    input_list: List of strings to format.
    available_width: Maximum width of each output line. An item that is
      wider than this is still emitted, alone on its own line.
    trailing_comma: Set to True to append a comma to the last line, unless
      that line is empty.
  Returns:
    A list of lines. It always contains at least one item, which is the
    empty string when |input_list| is empty.
  """
  result = []
  input_str = ', '.join(input_list)
  while len(input_str) > available_width:
    pos = input_str.rfind(',', 0, available_width)
    if pos < 0:
      # The first item alone is too wide: break right after it instead.
      # Without this, nothing would be consumed and the loop would never end.
      pos = input_str.find(',')
      if pos < 0:
        # Only one over-long item remains, nothing left to break.
        break
    result.append(input_str[:pos + 1])
    input_str = input_str[pos + 1:].lstrip()
  if trailing_comma and input_str:
    input_str += ','
  result.append(input_str)
  return result


class _PrettyPrintListAsLinesTest(unittest.TestCase):
  """Unit tests for _PrettyPrintListAsLines()."""

  def test_empty_list(self):
    # An empty input still produces a single, empty line.
    self.assertListEqual([''], _PrettyPrintListAsLines([], 10))

  def test_wrapping(self):
    # Maps an available width to the lines expected for that width.
    input_list = ['foo', 'bar', 'zoo', 'tool']
    expectations = [
        (8, ['foo,', 'bar,', 'zoo,', 'tool']),
        (12, ['foo, bar,', 'zoo, tool']),
        (79, ['foo, bar, zoo, tool']),
    ]
    for width, expected_lines in expectations:
      self.assertListEqual(
          _PrettyPrintListAsLines(input_list, width), expected_lines)

  def test_trailing_comma(self):
    # Same widths as above, but the last line must end with a comma.
    input_list = ['foo', 'bar', 'zoo', 'tool']
    expectations = [
        (8, ['foo,', 'bar,', 'zoo,', 'tool,']),
        (12, ['foo, bar,', 'zoo, tool,']),
        (79, ['foo, bar, zoo, tool,']),
    ]
    for width, expected_lines in expectations:
      self.assertListEqual(
          _PrettyPrintListAsLines(input_list, width, trailing_comma=True),
          expected_lines)


##########################################################################
##########################################################################
#####
#####    L O C A L E S   L I S T S
#####
##########################################################################
##########################################################################

# Lists of locales that are extracted from build/config/locales.gni.
# Do not use these directly; call ChromeLocales() and IosUnsupportedLocales()
# instead to access these lists.
_INTERNAL_CHROME_LOCALES = []
_INTERNAL_IOS_UNSUPPORTED_LOCALES = []


def ChromeLocales():
  """Return the list of all locales supported by Chrome.

  The list is extracted on demand: as long as the cached list is empty,
  calling this function triggers _ExtractAllChromeLocalesLists().
  """
  if _INTERNAL_CHROME_LOCALES:
    return _INTERNAL_CHROME_LOCALES
  _ExtractAllChromeLocalesLists()
  return _INTERNAL_CHROME_LOCALES


def IosUnsupportedLocales():
  """Return the list of locales that are unsupported on iOS.

  The list is extracted on demand: as long as the cached list is empty,
  calling this function triggers _ExtractAllChromeLocalesLists().
  """
  if _INTERNAL_IOS_UNSUPPORTED_LOCALES:
    return _INTERNAL_IOS_UNSUPPORTED_LOCALES
  _ExtractAllChromeLocalesLists()
  return _INTERNAL_IOS_UNSUPPORTED_LOCALES


def _PrepareTinyGnWorkspace(work_dir, out_subdir_name='out'):
  """Populate an empty directory with a tiny set of working GN config files.

  This allows us to run 'gn gen <out> --root <work_dir>' as fast as possible
  to generate files containing the locales list. This takes about 300ms on
  a decent machine, instead of more than 5 seconds when running the equivalent
  commands from a real Chromium workspace, which requires regenerating more
  than 23k targets.

  Args:
    work_dir: target working directory.
    out_subdir_name: Name of output sub-directory.
  Returns:
    Full path of output directory created inside |work_dir|.
  """
  # Content of the top-level .gn file, which must point to the BUILDCONFIG.gn.
  dot_gn_content = 'buildconfig = "//BUILDCONFIG.gn"\n'

  # BUILDCONFIG.gn must set a default toolchain. It also declares, in a
  # declare_args() block, all variables that may be used in locales.gni.
  buildconfig_content = r'''set_default_toolchain("toolchain")
declare_args () {
  is_ios = false
  is_android = true
}
'''

  # Fake toolchain required by BUILDCONFIG.gn.
  toolchain_content = r'''toolchain("toolchain") {
  tool("stamp") {
    command = "touch {{output}}"  # Required by action()
  }
}
'''

  # GN requires at least one target to build, so the top-level BUILD.gn
  # defines a fake action which will never be invoked. It also writes the
  # locales to misc files in the output directory.
  build_gn_content = r'''import("//locales.gni")

action("create_foo") {   # fake action to avoid GN complaints.
  script = "//build/create_foo.py"
  inputs = []
  outputs = [ "$target_out_dir/$target_name" ]
}

# Write the locales lists to files in the output directory.
_filename = root_build_dir + "/foo"
write_file(_filename + ".locales", locales, "json")
write_file(_filename + ".ios_unsupported_locales",
            ios_unsupported_locales,
            "json")
'''

  _WriteFile(os.path.join(work_dir, '.gn'), dot_gn_content)
  _WriteFile(os.path.join(work_dir, 'BUILDCONFIG.gn'), buildconfig_content)

  toolchain_dir = os.path.join(work_dir, 'toolchain')
  os.mkdir(toolchain_dir)
  _WriteFile(os.path.join(toolchain_dir, 'BUILD.gn'), toolchain_content)

  _WriteFile(os.path.join(work_dir, 'BUILD.gn'), build_gn_content)

  # The BUILD.gn above imports //locales.gni, so copy the real
  # build/config/locales.gni to the root of the workspace.
  locales_gni = os.path.join(_TOP_SRC_DIR, 'build', 'config', 'locales.gni')
  shutil.copyfile(locales_gni, os.path.join(work_dir, 'locales.gni'))

  # Finally, create the output directory.
  out_path = os.path.join(work_dir, out_subdir_name)
  os.mkdir(out_path)
  return out_path


# Set this global variable to the path of a given temporary directory
# before calling _ExtractAllChromeLocalesLists() if you want to debug
# the locales list extraction process.
_DEBUG_LOCALES_WORK_DIR = None


def _ReadJsonList(file_path):
  """Load a JSON file that must hold a list, and return its encoded items.

  Args:
    file_path: Path of the JSON file to read.
  Returns:
    A list holding each item of the JSON list, encoded as UTF-8.
  """
  with open(file_path) as json_file:
    parsed = json.load(json_file)
  assert isinstance(parsed, list), "JSON file %s is not a list!" % file_path

  encoded_items = []
  for entry in parsed:
    encoded_items.append(entry.encode('utf8'))
  return encoded_items


def _ExtractAllChromeLocalesLists():
  """Run GN in a tiny workspace to fill the module-level locales lists.

  This sets _INTERNAL_CHROME_LOCALES and _INTERNAL_IOS_UNSUPPORTED_LOCALES
  from the JSON files that 'gn gen' writes into the output directory of the
  workspace created by _PrepareTinyGnWorkspace().

  Raises:
    Exception: if no GN executable could be located.
    subprocess.CalledProcessError: if 'gn gen' fails. Its output is printed
      before the error is propagated.
  """
  global _INTERNAL_CHROME_LOCALES
  global _INTERNAL_IOS_UNSUPPORTED_LOCALES

  with build_utils.TempDir() as tmp_path:
    if _DEBUG_LOCALES_WORK_DIR:
      # Debugging aid: work in a caller-provided directory, reset to empty.
      tmp_path = _DEBUG_LOCALES_WORK_DIR
      build_utils.DeleteDirectory(tmp_path)
      build_utils.MakeDirectory(tmp_path)

    out_path = _PrepareTinyGnWorkspace(tmp_path, 'out')

    gn_executable = _FindGnExecutable()
    if not gn_executable:
      # Fail with a clear message instead of handing None to subprocess.
      raise Exception('Cannot find GN executable under %s' %
                      os.path.join(_TOP_SRC_DIR, 'buildtools'))
    try:
      subprocess.check_output(
          [gn_executable, 'gen', out_path, '--root=' + tmp_path])
    except subprocess.CalledProcessError as e:
      print(e.output)
      raise  # Bare raise keeps the original traceback.

    # NOTE: These file names must match the ones written by the BUILD.gn
    # file generated in _PrepareTinyGnWorkspace().
    _INTERNAL_CHROME_LOCALES = _ReadJsonList(
        os.path.join(out_path, 'foo.locales'))

    _INTERNAL_IOS_UNSUPPORTED_LOCALES = _ReadJsonList(
        os.path.join(out_path, 'foo.ios_unsupported_locales'))


##########################################################################
##########################################################################
#####
#####    G R D   H E L P E R   F U N C T I O N S
#####
##########################################################################
##########################################################################

# Technical note:
#
# Even though .grd files are XML, an xml parser library is not used in order
# to preserve the original file's structure after modification. ElementTree
# tends to re-order attributes in each element when re-writing an XML
# document tree, which is undesirable here.
#
# Thus simple line-based regular expression matching is used instead.
#

# Misc regular expressions used to match elements and their attributes.
# Matches a self-closing <output ... /> element; group 1 captures the text
# between '<output ' and the closing '/>'.
_RE_OUTPUT_ELEMENT = re.compile(r'<output (.*)\s*/>')
# Matches a self-closing <file ... /> element that has a path attribute
# ending in ".xtb"; group 2 captures that path.
_RE_TRANSLATION_ELEMENT = re.compile(r'<file( | .* )path="(.*\.xtb)".*/>')
# The next three expressions capture, in group 1, the double-quoted value of
# the attribute they are named after.
_RE_FILENAME_ATTRIBUTE = re.compile(r'filename="([^"]*)"')
_RE_LANG_ATTRIBUTE = re.compile(r'lang="([^"]*)"')
_RE_PATH_ATTRIBUTE = re.compile(r'path="([^"]*)"')
# Matches the literal type="android" attribute.
_RE_TYPE_ANDROID_ATTRIBUTE = re.compile(r'type="android"')



def _IsGritInputFile(input_file):
  """Tell whether a file path names a GRIT input file.

  Args:
    input_file: File path to test.
  Returns:
    True iff |input_file| ends with the '.grd' suffix.
  """
  grit_suffix = '.grd'
  return input_file.endswith(grit_suffix)


def _GetXmlLangAttribute(xml_line):
  """Extract the lang attribute value from an XML input line.

  Args:
    xml_line: A single line of XML text.
  Returns:
    The value of the first lang="..." attribute found in |xml_line|, or
    None if the line has no such attribute.
  """
  match = _RE_LANG_ATTRIBUTE.search(xml_line)
  return match.group(1) if match else None


class _GetXmlLangAttributeTest(unittest.TestCase):
  """Unit tests for _GetXmlLangAttribute()."""

  # Maps an input line to the lang attribute value expected for it.
  TEST_DATA = {
      '': None,
      'foo': None,
      'lang=foo': None,
      'lang="foo"': 'foo',
      '<something lang="foo bar" />': 'foo bar',
      '<file lang="fr-CA" path="path/to/strings_fr-CA.xtb" />': 'fr-CA',
  }

  def test_GetXmlLangAttribute(self):
    # items() and assertEqual() are used because iteritems() only exists in
    # Python 2 and assertEquals() is a deprecated alias.
    for test_line, expected in self.TEST_DATA.items():
      self.assertEqual(_GetXmlLangAttribute(test_line), expected)


def _SortGrdElementsRanges(grd_lines, element_predicate):
  """Sort each run of selected .grd element lines by lang attribute.

  Args:
    grd_lines: Input .grd lines.
    element_predicate: predicate function selecting the lines to sort.
  Returns:
    The list returned by _SortElementsRanges() when it is given
    _GetXmlLangAttribute as the key function.
  """
  lang_key = _GetXmlLangAttribute
  return _SortElementsRanges(grd_lines, element_predicate, lang_key)


def _CheckGrdElementRangeLang(grd_lines, start, end, wanted_locales):
  """Check the element 'lang' attributes in specific .grd lines range.

  This really checks the following:
    - Each item has a correct 'lang' attribute.
    - There are no duplicated lines for the same 'lang' attribute.
    - That there are no extra locales that Chromium doesn't want.
    - That no wanted locale is missing.

  Args:
    grd_lines: Input .grd lines.
    start: Sub-range start position in input line list.
    end: Sub-range limit position in input line list.
    wanted_locales: Set of wanted Chromium locale names.
  Returns:
    List of error message strings for this input. Empty on success.
    Per-line messages start with the 1-based line number.
  """
  errors = []
  locales = set()
  for pos in range(start, end):
    lang = _GetXmlLangAttribute(grd_lines[pos])
    if not lang:
      # The parentheses matter: '%' binds tighter than '+', so without them
      # this would try to add an integer to the formatted string.
      errors.append(
          '%d: Missing "lang" attribute in <output> element' % (pos + 1))
      continue
    cr_locale = _FixChromiumLangAttribute(lang)
    if cr_locale in locales:
      errors.append(
          '%d: Redefinition of <output> for "%s" locale' % (pos + 1, lang))
    locales.add(cr_locale)

  extra_locales = locales.difference(wanted_locales)
  if extra_locales:
    errors.append('%d-%d: Extra locales found: %s' % (start + 1, end + 1,
                                                      sorted(extra_locales)))

  missing_locales = wanted_locales.difference(locales)
  if missing_locales:
    errors.append('%d-%d: Missing locales: %s' % (start + 1, end + 1,
                                                  sorted(missing_locales)))

  return errors


##########################################################################
##########################################################################
#####
#####    G R D   A N D R O I D   O U T P U T S
#####
##########################################################################
##########################################################################

def _IsGrdAndroidOutputLine(line):
  """Tell whether a .grd line is an Android-specific <output> element.

  Args:
    line: A single .grd input line.
  Returns:
    True iff |line| holds an <output .../> element whose attributes include
    type="android".
  """
  match = _RE_OUTPUT_ELEMENT.search(line)
  return bool(match) and 'type="android"' in match.group(1)

assert _IsGrdAndroidOutputLine('  <output type="android"/>')

# Many of the functions below have unused arguments due to genericity.
# pylint: disable=unused-argument

def _CheckGrdElementRangeAndroidOutputFilename(grd_lines, start, end,
                                               wanted_locales):
  """Check all <output> elements in specific input .grd lines range.

  This really checks the following:
    - Filenames exist for each listed locale.
    - Filenames are well-formed.

  Lines without a 'lang' attribute are ignored here.

  Args:
    grd_lines: Input .grd lines.
    start: Sub-range start position in input line list.
    end: Sub-range limit position in input line list.
    wanted_locales: Set of wanted Chromium locale names.
  Returns:
    List of error message strings for this input. Empty on success.
    Each message starts with the 1-based line number.
  """
  errors = []
  for pos in range(start, end):
    line = grd_lines[pos]
    lang = _GetXmlLangAttribute(line)
    if not lang:
      continue
    cr_locale = _FixChromiumLangAttribute(lang)

    m = _RE_FILENAME_ATTRIBUTE.search(line)
    if not m:
      # The parentheses matter: '%' binds tighter than '+', so without them
      # this would try to add an integer to the formatted string.
      errors.append(
          '%d: Missing filename attribute in <output> element' % (pos + 1))
    else:
      filename = m.group(1)
      if not filename.endswith('.xml'):
        errors.append(
            '%d: Filename should end with ".xml": %s' % (pos + 1, filename))

      # The file must live in values/ for the default locale, and in
      # values-<android locale>/ for every other one.
      dirname = os.path.basename(os.path.dirname(filename))
      prefix = ('values-%s' % resource_utils.ToAndroidLocaleName(cr_locale)
                if cr_locale != _DEFAULT_LOCALE else 'values')
      if dirname != prefix:
        errors.append(
            '%d: Directory name should be %s: %s' % (pos + 1, prefix, filename))

  return errors


def _CheckGrdAndroidOutputElements(grd_file, grd_lines, wanted_locales):
  """Check every run of Android <output> elements found in a .grd file.

  Args:
    grd_file: Input .grd file path.
    grd_lines: List of input .grd lines.
    wanted_locales: set of wanted Chromium locale names.
  Returns:
    List of error message strings. Empty on success.
  """
  errors = []
  for first, limit in _BuildIntervalList(grd_lines, _IsGrdAndroidOutputLine):
    # For each run, report 'lang' attribute problems first, then the
    # filename ones.
    errors.extend(
        _CheckGrdElementRangeLang(grd_lines, first, limit, wanted_locales))
    errors.extend(
        _CheckGrdElementRangeAndroidOutputFilename(grd_lines, first, limit,
                                                   wanted_locales))
  return errors


def _AddMissingLocalesInGrdAndroidOutputs(grd_file, grd_lines, wanted_locales):
  """Fix input .grd lines by adding missing Android outputs.

  New <output> lines are cloned from the existing one for the 'bg' locale,
  with its 'lang' attribute and values-bg/ directory substituted.

  Args:
    grd_file: Input .grd file path.
    grd_lines: Input .grd line list. New lines are inserted into this list.
    wanted_locales: set of Chromium locale names.
  Returns:
    A new list of .grd lines, containing new <output> elements when needed
    for locales from |wanted_locales| that were not part of the input.
  Raises:
    Exception: if a range with missing locales has no 'bg' <output> element
      to clone.
  """
  intervals = _BuildIntervalList(grd_lines, _IsGrdAndroidOutputLine)
  # Process the ranges in reverse order, so that inserting lines in one of
  # them does not shift the positions of those still to be processed.
  for start, end in reversed(intervals):
    locales = set()
    for pos in range(start, end):
      lang = _GetXmlLangAttribute(grd_lines[pos])
      locales.add(_FixChromiumLangAttribute(lang))

    missing_locales = wanted_locales.difference(locales)
    if not missing_locales:
      continue

    src_locale = 'bg'
    src_lang_attribute = 'lang="%s"' % src_locale
    src_line = None
    for pos in range(start, end):
      if src_lang_attribute in grd_lines[pos]:
        src_line = grd_lines[pos]
        break

    if not src_line:
      raise Exception(
          'Cannot find <output> element with "%s" lang attribute' % src_locale)

    # Insert before the last line of the range: the new lines stay inside
    # it, and the final sort puts them at their proper place.
    line_count = end - 1
    # Iterate in sorted order so that the result is deterministic.
    for locale in sorted(missing_locales):
      if locale == _DEFAULT_LOCALE:
        # The default locale lives in values/, with no locale qualifier, as
        # required by _CheckGrdElementRangeAndroidOutputFilename().
        dst_dir = 'values/'
      else:
        dst_dir = 'values-%s/' % resource_utils.ToAndroidLocaleName(locale)
      dst_line = src_line.replace(
          'lang="%s"' % src_locale, 'lang="%s"' % locale).replace(
              'values-%s/' % src_locale, dst_dir)
      grd_lines.insert(line_count, dst_line)
      line_count += 1

  # Sort the new <output> elements.
  return _SortGrdElementsRanges(grd_lines, _IsGrdAndroidOutputLine)


##########################################################################
##########################################################################
#####
#####    G R D   T R A N S L A T I O N S
#####
##########################################################################
##########################################################################


def _IsTranslationGrdOutputLine(line):
  """Tell whether a .grd line is a <file> element naming an .xtb file.

  Args:
    line: A single .grd input line.
  Returns:
    True iff |line| holds an output .xtb <file> element.
  """
  return _RE_TRANSLATION_ELEMENT.search(line) is not None


class _IsTranslationGrdOutputLineTest(unittest.TestCase):
  """Unit tests for _IsTranslationGrdOutputLine()."""

  def test_GrdTranslationOutputLines(self):
    # Lines that must be recognized as .xtb <file> elements.
    accepted_lines = (
        '<file path="foo/bar.xtb" />',
        '<file path="foo/bar.xtb"/>',
        '<file lang="fr-CA" path="translations/aw_strings_fr-CA.xtb"/>',
        '<file lang="fr-CA" path="translations/aw_strings_fr-CA.xtb" />',
        '  <file path="translations/aw_strings_ar.xtb" lang="ar" />',
    )
    # Lines that must be rejected, here because the path is not an .xtb one.
    rejected_lines = ('<file path="foo/bar.xml" />',)

    for accepted in accepted_lines:
      failure = (
          '_IsTranslationGrdOutputLine() returned False for [%s]' % accepted)
      self.assertTrue(_IsTranslationGrdOutputLine(accepted), failure)

    for rejected in rejected_lines:
      failure = (
          '_IsTranslationGrdOutputLine() returned True for [%s]' % rejected)
      self.assertFalse(_IsTranslationGrdOutputLine(rejected), failure)


def _CheckGrdTranslationElementRange(grd_lines, start, end,
                                     wanted_locales):
  """Check all <translations> sub-elements in specific input .grd lines range.

  This really checks the following:
    - Each item has a 'path' attribute.
    - Each such path value ends up with '.xtb'.

  Lines without a 'lang' attribute are ignored here.

  Args:
    grd_lines: Input .grd lines.
    start: Sub-range start position in input line list.
    end: Sub-range limit position in input line list.
    wanted_locales: Set of wanted Chromium locale names.
  Returns:
    List of error message strings for this input. Empty on success.
    Each message starts with the 1-based line number.
  """
  errors = []
  for pos in range(start, end):
    line = grd_lines[pos]
    lang = _GetXmlLangAttribute(line)
    if not lang:
      continue
    m = _RE_PATH_ATTRIBUTE.search(line)
    if not m:
      # The parentheses matter: '%' binds tighter than '+', so without them
      # this would try to add an integer to the formatted string.
      errors.append(
          '%d: Missing path attribute in <file> element' % (pos + 1))
    else:
      filename = m.group(1)
      if not filename.endswith('.xtb'):
        errors.append(
            '%d: Path should end with ".xtb": %s' % (pos + 1, filename))

  return errors


def _CheckGrdTranslations(grd_file, grd_lines, wanted_locales):
  """Check every run of .xtb <file> elements found in a .grd file.

  Args:
    grd_file: Input .grd file path.
    grd_lines: List of input .grd lines.
    wanted_locales: set of wanted Chromium locale names.
  Returns:
    List of error message strings. Empty on success.
  """
  # The default locale is left out of the locales expected here.
  translated_locales = wanted_locales - {_DEFAULT_LOCALE}

  errors = []
  for first, limit in _BuildIntervalList(grd_lines,
                                         _IsTranslationGrdOutputLine):
    errors.extend(
        _CheckGrdElementRangeLang(grd_lines, first, limit,
                                  translated_locales))
    errors.extend(
        _CheckGrdTranslationElementRange(grd_lines, first, limit,
                                         translated_locales))
  return errors


# Regular expression used to replace the lang attribute inside .xtb files.
# Group 1 captures the value of the lang attribute of the
# <translationbundle> element.
_RE_TRANSLATIONBUNDLE = re.compile('<translationbundle lang="(.*)">')


def _CreateFakeXtbFileFrom(src_xtb_path, dst_xtb_path, dst_locale):
  """Create a fake .xtb file by copying another one under a new locale.

  Args:
    src_xtb_path: Path to source .xtb file to copy from.
    dst_xtb_path: Path to destination .xtb file to write to.
    dst_locale: Destination locale, the lang attribute in the source file
      will be substituted with this value before its lines are written
      to the destination file.
  """
  with open(src_xtb_path) as src_file:
    src_lines = src_file.readlines()

  dst_lines = []
  for line in src_lines:
    match = _RE_TRANSLATIONBUNDLE.search(line)
    if match:
      # Swap only the captured lang value, keeping the rest of the line.
      line = line[:match.start(1)] + dst_locale + line[match.end(1):]
    dst_lines.append(line)

  with build_utils.AtomicOutput(dst_xtb_path) as tmp:
    tmp.writelines(dst_lines)


def _AddMissingLocalesInGrdTranslations(grd_file, grd_lines, wanted_locales):
  """Fix input .grd lines by adding missing .xtb <file> elements.

  New <file> lines are cloned from the existing one for the 'en-GB' locale.
  This also creates fake .xtb files from the one provided for 'en-GB'.

  Args:
    grd_file: Input .grd file path.
    grd_lines: Input .grd line list. New lines are inserted into this list.
    wanted_locales: set of Chromium locale names.
  Returns:
    A new list of .grd lines, containing new <file> elements when needed
    for locales from |wanted_locales| that were not part of the input.
  Raises:
    Exception: if a range with missing locales has no 'en-GB' <file> element
      to clone.
  """
  # The default locale is left out of the locales expected here.
  wanted_locales = wanted_locales - set([_DEFAULT_LOCALE])
  intervals = _BuildIntervalList(grd_lines, _IsTranslationGrdOutputLine)
  # Process the ranges in reverse order, so that inserting lines in one of
  # them does not shift the positions of those still to be processed.
  for start, end in reversed(intervals):
    locales = set()
    for pos in range(start, end):
      lang = _GetXmlLangAttribute(grd_lines[pos])
      locales.add(_FixChromiumLangAttribute(lang))

    missing_locales = wanted_locales.difference(locales)
    if not missing_locales:
      continue

    src_locale = 'en-GB'
    src_lang_attribute = 'lang="%s"' % src_locale
    src_line = None
    for pos in range(start, end):
      if src_lang_attribute in grd_lines[pos]:
        src_line = grd_lines[pos]
        break

    if not src_line:
      raise Exception(
          'Cannot find <file> element with "%s" lang attribute' % src_locale)

    # The path attribute is joined to the directory holding the .grd file.
    src_path = os.path.join(
        os.path.dirname(grd_file),
        _RE_PATH_ATTRIBUTE.search(src_line).group(1))

    # Insert before the last line of the range: the new lines stay inside
    # it, and the final sort puts them at their proper place.
    line_count = end - 1
    # Iterate in sorted order so that the result is deterministic.
    for locale in sorted(missing_locales):
      dst_line = src_line.replace(
          'lang="%s"' % src_locale, 'lang="%s"' % locale).replace(
              '_%s.xtb' % src_locale, '_%s.xtb' % locale)
      grd_lines.insert(line_count, dst_line)
      line_count += 1

      dst_path = src_path.replace('_%s.xtb' % src_locale, '_%s.xtb' % locale)
      _CreateFakeXtbFileFrom(src_path, dst_path, locale)

  # Sort the new <file> elements.
  return _SortGrdElementsRanges(grd_lines, _IsTranslationGrdOutputLine)


##########################################################################
##########################################################################
#####
#####    G N   A N D R O I D   O U T P U T S
#####
##########################################################################
##########################################################################

# Matches a whole line holding a double-quoted path to an .xml file located
# under a values/ or values-<qualifier>/ directory, followed by a comma.
# Group 1 is the optional '-<qualifier>' part, and group 2 the qualifier.
_RE_GN_VALUES_LIST_LINE = re.compile(
    r'^\s*".*values(\-([A-Za-z0-9-]+))?/.*\.xml",\s*$')

def _IsBuildGnInputFile(input_file):
  """Tell whether a file path names a BUILD.gn file.

  Args:
    input_file: File path to test.
  Returns:
    True iff the base name of |input_file| is exactly 'BUILD.gn'.
  """
  _, base_name = os.path.split(input_file)
  return base_name == 'BUILD.gn'


def _GetAndroidGnOutputLocale(line):
  """Return the Android locale of a GN list line naming an output .xml file.

  Args:
    line: A single BUILD.gn input line.
  Returns:
    None if |line| is not a list entry for an .xml file under a values
    directory. Otherwise, the qualifier found after 'values-' in the
    directory name, or the Android name of the default locale when the
    directory is plain 'values'.
  """
  match = _RE_GN_VALUES_LIST_LINE.match(line)
  if match is None:
    return None

  # Group 1 is optional and wraps group 2: it is empty for plain 'values/'.
  if not match.group(1):
    return resource_utils.ToAndroidLocaleName(_DEFAULT_LOCALE)

  return match.group(2)


def _IsAndroidGnOutputLine(line):
  """Tell whether a GN line is an Android-specific localized .xml output.

  Args:
    line: A single BUILD.gn input line.
  Returns:
    True iff _GetAndroidGnOutputLocale() finds a locale for |line|.
  """
  android_locale = _GetAndroidGnOutputLocale(line)
  return android_locale is not None


def _CheckGnOutputsRangeForLocalizedStrings(gn_lines, start, end):
  """Check that a range of GN lines corresponds to localized strings.

  Special case: Some BUILD.gn files list several non-localized .xml files
  that should be ignored by this function, e.g. in
  components/cronet/android/BUILD.gn, the following appears:

    inputs = [
      ...
      "sample/res/layout/activity_main.xml",
      "sample/res/layout/dialog_url.xml",
      "sample/res/values/dimens.xml",
      "sample/res/values/strings.xml",
      ...
    ]

  These are non-localized strings, and should be ignored. This function is
  used to detect them quickly.

  Args:
    gn_lines: Input BUILD.gn lines.
    start: Sub-range start position in input line list.
    end: Sub-range limit position in input line list.
  Returns:
    True if at least one line in the range does not contain 'values/',
    i.e. refers to something else than the unqualified values/ directory.
    False if every line contains 'values/', or if the range is empty.
  """
  return any('values/' not in gn_lines[pos] for pos in range(start, end))


def _CheckGnOutputsRange(gn_lines, start, end, wanted_locales):
  """Check one contiguous range of Android .xml output lines of a GN file.

  Args:
    gn_lines: List of lines from a BUILD.gn file.
    start: Index of the first line of the range.
    end: Index one past the last line of the range.
    wanted_locales: Set of Chromium locale names that should be listed.
  Returns:
    A list of error message strings. The list is empty when the range is not
    considered a list of localized strings, or when no problem was found.
  """
  if not _CheckGnOutputsRangeForLocalizedStrings(gn_lines, start, end):
    return []

  errors = []
  locales = set()
  # range() rather than xrange() so that this also runs under Python 3.
  for pos in range(start, end):
    line = gn_lines[pos]
    android_locale = _GetAndroidGnOutputLocale(line)
    # Every line of the range is expected to be an Android output line.
    assert android_locale is not None
    cr_locale = resource_utils.ToChromiumLocaleName(android_locale)
    if cr_locale in locales:
      errors.append('%s: Redefinition of output for "%s" locale' %
                    (pos + 1, android_locale))
    locales.add(cr_locale)

  extra_locales = locales.difference(wanted_locales)
  if extra_locales:
    errors.append('%d-%d: Extra locales: %s' % (start + 1, end + 1,
                                                sorted(extra_locales)))

  missing_locales = wanted_locales.difference(locales)
  if missing_locales:
    errors.append('%d-%d: Missing locales: %s' % (start + 1, end + 1,
                                                  sorted(missing_locales)))

  return errors


def _CheckGnAndroidOutputs(gn_file, gn_lines, wanted_locales):
  """Check every range of Android .xml output lines found in a GN file.

  Args:
    gn_file: Path of the BUILD.gn file (not used by this function).
    gn_lines: List of lines from the file.
    wanted_locales: Set of Chromium locale names that should be listed.
  Returns:
    The list of error messages collected from all ranges, in file order.
  """
  all_errors = []
  for first, limit in _BuildIntervalList(gn_lines, _IsAndroidGnOutputLine):
    all_errors.extend(
        _CheckGnOutputsRange(gn_lines, first, limit, wanted_locales))
  return all_errors


def _AddMissingLocalesInGnAndroidOutputs(gn_file, gn_lines, wanted_locales):
  """Add lines for missing locales to the Android .xml lists of a GN file.

  For each range of localized Android output lines, the line of the 'bg'
  locale is used as a template: it is copied once per missing locale, with
  its 'values-bg/' directory replaced by the one of the new locale.

  Args:
    gn_file: Path of the BUILD.gn file (not used by this function).
    gn_lines: List of lines from the file. New lines are inserted into this
      list in place.
    wanted_locales: Set of Chromium locale names that should be listed.
  Returns:
    The updated list of lines.
  Raises:
    Exception: if a range that needs new locales has no 'bg' template line.
  """
  intervals = _BuildIntervalList(gn_lines, _IsAndroidGnOutputLine)
  # NOTE: Since this may insert new lines to each interval, process the
  # list in reverse order to maintain valid (start,end) positions during
  # the iteration.
  for start, end in reversed(intervals):
    if not _CheckGnOutputsRangeForLocalizedStrings(gn_lines, start, end):
      continue

    # range() rather than xrange() so that this also runs under Python 3.
    locales = set()
    for pos in range(start, end):
      lang = _GetAndroidGnOutputLocale(gn_lines[pos])
      locales.add(resource_utils.ToChromiumLocaleName(lang))

    missing_locales = wanted_locales.difference(locales)
    if not missing_locales:
      continue

    src_locale = 'bg'
    src_values = 'values-%s/' % resource_utils.ToAndroidLocaleName(src_locale)
    src_line = None
    for pos in range(start, end):
      if src_values in gn_lines[pos]:
        src_line = gn_lines[pos]
        break

    if not src_line:
      raise Exception(
          'Cannot find output list item with "%s" locale' % src_locale)

    # NOTE(review): new lines are inserted before the last line of the
    # interval and the sort below stops at |line_count|. If
    # _SortListSubRange() treats its end position as exclusive, the last
    # original line is left out of the sort -- confirm this is intended.
    line_count = end - 1
    # Iterate in sorted order so that insertion order is deterministic.
    for locale in sorted(missing_locales):
      # Replace the exact directory string that was used to find the
      # template line, so that lookup and replacement cannot disagree.
      if locale == _DEFAULT_LOCALE:
        dst_line = src_line.replace(src_values, 'values/')
      else:
        dst_line = src_line.replace(
            src_values,
            'values-%s/' % resource_utils.ToAndroidLocaleName(locale))
      gn_lines.insert(line_count, dst_line)
      line_count += 1

    # Group 1 is None for the default 'values/' directory. Map it to '' so
    # that keys are always strings: it still sorts first, and Python 3 cannot
    # order None against str.
    gn_lines = _SortListSubRange(
        gn_lines, start, line_count,
        lambda line: _RE_GN_VALUES_LIST_LINE.match(line).group(1) or '')

  return gn_lines


##########################################################################
##########################################################################
#####
#####    T R A N S L A T I O N   E X P E C T A T I O N S
#####
##########################################################################
##########################################################################

# Base name of the translation expectations files, used in the help text of
# the 'update-expectations' command.
_EXPECTATIONS_FILENAME = 'translation_expectations.pyl'

# Technical note: the format of translation_expectations.pyl
# is a 'Python literal', which defines a python dictionary, so should
# be easy to parse. However, when modifying it, care should be taken
# to respect the line comments and the order of keys within the text
# file.


def _ReadPythonLiteralFile(pyl_path):
  """Load the Python data structure stored in a .pyl file.

  Args:
    pyl_path: Path to the .pyl file to read.
  Returns:
    The value obtained by evaluating the file content as a Python expression.
  """
  with open(pyl_path) as pyl_file:
    source_text = pyl_file.read()
  # NOTE: eval() executes arbitrary expressions, so this must only be used on
  # trusted files. Separate empty dictionaries are passed as globals and
  # locals so that the expression cannot see any name from this module.
  empty_globals, empty_locals = {}, {}
  return eval(source_text, empty_globals, empty_locals)


def _UpdateLocalesInExpectationLines(pyl_lines,
                                     wanted_locales,
                                     available_width=79):
  """Rewrite every "languages" list found in expectations file lines.

  Each line containing '"languages": [' is kept as is. The following lines,
  up to and including the first one ending with '],', are replaced by the
  sorted, quoted |wanted_locales| followed by a '],' line aligned with the
  start of '"languages"'.

  Args:
    pyl_lines: List of input lines from the file.
    wanted_locales: Set or list of new locale names.
    available_width: Optional, number of character columns used
      to word-wrap the new list items.
  Returns:
    New list of updated lines.
  Raises:
    Exception: if a list is not terminated by a line ending with '],'.
  """
  quoted_locales = ['"%s"' % name for name in sorted(wanted_locales)]
  list_marker = '"languages": ['
  total_lines = len(pyl_lines)
  output = []
  index = 0
  while index < total_lines:
    current = pyl_lines[index]
    index += 1
    output.append(current)

    margin = current.find(list_marker)
    if margin >= 0:
      # 1-based number of the line that opens the list, for error reporting.
      opening_line_number = index

      # Advance to the line that closes the list, dropping the old items.
      while (index < total_lines and
             not pyl_lines[index].rstrip().endswith('],')):
        index += 1

      if index == total_lines:
        raise Exception(
            '%d: Missing list termination!' % opening_line_number)

      # New items are indented two columns past the start of the key.
      item_indent = ' ' * (margin + 2)
      wrapped_items = _PrettyPrintListAsLines(
          quoted_locales, available_width - (margin + 2), trailing_comma=True)
      output.extend(item_indent + item for item in wrapped_items)
      output.append(' ' * margin + '],')
      # Step over the original closing line.
      index += 1

  return output


class _UpdateLocalesInExpectationLinesTest(unittest.TestCase):
  """Unit tests for _UpdateLocalesInExpectationLines()."""

  # Both "languages" lists must be replaced by the sorted test locales,
  # wrapped for a width of 40 columns, while comments and all other lines are
  # preserved. The second list checks that the new items are aligned on the
  # "languages" key even when the input items were badly indented.
  def test_simple(self):
    self.maxDiff = 1000
    input_text = r'''
# This comment should be preserved
# 23456789012345678901234567890123456789
{
  "android_grd": {
    "languages": [
      "aa", "bb", "cc", "dd", "ee",
      "ff", "gg", "hh", "ii", "jj",
      "kk"],
  },
  # Example with bad indentation in input.
  "another_grd": {
         "languages": [
  "aa", "bb", "cc", "dd", "ee", "ff", "gg", "hh", "ii", "jj", "kk",
      ],
  },
}
'''
    expected_text = r'''
# This comment should be preserved
# 23456789012345678901234567890123456789
{
  "android_grd": {
    "languages": [
      "A2", "AA", "BB", "CC", "DD",
      "E2", "EE", "FF", "GG", "HH",
      "I2", "II", "JJ", "KK",
    ],
  },
  # Example with bad indentation in input.
  "another_grd": {
         "languages": [
           "A2", "AA", "BB", "CC", "DD",
           "E2", "EE", "FF", "GG", "HH",
           "I2", "II", "JJ", "KK",
         ],
  },
}
'''
    input_lines = input_text.splitlines()
    # Deliberately unsorted, to check that the output lists are sorted.
    test_locales = ([
        'AA', 'BB', 'CC', 'DD', 'EE', 'FF', 'GG', 'HH', 'II', 'JJ', 'KK', 'A2',
        'E2', 'I2'
    ])
    expected_lines = expected_text.splitlines()
    self.assertListEqual(
        _UpdateLocalesInExpectationLines(input_lines, test_locales, 40),
        expected_lines)

  # A "languages" list that is never closed by a line ending with '],' must
  # raise. The message starts with the 1-based number of the line opening the
  # list, which is line 2 here because the literal begins with a newline.
  def test_missing_list_termination(self):
    input_lines = r'''
  "languages": ['
    "aa", "bb", "cc", "dd"
'''.splitlines()
    with self.assertRaises(Exception) as cm:
      _UpdateLocalesInExpectationLines(input_lines, ['a', 'b'], 40)

    self.assertEqual(str(cm.exception), '2: Missing list termination!')


def _UpdateLocalesInExpectationFile(pyl_path, wanted_locales):
  """Update all locales listed in a given expectations file.

  The default locale is never written to the file, and each remaining locale
  name is converted with _FixTranslationConsoleLocaleName() first.

  Args:
    pyl_path: Path to .pyl file to update.
    wanted_locales: List of locales that need to be written to
      the file.
  """
  tc_locales = {
      _FixTranslationConsoleLocaleName(locale)
      for locale in set(wanted_locales) - {_DEFAULT_LOCALE}
  }

  with open(pyl_path) as f:
    # Trailing whitespace (including the newline) is dropped from each line.
    input_lines = [line.rstrip() for line in f]

  updated_lines = _UpdateLocalesInExpectationLines(input_lines, tc_locales)
  with build_utils.AtomicOutput(pyl_path) as f:
    # write(), not writelines(): the content is a single string, which
    # writelines() would treat as a sequence of one-character lines.
    f.write('\n'.join(updated_lines) + '\n')


##########################################################################
##########################################################################
#####
#####    C H E C K   E V E R Y T H I N G
#####
##########################################################################
##########################################################################

# pylint: enable=unused-argument


def _IsAllInputFile(input_file):
  """Tell whether |input_file| is either a GRIT input or a BUILD.gn file.

  Args:
    input_file: Path (or bare name) of the file to test.
  Returns:
    The result of _IsGritInputFile() if it is true, otherwise the result of
    _IsBuildGnInputFile().
  """
  is_grit = _IsGritInputFile(input_file)
  return is_grit if is_grit else _IsBuildGnInputFile(input_file)


def _CheckAllFiles(input_file, input_lines, wanted_locales):
  """Run every check that applies to the type of |input_file|.

  Args:
    input_file: Path of the file being checked.
    input_lines: List of lines from the file.
    wanted_locales: Set of Chromium locale names that should be listed.
  Returns:
    A list of error messages. It is empty when no problem was found, or when
    the file is neither a GRIT input file nor a BUILD.gn file.
  """
  if _IsGritInputFile(input_file):
    checkers = (_CheckGrdTranslations, _CheckGrdAndroidOutputElements)
  elif _IsBuildGnInputFile(input_file):
    checkers = (_CheckGnAndroidOutputs,)
  else:
    checkers = ()

  found = []
  for checker in checkers:
    found.extend(checker(input_file, input_lines, wanted_locales))
  return found


def _AddMissingLocalesInAllFiles(input_file, input_lines, wanted_locales):
  """Apply every fix that applies to the type of |input_file|.

  Args:
    input_file: Path of the file being fixed.
    input_lines: List of lines from the file.
    wanted_locales: Set of Chromium locale names that should be listed.
  Returns:
    The list of updated lines. Files that are neither GRIT input files nor
    BUILD.gn files are returned unchanged.
  """
  # Default result, so that an unrecognized file type returns its input
  # instead of failing on an unbound local variable.
  lines = input_lines
  if _IsGritInputFile(input_file):
    lines = _AddMissingLocalesInGrdTranslations(
        input_file, input_lines, wanted_locales)
    # The second fix is applied on top of the output of the first one.
    lines = _AddMissingLocalesInGrdAndroidOutputs(
        input_file, lines, wanted_locales)
  elif _IsBuildGnInputFile(input_file):
    lines = _AddMissingLocalesInGnAndroidOutputs(
        input_file, input_lines, wanted_locales)
  return lines


##########################################################################
##########################################################################
#####
#####    C O M M A N D   H A N D L I N G
#####
##########################################################################
##########################################################################

class _Command(object):
  """Base class shared by every command understood by this script.

  How to write a new command:
    1) Re-define the following class-level fields in the derived class:
       - name: Command name (e.g. 'list-locales')
       - description: Command short description.
       - long_description: Optional. Command long description.
         NOTE: As a convenience, if the first character is a newline,
         it will be omitted in the help output.

    2) If the command takes arguments, override RegisterExtraArgs(), which
       receives the argument group of the command's own sub-parser.

    3) Implement a Run() method, which can read the parsed arguments
       from self.args.
  """
  name = None
  description = None
  long_description = None

  def __init__(self):
    self.args = None
    self._parser = None

  def RegisterExtraArgs(self, subparser):
    """Hook used by derived classes to add arguments. Does nothing here."""

  def RegisterArgs(self, parser):
    """Create the sub-parser of this command and register its arguments.

    Args:
      parser: Object providing add_parser(), such as the value returned by
        argparse.ArgumentParser.add_subparsers().
    """
    # The long description is preferred for the sub-command's own help page.
    help_page_text = self.long_description or self.description
    command_parser = parser.add_parser(
        self.name,
        help=self.description,
        description=help_page_text,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    # Lets the caller retrieve this instance as |command| after parsing.
    command_parser.set_defaults(command=self)
    self._parser = command_parser
    extra_group = command_parser.add_argument_group(
        '%s arguments' % self.name)
    self.RegisterExtraArgs(extra_group)

  def ProcessArgs(self, args):
    """Record the parsed arguments so that Run() can read them."""
    self.args = args


class _ListLocalesCommand(_Command):
  """Implement the 'list-locales' command to list locale lists of interest."""
  name = 'list-locales'
  description = 'List supported Chrome locales'
  long_description = r'''
List locales of interest, by default this prints all locales supported by
Chrome, but `--type=ios_unsupported` can be used for the list of locales
unsupported on iOS.

These values are extracted directly from build/config/locales.gni.

Additionally, use the --as-json argument to print the list as a JSON list,
instead of the default format (which is a space-separated list of locale names).
'''

  # Maps type argument to a function returning the corresponding locales list.
  TYPE_MAP = {
      'all': ChromeLocales,
      'ios_unsupported': IosUnsupportedLocales,
  }

  def RegisterExtraArgs(self, group):
    """Register the --as-json and --type options of this command."""
    group.add_argument(
        '--as-json',
        action='store_true',
        help='Output as JSON list.')
    group.add_argument(
        '--type',
        # Sorted so that the help text is deterministic. Iterating the dict
        # directly (instead of viewkeys()) also works under Python 3.
        choices=tuple(sorted(self.TYPE_MAP)),
        default='all',
        help='Select type of locale list to print.')

  def Run(self):
    """Print the selected locale list to stdout."""
    locale_list = self.TYPE_MAP[self.args.type]()
    if self.args.as_json:
      # json.dumps() emits double-quoted strings, as JSON requires; items
      # wrapped in single quotes are rejected by JSON parsers.
      print(json.dumps(list(locale_list)))
    else:
      print(' '.join(locale_list))


class _CheckInputFileBaseCommand(_Command):
  """Used as a base for other _Command subclasses that check input files.

  Subclasses should also define the following class-level variables:

  - select_file_func:
      A predicate that receives a file name (not path) and return True if it
      should be selected for inspection. Used when scanning directories with
      '--scan-dir <dir>'.

  - check_func:
  - fix_func:
      Two functions passed as parameters to _ProcessFile(), see relevant
      documentation in this function's definition.
  """
  select_file_func = None
  check_func = None
  fix_func = None

  def RegisterExtraArgs(self, group):
    """Register the options and positional inputs shared by check commands."""
    group.add_argument(
        '--scan-dir',
        action='append',
        help='Optional directory to scan for input files recursively.')
    group.add_argument(
        'input',
        nargs='*',
        help='Input file(s) to check.')
    group.add_argument(
        '--fix-inplace',
        action='store_true',
        help='Try to fix the files in-place too.')
    group.add_argument(
        '--add-locales',
        help='Space-separated list of additional locales to use')

  def Run(self):
    """Check, and optionally fix, all selected input files."""
    args = self.args
    # Work on a copy so that the parsed arguments are not modified below.
    input_files = list(args.input or [])
    # NOTE: The functions stored as class attributes are seen as methods when
    # read through |self|; __func__ gives back the plain function.
    if args.scan_dir:
      input_files.extend(_ScanDirectoriesForFiles(
          args.scan_dir, self.select_file_func.__func__))

    # Build a new set, to avoid modifying the list returned by ChromeLocales().
    locales = set(ChromeLocales())
    if args.add_locales:
      # split() without argument ignores repeated, leading and trailing
      # whitespace, which would otherwise produce empty locale names.
      locales.update(args.add_locales.split())

    for input_file in input_files:
      _ProcessFile(input_file,
                   locales,
                   self.check_func.__func__,
                   self.fix_func.__func__ if args.fix_inplace else None)
    print('%sDone.' % (_CONSOLE_START_LINE))


class _CheckGrdAndroidOutputsCommand(_CheckInputFileBaseCommand):
  """Implement the 'check-grd-android-outputs' command."""
  # Hooks used by the base class to select, check and fix input files.
  select_file_func = _IsGritInputFile
  check_func = _CheckGrdAndroidOutputElements
  fix_func = _AddMissingLocalesInGrdAndroidOutputs

  name = 'check-grd-android-outputs'
  description = ('Check the Android resource (.xml) files '
                 'outputs in GRIT input files.')
  long_description = r'''
Check the Android .xml files outputs in one or more input GRIT (.grd) files
for the following conditions:

    - Each item has a correct 'lang' attribute.
    - There are no duplicated lines for the same 'lang' attribute.
    - That there are no extra locales that Chromium doesn't want.
    - That no wanted locale is missing.
    - Filenames exist for each listed locale.
    - Filenames are well-formed.
'''


class _CheckGrdTranslationsCommand(_CheckInputFileBaseCommand):
  """Implement the 'check-grd-translations' command."""
  # Hooks used by the base class to select, check and fix input files.
  select_file_func = _IsGritInputFile
  check_func = _CheckGrdTranslations
  fix_func = _AddMissingLocalesInGrdTranslations

  name = 'check-grd-translations'
  description = ('Check the translation (.xtb) files '
                 'outputted by .grd input files.')
  long_description = r'''
Check the translation (.xtb) file outputs in one or more input GRIT (.grd) files
for the following conditions:

    - Each item has a correct 'lang' attribute.
    - There are no duplicated lines for the same 'lang' attribute.
    - That there are no extra locales that Chromium doesn't want.
    - That no wanted locale is missing.
    - Each item has a 'path' attribute.
    - Each such path value ends up with '.xtb'.
'''


class _CheckGnAndroidOutputsCommand(_CheckInputFileBaseCommand):
  """Implement the 'check-gn-android-outputs' command."""
  # Hooks used by the base class to select, check and fix input files.
  select_file_func = _IsBuildGnInputFile
  check_func = _CheckGnAndroidOutputs
  fix_func = _AddMissingLocalesInGnAndroidOutputs

  name = 'check-gn-android-outputs'
  description = ('Check the Android .xml file lists '
                 'in GN build files.')
  long_description = r'''
Check one or more BUILD.gn file, looking for lists of Android resource .xml
files, and checking that:

  - There are no duplicated output files in the list.
  - Each output file belongs to a wanted Chromium locale.
  - There are no output files for unwanted Chromium locales.
'''


class _CheckAllCommand(_CheckInputFileBaseCommand):
  """Implement the 'check-all' command."""
  # Hooks used by the base class to select, check and fix input files.
  select_file_func = _IsAllInputFile
  check_func = _CheckAllFiles
  fix_func = _AddMissingLocalesInAllFiles

  name = 'check-all'
  description = 'Check everything.'
  long_description = ('Equivalent to calling all other '
                      'check-xxx commands.')


class _UpdateExpectationsCommand(_Command):
  """Implement the 'update-expectations' command."""
  name = 'update-expectations'
  description = 'Update translation expectations file.'
  long_description = r'''
Update %s files to match the current list of locales supported by Chromium.
This is especially useful to add new locales before updating any GRIT or GN
input file with the --add-locales option.
''' % _EXPECTATIONS_FILENAME

  def RegisterExtraArgs(self, group):
    """Register the --add-locales option of this command."""
    group.add_argument(
        '--add-locales',
        help='Space-separated list of additional locales to use.')

  def Run(self):
    """Update each known expectations file, warning about missing ones."""
    # Build a new list, to avoid modifying the one returned by ChromeLocales().
    locales = list(ChromeLocales())
    add_locales = self.args.add_locales
    if add_locales:
      # split() without argument ignores repeated, leading and trailing
      # whitespace, which would otherwise produce empty locale names.
      locales.extend(add_locales.split())

    # Paths are relative to the top-level source directory.
    expectation_paths = [
        'tools/gritsettings/translation_expectations.pyl',
        'clank/tools/translation_expectations.pyl',
    ]
    missing_expectation_files = []
    # Iterate over the paths themselves: wrapping the list in enumerate()
    # yields (index, path) tuples, which os.path.join() cannot handle.
    for path in expectation_paths:
      file_path = os.path.join(_TOP_SRC_DIR, path)
      if not os.path.exists(file_path):
        missing_expectation_files.append(file_path)
        continue
      _UpdateLocalesInExpectationFile(file_path, locales)

    if missing_expectation_files:
      sys.stderr.write('WARNING: Missing file(s): %s\n' %
                       (', '.join(missing_expectation_files)))


class _UnitTestsCommand(_Command):
  """Implement the 'unit-tests' command, which runs this script's tests."""
  name = 'unit-tests'
  description = 'Run internal unit-tests for this script'

  def RegisterExtraArgs(self, group):
    """Register the verbosity option and the extra unittest arguments."""
    # default=0: without it, the 'count' action leaves the value as None when
    # the flag is absent, and the unittest runner compares its verbosity
    # against integers, which fails for None under Python 3.
    group.add_argument(
        '-v', '--verbose', action='count', default=0,
        help='Increase test verbosity.')
    # Everything left on the command line is forwarded to unittest.main().
    group.add_argument('args', nargs=argparse.REMAINDER)

  def Run(self):
    """Run the tests through unittest.main()."""
    argv = [_SCRIPT_NAME] + self.args.args
    unittest.main(argv=argv, verbosity=self.args.verbose)


# List of all commands supported by this script.
# List of all commands supported by this script. main() creates one instance
# of each class listed here and registers it as a sub-command.
_COMMANDS = [
    _ListLocalesCommand,
    _CheckGrdAndroidOutputsCommand,
    _CheckGrdTranslationsCommand,
    _CheckGnAndroidOutputsCommand,
    _CheckAllCommand,
    _UpdateExpectationsCommand,
    _UnitTestsCommand,
]


def main(argv):
  """Parse the command line and run the selected command.

  Args:
    argv: List of command-line arguments, without the program name. When it
      is empty, the help text is printed instead.
  """
  parser = argparse.ArgumentParser(
      description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)

  subparsers = parser.add_subparsers()
  commands = [clazz() for clazz in _COMMANDS]
  for command in commands:
    command.RegisterArgs(subparsers)

  if not argv:
    argv = ['--help']

  args = parser.parse_args(argv)
  # Each sub-parser stores its command instance as |command|. Under Python 3
  # sub-commands are optional, so the attribute is missing when none was
  # given: report a usage error rather than failing with AttributeError.
  selected = getattr(args, 'command', None)
  if selected is None:
    parser.error('missing command name, use --help to list all commands')
  selected.ProcessArgs(args)
  selected.Run()


if __name__ == "__main__":
  main(sys.argv[1:])