summaryrefslogtreecommitdiffstats
path: root/third_party/libwebrtc/tools/grit/grit/node/message.py
blob: 4fa83cf26b6f8a10431cbb14e0cf6b23279d0bba (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

'''Handling of the <message> element.
'''

from __future__ import print_function

import re

import six

from grit.node import base

from grit import clique
from grit import exception
from grit import lazy_re
from grit import tclib
from grit import util


# Matches exactly three dots ending a line or followed by whitespace.
_ELLIPSIS_PATTERN = lazy_re.compile(r'(?<!\.)\.\.\.(?=$|\s)')
_ELLIPSIS_SYMBOL = u'\u2026'  # Ellipsis

# Finds whitespace at the start and end of a string which can be multiline.
_WHITESPACE = lazy_re.compile(r'(?P<start>\s*)(?P<body>.+?)(?P<end>\s*)\Z',
                              re.DOTALL | re.MULTILINE)

# <ph> placeholder elements should contain the special character formatters
# used to format <ph> element content.
# Android format.
_ANDROID_FORMAT = (r'%[1-9]+\$'
                   r'([-#+ 0,(]*)([0-9]+)?(\.[0-9]+)?'
                   r'([bBhHsScCdoxXeEfgGaAtT%n])')
# Chrome l10n format.
_CHROME_FORMAT = r'\$+\d'
# Windows EWT numeric and GRIT %s %d formats.
_OTHER_FORMAT = r'%[0-9sd]'

# Finds formatters that must be in a placeholder (<ph>) element.
_FORMATTERS = lazy_re.compile(
    '(%s)|(%s)|(%s)' % (_ANDROID_FORMAT, _CHROME_FORMAT, _OTHER_FORMAT))
_BAD_PLACEHOLDER_MSG = ('ERROR: Placeholder formatter found outside of <ph> '
                        'tag in message "%s" in %s.')
_INVALID_PH_CHAR_MSG = ('ERROR: Invalid format characters found in message '
                        '"%s" <ph> tag in %s.')

# Finds HTML tag tokens.
_HTMLTOKEN = lazy_re.compile(r'<[/]?[a-z][a-z0-9]*[^>]*>', re.I)

# Finds HTML entities.
_HTMLENTITY = lazy_re.compile(r'&[^\s]*;')


class MessageNode(base.ContentNode):
  '''A <message> element.'''

  # For splitting a list of things that can be separated by commas or
  # whitespace
  _SPLIT_RE = lazy_re.compile(r'\s*,\s*|\s+')

  def __init__(self):
    super(MessageNode, self).__init__()
    # Valid after EndParsing, this is the MessageClique that contains the
    # source message and any translations of it that have been loaded.
    self.clique = None

    # We don't send leading and trailing whitespace into the translation
    # console, but rather tack it onto the source message and any
    # translations when formatting them into RC files or what have you.
    self.ws_at_start = ''  # Any whitespace characters at the start of the text
    self.ws_at_end = ''  # --"-- at the end of the text

    # A list of "shortcut groups" this message is in.  We check to make sure
    # that shortcut keys (e.g. &J) within each shortcut group are unique.
    self.shortcut_groups_ = []

    # Formatter-specific data used to control the output of individual strings.
    # formatter_data is a space separated list of C preprocessor-style
    # definitions. Names without values are given the empty string value.
    # Example: "foo=5 bar baz=100"
    self.formatter_data = {}

    # Whether or not to convert ... -> U+2026 within Translate().
    self._replace_ellipsis = False

  def _IsValidChild(self, child):
    return isinstance(child, (PhNode))

  def _IsValidAttribute(self, name, value):
    if name not in [
        'name', 'offset', 'translateable', 'desc', 'meaning',
        'internal_comment', 'shortcut_groups', 'custom_type', 'validation_expr',
        'use_name_for_id', 'sub_variable', 'formatter_data',
        'is_accessibility_with_no_ui'
    ]:
      return False
    if (name in ('translateable', 'sub_variable') and
        value not in ['true', 'false']):
      return False
    return True

  def SetReplaceEllipsis(self, value):
    r'''Sets whether to replace ... with \u2026.
    '''
    self._replace_ellipsis = value

  def MandatoryAttributes(self):
    return ['name|offset']

  def DefaultAttributes(self):
    return {
        'custom_type': '',
        'desc': '',
        'formatter_data': '',
        'internal_comment': '',
        'is_accessibility_with_no_ui': 'false',
        'meaning': '',
        'shortcut_groups': '',
        'sub_variable': 'false',
        'translateable': 'true',
        'use_name_for_id': 'false',
        'validation_expr': '',
    }

  def HandleAttribute(self, attrib, value):
    base.ContentNode.HandleAttribute(self, attrib, value)
    if attrib != 'formatter_data':
      return

    # Parse value, a space-separated list of defines, into a dict.
    # Example: "foo=5 bar" -> {'foo':'5', 'bar':''}
    for item in value.split():
      name, _, val = item.partition('=')
      self.formatter_data[name] = val

  def GetTextualIds(self):
    '''
    Returns the concatenation of the parent's node first_id and
    this node's offset if it has one, otherwise just call the
    superclass' implementation
    '''
    if 'offset' not in self.attrs:
      return super(MessageNode, self).GetTextualIds()

    # we search for the first grouping node in the parents' list
    # to take care of the case where the first parent is an <if> node
    grouping_parent = self.parent
    import grit.node.empty
    while grouping_parent and not isinstance(grouping_parent,
                                             grit.node.empty.GroupingNode):
      grouping_parent = grouping_parent.parent

    assert 'first_id' in grouping_parent.attrs
    return [grouping_parent.attrs['first_id'] + '_' + self.attrs['offset']]

  def IsTranslateable(self):
    return self.attrs['translateable'] == 'true'

  def EndParsing(self):
    super(MessageNode, self).EndParsing()

    # Make the text (including placeholder references) and list of placeholders,
    # verify placeholder formats, then strip and store leading and trailing
    # whitespace and create the tclib.Message() and a clique to contain it.

    text = ''
    placeholders = []

    for item in self.mixed_content:
      if isinstance(item, six.string_types):
        # Not a <ph> element: fail if any <ph> formatters are detected.
        if _FORMATTERS.search(item):
          print(_BAD_PLACEHOLDER_MSG % (item, self.source))
          raise exception.PlaceholderNotInsidePhNode
        text += item
      else:
        # Extract the <ph> element components.
        presentation = item.attrs['name'].upper()
        text += presentation
        ex = ' '  # <ex> example element cdata if present.
        if len(item.children):
          ex = item.children[0].GetCdata()
        original = item.GetCdata()

        # Sanity check the <ph> element content.
        cdata = original
        # Replace all HTML tag tokens in cdata.
        match = _HTMLTOKEN.search(cdata)
        while match:
          cdata = cdata.replace(match.group(0), '_')
          match = _HTMLTOKEN.search(cdata)
        # Replace all HTML entities in cdata.
        match = _HTMLENTITY.search(cdata)
        while match:
          cdata = cdata.replace(match.group(0), '_')
          match = _HTMLENTITY.search(cdata)
        # Remove first matching formatter from cdata.
        match = _FORMATTERS.search(cdata)
        if match:
          cdata = cdata.replace(match.group(0), '')
        # Fail if <ph> special chars remain in cdata.
        if re.search(r'[%\$]', cdata):
          message_id = self.attrs['name'] + ' ' + original;
          print(_INVALID_PH_CHAR_MSG % (message_id, self.source))
          raise exception.InvalidCharactersInsidePhNode

        # Otherwise, accept this <ph> placeholder.
        placeholders.append(tclib.Placeholder(presentation, original, ex))

    m = _WHITESPACE.match(text)
    if m:
      self.ws_at_start = m.group('start')
      self.ws_at_end = m.group('end')
      text = m.group('body')

    self.shortcut_groups_ = self._SPLIT_RE.split(self.attrs['shortcut_groups'])
    self.shortcut_groups_ = [i for i in self.shortcut_groups_ if i != '']

    description_or_id = self.attrs['desc']
    if description_or_id == '' and 'name' in self.attrs:
      description_or_id = 'ID: %s' % self.attrs['name']

    assigned_id = None
    if self.attrs['use_name_for_id'] == 'true':
      assigned_id = self.attrs['name']
    message = tclib.Message(text=text, placeholders=placeholders,
                            description=description_or_id,
                            meaning=self.attrs['meaning'],
                            assigned_id=assigned_id)
    self.InstallMessage(message)

  def InstallMessage(self, message):
    '''Sets this node's clique from a tclib.Message instance.

    Args:
      message: A tclib.Message.
    '''
    self.clique = self.UberClique().MakeClique(message, self.IsTranslateable())
    for group in self.shortcut_groups_:
      self.clique.AddToShortcutGroup(group)
    if self.attrs['custom_type'] != '':
      self.clique.SetCustomType(util.NewClassInstance(self.attrs['custom_type'],
                                                      clique.CustomType))
    elif self.attrs['validation_expr'] != '':
      self.clique.SetCustomType(
        clique.OneOffCustomType(self.attrs['validation_expr']))

  def SubstituteMessages(self, substituter):
    '''Applies substitution to this message.

    Args:
      substituter: a grit.util.Substituter object.
    '''
    message = substituter.SubstituteMessage(self.clique.GetMessage())
    if message is not self.clique.GetMessage():
      self.InstallMessage(message)

  def GetCliques(self):
    return [self.clique] if self.clique else []

  def Translate(self, lang):
    '''Returns a translated version of this message.
    '''
    assert self.clique
    msg = self.clique.MessageForLanguage(lang,
                                         self.PseudoIsAllowed(),
                                         self.ShouldFallbackToEnglish()
                                         ).GetRealContent()
    if self._replace_ellipsis:
      msg = _ELLIPSIS_PATTERN.sub(_ELLIPSIS_SYMBOL, msg)
    # Always remove all byte order marks (\uFEFF) https://crbug.com/1033305
    msg = msg.replace(u'\uFEFF','')
    return msg.replace('[GRITLANGCODE]', lang)

  def NameOrOffset(self):
    key = 'name' if 'name' in self.attrs else 'offset'
    return self.attrs[key]

  def ExpandVariables(self):
    '''We always expand variables on Messages.'''
    return True

  def GetDataPackValue(self, lang, encoding):
    '''Returns a str represenation for a data_pack entry.'''
    message = self.ws_at_start + self.Translate(lang) + self.ws_at_end
    return util.Encode(message, encoding)

  def IsResourceMapSource(self):
    return True

  @staticmethod
  def Construct(parent, message, name, desc='', meaning='', translateable=True):
    '''Constructs a new message node that is a child of 'parent', with the
    name, desc, meaning and translateable attributes set using the same-named
    parameters and the text of the message and any placeholders taken from
    'message', which must be a tclib.Message() object.'''
    # Convert type to appropriate string
    translateable = 'true' if translateable else 'false'

    node = MessageNode()
    node.StartParsing('message', parent)
    node.HandleAttribute('name', name)
    node.HandleAttribute('desc', desc)
    node.HandleAttribute('meaning', meaning)
    node.HandleAttribute('translateable', translateable)

    items = message.GetContent()
    for ix, item in enumerate(items):
      if isinstance(item, six.string_types):
        # Ensure whitespace at front and back of message is correctly handled.
        if ix == 0:
          item = "'''" + item
        if ix == len(items) - 1:
          item = item + "'''"

        node.AppendContent(item)
      else:
        phnode = PhNode()
        phnode.StartParsing('ph', node)
        phnode.HandleAttribute('name', item.GetPresentation())
        phnode.AppendContent(item.GetOriginal())

        if len(item.GetExample()) and item.GetExample() != ' ':
          exnode = ExNode()
          exnode.StartParsing('ex', phnode)
          exnode.AppendContent(item.GetExample())
          exnode.EndParsing()
          phnode.AddChild(exnode)

        phnode.EndParsing()
        node.AddChild(phnode)

    node.EndParsing()
    return node


class PhNode(base.ContentNode):
  '''A <ph> element.'''

  def _IsValidChild(self, child):
    return isinstance(child, ExNode)

  def MandatoryAttributes(self):
    return ['name']

  def EndParsing(self):
    super(PhNode, self).EndParsing()
    # We only allow a single example for each placeholder
    if len(self.children) > 1:
      raise exception.TooManyExamples()

  def GetTextualIds(self):
    # The 'name' attribute is not an ID.
    return []


class ExNode(base.ContentNode):
  '''An <ex> element.'''
  pass