summaryrefslogtreecommitdiffstats
path: root/third_party/python/compare_locales
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
commit36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/python/compare_locales
parentInitial commit. (diff)
downloadfirefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr.upstream/115.7.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/python/compare_locales')
-rw-r--r--third_party/python/compare_locales/compare_locales-9.0.1.dist-info/LICENSE.md373
-rw-r--r--third_party/python/compare_locales/compare_locales-9.0.1.dist-info/METADATA84
-rw-r--r--third_party/python/compare_locales/compare_locales-9.0.1.dist-info/RECORD45
-rw-r--r--third_party/python/compare_locales/compare_locales-9.0.1.dist-info/WHEEL6
-rw-r--r--third_party/python/compare_locales/compare_locales-9.0.1.dist-info/entry_points.txt3
-rw-r--r--third_party/python/compare_locales/compare_locales-9.0.1.dist-info/top_level.txt1
-rw-r--r--third_party/python/compare_locales/compare_locales/__init__.py1
-rw-r--r--third_party/python/compare_locales/compare_locales/checks/__init__.py27
-rw-r--r--third_party/python/compare_locales/compare_locales/checks/android.py256
-rw-r--r--third_party/python/compare_locales/compare_locales/checks/base.py122
-rw-r--r--third_party/python/compare_locales/compare_locales/checks/dtd.py238
-rw-r--r--third_party/python/compare_locales/compare_locales/checks/fluent.py351
-rw-r--r--third_party/python/compare_locales/compare_locales/checks/properties.py162
-rw-r--r--third_party/python/compare_locales/compare_locales/commands.py203
-rw-r--r--third_party/python/compare_locales/compare_locales/compare/__init__.py89
-rw-r--r--third_party/python/compare_locales/compare_locales/compare/content.py304
-rw-r--r--third_party/python/compare_locales/compare_locales/compare/observer.py215
-rw-r--r--third_party/python/compare_locales/compare_locales/compare/utils.py133
-rw-r--r--third_party/python/compare_locales/compare_locales/integration_tests/__init__.py5
-rw-r--r--third_party/python/compare_locales/compare_locales/integration_tests/test_plurals.py51
-rw-r--r--third_party/python/compare_locales/compare_locales/keyedtuple.py55
-rw-r--r--third_party/python/compare_locales/compare_locales/lint/__init__.py0
-rw-r--r--third_party/python/compare_locales/compare_locales/lint/cli.py93
-rw-r--r--third_party/python/compare_locales/compare_locales/lint/linter.py121
-rw-r--r--third_party/python/compare_locales/compare_locales/lint/util.py38
-rw-r--r--third_party/python/compare_locales/compare_locales/merge.py143
-rw-r--r--third_party/python/compare_locales/compare_locales/mozpath.py154
-rw-r--r--third_party/python/compare_locales/compare_locales/parser/__init__.py81
-rw-r--r--third_party/python/compare_locales/compare_locales/parser/android.py303
-rw-r--r--third_party/python/compare_locales/compare_locales/parser/base.py443
-rw-r--r--third_party/python/compare_locales/compare_locales/parser/defines.py104
-rw-r--r--third_party/python/compare_locales/compare_locales/parser/dtd.py115
-rw-r--r--third_party/python/compare_locales/compare_locales/parser/fluent.py218
-rw-r--r--third_party/python/compare_locales/compare_locales/parser/ini.py56
-rw-r--r--third_party/python/compare_locales/compare_locales/parser/po.py125
-rw-r--r--third_party/python/compare_locales/compare_locales/parser/properties.py113
-rw-r--r--third_party/python/compare_locales/compare_locales/paths/__init__.py53
-rw-r--r--third_party/python/compare_locales/compare_locales/paths/configparser.py138
-rw-r--r--third_party/python/compare_locales/compare_locales/paths/files.py224
-rw-r--r--third_party/python/compare_locales/compare_locales/paths/ini.py224
-rw-r--r--third_party/python/compare_locales/compare_locales/paths/matcher.py470
-rw-r--r--third_party/python/compare_locales/compare_locales/paths/project.py260
-rw-r--r--third_party/python/compare_locales/compare_locales/plurals.py221
-rw-r--r--third_party/python/compare_locales/compare_locales/serializer.py137
-rw-r--r--third_party/python/compare_locales/compare_locales/util.py11
45 files changed, 6569 insertions, 0 deletions
diff --git a/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/LICENSE.md b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/LICENSE.md
new file mode 100644
index 0000000000..a612ad9813
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/LICENSE.md
@@ -0,0 +1,373 @@
+Mozilla Public License Version 2.0
+==================================
+
+1. Definitions
+--------------
+
+1.1. "Contributor"
+ means each individual or legal entity that creates, contributes to
+ the creation of, or owns Covered Software.
+
+1.2. "Contributor Version"
+ means the combination of the Contributions of others (if any) used
+ by a Contributor and that particular Contributor's Contribution.
+
+1.3. "Contribution"
+ means Covered Software of a particular Contributor.
+
+1.4. "Covered Software"
+ means Source Code Form to which the initial Contributor has attached
+ the notice in Exhibit A, the Executable Form of such Source Code
+ Form, and Modifications of such Source Code Form, in each case
+ including portions thereof.
+
+1.5. "Incompatible With Secondary Licenses"
+ means
+
+ (a) that the initial Contributor has attached the notice described
+ in Exhibit B to the Covered Software; or
+
+ (b) that the Covered Software was made available under the terms of
+ version 1.1 or earlier of the License, but not also under the
+ terms of a Secondary License.
+
+1.6. "Executable Form"
+ means any form of the work other than Source Code Form.
+
+1.7. "Larger Work"
+ means a work that combines Covered Software with other material, in
+ a separate file or files, that is not Covered Software.
+
+1.8. "License"
+ means this document.
+
+1.9. "Licensable"
+ means having the right to grant, to the maximum extent possible,
+ whether at the time of the initial grant or subsequently, any and
+ all of the rights conveyed by this License.
+
+1.10. "Modifications"
+ means any of the following:
+
+ (a) any file in Source Code Form that results from an addition to,
+ deletion from, or modification of the contents of Covered
+ Software; or
+
+ (b) any new file in Source Code Form that contains any Covered
+ Software.
+
+1.11. "Patent Claims" of a Contributor
+ means any patent claim(s), including without limitation, method,
+ process, and apparatus claims, in any patent Licensable by such
+ Contributor that would be infringed, but for the grant of the
+ License, by the making, using, selling, offering for sale, having
+ made, import, or transfer of either its Contributions or its
+ Contributor Version.
+
+1.12. "Secondary License"
+ means either the GNU General Public License, Version 2.0, the GNU
+ Lesser General Public License, Version 2.1, the GNU Affero General
+ Public License, Version 3.0, or any later versions of those
+ licenses.
+
+1.13. "Source Code Form"
+ means the form of the work preferred for making modifications.
+
+1.14. "You" (or "Your")
+ means an individual or a legal entity exercising rights under this
+ License. For legal entities, "You" includes any entity that
+ controls, is controlled by, or is under common control with You. For
+ purposes of this definition, "control" means (a) the power, direct
+ or indirect, to cause the direction or management of such entity,
+ whether by contract or otherwise, or (b) ownership of more than
+ fifty percent (50%) of the outstanding shares or beneficial
+ ownership of such entity.
+
+2. License Grants and Conditions
+--------------------------------
+
+2.1. Grants
+
+Each Contributor hereby grants You a world-wide, royalty-free,
+non-exclusive license:
+
+(a) under intellectual property rights (other than patent or trademark)
+ Licensable by such Contributor to use, reproduce, make available,
+ modify, display, perform, distribute, and otherwise exploit its
+ Contributions, either on an unmodified basis, with Modifications, or
+ as part of a Larger Work; and
+
+(b) under Patent Claims of such Contributor to make, use, sell, offer
+ for sale, have made, import, and otherwise transfer either its
+ Contributions or its Contributor Version.
+
+2.2. Effective Date
+
+The licenses granted in Section 2.1 with respect to any Contribution
+become effective for each Contribution on the date the Contributor first
+distributes such Contribution.
+
+2.3. Limitations on Grant Scope
+
+The licenses granted in this Section 2 are the only rights granted under
+this License. No additional rights or licenses will be implied from the
+distribution or licensing of Covered Software under this License.
+Notwithstanding Section 2.1(b) above, no patent license is granted by a
+Contributor:
+
+(a) for any code that a Contributor has removed from Covered Software;
+ or
+
+(b) for infringements caused by: (i) Your and any other third party's
+ modifications of Covered Software, or (ii) the combination of its
+ Contributions with other software (except as part of its Contributor
+ Version); or
+
+(c) under Patent Claims infringed by Covered Software in the absence of
+ its Contributions.
+
+This License does not grant any rights in the trademarks, service marks,
+or logos of any Contributor (except as may be necessary to comply with
+the notice requirements in Section 3.4).
+
+2.4. Subsequent Licenses
+
+No Contributor makes additional grants as a result of Your choice to
+distribute the Covered Software under a subsequent version of this
+License (see Section 10.2) or under the terms of a Secondary License (if
+permitted under the terms of Section 3.3).
+
+2.5. Representation
+
+Each Contributor represents that the Contributor believes its
+Contributions are its original creation(s) or it has sufficient rights
+to grant the rights to its Contributions conveyed by this License.
+
+2.6. Fair Use
+
+This License is not intended to limit any rights You have under
+applicable copyright doctrines of fair use, fair dealing, or other
+equivalents.
+
+2.7. Conditions
+
+Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
+in Section 2.1.
+
+3. Responsibilities
+-------------------
+
+3.1. Distribution of Source Form
+
+All distribution of Covered Software in Source Code Form, including any
+Modifications that You create or to which You contribute, must be under
+the terms of this License. You must inform recipients that the Source
+Code Form of the Covered Software is governed by the terms of this
+License, and how they can obtain a copy of this License. You may not
+attempt to alter or restrict the recipients' rights in the Source Code
+Form.
+
+3.2. Distribution of Executable Form
+
+If You distribute Covered Software in Executable Form then:
+
+(a) such Covered Software must also be made available in Source Code
+ Form, as described in Section 3.1, and You must inform recipients of
+ the Executable Form how they can obtain a copy of such Source Code
+ Form by reasonable means in a timely manner, at a charge no more
+ than the cost of distribution to the recipient; and
+
+(b) You may distribute such Executable Form under the terms of this
+ License, or sublicense it under different terms, provided that the
+ license for the Executable Form does not attempt to limit or alter
+ the recipients' rights in the Source Code Form under this License.
+
+3.3. Distribution of a Larger Work
+
+You may create and distribute a Larger Work under terms of Your choice,
+provided that You also comply with the requirements of this License for
+the Covered Software. If the Larger Work is a combination of Covered
+Software with a work governed by one or more Secondary Licenses, and the
+Covered Software is not Incompatible With Secondary Licenses, this
+License permits You to additionally distribute such Covered Software
+under the terms of such Secondary License(s), so that the recipient of
+the Larger Work may, at their option, further distribute the Covered
+Software under the terms of either this License or such Secondary
+License(s).
+
+3.4. Notices
+
+You may not remove or alter the substance of any license notices
+(including copyright notices, patent notices, disclaimers of warranty,
+or limitations of liability) contained within the Source Code Form of
+the Covered Software, except that You may alter any license notices to
+the extent required to remedy known factual inaccuracies.
+
+3.5. Application of Additional Terms
+
+You may choose to offer, and to charge a fee for, warranty, support,
+indemnity or liability obligations to one or more recipients of Covered
+Software. However, You may do so only on Your own behalf, and not on
+behalf of any Contributor. You must make it absolutely clear that any
+such warranty, support, indemnity, or liability obligation is offered by
+You alone, and You hereby agree to indemnify every Contributor for any
+liability incurred by such Contributor as a result of warranty, support,
+indemnity or liability terms You offer. You may include additional
+disclaimers of warranty and limitations of liability specific to any
+jurisdiction.
+
+4. Inability to Comply Due to Statute or Regulation
+---------------------------------------------------
+
+If it is impossible for You to comply with any of the terms of this
+License with respect to some or all of the Covered Software due to
+statute, judicial order, or regulation then You must: (a) comply with
+the terms of this License to the maximum extent possible; and (b)
+describe the limitations and the code they affect. Such description must
+be placed in a text file included with all distributions of the Covered
+Software under this License. Except to the extent prohibited by statute
+or regulation, such description must be sufficiently detailed for a
+recipient of ordinary skill to be able to understand it.
+
+5. Termination
+--------------
+
+5.1. The rights granted under this License will terminate automatically
+if You fail to comply with any of its terms. However, if You become
+compliant, then the rights granted under this License from a particular
+Contributor are reinstated (a) provisionally, unless and until such
+Contributor explicitly and finally terminates Your grants, and (b) on an
+ongoing basis, if such Contributor fails to notify You of the
+non-compliance by some reasonable means prior to 60 days after You have
+come back into compliance. Moreover, Your grants from a particular
+Contributor are reinstated on an ongoing basis if such Contributor
+notifies You of the non-compliance by some reasonable means, this is the
+first time You have received notice of non-compliance with this License
+from such Contributor, and You become compliant prior to 30 days after
+Your receipt of the notice.
+
+5.2. If You initiate litigation against any entity by asserting a patent
+infringement claim (excluding declaratory judgment actions,
+counter-claims, and cross-claims) alleging that a Contributor Version
+directly or indirectly infringes any patent, then the rights granted to
+You by any and all Contributors for the Covered Software under Section
+2.1 of this License shall terminate.
+
+5.3. In the event of termination under Sections 5.1 or 5.2 above, all
+end user license agreements (excluding distributors and resellers) which
+have been validly granted by You or Your distributors under this License
+prior to termination shall survive termination.
+
+************************************************************************
+* *
+* 6. Disclaimer of Warranty *
+* ------------------------- *
+* *
+* Covered Software is provided under this License on an "as is" *
+* basis, without warranty of any kind, either expressed, implied, or *
+* statutory, including, without limitation, warranties that the *
+* Covered Software is free of defects, merchantable, fit for a *
+* particular purpose or non-infringing. The entire risk as to the *
+* quality and performance of the Covered Software is with You. *
+* Should any Covered Software prove defective in any respect, You *
+* (not any Contributor) assume the cost of any necessary servicing, *
+* repair, or correction. This disclaimer of warranty constitutes an *
+* essential part of this License. No use of any Covered Software is *
+* authorized under this License except under this disclaimer. *
+* *
+************************************************************************
+
+************************************************************************
+* *
+* 7. Limitation of Liability *
+* -------------------------- *
+* *
+* Under no circumstances and under no legal theory, whether tort *
+* (including negligence), contract, or otherwise, shall any *
+* Contributor, or anyone who distributes Covered Software as *
+* permitted above, be liable to You for any direct, indirect, *
+* special, incidental, or consequential damages of any character *
+* including, without limitation, damages for lost profits, loss of *
+* goodwill, work stoppage, computer failure or malfunction, or any *
+* and all other commercial damages or losses, even if such party *
+* shall have been informed of the possibility of such damages. This *
+* limitation of liability shall not apply to liability for death or *
+* personal injury resulting from such party's negligence to the *
+* extent applicable law prohibits such limitation. Some *
+* jurisdictions do not allow the exclusion or limitation of *
+* incidental or consequential damages, so this exclusion and *
+* limitation may not apply to You. *
+* *
+************************************************************************
+
+8. Litigation
+-------------
+
+Any litigation relating to this License may be brought only in the
+courts of a jurisdiction where the defendant maintains its principal
+place of business and such litigation shall be governed by laws of that
+jurisdiction, without reference to its conflict-of-law provisions.
+Nothing in this Section shall prevent a party's ability to bring
+cross-claims or counter-claims.
+
+9. Miscellaneous
+----------------
+
+This License represents the complete agreement concerning the subject
+matter hereof. If any provision of this License is held to be
+unenforceable, such provision shall be reformed only to the extent
+necessary to make it enforceable. Any law or regulation which provides
+that the language of a contract shall be construed against the drafter
+shall not be used to construe this License against a Contributor.
+
+10. Versions of the License
+---------------------------
+
+10.1. New Versions
+
+Mozilla Foundation is the license steward. Except as provided in Section
+10.3, no one other than the license steward has the right to modify or
+publish new versions of this License. Each version will be given a
+distinguishing version number.
+
+10.2. Effect of New Versions
+
+You may distribute the Covered Software under the terms of the version
+of the License under which You originally received the Covered Software,
+or under the terms of any subsequent version published by the license
+steward.
+
+10.3. Modified Versions
+
+If you create software not governed by this License, and you want to
+create a new license for such software, you may create and use a
+modified version of this License if you rename the license and remove
+any references to the name of the license steward (except to note that
+such modified license differs from this License).
+
+10.4. Distributing Source Code Form that is Incompatible With Secondary
+Licenses
+
+If You choose to distribute Source Code Form that is Incompatible With
+Secondary Licenses under the terms of this version of the License, the
+notice described in Exhibit B of this License must be attached.
+
+Exhibit A - Source Code Form License Notice
+-------------------------------------------
+
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+If it is not possible or desirable to put the notice in a particular
+file, then You may include the notice in a location (such as a LICENSE
+file in a relevant directory) where a recipient would be likely to look
+for such a notice.
+
+You may add additional accurate notices of copyright ownership.
+
+Exhibit B - "Incompatible With Secondary Licenses" Notice
+---------------------------------------------------------
+
+ This Source Code Form is "Incompatible With Secondary Licenses", as
+ defined by the Mozilla Public License, v. 2.0.
diff --git a/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/METADATA b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/METADATA
new file mode 100644
index 0000000000..65ff8760bf
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/METADATA
@@ -0,0 +1,84 @@
+Metadata-Version: 2.1
+Name: compare-locales
+Version: 9.0.1
+Summary: Lint Mozilla localizations
+Home-page: https://github.com/mozilla/compare-locales
+Author: Axel Hecht
+Author-email: axel@mozilla.com
+License: MPL 2.0
+Platform: any
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Classifier: Topic :: Software Development :: Localization
+Classifier: Topic :: Software Development :: Testing
+Requires-Python: >=3.7, <4
+Description-Content-Type: text/markdown
+License-File: LICENSE.md
+Requires-Dist: fluent.syntax (<0.20,>=0.18.0)
+Requires-Dist: six
+Requires-Dist: toml
+
+![Build tests](https://github.com/mozilla/compare-locales/workflows/test/badge.svg)
+# compare-locales
+Lint Mozilla localizations
+
+Finds
+* missing strings
+* obsolete strings
+* definite runtime errors (reported as errors, without false positives)
+* possible runtime errors (reported as warnings)
+
+It also includes `l10n-merge` functionality, which pads localizations with
+missing English strings, and replaces entities with errors with English.
+
+If you want to check your original code for errors like duplicated messages,
+use `moz-l10n-lint`, which is also part of this package. You can also use
+this to check for conflicts between your strings and those already exposed
+to l10n.
+
+# Configuration
+
+You configure `compare-locales` (and `moz-l10n-lint`) through a
+[project configuration](https://moz-l10n-config.readthedocs.io/en/latest/fileformat.html)
+file, `l10n.toml`.
+
+# Examples
+
+To check all locales in a project use
+
+```bash
+compare-locales l10n.toml .
+```
+
+To check Firefox against a local check-out of l10n-central, use
+
+```bash
+compare-locales browser/locales/l10n.toml ../l10n-central
+```
+
+If you just want to check particular locales, specify them as additional
+commandline parameters.
+
+To lint your local work, use
+
+```bash
+moz-l10n-lint l10n.toml
+```
+
+To check for conflicts against already existing strings:
+
+```bash
+moz-l10n-lint --reference-project ../android-l10n/mozilla-mobile/fenix l10n.toml
+moz-l10n-lint --l10n-reference ../gecko-strings browser/locales/l10n.toml
+```
+
+The first command checks a monolithic project like Fenix; the second checks
+a gecko project like Firefox.
diff --git a/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/RECORD b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/RECORD
new file mode 100644
index 0000000000..1d81d9fca6
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/RECORD
@@ -0,0 +1,45 @@
+compare_locales/__init__.py,sha256=Lbi3Zk69IMtSQjV8b_gDCN24gZf_Vjd35WfEDZu9VNI,18
+compare_locales/commands.py,sha256=cAy0ZseVq2oAkXQyacn671PqfNx_zSraPgeSjAV7pWs,8428
+compare_locales/keyedtuple.py,sha256=WVOkwqS2y3-gH1GwU8oPhay5OeN1YsXTEPb1WacqiS4,1507
+compare_locales/merge.py,sha256=Cuaw783A0YaEpK_cV19iFNayg28l3VwsHLOvUX06y2w,4657
+compare_locales/mozpath.py,sha256=ZzBm7Y9LgO161UgqzHgniyIIXwAlTUDbF1Q2O9FxHL4,4232
+compare_locales/plurals.py,sha256=s5M29AZElgB4z9L24xtc3_W7lUK6UZr_j1APv89fx28,4015
+compare_locales/serializer.py,sha256=uJR-fL2h_X1j0lNnv3PwJ4RRV_x-5kc66KDJg863JvU,4408
+compare_locales/util.py,sha256=ttl1tcGveJpYqoHKVlIplhb0wSjAjAaTRQT0z6xoYrQ,439
+compare_locales/checks/__init__.py,sha256=7S1or4MzMxMA_MtRu-CB5eFyPDPnv1Zq6GGCToaztwo,969
+compare_locales/checks/android.py,sha256=L0z-DJatylz7NeQnAq0sA_fXHTXj0dfZ-nNS1DJPa-8,8318
+compare_locales/checks/base.py,sha256=ld5YSptqIU8xWWs9KKY-u9XP7oN8NrmvzqN605dwRPE,4165
+compare_locales/checks/dtd.py,sha256=OHG99oQI-tT9ZkSPCJR_T9idSSycI6mFSPrb6OJmdHw,9961
+compare_locales/checks/fluent.py,sha256=QP_709JGmEaqruYCyc17WuBcbet6MCa2jexuRHJaMQk,13019
+compare_locales/checks/properties.py,sha256=gtd-5fLWDdowN_KYgQ3dZLsElQHQ6NVvp4jx57GRPjA,6558
+compare_locales/compare/__init__.py,sha256=VMGx8O_MavjZGrcn_6DSfT-J75_ry8m2GxLgUcoUQjM,3293
+compare_locales/compare/content.py,sha256=qCOLcFCoWqktVS-FbsNeI0w1JPhi3t3gqz26Or592D8,10990
+compare_locales/compare/observer.py,sha256=RopVbCeq8nWilR7kfrAfBNfDkF2wHUv98Y8ki49TKMM,7357
+compare_locales/compare/utils.py,sha256=crRWvQYRoKwQbpu1z1IuLjWqOq-PMx23EHNIIAH3eDU,4197
+compare_locales/integration_tests/__init__.py,sha256=eOFgaCLveRf8s90SCQUeZRRxG5LAXwUSxQHxi4H4hvc,154
+compare_locales/integration_tests/test_plurals.py,sha256=Hs4pkXf-DJL7yxnsXo1lbz_1gBpL-1DKaeYy1cS4UY8,1643
+compare_locales/lint/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+compare_locales/lint/cli.py,sha256=dVf9TV5QgDy_5W1jpTIKzhZyvmRDZIZg1mZPBl9RbLE,2965
+compare_locales/lint/linter.py,sha256=cyS6SivquOgXUpQbjpFHs7GgdJbYgsW-5jT0F3RDGyQ,4211
+compare_locales/lint/util.py,sha256=hgHkSvNqWqEiFN38daujWXBUmlQAdy-XBRVGVY9RBfY,1290
+compare_locales/parser/__init__.py,sha256=BVL7HrZOmRo0tGDoROn1f2Ka93314LhrTGPU4Cx0pVU,2041
+compare_locales/parser/android.py,sha256=SvTeAInvGBlal8Ahpv9uA8SaHIZ1LOS0s9Kb-36DJQk,9212
+compare_locales/parser/base.py,sha256=1cDXMnkzM7Qt1KbwGlgKuNm17hPsoWgpdpJDC_9Icqg,12923
+compare_locales/parser/defines.py,sha256=LFseFNLFGb5bHNEmcYqeBymy7VzKIm7OPc6vSoQ298w,3549
+compare_locales/parser/dtd.py,sha256=Dmb8Rk-ptooLbHE9Le9lUUvdtWWFUtSBTlS8w2uWH94,4325
+compare_locales/parser/fluent.py,sha256=GHFCKuqaozGoN5C1c0PGBDhtQ994Swutw_aHXtu0WoM,7035
+compare_locales/parser/ini.py,sha256=I-t-hmGq6VH-sinAxjnIUwtPM2EE_AfMXlJ9G9hKnAs,1545
+compare_locales/parser/po.py,sha256=d9SYQ3WBTICGO_yFvz5SIHjM8mld7oYd-ZupXRN-qZ4,3220
+compare_locales/parser/properties.py,sha256=rnmomMr1-EDvjyC3R1lGl-nYkIZA1B9E2C-U-N_7YXY,3716
+compare_locales/paths/__init__.py,sha256=pQZ4FlsedUtR8dA-uqTqhiNC3rQvPZNzEoTRdJLbyts,1419
+compare_locales/paths/configparser.py,sha256=xIWYDgasIt_qXIcHvH6DMLtXiiF5zbu3Zi8bbrnArtY,4377
+compare_locales/paths/files.py,sha256=2uEhVEjpkGZBJNiF2jwiN5oyxhNouLCI7Hivw4SgkRE,9165
+compare_locales/paths/ini.py,sha256=5IPcgacKYCxKx3dEiNpi8MztYWWFQT6ATOgtpFaT54I,8411
+compare_locales/paths/matcher.py,sha256=4k0UZr1PvFAb29R_nATR5qdWP4ThJGy36yMf6Ipie58,15099
+compare_locales/paths/project.py,sha256=Tl6CfikkOKDi0E3BcxWS4Q3PSU-rjFKVdeNcENwQVN4,8784
+compare_locales-9.0.1.dist-info/LICENSE.md,sha256=HyVuytGSiAUQ6ErWBHTqt1iSGHhLmlC8fO7jTCuR8dU,16725
+compare_locales-9.0.1.dist-info/METADATA,sha256=j59rhNO4K7-WQKT_uxjCMBXlBcCyYuu3trfMS5Sskuw,2595
+compare_locales-9.0.1.dist-info/WHEEL,sha256=a-zpFRIJzOq5QfuhBzbhiA1eHTzNCJn8OdRvhdNX0Rk,110
+compare_locales-9.0.1.dist-info/entry_points.txt,sha256=EYuE78Z7UKpwisLmRuYHZdosK06cETbXNN4BZICR6xM,127
+compare_locales-9.0.1.dist-info/top_level.txt,sha256=eSEPLAFZcEPFC1j0N9GtVpMaKCFKw67ehDx9CMcoel0,16
+compare_locales-9.0.1.dist-info/RECORD,,
diff --git a/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/WHEEL b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/WHEEL
new file mode 100644
index 0000000000..f771c29b87
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/WHEEL
@@ -0,0 +1,6 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.40.0)
+Root-Is-Purelib: true
+Tag: py2-none-any
+Tag: py3-none-any
+
diff --git a/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/entry_points.txt b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/entry_points.txt
new file mode 100644
index 0000000000..03d6f06f40
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/entry_points.txt
@@ -0,0 +1,3 @@
+[console_scripts]
+compare-locales = compare_locales.commands:CompareLocales.call
+moz-l10n-lint = compare_locales.lint.cli:main
diff --git a/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/top_level.txt b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/top_level.txt
new file mode 100644
index 0000000000..d9c74fc101
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/top_level.txt
@@ -0,0 +1 @@
+compare_locales
diff --git a/third_party/python/compare_locales/compare_locales/__init__.py b/third_party/python/compare_locales/compare_locales/__init__.py
new file mode 100644
index 0000000000..23b7f329ba
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/__init__.py
@@ -0,0 +1 @@
+version = "9.0.1"
diff --git a/third_party/python/compare_locales/compare_locales/checks/__init__.py b/third_party/python/compare_locales/compare_locales/checks/__init__.py
new file mode 100644
index 0000000000..c15ede03f9
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/checks/__init__.py
@@ -0,0 +1,27 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from .base import Checker, EntityPos
+from .android import AndroidChecker
+from .dtd import DTDChecker
+from .fluent import FluentChecker
+from .properties import PropertiesChecker
+
+
+__all__ = [
+ 'Checker', 'EntityPos',
+ 'AndroidChecker', 'DTDChecker', 'FluentChecker', 'PropertiesChecker',
+]
+
+
+def getChecker(file, extra_tests=None):
+    '''Return the first format-specific checker whose use() accepts file,
+    falling back to the generic Checker; file.locale is passed as locale.'''
+    specialized = (
+        PropertiesChecker, DTDChecker, FluentChecker, AndroidChecker,
+    )
+    for checker_cls in specialized:
+        if checker_cls.use(file):
+            return checker_cls(extra_tests, locale=file.locale)
+    return Checker(extra_tests, locale=file.locale)
diff --git a/third_party/python/compare_locales/compare_locales/checks/android.py b/third_party/python/compare_locales/compare_locales/checks/android.py
new file mode 100644
index 0000000000..d5a1f2f25f
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/checks/android.py
@@ -0,0 +1,256 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import re
+from xml.dom import minidom
+
+from .base import Checker
+from ..parser.android import textContent
+
+
class AndroidChecker(Checker):
    '''Checks for Android string resources (files matching ``pattern``).'''
    pattern = re.compile('(.*)?strings.*\\.xml$')

    def check(self, refEnt, l10nEnt):
        '''Run the base checks, then the Android checks, on one entity pair.

        This is a generator yielding tuples of
        - "warning" or "error", depending on what should be reported,
        - position info for the problem within the string,
        - description string to be shown in the report,
        - name of the check category ("android" for the checks in here).
        '''
        yield from super().check(refEnt, l10nEnt)
        ref_node, l10n_node = refEnt.node, l10nEnt.node
        if ref_node.nodeName != l10n_node.nodeName:
            # Apples and oranges, error out.
            yield ("error", 0, "Incompatible resource types", "android")
        elif ref_node.nodeName != "string":
            # Once we start parsing more resource types, make sure to add
            # checks for them.
            yield ("warning", 0, "Unsupported resource type", "android")
        else:
            yield from self.check_string([ref_node], l10nEnt)

    def check_string(self, refs, l10nEnt):
        '''Check one localized string against a list of reference nodes.

        ``refs`` is a list so that resource types with several reference
        nodes (<plurals>, <string-array>) can be passed in; ``check`` only
        ever passes a single node.
        '''
        l10n_node = l10nEnt.node
        # Both a translatable="false" marker and an @string/ reference in
        # the localization are fatal for this string.
        if (
            self.not_translatable(l10n_node, *refs)
            or self.no_at_string(l10n_node)
        ):
            yield ("error", 0, "strings must be translatable", "android")
            return
        if self.no_at_string(*refs):
            # A reference using @string/ is only worth a warning.
            yield ("warning", 0, "strings must be translatable", "android")
        if self.non_simple_data(l10n_node):
            yield (
                "error",
                0,
                "Only plain text allowed, "
                "or one CDATA surrounded by whitespace",
                "android"
            )
            return
        yield from check_apostrophes(l10nEnt.val)

        ref_params, ref_errors = get_params(refs)
        for description, position in ref_errors:
            # Inconsistencies inside the reference are just warnings.
            yield ("warning", position, description, "android")
        if ref_params:
            yield from check_params(ref_params, l10nEnt.val)

    def not_translatable(self, *nodes):
        '''Tell if any of the nodes carries translatable="false".'''
        for node in nodes:
            if (
                node.hasAttribute("translatable")
                and node.getAttribute("translatable") == "false"
            ):
                return True
        return False

    def no_at_string(self, *ref_nodes):
        '''Tell if any of the nodes is a reference to another string.

        Android allows to reference other strings by using
            @string/identifier
        instead of the actual value. Those references don't belong into
        a localizable file.
        '''
        for node in ref_nodes:
            if textContent(node).startswith('@string/'):
                return True
        return False

    def non_simple_data(self, node):
        '''Tell if the node content is more than simple data.

        Simple data is an empty node, a single text node, or a single
        CDATA node surrounded by nothing but whitespace text.
        '''
        children = node.childNodes
        cdata_sections = [
            candidate
            for candidate in children
            if candidate.nodeType == minidom.Node.CDATA_SECTION_NODE
        ]
        if not cdata_sections:
            if children.length == 0:
                # empty translation is OK
                return False
            return (
                children.length != 1
                or children[0].nodeType != minidom.Node.TEXT_NODE
            )
        if len(cdata_sections) > 1:
            return True
        only_cdata = cdata_sections[0]
        # Everything next to the CDATA section must be whitespace text.
        return any(
            not (sibling == only_cdata)
            and (
                sibling.nodeType != minidom.Node.TEXT_NODE
                or sibling.data.strip() != ""
            )
            for sibling in children
        )
+
+
# Matches the two-character sequences that get blanked out before looking
# for stray apostrophes: a backslash followed by any character, or a pair
# of straight double quotes.
silencer = re.compile(r'\\.|""')


def check_apostrophes(string):
    r'''Check Android logic for quotes and apostrophes.

    If you have an apostrophe (') in your string, you must either escape it
    with a backslash (\') or enclose the string in double-quotes (").

    Unescaped quotes are not visually shown on Android, but they're
    also harmless, so we're not checking for quotes. We might do once we're
    better at checking for inline XML, which is full of quotes.
    Pairing quotes as in '""' is bad, though, so report errors for that.
    Mostly, because it's hard to tell if a string is consider quoted or not
    by Android in the end.

    https://developer.android.com/guide/topics/resources/string-resource#escaping_quotes

    Yields ("error", offset, message, "android") tuples; offsets are
    indexes into the string as passed in.
    '''
    for m in re.finditer('""', string):
        yield (
            "error",
            m.start(),
            "Double straight quotes not allowed",
            "android"
        )
    # Every silencer match is exactly two characters long. Blank each one
    # out with two spaces, so that the offsets reported below still refer
    # to the original string instead of a shortened copy.
    string = silencer.sub("  ", string)

    is_quoted = string.startswith('"') and string.endswith('"')
    if is_quoted:
        # Apostrophes are fine inside a fully quoted string.
        return
    # apostrophes need to be escaped
    for m in re.finditer("'", string):
        yield (
            "error",
            m.start(),
            "Apostrophe must be escaped",
            "android"
        )
+
+
def get_params(refs):
    '''Collect printf parameters and internal errors from references.

    Each item in ``refs`` is either a string or a minidom node; nodes are
    reduced to their text via ``textContent``. Formatters without an
    explicit position are numbered in order of appearance, continuing
    across all items.

    Returns a tuple of
    - a sparse map of positions to formatter character,
    - a list of (message, offset) errors. Errors covered so far are
      conflicting formatters for the same position.
    '''
    formatter = re.compile(r'%(?P<order>[1-9]\$)?(?P<format>[sSd])')
    found = {}
    conflicts = []
    implicit_position = 1
    for ref in refs:
        text = textContent(ref) if isinstance(ref, minidom.Node) else ref
        for match in formatter.finditer(text):
            explicit = match.group('order')
            if explicit:
                # explicit is something like "2$", the digit is the position
                position = int(explicit[0])
            else:
                position = implicit_position
                implicit_position += 1
            spec = match.group('format')
            # The first formatter seen for a position wins, later ones
            # are only compared against it.
            known = found.setdefault(position, spec)
            if known != spec:
                template = (
                    "Conflicting formatting, %{order}${f1} vs %{order}${f2}"
                )
                conflicts.append((
                    template.format(order=position, f1=spec, f2=known),
                    match.start()
                ))
    return found, conflicts
+
+
def check_params(params, string):
    '''Compare the printf parameters in the given string to the reference
    parameters.

    ``params`` is the position-to-formatter map of the reference, as
    returned by `get_params`; ``string`` is the localized value.

    Also yields errors that are internal to the parameters inside string,
    as found by `get_params`.

    Yields (category, offset, message, "android") tuples.
    '''
    lparams, errors = get_params([string])
    for error, pos in errors:
        yield (
            "error",
            pos,
            error,
            "android"
        )
    # Compare reference for each localized parameter.
    # If there's no reference found, error, as an out-of-bounds
    # parameter crashes.
    # This assumes that all parameters are actually used in the reference,
    # which should be OK.
    # If there's a mismatch in the formatter, error.
    for order in sorted(lparams):
        if order not in params:
            yield (
                "error",
                0,
                "Formatter %{}${} not found in reference".format(
                    order, lparams[order]
                ),
                "android"
            )
        elif params[order] != lparams[order]:
            yield (
                "error",
                0,
                "Mismatching formatter",
                "android"
            )
    # All parameters used in the reference are expected to be included.
    # Warn if this isn't the case.
    for order in params:
        # Plain dict membership; no need to build a sorted list of the
        # keys for each reference parameter.
        if order not in lparams:
            yield (
                "warning",
                0,
                "Formatter %{}${} not found in translation".format(
                    order, params[order]
                ),
                "android",
            )
diff --git a/third_party/python/compare_locales/compare_locales/checks/base.py b/third_party/python/compare_locales/compare_locales/checks/base.py
new file mode 100644
index 0000000000..95f4bc7b59
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/checks/base.py
@@ -0,0 +1,122 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import re
+
+
class EntityPos(int):
    '''Integer subclass used to tag a position value.

    ``Checker.check`` wraps the offsets it finds in ``l10nEnt.all`` in
    this type. Apart from its type, it behaves exactly like ``int``.
    '''
+
+
# Matches U+FFFD (REPLACEMENT CHARACTER); Checker.check reports each
# occurrence as a possible encoding problem.
mochibake = re.compile('\ufffd')


class Checker:
    '''Abstract class to implement checks per file type.

    Subclasses set ``pattern`` to a compiled regular expression that is
    matched against the file path in ``use``.
    '''
    pattern = None
    # if a check uses all reference entities, set this to True
    needs_reference = False

    @classmethod
    def use(cls, file):
        '''Return the match of ``cls.pattern`` against ``file.file``.'''
        path = file.file
        return cls.pattern.match(path)

    def __init__(self, extra_tests, locale=None):
        self.extra_tests, self.locale = extra_tests, locale
        # Filled in later through set_reference.
        self.reference = None

    def check(self, refEnt, l10nEnt):
        '''Given the reference and localized Entities, performs checks.

        This is a generator yielding tuples of
        - "warning" or "error", depending on what should be reported,
        - position info for the problem,
        - description string to be shown in the report,
        - name of the check category.

        By default, check for possible encoding errors, reported as
        warnings in the "encodings" category with an EntityPos offset
        into ``l10nEnt.all``.
        '''
        yield from (
            (
                "warning",
                EntityPos(found.start()),
                f"\ufffd in: {l10nEnt.key}",
                "encodings"
            )
            for found in mochibake.finditer(l10nEnt.all)
        )

    def set_reference(self, reference):
        '''Store the reference entities on this checker.

        Only do this if self.needs_reference is True.
        '''
        self.reference = reference
+
+
class CSSCheckMixin:
    '''Mixin with heuristics for values that hold CSS size declarations.

    Only ``width`` and ``height`` (optionally prefixed with ``min-`` or
    ``max-``) with a numeric length and a unit are recognized, see
    ``parse_css_spec``.
    '''

    def maybe_style(self, ref_value, l10n_value):
        '''Yield CSS check results if the reference value is a CSS spec.

        Does nothing if no CSS declaration is found in ``ref_value``.
        '''
        ref_map, _ = self.parse_css_spec(ref_value)
        if not ref_map:
            return
        l10n_map, errors = self.parse_css_spec(l10n_value)
        yield from self.check_style(ref_map, l10n_map, errors)

    def check_style(self, ref_map, l10n_map, errors):
        '''Compare parsed CSS declarations of reference and localization.

        ``ref_map`` and ``l10n_map`` map property names to units,
        ``errors`` are the parse errors of the localized value, all as
        returned by ``parse_css_spec``. ``ref_map`` is not modified.

        Yields a single ('error', 0, ..., 'css') tuple if the localization
        is not a clean CSS spec, or otherwise a single
        ('warning', 0, ..., 'css') tuple listing all differences, if any.
        '''
        if not l10n_map:
            yield ('error', 0, 'reference is a CSS spec', 'css')
            return
        if errors:
            yield ('error', 0, 'reference is a CSS spec', 'css')
            return
        # Track the reference properties not seen in the localization on
        # a copy. The caller may keep using its map, e.g. when the same
        # reference data is compared more than once.
        unmatched = dict(ref_map)
        msgs = []
        for prop, unit in l10n_map.items():
            if prop not in unmatched:
                msgs.insert(0, '%s only in l10n' % prop)
                continue
            ref_unit = unmatched.pop(prop)
            if unit != ref_unit:
                msgs.append("units for %s don't match "
                            "(%s != %s)" % (prop, unit, ref_unit))
        for prop in unmatched:
            msgs.insert(0, '%s only in reference' % prop)
        if msgs:
            yield ('warning', 0, ', '.join(msgs), 'css')

    def parse_css_spec(self, val):
        '''Parse ``val`` as a sequence of CSS size declarations.

        Returns a tuple of
        - a map of property name to unit, or None if no declaration was
          found,
        - a list of error dicts with 'pos' and 'code' keys, or None if
          there were no errors. Codes are 'css-bad-content' for unexpected
          text between declarations and 'css-missing-semicolon' for
          declarations that are only separated by whitespace.
        '''
        if not hasattr(self, '_css_spec'):
            # Compile the patterns lazily, once per instance.
            self._css_spec = re.compile(
                r'(?:'
                r'(?P<prop>(?:min\-|max\-)?(?:width|height))'
                r'[ \t\r\n]*:[ \t\r\n]*'
                r'(?P<length>[0-9]+|[0-9]*\.[0-9]+)'
                r'(?P<unit>ch|em|ex|rem|px|cm|mm|in|pc|pt)'
                r')'
                r'|\Z'
            )
            self._css_sep = re.compile(r'[ \t\r\n]*(?P<semi>;)?[ \t\r\n]*$')
        refMap = errors = None
        end = 0
        # The pattern also matches the empty string at the very end of the
        # value, so the text after the last declaration gets inspected
        # as well.
        for m in self._css_spec.finditer(val):
            if end == 0 and m.start() == m.end():
                # no CSS spec found, just immediately end of string
                return None, None
            if m.start() > end:
                # Inspect the text between the previous match and this one.
                split = self._css_sep.match(val, end, m.start())
                if split is None:
                    errors = errors or []
                    errors.append({
                        'pos': end,
                        'code': 'css-bad-content',
                    })
                elif end > 0 and split.group('semi') is None:
                    errors = errors or []
                    errors.append({
                        'pos': end,
                        'code': 'css-missing-semicolon',
                    })
            if m.group('prop'):
                refMap = refMap or {}
                refMap[m.group('prop')] = m.group('unit')
            end = m.end()
        return refMap, errors
diff --git a/third_party/python/compare_locales/compare_locales/checks/dtd.py b/third_party/python/compare_locales/compare_locales/checks/dtd.py
new file mode 100644
index 0000000000..139624f98f
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/checks/dtd.py
@@ -0,0 +1,238 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from io import BytesIO
+import re
+from xml import sax
+
+from compare_locales.parser import DTDParser
+from .base import Checker, CSSCheckMixin
+
+
class DTDChecker(Checker, CSSCheckMixin):
    """Tests to run on DTD files.

    Uses xml.sax for the heavy lifting of xml parsing.

    The values are parsed inside a dummy document whose DOCTYPE declares
    an empty <!ENTITY key ""> definition for each entity reference that
    is known from the reference or found in the value, so that those
    references resolve.

    Also checks for some CSS and number heuristics in the values.
    """
    pattern = re.compile(r'.*\.dtd$')
    needs_reference = True  # to cast a wider net for known entity references

    # Matches entity references like &foo; and captures the entity name.
    eref = re.compile('&(%s);' % DTDParser.Name)
    # Dummy document; the first slot takes entity declarations, the second
    # the element content to be parsed.
    tmpl = b'''<!DOCTYPE elem [%s]>
<elem>%s</elem>
'''
    # Entity names removed from the collected references in
    # entities_for_value (the predefined XML entities).
    xmllist = {'amp', 'lt', 'gt', 'apos', 'quot'}

    def __init__(self, extra_tests, locale=None):
        """Set up the checker.

        If 'android-dtd' is in extra_tests, the text content of localized
        values is collected while parsing, for processAndroidContent.
        """
        super().__init__(extra_tests, locale=locale)
        self.processContent = False
        if self.extra_tests is not None and 'android-dtd' in self.extra_tests:
            self.processContent = True
        # Cache for known_entities, filled on first use.
        self.__known_entities = None

    def known_entities(self, refValue):
        """Return the set of entity names considered known.

        If reference entities have been set, that's all entities
        referenced by any of their raw values, computed once and cached.
        Otherwise, only the entities referenced in refValue.
        """
        if self.__known_entities is None and self.reference is not None:
            self.__known_entities = set()
            for ent in self.reference.values():
                self.__known_entities.update(
                    self.entities_for_value(ent.raw_val))
        return self.__known_entities if self.__known_entities is not None \
            else self.entities_for_value(refValue)

    def entities_for_value(self, value):
        """Return the set of entity names referenced in value,
        excluding the names in xmllist.
        """
        reflist = {m.group(1) for m in self.eref.finditer(value)}
        reflist -= self.xmllist
        return reflist

    # Setup for XML parser, with default and text-only content handler
    class TextContent(sax.handler.ContentHandler):
        # Accumulated character data; reset by check() before parsing.
        textcontent = ''

        def characters(self, content):
            self.textcontent += content

    # Both handlers are class attributes, shared by all DTDChecker instances.
    defaulthandler = sax.handler.ContentHandler()
    texthandler = TextContent()

    # Heuristics for values that are just a number, or just a CSS length.
    numPattern = r'([0-9]+|[0-9]*\.[0-9]+)'
    num = re.compile('^%s$' % numPattern)
    lengthPattern = '%s(em|px|ch|cm|in)' % numPattern
    length = re.compile('^%s$' % lengthPattern)

    def check(self, refEnt, l10nEnt):
        """Check a localized DTD entity against its reference.

        Parses both raw values inside a dummy element, declaring the
        entities that are needed to make that work, and then runs the
        entity reference, number, and CSS heuristics.

        This is a generator yielding tuples of
        - "warning" or "error",
        - position, either as (line, column) tuple or as plain number,
        - description string to be shown in the report,
        - name of the check category.
        """
        yield from super().check(refEnt, l10nEnt)
        refValue, l10nValue = refEnt.raw_val, l10nEnt.raw_val
        # find entities the refValue references,
        # reusing markup from DTDParser.
        reflist = self.known_entities(refValue)
        inContext = self.entities_for_value(refValue)
        entities = ''.join('<!ENTITY %s "">' % s for s in sorted(reflist))
        parser = sax.make_parser()
        # Don't let the parser load external general entities.
        parser.setFeature(sax.handler.feature_external_ges, False)

        parser.setContentHandler(self.defaulthandler)
        try:
            # First, parse the reference value as element content.
            parser.parse(
                BytesIO(self.tmpl %
                        (entities.encode('utf-8'),
                         refValue.encode('utf-8'))))
            # also catch stray %
            # by parsing the complete entity definition inside the DOCTYPE
            parser.parse(
                BytesIO(self.tmpl %
                        ((refEnt.all + entities).encode('utf-8'),
                         b'&%s;' % refEnt.key.encode('utf-8'))))
        except sax.SAXParseException as e:
            e  # noqa
            # A broken reference is only reported; the localized value is
            # still checked below.
            yield ('warning',
                   (0, 0),
                   "can't parse en-US value", 'xmlparse')

        # find entities the l10nValue references,
        # reusing markup from DTDParser.
        l10nlist = self.entities_for_value(l10nValue)
        missing = sorted(l10nlist - reflist)
        # Also declare the unknown entities, so that they end up as
        # warnings below instead of as parse errors.
        _entities = entities + ''.join('<!ENTITY %s "">' % s for s in missing)
        if self.processContent:
            # Collect the text content of the localized value for the
            # Android checks at the end.
            self.texthandler.textcontent = ''
            parser.setContentHandler(self.texthandler)
        try:
            parser.parse(BytesIO(self.tmpl % (_entities.encode('utf-8'),
                                              l10nValue.encode('utf-8'))))
            # also catch stray %
            # if this fails, we need to subtract the entity definition
            parser.setContentHandler(self.defaulthandler)
            parser.parse(
                BytesIO(self.tmpl %
                        ((l10nEnt.all + _entities).encode('utf-8'),
                         b'&%s;' % l10nEnt.key.encode('utf-8'))))
        except sax.SAXParseException as e:
            # xml parse error, yield error
            # sometimes, the error is reported on our fake closing
            # element, make that the end of the last line
            lnr = e.getLineNumber() - 1
            lines = l10nValue.splitlines()
            if lnr > len(lines):
                lnr = len(lines)
                # NOTE(review): for an empty l10nValue, lines is empty and
                # this lookup would raise IndexError; confirm whether a
                # parse error can be reported in that case.
                col = len(lines[lnr-1])
            else:
                col = e.getColumnNumber()
                if lnr == 1:
                    # first line starts with <elem>, subtract
                    col -= len("<elem>")
                elif lnr == 0:
                    col -= len("<!DOCTYPE elem [")  # first line is DOCTYPE
            yield ('error', (lnr, col), ' '.join(e.args), 'xmlparse')

        # Build the warning for unknown entities, listing the entities
        # used in the reference value and the other known ones as hint.
        warntmpl = 'Referencing unknown entity `%s`'
        if reflist:
            if inContext:
                elsewhere = reflist - inContext
                warntmpl += ' (%s used in context' % \
                    ', '.join(sorted(inContext))
                if elsewhere:
                    warntmpl += ', %s known)' % ', '.join(sorted(elsewhere))
                else:
                    warntmpl += ')'
            else:
                warntmpl += ' (%s known)' % ', '.join(sorted(reflist))
        for key in missing:
            yield ('warning', (0, 0), warntmpl % key,
                   'xmlparse')
        # Entities that are known, but not used in this reference value.
        if inContext and l10nlist and l10nlist - inContext - set(missing):
            mismatch = sorted(l10nlist - inContext - set(missing))
            for key in mismatch:
                yield ('warning', (0, 0),
                       'Entity {} referenced, but {} used in context'.format(
                           key,
                           ', '.join(sorted(inContext))
                ), 'xmlparse')

        # Number check
        if self.num.match(refValue) and not self.num.match(l10nValue):
            yield ('warning', 0, 'reference is a number', 'number')
        # CSS checks
        # just a length, width="100em"
        if self.length.match(refValue) and not self.length.match(l10nValue):
            yield ('error', 0, 'reference is a CSS length', 'css')
        # Check for actual CSS style attribute values
        yield from self.maybe_style(refValue, l10nValue)

        # Same condition as for self.processContent in __init__.
        if self.extra_tests is not None and 'android-dtd' in self.extra_tests:
            yield from self.processAndroidContent(self.texthandler.textcontent)

    # Matches a value that starts with a quote or apostrophe and ends with
    # the same character.
    quoted = re.compile("(?P<q>[\"']).*(?P=q)$")

    def unicode_escape(self, str):
        """Helper method to try to decode all unicode escapes in a string.

        This code uses the standard python decode for unicode-escape, but
        that's somewhat tricky, as its input needs to be ascii. To get to
        ascii, the unicode string gets converted to ascii with
        backslashreplace, i.e., all non-ascii unicode chars get unicode
        escaped. And then we try to roll all of that back.
        Now, when that hits an error, that's from the original string, and we
        need to search for the actual error position in the original string,
        as the backslashreplace code changes string positions quite badly.
        See also the last check in TestAndroid.test_android_dtd, with a
        lengthy chinese string.

        Returns nothing. Raises UnicodeDecodeError with the start and end
        positions adjusted to the original string if decoding fails.
        """
        val = str.encode('ascii', 'backslashreplace')
        try:
            val.decode('unicode-escape')
        except UnicodeDecodeError as e:
            # args are (encoding, object, start, end, reason)
            args = list(e.args)
            badstring = args[1][args[2]:args[3]]
            # Decode the part before the error to get its length in
            # characters of the original string.
            i = len(args[1][:args[2]].decode('unicode-escape'))
            args[2] = i
            args[3] = i + len(badstring)
            raise UnicodeDecodeError(*args)

    def processAndroidContent(self, val):
        """Check for the string values that Android puts into an XML container.

        http://developer.android.com/guide/topics/resources/string-resource.html#FormattingAndStyling  # noqa

        Check for unicode escapes and unescaped quotes and apostrophes,
        if string's not quoted.

        Yields ('error', position, message, 'android') tuples.
        """
        # first, try to decode unicode escapes
        try:
            self.unicode_escape(val)
        except UnicodeDecodeError as e:
            # args[2] is the start position, args[4] the reason
            yield ('error', e.args[2], e.args[4], 'android')
        # check for unescaped single or double quotes.
        # first, see if the complete string is single or double quoted,
        # that changes the rules
        m = self.quoted.match(val)
        if m:
            # Only the surrounding quote character needs escaping inside.
            q = m.group('q')
            # The leading quote is stripped, so the end of a match in the
            # stripped value is the index of the stray quote in the
            # original value.
            offset = 0
            val = val[1:-1]  # strip quotes
        else:
            q = "[\"']"
            # End of the match minus one is the index of the stray quote.
            offset = -1
        # Any run of backslashes, followed by a quote character.
        stray_quot = re.compile(r"[\\\\]*(%s)" % q)

        for m in stray_quot.finditer(val):
            # An odd match length is an even number of backslashes plus
            # the quote, that is, the quote itself is not escaped.
            if len(m.group(0)) % 2:
                # found an unescaped single or double quote, which message?
                if m.group(1) == '"':
                    msg = "Quotes in Android DTDs need escaping with \\\" "\
                          "or \\u0022, or put string in apostrophes."
                else:
                    msg = "Apostrophes in Android DTDs need escaping with "\
                          "\\' or \\u0027, or use \u2019, or put string in "\
                          "quotes."
                yield ('error', m.end(0)+offset, msg, 'android')
diff --git a/third_party/python/compare_locales/compare_locales/checks/fluent.py b/third_party/python/compare_locales/compare_locales/checks/fluent.py
new file mode 100644
index 0000000000..f82ecbd54f
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/checks/fluent.py
@@ -0,0 +1,351 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import re
+from collections import defaultdict
+
+from fluent.syntax import ast as ftl
+from fluent.syntax.serializer import serialize_variant_key
+from fluent.syntax.visitor import Visitor
+
+from .base import Checker, CSSCheckMixin
+from compare_locales import plurals
+
+
# Message templates for the Fluent checks, keyed by message id.
# Templates with {placeholders} are filled in with str.format by the
# visitors and the checker in this module.
MSGS = {
    'missing-msg-ref': 'Missing message reference: {ref}',
    'missing-term-ref': 'Missing term reference: {ref}',
    'obsolete-msg-ref': 'Obsolete message reference: {ref}',
    'obsolete-term-ref': 'Obsolete term reference: {ref}',
    'duplicate-attribute': 'Attribute "{name}" is duplicated',
    'missing-value': 'Missing value',
    'obsolete-value': 'Obsolete value',
    'missing-attribute': 'Missing attribute: {name}',
    'obsolete-attribute': 'Obsolete attribute: {name}',
    'duplicate-variant': 'Variant key "{name}" is duplicated',
    'missing-plural': 'Plural categories missing: {categories}',
    'plain-message': '{message}',
}
+
+
def pattern_variants(pattern):
    """Return the plain text variants of a pattern, as a list of strings.

    Right now, only a pattern consisting of exactly one TextElement is
    supported, which results in a list with that element's value. All
    other patterns result in an empty list.

    This can be improved to allow for SelectExpressions
    of simple text patterns, or even nested expressions, and Literals.
    Variants with Variable-, Message-, or TermReferences should be ignored.
    """
    elements = pattern.elements
    if len(elements) != 1:
        return []
    only_element = elements[0]
    if not isinstance(only_element, ftl.TextElement):
        return []
    return [only_element.value]
+
+
class ReferenceMessageVisitor(Visitor, CSSCheckMixin):
    '''Visitor collecting data about a reference message.

    Records the message and term references per value/attribute, whether
    the message has a value, the positions of the attributes, and the
    parsed CSS spec of a ``style`` attribute.
    '''

    def __init__(self):
        # References to Messages, their Attributes, and Terms, as map of
        # reference name to reference type, one map per attribute name.
        self.entry_refs = defaultdict(dict)
        # The currently active references. We start out with those of
        # the Entry value, which are associated with None.
        self.refs = {}
        self.entry_refs[None] = self.refs
        # If we're a message, store if there was a value
        self.message_has_value = False
        # Map attribute names to positions
        self.attribute_positions = {}
        # CSS style attribute properties and units, plus the errors
        # found when parsing them. css_styles is 'skip' if the style
        # attribute isn't simple text.
        self.css_styles = None
        self.css_errors = None

    def generic_visit(self, node):
        # Spans, annotations and comments are of no interest here.
        skipped = (ftl.Span, ftl.Annotation, ftl.BaseComment)
        if not isinstance(node, skipped):
            super().generic_visit(node)

    def visit_Message(self, node):
        has_value = node.value is not None
        if has_value:
            self.message_has_value = True
        super().generic_visit(node)

    def visit_Attribute(self, node):
        name = node.id.name
        self.attribute_positions[name] = node.span.start
        # Collect the references below this attribute in their own map,
        # and switch back to the previous one afterwards.
        outer_refs = self.refs
        self.refs = self.entry_refs[name]
        super().generic_visit(node)
        self.refs = outer_refs
        if name != 'style':
            return
        text_values = pattern_variants(node.value)
        if text_values:
            # right now, there's just one possible text value
            self.css_styles, self.css_errors = self.parse_css_spec(
                text_values[0]
            )
        else:
            self.css_styles = 'skip'

    def visit_SelectExpression(self, node):
        # optimize select expressions to only go through the variants
        self.visit(node.variants)

    def visit_MessageReference(self, node):
        ref = node.id.name
        if node.attribute:
            ref = ref + '.' + node.attribute.name
        self.refs[ref] = 'msg-ref'

    def visit_TermReference(self, node):
        # only collect term references, but not attributes of terms
        if not node.attribute:
            self.refs['-' + node.id.name] = 'term-ref'
+
+
class GenericL10nChecks:
    '''Helper Mixin for checks shared between Terms and Messages.

    Results are appended to ``self.messages`` as tuples of category,
    position, and message. ``check_variants`` also reads ``self.locale``.
    '''

    def check_duplicate_attributes(self, node):
        '''Warn about each attribute whose name is used more than once.'''
        attributes = node.attributes
        total = len(attributes)
        # Indexes of attributes that have been reported as duplicates.
        reported = set()
        for first_index, first in enumerate(attributes):
            if first_index in reported:
                continue
            first_reported = False
            for other_index in range(first_index + 1, total):
                other = attributes[other_index]
                if first.id.name != other.id.name:
                    continue
                if not first_reported:
                    # Report the first occurrence just once, ahead of its
                    # first duplicate.
                    first_reported = True
                    self.messages.append(
                        (
                            'warning', first.span.start,
                            MSGS['duplicate-attribute'].format(
                                name=first.id.name
                            )
                        )
                    )
                reported.add(other_index)
                self.messages.append(
                    (
                        'warning', other.span.start,
                        MSGS['duplicate-attribute'].format(
                            name=first.id.name
                        )
                    )
                )

    def check_variants(self, variants):
        '''Warn about duplicate variant keys and missing plural categories.

        Plural categories are only checked if ``plurals.get_plural`` has
        data for the locale, and if at least one category other than
        `other` is used as variant key.
        '''
        # Check for duplicate variants
        total = len(variants)
        reported = set()
        for first_index, first in enumerate(variants):
            if first_index in reported:
                continue
            first_key = first.key
            # Serialized lazily, and set as soon as the first occurrence
            # has been reported.
            key_string = None
            for other_index in range(first_index + 1, total):
                other_key = variants[other_index].key
                if not first_key.equals(other_key):
                    continue
                if key_string is None:
                    key_string = serialize_variant_key(first_key)
                    self.messages.append(
                        (
                            'warning', first_key.span.start,
                            MSGS['duplicate-variant'].format(
                                name=key_string
                            )
                        )
                    )
                reported.add(other_index)
                self.messages.append(
                    (
                        'warning', variants[other_index].key.span.start,
                        MSGS['duplicate-variant'].format(
                            name=key_string
                        )
                    )
                )
        # Check for plural categories
        locale_plurals = plurals.get_plural(self.locale)
        if not locale_plurals:
            return
        locale_plurals = set(locale_plurals)
        # Ask for known plurals, but check for plurals w/out `other`.
        # `other` is used for all kinds of things.
        telltale_plurals = locale_plurals - {'other'}
        used_keys = {serialize_variant_key(v.key) for v in variants}
        if not (used_keys & telltale_plurals):
            return
        missing_plurals = sorted(locale_plurals - used_keys)
        if missing_plurals:
            self.messages.append(
                (
                    'warning', variants[0].key.span.start,
                    MSGS['missing-plural'].format(
                        categories=', '.join(missing_plurals)
                    )
                )
            )
+
+
class L10nMessageVisitor(GenericL10nChecks, ReferenceMessageVisitor):
    '''Visitor checking a localized message against its reference.

    ``reference`` is the ReferenceMessageVisitor that has visited the
    reference message. Findings are collected in ``self.messages`` as
    tuples of category, position, and message.
    '''

    def __init__(self, locale, reference):
        super().__init__()
        self.locale = locale
        # Overload refs to map to sets, just store what we found
        # References to Messages, their Attributes, and Terms
        # Store reference name and type
        self.entry_refs = defaultdict(set)
        # The currently active references
        self.refs = set()
        # Start with the Entry value (associated with None)
        self.entry_refs[None] = self.refs
        self.reference = reference
        # References of the reference message for the part that's
        # currently visited, starting with the value.
        self.reference_refs = reference.entry_refs[None]
        self.messages = []

    def visit_Message(self, node):
        '''Check attributes and value of the message against the reference.
        '''
        self.check_duplicate_attributes(node)
        super().visit_Message(node)
        if self.message_has_value and not self.reference.message_has_value:
            self.messages.append(
                ('error', node.value.span.start, MSGS['obsolete-value'])
            )
        if not self.message_has_value and self.reference.message_has_value:
            self.messages.append(
                ('error', 0, MSGS['missing-value'])
            )
        ref_attrs = set(self.reference.attribute_positions)
        l10n_attrs = set(self.attribute_positions)
        # Go through the names in sorted order. Missing attributes are all
        # reported at position 0, so the order they're added in is the
        # order they're reported in, and plain set iteration order isn't
        # stable between runs.
        for missing_attr in sorted(ref_attrs - l10n_attrs):
            self.messages.append(
                (
                    'error', 0,
                    MSGS['missing-attribute'].format(name=missing_attr)
                )
            )
        for obs_attr in sorted(l10n_attrs - ref_attrs):
            self.messages.append(
                (
                    'error', self.attribute_positions[obs_attr],
                    MSGS['obsolete-attribute'].format(name=obs_attr)
                )
            )

    def visit_Term(self, node):
        raise RuntimeError("Should not use L10nMessageVisitor for Terms")

    def visit_Attribute(self, node):
        '''Visit an attribute, and check the CSS spec of `style`.'''
        # Compare references below this attribute against the references
        # of the same attribute in the reference message.
        old_reference_refs = self.reference_refs
        self.reference_refs = self.reference.entry_refs[node.id.name]
        super().visit_Attribute(node)
        self.reference_refs = old_reference_refs
        if node.id.name != 'style' or self.css_styles == 'skip':
            return
        ref_styles = self.reference.css_styles
        if ref_styles in ('skip', None):
            # Reference is complex, l10n isn't.
            # Let's still validate the css spec.
            ref_styles = {}
        # check_style yields (category, position, message, checker),
        # the messages in here don't carry the checker name.
        for cat, pos, msg, _ in self.check_style(
            ref_styles,
            self.css_styles,
            self.css_errors
        ):
            self.messages.append((cat, pos, msg))

    def visit_SelectExpression(self, node):
        super().visit_SelectExpression(node)
        self.check_variants(node.variants)

    def visit_MessageReference(self, node):
        ref = node.id.name
        if node.attribute:
            ref += '.' + node.attribute.name
        self.refs.add(ref)
        self.check_obsolete_ref(node, ref, 'msg-ref')

    def visit_TermReference(self, node):
        # only collect term references, but not attributes of terms
        if node.attribute:
            return
        ref = '-' + node.id.name
        self.refs.add(ref)
        self.check_obsolete_ref(node, ref, 'term-ref')

    def check_obsolete_ref(self, node, ref, ref_type):
        '''Warn if ref isn't used in the same part of the reference.'''
        if ref not in self.reference_refs:
            self.messages.append(
                (
                    'warning', node.span.start,
                    MSGS['obsolete-' + ref_type].format(ref=ref),
                )
            )
+
+
class TermVisitor(GenericL10nChecks, Visitor):
    '''Visitor running the generic localization checks on a Term.

    Findings are collected in ``self.messages``.
    '''

    def __init__(self, locale):
        super().__init__()
        self.messages = []
        self.locale = locale

    def generic_visit(self, node):
        # Spans, annotations and comments are of no interest here.
        skipped = (ftl.Span, ftl.Annotation, ftl.BaseComment)
        if not isinstance(node, skipped):
            super().generic_visit(node)

    def visit_Message(self, node):
        raise RuntimeError("Should not use TermVisitor for Messages")

    def visit_Term(self, node):
        self.check_duplicate_attributes(node)
        super().generic_visit(node)

    def visit_SelectExpression(self, node):
        # Descend first, then check the variants of this expression.
        super().generic_visit(node)
        self.check_variants(node.variants)
+
+
class FluentChecker(Checker):
    '''Tests to run on Fluent (FTL) files.
    '''
    # Anchored at the end of the path, like the patterns of the other
    # checkers.
    pattern = re.compile(r'.*\.ftl$')

    def check_message(self, ref_entry, l10n_entry):
        '''Run checks on localized messages against reference message.

        Returns a list of (category, position, message) tuples.
        '''
        ref_data = ReferenceMessageVisitor()
        ref_data.visit(ref_entry)
        l10n_data = L10nMessageVisitor(self.locale, ref_data)
        l10n_data.visit(l10n_entry)

        messages = l10n_data.messages
        # Warn about references used in the reference message, but not
        # in the same value or attribute of the localization.
        for attr_or_val, refs in ref_data.entry_refs.items():
            for ref, ref_type in refs.items():
                if ref not in l10n_data.entry_refs[attr_or_val]:
                    msg = MSGS['missing-' + ref_type].format(ref=ref)
                    messages.append(('warning', 0, msg))
        return messages

    def check_term(self, l10n_entry):
        '''Check localized terms.

        Returns a list of (category, position, message) tuples.
        '''
        l10n_data = TermVisitor(self.locale)
        l10n_data.visit(l10n_entry)
        return l10n_data.messages

    def check(self, refEnt, l10nEnt):
        '''Run the base checks, then the Fluent checks, on one entity pair.

        This is a generator yielding tuples of category, position,
        message, and 'fluent' as check name for the checks in here.
        Non-zero positions are relative to the start of the localized
        entry.
        '''
        yield from super().check(refEnt, l10nEnt)
        l10n_entry = l10nEnt.entry
        if isinstance(l10n_entry, ftl.Message):
            ref_entry = refEnt.entry
            messages = self.check_message(ref_entry, l10n_entry)
        elif isinstance(l10n_entry, ftl.Term):
            messages = self.check_term(l10n_entry)
        else:
            # Neither a message nor a term, there's nothing to check.
            return

        messages.sort(key=lambda t: t[1])
        for cat, pos, msg in messages:
            if pos:
                pos = pos - l10n_entry.span.start
            yield (cat, pos, msg, 'fluent')
diff --git a/third_party/python/compare_locales/compare_locales/checks/properties.py b/third_party/python/compare_locales/compare_locales/checks/properties.py
new file mode 100644
index 0000000000..549e8533b6
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/checks/properties.py
@@ -0,0 +1,162 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import re
+from difflib import SequenceMatcher
+
+from compare_locales.parser import PropertiesEntity
+from compare_locales import plurals
+from .base import Checker
+
+
+class PrintfException(Exception):
+    '''Signals a printf-style value that could not be parsed.
+
+    `msg` is the description of the problem, `pos` the offset that was
+    passed in alongside it.
+    '''
+    def __init__(self, msg, pos):
+        self.msg, self.pos = msg, pos
+
+class PropertiesChecker(Checker):
+ '''Tests to run on .properties files.
+ '''
+ pattern = re.compile(r'.*\.properties$')
+ # Matches a `%` followed by either a second `%` (escaped percent) or a
+ # format spec: optional `N$` argument number, optional width, optional
+ # precision and a conversion character. The whole `good` group is
+ # optional, so a lone `%` still matches, with `good` unset.
+ printf = re.compile(r'%(?P<good>%|'
+ r'(?:(?P<number>[1-9][0-9]*)\$)?'
+ r'(?P<width>\*|[0-9]+)?'
+ r'(?P<prec>\.(?:\*|[0-9]+)?)?'
+ r'(?P<spec>[duxXosScpfg]))?')
+
+ def check(self, refEnt, l10nEnt):
+ '''Test for the different variable formats.
+
+ Generator of (category, position, message, check name) tuples.
+ Results of the base class check are yielded first.
+ '''
+ yield from super().check(refEnt, l10nEnt)
+ refValue, l10nValue = refEnt.val, l10nEnt.val
+ refSpecs = None
+ # check for PluralForm.jsm stuff, should have the docs in the
+ # comment
+ # That also includes intl.properties' pluralRule, so exclude
+ # entities with that key and values with just numbers
+ if (refEnt.pre_comment
+ and 'Localization_and_Plurals' in refEnt.pre_comment.all
+ and refEnt.key != 'pluralRule'
+ and not re.match(r'\d+$', refValue)):
+ yield from self.check_plural(refValue, l10nValue)
+ # Plural strings get only the plural check; the escape and printf
+ # checks below are not run for them.
+ return
+ # check for lost escapes
+ raw_val = l10nEnt.raw_val
+ for m in PropertiesEntity.escape.finditer(raw_val):
+ if m.group('single') and \
+ m.group('single') not in PropertiesEntity.known_escapes:
+ yield ('warning', m.start(),
+ 'unknown escape sequence, \\' + m.group('single'),
+ 'escape')
+ # A reference value that cannot be parsed is treated as having no
+ # printf specs, which skips the printf comparison.
+ try:
+ refSpecs = self.getPrintfSpecs(refValue)
+ except PrintfException:
+ refSpecs = []
+ if refSpecs:
+ yield from self.checkPrintf(refSpecs, l10nValue)
+ return
+
+ def check_plural(self, refValue, l10nValue):
+ '''Check for the stringbundle plurals logic.
+ The common variable pattern is #1.
+ '''
+ known_plurals = plurals.get_plural(self.locale)
+ if known_plurals:
+ expected_forms = len(known_plurals)
+ # Forms are counted as `;`-separated segments of the l10n value.
+ found_forms = l10nValue.count(';') + 1
+ msg = 'expecting {} plurals, found {}'.format(
+ expected_forms,
+ found_forms
+ )
+ # Too few and too many forms yield the same warning.
+ if expected_forms > found_forms:
+ yield ('warning', 0, msg, 'plural')
+ if expected_forms < found_forms:
+ yield ('warning', 0, msg, 'plural')
+ # Compare the sets of `#N` variable numbers used on both sides.
+ pats = {int(m.group(1)) for m in re.finditer('#([0-9]+)', refValue)}
+ if len(pats) == 0:
+ return
+ lpats = {int(m.group(1)) for m in re.finditer('#([0-9]+)', l10nValue)}
+ if pats - lpats:
+ yield ('warning', 0, 'not all variables used in l10n',
+ 'plural')
+ return
+ if lpats - pats:
+ yield ('error', 0, 'unreplaced variables in l10n',
+ 'plural')
+
+ def checkPrintf(self, refSpecs, l10nValue):
+ # Compare the printf specs of the localized value against refSpecs
+ # and yield ('error'|'warning', pos, message, 'printf') tuples.
+ try:
+ l10nSpecs = self.getPrintfSpecs(l10nValue)
+ except PrintfException as e:
+ yield ('error', e.pos, e.msg, 'printf')
+ return
+ if refSpecs != l10nSpecs:
+ # Diff the two spec lists; each opcode describes one run of
+ # equal, deleted, inserted or replaced specs.
+ sm = SequenceMatcher()
+ sm.set_seqs(refSpecs, l10nSpecs)
+ msgs = []
+ warn = None
+ for action, i1, i2, j1, j2 in sm.get_opcodes():
+ if action == 'equal':
+ continue
+ if action == 'delete':
+ # missing argument in l10n
+ if i2 == len(refSpecs):
+ # trailing specs missing, that's just a warning
+ warn = ', '.join('trailing argument %d `%s` missing' %
+ (i+1, refSpecs[i])
+ for i in range(i1, i2))
+ else:
+ for i in range(i1, i2):
+ msgs.append('argument %d `%s` missing' %
+ (i+1, refSpecs[i]))
+ continue
+ if action == 'insert':
+ # obsolete argument in l10n
+ for i in range(j1, j2):
+ msgs.append('argument %d `%s` obsolete' %
+ (i+1, l10nSpecs[i]))
+ continue
+ if action == 'replace':
+ # zip() pairs the two ranges, so only as many specs as the
+ # shorter range holds are reported.
+ for i, j in zip(range(i1, i2), range(j1, j2)):
+ msgs.append('argument %d `%s` should be `%s`' %
+ (j+1, l10nSpecs[j], refSpecs[i]))
+ if msgs:
+ yield ('error', 0, ', '.join(msgs), 'printf')
+ if warn is not None:
+ yield ('warning', 0, warn, 'printf')
+
+ def getPrintfSpecs(self, val):
+ # Return the list of conversion characters found in val. For
+ # numbered (`%1$S`) arguments the list is indexed by argument
+ # number - 1, otherwise specs are in order of appearance.
+ # Raises PrintfException for a lone `%`, for mixing numbered and
+ # unnumbered arguments, and for gaps in numbered arguments.
+ hasNumber = False
+ specs = []
+ for m in self.printf.finditer(val):
+ if m.group("good") is None:
+ # found just a '%', signal an error
+ raise PrintfException('Found single %', m.start())
+ if m.group("good") == '%':
+ # escaped %
+ continue
+ if ((hasNumber and m.group('number') is None) or
+ (not hasNumber and specs and
+ m.group('number') is not None)):
+ # mixed style, numbered and not
+ raise PrintfException('Mixed ordered and non-ordered args',
+ m.start())
+ hasNumber = m.group('number') is not None
+ if hasNumber:
+ pos = int(m.group('number')) - 1
+ ls = len(specs)
+ if pos >= ls:
+ # pad specs
+ nones = pos - ls
+ specs[ls:pos] = nones*[None]
+ specs.append(m.group('spec'))
+ else:
+ specs[pos] = m.group('spec')
+ else:
+ specs.append(m.group('spec'))
+ # check for missing args
+ if hasNumber and not all(specs):
+ raise PrintfException('Ordered argument missing', 0)
+ return specs
diff --git a/third_party/python/compare_locales/compare_locales/commands.py b/third_party/python/compare_locales/compare_locales/commands.py
new file mode 100644
index 0000000000..58266e308a
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/commands.py
@@ -0,0 +1,203 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+'Commands exposed to commandlines'
+
+import logging
+from argparse import ArgumentParser
+from json import dump as json_dump
+import os
+import sys
+
+from compare_locales import mozpath
+from compare_locales import version
+from compare_locales.paths import EnumerateApp, TOMLParser, ConfigNotFound
+from compare_locales.compare import compareProjects
+
+
+class CompareLocales:
+    """Check the localization status of gecko applications.
+The first arguments are paths to the l10n.toml or ini files for the
+applications, followed by the base directory of the localization repositories.
+Then you pass in the list of locale codes you want to compare. If there are
+no locales given, the list of locales will be taken from the l10n.toml file
+or the all-locales file referenced by the application\'s l10n.ini."""
+    # NOTE: the class docstring above is runtime text, get_parser() passes
+    # it to argparse as the description shown by --help.
+
+    def __init__(self):
+        self.parser = self.get_parser()
+
+    def get_parser(self):
+        """Get an ArgumentParser, with class docstring as description.
+        """
+        parser = ArgumentParser(description=self.__doc__)
+        parser.add_argument('--version', action='version',
+                            version='%(prog)s ' + version)
+        parser.add_argument('-v', '--verbose', action='count',
+                            default=0, help='Make more noise')
+        parser.add_argument('-q', '--quiet', action='count',
+                            default=0, help='''Show less data.
+Specified once, don't show obsolete entities. Specified twice, also hide
+missing entities. Specify thrice to exclude warnings and four times to
+just show stats''')
+        parser.add_argument('--validate', action='store_true',
+                            help='Run compare-locales against reference')
+        parser.add_argument('-m', '--merge',
+                            help='''Use this directory to stage merged files,
+use {ab_CD} to specify a different directory for each locale''')
+        parser.add_argument('config_paths', metavar='l10n.toml', nargs='+',
+                            help='TOML or INI file for the project')
+        parser.add_argument('l10n_base_dir', metavar='l10n-base-dir',
+                            help='Parent directory of localizations')
+        parser.add_argument('locales', nargs='*', metavar='locale-code',
+                            help='Locale code and top-level directory of '
+                                 'each localization')
+        parser.add_argument('--json',
+                            help='''Serialize to JSON. Value is the name of
+the output file, pass "-" to serialize to stdout and hide the default output.
+''')
+        parser.add_argument('-D', action='append', metavar='var=value',
+                            default=[], dest='defines',
+                            help='Overwrite variables in TOML files')
+        parser.add_argument('--full', action="store_true",
+                            help="Compare sub-projects that are disabled")
+        parser.add_argument('--return-zero', action="store_true",
+                            help="Return 0 regardless of l10n status")
+        parser.add_argument('--clobber-merge', action="store_true",
+                            default=False, dest='clobber',
+                            help="""WARNING: DATALOSS.
+Use this option with care. If specified, the merge directory will
+be clobbered for each module. That means, the subdirectory will
+be completely removed, any files that were there are lost.
+Be careful to specify the right merge directory when using this option.""")
+        return parser
+
+    @classmethod
+    def call(cls):
+        """Entry_point for setuptools.
+        The actual command handling is done in the handle() method of the
+        subclasses.
+        """
+        cmd = cls()
+        args = cmd.parser.parse_args()
+        return cmd.handle(**vars(args))
+
+    def handle(
+        self,
+        quiet=0, verbose=0,
+        validate=False,
+        merge=None,
+        config_paths=[], l10n_base_dir=None, locales=[],
+        defines=[],
+        full=False,
+        return_zero=False,
+        clobber=False,
+        json=None,
+    ):
+        """The instance part of the classmethod call.
+
+        Using keyword arguments as that is what we need for mach
+        commands in mozilla-central.
+
+        The keyword names match the `dest` names of the arguments set up
+        in get_parser(). The list defaults are never mutated here.
+
+        Returns 1 if an error was observed and `return_zero` is not set,
+        0 otherwise. Exits through the parser if a TOML config is not
+        found (status 1) or the comparison raises OSError (status 2).
+        """
+        # log as verbose or quiet as we want, warn by default
+        logging_level = logging.WARNING - (verbose - quiet) * 10
+        logging.basicConfig()
+        logging.getLogger().setLevel(logging_level)
+
+        config_paths, l10n_base_dir, locales = self.extract_positionals(
+            validate=validate,
+            config_paths=config_paths,
+            l10n_base_dir=l10n_base_dir,
+            locales=locales,
+        )
+
+        # when we compare disabled projects, we set our locales
+        # on all subconfigs, so deep is True.
+        locales_deep = full
+        configs = []
+        config_env = {
+            'l10n_base': l10n_base_dir
+        }
+        for define in defines:
+            var, _, value = define.partition('=')
+            config_env[var] = value
+        for config_path in config_paths:
+            if config_path.endswith('.toml'):
+                try:
+                    config = TOMLParser().parse(config_path, env=config_env)
+                except ConfigNotFound as e:
+                    # ArgumentParser.exit() takes (status, message); the
+                    # message used to be passed as the status. Status 1 and
+                    # the message on stderr keep the observable behavior.
+                    self.parser.exit(
+                        1, 'config file %s not found\n' % e.filename)
+                if locales_deep:
+                    if not locales:
+                        # no explicit locales given, force all locales
+                        config.set_locales(config.all_locales, deep=True)
+                    else:
+                        config.set_locales(locales, deep=True)
+                configs.append(config)
+            else:
+                app = EnumerateApp(config_path, l10n_base_dir)
+                configs.append(app.asConfig())
+        try:
+            observers = compareProjects(
+                configs,
+                locales,
+                l10n_base_dir,
+                quiet=quiet,
+                merge_stage=merge, clobber_merge=clobber)
+        except OSError as exc:
+            print("FAIL: " + str(exc))
+            self.parser.exit(2)
+
+        # The human readable report is hidden only when JSON goes to stdout.
+        if json != '-':
+            details = observers.serializeDetails()
+            if details:
+                print(details)
+            if len(configs) > 1:
+                if details:
+                    print('')
+                print("Summaries for")
+                for config_path in config_paths:
+                    print(" " + config_path)
+                print(" and the union of these, counting each string once")
+            print(observers.serializeSummaries())
+        if json is not None:
+            data = [observer.toJSON() for observer in observers]
+            if json == '-':
+                # Write to stdout, but never close it: this method also
+                # runs in-process, where the caller still needs stdout.
+                json_dump(data, sys.stdout, sort_keys=True, indent=1)
+                sys.stdout.write('\n')
+                sys.stdout.flush()
+            else:
+                # The context manager closes the file even if dumping fails.
+                with open(json, 'w') as fh:
+                    json_dump(data, fh, sort_keys=True, indent=None)
+        rv = 1 if not return_zero and observers.error else 0
+        return rv
+
+    def extract_positionals(
+        self,
+        validate=False,
+        config_paths=[], l10n_base_dir=None, locales=[],
+    ):
+        """Re-split the positional arguments.
+
+        Returns a (config_paths, l10n_base_dir, locales) tuple. Exits
+        through parser.error() if no config file is given, a config path
+        is not a file, or no directory is among the arguments.
+        """
+        # using nargs multiple times in argparser totally screws things
+        # up, repair that.
+        # First files are configs, then the base dir, everything else is
+        # locales
+        all_args = config_paths + [l10n_base_dir] + locales
+        config_paths = []
+        # The first directory is our l10n base, split there.
+        while all_args and not os.path.isdir(all_args[0]):
+            config_paths.append(all_args.pop(0))
+        if not config_paths:
+            self.parser.error('no configuration file given')
+        for cf in config_paths:
+            if not os.path.isfile(cf):
+                self.parser.error('config file %s not found' % cf)
+        if not all_args:
+            self.parser.error('l10n-base-dir not found')
+        l10n_base_dir = mozpath.abspath(all_args.pop(0))
+        if validate:
+            # signal validation mode by setting locale list to [None]
+            locales = [None]
+        else:
+            locales = all_args
+
+        return config_paths, l10n_base_dir, locales
diff --git a/third_party/python/compare_locales/compare_locales/compare/__init__.py b/third_party/python/compare_locales/compare_locales/compare/__init__.py
new file mode 100644
index 0000000000..6d4f3735bf
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/compare/__init__.py
@@ -0,0 +1,89 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+'Mozilla l10n compare locales tool'
+
+import os
+import shutil
+
+from compare_locales import paths, mozpath
+
+from .content import ContentComparer
+from .observer import Observer, ObserverList
+from .utils import Tree, AddRemove
+
+
+__all__ = [
+ 'ContentComparer',
+ 'Observer', 'ObserverList',
+ 'AddRemove', 'Tree',
+ 'compareProjects',
+]
+
+
+def compareProjects(
+ project_configs,
+ locales,
+ l10n_base_dir,
+ stat_observer=None,
+ merge_stage=None,
+ clobber_merge=False,
+ quiet=0,
+ ):
+ # Compare the files of all given project configs for each locale and
+ # return the comparer's observer list, holding one Observer per project.
+ # `locales` containing None signals validation mode. If `locales` is
+ # empty, the union of each project's all_locales is compared.
+ # NOTE(review): `stat_observer` is not used in this function body.
+ all_locales = set(locales)
+ comparer = ContentComparer(quiet)
+ observers = comparer.observers
+ for project in project_configs:
+ # disable filter if we're in validation mode
+ if None in locales:
+ filter = None
+ else:
+ filter = project.filter
+ observers.append(
+ Observer(
+ quiet=quiet,
+ filter=filter,
+ ))
+ if not locales:
+ all_locales.update(project.all_locales)
+ for locale in sorted(all_locales):
+ files = paths.ProjectFiles(locale, project_configs,
+ mergebase=merge_stage)
+ if merge_stage is not None:
+ if clobber_merge:
+ # Remove the prefix directory of every merge matcher before
+ # files are staged there.
+ mergematchers = {_m.get('merge') for _m in files.matchers}
+ mergematchers.discard(None)
+ for matcher in mergematchers:
+ clobberdir = matcher.prefix
+ if os.path.exists(clobberdir):
+ shutil.rmtree(clobberdir)
+ print("clobbered " + clobberdir)
+ for l10npath, refpath, mergepath, extra_tests in files:
+ # module and file path are needed for legacy filter.py support
+ module = None
+ fpath = mozpath.relpath(l10npath, l10n_base_dir)
+ for _m in files.matchers:
+ if _m['l10n'].match(l10npath):
+ if _m['module']:
+ # legacy ini support, set module, and resolve
+ # local path against the matcher prefix,
+ # which includes the module
+ module = _m['module']
+ fpath = mozpath.relpath(l10npath, _m['l10n'].prefix)
+ break
+ reffile = paths.File(refpath, fpath or refpath, module=module)
+ if locale is None:
+ # When validating the reference files, set locale
+ # to a private subtag. This only shows in the output.
+ locale = paths.REFERENCE_LOCALE
+ l10n = paths.File(l10npath, fpath or l10npath,
+ module=module, locale=locale)
+ # A file only in the reference is added, a file only in l10n is
+ # removed, a file on both sides is compared.
+ if not os.path.exists(l10npath):
+ comparer.add(reffile, l10n, mergepath)
+ continue
+ if not os.path.exists(refpath):
+ comparer.remove(reffile, l10n, mergepath)
+ continue
+ comparer.compare(reffile, l10n, mergepath, extra_tests)
+ return observers
diff --git a/third_party/python/compare_locales/compare_locales/compare/content.py b/third_party/python/compare_locales/compare_locales/compare/content.py
new file mode 100644
index 0000000000..1e879a643c
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/compare/content.py
@@ -0,0 +1,304 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+'Mozilla l10n compare locales tool'
+
+import codecs
+import os
+import shutil
+import re
+
+from compare_locales import parser
+from compare_locales import mozpath
+from compare_locales.checks import getChecker, EntityPos
+from compare_locales.keyedtuple import KeyedTuple
+
+from .observer import ObserverList
+from .utils import AddRemove
+
+
+class ContentComparer:
+ # Compares a reference file against its localization, reports the
+ # findings to self.observers and optionally writes a merged file.
+ keyRE = re.compile('[kK]ey')
+ nl = re.compile('\n', re.M)
+
+ def __init__(self, quiet=0):
+ '''Create a ContentComparer.
+ observer is usually an instance of Observer. The return values
+ of the notify method are used to control the handling of missing
+ entities.
+ '''
+ self.observers = ObserverList(quiet=quiet)
+
+ def create_merge_dir(self, merge_file):
+ # Make sure the directory that will hold merge_file exists.
+ outdir = mozpath.dirname(merge_file)
+ os.makedirs(outdir, exist_ok=True)
+
+ def merge(self, ref_entities, ref_file, l10n_file, merge_file,
+ missing, skips, ctx, capabilities, encoding):
+ '''Create localized file in merge dir
+
+ `ref_entities` is the parser result of the reference file
+ `ref_file` and `l10n_file` are the File objects for the reference and
+ the l10n file, resp.
+ `merge_file` is the output path for the generated content. This is None
+ if we're just comparing or validating.
+ `missing` are the missing messages in l10n - potentially copied from
+ reference
+ `skips` are entries to be dropped from the localized file
+ `ctx` is the parsing context
+ `capabilities` are the capabilities for the merge algorithm
+ `encoding` is the encoding to be used when serializing, usually utf-8
+ '''
+
+ if not merge_file:
+ return
+
+ if capabilities == parser.CAN_NONE:
+ return
+
+ self.create_merge_dir(merge_file)
+
+ if capabilities & parser.CAN_COPY:
+ # copy the l10n file if it's good, or the reference file if not
+ if skips or missing:
+ src = ref_file.fullpath
+ else:
+ src = l10n_file.fullpath
+ shutil.copyfile(src, merge_file)
+ # NOTE(review): this message is printed for either source file.
+ print("copied reference to " + merge_file)
+ return
+
+ if not (capabilities & parser.CAN_SKIP):
+ return
+
+ # Start with None in case the merge file doesn't need to be created.
+ f = None
+
+ if skips:
+ # skips come in ordered by key name, we need them in file order
+ skips.sort(key=lambda s: s.span[0])
+
+ # we need to skip a few erroneous blocks in the input, copy by hand
+ f = codecs.open(merge_file, 'wb', encoding)
+ offset = 0
+ for skip in skips:
+ chunk = skip.span
+ f.write(ctx.contents[offset:chunk[0]])
+ offset = chunk[1]
+ f.write(ctx.contents[offset:])
+
+ if f is None:
+ # l10n file is a good starting point
+ shutil.copyfile(l10n_file.fullpath, merge_file)
+
+ if not (capabilities & parser.CAN_MERGE):
+ if f:
+ f.close()
+ return
+
+ if skips or missing:
+ if f is None:
+ f = codecs.open(merge_file, 'ab', encoding)
+ # Append the reference text of all missing entries and of all
+ # skipped entries that are not Junk.
+ trailing = (['\n'] +
+ [ref_entities[key].all for key in missing] +
+ [ref_entities[skip.key].all for skip in skips
+ if not isinstance(skip, parser.Junk)])
+
+ def ensureNewline(s):
+ if not s.endswith('\n'):
+ return s + '\n'
+ return s
+
+ print("adding to " + merge_file)
+ f.write(''.join(map(ensureNewline, trailing)))
+
+ if f is not None:
+ f.close()
+
+ def remove(self, ref_file, l10n, merge_file):
+ '''Obsolete l10n file.
+
+ Copy to merge stage if we can.
+ '''
+ self.observers.notify('obsoleteFile', l10n, None)
+ self.merge(
+ KeyedTuple([]), ref_file, l10n, merge_file,
+ [], [], None, parser.CAN_COPY, None
+ )
+
+ def compare(self, ref_file, l10n, merge_file, extra_tests=None):
+ # Parse both files, report per-entity differences and check results
+ # to the observers, merge if merge_file is given, and update the
+ # stats for the l10n file.
+ try:
+ p = parser.getParser(ref_file.file)
+ except UserWarning:
+ # no comparison, XXX report?
+ # At least, merge
+ self.merge(
+ KeyedTuple([]), ref_file, l10n, merge_file, [], [], None,
+ parser.CAN_COPY, None)
+ return
+ try:
+ p.readFile(ref_file)
+ except Exception as e:
+ self.observers.notify('error', ref_file, str(e))
+ return
+ ref_entities = p.parse()
+ try:
+ p.readFile(l10n)
+ l10n_entities = p.parse()
+ l10n_ctx = p.ctx
+ except Exception as e:
+ self.observers.notify('error', l10n, str(e))
+ return
+
+ # AddRemove yields ('delete'|'add'|'equal', key) pairs for keys only
+ # in the reference, only in l10n, or in both.
+ ar = AddRemove()
+ ar.set_left(ref_entities.keys())
+ ar.set_right(l10n_entities.keys())
+ report = missing = obsolete = changed = unchanged = keys = 0
+ missing_w = changed_w = unchanged_w = 0 # word stats
+ missings = []
+ skips = []
+ checker = getChecker(l10n, extra_tests=extra_tests)
+ if checker and checker.needs_reference:
+ checker.set_reference(ref_entities)
+ # Duplicates in the reference are warnings, in l10n they are errors.
+ for msg in p.findDuplicates(ref_entities):
+ self.observers.notify('warning', l10n, msg)
+ for msg in p.findDuplicates(l10n_entities):
+ self.observers.notify('error', l10n, msg)
+ for action, entity_id in ar:
+ if action == 'delete':
+ # missing entity
+ if isinstance(ref_entities[entity_id], parser.Junk):
+ self.observers.notify(
+ 'warning', l10n, 'Parser error in en-US'
+ )
+ continue
+ _rv = self.observers.notify('missingEntity', l10n, entity_id)
+ if _rv == "ignore":
+ continue
+ if _rv == "error":
+ # only add to missing entities for l10n-merge on error,
+ # not report
+ missings.append(entity_id)
+ missing += 1
+ refent = ref_entities[entity_id]
+ missing_w += refent.count_words()
+ else:
+ # just report
+ report += 1
+ elif action == 'add':
+ # obsolete entity or junk
+ if isinstance(l10n_entities[entity_id],
+ parser.Junk):
+ junk = l10n_entities[entity_id]
+ self.observers.notify(
+ 'error', l10n,
+ junk.error_message()
+ )
+ if merge_file is not None:
+ skips.append(junk)
+ elif (
+ self.observers.notify('obsoleteEntity', l10n, entity_id)
+ != 'ignore'
+ ):
+ obsolete += 1
+ else:
+ # entity found in both ref and l10n, check for changed
+ refent = ref_entities[entity_id]
+ l10nent = l10n_entities[entity_id]
+ # Entities whose id matches keyRE are only counted as keys.
+ if self.keyRE.search(entity_id):
+ keys += 1
+ else:
+ if refent.equals(l10nent):
+ self.doUnchanged(l10nent)
+ unchanged += 1
+ unchanged_w += refent.count_words()
+ else:
+ self.doChanged(ref_file, refent, l10nent)
+ changed += 1
+ changed_w += refent.count_words()
+ # run checks:
+ if checker:
+ for tp, pos, msg, cat in checker.check(refent, l10nent):
+ # EntityPos positions are resolved against the whole
+ # entity, other positions against its value.
+ if isinstance(pos, EntityPos):
+ line, col = l10nent.position(pos)
+ else:
+ line, col = l10nent.value_position(pos)
+ # skip error entities when merging
+ if tp == 'error' and merge_file is not None:
+ skips.append(l10nent)
+ self.observers.notify(
+ tp, l10n,
+ "%s at line %d, column %d for %s" %
+ (msg, line, col, refent.key)
+ )
+ pass
+
+ if merge_file is not None:
+ self.merge(
+ ref_entities, ref_file,
+ l10n, merge_file, missings, skips, l10n_ctx,
+ p.capabilities, p.encoding)
+
+ stats = {
+ 'missing': missing,
+ 'missing_w': missing_w,
+ 'report': report,
+ 'obsolete': obsolete,
+ 'changed': changed,
+ 'changed_w': changed_w,
+ 'unchanged': unchanged,
+ 'unchanged_w': unchanged_w,
+ 'keys': keys,
+ }
+ self.observers.updateStats(l10n, stats)
+ pass
+
+ def add(self, orig, missing, merge_file):
+ ''' Add missing localized file.'''
+ f = orig
+ try:
+ p = parser.getParser(f.file)
+ except UserWarning:
+ p = None
+
+ # if we don't support this file, assume CAN_COPY to mimic
+ # l10n dir as closely as possible
+ caps = p.capabilities if p else parser.CAN_COPY
+ if (caps & (parser.CAN_COPY | parser.CAN_MERGE)):
+ # even if we can merge, pretend we can only copy
+ # The non-empty `missing` list makes merge() copy the reference.
+ self.merge(
+ KeyedTuple([]), orig, missing, merge_file,
+ ['trigger copy'], [], None, parser.CAN_COPY, None
+ )
+
+ if self.observers.notify('missingFile', missing, None) == "ignore":
+ # filter said that we don't need this file, don't count it
+ return
+
+ if p is None:
+ # We don't have a parser, cannot count missing strings
+ return
+
+ try:
+ p.readFile(f)
+ entities = p.parse()
+ except Exception as ex:
+ self.observers.notify('error', f, str(ex))
+ return
+ # strip parse errors
+ entities = [e for e in entities if not isinstance(e, parser.Junk)]
+ # Every remaining reference entity counts as missing in l10n.
+ self.observers.updateStats(missing, {'missing': len(entities)})
+ missing_w = 0
+ for e in entities:
+ missing_w += e.count_words()
+ self.observers.updateStats(missing, {'missing_w': missing_w})
+
+ def doUnchanged(self, entity):
+ # overload this if needed
+ pass
+
+ def doChanged(self, file, ref_entity, l10n_entity):
+ # overload this if needed
+ pass
diff --git a/third_party/python/compare_locales/compare_locales/compare/observer.py b/third_party/python/compare_locales/compare_locales/compare/observer.py
new file mode 100644
index 0000000000..d336a004b3
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/compare/observer.py
@@ -0,0 +1,215 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+'Mozilla l10n compare locales tool'
+
+from collections import defaultdict
+
+from .utils import Tree
+
+
+class Observer:
+ # Collects per-locale summary counts and per-file details of a
+ # comparison, optionally consulting a filter callable.
+
+ def __init__(self, quiet=0, filter=None):
+ '''Create Observer
+ For quiet=1, skip per-entity missing and obsolete strings,
+ for quiet=2, skip missing and obsolete files. For quiet=3,
+ skip warnings and errors.
+ '''
+ # Per-locale counters, created with all-zero values on first access.
+ self.summary = defaultdict(lambda: {
+ "errors": 0,
+ "warnings": 0,
+ "missing": 0,
+ "missing_w": 0,
+ "report": 0,
+ "obsolete": 0,
+ "changed": 0,
+ "changed_w": 0,
+ "unchanged": 0,
+ "unchanged_w": 0,
+ "keys": 0,
+ })
+ self.details = Tree(list)
+ self.quiet = quiet
+ self.filter = filter
+ # Set to True once an error was notified, regardless of quiet.
+ self.error = False
+
+ def _dictify(self, d):
+ # Copy d into a plain dict whose values are plain dicts, too.
+ plaindict = {}
+ for k, v in d.items():
+ plaindict[k] = dict(v)
+ return plaindict
+
+ def toJSON(self):
+ # Don't export file stats, even if we collected them.
+ # Those are not part of the data we use toJSON for.
+ return {
+ 'summary': self._dictify(self.summary),
+ 'details': self.details.toJSON()
+ }
+
+ def updateStats(self, file, stats):
+ # Add the counts in `stats` to the summary of the file's locale.
+ # in multi-project scenarios, this file might not be ours,
+ # check that.
+ # Pass in a dummy entity key '' to avoid getting in to
+ # generic file filters. If we have stats for those,
+ # we want to aggregate the counts
+ if (self.filter is not None and
+ self.filter(file, entity='') == 'ignore'):
+ return
+ for category, value in stats.items():
+ if category == 'errors':
+ # updateStats isn't called with `errors`, but make sure
+ # we handle this if that changes
+ self.error = True
+ self.summary[file.locale][category] += value
+
+ def notify(self, category, file, data):
+ # Record one finding and return the filter's verdict for it, or
+ # 'error' if there is no filter. What gets recorded in the details
+ # depends on the category and on self.quiet.
+ rv = 'error'
+ if category in ['missingFile', 'obsoleteFile']:
+ # File-level findings are filtered without an entity.
+ if self.filter is not None:
+ rv = self.filter(file)
+ if rv == "ignore" or self.quiet >= 2:
+ return rv
+ if self.quiet == 0 or category == 'missingFile':
+ self.details[file].append({category: rv})
+ return rv
+ if self.filter is not None:
+ rv = self.filter(file, data)
+ if rv == "ignore":
+ return rv
+ if category in ['missingEntity', 'obsoleteEntity']:
+ if (
+ (category == 'missingEntity' and self.quiet < 2)
+ or (category == 'obsoleteEntity' and self.quiet < 1)
+ ):
+ self.details[file].append({category: data})
+ return rv
+ if category == 'error':
+ # Set error independently of quiet
+ self.error = True
+ if category in ('error', 'warning'):
+ if (
+ (category == 'error' and self.quiet < 4)
+ or (category == 'warning' and self.quiet < 3)
+ ):
+ self.details[file].append({category: data})
+ # The summary keys are the plural forms, 'errors' and 'warnings'.
+ self.summary[file.locale][category + 's'] += 1
+ return rv
+
+
+class ObserverList(Observer):
+ # An Observer without a filter that forwards to a list of observers
+ # and additionally keeps its own aggregate over all of them.
+ def __init__(self, quiet=0):
+ super().__init__(quiet=quiet)
+ self.observers = []
+
+ def __iter__(self):
+ return iter(self.observers)
+
+ def append(self, observer):
+ self.observers.append(observer)
+
+ def notify(self, category, file, data):
+ """Check observer for the found data, and if it's
+ not to ignore, notify stat_observers.
+ """
+ # Set of distinct return values of all child observers.
+ rvs = {
+ observer.notify(category, file, data)
+ for observer in self.observers
+ }
+ if all(rv == 'ignore' for rv in rvs):
+ return 'ignore'
+ # our return value doesn't count
+ super().notify(category, file, data)
+ rvs.discard('ignore')
+ # 'error' wins over any other verdict.
+ if 'error' in rvs:
+ return 'error'
+ assert len(rvs) == 1
+ return rvs.pop()
+
+ def updateStats(self, file, stats):
+ """Check observer for the found data, and if it's
+ not to ignore, notify stat_observers.
+ """
+ for observer in self.observers:
+ observer.updateStats(file, stats)
+ super().updateStats(file, stats)
+
+ def serializeDetails(self):
+ # Render self.details as indented text, one line per key or finding.
+
+ def tostr(t):
+ # t is a (depth, flag, key_or_value) tuple from Tree.getContent().
+ if t[1] == 'key':
+ return ' ' * t[0] + '/'.join(t[2])
+ o = []
+ indent = ' ' * (t[0] + 1)
+ for item in t[2]:
+ if 'error' in item:
+ o += [indent + 'ERROR: ' + item['error']]
+ elif 'warning' in item:
+ o += [indent + 'WARNING: ' + item['warning']]
+ elif 'missingEntity' in item:
+ o += [indent + '+' + item['missingEntity']]
+ elif 'obsoleteEntity' in item:
+ o += [indent + '-' + item['obsoleteEntity']]
+ elif 'missingFile' in item:
+ o.append(indent + '// add and localize this file')
+ elif 'obsoleteFile' in item:
+ o.append(indent + '// remove this file')
+ return '\n'.join(o)
+
+ return '\n'.join(tostr(c) for c in self.details.getContent())
+
+ def serializeSummaries(self):
+ # Render the summary counts per locale as text, one column per
+ # observer, plus a column for this list if it has several observers.
+ summaries = {
+ loc: []
+ for loc in self.summary.keys()
+ }
+ for observer in self.observers:
+ for loc, lst in summaries.items():
+ # Not all locales are on all projects,
+ # default to empty summary
+ lst.append(observer.summary.get(loc, {}))
+ if len(self.observers) > 1:
+ # add ourselves if there's more than one project
+ for loc, lst in summaries.items():
+ lst.append(self.summary[loc])
+ # 'report' is not printed as a row, but counts towards the total.
+ keys = (
+ 'errors',
+ 'warnings',
+ 'missing', 'missing_w',
+ 'obsolete',
+ 'changed', 'changed_w',
+ 'unchanged', 'unchanged_w',
+ 'keys',
+ )
+ leads = [
+ f'{k:12}' for k in keys
+ ]
+ out = []
+ # NOTE: the loop below rebinds `summaries` to the per-locale list.
+ for locale, summaries in sorted(summaries.items()):
+ if locale:
+ out.append(locale + ':')
+ segment = [''] * len(keys)
+ for summary in summaries:
+ for row, key in enumerate(keys):
+ segment[row] += ' {:6}'.format(summary.get(key) or '')
+
+ # Rows without any value are dropped.
+ out += [
+ lead + row
+ for lead, row in zip(leads, segment)
+ if row.strip()
+ ]
+
+ total = sum(summaries[-1].get(k, 0)
+ for k in ['changed', 'unchanged', 'report', 'missing']
+ )
+ rate = 0
+ if total:
+ # `summary` is the leftover loop variable here, i.e. the last
+ # entry of summaries, the same one used for `total`.
+ rate = (('changed' in summary and summary['changed'] * 100) or
+ 0) / total
+ out.append('%d%% of entries changed' % rate)
+ return '\n'.join(out)
+
+ def __str__(self):
+ return 'observer'
diff --git a/third_party/python/compare_locales/compare_locales/compare/utils.py b/third_party/python/compare_locales/compare_locales/compare/utils.py
new file mode 100644
index 0000000000..e298f80bc5
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/compare/utils.py
@@ -0,0 +1,133 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+'Mozilla l10n compare locales tool'
+
+from compare_locales import paths
+
+
+class Tree:
+ # Tree whose branches are keyed by tuples of path segments. Each node
+ # may hold a value, created on demand by calling `valuetype`.
+ def __init__(self, valuetype):
+ self.branches = dict()
+ self.valuetype = valuetype
+ self.value = None
+
+ def __getitem__(self, leaf):
+ # Return the value stored for `leaf`, creating it if needed.
+ # `leaf` is either a paths.File or a '/'-separated string.
+ parts = []
+ if isinstance(leaf, paths.File):
+ parts = []
+ # Files with a module are keyed by locale and module first.
+ if leaf.module:
+ parts += [leaf.locale] + leaf.module.split('/')
+ parts += leaf.file.split('/')
+ else:
+ parts = leaf.split('/')
+ return self.__get(parts)
+
+ def __get(self, parts):
+ # Find or create the node for `parts` and return its value.
+ common = None
+ old = None
+ new = tuple(parts)
+ t = self
+ for k, v in self.branches.items():
+ # Count how many leading segments k and parts have in common.
+ for i, part in enumerate(zip(k, parts)):
+ if part[0] != part[1]:
+ i -= 1
+ break
+ if i < 0:
+ continue
+ i += 1
+ common = tuple(k[:i])
+ old = tuple(k[i:])
+ new = tuple(parts[i:])
+ break
+ if old:
+ # Only part of the existing key matched: replace that branch by
+ # a new node under the common prefix, holding the old remainder.
+ self.branches.pop(k)
+ t = Tree(self.valuetype)
+ t.branches[old] = v
+ self.branches[common] = t
+ elif common:
+ t = self.branches[common]
+ if new:
+ if common:
+ return t.__get(new)
+ t2 = t
+ t = Tree(self.valuetype)
+ t2.branches[new] = t
+ if t.value is None:
+ t.value = t.valuetype()
+ return t.value
+
+ indent = ' '
+
+ def getContent(self, depth=0):
+ '''
+ Returns iterator of (depth, flag, key_or_value) tuples.
+ If flag is 'value', key_or_value is a value object, otherwise
+ (flag is 'key') it's a key string.
+ '''
+ keys = sorted(self.branches.keys())
+ if self.value is not None:
+ yield (depth, 'value', self.value)
+ for key in keys:
+ yield (depth, 'key', key)
+ yield from self.branches[key].getContent(depth + 1)
+
+ def toJSON(self):
+ '''
+ Returns this Tree as a JSON-able tree of hashes.
+ Only the values need to take care that they're JSON-able.
+ '''
+ # A node with a value is serialized as that value alone.
+ if self.value is not None:
+ return self.value
+ return {'/'.join(key): self.branches[key].toJSON()
+ for key in self.branches.keys()}
+
+ def getStrRows(self):
+ # One text row per item of getContent(), indented by depth.
+ def tostr(t):
+ if t[1] == 'key':
+ return self.indent * t[0] + '/'.join(t[2])
+ return self.indent * (t[0] + 1) + str(t[2])
+
+ return [tostr(c) for c in self.getContent()]
+
+ def __str__(self):
+ return '\n'.join(self.getStrRows())
+
+
+class AddRemove:
+    '''Merge two key sequences into one ordered stream of actions.
+
+    Iterating yields ('equal', item) for items on both sides,
+    ('delete', item) for items only on the left and ('add', item) for
+    items only on the right. Left items keep their order, right-only
+    items follow the left item that preceded them on the right side.
+    '''
+    def __init__(self):
+        self.left = None
+        self.right = None
+
+    def set_left(self, left):
+        # Lists are stored as they are, other iterables are copied.
+        self.left = left if isinstance(left, list) else list(left)
+
+    def set_right(self, right):
+        # Lists are stored as they are, other iterables are copied.
+        self.right = right if isinstance(right, list) else list(right)
+
+    def __iter__(self):
+        # Sort key per item: (index in left, index in right). Left items
+        # use -1 as second component so they sort first.
+        sort_keys = {}
+        for left_index, entry in enumerate(self.left):
+            sort_keys[entry] = (left_index, -1)
+        in_left = set(sort_keys)
+        in_right = set()
+        # Left index of the most recent right item that is already known;
+        # right-only items are anchored to it.
+        anchor = -1
+        for right_index, entry in enumerate(self.right):
+            in_right.add(entry)
+            if entry in sort_keys:
+                anchor = sort_keys[entry][0]
+            else:
+                sort_keys[entry] = (anchor, right_index)
+        for entry in sorted(sort_keys, key=sort_keys.get):
+            if entry not in in_left:
+                yield ('add', entry)
+            elif entry in in_right:
+                yield ('equal', entry)
+            else:
+                yield ('delete', entry)
diff --git a/third_party/python/compare_locales/compare_locales/integration_tests/__init__.py b/third_party/python/compare_locales/compare_locales/integration_tests/__init__.py
new file mode 100644
index 0000000000..ba9db8b8ec
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/integration_tests/__init__.py
@@ -0,0 +1,5 @@
+'''Tests that are not run by default.
+
+They might just take long, or depend on external services, or both.
+They might also fail for external changes.
+'''
diff --git a/third_party/python/compare_locales/compare_locales/integration_tests/test_plurals.py b/third_party/python/compare_locales/compare_locales/integration_tests/test_plurals.py
new file mode 100644
index 0000000000..e63ff861f7
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/integration_tests/test_plurals.py
@@ -0,0 +1,51 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import json
+import unittest
+from urllib.error import URLError
+from urllib.request import urlopen
+
+from compare_locales import plurals
+
+
+TRANSVISION_URL = (
+ 'https://transvision.mozfr.org/'
+ 'api/v1/entity/gecko_strings/'
+ '?id=toolkit/chrome/global/intl.properties:pluralRule'
+)
+
+
class TestPlural(unittest.TestCase):
    '''Integration test for plural forms and l10n-central.

    Having more plural forms than in l10n-central is OK, missing or
    mismatching ones isn't.
    Depends on Transvision.
    '''
    maxDiff = None

    def test_valid_forms(self):
        '''Compare our plural rule per locale against Transvision's data.'''
        reference_form_map = self._load_transvision()
        # Strip matches from dicts, to make diff for test small
        locales = list(reference_form_map)
        cl_form_map = {}
        for locale in locales:
            cl_form = str(plurals.get_plural_rule(locale))
            if cl_form == reference_form_map[locale]:
                reference_form_map.pop(locale)
            else:
                cl_form_map[locale] = cl_form
        self.assertDictEqual(reference_form_map, cl_form_map)

    def _load_transvision(self):
        '''Use the Transvision API to load all values of pluralRule
        in intl.properties.
        Skip test on load failure.
        '''
        try:
            # The context manager closes the HTTP response even if
            # reading it fails.
            with urlopen(TRANSVISION_URL) as response:
                data = response.read()
        except URLError:
            raise unittest.SkipTest("Couldn't load Transvision API.")
        return json.loads(data)
diff --git a/third_party/python/compare_locales/compare_locales/keyedtuple.py b/third_party/python/compare_locales/compare_locales/keyedtuple.py
new file mode 100644
index 0000000000..af703e8fa2
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/keyedtuple.py
@@ -0,0 +1,55 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+'''A tuple with keys.
+
+A Sequence type that allows to refer to its elements by key.
+Making this immutable, 'cause keeping track of mutations is hard.
+
+compare-locales uses strings for Entity keys, and tuples in the
+case of PO. Support both.
+
+In the interfaces that check for membership, dicts check keys and
+sequences check values. Always try our dict cache `__map` first,
+and fall back to the superclass implementation.
+'''
+
+
class KeyedTuple(tuple):
    '''Immutable tuple whose items can also be addressed by their `key`.

    Membership tests and item access consult the key index first and
    fall back to the plain tuple behavior.
    '''

    def __new__(cls, iterable):
        return super().__new__(cls, iterable)

    def __init__(self, iterable):
        # Position of each item by key; for duplicate keys the last wins.
        self.__map = {}
        if iterable:
            self.__map.update(
                (entry.key, position)
                for position, entry in enumerate(self)
            )

    def __contains__(self, key):
        try:
            if key in self.__map:
                return True
        except TypeError:
            # Unhashable, so it cannot be a key; check the values instead.
            pass
        return super().__contains__(key)

    def __getitem__(self, key):
        try:
            index = self.__map[key]
        except (KeyError, TypeError):
            # Not a known key: treat it as a regular index or slice.
            index = key
        return super().__getitem__(index)

    def keys(self):
        '''Iterate over the keys of the items, in tuple order.'''
        return (entry.key for entry in self)

    def items(self):
        '''Iterate over ``(key, item)`` pairs, in tuple order.'''
        return ((entry.key, entry) for entry in self)

    def values(self):
        '''Return the items; that is this tuple itself.'''
        return self
diff --git a/third_party/python/compare_locales/compare_locales/lint/__init__.py b/third_party/python/compare_locales/compare_locales/lint/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/lint/__init__.py
diff --git a/third_party/python/compare_locales/compare_locales/lint/cli.py b/third_party/python/compare_locales/compare_locales/lint/cli.py
new file mode 100644
index 0000000000..dc476e1b77
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/lint/cli.py
@@ -0,0 +1,93 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import argparse
+import os
+
+from compare_locales.lint.linter import L10nLinter
+from compare_locales.lint.util import (
+ default_reference_and_tests,
+ mirror_reference_and_tests,
+ l10n_base_reference_and_tests,
+)
+from compare_locales import mozpath
+from compare_locales import paths
+from compare_locales import parser
+from compare_locales import version
+
+
+epilog = '''\
+moz-l10n-lint checks for common mistakes in localizable files. It tests for
+duplicate entries, parsing errors, and the like. Optionally, it can compare
+the strings to an external reference with strings and warn if a string might
+need to get a new ID.
+'''
+
+
def main():
    '''Command line entry point of moz-l10n-lint.

    Parses the arguments, lints all reference files of the given
    l10n.toml that have a parser, prints one line per finding and returns
    the exit code: 1 if there are findings, unless all of them are
    warnings and -W was not given, 0 otherwise.
    '''
    arg_parser = argparse.ArgumentParser(
        description='Validate localizable strings',
        epilog=epilog,
    )
    arg_parser.add_argument('l10n_toml')
    arg_parser.add_argument(
        '--version', action='version', version='%(prog)s ' + version
    )
    arg_parser.add_argument(
        '-W', action='store_true', help='error on warnings'
    )
    arg_parser.add_argument(
        '--l10n-reference',
        dest='l10n_reference',
        metavar='PATH',
        help='check for conflicts against an l10n-only reference repository '
        'like gecko-strings',
    )
    arg_parser.add_argument(
        '--reference-project',
        dest='ref_project',
        metavar='PATH',
        help='check for conflicts against a reference project like '
        'android-l10n',
    )
    args = arg_parser.parse_args()

    # The l10n reference is passed as <l10n_base>/<locale>.
    l10n_base, locale = '.', None
    if args.l10n_reference:
        reference_dir = os.path.abspath(args.l10n_reference)
        l10n_base, locale = os.path.split(reference_dir)
        if not locale or not os.path.isdir(args.l10n_reference):
            arg_parser.error('Pass an existing l10n reference')

    config = paths.TOMLParser().parse(
        args.l10n_toml, env={'l10n_base': l10n_base}
    )
    if locale:
        config.set_locales([locale], deep=True)
    files = paths.ProjectFiles(locale, [config])

    if args.l10n_reference:
        get_reference_and_tests = l10n_base_reference_and_tests(files)
    elif args.ref_project:
        get_reference_and_tests = mirror_reference_and_tests(
            files, args.ref_project
        )
    else:
        get_reference_and_tests = default_reference_and_tests

    lintable = (
        f for f, _, _, _ in files.iter_reference() if parser.hasParser(f)
    )
    results = L10nLinter().lint(lintable, get_reference_and_tests)

    only_warnings = all(r['level'] == 'warning' for r in results)
    rv = 1 if results and (args.W or not only_warnings) else 0
    for finding in results:
        line = '{} ({}:{}): {}'.format(
            mozpath.relpath(finding['path'], '.'),
            finding.get('lineno', 0),
            finding.get('column', 0),
            finding['message']
        )
        print(line)
    return rv
+
+
if __name__ == '__main__':
    # Hand main()'s return value to the interpreter as the exit status,
    # so running this module as a script reports lint failures.
    raise SystemExit(main())
diff --git a/third_party/python/compare_locales/compare_locales/lint/linter.py b/third_party/python/compare_locales/compare_locales/lint/linter.py
new file mode 100644
index 0000000000..a946608d97
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/lint/linter.py
@@ -0,0 +1,121 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from collections import Counter
+import os
+
+from compare_locales import parser, checks
+from compare_locales.paths import File, REFERENCE_LOCALE
+
+
class L10nLinter:
    '''Run the lint checks over a set of localizable files.'''

    def lint(self, files, get_reference_and_tests):
        '''Lint every file that has a parser and return the findings.

        `get_reference_and_tests` is called with each path and returns
        the reference file to compare against (or None) and the extra
        tests to hand to the checker.
        '''
        findings = []
        for candidate in files:
            if parser.hasParser(candidate):
                reference_path, extra_tests = get_reference_and_tests(
                    candidate
                )
                findings.extend(
                    self.lint_file(candidate, reference_path, extra_tests)
                )
        return findings

    def lint_file(self, path, ref, extra_tests):
        '''Yield a result dict, tagged with `path`, per finding in `path`.

        If `ref` is an existing file it is parsed as the reference that
        the entities of `path` are compared against.
        '''
        file_parser = parser.getParser(path)
        reference = {}
        if ref is not None and os.path.isfile(ref):
            file_parser.readFile(ref)
            reference = file_parser.parse()
        file_parser.readFile(path)
        current = file_parser.parse()
        lint_target = File(path, path, locale=REFERENCE_LOCALE)
        checker = checks.getChecker(lint_target, extra_tests=extra_tests)
        if checker and checker.needs_reference:
            # The linted file serves as its own reference for the checks.
            checker.set_reference(current)
        entity_linter = EntityLinter(current, checker, reference)
        for entity in current:
            for finding in entity_linter.lint_entity(entity):
                finding['path'] = path
                yield finding
+
+
class EntityLinter:
    '''Helper that runs all lint checks on one entity at a time.'''

    def __init__(self, current, checker, reference):
        # How often each key shows up in the linted file.
        self.key_count = Counter(entity.key for entity in current)
        self.checker = checker
        self.reference = reference

    def lint_entity(self, current_entity):
        '''Yield all findings for the entity; junk yields only its error.'''
        junk_result = self.handle_junk(current_entity)
        if junk_result:
            yield junk_result
        else:
            yield from self.lint_full_entity(current_entity)
            yield from self.lint_value(current_entity)

    def lint_full_entity(self, current_entity):
        '''Checks on the entity as a whole, reported at its start.

        Yields an error for a key used more than once in the file, and a
        warning if the entity differs from the reference entity with the
        same key.
        '''
        key = current_entity.key
        lineno = col = None
        if self.key_count[key] > 1:
            lineno, col = current_entity.position()
            yield {
                'lineno': lineno,
                'column': col,
                'level': 'error',
                'message': 'Duplicate string with ID: {}'.format(key)
            }

        if key not in self.reference:
            return
        if current_entity.equals(self.reference[key]):
            return
        if lineno is None:
            # Position not computed for the duplicate check above.
            lineno, col = current_entity.position()
        yield {
            'lineno': lineno,
            'column': col,
            'level': 'warning',
            'message': 'Changes to string require a new ID: {}'.format(key),
        }

    def lint_value(self, current_entity):
        '''Yield the checker findings, located at their spot in the entity.

        The entity is checked against itself. Does nothing without a
        checker.
        '''
        if not self.checker:
            return
        findings = self.checker.check(current_entity, current_entity)
        for level, pos, message, _category in findings:
            if isinstance(pos, checks.EntityPos):
                locate = current_entity.position
            else:
                locate = current_entity.value_position
            lineno, col = locate(pos)
            yield {
                'lineno': lineno,
                'column': col,
                'level': level,
                'message': message,
            }

    def handle_junk(self, current_entity):
        '''Return an error result for a Junk entity, None for anything else.'''
        if isinstance(current_entity, parser.Junk):
            lineno, col = current_entity.position()
            return {
                'lineno': lineno,
                'column': col,
                'level': 'error',
                'message': current_entity.error_message()
            }
        return None
diff --git a/third_party/python/compare_locales/compare_locales/lint/util.py b/third_party/python/compare_locales/compare_locales/lint/util.py
new file mode 100644
index 0000000000..f5e1fb5e6e
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/lint/util.py
@@ -0,0 +1,38 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from compare_locales import paths
+
+
def default_reference_and_tests(path):
    '''Fallback lookup: no reference file and no extra tests for any path.'''
    reference = extra_tests = None
    return reference, extra_tests
+
+
def mirror_reference_and_tests(files, basedir):
    '''Build the lookup used to check for conflicts against a mirror
    project like android-l10n.

    The returned function maps a path to the corresponding path below
    `basedir` and the extra tests of the first matcher set whose
    reference matcher matches, or to ``(None, None)``.
    '''
    def get_reference_and_tests(path):
        with_reference = (
            matcher_set for matcher_set in files.matchers
            if 'reference' in matcher_set
        )
        for matcher_set in with_reference:
            reference_matcher = matcher_set['reference']
            if reference_matcher.match(path) is not None:
                # Same pattern, rooted in the mirror project.
                mirrored = paths.Matcher(reference_matcher, root=basedir)
                ref_path = reference_matcher.sub(mirrored, path)
                return ref_path, matcher_set.get('test')
        return None, None
    return get_reference_and_tests
+
+
def l10n_base_reference_and_tests(files):
    '''Build the lookup used to check for conflicts against an l10n
    reference like gecko-strings.

    The returned function asks `files.match` for the path and returns
    the first and last element of the match, the reference path and the
    extra tests, or ``(None, None)`` if there is no match.
    '''
    def get_reference_and_tests(path):
        found = files.match(path)
        if found is not None:
            reference, _, _, extra_tests = found
            return reference, extra_tests
        return None, None
    return get_reference_and_tests
diff --git a/third_party/python/compare_locales/compare_locales/merge.py b/third_party/python/compare_locales/compare_locales/merge.py
new file mode 100644
index 0000000000..1d73560bb9
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/merge.py
@@ -0,0 +1,143 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+'''Merge resources across channels.
+
+Merging resources is done over a series of parsed resources, or source
+strings.
+The nomenclature is that the resources are ordered from newest to oldest.
+The generated file structure is taken from the newest file, and then the
+next-newest, etc. The values of the returned entities are taken from the
+newest to the oldest resource, too.
+
+In merge_resources, there's an option to choose the values from oldest
+to newest instead.
+'''
+
+from collections import OrderedDict, defaultdict
+from codecs import encode
+from functools import reduce
+
+
+from compare_locales import parser as cl
+from compare_locales.parser.base import StickyEntry
+from compare_locales.compare.utils import AddRemove
+
+
class MergeNotSupportedError(ValueError):
    '''Raised when merging is requested for an unsupported file format.'''
+
+
def merge_channels(name, resources):
    '''Merge `resources`, ordered newest to oldest, into one encoded file.

    The parser is picked by file `name`; its encoding is used for the
    returned bytes. Raises MergeNotSupportedError if there is no parser
    for `name`.
    '''
    try:
        file_parser = cl.getParser(name)
    except UserWarning:
        raise MergeNotSupportedError(
            f'Unsupported file format ({name}).')

    merged = merge_resources(file_parser, resources)
    serialized = serialize_legacy_resource(merged)
    return encode(serialized, file_parser.encoding)
+
+
def merge_resources(parser, resources, keep_newest=True):
    '''Merge parsed or unparsed resources, returning an iterable of Entities.

    Resources are ordered from newest to oldest in the input. The structure
    of the generated content is taken from the newest resource first, and
    then filled by the next etc.
    Values are also taken from the newest, unless keep_newest is False,
    then values are taken from the oldest first.
    '''

    def keyed(entity, comment_counts):
        '''Return the (merge key, entity) pair for an entity.'''
        if isinstance(entity, cl.Comment):
            comment_counts[entity.val] += 1
            # Use the (value, index) tuple as the key. AddRemove will
            # de-duplicate identical comments at the same index.
            return ((entity.val, comment_counts[entity.val]), entity)

        if isinstance(entity, cl.Whitespace):
            # Use the Whitespace instance as the key so that it's always
            # unique. Adjacent whitespace will be folded into the longer
            # one when merging.
            return (entity, entity)

        return (entity.key, entity)

    def as_ordered_dict(resource):
        '''Parse the resource if needed, and key its entities.'''
        # Keeps track of the number of identical comments seen so far.
        comment_counts = defaultdict(int)
        if isinstance(resource, bytes):
            parser.readContents(resource)
            resource = parser.walk()
        return OrderedDict(
            keyed(entity, comment_counts) for entity in resource
        )

    def merge_pair(newer, older):
        return merge_two(newer, older, keep_newer=keep_newest)

    merged = reduce(merge_pair, map(as_ordered_dict, resources))
    return merged.values()
+
+
def merge_two(newer, older, keep_newer=True):
    '''Merge two OrderedDicts of entities.

    The order of the result is determined by `newer`, with entries only
    in `older` placed as reported by AddRemove. Values come from `newer`
    by default, and from `older` if `keep_newer` is False.
    '''
    diff = AddRemove()
    diff.set_left(newer.keys())
    diff.set_right(older.keys())

    # Create a flat sequence of all entities in order reported by AddRemove.
    pick = get_newer_entity if keep_newer else get_older_entity
    contents = [(key, pick(newer, older, key)) for _, key in diff]

    pruned = []
    for key, entity in contents:
        if entity is None:
            # Drop Nones, which stand for duplicated comments.
            continue
        if pruned and isinstance(entity, cl.Whitespace):
            _, previous = pruned[-1]
            if isinstance(previous, cl.Whitespace):
                # Fold adjacent whitespace, preferring the longer one.
                if len(entity.all) > len(previous.all):
                    pruned[-1] = (entity, entity)
                continue
        pruned.append((key, entity))
    return OrderedDict(pruned)
+
+
def get_newer_entity(newer, older, key):
    '''Return the entry for `key` from `newer`, falling back to `older`
    if `newer` has no entry or holds None for it.
    '''
    candidate = newer.get(key)
    # Always prefer the newer version.
    return older.get(key) if candidate is None else candidate
+
+
def get_older_entity(newer, older, key):
    '''Return the entry for `key` from `older`, unless it is missing or
    a StickyEntry; in those cases return the entry from `newer`.
    '''
    candidate = older.get(key)
    if candidate is not None and not isinstance(candidate, StickyEntry):
        return candidate
    # No usable older version, get the newer one.
    return newer.get(key)
+
+
def serialize_legacy_resource(entities):
    '''Concatenate the `all` text of the entities, in order.'''
    chunks = [entity.all for entity in entities]
    return "".join(chunks)
diff --git a/third_party/python/compare_locales/compare_locales/mozpath.py b/third_party/python/compare_locales/compare_locales/mozpath.py
new file mode 100644
index 0000000000..d2b1575858
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/mozpath.py
@@ -0,0 +1,154 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+'''
+Like :py:mod:`os.path`, with a reduced set of functions, and with normalized
+path separators (always use forward slashes).
+Also contains a few additional utilities not found in :py:mod:`os.path`.
+'''
+
+
+import posixpath
+import os
+import re
+
+
def normsep(path):
    '''
    Return `path` with forward slashes in place of :py:const:`os.sep`
    and :py:const:`os.altsep`, wherever those differ from ``/``.
    '''
    for separator in (os.sep, os.altsep):
        if separator and separator != '/':
            path = path.replace(separator, '/')
    return path
+
+
def relpath(path, start):
    '''Like os.path.relpath with forward slashes; '' instead of '.'.'''
    relative = normsep(os.path.relpath(path, start))
    if relative == '.':
        return ''
    return relative
+
+
def realpath(path):
    '''Like os.path.realpath, with forward slashes.'''
    resolved = os.path.realpath(path)
    return normsep(resolved)
+
+
def abspath(path):
    '''Like os.path.abspath, with forward slashes.'''
    absolute = os.path.abspath(path)
    return normsep(absolute)
+
+
def join(*paths):
    '''Like os.path.join, with forward slashes.'''
    combined = os.path.join(*paths)
    return normsep(combined)
+
+
def normpath(path):
    '''Normalize separators, then apply posixpath.normpath.'''
    slashed = normsep(path)
    return posixpath.normpath(slashed)
+
+
def dirname(path):
    '''Normalize separators, then apply posixpath.dirname.'''
    slashed = normsep(path)
    return posixpath.dirname(slashed)
+
+
def commonprefix(paths):
    '''Apply posixpath.commonprefix to the separator-normalized paths.'''
    slashed = list(map(normsep, paths))
    return posixpath.commonprefix(slashed)
+
+
def basename(path):
    '''Return the final component of `path`, as per os.path.basename.'''
    tail = os.path.basename(path)
    return tail
+
+
def splitext(path):
    '''Normalize separators, then apply posixpath.splitext.'''
    slashed = normsep(path)
    return posixpath.splitext(slashed)
+
+
def split(path):
    '''
    Split the separator-normalized path at each ``/`` and return the
    list of components.

    ``split('foo/bar/baz')`` returns ``['foo', 'bar', 'baz']``
    '''
    slashed = normsep(path)
    return slashed.split('/')
+
+
def basedir(path, bases):
    '''
    Return the entry of `bases` (a list of directories) that contains
    `path`. If several entries match, the deepest one is returned; if
    none matches, the result is None.

    ``basedir('foo/bar/baz', ['foo', 'baz', 'foo/bar'])`` returns ``'foo/bar'``
    (`'foo'` and `'foo/bar'` both match, but `'foo/bar'` is the deepest match)
    '''
    path = normsep(path)
    candidates = [normsep(base) for base in bases]
    if path in candidates:
        return path
    # In reverse lexicographic order, the longest of several prefixes of
    # path comes first.
    for candidate in sorted(candidates, reverse=True):
        if candidate == '' or path.startswith(candidate + '/'):
            return candidate
    return None
+
+
# Compiled regular expressions, by pattern.
re_cache = {}


def match(path, pattern):
    '''
    Return whether the given path matches the given pattern.
    An empty pattern matches every path.
    An asterisk can be used to match any string, including the null string, in
    one part of the path:

        ``foo`` matches ``*``, ``f*`` or ``fo*o``

    However, an asterisk matching a subdirectory may not match the null string:

        ``foo/bar`` does *not* match ``foo/*/bar``

    If the pattern matches one of the ancestor directories of the path, the
    path is considered matching:

        ``foo/bar`` matches ``foo``

    Two adjacent asterisks can be used to match files and zero or more
    directories and subdirectories.

        ``foo/bar`` matches ``foo/**/bar``, or ``**/bar``
    '''
    if not pattern:
        return True
    compiled = re_cache.get(pattern)
    if compiled is None:
        pieces = []
        cursor = 0
        wildcards = re.finditer(
            r'(?:(^|/)\*\*(/|$))|(?P<star>\*)', pattern
        )
        for token in wildcards:
            if token.start() > cursor:
                # Literal text between two wildcards.
                pieces.append(re.escape(pattern[cursor:token.start()]))
            if token.group('star'):
                pieces.append('[^/]*')
            elif token.group(2):
                # '**' followed by a slash
                pieces.append(
                    re.escape(token.group(1)) + r'(?:.+%s)?' % token.group(2)
                )
            else:
                # '**' at the end of the pattern
                pieces.append(r'(?:%s.+)?' % re.escape(token.group(1)))
            cursor = token.end()
        # The optional tail makes the pattern match on ancestor directories.
        pieces.append(re.escape(pattern[cursor:]) + '(?:/.*)?$')
        compiled = re_cache[pattern] = re.compile(''.join(pieces))
    return compiled.match(path) is not None
+
+
def rebase(oldbase, base, relativepath):
    '''
    Return `relativepath` relative to `base` instead of `oldbase`.

    One of the two bases needs to contain the other. A trailing slash on
    `relativepath` is kept on the result.
    '''
    if base == oldbase:
        return relativepath
    if len(base) < len(oldbase):
        # Moving up: prefix the part of oldbase below base.
        assert basedir(oldbase, [base]) == base
        result = join(relpath(oldbase, base), relativepath)
    else:
        # Moving down: strip the part of base below oldbase.
        assert basedir(base, [oldbase]) == oldbase
        result = relpath(relativepath, relpath(base, oldbase))
    result = normpath(result)
    keep_slash = relativepath.endswith('/')
    if keep_slash and not result.endswith('/'):
        result += '/'
    return result
diff --git a/third_party/python/compare_locales/compare_locales/parser/__init__.py b/third_party/python/compare_locales/compare_locales/parser/__init__.py
new file mode 100644
index 0000000000..b537bb0686
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/parser/__init__.py
@@ -0,0 +1,81 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import re
+
+from .base import (
+ CAN_NONE, CAN_COPY, CAN_SKIP, CAN_MERGE,
+ Entry, Entity, Comment, OffsetComment, Junk, Whitespace,
+ BadEntity, Parser,
+)
+from .android import (
+ AndroidParser
+)
+from .defines import (
+ DefinesParser, DefinesInstruction
+)
+from .dtd import (
+ DTDEntity, DTDParser
+)
+from .fluent import (
+ FluentParser, FluentComment, FluentEntity, FluentMessage, FluentTerm,
+)
+from .ini import (
+ IniParser, IniSection,
+)
+from .po import (
+ PoParser
+)
+from .properties import (
+ PropertiesParser, PropertiesEntity
+)
+
+__all__ = [
+ "CAN_NONE", "CAN_COPY", "CAN_SKIP", "CAN_MERGE",
+ "Junk", "Entry", "Entity", "Whitespace", "Comment", "OffsetComment",
+ "BadEntity", "Parser",
+ "AndroidParser",
+ "DefinesParser", "DefinesInstruction",
+ "DTDParser", "DTDEntity",
+ "FluentParser", "FluentComment", "FluentEntity",
+ "FluentMessage", "FluentTerm",
+ "IniParser", "IniSection",
+ "PoParser",
+ "PropertiesParser", "PropertiesEntity",
+]
+
+__constructors = []
+
+
def getParser(path):
    '''Return the parser to use for `path`.

    The built-in parsers are tried first, by regular expression on the
    path, then the parsers registered for the 'compare_locales.parsers'
    entry point. Raises UserWarning if none of them handles the path.
    '''
    for pattern, builtin_parser in __constructors:
        if re.search(pattern, path):
            return builtin_parser
    try:
        from pkg_resources import iter_entry_points
        for entry_point in iter_entry_points('compare_locales.parsers'):
            plugin_parser = entry_point.resolve()()
            if plugin_parser.use(path):
                return plugin_parser
    except (ImportError, OSError):
        # Plugin parsers are optional, go on without them.
        pass
    raise UserWarning("Cannot find Parser")
+
+
def hasParser(path):
    '''Return whether getParser() finds a parser for `path`.'''
    try:
        available = bool(getParser(path))
    except UserWarning:
        available = False
    return available
+
+
+__constructors = [
+ ('strings.*\\.xml$', AndroidParser()),
+ ('\\.dtd$', DTDParser()),
+ ('\\.properties$', PropertiesParser()),
+ ('\\.ini$', IniParser()),
+ ('\\.inc$', DefinesParser()),
+ ('\\.ftl$', FluentParser()),
+ ('\\.pot?$', PoParser()),
+]
diff --git a/third_party/python/compare_locales/compare_locales/parser/android.py b/third_party/python/compare_locales/compare_locales/parser/android.py
new file mode 100644
index 0000000000..ba4197da84
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/parser/android.py
@@ -0,0 +1,303 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""Android strings.xml parser
+
+Parses strings.xml files per
+https://developer.android.com/guide/topics/resources/localization.
+As we're using a built-in XML parser underneath, errors on that level
+break the full parsing, and result in a single Junk entry.
+"""
+
+
+import re
+from xml.dom import minidom
+from xml.dom.minidom import Node
+
+from .base import (
+ CAN_SKIP,
+ Entity, Comment, Junk, Whitespace,
+ StickyEntry, LiteralEntity,
+ Parser
+)
+
+
class AndroidEntity(Entity):
    '''Entity backed by a <string> element of a strings.xml file.

    The serialization, key and values are passed in as literal strings
    instead of being spans into the file contents. Positions are not
    tracked, and reported as line 0.
    '''

    def __init__(
        self, ctx, pre_comment, white_space, node, all, key, raw_val, val
    ):
        # fill out superclass as good as we can right now
        # most span can get modified at endElement
        super().__init__(
            ctx, pre_comment, white_space,
            (None, None),
            (None, None),
            (None, None)
        )
        self.node = node
        self._all_literal = all
        self._key_literal = key
        self._raw_val_literal = raw_val
        self._val_literal = val

    @property
    def all(self):
        '''Comment, whitespace and element serialization, concatenated.'''
        chunks = []
        if self.pre_comment is not None:
            chunks.append(self.pre_comment.all)
        if self.inner_white is not None:
            chunks.append(self.inner_white.all)
        chunks.append(self._all_literal)
        return ''.join(chunks)

    @property
    def key(self):
        return self._key_literal

    @property
    def raw_val(self):
        return self._raw_val_literal

    def position(self, offset=0):
        return (0, offset)

    def value_position(self, offset=0):
        return (0, offset)

    def wrap(self, raw_val):
        '''Return a LiteralEntity for this key with `raw_val` as value.

        The serialization is a copy of this entity's element, with the
        value stored in its only child or, with several children, in the
        first CDATA section.
        '''
        clone = self.node.cloneNode(True)
        children = clone.childNodes
        if children.length == 0:
            # An empty element like <string name="x"/> has no node to
            # hold the value yet. Without this, `child` is unbound below.
            owner = clone.ownerDocument or minidom.Document()
            child = owner.createTextNode('')
            clone.appendChild(child)
        elif children.length == 1:
            child = children[0]
        else:
            # Without a CDATA section, this ends up on the last child.
            for child in children:
                if child.nodeType == Node.CDATA_SECTION_NODE:
                    break
        child.data = raw_val
        all = []
        if self.pre_comment is not None:
            all.append(self.pre_comment.all)
        if self.inner_white is not None:
            all.append(self.inner_white.all)
        all.append(clone.toxml())
        return LiteralEntity(self.key, raw_val, ''.join(all))
+
+
class NodeMixin:
    '''Mixin for entries that store their serialization and value as
    literal strings. Positions are always reported as line 0.
    '''

    def __init__(self, all, value):
        self._all_literal, self._val_literal = all, value

    @property
    def all(self):
        '''The literal serialization.'''
        return self._all_literal

    @property
    def key(self):
        '''The literal serialization serves as the key, too.'''
        return self._all_literal

    @property
    def raw_val(self):
        '''The literal value.'''
        return self._val_literal

    def position(self, offset=0):
        line = 0
        return (line, offset)

    def value_position(self, offset=0):
        line = 0
        return (line, offset)
+
+
class XMLWhitespace(NodeMixin, Whitespace):
    '''Whitespace entry with literal serialization and value.'''
+
+
class XMLComment(NodeMixin, Comment):
    '''Comment entry with literal serialization and value.'''

    @property
    def key(self):
        '''Comments have no key.'''
        return None

    @property
    def val(self):
        '''The literal value passed to the constructor.'''
        return self._val_literal
+
+
+# DocumentWrapper is sticky in serialization.
+# Always keep the one from the reference document.
class DocumentWrapper(NodeMixin, StickyEntry):
    '''A piece of the markup wrapped around the entries of the document.

    The serialization doubles as the value, the key is given explicitly.
    '''

    def __init__(self, key, all):
        self._key_literal = key
        self._all_literal = self._val_literal = all

    @property
    def key(self):
        '''The explicit key passed to the constructor.'''
        return self._key_literal
+
+
class XMLJunk(Junk):
    '''Junk entry holding its content as a literal string.

    Positions are always reported as line 0.
    '''

    def __init__(self, all):
        # No parsing context, and an empty span at the start.
        super().__init__(None, (0, 0))
        self._all_literal = all

    @property
    def all(self):
        '''The literal content passed to the constructor.'''
        return self._all_literal

    def position(self, offset=0):
        line = 0
        return (line, offset)

    def value_position(self, offset=0):
        line = 0
        return (line, offset)
+
+
def textContent(node):
    '''Return the text value of a DOM node.

    That's '' for a node without children, the data of the first CDATA
    section if there is one, or the data of the only child if that is a
    text node. For any other content, the XML serialization of the node
    is returned.
    '''
    children = node.childNodes
    if children.length == 0:
        return ''
    cdata_sections = [
        child for child in children
        if child.nodeType == minidom.Node.CDATA_SECTION_NODE
    ]
    if cdata_sections:
        return cdata_sections[0].data
    is_plain_text = (
        children.length == 1 and
        children[0].nodeType == minidom.Node.TEXT_NODE
    )
    if is_plain_text:
        return children[0].data
    # Return something, we'll fail in checks on this
    return node.toxml()
+
+
# A line break, with the spaces and tabs around it.
NEWLINE = re.compile(r'[ \t]*\n[ \t]*')


def normalize(val):
    '''Strip spaces and tabs from both ends of `val` and from around
    each of its line breaks.
    '''
    trimmed = val.strip(' \t')
    return NEWLINE.sub('\n', trimmed)
+
+
class AndroidParser(Parser):
    '''Parser for Android strings.xml files, based on xml.dom.minidom.

    Content that is not well-formed XML, or does not have <resources> as
    document element, ends up as a single junk entry.
    '''
    # Android does l10n fallback at runtime, don't merge en-US strings
    capabilities = CAN_SKIP

    def __init__(self):
        super().__init__()
        self.last_comment = None

    def walk(self, only_localizable=False):
        '''Yield the entries of the loaded content, in document order.

        With `only_localizable`, only the results of handleElement() are
        yielded. Otherwise, the document wrapper, comments and whitespace
        are yielded, too.
        '''
        # Local import, keeps the change contained in this class.
        from xml.sax.saxutils import quoteattr

        if not self.ctx:
            # loading file failed, or we just didn't load anything
            return
        ctx = self.ctx
        contents = ctx.contents
        try:
            doc = minidom.parseString(contents.encode('utf-8'))
        except Exception:
            yield XMLJunk(contents)
            return
        docElement = doc.documentElement
        if docElement.nodeName != 'resources':
            yield XMLJunk(doc.toxml())
            return
        root_children = docElement.childNodes
        if not only_localizable:
            yield DocumentWrapper(
                '<?xml?><resources>',
                '<?xml version="1.0" encoding="utf-8"?>\n<resources'
            )
            for attr_name, attr_value in docElement.attributes.items():
                # The DOM holds the unescaped value. Escape and quote it
                # again, so that values with &, < or " serialize to
                # well-formed XML.
                yield DocumentWrapper(
                    attr_name,
                    f' {attr_name}={quoteattr(attr_value)}'
                )
            yield DocumentWrapper('>', '>')
        child_num = 0
        while child_num < len(root_children):
            node = root_children[child_num]
            if node.nodeType == Node.COMMENT_NODE:
                # Collect adjacent comments, and go on with the node
                # after them.
                current_comment, child_num = self.handleComment(
                    node, root_children, child_num
                )
                if child_num < len(root_children):
                    node = root_children[child_num]
                else:
                    if not only_localizable:
                        yield current_comment
                    break
            else:
                current_comment = None
            if node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                white_space = XMLWhitespace(node.toxml(), node.nodeValue)
                child_num += 1
                if current_comment is None:
                    if not only_localizable:
                        yield white_space
                    continue
                if node.nodeValue.count('\n') > 1:
                    # More than one line break detaches the comment from
                    # whatever follows, yield it on its own.
                    if not only_localizable:
                        if current_comment is not None:
                            yield current_comment
                        yield white_space
                    continue
                if child_num < len(root_children):
                    node = root_children[child_num]
                else:
                    if not only_localizable:
                        if current_comment is not None:
                            yield current_comment
                        yield white_space
                    break
            else:
                white_space = None
            if node.nodeType == Node.ELEMENT_NODE:
                yield self.handleElement(node, current_comment, white_space)
            else:
                if not only_localizable:
                    if current_comment:
                        yield current_comment
                    if white_space:
                        yield white_space
            child_num += 1
        if not only_localizable:
            yield DocumentWrapper('</resources>', '</resources>\n')

    def handleElement(self, element, current_comment, white_space):
        '''Return an AndroidEntity for a <string> element with a name
        attribute, and XMLJunk for any other element.
        '''
        if element.nodeName == 'string' and element.hasAttribute('name'):
            return AndroidEntity(
                self.ctx,
                current_comment,
                white_space,
                element,
                element.toxml(),
                element.getAttribute('name'),
                textContent(element),
                ''.join(c.toxml() for c in element.childNodes)
            )
        else:
            return XMLJunk(element.toxml())

    def handleComment(self, node, root_children, child_num):
        '''Merge the comment at `child_num` with the comments following it.

        Comments are merged as long as they are separated by nothing, or
        by a text node with at most one line break.
        Returns the merged XMLComment and the index of the next child
        to process.
        '''
        all = node.toxml()
        val = normalize(node.nodeValue)
        while True:
            child_num += 1
            if child_num >= len(root_children):
                break
            node = root_children[child_num]
            if node.nodeType == Node.TEXT_NODE:
                if node.nodeValue.count('\n') > 1:
                    break
                white = node
                child_num += 1
                if child_num >= len(root_children):
                    break
                node = root_children[child_num]
            else:
                white = None
            if node.nodeType != Node.COMMENT_NODE:
                if white is not None:
                    # do not consume this node
                    child_num -= 1
                break
            if white:
                all += white.toxml()
                val += normalize(white.nodeValue)
            all += node.toxml()
            val += normalize(node.nodeValue)
        return XMLComment(all, val), child_num
diff --git a/third_party/python/compare_locales/compare_locales/parser/base.py b/third_party/python/compare_locales/compare_locales/parser/base.py
new file mode 100644
index 0000000000..b8972beb33
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/parser/base.py
@@ -0,0 +1,443 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import re
+import bisect
+import codecs
+from collections import Counter
+from compare_locales.keyedtuple import KeyedTuple
+from compare_locales.paths import File
+
# NOTE(review): nothing in the visible part of this module reads this
# list -- confirm whether it is still needed.
__constructors = []


# The allowed capabilities for the Parsers. They define the exact strategy
# used by ContentComparer.merge.
# The non-zero values are distinct bits, so they can be combined with ``|``
# (the base Parser uses ``CAN_SKIP | CAN_MERGE``).

# Don't perform any merging
CAN_NONE = 0
# Copy the entire reference file
CAN_COPY = 1
# Remove broken entities from localization
# Without CAN_MERGE, en-US is not good to use for localization.
CAN_SKIP = 2
# Add missing and broken entities from the reference to localization
# This effectively means that en-US is good to use for localized files.
CAN_MERGE = 4
+
+
class Entry:
    '''Abstraction layer for a localizable entity.

    An entry stores no text of its own. It keeps ``(start, end)`` offsets
    into ``ctx.contents`` and slices the text out on demand:

    span: the complete definition
    key_span: the entity key (name)
    val_span: the entity value, or None if the entry has no value

    In ``<!ENTITY key "value">`` the whole declaration is the definition,
    ``key`` is the key and ``value`` is the value.

    ``ctx`` has to provide ``contents`` (the parsed text) and
    ``linecol(position)``.
    '''
    def __init__(
        self, ctx, pre_comment, inner_white, span, key_span, val_span
    ):
        self.ctx = ctx
        self.span = span
        self.key_span = key_span
        self.val_span = val_span
        self.pre_comment = pre_comment
        self.inner_white = inner_white

    def position(self, offset=0):
        """Get the 1-based line and column of the character
        with given offset into the Entity.

        If offset is negative, return the end of the Entity.
        """
        if offset < 0:
            pos = self.span[1]
        else:
            pos = self.span[0] + offset
        return self.ctx.linecol(pos)

    def value_position(self, offset=0):
        """Get the 1-based line and column of the character
        with given offset into the value.

        If offset is negative, return the end of the value.
        """
        assert self.val_span is not None
        if offset < 0:
            pos = self.val_span[1]
        else:
            pos = self.val_span[0] + offset
        return self.ctx.linecol(pos)

    def _span_start(self):
        """Return the start offset, including a leading comment if any."""
        start = self.span[0]
        # Some subclasses skip Entry.__init__ and never set pre_comment,
        # hence the hasattr check.
        if hasattr(self, 'pre_comment') and self.pre_comment is not None:
            start = self.pre_comment.span[0]
        return start

    @property
    def all(self):
        """Source text of the entry, from its pre_comment to its end."""
        start = self._span_start()
        end = self.span[1]
        return self.ctx.contents[start:end]

    @property
    def key(self):
        """Source text covered by key_span."""
        return self.ctx.contents[self.key_span[0]:self.key_span[1]]

    @property
    def raw_val(self):
        """Source text covered by val_span, or None without a value."""
        if self.val_span is None:
            return None
        return self.ctx.contents[self.val_span[0]:self.val_span[1]]

    @property
    def val(self):
        """The value; the same as raw_val unless a subclass overrides it."""
        return self.raw_val

    def __repr__(self):
        return self.key

    re_br = re.compile('<br[ \t\r\n]*/?>', re.U)
    re_sgml = re.compile(r'</?\w+.*?>', re.U | re.M)

    def count_words(self):
        """Count the words in an English string.
        Replace a couple of xml markup to make that safer, too.

        An entry without a value (``val`` is None) has zero words.
        """
        value = self.val
        if value is None:
            # raw_val is None when val_span is None; there is nothing to
            # count, and the substitutions below need a string.
            return 0
        # Line breaks separate words, any other tag is dropped.
        value = self.re_br.sub('\n', value)
        value = self.re_sgml.sub('', value)
        return len(value.split())

    def equals(self, other):
        """Return True if key and value both match those of other."""
        return self.key == other.key and self.val == other.val
+
+
class StickyEntry(Entry):
    """Entry for syntax fragments which the serializer should always
    overwrite.

    Adds no members of its own to Entry.
    """
+
+
class Entity(Entry):
    '''Entry holding a localizable string.'''

    @property
    def localized(self):
        '''Whether this entity is localized.

        Monolingual files always report True here; for bilingual files
        this is a dynamic property.
        '''
        return True

    def unwrap(self):
        """Return the literal value to be used by tools."""
        return self.raw_val

    def wrap(self, raw_val):
        """Create a literal entity from this reference and a raw value.

        Everything around the value (including a leading comment) is
        copied from this entity's source text; only the value itself is
        swapped for raw_val. This is used by the serialization logic.
        """
        first = self._span_start()
        source = self.ctx.contents
        before_value = source[first:self.val_span[0]]
        after_value = source[self.val_span[1]:self.span[1]]
        return LiteralEntity(
            self.key, raw_val, before_value + raw_val + after_value
        )
+
+
class LiteralEntity(Entity):
    """Entity that is backed by plain strings instead of context slices.

    key, raw_val and all are stored as string literals, so no parsing
    context and no spans are involved.
    """
    def __init__(self, key, val, all):
        # No context, comment, whitespace or spans for a literal.
        super().__init__(None, None, None, None, None, None)
        self._key, self._raw_val, self._all = key, val, all

    @property
    def key(self):
        """The stored key literal."""
        return self._key

    @property
    def raw_val(self):
        """The stored value literal."""
        return self._raw_val

    @property
    def all(self):
        """The stored full-source literal."""
        return self._all
+
+
class PlaceholderEntity(LiteralEntity):
    """Literal entity that is meant to be removed in merges.

    It has an empty value and a fixed marker text as its source.
    """
    def __init__(self, key):
        super().__init__(key, "", "\nplaceholder\n")
+
+
class Comment(Entry):
    '''Entry for a comment; it has no key and no value span.'''

    def __init__(self, ctx, span):
        # Entry.__init__ is bypassed on purpose: only these attributes
        # get set for a comment.
        self.ctx = ctx
        self.span = span
        self.val_span = None
        self._val_cache = None

    @property
    def key(self):
        """Comments have no key."""
        return None

    @property
    def val(self):
        """The comment text, computed on first access and then cached."""
        cached = self._val_cache
        if cached is None:
            cached = self._val_cache = self.all
        return cached

    def __repr__(self):
        return self.all
+
+
class OffsetComment(Comment):
    '''Comment for file formats in which every comment line starts with a
    constant number of characters that are not part of the value.

    comment_offset is that number of characters; it defaults to 1.
    '''
    comment_offset = 1

    @property
    def val(self):
        """The comment text with the leading characters of each line cut."""
        if self._val_cache is None:
            skip = self.comment_offset
            # Keep the line endings so the lines can be glued back together.
            trimmed = [line[skip:] for line in self.all.splitlines(True)]
            self._val_cache = ''.join(trimmed)
        return self._val_cache
+
+
class Junk:
    '''An almost-Entity for content that the parser could not make sense of.

    Reporting such content as junk lets us either fix the parser or
    report real bugs in localizations.
    '''
    # Counter shared by all instances; it makes every key unique.
    junkid = 0

    def __init__(self, ctx, span):
        self.ctx = ctx
        self.span = span
        cls = self.__class__
        cls.junkid += 1
        junk_start, junk_end = span[0], span[1]
        self.key = '_junk_%d_%d-%d' % (cls.junkid, junk_start, junk_end)

    def position(self, offset=0):
        """Get the 1-based line and column of the character
        with given offset into the Entity.

        If offset is negative, return the end of the Entity.
        """
        pos = self.span[1] if offset < 0 else self.span[0] + offset
        return self.ctx.linecol(pos)

    @property
    def all(self):
        """The unparsed source text."""
        first, last = self.span[0], self.span[1]
        return self.ctx.contents[first:last]

    @property
    def raw_val(self):
        """Same as all."""
        return self.all

    @property
    def val(self):
        """Same as all."""
        return self.all

    def error_message(self):
        """Describe the junk and where it starts and ends."""
        params = (self.val,)
        params += self.position()
        params += self.position(-1)
        return (
            'Unparsed content "%s" from line %d column %d'
            ' to line %d column %d' % params
        )

    def __repr__(self):
        return self.key
+
+
class Whitespace(Entry):
    '''Entity-like object for a stretch of whitespace, for example an
    empty file with whitespace, if allowed.
    '''
    def __init__(self, ctx, span):
        self.ctx = ctx
        # Definition, key and value all cover the same text.
        self.span = span
        self.key_span = span
        self.val_span = span

    def __repr__(self):
        return self.raw_val
+
+
class BadEntity(ValueError):
    '''Signals that the parser can't create an Entity for a found match.'''
+
+
class Parser:
    '''Base class for the regular-expression driven parsers.

    Subclasses provide ``reKey`` and ``reComment`` and may replace
    ``reWhitespace``, ``Comment``, ``Context``, ``getNext``, ``getJunk``
    and ``createEntity``.
    '''
    capabilities = CAN_SKIP | CAN_MERGE
    reWhitespace = re.compile('[ \t\r\n]+', re.M)
    Comment = Comment
    # NotImplementedError would be great, but also tedious
    reKey = reComment = None

    class Context:
        "Fixture for content and line numbers"
        def __init__(self, contents):
            self.contents = contents
            # cache split lines
            self._lines = None

        def linecol(self, position):
            "Returns 1-based line and column numbers."
            if self._lines is None:
                # Offsets just past every newline, i.e. the line starts
                # of all lines but the first.
                nl = re.compile('\n', re.M)
                self._lines = [m.end()
                               for m in nl.finditer(self.contents)]

            # Number of line starts at or before position.
            line_offset = bisect.bisect(self._lines, position)
            line_start = self._lines[line_offset - 1] if line_offset else 0
            col_offset = position - line_start

            return line_offset + 1, col_offset + 1

    def __init__(self):
        # Subclasses may predefine their own encoding.
        if not hasattr(self, 'encoding'):
            self.encoding = 'utf-8'
        self.ctx = None

    def readFile(self, file):
        '''Read contents from disk, with universal_newlines'''
        if isinstance(file, File):
            file = file.fullpath
        # newline=None makes open() translate all line endings to '\n'.
        # Bytes that can't be decoded are replaced instead of raising.
        with open(
            file,
            encoding=self.encoding, errors='replace',
            newline=None
        ) as f:
            self.readUnicode(f.read())

    def readContents(self, contents):
        '''Read contents and create parsing context.

        contents are in native encoding, but with normalized line endings.
        '''
        (contents, _) = codecs.getdecoder(self.encoding)(contents, 'replace')
        self.readUnicode(contents)

    def readUnicode(self, contents):
        '''Create the parsing context for already decoded contents.'''
        self.ctx = self.Context(contents)

    def parse(self):
        '''Return the localizable entries as a KeyedTuple.'''
        return KeyedTuple(self)

    def __iter__(self):
        return self.walk(only_localizable=True)

    def walk(self, only_localizable=False):
        '''Generate the entries of the loaded content, in source order.

        With only_localizable, just Entity and Junk objects are yielded.
        Nothing is yielded if no content has been loaded.
        '''
        if not self.ctx:
            # loading file failed, or we just didn't load anything
            return
        ctx = self.ctx
        contents = ctx.contents

        next_offset = 0
        while next_offset < len(contents):
            entity = self.getNext(ctx, next_offset)

            if isinstance(entity, (Entity, Junk)):
                yield entity
            elif not only_localizable:
                yield entity

            # Continue right behind what was just parsed.
            next_offset = entity.span[1]

    def getNext(self, ctx, offset):
        '''Parse the next fragment.

        Parse comments first, then white-space.
        If an entity follows, create that entity with such pre_comment and
        inner white-space. If not, emit comment or white-space as standlone.
        It's OK that this might parse whitespace more than once.
        Comments are associated with entities if they're not separated by
        blank lines. Multiple consecutive comments are joined.
        '''
        junk_offset = offset
        m = self.reComment.match(ctx.contents, offset)
        if m:
            current_comment = self.Comment(ctx, m.span())
            if offset < 2 and 'License' in current_comment.val:
                # Heuristic. A early comment with "License" is probably
                # a license header, and should be standalone.
                # Not glueing ourselves to offset == 0 as we might have
                # skipped a BOM.
                return current_comment
            offset = m.end()
        else:
            current_comment = None
        m = self.reWhitespace.match(ctx.contents, offset)
        if m:
            white_space = Whitespace(ctx, m.span())
            offset = m.end()
            if (
                current_comment is not None
                and white_space.raw_val.count('\n') > 1
            ):
                # standalone comment
                # return the comment, and reparse the whitespace next time
                return current_comment
            if current_comment is None:
                return white_space
        else:
            white_space = None
        m = self.reKey.match(ctx.contents, offset)
        if m:
            try:
                return self.createEntity(ctx, m, current_comment, white_space)
            except BadEntity:
                # fall through to Junk, probably
                pass
        if current_comment is not None:
            return current_comment
        if white_space is not None:
            return white_space
        return self.getJunk(ctx, junk_offset, self.reKey, self.reComment)

    def getJunk(self, ctx, offset, *expressions):
        '''Create Junk starting at offset.

        The junk ends where the first of the given expressions matches
        again, or at the end of the content if none of them does.
        '''
        # Junk always contains the character at offset, so only look for
        # matches that start behind it. An expression can match at offset
        # itself when createEntity raised BadEntity for it; ending the
        # junk there would give a zero-length Junk, and walk() would never
        # get past it.
        search_from = offset + 1
        junkend = None
        for exp in expressions:
            m = exp.search(ctx.contents, search_from)
            if m and (junkend is None or m.start() < junkend):
                junkend = m.start()
        if junkend is None:
            junkend = len(ctx.contents)
        return Junk(ctx, (offset, junkend))

    def createEntity(self, ctx, m, current_comment, white_space):
        '''Create the Entity for a match of reKey.

        The expression needs to have the named groups key and val.
        Subclasses may raise BadEntity to reject the match.
        '''
        return Entity(
            ctx, current_comment, white_space,
            m.span(), m.span('key'), m.span('val')
        )

    @classmethod
    def findDuplicates(cls, entities):
        '''Generate one message for each key used by more than one entity.'''
        found = Counter(entity.key for entity in entities)
        for entity_id, cnt in found.items():
            if cnt > 1:
                yield f'{entity_id} occurs {cnt} times'
diff --git a/third_party/python/compare_locales/compare_locales/parser/defines.py b/third_party/python/compare_locales/compare_locales/parser/defines.py
new file mode 100644
index 0000000000..dd4511e4a8
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/parser/defines.py
@@ -0,0 +1,104 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import re
+
+from .base import (
+ CAN_COPY,
+ Entry, OffsetComment, Junk, Whitespace,
+ Parser
+)
+
+
class DefinesInstruction(Entry):
    '''Entity-like object for a processing instruction in an inc file.

    Key and value are the same piece of text.
    '''
    def __init__(self, ctx, span, val_span):
        self.ctx = ctx
        self.span = span
        self.key_span = val_span
        self.val_span = val_span

    def __repr__(self):
        return self.raw_val
+
+
class DefinesParser(Parser):
    '''Parser for files made of ``#define key value`` lines, ``# `` comment
    lines and other ``#instruction argument`` lines.
    '''
    # can't merge, #unfilter needs to be the last item, which we don't support
    capabilities = CAN_COPY
    # Only newlines count as whitespace between entries.
    reWhitespace = re.compile('\n+', re.M)

    EMPTY_LINES = 1 << 0

    class Comment(OffsetComment):
        # Comment lines start with "# ", both characters get stripped.
        comment_offset = 2

    class Context(Parser.Context):
        def __init__(self, contents):
            super(DefinesParser.Context, self).__init__(contents)
            # Toggled by the "filter emptyLines" and "unfilter emptyLines"
            # instructions while parsing, see getNext.
            self.filter_empty_lines = False

    def __init__(self):
        # One or more consecutive lines starting with "# ".
        self.reComment = re.compile('(?:^# .*?\n)*(?:^# [^\n]*)', re.M)
        # corresponds to
        # https://hg.mozilla.org/mozilla-central/file/72ee4800d4156931c89b58bd807af4a3083702bb/python/mozbuild/mozbuild/preprocessor.py#l561 # noqa
        self.reKey = re.compile(
            r'#define[ \t]+(?P<key>\w+)(?:[ \t](?P<val>[^\n]*))?', re.M)
        # Any other instruction: "#name" followed by an argument.
        self.rePI = re.compile(r'#(?P<val>\w+[ \t]+[^\n]+)', re.M)
        Parser.__init__(self)

    def getNext(self, ctx, offset):
        '''Parse the next fragment.

        Returns a comment, whitespace, an entity created for a #define,
        a DefinesInstruction or Junk.
        '''
        junk_offset = offset
        contents = ctx.contents

        m = self.reComment.match(ctx.contents, offset)
        if m:
            current_comment = self.Comment(ctx, m.span())
            offset = m.end()
        else:
            current_comment = None

        m = self.reWhitespace.match(contents, offset)
        if m:
            # blank lines outside of filter_empty_lines or
            # leading whitespace are bad
            if (
                offset == 0 or
                not (len(m.group()) == 1 or ctx.filter_empty_lines)
            ):
                # Return a pending comment first; the bad whitespace is
                # parsed again on the next call.
                if current_comment:
                    return current_comment
                return Junk(ctx, m.span())
            white_space = Whitespace(ctx, m.span())
            offset = m.end()
            if (
                current_comment is not None
                and white_space.raw_val.count('\n') > 1
            ):
                # standalone comment
                # return the comment, and reparse the whitespace next time
                return current_comment
            if current_comment is None:
                return white_space
        else:
            white_space = None

        m = self.reKey.match(contents, offset)
        if m:
            return self.createEntity(ctx, m, current_comment, white_space)
        # defines instructions don't have comments
        # Any pending comment is standalone
        if current_comment:
            return current_comment
        if white_space:
            return white_space
        m = self.rePI.match(contents, offset)
        if m:
            instr = DefinesInstruction(ctx, m.span(), m.span('val'))
            # These two instructions switch the handling of blank lines
            # for everything that gets parsed after them.
            if instr.val == 'filter emptyLines':
                ctx.filter_empty_lines = True
            if instr.val == 'unfilter emptyLines':
                ctx.filter_empty_lines = False
            return instr
        return self.getJunk(
            ctx, junk_offset, self.reComment, self.reKey, self.rePI)
diff --git a/third_party/python/compare_locales/compare_locales/parser/dtd.py b/third_party/python/compare_locales/compare_locales/parser/dtd.py
new file mode 100644
index 0000000000..55086177a8
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/parser/dtd.py
@@ -0,0 +1,115 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import re
+
+try:
+ from html import unescape as html_unescape
+except ImportError:
+ from HTMLParser import HTMLParser
+ html_parser = HTMLParser()
+ html_unescape = html_parser.unescape
+
+from .base import (
+ Entity, Comment, Junk,
+ Parser
+)
+
+
class DTDEntityMixin:
    '''Value handling for entities found in DTD files.'''

    @property
    def val(self):
        '''The raw value with HTML entities unescaped.

        Named (&amp;), decimal (&#38;) and hex (&#x26; and &#x0026;)
        references are turned into the corresponding Unicode characters.
        Unknown entities are left intact.
        '''
        return html_unescape(self.raw_val)

    def value_position(self, offset=0):
        '''Get the 1-based line and column for an offset into the value.

        offset is either a plain character offset, or a (line, column)
        tuple relative to the value, which is what DTDChecker returns.
        '''
        if not isinstance(offset, tuple):
            return super().value_position(offset)
        rel_line, rel_col = offset
        line, col = super().value_position()
        if rel_line == 1:
            # Still on the line the value starts on: columns add up.
            col += rel_col
        else:
            # On a later line the column stands for itself.
            col = rel_col
            line += rel_line - 1
        return line, col
+
+
class DTDEntity(DTDEntityMixin, Entity):
    '''Entity of a DTD file; all behavior comes from the two bases.'''
+
+
class DTDParser(Parser):
    '''Parser for DTD files holding ``<!ENTITY key "value">`` declarations,
    XML comments and parameter entity references.
    '''
    # http://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NameStartChar
    # ":" | [A-Z] | "_" | [a-z] |
    # [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF]
    # | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] |
    # [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |
    # [#x10000-#xEFFFF]
    # Content of a character class: the characters allowed in a comment,
    # without the dash (\u002D is left out of the ranges).
    CharMinusDash = '\x09\x0A\x0D\u0020-\u002C\u002E-\uD7FF\uE000-\uFFFD'
    XmlComment = '<!--(?:-?[%s])*?-->' % CharMinusDash
    NameStartChar = ':A-Z_a-z\xC0-\xD6\xD8-\xF6\xF8-\u02FF' + \
        '\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F' + \
        '\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD'
    # + \U00010000-\U000EFFFF seems to be unsupported in python

    # NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 |
    # [#x0300-#x036F] | [#x203F-#x2040]
    NameChar = NameStartChar + r'\-\.0-9' + '\xB7\u0300-\u036F\u203F-\u2040'
    Name = '[' + NameStartChar + '][' + NameChar + ']*'
    # The val group includes the quotes, createEntity cuts them off.
    # NOTE(review): the closing quote is optional for single-quoted values
    # only -- confirm that this is intended.
    reKey = re.compile('<!ENTITY[ \t\r\n]+(?P<key>' + Name + ')[ \t\r\n]+'
                       '(?P<val>\"[^\"]*\"|\'[^\']*\'?)[ \t\r\n]*>',
                       re.DOTALL | re.M)
    # add BOM to DTDs, details in bug 435002
    reHeader = re.compile('^\ufeff')
    reComment = re.compile('<!--(?P<val>-?[%s])*?-->' % CharMinusDash,
                           re.S)
    # Declaration of a parameter entity, directly followed by its
    # reference and optional trailing comments.
    rePE = re.compile('<!ENTITY[ \t\r\n]+%[ \t\r\n]+(?P<key>' + Name + ')'
                      '[ \t\r\n]+SYSTEM[ \t\r\n]+'
                      '(?P<val>\"[^\"]*\"|\'[^\']*\')[ \t\r\n]*>[ \t\r\n]*'
                      '%' + Name + ';'
                      '(?:[ \t]*(?:' + XmlComment + '[ \t\r\n]*)*\n?)?')

    class Comment(Comment):
        @property
        def val(self):
            if self._val_cache is None:
                # Strip "<!--" and "-->" to comment contents
                self._val_cache = self.all[4:-3]
            return self._val_cache

    def getNext(self, ctx, offset):
        '''
        Overload Parser.getNext to special-case ParsedEntities.
        Just check for a parsed entity if that method claims junk.

        <!ENTITY % foo SYSTEM "url">
        %foo;
        '''
        # Skip a byte order mark at the very start of the content.
        if offset == 0 and self.reHeader.match(ctx.contents):
            offset += 1
        entity = Parser.getNext(self, ctx, offset)
        if (entity and isinstance(entity, Junk)) or entity is None:
            m = self.rePE.match(ctx.contents, offset)
            if m:
                entity = DTDEntity(
                    ctx, None, None, m.span(), m.span('key'), m.span('val'))
        return entity

    def createEntity(self, ctx, m, current_comment, white_space):
        '''Create a DTDEntity whose value span excludes the quotes.'''
        valspan = m.span('val')
        # Cut off the first and the last character of the quoted value.
        valspan = (valspan[0]+1, valspan[1]-1)
        return DTDEntity(ctx, current_comment, white_space,
                         m.span(), m.span('key'), valspan)
diff --git a/third_party/python/compare_locales/compare_locales/parser/fluent.py b/third_party/python/compare_locales/compare_locales/parser/fluent.py
new file mode 100644
index 0000000000..a74f8cb4f4
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/parser/fluent.py
@@ -0,0 +1,218 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import re
+
+from fluent.syntax import FluentParser as FTLParser
+from fluent.syntax import ast as ftl
+from fluent.syntax.serializer import serialize_comment
+from fluent.syntax.visitor import Visitor
+from .base import (
+ CAN_SKIP,
+ Entry, Entity, Comment, Junk, Whitespace,
+ LiteralEntity,
+ Parser
+)
+
+
class WordCounter(Visitor):
    '''Visitor that sums up the words of all text elements it visits.

    The total is available as word_count.
    '''
    def __init__(self):
        self.word_count = 0

    def generic_visit(self, node):
        # Spans, annotations and comments are not descended into.
        skipped = (ftl.Span, ftl.Annotation, ftl.BaseComment)
        if not isinstance(node, skipped):
            super().generic_visit(node)

    def visit_SelectExpression(self, node):
        # optimize select expressions to only go through the variants
        self.visit(node.variants)

    def visit_TextElement(self, node):
        words = node.value.split()
        self.word_count += len(words)
+
+
class FluentAttribute(Entry):
    '''Entry wrapping one attribute AST node of a Fluent entity.'''
    # Fields ignored when comparing two attributes.
    ignored_fields = ['span']

    def __init__(self, entity, attr_node):
        self.ctx = entity.ctx
        self.attr = attr_node
        id_span = attr_node.id.span
        value_span = attr_node.value.span
        self.key_span = (id_span.start, id_span.end)
        self.val_span = (value_span.start, value_span.end)

    def equals(self, other):
        '''Compare the attribute nodes; only attributes can be equal.'''
        if isinstance(other, FluentAttribute):
            return self.attr.equals(
                other.attr, ignored_fields=self.ignored_fields)
        return False
+
+
class FluentEntity(Entity):
    '''Entity wrapping a message or term AST node of a Fluent file.'''
    # Fields ignored when comparing two entities.
    ignored_fields = ['comment', 'span']

    def __init__(self, ctx, entry):
        self.ctx = ctx
        self.span = (entry.span.start, entry.span.end)

        key_start = entry.id.span.start
        if isinstance(entry, ftl.Term):
            # Terms don't have their '-' as part of the id, use the prior
            # character
            key_start -= 1
        self.key_span = (key_start, entry.id.span.end)

        value = entry.value
        if value is None:
            self.val_span = None
        else:
            self.val_span = (value.span.start, value.span.end)

        self.entry = entry

        # Entry instances are expected to have pre_comment, as other
        # formats use it to associate a Comment with an Entity. Here the
        # message comments are part of the entry AST instead of being
        # separate Comment instances, so there never is one.
        self.pre_comment = None

    @property
    def root_node(self):
        '''AST node at which to start traversal for count_words.

        By default we count words in the value and in all attributes.
        '''
        return self.entry

    # Cache for count_words.
    _word_count = None

    def count_words(self):
        '''Count the words below root_node; the result is cached.'''
        if self._word_count is not None:
            return self._word_count
        counter = WordCounter()
        counter.visit(self.root_node)
        self._word_count = counter.word_count
        return self._word_count

    def equals(self, other):
        '''Compare the AST nodes, leaving out ignored_fields.'''
        return self.entry.equals(
            other.entry, ignored_fields=self.ignored_fields)

    # In Fluent we treat entries as a whole. FluentChecker reports errors at
    # offsets calculated from the beginning of the entry.
    def value_position(self, offset=None):
        if offset is None:
            # no offset given, use our value start or id end
            anchor = self.val_span[0] if self.val_span else self.key_span[1]
            offset = anchor - self.span[0]
        return self.position(offset)

    @property
    def attributes(self):
        '''Generate a FluentAttribute for each attribute of the entry.'''
        for attr_node in self.entry.attributes:
            yield FluentAttribute(self, attr_node)

    def unwrap(self):
        '''Return the complete source of the entry.'''
        return self.all

    def wrap(self, raw_val):
        """Create a literal entity from the given raw value.

        For Fluent, we're exposing the message source to tools like
        Pontoon.
        The comment of this entity is recreated in front of the source of
        the created entity.
        """
        comment = self.entry.comment
        if comment is None:
            source = raw_val
        else:
            source = serialize_comment(comment) + raw_val
        return LiteralEntity(self.key, raw_val, source)
+
+
class FluentMessage(FluentEntity):
    '''FluentEntity for a message; adds nothing to its base class.'''
+
+
class FluentTerm(FluentEntity):
    '''FluentEntity for a term.'''
    # Fields ignored when comparing two terms.
    ignored_fields = ['attributes', 'comment', 'span']

    @property
    def root_node(self):
        '''AST node at which to start traversal for count_words.

        For terms that's the value only. Attributes are private and do
        not count towards the word total.
        '''
        return self.entry.value
+
+
class FluentComment(Comment):
    '''Comment whose value is the content of a Fluent comment node.'''

    def __init__(self, ctx, span, entry):
        super().__init__(ctx, span)
        # Fill the cache right away, val is never computed from the source.
        self._val_cache = entry.content
+
+
class FluentParser(Parser):
    '''Parser for Fluent files, built on the parser of fluent.syntax.'''
    capabilities = CAN_SKIP

    def __init__(self):
        super().__init__()
        self.ftl_parser = FTLParser()

    def walk(self, only_localizable=False):
        '''Generate the entries of the loaded content, in source order.

        Messages, terms and junk are always yielded. Whitespace and
        comments are left out with only_localizable.
        '''
        if not self.ctx:
            # loading file failed, or we just didn't load anything
            return

        include_all = not only_localizable
        resource = self.ftl_parser.parse(self.ctx.contents)
        previous_end = 0

        for entry in resource.body:
            # Gap between the previous entry and this one.
            if include_all and entry.span.start > previous_end:
                yield Whitespace(
                    self.ctx, (previous_end, entry.span.start))

            if isinstance(entry, ftl.Message):
                yield FluentMessage(self.ctx, entry)
            elif isinstance(entry, ftl.Term):
                yield FluentTerm(self.ctx, entry)
            elif isinstance(entry, ftl.Junk):
                outer_start = entry.span.start
                outer_end = entry.span.end
                # strip leading whitespace
                lead = re.match('[ \t\r\n]*', entry.content).end()
                junk_start = outer_start + lead
                if include_all and outer_start < junk_start:
                    yield Whitespace(self.ctx, (outer_start, junk_start))
                # strip trailing whitespace
                ws, we = re.search('[ \t\r\n]*$', entry.content).span()
                junk_end = outer_end - (we - ws)
                yield Junk(self.ctx, (junk_start, junk_end))
                if include_all and junk_end < entry.span.end:
                    yield Whitespace(self.ctx, (junk_end, entry.span.end))
            elif isinstance(entry, ftl.BaseComment) and include_all:
                yield FluentComment(
                    self.ctx, (entry.span.start, entry.span.end), entry)

            previous_end = entry.span.end

        # Yield Whitespace at the EOF.
        if include_all:
            eof_offset = len(self.ctx.contents)
            if eof_offset > previous_end:
                yield Whitespace(self.ctx, (previous_end, eof_offset))
diff --git a/third_party/python/compare_locales/compare_locales/parser/ini.py b/third_party/python/compare_locales/compare_locales/parser/ini.py
new file mode 100644
index 0000000000..623f7c15a4
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/parser/ini.py
@@ -0,0 +1,56 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import re
+
+from .base import (
+ Entry, OffsetComment,
+ Parser
+)
+
+
class IniSection(Entry):
    '''Entity-like object for a section header of an ini file.

    Key and value are the same piece of text.
    '''
    def __init__(self, ctx, span, val_span):
        self.ctx = ctx
        self.span = span
        self.key_span = val_span
        self.val_span = val_span

    def __repr__(self):
        return self.raw_val
+
+
class IniParser(Parser):
    '''
    Parse files of the form:
    # initial comment
    [cat]
    whitespace*
    #comment
    string=value
    ...
    '''

    Comment = OffsetComment

    def __init__(self):
        # One or more consecutive lines starting with ";" or "#".
        self.reComment = re.compile('(?:^[;#][^\n]*\n)*(?:^[;#][^\n]*)', re.M)
        self.reSection = re.compile(r'\[(?P<val>.*?)\]', re.M)
        self.reKey = re.compile('(?P<key>.+?)=(?P<val>.*)', re.M)
        Parser.__init__(self)

    def getNext(self, ctx, offset):
        '''Parse a section header, or whatever the base class finds.'''
        section = self.reSection.match(ctx.contents, offset)
        if section is None:
            return super().getNext(ctx, offset)
        return IniSection(ctx, section.span(), section.span('val'))

    def getJunk(self, ctx, offset, *expressions):
        # base.Parser.getNext calls us with self.reKey, self.reComment.
        # A section header ends junk, too, so add self.reSection.
        return super().getJunk(ctx, offset, *expressions, self.reSection)
diff --git a/third_party/python/compare_locales/compare_locales/parser/po.py b/third_party/python/compare_locales/compare_locales/parser/po.py
new file mode 100644
index 0000000000..48ea05ca2b
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/parser/po.py
@@ -0,0 +1,125 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""Gettext PO(T) parser
+
+Parses gettext po and pot files.
+"""
+
+
+import re
+
+from .base import (
+ CAN_SKIP,
+ Entity,
+ BadEntity,
+ Parser
+)
+
+
class PoEntityMixin:
    '''Key and value handling for entities of gettext files.

    Expects stringlist_key, a (msgid, msgctxt) tuple, and stringlist_val,
    the msgstr, to be set on the instance.
    '''

    @property
    def val(self):
        '''The translation, or the msgid if the translation is empty.'''
        return self.stringlist_val or self.stringlist_key[0]

    @property
    def key(self):
        '''The (msgid, msgctxt) tuple.'''
        return self.stringlist_key

    @property
    def localized(self):
        # gettext denotes a non-localized string by an empty value
        return bool(self.stringlist_val)

    def __repr__(self):
        msgid = self.key[0]
        return msgid
+
+
class PoEntity(PoEntityMixin, Entity):
    '''Entity of a gettext file; all behavior comes from the two bases.'''
+
+
# Replacement for each escape sequence, keyed by the character that
# follows the backslash.
_PO_ESCAPES = {'\\': '\\', 't': '\t', 'r': '\r', 'n': '\n', '"': '"'}
_PO_ESCAPE_RE = re.compile(r'\\([\\trn"])')


# Unescape and concat a string list
def eval_stringlist(lines):
    """Unescape each string in lines and return their concatenation.

    The escapes ``\\\\``, ``\\t``, ``\\r``, ``\\n`` and ``\\"`` are
    replaced; a backslash followed by any other character is kept as is.

    Each string is decoded in a single left-to-right pass. Replacing one
    escape after the other would decode some text twice: an escaped
    backslash followed by ``t`` would first become a backslash and ``t``,
    and then a tab.
    """
    return ''.join(
        _PO_ESCAPE_RE.sub(lambda m: _PO_ESCAPES[m.group(1)], line)
        for line in lines
    )
+
+
class PoParser(Parser):
    '''Parser for gettext po and pot files.'''
    # gettext l10n fallback at runtime, don't merge en-US strings
    capabilities = CAN_SKIP

    reKey = re.compile('msgctxt|msgid')
    reValue = re.compile('(?P<white>[ \t\r\n]*)(?P<cmd>msgstr)')
    reComment = re.compile(r'(?:#.*?\n)+')
    # string list item:
    # leading whitespace
    # `"`
    # escaped quotes etc, not quote, newline, backslash
    # `"`
    reListItem = re.compile(r'[ \t\r\n]*"((?:\\[\\trn"]|[^"\n\\])*)"')

    def createEntity(self, ctx, m, current_comment, white_space):
        '''Create a PoEntity from an optional msgctxt, a msgid and a msgstr.

        The key span starts at the match, which includes a msgctxt if
        there is one, and ends behind the msgid. The value span covers the
        msgstr. Raises BadEntity if msgid or msgstr can't be parsed.
        '''
        start = cursor = m.start()
        try:
            msgctxt, cursor = self._parse_string_list(ctx, cursor, 'msgctxt')
        except BadEntity:
            # no msgctxt is OK
            msgctxt = None
        else:
            ws = self.reWhitespace.match(ctx.contents, cursor)
            if ws:
                cursor = ws.end()
        msgid, cursor = self._parse_string_list(ctx, cursor, 'msgid')
        id_end = cursor
        ws = self.reWhitespace.match(ctx.contents, cursor)
        if ws:
            cursor = ws.end()
        val_start = cursor
        msgstr, cursor = self._parse_string_list(ctx, cursor, 'msgstr')
        e = PoEntity(
            ctx,
            current_comment,
            white_space,
            (start, cursor),
            (start, id_end),
            (val_start, cursor)
        )
        e.stringlist_key = (msgid, msgctxt)
        e.stringlist_val = msgstr
        return e

    def _parse_string_list(self, ctx, cursor, key):
        '''Parse the keyword key and the quoted strings following it.

        Returns the unescaped, concatenated strings and the offset behind
        the last one. Raises BadEntity if the content at cursor doesn't
        start with key, or if no quoted string follows.
        '''
        if not ctx.contents.startswith(key, cursor):
            raise BadEntity
        cursor += len(key)
        frags = []
        while True:
            m = self.reListItem.match(ctx.contents, cursor)
            if not m:
                break
            frags.append(m.group(1))
            cursor = m.end()
        if not frags:
            raise BadEntity
        return eval_stringlist(frags), cursor
diff --git a/third_party/python/compare_locales/compare_locales/parser/properties.py b/third_party/python/compare_locales/compare_locales/parser/properties.py
new file mode 100644
index 0000000000..396800c99b
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/parser/properties.py
@@ -0,0 +1,113 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import re
+
+from .base import (
+ Entity, OffsetComment, Whitespace,
+ Parser
+)
+
+
class PropertiesEntityMixin:
    '''Value handling for entities of properties files.'''
    # A backslash followed by a unicode escape, by a line break with the
    # indentation of the next line, or by any other single character.
    escape = re.compile(r'\\((?P<uni>u[0-9a-fA-F]{1,4})|'
                        '(?P<nl>\n[ \t]*)|(?P<single>.))', re.M)
    known_escapes = {'n': '\n', 'r': '\r', 't': '\t', '\\': '\\'}

    @property
    def val(self):
        '''The raw value with all escape sequences resolved.

        Escaped line breaks are dropped together with the indentation
        that follows them. For an unknown escape, just the backslash is
        removed.
        '''
        def resolve(match):
            codepoint = match.group('uni')
            if codepoint:
                # Skip the leading "u" of the hex number.
                return chr(int(codepoint[1:], 16))
            if match.group('nl'):
                return ''
            char = match.group('single')
            return self.known_escapes.get(char, char)

        return self.escape.sub(resolve, self.raw_val)
+
+
class PropertiesEntity(PropertiesEntityMixin, Entity):
    '''Entity of a properties file; all behavior comes from the two bases.'''
+
+
class PropertiesParser(Parser):
    '''Parser for properties files, made of ``key = value`` or
    ``key: value`` entries and of comment lines starting with ``#``
    or ``!``.
    '''

    Comment = OffsetComment

    def __init__(self):
        # Matches the key and the separator; the value is found by
        # scanning lines in getNext.
        self.reKey = re.compile(
            '(?P<key>[^#! \t\r\n][^=:\n]*?)[ \t]*[:=][ \t]*', re.M)
        self.reComment = re.compile('(?:[#!][^\n]*\n)*(?:[#!][^\n]*)', re.M)
        # Backslashes at the very end of the searched range.
        self._escapedEnd = re.compile(r'\\+$')
        self._trailingWS = re.compile(r'[ \t\r\n]*(?:\n|\Z)', re.M)
        Parser.__init__(self)

    def getNext(self, ctx, offset):
        '''Parse the next fragment.

        Returns a comment, whitespace, a PropertiesEntity or Junk.
        A value may span several lines if the line breaks are escaped
        with a backslash.
        '''
        junk_offset = offset
        # overwritten to parse values line by line
        contents = ctx.contents

        m = self.reComment.match(contents, offset)
        if m:
            current_comment = self.Comment(ctx, m.span())
            if offset == 0 and 'License' in current_comment.val:
                # Heuristic. A early comment with "License" is probably
                # a license header, and should be standalone.
                return current_comment
            offset = m.end()
        else:
            current_comment = None

        m = self.reWhitespace.match(contents, offset)
        if m:
            white_space = Whitespace(ctx, m.span())
            offset = m.end()
            if (
                current_comment is not None
                and white_space.raw_val.count('\n') > 1
            ):
                # standalone comment
                return current_comment
            if current_comment is None:
                return white_space
        else:
            white_space = None

        m = self.reKey.match(contents, offset)
        if m:
            # startline tracks the start of the last line of the value,
            # offset the position from which the next newline is looked for.
            startline = offset = m.end()
            while True:
                endval = nextline = contents.find('\n', offset)
                if nextline == -1:
                    # The value runs to the end of the content.
                    endval = offset = len(contents)
                    break
                # is newline escaped?
                _e = self._escapedEnd.search(contents, offset, nextline)
                offset = nextline + 1
                if _e is None:
                    break
                # backslashes at end of line, if 2*n, not escaped
                if len(_e.group()) % 2 == 0:
                    break
                startline = offset

            # strip trailing whitespace
            ws = self._trailingWS.search(contents, startline)
            if ws:
                endval = ws.start()

            entity = PropertiesEntity(
                ctx, current_comment, white_space,
                (m.start(), endval),  # full span
                m.span('key'),
                (m.end(), endval))  # value span
            return entity

        if current_comment is not None:
            return current_comment
        if white_space is not None:
            return white_space

        return self.getJunk(ctx, junk_offset, self.reKey, self.reComment)
diff --git a/third_party/python/compare_locales/compare_locales/paths/__init__.py b/third_party/python/compare_locales/compare_locales/paths/__init__.py
new file mode 100644
index 0000000000..f2d1c407c5
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/paths/__init__.py
@@ -0,0 +1,53 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from compare_locales import mozpath
+from .files import ProjectFiles, REFERENCE_LOCALE
+from .ini import (
+ L10nConfigParser, SourceTreeConfigParser,
+ EnumerateApp, EnumerateSourceTreeApp,
+)
+from .matcher import Matcher
+from .project import ProjectConfig
+from .configparser import TOMLParser, ConfigNotFound
+
+
+# Names imported above from the submodules and re-exported as the
+# public API of this package.
+__all__ = [
+    'Matcher',
+    'ProjectConfig',
+    'L10nConfigParser', 'SourceTreeConfigParser',
+    'EnumerateApp', 'EnumerateSourceTreeApp',
+    'ProjectFiles', 'REFERENCE_LOCALE',
+    'TOMLParser', 'ConfigNotFound',
+]
+
+
+class File:
+    '''A file on disk together with the parts that identify it.
+
+    Instances hash by ``localpath`` and compare equal when all of
+    their attributes are equal.
+    '''
+
+    def __init__(self, fullpath, file, module=None, locale=None):
+        self.fullpath, self.file = fullpath, file
+        self.module, self.locale = module, locale
+
+    @property
+    def localpath(self):
+        '''``file`` on its own, or locale/module/file if a module is set.'''
+        if not self.module:
+            return self.file
+        return mozpath.join(self.locale, self.module, self.file)
+
+    def __hash__(self):
+        return hash(self.localpath)
+
+    def __str__(self):
+        return self.fullpath
+
+    def __eq__(self, other):
+        # Anything that is not a File never compares equal.
+        return isinstance(other, File) and vars(self) == vars(other)
+
+    def __ne__(self, other):
+        return not (self == other)
diff --git a/third_party/python/compare_locales/compare_locales/paths/configparser.py b/third_party/python/compare_locales/compare_locales/paths/configparser.py
new file mode 100644
index 0000000000..1c1dbfbff3
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/paths/configparser.py
@@ -0,0 +1,138 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import errno
+import logging
+from compare_locales import mozpath
+from .project import ProjectConfig
+from .matcher import expand
+import toml
+
+
+class ConfigNotFound(EnvironmentError):
+    '''Error for a configuration file that could not be found.
+
+    The instance carries ``errno.ENOENT`` as ``errno`` and the given
+    path as ``filename``.
+    '''
+
+    def __init__(self, path):
+        message = 'Configuration file not found'
+        super().__init__(errno.ENOENT, message, path)
+
+
+class ParseContext:
+    '''State shared by the processing steps of one configuration parse.'''
+
+    def __init__(self, path, env, ignore_missing_includes):
+        # The project configuration that is built up for ``path``.
+        self.pc = ProjectConfig(path)
+        # Parsed file contents; stays None until something loads them.
+        self.data = None
+        self.path = path
+        self.env = env
+        self.ignore_missing_includes = ignore_missing_includes
+
+
+class TOMLParser:
+    '''Parse a TOML project configuration into a ``ProjectConfig``.
+
+    ``parse`` runs the ``load`` and ``process*`` steps in a fixed order
+    on a shared ``ParseContext``. Each step is a separate method so
+    that subclasses can replace it.
+    '''
+
+    def parse(self, path, env=None, ignore_missing_includes=False):
+        '''Load the configuration at ``path`` and return it as a config.
+
+        ``env`` holds variables that are added on top of those in the
+        file. With ``ignore_missing_includes``, included or excluded
+        child configurations that raise ``ConfigNotFound`` are logged
+        and skipped instead of aborting the parse.
+        '''
+        ctx = self.context(
+            path, env=env, ignore_missing_includes=ignore_missing_includes
+        )
+        self.load(ctx)
+        self.processBasePath(ctx)
+        self.processEnv(ctx)
+        self.processPaths(ctx)
+        self.processFilters(ctx)
+        self.processIncludes(ctx)
+        self.processExcludes(ctx)
+        self.processLocales(ctx)
+        return self.asConfig(ctx)
+
+    def context(self, path, env=None, ignore_missing_includes=False):
+        '''Create the ``ParseContext`` for one parse run.'''
+        return ParseContext(
+            path,
+            env if env is not None else {},
+            ignore_missing_includes,
+        )
+
+    def load(self, ctx):
+        '''Read and decode the TOML file into ``ctx.data``.
+
+        Raises ``ConfigNotFound`` if the file cannot be opened or is
+        not valid TOML; the original error is kept as ``__cause__``.
+        '''
+        try:
+            # TOML documents are UTF-8 by specification, so don't
+            # depend on the locale's default encoding.
+            with open(ctx.path, 'rt', encoding='utf-8') as fin:
+                ctx.data = toml.load(fin)
+        except (toml.TomlDecodeError, OSError) as err:
+            raise ConfigNotFound(ctx.path) from err
+
+    def processBasePath(self, ctx):
+        '''Set the project root from ``basepath``, defaulting to ``.``.'''
+        assert ctx.data is not None
+        ctx.pc.set_root(ctx.data.get('basepath', '.'))
+
+    def processEnv(self, ctx):
+        '''Add the file's ``env`` table, then the parser environment.'''
+        assert ctx.data is not None
+        ctx.pc.add_environment(**ctx.data.get('env', {}))
+        # add parser environment, possibly overwriting file variables
+        ctx.pc.add_environment(**ctx.env)
+
+    def processLocales(self, ctx):
+        '''Set the project locales if the file lists any.'''
+        assert ctx.data is not None
+        if 'locales' in ctx.data:
+            ctx.pc.set_locales(ctx.data['locales'])
+
+    def processPaths(self, ctx):
+        '''Add one path dictionary per entry of the ``paths`` array.
+
+        ``l10n`` is required; ``locales``, ``reference`` and ``test``
+        are copied over when present.
+        '''
+        assert ctx.data is not None
+        for data in ctx.data.get('paths', []):
+            paths = {
+                "l10n": data['l10n']
+            }
+            for optional in ('locales', 'reference', 'test'):
+                if optional in data:
+                    paths[optional] = data[optional]
+            ctx.pc.add_paths(paths)
+
+    def processFilters(self, ctx):
+        '''Add one rule per entry of the ``filters`` array.
+
+        A single ``path`` string is wrapped into a list; ``key`` is
+        copied over when present.
+        '''
+        assert ctx.data is not None
+        for data in ctx.data.get('filters', []):
+            paths = data['path']
+            if isinstance(paths, str):
+                paths = [paths]
+            rule = {
+                "path": paths,
+                "action": data['action']
+            }
+            if 'key' in data:
+                rule['key'] = data['key']
+            ctx.pc.add_rules(rule)
+
+    def processIncludes(self, ctx):
+        '''Parse the ``includes`` and add them as child configurations.'''
+        for child in self._processChild(ctx, 'includes'):
+            ctx.pc.add_child(child)
+
+    def processExcludes(self, ctx):
+        '''Parse the ``excludes`` and register them as exclusions.'''
+        for child in self._processChild(ctx, 'excludes'):
+            ctx.pc.exclude(child)
+
+    def _processChild(self, ctx, field):
+        '''Yield the parsed configuration for each entry of ``field``.
+
+        Entries that raise ``ConfigNotFound`` are logged and skipped if
+        ``ctx.ignore_missing_includes`` is set; otherwise the error is
+        re-raised.
+        '''
+        assert ctx.data is not None
+        if field not in ctx.data:
+            return
+        for child_config in ctx.data[field]:
+            # resolve child_config['path'] against our root and env
+            p = mozpath.normpath(
+                expand(
+                    ctx.pc.root,
+                    child_config['path'],
+                    ctx.pc.environ
+                )
+            )
+            try:
+                child = self.parse(
+                    p, env=ctx.env,
+                    ignore_missing_includes=ctx.ignore_missing_includes
+                )
+            except ConfigNotFound as e:
+                if not ctx.ignore_missing_includes:
+                    raise
+                (logging
+                    .getLogger('compare-locales.io')
+                    .error('%s: %s', e.strerror, e.filename))
+                continue
+            yield child
+
+    def asConfig(self, ctx):
+        '''Return the configuration built up in the context.'''
+        return ctx.pc
diff --git a/third_party/python/compare_locales/compare_locales/paths/files.py b/third_party/python/compare_locales/compare_locales/paths/files.py
new file mode 100644
index 0000000000..bfbe7ffbd1
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/paths/files.py
@@ -0,0 +1,224 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import os
+from compare_locales import mozpath
+
+
+# Placeholder locale code, used for the `locale` variable of l10n
+# patterns when ProjectFiles is created without a locale.
+REFERENCE_LOCALE = 'en-x-moz-reference'
+
+
+class ConfigList(list):
+    '''List of configs in which each ``path`` is kept only once.'''
+
+    def maybe_extend(self, other):
+        '''Append the configs of ``other`` whose path is not in this list.
+
+        Configs are checked one at a time, so duplicates within
+        ``other`` are dropped as well.
+        '''
+        for candidate in other:
+            already_known = any(
+                existing.path == candidate.path for existing in self
+            )
+            if not already_known:
+                self.append(candidate)
+
+
+class ProjectFiles:
+    '''Iterable object to get all files and tests for a locale and a
+    list of ProjectConfigs.
+
+    If the given locale is None, iterate over reference files as
+    both reference and locale for a reference self-test.
+
+    Iteration yields tuples of (l10n path, reference path, merge path,
+    tests).
+    '''
+    def __init__(self, locale, projects, mergebase=None):
+        '''Collect the matchers of all projects that apply to ``locale``.
+
+        Raises RuntimeError if two entries share an l10n pattern but
+        disagree on the reference.
+        '''
+        self.locale = locale
+        self.matchers = []
+        self.exclude = None
+        self.mergebase = mergebase
+        configs = ConfigList()
+        excludes = ConfigList()
+        for project in projects:
+            # Only add this project if we're not in validation mode,
+            # and the given locale is enabled for the project.
+            if locale is not None and locale not in project.all_locales:
+                continue
+            configs.maybe_extend(project.configs)
+            excludes.maybe_extend(project.excludes)
+        # If an excluded config is explicitly included, drop it from the
+        # excludes.
+        excludes = [
+            exclude
+            for exclude in excludes
+            if not any(c.path == exclude.path for c in configs)
+        ]
+        if excludes:
+            self.exclude = ProjectFiles(locale, excludes)
+        for pc in configs:
+            if locale and pc.locales is not None and locale not in pc.locales:
+                continue
+            for paths in pc.paths:
+                if (
+                    locale and
+                    'locales' in paths and
+                    locale not in paths['locales']
+                ):
+                    continue
+                m = {
+                    'l10n': paths['l10n'].with_env({
+                        "locale": locale or REFERENCE_LOCALE
+                    }),
+                    'module': paths.get('module'),
+                }
+                if 'reference' in paths:
+                    m['reference'] = paths['reference']
+                if self.mergebase is not None:
+                    m['merge'] = paths['l10n'].with_env({
+                        "locale": locale,
+                        "l10n_base": self.mergebase
+                    })
+                m['test'] = set(paths.get('test', []))
+                if 'locales' in paths:
+                    m['locales'] = paths['locales'][:]
+                self.matchers.append(m)
+        self.matchers.reverse()  # we always iterate last first
+        # Remove duplicate patterns, comparing each matcher
+        # against all other matchers.
+        # Avoid n^2 comparisons by only scanning the upper triangle
+        # of a n x n matrix of all possible combinations.
+        # Using enumerate and keeping track of indexes, as we can't
+        # modify the list while iterating over it.
+        drops = set()  # duplicate matchers to remove
+        for i, m in enumerate(self.matchers[:-1]):
+            if i in drops:
+                continue  # we're dropping this anyway, don't search again
+            for i_, m_ in enumerate(self.matchers[(i+1):]):
+                if (mozpath.realpath(m['l10n'].prefix) !=
+                        mozpath.realpath(m_['l10n'].prefix)):
+                    # ok, not the same thing, continue
+                    continue
+                if m['l10n'].pattern != m_['l10n'].pattern:
+                    # We cannot guess whether same entry until the pattern is
+                    # resolved, continue
+                    continue
+                # check that we're comparing the same thing
+                if 'reference' in m:
+                    other_reference = m_.get('reference')
+                    # A duplicate without any reference is a mismatch, too.
+                    # Report it as such rather than failing on None.
+                    if (
+                        other_reference is None or
+                        mozpath.realpath(m['reference'].prefix) !=
+                        mozpath.realpath(other_reference.prefix)
+                    ):
+                        raise RuntimeError('Mismatch in reference for ' +
+                                           mozpath.realpath(m['l10n'].prefix))
+                drops.add(i_ + i + 1)
+                m['test'] |= m_['test']
+        # delete from the back so that the remaining indexes stay valid
+        drops = sorted(drops, reverse=True)
+        for i in drops:
+            del self.matchers[i]
+
+    def __iter__(self):
+        # The iteration is pretty different when we iterate over
+        # a localization vs over the reference. We do that latter
+        # when running in validation mode.
+        inner = self.iter_locale() if self.locale else self.iter_reference()
+        yield from inner
+
+    def iter_locale(self):
+        '''Iterate over locale files.
+
+        Files found through the l10n patterns come first; files that
+        only exist in the reference are added with their l10n path.
+        The result is sorted by l10n path.
+        '''
+        known = {}
+        for matchers in self.matchers:
+            matcher = matchers['l10n']
+            for path in self._files(matcher):
+                if path not in known:
+                    known[path] = {'test': matchers.get('test')}
+                    if 'reference' in matchers:
+                        known[path]['reference'] = matcher.sub(
+                            matchers['reference'], path)
+                    if 'merge' in matchers:
+                        known[path]['merge'] = matcher.sub(
+                            matchers['merge'], path)
+            if 'reference' not in matchers:
+                continue
+            matcher = matchers['reference']
+            for path in self._files(matcher):
+                l10npath = matcher.sub(matchers['l10n'], path)
+                if l10npath not in known:
+                    known[l10npath] = {
+                        'reference': path,
+                        'test': matchers.get('test')
+                    }
+                    if 'merge' in matchers:
+                        known[l10npath]['merge'] = \
+                            matcher.sub(matchers['merge'], path)
+        for path, d in sorted(known.items()):
+            yield (path, d.get('reference'), d.get('merge'), d['test'])
+
+    def iter_reference(self):
+        '''Iterate over reference files.
+
+        Excludes are disabled while the iteration is running, and are
+        restored when it ends, fails, or is closed early.
+        '''
+        # unset self.exclude, as we don't want that for our reference files
+        exclude = self.exclude
+        self.exclude = None
+        try:
+            known = {}
+            for matchers in self.matchers:
+                if 'reference' not in matchers:
+                    continue
+                matcher = matchers['reference']
+                for path in self._files(matcher):
+                    refpath = matcher.sub(matchers['reference'], path)
+                    if refpath not in known:
+                        known[refpath] = {
+                            'reference': path,
+                            'test': matchers.get('test')
+                        }
+            for path, d in sorted(known.items()):
+                yield (path, d.get('reference'), None, d['test'])
+        finally:
+            # also runs if the consumer stops iterating early
+            self.exclude = exclude
+
+    def _files(self, matcher):
+        '''Base implementation of getting all files in a hierarchy
+        using the file system.
+        Subclasses might replace this method to support different IO
+        patterns.
+        '''
+        base = matcher.prefix
+        if self._isfile(base):
+            # The prefix is a single file, no need to walk.
+            if self.exclude and self.exclude.match(base) is not None:
+                return
+            if matcher.match(base) is not None:
+                yield base
+            return
+        for d, dirs, files in self._walk(base):
+            for f in files:
+                p = mozpath.join(d, f)
+                if self.exclude and self.exclude.match(p) is not None:
+                    continue
+                if matcher.match(p) is not None:
+                    yield p
+
+    def _isfile(self, path):
+        return os.path.isfile(path)
+
+    def _walk(self, base):
+        yield from os.walk(base)
+
+    def match(self, path):
+        '''Return the tuple of l10n_path, reference, mergepath, tests
+        if the given path matches any config, otherwise None.
+
+        This routine doesn't check that the files actually exist.
+        '''
+        if (
+            self.locale is not None and
+            self.exclude and self.exclude.match(path) is not None
+        ):
+            return
+        for matchers in self.matchers:
+            matcher = matchers['l10n']
+            if self.locale is not None and matcher.match(path) is not None:
+                ref = merge = None
+                if 'reference' in matchers:
+                    ref = matcher.sub(matchers['reference'], path)
+                if 'merge' in matchers:
+                    merge = matcher.sub(matchers['merge'], path)
+                return path, ref, merge, matchers.get('test')
+            if 'reference' not in matchers:
+                continue
+            matcher = matchers['reference']
+            if matcher.match(path) is not None:
+                merge = None
+                l10n = matcher.sub(matchers['l10n'], path)
+                if 'merge' in matchers:
+                    merge = matcher.sub(matchers['merge'], path)
+                return l10n, path, merge, matchers.get('test')
diff --git a/third_party/python/compare_locales/compare_locales/paths/ini.py b/third_party/python/compare_locales/compare_locales/paths/ini.py
new file mode 100644
index 0000000000..bde7def0ca
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/paths/ini.py
@@ -0,0 +1,224 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+from configparser import ConfigParser, NoSectionError, NoOptionError
+from collections import defaultdict
+from compare_locales import util, mozpath
+from .project import ProjectConfig
+
+
+class L10nConfigParser:
+    '''Gather application information from l10n.ini files.
+
+    Files are read synchronously. Subclasses can overwrite loadConfigs
+    and addChild to load their data differently.
+    '''
+    def __init__(self, inipath, **kwargs):
+        """Set up a parser for the l10n.ini file at inipath.
+
+        Keyword arguments are passed to the inner ConfigParser as
+        defaults.
+        """
+        self.inipath = mozpath.normpath(inipath)
+        # parsers for the l10n.ini files included by this one
+        self.children = []
+        # the l10n directories listed in this l10n.ini
+        self.dirs = []
+        # defaults for the inner ConfigParser
+        self.defaults = kwargs
+
+    def getDepth(self, cp):
+        '''Return the `depth` of the `general` section, `.` if unset.
+        '''
+        try:
+            return cp.get('general', 'depth')
+        except (NoSectionError, NoOptionError):
+            return '.'
+
+    def getFilters(self):
+        '''Return the filter functions of this parser and all children.
+
+        A filter is the callable named `test` that is defined by the
+        filter.py next to the l10n.ini. Any failure to load or run that
+        file results in no filter for this parser.
+        '''
+        filter_path = mozpath.join(mozpath.dirname(self.inipath), 'filter.py')
+        filters = []
+        try:
+            scope = {}
+            with open(filter_path) as source:
+                exec(compile(source.read(), filter_path, 'exec'), {}, scope)
+            candidate = scope.get('test')
+            if callable(candidate):
+                filters.append(candidate)
+        except BaseException:  # we really want to handle EVERYTHING here
+            filters = []
+
+        for child in self.children:
+            filters.extend(child.getFilters())
+
+        return filters
+
+    def loadConfigs(self):
+        """Read the l10n.ini file of this parser and all its includes.
+
+        Sets `base` and `all_path`, fills `dirs` and `children`, and
+        returns the ConfigParser that was used.
+        """
+        cp = ConfigParser(self.defaults)
+        cp.read(self.inipath)
+        ini_dir = mozpath.dirname(self.inipath)
+        self.base = mozpath.join(ini_dir, self.getDepth(cp))
+        # every entry of "includes" refers to another l10n.ini
+        try:
+            for title, path in cp.items('includes'):
+                # entries coming from the defaults are not includes
+                if title not in self.defaults:
+                    self.addChild(title, path, cp)
+        except NoSectionError:
+            pass
+        # the "dirs" of the "compare" section, if there are any
+        try:
+            self.dirs += cp.get('compare', 'dirs').split()
+        except (NoOptionError, NoSectionError):
+            pass
+        # the file with all locales, relative to our base
+        try:
+            all_file = cp.get('general', 'all')
+        except (NoOptionError, NoSectionError):
+            self.all_path = None
+        else:
+            self.all_path = mozpath.join(self.base, all_file)
+        return cp
+
+    def addChild(self, title, path, orig_cp):
+        """Load the included l10n.ini and add it to the children.
+
+        title -- the name of the include
+        path -- path of the included l10n.ini, relative to our base
+        orig_cp -- the ConfigParser of this l10n.ini, not used here
+        """
+        child = L10nConfigParser(mozpath.join(self.base, path),
+                                 **self.defaults)
+        child.loadConfigs()
+        self.children.append(child)
+
+    def dirsIter(self):
+        """Yield (dir, (base, dir)) for each dir of this l10n.ini"""
+        for name in self.dirs:
+            yield name, (self.base, name)
+
+    def directories(self):
+        """Yield the dirsIter() items of this l10n.ini, followed by
+        those of all included ones.
+        """
+        yield from self.dirsIter()
+        for child in self.children:
+            yield from child.directories()
+
+    def allLocales(self):
+        """Return the locales parsed from the file at all_path"""
+        with open(self.all_path) as locales_file:
+            return util.parseLocales(locales_file.read())
+
+
+class SourceTreeConfigParser(L10nConfigParser):
+    '''L10nConfigParser for repositories that are checked out next to
+    each other, instead of intermingled as for real builds.
+    '''
+
+    def __init__(self, inipath, base, redirects):
+        '''Take the base path and the redirects on top of inipath.
+
+        base is used to resolve local paths via branch names.
+        redirects maps upstream repositories to local clones, for
+        unified repositories.
+        '''
+        super().__init__(inipath)
+        self.base = base
+        self.redirects = redirects
+
+    def addChild(self, title, path, orig_cp):
+        # An optional "include_<title>" section says where to find the
+        # l10n.ini of this include; it may be in a different repository.
+        # Projects like "mail" use that to locate "toolkit".
+        details = 'include_' + title
+        if not orig_cp.has_section(details):
+            path = mozpath.join(self.base, path)
+        else:
+            branch = orig_cp.get(details, 'mozilla')
+            branch = self.redirects.get(branch, branch)
+            inipath = orig_cp.get(details, 'l10n.ini')
+            path = mozpath.join(self.base, branch, inipath)
+        child = SourceTreeConfigParser(path, self.base, self.redirects,
+                                       **self.defaults)
+        child.loadConfigs()
+        self.children.append(child)
+
+
+class EnumerateApp:
+    '''Describe the application of an l10n.ini file, and convert that
+    description to a ProjectConfig.
+    '''
+    reference = 'en-US'
+
+    def __init__(self, inipath, l10nbase):
+        self.setupConfigParser(inipath)
+        self.modules = defaultdict(dict)
+        self.l10nbase = mozpath.abspath(l10nbase)
+        self.filters = []
+        self.addFilters(*self.config.getFilters())
+
+    def setupConfigParser(self, inipath):
+        '''Create and load the parser for inipath as self.config.'''
+        parser = L10nConfigParser(inipath)
+        self.config = parser
+        parser.loadConfigs()
+
+    def addFilters(self, *args):
+        '''Add the given filters to self.filters.'''
+        self.filters.extend(args)
+
+    def asConfig(self):
+        '''Return a ProjectConfig with the paths, the first filter and
+        the locales of the loaded ini files.
+        '''
+        # We've already normalized paths in the ini parsing.
+        # Set the path and root to None to just keep our paths as is.
+        project = ProjectConfig(None)
+        project.set_root('.')  # sets to None because path is None
+        project.add_environment(l10n_base=self.l10nbase)
+        self._config_for_ini(project, self.config)
+        found = self.config.getFilters()
+        if found:
+            project.set_filter_py(found[0])
+        project.set_locales(self.config.allLocales(), deep=True)
+        return project
+
+    def _config_for_ini(self, projectconfig, aConfig):
+        '''Add the paths for the dirs of aConfig and of its children.'''
+        for _, (basepath, module) in aConfig.dirsIter():
+            reference = mozpath.normpath('%s/%s/locales/en-US/**' %
+                                         (basepath, module))
+            l10n = mozpath.normpath('{l10n_base}/{locale}/%s/**' %
+                                    module)
+            paths = {
+                'module': module,
+                'reference': reference,
+                'l10n': l10n
+            }
+            if module == 'mobile/android/base':
+                paths['test'] = ['android-dtd']
+            projectconfig.add_paths(paths)
+        for child in aConfig.children:
+            self._config_for_ini(projectconfig, child)
+
+
+class EnumerateSourceTreeApp(EnumerateApp):
+    '''EnumerateApp for repositories that are checked out side by
+    side, without paying attention to how the source would be checked
+    out for building.
+    '''
+
+    def __init__(self, inipath, basepath, l10nbase, redirects):
+        # Both are read by setupConfigParser, which the parent
+        # constructor calls, so they need to be set first.
+        self.basepath = basepath
+        self.redirects = redirects
+        super().__init__(inipath, l10nbase)
+
+    def setupConfigParser(self, inipath):
+        parser = SourceTreeConfigParser(inipath, self.basepath,
+                                        self.redirects)
+        self.config = parser
+        parser.loadConfigs()
diff --git a/third_party/python/compare_locales/compare_locales/paths/matcher.py b/third_party/python/compare_locales/compare_locales/paths/matcher.py
new file mode 100644
index 0000000000..82de936107
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/paths/matcher.py
@@ -0,0 +1,470 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import os
+import re
+import itertools
+from compare_locales import mozpath
+
+
+# Android uses non-standard codes for a few locales.
+# Standard code -> code used by Android:
+ANDROID_LEGACY_MAP = {
+    'he': 'iw',
+    'id': 'in',
+    'yi': 'ji',
+}
+# ... and the way back, code used by Android -> standard code:
+ANDROID_STANDARD_MAP = dict(
+    zip(ANDROID_LEGACY_MAP.values(), ANDROID_LEGACY_MAP.keys())
+)
+
+
+class Matcher:
+    '''Path pattern matcher
+    Supports path matching similar to mozpath.match(), but does
+    not match trailing file paths without trailing wildcards.
+    Also gets a prefix, which is the path before the first wildcard,
+    which is good for filesystem iterations, and allows to replace
+    the own matches in a path on a different Matcher. compare-locales
+    uses that to transform l10n and en-US paths back and forth.
+    '''
+
+    def __init__(self, pattern_or_other, env=None, root=None, encoding=None):
+        '''Create regular expression similar to mozpath.match().
+
+        pattern_or_other -- a pattern string, or a Matcher to copy
+        env -- variable names mapped to patterns, parsed like the
+               main pattern; defaults to no variables
+        root -- if given, made absolute and stored as root of the pattern
+        encoding -- if given, paths are handled as bytes in this
+                    encoding; ignored when copying a Matcher, which
+                    keeps the encoding of the original
+        '''
+        parser = PatternParser()
+        real_env = {k: parser.parse(v) for k, v in (env or {}).items()}
+        self._cached_re = None
+        if root is not None:
+            # make sure that our root is fully expanded and ends with /
+            root = mozpath.abspath(root) + '/'
+        # allow constructing Matchers from Matchers
+        if isinstance(pattern_or_other, Matcher):
+            other = pattern_or_other
+            self.pattern = Pattern(other.pattern)
+            self.env = other.env.copy()
+            self.env.update(real_env)
+            if root is not None:
+                self.pattern.root = root
+            self.encoding = other.encoding
+            return
+        self.env = real_env
+        pattern = pattern_or_other
+        self.pattern = parser.parse(pattern)
+        if root is not None:
+            self.pattern.root = root
+        self.encoding = encoding
+
+    def with_env(self, environ):
+        '''Return a copy of this Matcher with environ added to its env.'''
+        return Matcher(self, environ)
+
+    @property
+    def prefix(self):
+        '''The expanded start of the pattern, up to its prefix_length.
+
+        Encoded if this Matcher has an encoding.
+        '''
+        subpattern = Pattern(self.pattern[:self.pattern.prefix_length])
+        subpattern.root = self.pattern.root
+        prefix = subpattern.expand(self.env)
+        if self.encoding is not None:
+            prefix = prefix.encode(self.encoding)
+        return prefix
+
+    def match(self, path):
+        '''Test the given path against this matcher and its environment.
+
+        Return None if there's no match, and the dictionary of matched
+        variables in this matcher if there's a match.
+        If the match has an android_locale but no locale, the locale
+        is derived from the android_locale.
+        '''
+        self._cache_regex()
+        m = self._cached_re.match(path)
+        if m is None:
+            return None
+        d = m.groupdict()
+        if self.encoding is not None:
+            d = {key: value.decode(self.encoding) for key, value in d.items()}
+        if 'android_locale' in d and 'locale' not in d:
+            # map android_locale to locale code
+            locale = d['android_locale']
+            # map legacy locale codes, he <-> iw, id <-> in, yi <-> ji
+            # Only the complete language code at the start counts;
+            # without the anchor, `min` would turn into `mid`.
+            locale = re.sub(
+                r'\A(iw|in|ji)(?=\Z|-)',
+                lambda legacy: ANDROID_STANDARD_MAP[legacy.group(1)],
+                locale
+            )
+            locale = re.sub(r'-r([A-Z]{2})', r'-\1', locale)
+            locale = locale.replace('b+', '').replace('+', '-')
+            d['locale'] = locale
+        return d
+
+    def _cache_regex(self):
+        '''Compile the regular expression for this Matcher, once.'''
+        if self._cached_re is not None:
+            return
+        pattern = self.pattern.regex_pattern(self.env) + '$'
+        if self.encoding is not None:
+            pattern = pattern.encode(self.encoding)
+        self._cached_re = re.compile(pattern)
+
+    def sub(self, other, path):
+        '''
+        Replace the wildcard matches in this pattern into the
+        pattern of the other Match object.
+
+        Return None if path doesn't match this Matcher. The
+        environment of the other Matcher wins over our matches.
+        '''
+        m = self.match(path)
+        if m is None:
+            return None
+        env = {}
+        env.update(
+            (key, Literal(value if value is not None else ''))
+            for key, value in m.items()
+        )
+        env.update(other.env)
+        path = other.pattern.expand(env)
+        if self.encoding is not None:
+            path = path.encode(self.encoding)
+        return path
+
+    def concat(self, other):
+        '''Concat two Matcher objects.
+
+        The intent is to create one Matcher with variable substitutions that
+        behaves as if you joined the resulting paths.
+        This doesn't do path separator logic, though, and it won't resolve
+        parent directories.
+
+        Raises ValueError if the other Matcher is rooted.
+        '''
+        if not isinstance(other, Matcher):
+            other_matcher = Matcher(other)
+        else:
+            other_matcher = other
+        other_pattern = other_matcher.pattern
+        if other_pattern.root is not None:
+            raise ValueError('Other matcher must not be rooted')
+        result = Matcher(self)
+        result.pattern += other_pattern
+        if self.pattern.prefix_length == len(self.pattern):
+            # We're all prefix, so the prefix extends into other.
+            result.pattern.prefix_length += other_pattern.prefix_length
+        result.env.update(other_matcher.env)
+        return result
+
+    def __str__(self):
+        return self.pattern.expand(self.env)
+
+    def __repr__(self):
+        return '{}({!r}, env={!r}, root={!r})'.format(
+            type(self).__name__, self.pattern, self.env, self.pattern.root
+        )
+
+    def __ne__(self, other):
+        return not (self == other)
+
+    def __eq__(self, other):
+        '''Equality for Matcher.
+
+        The equality for Matchers is defined to have the same pattern,
+        and no conflicting environment. Additional environment settings
+        in self or other are OK.
+        '''
+        if other.__class__ is not self.__class__:
+            return NotImplemented
+        if self.pattern != other.pattern:
+            return False
+        if self.env and other.env:
+            for k in self.env:
+                if k not in other.env:
+                    continue
+                if self.env[k] != other.env[k]:
+                    return False
+        if self.encoding != other.encoding:
+            return False
+        return True
+
+
+def expand(root, path, env):
+    '''Return path as string, rooted at root, with its variables
+    resolved from env.
+
+    This will break if the path contains wildcards.
+    '''
+    return str(Matcher(path, env=env, root=root))
+
+
+class MissingEnvironment(Exception):
+    '''A variable can't be expanded, it's not in the environment.'''
+
+
+class Node:
+    '''Interface of the nodes that make up a parsed pattern.
+
+    Both methods need to be implemented by the subclasses.
+    '''
+    def regex_pattern(self, env):
+        '''Return the regular expression fragment for this Node.'''
+        raise NotImplementedError()
+
+    def expand(self, env):
+        '''Return this Node as string, using the given environment.'''
+        raise NotImplementedError()
+
+
+class Pattern(list, Node):
+    '''A parsed pattern, as list of Nodes.
+
+    `root` is an optional path that relative patterns get prepended.
+    `prefix_length` is the number of leading nodes that make up the
+    prefix of the pattern. Both are taken over from `iterable` if
+    that has them.
+    '''
+    def __init__(self, iterable=()):
+        list.__init__(self, iterable)
+        self.root = getattr(iterable, 'root', None)
+        self.prefix_length = getattr(iterable, 'prefix_length', None)
+
+    def _root_for(self, env):
+        '''Return the root to prepend for env, or an empty string.'''
+        if self.root is None:
+            return ''
+        # make sure we're not hiding a full path
+        # An empty pattern has no first segment and can't be absolute.
+        if self and os.path.isabs(self[0].expand(env)):
+            return ''
+        return self.root
+
+    def regex_pattern(self, env):
+        '''Return the regular expression for the root and all nodes.'''
+        return re.escape(self._root_for(env)) + ''.join(
+            child.regex_pattern(env) for child in self
+        )
+
+    def expand(self, env, raise_missing=False):
+        '''Return the root and all nodes as string.
+
+        At the first node that's missing in env, this either raises
+        MissingEnvironment, or, by default, returns what was expanded
+        up to that node.
+        '''
+        return self._root_for(env) + ''.join(
+            self._expand_children(env, raise_missing)
+        )
+
+    def _expand_children(self, env, raise_missing):
+        # Helper iterator to convert Exception to a stopped iterator
+        for child in self:
+            try:
+                yield child.expand(env, raise_missing=True)
+            except MissingEnvironment:
+                if raise_missing:
+                    raise
+                return
+
+    def __ne__(self, other):
+        return not (self == other)
+
+    def __eq__(self, other):
+        '''Compare the nodes and, unless other is a plain list, also
+        root and prefix_length.
+        '''
+        same_nodes = super().__eq__(other)
+        if same_nodes is NotImplemented:
+            # other is not a list, leave the comparison to Python
+            return NotImplemented
+        if not same_nodes:
+            return False
+        if other.__class__ == list:
+            # good for tests and debugging
+            return True
+        return (
+            self.root == other.root
+            and self.prefix_length == other.prefix_length
+        )
+
+
+class Literal(str, Node):
+    '''A string that stands for itself in a pattern.'''
+
+    def regex_pattern(self, env):
+        # env is not used, the text just needs to be escaped
+        escaped = re.escape(self)
+        return escaped
+
+    def expand(self, env, raise_missing=False):
+        # there's nothing to expand in a Literal
+        return self
+
+
+class Variable(Node):
+    '''Reference to a variable of the environment.
+
+    With `repeat`, the regular expression refers back to the named
+    group of the same name instead of creating a new one.
+    '''
+    def __init__(self, name, repeat=False):
+        self.name, self.repeat = name, repeat
+
+    def regex_pattern(self, env):
+        if not self.repeat:
+            return f'(?P<{self.name}>{self._pattern_from_env(env)})'
+        return f'(?P={self.name})'
+
+    def _pattern_from_env(self, env):
+        if self.name not in env:
+            # match anything, including path segments
+            return '.+?'
+        # make sure we match the value in the environment
+        value = env[self.name]
+        return value.regex_pattern(self._no_cycle(env))
+
+    def expand(self, env, raise_missing=False):
+        '''Return the string for the value of this Variable in env.
+
+        The value is expanded recursively. To avoid recursion loops,
+        that's done with an environment that doesn't contain this
+        variable anymore.
+        Raises MissingEnvironment if the variable is not in env.
+        '''
+        if self.name in env:
+            value = env[self.name]
+            return value.expand(
+                self._no_cycle(env), raise_missing=raise_missing
+            )
+        raise MissingEnvironment
+
+    def _no_cycle(self, env):
+        '''Return env without our variable name, copying it if needed.
+        That way, we can't create cyclic references.
+        '''
+        if self.name in env:
+            env = env.copy()
+            del env[self.name]
+        return env
+
+    def __repr__(self):
+        return f'Variable(name="{self.name}")'
+
+    def __ne__(self, other):
+        return not (self == other)
+
+    def __eq__(self, other):
+        return (
+            other.__class__ is self.__class__
+            and self.name == other.name
+            and self.repeat == other.repeat
+        )
+
+
+class AndroidLocale(Variable):
+    '''Subclass for Android locale code mangling.
+
+    Supports ab-rCD and b+ab+Scrip+DE.
+    Language and Language-Region tags get mapped to ab-rCD, more complex
+    Locale tags to b+.
+    '''
+    def __init__(self, repeat=False):
+        super().__init__('android_locale', repeat)
+
+    def _pattern_from_env(self, env):
+        android_locale = self._get_android_locale(env)
+        if android_locale is not None:
+            return re.escape(android_locale)
+        # no locale in the environment, match anything
+        return '.+?'
+
+    def expand(self, env, raise_missing=False):
+        '''Return the Android code for the `locale` in env.
+
+        Raises MissingEnvironment if env has no `locale`, independent
+        of raise_missing.
+        '''
+        android_locale = self._get_android_locale(env)
+        if android_locale is None:
+            raise MissingEnvironment
+        return android_locale
+
+    def _get_android_locale(self, env):
+        '''Return the `locale` of env as Android code, or None.'''
+        if 'locale' not in env:
+            return None
+        bcp47 = env['locale'].expand(self._no_cycle(env))
+        # map legacy locale codes, he <-> iw, id <-> in, yi <-> ji
+        # Only the complete language code at the start counts;
+        # without the anchor, `she` would turn into `siw`.
+        bcp47 = re.sub(
+            r'\A(he|id|yi)(?=\Z|-)',
+            lambda standard: ANDROID_LEGACY_MAP[standard.group(1)],
+            bcp47
+        )
+        # Only a tag that's exactly Language-Region becomes ab-rCD. A
+        # prefix match would drop whatever follows the region.
+        if re.fullmatch(r'[a-z]{2,3}-[A-Z]{2}', bcp47):
+            return '{}-r{}'.format(*bcp47.split('-'))
+        if '-' in bcp47:
+            return 'b+' + bcp47.replace('-', '+')
+        return bcp47
+
+
+class Star(Node):
+    '''The wildcard `*`, matching within a single path segment.
+
+    `number` makes up the name of the group, `s<number>`.
+    '''
+    def __init__(self, number):
+        self.number = number
+
+    def regex_pattern(self, env):
+        return f'(?P<s{self.number}>[^/]*)'
+
+    def expand(self, env, raise_missing=False):
+        # the matched value is expected under the name of our group
+        group = 's%d' % self.number
+        return env[group]
+
+    def __repr__(self):
+        return type(self).__name__
+
+    def __ne__(self, other):
+        return not (self == other)
+
+    def __eq__(self, other):
+        return (
+            other.__class__ is self.__class__
+            and self.number == other.number
+        )
+
+
+class Starstar(Star):
+    '''The wildcard `**`, optionally matching across path segments.
+
+    `suffix` is what follows the `**` in the pattern, it's part of
+    the match.
+    '''
+    def __init__(self, number, suffix):
+        super().__init__(number)
+        self.suffix = suffix
+
+    def regex_pattern(self, env):
+        return f'(?P<s{self.number}>.+{self.suffix})?'
+
+    def __ne__(self, other):
+        return not (self == other)
+
+    def __eq__(self, other):
+        if super().__eq__(other):
+            return self.suffix == other.suffix
+        return False
+
+
+# The special parts of a pattern, as one of three alternatives:
+# - starstar: `**` at the start of the pattern or right after `/`
+#   or `}`, followed by `/` or the end, which is captured as suffix
+# - star: any other `*`
+# - variable: a name in curly braces, optionally padded with spaces;
+#   the name is captured as varname
+PATH_SPECIAL = re.compile(
+    r'(?P<starstar>(?<![^/}])\*\*(?P<suffix>/|$))'
+    r'|'
+    r'(?P<star>\*)'
+    r'|'
+    r'(?P<variable>{ *(?P<varname>[\w]+) *})'
+)
+
+
+class PatternParser:
+    '''Parse pattern strings into Pattern objects.
+
+    The result and the parsing state are kept on the parser, and are
+    reset for each string that gets parsed.
+    '''
+    def __init__(self):
+        # Nothing to set up yet, just declaring the members that hold
+        # the result and the state while parsing.
+        self.pattern = None
+        self._cursor = None
+        self._stargroup = None
+        self._known_vars = None
+
+    def parse(self, pattern):
+        '''Return the Pattern for the given pattern.
+
+        A Pattern is returned as is, and a Matcher returns its
+        pattern. Strings are split into Literals, Variables and
+        wildcards.
+        '''
+        if isinstance(pattern, Pattern):
+            return pattern
+        if isinstance(pattern, Matcher):
+            return pattern.pattern
+        # Reset result and state
+        self.pattern = Pattern()
+        self._stargroup = itertools.count(1)
+        self._known_vars = set()
+        self._cursor = 0
+        for match in PATH_SPECIAL.finditer(pattern):
+            start = match.start()
+            if start > self._cursor:
+                # plain text between the previous and this match
+                self.pattern.append(Literal(pattern[self._cursor:start]))
+            self.handle(match)
+        # whatever is left after the last match, possibly empty
+        self.pattern.append(Literal(pattern[self._cursor:]))
+        if self.pattern.prefix_length is None:
+            # no wildcard found, all of the pattern is prefix
+            self.pattern.prefix_length = len(self.pattern)
+        return self.pattern
+
+    def handle(self, match):
+        '''Add the node for this match, and move on behind it.'''
+        handler = self.variable if match.group('variable') else self.wildcard
+        handler(match)
+        self._cursor = match.end()
+
+    def variable(self, match):
+        '''Add the node for a variable. Variables that have been seen
+        before are added as repeats.
+        '''
+        varname = match.group('varname')
+        repeat = varname in self._known_vars
+        # Special case Android locale code matching.
+        # It's kinda sad, but true.
+        if varname == 'android_locale':
+            node = AndroidLocale(repeat)
+        else:
+            node = Variable(varname, repeat)
+        self.pattern.append(node)
+        self._known_vars.add(varname)
+
+    def wildcard(self, match):
+        '''Add the node for `*` or `**`. The first wildcard is where
+        the prefix of the pattern ends.
+        '''
+        if self.pattern.prefix_length is None:
+            self.pattern.prefix_length = len(self.pattern)
+        number = next(self._stargroup)
+        if match.group('star'):
+            node = Star(number)  # *
+        else:
+            node = Starstar(number, match.group('suffix'))  # **
+        self.pattern.append(node)
diff --git a/third_party/python/compare_locales/compare_locales/paths/project.py b/third_party/python/compare_locales/compare_locales/paths/project.py
new file mode 100644
index 0000000000..1f18a9d2d5
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/paths/project.py
@@ -0,0 +1,260 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+import re
+from compare_locales import mozpath
+from .matcher import Matcher
+
+
class ExcludeError(ValueError):
    '''Raised when a config that declares excludes is included or excluded.
    '''
+
+
class ProjectConfig:
    '''Abstraction of l10n project configuration data.

    A config holds path patterns (`paths`), filter rules (`rules`) or a
    legacy filter function (`filter_py`), the locales it applies to, and
    nested configs: `children` are included, `excludes` are excluded.
    '''

    def __init__(self, path):
        self.filter_py = None  # legacy filter code
        # Entries of `paths` are dictionaries created by add_paths():
        # {
        #  'l10n': pattern,
        #  'reference': pattern,  # optional
        #  'locales': [],  # optional
        #  'test': [],  # optional
        # }
        self.path = path
        self.root = None
        self.paths = []
        self.rules = []
        self.locales = None
        # cache for all_locales, as that's not in `filter`
        self._all_locales = None
        self.environ = {}
        self.children = []
        self.excludes = []
        # FilterCache for the locale that was filtered last, see cache()
        self._cache = None

    def same(self, other):
        '''Equality test, ignoring locales.

        Compares path, root, paths, rules and environ, and recurses
        into the children pairwise.
        '''
        if other.__class__ is not self.__class__:
            return False
        if len(self.children) != len(other.children):
            return False
        for prop in ('path', 'root', 'paths', 'rules', 'environ'):
            if getattr(self, prop) != getattr(other, prop):
                return False
        for this_child, other_child in zip(self.children, other.children):
            if not this_child.same(other_child):
                return False
        return True

    def set_root(self, basepath):
        '''Set `root` to `basepath`, resolved against the config file's
        directory. Without a config `path`, `root` is reset to None.
        '''
        if self.path is None:
            self.root = None
            return
        self.root = mozpath.abspath(
            mozpath.join(mozpath.dirname(self.path), basepath)
        )

    def add_environment(self, **kwargs):
        '''Merge the keyword arguments into the `environ` dictionary.'''
        self.environ.update(kwargs)

    def add_paths(self, *paths):
        '''Add path dictionaries to this config.
        The dictionaries must have a `l10n` key. For monolingual files,
        `reference` is also required.
        An optional key `test` is allowed to enable additional tests for this
        path pattern.
        '''
        self._all_locales = None  # clear cache
        # The filter cache holds per-locale copies of the l10n matchers,
        # drop it so that the new paths are picked up.
        self._cache = None
        for d in paths:
            rv = {
                'l10n': Matcher(d['l10n'], env=self.environ, root=self.root),
                'module': d.get('module')
            }
            if 'reference' in d:
                rv['reference'] = Matcher(
                    d['reference'], env=self.environ, root=self.root
                )
            if 'test' in d:
                rv['test'] = d['test']
            if 'locales' in d:
                # copy, so later changes to the input don't leak in
                rv['locales'] = d['locales'][:]
            self.paths.append(rv)

    def set_filter_py(self, filter_function):
        '''Set legacy filter.py code.
        Assert that no rules are set.
        Also, normalize output already here.
        '''
        assert not self.rules

        def filter_(module, path, entity=None):
            try:
                rv = filter_function(module, path, entity=entity)
            except BaseException:  # we really want to handle EVERYTHING here
                return 'error'
            # Map the legacy return values onto the rule actions.
            rv = {
                True: 'error',
                False: 'ignore',
                'report': 'warning'
            }.get(rv, rv)
            assert rv in ('error', 'ignore', 'warning', None)
            return rv
        self.filter_py = filter_

    def add_rules(self, *rules):
        '''Add rules to filter on.
        Assert that there's no legacy filter.py code hooked up.
        '''
        assert self.filter_py is None
        # The filter cache holds per-locale copies of the rules,
        # drop it so that the new rules are picked up.
        self._cache = None
        for rule in rules:
            self.rules.extend(self._compile_rule(rule))

    def add_child(self, child):
        '''Include `child` in this config.

        Raises ExcludeError if the child declares excludes itself.
        '''
        self._all_locales = None  # clear cache
        if child.excludes:
            raise ExcludeError(
                'Included configs cannot declare their own excludes.'
            )
        self.children.append(child)

    def exclude(self, child):
        '''Exclude what `child` covers from this config.

        Raises ExcludeError if `child` or any config below it declares
        excludes itself.
        '''
        for config in child.configs:
            if config.excludes:
                raise ExcludeError(
                    'Excluded configs cannot declare their own excludes.'
                )
        self.excludes.append(child)

    def set_locales(self, locales, deep=False):
        '''Set the locales of this config, and with `deep` also those of
        all children, recursively.
        '''
        self._all_locales = None  # clear cache
        self.locales = locales
        if not deep:
            return
        for child in self.children:
            child.set_locales(locales, deep=deep)

    @property
    def configs(self):
        'Recursively get all configs in this project and its children'
        yield self
        for child in self.children:
            yield from child.configs

    @property
    def all_locales(self):
        'Recursively get all locales in this project and its paths'
        if self._all_locales is None:
            all_locales = set()
            for config in self.configs:
                if config.locales is not None:
                    all_locales.update(config.locales)
                for paths in config.paths:
                    if 'locales' in paths:
                        all_locales.update(paths['locales'])
            self._all_locales = sorted(all_locales)
        return self._all_locales

    def filter(self, l10n_file, entity=None):
        '''Filter a localization file or entities within, according to
        this configuration file.

        Returns 'ignore' for locales that are not in `all_locales`.
        Otherwise the result comes from the legacy filter function if
        one is set, or from the rules, where "no opinion" is reported
        as 'ignore'.
        '''
        if l10n_file.locale not in self.all_locales:
            return 'ignore'
        if self.filter_py is not None:
            return self.filter_py(l10n_file.module, l10n_file.file,
                                  entity=entity)
        rv = self._filter(l10n_file, entity=entity)
        if rv is None:
            return 'ignore'
        return rv

    class FilterCache:
        '''Matchers for paths and rules, bound to a single locale.'''

        def __init__(self, locale):
            self.locale = locale
            self.rules = []
            self.l10n_paths = []

    def cache(self, locale):
        '''Return the FilterCache for `locale`.

        Only one locale is cached at a time. Asking for a different
        locale rebuilds the cache from `paths` and `rules`.
        '''
        if self._cache and self._cache.locale == locale:
            return self._cache
        self._cache = self.FilterCache(locale)
        for paths in self.paths:
            if 'locales' in paths and locale not in paths['locales']:
                continue
            self._cache.l10n_paths.append(paths['l10n'].with_env({
                "locale": locale
            }))
        for rule in self.rules:
            cached_rule = rule.copy()
            cached_rule['path'] = rule['path'].with_env({
                "locale": locale
            })
            self._cache.rules.append(cached_rule)
        return self._cache

    def _filter(self, l10n_file, entity=None):
        '''Compute the action for a file or entity from children and rules.

        Returns 'error', 'warning' or 'ignore', or None if neither this
        config nor its children cover the file, or if an excluded
        config reports 'error' for it.
        '''
        if any(
            exclude.filter(l10n_file) == 'error'
            for exclude in self.excludes
        ):
            return
        actions = {
            child._filter(l10n_file, entity=entity)
            for child in self.children}
        if 'error' in actions:
            # return early if we know we'll error
            return 'error'

        cached = self.cache(l10n_file.locale)
        if any(p.match(l10n_file.fullpath) for p in cached.l10n_paths):
            # Files covered by our paths default to 'error'.
            action = 'error'
            # Rules added later take precedence, so search backwards.
            for rule in reversed(cached.rules):
                if not rule['path'].match(l10n_file.fullpath):
                    continue
                if ('key' in rule) ^ (entity is not None):
                    # key/file mismatch, not a matching rule
                    continue
                if 'key' in rule and not rule['key'].match(entity):
                    continue
                action = rule['action']
                break
            actions.add(action)
        # The most severe action wins.
        if 'error' in actions:
            return 'error'
        if 'warning' in actions:
            return 'warning'
        if 'ignore' in actions:
            return 'ignore'

    def _compile_rule(self, rule):
        '''Yield compiled rules for a rule dictionary.

        Lists of paths or keys are expanded into one rule per
        combination. String paths are turned into Matcher objects and
        string keys into compiled regular expressions. A key is matched
        literally unless it starts with `re:`.
        The dictionary passed in is left unmodified.
        '''
        assert 'path' in rule
        if isinstance(rule['path'], list):
            for path in rule['path']:
                _rule = rule.copy()
                _rule['path'] = Matcher(path, env=self.environ, root=self.root)
                yield from self._compile_rule(_rule)
            return
        if isinstance(rule['path'], str):
            # Work on a copy, the caller's dictionary must keep its
            # string path.
            rule = rule.copy()
            rule['path'] = Matcher(
                rule['path'], env=self.environ, root=self.root
            )
        if 'key' not in rule:
            yield rule
            return
        if not isinstance(rule['key'], str):
            for key in rule['key']:
                _rule = rule.copy()
                _rule['key'] = key
                yield from self._compile_rule(_rule)
            return
        rule = rule.copy()
        key = rule['key']
        if key.startswith('re:'):
            key = key[3:]
        else:
            # literal key, must match completely
            key = re.escape(key) + '$'
        rule['key'] = re.compile(key)
        yield rule
diff --git a/third_party/python/compare_locales/compare_locales/plurals.py b/third_party/python/compare_locales/compare_locales/plurals.py
new file mode 100644
index 0000000000..b04006b14f
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/plurals.py
@@ -0,0 +1,221 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+'Mapping of locales to CLDR plural categories as implemented by PluralForm.jsm'
+
# Plural categories per plural rule, the position in the tuple is the
# rule number. The language in each comment names the rule. Where a
# "CLDR has" note is given, it lists the categories CLDR uses instead.
CATEGORIES_BY_INDEX = (
    ('other',),  # 0: Chinese
    ('one', 'other'),  # 1: English
    ('one', 'other'),  # 2: French
    ('zero', 'one', 'other'),  # 3: Latvian
    ('one', 'two', 'few', 'other'),  # 4: Scottish Gaelic
    ('one', 'few', 'other'),  # 5: Romanian
    # 6: Lithuanian
    # CLDR has: one, few, many (fractions), other
    ('one', 'other', 'few'),
    # 7: Russian
    # CLDR has: one, few, many, other (fractions)
    ('one', 'few', 'many'),
    # 8: Slovak
    # CLDR has: one, few, many (fractions), other
    ('one', 'few', 'other'),
    # 9: Polish
    # CLDR has: one, few, many, other (fractions)
    ('one', 'few', 'many'),
    ('one', 'two', 'few', 'other'),  # 10: Slovenian
    ('one', 'two', 'few', 'many', 'other'),  # 11: Irish Gaelic
    # 12: Arabic
    # CLDR has: zero, one, two, few, many, other
    ('one', 'two', 'few', 'many', 'other', 'zero'),
    ('one', 'few', 'many', 'other'),  # 13: Maltese
    # 14: Unused
    # CLDR has: one, other
    ('one', 'two', 'other'),
    ('one', 'other'),  # 15: Icelandic, Macedonian
    ('one', 'two', 'few', 'many', 'other'),  # 16: Breton
    # 17: Shuar
    # CLDR has: (missing)
    ('zero', 'other'),
    ('zero', 'one', 'two', 'few', 'many', 'other'),  # 18: Welsh
    ('one', 'few', 'other'),  # 19: Bosnian, Croatian, Serbian
)
+
# No exceptions are defined at this point.
# NOTE(review): nothing in this module reads this mapping, the expected
# keys and values need to be confirmed against its users.
CATEGORIES_EXCEPTIONS = {}
+
# Plural rule number for each locale code, to be used as an index
# into CATEGORIES_BY_INDEX.
# Each key must be listed once only; in a dict literal a repeated key
# silently overrides the earlier entry.
CATEGORIES_BY_LOCALE = {
    'ace': 0,
    'ach': 1,
    'af': 1,
    'ak': 2,
    'an': 1,
    'ar': 12,
    'arn': 1,
    'as': 1,
    'ast': 1,
    'az': 1,
    'be': 7,
    'bg': 1,
    'bn': 2,
    'bo': 0,
    'br': 16,
    'brx': 1,
    'bs': 19,
    'ca': 1,
    'cak': 1,
    'ckb': 1,
    'crh': 1,
    'cs': 8,
    'csb': 9,
    'cv': 1,
    'cy': 18,
    'da': 1,
    'de': 1,
    'dsb': 10,
    'el': 1,
    'en': 1,
    'eo': 1,
    'es': 1,
    'et': 1,
    'eu': 1,
    'fa': 2,
    'ff': 1,
    'fi': 1,
    'fr': 2,
    'frp': 2,
    'fur': 1,
    'fy': 1,
    'ga': 11,
    'gd': 4,
    'gl': 1,
    'gn': 1,
    'gu': 2,
    'he': 1,
    'hi': 2,
    'hr': 19,
    'hsb': 10,
    'hto': 1,
    'hu': 1,
    'hy': 1,
    'hye': 1,
    'ia': 1,
    'id': 0,
    'ilo': 0,
    'is': 15,
    'it': 1,
    'ja': 0,
    'jiv': 17,
    'ka': 1,
    'kab': 1,
    'kk': 1,
    'km': 0,
    'kn': 1,
    'ko': 0,
    'ks': 1,
    'ku': 1,
    'lb': 1,
    'lg': 1,
    'lij': 1,
    'lo': 0,
    'lt': 6,
    'ltg': 3,
    'lv': 3,
    'lus': 0,
    'mai': 1,
    'meh': 0,
    'mix': 0,
    'mk': 15,
    'ml': 1,
    'mn': 1,
    'mr': 1,
    'ms': 0,
    'my': 0,
    'nb': 1,
    'ne': 1,
    'nl': 1,
    'nn': 1,
    'nr': 1,
    'nso': 2,
    'ny': 1,
    'oc': 2,
    'or': 1,
    'pa': 2,
    'pai': 0,
    'pl': 9,
    'pt': 1,
    'quy': 1,
    'qvi': 1,
    'rm': 1,
    'ro': 5,
    'ru': 7,
    'rw': 1,
    'sah': 0,
    'sat': 1,
    'sc': 1,
    'scn': 1,
    'sco': 1,
    'si': 1,
    'sk': 8,
    'skr': 1,
    'sl': 10,
    'son': 1,
    'sq': 1,
    'sr': 19,
    'ss': 1,
    'st': 1,
    'sv': 1,
    'sw': 1,
    'szl': 9,
    'ta': 1,
    'te': 1,
    'tg': 1,
    'th': 0,
    'tl': 1,
    'tn': 1,
    'tr': 1,
    'trs': 1,
    'ts': 1,
    'tsz': 1,
    'uk': 7,
    'ur': 1,
    'uz': 1,
    've': 1,
    'vi': 0,
    'wo': 0,
    'xh': 1,
    'zam': 1,
    'zh-CN': 0,
    'zh-TW': 0,
    'zu': 2,
}
+
+
def get_plural(locale):
    '''Return the tuple of plural categories for `locale`.

    Returns None if get_plural_rule() knows no plural rule for the
    locale.
    '''
    rule_index = get_plural_rule(locale)
    return None if rule_index is None else CATEGORIES_BY_INDEX[rule_index]
+
+
def get_plural_rule(locale):
    '''Return the plural rule number for `locale`.

    The full locale code is looked up first. If that's not listed, the
    part before the first `-` is tried. Returns None for a locale of
    None and for codes that aren't listed either way.
    '''
    if locale is None:
        return None
    try:
        return CATEGORIES_BY_LOCALE[locale]
    except KeyError:
        # Not listed as is, fall back to the part before the first dash.
        language = locale.partition('-')[0]
        return CATEGORIES_BY_LOCALE.get(language)
diff --git a/third_party/python/compare_locales/compare_locales/serializer.py b/third_party/python/compare_locales/compare_locales/serializer.py
new file mode 100644
index 0000000000..826fb29693
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/serializer.py
@@ -0,0 +1,137 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+'''Serialize string changes.
+
+The serialization logic is based on the cross-channel merge algorithm.
+It's taking the file structure for the first file, and localizable entries
+from the last.
+Input data is the parsed reference as a list of parser.walk(),
+the existing localized file, also a list of parser.walk(), and a dictionary
+of newly added keys and raw values.
+To remove a string from a localization, pass `None` as value for a key.
+
+The marshalling between raw values and entities is done via Entity.unwrap
+and Entity.wrap.
+
+To avoid adding English reference strings into the generated file, the
+actual entities in the reference are replaced with Placeholders, which
+are removed in a final pass over the result of merge_resources. After that,
+we also prune whitespace once more.
+'''
+
+from codecs import encode
+from functools import reduce
+
+from compare_locales.merge import merge_resources, serialize_legacy_resource
+from compare_locales.parser import getParser
+from compare_locales.parser.base import (
+ Entity,
+ PlaceholderEntity,
+ Junk,
+ Whitespace,
+)
+
+
class SerializationNotSupportedError(ValueError):
    '''Raised by serialize() for file formats without a parser.'''
+
+
def serialize(filename, reference, old_l10n, new_data):
    '''Returns a byte string of the serialized content to use.

    `filename` selects the parser. `reference` and `old_l10n` are the
    reference and the existing localization, both as the result of
    parser.walk(). `new_data` maps keys to the raw values to serialize,
    with `None` marking a string to remove.

    Raises a SerializationNotSupportedError if we don't support the file
    format.
    '''
    try:
        parser = getParser(filename)
    except UserWarning:
        raise SerializationNotSupportedError(
            f'Unsupported file format ({filename}).')
    # The template is the reference with whitespace and all, but with
    # placeholders instead of the actual entities.
    template = [
        placeholder(item)
        for item in reference
        if not isinstance(item, Junk)
    ]
    reference_entities = {
        item.key: item
        for item in reference
        if isinstance(item, Entity)
    }
    # Strip obsolete strings from the existing localization.
    kept_l10n = sanitize_old(reference_entities.keys(), old_l10n, new_data)
    # Create new entities by wrapping the raw values with the help of
    # the reference entity. Removals and keys that are not in the
    # reference are skipped.
    additions = [
        reference_entities[key].wrap(raw_value)
        for key, raw_value in new_data.items()
        if raw_value is not None and key in reference_entities
    ]
    merged = merge_resources(
        parser,
        [template, kept_l10n, additions],
        keep_newest=False
    )
    without_placeholders = prune_placeholders(merged)
    content = serialize_legacy_resource(without_placeholders)
    return encode(content, parser.encoding)
+
+
def sanitize_old(known_keys, old_l10n, new_data):
    """Return `old_l10n` without Junk and without outdated translations.

    Entities with a key that's not in `known_keys`, or with `None` as
    value in `new_data`, are replaced by placeholders. They're not
    dropped here, so that `prune_placeholders` can find their associated
    comments and remove them, too.
    """

    def is_outdated(entry):
        # Only Entities can be obsolete or marked to be removed.
        if not isinstance(entry, Entity):
            return False
        return entry.key not in known_keys or (
            entry.key in new_data and new_data[entry.key] is None
        )

    sanitized = []
    for entry in old_l10n:
        if isinstance(entry, Junk):
            continue
        if is_outdated(entry):
            sanitized.append(placeholder(entry))
        else:
            sanitized.append(entry)
    return sanitized
+
+
def placeholder(entry):
    '''Return a PlaceholderEntity with the key of `entry` if that's an
    Entity, and `entry` itself for anything else.
    '''
    return PlaceholderEntity(entry.key) if isinstance(entry, Entity) else entry
+
+
def prune_placeholders(entries):
    '''Return `entries` as a list without PlaceholderEntity items.

    Removing placeholders can leave Whitespace entries next to each
    other. Of such a run, only the one with the longest `.all` is kept.
    '''
    result = []
    for entry in entries:
        if isinstance(entry, PlaceholderEntity):
            continue
        if result and isinstance(entry, Whitespace):
            previous = result[-1]
            if isinstance(previous, Whitespace):
                # Prefer the longer whitespace.
                if len(entry.all) > len(previous.all):
                    result[-1] = entry
                continue
        result.append(entry)
    return result
diff --git a/third_party/python/compare_locales/compare_locales/util.py b/third_party/python/compare_locales/compare_locales/util.py
new file mode 100644
index 0000000000..71eadd8749
--- /dev/null
+++ b/third_party/python/compare_locales/compare_locales/util.py
@@ -0,0 +1,11 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# This file is shared between compare-locales and locale-inspector
+# test_util is in compare-locales only, for the sake of easy
+# development.
+
+
def parseLocales(content):
    '''Return the sorted list of locale codes listed in `content`.

    Each line contributes its first whitespace-separated word, anything
    after that on the line is ignored. Empty lines and lines holding
    only whitespace are skipped.
    '''
    # str.split() without arguments yields an empty list for blank
    # lines, which filters them out before taking the first word.
    return sorted(
        words[0]
        for words in (line.split() for line in content.splitlines())
        if words
    )