diff options
Diffstat (limited to 'third_party/python/compare_locales')
45 files changed, 6569 insertions, 0 deletions
diff --git a/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/LICENSE.md b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/LICENSE.md new file mode 100644 index 0000000000..a612ad9813 --- /dev/null +++ b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/LICENSE.md @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. 
"Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. 
Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. 
Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/METADATA b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/METADATA new file mode 100644 index 0000000000..65ff8760bf --- /dev/null +++ b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/METADATA @@ -0,0 +1,84 @@ +Metadata-Version: 2.1 +Name: compare-locales +Version: 9.0.1 +Summary: Lint Mozilla localizations +Home-page: https://github.com/mozilla/compare-locales +Author: Axel Hecht +Author-email: axel@mozilla.com +License: MPL 2.0 +Platform: any +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0) +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Topic :: Software Development :: Localization +Classifier: Topic :: Software Development :: Testing +Requires-Python: >=3.7, <4 +Description-Content-Type: text/markdown +License-File: LICENSE.md +Requires-Dist: fluent.syntax (<0.20,>=0.18.0) +Requires-Dist: six +Requires-Dist: toml + +![Build tests](https://github.com/mozilla/compare-locales/workflows/test/badge.svg) +# compare-locales +Lint Mozilla localizations + +Finds +* missing strings +* obsolete strings +* errors on runtime errors without false positives +* warns on possible runtime errors + +It 
also includes `l10n-merge` functionality, which pads localizations with +missing English strings, and replaces entities with errors with English. + +If you want to check your original code for errors like duplicated messages, +use `moz-l10n-lint`, which is also part of this package. You can also use +this to check for conflicts between your strings and those already exposed +to l10n. + +# Configuration + +You configure `compare-locales` (and `moz-l10n-lint`) through a +[project configuration](https://moz-l10n-config.readthedocs.io/en/latest/fileformat.html) +file, `l10n.toml`. + +# Examples + +To check all locales in a project use + +```bash +compare-locales l10n.toml . +``` + +To check Firefox against a local check-out of l10n-central, use + +```bash +compare-locales browser/locales/l10n.toml ../l10n-central +``` + +If you just want to check particular locales, specify them as additional +commandline parameters. + +To lint your local work, use + +```bash +moz-l10n-lint l10n.toml +``` + +To check for conflicts against already existing strings: + +```bash +moz-l10n-lint --reference-project ../android-l10n/mozilla-mobile/fenix l10n.toml +moz-l10n-lint --l10n-reference ../gecko-strings browser/locales/l10n.toml +``` + +to check for a monolithic project like Fenix or a gecko project like Firefox, +resp. 
diff --git a/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/RECORD b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/RECORD new file mode 100644 index 0000000000..1d81d9fca6 --- /dev/null +++ b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/RECORD @@ -0,0 +1,45 @@ +compare_locales/__init__.py,sha256=Lbi3Zk69IMtSQjV8b_gDCN24gZf_Vjd35WfEDZu9VNI,18 +compare_locales/commands.py,sha256=cAy0ZseVq2oAkXQyacn671PqfNx_zSraPgeSjAV7pWs,8428 +compare_locales/keyedtuple.py,sha256=WVOkwqS2y3-gH1GwU8oPhay5OeN1YsXTEPb1WacqiS4,1507 +compare_locales/merge.py,sha256=Cuaw783A0YaEpK_cV19iFNayg28l3VwsHLOvUX06y2w,4657 +compare_locales/mozpath.py,sha256=ZzBm7Y9LgO161UgqzHgniyIIXwAlTUDbF1Q2O9FxHL4,4232 +compare_locales/plurals.py,sha256=s5M29AZElgB4z9L24xtc3_W7lUK6UZr_j1APv89fx28,4015 +compare_locales/serializer.py,sha256=uJR-fL2h_X1j0lNnv3PwJ4RRV_x-5kc66KDJg863JvU,4408 +compare_locales/util.py,sha256=ttl1tcGveJpYqoHKVlIplhb0wSjAjAaTRQT0z6xoYrQ,439 +compare_locales/checks/__init__.py,sha256=7S1or4MzMxMA_MtRu-CB5eFyPDPnv1Zq6GGCToaztwo,969 +compare_locales/checks/android.py,sha256=L0z-DJatylz7NeQnAq0sA_fXHTXj0dfZ-nNS1DJPa-8,8318 +compare_locales/checks/base.py,sha256=ld5YSptqIU8xWWs9KKY-u9XP7oN8NrmvzqN605dwRPE,4165 +compare_locales/checks/dtd.py,sha256=OHG99oQI-tT9ZkSPCJR_T9idSSycI6mFSPrb6OJmdHw,9961 +compare_locales/checks/fluent.py,sha256=QP_709JGmEaqruYCyc17WuBcbet6MCa2jexuRHJaMQk,13019 +compare_locales/checks/properties.py,sha256=gtd-5fLWDdowN_KYgQ3dZLsElQHQ6NVvp4jx57GRPjA,6558 +compare_locales/compare/__init__.py,sha256=VMGx8O_MavjZGrcn_6DSfT-J75_ry8m2GxLgUcoUQjM,3293 +compare_locales/compare/content.py,sha256=qCOLcFCoWqktVS-FbsNeI0w1JPhi3t3gqz26Or592D8,10990 +compare_locales/compare/observer.py,sha256=RopVbCeq8nWilR7kfrAfBNfDkF2wHUv98Y8ki49TKMM,7357 +compare_locales/compare/utils.py,sha256=crRWvQYRoKwQbpu1z1IuLjWqOq-PMx23EHNIIAH3eDU,4197 
+compare_locales/integration_tests/__init__.py,sha256=eOFgaCLveRf8s90SCQUeZRRxG5LAXwUSxQHxi4H4hvc,154 +compare_locales/integration_tests/test_plurals.py,sha256=Hs4pkXf-DJL7yxnsXo1lbz_1gBpL-1DKaeYy1cS4UY8,1643 +compare_locales/lint/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +compare_locales/lint/cli.py,sha256=dVf9TV5QgDy_5W1jpTIKzhZyvmRDZIZg1mZPBl9RbLE,2965 +compare_locales/lint/linter.py,sha256=cyS6SivquOgXUpQbjpFHs7GgdJbYgsW-5jT0F3RDGyQ,4211 +compare_locales/lint/util.py,sha256=hgHkSvNqWqEiFN38daujWXBUmlQAdy-XBRVGVY9RBfY,1290 +compare_locales/parser/__init__.py,sha256=BVL7HrZOmRo0tGDoROn1f2Ka93314LhrTGPU4Cx0pVU,2041 +compare_locales/parser/android.py,sha256=SvTeAInvGBlal8Ahpv9uA8SaHIZ1LOS0s9Kb-36DJQk,9212 +compare_locales/parser/base.py,sha256=1cDXMnkzM7Qt1KbwGlgKuNm17hPsoWgpdpJDC_9Icqg,12923 +compare_locales/parser/defines.py,sha256=LFseFNLFGb5bHNEmcYqeBymy7VzKIm7OPc6vSoQ298w,3549 +compare_locales/parser/dtd.py,sha256=Dmb8Rk-ptooLbHE9Le9lUUvdtWWFUtSBTlS8w2uWH94,4325 +compare_locales/parser/fluent.py,sha256=GHFCKuqaozGoN5C1c0PGBDhtQ994Swutw_aHXtu0WoM,7035 +compare_locales/parser/ini.py,sha256=I-t-hmGq6VH-sinAxjnIUwtPM2EE_AfMXlJ9G9hKnAs,1545 +compare_locales/parser/po.py,sha256=d9SYQ3WBTICGO_yFvz5SIHjM8mld7oYd-ZupXRN-qZ4,3220 +compare_locales/parser/properties.py,sha256=rnmomMr1-EDvjyC3R1lGl-nYkIZA1B9E2C-U-N_7YXY,3716 +compare_locales/paths/__init__.py,sha256=pQZ4FlsedUtR8dA-uqTqhiNC3rQvPZNzEoTRdJLbyts,1419 +compare_locales/paths/configparser.py,sha256=xIWYDgasIt_qXIcHvH6DMLtXiiF5zbu3Zi8bbrnArtY,4377 +compare_locales/paths/files.py,sha256=2uEhVEjpkGZBJNiF2jwiN5oyxhNouLCI7Hivw4SgkRE,9165 +compare_locales/paths/ini.py,sha256=5IPcgacKYCxKx3dEiNpi8MztYWWFQT6ATOgtpFaT54I,8411 +compare_locales/paths/matcher.py,sha256=4k0UZr1PvFAb29R_nATR5qdWP4ThJGy36yMf6Ipie58,15099 +compare_locales/paths/project.py,sha256=Tl6CfikkOKDi0E3BcxWS4Q3PSU-rjFKVdeNcENwQVN4,8784 
+compare_locales-9.0.1.dist-info/LICENSE.md,sha256=HyVuytGSiAUQ6ErWBHTqt1iSGHhLmlC8fO7jTCuR8dU,16725 +compare_locales-9.0.1.dist-info/METADATA,sha256=j59rhNO4K7-WQKT_uxjCMBXlBcCyYuu3trfMS5Sskuw,2595 +compare_locales-9.0.1.dist-info/WHEEL,sha256=a-zpFRIJzOq5QfuhBzbhiA1eHTzNCJn8OdRvhdNX0Rk,110 +compare_locales-9.0.1.dist-info/entry_points.txt,sha256=EYuE78Z7UKpwisLmRuYHZdosK06cETbXNN4BZICR6xM,127 +compare_locales-9.0.1.dist-info/top_level.txt,sha256=eSEPLAFZcEPFC1j0N9GtVpMaKCFKw67ehDx9CMcoel0,16 +compare_locales-9.0.1.dist-info/RECORD,, diff --git a/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/WHEEL b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/WHEEL new file mode 100644 index 0000000000..f771c29b87 --- /dev/null +++ b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/WHEEL @@ -0,0 +1,6 @@ +Wheel-Version: 1.0 +Generator: bdist_wheel (0.40.0) +Root-Is-Purelib: true +Tag: py2-none-any +Tag: py3-none-any + diff --git a/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/entry_points.txt b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/entry_points.txt new file mode 100644 index 0000000000..03d6f06f40 --- /dev/null +++ b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/entry_points.txt @@ -0,0 +1,3 @@ +[console_scripts] +compare-locales = compare_locales.commands:CompareLocales.call +moz-l10n-lint = compare_locales.lint.cli:main diff --git a/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/top_level.txt b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/top_level.txt new file mode 100644 index 0000000000..d9c74fc101 --- /dev/null +++ b/third_party/python/compare_locales/compare_locales-9.0.1.dist-info/top_level.txt @@ -0,0 +1 @@ +compare_locales diff --git a/third_party/python/compare_locales/compare_locales/__init__.py b/third_party/python/compare_locales/compare_locales/__init__.py new file mode 100644 index 
0000000000..23b7f329ba --- /dev/null +++ b/third_party/python/compare_locales/compare_locales/__init__.py @@ -0,0 +1 @@ +version = "9.0.1" diff --git a/third_party/python/compare_locales/compare_locales/checks/__init__.py b/third_party/python/compare_locales/compare_locales/checks/__init__.py new file mode 100644 index 0000000000..c15ede03f9 --- /dev/null +++ b/third_party/python/compare_locales/compare_locales/checks/__init__.py @@ -0,0 +1,27 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from .base import Checker, EntityPos +from .android import AndroidChecker +from .dtd import DTDChecker +from .fluent import FluentChecker +from .properties import PropertiesChecker + + +__all__ = [ + 'Checker', 'EntityPos', + 'AndroidChecker', 'DTDChecker', 'FluentChecker', 'PropertiesChecker', +] + + +def getChecker(file, extra_tests=None): + if PropertiesChecker.use(file): + return PropertiesChecker(extra_tests, locale=file.locale) + if DTDChecker.use(file): + return DTDChecker(extra_tests, locale=file.locale) + if FluentChecker.use(file): + return FluentChecker(extra_tests, locale=file.locale) + if AndroidChecker.use(file): + return AndroidChecker(extra_tests, locale=file.locale) + return Checker(extra_tests, locale=file.locale) diff --git a/third_party/python/compare_locales/compare_locales/checks/android.py b/third_party/python/compare_locales/compare_locales/checks/android.py new file mode 100644 index 0000000000..d5a1f2f25f --- /dev/null +++ b/third_party/python/compare_locales/compare_locales/checks/android.py @@ -0,0 +1,256 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +import re +from xml.dom import minidom + +from .base import Checker +from ..parser.android import textContent + + +class AndroidChecker(Checker): + pattern = re.compile('(.*)?strings.*\\.xml$') + + def check(self, refEnt, l10nEnt): + '''Given the reference and localized Entities, performs checks. + + This is a generator yielding tuples of + - "warning" or "error", depending on what should be reported, + - tuple of line, column info for the error within the string + - description string to be shown in the report + ''' + yield from super().check(refEnt, l10nEnt) + refNode = refEnt.node + l10nNode = l10nEnt.node + # Apples and oranges, error out. + if refNode.nodeName != l10nNode.nodeName: + yield ("error", 0, "Incompatible resource types", "android") + return + # Once we start parsing more resource types, make sure to add checks + # for them. + if refNode.nodeName != "string": + yield ("warning", 0, "Unsupported resource type", "android") + return + yield from self.check_string([refNode], l10nEnt) + + def check_string(self, refs, l10nEnt): + '''Check a single string literal against a list of references. + + There should be multiple nodes given for <plurals> or <string-array>. 
+ ''' + l10n = l10nEnt.node + if self.not_translatable(l10n, *refs): + yield ( + "error", + 0, + "strings must be translatable", + "android" + ) + return + if self.no_at_string(l10n): + yield ( + "error", + 0, + "strings must be translatable", + "android" + ) + return + if self.no_at_string(*refs): + yield ( + "warning", + 0, + "strings must be translatable", + "android" + ) + if self.non_simple_data(l10n): + yield ( + "error", + 0, + "Only plain text allowed, " + "or one CDATA surrounded by whitespace", + "android" + ) + return + yield from check_apostrophes(l10nEnt.val) + + params, errors = get_params(refs) + for error, pos in errors: + yield ( + "warning", + pos, + error, + "android" + ) + if params: + yield from check_params(params, l10nEnt.val) + + def not_translatable(self, *nodes): + return any( + node.hasAttribute("translatable") + and node.getAttribute("translatable") == "false" + for node in nodes + ) + + def no_at_string(self, *ref_nodes): + '''Android allows to reference other strings by using + @string/identifier + instead of the actual value. Those references don't belong into + a localizable file, warn on that. + ''' + return any( + textContent(node).startswith('@string/') + for node in ref_nodes + ) + + def non_simple_data(self, node): + '''Only allow single text nodes, or, a single CDATA node + surrounded by whitespace. 
+ ''' + cdata = [ + child + for child in node.childNodes + if child.nodeType == minidom.Node.CDATA_SECTION_NODE + ] + if len(cdata) == 0: + if node.childNodes.length == 0: + # empty translation is OK + return False + if node.childNodes.length != 1: + return True + return node.childNodes[0].nodeType != minidom.Node.TEXT_NODE + if len(cdata) > 1: + return True + for child in node.childNodes: + if child == cdata[0]: + continue + if child.nodeType != minidom.Node.TEXT_NODE: + return True + if child.data.strip() != "": + return True + return False + + +silencer = re.compile(r'\\.|""') + + +def check_apostrophes(string): + '''Check Android logic for quotes and apostrophes. + + If you have an apostrophe (') in your string, you must either escape it + with a backslash (\') or enclose the string in double-quotes ("). + + Unescaped quotes are not visually shown on Android, but they're + also harmless, so we're not checking for quotes. We might do once we're + better at checking for inline XML, which is full of quotes. + Pairing quotes as in '""' is bad, though, so report errors for that. + Mostly, because it's hard to tell if a string is consider quoted or not + by Android in the end. + + https://developer.android.com/guide/topics/resources/string-resource#escaping_quotes + ''' + for m in re.finditer('""', string): + yield ( + "error", + m.start(), + "Double straight quotes not allowed", + "android" + ) + string = silencer.sub(" ", string) + + is_quoted = string.startswith('"') and string.endswith('"') + if not is_quoted: + # apostrophes need to be escaped + for m in re.finditer("'", string): + yield ( + "error", + m.start(), + "Apostrophe must be escaped", + "android" + ) + + +def get_params(refs): + '''Get printf parameters and internal errors. + + Returns a sparse map of positions to formatter, and a list + of errors. Errors covered so far are mismatching formatters. 
+ ''' + params = {} + errors = [] + next_implicit = 1 + for ref in refs: + if isinstance(ref, minidom.Node): + ref = textContent(ref) + for m in re.finditer(r'%(?P<order>[1-9]\$)?(?P<format>[sSd])', ref): + order = m.group('order') + if order: + order = int(order[0]) + else: + order = next_implicit + next_implicit += 1 + fmt = m.group('format') + if order not in params: + params[order] = fmt + else: + # check for consistency errors + if params[order] == fmt: + continue + msg = "Conflicting formatting, %{order}${f1} vs %{order}${f2}" + errors.append(( + msg.format(order=order, f1=fmt, f2=params[order]), + m.start() + )) + return params, errors + + +def check_params(params, string): + '''Compare the printf parameters in the given string to the reference + parameters. + + Also yields errors that are internal to the parameters inside string, + as found by `get_params`. + ''' + lparams, errors = get_params([string]) + for error, pos in errors: + yield ( + "error", + pos, + error, + "android" + ) + # Compare reference for each localized parameter. + # If there's no reference found, error, as an out-of-bounds + # parameter crashes. + # This assumes that all parameters are actually used in the reference, + # which should be OK. + # If there's a mismatch in the formatter, error. + for order in sorted(lparams): + if order not in params: + yield ( + "error", + 0, + "Formatter %{}${} not found in reference".format( + order, lparams[order] + ), + "android" + ) + elif params[order] != lparams[order]: + yield ( + "error", + 0, + "Mismatching formatter", + "android" + ) + # All parameters used in the reference are expected to be included. + # Warn if this isn't the case. 
+ for order in params: + if order not in sorted(lparams): + yield ( + "warning", + 0, + "Formatter %{}${} not found in translation".format( + order, params[order] + ), + "android", + ) diff --git a/third_party/python/compare_locales/compare_locales/checks/base.py b/third_party/python/compare_locales/compare_locales/checks/base.py new file mode 100644 index 0000000000..95f4bc7b59 --- /dev/null +++ b/third_party/python/compare_locales/compare_locales/checks/base.py @@ -0,0 +1,122 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import re + + +class EntityPos(int): + pass + + +mochibake = re.compile('\ufffd') + + +class Checker: + '''Abstract class to implement checks per file type. + ''' + pattern = None + # if a check uses all reference entities, set this to True + needs_reference = False + + @classmethod + def use(cls, file): + return cls.pattern.match(file.file) + + def __init__(self, extra_tests, locale=None): + self.extra_tests = extra_tests + self.locale = locale + self.reference = None + + def check(self, refEnt, l10nEnt): + '''Given the reference and localized Entities, performs checks. + + This is a generator yielding tuples of + - "warning" or "error", depending on what should be reported, + - tuple of line, column info for the error within the string + - description string to be shown in the report + + By default, check for possible encoding errors. + ''' + for m in mochibake.finditer(l10nEnt.all): + yield ( + "warning", + EntityPos(m.start()), + f"\ufffd in: {l10nEnt.key}", + "encodings" + ) + + def set_reference(self, reference): + '''Set the reference entities. + Only do this if self.needs_reference is True. 
class CSSCheckMixin:
    '''Mixin with heuristics for CSS width/height specs in values.

    A "CSS spec" here is a `;`-separated list of `width`/`height`
    declarations (optionally `min-`/`max-` prefixed) with a numeric length
    and one of a fixed set of units.
    '''
    # One `prop: <number><unit>` declaration, or the end of the string.
    _css_spec = re.compile(
        r'(?:'
        r'(?P<prop>(?:min\-|max\-)?(?:width|height))'
        r'[ \t\r\n]*:[ \t\r\n]*'
        r'(?P<length>[0-9]+|[0-9]*\.[0-9]+)'
        r'(?P<unit>ch|em|ex|rem|px|cm|mm|in|pc|pt)'
        r')'
        r'|\Z'
    )
    # What may sit between two declarations: whitespace and one optional `;`.
    _css_sep = re.compile(r'[ \t\r\n]*(?P<semi>;)?[ \t\r\n]*$')

    def maybe_style(self, ref_value, l10n_value):
        '''Yield style check results if `ref_value` parses as a CSS spec.

        Yields nothing when the reference value is not a CSS spec.
        '''
        ref_map, _ = self.parse_css_spec(ref_value)
        if not ref_map:
            return
        l10n_map, errors = self.parse_css_spec(l10n_value)
        yield from self.check_style(ref_map, l10n_map, errors)

    def check_style(self, ref_map, l10n_map, errors):
        '''Compare parsed CSS specs of reference and localization.

        `ref_map` and `l10n_map` map property names to units, `errors` is
        the error list from `parse_css_spec` for the localization.
        Yields `(category, pos, message, 'css')` tuples: an error if the
        localization is not a clean CSS spec, otherwise at most one warning
        that joins all property and unit differences.

        `ref_map` is not modified.
        '''
        if not l10n_map:
            yield ('error', 0, 'reference is a CSS spec', 'css')
            return
        if errors:
            yield ('error', 0, 'reference is a CSS spec', 'css')
            return
        # Work on a copy: the caller's map may be reused for later checks.
        unmatched = dict(ref_map)
        msgs = []
        for prop, unit in l10n_map.items():
            if prop not in unmatched:
                msgs.insert(0, '%s only in l10n' % prop)
                continue
            ref_unit = unmatched.pop(prop)
            if unit != ref_unit:
                msgs.append("units for %s don't match "
                            "(%s != %s)" % (prop, unit, ref_unit))
        for prop in unmatched:
            msgs.insert(0, '%s only in reference' % prop)
        if msgs:
            yield ('warning', 0, ', '.join(msgs), 'css')

    def parse_css_spec(self, val):
        '''Parse `val` as a list of CSS width/height declarations.

        Returns `(props, errors)`. `props` maps property name to unit, or
        is None if no declaration was found. `errors` is None or a list of
        dicts with `pos` and `code` (`css-bad-content` or
        `css-missing-semicolon`) for content between declarations.
        Returns `(None, None)` when `val` contains no declaration at all.
        '''
        refMap = errors = None
        end = 0
        for m in self._css_spec.finditer(val):
            if end == 0 and m.start() == m.end():
                # no CSS spec found, just immediately end of string
                return None, None
            if m.start() > end:
                # Inspect whatever sits between the previous match and
                # this one.
                split = self._css_sep.match(val, end, m.start())
                if split is None:
                    errors = errors or []
                    errors.append({
                        'pos': end,
                        'code': 'css-bad-content',
                    })
                elif end > 0 and split.group('semi') is None:
                    errors = errors or []
                    errors.append({
                        'pos': end,
                        'code': 'css-missing-semicolon',
                    })
            if m.group('prop'):
                refMap = refMap or {}
                refMap[m.group('prop')] = m.group('unit')
            end = m.end()
        return refMap, errors
a/third_party/python/compare_locales/compare_locales/checks/dtd.py b/third_party/python/compare_locales/compare_locales/checks/dtd.py new file mode 100644 index 0000000000..139624f98f --- /dev/null +++ b/third_party/python/compare_locales/compare_locales/checks/dtd.py @@ -0,0 +1,238 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from io import BytesIO +import re +from xml import sax + +from compare_locales.parser import DTDParser +from .base import Checker, CSSCheckMixin + + +class DTDChecker(Checker, CSSCheckMixin): + """Tests to run on DTD files. + + Uses xml.sax for the heavy lifting of xml parsing. + + The code tries to parse until it doesn't find any unresolved entities + anymore. If it finds one, it tries to grab the key, and adds an empty + <!ENTITY key ""> definition to the header. + + Also checks for some CSS and number heuristics in the values. 
+ """ + pattern = re.compile(r'.*\.dtd$') + needs_reference = True # to cast a wider net for known entity references + + eref = re.compile('&(%s);' % DTDParser.Name) + tmpl = b'''<!DOCTYPE elem [%s]> +<elem>%s</elem> +''' + xmllist = {'amp', 'lt', 'gt', 'apos', 'quot'} + + def __init__(self, extra_tests, locale=None): + super().__init__(extra_tests, locale=locale) + self.processContent = False + if self.extra_tests is not None and 'android-dtd' in self.extra_tests: + self.processContent = True + self.__known_entities = None + + def known_entities(self, refValue): + if self.__known_entities is None and self.reference is not None: + self.__known_entities = set() + for ent in self.reference.values(): + self.__known_entities.update( + self.entities_for_value(ent.raw_val)) + return self.__known_entities if self.__known_entities is not None \ + else self.entities_for_value(refValue) + + def entities_for_value(self, value): + reflist = {m.group(1) for m in self.eref.finditer(value)} + reflist -= self.xmllist + return reflist + + # Setup for XML parser, with default and text-only content handler + class TextContent(sax.handler.ContentHandler): + textcontent = '' + + def characters(self, content): + self.textcontent += content + + defaulthandler = sax.handler.ContentHandler() + texthandler = TextContent() + + numPattern = r'([0-9]+|[0-9]*\.[0-9]+)' + num = re.compile('^%s$' % numPattern) + lengthPattern = '%s(em|px|ch|cm|in)' % numPattern + length = re.compile('^%s$' % lengthPattern) + + def check(self, refEnt, l10nEnt): + """Try to parse the refvalue inside a dummy element, and keep + track of entities that we need to define to make that work. + + Return a checker that offers just those entities. + """ + yield from super().check(refEnt, l10nEnt) + refValue, l10nValue = refEnt.raw_val, l10nEnt.raw_val + # find entities the refValue references, + # reusing markup from DTDParser. 
+ reflist = self.known_entities(refValue) + inContext = self.entities_for_value(refValue) + entities = ''.join('<!ENTITY %s "">' % s for s in sorted(reflist)) + parser = sax.make_parser() + parser.setFeature(sax.handler.feature_external_ges, False) + + parser.setContentHandler(self.defaulthandler) + try: + parser.parse( + BytesIO(self.tmpl % + (entities.encode('utf-8'), + refValue.encode('utf-8')))) + # also catch stray % + parser.parse( + BytesIO(self.tmpl % + ((refEnt.all + entities).encode('utf-8'), + b'&%s;' % refEnt.key.encode('utf-8')))) + except sax.SAXParseException as e: + e # noqa + yield ('warning', + (0, 0), + "can't parse en-US value", 'xmlparse') + + # find entities the l10nValue references, + # reusing markup from DTDParser. + l10nlist = self.entities_for_value(l10nValue) + missing = sorted(l10nlist - reflist) + _entities = entities + ''.join('<!ENTITY %s "">' % s for s in missing) + if self.processContent: + self.texthandler.textcontent = '' + parser.setContentHandler(self.texthandler) + try: + parser.parse(BytesIO(self.tmpl % (_entities.encode('utf-8'), + l10nValue.encode('utf-8')))) + # also catch stray % + # if this fails, we need to substract the entity definition + parser.setContentHandler(self.defaulthandler) + parser.parse( + BytesIO(self.tmpl % + ((l10nEnt.all + _entities).encode('utf-8'), + b'&%s;' % l10nEnt.key.encode('utf-8')))) + except sax.SAXParseException as e: + # xml parse error, yield error + # sometimes, the error is reported on our fake closing + # element, make that the end of the last line + lnr = e.getLineNumber() - 1 + lines = l10nValue.splitlines() + if lnr > len(lines): + lnr = len(lines) + col = len(lines[lnr-1]) + else: + col = e.getColumnNumber() + if lnr == 1: + # first line starts with <elem>, substract + col -= len("<elem>") + elif lnr == 0: + col -= len("<!DOCTYPE elem [") # first line is DOCTYPE + yield ('error', (lnr, col), ' '.join(e.args), 'xmlparse') + + warntmpl = 'Referencing unknown entity `%s`' + if reflist: 
def unicode_escape(self, str):
    """Validate the unicode escapes in a string.

    The string is first turned into ascii bytes with backslashreplace, so
    every non-ascii character becomes a unicode escape itself, and then
    decoded with the standard unicode-escape codec.

    Returns None if that works. If decoding fails, the failure stems from
    the original string, but the reported offsets refer to the ascii
    bytes. A UnicodeDecodeError is raised whose start and end are mapped
    back to character positions in the original string: start is the
    length of the successfully decoded prefix, end is start plus the
    length of the offending byte sequence.
    """
    ascii_bytes = str.encode('ascii', 'backslashreplace')
    try:
        ascii_bytes.decode('unicode-escape')
    except UnicodeDecodeError as exc:
        encoding, data, start, end, reason = exc.args
        bad_length = len(data[start:end])
        # everything before the error decodes fine, count its characters
        start_in_original = len(data[:start].decode('unicode-escape'))
        raise UnicodeDecodeError(
            encoding,
            data,
            start_in_original,
            start_in_original + bad_length,
            reason,
        )
def pattern_variants(pattern):
    """Return the plain-text variants of a pattern as a list of strings.

    Only the simplest case is handled: a pattern made of exactly one
    TextElement yields a one-item list with its value. Every other
    pattern yields an empty list.
    Possible future work is support for SelectExpressions of simple
    text patterns, nested expressions, and Literals. Variants with
    Variable-, Message-, or TermReferences should be ignored.
    """
    elements = pattern.elements
    if len(elements) != 1:
        return []
    only = elements[0]
    if not isinstance(only, ftl.TextElement):
        return []
    return [only.value]
class GenericL10nChecks:
    '''Checks that apply to both Terms and Messages.

    Expects the class it is mixed into to provide `self.messages`
    (a list collecting `(category, position, message)` tuples) and
    `self.locale`.
    '''
    def check_duplicate_attributes(self, node):
        '''Warn about each attribute of `node` that shares its name with
        another one, first occurrence first.
        '''
        attributes = node.attributes
        reported = set()
        for index, first in enumerate(attributes):
            if index in reported:
                # already flagged as a duplicate of an earlier attribute
                continue
            name = first.id.name
            duplicates = [
                other
                for other in range(index + 1, len(attributes))
                if attributes[other].id.name == name
            ]
            if not duplicates:
                continue
            text = MSGS['duplicate-attribute'].format(name=name)
            self.messages.append(('warning', first.span.start, text))
            for other in duplicates:
                reported.add(other)
                self.messages.append(
                    ('warning', attributes[other].span.start, text)
                )

    def check_variants(self, variants):
        '''Warn about duplicated variant keys and missing plural
        categories in the variants of a select expression.
        '''
        # Duplicate variant keys
        reported = set()
        for index in range(len(variants)):
            if index in reported:
                continue
            key = variants[index].key
            duplicates = [
                other
                for other in range(index + 1, len(variants))
                if key.equals(variants[other].key)
            ]
            if not duplicates:
                continue
            text = MSGS['duplicate-variant'].format(
                name=serialize_variant_key(key)
            )
            self.messages.append(('warning', key.span.start, text))
            for other in duplicates:
                reported.add(other)
                self.messages.append(
                    ('warning', variants[other].key.span.start, text)
                )
        # Plural categories
        locale_plurals = plurals.get_plural(self.locale)
        if not locale_plurals:
            return
        locale_plurals = set(locale_plurals)
        # `other` is used for all kinds of things, so it doesn't tell us
        # that this is a plural select. Look for the remaining categories.
        indicators = locale_plurals - {'other'}
        used_keys = {serialize_variant_key(v.key) for v in variants}
        if not used_keys & indicators:
            return
        absent = sorted(locale_plurals - used_keys)
        if not absent:
            return
        self.messages.append(
            (
                'warning', variants[0].key.span.start,
                MSGS['missing-plural'].format(categories=', '.join(absent))
            )
        )
class FluentChecker(Checker):
    '''Tests to run on Fluent (FTL) files.
    '''
    pattern = re.compile(r'.*\.ftl')

    def check_message(self, ref_entry, l10n_entry):
        '''Run checks on localized messages against reference message.

        Returns a list of `(category, position, message)` tuples: the
        findings of the localization visitor, plus a warning for each
        reference found in the reference entry but not in the same value
        or attribute of the localization.
        '''
        ref_data = ReferenceMessageVisitor()
        ref_data.visit(ref_entry)
        l10n_data = L10nMessageVisitor(self.locale, ref_data)
        l10n_data.visit(l10n_entry)

        messages = l10n_data.messages
        for attr_or_val, refs in ref_data.entry_refs.items():
            for ref, ref_type in refs.items():
                if ref not in l10n_data.entry_refs[attr_or_val]:
                    msg = MSGS['missing-' + ref_type].format(ref=ref)
                    messages.append(('warning', 0, msg))
        return messages

    def check_term(self, l10n_entry):
        '''Check localized terms.

        Returns a list of `(category, position, message)` tuples.
        '''
        l10n_data = TermVisitor(self.locale)
        l10n_data.visit(l10n_entry)
        return l10n_data.messages

    def check(self, refEnt, l10nEnt):
        '''Yield the base class findings, then the Fluent findings.

        Fluent findings are `(category, position, message, 'fluent')`
        tuples, sorted by position. Non-zero positions are made relative
        to the start of the localized entry.
        '''
        yield from super().check(refEnt, l10nEnt)
        l10n_entry = l10nEnt.entry
        if isinstance(l10n_entry, ftl.Message):
            ref_entry = refEnt.entry
            messages = self.check_message(ref_entry, l10n_entry)
        elif isinstance(l10n_entry, ftl.Term):
            messages = self.check_term(l10n_entry)
        else:
            # Neither Message nor Term: there are no checks for this
            # entry. Previously this fell through to an unbound
            # `messages` and raised UnboundLocalError.
            return

        messages.sort(key=lambda t: t[1])
        for cat, pos, msg in messages:
            if pos:
                pos = pos - l10n_entry.span.start
            yield (cat, pos, msg, 'fluent')
class PrintfException(Exception):
    '''Problem found while parsing printf specifiers.

    `msg` is the description to report, `pos` the offset it refers to.
    '''
    def __init__(self, msg, pos):
        self.msg, self.pos = msg, pos
def checkPrintf(self, refSpecs, l10nValue):
    '''Compare the printf specs of a localized value with the reference.

    `refSpecs` is the list of specs of the reference value, `l10nValue`
    the localized string. Yields `(category, pos, message, 'printf')`
    tuples:
    - an error if the localized value can't be parsed,
    - one error joining all missing, obsolete and mismatched arguments,
    - one warning if only trailing arguments are missing.
    '''
    try:
        l10nSpecs = self.getPrintfSpecs(l10nValue)
    except PrintfException as e:
        yield ('error', e.pos, e.msg, 'printf')
        return
    if refSpecs == l10nSpecs:
        return
    matcher = SequenceMatcher()
    matcher.set_seqs(refSpecs, l10nSpecs)
    problems = []
    trailing = None
    for tag, ref_lo, ref_hi, l10n_lo, l10n_hi in matcher.get_opcodes():
        if tag == 'delete':
            # missing argument in l10n
            if ref_hi == len(refSpecs):
                # trailing specs missing, that's just a warning
                trailing = ', '.join(
                    'trailing argument %d `%s` missing' %
                    (idx+1, refSpecs[idx])
                    for idx in range(ref_lo, ref_hi)
                )
            else:
                problems.extend(
                    'argument %d `%s` missing' % (idx+1, refSpecs[idx])
                    for idx in range(ref_lo, ref_hi)
                )
        elif tag == 'insert':
            # obsolete argument in l10n
            problems.extend(
                'argument %d `%s` obsolete' % (idx+1, l10nSpecs[idx])
                for idx in range(l10n_lo, l10n_hi)
            )
        elif tag == 'replace':
            problems.extend(
                'argument %d `%s` should be `%s`' %
                (l10n_idx+1, l10nSpecs[l10n_idx], refSpecs[ref_idx])
                for ref_idx, l10n_idx in zip(
                    range(ref_lo, ref_hi), range(l10n_lo, l10n_hi)
                )
            )
    if problems:
        yield ('error', 0, ', '.join(problems), 'printf')
    if trailing is not None:
        yield ('warning', 0, trailing, 'printf')
class CompareLocales:
    """Check the localization status of gecko applications.
The first arguments are paths to the l10n.toml or ini files for the
applications, followed by the base directory of the localization repositories.
Then you pass in the list of locale codes you want to compare. If there are
no locales given, the list of locales will be taken from the l10n.toml file
or the all-locales file referenced by the application\'s l10n.ini."""
    # NOTE: the class docstring doubles as the --help description, see
    # get_parser(). Keep it user-facing.

    def __init__(self):
        self.parser = self.get_parser()

    def get_parser(self):
        """Get an ArgumentParser, with class docstring as description.
        """
        parser = ArgumentParser(description=self.__doc__)
        parser.add_argument('--version', action='version',
                            version='%(prog)s ' + version)
        parser.add_argument('-v', '--verbose', action='count',
                            default=0, help='Make more noise')
        parser.add_argument('-q', '--quiet', action='count',
                            default=0, help='''Show less data.
Specified once, don't show obsolete entities. Specified twice, also hide
missing entities. Specify thrice to exclude warnings and four times to
just show stats''')
        parser.add_argument('--validate', action='store_true',
                            help='Run compare-locales against reference')
        parser.add_argument('-m', '--merge',
                            help='''Use this directory to stage merged files,
use {ab_CD} to specify a different directory for each locale''')
        parser.add_argument('config_paths', metavar='l10n.toml', nargs='+',
                            help='TOML or INI file for the project')
        parser.add_argument('l10n_base_dir', metavar='l10n-base-dir',
                            help='Parent directory of localizations')
        parser.add_argument('locales', nargs='*', metavar='locale-code',
                            help='Locale code and top-level directory of '
                                 'each localization')
        parser.add_argument('--json',
                            help='''Serialize to JSON. Value is the name of
the output file, pass "-" to serialize to stdout and hide the default output.
''')
        parser.add_argument('-D', action='append', metavar='var=value',
                            default=[], dest='defines',
                            help='Overwrite variables in TOML files')
        parser.add_argument('--full', action="store_true",
                            help="Compare sub-projects that are disabled")
        parser.add_argument('--return-zero', action="store_true",
                            help="Return 0 regardless of l10n status")
        parser.add_argument('--clobber-merge', action="store_true",
                            default=False, dest='clobber',
                            help="""WARNING: DATALOSS.
Use this option with care. If specified, the merge directory will
be clobbered for each module. That means, the subdirectory will
be completely removed, any files that were there are lost.
Be careful to specify the right merge directory when using this option.""")
        return parser

    @classmethod
    def call(cls):
        """Entry_point for setuptools.
        The actual command handling is done in the handle() method of the
        subclasses.
        """
        cmd = cls()
        args = cmd.parser.parse_args()
        return cmd.handle(**vars(args))

    def handle(
        self,
        quiet=0, verbose=0,
        validate=False,
        merge=None,
        config_paths=None, l10n_base_dir=None, locales=None,
        defines=None,
        full=False,
        return_zero=False,
        clobber=False,
        json=None,
    ):
        """The instance part of the classmethod call.

        Using keyword arguments as that is what we need for mach
        commands in mozilla-central.

        The keyword arguments mirror the destinations of the arguments
        defined in get_parser(). `config_paths`, `locales` and `defines`
        default to empty lists.

        Prints details and summaries unless `json` is "-", and serializes
        the observers as JSON to stdout (`json == "-"`) or to the file
        named by `json`.

        Returns 1 if the comparison reported errors and `return_zero` is
        not set, 0 otherwise. Exits through the parser if a TOML
        configuration can't be found or the comparison raises OSError.
        """
        defines = [] if defines is None else defines
        # log as verbose or quiet as we want, warn by default
        logging_level = logging.WARNING - (verbose - quiet) * 10
        logging.basicConfig()
        logging.getLogger().setLevel(logging_level)

        config_paths, l10n_base_dir, locales = self.extract_positionals(
            validate=validate,
            config_paths=config_paths,
            l10n_base_dir=l10n_base_dir,
            locales=locales,
        )

        # when we compare disabled projects, we set our locales
        # on all subconfigs, so deep is True.
        locales_deep = full
        configs = []
        config_env = {
            'l10n_base': l10n_base_dir
        }
        for define in defines:
            var, _, value = define.partition('=')
            config_env[var] = value
        for config_path in config_paths:
            if config_path.endswith('.toml'):
                try:
                    config = TOMLParser().parse(config_path, env=config_env)
                except ConfigNotFound as e:
                    # ArgumentParser.exit takes (status, message). Status 1
                    # and the message on stderr match what passing the
                    # message as the only argument used to produce.
                    self.parser.exit(
                        1, 'config file %s not found\n' % e.filename
                    )
                if locales_deep:
                    if not locales:
                        # no explicit locales given, force all locales
                        config.set_locales(config.all_locales, deep=True)
                    else:
                        config.set_locales(locales, deep=True)
                configs.append(config)
            else:
                app = EnumerateApp(config_path, l10n_base_dir)
                configs.append(app.asConfig())
        try:
            observers = compareProjects(
                configs,
                locales,
                l10n_base_dir,
                quiet=quiet,
                merge_stage=merge, clobber_merge=clobber)
        except OSError as exc:
            print("FAIL: " + str(exc))
            self.parser.exit(2)

        if json != '-':
            details = observers.serializeDetails()
            if details:
                print(details)
            if len(configs) > 1:
                if details:
                    print('')
                print("Summaries for")
                for config_path in config_paths:
                    print(" " + config_path)
                print(" and the union of these, counting each string once")
            print(observers.serializeSummaries())
        if json is not None:
            data = [observer.toJSON() for observer in observers]
            if json == '-':
                # Write to stdout, but leave it open: closing sys.stdout
                # breaks any output of code running after this command.
                json_dump(data, sys.stdout, sort_keys=True, indent=1)
                sys.stdout.write('\n')
                sys.stdout.flush()
            else:
                # The context manager closes the file even if dumping fails
                with open(json, 'w') as fh:
                    json_dump(data, fh, sort_keys=True, indent=None)
        rv = 1 if not return_zero and observers.error else 0
        return rv

    def extract_positionals(
        self,
        validate=False,
        config_paths=None, l10n_base_dir=None, locales=None,
    ):
        """Re-split the positional arguments as parsed by argparse.

        Returns a tuple of config paths, absolute l10n base directory, and
        locales. In validation mode, the locales are `[None]`.
        Errors out through the parser if there's no config file, a config
        file doesn't exist, or there's no l10n base directory.
        """
        config_paths = [] if config_paths is None else config_paths
        locales = [] if locales is None else locales
        # using nargs multiple times in argparser totally screws things
        # up, repair that.
        # First files are configs, then the base dir, everything else is
        # locales
        all_args = config_paths + [l10n_base_dir] + locales
        config_paths = []
        # The first directory is our l10n base, split there.
        while all_args and not os.path.isdir(all_args[0]):
            config_paths.append(all_args.pop(0))
        if not config_paths:
            self.parser.error('no configuration file given')
        for cf in config_paths:
            if not os.path.isfile(cf):
                self.parser.error('config file %s not found' % cf)
        if not all_args:
            self.parser.error('l10n-base-dir not found')
        l10n_base_dir = mozpath.abspath(all_args.pop(0))
        if validate:
            # signal validation mode by setting locale list to [None]
            locales = [None]
        else:
            locales = all_args

        return config_paths, l10n_base_dir, locales
+ +'Mozilla l10n compare locales tool' + +import os +import shutil + +from compare_locales import paths, mozpath + +from .content import ContentComparer +from .observer import Observer, ObserverList +from .utils import Tree, AddRemove + + +__all__ = [ + 'ContentComparer', + 'Observer', 'ObserverList', + 'AddRemove', 'Tree', + 'compareProjects', +] + + +def compareProjects( + project_configs, + locales, + l10n_base_dir, + stat_observer=None, + merge_stage=None, + clobber_merge=False, + quiet=0, + ): + all_locales = set(locales) + comparer = ContentComparer(quiet) + observers = comparer.observers + for project in project_configs: + # disable filter if we're in validation mode + if None in locales: + filter = None + else: + filter = project.filter + observers.append( + Observer( + quiet=quiet, + filter=filter, + )) + if not locales: + all_locales.update(project.all_locales) + for locale in sorted(all_locales): + files = paths.ProjectFiles(locale, project_configs, + mergebase=merge_stage) + if merge_stage is not None: + if clobber_merge: + mergematchers = {_m.get('merge') for _m in files.matchers} + mergematchers.discard(None) + for matcher in mergematchers: + clobberdir = matcher.prefix + if os.path.exists(clobberdir): + shutil.rmtree(clobberdir) + print("clobbered " + clobberdir) + for l10npath, refpath, mergepath, extra_tests in files: + # module and file path are needed for legacy filter.py support + module = None + fpath = mozpath.relpath(l10npath, l10n_base_dir) + for _m in files.matchers: + if _m['l10n'].match(l10npath): + if _m['module']: + # legacy ini support, set module, and resolve + # local path against the matcher prefix, + # which includes the module + module = _m['module'] + fpath = mozpath.relpath(l10npath, _m['l10n'].prefix) + break + reffile = paths.File(refpath, fpath or refpath, module=module) + if locale is None: + # When validating the reference files, set locale + # to a private subtag. This only shows in the output. 
+ locale = paths.REFERENCE_LOCALE + l10n = paths.File(l10npath, fpath or l10npath, + module=module, locale=locale) + if not os.path.exists(l10npath): + comparer.add(reffile, l10n, mergepath) + continue + if not os.path.exists(refpath): + comparer.remove(reffile, l10n, mergepath) + continue + comparer.compare(reffile, l10n, mergepath, extra_tests) + return observers diff --git a/third_party/python/compare_locales/compare_locales/compare/content.py b/third_party/python/compare_locales/compare_locales/compare/content.py new file mode 100644 index 0000000000..1e879a643c --- /dev/null +++ b/third_party/python/compare_locales/compare_locales/compare/content.py @@ -0,0 +1,304 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +'Mozilla l10n compare locales tool' + +import codecs +import os +import shutil +import re + +from compare_locales import parser +from compare_locales import mozpath +from compare_locales.checks import getChecker, EntityPos +from compare_locales.keyedtuple import KeyedTuple + +from .observer import ObserverList +from .utils import AddRemove + + +class ContentComparer: + keyRE = re.compile('[kK]ey') + nl = re.compile('\n', re.M) + + def __init__(self, quiet=0): + '''Create a ContentComparer. + observer is usually a instance of Observer. The return values + of the notify method are used to control the handling of missing + entities. 
+ ''' + self.observers = ObserverList(quiet=quiet) + + def create_merge_dir(self, merge_file): + outdir = mozpath.dirname(merge_file) + os.makedirs(outdir, exist_ok=True) + + def merge(self, ref_entities, ref_file, l10n_file, merge_file, + missing, skips, ctx, capabilities, encoding): + '''Create localized file in merge dir + + `ref_entities` and `ref_map` are the parser result of the + reference file + `ref_file` and `l10n_file` are the File objects for the reference and + the l10n file, resp. + `merge_file` is the output path for the generated content. This is None + if we're just comparing or validating. + `missing` are the missing messages in l10n - potentially copied from + reference + `skips` are entries to be dropped from the localized file + `ctx` is the parsing context + `capabilities` are the capabilities for the merge algorithm + `encoding` is the encoding to be used when serializing, usually utf-8 + ''' + + if not merge_file: + return + + if capabilities == parser.CAN_NONE: + return + + self.create_merge_dir(merge_file) + + if capabilities & parser.CAN_COPY: + # copy the l10n file if it's good, or the reference file if not + if skips or missing: + src = ref_file.fullpath + else: + src = l10n_file.fullpath + shutil.copyfile(src, merge_file) + print("copied reference to " + merge_file) + return + + if not (capabilities & parser.CAN_SKIP): + return + + # Start with None in case the merge file doesn't need to be created. 
+ f = None + + if skips: + # skips come in ordered by key name, we need them in file order + skips.sort(key=lambda s: s.span[0]) + + # we need to skip a few erroneous blocks in the input, copy by hand + f = codecs.open(merge_file, 'wb', encoding) + offset = 0 + for skip in skips: + chunk = skip.span + f.write(ctx.contents[offset:chunk[0]]) + offset = chunk[1] + f.write(ctx.contents[offset:]) + + if f is None: + # l10n file is a good starting point + shutil.copyfile(l10n_file.fullpath, merge_file) + + if not (capabilities & parser.CAN_MERGE): + if f: + f.close() + return + + if skips or missing: + if f is None: + f = codecs.open(merge_file, 'ab', encoding) + trailing = (['\n'] + + [ref_entities[key].all for key in missing] + + [ref_entities[skip.key].all for skip in skips + if not isinstance(skip, parser.Junk)]) + + def ensureNewline(s): + if not s.endswith('\n'): + return s + '\n' + return s + + print("adding to " + merge_file) + f.write(''.join(map(ensureNewline, trailing))) + + if f is not None: + f.close() + + def remove(self, ref_file, l10n, merge_file): + '''Obsolete l10n file. + + Copy to merge stage if we can. + ''' + self.observers.notify('obsoleteFile', l10n, None) + self.merge( + KeyedTuple([]), ref_file, l10n, merge_file, + [], [], None, parser.CAN_COPY, None + ) + + def compare(self, ref_file, l10n, merge_file, extra_tests=None): + try: + p = parser.getParser(ref_file.file) + except UserWarning: + # no comparison, XXX report? 
+ # At least, merge + self.merge( + KeyedTuple([]), ref_file, l10n, merge_file, [], [], None, + parser.CAN_COPY, None) + return + try: + p.readFile(ref_file) + except Exception as e: + self.observers.notify('error', ref_file, str(e)) + return + ref_entities = p.parse() + try: + p.readFile(l10n) + l10n_entities = p.parse() + l10n_ctx = p.ctx + except Exception as e: + self.observers.notify('error', l10n, str(e)) + return + + ar = AddRemove() + ar.set_left(ref_entities.keys()) + ar.set_right(l10n_entities.keys()) + report = missing = obsolete = changed = unchanged = keys = 0 + missing_w = changed_w = unchanged_w = 0 # word stats + missings = [] + skips = [] + checker = getChecker(l10n, extra_tests=extra_tests) + if checker and checker.needs_reference: + checker.set_reference(ref_entities) + for msg in p.findDuplicates(ref_entities): + self.observers.notify('warning', l10n, msg) + for msg in p.findDuplicates(l10n_entities): + self.observers.notify('error', l10n, msg) + for action, entity_id in ar: + if action == 'delete': + # missing entity + if isinstance(ref_entities[entity_id], parser.Junk): + self.observers.notify( + 'warning', l10n, 'Parser error in en-US' + ) + continue + _rv = self.observers.notify('missingEntity', l10n, entity_id) + if _rv == "ignore": + continue + if _rv == "error": + # only add to missing entities for l10n-merge on error, + # not report + missings.append(entity_id) + missing += 1 + refent = ref_entities[entity_id] + missing_w += refent.count_words() + else: + # just report + report += 1 + elif action == 'add': + # obsolete entity or junk + if isinstance(l10n_entities[entity_id], + parser.Junk): + junk = l10n_entities[entity_id] + self.observers.notify( + 'error', l10n, + junk.error_message() + ) + if merge_file is not None: + skips.append(junk) + elif ( + self.observers.notify('obsoleteEntity', l10n, entity_id) + != 'ignore' + ): + obsolete += 1 + else: + # entity found in both ref and l10n, check for changed + refent = 
ref_entities[entity_id] + l10nent = l10n_entities[entity_id] + if self.keyRE.search(entity_id): + keys += 1 + else: + if refent.equals(l10nent): + self.doUnchanged(l10nent) + unchanged += 1 + unchanged_w += refent.count_words() + else: + self.doChanged(ref_file, refent, l10nent) + changed += 1 + changed_w += refent.count_words() + # run checks: + if checker: + for tp, pos, msg, cat in checker.check(refent, l10nent): + if isinstance(pos, EntityPos): + line, col = l10nent.position(pos) + else: + line, col = l10nent.value_position(pos) + # skip error entities when merging + if tp == 'error' and merge_file is not None: + skips.append(l10nent) + self.observers.notify( + tp, l10n, + "%s at line %d, column %d for %s" % + (msg, line, col, refent.key) + ) + pass + + if merge_file is not None: + self.merge( + ref_entities, ref_file, + l10n, merge_file, missings, skips, l10n_ctx, + p.capabilities, p.encoding) + + stats = { + 'missing': missing, + 'missing_w': missing_w, + 'report': report, + 'obsolete': obsolete, + 'changed': changed, + 'changed_w': changed_w, + 'unchanged': unchanged, + 'unchanged_w': unchanged_w, + 'keys': keys, + } + self.observers.updateStats(l10n, stats) + pass + + def add(self, orig, missing, merge_file): + ''' Add missing localized file.''' + f = orig + try: + p = parser.getParser(f.file) + except UserWarning: + p = None + + # if we don't support this file, assume CAN_COPY to mimick + # l10n dir as closely as possible + caps = p.capabilities if p else parser.CAN_COPY + if (caps & (parser.CAN_COPY | parser.CAN_MERGE)): + # even if we can merge, pretend we can only copy + self.merge( + KeyedTuple([]), orig, missing, merge_file, + ['trigger copy'], [], None, parser.CAN_COPY, None + ) + + if self.observers.notify('missingFile', missing, None) == "ignore": + # filter said that we don't need this file, don't count it + return + + if p is None: + # We don't have a parser, cannot count missing strings + return + + try: + p.readFile(f) + entities = p.parse() + 
except Exception as ex: + self.observers.notify('error', f, str(ex)) + return + # strip parse errors + entities = [e for e in entities if not isinstance(e, parser.Junk)] + self.observers.updateStats(missing, {'missing': len(entities)}) + missing_w = 0 + for e in entities: + missing_w += e.count_words() + self.observers.updateStats(missing, {'missing_w': missing_w}) + + def doUnchanged(self, entity): + # overload this if needed + pass + + def doChanged(self, file, ref_entity, l10n_entity): + # overload this if needed + pass diff --git a/third_party/python/compare_locales/compare_locales/compare/observer.py b/third_party/python/compare_locales/compare_locales/compare/observer.py new file mode 100644 index 0000000000..d336a004b3 --- /dev/null +++ b/third_party/python/compare_locales/compare_locales/compare/observer.py @@ -0,0 +1,215 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +'Mozilla l10n compare locales tool' + +from collections import defaultdict + +from .utils import Tree + + +class Observer: + + def __init__(self, quiet=0, filter=None): + '''Create Observer + For quiet=1, skip per-entity missing and obsolete strings, + for quiet=2, skip missing and obsolete files. For quiet=3, + skip warnings and errors. + ''' + self.summary = defaultdict(lambda: { + "errors": 0, + "warnings": 0, + "missing": 0, + "missing_w": 0, + "report": 0, + "obsolete": 0, + "changed": 0, + "changed_w": 0, + "unchanged": 0, + "unchanged_w": 0, + "keys": 0, + }) + self.details = Tree(list) + self.quiet = quiet + self.filter = filter + self.error = False + + def _dictify(self, d): + plaindict = {} + for k, v in d.items(): + plaindict[k] = dict(v) + return plaindict + + def toJSON(self): + # Don't export file stats, even if we collected them. + # Those are not part of the data we use toJSON for. 
+ return { + 'summary': self._dictify(self.summary), + 'details': self.details.toJSON() + } + + def updateStats(self, file, stats): + # in multi-project scenarios, this file might not be ours, + # check that. + # Pass in a dummy entity key '' to avoid getting in to + # generic file filters. If we have stats for those, + # we want to aggregate the counts + if (self.filter is not None and + self.filter(file, entity='') == 'ignore'): + return + for category, value in stats.items(): + if category == 'errors': + # updateStats isn't called with `errors`, but make sure + # we handle this if that changes + self.error = True + self.summary[file.locale][category] += value + + def notify(self, category, file, data): + rv = 'error' + if category in ['missingFile', 'obsoleteFile']: + if self.filter is not None: + rv = self.filter(file) + if rv == "ignore" or self.quiet >= 2: + return rv + if self.quiet == 0 or category == 'missingFile': + self.details[file].append({category: rv}) + return rv + if self.filter is not None: + rv = self.filter(file, data) + if rv == "ignore": + return rv + if category in ['missingEntity', 'obsoleteEntity']: + if ( + (category == 'missingEntity' and self.quiet < 2) + or (category == 'obsoleteEntity' and self.quiet < 1) + ): + self.details[file].append({category: data}) + return rv + if category == 'error': + # Set error independently of quiet + self.error = True + if category in ('error', 'warning'): + if ( + (category == 'error' and self.quiet < 4) + or (category == 'warning' and self.quiet < 3) + ): + self.details[file].append({category: data}) + self.summary[file.locale][category + 's'] += 1 + return rv + + +class ObserverList(Observer): + def __init__(self, quiet=0): + super().__init__(quiet=quiet) + self.observers = [] + + def __iter__(self): + return iter(self.observers) + + def append(self, observer): + self.observers.append(observer) + + def notify(self, category, file, data): + """Check observer for the found data, and if it's + not to 
ignore, notify stat_observers. + """ + rvs = { + observer.notify(category, file, data) + for observer in self.observers + } + if all(rv == 'ignore' for rv in rvs): + return 'ignore' + # our return value doesn't count + super().notify(category, file, data) + rvs.discard('ignore') + if 'error' in rvs: + return 'error' + assert len(rvs) == 1 + return rvs.pop() + + def updateStats(self, file, stats): + """Check observer for the found data, and if it's + not to ignore, notify stat_observers. + """ + for observer in self.observers: + observer.updateStats(file, stats) + super().updateStats(file, stats) + + def serializeDetails(self): + + def tostr(t): + if t[1] == 'key': + return ' ' * t[0] + '/'.join(t[2]) + o = [] + indent = ' ' * (t[0] + 1) + for item in t[2]: + if 'error' in item: + o += [indent + 'ERROR: ' + item['error']] + elif 'warning' in item: + o += [indent + 'WARNING: ' + item['warning']] + elif 'missingEntity' in item: + o += [indent + '+' + item['missingEntity']] + elif 'obsoleteEntity' in item: + o += [indent + '-' + item['obsoleteEntity']] + elif 'missingFile' in item: + o.append(indent + '// add and localize this file') + elif 'obsoleteFile' in item: + o.append(indent + '// remove this file') + return '\n'.join(o) + + return '\n'.join(tostr(c) for c in self.details.getContent()) + + def serializeSummaries(self): + summaries = { + loc: [] + for loc in self.summary.keys() + } + for observer in self.observers: + for loc, lst in summaries.items(): + # Not all locales are on all projects, + # default to empty summary + lst.append(observer.summary.get(loc, {})) + if len(self.observers) > 1: + # add ourselves if there's more than one project + for loc, lst in summaries.items(): + lst.append(self.summary[loc]) + keys = ( + 'errors', + 'warnings', + 'missing', 'missing_w', + 'obsolete', + 'changed', 'changed_w', + 'unchanged', 'unchanged_w', + 'keys', + ) + leads = [ + f'{k:12}' for k in keys + ] + out = [] + for locale, summaries in sorted(summaries.items()): + 
if locale: + out.append(locale + ':') + segment = [''] * len(keys) + for summary in summaries: + for row, key in enumerate(keys): + segment[row] += ' {:6}'.format(summary.get(key) or '') + + out += [ + lead + row + for lead, row in zip(leads, segment) + if row.strip() + ] + + total = sum(summaries[-1].get(k, 0) + for k in ['changed', 'unchanged', 'report', 'missing'] + ) + rate = 0 + if total: + rate = (('changed' in summary and summary['changed'] * 100) or + 0) / total + out.append('%d%% of entries changed' % rate) + return '\n'.join(out) + + def __str__(self): + return 'observer' diff --git a/third_party/python/compare_locales/compare_locales/compare/utils.py b/third_party/python/compare_locales/compare_locales/compare/utils.py new file mode 100644 index 0000000000..e298f80bc5 --- /dev/null +++ b/third_party/python/compare_locales/compare_locales/compare/utils.py @@ -0,0 +1,133 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +'Mozilla l10n compare locales tool' + +from compare_locales import paths + + +class Tree: + def __init__(self, valuetype): + self.branches = dict() + self.valuetype = valuetype + self.value = None + + def __getitem__(self, leaf): + parts = [] + if isinstance(leaf, paths.File): + parts = [] + if leaf.module: + parts += [leaf.locale] + leaf.module.split('/') + parts += leaf.file.split('/') + else: + parts = leaf.split('/') + return self.__get(parts) + + def __get(self, parts): + common = None + old = None + new = tuple(parts) + t = self + for k, v in self.branches.items(): + for i, part in enumerate(zip(k, parts)): + if part[0] != part[1]: + i -= 1 + break + if i < 0: + continue + i += 1 + common = tuple(k[:i]) + old = tuple(k[i:]) + new = tuple(parts[i:]) + break + if old: + self.branches.pop(k) + t = Tree(self.valuetype) + t.branches[old] = v + self.branches[common] = t + elif common: + t = self.branches[common] + if new: + if common: + return t.__get(new) + t2 = t + t = Tree(self.valuetype) + t2.branches[new] = t + if t.value is None: + t.value = t.valuetype() + return t.value + + indent = ' ' + + def getContent(self, depth=0): + ''' + Returns iterator of (depth, flag, key_or_value) tuples. + If flag is 'value', key_or_value is a value object, otherwise + (flag is 'key') it's a key string. + ''' + keys = sorted(self.branches.keys()) + if self.value is not None: + yield (depth, 'value', self.value) + for key in keys: + yield (depth, 'key', key) + yield from self.branches[key].getContent(depth + 1) + + def toJSON(self): + ''' + Returns this Tree as a JSON-able tree of hashes. + Only the values need to take care that they're JSON-able. 
+ ''' + if self.value is not None: + return self.value + return {'/'.join(key): self.branches[key].toJSON() + for key in self.branches.keys()} + + def getStrRows(self): + def tostr(t): + if t[1] == 'key': + return self.indent * t[0] + '/'.join(t[2]) + return self.indent * (t[0] + 1) + str(t[2]) + + return [tostr(c) for c in self.getContent()] + + def __str__(self): + return '\n'.join(self.getStrRows()) + + +class AddRemove: + def __init__(self): + self.left = self.right = None + + def set_left(self, left): + if not isinstance(left, list): + left = list(l for l in left) + self.left = left + + def set_right(self, right): + if not isinstance(right, list): + right = list(l for l in right) + self.right = right + + def __iter__(self): + # order_map stores index in left and then index in right + order_map = {item: (i, -1) for i, item in enumerate(self.left)} + left_items = set(order_map) + # as we go through the right side, keep track of which left + # item we had in right last, and for items not in left, + # set the sortmap to (left_offset, right_index) + left_offset = -1 + right_items = set() + for i, item in enumerate(self.right): + right_items.add(item) + if item in order_map: + left_offset = order_map[item][0] + else: + order_map[item] = (left_offset, i) + for item in sorted(order_map, key=lambda item: order_map[item]): + if item in left_items and item in right_items: + yield ('equal', item) + elif item in left_items: + yield ('delete', item) + else: + yield ('add', item) diff --git a/third_party/python/compare_locales/compare_locales/integration_tests/__init__.py b/third_party/python/compare_locales/compare_locales/integration_tests/__init__.py new file mode 100644 index 0000000000..ba9db8b8ec --- /dev/null +++ b/third_party/python/compare_locales/compare_locales/integration_tests/__init__.py @@ -0,0 +1,5 @@ +'''Tests that are not run by default. + +They might just take long, or depend on external services, or both. +They might also fail for external changes. 
+''' diff --git a/third_party/python/compare_locales/compare_locales/integration_tests/test_plurals.py b/third_party/python/compare_locales/compare_locales/integration_tests/test_plurals.py new file mode 100644 index 0000000000..e63ff861f7 --- /dev/null +++ b/third_party/python/compare_locales/compare_locales/integration_tests/test_plurals.py @@ -0,0 +1,51 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import json +import unittest +from urllib.error import URLError +from urllib.request import urlopen + +from compare_locales import plurals + + +TRANSVISION_URL = ( + 'https://transvision.mozfr.org/' + 'api/v1/entity/gecko_strings/' + '?id=toolkit/chrome/global/intl.properties:pluralRule' +) + + +class TestPlural(unittest.TestCase): + '''Integration test for plural forms and l10n-central. + + Having more plural forms than in l10n-central is OK, missing or + mismatching ones isn't. + Depends on Transvision. + ''' + maxDiff = None + + def test_valid_forms(self): + reference_form_map = self._load_transvision() + # Strip matches from dicts, to make diff for test small + locales = list(reference_form_map) + cl_form_map = {} + for locale in locales: + cl_form = str(plurals.get_plural_rule(locale)) + if cl_form == reference_form_map[locale]: + reference_form_map.pop(locale) + else: + cl_form_map[locale] = cl_form + self.assertDictEqual(reference_form_map, cl_form_map) + + def _load_transvision(self): + '''Use the Transvision API to load all values of pluralRule + in intl.properties. + Skip test on load failure. 
+ ''' + try: + data = urlopen(TRANSVISION_URL).read() + except URLError: + raise unittest.SkipTest("Couldn't load Transvision API.") + return json.loads(data) diff --git a/third_party/python/compare_locales/compare_locales/keyedtuple.py b/third_party/python/compare_locales/compare_locales/keyedtuple.py new file mode 100644 index 0000000000..af703e8fa2 --- /dev/null +++ b/third_party/python/compare_locales/compare_locales/keyedtuple.py @@ -0,0 +1,55 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +'''A tuple with keys. + +A Sequence type that allows to refer to its elements by key. +Making this immutable, 'cause keeping track of mutations is hard. + +compare-locales uses strings for Entity keys, and tuples in the +case of PO. Support both. + +In the interfaces that check for membership, dicts check keys and +sequences check values. Always try our dict cache `__map` first, +and fall back to the superclass implementation. 
+''' + + +class KeyedTuple(tuple): + + def __new__(cls, iterable): + return super().__new__(cls, iterable) + + def __init__(self, iterable): + self.__map = {} + if iterable: + for index, item in enumerate(self): + self.__map[item.key] = index + + def __contains__(self, key): + try: + contains = key in self.__map + if contains: + return True + except TypeError: + pass + return super().__contains__(key) + + def __getitem__(self, key): + try: + key = self.__map[key] + except (KeyError, TypeError): + pass + return super().__getitem__(key) + + def keys(self): + for value in self: + yield value.key + + def items(self): + for value in self: + yield value.key, value + + def values(self): + return self diff --git a/third_party/python/compare_locales/compare_locales/lint/__init__.py b/third_party/python/compare_locales/compare_locales/lint/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/third_party/python/compare_locales/compare_locales/lint/__init__.py diff --git a/third_party/python/compare_locales/compare_locales/lint/cli.py b/third_party/python/compare_locales/compare_locales/lint/cli.py new file mode 100644 index 0000000000..dc476e1b77 --- /dev/null +++ b/third_party/python/compare_locales/compare_locales/lint/cli.py @@ -0,0 +1,93 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +import argparse +import os + +from compare_locales.lint.linter import L10nLinter +from compare_locales.lint.util import ( + default_reference_and_tests, + mirror_reference_and_tests, + l10n_base_reference_and_tests, +) +from compare_locales import mozpath +from compare_locales import paths +from compare_locales import parser +from compare_locales import version + + +epilog = '''\ +moz-l10n-lint checks for common mistakes in localizable files. It tests for +duplicate entries, parsing errors, and the like. 
Optionally, it can compare +the strings to an external reference with strings and warn if a string might +need to get a new ID. +''' + + +def main(): + p = argparse.ArgumentParser( + description='Validate localizable strings', + epilog=epilog, + ) + p.add_argument('l10n_toml') + p.add_argument( + '--version', action='version', version='%(prog)s ' + version + ) + p.add_argument('-W', action='store_true', help='error on warnings') + p.add_argument( + '--l10n-reference', + dest='l10n_reference', + metavar='PATH', + help='check for conflicts against an l10n-only reference repository ' + 'like gecko-strings', + ) + p.add_argument( + '--reference-project', + dest='ref_project', + metavar='PATH', + help='check for conflicts against a reference project like ' + 'android-l10n', + ) + args = p.parse_args() + if args.l10n_reference: + l10n_base, locale = \ + os.path.split(os.path.abspath(args.l10n_reference)) + if not locale or not os.path.isdir(args.l10n_reference): + p.error('Pass an existing l10n reference') + else: + l10n_base = '.' 
+ locale = None + pc = paths.TOMLParser().parse(args.l10n_toml, env={'l10n_base': l10n_base}) + if locale: + pc.set_locales([locale], deep=True) + files = paths.ProjectFiles(locale, [pc]) + get_reference_and_tests = default_reference_and_tests + if args.l10n_reference: + get_reference_and_tests = l10n_base_reference_and_tests(files) + elif args.ref_project: + get_reference_and_tests = mirror_reference_and_tests( + files, args.ref_project + ) + linter = L10nLinter() + results = linter.lint( + (f for f, _, _, _ in files.iter_reference() if parser.hasParser(f)), + get_reference_and_tests + ) + rv = 0 + if results: + rv = 1 + if all(r['level'] == 'warning' for r in results) and not args.W: + rv = 0 + for result in results: + print('{} ({}:{}): {}'.format( + mozpath.relpath(result['path'], '.'), + result.get('lineno', 0), + result.get('column', 0), + result['message'] + )) + return rv + + +if __name__ == '__main__': + main() diff --git a/third_party/python/compare_locales/compare_locales/lint/linter.py b/third_party/python/compare_locales/compare_locales/lint/linter.py new file mode 100644 index 0000000000..a946608d97 --- /dev/null +++ b/third_party/python/compare_locales/compare_locales/lint/linter.py @@ -0,0 +1,121 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
class L10nLinter:
    '''Run the lint checks over a set of files.'''

    def lint(self, files, get_reference_and_tests):
        '''Lint all `files` that have a parser and collect the results.

        `get_reference_and_tests` is called with each path and must return
        a `(reference_path, extra_tests)` pair.
        Returns a list of result dictionaries.
        '''
        findings = []
        for candidate in files:
            if parser.hasParser(candidate):
                ref, extra_tests = get_reference_and_tests(candidate)
                findings.extend(self.lint_file(candidate, ref, extra_tests))
        return findings

    def lint_file(self, path, ref, extra_tests):
        '''Yield the lint results for a single file.

        If `ref` names an existing file, it is parsed first and used as
        the reference to detect changed strings. Each yielded result gets
        its `path` key set to `path`.
        '''
        file_parser = parser.getParser(path)
        reference = {}
        if ref is not None and os.path.isfile(ref):
            file_parser.readFile(ref)
            reference = file_parser.parse()
        file_parser.readFile(path)
        current = file_parser.parse()
        checker = checks.getChecker(
            File(path, path, locale=REFERENCE_LOCALE),
            extra_tests=extra_tests
        )
        if checker and checker.needs_reference:
            # The linted file is handed to the checker as its own reference.
            checker.set_reference(current)
        entity_linter = EntityLinter(current, checker, reference)
        for entity in current:
            for finding in entity_linter.lint_entity(entity):
                finding['path'] = path
                yield finding


class EntityLinter:
    '''Helper that runs the individual lint checks on one entity.'''
    def __init__(self, current, checker, reference):
        # Number of occurrences per key, used to report duplicates.
        self.key_count = Counter(entity.key for entity in current)
        self.checker = checker
        self.reference = reference

    def lint_entity(self, current_entity):
        '''Yield all results for `current_entity`.

        Junk yields a single error and nothing else.
        '''
        junk_report = self.handle_junk(current_entity)
        if junk_report:
            yield junk_report
            return
        yield from self.lint_full_entity(current_entity)
        yield from self.lint_value(current_entity)

    def lint_full_entity(self, current_entity):
        '''Checks on the entity as a whole, reported at the entity start.

        Yields an error for duplicated keys, and a warning if the entity
        differs from the entity with the same key in the reference.
        '''
        key = current_entity.key
        lineno = col = None
        if self.key_count[key] > 1:
            lineno, col = current_entity.position()
            yield {
                'lineno': lineno,
                'column': col,
                'level': 'error',
                'message': 'Duplicate string with ID: {}'.format(key)
            }

        if key not in self.reference:
            return
        if current_entity.equals(self.reference[key]):
            return
        if lineno is None:
            # Not computed by the duplicate check above.
            lineno, col = current_entity.position()
        yield {
            'lineno': lineno,
            'column': col,
            'level': 'warning',
            'message': 'Changes to string require a new ID: {}'.format(key),
        }

    def lint_value(self, current_entity):
        '''Checks reported at particular locations of the entity.

        Runs the checker, if there is one, with the entity as both
        arguments, and maps the reported positions to line and column.
        '''
        if not self.checker:
            return
        reports = self.checker.check(current_entity, current_entity)
        for tp, pos, msg, cat in reports:
            if isinstance(pos, checks.EntityPos):
                locate = current_entity.position
            else:
                locate = current_entity.value_position
            lineno, col = locate(pos)
            yield {
                'lineno': lineno,
                'column': col,
                'level': tp,
                'message': msg,
            }

    def handle_junk(self, current_entity):
        '''Return an error result for Junk, or None for anything else.'''
        if isinstance(current_entity, parser.Junk):
            lineno, col = current_entity.position()
            return {
                'lineno': lineno,
                'column': col,
                'level': 'error',
                'message': current_entity.error_message()
            }
        return None
def default_reference_and_tests(path):
    '''Use neither a reference file nor extra tests for `path`.'''
    return (None, None)


def mirror_reference_and_tests(files, basedir):
    '''Get reference files to check for conflicts in android-l10n and friends.

    Returns a callable mapping a path to `(reference_path, tests)`, with
    the reference path rooted in `basedir`.
    '''
    def get_reference_and_tests(path):
        for matchers in files.matchers:
            if 'reference' in matchers:
                matcher = matchers['reference']
                if matcher.match(path) is not None:
                    # Same pattern as the matching one, but below basedir.
                    ref_matcher = paths.Matcher(matcher, root=basedir)
                    ref_path = matcher.sub(ref_matcher, path)
                    return ref_path, matchers.get('test')
        return None, None
    return get_reference_and_tests


def l10n_base_reference_and_tests(files):
    '''Get reference files to check for conflicts in gecko-strings and friends.

    Returns a callable mapping a path to `(reference_path, tests)` based
    on `files.match`.
    '''
    def get_reference_and_tests(path):
        found = files.match(path)
        if found is not None:
            ref, _, _, extra_tests = found
            return ref, extra_tests
        return None, None
    return get_reference_and_tests
# --- compare_locales/merge.py: merge resources across channels ---
#
# Merging resources is done over a series of parsed resources, or source
# strings, ordered from newest to oldest. The generated file structure is
# taken from the newest file, and then the next-newest, etc. The values of
# the returned entities are taken from the newest to the oldest resource,
# too. merge_resources has an option to choose the values from oldest to
# newest instead.


class MergeNotSupportedError(ValueError):
    '''Raised by merge_channels if there is no parser for the file.'''
    pass


def merge_channels(name, resources):
    '''Merge `resources` and return the serialized result as bytes.

    The parser is picked based on `name`, and its encoding is used for
    the returned bytes. Raises MergeNotSupportedError if no parser is
    found for `name`.
    '''
    try:
        parser = cl.getParser(name)
    except UserWarning:
        raise MergeNotSupportedError(
            f'Unsupported file format ({name}).')

    merged = merge_resources(parser, resources)
    serialized = serialize_legacy_resource(merged)
    return encode(serialized, parser.encoding)


def merge_resources(parser, resources, keep_newest=True):
    '''Merge parsed or unparsed resources, returning an enumerable of Entities.

    Resources are ordered from newest to oldest in the input. The structure
    of the generated content is taken from the newest resource first, and
    then filled by the next etc.
    Values are also taken from the newest, unless keep_newest is False,
    then values are taken from the oldest first.
    '''

    def get_key_value(entity, counter):
        if isinstance(entity, cl.Comment):
            text = entity.val
            counter[text] += 1
            # Use the (value, index) tuple as the key. AddRemove will
            # de-duplicate identical comments at the same index.
            return ((text, counter[text]), entity)

        if isinstance(entity, cl.Whitespace):
            # Use the Whitespace instance as the key so that it's always
            # unique. Adjacent whitespace will be folded into the longer
            # one in merge_two.
            return (entity, entity)

        return (entity.key, entity)

    def parse_resource(resource):
        # The counter dict keeps track of number of identical comments.
        counter = defaultdict(int)
        if isinstance(resource, bytes):
            parser.readContents(resource)
            resource = parser.walk()
        return OrderedDict(
            [get_key_value(entity, counter) for entity in resource]
        )

    def combine(newer, older):
        return merge_two(newer, older, keep_newer=keep_newest)

    entities = reduce(combine, map(parse_resource, resources))
    return entities.values()


def merge_two(newer, older, keep_newer=True):
    '''Merge two OrderedDicts.

    The order of the result dict is determined by `newer`.
    The values in the dict are the newer ones by default, too.
    If `keep_newer` is False, the values will be taken from the older
    dict.
    '''
    diff = AddRemove()
    diff.set_left(newer.keys())
    diff.set_right(older.keys())

    pick = get_newer_entity if keep_newer else get_older_entity

    # Walk all keys in the order reported by AddRemove, and prune as we go.
    merged = []
    for _, key in diff:
        entity = pick(newer, older, key)
        if entity is None:
            # Prune Nones which stand for duplicated comments.
            continue
        if merged and isinstance(entity, cl.Whitespace):
            previous = merged[-1][1]
            if isinstance(previous, cl.Whitespace):
                # Prefer the longer whitespace.
                if len(entity.all) > len(previous.all):
                    merged[-1] = (entity, entity)
                continue
        merged.append((key, entity))
    return OrderedDict(merged)


def get_newer_entity(newer, older, key):
    '''Return the entity for `key`, always preferring the newer version.'''
    candidate = newer.get(key, None)
    return candidate if candidate is not None else older.get(key)


def get_older_entity(newer, older, key):
    '''Return the entity for `key`, preferring the older version.

    If we don't have an older version, or it's a StickyEntry,
    get the newer version.
    '''
    candidate = older.get(key, None)
    if candidate is not None and not isinstance(candidate, StickyEntry):
        return candidate
    return newer.get(key)


def serialize_legacy_resource(entities):
    '''Concatenate the `all` text of each of the entities.'''
    return "".join(entity.all for entity in entities)


# --- compare_locales/mozpath.py ---
#
# Like os.path, with a reduced set of functions, and with normalized
# path separators (always use forward slashes).
# Also contains a few additional utilities not found in os.path.


def normsep(path):
    '''
    Normalize path separators, by using forward slashes instead of whatever
    :py:const:`os.sep` is.
    '''
    for separator in (os.sep, os.altsep):
        if separator and separator != '/':
            path = path.replace(separator, '/')
    return path


def relpath(path, start):
    '''Like os.path.relpath, but normalized, and '' instead of '.'.'''
    rel = normsep(os.path.relpath(path, start))
    if rel == '.':
        return ''
    return rel


def realpath(path):
    '''Like os.path.realpath, with normalized separators.'''
    return normsep(os.path.realpath(path))


def abspath(path):
    '''Like os.path.abspath, with normalized separators.'''
    return normsep(os.path.abspath(path))


def join(*paths):
    '''Like os.path.join, with normalized separators.'''
    return normsep(os.path.join(*paths))


def normpath(path):
    '''posixpath.normpath of the path with normalized separators.'''
    return posixpath.normpath(normsep(path))


def dirname(path):
    '''posixpath.dirname of the path with normalized separators.'''
    return posixpath.dirname(normsep(path))


def commonprefix(paths):
    '''posixpath.commonprefix of the paths with normalized separators.'''
    return posixpath.commonprefix([normsep(path) for path in paths])


def basename(path):
    '''Same as os.path.basename.'''
    return os.path.basename(path)


def splitext(path):
    '''posixpath.splitext of the path with normalized separators.'''
    return posixpath.splitext(normsep(path))


def split(path):
    '''
    Return the normalized path as a list of its components.

    ``split('foo/bar/baz')`` returns ``['foo', 'bar', 'baz']``
    '''
    return normsep(path).split('/')


def basedir(path, bases):
    '''
    Given a list of directories (`bases`), return which one contains the given
    path. If several matches are found, the deepest base directory is returned.

    ``basedir('foo/bar/baz', ['foo', 'baz', 'foo/bar'])`` returns ``'foo/bar'``
    (`'foo'` and `'foo/bar'` both match, but `'foo/bar'` is the deepest match)

    Returns None if none of the bases contains the path.
    '''
    target = normsep(path)
    candidates = [normsep(b) for b in bases]
    if target in candidates:
        return target
    # Reverse order puts 'foo/bar' before 'foo'.
    for base in sorted(candidates, reverse=True):
        if not base or target.startswith(base + '/'):
            return base


# Compiled regular expressions for match(), keyed by pattern.
re_cache = {}


def match(path, pattern):
    '''
    Return whether the given path matches the given pattern.
    An asterisk can be used to match any string, including the null string, in
    one part of the path:

        ``foo`` matches ``*``, ``f*`` or ``fo*o``

    However, an asterisk matching a subdirectory may not match the null string:

        ``foo/bar`` does *not* match ``foo/*/bar``

    If the pattern matches one of the ancestor directories of the path, the
    path is considered matching:

        ``foo/bar`` matches ``foo``

    Two adjacent asterisks can be used to match files and zero or more
    directories and subdirectories.

        ``foo/bar`` matches ``foo/**/bar``, or ``**/bar``
    '''
    if not pattern:
        return True
    compiled = re_cache.get(pattern)
    if compiled is None:
        pieces = []
        cursor = 0
        for m in re.finditer(r'(?:(^|/)\*\*(/|$))|(?P<star>\*)', pattern):
            if m.start() > cursor:
                # Literal text between the wildcards.
                pieces.append(re.escape(pattern[cursor:m.start()]))
            if m.group('star'):
                pieces.append('[^/]*')
            elif m.group(2):
                pieces.append(
                    re.escape(m.group(1)) + r'(?:.+%s)?' % m.group(2)
                )
            else:
                pieces.append(r'(?:%s.+)?' % re.escape(m.group(1)))
            cursor = m.end()
        # The tail lets a pattern match its descendants, too.
        pieces.append(re.escape(pattern[cursor:]) + '(?:/.*)?$')
        compiled = re_cache[pattern] = re.compile(''.join(pieces))
    return compiled.match(path) is not None


def rebase(oldbase, base, relativepath):
    '''
    Return `relativepath` relative to `base` instead of `oldbase`.
    '''
    if base == oldbase:
        return relativepath
    if len(base) < len(oldbase):
        # base is an ancestor of oldbase
        assert basedir(oldbase, [base]) == base
        rebased = join(relpath(oldbase, base), relativepath)
    else:
        # oldbase is an ancestor of base
        assert basedir(base, [oldbase]) == oldbase
        rebased = relpath(relativepath, relpath(base, oldbase))
    rebased = normpath(rebased)
    if relativepath.endswith('/') and not rebased.endswith('/'):
        rebased += '/'
    return rebased
__all__ = [
    "CAN_NONE", "CAN_COPY", "CAN_SKIP", "CAN_MERGE",
    "Junk", "Entry", "Entity", "Whitespace", "Comment", "OffsetComment",
    "BadEntity", "Parser",
    "AndroidParser",
    "DefinesParser", "DefinesInstruction",
    "DTDParser", "DTDEntity",
    "FluentParser", "FluentComment", "FluentEntity",
    "FluentMessage", "FluentTerm",
    "IniParser", "IniSection",
    "PoParser",
    "PropertiesParser", "PropertiesEntity",
]

# Filled at the end of the module, once the parser classes are available.
__constructors = []


def getParser(path):
    '''Return a parser instance for `path`.

    The built-in parsers are selected by searching `path` for their
    regular expression. If none matches, the parsers registered for the
    `compare_locales.parsers` entry point are asked via their `use`
    method. Raises UserWarning if no parser is found.
    '''
    for pattern, instance in __constructors:
        if re.search(pattern, path):
            return instance
    try:
        from pkg_resources import iter_entry_points
        for entry_point in iter_entry_points('compare_locales.parsers'):
            candidate = entry_point.resolve()()
            if candidate.use(path):
                return candidate
    except (ImportError, OSError):
        # Entry points are not available, fall through to the error.
        pass
    raise UserWarning("Cannot find Parser")


def hasParser(path):
    '''Return whether getParser finds a parser for `path`.'''
    try:
        found = getParser(path)
    except UserWarning:
        return False
    return bool(found)


# (regular expression searched in the path, shared parser instance)
__constructors = [
    ('strings.*\\.xml$', AndroidParser()),
    ('\\.dtd$', DTDParser()),
    ('\\.properties$', PropertiesParser()),
    ('\\.ini$', IniParser()),
    ('\\.inc$', DefinesParser()),
    ('\\.ftl$', FluentParser()),
    ('\\.pot?$', PoParser()),
]
# --- compare_locales/parser/android.py: Android strings.xml parser ---
#
# Parses strings.xml files per
# https://developer.android.com/guide/topics/resources/localization.
# As we're using a built-in XML parser underneath, errors on that level
# break the full parsing, and result in a single Junk entry.


class AndroidEntity(Entity):
    '''Entity for a `<string>` element, backed by string literals.

    The key, raw value, value and serialization are passed in as literals
    instead of being sliced out of the file contents.
    '''
    def __init__(
        self, ctx, pre_comment, white_space, node, all, key, raw_val, val
    ):
        # The spans of the superclass are not used, the literal
        # properties below replace them.
        super().__init__(
            ctx, pre_comment, white_space,
            (None, None),
            (None, None),
            (None, None)
        )
        self.node = node
        self._all_literal = all
        self._key_literal = key
        self._raw_val_literal = raw_val
        self._val_literal = val

    @property
    def all(self):
        '''Serialization including the leading comment and white-space.'''
        chunks = []
        if self.pre_comment is not None:
            chunks.append(self.pre_comment.all)
        if self.inner_white is not None:
            chunks.append(self.inner_white.all)
        chunks.append(self._all_literal)
        return ''.join(chunks)

    @property
    def key(self):
        return self._key_literal

    @property
    def raw_val(self):
        return self._raw_val_literal

    def position(self, offset=0):
        # Line numbers are not tracked, always report line 0.
        return (0, offset)

    def value_position(self, offset=0):
        # Line numbers are not tracked, always report line 0.
        return (0, offset)

    def wrap(self, raw_val):
        '''Create a LiteralEntity for this entity with a new raw value.

        The XML node is cloned, and `raw_val` is stored in its only child,
        or in its CDATA section if there are more children. If the node
        has no children at all (an empty `<string>`), a text node holding
        `raw_val` is added instead, unless `raw_val` is empty, too.
        '''
        clone = self.node.cloneNode(True)
        children = clone.childNodes
        if children.length == 0:
            # Nothing to store the value in. The original code failed
            # with an unbound `child` here.
            if raw_val:
                clone.appendChild(
                    clone.ownerDocument.createTextNode(raw_val)
                )
        else:
            if children.length == 1:
                child = children[0]
            else:
                for child in children:
                    if child.nodeType == Node.CDATA_SECTION_NODE:
                        break
            child.data = raw_val
        all = []
        if self.pre_comment is not None:
            all.append(self.pre_comment.all)
        if self.inner_white is not None:
            all.append(self.inner_white.all)
        all.append(clone.toxml())
        return LiteralEntity(self.key, raw_val, ''.join(all))


class NodeMixin:
    '''Mixin for entries that store their serialization and value as
    literals. The key is the serialization.
    '''
    def __init__(self, all, value):
        self._all_literal = all
        self._val_literal = value

    @property
    def all(self):
        return self._all_literal

    @property
    def key(self):
        return self._all_literal

    @property
    def raw_val(self):
        return self._val_literal

    def position(self, offset=0):
        return (0, offset)

    def value_position(self, offset=0):
        return (0, offset)


class XMLWhitespace(NodeMixin, Whitespace):
    pass


class XMLComment(NodeMixin, Comment):
    @property
    def val(self):
        return self._val_literal

    @property
    def key(self):
        return None


# DocumentWrapper is sticky in serialization.
# Always keep the one from the reference document.
class DocumentWrapper(NodeMixin, StickyEntry):
    def __init__(self, key, all):
        self._all_literal = all
        self._val_literal = all
        self._key_literal = key

    @property
    def key(self):
        return self._key_literal


class XMLJunk(Junk):
    '''Junk that stores its content as a literal.'''
    def __init__(self, all):
        super().__init__(None, (0, 0))
        self._all_literal = all

    @property
    def all(self):
        return self._all_literal

    def position(self, offset=0):
        return (0, offset)

    def value_position(self, offset=0):
        return (0, offset)


def textContent(node):
    '''Return the text of `node` to use as raw value.

    That's '' for a node without children, the data of the first CDATA
    section if there is one, or the data of the only text node.
    For any other content, the XML of the whole node is returned.
    '''
    if node.childNodes.length == 0:
        return ''
    for child in node.childNodes:
        if child.nodeType == minidom.Node.CDATA_SECTION_NODE:
            return child.data
    if (
            node.childNodes.length != 1 or
            node.childNodes[0].nodeType != minidom.Node.TEXT_NODE
    ):
        # Return something, we'll fail in checks on this
        return node.toxml()
    return node.childNodes[0].data


NEWLINE = re.compile(r'[ \t]*\n[ \t]*')


def normalize(val):
    '''Strip spaces and tabs from both ends and around line breaks.'''
    return NEWLINE.sub('\n', val.strip(' \t'))


class AndroidParser(Parser):
    # Android does l10n fallback at runtime, don't merge en-US strings
    capabilities = CAN_SKIP

    def __init__(self):
        super().__init__()
        self.last_comment = None

    def walk(self, only_localizable=False):
        '''Yield the entries of the loaded document.

        Content that doesn't parse as XML, or that doesn't have a
        `resources` root element, is yielded as a single XMLJunk.
        With `only_localizable`, only the results of handleElement are
        yielded; otherwise also the document wrappers, standalone
        comments and white-space.
        '''
        if not self.ctx:
            # loading file failed, or we just didn't load anything
            return
        ctx = self.ctx
        contents = ctx.contents
        try:
            doc = minidom.parseString(contents.encode('utf-8'))
        except Exception:
            yield XMLJunk(contents)
            return
        docElement = doc.documentElement
        if docElement.nodeName != 'resources':
            yield XMLJunk(doc.toxml())
            return
        root_children = docElement.childNodes
        if not only_localizable:
            # The opening tag is split up into one entry per attribute.
            yield DocumentWrapper(
                '<?xml?><resources>',
                '<?xml version="1.0" encoding="utf-8"?>\n<resources'
            )
            for attr_name, attr_value in docElement.attributes.items():
                yield DocumentWrapper(
                    attr_name,
                    f' {attr_name}="{attr_value}"'
                )
            yield DocumentWrapper('>', '>')
        child_num = 0
        while child_num < len(root_children):
            node = root_children[child_num]
            if node.nodeType == Node.COMMENT_NODE:
                current_comment, child_num = self.handleComment(
                    node, root_children, child_num
                )
                if child_num < len(root_children):
                    node = root_children[child_num]
                else:
                    # The comment was the last thing in the document.
                    if not only_localizable:
                        yield current_comment
                    break
            else:
                current_comment = None
            if node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                white_space = XMLWhitespace(node.toxml(), node.nodeValue)
                child_num += 1
                if current_comment is None:
                    if not only_localizable:
                        yield white_space
                    continue
                if node.nodeValue.count('\n') > 1:
                    # More than one line break makes the comment standalone.
                    if not only_localizable:
                        if current_comment is not None:
                            yield current_comment
                        yield white_space
                    continue
                if child_num < len(root_children):
                    node = root_children[child_num]
                else:
                    if not only_localizable:
                        if current_comment is not None:
                            yield current_comment
                        yield white_space
                    break
            else:
                white_space = None
            if node.nodeType == Node.ELEMENT_NODE:
                yield self.handleElement(node, current_comment, white_space)
            else:
                if not only_localizable:
                    if current_comment:
                        yield current_comment
                    if white_space:
                        yield white_space
            child_num += 1
        if not only_localizable:
            yield DocumentWrapper('</resources>', '</resources>\n')

    def handleElement(self, element, current_comment, white_space):
        '''Return an AndroidEntity for `<string>` elements with a `name`
        attribute, and XMLJunk for any other element.
        '''
        if element.nodeName == 'string' and element.hasAttribute('name'):
            return AndroidEntity(
                self.ctx,
                current_comment,
                white_space,
                element,
                element.toxml(),
                element.getAttribute('name'),
                textContent(element),
                ''.join(c.toxml() for c in element.childNodes)
            )
        else:
            return XMLJunk(element.toxml())

    def handleComment(self, node, root_children, child_num):
        '''Join the comment `node` with the comments following it.

        Following comments are joined if they are separated by at most one
        text node, and that text node has at most one line break.
        Returns the XMLComment and the index of the first child that is
        not part of it.
        '''
        all = node.toxml()
        val = normalize(node.nodeValue)
        while True:
            child_num += 1
            if child_num >= len(root_children):
                break
            node = root_children[child_num]
            if node.nodeType == Node.TEXT_NODE:
                if node.nodeValue.count('\n') > 1:
                    break
                white = node
                child_num += 1
                if child_num >= len(root_children):
                    break
                node = root_children[child_num]
            else:
                white = None
            if node.nodeType != Node.COMMENT_NODE:
                if white is not None:
                    # do not consume this node
                    child_num -= 1
                break
            if white:
                all += white.toxml()
                val += normalize(white.nodeValue)
            all += node.toxml()
            val += normalize(node.nodeValue)
        return XMLComment(all, val), child_num
__constructors = []


# The allowed capabilities for the Parsers. They define the exact strategy
# used by ContentComparer.merge.

# Don't perform any merging
CAN_NONE = 0
# Copy the entire reference file
CAN_COPY = 1
# Remove broken entities from localization
# Without CAN_MERGE, en-US is not good to use for localization.
CAN_SKIP = 2
# Add missing and broken entities from the reference to localization
# This effectively means that en-US is good to use for localized files.
CAN_MERGE = 4


class Entry:
    '''
    Abstraction layer for a localizable entity.
    Currently supported are grammars of the form:

    1: entity definition
    2: entity key (name)
    3: entity value

    <!ENTITY key "value">

    <--- definition ---->

    The spans are (start, end) offsets into the contents of the context.
    '''
    def __init__(
        self, ctx, pre_comment, inner_white, span, key_span, val_span
    ):
        self.ctx = ctx
        self.pre_comment = pre_comment
        self.inner_white = inner_white
        self.span = span
        self.key_span = key_span
        self.val_span = val_span

    def position(self, offset=0):
        """Get the 1-based line and column of the character
        with given offset into the Entity.

        If offset is negative, return the end of the Entity.
        """
        pos = self.span[1] if offset < 0 else self.span[0] + offset
        return self.ctx.linecol(pos)

    def value_position(self, offset=0):
        """Get the 1-based line and column of the character
        with given offset into the value.

        If offset is negative, return the end of the value.
        """
        assert self.val_span is not None
        value_start, value_end = self.val_span[0], self.val_span[1]
        pos = value_end if offset < 0 else value_start + offset
        return self.ctx.linecol(pos)

    def _span_start(self):
        # Not all subclasses set pre_comment, thus the hasattr check.
        if hasattr(self, 'pre_comment') and self.pre_comment is not None:
            return self.pre_comment.span[0]
        return self.span[0]

    @property
    def all(self):
        '''The full text of the entry, including a leading comment.'''
        return self.ctx.contents[self._span_start():self.span[1]]

    @property
    def key(self):
        key_start, key_end = self.key_span[0], self.key_span[1]
        return self.ctx.contents[key_start:key_end]

    @property
    def raw_val(self):
        if self.val_span is None:
            return None
        return self.ctx.contents[self.val_span[0]:self.val_span[1]]

    @property
    def val(self):
        return self.raw_val

    def __repr__(self):
        return self.key

    re_br = re.compile('<br[ \t\r\n]*/?>', re.U)
    re_sgml = re.compile(r'</?\w+.*?>', re.U | re.M)

    def count_words(self):
        """Count the words in an English string.
        Replace a couple of xml markup to make that safer, too.
        """
        without_breaks = self.re_br.sub('\n', self.val)
        plain = self.re_sgml.sub('', without_breaks)
        return len(plain.split())

    def equals(self, other):
        '''Entries are equal if both key and value are.'''
        if self.key != other.key:
            return False
        return self.val == other.val


class StickyEntry(Entry):
    """Subclass of Entry to use in for syntax fragments
    which should always be overwritten in the serializer.
    """
    pass


class Entity(Entry):
    @property
    def localized(self):
        '''Is this entity localized.

        Always true for monolingual files.
        In bilingual files, this is a dynamic property.
        '''
        return True

    def unwrap(self):
        """Return the literal value to be used by tools.
        """
        return self.raw_val

    def wrap(self, raw_val):
        """Create literal entity based on reference and raw value.

        This is used by the serialization logic.
        """
        contents = self.ctx.contents
        head = contents[self._span_start():self.val_span[0]]
        tail = contents[self.val_span[1]:self.span[1]]
        return LiteralEntity(self.key, raw_val, head + raw_val + tail)


class LiteralEntity(Entity):
    """Subclass of Entity to represent entities without context slices.

    It's storing string literals for key, raw_val and all instead of spans.
    """
    def __init__(self, key, val, all):
        super().__init__(None, None, None, None, None, None)
        self._key = key
        self._raw_val = val
        self._all = all

    @property
    def key(self):
        return self._key

    @property
    def raw_val(self):
        return self._raw_val

    @property
    def all(self):
        return self._all


class PlaceholderEntity(LiteralEntity):
    """Subclass of Entity to be removed in merges.
    """
    def __init__(self, key):
        super().__init__(key, "", "\nplaceholder\n")


class Comment(Entry):
    '''A comment. It has no key, and its value is its full text.'''
    def __init__(self, ctx, span):
        self.ctx = ctx
        self.span = span
        self.val_span = None
        # Filled on first access of val.
        self._val_cache = None

    @property
    def key(self):
        return None

    @property
    def val(self):
        if self._val_cache is None:
            self._val_cache = self.all
        return self._val_cache

    def __repr__(self):
        return self.all


class OffsetComment(Comment):
    '''Helper for file formats that have a constant number of leading
    chars to strip from comments.
    Offset defaults to 1
    '''
    comment_offset = 1

    @property
    def val(self):
        if self._val_cache is None:
            skip = self.comment_offset
            stripped = [line[skip:] for line in self.all.splitlines(True)]
            self._val_cache = ''.join(stripped)
        return self._val_cache


class Junk:
    '''
    An almost-Entity, representing junk data that we didn't parse.
    This way, we can signal bad content as stuff we don't understand.
    And then either fix that, or report real bugs in localizations.
    '''
    junkid = 0

    def __init__(self, ctx, span):
        self.ctx = ctx
        self.span = span
        # The counter on the class makes the keys of junk unique.
        self.__class__.junkid += 1
        self.key = '_junk_%d_%d-%d' % (self.__class__.junkid, span[0], span[1])

    def position(self, offset=0):
        """Get the 1-based line and column of the character
        with given offset into the Entity.

        If offset is negative, return the end of the Entity.
        """
        pos = self.span[1] if offset < 0 else self.span[0] + offset
        return self.ctx.linecol(pos)

    @property
    def all(self):
        junk_start, junk_end = self.span[0], self.span[1]
        return self.ctx.contents[junk_start:junk_end]

    @property
    def raw_val(self):
        return self.all

    @property
    def val(self):
        return self.all

    def error_message(self):
        '''Describe the junk, with its content and start and end position.'''
        params = (self.val,) + self.position() + self.position(-1)
        return (
            'Unparsed content "%s" from line %d column %d'
            ' to line %d column %d' % params
        )

    def __repr__(self):
        return self.key


class Whitespace(Entry):
    '''Entity-like object representing an empty file with whitespace,
    if allowed
    '''
    def __init__(self, ctx, span):
        self.ctx = ctx
        # Key and value are the whitespace itself.
        self.span = self.key_span = self.val_span = span

    def __repr__(self):
        return self.raw_val


class BadEntity(ValueError):
    '''Raised when the parser can't create an Entity for a found match.
    '''
    pass


class Parser:
    '''Base class of the regular-expression based parsers.

    Subclasses need to provide the `reKey` and `reComment` expressions.
    '''
    capabilities = CAN_SKIP | CAN_MERGE
    reWhitespace = re.compile('[ \t\r\n]+', re.M)
    Comment = Comment
    # NotImplementedError would be great, but also tedious
    reKey = reComment = None

    class Context:
        "Fixture for content and line numbers"
        def __init__(self, contents):
            self.contents = contents
            # cache split lines
            self._lines = None

        def linecol(self, position):
            "Returns 1-based line and column numbers."
            if self._lines is None:
                # Offsets of the first character of lines two and later.
                newline = re.compile('\n', re.M)
                self._lines = [
                    m.end() for m in newline.finditer(self.contents)
                ]

            line_index = bisect.bisect(self._lines, position)
            if line_index:
                line_start = self._lines[line_index - 1]
            else:
                line_start = 0

            return line_index + 1, position - line_start + 1

    def __init__(self):
        # Subclasses may have set an encoding already.
        if not hasattr(self, 'encoding'):
            self.encoding = 'utf-8'
        self.ctx = None

    def readFile(self, file):
        '''Read contents from disk, with universal_newlines'''
        if isinstance(file, File):
            file = file.fullpath
        # newline=None enables the universal newlines of text mode.
        with open(
            file,
            encoding=self.encoding, errors='replace',
            newline=None
        ) as f:
            self.readUnicode(f.read())

    def readContents(self, contents):
        '''Read contents and create parsing context.

        contents are in native encoding, but with normalized line endings.
        '''
        decode = codecs.getdecoder(self.encoding)
        (text, _) = decode(contents, 'replace')
        self.readUnicode(text)

    def readUnicode(self, contents):
        '''Create the parsing context for the decoded `contents`.'''
        self.ctx = self.Context(contents)

    def parse(self):
        '''Return the localizable entries as KeyedTuple.'''
        return KeyedTuple(self)

    def __iter__(self):
        return self.walk(only_localizable=True)

    def walk(self, only_localizable=False):
        '''Yield the entries of the loaded contents in order.

        With `only_localizable`, only Entity and Junk are yielded.
        '''
        if not self.ctx:
            # loading file failed, or we just didn't load anything
            return
        ctx = self.ctx
        total = len(ctx.contents)

        next_offset = 0
        while next_offset < total:
            entity = self.getNext(ctx, next_offset)
            if not only_localizable or isinstance(entity, (Entity, Junk)):
                yield entity
            next_offset = entity.span[1]

    def getNext(self, ctx, offset):
        '''Parse the next fragment.

        Parse comments first, then white-space.
        If an entity follows, create that entity with such pre_comment and
        inner white-space. If not, emit comment or white-space as standalone.
        It's OK that this might parse whitespace more than once.
        Comments are associated with entities if they're not separated by
        blank lines. Multiple consecutive comments are joined.
        '''
        junk_offset = offset
        current_comment = None
        m = self.reComment.match(ctx.contents, offset)
        if m:
            current_comment = self.Comment(ctx, m.span())
            if offset < 2 and 'License' in current_comment.val:
                # Heuristic. A early comment with "License" is probably
                # a license header, and should be standalone.
                # Not glueing ourselves to offset == 0 as we might have
                # skipped a BOM.
                return current_comment
            offset = m.end()
        white_space = None
        m = self.reWhitespace.match(ctx.contents, offset)
        if m:
            white_space = Whitespace(ctx, m.span())
            offset = m.end()
            if current_comment is None:
                return white_space
            if white_space.raw_val.count('\n') > 1:
                # standalone comment
                # return the comment, and reparse the whitespace next time
                return current_comment
        m = self.reKey.match(ctx.contents, offset)
        if m:
            try:
                return self.createEntity(ctx, m, current_comment, white_space)
            except BadEntity:
                # fall through to Junk, probably
                pass
        if current_comment is not None:
            return current_comment
        if white_space is not None:
            return white_space
        return self.getJunk(ctx, junk_offset, self.reKey, self.reComment)

    def getJunk(self, ctx, offset, *expressions):
        '''Return Junk from `offset` up to the next match of one of the
        `expressions`, or the end of the contents.
        '''
        junkend = None
        for exp in expressions:
            m = exp.search(ctx.contents, offset)
            if not m:
                continue
            found = m.start()
            junkend = min(junkend, found) if junkend else found
        return Junk(ctx, (offset, junkend or len(ctx.contents)))

    def createEntity(self, ctx, m, current_comment, white_space):
        '''Create the Entity for the match `m` of reKey, which needs to
        have the groups `key` and `val`.
        '''
        return Entity(
            ctx, current_comment, white_space,
            m.span(), m.span('key'), m.span('val')
        )

    @classmethod
    def findDuplicates(cls, entities):
        '''Yield a message for each key that occurs more than once.'''
        found = Counter(entity.key for entity in entities)
        for entity_id, cnt in found.items():
            if cnt > 1:
                yield f'{entity_id} occurs {cnt} times'
class DefinesParser(Parser):
    '''Parser for files made of "#define key value" lines.

    Besides defines, it recognizes comments (lines starting with "# ")
    and other "#instruction args" lines, which are returned as
    DefinesInstruction entries. White-space here means newlines only.
    '''
    # can't merge, #unfilter needs to be the last item, which we don't support
    capabilities = CAN_COPY
    # Only runs of newlines count as white-space for this format.
    reWhitespace = re.compile('\n+', re.M)

    # NOTE(review): not referenced anywhere in the visible part of this
    # class; presumably a flag kept for callers elsewhere - confirm.
    EMPTY_LINES = 1 << 0

    class Comment(OffsetComment):
        comment_offset = 2

    class Context(Parser.Context):
        '''Parsing context that also tracks the "filter emptyLines" state.'''
        def __init__(self, contents):
            super(DefinesParser.Context, self).__init__(contents)
            # Toggled by "#filter emptyLines" / "#unfilter emptyLines".
            self.filter_empty_lines = False

    def __init__(self):
        # One or more consecutive lines that start with "# ".
        self.reComment = re.compile('(?:^# .*?\n)*(?:^# [^\n]*)', re.M)
        # corresponds to
        # https://hg.mozilla.org/mozilla-central/file/72ee4800d4156931c89b58bd807af4a3083702bb/python/mozbuild/mozbuild/preprocessor.py#l561 # noqa
        self.reKey = re.compile(
            r'#define[ \t]+(?P<key>\w+)(?:[ \t](?P<val>[^\n]*))?', re.M)
        # Any other "#word args" processing instruction.
        self.rePI = re.compile(r'#(?P<val>\w+[ \t]+[^\n]+)', re.M)
        Parser.__init__(self)

    def getNext(self, ctx, offset):
        '''Parse and return the next entry starting at offset.

        Tries, in order: a comment, a run of newlines, a #define entity,
        and a processing instruction. Anything else is returned as junk
        via self.getJunk, starting at the original offset.
        '''
        # Junk, if any, starts where we started, not after a comment.
        junk_offset = offset
        contents = ctx.contents

        m = self.reComment.match(ctx.contents, offset)
        if m:
            current_comment = self.Comment(ctx, m.span())
            offset = m.end()
        else:
            current_comment = None

        m = self.reWhitespace.match(contents, offset)
        if m:
            # blank lines outside of filter_empty_lines or
            # leading whitespace are bad
            if (
                offset == 0 or
                not (len(m.group()) == 1 or ctx.filter_empty_lines)
            ):
                # Emit a pending comment first; the newlines are
                # parsed again on the next call.
                if current_comment:
                    return current_comment
                return Junk(ctx, m.span())
            white_space = Whitespace(ctx, m.span())
            offset = m.end()
            if (
                current_comment is not None
                and white_space.raw_val.count('\n') > 1
            ):
                # standalone comment
                # return the comment, and reparse the whitespace next time
                return current_comment
            if current_comment is None:
                return white_space
        else:
            white_space = None

        m = self.reKey.match(contents, offset)
        if m:
            return self.createEntity(ctx, m, current_comment, white_space)
        # defines instructions don't have comments
        # Any pending comment is standalone
        if current_comment:
            return current_comment
        if white_space:
            return white_space
        m = self.rePI.match(contents, offset)
        if m:
            instr = DefinesInstruction(ctx, m.span(), m.span('val'))
            # These two instructions switch whether blank lines are
            # accepted as white-space for the rest of the file.
            if instr.val == 'filter emptyLines':
                ctx.filter_empty_lines = True
            if instr.val == 'unfilter emptyLines':
                ctx.filter_empty_lines = False
            return instr
        return self.getJunk(
            ctx, junk_offset, self.reComment, self.reKey, self.rePI)
class DTDEntityMixin:
    @property
    def val(self):
        '''Return the raw value with HTML entities unescaped.

        Named (&amp;), decimal (&#38;), and hex (&#x26; and &#x0026;)
        references are replaced by the character they stand for.
        Unknown entities are left intact.

        The set of recognized named entities is the one known to the
        html_unescape function used here.
        '''
        return html_unescape(self.raw_val)

    def value_position(self, offset=0):
        '''Return the (line, column) position inside the value.

        offset is either a plain offset, which is passed on to the
        parent class, or a (line, col) tuple relative to the start of
        the value, as DTDChecker reports them.
        '''
        if not isinstance(offset, tuple):
            return super().value_position(offset)
        rel_line, rel_col = offset
        base_line, base_col = super().value_position()
        if rel_line == 1:
            # Same line as the value start: columns add up.
            return base_line, base_col + rel_col
        # A later line: the column is already absolute on that line.
        return base_line + rel_line - 1, rel_col
def getNext(self, ctx, offset):
    '''
    Extend Parser.getNext with support for parameter entities.

    When the generic parser finds junk (or nothing) at this offset,
    check whether a parameter entity declaration plus its reference
    starts there instead:

    <!ENTITY % foo SYSTEM "url">
    %foo;
    '''
    # Step over a leading BOM at the very start of the file.
    leading_bom = offset == 0 and self.reHeader.match(ctx.contents)
    if leading_bom:
        offset += 1
    entity = Parser.getNext(self, ctx, offset)
    claimed_junk = entity and isinstance(entity, Junk)
    if not (claimed_junk or entity is None):
        return entity
    pe = self.rePE.match(ctx.contents, offset)
    if pe is None:
        return entity
    return DTDEntity(
        ctx, None, None, pe.span(), pe.span('key'), pe.span('val'))
class WordCounter(Visitor):
    '''Visitor that sums up the words of all visited text elements.'''

    def __init__(self):
        self.word_count = 0

    def generic_visit(self, node):
        # Spans, annotations and comments never contribute words,
        # so don't descend into them.
        skipped = (ftl.Span, ftl.Annotation, ftl.BaseComment)
        if not isinstance(node, skipped):
            super().generic_visit(node)

    def visit_SelectExpression(self, node):
        # optimize select expressions to only go through the variants
        self.visit(node.variants)

    def visit_TextElement(self, node):
        words = node.value.split()
        self.word_count += len(words)
# In Fluent we treat entries as a whole. FluentChecker reports errors at
# offsets calculated from the beginning of the entry.
def value_position(self, offset=None):
    '''Return self.position() for an offset relative to the entry start.

    Without an explicit offset, point at the start of the value, or at
    the end of the id if the entry has no value.
    '''
    if offset is not None:
        return self.position(offset)
    entry_start = self.span[0]
    anchor = self.val_span[0] if self.val_span else self.key_span[1]
    return self.position(anchor - entry_start)
def walk(self, only_localizable=False):
    '''Parse the loaded contents and yield entries in source order.

    Messages, terms and junk are always yielded. Unless
    only_localizable is set, the gaps between entries, the white-space
    around junk, and comments are yielded as well, as Whitespace and
    FluentComment entries.
    '''
    if not self.ctx:
        # loading file failed, or we just didn't load anything
        return

    resource = self.ftl_parser.parse(self.ctx.contents)

    # End offset of the previous entry, used to detect gaps.
    last_span_end = 0

    for entry in resource.body:
        if not only_localizable:
            # Anything between two entries is reported as white-space.
            if entry.span.start > last_span_end:
                yield Whitespace(
                    self.ctx, (last_span_end, entry.span.start))

        if isinstance(entry, ftl.Message):
            yield FluentMessage(self.ctx, entry)
        elif isinstance(entry, ftl.Term):
            yield FluentTerm(self.ctx, entry)
        elif isinstance(entry, ftl.Junk):
            # NOTE(review): the offsets below assume entry.content is
            # exactly the text covered by entry.span - confirm.
            start = entry.span.start
            end = entry.span.end
            # strip leading whitespace
            start += re.match('[ \t\r\n]*', entry.content).end()
            if not only_localizable and entry.span.start < start:
                yield Whitespace(
                    self.ctx, (entry.span.start, start)
                )
            # strip trailing whitespace
            ws, we = re.search('[ \t\r\n]*$', entry.content).span()
            end -= we - ws
            yield Junk(self.ctx, (start, end))
            if not only_localizable and end < entry.span.end:
                yield Whitespace(
                    self.ctx, (end, entry.span.end)
                )
        elif isinstance(entry, ftl.BaseComment) and not only_localizable:
            span = (entry.span.start, entry.span.end)
            yield FluentComment(self.ctx, span, entry)

        last_span_end = entry.span.end

    # Yield Whitespace at the EOF.
    if not only_localizable:
        eof_offset = len(self.ctx.contents)
        if eof_offset > last_span_end:
            yield Whitespace(self.ctx, (last_span_end, eof_offset))
# Unescape and concat a string list
def eval_stringlist(lines):
    '''Unescape each string fragment and return their concatenation.

    The escapes handled are backslash-backslash, \\t, \\r, \\n and an
    escaped double quote. Each fragment is unescaped in a single
    left-to-right pass, so the backslash produced by an escaped
    backslash is never combined with the following character into a
    second escape.
    '''
    escapes = {'\\': '\\', 't': '\t', 'r': '\r', 'n': '\n', '"': '"'}

    def unescape(match):
        return escapes[match.group(1)]

    return ''.join(
        re.sub(r'\\([\\trn"])', unescape, line)
        for line in lines
    )
def _parse_string_list(self, ctx, cursor, key):
    '''Parse "<key>" followed by one or more quoted string items.

    Returns a tuple of the unescaped, concatenated string and the
    offset right after the last item. Raises BadEntity if key is not
    found at cursor, or if no string item follows it.
    '''
    text = ctx.contents
    if not text.startswith(key, cursor):
        raise BadEntity
    position = cursor + len(key)
    pieces = []
    item = self.reListItem.match(text, position)
    while item:
        pieces.append(item.group(1))
        position = item.end()
        item = self.reListItem.match(text, position)
    if not pieces:
        raise BadEntity
    return eval_stringlist(pieces), position
class PropertiesEntityMixin:
    # A backslash followed by a unicode escape, an escaped line break
    # (with the indentation of the next line), or any single character.
    escape = re.compile(r'\\((?P<uni>u[0-9a-fA-F]{1,4})|'
                        '(?P<nl>\n[ \t]*)|(?P<single>.))', re.M)
    known_escapes = {'n': '\n', 'r': '\r', 't': '\t', '\\': '\\'}

    @property
    def val(self):
        '''Return the raw value with backslash escapes resolved.

        Unicode escapes become the character they encode, escaped line
        breaks are removed together with the following indentation,
        known single-character escapes are mapped via known_escapes,
        and any other escaped character stands for itself.
        '''
        escapes = self.known_escapes

        def expand(match):
            uni, nl, single = match.group('uni', 'nl', 'single')
            if uni:
                return chr(int(uni[1:], 16))
            if nl:
                return ''
            return escapes.get(single, single)

        return self.escape.sub(expand, self.raw_val)
+ return current_comment + offset = m.end() + else: + current_comment = None + + m = self.reWhitespace.match(contents, offset) + if m: + white_space = Whitespace(ctx, m.span()) + offset = m.end() + if ( + current_comment is not None + and white_space.raw_val.count('\n') > 1 + ): + # standalone comment + return current_comment + if current_comment is None: + return white_space + else: + white_space = None + + m = self.reKey.match(contents, offset) + if m: + startline = offset = m.end() + while True: + endval = nextline = contents.find('\n', offset) + if nextline == -1: + endval = offset = len(contents) + break + # is newline escaped? + _e = self._escapedEnd.search(contents, offset, nextline) + offset = nextline + 1 + if _e is None: + break + # backslashes at end of line, if 2*n, not escaped + if len(_e.group()) % 2 == 0: + break + startline = offset + + # strip trailing whitespace + ws = self._trailingWS.search(contents, startline) + if ws: + endval = ws.start() + + entity = PropertiesEntity( + ctx, current_comment, white_space, + (m.start(), endval), # full span + m.span('key'), + (m.end(), endval)) # value span + return entity + + if current_comment is not None: + return current_comment + if white_space is not None: + return white_space + + return self.getJunk(ctx, junk_offset, self.reKey, self.reComment) diff --git a/third_party/python/compare_locales/compare_locales/paths/__init__.py b/third_party/python/compare_locales/compare_locales/paths/__init__.py new file mode 100644 index 0000000000..f2d1c407c5 --- /dev/null +++ b/third_party/python/compare_locales/compare_locales/paths/__init__.py @@ -0,0 +1,53 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
class File:
    '''A file on disk, plus the module and locale it belongs to.'''

    def __init__(self, fullpath, file, module=None, locale=None):
        self.fullpath = fullpath
        self.file = file
        self.module = module
        self.locale = locale

    @property
    def localpath(self):
        '''Path of the file relative to the localization.

        For files inside a module this is locale/module/file, otherwise
        just the file.
        '''
        if not self.module:
            return self.file
        return mozpath.join(self.locale, self.module, self.file)

    def __hash__(self):
        return hash(self.localpath)

    def __str__(self):
        return self.fullpath

    def __eq__(self, other):
        # Only File objects with the very same attributes are equal.
        return isinstance(other, File) and vars(self) == vars(other)

    def __ne__(self, other):
        return not self == other
def load(self, ctx):
    '''Read and parse the TOML file at ctx.path into ctx.data.

    Raises ConfigNotFound if the file cannot be opened or read, or if
    its content is not valid TOML. The underlying error is kept as the
    cause of the raised exception.
    '''
    try:
        # TOML documents are UTF-8 by specification; don't depend on
        # the platform's default encoding.
        with open(ctx.path, 'rt', encoding='utf-8') as fin:
            ctx.data = toml.load(fin)
    except (toml.TomlDecodeError, OSError) as e:
        raise ConfigNotFound(ctx.path) from e
def processFilters(self, ctx):
    '''Turn the "filters" entries of the loaded data into rules.

    Each entry needs "path" (a string or a list of strings) and
    "action", and may have a "key". A single path is wrapped into a
    list before the rule is added to the project config.
    '''
    assert ctx.data is not None
    for entry in ctx.data.get('filters', []):
        raw_path = entry['path']
        rule = {
            "path": [raw_path] if isinstance(raw_path, str) else raw_path,
            "action": entry['action'],
        }
        if 'key' in entry:
            rule['key'] = entry['key']
        ctx.pc.add_rules(rule)
class ConfigList(list):
    '''List of configs in which every path shows up only once.'''

    def maybe_extend(self, other):
        '''Append the configs from other whose path is not in this list.

        The first config seen for a path wins, both against configs
        already in this list and against later ones in other.
        '''
        for candidate in other:
            for existing in self:
                if existing.path == candidate.path:
                    break
            else:
                # No config with this path yet.
                self.append(candidate)
+ excludes = [ + exclude + for exclude in excludes + if not any(c.path == exclude.path for c in configs) + ] + if excludes: + self.exclude = ProjectFiles(locale, excludes) + for pc in configs: + if locale and pc.locales is not None and locale not in pc.locales: + continue + for paths in pc.paths: + if ( + locale and + 'locales' in paths and + locale not in paths['locales'] + ): + continue + m = { + 'l10n': paths['l10n'].with_env({ + "locale": locale or REFERENCE_LOCALE + }), + 'module': paths.get('module'), + } + if 'reference' in paths: + m['reference'] = paths['reference'] + if self.mergebase is not None: + m['merge'] = paths['l10n'].with_env({ + "locale": locale, + "l10n_base": self.mergebase + }) + m['test'] = set(paths.get('test', [])) + if 'locales' in paths: + m['locales'] = paths['locales'][:] + self.matchers.append(m) + self.matchers.reverse() # we always iterate last first + # Remove duplicate patterns, comparing each matcher + # against all other matchers. + # Avoid n^2 comparisons by only scanning the upper triangle + # of a n x n matrix of all possible combinations. + # Using enumerate and keeping track of indexes, as we can't + # modify the list while iterating over it. 
+ drops = set() # duplicate matchers to remove + for i, m in enumerate(self.matchers[:-1]): + if i in drops: + continue # we're dropping this anyway, don't search again + for i_, m_ in enumerate(self.matchers[(i+1):]): + if (mozpath.realpath(m['l10n'].prefix) != + mozpath.realpath(m_['l10n'].prefix)): + # ok, not the same thing, continue + continue + if m['l10n'].pattern != m_['l10n'].pattern: + # We cannot guess whether same entry until the pattern is + # resolved, continue + continue + # check that we're comparing the same thing + if 'reference' in m: + if (mozpath.realpath(m['reference'].prefix) != + mozpath.realpath(m_.get('reference').prefix)): + raise RuntimeError('Mismatch in reference for ' + + mozpath.realpath(m['l10n'].prefix)) + drops.add(i_ + i + 1) + m['test'] |= m_['test'] + drops = sorted(drops, reverse=True) + for i in drops: + del self.matchers[i] + + def __iter__(self): + # The iteration is pretty different when we iterate over + # a localization vs over the reference. We do that latter + # when running in validation mode. 
def iter_reference(self):
    '''Iterate over reference files.

    Yields tuples of (path, reference, None, tests), sorted by path.
    Excludes are ignored while the reference files are collected.
    '''
    known = {}
    # unset self.exclude, as we don't want that for our reference files
    exclude = self.exclude
    self.exclude = None
    try:
        for matchers in self.matchers:
            if 'reference' not in matchers:
                continue
            matcher = matchers['reference']
            for path in self._files(matcher):
                refpath = matcher.sub(matchers['reference'], path)
                if refpath not in known:
                    known[refpath] = {
                        'reference': path,
                        'test': matchers.get('test')
                    }
    finally:
        # Restore the excludes as soon as the scan is over, so that
        # neither an error during the scan nor a consumer that stops
        # iterating early leaves this object without its excludes.
        self.exclude = exclude
    for path, d in sorted(known.items()):
        yield (path, d.get('reference'), None, d['test'])
def match(self, path):
    '''Map a single path onto (l10n_path, reference, mergepath, tests).

    Each configured entry is tried in order. With a locale set, the
    path is first tested against the entry's l10n matcher; otherwise,
    or if that fails, against its reference matcher. Excludes only
    apply when a locale is set. Returns None if nothing matches.

    This routine doesn't check that the files actually exist.
    '''
    if (
        self.locale is not None
        and self.exclude
        and self.exclude.match(path) is not None
    ):
        return None
    for entry in self.matchers:
        l10n_matcher = entry['l10n']
        if self.locale is not None and l10n_matcher.match(path) is not None:
            # path is a localized file, derive the other paths from it
            reference = (
                l10n_matcher.sub(entry['reference'], path)
                if 'reference' in entry else None
            )
            mergepath = (
                l10n_matcher.sub(entry['merge'], path)
                if 'merge' in entry else None
            )
            return path, reference, mergepath, entry.get('test')
        if 'reference' not in entry:
            continue
        ref_matcher = entry['reference']
        if ref_matcher.match(path) is None:
            continue
        # path is a reference file, derive the localized paths from it
        l10n_path = ref_matcher.sub(entry['l10n'], path)
        mergepath = (
            ref_matcher.sub(entry['merge'], path)
            if 'merge' in entry else None
        )
        return l10n_path, path, mergepath, entry.get('test')
    return None
def getDepth(self, cp):
    '''Return the comparison depth configured in a parsed l10n.ini.

    cp -- the config parser holding the parsed l10n.ini

    The value is the ``depth`` option of the ``[general]`` section.
    If the section or the option is missing, ``'.'`` is returned.
    '''
    try:
        return cp.get('general', 'depth')
    except (NoSectionError, NoOptionError):
        return '.'
def dirsIter(self):
    """Yield ``(module, (base, module))`` for each dir of this l10n.ini.

    Only the directories of this parser are reported, not those of
    included children.
    """
    for module in self.dirs:
        entry = (module, (self.base, module))
        yield entry
class SourceTreeConfigParser(L10nConfigParser):
    '''Subclassing L10nConfigParser to work with just the repos
    checked out next to each other instead of intermingled like
    we do for real builds.
    '''

    def __init__(self, inipath, base, redirects, **kwargs):
        '''Create a parser for side-by-side checkouts.

        inipath -- l10n.ini path
        base -- used to resolve local paths via branch names
        redirects -- used in unified repositories, mapping upstream
                     repos to local clones
        Optional keyword arguments are forwarded to L10nConfigParser,
        which keeps them as ConfigParser defaults. addChild() passes
        these defaults on to the parsers it creates, so the constructor
        has to accept them.
        '''
        L10nConfigParser.__init__(self, inipath, **kwargs)
        self.base = base
        self.redirects = redirects

    def addChild(self, title, path, orig_cp):
        '''Create a child SourceTreeConfigParser and load it.

        title -- indicates the module's name
        path -- indicates the path to the module's l10n.ini file
        orig_cp -- the configuration parser of this l10n.ini
        '''
        # check if there's a section with details for this include
        # we might have to check a different repo, or even VCS
        # for example, projects like "mail" indicate in
        # an "include_" section where to find the l10n.ini for "toolkit"
        details = 'include_' + title
        if orig_cp.has_section(details):
            branch = orig_cp.get(details, 'mozilla')
            # a redirect replaces the upstream name by the local clone
            branch = self.redirects.get(branch, branch)
            inipath = orig_cp.get(details, 'l10n.ini')
            path = mozpath.join(self.base, branch, inipath)
        else:
            path = mozpath.join(self.base, path)
        cp = SourceTreeConfigParser(path, self.base, self.redirects,
                                    **self.defaults)
        cp.loadConfigs()
        self.children.append(cp)
def _config_for_ini(self, projectconfig, aConfig):
    '''Add the paths of an ini parser and its children to projectconfig.

    For every directory reported by ``aConfig.dirsIter()`` one path
    entry with ``module``, ``reference`` and ``l10n`` patterns is
    added. The module ``mobile/android/base`` additionally enables the
    ``android-dtd`` test. Included parsers are handled recursively.
    '''
    for _, (basepath, module) in aConfig.dirsIter():
        reference = mozpath.normpath(
            '%s/%s/locales/en-US/**' % (basepath, module)
        )
        l10n = mozpath.normpath('{l10n_base}/{locale}/%s/**' % module)
        paths = {'module': module, 'reference': reference, 'l10n': l10n}
        if module == 'mobile/android/base':
            paths['test'] = ['android-dtd']
        projectconfig.add_paths(paths)
    for child in aConfig.children:
        self._config_for_ini(projectconfig, child)
class Matcher:
    '''Path pattern matcher
    Supports path matching similar to mozpath.match(), but does
    not match trailing file paths without trailing wildcards.
    Also gets a prefix, which is the path before the first wildcard,
    which is good for filesystem iterations, and allows to replace
    the own matches in a path on a different Matcher. compare-locales
    uses that to transform l10n and en-US paths back and forth.
    '''

    def __init__(self, pattern_or_other, env=None, root=None, encoding=None):
        '''Create regular expression similar to mozpath.match().

        pattern_or_other -- a pattern to parse, or a Matcher to copy
        env -- optional mapping of variable names to patterns
        root -- optional root, made absolute and terminated with /
        encoding -- optional encoding; if set, prefixes, regular
                    expressions and substituted paths are encoded
                    with it. Ignored when copying a Matcher, the
                    encoding of the other Matcher is used then.
        '''
        parser = PatternParser()
        # env defaults to None rather than a shared mutable dict
        real_env = {k: parser.parse(v) for k, v in (env or {}).items()}
        self._cached_re = None
        if root is not None:
            # make sure that our root is fully expanded and ends with /
            root = mozpath.abspath(root) + '/'
        # allow constructing Matchers from Matchers
        if isinstance(pattern_or_other, Matcher):
            other = pattern_or_other
            self.pattern = Pattern(other.pattern)
            self.env = other.env.copy()
            self.env.update(real_env)
            if root is not None:
                self.pattern.root = root
            self.encoding = other.encoding
            return
        self.env = real_env
        pattern = pattern_or_other
        self.pattern = parser.parse(pattern)
        if root is not None:
            self.pattern.root = root
        self.encoding = encoding

    def with_env(self, environ):
        '''Return a copy of this Matcher, its environment updated
        with environ.
        '''
        return Matcher(self, environ)

    @property
    def prefix(self):
        '''The expanded pattern up to its prefix length, encoded if
        an encoding is set.
        '''
        subpattern = Pattern(self.pattern[:self.pattern.prefix_length])
        subpattern.root = self.pattern.root
        prefix = subpattern.expand(self.env)
        if self.encoding is not None:
            prefix = prefix.encode(self.encoding)
        return prefix

    def match(self, path):
        '''Test the given path against this matcher and its environment.

        Return None if there's no match, and the dictionary of matched
        variables in this matcher if there's a match.
        '''
        self._cache_regex()
        m = self._cached_re.match(path)
        if m is None:
            return None
        d = m.groupdict()
        if self.encoding is not None:
            # Groups that did not participate in the match are None,
            # e.g. an optional ** group. Keep them as None, sub()
            # treats them as empty.
            d = {
                key: None if value is None else value.decode(self.encoding)
                for key, value in d.items()
            }
        if 'android_locale' in d and 'locale' not in d:
            # map android_locale to locale code
            locale = d['android_locale']
            # map legacy locale codes, he <-> iw, id <-> in, yi <-> ji
            locale = re.sub(
                r'(iw|in|ji)(?=\Z|-)',
                lambda legacy: ANDROID_STANDARD_MAP[legacy.group(1)],
                locale
            )
            locale = re.sub(r'-r([A-Z]{2})', r'-\1', locale)
            locale = locale.replace('b+', '').replace('+', '-')
            d['locale'] = locale
        return d

    def _cache_regex(self):
        '''Compile the regular expression for this pattern once.'''
        if self._cached_re is not None:
            return
        pattern = self.pattern.regex_pattern(self.env) + '$'
        if self.encoding is not None:
            pattern = pattern.encode(self.encoding)
        self._cached_re = re.compile(pattern)

    def sub(self, other, path):
        '''
        Replace the wildcard matches in this pattern into the
        pattern of the other Match object.

        Return None if path doesn't match this Matcher.
        '''
        m = self.match(path)
        if m is None:
            return None
        env = {}
        env.update(
            (key, Literal(value if value is not None else ''))
            for key, value in m.items()
        )
        # the environment of the other Matcher wins over our matches
        env.update(other.env)
        path = other.pattern.expand(env)
        if self.encoding is not None:
            path = path.encode(self.encoding)
        return path

    def concat(self, other):
        '''Concat two Matcher objects.

        The intent is to create one Matcher with variable substitutions that
        behaves as if you joined the resulting paths.
        This doesn't do path separator logic, though, and it won't resolve
        parent directories.

        Raises ValueError if the other matcher is rooted.
        '''
        if not isinstance(other, Matcher):
            other_matcher = Matcher(other)
        else:
            other_matcher = other
        other_pattern = other_matcher.pattern
        if other_pattern.root is not None:
            raise ValueError('Other matcher must not be rooted')
        result = Matcher(self)
        result.pattern += other_pattern
        if self.pattern.prefix_length == len(self.pattern):
            # our prefix spans our whole pattern, so it continues
            # into the prefix of the other pattern
            result.pattern.prefix_length += other_pattern.prefix_length
        result.env.update(other_matcher.env)
        return result

    def __str__(self):
        return self.pattern.expand(self.env)

    def __repr__(self):
        return '{}({!r}, env={!r}, root={!r})'.format(
            type(self).__name__, self.pattern, self.env, self.pattern.root
        )

    def __ne__(self, other):
        return not (self == other)

    def __eq__(self, other):
        '''Equality for Matcher.

        The equality for Matchers is defined to have the same pattern,
        and no conflicting environment. Additional environment settings
        in self or other are OK.
        '''
        if other.__class__ is not self.__class__:
            return NotImplemented
        if self.pattern != other.pattern:
            return False
        if self.env and other.env:
            for k in self.env:
                if k not in other.env:
                    continue
                if self.env[k] != other.env[k]:
                    return False
        if self.encoding != other.encoding:
            return False
        return True
class Pattern(list, Node):
    '''List of pattern nodes, with an optional root and prefix length.

    ``root`` and ``prefix_length`` are copied from the iterable used to
    create the Pattern if it has them, and are None otherwise.
    '''

    def __init__(self, iterable=()):
        # immutable default, no list shared between calls
        list.__init__(self, iterable)
        self.root = getattr(iterable, 'root', None)
        self.prefix_length = getattr(iterable, 'prefix_length', None)

    def regex_pattern(self, env):
        '''Create the regular expression source for all child nodes,
        prefixed with the escaped root unless the first child expands
        to an absolute path.
        '''
        root = ''
        if self.root is not None:
            # make sure we're not hiding a full path
            first_seg = self[0].expand(env)
            if not os.path.isabs(first_seg):
                root = re.escape(self.root)
        return root + ''.join(
            child.regex_pattern(env) for child in self
        )

    def expand(self, env, raise_missing=False):
        '''Convert this pattern to a string with the given environment,
        prefixed with the root unless the first child expands to an
        absolute path.

        If a child hits a variable that's missing in env, the expansion
        stops in front of that child, or raises MissingEnvironment if
        raise_missing is set.
        '''
        root = ''
        if self.root is not None:
            # make sure we're not hiding a full path
            first_seg = self[0].expand(env)
            if not os.path.isabs(first_seg):
                root = self.root
        return root + ''.join(self._expand_children(env, raise_missing))

    def _expand_children(self, env, raise_missing):
        # Helper iterator to convert Exception to a stopped iterator
        for child in self:
            try:
                # children always raise, we decide here what to do
                yield child.expand(env, raise_missing=True)
            except MissingEnvironment:
                if raise_missing:
                    raise
                return

    def __ne__(self, other):
        return not (self == other)

    def __eq__(self, other):
        '''Patterns are equal if their nodes, root and prefix length
        are. Plain lists only need to have equal items.
        '''
        if not super().__eq__(other):
            return False
        if other.__class__ == list:
            # good for tests and debugging
            return True
        return (
            self.root == other.root
            and self.prefix_length == other.prefix_length
        )
class AndroidLocale(Variable):
    '''Subclass for Android locale code mangling.

    Supports ab-rCD and b+ab+Scrip+DE.
    Language and Language-Region tags get mapped to ab-rCD, more complex
    Locale tags to b+.
    '''
    def __init__(self, repeat=False):
        super().__init__('android_locale', repeat)

    def _pattern_from_env(self, env):
        '''Match the Android form of the locale in env literally, or
        anything if there's no locale.
        '''
        android_locale = self._get_android_locale(env)
        return '.+?' if android_locale is None else re.escape(android_locale)

    def expand(self, env, raise_missing=False):
        '''Create the Android locale code for the locale in env.

        Raises MissingEnvironment if env has no locale.
        '''
        android_locale = self._get_android_locale(env)
        if android_locale is None:
            raise MissingEnvironment
        return android_locale

    def _get_android_locale(self, env):
        '''Convert the locale in env to its Android form, None if env
        has no locale.
        '''
        if 'locale' not in env:
            return None
        expanded = env['locale'].expand(self._no_cycle(env))
        # map legacy locale codes, he <-> iw, id <-> in, yi <-> ji
        bcp47 = re.sub(
            r'(he|id|yi)(?=\Z|-)',
            lambda found: ANDROID_LEGACY_MAP[found.group(1)],
            expanded
        )
        if re.match(r'[a-z]{2,3}-[A-Z]{2}', bcp47):
            # language and region, ab-rCD
            return '{}-r{}'.format(*bcp47.split('-'))
        if '-' in bcp47:
            # anything more complex, b+ab+Scrip+DE
            return 'b+' + bcp47.replace('-', '+')
        return bcp47
def parse(self, pattern):
    '''Parse a pattern string into a Pattern of nodes.

    Pattern objects are returned as is, and for Matcher objects their
    pattern is returned. Strings are split at wildcards and variables,
    with the text in between stored as Literal nodes. The rest of the
    string after the last special is always appended as Literal.
    If no wildcard set the prefix length, it spans the whole pattern.
    '''
    if isinstance(pattern, Pattern):
        return pattern
    if isinstance(pattern, Matcher):
        return pattern.pattern
    # Reset result and parsing state. handle() appends to
    # self.pattern and moves self._cursor behind each special.
    parsed = self.pattern = Pattern()
    self._stargroup = itertools.count(1)
    self._known_vars = set()
    self._cursor = 0
    for special in PATH_SPECIAL.finditer(pattern):
        start = special.start()
        if start > self._cursor:
            parsed.append(Literal(pattern[self._cursor:start]))
        self.handle(special)
    parsed.append(Literal(pattern[self._cursor:]))
    if parsed.prefix_length is None:
        parsed.prefix_length = len(parsed)
    return parsed
def same(self, other):
    '''Compare two configurations, ignoring locales.

    Both need to be of the same class, agree on path, root, paths,
    rules and environ, and have the same number of children, which
    need to be the same pairwise, too.
    '''
    if other.__class__ is not self.__class__:
        return False
    if len(self.children) != len(other.children):
        return False
    compared = ('path', 'root', 'paths', 'rules', 'environ')
    if any(getattr(self, name) != getattr(other, name) for name in compared):
        return False
    return all(
        mine.same(theirs)
        for mine, theirs in zip(self.children, other.children)
    )
def set_filter_py(self, filter_function):
    '''Hook up a legacy filter.py function.

    Asserts that no rules are set. The function is wrapped, so that
    its results are normalized: True becomes 'error', False becomes
    'ignore', and 'report' becomes 'warning'. If the function raises,
    the result is 'error'.
    '''
    assert not self.rules
    normalized = {
        True: 'error',
        False: 'ignore',
        'report': 'warning'
    }
    accepted = ('error', 'ignore', 'warning', None)

    def filter_(module, path, entity=None):
        try:
            result = filter_function(module, path, entity=entity)
        except BaseException:  # we really want to handle EVERYTHING here
            return 'error'
        result = normalized.get(result, result)
        assert result in accepted
        return result

    self.filter_py = filter_
def cache(self, locale):
    '''Return the FilterCache for the given locale.

    The cache holds the l10n matchers of all paths that apply to the
    locale, and copies of all rules, each with the locale set in the
    environment of its matcher. Only the cache of the last requested
    locale is kept, asking for another locale rebuilds it.
    '''
    current = self._cache
    if current and current.locale == locale:
        return current
    fresh = self._cache = self.FilterCache(locale)
    for entry in self.paths:
        # paths without a locales list apply to all locales
        if 'locales' not in entry or locale in entry['locales']:
            fresh.l10n_paths.append(entry['l10n'].with_env({
                "locale": locale
            }))
    for rule in self.rules:
        # copy the rule, the configured one stays locale-independent
        fresh.rules.append(dict(rule, path=rule['path'].with_env({
            "locale": locale
        })))
    return fresh
def _compile_rule(self, rule):
    '''Expand a rule into rules with a single path and a single key.

    A list of paths or a non-string collection of keys results in one
    rule per item. String paths are converted to Matchers with the
    environment and root of this config. Keys are compiled to regular
    expressions: keys starting with ``re:`` are taken as expression,
    other keys need to match literally and completely.

    The rule passed in is never modified, changed rules are copies.
    '''
    assert 'path' in rule
    if isinstance(rule['path'], list):
        for path in rule['path']:
            _rule = rule.copy()
            _rule['path'] = Matcher(path, env=self.environ, root=self.root)
            yield from self._compile_rule(_rule)
        return
    if isinstance(rule['path'], str):
        # copy first, to not replace the path in the caller's dict
        rule = rule.copy()
        rule['path'] = Matcher(
            rule['path'], env=self.environ, root=self.root
        )
    if 'key' not in rule:
        yield rule
        return
    if not isinstance(rule['key'], str):
        for key in rule['key']:
            _rule = rule.copy()
            _rule['key'] = key
            yield from self._compile_rule(_rule)
        return
    rule = rule.copy()
    key = rule['key']
    if key.startswith('re:'):
        key = key[3:]
    else:
        # literal key, anchored at the end, as it's used with match()
        key = re.escape(key) + '$'
    rule['key'] = re.compile(key)
    yield rule
'Mapping of locales to CLDR plural categories as implemented by PluralForm.jsm'

# Plural categories per plural rule, indexed by the rule number.
# The order of the categories is the order of the plural forms
# of that rule.
CATEGORIES_BY_INDEX = (
    # 0 (Chinese)
    ('other',),
    # 1 (English)
    ('one', 'other'),
    # 2 (French)
    ('one', 'other'),
    # 3 (Latvian)
    ('zero', 'one', 'other'),
    # 4 (Scottish Gaelic)
    ('one', 'two', 'few', 'other'),
    # 5 (Romanian)
    ('one', 'few', 'other'),
    # 6 (Lithuanian)
    # CLDR: one, few, many (fractions), other
    ('one', 'other', 'few'),
    # 7 (Russian)
    # CLDR: one, few, many, other (fractions)
    ('one', 'few', 'many'),
    # 8 (Slovak)
    # CLDR: one, few, many (fractions), other
    ('one', 'few', 'other'),
    # 9 (Polish)
    # CLDR: one, few, many, other (fractions)
    ('one', 'few', 'many'),
    # 10 (Slovenian)
    ('one', 'two', 'few', 'other'),
    # 11 (Irish Gaelic)
    ('one', 'two', 'few', 'many', 'other'),
    # 12 (Arabic)
    # CLDR: zero, one, two, few, many, other
    ('one', 'two', 'few', 'many', 'other', 'zero'),
    # 13 (Maltese)
    ('one', 'few', 'many', 'other'),
    # 14 (Unused)
    # CLDR: one, other
    ('one', 'two', 'other'),
    # 15 (Icelandic, Macedonian)
    ('one', 'other'),
    # 16 (Breton)
    ('one', 'two', 'few', 'many', 'other'),
    # 17 (Shuar)
    # CLDR: (missing)
    ('zero', 'other'),
    # 18 (Welsh),
    ('zero', 'one', 'two', 'few', 'many', 'other'),
    # 19 (Bosnian, Croatian, Serbian)
    ('one', 'few', 'other'),
)

CATEGORIES_EXCEPTIONS = {
}

# Plural rule number per locale, an index into CATEGORIES_BY_INDEX.
CATEGORIES_BY_LOCALE = {
    'ace': 0,
    'ach': 1,
    'af': 1,
    'ak': 2,
    'an': 1,
    'ar': 12,
    'arn': 1,
    'as': 1,
    'ast': 1,
    'az': 1,
    'be': 7,
    'bg': 1,
    'bn': 2,
    'bo': 0,
    'br': 16,
    'brx': 1,
    'bs': 19,
    'ca': 1,
    'cak': 1,
    'ckb': 1,
    'crh': 1,
    'cs': 8,
    'csb': 9,
    'cv': 1,
    'cy': 18,
    'da': 1,
    'de': 1,
    'dsb': 10,
    'el': 1,
    'en': 1,
    'eo': 1,
    'es': 1,
    'et': 1,
    'eu': 1,
    'fa': 2,
    'ff': 1,
    'fi': 1,
    'fr': 2,
    'frp': 2,
    'fur': 1,
    'fy': 1,
    'ga': 11,
    'gd': 4,
    'gl': 1,
    'gn': 1,
    'gu': 2,
    'he': 1,
    'hi': 2,
    'hr': 19,
    'hsb': 10,
    'hto': 1,
    'hu': 1,
    'hy': 1,
    'hye': 1,
    'ia': 1,
    'id': 0,
    'ilo': 0,
    'is': 15,
    'it': 1,
    'ja': 0,
    'jiv': 17,
    'ka': 1,
    'kab': 1,
    'kk': 1,
    'km': 0,
    'kn': 1,
    'ko': 0,
    'ks': 1,
    'ku': 1,
    'lb': 1,
    'lg': 1,
    'lij': 1,
    'lo': 0,
    'lt': 6,
    'ltg': 3,
    'lv': 3,
    'lus': 0,
    'mai': 1,
    'meh': 0,
    'mix': 0,
    'mk': 15,
    'ml': 1,
    'mn': 1,
    'mr': 1,
    'ms': 0,
    'my': 0,
    'nb': 1,
    'ne': 1,
    'nl': 1,
    'nn': 1,
    'nr': 1,
    'nso': 2,
    'ny': 1,
    'oc': 2,
    'or': 1,
    'pa': 2,
    'pai': 0,
    'pl': 9,
    'pt': 1,
    'quy': 1,
    'qvi': 1,
    'rm': 1,
    'ro': 5,
    'ru': 7,
    'rw': 1,
    'sah': 0,
    'sat': 1,
    'sc': 1,
    'scn': 1,
    'sco': 1,
    'si': 1,
    'sk': 8,
    'skr': 1,
    'sl': 10,
    'son': 1,
    'sq': 1,
    'sr': 19,
    'ss': 1,
    'st': 1,
    'sv': 1,
    'sw': 1,
    'szl': 9,
    'ta': 1,
    'te': 1,
    'tg': 1,
    'th': 0,
    'tl': 1,
    'tn': 1,
    'tr': 1,
    'trs': 1,
    'ts': 1,
    'tsz': 1,
    'uk': 7,
    'ur': 1,
    'uz': 1,
    've': 1,
    'vi': 0,
    'wo': 0,
    'xh': 1,
    'zam': 1,
    'zh-CN': 0,
    'zh-TW': 0,
    'zu': 2,
}


def get_plural(locale):
    '''Return the tuple of plural categories for the given locale.

    Returns None if there's no plural rule for the locale.
    '''
    plural_form = get_plural_rule(locale)
    if plural_form is None:
        return None
    return CATEGORIES_BY_INDEX[plural_form]


def get_plural_rule(locale):
    '''Return the plural rule number for the given locale.

    The full locale code is tried first, and then the part in front
    of the first ``-``. Returns None if neither is known, or if locale
    is None.
    '''
    if locale is None:
        return None
    if locale in CATEGORIES_BY_LOCALE:
        return CATEGORIES_BY_LOCALE[locale]
    locale = locale.split('-', 1)[0]
    return CATEGORIES_BY_LOCALE.get(locale)
+ +The serialization logic is based on the cross-channel merge algorithm. +It's taking the file structure for the first file, and localizable entries +from the last. +Input data is the parsed reference as a list of parser.walk(), +the existing localized file, also a list of parser.walk(), and a dictionary +of newly added keys and raw values. +To remove a string from a localization, pass `None` as value for a key. + +The marshalling between raw values and entities is done via Entity.unwrap +and Entity.wrap. + +To avoid adding English reference strings into the generated file, the +actual entities in the reference are replaced with Placeholders, which +are removed in a final pass over the result of merge_resources. After that, +we also prune whitespace once more.` +''' + +from codecs import encode +from functools import reduce + +from compare_locales.merge import merge_resources, serialize_legacy_resource +from compare_locales.parser import getParser +from compare_locales.parser.base import ( + Entity, + PlaceholderEntity, + Junk, + Whitespace, +) + + +class SerializationNotSupportedError(ValueError): + pass + + +def serialize(filename, reference, old_l10n, new_data): + '''Returns a byte string of the serialized content to use. + + Input are a filename to create the right parser, a reference and + an existing localization, both as the result of parser.walk(). + Finally, new_data is a dictionary of key to raw values to serialize. + + Raises a SerializationNotSupportedError if we don't support the file + format. 
+ ''' + try: + parser = getParser(filename) + except UserWarning: + raise SerializationNotSupportedError( + f'Unsupported file format ({filename}).') + # create template, whitespace and all + placeholders = [ + placeholder(entry) + for entry in reference + if not isinstance(entry, Junk) + ] + ref_mapping = { + entry.key: entry + for entry in reference + if isinstance(entry, Entity) + } + # strip obsolete strings + old_l10n = sanitize_old(ref_mapping.keys(), old_l10n, new_data) + # create new Entities + # .val can just be "", merge_channels doesn't need that + new_l10n = [] + for key, new_raw_val in new_data.items(): + if new_raw_val is None or key not in ref_mapping: + continue + ref_ent = ref_mapping[key] + new_l10n.append(ref_ent.wrap(new_raw_val)) + + merged = merge_resources( + parser, + [placeholders, old_l10n, new_l10n], + keep_newest=False + ) + pruned = prune_placeholders(merged) + return encode(serialize_legacy_resource(pruned), parser.encoding) + + +def sanitize_old(known_keys, old_l10n, new_data): + """Strip Junk and replace obsolete messages with placeholders. + If new_data has `None` as a value, strip the existing translation. + Use placeholders generously, so that we can rely on `prune_placeholders` + to find their associated comments and remove them, too. + """ + + def should_placeholder(entry): + # If entry is an Entity, check if it's obsolete + # or marked to be removed. 
+ if not isinstance(entry, Entity): + return False + if entry.key not in known_keys: + return True + return entry.key in new_data and new_data[entry.key] is None + + return [ + placeholder(entry) + if should_placeholder(entry) + else entry + for entry in old_l10n + if not isinstance(entry, Junk) + ] + + +def placeholder(entry): + if isinstance(entry, Entity): + return PlaceholderEntity(entry.key) + return entry + + +def prune_placeholders(entries): + pruned = [ + entry for entry in entries + if not isinstance(entry, PlaceholderEntity) + ] + + def prune_whitespace(acc, entity): + if len(acc) and isinstance(entity, Whitespace): + prev_entity = acc[-1] + + if isinstance(prev_entity, Whitespace): + # Prefer the longer whitespace. + if len(entity.all) > len(prev_entity.all): + acc[-1] = entity + return acc + + acc.append(entity) + return acc + + return reduce(prune_whitespace, pruned, []) diff --git a/third_party/python/compare_locales/compare_locales/util.py b/third_party/python/compare_locales/compare_locales/util.py new file mode 100644 index 0000000000..71eadd8749 --- /dev/null +++ b/third_party/python/compare_locales/compare_locales/util.py @@ -0,0 +1,11 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# This file is shared between compare-locales and locale-inspector +# test_util is in compare-locales only, for the sake of easy +# development. + + +def parseLocales(content): + return sorted(l.split()[0] for l in content.splitlines() if l) |