From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- third_party/rust/jsparagus/.cargo-checksum.json | 1 + third_party/rust/jsparagus/.flake8 | 15 + third_party/rust/jsparagus/.githooks/pre-commit | 14 + .../rust/jsparagus/.github/workflows/ci-daily.yml | 46 + .../jsparagus/.github/workflows/ci-generated.yml | 54 + .../rust/jsparagus/.github/workflows/ci-issues.yml | 44 + .../rust/jsparagus/.github/workflows/ci-push.yml | 57 + .../.github/workflows/real-js-benchmark.yml | 135 ++ .../rust/jsparagus/.github/workflows/rust.yml | 24 + .../jsparagus/.github/workflows/smoosh-status.yml | 87 + third_party/rust/jsparagus/.metrics/README.md | 79 + .../rust/jsparagus/.metrics/create-ci-branch.sh | 38 + .../rust/jsparagus/.metrics/fuzzbug_count_badge.py | 22 + .../rust/jsparagus/.metrics/fuzzbug_date_badge.py | 40 + .../rust/jsparagus/.metrics/generated_README.md | 48 + .../jsparagus/.metrics/not_implemented_badge.py | 14 + .../jsparagus/.metrics/not_implemented_count.py | 19 + .../rust/jsparagus/.metrics/populate_fuzzbug.sh | 19 + .../jsparagus/.metrics/populate_not_implemented.sh | 41 + third_party/rust/jsparagus/CODE_OF_CONDUCT.md | 8 + third_party/rust/jsparagus/Cargo.lock | 904 ++++++++++ third_party/rust/jsparagus/Cargo.toml | 69 + third_party/rust/jsparagus/LICENSE | 15 + third_party/rust/jsparagus/LICENSE-APACHE-2.0 | 218 +++ third_party/rust/jsparagus/LICENSE-MIT | 7 + third_party/rust/jsparagus/Makefile | 111 ++ third_party/rust/jsparagus/README.md | 159 ++ .../benchmarks/compare-spidermonkey-parsers.js | 315 ++++ third_party/rust/jsparagus/gecko-patches.txt | 1 + third_party/rust/jsparagus/journal.md | 272 +++ third_party/rust/jsparagus/js-quirks.md | 1036 +++++++++++ third_party/rust/jsparagus/js_parser/README.md | 67 + third_party/rust/jsparagus/js_parser/__init__.py | 0 .../js_parser/es-lexical-simplified.esgrammar | 485 ++++++ .../jsparagus/js_parser/es-simplified.esgrammar | 1453 ++++++++++++++++ third_party/rust/jsparagus/js_parser/es.esgrammar | 1668 ++++++++++++++++++ .../rust/jsparagus/js_parser/esgrammar.pgen | 290 ++++ .../rust/jsparagus/js_parser/extract_es_grammar.py | 567 ++++++ .../js_parser/generate_js_parser_tables.py | 140 ++ third_party/rust/jsparagus/js_parser/lexer.py | 315 ++++ .../rust/jsparagus/js_parser/load_es_grammar.py | 129 ++ .../rust/jsparagus/js_parser/parse_esgrammar.py | 545 ++++++ third_party/rust/jsparagus/js_parser/parser.py | 42 + .../rust/jsparagus/js_parser/slash.esgrammar | 1683 ++++++++++++++++++ third_party/rust/jsparagus/js_parser/try_it.py | 59 + third_party/rust/jsparagus/jsparagus/README.md | 65 + third_party/rust/jsparagus/jsparagus/__init__.py | 1 + third_party/rust/jsparagus/jsparagus/actions.py | 651 +++++++ third_party/rust/jsparagus/jsparagus/aps.py | 422 +++++ .../rust/jsparagus/jsparagus/emit/__init__.py | 6 + .../rust/jsparagus/jsparagus/emit/python.py | 228 +++ third_party/rust/jsparagus/jsparagus/emit/rust.py | 903 ++++++++++ third_party/rust/jsparagus/jsparagus/extension.py | 108 ++ third_party/rust/jsparagus/jsparagus/gen.py | 137 ++ third_party/rust/jsparagus/jsparagus/grammar.py | 1248 ++++++++++++++ third_party/rust/jsparagus/jsparagus/lexer.py | 219 +++ third_party/rust/jsparagus/jsparagus/lr0.py | 385 +++++ third_party/rust/jsparagus/jsparagus/main.py | 28 + third_party/rust/jsparagus/jsparagus/ordered.py | 157 ++ third_party/rust/jsparagus/jsparagus/parse_pgen.py | 280 +++ 
.../jsparagus/jsparagus/parse_pgen_generated.py | 1429 +++++++++++++++ .../rust/jsparagus/jsparagus/parse_table.py | 1820 ++++++++++++++++++++ third_party/rust/jsparagus/jsparagus/rewrites.py | 735 ++++++++ third_party/rust/jsparagus/jsparagus/runtime.py | 317 ++++ third_party/rust/jsparagus/jsparagus/types.py | 326 ++++ third_party/rust/jsparagus/jsparagus/utils.py | 74 + third_party/rust/jsparagus/mozconfigs/smoosh-debug | 6 + third_party/rust/jsparagus/mozconfigs/smoosh-opt | 7 + third_party/rust/jsparagus/pgen.pgen | 89 + third_party/rust/jsparagus/requirements.txt | 13 + third_party/rust/jsparagus/smoosh_status.py | 193 +++ third_party/rust/jsparagus/src/bin/smoosh_tools.rs | 964 +++++++++++ third_party/rust/jsparagus/src/lib.rs | 24 + third_party/rust/jsparagus/test.sh | 31 + third_party/rust/jsparagus/tests/__init__.py | 0 third_party/rust/jsparagus/tests/test.py | 1204 +++++++++++++ third_party/rust/jsparagus/tests/test_js.py | 207 +++ .../rust/jsparagus/tests/test_parse_pgen.py | 33 + third_party/rust/jsparagus/update.sh | 26 + third_party/rust/jsparagus/update_stencil.py | 722 ++++++++ third_party/rust/jsparagus/update_unicode.py | 354 ++++ 81 files changed, 24767 insertions(+) create mode 100644 third_party/rust/jsparagus/.cargo-checksum.json create mode 100644 third_party/rust/jsparagus/.flake8 create mode 100755 third_party/rust/jsparagus/.githooks/pre-commit create mode 100644 third_party/rust/jsparagus/.github/workflows/ci-daily.yml create mode 100644 third_party/rust/jsparagus/.github/workflows/ci-generated.yml create mode 100644 third_party/rust/jsparagus/.github/workflows/ci-issues.yml create mode 100644 third_party/rust/jsparagus/.github/workflows/ci-push.yml create mode 100644 third_party/rust/jsparagus/.github/workflows/real-js-benchmark.yml create mode 100644 third_party/rust/jsparagus/.github/workflows/rust.yml create mode 100644 third_party/rust/jsparagus/.github/workflows/smoosh-status.yml create mode 100644 third_party/rust/jsparagus/.metrics/README.md create mode 100755 third_party/rust/jsparagus/.metrics/create-ci-branch.sh create mode 100644 third_party/rust/jsparagus/.metrics/fuzzbug_count_badge.py create mode 100644 third_party/rust/jsparagus/.metrics/fuzzbug_date_badge.py create mode 100644 third_party/rust/jsparagus/.metrics/generated_README.md create mode 100644 third_party/rust/jsparagus/.metrics/not_implemented_badge.py create mode 100644 third_party/rust/jsparagus/.metrics/not_implemented_count.py create mode 100755 third_party/rust/jsparagus/.metrics/populate_fuzzbug.sh create mode 100755 third_party/rust/jsparagus/.metrics/populate_not_implemented.sh create mode 100644 third_party/rust/jsparagus/CODE_OF_CONDUCT.md create mode 100644 third_party/rust/jsparagus/Cargo.lock create mode 100644 third_party/rust/jsparagus/Cargo.toml create mode 100644 third_party/rust/jsparagus/LICENSE create mode 100644 third_party/rust/jsparagus/LICENSE-APACHE-2.0 create mode 100644 third_party/rust/jsparagus/LICENSE-MIT create mode 100644 third_party/rust/jsparagus/Makefile create mode 100644 third_party/rust/jsparagus/README.md create mode 100644 third_party/rust/jsparagus/benchmarks/compare-spidermonkey-parsers.js create mode 100644 third_party/rust/jsparagus/gecko-patches.txt create mode 100644 third_party/rust/jsparagus/journal.md create mode 100644 third_party/rust/jsparagus/js-quirks.md create mode 100644 third_party/rust/jsparagus/js_parser/README.md create mode 100644 third_party/rust/jsparagus/js_parser/__init__.py create mode 100644 
third_party/rust/jsparagus/js_parser/es-lexical-simplified.esgrammar create mode 100644 third_party/rust/jsparagus/js_parser/es-simplified.esgrammar create mode 100644 third_party/rust/jsparagus/js_parser/es.esgrammar create mode 100644 third_party/rust/jsparagus/js_parser/esgrammar.pgen create mode 100755 third_party/rust/jsparagus/js_parser/extract_es_grammar.py create mode 100755 third_party/rust/jsparagus/js_parser/generate_js_parser_tables.py create mode 100644 third_party/rust/jsparagus/js_parser/lexer.py create mode 100644 third_party/rust/jsparagus/js_parser/load_es_grammar.py create mode 100644 third_party/rust/jsparagus/js_parser/parse_esgrammar.py create mode 100644 third_party/rust/jsparagus/js_parser/parser.py create mode 100644 third_party/rust/jsparagus/js_parser/slash.esgrammar create mode 100755 third_party/rust/jsparagus/js_parser/try_it.py create mode 100644 third_party/rust/jsparagus/jsparagus/README.md create mode 100644 third_party/rust/jsparagus/jsparagus/__init__.py create mode 100644 third_party/rust/jsparagus/jsparagus/actions.py create mode 100644 third_party/rust/jsparagus/jsparagus/aps.py create mode 100644 third_party/rust/jsparagus/jsparagus/emit/__init__.py create mode 100644 third_party/rust/jsparagus/jsparagus/emit/python.py create mode 100644 third_party/rust/jsparagus/jsparagus/emit/rust.py create mode 100644 third_party/rust/jsparagus/jsparagus/extension.py create mode 100755 third_party/rust/jsparagus/jsparagus/gen.py create mode 100644 third_party/rust/jsparagus/jsparagus/grammar.py create mode 100644 third_party/rust/jsparagus/jsparagus/lexer.py create mode 100644 third_party/rust/jsparagus/jsparagus/lr0.py create mode 100755 third_party/rust/jsparagus/jsparagus/main.py create mode 100644 third_party/rust/jsparagus/jsparagus/ordered.py create mode 100755 third_party/rust/jsparagus/jsparagus/parse_pgen.py create mode 100644 third_party/rust/jsparagus/jsparagus/parse_pgen_generated.py create mode 100644 third_party/rust/jsparagus/jsparagus/parse_table.py create mode 100644 third_party/rust/jsparagus/jsparagus/rewrites.py create mode 100644 third_party/rust/jsparagus/jsparagus/runtime.py create mode 100644 third_party/rust/jsparagus/jsparagus/types.py create mode 100644 third_party/rust/jsparagus/jsparagus/utils.py create mode 100644 third_party/rust/jsparagus/mozconfigs/smoosh-debug create mode 100644 third_party/rust/jsparagus/mozconfigs/smoosh-opt create mode 100644 third_party/rust/jsparagus/pgen.pgen create mode 100644 third_party/rust/jsparagus/requirements.txt create mode 100644 third_party/rust/jsparagus/smoosh_status.py create mode 100644 third_party/rust/jsparagus/src/bin/smoosh_tools.rs create mode 100644 third_party/rust/jsparagus/src/lib.rs create mode 100755 third_party/rust/jsparagus/test.sh create mode 100644 third_party/rust/jsparagus/tests/__init__.py create mode 100755 third_party/rust/jsparagus/tests/test.py create mode 100644 third_party/rust/jsparagus/tests/test_js.py create mode 100644 third_party/rust/jsparagus/tests/test_parse_pgen.py create mode 100755 third_party/rust/jsparagus/update.sh create mode 100755 third_party/rust/jsparagus/update_stencil.py create mode 100644 third_party/rust/jsparagus/update_unicode.py (limited to 'third_party/rust/jsparagus') diff --git a/third_party/rust/jsparagus/.cargo-checksum.json b/third_party/rust/jsparagus/.cargo-checksum.json new file mode 100644 index 0000000000..d353031fbf --- /dev/null +++ b/third_party/rust/jsparagus/.cargo-checksum.json @@ -0,0 +1 @@ 
+{"files":{".flake8":"d0b5a0ca5e524819918726fbc8e8e7e41b4cca3cd06099fa5ed4bf96b0997c93",".githooks/pre-commit":"f37701f35731e8dec0dc0579669069cd720ba2d33dce24fee57735ee614ba654",".github/workflows/ci-daily.yml":"2bc9aa85b1f88ca0474b6fddc62f7182f5ea9e8257b77d60196b1ab5699ad4f8",".github/workflows/ci-generated.yml":"394a805aad7bd4ac66e2ddab7158c9e59183a026cb43d8821c55013e8dcb1e61",".github/workflows/ci-issues.yml":"ab3fa56ceaa65b1afb1a76285598a99befdd8131f68cb4bab0c7502dff9ac03f",".github/workflows/ci-push.yml":"d8133372446aae1437c1f9be88995b2be422b87aace5fce25b3d494656abdced",".github/workflows/real-js-benchmark.yml":"014bfb992808d4cc2158f5b3f47e20c99a7ecea40470595e4a22c0c070c4538f",".github/workflows/rust.yml":"5db3658068f4bef356a24e2a21cc3f7c34b4f19405e24884f1763749e82c5dff",".github/workflows/smoosh-status.yml":"7e6eb19a9fb5c18c5bdaefd477af5d94a374ed0a95f0826e92c9f0c0d15a5b48",".metrics/README.md":"8c963dc571c77f90d0ba1a67e48a32cc8c10166971b9fe8f2926ff00986262c4",".metrics/create-ci-branch.sh":"2dc3130e2eccb474edfdeb9ee1f43140f6f0a2489f013d153c3b3497e37d20c7",".metrics/fuzzbug_count_badge.py":"ad0b0dff8345e64eba17b14d583675df0b9aec4f9ca845166763384e1f1a2c29",".metrics/fuzzbug_date_badge.py":"e938af4faa21cebb9141227c3e3dcd57da3e98e0298d7bc2f9f257346156ad0d",".metrics/generated_README.md":"9be5ea93b90622b6e57969a90073957de4a00f9a05fb074e8146df130072ebb1",".metrics/not_implemented_badge.py":"a550a2e4b1cc151b80b2d6dcfbd8ccfaa3728bc7d759da2bf6eca5981de9e336",".metrics/not_implemented_count.py":"fb2741497b81668511afb761b941609fdc1eb343a3b81a4383561ca394838e26",".metrics/populate_fuzzbug.sh":"97d79de3075113846ff451db87769147427ab3581bc5629d53c7b2fca8dc86cf",".metrics/populate_not_implemented.sh":"75ea57b552dec3c0cd794be2c971a2c085bb99c5526176be860a2fb8af771021","CODE_OF_CONDUCT.md":"baa6d197a7e955ebe93c3b78e2d89d6f6f8d76fdc6c6ffb47ec937034ac2330e","Cargo.lock":"42b56c3499ce495710cffdb69db94b29667ef5f6d6b6849cfee7b113db192cff","Cargo.toml":"354843246df7c3671dd004cb5577a69fcd48a72ca20786713545f03bcdb2b80b","LICENSE":"83cced0d7ea4adca70302518dc44375445900ae8ed1c3d0a311e76474443d978","LICENSE-APACHE-2.0":"c6ac25baa937b3543482a2595950d337eccd6d620848455fd63d1a89c2009330","LICENSE-MIT":"20ad71f83cbf8fec779108327990518af3a87855d086bee40dc138656b94bd61","Makefile":"5bc156d54f4001cfc18484a963faf8d96430f73dbfff5b138ad2ae824d0b1bb4","README.md":"35fa02ac2528c0793d87f9f8dfd0caa683231ccf8c6a754a6de22456efa935fd","benchmarks/compare-spidermonkey-parsers.js":"58859b90cec170ab5103437194f1a751c83ad312b5e32dc78842b0c2720e1f02","gecko-patches.txt":"4c5532351f41e7a2e5af543686f1373f51e74c5908fbd80f2f337caa1bfe2099","journal.md":"e2af8d3ea87eac2afd106f943c13d0a0e5b03d09fb8ebec09ea4aa7d06490851","js-quirks.md":"8f5f0c6bd8cb9477b575716ac67b6a110865b4df60b7fecdcf2dbb606b8cf094","js_parser/README.md":"49370046241a091313cbe29d9171f47248c2fe742c8dfbdd4f7b4860ca961ffa","js_parser/__init__.py":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855","js_parser/es-lexical-simplified.esgrammar":"cc5e0f7bd270e35ff04cad1464317cef0377df13e5fcf145f12783faccc90eff","js_parser/es-simplified.esgrammar":"fc2e5617351f964de6ebadfbda50653bb0e3528a67df4ab364a0125b4326ae83","js_parser/es.esgrammar":"14558b604fe62b7421551d1e694b0f4feb84d8ed114589f75885b217e14cfb05","js_parser/esgrammar.pgen":"e0affd8bb7843aece6d628561ce3057079e879eb11260cbd01b5426c9bce6f29","js_parser/extract_es_grammar.py":"04838d2a0555345699f30fb014f806d4b2e15aa36ed9ec772f514fb4ad858570","js_parser/generate_js_parser_tables.py":"2a33156b3d370e10c8f4eaeb3a00e1322
fe71707d67f2f96f85f1a069a084b93","js_parser/lexer.py":"94252a6687fff473269b9eda5ee964d748c480c9a5d938569ac77ab9287cff80","js_parser/load_es_grammar.py":"d711fcc302b786551a83c3d5b7630de594732aa2f8c65e05105b355cd1139480","js_parser/parse_esgrammar.py":"3bc67c3aaf3fcaede4f89a4ad14103fe9e548ac035d1547f0cd799d83785d2b6","js_parser/parser.py":"0f2a9476463457aab7df1269373acec7e08a392209226b94a031267e055eb37a","js_parser/slash.esgrammar":"1fb1591a9773621c30fdac04f539452fb13990daece9ec939040fbb03445f434","js_parser/try_it.py":"c31fbdb7ad9164d16d173f23a6ae5e40da8d9c912f66d7751a53e9cecbbdafa9","jsparagus/README.md":"7f26517592e6d9b291a9162300b3157374422c712fd9b0042390ce55b3b3d715","jsparagus/__init__.py":"c277ec16d8ed12646b0d62e91249498fe7a207b5824e2d6e93d3f77e65828244","jsparagus/actions.py":"02f600ca9189d901779deeaeb3acccb9dfb72ab3842dfabdeafe17e6bade110f","jsparagus/aps.py":"9d14d7109c382af5bdf3bde574226afca65dc2caa0b7524f32f85de056730cfe","jsparagus/emit/__init__.py":"dcf1a8b26f7403871907f646c1ba3ef7dc1a593889a8f8d40490a0db791b0aff","jsparagus/emit/python.py":"fc8ad300727e735dab2222319039f2be9f792ebfc4a17f5f9ff03e58ad5a68e1","jsparagus/emit/rust.py":"6ecd3c76a6d9a37cf3ee9c8c440ba5538850a4bfcabe0a2ce662307b8a33f1ee","jsparagus/extension.py":"803c6db89e6d9e2480da4962c7db58b459dc3bd5594fc97fd89f1b43edf90081","jsparagus/gen.py":"1eabba9ce872ad130d878fa852e81efa6688b2f24c2bf9e4cc830a8afa58bd99","jsparagus/grammar.py":"23078e473dc3fc7ae9a85ce82dd928478d72ef8dd189adbcfd49de28f0b88efc","jsparagus/lexer.py":"8ed7b67dda1626ce98884e754c23eedeb1ce118ddd759b1571c131e5cb51ffda","jsparagus/lr0.py":"0bd25a501ca89b2dfdcbc90f9a0f8209e9cbfcaead099426ababdef6979c7ec9","jsparagus/main.py":"bae2377d6e840db55db6abbeffa58777020053d629de2b1bc8068aaf6f077dee","jsparagus/ordered.py":"15ebf9136ba760ee3e38611c76b55c6335002c6f5f98b43e62ed4c38fa0ef5e1","jsparagus/parse_pgen.py":"b68857e1de6fb41bece972d31384201b7e1feffadb07a3229a5d47c069d48160","jsparagus/parse_pgen_generated.py":"e794a794e95435d90654884ecce9ab68f763d13cd575f07228eaf1ebd27b9c18","jsparagus/parse_table.py":"7ce8388a468607a0bb20db0fb8769027af8927fe6e203f7c281ffc0221a6974b","jsparagus/rewrites.py":"3e5f82352237143d0fd2163459aa370e9b1661811b6eb5c1b9d79e3dd01c7f53","jsparagus/runtime.py":"f4f8f318e730cb7107490710868b9021bdbcf8e5e153ed3b858c7338b9b5d919","jsparagus/types.py":"b55d0eb466ffeff0441874b81c2dfeeaace7fa19eadc1d277d753803946e311f","jsparagus/utils.py":"cc26da2f258f0565062c77c61328210e2f8afb5b8866c153d2d1c159966a3913","mozconfigs/smoosh-debug":"422d2911e5f6acf99fd47435ec9cd0d9f43a680521de51d04aded8bed1136318","mozconfigs/smoosh-opt":"e9eab2cb659b5e7c1e88fc731d6c110157816b5a76e840e0bf51f167566e9b18","pgen.pgen":"60f457614f90a9bf022922dad563262f64e627d9aab934722246c20daa50b5de","requirements.txt":"3a392cc4f4db58be78817dc74a88178e6b4afe1e444c0068cb13e49502d7995a","smoosh_status.py":"a3824b4b20fde8fcf643e28de7d1a9a208352c778d1f9dc7d15f506258dbb36a","src/bin/smoosh_tools.rs":"989f3991bc5886664363b839ecae09d0b95c0e1844b5c2cbfc750fc3bcf52b37","src/lib.rs":"93b32cb970f69fa33e11d41db1696bd578095e07db44ed348ed5e21a8d13d21a","test.sh":"31676e86c2e7e6f6f69050766e237d0eee7da3598b11f95a7335292af2802d11","tests/__init__.py":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855","tests/test.py":"1ab6b2d002419eac0edc460a5f31b24f0b3ad7c52b79e83f4fd08bded67c6eec","tests/test_js.py":"5f4474eef53d7286d5683c0970a9ba69248a7c843c2c0d9d4111bc432f2f6dbb","tests/test_parse_pgen.py":"6b99e38b2045bae4b0c1b99fe23e1a47ea886b9ce4e902990cc366b8ca9d758e","update.sh":"39986fc0dfe2d
d2d2dd2d408cb25577f8ad736b657430233e5a9e214684ce6f1","update_stencil.py":"51a7e79935e664614441491605c8aa6f9cd9fe731faeba6b9c6cd5f23fc6c1ee","update_unicode.py":"18136102a3f38f87a4d6176e07a933711afb42796118293462765a6b271a240e"},"package":null} \ No newline at end of file diff --git a/third_party/rust/jsparagus/.flake8 b/third_party/rust/jsparagus/.flake8 new file mode 100644 index 0000000000..ce4ecbfbe5 --- /dev/null +++ b/third_party/rust/jsparagus/.flake8 @@ -0,0 +1,15 @@ +[flake8] + +# E721 do not compare types, use 'isinstance()' +# We run afoul of the heuristics that detect this on lines of code +# like `if method.return_type == types.UnitType:`. We are not comparing +# Python types. The warning is spurious for us. +# W503 line break before binary operator +# Amazingly, flake8 by default warns about binary operators no matter +# what you do. We choose to break before operators. +ignore = E721,W503 + +exclude = jsparagus_build_venv,crates/target,jsparagus/parse_pgen_generated.py,js_parser/parser_tables.py + +# todo: get this down to 99 +max_line_length=109 diff --git a/third_party/rust/jsparagus/.githooks/pre-commit b/third_party/rust/jsparagus/.githooks/pre-commit new file mode 100755 index 0000000000..5ca77432b2 --- /dev/null +++ b/third_party/rust/jsparagus/.githooks/pre-commit @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# +# Simplified commit hook to format the files which were changed in the current commit +# + +printf "[pre-commit] rustfmt" + +for file in $(git diff --name-only --cached); do + if [ ${file: -3} == ".rs" ]; then + rustfmt $file + fi +done + +exit 0 diff --git a/third_party/rust/jsparagus/.github/workflows/ci-daily.yml b/third_party/rust/jsparagus/.github/workflows/ci-daily.yml new file mode 100644 index 0000000000..37a258b28d --- /dev/null +++ b/third_party/rust/jsparagus/.github/workflows/ci-daily.yml @@ -0,0 +1,46 @@ +name: CI Daily + +on: + schedule: + # Run daily at 00:00 + - cron: '0 0 * * 0-6' + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - name: Setup Python + uses: actions/setup-python@v1 + with: + python-version: "3.7" + - uses: actions/checkout@v2 + with: + ref: ci_results + - name: Get Fuzzbugs + run: | + cd .metrics + # Only update this if it doesn't already exist. + # This action is only used to calculate the days since the last fuzzbug. + if [ ! 
-f count/fuzzbug.json ]; then + curl "https://api.github.com/repos/mozilla-spidermonkey/jsparagus/issues?labels=libFuzzer&state=all" > count/fuzzbug.json + fi + python fuzzbug_date_badge.py + git add badges/since-last-fuzzbug.json + git add count/fuzzbug.json + - name: Commit files + # fails if no files to commit + continue-on-error: true + run: | + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + git commit -m "update NotImplemented Count" -a + - name: Push changes + # fails if no files to commit + continue-on-error: true + uses: ad-m/github-push-action@master + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + branch: ci_results + force: true diff --git a/third_party/rust/jsparagus/.github/workflows/ci-generated.yml b/third_party/rust/jsparagus/.github/workflows/ci-generated.yml new file mode 100644 index 0000000000..4027c6c9c6 --- /dev/null +++ b/third_party/rust/jsparagus/.github/workflows/ci-generated.yml @@ -0,0 +1,54 @@ +name: Generate Files + +on: + push: + branches: + - master + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 # otherwise, you will failed to push refs to dest repo + - name: Setup Python + uses: actions/setup-python@v1 + with: + python-version: "3.7" + - name: Setup Git Profile + run: | + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + - name: Revert Previous Change and Merge Master + run: | + if git ls-remote origin | grep refs/heads/ci_generated; then + # If the remote branch exists. + git fetch origin + + # Merge master, discarding changes in origin/ci_generated + MASTER_REV=$(git log -1 master --pretty=%H) + git checkout -b ci_generated-master origin/master + git merge origin/ci_generated -m "Merge master ${MASTER_REV}" -s ours --allow-unrelated-histories + else + # Otherwise, just start from master branch. + git checkout -b ci_generated-master + fi + - name: Generate Files + run: | + make init-venv && make all + # OS independant replace + sed -i.bak '/*_generated.rs/d' .gitignore && rm .gitignore.bak + - name: Commit files + run: | + git add . 
+ MASTER_REV=$(git log -1 master --pretty=%H) + git commit -m "Add Generated Files for ${MASTER_REV}" -a + - name: Push changes + uses: ad-m/github-push-action@master + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + branch: ci_generated + diff --git a/third_party/rust/jsparagus/.github/workflows/ci-issues.yml b/third_party/rust/jsparagus/.github/workflows/ci-issues.yml new file mode 100644 index 0000000000..bb2265ecb3 --- /dev/null +++ b/third_party/rust/jsparagus/.github/workflows/ci-issues.yml @@ -0,0 +1,44 @@ +name: CI Issues + +on: + issues: + types: [opened, closed, reopened] + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - name: Setup Python + uses: actions/setup-python@v1 + with: + python-version: "3.7" + - uses: actions/checkout@v2 + with: + ref: ci_results + - name: Count Fuzzbugs + run: | + cd .metrics + # Get the new list + curl "https://api.github.com/repos/mozilla-spidermonkey/jsparagus/issues?labels=libFuzzer&state=all" > count/fuzzbug.json + python fuzzbug_count_badge.py + python fuzzbug_date_badge.py + git add badges/since-last-fuzzbug.json + git add badges/open-fuzzbug.json + git add count/fuzzbug.json + - name: Commit files + # fails if no files to commit + continue-on-error: true + run: | + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + git commit -m "update NotImplemented Count" -a + - name: Push changes + # fails if no files to commit + continue-on-error: true + uses: ad-m/github-push-action@master + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + branch: ci_results + force: true diff --git a/third_party/rust/jsparagus/.github/workflows/ci-push.yml b/third_party/rust/jsparagus/.github/workflows/ci-push.yml new file mode 100644 index 0000000000..0ca6f3a954 --- /dev/null +++ b/third_party/rust/jsparagus/.github/workflows/ci-push.yml @@ -0,0 +1,57 @@ +name: NotImplemented + +on: + push: + branches: + - master + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + # Check out master, so that we can count. 
+ - uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v1 + with: + python-version: "3.7" + - name: Get Count + # Record the count in a tmp file so it survives changing branches + run: | + find crates -iname '*.rs' -type f -exec cat {} + | grep -c -E "(Emit|Parse)Error::NotImplemented" > /tmp/count + git rev-parse HEAD > /tmp/commit + cp .metrics/not_implemented_badge.py /tmp + cp .metrics/not_implemented_count.py /tmp + # Checkout the results branch + - uses: actions/checkout@v2 + with: + ref: ci_results + - name: Add NotImplemented count + run: | + export total_count=$(cat /tmp/count) + export current_commit=$(cat /tmp/commit) + # Make sure the generating files are up to date + cp -f /tmp/not_implemented_badge.py .metrics/not_implemented_badge.py + cp -f /tmp/not_implemented_count.py .metrics/not_implemented_count.py + # Run the files + cd .metrics + python not_implemented_badge.py + python not_implemented_count.py + git add badges/not-implemented.json + git add count/not-implemented.json + - name: Commit files + # fails if no files to commit + continue-on-error: true + run: | + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + git commit -m "update NotImplemented Count" -a + - name: Push changes + uses: ad-m/github-push-action@master + continue-on-error: true + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + branch: ci_results + force: true diff --git a/third_party/rust/jsparagus/.github/workflows/real-js-benchmark.yml b/third_party/rust/jsparagus/.github/workflows/real-js-benchmark.yml new file mode 100644 index 0000000000..eec8047762 --- /dev/null +++ b/third_party/rust/jsparagus/.github/workflows/real-js-benchmark.yml @@ -0,0 +1,135 @@ +name: Real JS Samples Benchmark +on: + pull_request: + types: [opened, synchronize, reopened] + push: + branches: + - master + +jobs: + benchmark: + # This workflow relies on: + # - A specific hardware (benchmark-pool-1) in order to have a consistent + # and comparable results against multiple builds. + # + # - Some persistent data to reduce the time needed to checkout + # mozilla-central. + # + # To setup such host multiple things should be considered. + # + # In terms of security, the code which is executed on this hardware should + # not be trusted. As such, the Github Action jobs should run on a dedicated + # computer which is either isolated or containerized. Do not run this setup + # on a non-dedicated computer! + # + # It is best to create a dedicated user. + # $ mkdir /var/github-action-runner + # $ useradd -d /var/github-action-runner github-action-user + # + # Make sure this newly added user has no sudo capabilities. + # + # A checkout of Gecko should be present under /var/github-action-runner. The + # dependencies for building Gecko should as well be installed with `mach + # bootstrap`, which can be done using another user with sudo capabilities, + # and changing the HOME environment variable to match the github-action-user + # home. + # + # The file /var/github-action-runner/.profile contains: + # + # export PATH="$HOME/.cargo/bin:$PATH" + # export PATH="/var/github-action-runner/.mozbuild/git-cinnabar:$PATH" + # + # Which is used to add cargo in the path, as well as git-cinnabar, to keep + # the gecko clone up to date. 
+ # + # To add this computer to the benchmark pool, follow the instruction + # provided by github, after clicking "Add runner" on this page: + # https://github.com/mozilla-spidermonkey/jsparagus/settings/actions + # + # "benchmark-pool-1" specification: + # /proc/cpuinfo: + # Intel(R) Core(TM) i7-4770 CPU @ 3.40GHz + # dmidecode --type 17: + # 2x Hynix/Hyundai HMT41GU6MFR8C-PB (DDR3, 8GB, 1600 MT/s) + # + runs-on: [self-hosted, benchmark-pool-1] + steps: + - name: Clean Work Directory + run: | + rm -rf * + - name: Checkout jsparagus + uses: actions/checkout@v2 + with: + fetch-depth: 0 + path: 'jsparagus' + - name: Checkout real-js-samples + uses: actions/checkout@v2 + with: + repository: 'Yoric/real-js-samples' + path: 'real-js-samples' + fetch-depth: 0 + - name: Checkout mozilla-central + run: | + # Pull mozilla-central changes + source /var/github-action-runner/.profile + git -C /var/github-action-runner/gecko pull --all + # Create a local clone of mozilla-central + git clone -l /var/github-action-runner/gecko mozilla-central + - name: Status of Checkouts + run: | + echo "mozilla-central: $(git -C mozilla-central show --oneline -s)" + echo "jsparagus: $(git -C jsparagus show --oneline -s)" + echo "real-js-samples: $(git -C real-js-samples show --oneline -s)" + - name: Setup venv + run: | + source /var/github-action-runner/.profile + cd jsparagus + make init + - name: Generate Files + run: | + source /var/github-action-runner/.profile + cd jsparagus + make all + # OS independant replace + sed -i.bak '/*_generated.rs/d' .gitignore && rm .gitignore.bak + - name: Apply gecko patches + run: | + source /var/github-action-runner/.profile + cd mozilla-central + cat ../jsparagus/gecko-patches.txt | while read PATCH_AND_BUG; do + PATCH=$(echo $PATCH_AND_BUG | cut -d : -f 1) + BUG=$(echo $PATCH_AND_BUG | cut -d : -f 2) + + # Check bug status and skip if it's already landed. + STATUS=$(curl https://bugzilla.mozilla.org/rest/bug/$BUG | python3 -c 'import sys, json; print(json.load(sys.stdin)["bugs"][0]["status"])') + echo "Bug $BUG $STATUS" + if [ "x$STATUS" = "xRESOLVED" ]; then + continue + fi + + # Apply the latest patch from phabricator. + PATCH_URL=https://phabricator.services.mozilla.com/${PATCH}?download=true + curl --location "$PATCH_URL" | git apply --index || git reset --hard + git status + git commit --allow-empty -m "Bug $BUG" + done + - name: Build Gecko + run: | + # Disable Opcodes.h check, as we only focus on parsing speed. + export JS_SMOOSH_DISABLE_OPCODE_CHECK=1 + # Apply Bug 1640982 fix. + export CARGO_PROFILE_RELEASE_LTO=true + source /var/github-action-runner/.profile + cd jsparagus + cargo run --bin smoosh_tools build --opt + - name: Benchmark Real JS Samples + run: | + source /var/github-action-runner/.profile + cd jsparagus + cargo run --bin smoosh_tools bench --opt + - name: Post Checkout mozilla-central + if: ${{ always() }} + run: | + # Remove checked out repository. 
+ rm -rf mozilla-central + diff --git a/third_party/rust/jsparagus/.github/workflows/rust.yml b/third_party/rust/jsparagus/.github/workflows/rust.yml new file mode 100644 index 0000000000..3ecc7db649 --- /dev/null +++ b/third_party/rust/jsparagus/.github/workflows/rust.yml @@ -0,0 +1,24 @@ +name: Rust + +on: + pull_request: + types: [opened, synchronize, reopened] + push: + branches: + - master + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v1 + - name: Setup Python + uses: actions/setup-python@v1 + with: + python-version: "3.7" + - name: Initialize repository + run: make init + - name: Build & Test + run: RUSTFLAGS="-D warnings" make check diff --git a/third_party/rust/jsparagus/.github/workflows/smoosh-status.yml b/third_party/rust/jsparagus/.github/workflows/smoosh-status.yml new file mode 100644 index 0000000000..b68925dd7f --- /dev/null +++ b/third_party/rust/jsparagus/.github/workflows/smoosh-status.yml @@ -0,0 +1,87 @@ +name: SmooshMonkey status + +on: + schedule: + # Every hour + - cron: '0 * * * *' + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v1 + with: + python-version: "3.7" + - name: Initialize venv + run: make init-venv + - name: Setup Git Profile + run: | + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + - name: Check SmooshMonkey status + run: make smoosh-status-ci + id: status + - name: Checkout ci_smoosh_status + run: | + if git ls-remote origin | grep refs/heads/ci_smoosh_status; then + # If the remote branch exists. + git fetch origin ci_smoosh_status + git checkout -b ci_smoosh_status origin/ci_smoosh_status + else + # Otherwise, create a branch. + git checkout -b ci_smoosh_status-master + # And reset all history + git reset --hard deb48a2460abf091705d9972318bbb6e7349de9c + # And also remove remaining files + rm README.md gen.py + echo jsparagus_build_venv > .gitignore + fi + - name: Update files + run: | + echo ${{steps.status.outputs.mc}} > latest_mc + echo ${{steps.status.outputs.jsparagus}} > latest_jsparagus + echo ${{steps.status.outputs.build}} > latest_build + echo ${{steps.status.outputs.test}} > latest_test + + if [ ${{steps.status.outputs.build}} == "OK" ]; then + BUILD_COLOR="green" + elif [ ${{steps.status.outputs.build}} == "NG" ]; then + BUILD_COLOR="red" + else + BUILD_COLOR="yellow" + fi + + if [ ${{steps.status.outputs.test}} == "OK" ]; then + echo ${{steps.status.outputs.mc}} > known_good_mc + echo ${{steps.status.outputs.jsparagus}} > known_good_jsparagus + TEST_COLOR="green" + elif [ ${{steps.status.outputs.test}} == "NG" ]; then + TEST_COLOR="red" + else + TEST_COLOR="yellow" + fi + + echo "{ \"schemaVersion\": 1, \"label\": \"SmooshMonkey Build\", \"message\": \"${{steps.status.outputs.build}}\", \"color\": \"$BUILD_COLOR\" }" > smoosh_build.json + echo "{ \"schemaVersion\": 1, \"label\": \"SmooshMonkey Test\", \"message\": \"${{steps.status.outputs.test}}\", \"color\": \"$TEST_COLOR\" }" > smoosh_test.json + - name: Add files + run: | + git add . + set +e + git diff --cached --quiet + echo "##[set-output name=modified;]$?" 
+        set -e
+      id: status_add
+    - name: Commit files
+      if: steps.status_add.outputs.modified == '1'
+      run: |
+        git commit -m "Update Smoosh Status" -a
+    - name: Push changes
+      if: steps.status_add.outputs.modified == '1'
+      uses: ad-m/github-push-action@master
+      with:
+        github_token: ${{ secrets.GITHUB_TOKEN }}
+        branch: ci_smoosh_status
diff --git a/third_party/rust/jsparagus/.metrics/README.md b/third_party/rust/jsparagus/.metrics/README.md
new file mode 100644
index 0000000000..f665dbd9c3
--- /dev/null
+++ b/third_party/rust/jsparagus/.metrics/README.md
@@ -0,0 +1,79 @@
+[![Rust][Rust Badge]][Rust CI Link]
+[![NotImplemented Counter][NotImplemented Badge]][NotImplemented Search]
+[![Fuzzbug days since][Fuzzbug Days Badge]][Fuzzbugs]
+[![Fuzzbug open][Fuzzbug Open Badge]][Open Fuzzbugs]
+
+# Metrics
+
+This is the metrics directory. It tracks the evolution of the repository, separately from the
+repository itself. You can find the actual metrics in the
+[`ci-results`](https://github.com/mozilla-spidermonkey/jsparagus/tree/ci-results) branch of the jsparagus project. This branch is automatically generated using the `create-ci-branch.sh` script found in this directory. If there are issues with your fork, you can remove the `ci-results` branch, and the CI will automatically rerun the `create-ci-branch` script to reset it. Do not push manual data to this repository; it will be lost.
+
+If you find that the `ci-results` branch has disappeared or been corrupted somehow, you can reset it by deleting it and recreating it.
+
+```
+git branch -D ci-results
+cd .metrics
+./create-ci-branch.sh
+```
+
+The `create-ci-branch.sh` script creates the branch, prepares it, and populates it with data from the past.
+
+## Making your own metrics
+Make sure you do not use data that cannot be automatically recovered. We cannot rely on the `ci-results` branch always being present; therefore, anything that you write must be recoverable on its own, either by relying on external APIs or through some other mechanism.
+
+Please update this README if you make any changes.
+
+## Types of CI Actions
+These actions are all found in the `.github/workflows` directory.
+
+1) `rust.yml` - Run on Pull Request
+* runs on every pull request and every push to master; use it for any metrics that are development related. Examples include linting, testing, etc.
+2) `ci-push.yml` - Run on Push to `master`
+* runs for self-contained metrics. An example is the number of `NotImplemented` errors in the codebase. This does not depend on anything external.
+3) `ci-daily.yml` - Run Daily
+* a cron task that runs daily. Useful for metrics that need daily updates.
+4) `ci-issues.yml` - Run on issue open
+* runs each time an issue is opened. Good for tracking types of issues.
+
+
+## Types of data
+
+These are the types of data that this metrics folder tracks.
+
+1) Rust Passing
+   * Ensures our internal tests are passing
+   * Updates on every pull request to master. See [this
+   action](https://github.com/mozilla-spidermonkey/jsparagus/tree/master/.github/workflows/rust.yml)
+
+2) NotImplemented Count
+   * counts the number of NotImplemented errors in the codebase. This should slowly run down to zero
+   * Updates on every push to master. See [this
+   action](https://github.com/mozilla-spidermonkey/jsparagus/tree/master/.github/workflows/ci-push.yml)
+
+3) Days Since last Fuzzbug
+   * tracks the last fuzzbug we saw; if none exists, it returns ∞, otherwise it returns the date of the last one, regardless of state.
+   * Updates daily, regardless of push. See [this
+   action](https://github.com/mozilla-spidermonkey/jsparagus/tree/master/.github/workflows/ci-daily.yml)
+
+4) Fuzzbug open count
+   * tracks the number of open fuzzbugs
+   * Updates on issue open. See [this action](https://github.com/mozilla-spidermonkey/jsparagus/.github/workflows/ci-issues.yml)
+
+5) Percentage of tests passing with SmooshMonkey
+   * TODO: tracks the number of tests passing without fallback. We should use the try API for this.
+   * Updates daily, regardless of push. See [this
+   action](https://github.com/mozilla-spidermonkey/jsparagus/tree/master/.github/workflows/ci-daily.yml)
+
+6) Percentage of JS compilable with SmooshMonkey
+   * TODO: see comment about writing bytes to a file in [this repo](https://github.com/nbp/seqrec)
+   * implementation is dependent on how we get the data. We need a robust solution for importing this data.
+
+[Rust Badge]: https://github.com/mozilla-spidermonkey/jsparagus/workflows/Rust/badge.svg
+[Rust CI Link]: https://github.com/mozilla-spidermonkey/jsparagus/actions?query=branch%3Amaster
+[NotImplemented Badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmozilla-spidermonkey%2Fjsparagus%2Fci_results%2F.metrics%2Fbadges%2Fnot-implemented.json
+[NotImplemented Search]: https://github.com/mozilla-spidermonkey/jsparagus/search?q=notimplemented&unscoped_q=notimplemented
+[Fuzzbug days Badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmozilla-spidermonkey%2Fjsparagus%2Fci_results%2F.metrics%2Fbadges%2Fsince-last-fuzzbug.json
+[Fuzzbug Open Badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmozilla-spidermonkey%2Fjsparagus%2Fci_results%2F.metrics%2Fbadges%2Fopen-fuzzbug.json
+[Fuzzbugs]: https://github.com/mozilla-spidermonkey/jsparagus/issues?utf8=%E2%9C%93&q=label%3AlibFuzzer+
+[Open Fuzzbugs]: https://github.com/mozilla-spidermonkey/jsparagus/labels/libFuzzer
diff --git a/third_party/rust/jsparagus/.metrics/create-ci-branch.sh b/third_party/rust/jsparagus/.metrics/create-ci-branch.sh
new file mode 100755
index 0000000000..2415ed7ce9
--- /dev/null
+++ b/third_party/rust/jsparagus/.metrics/create-ci-branch.sh
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+
+set -ue # it's like JavaScript: everything is allowed unless you prevent it.
+shopt -s extglob
+
+# export the ci_branch we will be using in all shell scripts
+export ci_branch=ci_results
+
+topdir=$(git rev-parse --show-toplevel)
+
+cd $topdir
+
+if [ `git branch --list $ci_branch` ]
+then
+    echo "Branch exists" # We don't need to do anything
+else
+    git checkout -b $ci_branch
+
+    # clear out the repository
+    git rm -r !(.metrics|.git|tmp)
+    git rm -r .github
+
+    cp .metrics/generated_README.md README.md
+    mkdir .metrics/badges
+    mkdir .metrics/count
+
+    git add .
+    git commit -m"Initial commit for results branch"
+
+    # scripts needed to populate the data; each should be self-contained and clean up its extra files
+    cd .metrics && ./populate_not_implemented.sh
+    cd $topdir
+    cd .metrics && ./populate_fuzzbug.sh
+
+    cd $topdir
+    git add .
+ git commit -m"Inital run of Populate scripts" +fi diff --git a/third_party/rust/jsparagus/.metrics/fuzzbug_count_badge.py b/third_party/rust/jsparagus/.metrics/fuzzbug_count_badge.py new file mode 100644 index 0000000000..654908ff1f --- /dev/null +++ b/third_party/rust/jsparagus/.metrics/fuzzbug_count_badge.py @@ -0,0 +1,22 @@ +#!/usr/bin/python +import json + +read_filename = 'count/fuzzbug.json' +write_count = 'badges/open-fuzzbug.json' + +open_fuzzbugs = 0 +with open(read_filename, 'r') as f: + filedata = json.load(f) + # the open fuzzbug count. Can be deleted + open_fuzzbugs = len([x for x in filedata if x['closed_at'] is None]) + +# Write fuzzbug count +data = { + "schemaVersion": 1, + "label": "Open FuzzBugs", + "message": str(open_fuzzbugs) if open_fuzzbugs > 0 else "None", + "color": "green" if open_fuzzbugs == 0 else "yellow", +} + +with open(write_count, 'w') as f: + json.dump(data, f, indent=4) diff --git a/third_party/rust/jsparagus/.metrics/fuzzbug_date_badge.py b/third_party/rust/jsparagus/.metrics/fuzzbug_date_badge.py new file mode 100644 index 0000000000..a2d2b88c09 --- /dev/null +++ b/third_party/rust/jsparagus/.metrics/fuzzbug_date_badge.py @@ -0,0 +1,40 @@ +#!/usr/bin/python +import json +from datetime import datetime + +read_filename = 'count/fuzzbug.json' +write_since = 'badges/since-last-fuzzbug.json' + +days_since = None +with open(read_filename, 'r') as f: + filedata = json.load(f) + count = len(filedata) + # the last time we saw a fuzzbug regardless of status + if count > 0: + dt_format = "%Y-%m-%dT%H:%M:%SZ" + fuzzbug_opened = filedata[0]["created_at"] + fuzzbug_date = datetime.strptime(fuzzbug_opened, dt_format) + today = datetime.today() + days_since = (today - fuzzbug_date).days + + +# Write days since last fuzzbug + +def get_color(days): + if days_since is None or days_since > 100: + return "green" + elif days_since > 10: + return "yellow" + else: + return "red" + + +data = { + "schemaVersion": 1, + "label": "Days since last FuzzBug", + "message": str(days_since) if days_since is not None else "Forever", + "color": get_color(days_since), +} + +with open(write_since, 'w') as f: + json.dump(data, f, indent=4) diff --git a/third_party/rust/jsparagus/.metrics/generated_README.md b/third_party/rust/jsparagus/.metrics/generated_README.md new file mode 100644 index 0000000000..af4208a90f --- /dev/null +++ b/third_party/rust/jsparagus/.metrics/generated_README.md @@ -0,0 +1,48 @@ +[![Rust][Rust Badge]][Rust CI Link] +[![NotImplemented Counter][NotImplemented Badge]][NotImplemented Search] +[![Fuzzbug days since][Fuzzbug Days Badge]][Fuzzbugs] +[![Fuzzbug open][Fuzzbug Open Badge]][Open Fuzzbugs] + +# Metrics + +Unlike other branches in this project, this branch is for collecting metrics from the CI. you will +find these files in the `.results` folder. If this branch gets deleted, don't worry. This branch can be auto-generated from the `.metrics` +folder in the main repository. + +## Types of data + +These are the types of data that this metrics folder tracks. + +1) NotImplemented Count + * counts number of NotImplemented errors in the codebase. This should slowly rundown to zero + * Updates on every push to master. See [this + action](https://github.com/mozilla-spidermonkey/jsparagus/tree/master/.github/workflows/ci-push.yml) + +2) Days Since last Fuzzbug + * tracks the last fuzzbug we saw, if it does not exist, return ∞, otherwise return the last date regardless of state. + * Updates daily, regardless of push. 
See [this + action](https://github.com/mozilla-spidermonkey/jsparagus/tree/master/.github/workflows/ci-daily.yml) + +3) Fuzzbug open count + * tracks the number of open fuzzbugs + * Updates daily, regardless of push. See [this + action](https://github.com/mozilla-spidermonkey/jsparagus/tree/master/.github/workflows/ci-issues.yml) + +4) Percentage of tests passing with SmooshMonkey + * TODO: tracks the number of tests passing without fallback. We should use the try api for this. + * Updates daily, regardless of push. See [this + action](https://github.com/mozilla-spidermonkey/jsparagus/tree/master/.github/workflows/ci-daily.yml) + + +5) Percentage of JS compilable with SmooshMonkey + * TODO: see comment about writing bytes to a file in [this repo](https://github.com/nbp/seqrec) + * implementation is dependant on how we get the data. We need a robust solution for importing this data. + +[Rust Badge]: https://github.com/mozilla-spidermonkey/jsparagus/workflows/Rust/badge.svg +[Rust CI Link]: https://github.com/mozilla-spidermonkey/jsparagus/actions?query=branch%3Amaster +[NotImplemented Badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmozilla-spidermonkey%2Fjsparagus%2Fci_results%2F.metrics%2Fbadges%2Fnot-implemented.json +[NotImplemented Search]: https://github.com/mozilla-spidermonkey/jsparagus/search?q=notimplemented&unscoped_q=notimplemented +[Fuzzbug days Badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmozilla-spidermonkey%2Fjsparagus%2Fci_results%2F.metrics%2Fbadges%2Fsince-last-fuzzbug.json +[Fuzzbug Open Badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmozilla-spidermonkey%2Fjsparagus%2Fci_results%2F.metrics%2Fbadges%2Fopen-fuzzbug.json +[Fuzzbugs]: https://github.com/mozilla-spidermonkey/jsparagus/issues?utf8=%E2%9C%93&q=label%3AlibFuzzer+ +[Open Fuzzbugs]: https://github.com/mozilla-spidermonkey/jsparagus/labels/libFuzzer diff --git a/third_party/rust/jsparagus/.metrics/not_implemented_badge.py b/third_party/rust/jsparagus/.metrics/not_implemented_badge.py new file mode 100644 index 0000000000..bc522f1fe3 --- /dev/null +++ b/third_party/rust/jsparagus/.metrics/not_implemented_badge.py @@ -0,0 +1,14 @@ +#!/usr/bin/python +import json +import os.path + +filename = 'badges/not-implemented.json' +total_count = os.environ['total_count'] +data = { + "schemaVersion": 1, + "label": "NotImplemented", + "message": total_count, + "color": "green" if total_count == "0" else "yellow", +} +with open(filename, 'w') as f: + json.dump(data, f, indent=4) diff --git a/third_party/rust/jsparagus/.metrics/not_implemented_count.py b/third_party/rust/jsparagus/.metrics/not_implemented_count.py new file mode 100644 index 0000000000..947044eca9 --- /dev/null +++ b/third_party/rust/jsparagus/.metrics/not_implemented_count.py @@ -0,0 +1,19 @@ +#!/usr/bin/python +import json +import os.path + +filename = 'count/not-implemented.json' +if not os.path.isfile(filename): + with open(filename, 'w') as f: + json.dump([], f, indent=4) # initialize with an empty list + +with open(filename, 'r+') as f: + data = json.load(f) + if len(data) == 0 or data[-1]["commit"] != os.environ['current_commit']: + data.append({ + "commit": os.environ['current_commit'], + "total_count": os.environ['total_count'] + }) + f.seek(0) + json.dump(data, f, indent=4) + f.truncate() diff --git a/third_party/rust/jsparagus/.metrics/populate_fuzzbug.sh b/third_party/rust/jsparagus/.metrics/populate_fuzzbug.sh new file mode 100755 index 
0000000000..af48d566b1 --- /dev/null +++ b/third_party/rust/jsparagus/.metrics/populate_fuzzbug.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +set -ue # its like javascript, everything is allowed unless you prevent it. + +topdir=$(git rev-parse --show-toplevel) + +cd $topdir/.metrics + +url="https://api.github.com/repos/mozilla-spidermonkey/jsparagus/issues?labels=libFuzzer&state=all" + +curl $url > count/fuzzbug.json +python fuzzbug_count_badge.py +git add . +git commit -m"Add Fuzzbug date" +python fuzzbug_date_badge.py + +git add . + +git commit -m"Add Fuzzbug count" diff --git a/third_party/rust/jsparagus/.metrics/populate_not_implemented.sh b/third_party/rust/jsparagus/.metrics/populate_not_implemented.sh new file mode 100755 index 0000000000..3a6200133c --- /dev/null +++ b/third_party/rust/jsparagus/.metrics/populate_not_implemented.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +set -ue # its like javascript, everything is allowed unless you prevent it. + +topdir=$(git rev-parse --show-toplevel) + +cd $topdir +# setup: persist the scripts between commits +mkdir -p tmp +cp -r .metrics tmp/ +git checkout master +git pull origin master + +# create the log of commits +git log --format=oneline --since=2020-01-01 | tac | awk '{print $1}' > tmp/commit-list +cd tmp/.metrics + +# do stuff with the commits +for commit in $(cat $topdir/tmp/commit-list) +do + git checkout $commit + # python script pulls from env variables, export those + export total_count=$(find $topdir/rust -iname '*.rs' -type f -exec cat {} + | grep -c -E "(Emit|Parse|ScopeBuild)Error::NotImplemented") + export current_commit=$commit + python not_implemented_count.py + python not_implemented_badge.py +done + +cd $topdir +git checkout $ci_branch + +# replace this file stuff with whatever it is you want to do to get it to the right place in the +# repo +mv -f tmp/.metrics/count/not-implemented.json .metrics/count/not-implemented.json +mv -f tmp/.metrics/badges/not-implemented.json .metrics/badges/not-implemented.json + +# Cleanup: Kill the tmp dir +rm -r tmp + +git add . +git commit -m"Add NotImplemented" diff --git a/third_party/rust/jsparagus/CODE_OF_CONDUCT.md b/third_party/rust/jsparagus/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000..2c75c30d59 --- /dev/null +++ b/third_party/rust/jsparagus/CODE_OF_CONDUCT.md @@ -0,0 +1,8 @@ +# Community Participation Guidelines + +This repository is governed by Mozilla's code of conduct and etiquette guidelines. For more details, please read the [Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/). + +## How to Report + +For more information on how to report violations of the Community Participation Guidelines, please read our "[How to Report](https://www.mozilla.org/about/governance/policies/participation/reporting/)" page. + diff --git a/third_party/rust/jsparagus/Cargo.lock b/third_party/rust/jsparagus/Cargo.lock new file mode 100644 index 0000000000..fb3f65a447 --- /dev/null +++ b/third_party/rust/jsparagus/Cargo.lock @@ -0,0 +1,904 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "ansi_term" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +dependencies = [ + "winapi", +] + +[[package]] +name = "anyhow" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7825f6833612eb2414095684fcf6c635becf3ce97fe48cf6421321e93bfbd53c" + +[[package]] +name = "arrayvec" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2" + +[[package]] +name = "autocfg" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" + +[[package]] +name = "bitflags" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" + +[[package]] +name = "bstr" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "502ae1441a0a5adb8fbd38a5955a6416b9493e92b465de5e4a9bde6a539c2c48" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata", + "serde", +] + +[[package]] +name = "bumpalo" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e8c087f005730276d1096a652e92a8bacee2e2472bcc9715a74d2bec38b5820" + +[[package]] +name = "byteorder" +version = "1.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" + +[[package]] +name = "cast" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b9434b9a5aa1450faa3f9cb14ea0e8c53bb5d2b3c1bfd1ab4fc03e9f33fbfb0" +dependencies = [ + "rustc_version", +] + +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + +[[package]] +name = "clap" +version = "2.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9" +dependencies = [ + "ansi_term", + "atty", + "bitflags", + "strsim", + "textwrap", + "unicode-width", + "vec_map", +] + +[[package]] +name = "criterion" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc755679c12bda8e5523a71e4d654b6bf2e14bd838dfc48cde6559a05caf7d1" +dependencies = [ + "atty", + "cast", + "clap", + "criterion-plot", + "csv", + "itertools", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a01e15e0ea58e8234f96146b1f91fa9d0e4dd7a38da93ff7a75d42c0b9d3a545" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-deque" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c20ff29ded3204c5106278a81a38f4b482636ed4fa1e6cfbeef193291beb29ed" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", + "maybe-uninit", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "058ed274caafc1f60c4997b5fc07bf7dc7cca454af7c6e81edffe5f33f70dace" +dependencies = [ + "autocfg 1.0.0", + "cfg-if", + "crossbeam-utils", + "lazy_static", + "maybe-uninit", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-queue" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c695eeca1e7173472a32221542ae469b3e9aac3a4fc81f7696bcad82029493db" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce446db02cdc3165b94ae73111e570793400d0794e46125cc4056c81cbb039f4" +dependencies = [ + "autocfg 0.1.7", + "cfg-if", + "lazy_static", +] + +[[package]] +name = "csv" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00affe7f6ab566df61b4be3ce8cf16bc2576bca0963ceb0955e45d514bf9a279" +dependencies = [ + "bstr", + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + +[[package]] +name = "either" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" + +[[package]] +name = "env_logger" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36" +dependencies = [ + "log", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "hashbrown" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" + +[[package]] +name = "heck" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "hermit-abi" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1010591b26bbfe835e9faeabeb11866061cc7dcebffd56ad7d0942d0e61aefd8" +dependencies = [ + "libc", +] + +[[package]] +name = "indexmap" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "itertools" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f56a2d0bc861f9165be4eb3442afd3c236d8a98afd426f65d92324ae1091a484" +dependencies = [ + "either", 
+] + +[[package]] +name = "itoa" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e" + +[[package]] +name = "js-sys" +version = "0.3.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7889c7c36282151f6bf465be4700359318aef36baa951462382eae49e9577cf9" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "jsparagus" +version = "0.1.0" +dependencies = [ + "jsparagus-ast", + "jsparagus-driver", + "jsparagus-emitter", + "jsparagus-generated-parser", + "jsparagus-interpreter", + "jsparagus-json-log", + "jsparagus-parser", + "jsparagus-scope", + "jsparagus-stencil", +] + +[[package]] +name = "jsparagus-ast" +version = "0.1.0" +dependencies = [ + "bumpalo", + "indexmap", +] + +[[package]] +name = "jsparagus-driver" +version = "0.1.0" +dependencies = [ + "bumpalo", + "env_logger", + "jsparagus-ast", + "jsparagus-emitter", + "jsparagus-interpreter", + "jsparagus-parser", + "jsparagus-stencil", + "log", + "structopt", +] + +[[package]] +name = "jsparagus-emitter" +version = "0.1.0" +dependencies = [ + "bumpalo", + "byteorder", + "indexmap", + "jsparagus-ast", + "jsparagus-parser", + "jsparagus-scope", + "jsparagus-stencil", +] + +[[package]] +name = "jsparagus-generated-parser" +version = "0.1.0" +dependencies = [ + "bumpalo", + "jsparagus-ast", + "static_assertions", +] + +[[package]] +name = "jsparagus-interpreter" +version = "0.1.0" +dependencies = [ + "bumpalo", + "jsparagus-ast", + "jsparagus-emitter", + "jsparagus-parser", + "jsparagus-stencil", +] + +[[package]] +name = "jsparagus-json-log" +version = "0.1.0" +dependencies = [ + "log", + "serde_json", +] + +[[package]] +name = "jsparagus-parser" +version = "0.1.0" +dependencies = [ + "arrayvec", + "bumpalo", + "criterion", + "jsparagus-ast", + "jsparagus-generated-parser", + "jsparagus-json-log", +] + +[[package]] +name = "jsparagus-scope" +version = "0.1.0" +dependencies = [ + "indexmap", + "jsparagus-ast", + "jsparagus-stencil", +] + +[[package]] +name = "jsparagus-stencil" +version = "0.1.0" +dependencies = [ + "jsparagus-ast", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.67" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb147597cdf94ed43ab7a9038716637d2d1bf2bc571da995d0028dec06bd3018" + +[[package]] +name = "log" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "maybe-uninit" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" + +[[package]] +name = "memchr" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53445de381a1f436797497c61d851644d0e8e88e6140f22872ad33a704933978" + +[[package]] +name = "memoffset" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75189eb85871ea5c2e2c15abbdd541185f63b408415e5051f5cac122d8c774b9" +dependencies = [ + "rustc_version", +] + +[[package]] +name = "nom" +version = "4.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6" +dependencies = [ + "memchr", + "version_check 0.1.5", +] + +[[package]] +name = "num-traits" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c62be47e61d1842b9170f0fdeec8eba98e60e90e5446449a0545e5152acd7096" +dependencies = [ + "autocfg 1.0.0", +] + +[[package]] +name = "num_cpus" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46203554f085ff89c235cd12f7075f3233af9b11ed7c9e16dfe2560d03313ce6" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "oorandom" +version = "11.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebcec7c9c2a95cacc7cd0ecb89d8a8454eca13906f6deb55258ffff0adeb9405" + +[[package]] +name = "plotters" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e3bb8da247d27ae212529352020f3e5ee16e83c0c258061d27b08ab92675eeb" +dependencies = [ + "js-sys", + "num-traits", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "proc-macro-error" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7959c6467d962050d639361f7703b2051c43036d03493c36f01d440fdd3138a" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check 0.9.1", +] + +[[package]] +name = "proc-macro-error-attr" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4002d9f55991d5e019fb940a90e1a95eb80c24e77cb2462dd4dc869604d543a" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "syn-mid", + "version_check 0.9.1", +] + +[[package]] +name = "proc-macro2" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acb317c6ff86a4e579dfa00fc5e6cca91ecbb4e7eb2df0468805b674eb88548" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rayon" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db6ce3297f9c85e16621bb8cca38a06779ffc31bb8184e1be4bed2be4678a098" +dependencies = [ + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08a89b46efaf957e52b18062fb2f4660f8b8a4dde1807ca002690868ef2c85a9" +dependencies = [ + "crossbeam-deque", + "crossbeam-queue", + "crossbeam-utils", + "lazy_static", + "num_cpus", +] + +[[package]] +name = "regex" +version = "1.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "322cf97724bea3ee221b78fe25ac9c46114ebb51747ad5babd51a2fc6a8235a8" +dependencies = [ + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92b73c2a1770c255c240eaa4ee600df1704a38dc3feaa6e949e7fcd4f8dc09f9" +dependencies = [ + "byteorder", +] + +[[package]] +name = "regex-syntax" +version = "0.6.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b28dfe3fe9badec5dbf0a79a9cccad2cfc2ab5484bdb3e44cbd1ae8b3ba2be06" + +[[package]] +name = "rustc_version" +version = "0.2.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +dependencies = [ + "semver", +] + +[[package]] +name = "ryu" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa8506c1de11c9c4e4c38863ccbe02a305c8188e85a05a784c9e11e1c3910c8" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "semver" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" +dependencies = [ + "semver-parser", +] + +[[package]] +name = "semver-parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" + +[[package]] +name = "serde" +version = "1.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "414115f25f818d7dfccec8ee535d76949ae78584fc4f79a6f45a904bf8ab4449" + +[[package]] +name = "serde_derive" +version = "1.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "128f9e303a5a29922045a830221b8f78ec74a5f544944f3d5984f8ec3895ef64" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9371ade75d4c2d6cb154141b9752cf3781ec9c05e0e5cf35060e1e70ee7b9c25" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sourcefile" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bf77cb82ba8453b42b6ae1d692e4cdc92f9a47beaf89a847c8be83f4e328ad3" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + +[[package]] +name = "structopt" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8faa2719539bbe9d77869bfb15d4ee769f99525e707931452c97b693b3f159d" +dependencies = [ + "clap", + "lazy_static", + "structopt-derive", +] + +[[package]] +name = "structopt-derive" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f88b8e18c69496aad6f9ddf4630dd7d585bcaf765786cb415b9aec2fe5a0430" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "syn" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af6f3550d8dff9ef7dc34d384ac6f107e5d31c8f57d9f28e0081503f547ac8f5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "syn-mid" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"7be3539f6c128a931cf19dcee741c1af532c7fd387baa739c03dd2e96479338a" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "tinytemplate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57a3c6667d3e65eb1bc3aed6fd14011c6cbc3a0665218ab7f5daf040b9ec371a" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "unicode-segmentation" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0" + +[[package]] +name = "unicode-width" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479" + +[[package]] +name = "unicode-xid" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" + +[[package]] +name = "vec_map" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" + +[[package]] +name = "version_check" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" + +[[package]] +name = "version_check" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "078775d0255232fb988e6fccf26ddc9d1ac274299aaedcedce21c6f72cc533ce" + +[[package]] +name = "walkdir" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "777182bc735b6424e1a57516d35ed72cb8019d85c8c9bf536dccb3445c1a2f7d" +dependencies = [ + "same-file", + "winapi", + "winapi-util", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5205e9afdf42282b192e2310a5b463a6d1c1d774e30dc3c791ac37ab42d2616c" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11cdb95816290b525b32587d76419facd99662a07e59d3cdb560488a819d9a45" +dependencies = [ + "bumpalo", + "lazy_static", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "574094772ce6921576fb6f2e3f7497b8a76273b6db092be18fc48a082de09dc3" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e85031354f25eaebe78bb7db1c3d86140312a911a106b2e29f9cc440ce3e7668" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5e7e61fc929f4c0dddb748b102ebf9f632e2b8d739f2016542b4de2965a9601" + +[[package]] +name = "wasm-bindgen-webidl" +version = 
"0.2.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef012a0d93fc0432df126a8eaf547b2dce25a8ce9212e1d3cbeef5c11157975d" +dependencies = [ + "anyhow", + "heck", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "weedle", +] + +[[package]] +name = "web-sys" +version = "0.3.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aaf97caf6aa8c2b1dac90faf0db529d9d63c93846cca4911856f78a83cebf53b" +dependencies = [ + "anyhow", + "js-sys", + "sourcefile", + "wasm-bindgen", + "wasm-bindgen-webidl", +] + +[[package]] +name = "weedle" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bb43f70885151e629e2a19ce9e50bd730fd436cfd4b666894c9ce4de9141164" +dependencies = [ + "nom", +] + +[[package]] +name = "winapi" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ccfbf554c6ad11084fb7517daca16cfdcaccbdadba4fc336f032a8b12c2ad80" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/third_party/rust/jsparagus/Cargo.toml b/third_party/rust/jsparagus/Cargo.toml new file mode 100644 index 0000000000..7b5653dacb --- /dev/null +++ b/third_party/rust/jsparagus/Cargo.toml @@ -0,0 +1,69 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. 
+ +[package] +edition = "2018" +name = "jsparagus" +version = "0.1.0" +authors = ["The jsparagus Project Developers"] +publish = false +description = "A JavaScript parser" +readme = "README.md" +license = "MIT/Apache-2.0" +repository = "https://github.com/mozilla-spidermonkey/jsparagus" + +[profile.release] +debug = 2 + +[lib] + +[[bin]] +name = "smoosh_tools" +path = "src/bin/smoosh_tools.rs" + +[dependencies.jsparagus-ast] +path = "crates/ast" + +[dependencies.jsparagus-driver] +path = "crates/driver" +optional = true + +[dependencies.jsparagus-emitter] +path = "crates/emitter" + +[dependencies.jsparagus-generated-parser] +path = "crates/generated_parser" + +[dependencies.jsparagus-interpreter] +path = "crates/interpreter" +optional = true + +[dependencies.jsparagus-json-log] +path = "crates/json-log" + +[dependencies.jsparagus-parser] +path = "crates/parser" + +[dependencies.jsparagus-scope] +path = "crates/scope" + +[dependencies.jsparagus-stencil] +path = "crates/stencil" + +[features] +default = [] +full = [ + "jsparagus-driver", + "jsparagus-interpreter", +] +logging = ["jsparagus-parser/logging"] + +[workspace] diff --git a/third_party/rust/jsparagus/LICENSE b/third_party/rust/jsparagus/LICENSE new file mode 100644 index 0000000000..638926c173 --- /dev/null +++ b/third_party/rust/jsparagus/LICENSE @@ -0,0 +1,15 @@ +Copyright Mozilla Foundation + +Licensed under the Apache License (Version 2.0), or the MIT license, +(the "Licenses") at your option. You may not use this file except in +compliance with one of the Licenses. You may obtain copies of the +Licenses at: + +http://www.apache.org/licenses/LICENSE-2.0 +http://opensource.org/licenses/MIT + +Unless required by applicable law or agreed to in writing, software +distributed under the Licenses is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the Licenses for the specific language governing permissions and +limitations under the Licenses. diff --git a/third_party/rust/jsparagus/LICENSE-APACHE-2.0 b/third_party/rust/jsparagus/LICENSE-APACHE-2.0 new file mode 100644 index 0000000000..80b2915dd3 --- /dev/null +++ b/third_party/rust/jsparagus/LICENSE-APACHE-2.0 @@ -0,0 +1,218 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2020 Mozilla Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. 
diff --git a/third_party/rust/jsparagus/LICENSE-MIT b/third_party/rust/jsparagus/LICENSE-MIT new file mode 100644 index 0000000000..ee2a08f98f --- /dev/null +++ b/third_party/rust/jsparagus/LICENSE-MIT @@ -0,0 +1,7 @@ +Copyright (c) 2020 Mozilla Foundation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/third_party/rust/jsparagus/Makefile b/third_party/rust/jsparagus/Makefile new file mode 100644 index 0000000000..827e232c7f --- /dev/null +++ b/third_party/rust/jsparagus/Makefile @@ -0,0 +1,111 @@ +PY_OUT = js_parser/parser_tables.py +HANDLER_FILE = crates/generated_parser/src/ast_builder.rs +HANDLER_INFO_OUT = jsparagus/emit/collect_handler_info/info.json +RS_TABLES_OUT = crates/generated_parser/src/parser_tables_generated.rs +RS_AST_OUT = crates/ast/src/types_generated.rs \ + crates/ast/src/type_id_generated.rs \ + crates/ast/src/dump_generated.rs \ + crates/ast/src/visit_generated.rs \ + crates/ast/src/source_location_accessor_generated.rs \ + crates/generated_parser/src/stack_value_generated.rs + +JSPARAGUS_DIR := $(dir $(firstword $(MAKEFILE_LIST))) +VENV_BIN_DIR := $(JSPARAGUS_DIR)jsparagus_build_venv/bin +PYTHON := $(VENV_BIN_DIR)/python +PIP := $(VENV_BIN_DIR)/pip + +all: $(PY_OUT) rust + +init-venv: + python3 -m venv jsparagus_build_venv &&\ + $(PIP) install --upgrade pip &&\ + $(PIP) install -r requirements.txt + +init: init-venv + git config core.hooksPath .githooks + +ECMA262_SPEC_HTML = ../tc39/ecma262/spec.html +STANDARD_ES_GRAMMAR_OUT = js_parser/es.esgrammar + +# List of files which have a grammar_extension! Rust macro. The macro content is +# scrapped to patch the extracted grammar. +EXTENSION_FILES = \ + +# Incomplete list of files that contribute to the dump file. 
+SOURCE_FILES = $(EXTENSION_FILES) \ +jsparagus/gen.py \ +jsparagus/grammar.py \ +jsparagus/rewrites.py \ +jsparagus/lr0.py \ +jsparagus/parse_table.py \ +jsparagus/extension.py \ +jsparagus/utils.py \ +jsparagus/actions.py \ +jsparagus/aps.py \ +jsparagus/types.py \ +js_parser/esgrammar.pgen \ +js_parser/generate_js_parser_tables.py \ +js_parser/parse_esgrammar.py \ +js_parser/load_es_grammar.py \ +js_parser/es-simplified.esgrammar + +EMIT_FILES = $(SOURCE_FILES) \ +jsparagus/emit/__init__.py \ +jsparagus/emit/python.py \ +jsparagus/emit/rust.py + +DUMP_FILE = js_parser/parser_generated.jsparagus_dump + +$(DUMP_FILE): $(SOURCE_FILES) + $(PYTHON) -m js_parser.generate_js_parser_tables --progress -o $@ $(EXTENSION_FILES:%=--extend %) + +$(PY_OUT): $(EMIT_FILES) $(DUMP_FILE) + $(PYTHON) -m js_parser.generate_js_parser_tables --progress -o $@ $(DUMP_FILE) + +$(HANDLER_INFO_OUT): jsparagus/emit/collect_handler_info/src/main.rs $(HANDLER_FILE) + (cd jsparagus/emit/collect_handler_info/; cargo run --bin collect_handler_info ../../../$(HANDLER_FILE) $(subst jsparagus/emit/collect_handler_info/,,$(HANDLER_INFO_OUT))) + +$(RS_AST_OUT): crates/ast/ast.json crates/ast/generate_ast.py + (cd crates/ast && $(abspath $(PYTHON)) generate_ast.py) + +$(RS_TABLES_OUT): $(EMIT_FILES) $(DUMP_FILE) $(HANDLER_INFO_OUT) + $(PYTHON) -m js_parser.generate_js_parser_tables --progress -o $@ $(DUMP_FILE) $(HANDLER_INFO_OUT) + +# This isn't part of the `all` target because it relies on a file that might +# not be there -- it lives in a different git respository. +$(STANDARD_ES_GRAMMAR_OUT): $(ECMA262_SPEC_HTML) + $(PYTHON) -m js_parser.extract_es_grammar $(ECMA262_SPEC_HTML) > $@ || rm $@ + +rust: $(RS_AST_OUT) $(RS_TABLES_OUT) + cargo build --all + +jsparagus/parse_pgen_generated.py: + $(PYTHON) -m jsparagus.parse_pgen --regenerate > $@ + +check: all static-check dyn-check + +dyn-check: + ./test.sh + cargo fmt + cargo test --all + +static-check: + $(VENV_BIN_DIR)/mypy -p jsparagus -p tests -p js_parser + +jsdemo: $(PY_OUT) + $(PYTHON) -m js_parser.try_it + +update-stencil: + $(PYTHON) update_stencil.py \ + ../mozilla-unified ./ + +update-unicode: + $(PYTHON) update_unicode.py UNIDATA ./ + +smoosh-status: + $(PYTHON) smoosh_status.py + +smoosh-status-ci: + $(PYTHON) smoosh_status.py ci + +.PHONY: all check static-check dyn-check jsdemo rust update-opcodes-m-u smoosh-status smoosh-status-ci diff --git a/third_party/rust/jsparagus/README.md b/third_party/rust/jsparagus/README.md new file mode 100644 index 0000000000..a380accae5 --- /dev/null +++ b/third_party/rust/jsparagus/README.md @@ -0,0 +1,159 @@ +[![Rust][Rust Badge]][Rust CI Link] +[![NotImplemented Counter][NotImplemented Badge]][NotImplemented Search] +[![Fuzzbug days since][Fuzzbug Days Badge]][Fuzzbugs] +[![Fuzzbug open][Fuzzbug Open Badge]][Open Fuzzbugs] +[![SmooshMonkey Build Result][SmooshMonkey Build Badge]][SmooshMonkey Build TreeHerder] +[![SmooshMonkey Test Result][SmooshMonkey Test Badge]][SmooshMonkey Test TreeHerder] + +# jsparagus - A JavaScript parser written in Rust + +jsparagus is intended to replace the JavaScript parser in Firefox. + +Current status: + +* jsparagus is not on crates.io yet. The AST design is not stable + enough. We do have a build of the JS shell that includes jsparagus + as an option (falling back on C++ for features jsparagus doesn't + support). See + [mozilla-spidermonkey/rust-frontend](https://github.com/mozilla-spidermonkey/rust-frontend). 
+
+* It can parse a lot of JS scripts, and will eventually be able to parse everything.
+  See the current limitations below, or our GitHub issues.
+
+* Our immediate goal is to [support parsing everything in Mozilla's JS
+  test suite and the features in test262 that Firefox already
+  supports](https://github.com/mozilla-spidermonkey/jsparagus/milestone/1).
+
+Join us on Discord: https://discord.gg/tUFFk9Y
+
+
+## Building jsparagus
+
+To build the parser by itself:
+
+```sh
+make init
+make all
+```
+
+The build takes about 3 minutes to run on my laptop.
+
+When it's done, you can:
+
+* Run `make check` to make sure things are working.
+
+* `cd crates/driver && cargo run -- -D` to try out the JS parser and bytecode emitter.
+
+
+## Building and running SpiderMonkey with jsparagus
+
+* To build SpiderMonkey with jsparagus, `configure` with `--enable-smoosh`.
+
+  This builds with a specific known-good revision of jsparagus.
+
+* Building SpiderMonkey with your own local jsparagus repo, for
+  development, takes more work; see [the jsparagus
+  SpiderMonkey wiki
+  page](https://github.com/mozilla-spidermonkey/jsparagus/wiki/SpiderMonkey)
+  for details.
+
+**NOTE: Even after building with jsparagus, you must run the shell with
+`--smoosh`** to enable jsparagus at run time.
+
+
+
+## Benchmarking
+
+### Fine-grained Benchmarks
+
+Fine-grained benchmarks are used to detect regressions by focusing on one part
+of the parser at a time, exercising only that part. The benchmarks are not
+meant to represent any real code sample, but to focus on executing specific
+functions of the parser.
+
+To run these benchmarks, execute the following commands at the root of the
+repository:
+
+```sh
+cd crates/parser
+cargo bench
+```
+
+### Real-world JavaScript
+
+Real-world benchmarks are used to track the overall evolution of performance over
+time. The benchmarks are meant to represent realistic production use cases.
+
+To benchmark the AST generation, we use SpiderMonkey integration to execute the
+parser and compare it against SpiderMonkey's default parser. Therefore, to run
+this benchmark, we first have to compile SpiderMonkey, then run the SpiderMonkey
+shell on the benchmark. (The following instructions assume that `~` is the
+directory where all projects are checked out.)
+
+* Generate Parse Tables:
+
+  ```sh
+  cd ~/jsparagus/
+  make init
+  make all
+  ```
+
+* Compile an optimized version of [SpiderMonkey's JavaScript shell](https://github.com/mozilla/gecko-dev):
+
+  ```sh
+  cd ~/mozilla/js/src/
+  # set the jsparagus path to the absolute path of ~/jsparagus.
+  $EDITOR frontend/smoosh/Cargo.toml
+  ../../mach vendor rust
+  # Create a build directory
+  mkdir obj.opt
+  cd obj.opt
+  # Build SpiderMonkey
+  ../configure --enable-nspr-build --enable-smoosh --enable-debug-symbols=-ggdb3 --disable-debug --enable-optimize --enable-release --disable-tests
+  make
+  ```
+
+* Execute the [real-js-samples](https://github.com/nbp/real-js-samples/) benchmark:
+
+  ```sh
+  cd ~/real-js-samples/
+  ~/mozilla/js/src/obj.opt/dist/bin/js ./20190416.js
+  ```
+
+This should report the overall time taken to parse each script once, in the
+cases where there is no error. The goal is to minimize the number of
+nanoseconds per byte.
+
+
+## Limitations
+
+It's *all* limitations, but I'll try to list the ones that are relevant
+to parsing JS.
+
+* Features that are not implemented in the parser yet include `let`,
+  `import` and `export`, `async` functions, `yield` expressions, the
+  use of `await` and `yield` as identifiers, template strings,
+  `BigInt`, Unicode escape sequences that evaluate to surrogate code
+  points, legacy octal integer literals, legacy octal escape
+  sequences, some RegExp flags, strict mode code, `__proto__` in
+  object literals, some features of destructuring assignment.
+
+  Many more features are not yet supported in the bytecode emitter.
+
+* Error messages are poor.
+
+We're currently working on parser performance and completeness, as well
+as the bytecode emitter and further integration with SpiderMonkey.
+
+
+[Rust Badge]: https://github.com/mozilla-spidermonkey/jsparagus/workflows/Rust/badge.svg
+[Rust CI Link]: https://github.com/mozilla-spidermonkey/jsparagus/actions?query=branch%3Amaster
+[NotImplemented Badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmozilla-spidermonkey%2Fjsparagus%2Fci_results%2F.metrics%2Fbadges%2Fnot-implemented.json
+[NotImplemented Search]: https://github.com/mozilla-spidermonkey/jsparagus/search?q=notimplemented&unscoped_q=notimplemented
+[Fuzzbug days Badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmozilla-spidermonkey%2Fjsparagus%2Fci_results%2F.metrics%2Fbadges%2Fsince-last-fuzzbug.json
+[Fuzzbug Open Badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmozilla-spidermonkey%2Fjsparagus%2Fci_results%2F.metrics%2Fbadges%2Fopen-fuzzbug.json
+[Fuzzbugs]: https://github.com/mozilla-spidermonkey/jsparagus/issues?utf8=%E2%9C%93&q=label%3AlibFuzzer+
+[Open Fuzzbugs]: https://github.com/mozilla-spidermonkey/jsparagus/labels/libFuzzer
+[SmooshMonkey Build Badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmozilla-spidermonkey%2Fjsparagus%2Fci_smoosh_status%2Fsmoosh_build.json
+[SmooshMonkey Build TreeHerder]: https://treeherder.mozilla.org/#/jobs?repo=mozilla-central&tier=1%2C2%2C3&searchStr=sm-nonunified
+[SmooshMonkey Test Badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmozilla-spidermonkey%2Fjsparagus%2Fci_smoosh_status%2Fsmoosh_test.json
+[SmooshMonkey Test TreeHerder]: https://treeherder.mozilla.org/#/jobs?repo=mozilla-central&tier=1%2C2%2C3&searchStr=sm-smoosh
diff --git a/third_party/rust/jsparagus/benchmarks/compare-spidermonkey-parsers.js b/third_party/rust/jsparagus/benchmarks/compare-spidermonkey-parsers.js
new file mode 100644
index 0000000000..d449bffda7
--- /dev/null
+++ b/third_party/rust/jsparagus/benchmarks/compare-spidermonkey-parsers.js
@@ -0,0 +1,315 @@
+// This script runs multiple parsers from a single engine.
+"use strict";
+
+// Directory where to find the list of JavaScript sources to be used for
+// benchmarking.
+var dir = ".";
+
+// Skip list cache used so that profiles can be compared. Without a skip
+// list which ensures that only runnable test cases are used, the profile would
+// not represent the actual values reported by this script.
+var skipList = [], skipFile = "", skipLen = 0;
+
+// Handle command line arguments.
+for (var i = 0; i < scriptArgs.length; i++) { + switch (scriptArgs[i]) { + case "--dir": + if (++i >= scriptArgs.length) { + throw Error("--dir expects a path."); + } + dir = scriptArgs[i]; + break; + case "--skip-file": + if (++i >= scriptArgs.length) { + throw Error("--skip-file expects a path."); + } + skipFile = scriptArgs[i]; + try { + skipList = eval(os.file.readFile(skipFile)); + } catch (e) { + // ignore errors + } + skipLen = skipList.length; + break; + } +} + +// Execution mode of the parser, either "script" or "module". +var mode = "script"; + +// Number of times each JavaScript source is used for benchmarking. +var runs_per_script = 10; + +// First parser +var name_1 = "SpiderMonkey parser"; +function parse_1(path) { + var start = performance.now(); + parse(path, { module: mode == "module", smoosh: false }); + return performance.now() - start; +} + +// Second parser +var name_2 = "SmooshMonkey parser"; +function parse_2(path) { + var start = performance.now(); + parse(path, { module: mode == "module", smoosh: true }); + return performance.now() - start; +} + +// For a given `parse` function, execute it with the content of each file in +// `dir`. This process is repeated `N` times and the results are added to the +// `result` argument using the `prefix` key for the filenames. +function for_all_files(parse, N = 1, prefix = "", result = {}) { + var path = "", content = ""; + var t = 0; + var list = os.file.listDir(dir); + for (var file of list) { + try { + path = os.path.join(dir, file); + content = os.file.readRelativeToScript(path); + try { + t = 0; + for (var n = 0; n < N; n++) + t += parse(content); + result[prefix + path] = { time: t / N, bytes: content.length }; + } catch (e) { + // ignore all errors for now. + result[prefix + path] = { time: null, bytes: content.length }; + } + } catch (e) { + // ignore all read errors. + } + } + return result; +} + +// Compare the results of 2 parser runs and compute the speed ratio between the +// 2 parsers. Results from both parsers are assuming to be comparing the same +// things if they have the same property name. +// +// The aggregated results is returned as an object, which reports the total time +// for each parser, the quantity of bytes parsed and skipped and an array of +// speed ratios for each file tested. 
+function compare(name1, res1, name2, res2) { + var result = { + name1: name1, + name2: name2, + time1: 0, + time2: 0, + parsed_files: 0, + parsed_bytes: 0, + skipped_files: 0, + skipped_bytes: 0, + ratios_2over1: [], + }; + for (var path of Object.keys(res1)) { + if (!(path in res1 && path in res2)) { + continue; + } + var p1 = res1[path]; + var p2 = res2[path]; + if (p1.time !== null && p2.time !== null) { + result.time1 += p1.time; + result.time2 += p2.time; + result.parsed_files += 1; + result.parsed_bytes += p1.bytes; + result.ratios_2over1.push(p2.time / p1.time); + } else { + result.skipped_files += 1; + result.skipped_bytes += p1.bytes; + } + } + return result; +} + +function print_result(result) { + print(result.name1, "\t", result.time1, "ms\t", 1e6 * result.time1 / result.parsed_bytes, 'ns/byte\t', result.parsed_bytes / (1e6 * result.time1), 'bytes/ns\t'); + print(result.name2, "\t", result.time2, "ms\t", 1e6 * result.time2 / result.parsed_bytes, 'ns/byte\t', result.parsed_bytes / (1e6 * result.time2), 'bytes/ns\t'); + print("Total parsed (scripts:", result.parsed_files, ", bytes:", result.parsed_bytes, ")"); + print("Total skipped (scripts:", result.skipped_files, ", bytes:", result.skipped_bytes, ")"); + print(result.name2, "/", result.name1, ":", result.time2 / result.time1); + print(result.name2, "/", result.name1, ":", spread(result.ratios_2over1, 0, 5, 0.05)); +} + +// Given a `table` of speed ratios, display a distribution chart of speed +// ratios. This is useful to check if the data is noisy, bimodal, and to easily +// eye-ball characteristics of the distribution. +function spread(table, min, max, step) { + // var chars = ["\xa0", "\u2591", "\u2592", "\u2593", "\u2588"]; + var chars = ["\xa0", "\u2581", "\u2582", "\u2583", "\u2584", "\u2585", "\u2586", "\u2587", "\u2588"]; + var s = ["\xa0", "\xa0", "" + min, "\xa0", "\xa0"]; + var ending = ["\xa0", "\xa0", "" + max, "\xa0", "\xa0"]; + var scale = "\xa0\xa0"; + var scale_values = ["⁰", "¹", "²", "³", "⁴", "⁵", "⁶", "⁷", "⁸", "⁹"]; + var ranges = []; + var vmax = table.length / 10; + for (var i = min; i < max; i += step) { + ranges.push(0); + var decimal = i - Math.trunc(i); + var error = Math.abs(decimal - Math.round(10 * decimal) / 10); + decimal = Math.round(decimal * 10) % 10; + if (error < step / 2) + scale += scale_values[decimal]; + else + scale += "\xa0"; + } + for (var x of table) { + if (x < min || max < x) continue; + var idx = ((x - min) / step)|0; + ranges[idx] += 1; + } + var max_index = chars.length * s.length; + var ratio = max_index / vmax; + for (i = 0; i < s.length; i++) + s[i] += "\xa0\u2595"; + for (var v of ranges) { + var d = Math.min((v * ratio)|0, max_index - 1); + var offset = max_index; + for (i = 0; i < s.length; i++) { + offset -= chars.length; + var c = Math.max(0, Math.min(d - offset, chars.length - 1)); + s[i] += chars[c]; + } + } + for (i = 0; i < s.length; i++) + s[i] += "\u258f\xa0" + ending[i]; + var res = ""; + for (i = 0; i < s.length; i++) + res += "\n" + s[i]; + res += "\n" + scale; + return res; +} + +// NOTE: We have multiple strategies depending whether we want to check the +// throughput of the parser assuming the parser is cold/hot in memory, the data is +// cold/hot in the cache, and the adaptive CPU throttle is low/high. +// +// Ideally we should be comparing comparable things, but due to the adaptive +// behavior of CPU and Disk, we can only approximate it while keeping results +// comparable to what users might see. + +// Compare Hot-parsers on cold data. 
+function strategy_1() { + var res1 = for_all_files(parse_1, runs_per_script); + var res2 = for_all_files(parse_2, runs_per_script); + return compare(name_1, res1, name_2, res2); +} + +// Compare Hot-parsers on cold data, and swap parse order. +function strategy_2() { + var res2 = for_all_files(parse_2, runs_per_script); + var res1 = for_all_files(parse_1, runs_per_script); + return compare(name_1, res1, name_2, res2); +} + +// Interleaves N hot-parser results. (if N=1, then strategy_3 is identical to strategy_1) +// +// At the moment, this is assumed to be the best approach which might mimic how +// a helper-thread would behave if it was saturated with content to be parsed. +function strategy_3() { + var res1 = {}; + var res2 = {}; + var N = runs_per_script; + for (var n = 0; n < N; n++) { + for_all_files(parse_1, 1, "" + n, res1); + for_all_files(parse_2, 1, "" + n, res2); + } + return compare(name_1, res1, name_2, res2); +} + +// Compare cold parsers, with alternatetively cold/hot data. +// +// By swapping parser order of execution after each file, we expect that the +// previous parser execution would be enough to evict the other from the L2 +// cache, and as such cause the other parser to hit cold instruction cache where +// the instruction have to be reloaded. +// +// At the moment, this is assumed to be the best approach which might mimic how +// parsers are effectively used on the main thread. +function strategy_0() { + var path = "", content = ""; + var t_1= 0, t_2 = 0, time_1 = 0, time_2 = 0; + var count = 0, count_bytes = 0, skipped = 0, skipped_bytes = 0; + var parse1_first = false; + var list = os.file.listDir(dir); + var ratios_2over1 = []; + var parse1_first = true; + for (var file of list) { + path = os.path.join(dir, file); + if (skipList.includes(path)) { + continue; + } + content = ""; + try { + // print(Math.round(100 * f / list.length), file); + content = os.file.readRelativeToScript(path); + parse1_first = !parse1_first; // Math.random() > 0.5; + for (var i = 0; i < runs_per_script; i++) { + // Randomize the order in which parsers are executed as they are + // executed in the same process and the parsed content might be + // faster to load for the second parser as it is already in memory. + if (parse1_first) { + t_1 = parse_1(content); + t_2 = parse_2(content); + } else { + t_2 = parse_2(content); + t_1 = parse_1(content); + } + time_1 += t_1; + time_2 += t_2; + ratios_2over1.push(t_2 / t_1); + } + count++; + count_bytes += content.length; + } catch (e) { + // ignore all errors for now. 
+ skipped++; + skipped_bytes += content.length; + skipList.push(path); + } + } + + return { + name1: name_1, + name2: name_2, + time1: time_1, + time2: time_2, + parsed_files: count * runs_per_script, + parsed_bytes: count_bytes * runs_per_script, + skipped_files: skipped * runs_per_script, + skipped_bytes: skipped_bytes * runs_per_script, + ratios_2over1: ratios_2over1, + }; +} + +var outputJSON = os.getenv("SMOOSH_BENCH_AS_JSON") !== undefined; +if (!outputJSON) { + print("Main thread comparison:"); +} +var main_thread_result = strategy_0(); +if (!outputJSON) { + print_result(main_thread_result); + print(""); + print("Off-thread comparison:"); +} +var off_thread_result = strategy_3(); +if (!outputJSON) { + print_result(off_thread_result); +} + +if (outputJSON) { + print(JSON.stringify({ + main_thread: main_thread_result, + off_thread: main_thread_result + })); +} + +if (skipFile && skipList.length > skipLen) { + var content = `[${skipList.map(s => `"${s}"`).join(",")}]`; + var data = new ArrayBuffer(content.length); + var view = new Uint8Array(data); + for (var i = 0; i < content.length; i++) { + view[i] = content.charCodeAt(i); + } + os.file.writeTypedArrayToFile(skipFile, view); +} diff --git a/third_party/rust/jsparagus/gecko-patches.txt b/third_party/rust/jsparagus/gecko-patches.txt new file mode 100644 index 0000000000..30c51f01a2 --- /dev/null +++ b/third_party/rust/jsparagus/gecko-patches.txt @@ -0,0 +1 @@ +D88970:1662383 diff --git a/third_party/rust/jsparagus/journal.md b/third_party/rust/jsparagus/journal.md new file mode 100644 index 0000000000..3536812ef2 --- /dev/null +++ b/third_party/rust/jsparagus/journal.md @@ -0,0 +1,272 @@ +## What I learned, what I wonder + + +### Stab 5 (simple LR, LR(1), then LALR(1)) + +Well. I learned enough to implement this, although there is still much I +don't understand. + +I learned a bit about what kind of phenomenon can render a grammar +outside XLL(1) (that is, LL(1) as extended by automated left-factoring +and left-recursion elimination); see `testFirstFirstConflict` in +`test.py` for a contrived example, and `testLeftHandSideExpression` for +a realistic one. + +I learned that the shift-reduce operator precedence parser I wrote for +SpiderMonkey is even less like a typical LR parser than I imagined. + +I was stunned to find that the SLR parser I wrote first, including the +table generator, was *less* code than the predictive LL parser of stab +4. However, full LR(1) took rather a lot of code. + +I learned that I will apparently hand-code the computation of transitive +closures of sets under relations ten times before even considering +writing a general algorithm. The patterns I have written over and over +are: 1. `while not done:` visit every element already in the set, +iterating to a fixed point, which is this ludicrous O(*n*2) +in the number of pairs in the relation; 2. depth-first graph walking +with cycle detection, which can overflow the stack. + +I learned three ways to hack features into an LR parser generator (cf. how +easy it is to hack stuff into a recursive descent parser). The tricks I +know are: + +1. Add custom items. To add lookahead assertions, I just added a + lookahead element to the LRItem tuple. The trick then is to make + sure you are normalizing states that are actually identical, to + avoid combinatorial explosion—and eventually, I expect, table + compression. + +2. Add custom actions. 
I think I can support automatic semicolon + insertion by replacing the usual error action of some states with a + special ASI actions. + +3. Desugaring. The + [ECMAScript standard](https://tc39.es/ecma262/#sec-grammar-notation) + describes optional elements and parameterized nonterminals this way, + and for now at least, that's how we actually implement them. + +There's a lot still to learn here. + +* OMG, what does it all mean? I'm getting more comfortable with the + control flow ("calls" and "returns") of this system, but I wouldn't + say I understand it! + +* Why is lookahead, past the end of the current half-parsed + production, part of an LR item? What other kinds of item + embellishment could be done instead? + +* In what sense is an LR parser a DFA? I implemented it, but there's + more to it that I haven't grokked yet. + +* Is there just one DFA or many? What exactly is the "derived" grammar + that the DFA parses? How on earth does it magically turn out to be + regular? (This depends on it not extending past the first handle, + but I still don't quite see.) + +* If I faithfully implement the algorithms in the book, will it be + less of a dumpster fire? Smaller, more factored? + +* How can I tell if a transformation on grammars preserves the + property of being LR(k)? Factoring out a nonterminal, for example, + may not preserve LR(k)ness. Inlining probably always does. + +* Is there some variant of this that treats nonterminals more like + terminals? It's easy to imagine computing start sets and follow sets + that contain both kinds of symbols. Does that buy us anything? + + +Things I noticed: + +* I think Yacc allows bits of code in the middle of productions: + + nt1: T1 T2 nt2 { code1(); } T3 nt3 T4 { code2(); } + + That could be implemented by introducing a synthetic production + that contains everything up to the first code block: + + nt1_aux: T1 T2 nt2 { code1(); } + nt1: nt1_aux T3 nt3 T4 { code2(); } + + There is a principle that says code should happen only at the end of + a production: because LR states are superpositions of items. We + don't know which production we are really parsing until we reduce, + so we don't know which code to execute. + +* Each state is reachable from an initial state by a finite sequence + of "pushes", each of which pushes either a terminal (a shift action) + or a nonterminal (a summary of a bunch of parsing actions, ending + with a reduce). + + States can sometimes be reached multiple ways (it's a state + transition graph). But regardless of which path you take, the symbols + pushed by the last few steps always match the symbols appearing to + the left of point in each of the state's LR items. (This implies + that those items have to agree on what has happened. Might make a + nice assertion.) + + + +### Stab 4 (nonrecursive table-driven predictive LL parser) + +I learned that testing that a Python program can do something deeply +recursive is kind of nontrivial. :-\ + +I learned that the predictive parser still takes two stacks (one +representing the future and one representing the past). It's not magic! +This makes me want to hop back to stab 3, optimize away the operand +stack, and see what kind of code I can get. + +It seems like recursive descent would be faster, but the table-driven +parser could be made to support incremental parsing (the state of the +algorithm is "just data", a pair of stacks, neither of which is the +parser program's native call stack). 
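+
+To make the "pair of stacks" point concrete, here is a minimal toy sketch
+(a made-up example, not the actual jsparagus code) of a table-driven
+predictive parser whose entire state is two plain Python lists. The grammar,
+table, and token names are invented for a tiny Lisp-like language.
+
+```python
+# Toy LL(1) table: (nonterminal, lookahead) -> production body to push.
+TABLE = {
+    ("expr", "SYM"): ["SYM"],
+    ("expr", "("): ["(", "tail"],
+    ("tail", ")"): [")"],
+    ("tail", "SYM"): ["expr", "tail"],
+    ("tail", "("): ["expr", "tail"],
+}
+
+def parse(tokens):
+    future = ["expr"]   # symbols we still expect to match (the "future" stack)
+    past = []           # terminals already matched (stand-in for an AST stack)
+    pos = 0
+    while future:
+        top = future.pop()
+        lookahead = tokens[pos] if pos < len(tokens) else None
+        if top in ("SYM", "(", ")"):        # terminal: must match the input
+            if lookahead != top:
+                raise SyntaxError(f"expected {top!r}, got {lookahead!r}")
+            past.append(lookahead)
+            pos += 1
+        else:                               # nonterminal: consult the table
+            body = TABLE.get((top, lookahead))
+            if body is None:
+                raise SyntaxError(f"unexpected {lookahead!r} in {top}")
+            future.extend(reversed(body))   # push the body, leftmost symbol on top
+    if pos != len(tokens):
+        raise SyntaxError("trailing input")
+    return past
+
+print(parse(["(", "SYM", "SYM", ")"]))      # ['(', 'SYM', 'SYM', ')']
+```
+
+Because the whole parser state is just `(future, past, pos)`, the loop can
+stop after any token and resume later, which is what makes the incremental
+use case plausible.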
+ + +### Stab 3 (recursive descent with principled left-recursion-elimination and left-factoring) + +I learned how to eliminate left recursion in a grammar (Algorithm 4.1 +from the book). I learned how to check that a grammar is LL(1) using +the start and follow sets, although I didn't really learn what LL(1) +means in any depth. (I'm just using it as a means to prove that the +grammar is unambiguous.) + +I learned from the book how to do a table-driven "nonrecursive +predictive parser". Something to try later. + +I came up with the "reduction symbol" thing. It seems to work as +expected! This allows me to transform the grammar, but still generate +parse trees reflecting the source grammar. However, the resulting code +is inefficient. Further optimization would improve it, but the +predictive parser will fare better even without optimization. + +I wonder what differences there are between LL(1) and LR(1) grammars. +(The book repeatedly says they are different, but the distinctions it +draws suggest differences like: left-recursive grammars can be LR but +never LL. That particular difference doesn't matter much to me, because +there's an algorithm for eliminating left recursion.) + + +### Stab 2 (recursive descent with ad hoc immediate-left-recursion-elimination) + +I learned it's easy for code to race ahead of understanding. +I learned that a little feature can mean a lot of complexity. + +I learned that it's probably hard to support indirect left-recursion using this approach. +We're able to twist left-recursion into a `while` loop because what we're doing is local to a single nonterminal's productions, +and they're all parsed by a single function. +Making this work across function boundaries would be annoying, +even ignoring the possibility that a nonterminal can be involved in multiple left-call cycles. + +I wonder if the JS spec uses any indirect left-recursion. + +I wonder if there's a nice formalization of a "grammar with actions" that abstracts away "implementation details", +so that we could prove two grammars equivalent, +not just in that they describe the same language, +but equivalent in output. +This could help me explore "grammar rewrites", +which could lead to usable optimizations. + +I noticed that the ES spec contains this: + +> ### 13.6 The if Statement +> #### Syntax +> ``` +> IfStatement[Yield, Await, Return]: +> if ( Expression[+In, ?Yield, ?Await] ) Statement[?Yield, ?Await, ?Return] else Statement[?Yield, ?Await, ?Return] +> if ( Expression[+In, ?Yield, ?Await] ) Statement[?Yield, ?Await, ?Return] +> ``` +> +> Each `else` for which the choice of associated `if` is ambiguous shall +> be associated with the nearest possible `if` that would otherwise have +> no corresponding `else`. + +I wonder if this prose is effectively the same as adding a negative lookahead assertion +"[lookahead ≠ `else`]" at the end of the shorter production. + +(I asked bterlson and he thinks so.) + +I wonder if follow sets can be usefully considered as context-dependent. +What do I mean by this? +For example, `function` is certainly in the follow set of *Statement* in JS, +but there are plenty of contexts, like the rule `do Statement while ( Expression ) ;`, +where the nested *Statement* is never followed by `function`. +But does it matter? +I think it only matters if you're interested in better error messages. +Follow sets only matter to detect ambiguity in a grammar, +and *Statement* is ambiguous if it's ambiguous in *any* context. 
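+
+To make the while-loop trick from this stab concrete, here is a minimal
+Python sketch (not jsparagus code; the toy grammar is invented for
+illustration) of immediate left recursion flattened into a loop inside a
+single recursive-descent function:
+
+```python
+# Hypothetical grammar:
+#     expr ::= expr "-" NUM     (left recursive)
+#     expr ::= NUM
+def parse_expr(tokens, pos=0):
+    # Parse the non-left-recursive production first...
+    value, pos = parse_num(tokens, pos)
+    # ...then loop: each iteration applies the left-recursive production once.
+    # This only works because both productions for `expr` live in this one
+    # function; indirect left recursion would span several functions and
+    # couldn't be flattened this easily.
+    while pos < len(tokens) and tokens[pos] == "-":
+        rhs, pos = parse_num(tokens, pos + 1)
+        value = ("-", value, rhs)  # left-associative, as the grammar demands
+    return value, pos
+
+def parse_num(tokens, pos):
+    if pos < len(tokens) and isinstance(tokens[pos], int):
+        return tokens[pos], pos + 1
+    raise SyntaxError(f"expected a number at position {pos}")
+
+# 7 - 3 - 1 parses as ((7 - 3) - 1), matching the grammar's left associativity.
+assert parse_expr([7, "-", 3, "-", 1])[0] == ("-", ("-", 7, 3), 1)
+```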
+ + +### Stab 1 (very naive recursive descent) + +I learned that if you simply define a grammar as a set of rules, +there are all sorts of anomalies that can come up: + +* Vacant nonterminals (that do not match any input strings); + +* Nonterminals that match only infinite strings, like `a ::= X a`. + +* Cycles ("busy loops"), like `a ::= a`. + These always introduce ambiguity. + (You can also have cycles through multiple nonterminals: + `a ::= b; b ::= a`.) + +These in particular are easy to test for, with no false positives. +I wonder if there are other anomalies, +and if the "easiness" generalizes to all of them, and why. + +I know what it means for a grammar to be *ambiguous*: +it means there's at least one input with multiple valid parses. +I understand that parser generators can check for ambiguity. +But it's easiest to do so by imposing draconian restrictions. +I learned the "dangling `else` problem" is an ambiguity in exactly this sense. +I wonder if there's a principled way to deal with it. + +I know that a parse is a constructive proof that a string matches a grammar. + +I learned that start sets are important even in minimal parser generators. +This is interesting because they'll be a bit more interesting to compute +once we start considering empty productions. +I wonder if it turns out to still be pretty easy. +Does the start set of a possibly-empty production include its follow set? +(According to the dragon book, you add epsilon to the start set in this case.) + + +### Nice grammars + +I learned that the definition of a "grammar" +as a formal description of a language (= a set of strings) +is incomplete. + +Consider the Lisp syntax we're using: + +``` +sexpr ::= SYMBOL +sexpr ::= "(" tail + +tail ::= ")" +tail ::= sexpr tail +``` + +Nobody wants to parse Lisp like that. +There are two problems. + +One is expressive. +The `"("` and `")"` tokens should appear in the same production. +That way, the grammar says declaratively: these marks always appear in properly nesting pairs. + +``` +sexpr ::= SYMBOL +sexpr ::= "(" list ")" + +list ::= [empty] +list ::= sexpr list +``` + +The other problem has to do with *what you've got* when you get an automatically generated parse. +A grammar is more than just a description of a language, +to the extent we care about the form of the parse trees we get out of the parser. + +A grammar is a particular way of writing a parser, +and since we care about the parser's output, +we care about details of the grammar that would be mere "implementation details" otherwise. diff --git a/third_party/rust/jsparagus/js-quirks.md b/third_party/rust/jsparagus/js-quirks.md new file mode 100644 index 0000000000..20c621c92a --- /dev/null +++ b/third_party/rust/jsparagus/js-quirks.md @@ -0,0 +1,1036 @@ +## JS syntactic quirks + +> *To make a labyrinth, it takes* +> *Some good intentions, some mistakes.* +> —A. E. Stallings, “Daedal” + +JavaScript is rather hard to parse. Here is an in-depth accounting of +its syntactic quirks, with an eye toward actually implementing a parser +from scratch. + +With apologies to the generous people who work on the standard. Thanks +for doing that—better you than me. + +Thanks to [@bakkot](https://github.com/bakkot) and +[@mathiasbynens](https://github.com/mathiasbynens) for pointing out +several additional quirks. + +Problems are rated in terms of difficulty, from `(*)` = easy to `(***)` += hard. We’ll start with the easiest problems. + + +### Dangling else (*) + +If you know what this is, you may be excused. 
+ +Statements like `if (EXPR) STMT if (EXPR) STMT else STMT` +are straight-up ambiguous in the JS formal grammar. +The ambiguity is resolved with +[a line of specification text](https://tc39.es/ecma262/#sec-if-statement): + +> Each `else` for which the choice of associated `if` is ambiguous shall +> be associated with the nearest possible `if` that would otherwise have +> no corresponding `else`. + +I love this sentence. Something about it cracks me up, I dunno... + +In a recursive descent parser, just doing the dumbest possible thing +correctly implements this rule. + +A parser generator has to decide what to do. In Yacc, you can use +operator precedence for this. + +Yacc aside: This should seem a little outrageous at first, as `else` is +hardly an operator. It helps if you understand what Yacc is doing. In LR +parsers, this kind of ambiguity in the grammar manifests as a +shift-reduce conflict. In this case, when we’ve already parsed `if ( +Expression ) Statement if ( Expression ) Statement` +and are looking at `else`, it’s unclear to Yacc +whether to reduce the if-statement or shift `else`. Yacc does not offer +a feature that lets us just say "always shift `else` here"; but there +*is* a Yacc feature that lets us resolve shift-reduce conflicts in a +rather odd, indirect way: operator precedence. We can resolve this +conflict by making `else` higher-precedence than the preceding symbol +`)`. + +Alternatively, I believe it’s equivalent to add "[lookahead ≠ `else`]" +at the end of the IfStatement production that doesn’t have an `else`. + + +### Other ambiguities and informal parts of the spec (*) + +Not all of the spec is as formal as it seems at first. Most of the stuff +in this section is easy to deal with, but #4 is special. + +1. The lexical grammar is ambiguous: when looking at the characters `<<=`, + there is the question of whether to parse that as one token `<<=`, two + tokens (`< <=` or `<< =`), or three (`< < =`). + + Of course every programming language has this, and the fix is one + sentence of prose in the spec: + + > The source text is scanned from left to right, repeatedly taking the + > longest possible sequence of code points as the next input element. + + This is easy enough for hand-coded lexers, and for systems that are + designed to use separate lexical and syntactic grammars. (Other + parser generators may need help to avoid parsing `functionf(){}` as + a function.) + +2. The above line of prose does not apply *within* input elements, in + components of the lexical grammar. In those cases, the same basic + idea ("maximum munch") is specified using lookahead restrictions at + the end of productions: + + > *LineTerminatorSequence* :: + >     <LF> + >     <CR>[lookahead ≠ <LF>] + >     <LS> + >     <PS> + >     <CR><LF> + + The lookahead restriction prevents a CR LF sequence from being + parsed as two adjacent *LineTerminatorSequence*s. + + This technique is used in several places, particularly in + [*NotEscapeSequences*](https://tc39.es/ecma262/#prod-NotEscapeSequence). + +3. Annex B.1.4 extends the syntax for regular expressions, making the + grammar ambiguous. Again, a line of prose explains how to cope: + + > These changes introduce ambiguities that are broken by the + > ordering of grammar productions and by contextual + > information. When parsing using the following grammar, each + > alternative is considered only if previous production alternatives + > do not match. + +4. 
Annex B.1.2 extends the syntax of string literals to allow legacy + octal escape sequences, like `\033`. It says: + + > The syntax and semantics of 11.8.4 is extended as follows except + > that this extension is not allowed for strict mode code: + + ...followed by a new definition of *EscapeSequence*. + + So there are two sets of productions for *EscapeSequence*, and an + implementation is required to implement both and dynamically switch + between them. + + This means that `function f() { "\033"; "use strict"; }` is a + SyntaxError, even though the octal escape is scanned before we know + we're in strict mode. + +For another ambiguity, see "Slashes" below. + + +### Unicode quirks + +JavaScript source is Unicode and usually follows Unicode rules for thing +like identifiers and whitespace, but it has a few special cases: `$`, +`_`, `U+200C ZERO WIDTH NON-JOINER`, and `U+200D ZERO WIDTH JOINER` are +legal in identifiers (the latter two only after the first character), and +`U+FEFF ZERO WIDTH NO-BREAK SPACE` (also known as the byte-order mark) is +treated as whitespace. + +It also allows any code point, including surrogate halves, even though the +Unicode standard says that unpaired surrogate halves should be treated as +encoding errors. + + +### Legacy octal literals and escape sequences (*) + +This is more funny than difficult. + +In a browser, in non-strict code, every sequence of decimal digits (not +followed by an identifier character) is a *NumericLiteral* token. + +If it starts with `0`, with more digits after, then it's a legacy Annex +B.1.1 literal. If the token contains an `8` or a `9`, it's a decimal +number. Otherwise, hilariously, it's octal. + +``` +js> [067, 068, 069, 070] +[55, 68, 69, 56] +``` + +There are also legacy octal escape sequences in strings, and these have +their own quirks. `'\07' === '\u{7}'`, but `'\08' !== '\u{8}'` since 8 +is not an octal digit. Instead `'\08' === '\0' + '8'`, because `\0` +followed by `8` or `9` is a legacy octal escape sequence representing +the null character. (Not to be confused with `\0` in strict code, not +followed by a digit, which still represents the null character, but +doesn't count as octal.) + +None of this is hard to implement, but figuring out what the spec says +is hard. + + +### Strict mode (*) + +*(entangled with: lazy compilation)* + +A script or function can start with this: + +```js +"use strict"; +``` + +This enables ["strict mode"](https://tc39.es/ecma262/#sec-strict-mode-of-ecmascript). +Additionally, all classes and modules are strict mode code. + +Strict mode has both parse-time and run-time effects. Parse-time effects +include: + +* Strict mode affects the lexical grammar: octal integer literals are + SyntaxErrors, octal character escapes are SyntaxErrors, and a + handful of words like `private` and `interface` are reserved (and + thus usually SyntaxErrors) in strict mode. + + Like the situation with slashes, this means it is not possible to + implement a complete lexer for JS without also parsing—at least + enough to detect class boundaries, "use strict" directives in + functions, and function boundaries. + +* It’s a SyntaxError to have bindings named `eval` or `arguments` in + strict mode code, or to assign to `eval` or `arguments`. + +* It’s a SyntaxError to have two argument bindings with the same name + in a strict function. + + Interestingly, you don’t always know if you’re in strict mode or not + when parsing arguments. 
+ + ```js + function foo(a, a) { + "use strict"; + } + ``` + + When the implementation reaches the Use Strict Directive, it must + either know that `foo` has two arguments both named `a`, or switch + to strict mode, go back, and reparse the function from the + beginning. + + Fortunately an Early Error rule prohibits mixing `"use strict"` with + more complex parameter lists, like `function foo(x = eval('')) {`. + +* The expression syntax “`delete` *Identifier*” and the abominable + *WithStatement* are banned in strict mode. + + +### Conditional keywords (**) + +In some programming languages, you could write a lexer that has rules +like + +* When you see `if`, return `Token::If`. + +* When you see something like `apple` or `arrow` or `target`, + return `Token::Identifier`. + +Not so in JavaScript. The input `if` matches both the terminal `if` and +the nonterminal *IdentifierName*, both of which appear in the high-level +grammar. The same goes for `target`. + +This poses a deceptively difficult problem for table-driven parsers. +Such parsers run on a stream of token-ids, but the question of which +token-id to use for a word like `if` or `target` is ambiguous. The +current parser state can't fully resolve the ambiguity: there are cases +like `class C { get` where the token `get` might match either as a +keyword (the start of a getter) or as an *IdentifierName* (a method or +property named `get`) in different grammatical productions. + +All keywords are conditional, but some are more conditional than others. +The rules are inconsistent to a tragicomic extent. Keywords like `if` +that date back to JavaScript 1.0 are always keywords except when used as +property names or method names. They can't be variable names. Two +conditional keywords (`await` and `yield`) are in the *Keyword* list; +the rest are not. New syntax that happened to be introduced around the +same time as strict mode was awarded keyword status in strict mode. The +rules are scattered through the spec. All this interacts with `\u0065` +Unicode escape sequences somehow. It’s just unbelievably confusing. + +(After writing this section, I +[proposed revisions to the specification](https://github.com/tc39/ecma262/pull/1694) +to make it a little less confusing.) + +* Thirty-six words are always reserved: + + > `break` `case` `catch` `class` `const` `continue` `debugger` + > `default` `delete` `do` `else` `enum` `export` `extends` `false` + > `finally` `for` `function` `if` `import` `in` `instanceof` `new` + > `null` `return` `super` `switch` `this` `throw` `true` `try` + > `typeof` `var` `void` `while` `with` + + These tokens can't be used as names of variables or arguments. + They're always considered special *except* when used as property + names, method names, or import/export names in modules. + + ```js + // property names + let obj = {if: 3, function: 4}; + assert(obj.if == 3); + + // method names + class C { + if() {} + function() {} + } + + // imports and exports + import {if as my_if} from "modulename"; + export {my_if as if}; + ``` + +* Two more words, `yield` and `await`, are in the *Keyword* list but + do not always act like keywords in practice. + + * `yield` is a *Keyword*; but it can be used as an identifier, + except in generators and strict mode code. + + This means that `yield - 1` is valid both inside and outside + generators, with different meanings. Outside a generator, it’s + subtraction. Inside, it yields the value **-1**. 
+ + That reminds me of the Groucho Marx line: Outside of a dog, a + book is a man’s best friend. Inside of a dog it’s too dark to + read. + + * `await` is like that, but in async functions. Also it’s not a + valid identifier in modules. + + Conditional keywords are entangled with slashes: `yield /a/g` is two + tokens in a generator but five tokens elsewhere. + +* In strict mode code, `implements`, `interface`, `package`, + `private`, `protected`, and `public` are reserved (via Early Errors + rules). + + This is reflected in the message and location information for + certain syntax errors: + + ``` + SyntaxError: implements is a reserved identifier: + class implements {} + ......^ + + SyntaxError: implements is a reserved identifier: + function implements() { "use strict"; } + ....................................^ + ``` + +* `let` is not a *Keyword* or *ReservedWord*. Usually it can be an + identifier. It is special at the beginning of a statement or after + `for (` or `for await (`. + + ```js + var let = [new Date]; // ok: let as identifier + let v = let; // ok: let as keyword, then identifier + let let; // SyntaxError: banned by special early error rule + let.length; // ok: `let .` -> ExpressionStatement + let[0].getYear(); // SyntaxError: `let [` -> LexicalDeclaration + ``` + + In strict mode code, `let` is reserved. + +* `static` is similar. It’s a valid identifier, except in strict + mode. It’s only special at the beginning of a *ClassElement*. + + In strict mode code, `static` is reserved. + +* `async` is similar, but trickier. It’s an identifier. It is special + only if it’s marking an `async` function, method, or arrow function + (the tough case, since you won’t know it’s an arrow function until + you see the `=>`, possibly much later). + + ```js + function async() {} // normal function named "async" + + async(); // ok, `async` is an Identifier; function call + async() => {}; // ok, `async` is not an Identifier; async arrow function + ``` + +* `of` is special only in one specific place in `for-of` loop syntax. + + ```js + var of = [1, 2, 3]; + for (of of of) console.log(of); // logs 1, 2, 3 + ``` + + Amazingly, both of the following are valid JS code: + + ```js + for (async of => {};;) {} + for (async of []) {} + ``` + + In the first line, `async` is a keyword and `of` is an identifier; + in the second line it's the other way round. + + Even a simplified JS grammar can't be LR(1) as long as it includes + the features used here! + +* `get` and `set` are special only in a class or an object literal, + and then only if followed by a PropertyName: + + ```js + var obj1 = {get: f}; // `get` is an identifier + var obj2 = {get x() {}}; // `get` means getter + + class C1 { get = 3; } // `get` is an identifier + class C2 { get x() {} } // `get` means getter + ``` + +* `target` is special only in `new.target`. + +* `arguments` and `eval` can't be binding names, and can't be assigned + to, in strict mode code. + +To complicate matters, there are a few grammatical contexts where both +*IdentifierName* and *Identifier* match. 
For example, after `var {` +there are two possibilities: + +```js +// Longhand properties: BindingProperty -> PropertyName -> IdentifierName +var { xy: v } = obj; // ok +var { if: v } = obj; // ok, `if` is an IdentifierName + +// Shorthand properties: BindingProperty -> SingleNameBinding -> BindingIdentifier -> Identifier +var { xy } = obj; // ok +var { if } = obj; // SyntaxError: `if` is not an Identifier +``` + + +### Escape sequences in keywords + +*(entangled with: conditional keywords, ASI)* + +You can use escape sequences to write variable and property names, but +not keywords (including contextual keywords in contexts where they act +as keywords). + +So `if (foo) {}` and `{ i\u0066: 0 }` are legal but `i\u0066 (foo)` is not. + +And you don't necessarily know if you're lexing a contextual keyword +until the next token: `({ g\u0065t: 0 })` is legal, but +`({ g\u0065t x(){} })` is not. + +And for `let` it's even worse: `l\u0065t` by itself is a legal way to +reference a variable named `let`, which means that + +```js +let +x +``` +declares a variable named `x`, while, thanks to ASI, + +```js +l\u0065t +x +``` +is a reference to a variable named `let` followed by a reference to a +variable named `x`. + + +### Early errors (**) + +*(entangled with: lazy parsing, conditional keywords, ASI)* + +Some early errors are basically syntactic. Others are not. + +This is entangled with lazy compilation: "early errors" often involve a +retrospective look at an arbitrarily large glob of code we just parsed, +but in Beast Mode we’re not building an AST. In fact we would like to be +doing as little bookkeeping as possible. + +Even setting that aside, every early error is a special case, and it’s +just a ton of rules that all have to be implemented by hand. + +Here are some examples of Early Error rules—setting aside restrictions +that are covered adequately elsewhere: + +* Rules about names: + + * Rules that affect the set of keywords (character sequences that + match *IdentifierName* but are not allowed as binding names) based + on whether or not we’re in strict mode code, or in a + *Module*. Affected identifiers include `arguments`, `eval`, `yield`, + `await`, `let`, `implements`, `interface`, `package`, `private`, + `protected`, `public`, `static`. + + * One of these is a strangely worded rule which prohibits using + `yield` as a *BindingIdentifier*. At first blush, this seems + like it could be enforced in the grammar, but that approach + would make this a valid program, due to ASI: + + ```js + let + yield 0; + ``` + + Enforcing the same rule using an Early Error prohibits ASI here. + It works by exploiting the detailed inner workings of ASI case + 1, and arranging for `0` to be "the offending token" rather than + `yield`. + + * Lexical variable names have to be unique within a scope: + + * Lexical variables (`let` and `const`) can’t be declared more + than once in a block, or both lexically declared and + declared with `var`. + + * Lexically declared variables in a function body can’t have the same + name as argument bindings. + + * A lexical variable can’t be named `let`. + + * Common-sense rules dealing with unicode escape sequences in + identifiers. + +* Common-sense rules about regular expression literals. (They have to + actually be valid regular expressions, and unrecognized flags are + errors.) + +* The number of string parts that a template string can have is + limited to 232 − 1. 
+ +* Invalid Unicode escape sequences, like `\7` or `\09` or `\u{3bjq`, are + banned in non-tagged templates (in tagged templates, they are allowed). + +* The *SuperCall* syntax is allowed only in derived class + constructors. + +* `const x;` without an initializer is a Syntax Error. + +* A direct substatement of an `if` statement, loop statement, or + `with` statement can’t be a labelled `function`. + +* Early errors are used to hook up cover grammars. + + * Early errors are also used in one case to avoid having to + specify a very large refinement grammar when *ObjectLiteral* + almost covers *ObjectAssignmentPattern*: + [sorry, too complicated to explain](https://tc39.es/ecma262/#sec-object-initializer-static-semantics-early-errors). + +* Early errors are sometimes used to prevent parsers from needing to + backtrack too much. + + * When parsing `async ( x = await/a/g )`, you don't know until the + next token if this is an async arrow or a call to a function named + `async`. This means you can't even tokenize properly, because in + the former case the thing following `x =` is two divisions and in + the latter case it's an *AwaitExpression* of a regular expression. + So an Early Error forbids having `await` in parameters at all, + allowing parsers to immediately throw an error if they find + themselves in this case. + +Many strict mode rules are enforced using Early Errors, but others +affect runtime semantics. + + + + +### Boolean parameters (**) + +Some nonterminals are parameterized. (Search for “parameterized +production” in [this spec +section](https://tc39.es/ecma262/#sec-grammar-notation).) + +Implemented naively (e.g. by macro expansion) in a parser generator, +each parameter could nearly double the size of the parser. Instead, the +parameters must be tracked at run time somehow. + + +### Lookahead restrictions (**) + +*(entangled with: restricted productions)* + +TODO (I implemented this by hacking the entire LR algorithm. Most every +part of it is touched, although in ways that seem almost obvious once +you understand LR inside and out.) + +(Note: It may seem like all of the lookahead restrictions in the spec +are really just a way of saying “this production takes precedence over +that one”—for example, that the lookahead restriction on +*ExpressionStatement* just means that other productions for statements +and declarations take precedence over it. But that isn't accurate; you +can't have an *ExpressionStatement* that starts with `{`, even if it +doesn't parse as a *Block* or any other kind of statement.) + + +### Automatic Semicolon Insertion (**) + +*(entangled with: restricted productions, slashes)* + +Most semicolons at the end of JS statements and declarations “may be +omitted from the source text in certain situations”. This is called +[Automatic Semicolon +Insertion](https://tc39.es/ecma262/#sec-automatic-semicolon-insertion), +or ASI for short. + +The specification for this feature is both very-high-level and weirdly +procedural (“When, as the source text is parsed from left to right, a +token is encountered...”, as if the specification is telling a story +about a browser. As far as I know, this is the only place in the spec +where anything is assumed or implied about the internal implementation +details of parsing.) But it would be hard to specify ASI any other way. + +Wrinkles: + +1. Whitespace is significant (including whitespace inside comments). + Most semicolons in the grammar are optional only at the end of a + line (or before `}`, or at the end of the program). 
+ +2. The ending semicolon of a `do`-`while` statement is extra optional. + You can always omit it. + +3. A few semicolons are never optional, like the semicolons in `for (;;)`. + + This means there’s a semicolon in the grammar that is optionally + optional! This one: + + > *LexicalDeclaration* : *LetOrConst* *BindingList* `;` + + It’s usually optional, but not if this is the *LexicalDeclaration* + in `for (let i = 0; i < 9; i++)`! + +4. Semicolons are not inserted only as a last resort to avoid + SyntaxErrors. That turned out to be too error-prone, so there are + also *restricted productions* (see below), where semicolons are more + aggressively inferred. + +5. In implementations, ASI interacts with the ambiguity of *slashes* + (see below). + +A recursive descent parser implements ASI by calling a special method +every time it needs to parse a semicolon that might be optional. The +special method has to peek at the next token and consume it only if it’s +a semicolon. This would not be so bad if it weren’t for slashes. + +In a parser generator, ASI can be implemented using an error recovery +mechanism. + +I think the [error recovery mechanism in +yacc/Bison](https://www.gnu.org/software/bison/manual/bison.html#Error-Recovery) +is too imprecise—when an error happens, it discards states from the +stack searching for a matching error-handling rule. The manual says +“Error recovery strategies are necessarily guesses.” + +But here’s a slightly more careful error recovery mechanism that could +do the job: + +1. For each production in the ES spec grammar where ASI could happen, e.g. + + ``` + ImportDeclaration ::= `import` ModuleSpecifier `;` + { import_declaration($2); } + ``` + + add an ASI production, like this: + + ``` + ImportDeclaration ::= `import` ModuleSpecifier [ERROR] + { check_asi(); import_declaration($2); } + ``` + + What does this mean? This production can be matched, like any other + production, but it's a fallback. All other productions take + precedence. + +2. While generating the parser, treat `[ERROR]` as a terminal + symbol. It can be included in start sets and follow sets, lookahead, + and so forth. + +3. At run time, when an error happens, synthesize an `[ERROR]` token. + Let that bounce through the state machine. It will cause zero or + more reductions. Then, it might actually match a production that + contains `[ERROR]`, like the ASI production above. + + Otherwise, we’ll get another error—the entry in the parser table for + an `[ERROR]` token at this state will be an `error` entry. Then we + really have a syntax error. + +This solves most of the ASI issues: + +* [x] Whitespace sensitivity: That's what `check_asi()` is for. It + should signal an error if we're not at the end of a line. + +* [x] Special treatment of `do`-`while` loops: Make an error production, + but don't `check_asi()`. + +* [x] Rule banning ASI in *EmptyStatement* or `for(;;)`: + Easy, don't create error productions for those. + + * [x] Banning ASI in `for (let x=1 \n x<9; x++)`: Manually adjust + the grammar, copying *LexicalDeclaration* so that there's a + *LexicalDeclarationNoASI* production used only by `for` + statements. Not a big deal, as it turns out. + +* [x] Slashes: Perhaps have `check_asi` reset the lexer to rescan the + next token, if it starts with `/`. + +* [ ] Restricted productions: Not solved. Read on. + + +### Restricted productions (**) + +*(entangled with: ASI, slashes)* + +Line breaks aren’t allowed in certain places. 
For example, the following +is not a valid program: + + throw // SyntaxError + new Error(); + +For another example, this function contains two statements, not one: + + function f(g) { + return // ASI + g(); + } + +The indentation is misleading; actually ASI inserts a semicolon at the +end of the first line: `return; g();`. (This function always returns +undefined. The second statement is never reached.) + +These restrictions apply even to multiline comments, so the function + +```js +function f(g) { + return /* + */ g(); +} +``` +contains two statements, just as the previous example did. + +I’m not sure why these rules exist, but it’s probably because (back in +the Netscape days) users complained about the bizarre behavior of +automatic semicolon insertion, and so some special do-what-I-mean hacks +were put in. + +This is specified with a weird special thing in the grammar: + +> *ReturnStatement* : `return` [no *LineTerminator* here] *Expression* `;` + +This is called a *restricted production*, and it’s unfortunately +necessary to go through them one by one, because there are several +kinds. Note that the particular hack required to parse them in a +recursive descent parser is a little bit different each time. + +* After `continue`, `break`, or `return`, a line break triggers ASI. + + The relevant productions are all statements, and in each case + there’s an alternative production that ends immediately with a + semicolon: `continue ;` `break ;` and `return ;`. + + Note that the alternative production is *not* restricted: e.g. a + *LineTerminator* can appear between `return` and `;`: + + ```js + if (x) + return // ok + ; + else + f(); + ``` + +* After `throw`, a line break is a SyntaxError. + +* After `yield`, a line break terminates the *YieldExpression*. + + Here the alternative production is simply `yield`, not `yield ;`. + +* In a post-increment or post-decrement expression, there can’t be a + line break before `++` or `--`. + + The purpose of this rule is subtle. It triggers ASI and thus prevents + syntax errors: + + ```js + var x = y // ok: semicolon inserted here + ++z; + ``` + + Without the restricted production, `var x = y ++` would parse + successfully, and the “offending token” would be `z`. It would be + too late for ASI. + + However, the restriction can of course also *cause* a SyntaxError: + + ```js + var x = (y + ++); // SyntaxError + ``` + +As we said, recursive descent parsers can implement these rules with hax. + +In a generated parser, there are a few possible ways to implement +them. Here are three. If you are not interested in ridiculous +approaches, you can skip the first two. + +* Treat every token as actually a different token when it appears + after a line break: `TokenType::LeftParen` and + `TokenType::LeftParenAfterLineBreak`. Of course the parser + generator can treat these exactly the same in normal cases, and + automatically generate identical table entries (or whatever) except + in states where there’s a relevant restricted production. + +* Add a special LineTerminator token. Normally, the lexer skips + newlines and never emits this token. However, if the current state + has a relevant restricted production, the lexer knows this and emits + a LineTerminator for the first line break it sees; and the parser + uses that token to trigger an error or transition to another state, + as appropriate. + +* When in a state that has a relevant restricted production, change + states if there’s a line break before the next token. 
That is, + split each such state into two: the one we stay in when there’s not + a line break, and the one we jump to if there is a line break. + +In all cases it’ll be hard to have confidence that the resulting parser +generator is really sound. (That is, it might not properly reject all +ambiguous grammars.) I don’t know exactly what property of the few +special uses in the ES grammar makes them seem benign. + + +### Slashes (**) + +*(entangled with: ASI, restricted productions)* + +When you see `/` in a JS program, you don’t know if that’s a +division operator or the start of a regular expression unless you’ve +been paying attention up to that point. + +[The spec:](https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar) + +> There are several situations where the identification of lexical input +> elements is sensitive to the syntactic grammar context that is +> consuming the input elements. This requires multiple goal symbols for +> the lexical grammar. + +You might think the lexer could treat `/` as an operator only if the +previous token is one that can be the last token of an expression (a set +that includes literals, identifiers, `this`, `)`, `]`, and `}`). To see +that this does not work, consider: + +```js +{} /x/ // `/` after `}` is regexp +({} / 2) // `/` after `}` is division + +for (g of /(a)(b)/) {} // `/` after `of` is regexp +var of = 6; of / 2 // `/` after `of` is division + +throw /x/; // `/` after `throw` is regexp +Math.throw / 2; // `/` after `throw` is division + +++/x/.lastIndex; // `/` after `++` is regexp +n++ / 2; // `/` after `++` is division +``` + +So how can the spec be implemented? + +In a recursive descent parser, you have to tell the lexer which goal +symbol to use every time you ask for a token. And you have to make sure, +if you look ahead at a token, but *don’t* consume it, and fall back on +another path that can accept a *RegularExpressionLiteral* or +*DivPunctuator*, that you did not initially lex it incorrectly. We have +assertions for this and it is a bit of a nightmare when we have to touch +it (which is thankfully rare). Part of the problem is that the token +you’re peeking ahead at might not be part of the same production at all. +Thanks to ASI, it might be the start of the next statement, which will +be parsed in a faraway part of the Parser. + +A table-driven parser has it easy here! The lexer can consult the state +table and see which kind of token can be accepted in the current +state. This is closer to what the spec actually says. + +Two minor things to watch out for: + +* The nonterminal *ClassTail* is used both at the end of + *ClassExpression*, which may be followed by `/`; and at the end of + *ClassDeclaration*, which may be followed by a + *RegularExpressionLiteral* at the start of the next + statement. Canonical LR creates separate states for these two uses + of *ClassTail*, but the LALR algorithm will unify them, creating + some states that have both `/` and *RegularExpressionLiteral* in the + follow set. In these states, determining which terminal is actually + allowed requires looking not only at the current state, but at the + current stack of states (to see one level of grammatical context). + +* Since this decision depends on the parser state, and automatic + semicolon insertion adjusts the parser state, a parser may need to + re-scan a token after ASI. + +In other kinds of generated parsers, at least the lexical goal symbol +can be determined automatically. 
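+
+As a rough illustration of the table-driven idea (a made-up Python sketch,
+not how jsparagus or any engine is actually structured), the lexer can pick
+the lexical goal symbol by asking which terminals the current parser state
+accepts:
+
+```python
+# Hypothetical: the set of acceptable terminals per parser state, as a
+# table generator might emit it.
+ACCEPTABLE = {
+    "after_primary_expr": {"Div", "Semicolon", "RightParen"},
+    "start_of_statement": {"RegExp", "Identifier", "LeftBrace"},
+}
+
+def lexical_goal(state, acceptable=ACCEPTABLE):
+    terminals = acceptable[state]
+    can_div = "Div" in terminals
+    can_regexp = "RegExp" in terminals
+    # An LALR table can merge states so that both are possible (the ClassTail
+    # case above); a real implementation then has to consult the state stack.
+    assert not (can_div and can_regexp), "ambiguous state: inspect the stack"
+    return "InputElementRegExp" if can_regexp else "InputElementDiv"
+
+assert lexical_goal("after_primary_expr") == "InputElementDiv"
+assert lexical_goal("start_of_statement") == "InputElementRegExp"
+```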
+ + +### Lazy compilation and scoping (**) + +*(entangled with: arrow functions)* + +JS engines *lazily compile* function bodies. During parsing, when the +engine sees a `function`, it switches to a high-speed parsing mode +(which I will call “Beast Mode”) that just skims the function and checks +for syntax errors. Beast Mode does not compile the code. Beast Mode +doesn’t even create AST nodes. All that will be done later, on demand, +the first time the function is called. + +The point is to get through parsing *fast*, so that the script can start +running. In browsers, `