Diffstat (limited to 'third_party/rust/jsparagus')
81 files changed, 24721 insertions, 0 deletions
diff --git a/third_party/rust/jsparagus/.cargo-checksum.json b/third_party/rust/jsparagus/.cargo-checksum.json new file mode 100644 index 0000000000..ca5dc2e479 --- /dev/null +++ b/third_party/rust/jsparagus/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{".flake8":"d0b5a0ca5e524819918726fbc8e8e7e41b4cca3cd06099fa5ed4bf96b0997c93",".githooks/pre-commit":"f37701f35731e8dec0dc0579669069cd720ba2d33dce24fee57735ee614ba654",".github/workflows/ci-daily.yml":"2bc9aa85b1f88ca0474b6fddc62f7182f5ea9e8257b77d60196b1ab5699ad4f8",".github/workflows/ci-generated.yml":"394a805aad7bd4ac66e2ddab7158c9e59183a026cb43d8821c55013e8dcb1e61",".github/workflows/ci-issues.yml":"ab3fa56ceaa65b1afb1a76285598a99befdd8131f68cb4bab0c7502dff9ac03f",".github/workflows/ci-push.yml":"d8133372446aae1437c1f9be88995b2be422b87aace5fce25b3d494656abdced",".github/workflows/real-js-benchmark.yml":"014bfb992808d4cc2158f5b3f47e20c99a7ecea40470595e4a22c0c070c4538f",".github/workflows/rust.yml":"5db3658068f4bef356a24e2a21cc3f7c34b4f19405e24884f1763749e82c5dff",".github/workflows/smoosh-status.yml":"7e6eb19a9fb5c18c5bdaefd477af5d94a374ed0a95f0826e92c9f0c0d15a5b48",".metrics/README.md":"8c963dc571c77f90d0ba1a67e48a32cc8c10166971b9fe8f2926ff00986262c4",".metrics/create-ci-branch.sh":"2dc3130e2eccb474edfdeb9ee1f43140f6f0a2489f013d153c3b3497e37d20c7",".metrics/fuzzbug_count_badge.py":"ad0b0dff8345e64eba17b14d583675df0b9aec4f9ca845166763384e1f1a2c29",".metrics/fuzzbug_date_badge.py":"e938af4faa21cebb9141227c3e3dcd57da3e98e0298d7bc2f9f257346156ad0d",".metrics/generated_README.md":"9be5ea93b90622b6e57969a90073957de4a00f9a05fb074e8146df130072ebb1",".metrics/not_implemented_badge.py":"a550a2e4b1cc151b80b2d6dcfbd8ccfaa3728bc7d759da2bf6eca5981de9e336",".metrics/not_implemented_count.py":"fb2741497b81668511afb761b941609fdc1eb343a3b81a4383561ca394838e26",".metrics/populate_fuzzbug.sh":"97d79de3075113846ff451db87769147427ab3581bc5629d53c7b2fca8dc86cf",".metrics/populate_not_implemented.sh":"75ea57b552dec3c0cd794be2c971a2c085bb99c5526176be860a2fb8af771021","CODE_OF_CONDUCT.md":"baa6d197a7e955ebe93c3b78e2d89d6f6f8d76fdc6c6ffb47ec937034ac2330e","Cargo.lock":"3af550a3d3fecbd2b571d411040169f57221a047279986f768ceb7e5211d34ed","Cargo.toml":"ed206d6f994cacbe37de89986b84f2fb6dc71876bc83612a065a2357476b0894","LICENSE":"83cced0d7ea4adca70302518dc44375445900ae8ed1c3d0a311e76474443d978","LICENSE-APACHE-2.0":"c6ac25baa937b3543482a2595950d337eccd6d620848455fd63d1a89c2009330","LICENSE-MIT":"20ad71f83cbf8fec779108327990518af3a87855d086bee40dc138656b94bd61","Makefile":"5bc156d54f4001cfc18484a963faf8d96430f73dbfff5b138ad2ae824d0b1bb4","README.md":"35fa02ac2528c0793d87f9f8dfd0caa683231ccf8c6a754a6de22456efa935fd","benchmarks/compare-spidermonkey-parsers.js":"58859b90cec170ab5103437194f1a751c83ad312b5e32dc78842b0c2720e1f02","gecko-patches.txt":"4c5532351f41e7a2e5af543686f1373f51e74c5908fbd80f2f337caa1bfe2099","journal.md":"e2af8d3ea87eac2afd106f943c13d0a0e5b03d09fb8ebec09ea4aa7d06490851","js-quirks.md":"8f5f0c6bd8cb9477b575716ac67b6a110865b4df60b7fecdcf2dbb606b8cf094","js_parser/README.md":"49370046241a091313cbe29d9171f47248c2fe742c8dfbdd4f7b4860ca961ffa","js_parser/__init__.py":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855","js_parser/es-lexical-simplified.esgrammar":"cc5e0f7bd270e35ff04cad1464317cef0377df13e5fcf145f12783faccc90eff","js_parser/es-simplified.esgrammar":"fc2e5617351f964de6ebadfbda50653bb0e3528a67df4ab364a0125b4326ae83","js_parser/es.esgrammar":"14558b604fe62b7421551d1e694b0f4feb84d8ed114589f75885b217e14cfb05","js_parser/esgrammar.pgen":
"e0affd8bb7843aece6d628561ce3057079e879eb11260cbd01b5426c9bce6f29","js_parser/extract_es_grammar.py":"04838d2a0555345699f30fb014f806d4b2e15aa36ed9ec772f514fb4ad858570","js_parser/generate_js_parser_tables.py":"2a33156b3d370e10c8f4eaeb3a00e1322fe71707d67f2f96f85f1a069a084b93","js_parser/lexer.py":"94252a6687fff473269b9eda5ee964d748c480c9a5d938569ac77ab9287cff80","js_parser/load_es_grammar.py":"d711fcc302b786551a83c3d5b7630de594732aa2f8c65e05105b355cd1139480","js_parser/parse_esgrammar.py":"3bc67c3aaf3fcaede4f89a4ad14103fe9e548ac035d1547f0cd799d83785d2b6","js_parser/parser.py":"0f2a9476463457aab7df1269373acec7e08a392209226b94a031267e055eb37a","js_parser/slash.esgrammar":"1fb1591a9773621c30fdac04f539452fb13990daece9ec939040fbb03445f434","js_parser/try_it.py":"c31fbdb7ad9164d16d173f23a6ae5e40da8d9c912f66d7751a53e9cecbbdafa9","jsparagus/README.md":"7f26517592e6d9b291a9162300b3157374422c712fd9b0042390ce55b3b3d715","jsparagus/__init__.py":"c277ec16d8ed12646b0d62e91249498fe7a207b5824e2d6e93d3f77e65828244","jsparagus/actions.py":"02f600ca9189d901779deeaeb3acccb9dfb72ab3842dfabdeafe17e6bade110f","jsparagus/aps.py":"9d14d7109c382af5bdf3bde574226afca65dc2caa0b7524f32f85de056730cfe","jsparagus/emit/__init__.py":"dcf1a8b26f7403871907f646c1ba3ef7dc1a593889a8f8d40490a0db791b0aff","jsparagus/emit/python.py":"fc8ad300727e735dab2222319039f2be9f792ebfc4a17f5f9ff03e58ad5a68e1","jsparagus/emit/rust.py":"6ecd3c76a6d9a37cf3ee9c8c440ba5538850a4bfcabe0a2ce662307b8a33f1ee","jsparagus/extension.py":"803c6db89e6d9e2480da4962c7db58b459dc3bd5594fc97fd89f1b43edf90081","jsparagus/gen.py":"1eabba9ce872ad130d878fa852e81efa6688b2f24c2bf9e4cc830a8afa58bd99","jsparagus/grammar.py":"23078e473dc3fc7ae9a85ce82dd928478d72ef8dd189adbcfd49de28f0b88efc","jsparagus/lexer.py":"8ed7b67dda1626ce98884e754c23eedeb1ce118ddd759b1571c131e5cb51ffda","jsparagus/lr0.py":"0bd25a501ca89b2dfdcbc90f9a0f8209e9cbfcaead099426ababdef6979c7ec9","jsparagus/main.py":"bae2377d6e840db55db6abbeffa58777020053d629de2b1bc8068aaf6f077dee","jsparagus/ordered.py":"15ebf9136ba760ee3e38611c76b55c6335002c6f5f98b43e62ed4c38fa0ef5e1","jsparagus/parse_pgen.py":"b68857e1de6fb41bece972d31384201b7e1feffadb07a3229a5d47c069d48160","jsparagus/parse_pgen_generated.py":"e794a794e95435d90654884ecce9ab68f763d13cd575f07228eaf1ebd27b9c18","jsparagus/parse_table.py":"7ce8388a468607a0bb20db0fb8769027af8927fe6e203f7c281ffc0221a6974b","jsparagus/rewrites.py":"3e5f82352237143d0fd2163459aa370e9b1661811b6eb5c1b9d79e3dd01c7f53","jsparagus/runtime.py":"f4f8f318e730cb7107490710868b9021bdbcf8e5e153ed3b858c7338b9b5d919","jsparagus/types.py":"b55d0eb466ffeff0441874b81c2dfeeaace7fa19eadc1d277d753803946e311f","jsparagus/utils.py":"cc26da2f258f0565062c77c61328210e2f8afb5b8866c153d2d1c159966a3913","mozconfigs/smoosh-debug":"422d2911e5f6acf99fd47435ec9cd0d9f43a680521de51d04aded8bed1136318","mozconfigs/smoosh-opt":"e9eab2cb659b5e7c1e88fc731d6c110157816b5a76e840e0bf51f167566e9b18","pgen.pgen":"60f457614f90a9bf022922dad563262f64e627d9aab934722246c20daa50b5de","requirements.txt":"3a392cc4f4db58be78817dc74a88178e6b4afe1e444c0068cb13e49502d7995a","smoosh_status.py":"a3824b4b20fde8fcf643e28de7d1a9a208352c778d1f9dc7d15f506258dbb36a","src/bin/smoosh_tools.rs":"989f3991bc5886664363b839ecae09d0b95c0e1844b5c2cbfc750fc3bcf52b37","src/lib.rs":"93b32cb970f69fa33e11d41db1696bd578095e07db44ed348ed5e21a8d13d21a","test.sh":"31676e86c2e7e6f6f69050766e237d0eee7da3598b11f95a7335292af2802d11","tests/__init__.py":"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855","tests/test.py":"1ab6b2d002419eac0edc460a5f31b
24f0b3ad7c52b79e83f4fd08bded67c6eec","tests/test_js.py":"5f4474eef53d7286d5683c0970a9ba69248a7c843c2c0d9d4111bc432f2f6dbb","tests/test_parse_pgen.py":"6b99e38b2045bae4b0c1b99fe23e1a47ea886b9ce4e902990cc366b8ca9d758e","update.sh":"39986fc0dfe2dd2d2dd2d408cb25577f8ad736b657430233e5a9e214684ce6f1","update_stencil.py":"51a7e79935e664614441491605c8aa6f9cd9fe731faeba6b9c6cd5f23fc6c1ee","update_unicode.py":"18136102a3f38f87a4d6176e07a933711afb42796118293462765a6b271a240e"},"package":null}
\ No newline at end of file diff --git a/third_party/rust/jsparagus/.flake8 b/third_party/rust/jsparagus/.flake8 new file mode 100644 index 0000000000..ce4ecbfbe5 --- /dev/null +++ b/third_party/rust/jsparagus/.flake8 @@ -0,0 +1,15 @@ +[flake8] + +# E721 do not compare types, use 'isinstance()' +# We run afoul of the heuristics that detect this on lines of code +# like `if method.return_type == types.UnitType:`. We are not comparing +# Python types. The warning is spurious for us. +# W503 line break before binary operator +# Amazingly, flake8 by default warns about binary operators no matter +# what you do. We choose to break before operators. +ignore = E721,W503 + +exclude = jsparagus_build_venv,crates/target,jsparagus/parse_pgen_generated.py,js_parser/parser_tables.py + +# todo: get this down to 99 +max_line_length=109 diff --git a/third_party/rust/jsparagus/.githooks/pre-commit b/third_party/rust/jsparagus/.githooks/pre-commit new file mode 100755 index 0000000000..5ca77432b2 --- /dev/null +++ b/third_party/rust/jsparagus/.githooks/pre-commit @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +# +# Simplified commit hook to format the files which were changed in the current commit +# + +printf "[pre-commit] rustfmt" + +for file in $(git diff --name-only --cached); do + if [ ${file: -3} == ".rs" ]; then + rustfmt $file + fi +done + +exit 0 diff --git a/third_party/rust/jsparagus/.github/workflows/ci-daily.yml b/third_party/rust/jsparagus/.github/workflows/ci-daily.yml new file mode 100644 index 0000000000..37a258b28d --- /dev/null +++ b/third_party/rust/jsparagus/.github/workflows/ci-daily.yml @@ -0,0 +1,46 @@ +name: CI Daily + +on: + schedule: + # Run daily at 00:00 + - cron: '0 0 * * 0-6' + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - name: Setup Python + uses: actions/setup-python@v1 + with: + python-version: "3.7" + - uses: actions/checkout@v2 + with: + ref: ci_results + - name: Get Fuzzbugs + run: | + cd .metrics + # Only update this if it doesn't already exist. + # This action is only used to calculate the days since the last fuzzbug. + if [ ! 
-f count/fuzzbug.json ]; then + curl "https://api.github.com/repos/mozilla-spidermonkey/jsparagus/issues?labels=libFuzzer&state=all" > count/fuzzbug.json + fi + python fuzzbug_date_badge.py + git add badges/since-last-fuzzbug.json + git add count/fuzzbug.json + - name: Commit files + # fails if no files to commit + continue-on-error: true + run: | + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + git commit -m "update NotImplemented Count" -a + - name: Push changes + # fails if no files to commit + continue-on-error: true + uses: ad-m/github-push-action@master + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + branch: ci_results + force: true diff --git a/third_party/rust/jsparagus/.github/workflows/ci-generated.yml b/third_party/rust/jsparagus/.github/workflows/ci-generated.yml new file mode 100644 index 0000000000..4027c6c9c6 --- /dev/null +++ b/third_party/rust/jsparagus/.github/workflows/ci-generated.yml @@ -0,0 +1,54 @@ +name: Generate Files + +on: + push: + branches: + - master + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 0 # otherwise, you will failed to push refs to dest repo + - name: Setup Python + uses: actions/setup-python@v1 + with: + python-version: "3.7" + - name: Setup Git Profile + run: | + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + - name: Revert Previous Change and Merge Master + run: | + if git ls-remote origin | grep refs/heads/ci_generated; then + # If the remote branch exists. + git fetch origin + + # Merge master, discarding changes in origin/ci_generated + MASTER_REV=$(git log -1 master --pretty=%H) + git checkout -b ci_generated-master origin/master + git merge origin/ci_generated -m "Merge master ${MASTER_REV}" -s ours --allow-unrelated-histories + else + # Otherwise, just start from master branch. + git checkout -b ci_generated-master + fi + - name: Generate Files + run: | + make init-venv && make all + # OS independant replace + sed -i.bak '/*_generated.rs/d' .gitignore && rm .gitignore.bak + - name: Commit files + run: | + git add . 
+ MASTER_REV=$(git log -1 master --pretty=%H) + git commit -m "Add Generated Files for ${MASTER_REV}" -a + - name: Push changes + uses: ad-m/github-push-action@master + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + branch: ci_generated + diff --git a/third_party/rust/jsparagus/.github/workflows/ci-issues.yml b/third_party/rust/jsparagus/.github/workflows/ci-issues.yml new file mode 100644 index 0000000000..bb2265ecb3 --- /dev/null +++ b/third_party/rust/jsparagus/.github/workflows/ci-issues.yml @@ -0,0 +1,44 @@ +name: CI Issues + +on: + issues: + types: [opened, closed, reopened] + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - name: Setup Python + uses: actions/setup-python@v1 + with: + python-version: "3.7" + - uses: actions/checkout@v2 + with: + ref: ci_results + - name: Count Fuzzbugs + run: | + cd .metrics + # Get the new list + curl "https://api.github.com/repos/mozilla-spidermonkey/jsparagus/issues?labels=libFuzzer&state=all" > count/fuzzbug.json + python fuzzbug_count_badge.py + python fuzzbug_date_badge.py + git add badges/since-last-fuzzbug.json + git add badges/open-fuzzbug.json + git add count/fuzzbug.json + - name: Commit files + # fails if no files to commit + continue-on-error: true + run: | + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + git commit -m "update NotImplemented Count" -a + - name: Push changes + # fails if no files to commit + continue-on-error: true + uses: ad-m/github-push-action@master + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + branch: ci_results + force: true diff --git a/third_party/rust/jsparagus/.github/workflows/ci-push.yml b/third_party/rust/jsparagus/.github/workflows/ci-push.yml new file mode 100644 index 0000000000..0ca6f3a954 --- /dev/null +++ b/third_party/rust/jsparagus/.github/workflows/ci-push.yml @@ -0,0 +1,57 @@ +name: NotImplemented + +on: + push: + branches: + - master + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + # Check out master, so that we can count. 
+ - uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v1 + with: + python-version: "3.7" + - name: Get Count + # Record the count in a tmp file so it survives changing branches + run: | + find crates -iname '*.rs' -type f -exec cat {} + | grep -c -E "(Emit|Parse)Error::NotImplemented" > /tmp/count + git rev-parse HEAD > /tmp/commit + cp .metrics/not_implemented_badge.py /tmp + cp .metrics/not_implemented_count.py /tmp + # Checkout the results branch + - uses: actions/checkout@v2 + with: + ref: ci_results + - name: Add NotImplemented count + run: | + export total_count=$(cat /tmp/count) + export current_commit=$(cat /tmp/commit) + # Make sure the generating files are up to date + cp -f /tmp/not_implemented_badge.py .metrics/not_implemented_badge.py + cp -f /tmp/not_implemented_count.py .metrics/not_implemented_count.py + # Run the files + cd .metrics + python not_implemented_badge.py + python not_implemented_count.py + git add badges/not-implemented.json + git add count/not-implemented.json + - name: Commit files + # fails if no files to commit + continue-on-error: true + run: | + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + git commit -m "update NotImplemented Count" -a + - name: Push changes + uses: ad-m/github-push-action@master + continue-on-error: true + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + branch: ci_results + force: true diff --git a/third_party/rust/jsparagus/.github/workflows/real-js-benchmark.yml b/third_party/rust/jsparagus/.github/workflows/real-js-benchmark.yml new file mode 100644 index 0000000000..eec8047762 --- /dev/null +++ b/third_party/rust/jsparagus/.github/workflows/real-js-benchmark.yml @@ -0,0 +1,135 @@ +name: Real JS Samples Benchmark +on: + pull_request: + types: [opened, synchronize, reopened] + push: + branches: + - master + +jobs: + benchmark: + # This workflow relies on: + # - A specific hardware (benchmark-pool-1) in order to have a consistent + # and comparable results against multiple builds. + # + # - Some persistent data to reduce the time needed to checkout + # mozilla-central. + # + # To setup such host multiple things should be considered. + # + # In terms of security, the code which is executed on this hardware should + # not be trusted. As such, the Github Action jobs should run on a dedicated + # computer which is either isolated or containerized. Do not run this setup + # on a non-dedicated computer! + # + # It is best to create a dedicated user. + # $ mkdir /var/github-action-runner + # $ useradd -d /var/github-action-runner github-action-user + # + # Make sure this newly added user has no sudo capabilities. + # + # A checkout of Gecko should be present under /var/github-action-runner. The + # dependencies for building Gecko should as well be installed with `mach + # bootstrap`, which can be done using another user with sudo capabilities, + # and changing the HOME environment variable to match the github-action-user + # home. + # + # The file /var/github-action-runner/.profile contains: + # + # export PATH="$HOME/.cargo/bin:$PATH" + # export PATH="/var/github-action-runner/.mozbuild/git-cinnabar:$PATH" + # + # Which is used to add cargo in the path, as well as git-cinnabar, to keep + # the gecko clone up to date. 
+ # + # To add this computer to the benchmark pool, follow the instruction + # provided by github, after clicking "Add runner" on this page: + # https://github.com/mozilla-spidermonkey/jsparagus/settings/actions + # + # "benchmark-pool-1" specification: + # /proc/cpuinfo: + # Intel(R) Core(TM) i7-4770 CPU @ 3.40GHz + # dmidecode --type 17: + # 2x Hynix/Hyundai HMT41GU6MFR8C-PB (DDR3, 8GB, 1600 MT/s) + # + runs-on: [self-hosted, benchmark-pool-1] + steps: + - name: Clean Work Directory + run: | + rm -rf * + - name: Checkout jsparagus + uses: actions/checkout@v2 + with: + fetch-depth: 0 + path: 'jsparagus' + - name: Checkout real-js-samples + uses: actions/checkout@v2 + with: + repository: 'Yoric/real-js-samples' + path: 'real-js-samples' + fetch-depth: 0 + - name: Checkout mozilla-central + run: | + # Pull mozilla-central changes + source /var/github-action-runner/.profile + git -C /var/github-action-runner/gecko pull --all + # Create a local clone of mozilla-central + git clone -l /var/github-action-runner/gecko mozilla-central + - name: Status of Checkouts + run: | + echo "mozilla-central: $(git -C mozilla-central show --oneline -s)" + echo "jsparagus: $(git -C jsparagus show --oneline -s)" + echo "real-js-samples: $(git -C real-js-samples show --oneline -s)" + - name: Setup venv + run: | + source /var/github-action-runner/.profile + cd jsparagus + make init + - name: Generate Files + run: | + source /var/github-action-runner/.profile + cd jsparagus + make all + # OS independant replace + sed -i.bak '/*_generated.rs/d' .gitignore && rm .gitignore.bak + - name: Apply gecko patches + run: | + source /var/github-action-runner/.profile + cd mozilla-central + cat ../jsparagus/gecko-patches.txt | while read PATCH_AND_BUG; do + PATCH=$(echo $PATCH_AND_BUG | cut -d : -f 1) + BUG=$(echo $PATCH_AND_BUG | cut -d : -f 2) + + # Check bug status and skip if it's already landed. + STATUS=$(curl https://bugzilla.mozilla.org/rest/bug/$BUG | python3 -c 'import sys, json; print(json.load(sys.stdin)["bugs"][0]["status"])') + echo "Bug $BUG $STATUS" + if [ "x$STATUS" = "xRESOLVED" ]; then + continue + fi + + # Apply the latest patch from phabricator. + PATCH_URL=https://phabricator.services.mozilla.com/${PATCH}?download=true + curl --location "$PATCH_URL" | git apply --index || git reset --hard + git status + git commit --allow-empty -m "Bug $BUG" + done + - name: Build Gecko + run: | + # Disable Opcodes.h check, as we only focus on parsing speed. + export JS_SMOOSH_DISABLE_OPCODE_CHECK=1 + # Apply Bug 1640982 fix. + export CARGO_PROFILE_RELEASE_LTO=true + source /var/github-action-runner/.profile + cd jsparagus + cargo run --bin smoosh_tools build --opt + - name: Benchmark Real JS Samples + run: | + source /var/github-action-runner/.profile + cd jsparagus + cargo run --bin smoosh_tools bench --opt + - name: Post Checkout mozilla-central + if: ${{ always() }} + run: | + # Remove checked out repository. 
+ rm -rf mozilla-central + diff --git a/third_party/rust/jsparagus/.github/workflows/rust.yml b/third_party/rust/jsparagus/.github/workflows/rust.yml new file mode 100644 index 0000000000..3ecc7db649 --- /dev/null +++ b/third_party/rust/jsparagus/.github/workflows/rust.yml @@ -0,0 +1,24 @@ +name: Rust + +on: + pull_request: + types: [opened, synchronize, reopened] + push: + branches: + - master + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v1 + - name: Setup Python + uses: actions/setup-python@v1 + with: + python-version: "3.7" + - name: Initialize repository + run: make init + - name: Build & Test + run: RUSTFLAGS="-D warnings" make check diff --git a/third_party/rust/jsparagus/.github/workflows/smoosh-status.yml b/third_party/rust/jsparagus/.github/workflows/smoosh-status.yml new file mode 100644 index 0000000000..b68925dd7f --- /dev/null +++ b/third_party/rust/jsparagus/.github/workflows/smoosh-status.yml @@ -0,0 +1,87 @@ +name: SmooshMonkey status + +on: + schedule: + # Every hour + - cron: '0 * * * *' + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v1 + with: + python-version: "3.7" + - name: Initialize venv + run: make init-venv + - name: Setup Git Profile + run: | + git config --local user.email "action@github.com" + git config --local user.name "GitHub Action" + - name: Check SmooshMonkey status + run: make smoosh-status-ci + id: status + - name: Checkout ci_smoosh_status + run: | + if git ls-remote origin | grep refs/heads/ci_smoosh_status; then + # If the remote branch exists. + git fetch origin ci_smoosh_status + git checkout -b ci_smoosh_status origin/ci_smoosh_status + else + # Otherwise, create a branch. + git checkout -b ci_smoosh_status-master + # And reset all history + git reset --hard deb48a2460abf091705d9972318bbb6e7349de9c + # And also remove remaining files + rm README.md gen.py + echo jsparagus_build_venv > .gitignore + fi + - name: Update files + run: | + echo ${{steps.status.outputs.mc}} > latest_mc + echo ${{steps.status.outputs.jsparagus}} > latest_jsparagus + echo ${{steps.status.outputs.build}} > latest_build + echo ${{steps.status.outputs.test}} > latest_test + + if [ ${{steps.status.outputs.build}} == "OK" ]; then + BUILD_COLOR="green" + elif [ ${{steps.status.outputs.build}} == "NG" ]; then + BUILD_COLOR="red" + else + BUILD_COLOR="yellow" + fi + + if [ ${{steps.status.outputs.test}} == "OK" ]; then + echo ${{steps.status.outputs.mc}} > known_good_mc + echo ${{steps.status.outputs.jsparagus}} > known_good_jsparagus + TEST_COLOR="green" + elif [ ${{steps.status.outputs.test}} == "NG" ]; then + TEST_COLOR="red" + else + TEST_COLOR="yellow" + fi + + echo "{ \"schemaVersion\": 1, \"label\": \"SmooshMonkey Build\", \"message\": \"${{steps.status.outputs.build}}\", \"color\": \"$BUILD_COLOR\" }" > smoosh_build.json + echo "{ \"schemaVersion\": 1, \"label\": \"SmooshMonkey Test\", \"message\": \"${{steps.status.outputs.test}}\", \"color\": \"$TEST_COLOR\" }" > smoosh_test.json + - name: Add files + run: | + git add . + set +e + git diff --cached --quiet + echo "##[set-output name=modified;]$?" 
+ set -e + id: status_add + - name: Commit files + if: steps.status_add.outputs.modified == '1' + run: | + git commit -m "Update Smoosh Status" -a + - name: Push changes + if: steps.status_add.outputs.modified == '1' + uses: ad-m/github-push-action@master + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + branch: ci_smoosh_status diff --git a/third_party/rust/jsparagus/.metrics/README.md b/third_party/rust/jsparagus/.metrics/README.md new file mode 100644 index 0000000000..f665dbd9c3 --- /dev/null +++ b/third_party/rust/jsparagus/.metrics/README.md @@ -0,0 +1,79 @@ +[![Rust][Rust Badge]][Rust CI Link] +[![NotImplemented Counter][NotImplemented Badge]][NotImplemented Search] +[![Fuzzbug days since][Fuzzbug Days Badge]][Fuzzbugs] +[![Fuzzbug open][Fuzzbug Open Badge]][Open Fuzzbugs] + +# Metrics + +This is the metrics directory. It follows the evolution of the repository separately from the +repository itself. You can find the actual metrics in the +[`ci-results`](https://github.com/mozilla-spidermonkey/jsparagus/tree/ci-results) branch of the jsparagus project. This branch is automatically generated using the `create-ci-branch.sh` script found in this directory. If there are issues with your fork, you can remove the `ci-results` branch, and the CI will automatically rerun the `create-ci-branch` script to reset it. Do not push manual data to this repository; it will be lost. + +If you find that the `ci-results` branch has disappeared or been corrupted somehow, you can reset it by deleting it and recreating it. + +``` +git branch -D ci-results +cd .metrics +./create-ci-branch.sh +``` + +The `create-ci-branch.sh` script creates the branch, prepares it, and populates it with data from the past. + +## Making your own metrics +Make sure you do not use data that cannot be automatically recovered. We cannot rely on the `ci-results` branch always being present, therefore anything that you write must be recoverable on its own, either by relying on external APIs or through some other mechanism. + +Please update this README if you make any changes. + +## Types of CI Actions +These actions are all found in the `.github/workflows` directory. + +1) `rust.yml` - Run on Pull Request +* runs on every pull request and push to master; use for any metrics that are development related. Examples include linting, testing, etc. +2) `ci-push.yml` - Run on Push to `master` +* runs on self-contained metrics. An example is the number of `NotImplemented` errors in the codebase. This does not depend on anything external. +3) `ci-daily.yml` - Run Daily +* a cron task that runs daily. Useful for metrics that need daily updates. +4) `ci-issues.yml` - Run on issue open +* runs each time an issue is opened. Good for tracking types of issues. + + +## Types of data + +These are the types of data that this metrics folder tracks. + +1) Rust Passing + * Ensures our internal tests are passing + * Updates on every pull request to master. See [this + action](https://github.com/mozilla-spidermonkey/jsparagus/tree/master/.github/workflows/rust.yml) + +2) NotImplemented Count + * counts the number of NotImplemented errors in the codebase. This should slowly run down to zero + * Updates on every push to master. See [this + action](https://github.com/mozilla-spidermonkey/jsparagus/tree/master/.github/workflows/ci-push.yml) + +3) Days Since last Fuzzbug + * tracks the last fuzzbug we saw; if none exists, return ∞, otherwise return the last date regardless of state. + * Updates daily, regardless of push. 
See [this + action](https://github.com/mozilla-spidermonkey/jsparagus/tree/master/.github/workflows/ci-daily.yml) + +4) Fuzzbug open count + * tracks the number of open fuzzbugs + * Updates on issue open. See [this action](https://github.com/mozilla-spidermonkey/jsparagus/.github/workflows/ci-issues.yml) + +5) Percentage of tests passing with SmooshMonkey + * TODO: tracks the number of tests passing without fallback. We should use the try api for this. + * Updates daily, regardless of push. See [this + action](https://github.com/mozilla-spidermonkey/jsparagus/tree/master/.github/workflows/ci-daily.yml) + +6) Percentage of JS compilable with SmooshMonkey + * TODO: see comment about writing bytes to a file in [this repo](https://github.com/nbp/seqrec) + * implementation is dependant on how we get the data. We need a robust solution for importing this data. + +[Rust Badge]: https://github.com/mozilla-spidermonkey/jsparagus/workflows/Rust/badge.svg +[Rust CI Link]: https://github.com/mozilla-spidermonkey/jsparagus/actions?query=branch%3Amaster +[NotImplemented Badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmozilla-spidermonkey%2Fjsparagus%2Fci_results%2F.metrics%2Fbadges%2Fnot-implemented.json +[NotImplemented Search]: https://github.com/mozilla-spidermonkey/jsparagus/search?q=notimplemented&unscoped_q=notimplemented +[Fuzzbug days Badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmozilla-spidermonkey%2Fjsparagus%2Fci_results%2F.metrics%2Fbadges%2Fsince-last-fuzzbug.json +[Fuzzbug Open Badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmozilla-spidermonkey%2Fjsparagus%2Fci_results%2F.metrics%2Fbadges%2Fopen-fuzzbug.json +[Fuzzbugs]: https://github.com/mozilla-spidermonkey/jsparagus/issues?utf8=%E2%9C%93&q=label%3AlibFuzzer+ +[Open Fuzzbugs]: https://github.com/mozilla-spidermonkey/jsparagus/labels/libFuzzer diff --git a/third_party/rust/jsparagus/.metrics/create-ci-branch.sh b/third_party/rust/jsparagus/.metrics/create-ci-branch.sh new file mode 100755 index 0000000000..2415ed7ce9 --- /dev/null +++ b/third_party/rust/jsparagus/.metrics/create-ci-branch.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +set -ue # its like javascript, everything is allowed unless you prevent it. +shopt -s extglob + +# export the ci_branch we will be using in all shell scripts +export ci_branch=ci_results + +topdir=$(git rev-parse --show-toplevel) + +cd $topdir + +if [ `git branch --list $ci_branch` ] +then + echo "Branch exists" #We don't need to do anything +else + git checkout -b $ci_branch + + # clear out the repostory + git rm -r !(.metrics|.git|tmp) + git rm -r .github + + cp .metrics/generated_README.md README.md + mkdir .metrics/badges + mkdir .metrics/count + + git add . + git commit -m"Initial commit for results branch" + + # scripts needed to populated. Should be self contained with cleanup of extra files + cd .metrics && ./populate_not_implemented.sh + cd $topdir + cd .metrics && ./populate_fuzzbug.sh + + cd $topdir + git add . 
+ git commit -m"Inital run of Populate scripts" +fi diff --git a/third_party/rust/jsparagus/.metrics/fuzzbug_count_badge.py b/third_party/rust/jsparagus/.metrics/fuzzbug_count_badge.py new file mode 100644 index 0000000000..654908ff1f --- /dev/null +++ b/third_party/rust/jsparagus/.metrics/fuzzbug_count_badge.py @@ -0,0 +1,22 @@ +#!/usr/bin/python +import json + +read_filename = 'count/fuzzbug.json' +write_count = 'badges/open-fuzzbug.json' + +open_fuzzbugs = 0 +with open(read_filename, 'r') as f: + filedata = json.load(f) + # the open fuzzbug count. Can be deleted + open_fuzzbugs = len([x for x in filedata if x['closed_at'] is None]) + +# Write fuzzbug count +data = { + "schemaVersion": 1, + "label": "Open FuzzBugs", + "message": str(open_fuzzbugs) if open_fuzzbugs > 0 else "None", + "color": "green" if open_fuzzbugs == 0 else "yellow", +} + +with open(write_count, 'w') as f: + json.dump(data, f, indent=4) diff --git a/third_party/rust/jsparagus/.metrics/fuzzbug_date_badge.py b/third_party/rust/jsparagus/.metrics/fuzzbug_date_badge.py new file mode 100644 index 0000000000..a2d2b88c09 --- /dev/null +++ b/third_party/rust/jsparagus/.metrics/fuzzbug_date_badge.py @@ -0,0 +1,40 @@ +#!/usr/bin/python +import json +from datetime import datetime + +read_filename = 'count/fuzzbug.json' +write_since = 'badges/since-last-fuzzbug.json' + +days_since = None +with open(read_filename, 'r') as f: + filedata = json.load(f) + count = len(filedata) + # the last time we saw a fuzzbug regardless of status + if count > 0: + dt_format = "%Y-%m-%dT%H:%M:%SZ" + fuzzbug_opened = filedata[0]["created_at"] + fuzzbug_date = datetime.strptime(fuzzbug_opened, dt_format) + today = datetime.today() + days_since = (today - fuzzbug_date).days + + +# Write days since last fuzzbug + +def get_color(days): + if days_since is None or days_since > 100: + return "green" + elif days_since > 10: + return "yellow" + else: + return "red" + + +data = { + "schemaVersion": 1, + "label": "Days since last FuzzBug", + "message": str(days_since) if days_since is not None else "Forever", + "color": get_color(days_since), +} + +with open(write_since, 'w') as f: + json.dump(data, f, indent=4) diff --git a/third_party/rust/jsparagus/.metrics/generated_README.md b/third_party/rust/jsparagus/.metrics/generated_README.md new file mode 100644 index 0000000000..af4208a90f --- /dev/null +++ b/third_party/rust/jsparagus/.metrics/generated_README.md @@ -0,0 +1,48 @@ +[![Rust][Rust Badge]][Rust CI Link] +[![NotImplemented Counter][NotImplemented Badge]][NotImplemented Search] +[![Fuzzbug days since][Fuzzbug Days Badge]][Fuzzbugs] +[![Fuzzbug open][Fuzzbug Open Badge]][Open Fuzzbugs] + +# Metrics + +Unlike other branches in this project, this branch is for collecting metrics from the CI. you will +find these files in the `.results` folder. If this branch gets deleted, don't worry. This branch can be auto-generated from the `.metrics` +folder in the main repository. + +## Types of data + +These are the types of data that this metrics folder tracks. + +1) NotImplemented Count + * counts number of NotImplemented errors in the codebase. This should slowly rundown to zero + * Updates on every push to master. See [this + action](https://github.com/mozilla-spidermonkey/jsparagus/tree/master/.github/workflows/ci-push.yml) + +2) Days Since last Fuzzbug + * tracks the last fuzzbug we saw, if it does not exist, return โ, otherwise return the last date regardless of state. + * Updates daily, regardless of push. 
See [this + action](https://github.com/mozilla-spidermonkey/jsparagus/tree/master/.github/workflows/ci-daily.yml) + +3) Fuzzbug open count + * tracks the number of open fuzzbugs + * Updates daily, regardless of push. See [this + action](https://github.com/mozilla-spidermonkey/jsparagus/tree/master/.github/workflows/ci-issues.yml) + +4) Percentage of tests passing with SmooshMonkey + * TODO: tracks the number of tests passing without fallback. We should use the try api for this. + * Updates daily, regardless of push. See [this + action](https://github.com/mozilla-spidermonkey/jsparagus/tree/master/.github/workflows/ci-daily.yml) + + +5) Percentage of JS compilable with SmooshMonkey + * TODO: see comment about writing bytes to a file in [this repo](https://github.com/nbp/seqrec) + * implementation is dependant on how we get the data. We need a robust solution for importing this data. + +[Rust Badge]: https://github.com/mozilla-spidermonkey/jsparagus/workflows/Rust/badge.svg +[Rust CI Link]: https://github.com/mozilla-spidermonkey/jsparagus/actions?query=branch%3Amaster +[NotImplemented Badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmozilla-spidermonkey%2Fjsparagus%2Fci_results%2F.metrics%2Fbadges%2Fnot-implemented.json +[NotImplemented Search]: https://github.com/mozilla-spidermonkey/jsparagus/search?q=notimplemented&unscoped_q=notimplemented +[Fuzzbug days Badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmozilla-spidermonkey%2Fjsparagus%2Fci_results%2F.metrics%2Fbadges%2Fsince-last-fuzzbug.json +[Fuzzbug Open Badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmozilla-spidermonkey%2Fjsparagus%2Fci_results%2F.metrics%2Fbadges%2Fopen-fuzzbug.json +[Fuzzbugs]: https://github.com/mozilla-spidermonkey/jsparagus/issues?utf8=%E2%9C%93&q=label%3AlibFuzzer+ +[Open Fuzzbugs]: https://github.com/mozilla-spidermonkey/jsparagus/labels/libFuzzer diff --git a/third_party/rust/jsparagus/.metrics/not_implemented_badge.py b/third_party/rust/jsparagus/.metrics/not_implemented_badge.py new file mode 100644 index 0000000000..bc522f1fe3 --- /dev/null +++ b/third_party/rust/jsparagus/.metrics/not_implemented_badge.py @@ -0,0 +1,14 @@ +#!/usr/bin/python +import json +import os.path + +filename = 'badges/not-implemented.json' +total_count = os.environ['total_count'] +data = { + "schemaVersion": 1, + "label": "NotImplemented", + "message": total_count, + "color": "green" if total_count == "0" else "yellow", +} +with open(filename, 'w') as f: + json.dump(data, f, indent=4) diff --git a/third_party/rust/jsparagus/.metrics/not_implemented_count.py b/third_party/rust/jsparagus/.metrics/not_implemented_count.py new file mode 100644 index 0000000000..947044eca9 --- /dev/null +++ b/third_party/rust/jsparagus/.metrics/not_implemented_count.py @@ -0,0 +1,19 @@ +#!/usr/bin/python +import json +import os.path + +filename = 'count/not-implemented.json' +if not os.path.isfile(filename): + with open(filename, 'w') as f: + json.dump([], f, indent=4) # initialize with an empty list + +with open(filename, 'r+') as f: + data = json.load(f) + if len(data) == 0 or data[-1]["commit"] != os.environ['current_commit']: + data.append({ + "commit": os.environ['current_commit'], + "total_count": os.environ['total_count'] + }) + f.seek(0) + json.dump(data, f, indent=4) + f.truncate() diff --git a/third_party/rust/jsparagus/.metrics/populate_fuzzbug.sh b/third_party/rust/jsparagus/.metrics/populate_fuzzbug.sh new file mode 100755 index 
0000000000..af48d566b1 --- /dev/null +++ b/third_party/rust/jsparagus/.metrics/populate_fuzzbug.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +set -ue # its like javascript, everything is allowed unless you prevent it. + +topdir=$(git rev-parse --show-toplevel) + +cd $topdir/.metrics + +url="https://api.github.com/repos/mozilla-spidermonkey/jsparagus/issues?labels=libFuzzer&state=all" + +curl $url > count/fuzzbug.json +python fuzzbug_count_badge.py +git add . +git commit -m"Add Fuzzbug date" +python fuzzbug_date_badge.py + +git add . + +git commit -m"Add Fuzzbug count" diff --git a/third_party/rust/jsparagus/.metrics/populate_not_implemented.sh b/third_party/rust/jsparagus/.metrics/populate_not_implemented.sh new file mode 100755 index 0000000000..3a6200133c --- /dev/null +++ b/third_party/rust/jsparagus/.metrics/populate_not_implemented.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash + +set -ue # its like javascript, everything is allowed unless you prevent it. + +topdir=$(git rev-parse --show-toplevel) + +cd $topdir +# setup: persist the scripts between commits +mkdir -p tmp +cp -r .metrics tmp/ +git checkout master +git pull origin master + +# create the log of commits +git log --format=oneline --since=2020-01-01 | tac | awk '{print $1}' > tmp/commit-list +cd tmp/.metrics + +# do stuff with the commits +for commit in $(cat $topdir/tmp/commit-list) +do + git checkout $commit + # python script pulls from env variables, export those + export total_count=$(find $topdir/rust -iname '*.rs' -type f -exec cat {} + | grep -c -E "(Emit|Parse|ScopeBuild)Error::NotImplemented") + export current_commit=$commit + python not_implemented_count.py + python not_implemented_badge.py +done + +cd $topdir +git checkout $ci_branch + +# replace this file stuff with whatever it is you want to do to get it to the right place in the +# repo +mv -f tmp/.metrics/count/not-implemented.json .metrics/count/not-implemented.json +mv -f tmp/.metrics/badges/not-implemented.json .metrics/badges/not-implemented.json + +# Cleanup: Kill the tmp dir +rm -r tmp + +git add . +git commit -m"Add NotImplemented" diff --git a/third_party/rust/jsparagus/CODE_OF_CONDUCT.md b/third_party/rust/jsparagus/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000..2c75c30d59 --- /dev/null +++ b/third_party/rust/jsparagus/CODE_OF_CONDUCT.md @@ -0,0 +1,8 @@ +# Community Participation Guidelines + +This repository is governed by Mozilla's code of conduct and etiquette guidelines. For more details, please read the [Mozilla Community Participation Guidelines](https://www.mozilla.org/about/governance/policies/participation/). + +## How to Report + +For more information on how to report violations of the Community Participation Guidelines, please read our "[How to Report](https://www.mozilla.org/about/governance/policies/participation/reporting/)" page. + diff --git a/third_party/rust/jsparagus/Cargo.lock b/third_party/rust/jsparagus/Cargo.lock new file mode 100644 index 0000000000..c7c4127467 --- /dev/null +++ b/third_party/rust/jsparagus/Cargo.lock @@ -0,0 +1,891 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "ansi_term" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +dependencies = [ + "winapi", +] + +[[package]] +name = "anyhow" +version = "1.0.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7825f6833612eb2414095684fcf6c635becf3ce97fe48cf6421321e93bfbd53c" + +[[package]] +name = "arrayvec" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2" + +[[package]] +name = "autocfg" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d" + +[[package]] +name = "bitflags" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" + +[[package]] +name = "bstr" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "502ae1441a0a5adb8fbd38a5955a6416b9493e92b465de5e4a9bde6a539c2c48" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata", + "serde", +] + +[[package]] +name = "bumpalo" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e8c087f005730276d1096a652e92a8bacee2e2472bcc9715a74d2bec38b5820" + +[[package]] +name = "byteorder" +version = "1.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" + +[[package]] +name = "cast" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b9434b9a5aa1450faa3f9cb14ea0e8c53bb5d2b3c1bfd1ab4fc03e9f33fbfb0" +dependencies = [ + "rustc_version", +] + +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + +[[package]] +name = "clap" +version = "2.33.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9" +dependencies = [ + "ansi_term", + "atty", + "bitflags", + "strsim", + "textwrap", + "unicode-width", + "vec_map", +] + +[[package]] +name = "criterion" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc755679c12bda8e5523a71e4d654b6bf2e14bd838dfc48cde6559a05caf7d1" +dependencies = [ + "atty", + "cast", + "clap", + "criterion-plot", + "csv", + "itertools", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a01e15e0ea58e8234f96146b1f91fa9d0e4dd7a38da93ff7a75d42c0b9d3a545" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-deque" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c20ff29ded3204c5106278a81a38f4b482636ed4fa1e6cfbeef193291beb29ed" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", + "maybe-uninit", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "058ed274caafc1f60c4997b5fc07bf7dc7cca454af7c6e81edffe5f33f70dace" +dependencies = [ + "autocfg 1.0.0", + "cfg-if", + "crossbeam-utils", + "lazy_static", + "maybe-uninit", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-queue" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c695eeca1e7173472a32221542ae469b3e9aac3a4fc81f7696bcad82029493db" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce446db02cdc3165b94ae73111e570793400d0794e46125cc4056c81cbb039f4" +dependencies = [ + "autocfg 0.1.7", + "cfg-if", + "lazy_static", +] + +[[package]] +name = "csv" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00affe7f6ab566df61b4be3ce8cf16bc2576bca0963ceb0955e45d514bf9a279" +dependencies = [ + "bstr", + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + +[[package]] +name = "either" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" + +[[package]] +name = "env_logger" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36" +dependencies = [ + "log", +] + +[[package]] +name = "heck" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "hermit-abi" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1010591b26bbfe835e9faeabeb11866061cc7dcebffd56ad7d0942d0e61aefd8" +dependencies = [ + "libc", +] + +[[package]] +name = "indexmap" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "076f042c5b7b98f31d205f1249267e12a6518c1481e9dae9764af19b707d2292" +dependencies = [ + "autocfg 1.0.0", +] + +[[package]] +name = "itertools" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f56a2d0bc861f9165be4eb3442afd3c236d8a98afd426f65d92324ae1091a484" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8b7a7c0c47db5545ed3fef7468ee7bb5b74691498139e4b3f6a20685dc6dd8e" + +[[package]] +name = "js-sys" +version = "0.3.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7889c7c36282151f6bf465be4700359318aef36baa951462382eae49e9577cf9" +dependencies = [ + 
"wasm-bindgen", +] + +[[package]] +name = "jsparagus" +version = "0.1.0" +dependencies = [ + "jsparagus-ast", + "jsparagus-driver", + "jsparagus-emitter", + "jsparagus-generated-parser", + "jsparagus-interpreter", + "jsparagus-json-log", + "jsparagus-parser", + "jsparagus-scope", + "jsparagus-stencil", +] + +[[package]] +name = "jsparagus-ast" +version = "0.1.0" +dependencies = [ + "bumpalo", + "indexmap", +] + +[[package]] +name = "jsparagus-driver" +version = "0.1.0" +dependencies = [ + "bumpalo", + "env_logger", + "jsparagus-ast", + "jsparagus-emitter", + "jsparagus-interpreter", + "jsparagus-parser", + "jsparagus-stencil", + "log", + "structopt", +] + +[[package]] +name = "jsparagus-emitter" +version = "0.1.0" +dependencies = [ + "bumpalo", + "byteorder", + "indexmap", + "jsparagus-ast", + "jsparagus-parser", + "jsparagus-scope", + "jsparagus-stencil", +] + +[[package]] +name = "jsparagus-generated-parser" +version = "0.1.0" +dependencies = [ + "bumpalo", + "jsparagus-ast", + "static_assertions", +] + +[[package]] +name = "jsparagus-interpreter" +version = "0.1.0" +dependencies = [ + "bumpalo", + "jsparagus-ast", + "jsparagus-emitter", + "jsparagus-parser", + "jsparagus-stencil", +] + +[[package]] +name = "jsparagus-json-log" +version = "0.1.0" +dependencies = [ + "log", + "serde_json", +] + +[[package]] +name = "jsparagus-parser" +version = "0.1.0" +dependencies = [ + "arrayvec", + "bumpalo", + "criterion", + "jsparagus-ast", + "jsparagus-generated-parser", + "jsparagus-json-log", +] + +[[package]] +name = "jsparagus-scope" +version = "0.1.0" +dependencies = [ + "indexmap", + "jsparagus-ast", + "jsparagus-stencil", +] + +[[package]] +name = "jsparagus-stencil" +version = "0.1.0" +dependencies = [ + "jsparagus-ast", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.67" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb147597cdf94ed43ab7a9038716637d2d1bf2bc571da995d0028dec06bd3018" + +[[package]] +name = "log" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "maybe-uninit" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" + +[[package]] +name = "memchr" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53445de381a1f436797497c61d851644d0e8e88e6140f22872ad33a704933978" + +[[package]] +name = "memoffset" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75189eb85871ea5c2e2c15abbdd541185f63b408415e5051f5cac122d8c774b9" +dependencies = [ + "rustc_version", +] + +[[package]] +name = "nom" +version = "4.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6" +dependencies = [ + "memchr", + "version_check 0.1.5", +] + +[[package]] +name = "num-traits" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c62be47e61d1842b9170f0fdeec8eba98e60e90e5446449a0545e5152acd7096" +dependencies = [ + "autocfg 1.0.0", +] + +[[package]] +name = "num_cpus" +version = 
"1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46203554f085ff89c235cd12f7075f3233af9b11ed7c9e16dfe2560d03313ce6" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "oorandom" +version = "11.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebcec7c9c2a95cacc7cd0ecb89d8a8454eca13906f6deb55258ffff0adeb9405" + +[[package]] +name = "plotters" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e3bb8da247d27ae212529352020f3e5ee16e83c0c258061d27b08ab92675eeb" +dependencies = [ + "js-sys", + "num-traits", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "proc-macro-error" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7959c6467d962050d639361f7703b2051c43036d03493c36f01d440fdd3138a" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check 0.9.1", +] + +[[package]] +name = "proc-macro-error-attr" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4002d9f55991d5e019fb940a90e1a95eb80c24e77cb2462dd4dc869604d543a" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "syn-mid", + "version_check 0.9.1", +] + +[[package]] +name = "proc-macro2" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acb317c6ff86a4e579dfa00fc5e6cca91ecbb4e7eb2df0468805b674eb88548" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rayon" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db6ce3297f9c85e16621bb8cca38a06779ffc31bb8184e1be4bed2be4678a098" +dependencies = [ + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08a89b46efaf957e52b18062fb2f4660f8b8a4dde1807ca002690868ef2c85a9" +dependencies = [ + "crossbeam-deque", + "crossbeam-queue", + "crossbeam-utils", + "lazy_static", + "num_cpus", +] + +[[package]] +name = "regex" +version = "1.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "322cf97724bea3ee221b78fe25ac9c46114ebb51747ad5babd51a2fc6a8235a8" +dependencies = [ + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92b73c2a1770c255c240eaa4ee600df1704a38dc3feaa6e949e7fcd4f8dc09f9" +dependencies = [ + "byteorder", +] + +[[package]] +name = "regex-syntax" +version = "0.6.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b28dfe3fe9badec5dbf0a79a9cccad2cfc2ab5484bdb3e44cbd1ae8b3ba2be06" + +[[package]] +name = "rustc_version" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +dependencies = [ + "semver", +] + +[[package]] +name = "ryu" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa8506c1de11c9c4e4c38863ccbe02a305c8188e85a05a784c9e11e1c3910c8" + +[[package]] +name = "same-file" +version = "1.0.6" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "semver" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" +dependencies = [ + "semver-parser", +] + +[[package]] +name = "semver-parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" + +[[package]] +name = "serde" +version = "1.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "414115f25f818d7dfccec8ee535d76949ae78584fc4f79a6f45a904bf8ab4449" + +[[package]] +name = "serde_derive" +version = "1.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "128f9e303a5a29922045a830221b8f78ec74a5f544944f3d5984f8ec3895ef64" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9371ade75d4c2d6cb154141b9752cf3781ec9c05e0e5cf35060e1e70ee7b9c25" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sourcefile" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bf77cb82ba8453b42b6ae1d692e4cdc92f9a47beaf89a847c8be83f4e328ad3" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + +[[package]] +name = "structopt" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8faa2719539bbe9d77869bfb15d4ee769f99525e707931452c97b693b3f159d" +dependencies = [ + "clap", + "lazy_static", + "structopt-derive", +] + +[[package]] +name = "structopt-derive" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f88b8e18c69496aad6f9ddf4630dd7d585bcaf765786cb415b9aec2fe5a0430" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "syn" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af6f3550d8dff9ef7dc34d384ac6f107e5d31c8f57d9f28e0081503f547ac8f5" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "syn-mid" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7be3539f6c128a931cf19dcee741c1af532c7fd387baa739c03dd2e96479338a" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "tinytemplate" +version = "1.0.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "57a3c6667d3e65eb1bc3aed6fd14011c6cbc3a0665218ab7f5daf040b9ec371a" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "unicode-segmentation" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0" + +[[package]] +name = "unicode-width" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479" + +[[package]] +name = "unicode-xid" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" + +[[package]] +name = "vec_map" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" + +[[package]] +name = "version_check" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" + +[[package]] +name = "version_check" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "078775d0255232fb988e6fccf26ddc9d1ac274299aaedcedce21c6f72cc533ce" + +[[package]] +name = "walkdir" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "777182bc735b6424e1a57516d35ed72cb8019d85c8c9bf536dccb3445c1a2f7d" +dependencies = [ + "same-file", + "winapi", + "winapi-util", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5205e9afdf42282b192e2310a5b463a6d1c1d774e30dc3c791ac37ab42d2616c" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11cdb95816290b525b32587d76419facd99662a07e59d3cdb560488a819d9a45" +dependencies = [ + "bumpalo", + "lazy_static", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "574094772ce6921576fb6f2e3f7497b8a76273b6db092be18fc48a082de09dc3" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e85031354f25eaebe78bb7db1c3d86140312a911a106b2e29f9cc440ce3e7668" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f5e7e61fc929f4c0dddb748b102ebf9f632e2b8d739f2016542b4de2965a9601" + +[[package]] +name = "wasm-bindgen-webidl" +version = "0.2.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef012a0d93fc0432df126a8eaf547b2dce25a8ce9212e1d3cbeef5c11157975d" +dependencies = [ + "anyhow", + "heck", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "weedle", +] + +[[package]] +name = "web-sys" +version = "0.3.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"aaf97caf6aa8c2b1dac90faf0db529d9d63c93846cca4911856f78a83cebf53b" +dependencies = [ + "anyhow", + "js-sys", + "sourcefile", + "wasm-bindgen", + "wasm-bindgen-webidl", +] + +[[package]] +name = "weedle" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bb43f70885151e629e2a19ce9e50bd730fd436cfd4b666894c9ce4de9141164" +dependencies = [ + "nom", +] + +[[package]] +name = "winapi" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ccfbf554c6ad11084fb7517daca16cfdcaccbdadba4fc336f032a8b12c2ad80" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/third_party/rust/jsparagus/Cargo.toml b/third_party/rust/jsparagus/Cargo.toml new file mode 100644 index 0000000000..289f04a5bc --- /dev/null +++ b/third_party/rust/jsparagus/Cargo.toml @@ -0,0 +1,36 @@ +[package] +name = "jsparagus" +authors = ["The jsparagus Project Developers"] +version = "0.1.0" +description = "A JavaScript parser" +license = "MIT/Apache-2.0" +repository = "https://github.com/mozilla-spidermonkey/jsparagus" +publish = false +edition = "2018" + +[workspace] + +[lib] + +[[bin]] +name = "smoosh_tools" +path = "src/bin/smoosh_tools.rs" + +[dependencies] +jsparagus-ast = { path = "crates/ast" } +jsparagus-driver = { path = "crates/driver", optional = true } +jsparagus-emitter = { path = "crates/emitter" } +jsparagus-generated-parser = { path = "crates/generated_parser" } +jsparagus-interpreter = { path = "crates/interpreter", optional = true } +jsparagus-json-log = { path = "crates/json-log" } +jsparagus-parser = { path = "crates/parser" } +jsparagus-scope = { path = "crates/scope" } +jsparagus-stencil = { path = "crates/stencil" } + +[features] +default = [] +logging = ["jsparagus-parser/logging"] +full = ["jsparagus-driver", "jsparagus-interpreter"] + +[profile.release] +debug = true # enable debug symbols in release builds diff --git a/third_party/rust/jsparagus/LICENSE b/third_party/rust/jsparagus/LICENSE new file mode 100644 index 0000000000..638926c173 --- /dev/null +++ b/third_party/rust/jsparagus/LICENSE @@ -0,0 +1,15 @@ +Copyright Mozilla Foundation + +Licensed under the Apache License (Version 2.0), or the MIT license, +(the "Licenses") at your option. You may not use this file except in +compliance with one of the Licenses. You may obtain copies of the +Licenses at: + +http://www.apache.org/licenses/LICENSE-2.0 +http://opensource.org/licenses/MIT + +Unless required by applicable law or agreed to in writing, software +distributed under the Licenses is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the Licenses for the specific language governing permissions and +limitations under the Licenses. 
diff --git a/third_party/rust/jsparagus/LICENSE-APACHE-2.0 b/third_party/rust/jsparagus/LICENSE-APACHE-2.0 new file mode 100644 index 0000000000..80b2915dd3 --- /dev/null +++ b/third_party/rust/jsparagus/LICENSE-APACHE-2.0 @@ -0,0 +1,218 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2020 Mozilla Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. diff --git a/third_party/rust/jsparagus/LICENSE-MIT b/third_party/rust/jsparagus/LICENSE-MIT new file mode 100644 index 0000000000..ee2a08f98f --- /dev/null +++ b/third_party/rust/jsparagus/LICENSE-MIT @@ -0,0 +1,7 @@ +Copyright (c) 2020 Mozilla Foundation + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/third_party/rust/jsparagus/Makefile b/third_party/rust/jsparagus/Makefile new file mode 100644 index 0000000000..827e232c7f --- /dev/null +++ b/third_party/rust/jsparagus/Makefile @@ -0,0 +1,111 @@ +PY_OUT = js_parser/parser_tables.py +HANDLER_FILE = crates/generated_parser/src/ast_builder.rs +HANDLER_INFO_OUT = jsparagus/emit/collect_handler_info/info.json +RS_TABLES_OUT = crates/generated_parser/src/parser_tables_generated.rs +RS_AST_OUT = crates/ast/src/types_generated.rs \ + crates/ast/src/type_id_generated.rs \ + crates/ast/src/dump_generated.rs \ + crates/ast/src/visit_generated.rs \ + crates/ast/src/source_location_accessor_generated.rs \ + crates/generated_parser/src/stack_value_generated.rs + +JSPARAGUS_DIR := $(dir $(firstword $(MAKEFILE_LIST))) +VENV_BIN_DIR := $(JSPARAGUS_DIR)jsparagus_build_venv/bin +PYTHON := $(VENV_BIN_DIR)/python +PIP := $(VENV_BIN_DIR)/pip + +all: $(PY_OUT) rust + +init-venv: + python3 -m venv jsparagus_build_venv &&\ + $(PIP) install --upgrade pip &&\ + $(PIP) install -r requirements.txt + +init: init-venv + git config core.hooksPath .githooks + +ECMA262_SPEC_HTML = ../tc39/ecma262/spec.html +STANDARD_ES_GRAMMAR_OUT = js_parser/es.esgrammar + +# List of files which have a grammar_extension! Rust macro. The macro content is +# scrapped to patch the extracted grammar. +EXTENSION_FILES = \ + +# Incomplete list of files that contribute to the dump file. +SOURCE_FILES = $(EXTENSION_FILES) \ +jsparagus/gen.py \ +jsparagus/grammar.py \ +jsparagus/rewrites.py \ +jsparagus/lr0.py \ +jsparagus/parse_table.py \ +jsparagus/extension.py \ +jsparagus/utils.py \ +jsparagus/actions.py \ +jsparagus/aps.py \ +jsparagus/types.py \ +js_parser/esgrammar.pgen \ +js_parser/generate_js_parser_tables.py \ +js_parser/parse_esgrammar.py \ +js_parser/load_es_grammar.py \ +js_parser/es-simplified.esgrammar + +EMIT_FILES = $(SOURCE_FILES) \ +jsparagus/emit/__init__.py \ +jsparagus/emit/python.py \ +jsparagus/emit/rust.py + +DUMP_FILE = js_parser/parser_generated.jsparagus_dump + +$(DUMP_FILE): $(SOURCE_FILES) + $(PYTHON) -m js_parser.generate_js_parser_tables --progress -o $@ $(EXTENSION_FILES:%=--extend %) + +$(PY_OUT): $(EMIT_FILES) $(DUMP_FILE) + $(PYTHON) -m js_parser.generate_js_parser_tables --progress -o $@ $(DUMP_FILE) + +$(HANDLER_INFO_OUT): jsparagus/emit/collect_handler_info/src/main.rs $(HANDLER_FILE) + (cd jsparagus/emit/collect_handler_info/; cargo run --bin collect_handler_info ../../../$(HANDLER_FILE) $(subst jsparagus/emit/collect_handler_info/,,$(HANDLER_INFO_OUT))) + +$(RS_AST_OUT): crates/ast/ast.json crates/ast/generate_ast.py + (cd crates/ast && $(abspath $(PYTHON)) generate_ast.py) + +$(RS_TABLES_OUT): $(EMIT_FILES) $(DUMP_FILE) $(HANDLER_INFO_OUT) + $(PYTHON) -m js_parser.generate_js_parser_tables --progress -o $@ $(DUMP_FILE) $(HANDLER_INFO_OUT) + +# This isn't part of the `all` target because it relies on a file that might +# not be there -- it lives in a different git respository. 
+$(STANDARD_ES_GRAMMAR_OUT): $(ECMA262_SPEC_HTML) + $(PYTHON) -m js_parser.extract_es_grammar $(ECMA262_SPEC_HTML) > $@ || rm $@ + +rust: $(RS_AST_OUT) $(RS_TABLES_OUT) + cargo build --all + +jsparagus/parse_pgen_generated.py: + $(PYTHON) -m jsparagus.parse_pgen --regenerate > $@ + +check: all static-check dyn-check + +dyn-check: + ./test.sh + cargo fmt + cargo test --all + +static-check: + $(VENV_BIN_DIR)/mypy -p jsparagus -p tests -p js_parser + +jsdemo: $(PY_OUT) + $(PYTHON) -m js_parser.try_it + +update-stencil: + $(PYTHON) update_stencil.py \ + ../mozilla-unified ./ + +update-unicode: + $(PYTHON) update_unicode.py UNIDATA ./ + +smoosh-status: + $(PYTHON) smoosh_status.py + +smoosh-status-ci: + $(PYTHON) smoosh_status.py ci + +.PHONY: all check static-check dyn-check jsdemo rust update-opcodes-m-u smoosh-status smoosh-status-ci diff --git a/third_party/rust/jsparagus/README.md b/third_party/rust/jsparagus/README.md new file mode 100644 index 0000000000..a380accae5 --- /dev/null +++ b/third_party/rust/jsparagus/README.md @@ -0,0 +1,159 @@ +[![Rust][Rust Badge]][Rust CI Link] +[![NotImplemented Counter][NotImplemented Badge]][NotImplemented Search] +[![Fuzzbug days since][Fuzzbug Days Badge]][Fuzzbugs] +[![Fuzzbug open][Fuzzbug Open Badge]][Open Fuzzbugs] +[![SmooshMonkey Build Result][SmooshMonkey Build Badge]][SmooshMonkey Build TreeHerder] +[![SmooshMonkey Test Result][SmooshMonkey Test Badge]][SmooshMonkey Test TreeHerder] + +# jsparagus - A JavaScript parser written in Rust + +jsparagus is intended to replace the JavaScript parser in Firefox. + +Current status: + +* jsparagus is not on crates.io yet. The AST design is not stable + enough. We do have a build of the JS shell that includes jsparagus + as an option (falling back on C++ for features jsparagus doesn't + support). See + [mozilla-spidermonkey/rust-frontend](https://github.com/mozilla-spidermonkey/rust-frontend). + +* It can parse a lot of JS scripts, and will eventually be able to parse everything. + See the current limitations below, or our GitHub issues. + +* Our immediate goal is to [support parsing everything in Mozilla's JS + test suite and the features in test262 that Firefox already + supports](https://github.com/mozilla-spidermonkey/jsparagus/milestone/1). + +Join us on Discord: https://discord.gg/tUFFk9Y + + +## Building jsparagus + +To build the parser by itself: + +```sh +make init +make all +``` + +The build takes about 3 minutes to run on my laptop. + +When it's done, you can: + +* Run `make check` to make sure things are working. + +* `cd crates/driver && cargo run -- -D` to try out the JS parser and bytecode emitter. + + +## Building and running SpiderMonkey with jsparagus + +* To build SpiderMonkey with jsparagus, `configure` with `--enable-smoosh`. + + This builds with a specific known-good revision of jsparagus. + +* Building SpiderMonkey with your own local jsparagus repo, for + development, takes more work; see [the jsparagus + SpiderMonkey wiki + page](https://github.com/mozilla-spidermonkey/jsparagus/wiki/SpiderMonkey) + for details. + +**NOTE: Even after building with jsparagus, you must run the shell with +`--smoosh`** to enable jsparagus at run time. + + + +## Benchmarking + +### Fine-grain Benchmarks + +Fine-grain benchmarks are used to detect regression by focusing on each part of +the parser at one time, exercising only this one part. The benchmarks are not +meant to represent any real code sample, but to focus on executing specific +functions of the parser. 
+ +To run these benchmarks, execute the following command at the root of the +repository: + +```sh +cd crates/parser +cargo bench +``` + +### Real-world JavaScript + +Real-world benchmarks are used to track the overall evolution of performance over +time. The benchmarks are meant to represent realistic production use cases. + +To benchmark the AST generation, we use SpiderMonkey integration to execute the +parser and compare it against SpiderMonkey's default parser. Therefore, to run +this benchmark, we have to first compile SpiderMonkey, then execute the SpiderMonkey +shell on the benchmark. (The following instructions assume that `~` is the +directory where all projects are checked out.) + +* Generate Parse Tables: + + ```sh + cd ~/jsparagus/ + make init + make all + ``` + +* Compile an optimized version of [SpiderMonkey's JavaScript shell](https://github.com/mozilla/gecko-dev): + + ```sh + cd ~/mozilla/js/src/ + # set the jsparagus path to the absolute path of ~/jsparagus. + $EDITOR frontend/smoosh/Cargo.toml + ../../mach vendor rust + # Create a build directory + mkdir obj.opt + cd obj.opt + # Build SpiderMonkey + ../configure --enable-nspr-build --enable-smoosh --enable-debug-symbols=-ggdb3 --disable-debug --enable-optimize --enable-release --disable-tests + make + ``` + +* Execute the [real-js-samples](https://github.com/nbp/real-js-samples/) benchmark: + + ```sh + cd ~/real-js-samples/ + ~/mozilla/js/src/obj.opt/dist/bin/js ./20190416.js + ``` + +This should return the overall time taken to parse all the scripts once, in the +cases where there is no error. The goal is to minimize the number of +nanoseconds per byte. + + +## Limitations + +It's *all* limitations, but I'll try to list the ones that are relevant +to parsing JS. + +* Features that are not implemented in the parser yet include `let`, + `import` and `export`, `async` functions, `yield` expressions, the + use of `await` and `yield` as identifiers, template strings, + `BigInt`, Unicode escape sequences that evaluate to surrogate code + points, legacy octal integer literals, legacy octal escape + sequences, some RegExp flags, strict mode code, `__proto__` in + object literals, some features of destructuring assignment. + + Many more features are not yet supported in the bytecode emitter. + +* Error messages are poor. + +We're currently working on parser performance and completeness, as well +as the bytecode emitter and further integration with SpiderMonkey. 
+ + +[Rust Badge]: https://github.com/mozilla-spidermonkey/jsparagus/workflows/Rust/badge.svg +[Rust CI Link]: https://github.com/mozilla-spidermonkey/jsparagus/actions?query=branch%3Amaster +[NotImplemented Badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmozilla-spidermonkey%2Fjsparagus%2Fci_results%2F.metrics%2Fbadges%2Fnot-implemented.json +[NotImplemented Search]: https://github.com/mozilla-spidermonkey/jsparagus/search?q=notimplemented&unscoped_q=notimplemented +[Fuzzbug days Badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmozilla-spidermonkey%2Fjsparagus%2Fci_results%2F.metrics%2Fbadges%2Fsince-last-fuzzbug.json +[Fuzzbug Open Badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmozilla-spidermonkey%2Fjsparagus%2Fci_results%2F.metrics%2Fbadges%2Fopen-fuzzbug.json +[Fuzzbugs]: https://github.com/mozilla-spidermonkey/jsparagus/issues?utf8=%E2%9C%93&q=label%3AlibFuzzer+ +[Open Fuzzbugs]: https://github.com/mozilla-spidermonkey/jsparagus/labels/libFuzzer +[SmooshMonkey Build Badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmozilla-spidermonkey%2Fjsparagus%2Fci_smoosh_status%2Fsmoosh_build.json +[SmooshMonkey Build TreeHerder]: https://treeherder.mozilla.org/#/jobs?repo=mozilla-central&tier=1%2C2%2C3&searchStr=sm-nonunified +[SmooshMonkey Test Badge]: https://img.shields.io/endpoint?url=https%3A%2F%2Fraw.githubusercontent.com%2Fmozilla-spidermonkey%2Fjsparagus%2Fci_smoosh_status%2Fsmoosh_test.json +[SmooshMonkey Test TreeHerder]: https://treeherder.mozilla.org/#/jobs?repo=mozilla-central&tier=1%2C2%2C3&searchStr=sm-smoosh diff --git a/third_party/rust/jsparagus/benchmarks/compare-spidermonkey-parsers.js b/third_party/rust/jsparagus/benchmarks/compare-spidermonkey-parsers.js new file mode 100644 index 0000000000..d449bffda7 --- /dev/null +++ b/third_party/rust/jsparagus/benchmarks/compare-spidermonkey-parsers.js @@ -0,0 +1,315 @@ +// This script runs multipe parsers from a single engine. +"use strict"; + +// Directory where to find the list of JavaScript sources to be used for +// benchmarking. +var dir = "."; + +// Skip list cache to be used to be able to compare profiles. Without a skip +// list which ensure that only runnable test cases are used, the profile would +// not represent the actual values reported by this script. +var skipList = [], skipFile = "", skipLen = 0; + +// Handle command line arguments. +for (var i = 0; i < scriptArgs.length; i++) { + switch (scriptArgs[i]) { + case "--dir": + if (++i >= scriptArgs.length) { + throw Error("--dir expects a path."); + } + dir = scriptArgs[i]; + break; + case "--skip-file": + if (++i >= scriptArgs.length) { + throw Error("--skip-file expects a path."); + } + skipFile = scriptArgs[i]; + try { + skipList = eval(os.file.readFile(skipFile)); + } catch (e) { + // ignore errors + } + skipLen = skipList.length; + break; + } +} + +// Execution mode of the parser, either "script" or "module". +var mode = "script"; + +// Number of times each JavaScript source is used for benchmarking. 
+var runs_per_script = 10; + +// First parser +var name_1 = "SpiderMonkey parser"; +function parse_1(path) { + var start = performance.now(); + parse(path, { module: mode == "module", smoosh: false }); + return performance.now() - start; +} + +// Second parser +var name_2 = "SmooshMonkey parser"; +function parse_2(path) { + var start = performance.now(); + parse(path, { module: mode == "module", smoosh: true }); + return performance.now() - start; +} + +// For a given `parse` function, execute it with the content of each file in +// `dir`. This process is repeated `N` times and the results are added to the +// `result` argument using the `prefix` key for the filenames. +function for_all_files(parse, N = 1, prefix = "", result = {}) { + var path = "", content = ""; + var t = 0; + var list = os.file.listDir(dir); + for (var file of list) { + try { + path = os.path.join(dir, file); + content = os.file.readRelativeToScript(path); + try { + t = 0; + for (var n = 0; n < N; n++) + t += parse(content); + result[prefix + path] = { time: t / N, bytes: content.length }; + } catch (e) { + // ignore all errors for now. + result[prefix + path] = { time: null, bytes: content.length }; + } + } catch (e) { + // ignore all read errors. + } + } + return result; +} + +// Compare the results of 2 parser runs and compute the speed ratio between the +// 2 parsers. Results from both parsers are assuming to be comparing the same +// things if they have the same property name. +// +// The aggregated results is returned as an object, which reports the total time +// for each parser, the quantity of bytes parsed and skipped and an array of +// speed ratios for each file tested. +function compare(name1, res1, name2, res2) { + var result = { + name1: name1, + name2: name2, + time1: 0, + time2: 0, + parsed_files: 0, + parsed_bytes: 0, + skipped_files: 0, + skipped_bytes: 0, + ratios_2over1: [], + }; + for (var path of Object.keys(res1)) { + if (!(path in res1 && path in res2)) { + continue; + } + var p1 = res1[path]; + var p2 = res2[path]; + if (p1.time !== null && p2.time !== null) { + result.time1 += p1.time; + result.time2 += p2.time; + result.parsed_files += 1; + result.parsed_bytes += p1.bytes; + result.ratios_2over1.push(p2.time / p1.time); + } else { + result.skipped_files += 1; + result.skipped_bytes += p1.bytes; + } + } + return result; +} + +function print_result(result) { + print(result.name1, "\t", result.time1, "ms\t", 1e6 * result.time1 / result.parsed_bytes, 'ns/byte\t', result.parsed_bytes / (1e6 * result.time1), 'bytes/ns\t'); + print(result.name2, "\t", result.time2, "ms\t", 1e6 * result.time2 / result.parsed_bytes, 'ns/byte\t', result.parsed_bytes / (1e6 * result.time2), 'bytes/ns\t'); + print("Total parsed (scripts:", result.parsed_files, ", bytes:", result.parsed_bytes, ")"); + print("Total skipped (scripts:", result.skipped_files, ", bytes:", result.skipped_bytes, ")"); + print(result.name2, "/", result.name1, ":", result.time2 / result.time1); + print(result.name2, "/", result.name1, ":", spread(result.ratios_2over1, 0, 5, 0.05)); +} + +// Given a `table` of speed ratios, display a distribution chart of speed +// ratios. This is useful to check if the data is noisy, bimodal, and to easily +// eye-ball characteristics of the distribution. 
+function spread(table, min, max, step) { + // var chars = ["\xa0", "\u2591", "\u2592", "\u2593", "\u2588"]; + var chars = ["\xa0", "\u2581", "\u2582", "\u2583", "\u2584", "\u2585", "\u2586", "\u2587", "\u2588"]; + var s = ["\xa0", "\xa0", "" + min, "\xa0", "\xa0"]; + var ending = ["\xa0", "\xa0", "" + max, "\xa0", "\xa0"]; + var scale = "\xa0\xa0"; + var scale_values = ["โฐ", "ยน", "ยฒ", "ยณ", "โด", "โต", "โถ", "โท", "โธ", "โน"]; + var ranges = []; + var vmax = table.length / 10; + for (var i = min; i < max; i += step) { + ranges.push(0); + var decimal = i - Math.trunc(i); + var error = Math.abs(decimal - Math.round(10 * decimal) / 10); + decimal = Math.round(decimal * 10) % 10; + if (error < step / 2) + scale += scale_values[decimal]; + else + scale += "\xa0"; + } + for (var x of table) { + if (x < min || max < x) continue; + var idx = ((x - min) / step)|0; + ranges[idx] += 1; + } + var max_index = chars.length * s.length; + var ratio = max_index / vmax; + for (i = 0; i < s.length; i++) + s[i] += "\xa0\u2595"; + for (var v of ranges) { + var d = Math.min((v * ratio)|0, max_index - 1); + var offset = max_index; + for (i = 0; i < s.length; i++) { + offset -= chars.length; + var c = Math.max(0, Math.min(d - offset, chars.length - 1)); + s[i] += chars[c]; + } + } + for (i = 0; i < s.length; i++) + s[i] += "\u258f\xa0" + ending[i]; + var res = ""; + for (i = 0; i < s.length; i++) + res += "\n" + s[i]; + res += "\n" + scale; + return res; +} + +// NOTE: We have multiple strategies depending whether we want to check the +// throughput of the parser assuming the parser is cold/hot in memory, the data is +// cold/hot in the cache, and the adaptive CPU throttle is low/high. +// +// Ideally we should be comparing comparable things, but due to the adaptive +// behavior of CPU and Disk, we can only approximate it while keeping results +// comparable to what users might see. + +// Compare Hot-parsers on cold data. +function strategy_1() { + var res1 = for_all_files(parse_1, runs_per_script); + var res2 = for_all_files(parse_2, runs_per_script); + return compare(name_1, res1, name_2, res2); +} + +// Compare Hot-parsers on cold data, and swap parse order. +function strategy_2() { + var res2 = for_all_files(parse_2, runs_per_script); + var res1 = for_all_files(parse_1, runs_per_script); + return compare(name_1, res1, name_2, res2); +} + +// Interleaves N hot-parser results. (if N=1, then strategy_3 is identical to strategy_1) +// +// At the moment, this is assumed to be the best approach which might mimic how +// a helper-thread would behave if it was saturated with content to be parsed. +function strategy_3() { + var res1 = {}; + var res2 = {}; + var N = runs_per_script; + for (var n = 0; n < N; n++) { + for_all_files(parse_1, 1, "" + n, res1); + for_all_files(parse_2, 1, "" + n, res2); + } + return compare(name_1, res1, name_2, res2); +} + +// Compare cold parsers, with alternatetively cold/hot data. +// +// By swapping parser order of execution after each file, we expect that the +// previous parser execution would be enough to evict the other from the L2 +// cache, and as such cause the other parser to hit cold instruction cache where +// the instruction have to be reloaded. +// +// At the moment, this is assumed to be the best approach which might mimic how +// parsers are effectively used on the main thread. 
+function strategy_0() { + var path = "", content = ""; + var t_1= 0, t_2 = 0, time_1 = 0, time_2 = 0; + var count = 0, count_bytes = 0, skipped = 0, skipped_bytes = 0; + var parse1_first = false; + var list = os.file.listDir(dir); + var ratios_2over1 = []; + var parse1_first = true; + for (var file of list) { + path = os.path.join(dir, file); + if (skipList.includes(path)) { + continue; + } + content = ""; + try { + // print(Math.round(100 * f / list.length), file); + content = os.file.readRelativeToScript(path); + parse1_first = !parse1_first; // Math.random() > 0.5; + for (var i = 0; i < runs_per_script; i++) { + // Randomize the order in which parsers are executed as they are + // executed in the same process and the parsed content might be + // faster to load for the second parser as it is already in memory. + if (parse1_first) { + t_1 = parse_1(content); + t_2 = parse_2(content); + } else { + t_2 = parse_2(content); + t_1 = parse_1(content); + } + time_1 += t_1; + time_2 += t_2; + ratios_2over1.push(t_2 / t_1); + } + count++; + count_bytes += content.length; + } catch (e) { + // ignore all errors for now. + skipped++; + skipped_bytes += content.length; + skipList.push(path); + } + } + + return { + name1: name_1, + name2: name_2, + time1: time_1, + time2: time_2, + parsed_files: count * runs_per_script, + parsed_bytes: count_bytes * runs_per_script, + skipped_files: skipped * runs_per_script, + skipped_bytes: skipped_bytes * runs_per_script, + ratios_2over1: ratios_2over1, + }; +} + +var outputJSON = os.getenv("SMOOSH_BENCH_AS_JSON") !== undefined; +if (!outputJSON) { + print("Main thread comparison:"); +} +var main_thread_result = strategy_0(); +if (!outputJSON) { + print_result(main_thread_result); + print(""); + print("Off-thread comparison:"); +} +var off_thread_result = strategy_3(); +if (!outputJSON) { + print_result(off_thread_result); +} + +if (outputJSON) { + print(JSON.stringify({ + main_thread: main_thread_result, + off_thread: main_thread_result + })); +} + +if (skipFile && skipList.length > skipLen) { + var content = `[${skipList.map(s => `"${s}"`).join(",")}]`; + var data = new ArrayBuffer(content.length); + var view = new Uint8Array(data); + for (var i = 0; i < content.length; i++) { + view[i] = content.charCodeAt(i); + } + os.file.writeTypedArrayToFile(skipFile, view); +} diff --git a/third_party/rust/jsparagus/gecko-patches.txt b/third_party/rust/jsparagus/gecko-patches.txt new file mode 100644 index 0000000000..30c51f01a2 --- /dev/null +++ b/third_party/rust/jsparagus/gecko-patches.txt @@ -0,0 +1 @@ +D88970:1662383 diff --git a/third_party/rust/jsparagus/journal.md b/third_party/rust/jsparagus/journal.md new file mode 100644 index 0000000000..3536812ef2 --- /dev/null +++ b/third_party/rust/jsparagus/journal.md @@ -0,0 +1,272 @@ +## What I learned, what I wonder + + +### Stab 5 (simple LR, LR(1), then LALR(1)) + +Well. I learned enough to implement this, although there is still much I +don't understand. + +I learned a bit about what kind of phenomenon can render a grammar +outside XLL(1) (that is, LL(1) as extended by automated left-factoring +and left-recursion elimination); see `testFirstFirstConflict` in +`test.py` for a contrived example, and `testLeftHandSideExpression` for +a realistic one. + +I learned that the shift-reduce operator precedence parser I wrote for +SpiderMonkey is even less like a typical LR parser than I imagined. 
+ +I was stunned to find that the SLR parser I wrote first, including the +table generator, was *less* code than the predictive LL parser of stab +4. However, full LR(1) took rather a lot of code. + +I learned that I will apparently hand-code the computation of transitive +closures of sets under relations ten times before even considering +writing a general algorithm. The patterns I have written over and over +are: 1. `while not done:` visit every element already in the set, +iterating to a fixed point, which is this ludicrous O(*n*<sup>2</sup>) +in the number of pairs in the relation; 2. depth-first graph walking +with cycle detection, which can overflow the stack. + +I learned three ways to hack features into an LR parser generator (cf. how +easy it is to hack stuff into a recursive descent parser). The tricks I +know are: + +1. Add custom items. To add lookahead assertions, I just added a + lookahead element to the LRItem tuple. The trick then is to make + sure you are normalizing states that are actually identical, to + avoid combinatorial explosionโand eventually, I expect, table + compression. + +2. Add custom actions. I think I can support automatic semicolon + insertion by replacing the usual error action of some states with a + special ASI actions. + +3. Desugaring. The + [ECMAScript standard](https://tc39.es/ecma262/#sec-grammar-notation) + describes optional elements and parameterized nonterminals this way, + and for now at least, that's how we actually implement them. + +There's a lot still to learn here. + +* OMG, what does it all mean? I'm getting more comfortable with the + control flow ("calls" and "returns") of this system, but I wouldn't + say I understand it! + +* Why is lookahead, past the end of the current half-parsed + production, part of an LR item? What other kinds of item + embellishment could be done instead? + +* In what sense is an LR parser a DFA? I implemented it, but there's + more to it that I haven't grokked yet. + +* Is there just one DFA or many? What exactly is the "derived" grammar + that the DFA parses? How on earth does it magically turn out to be + regular? (This depends on it not extending past the first handle, + but I still don't quite see.) + +* If I faithfully implement the algorithms in the book, will it be + less of a dumpster fire? Smaller, more factored? + +* How can I tell if a transformation on grammars preserves the + property of being LR(k)? Factoring out a nonterminal, for example, + may not preserve LR(k)ness. Inlining probably always does. + +* Is there some variant of this that treats nonterminals more like + terminals? It's easy to imagine computing start sets and follow sets + that contain both kinds of symbols. Does that buy us anything? + + +Things I noticed: + +* I think Yacc allows bits of code in the middle of productions: + + nt1: T1 T2 nt2 { code1(); } T3 nt3 T4 { code2(); } + + That could be implemented by introducing a synthetic production + that contains everything up to the first code block: + + nt1_aux: T1 T2 nt2 { code1(); } + nt1: nt1_aux T3 nt3 T4 { code2(); } + + There is a principle that says code should happen only at the end of + a production: because LR states are superpositions of items. We + don't know which production we are really parsing until we reduce, + so we don't know which code to execute. 
+ +* Each state is reachable from an initial state by a finite sequence + of "pushes", each of which pushes either a terminal (a shift action) + or a nonterminal (a summary of a bunch of parsing actions, ending + with a reduce). + + States can sometimes be reached multiple ways (it's a state + transition graph). But regardless of which path you take, the symbols + pushed by the last few steps always match the symbols appearing to + the left of point in each of the state's LR items. (This implies + that those items have to agree on what has happened. Might make a + nice assertion.) + + + +### Stab 4 (nonrecursive table-driven predictive LL parser) + +I learned that testing that a Python program can do something deeply +recursive is kind of nontrivial. :-\ + +I learned that the predictive parser still takes two stacks (one +representing the future and one representing the past). It's not magic! +This makes me want to hop back to stab 3, optimize away the operand +stack, and see what kind of code I can get. + +It seems like recursive descent would be faster, but the table-driven +parser could be made to support incremental parsing (the state of the +algorithm is "just data", a pair of stacks, neither of which is the +parser program's native call stack). + + +### Stab 3 (recursive descent with principled left-recursion-elimination and left-factoring) + +I learned how to eliminate left recursion in a grammar (Algorithm 4.1 +from the book). I learned how to check that a grammar is LL(1) using +the start and follow sets, although I didn't really learn what LL(1) +means in any depth. (I'm just using it as a means to prove that the +grammar is unambiguous.) + +I learned from the book how to do a table-driven "nonrecursive +predictive parser". Something to try later. + +I came up with the "reduction symbol" thing. It seems to work as +expected! This allows me to transform the grammar, but still generate +parse trees reflecting the source grammar. However, the resulting code +is inefficient. Further optimization would improve it, but the +predictive parser will fare better even without optimization. + +I wonder what differences there are between LL(1) and LR(1) grammars. +(The book repeatedly says they are different, but the distinctions it +draws suggest differences like: left-recursive grammars can be LR but +never LL. That particular difference doesn't matter much to me, because +there's an algorithm for eliminating left recursion.) + + +### Stab 2 (recursive descent with ad hoc immediate-left-recursion-elimination) + +I learned it's easy for code to race ahead of understanding. +I learned that a little feature can mean a lot of complexity. + +I learned that it's probably hard to support indirect left-recursion using this approach. +We're able to twist left-recursion into a `while` loop because what we're doing is local to a single nonterminal's productions, +and they're all parsed by a single function. +Making this work across function boundaries would be annoying, +even ignoring the possibility that a nonterminal can be involved in multiple left-call cycles. + +I wonder if the JS spec uses any indirect left-recursion. + +I wonder if there's a nice formalization of a "grammar with actions" that abstracts away "implementation details", +so that we could prove two grammars equivalent, +not just in that they describe the same language, +but equivalent in output. +This could help me explore "grammar rewrites", +which could lead to usable optimizations. 
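+To make the "twist left-recursion into a `while` loop" trick above concrete,
+here is a minimal sketch in Python (illustrative only, not jsparagus code;
+the tiny token helper is made up). The immediately left-recursive rule
+`expr ::= expr "-" NUM | NUM` becomes one loop that folds each `- NUM` onto
+the result, so the tree stays left-associative like the original grammar:
+
+```python
+class Tokens:
+    def __init__(self, toks):
+        self.toks = list(toks)
+        self.pos = 0
+
+    def peek(self):
+        return self.toks[self.pos] if self.pos < len(self.toks) else None
+
+    def take(self):
+        tok = self.peek()
+        self.pos += 1
+        return tok
+
+def parse_expr(tokens):
+    # expr ::= expr "-" NUM | NUM, rewritten as expr ::= NUM ("-" NUM)*
+    result = tokens.take()
+    while tokens.peek() == "-":
+        tokens.take()
+        result = ("-", result, tokens.take())
+    return result
+
+# ((1 - 2) - 3), not (1 - (2 - 3)): the loop preserves left associativity.
+assert parse_expr(Tokens(["1", "-", "2", "-", "3"])) == ("-", ("-", "1", "2"), "3")
+```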
+ +I noticed that the ES spec contains this: + +> ### 13.6 The if Statement +> #### Syntax +> ``` +> IfStatement[Yield, Await, Return]: +> if ( Expression[+In, ?Yield, ?Await] ) Statement[?Yield, ?Await, ?Return] else Statement[?Yield, ?Await, ?Return] +> if ( Expression[+In, ?Yield, ?Await] ) Statement[?Yield, ?Await, ?Return] +> ``` +> +> Each `else` for which the choice of associated `if` is ambiguous shall +> be associated with the nearest possible `if` that would otherwise have +> no corresponding `else`. + +I wonder if this prose is effectively the same as adding a negative lookahead assertion +"[lookahead ≠ `else`]" at the end of the shorter production. + +(I asked bterlson and he thinks so.) + +I wonder if follow sets can be usefully considered as context-dependent. +What do I mean by this? +For example, `function` is certainly in the follow set of *Statement* in JS, +but there are plenty of contexts, like the rule `do Statement while ( Expression ) ;`, +where the nested *Statement* is never followed by `function`. +But does it matter? +I think it only matters if you're interested in better error messages. +Follow sets only matter to detect ambiguity in a grammar, +and *Statement* is ambiguous if it's ambiguous in *any* context. + + +### Stab 1 (very naive recursive descent) + +I learned that if you simply define a grammar as a set of rules, +there are all sorts of anomalies that can come up: + +* Vacant nonterminals (that do not match any input strings); + +* Nonterminals that match only infinite strings, like `a ::= X a`. + +* Cycles ("busy loops"), like `a ::= a`. + These always introduce ambiguity. + (You can also have cycles through multiple nonterminals: + `a ::= b; b ::= a`.) + +These in particular are easy to test for, with no false positives. +I wonder if there are other anomalies, +and if the "easiness" generalizes to all of them, and why. + +I know what it means for a grammar to be *ambiguous*: +it means there's at least one input with multiple valid parses. +I understand that parser generators can check for ambiguity. +But it's easiest to do so by imposing draconian restrictions. +I learned the "dangling `else` problem" is an ambiguity in exactly this sense. +I wonder if there's a principled way to deal with it. + +I know that a parse is a constructive proof that a string matches a grammar. + +I learned that start sets are important even in minimal parser generators. +This is interesting because they'll be a bit more interesting to compute +once we start considering empty productions. +I wonder if it turns out to still be pretty easy. +Does the start set of a possibly-empty production include its follow set? +(According to the dragon book, you add epsilon to the start set in this case.) + + +### Nice grammars + +I learned that the definition of a "grammar" +as a formal description of a language (= a set of strings) +is incomplete. + +Consider the Lisp syntax we're using: + +``` +sexpr ::= SYMBOL +sexpr ::= "(" tail + +tail ::= ")" +tail ::= sexpr tail +``` + +Nobody wants to parse Lisp like that. +There are two problems. + +One is expressive. +The `"("` and `")"` tokens should appear in the same production. +That way, the grammar says declaratively: these marks always appear in properly nesting pairs. + +``` +sexpr ::= SYMBOL +sexpr ::= "(" list ")" + +list ::= [empty] +list ::= sexpr list +``` + +The other problem has to do with *what you've got* when you get an automatically generated parse. 
+A grammar is more than just a description of a language, +to the extent we care about the form of the parse trees we get out of the parser. + +A grammar is a particular way of writing a parser, +and since we care about the parser's output, +we care about details of the grammar that would be mere "implementation details" otherwise. diff --git a/third_party/rust/jsparagus/js-quirks.md b/third_party/rust/jsparagus/js-quirks.md new file mode 100644 index 0000000000..20c621c92a --- /dev/null +++ b/third_party/rust/jsparagus/js-quirks.md @@ -0,0 +1,1036 @@ +## JS syntactic quirks + +> *To make a labyrinth, it takes* +> *Some good intentions, some mistakes.* +> —A. E. Stallings, “Daedal” + +JavaScript is rather hard to parse. Here is an in-depth accounting of +its syntactic quirks, with an eye toward actually implementing a parser +from scratch. + +With apologies to the generous people who work on the standard. Thanks +for doing that—better you than me. + +Thanks to [@bakkot](https://github.com/bakkot) and +[@mathiasbynens](https://github.com/mathiasbynens) for pointing out +several additional quirks. + +Problems are rated in terms of difficulty, from `(*)` = easy to `(***)` += hard. We’ll start with the easiest problems. + + +### Dangling else (*) + +If you know what this is, you may be excused. + +Statements like `if (EXPR) STMT if (EXPR) STMT else STMT` +are straight-up ambiguous in the JS formal grammar. +The ambiguity is resolved with +[a line of specification text](https://tc39.es/ecma262/#sec-if-statement): + +> Each `else` for which the choice of associated `if` is ambiguous shall +> be associated with the nearest possible `if` that would otherwise have +> no corresponding `else`. + +I love this sentence. Something about it cracks me up, I dunno... + +In a recursive descent parser, just doing the dumbest possible thing +correctly implements this rule. + +A parser generator has to decide what to do. In Yacc, you can use +operator precedence for this. + +Yacc aside: This should seem a little outrageous at first, as `else` is +hardly an operator. It helps if you understand what Yacc is doing. In LR +parsers, this kind of ambiguity in the grammar manifests as a +shift-reduce conflict. In this case, when we’ve already parsed `if ( +Expression ) Statement if ( Expression ) Statement` +and are looking at `else`, it’s unclear to Yacc +whether to reduce the if-statement or shift `else`. Yacc does not offer +a feature that lets us just say "always shift `else` here"; but there +*is* a Yacc feature that lets us resolve shift-reduce conflicts in a +rather odd, indirect way: operator precedence. We can resolve this +conflict by making `else` higher-precedence than the preceding symbol +`)`. + +Alternatively, I believe it’s equivalent to add "[lookahead ≠ `else`]" +at the end of the IfStatement production that doesn’t have an `else`. + + +### Other ambiguities and informal parts of the spec (*) + +Not all of the spec is as formal as it seems at first. Most of the stuff +in this section is easy to deal with, but #4 is special. + +1. The lexical grammar is ambiguous: when looking at the characters `<<=`, + there is the question of whether to parse that as one token `<<=`, two + tokens (`< <=` or `<< =`), or three (`< < =`). + + Of course every programming language has this, and the fix is one + sentence of prose in the spec: + + > The source text is scanned from left to right, repeatedly taking the + > longest possible sequence of code points as the next input element. 
+ + This is easy enough for hand-coded lexers, and for systems that are + designed to use separate lexical and syntactic grammars. (Other + parser generators may need help to avoid parsing `functionf(){}` as + a function.) + +2. The above line of prose does not apply *within* input elements, in + components of the lexical grammar. In those cases, the same basic + idea ("maximum munch") is specified using lookahead restrictions at + the end of productions: + + > *LineTerminatorSequence* :: + > <LF> + > <CR>[lookahead ≠ <LF>] + > <LS> + > <PS> + > <CR><LF> + + The lookahead restriction prevents a CR LF sequence from being + parsed as two adjacent *LineTerminatorSequence*s. + + This technique is used in several places, particularly in + [*NotEscapeSequences*](https://tc39.es/ecma262/#prod-NotEscapeSequence). + +3. Annex B.1.4 extends the syntax for regular expressions, making the + grammar ambiguous. Again, a line of prose explains how to cope: + + > These changes introduce ambiguities that are broken by the + > ordering of grammar productions and by contextual + > information. When parsing using the following grammar, each + > alternative is considered only if previous production alternatives + > do not match. + +4. Annex B.1.2 extends the syntax of string literals to allow legacy + octal escape sequences, like `\033`. It says: + + > The syntax and semantics of 11.8.4 is extended as follows except + > that this extension is not allowed for strict mode code: + + ...followed by a new definition of *EscapeSequence*. + + So there are two sets of productions for *EscapeSequence*, and an + implementation is required to implement both and dynamically switch + between them. + + This means that `function f() { "\033"; "use strict"; }` is a + SyntaxError, even though the octal escape is scanned before we know + we're in strict mode. + +For another ambiguity, see "Slashes" below. + + +### Unicode quirks + +JavaScript source is Unicode and usually follows Unicode rules for things +like identifiers and whitespace, but it has a few special cases: `$`, +`_`, `U+200C ZERO WIDTH NON-JOINER`, and `U+200D ZERO WIDTH JOINER` are +legal in identifiers (the latter two only after the first character), and +`U+FEFF ZERO WIDTH NO-BREAK SPACE` (also known as the byte-order mark) is +treated as whitespace. + +It also allows any code point, including surrogate halves, even though the +Unicode standard says that unpaired surrogate halves should be treated as +encoding errors. + + +### Legacy octal literals and escape sequences (*) + +This is more funny than difficult. + +In a browser, in non-strict code, every sequence of decimal digits (not +followed by an identifier character) is a *NumericLiteral* token. + +If it starts with `0`, with more digits after, then it's a legacy Annex +B.1.1 literal. If the token contains an `8` or a `9`, it's a decimal +number. Otherwise, hilariously, it's octal. + +``` +js> [067, 068, 069, 070] +[55, 68, 69, 56] +``` + +There are also legacy octal escape sequences in strings, and these have +their own quirks. `'\07' === '\u{7}'`, but `'\08' !== '\u{8}'` since 8 +is not an octal digit. Instead `'\08' === '\0' + '8'`, because `\0` +followed by `8` or `9` is a legacy octal escape sequence representing +the null character. (Not to be confused with `\0` in strict code, not +followed by a digit, which still represents the null character, but +doesn't count as octal.) + +None of this is hard to implement, but figuring out what the spec says +is hard. 
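+Here is a tiny Python sketch of the Annex B.1.1 rule described above
+(illustrative only, not jsparagus code; it assumes the token is already known
+to be a bare run of decimal digits in non-strict code):
+
+```python
+def legacy_numeric_literal_value(token: str) -> int:
+    # Leading `0` with more digits after it, and no `8` or `9` anywhere:
+    # a legacy octal literal. Anything else is an ordinary decimal literal.
+    if token.startswith("0") and len(token) > 1 and not any(c in "89" for c in token):
+        return int(token, 8)
+    return int(token, 10)
+
+# Matches the example above: [067, 068, 069, 070] evaluates to [55, 68, 69, 56].
+assert [legacy_numeric_literal_value(t)
+        for t in ("067", "068", "069", "070")] == [55, 68, 69, 56]
+```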
+ + +### Strict mode (*) + +*(entangled with: lazy compilation)* + +A script or function can start with this: + +```js +"use strict"; +``` + +This enables ["strict mode"](https://tc39.es/ecma262/#sec-strict-mode-of-ecmascript). +Additionally, all classes and modules are strict mode code. + +Strict mode has both parse-time and run-time effects. Parse-time effects +include: + +* Strict mode affects the lexical grammar: octal integer literals are + SyntaxErrors, octal character escapes are SyntaxErrors, and a + handful of words like `private` and `interface` are reserved (and + thus usually SyntaxErrors) in strict mode. + + Like the situation with slashes, this means it is not possible to + implement a complete lexer for JS without also parsingโat least + enough to detect class boundaries, "use strict" directives in + functions, and function boundaries. + +* Itโs a SyntaxError to have bindings named `eval` or `arguments` in + strict mode code, or to assign to `eval` or `arguments`. + +* Itโs a SyntaxError to have two argument bindings with the same name + in a strict function. + + Interestingly, you donโt always know if youโre in strict mode or not + when parsing arguments. + + ```js + function foo(a, a) { + "use strict"; + } + ``` + + When the implementation reaches the Use Strict Directive, it must + either know that `foo` has two arguments both named `a`, or switch + to strict mode, go back, and reparse the function from the + beginning. + + Fortunately an Early Error rule prohibits mixing `"use strict"` with + more complex parameter lists, like `function foo(x = eval('')) {`. + +* The expression syntax โ`delete` *Identifier*โ and the abominable + *WithStatement* are banned in strict mode. + + +### Conditional keywords (**) + +In some programming languages, you could write a lexer that has rules +like + +* When you see `if`, return `Token::If`. + +* When you see something like `apple` or `arrow` or `target`, + return `Token::Identifier`. + +Not so in JavaScript. The input `if` matches both the terminal `if` and +the nonterminal *IdentifierName*, both of which appear in the high-level +grammar. The same goes for `target`. + +This poses a deceptively difficult problem for table-driven parsers. +Such parsers run on a stream of token-ids, but the question of which +token-id to use for a word like `if` or `target` is ambiguous. The +current parser state can't fully resolve the ambiguity: there are cases +like `class C { get` where the token `get` might match either as a +keyword (the start of a getter) or as an *IdentifierName* (a method or +property named `get`) in different grammatical productions. + +All keywords are conditional, but some are more conditional than others. +The rules are inconsistent to a tragicomic extent. Keywords like `if` +that date back to JavaScript 1.0 are always keywords except when used as +property names or method names. They can't be variable names. Two +conditional keywords (`await` and `yield`) are in the *Keyword* list; +the rest are not. New syntax that happened to be introduced around the +same time as strict mode was awarded keyword status in strict mode. The +rules are scattered through the spec. All this interacts with `\u0065` +Unicode escape sequences somehow. Itโs just unbelievably confusing. + +(After writing this section, I +[proposed revisions to the specification](https://github.com/tc39/ecma262/pull/1694) +to make it a little less confusing.) 
+ +* Thirty-six words are always reserved: + + > `break` `case` `catch` `class` `const` `continue` `debugger` + > `default` `delete` `do` `else` `enum` `export` `extends` `false` + > `finally` `for` `function` `if` `import` `in` `instanceof` `new` + > `null` `return` `super` `switch` `this` `throw` `true` `try` + > `typeof` `var` `void` `while` `with` + + These tokens can't be used as names of variables or arguments. + They're always considered special *except* when used as property + names, method names, or import/export names in modules. + + ```js + // property names + let obj = {if: 3, function: 4}; + assert(obj.if == 3); + + // method names + class C { + if() {} + function() {} + } + + // imports and exports + import {if as my_if} from "modulename"; + export {my_if as if}; + ``` + +* Two more words, `yield` and `await`, are in the *Keyword* list but + do not always act like keywords in practice. + + * `yield` is a *Keyword*; but it can be used as an identifier, + except in generators and strict mode code. + + This means that `yield - 1` is valid both inside and outside + generators, with different meanings. Outside a generator, itโs + subtraction. Inside, it yields the value **-1**. + + That reminds me of the Groucho Marx line: Outside of a dog, a + book is a manโs best friend. Inside of a dog itโs too dark to + read. + + * `await` is like that, but in async functions. Also itโs not a + valid identifier in modules. + + Conditional keywords are entangled with slashes: `yield /a/g` is two + tokens in a generator but five tokens elsewhere. + +* In strict mode code, `implements`, `interface`, `package`, + `private`, `protected`, and `public` are reserved (via Early Errors + rules). + + This is reflected in the message and location information for + certain syntax errors: + + ``` + SyntaxError: implements is a reserved identifier: + class implements {} + ......^ + + SyntaxError: implements is a reserved identifier: + function implements() { "use strict"; } + ....................................^ + ``` + +* `let` is not a *Keyword* or *ReservedWord*. Usually it can be an + identifier. It is special at the beginning of a statement or after + `for (` or `for await (`. + + ```js + var let = [new Date]; // ok: let as identifier + let v = let; // ok: let as keyword, then identifier + let let; // SyntaxError: banned by special early error rule + let.length; // ok: `let .` -> ExpressionStatement + let[0].getYear(); // SyntaxError: `let [` -> LexicalDeclaration + ``` + + In strict mode code, `let` is reserved. + +* `static` is similar. Itโs a valid identifier, except in strict + mode. Itโs only special at the beginning of a *ClassElement*. + + In strict mode code, `static` is reserved. + +* `async` is similar, but trickier. Itโs an identifier. It is special + only if itโs marking an `async` function, method, or arrow function + (the tough case, since you wonโt know itโs an arrow function until + you see the `=>`, possibly much later). + + ```js + function async() {} // normal function named "async" + + async(); // ok, `async` is an Identifier; function call + async() => {}; // ok, `async` is not an Identifier; async arrow function + ``` + +* `of` is special only in one specific place in `for-of` loop syntax. 
+ + ```js + var of = [1, 2, 3]; + for (of of of) console.log(of); // logs 1, 2, 3 + ``` + + Amazingly, both of the following are valid JS code: + + ```js + for (async of => {};;) {} + for (async of []) {} + ``` + + In the first line, `async` is a keyword and `of` is an identifier; + in the second line it's the other way round. + + Even a simplified JS grammar can't be LR(1) as long as it includes + the features used here! + +* `get` and `set` are special only in a class or an object literal, + and then only if followed by a PropertyName: + + ```js + var obj1 = {get: f}; // `get` is an identifier + var obj2 = {get x() {}}; // `get` means getter + + class C1 { get = 3; } // `get` is an identifier + class C2 { get x() {} } // `get` means getter + ``` + +* `target` is special only in `new.target`. + +* `arguments` and `eval` can't be binding names, and can't be assigned + to, in strict mode code. + +To complicate matters, there are a few grammatical contexts where both +*IdentifierName* and *Identifier* match. For example, after `var {` +there are two possibilities: + +```js +// Longhand properties: BindingProperty -> PropertyName -> IdentifierName +var { xy: v } = obj; // ok +var { if: v } = obj; // ok, `if` is an IdentifierName + +// Shorthand properties: BindingProperty -> SingleNameBinding -> BindingIdentifier -> Identifier +var { xy } = obj; // ok +var { if } = obj; // SyntaxError: `if` is not an Identifier +``` + + +### Escape sequences in keywords + +*(entangled with: conditional keywords, ASI)* + +You can use escape sequences to write variable and property names, but +not keywords (including contextual keywords in contexts where they act +as keywords). + +So `if (foo) {}` and `{ i\u0066: 0 }` are legal but `i\u0066 (foo)` is not. + +And you don't necessarily know if you're lexing a contextual keyword +until the next token: `({ g\u0065t: 0 })` is legal, but +`({ g\u0065t x(){} })` is not. + +And for `let` it's even worse: `l\u0065t` by itself is a legal way to +reference a variable named `let`, which means that + +```js +let +x +``` +declares a variable named `x`, while, thanks to ASI, + +```js +l\u0065t +x +``` +is a reference to a variable named `let` followed by a reference to a +variable named `x`. + + +### Early errors (**) + +*(entangled with: lazy parsing, conditional keywords, ASI)* + +Some early errors are basically syntactic. Others are not. + +This is entangled with lazy compilation: "early errors" often involve a +retrospective look at an arbitrarily large glob of code we just parsed, +but in Beast Mode weโre not building an AST. In fact we would like to be +doing as little bookkeeping as possible. + +Even setting that aside, every early error is a special case, and itโs +just a ton of rules that all have to be implemented by hand. + +Here are some examples of Early Error rulesโsetting aside restrictions +that are covered adequately elsewhere: + +* Rules about names: + + * Rules that affect the set of keywords (character sequences that + match *IdentifierName* but are not allowed as binding names) based + on whether or not weโre in strict mode code, or in a + *Module*. Affected identifiers include `arguments`, `eval`, `yield`, + `await`, `let`, `implements`, `interface`, `package`, `private`, + `protected`, `public`, `static`. + + * One of these is a strangely worded rule which prohibits using + `yield` as a *BindingIdentifier*. 
At first blush, this seems + like it could be enforced in the grammar, but that approach + would make this a valid program, due to ASI: + + ```js + let + yield 0; + ``` + + Enforcing the same rule using an Early Error prohibits ASI here. + It works by exploiting the detailed inner workings of ASI case + 1, and arranging for `0` to be "the offending token" rather than + `yield`. + + * Lexical variable names have to be unique within a scope: + + * Lexical variables (`let` and `const`) canโt be declared more + than once in a block, or both lexically declared and + declared with `var`. + + * Lexically declared variables in a function body canโt have the same + name as argument bindings. + + * A lexical variable canโt be named `let`. + + * Common-sense rules dealing with unicode escape sequences in + identifiers. + +* Common-sense rules about regular expression literals. (They have to + actually be valid regular expressions, and unrecognized flags are + errors.) + +* The number of string parts that a template string can have is + limited to 2<sup>32</sup> − 1. + +* Invalid Unicode escape sequences, like `\7` or `\09` or `\u{3bjq`, are + banned in non-tagged templates (in tagged templates, they are allowed). + +* The *SuperCall* syntax is allowed only in derived class + constructors. + +* `const x;` without an initializer is a Syntax Error. + +* A direct substatement of an `if` statement, loop statement, or + `with` statement canโt be a labelled `function`. + +* Early errors are used to hook up cover grammars. + + * Early errors are also used in one case to avoid having to + specify a very large refinement grammar when *ObjectLiteral* + almost covers *ObjectAssignmentPattern*: + [sorry, too complicated to explain](https://tc39.es/ecma262/#sec-object-initializer-static-semantics-early-errors). + +* Early errors are sometimes used to prevent parsers from needing to + backtrack too much. + + * When parsing `async ( x = await/a/g )`, you don't know until the + next token if this is an async arrow or a call to a function named + `async`. This means you can't even tokenize properly, because in + the former case the thing following `x =` is two divisions and in + the latter case it's an *AwaitExpression* of a regular expression. + So an Early Error forbids having `await` in parameters at all, + allowing parsers to immediately throw an error if they find + themselves in this case. + +Many strict mode rules are enforced using Early Errors, but others +affect runtime semantics. + +<!-- +* I think the rules about assignment targets are related to cover + grammars. Not sure. + + * Expressions used in assignment context (i.e. as the operands of `++` + and `--`, the left operand of assignment, the loop variable of a + `for-in/for-of` loop, or sub-targets in destructuring) must be valid + assignment targets. + + * In destructuring assignment, `[...EXPR] = x` is an error if `EXPR` + is an array or object destructuring target. + +--> + + +### Boolean parameters (**) + +Some nonterminals are parameterized. (Search for โparameterized +productionโ in [this spec +section](https://tc39.es/ecma262/#sec-grammar-notation).) + +Implemented naively (e.g. by macro expansion) in a parser generator, +each parameter could nearly double the size of the parser. Instead, the +parameters must be tracked at run time somehow. + + +### Lookahead restrictions (**) + +*(entangled with: restricted productions)* + +TODO (I implemented this by hacking the entire LR algorithm. 
Most every +part of it is touched, although in ways that seem almost obvious once +you understand LR inside and out.) + +(Note: It may seem like all of the lookahead restrictions in the spec +are really just a way of saying โthis production takes precedence over +that oneโโfor example, that the lookahead restriction on +*ExpressionStatement* just means that other productions for statements +and declarations take precedence over it. But that isn't accurate; you +can't have an *ExpressionStatement* that starts with `{`, even if it +doesn't parse as a *Block* or any other kind of statement.) + + +### Automatic Semicolon Insertion (**) + +*(entangled with: restricted productions, slashes)* + +Most semicolons at the end of JS statements and declarations โmay be +omitted from the source text in certain situationsโ. This is called +[Automatic Semicolon +Insertion](https://tc39.es/ecma262/#sec-automatic-semicolon-insertion), +or ASI for short. + +The specification for this feature is both very-high-level and weirdly +procedural (โWhen, as the source text is parsed from left to right, a +token is encountered...โ, as if the specification is telling a story +about a browser. As far as I know, this is the only place in the spec +where anything is assumed or implied about the internal implementation +details of parsing.) But it would be hard to specify ASI any other way. + +Wrinkles: + +1. Whitespace is significant (including whitespace inside comments). + Most semicolons in the grammar are optional only at the end of a + line (or before `}`, or at the end of the program). + +2. The ending semicolon of a `do`-`while` statement is extra optional. + You can always omit it. + +3. A few semicolons are never optional, like the semicolons in `for (;;)`. + + This means thereโs a semicolon in the grammar that is optionally + optional! This one: + + > *LexicalDeclaration* : *LetOrConst* *BindingList* `;` + + Itโs usually optional, but not if this is the *LexicalDeclaration* + in `for (let i = 0; i < 9; i++)`! + +4. Semicolons are not inserted only as a last resort to avoid + SyntaxErrors. That turned out to be too error-prone, so there are + also *restricted productions* (see below), where semicolons are more + aggressively inferred. + +5. In implementations, ASI interacts with the ambiguity of *slashes* + (see below). + +A recursive descent parser implements ASI by calling a special method +every time it needs to parse a semicolon that might be optional. The +special method has to peek at the next token and consume it only if itโs +a semicolon. This would not be so bad if it werenโt for slashes. + +In a parser generator, ASI can be implemented using an error recovery +mechanism. + +I think the [error recovery mechanism in +yacc/Bison](https://www.gnu.org/software/bison/manual/bison.html#Error-Recovery) +is too impreciseโwhen an error happens, it discards states from the +stack searching for a matching error-handling rule. The manual says +โError recovery strategies are necessarily guesses.โ + +But hereโs a slightly more careful error recovery mechanism that could +do the job: + +1. For each production in the ES spec grammar where ASI could happen, e.g. + + ``` + ImportDeclaration ::= `import` ModuleSpecifier `;` + { import_declaration($2); } + ``` + + add an ASI production, like this: + + ``` + ImportDeclaration ::= `import` ModuleSpecifier [ERROR] + { check_asi(); import_declaration($2); } + ``` + + What does this mean? This production can be matched, like any other + production, but it's a fallback. 
All other productions take + precedence. + +2. While generating the parser, treat `[ERROR]` as a terminal + symbol. It can be included in start sets and follow sets, lookahead, + and so forth. + +3. At run time, when an error happens, synthesize an `[ERROR]` token. + Let that bounce through the state machine. It will cause zero or + more reductions. Then, it might actually match a production that + contains `[ERROR]`, like the ASI production above. + + Otherwise, weโll get another errorโthe entry in the parser table for + an `[ERROR]` token at this state will be an `error` entry. Then we + really have a syntax error. + +This solves most of the ASI issues: + +* [x] Whitespace sensitivity: That's what `check_asi()` is for. It + should signal an error if we're not at the end of a line. + +* [x] Special treatment of `do`-`while` loops: Make an error production, + but don't `check_asi()`. + +* [x] Rule banning ASI in *EmptyStatement* or `for(;;)`: + Easy, don't create error productions for those. + + * [x] Banning ASI in `for (let x=1 \n x<9; x++)`: Manually adjust + the grammar, copying *LexicalDeclaration* so that there's a + *LexicalDeclarationNoASI* production used only by `for` + statements. Not a big deal, as it turns out. + +* [x] Slashes: Perhaps have `check_asi` reset the lexer to rescan the + next token, if it starts with `/`. + +* [ ] Restricted productions: Not solved. Read on. + + +### Restricted productions (**) + +*(entangled with: ASI, slashes)* + +Line breaks arenโt allowed in certain places. For example, the following +is not a valid program: + + throw // SyntaxError + new Error(); + +For another example, this function contains two statements, not one: + + function f(g) { + return // ASI + g(); + } + +The indentation is misleading; actually ASI inserts a semicolon at the +end of the first line: `return; g();`. (This function always returns +undefined. The second statement is never reached.) + +These restrictions apply even to multiline comments, so the function + +```js +function f(g) { + return /* + */ g(); +} +``` +contains two statements, just as the previous example did. + +Iโm not sure why these rules exist, but itโs probably because (back in +the Netscape days) users complained about the bizarre behavior of +automatic semicolon insertion, and so some special do-what-I-mean hacks +were put in. + +This is specified with a weird special thing in the grammar: + +> *ReturnStatement* : `return` [no *LineTerminator* here] *Expression* `;` + +This is called a *restricted production*, and itโs unfortunately +necessary to go through them one by one, because there are several +kinds. Note that the particular hack required to parse them in a +recursive descent parser is a little bit different each time. + +* After `continue`, `break`, or `return`, a line break triggers ASI. + + The relevant productions are all statements, and in each case + thereโs an alternative production that ends immediately with a + semicolon: `continue ;` `break ;` and `return ;`. + + Note that the alternative production is *not* restricted: e.g. a + *LineTerminator* can appear between `return` and `;`: + + ```js + if (x) + return // ok + ; + else + f(); + ``` + +* After `throw`, a line break is a SyntaxError. + +* After `yield`, a line break terminates the *YieldExpression*. + + Here the alternative production is simply `yield`, not `yield ;`. + +* In a post-increment or post-decrement expression, there canโt be a + line break before `++` or `--`. + + The purpose of this rule is subtle. 
It triggers ASI and thus prevents + syntax errors: + + ```js + var x = y // ok: semicolon inserted here + ++z; + ``` + + Without the restricted production, `var x = y ++` would parse + successfully, and the โoffending tokenโ would be `z`. It would be + too late for ASI. + + However, the restriction can of course also *cause* a SyntaxError: + + ```js + var x = (y + ++); // SyntaxError + ``` + +As we said, recursive descent parsers can implement these rules with hax. + +In a generated parser, there are a few possible ways to implement +them. Here are three. If you are not interested in ridiculous +approaches, you can skip the first two. + +* Treat every token as actually a different token when it appears + after a line break: `TokenType::LeftParen` and + `TokenType::LeftParenAfterLineBreak`. Of course the parser + generator can treat these exactly the same in normal cases, and + automatically generate identical table entries (or whatever) except + in states where thereโs a relevant restricted production. + +* Add a special LineTerminator token. Normally, the lexer skips + newlines and never emits this token. However, if the current state + has a relevant restricted production, the lexer knows this and emits + a LineTerminator for the first line break it sees; and the parser + uses that token to trigger an error or transition to another state, + as appropriate. + +* When in a state that has a relevant restricted production, change + states if thereโs a line break before the next token. That is, + split each such state into two: the one we stay in when thereโs not + a line break, and the one we jump to if there is a line break. + +In all cases itโll be hard to have confidence that the resulting parser +generator is really sound. (That is, it might not properly reject all +ambiguous grammars.) I donโt know exactly what property of the few +special uses in the ES grammar makes them seem benign. + + +### Slashes (**) + +*(entangled with: ASI, restricted productions)* + +When you see `/` in a JS program, you donโt know if thatโs a +division operator or the start of a regular expression unless youโve +been paying attention up to that point. + +[The spec:](https://tc39.es/ecma262/#sec-ecmascript-language-lexical-grammar) + +> There are several situations where the identification of lexical input +> elements is sensitive to the syntactic grammar context that is +> consuming the input elements. This requires multiple goal symbols for +> the lexical grammar. + +You might think the lexer could treat `/` as an operator only if the +previous token is one that can be the last token of an expression (a set +that includes literals, identifiers, `this`, `)`, `]`, and `}`). To see +that this does not work, consider: + +```js +{} /x/ // `/` after `}` is regexp +({} / 2) // `/` after `}` is division + +for (g of /(a)(b)/) {} // `/` after `of` is regexp +var of = 6; of / 2 // `/` after `of` is division + +throw /x/; // `/` after `throw` is regexp +Math.throw / 2; // `/` after `throw` is division + +++/x/.lastIndex; // `/` after `++` is regexp +n++ / 2; // `/` after `++` is division +``` + +So how can the spec be implemented? + +In a recursive descent parser, you have to tell the lexer which goal +symbol to use every time you ask for a token. And you have to make sure, +if you look ahead at a token, but *donโt* consume it, and fall back on +another path that can accept a *RegularExpressionLiteral* or +*DivPunctuator*, that you did not initially lex it incorrectly. 
We have +assertions for this and it is a bit of a nightmare when we have to touch +it (which is thankfully rare). Part of the problem is that the token +youโre peeking ahead at might not be part of the same production at all. +Thanks to ASI, it might be the start of the next statement, which will +be parsed in a faraway part of the Parser. + +A table-driven parser has it easy here! The lexer can consult the state +table and see which kind of token can be accepted in the current +state. This is closer to what the spec actually says. + +Two minor things to watch out for: + +* The nonterminal *ClassTail* is used both at the end of + *ClassExpression*, which may be followed by `/`; and at the end of + *ClassDeclaration*, which may be followed by a + *RegularExpressionLiteral* at the start of the next + statement. Canonical LR creates separate states for these two uses + of *ClassTail*, but the LALR algorithm will unify them, creating + some states that have both `/` and *RegularExpressionLiteral* in the + follow set. In these states, determining which terminal is actually + allowed requires looking not only at the current state, but at the + current stack of states (to see one level of grammatical context). + +* Since this decision depends on the parser state, and automatic + semicolon insertion adjusts the parser state, a parser may need to + re-scan a token after ASI. + +In other kinds of generated parsers, at least the lexical goal symbol +can be determined automatically. + + +### Lazy compilation and scoping (**) + +*(entangled with: arrow functions)* + +JS engines *lazily compile* function bodies. During parsing, when the +engine sees a `function`, it switches to a high-speed parsing mode +(which I will call โBeast Modeโ) that just skims the function and checks +for syntax errors. Beast Mode does not compile the code. Beast Mode +doesnโt even create AST nodes. All that will be done later, on demand, +the first time the function is called. + +The point is to get through parsing *fast*, so that the script can start +running. In browsers, `<script>` compilation usually must finish before +we can show the user any meaningful content. Any part of it that can be +deferred must be deferred. (Bonus: many functions are never called, so +the work is not just deferred but avoided altogether.) + +Later, when a function is called, we can still defer compilation of +nested functions inside it. + +So what? Seems easy enough, right? Well... + +Local variables in JS are optimized. To generate reasonable code for the +script or function we are compiling, we need to know which of its local +variables are used by nested functions, which we are *not* currently +compiling. That is, we must know which variables *occur free in* each +nested function. So scoping, which otherwise could be done as a separate +phase of compilation, must be done during parsing in Beast Mode. + +Getting the scoping of arrow functions right while parsing is tricky, +because itโs often not possible to know when youโre entering a scope +until later. Consider parsing `(a`. This could be the beginning of an +arrow function, or not; we might not know until after we reach the +matching `)`, which could be a long way away. + +Annex B.3.3 adds extremely complex rules for scoping for function +declarations which makes this especially difficult. 
In + +```js +function f() { + let x = 1; + return function g() { + { + function x(){} + } + { + let x; + } + return x; + }; +} +``` + +the function `g` does not use the initial `let x`, but in + +```js +function f() { + let x = 1; + return function g() { + { + { + function x(){} + } + let x; + } + return x; + }; +} +``` +it does. + +[Here](https://dev.to/rkirsling/tales-from-ecma-s-crypt-annex-b-3-3-56go) +is a good writeup of what's going on in these examples. + + +### Arrow functions, assignment, destructuring, and cover grammars (\*\*\*) + +*(entangled with: lazy compilation)* + +Suppose the parser is scanning text from start to end, when it sees a +statement that starts with something like this: + +```js +let f = (a +``` + +The parser doesnโt know yet if `(a ...` is *ArrowParameters* or just a +*ParenthesizedExpression*. Either is possible. Weโll know when either +(1) we see something that rules out the other case: + +```js +let f = (a + b // can't be ArrowParameters + +let f = (a, b, ...args // can't be ParenthesizedExpression +``` + +or (2) we reach the matching `)` and see if the next token is `=>`. + +Probably (1) is a pain to implement. + +To keep the language nominally LR(1), the standard specifies a โcover +grammarโ, a nonterminal named +*CoverParenthesizedExpressionAndArrowParameterList* that is a superset +of both *ArrowParameters* and *ParenthesizedExpression*. The spec +grammar uses the *Cover* nonterminal in both places, so technically `let +f = (a + b) => 7;` and `let f = (a, b, ...args);` are both syntactically +valid *Script*s, according to the formal grammar. But there are Early +Error rules (that is, plain English text, not part of the formal +grammar) that say arrow functionsโ parameters must match +*ArrowParameters*, and parenthesized expressions must match +*ParenthesizedExpression*. + +So after the initial parse, the implementation must somehow check that +the *CoverParenthesizedExpressionAndArrowParameterList* really is valid +in context. This complicates lazy compilation because in Beast Mode we +are not even building an AST. Itโs not easy to go back and check. + +Something similar happens in a few other cases: the spec is written as +though syntactic rules are applied after the fact: + +* *CoverCallExpressionAndAsyncArrowHead* covers the syntax `async(x)` + which looks like a function call and also looks like the beginning + of an async arrow function. + +* *ObjectLiteral* is a cover grammar; it covers both actual object + literals and the syntax `propertyName = expr`, which is not valid in + object literals but allowed in destructuring assignment: + + ```js + var obj = {x = 1}; // SyntaxError, by an early error rule + + ({x = 1} = {}); // ok, assigns 1 to x + ``` + +* *LeftHandSideExpression* is used to the left of `=` in assignment, + and as the operand of postfix `++` and `--`. But this is way too lax; + most expressions shouldnโt be assigned to: + + ```js + 1 = 0; // matches the formal grammar, SyntaxError by an early error rule + + null++; // same + + ["a", "b", "c"]++; // same + ``` + + +## Conclusion + +What have we learned today? + +* Do not write a JS parser. + +* JavaScript has some syntactic horrors in it. But hey, you don't + make the world's most widely used programming language by avoiding + all mistakes. You do it by shipping a serviceable tool, in the right + circumstances, for the right users. 
diff --git a/third_party/rust/jsparagus/js_parser/README.md b/third_party/rust/jsparagus/js_parser/README.md new file mode 100644 index 0000000000..a8a814619d --- /dev/null +++ b/third_party/rust/jsparagus/js_parser/README.md @@ -0,0 +1,67 @@ +## jsparagus/js_parser: Generating a parser for JavaScript + +In this directory: + +* **esgrammar.pgen** A grammar for the mini-language the ECMAScript + standard uses to describe ES grammar. + +* **es.esgrammar** - The actual grammar for ECMAScript, in emu-grammar + format, extracted automatically from the spec. + +* **extract_es_grammar.py** - The script that creates *es.esgrammar*. + +* **es-simplified.esgrammar** - A hacked version of *es.esgrammar* that + jsparagus can actually handle. + +* **generate_js_parser_tables.py** - A script to generate a JS parser + based on *es-simplified.esgrammar*. Read on for instructions. + + +## How to run it + +To generate a parser, follow these steps: + +```console +$ cd .. +$ make init +$ make all +``` + +**Note:** The last step currently takes about 35 seconds to run on my +laptop. jsparagus is slow. + +Once you're done, to see your parser run, try this: + +```console +$ cd crates/driver +$ cargo run --release +``` + +The build also produces a copy of the JS parser in Python. +After `make all`, you can use `make jsdemo` to run that. + + +### How simplified is "es-simplified"? + +Here are the differences between *es.esgrammar*, the actual ES grammar, +and *es-simplified.esgrammar*, the simplified version that jsparagus can +actually handle: + +* The four productions with [~Yield] and [~Await] conditions are dropped. + This means that `yield` and `await` do not match *IdentifierReference* + or *LabelIdentifier*. I think it's better to do that in the lexer. + +* Truncated lookahead. + + `ValueError: unsupported: lookahead > 1 token, [['{'], ['function'], ['async', ('no-LineTerminator-here',), 'function'], ['class'], ['let', '[']]` + +* Delete a rule that uses `but not` since it's not implemented. + + Identifier : + IdentifierName but not ReservedWord + + Making sense of this rule in the context of an LR parser is an + interesting task; see issue #28. + +* Ban loops of the form `for (async of EXPR) STMT` by adjusting a + lookahead assertion. The grammar is not LR(1). 
diff --git a/third_party/rust/jsparagus/js_parser/__init__.py b/third_party/rust/jsparagus/js_parser/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/third_party/rust/jsparagus/js_parser/__init__.py diff --git a/third_party/rust/jsparagus/js_parser/es-lexical-simplified.esgrammar b/third_party/rust/jsparagus/js_parser/es-lexical-simplified.esgrammar new file mode 100644 index 0000000000..6b0fd1baf6 --- /dev/null +++ b/third_party/rust/jsparagus/js_parser/es-lexical-simplified.esgrammar @@ -0,0 +1,485 @@ +InputElementDiv :: + WhiteSpace + LineTerminator + Comment + CommonToken + DivPunctuator + RightBracePunctuator + +InputElementRegExp :: + WhiteSpace + LineTerminator + Comment + CommonToken + RightBracePunctuator + RegularExpressionLiteral + +InputElementRegExpOrTemplateTail :: + WhiteSpace + LineTerminator + Comment + CommonToken + RegularExpressionLiteral + TemplateSubstitutionTail + +InputElementTemplateTail :: + WhiteSpace + LineTerminator + Comment + CommonToken + DivPunctuator + TemplateSubstitutionTail + + +WhiteSpace :: + <TAB> + <VT> + <FF> + <SP> + <NBSP> + <ZWNBSP> + <USP> + + +LineTerminator :: + <LF> + <CR> + <LS> + <PS> + +LineTerminatorSequence :: + <LF> + <LS> + <PS> + <CR> <LF> + + +MultiLineCommentChars :: + MultiLineCommentPiece + MultiLineCommentChars MultiLineCommentPiece + +MultiLineCommentPiece :: + MultiLineNotAsteriskChar + Stars MultiLineNotForwardSlashOrAsteriskChar + +MultiLineNotAsteriskChar :: + SourceCharacter but not `*` + +MultiLineNotForwardSlashOrAsteriskChar :: + SourceCharacter but not one of `/` or `*` + +SingleLineComment :: + `//` SingleLineCommentChars? + +SingleLineCommentChars :: + SingleLineCommentChar SingleLineCommentChars? + +SingleLineCommentChar :: + SourceCharacter but not LineTerminator + + +CommonToken :: + IdentifierName + Punctuator + NumericLiteral + StringLiteral + Template + + +Identifier :: + `TODO` + +IdentifierName :: + IdentifierStart + IdentifierName IdentifierPart + +IdentifierStart :: + UnicodeIDStart + `$` + `_` + `\` UnicodeEscapeSequence + +IdentifierPart :: + UnicodeIDContinue + `$` + `\` UnicodeEscapeSequence + <ZWNJ> + <ZWJ> + +UnicodeIDStart :: + `TODO` + +UnicodeIDContinue :: + `TODO` + + +ReservedWord :: + Keyword + FutureReservedWord + NullLiteral + BooleanLiteral + + +Keyword :: one of + `await` + `break` + `case` `catch` `class` `const` `continue` + `debugger` `default` `delete` `do` + `else` `export` `extends` + `finally` `for` `function` + `if` `import` `in` `instanceof` + `new` + `return` + `super` `switch` + `this` `throw` `try` `typeof` + `var` `void` + `while` `with` + `yield` + + +FutureReservedWord :: + `enum` + + +Punctuator :: one of + `{` `(` `)` `[` `]` + `.` `...` `;` `,` + `<` `>` `<=` `>=` + `==` `!=` `===` `!==` + `+` `-` `*` `%` `**` + `++` `--` + `<<` `>>` `>>>` + `&` `|` `^` + `!` `~` + `&&` `||` + `?` `:` + `=` `+=` `-=` `*=` `%=` `**=` `<<=` `>>=` `>>>=` `&=` `|=` `^=` + `=>` + +DivPunctuator :: + `/` + `/=` + +RightBracePunctuator :: + `}` + + +NullLiteral :: + `null` + + +BooleanLiteral :: + `true` + `false` + + +DecimalLiteral :: + DecimalIntegerLiteral `.` DecimalDigits? ExponentPart? + `.` DecimalDigits ExponentPart? + DecimalIntegerLiteral ExponentPart? 
+ +DecimalDigits :: + DecimalDigit + DecimalDigits DecimalDigit + +DecimalDigit :: one of + `0` `1` `2` `3` `4` `5` `6` `7` `8` `9` + +NonZeroDigit :: one of + `1` `2` `3` `4` `5` `6` `7` `8` `9` + +ExponentPart :: + ExponentIndicator SignedInteger + +ExponentIndicator :: one of + `e` `E` + +SignedInteger :: + DecimalDigits + `+` DecimalDigits + `-` DecimalDigits + +BinaryIntegerLiteral :: + `0b` BinaryDigits + `0B` BinaryDigits + +BinaryDigits :: + BinaryDigit + BinaryDigits BinaryDigit + +BinaryDigit :: one of + `0` `1` + +OctalIntegerLiteral :: + `0o` OctalDigits + `0O` OctalDigits + +OctalDigits :: + OctalDigit + OctalDigits OctalDigit + +OctalDigit :: one of + `0` `1` `2` `3` `4` `5` `6` `7` + +HexIntegerLiteral :: + `0x` HexDigits + `0X` HexDigits + +HexDigits :: + HexDigit + HexDigits HexDigit + +HexDigit :: one of + `0` `1` `2` `3` `4` `5` `6` `7` `8` `9` `a` `b` `c` `d` `e` `f` `A` `B` `C` `D` `E` `F` + + +StringLiteral :: + `"` DoubleStringCharacters? `"` + `'` SingleStringCharacters? `'` + +DoubleStringCharacters :: + DoubleStringCharacter DoubleStringCharacters? + +SingleStringCharacters :: + SingleStringCharacter SingleStringCharacters? + +DoubleStringCharacter :: + SourceCharacter but not one of `"` or `\` or LineTerminator + <LS> + <PS> + `\` EscapeSequence + LineContinuation + +SingleStringCharacter :: + SourceCharacter but not one of `'` or `\` or LineTerminator + <LS> + <PS> + `\` EscapeSequence + LineContinuation + +LineContinuation :: + `\` LineTerminatorSequence + + +CharacterEscapeSequence :: + SingleEscapeCharacter + NonEscapeCharacter + +SingleEscapeCharacter :: one of + `'` `"` `\` `b` `f` `n` `r` `t` `v` + +NonEscapeCharacter :: + SourceCharacter but not one of EscapeCharacter or LineTerminator + +EscapeCharacter :: + SingleEscapeCharacter + DecimalDigit + `x` + `u` + +HexEscapeSequence :: + `x` HexDigit HexDigit + +UnicodeEscapeSequence :: + `u` Hex4Digits + `u{` CodePoint `}` + +Hex4Digits :: + HexDigit HexDigit HexDigit HexDigit + + +RegularExpressionLiteral :: + `/` RegularExpressionBody `/` RegularExpressionFlags + +RegularExpressionBody :: + RegularExpressionFirstChar RegularExpressionChars + +RegularExpressionChars :: + [empty] + RegularExpressionChars RegularExpressionChar + +RegularExpressionFirstChar :: + RegularExpressionNonTerminator but not one of `*` or `\` or `/` or `[` + RegularExpressionBackslashSequence + RegularExpressionClass + +RegularExpressionChar :: + RegularExpressionNonTerminator but not one of `\` or `/` or `[` + RegularExpressionBackslashSequence + RegularExpressionClass + +RegularExpressionBackslashSequence :: + `\` RegularExpressionNonTerminator + +RegularExpressionNonTerminator :: + SourceCharacter but not LineTerminator + +RegularExpressionClass :: + `[` RegularExpressionClassChars `]` + +RegularExpressionClassChars :: + [empty] + RegularExpressionClassChars RegularExpressionClassChar + +RegularExpressionClassChar :: + RegularExpressionNonTerminator but not one of `]` or `\` + RegularExpressionBackslashSequence + +RegularExpressionFlags :: + [empty] + RegularExpressionFlags IdentifierPart + + +Template :: + NoSubstitutionTemplate + TemplateHead + +NoSubstitutionTemplate :: + ``` TemplateCharacters? ``` + +TemplateHead :: + ``` TemplateCharacters? `${` + +TemplateSubstitutionTail :: + TemplateMiddle + TemplateTail + +TemplateMiddle :: + `}` TemplateCharacters? `${` + +TemplateTail :: + `}` TemplateCharacters? ``` + +TemplateCharacters :: + TemplateCharacter TemplateCharacters? 
+ +TemplateCharacter :: + `$` [lookahead != `{` ] + `\` EscapeSequence + `\` NotEscapeSequence + LineContinuation + LineTerminatorSequence + SourceCharacter but not one of ``` or `\` or `$` or LineTerminator + +NotEscapeSequence :: + `0` DecimalDigit + DecimalDigit but not `0` + `x` [lookahead <! HexDigit] + `x` HexDigit [lookahead <! HexDigit] + `u` [lookahead <! HexDigit] [lookahead != `{`] + `u` HexDigit [lookahead <! HexDigit] + `u` HexDigit HexDigit [lookahead <! HexDigit] + `u` HexDigit HexDigit HexDigit [lookahead <! HexDigit] + `u` `{` [lookahead <! HexDigit] + `u` `{` NotCodePoint [lookahead <! HexDigit] + `u` `{` CodePoint [lookahead <! HexDigit] [lookahead != `}`] + +NotCodePoint :: + HexDigits [> but only if MV of |HexDigits| > 0x10FFFF ] + +CodePoint :: + HexDigits [> but only if MV of |HexDigits| โค 0x10FFFF ] + + +ControlLetter :: one of + `a` `b` `c` `d` `e` `f` `g` `h` `i` `j` `k` `l` `m` `n` `o` `p` `q` `r` `s` `t` `u` `v` `w` `x` `y` `z` + `A` `B` `C` `D` `E` `F` `G` `H` `I` `J` `K` `L` `M` `N` `O` `P` `Q` `R` `S` `T` `U` `V` `W` `X` `Y` `Z` + + +NumericLiteral :: + DecimalLiteral + BinaryIntegerLiteral + OctalIntegerLiteral + HexIntegerLiteral + LegacyOctalIntegerLiteral + +LegacyOctalIntegerLiteral :: + `0` OctalDigit + LegacyOctalIntegerLiteral OctalDigit + +DecimalIntegerLiteral :: + `0` + NonZeroDigit DecimalDigits? + NonOctalDecimalIntegerLiteral + +NonOctalDecimalIntegerLiteral :: + `0` NonOctalDigit + LegacyOctalLikeDecimalIntegerLiteral NonOctalDigit + NonOctalDecimalIntegerLiteral DecimalDigit + +LegacyOctalLikeDecimalIntegerLiteral :: + `0` OctalDigit + LegacyOctalLikeDecimalIntegerLiteral OctalDigit + +NonOctalDigit :: one of + `8` `9` + + +EscapeSequence :: + CharacterEscapeSequence + LegacyOctalEscapeSequence + HexEscapeSequence + UnicodeEscapeSequence + +LegacyOctalEscapeSequence :: + OctalDigit [lookahead <! OctalDigit] + ZeroToThree OctalDigit [lookahead <! OctalDigit] + FourToSeven OctalDigit + ZeroToThree OctalDigit OctalDigit + +ZeroToThree :: one of + `0` `1` `2` `3` + +FourToSeven :: one of + `4` `5` `6` `7` + + +Comment :: + MultiLineComment + SingleLineComment + SingleLineHTMLOpenComment + SingleLineHTMLCloseComment + SingleLineDelimitedComment + +MultiLineComment :: + `/*` FirstCommentLine? LineTerminator MultiLineCommentChars? `*/` HTMLCloseComment? + +FirstCommentLine :: + SingleLineDelimitedCommentChars + +SingleLineHTMLOpenComment :: + `<!--` SingleLineCommentChars? + +SingleLineHTMLCloseComment :: + LineTerminatorSequence HTMLCloseComment + +SingleLineDelimitedComment :: + `/*` SingleLineDelimitedCommentChars? `*/` + +HTMLCloseComment :: + WhiteSpaceSequence? SingleLineDelimitedCommentSequence? `-->` SingleLineCommentChars? + +SingleLineDelimitedCommentChars :: + SingleLineDelimitedCommentPiece + SingleLineDelimitedCommentChars SingleLineDelimitedCommentPiece + +SingleLineDelimitedCommentPiece :: + SingleLineNotAsteriskChar + Stars SingleLineNotForwardSlashOrAsteriskChar + +Stars :: + `*` + Stars `*` + +SingleLineNotAsteriskChar :: + SourceCharacter but not one of `*` or LineTerminator + +SingleLineNotForwardSlashOrAsteriskChar :: + SourceCharacter but not one of `/` or `*` or LineTerminator + +WhiteSpaceSequence :: + WhiteSpace WhiteSpaceSequence? + +SingleLineDelimitedCommentSequence :: + SingleLineDelimitedComment WhiteSpaceSequence? SingleLineDelimitedCommentSequence? 
+ diff --git a/third_party/rust/jsparagus/js_parser/es-simplified.esgrammar b/third_party/rust/jsparagus/js_parser/es-simplified.esgrammar new file mode 100644 index 0000000000..f7ebbb2017 --- /dev/null +++ b/third_party/rust/jsparagus/js_parser/es-simplified.esgrammar @@ -0,0 +1,1453 @@ +@returns Identifier +IdentifierReference[Yield, Await] : + Identifier + => identifier_reference($0) + +@returns BindingIdentifier +BindingIdentifier[Yield, Await] : + Identifier + => binding_identifier($0) + `yield` + => binding_identifier_yield($0) + `await` + => binding_identifier_await($0) + +@returns Label +LabelIdentifier[Yield, Await] : + Identifier + => label_identifier($0) + +@returns Expression +PrimaryExpression[Yield, Await] : + `this` + => this_expr($0) + IdentifierReference[?Yield, ?Await] + => identifier_expr($0) + Literal + => $0 + ArrayLiteral[?Yield, ?Await] + => $0 + ObjectLiteral[?Yield, ?Await] + => $0 + FunctionExpression + => $0 + ClassExpression[?Yield, ?Await] + => $0 + GeneratorExpression + => $0 + AsyncFunctionExpression + => $0 + AsyncGeneratorExpression + => $0 + RegularExpressionLiteral + => regexp_literal($0) + TemplateLiteral[?Yield, ?Await, ~Tagged] + => untagged_template_expr($0) + CoverParenthesizedExpressionAndArrowParameterList[?Yield, ?Await] + => uncover_parenthesized_expression($0) + +@returns CoverParenthesized +CoverParenthesizedExpressionAndArrowParameterList[Yield, Await] : + `(` Expression[+In, ?Yield, ?Await] `)` + => cover_parenthesized_expression($0, $1, $2) + `(` Expression[+In, ?Yield, ?Await] `,` `)` + => cover_arrow_parameter_list($0, expression_to_parameter_list($1), None, $3) + `(` `)` + => cover_arrow_parameter_list($0, empty_parameter_list(), None, $1) + `(` `...` BindingIdentifier[?Yield, ?Await] `)` + => cover_arrow_parameter_list($0, empty_parameter_list(), Some(binding_identifier_to_binding($2)), $3) + `(` `...` BindingPattern[?Yield, ?Await] `)` + => cover_arrow_parameter_list($0, empty_parameter_list(), Some($2), $3) + `(` Expression[+In, ?Yield, ?Await] `,` `...` BindingIdentifier[?Yield, ?Await] `)` + => cover_arrow_parameter_list($0, expression_to_parameter_list($1), Some(binding_identifier_to_binding($4)), $5) + `(` Expression[+In, ?Yield, ?Await] `,` `...` BindingPattern[?Yield, ?Await] `)` + => cover_arrow_parameter_list($0, expression_to_parameter_list($1), Some($4), $5) + + +@returns Expression +Literal : + NullLiteral + => null_literal($0) + BooleanLiteral + => boolean_literal($0) + NumericLiteral + => numeric_literal($0) + BigIntLiteral + => bigint_literal($0) + StringLiteral + => string_literal($0) + + +@returns Expression +ArrayLiteral[Yield, Await] : + `[` Elision? `]` + => array_literal_empty($0, $1, $2) + `[` ElementList[?Yield, ?Await] `]` + => array_literal($0, $1, $2) + `[` ElementList[?Yield, ?Await] `,` Elision? `]` + => array_literal_with_trailing_elision($0,$1, $3, $4) + +@returns ArrayExpression +ElementList[Yield, Await] : + Elision? AssignmentExpression[+In, ?Yield, ?Await] + => element_list_first($0, $1) + Elision? SpreadElement[?Yield, ?Await] + => element_list_first_spread($0, $1) + ElementList[?Yield, ?Await] `,` Elision? AssignmentExpression[+In, ?Yield, ?Await] + => element_list_append($0, $2, $3) + ElementList[?Yield, ?Await] `,` Elision? 
SpreadElement[?Yield, ?Await] + => element_list_append_spread($0, $2, $3) + +@returns ArrayExpression +Elision : + `,` + => elision_single($0) + Elision `,` + => elision_append($0, $1) + +@returns Expression +SpreadElement[Yield, Await] : + `...` AssignmentExpression[+In, ?Yield, ?Await] + => spread_element($1) + + +@returns Expression +ObjectLiteral[Yield, Await] : + `{` `}` + => object_literal_empty($0, $1) + `{` PropertyDefinitionList[?Yield, ?Await] `}` + => object_literal($0, $1, $2) + `{` PropertyDefinitionList[?Yield, ?Await] `,` `}` + => object_literal($0, $1, $2) + +@returns ObjectExpression +PropertyDefinitionList[Yield, Await] : + PropertyDefinition[?Yield, ?Await] + => property_definition_list_single($0) + PropertyDefinitionList[?Yield, ?Await] `,` PropertyDefinition[?Yield, ?Await] + => property_definition_list_append($0, $2) + +@returns ObjectProperty +PropertyDefinition[Yield, Await] : + IdentifierReference[?Yield, ?Await] + => shorthand_property($0) + CoverInitializedName[?Yield, ?Await] + => $0 + PropertyName[?Yield, ?Await] `:` AssignmentExpression[+In, ?Yield, ?Await] + => property_definition($0, $2) + MethodDefinition[?Yield, ?Await] + => property_definition_method($0) + `...` AssignmentExpression[+In, ?Yield, ?Await] + => property_definition_spread($1) + +@returns PropertyName +PropertyName[Yield, Await] : + LiteralPropertyName + => $0 + ComputedPropertyName[?Yield, ?Await] + => $0 + +@returns PropertyName +LiteralPropertyName : + IdentifierName + => property_name_identifier($0) + StringLiteral + => property_name_string($0) + NumericLiteral + => property_name_numeric($0) + BigIntLiteral + => property_name_bigint($0) + +@returns PropertyName +ComputedPropertyName[Yield, Await] : + `[` AssignmentExpression[+In, ?Yield, ?Await] `]` + => computed_property_name($0, $1, $2) + +@returns ObjectProperty +CoverInitializedName[Yield, Await] : + IdentifierReference[?Yield, ?Await] Initializer[+In, ?Yield, ?Await] + => cover_initialized_name($0, $1) + +@returns Expression +Initializer[In, Yield, Await] : + `=` AssignmentExpression[?In, ?Yield, ?Await] + => $1 + + +@returns TemplateExpression +TemplateLiteral[Yield, Await, Tagged] : + NoSubstitutionTemplate + => template_literal($0) + SubstitutionTemplate[?Yield, ?Await, ?Tagged] + => $0 + +@returns TemplateExpression +SubstitutionTemplate[Yield, Await, Tagged] : + TemplateHead Expression[+In, ?Yield, ?Await] TemplateSpans[?Yield, ?Await, ?Tagged] + => substitution_template($0, $1, $2) + +@returns Void +TemplateSpans[Yield, Await, Tagged] : + TemplateTail + => template_spans(None, $0) + TemplateMiddleList[?Yield, ?Await, ?Tagged] TemplateTail + => template_spans(Some($0), $1) + +@returns Void +TemplateMiddleList[Yield, Await, Tagged] : + TemplateMiddle Expression[+In, ?Yield, ?Await] + => template_middle_list_single($0, $1) + TemplateMiddleList[?Yield, ?Await, ?Tagged] TemplateMiddle Expression[+In, ?Yield, ?Await] + => template_middle_list_append($0, $1, $2) + +@returns Expression +MemberExpression[Yield, Await] : + PrimaryExpression[?Yield, ?Await] + => $0 + MemberExpression[?Yield, ?Await] `[` Expression[+In, ?Yield, ?Await] `]` + => computed_member_expr($0, $2, $3) + MemberExpression[?Yield, ?Await] `.` IdentifierName + => static_member_expr($0, $2) + MemberExpression[?Yield, ?Await] TemplateLiteral[?Yield, ?Await, +Tagged] + => tagged_template_expr($0, $1) + SuperProperty[?Yield, ?Await] + => $0 + MetaProperty + => $0 + `new` MemberExpression[?Yield, ?Await] Arguments[?Yield, ?Await] + => new_expr_with_arguments($0, $1, $2) + 
MemberExpression[?Yield, ?Await] `.` PrivateIdentifier + => private_field_expr($0, $2) + +@returns Expression +SuperProperty[Yield, Await] : + `super` `[` Expression[+In, ?Yield, ?Await] `]` + => super_property_computed($0, $2, $3) + `super` `.` IdentifierName + => super_property_static($0, $2) + +@returns Expression +MetaProperty : + NewTarget + => $0 + +@returns Expression +NewTarget : + `new` `.` `target` + => new_target_expr($0, $2) + +@returns Expression +NewExpression[Yield, Await] : + MemberExpression[?Yield, ?Await] + => $0 + `new` NewExpression[?Yield, ?Await] + => new_expr_without_arguments($0, $1) + +@returns Expression +CallExpression[Yield, Await] : + CoverCallExpressionAndAsyncArrowHead[?Yield, ?Await] + => $0 + SuperCall[?Yield, ?Await] + => $0 + ImportCall[?Yield, ?Await] + => $0 + CallExpression[?Yield, ?Await] Arguments[?Yield, ?Await] + => call_expr($0, $1) + CallExpression[?Yield, ?Await] `[` Expression[+In, ?Yield, ?Await] `]` + => computed_member_expr($0, $2, $3) + CallExpression[?Yield, ?Await] `.` IdentifierName + => static_member_expr($0, $2) + CallExpression[?Yield, ?Await] TemplateLiteral[?Yield, ?Await, +Tagged] + => tagged_template_expr($0, $1) + CallExpression[?Yield, ?Await] `.` PrivateIdentifier + => private_field_expr($0, $2) + +@returns Expression +SuperCall[Yield, Await] : + `super` Arguments[?Yield, ?Await] + => super_call($0, $1) + +@returns Expression +ImportCall[Yield, Await] : + `import` `(` AssignmentExpression[+In, ?Yield, ?Await] `)` + => import_call($0, $2, $3) + +@returns Arguments +Arguments[Yield, Await] : + `(` `)` + => arguments_empty($0, $1) + `(` ArgumentList[?Yield, ?Await] `)` + => arguments($0, $1, $2) + `(` ArgumentList[?Yield, ?Await] `,` `)` + => arguments($0, $1, $3) + +@returns Arguments +ArgumentList[Yield, Await] : + AssignmentExpression[+In, ?Yield, ?Await] + => arguments_single($0) + `...` AssignmentExpression[+In, ?Yield, ?Await] + => arguments_spread_single($1) + ArgumentList[?Yield, ?Await] `,` AssignmentExpression[+In, ?Yield, ?Await] + => arguments_append($0, $2) + ArgumentList[?Yield, ?Await] `,` `...` AssignmentExpression[+In, ?Yield, ?Await] + => arguments_append_spread($0, $3) + +@returns Expression +OptionalExpression[Yield, Await] : + MemberExpression[?Yield, ?Await] OptionalChain[?Yield, ?Await] + => optional_expr($0, $1) + CallExpression[?Yield, ?Await] OptionalChain[?Yield, ?Await] + => optional_expr($0, $1) + OptionalExpression[?Yield, ?Await] OptionalChain[?Yield, ?Await] + => optional_expr($0, $1) + +@returns Expression +OptionalChain[Yield, Await] : + `?.` `[` Expression[+In, ?Yield, ?Await] `]` + => optional_computed_member_expr_tail($0, $2, $3) + `?.` IdentifierName + => optional_static_member_expr_tail($0, $1) + `?.` PrivateIdentifier + => optional_private_field_member_expr_tail($0, $1) + `?.` Arguments[?Yield, ?Await] + => optional_call_expr_tail($0, $1) + `?.` TemplateLiteral[?Yield, ?Await, +Tagged] + => error_optional_chain_with_template() + OptionalChain[?Yield, ?Await] `[` Expression[+In, ?Yield, ?Await] `]` + => optional_computed_member_expr($0, $2, $3) + OptionalChain[?Yield, ?Await] `.` IdentifierName + => optional_static_member_expr($0, $2) + OptionalChain[?Yield, ?Await] `.` PrivateIdentifier + => optional_private_field_member_expr($0, $2) + OptionalChain[?Yield, ?Await] Arguments[?Yield, ?Await] + => optional_call_expr($0, $1) + OptionalChain[?Yield, ?Await] TemplateLiteral[?Yield, ?Await, +Tagged] + => error_optional_chain_with_template() + +@returns Expression +LeftHandSideExpression[Yield, 
Await] : + NewExpression[?Yield, ?Await] + => $0 + CallExpression[?Yield, ?Await] + => $0 + OptionalExpression[?Yield, ?Await] + => $0 + + +@returns Expression +CallMemberExpression[Yield, Await] : + MemberExpression[?Yield, ?Await] Arguments[?Yield, ?Await] + => call_expr($0, $1) + + +@returns Expression +UpdateExpression[Yield, Await] : + LeftHandSideExpression[?Yield, ?Await] + => $0 + LeftHandSideExpression[?Yield, ?Await] [no LineTerminator here] `++` + => post_increment_expr($0, $1) + LeftHandSideExpression[?Yield, ?Await] [no LineTerminator here] `--` + => post_decrement_expr($0, $1) + `++` UnaryExpression[?Yield, ?Await] + => pre_decrement_expr($0, $1) + `--` UnaryExpression[?Yield, ?Await] + => pre_decrement_expr($0, $1) + + +@returns Expression +UnaryExpression[Yield, Await] : + UpdateExpression[?Yield, ?Await] + => $0 + `delete` UnaryExpression[?Yield, ?Await] + => delete_expr($0, $1) + `void` UnaryExpression[?Yield, ?Await] + => void_expr($0, $1) + `typeof` UnaryExpression[?Yield, ?Await] + => typeof_expr($0, $1) + `+` UnaryExpression[?Yield, ?Await] + => unary_plus_expr($0, $1) + `-` UnaryExpression[?Yield, ?Await] + => unary_minus_expr($0, $1) + `~` UnaryExpression[?Yield, ?Await] + => bitwise_not_expr($0, $1) + `!` UnaryExpression[?Yield, ?Await] + => logical_not_expr($0, $1) + [+Await] AwaitExpression[?Yield] + => $0 + + +@returns Expression +ExponentiationExpression[Yield, Await] : + UnaryExpression[?Yield, ?Await] + => $0 + UpdateExpression[?Yield, ?Await] `**` ExponentiationExpression[?Yield, ?Await] + => binary_expr(pow_op($1), $0, $2) + + +@returns Expression +MultiplicativeExpression[Yield, Await] : + ExponentiationExpression[?Yield, ?Await] + => $0 + MultiplicativeExpression[?Yield, ?Await] MultiplicativeOperator ExponentiationExpression[?Yield, ?Await] + => multiplicative_expr($0, $1, $2) + +@returns BinaryOperator +MultiplicativeOperator : + `*` + => box_op(mul_op($0)) + `/` + => box_op(div_op($0)) + `%` + => box_op(mod_op($0)) + + +@returns Expression +AdditiveExpression[Yield, Await] : + MultiplicativeExpression[?Yield, ?Await] + => $0 + AdditiveExpression[?Yield, ?Await] `+` MultiplicativeExpression[?Yield, ?Await] + => binary_expr(add_op($1), $0, $2) + AdditiveExpression[?Yield, ?Await] `-` MultiplicativeExpression[?Yield, ?Await] + => binary_expr(sub_op($1), $0, $2) + + +@returns Expression +ShiftExpression[Yield, Await] : + AdditiveExpression[?Yield, ?Await] + => $0 + ShiftExpression[?Yield, ?Await] `<<` AdditiveExpression[?Yield, ?Await] + => binary_expr(left_shift_op($1), $0, $2) + ShiftExpression[?Yield, ?Await] `>>` AdditiveExpression[?Yield, ?Await] + => binary_expr(right_shift_op($1), $0, $2) + ShiftExpression[?Yield, ?Await] `>>>` AdditiveExpression[?Yield, ?Await] + => binary_expr(right_shift_ext_op($1), $0, $2) + + +@returns Expression +RelationalExpression[In, Yield, Await] : + ShiftExpression[?Yield, ?Await] + => $0 + RelationalExpression[?In, ?Yield, ?Await] `<` ShiftExpression[?Yield, ?Await] + => binary_expr(less_than_op($1), $0, $2) + RelationalExpression[?In, ?Yield, ?Await] `>` ShiftExpression[?Yield, ?Await] + => binary_expr(greater_than_op($1), $0, $2) + RelationalExpression[?In, ?Yield, ?Await] `<=` ShiftExpression[?Yield, ?Await] + => binary_expr(less_than_or_equal_op($1), $0, $2) + RelationalExpression[?In, ?Yield, ?Await] `>=` ShiftExpression[?Yield, ?Await] + => binary_expr(greater_than_or_equal_op($1), $0, $2) + RelationalExpression[?In, ?Yield, ?Await] `instanceof` ShiftExpression[?Yield, ?Await] + => binary_expr(instanceof_op($1), 
$0, $2) + [+In] RelationalExpression[+In, ?Yield, ?Await] `in` ShiftExpression[?Yield, ?Await] + => binary_expr(in_op($1), $0, $2) + + +@returns Expression +EqualityExpression[In, Yield, Await] : + RelationalExpression[?In, ?Yield, ?Await] + => $0 + EqualityExpression[?In, ?Yield, ?Await] `==` RelationalExpression[?In, ?Yield, ?Await] + => binary_expr(equals_op($1), $0, $2) + EqualityExpression[?In, ?Yield, ?Await] `!=` RelationalExpression[?In, ?Yield, ?Await] + => binary_expr(not_equals_op($1), $0, $2) + EqualityExpression[?In, ?Yield, ?Await] `===` RelationalExpression[?In, ?Yield, ?Await] + => binary_expr(strict_equals_op($1), $0, $2) + EqualityExpression[?In, ?Yield, ?Await] `!==` RelationalExpression[?In, ?Yield, ?Await] + => binary_expr(strict_not_equals_op($1), $0, $2) + + +@returns Expression +BitwiseANDExpression[In, Yield, Await] : + EqualityExpression[?In, ?Yield, ?Await] + => $0 + BitwiseANDExpression[?In, ?Yield, ?Await] `&` EqualityExpression[?In, ?Yield, ?Await] + => binary_expr(bitwise_and_op($1), $0, $2) + +@returns Expression +BitwiseXORExpression[In, Yield, Await] : + BitwiseANDExpression[?In, ?Yield, ?Await] + => $0 + BitwiseXORExpression[?In, ?Yield, ?Await] `^` BitwiseANDExpression[?In, ?Yield, ?Await] + => binary_expr(bitwise_xor_op($1), $0, $2) + +@returns Expression +BitwiseORExpression[In, Yield, Await] : + BitwiseXORExpression[?In, ?Yield, ?Await] + => $0 + BitwiseORExpression[?In, ?Yield, ?Await] `|` BitwiseXORExpression[?In, ?Yield, ?Await] + => binary_expr(bitwise_or_op($1), $0, $2) + +@returns Expression +LogicalANDExpression[In, Yield, Await] : + BitwiseORExpression[?In, ?Yield, ?Await] + => $0 + LogicalANDExpression[?In, ?Yield, ?Await] `&&` BitwiseORExpression[?In, ?Yield, ?Await] + => binary_expr(logical_and_op($1), $0, $2) + +@returns Expression +LogicalORExpression[In, Yield, Await] : + LogicalANDExpression[?In, ?Yield, ?Await] + => $0 + LogicalORExpression[?In, ?Yield, ?Await] `||` LogicalANDExpression[?In, ?Yield, ?Await] + => binary_expr(logical_or_op($1), $0, $2) + +@returns Expression +ShortCircuitExpression[In, Yield, Await] : + LogicalORExpression[?In, ?Yield, ?Await] + => $0 + CoalesceExpression[?In, ?Yield, ?Await] + => $0 + +@returns Expression +CoalesceExpression[In, Yield, Await] : + CoalesceExpressionHead[?In, ?Yield, ?Await] `??` BitwiseORExpression[?In, ?Yield, ?Await] + => binary_expr(coalesce_op($1), $0, $2) + +@returns Expression +CoalesceExpressionHead[In, Yield, Await] : + CoalesceExpression[?In, ?Yield, ?Await] + => $0 + BitwiseORExpression[?In, ?Yield, ?Await] + => $0 + +@returns Expression +ConditionalExpression[In, Yield, Await] : + ShortCircuitExpression[?In, ?Yield, ?Await] + => $0 + ShortCircuitExpression[?In, ?Yield, ?Await] `?` AssignmentExpression[+In, ?Yield, ?Await] `:` AssignmentExpression[?In, ?Yield, ?Await] + => conditional_expr($0, $2, $4) + + +@returns Expression +AssignmentExpression[In, Yield, Await] : + ConditionalExpression[?In, ?Yield, ?Await] + => $0 + [+Yield] YieldExpression[?In, ?Await] + => $0 + ArrowFunction[?In, ?Yield, ?Await] + => $0 + AsyncArrowFunction[?In, ?Yield, ?Await] + => $0 + LeftHandSideExpression[?Yield, ?Await] `=` AssignmentExpression[?In, ?Yield, ?Await] + => assignment_expr($0, $2) + LeftHandSideExpression[?Yield, ?Await] AssignmentOperator AssignmentExpression[?In, ?Yield, ?Await] + => compound_assignment_expr($0, $1, $2) + LeftHandSideExpression[?Yield, ?Await] LogicalAssignmentOperator AssignmentExpression[?In, ?Yield, ?Await] + => compound_assignment_expr($0, $1, $2) + +@returns 
CompoundAssignmentOperator +AssignmentOperator : + `*=` + => box_assign_op(mul_assign_op($0)) + `/=` + => box_assign_op(div_assign_op($0)) + `%=` + => box_assign_op(mod_assign_op($0)) + `+=` + => box_assign_op(add_assign_op($0)) + `-=` + => box_assign_op(sub_assign_op($0)) + `<<=` + => box_assign_op(left_shift_assign_op($0)) + `>>=` + => box_assign_op(right_shift_assign_op($0)) + `>>>=` + => box_assign_op(right_shift_ext_assign_op($0)) + `&=` + => box_assign_op(bitwise_and_assign_op($0)) + `^=` + => box_assign_op(bitwise_xor_assign_op($0)) + `|=` + => box_assign_op(bitwise_or_assign_op($0)) + `**=` + => box_assign_op(pow_assign_op($0)) + +@returns CompoundAssignmentOperator +LogicalAssignmentOperator : + `&&=` + => box_assign_op(logical_and_assign_op($0)) + `||=` + => box_assign_op(logical_or_assign_op($0)) + `??=` + => box_assign_op(coalesce_assign_op($0)) + +AssignmentPattern[Yield, Await] : + ObjectAssignmentPattern[?Yield, ?Await] + ArrayAssignmentPattern[?Yield, ?Await] + +ObjectAssignmentPattern[Yield, Await] : + `{` `}` + `{` AssignmentRestProperty[?Yield, ?Await] `}` + `{` AssignmentPropertyList[?Yield, ?Await] `}` + `{` AssignmentPropertyList[?Yield, ?Await] `,` AssignmentRestProperty[?Yield, ?Await]? `}` + +ArrayAssignmentPattern[Yield, Await] : + `[` Elision? AssignmentRestElement[?Yield, ?Await]? `]` + `[` AssignmentElementList[?Yield, ?Await] `]` + `[` AssignmentElementList[?Yield, ?Await] `,` Elision? AssignmentRestElement[?Yield, ?Await]? `]` + +AssignmentRestProperty[Yield, Await] : + `...` DestructuringAssignmentTarget[?Yield, ?Await] + +AssignmentPropertyList[Yield, Await] : + AssignmentProperty[?Yield, ?Await] + AssignmentPropertyList[?Yield, ?Await] `,` AssignmentProperty[?Yield, ?Await] + +AssignmentElementList[Yield, Await] : + AssignmentElisionElement[?Yield, ?Await] + AssignmentElementList[?Yield, ?Await] `,` AssignmentElisionElement[?Yield, ?Await] + +AssignmentElisionElement[Yield, Await] : + Elision? AssignmentElement[?Yield, ?Await] + +AssignmentProperty[Yield, Await] : + IdentifierReference[?Yield, ?Await] Initializer[+In, ?Yield, ?Await]? + PropertyName[?Yield, ?Await] `:` AssignmentElement[?Yield, ?Await] + +AssignmentElement[Yield, Await] : + DestructuringAssignmentTarget[?Yield, ?Await] Initializer[+In, ?Yield, ?Await]? 
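
As an illustration (not part of the vendored grammar file), the AssignmentOperator and LogicalAssignmentOperator productions above map each compound-assignment token to a boxed operator through their reduce actions. A small JavaScript sketch of source text these productions match (the identifiers are invented for the example):

    let count = 0, mask = 0b0100, cache = null;
    count += 1;              // `+=`   -> add_assign_op
    mask >>>= 2;             // `>>>=` -> right_shift_ext_assign_op
    count **= 2;             // `**=`  -> pow_assign_op
    cache ??= new Map();     // `??=`  -> coalesce_assign_op (LogicalAssignmentOperator)
    cache &&= cache.size;    // `&&=`  -> logical_and_assign_op

Each statement reduces through LeftHandSideExpression AssignmentOperator AssignmentExpression (or the LogicalAssignmentOperator alternative), so the entire right-hand side is parsed as one AssignmentExpression.
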
+ +AssignmentRestElement[Yield, Await] : + `...` DestructuringAssignmentTarget[?Yield, ?Await] + +DestructuringAssignmentTarget[Yield, Await] : + LeftHandSideExpression[?Yield, ?Await] + + +@returns Expression +Expression[In, Yield, Await] : + AssignmentExpression[?In, ?Yield, ?Await] + => $0 + Expression[?In, ?Yield, ?Await] `,` AssignmentExpression[?In, ?Yield, ?Await] + => binary_expr(comma_op($1), $0, $2) + + +@returns Statement +Statement[Yield, Await, Return] : + BlockStatement[?Yield, ?Await, ?Return] + => $0 + VariableStatement[?Yield, ?Await] + => $0 + EmptyStatement + => $0 + ExpressionStatement[?Yield, ?Await] + => $0 + IfStatement[?Yield, ?Await, ?Return] + => $0 + BreakableStatement[?Yield, ?Await, ?Return] + => $0 + ContinueStatement[?Yield, ?Await] + => $0 + BreakStatement[?Yield, ?Await] + => $0 + [+Return] ReturnStatement[?Yield, ?Await] + => $0 + WithStatement[?Yield, ?Await, ?Return] + => $0 + LabelledStatement[?Yield, ?Await, ?Return] + => $0 + ThrowStatement[?Yield, ?Await] + => $0 + TryStatement[?Yield, ?Await, ?Return] + => $0 + DebuggerStatement + => $0 + +@returns Statement +Declaration[Yield, Await] : + HoistableDeclaration[?Yield, ?Await, ~Default] + => $0 + ClassDeclaration[?Yield, ?Await, ~Default] + => $0 + LexicalDeclaration[+In, ?Yield, ?Await] + => $0 + +@returns Statement +HoistableDeclaration[Yield, Await, Default] : + FunctionDeclaration[?Yield, ?Await, ?Default] + => $0 + GeneratorDeclaration[?Yield, ?Await, ?Default] + => $0 + AsyncFunctionDeclaration[?Yield, ?Await, ?Default] + => $0 + AsyncGeneratorDeclaration[?Yield, ?Await, ?Default] + => $0 + +@returns Statement +BreakableStatement[Yield, Await, Return] : + IterationStatement[?Yield, ?Await, ?Return] + => $0 + SwitchStatement[?Yield, ?Await, ?Return] + => $0 + + +@returns Statement +BlockStatement[Yield, Await, Return] : + Block[?Yield, ?Await, ?Return] + => block_statement($0) + +@returns Block +Block[Yield, Await, Return] : + `{` StatementList[?Yield, ?Await, ?Return]? `}` + => block($0, $1, $2) + +@returns Vec<Statement> +StatementList[Yield, Await, Return] : + StatementListItem[?Yield, ?Await, ?Return] + => statement_list_single($0) + StatementList[?Yield, ?Await, ?Return] StatementListItem[?Yield, ?Await, ?Return] + => statement_list_append($0, $1) + +@returns Statement +StatementListItem[Yield, Await, Return] : + Statement[?Yield, ?Await, ?Return] + => $0 + Declaration[?Yield, ?Await] + => $0 + + +@returns Statement +LexicalDeclaration[In, Yield, Await] : + LetOrConst BindingList[?In, ?Yield, ?Await] `;` + => lexical_declaration($0, $1) + +@returns VariableDeclarationOrExpression +ForLexicalDeclaration[In, Yield, Await] : + LetOrConst BindingList[?In, ?Yield, ?Await] `;` + => for_lexical_declaration($0, $1) + +@returns VariableDeclarationKind +LetOrConst : + `let` + => let_kind($0) + `const` + => const_kind($0) + +@returns Vec<VariableDeclarator> +BindingList[In, Yield, Await] : + LexicalBinding[?In, ?Yield, ?Await] + => variable_declaration_list_single($0) + BindingList[?In, ?Yield, ?Await] `,` LexicalBinding[?In, ?Yield, ?Await] + => variable_declaration_list_append($0, $2) + +@returns VariableDeclarator +LexicalBinding[In, Yield, Await] : + BindingIdentifier[?Yield, ?Await] Initializer[?In, ?Yield, ?Await]? 
+ => variable_declaration(binding_identifier_to_binding($0), $1) + BindingPattern[?Yield, ?Await] Initializer[?In, ?Yield, ?Await] + => variable_declaration($0, Some($1)) + + +@returns Statement +VariableStatement[Yield, Await] : + `var` VariableDeclarationList[+In, ?Yield, ?Await] `;` + => variable_statement($0, $1) + +@returns Vec<VariableDeclarator> +VariableDeclarationList[In, Yield, Await] : + VariableDeclaration[?In, ?Yield, ?Await] + => variable_declaration_list_single($0) + VariableDeclarationList[?In, ?Yield, ?Await] `,` VariableDeclaration[?In, ?Yield, ?Await] + => variable_declaration_list_append($0, $2) + +@returns VariableDeclarator +VariableDeclaration[In, Yield, Await] : + BindingIdentifier[?Yield, ?Await] Initializer[?In, ?Yield, ?Await]? + => variable_declaration(binding_identifier_to_binding($0), $1) + BindingPattern[?Yield, ?Await] Initializer[?In, ?Yield, ?Await] + => variable_declaration($0, Some($1)) + + +@returns Binding +BindingPattern[Yield, Await] : + ObjectBindingPattern[?Yield, ?Await] + => $0 + ArrayBindingPattern[?Yield, ?Await] + => $0 + +@returns Binding +ObjectBindingPattern[Yield, Await] : + `{` `}` + => object_binding_pattern($0, binding_property_list_empty(), None, $1) + `{` BindingRestProperty[?Yield, ?Await] `}` + => object_binding_pattern($0, binding_property_list_empty(), Some($1), $2) + `{` BindingPropertyList[?Yield, ?Await] `}` + => object_binding_pattern($0, $1, None, $2) + `{` BindingPropertyList[?Yield, ?Await] `,` BindingRestProperty[?Yield, ?Await]? `}` + => object_binding_pattern($0, $1, $3, $4) + +@returns Binding +ArrayBindingPattern[Yield, Await] : + `[` Elision? BindingRestElement[?Yield, ?Await]? `]` + => array_binding_pattern($0, binding_element_list_empty(), $1, $2, $3) + `[` BindingElementList[?Yield, ?Await] `]` + => array_binding_pattern($0, $1, None, None, $2) + `[` BindingElementList[?Yield, ?Await] `,` Elision? BindingRestElement[?Yield, ?Await]? `]` + => array_binding_pattern($0, $1, $3, $4, $5) + +@returns BindingIdentifier +BindingRestProperty[Yield, Await] : + `...` BindingIdentifier[?Yield, ?Await] + => $1 + +@returns Vec<BindingProperty> +BindingPropertyList[Yield, Await] : + BindingProperty[?Yield, ?Await] + => binding_property_list_single($0) + BindingPropertyList[?Yield, ?Await] `,` BindingProperty[?Yield, ?Await] + => binding_property_list_append($0, $2) + +@returns Vec<Option<Parameter>> +BindingElementList[Yield, Await] : + BindingElisionElement[?Yield, ?Await] + => $0 + BindingElementList[?Yield, ?Await] `,` BindingElisionElement[?Yield, ?Await] + => binding_element_list_append($0, $2) + +@returns Vec<Option<Parameter>> +BindingElisionElement[Yield, Await] : + Elision? BindingElement[?Yield, ?Await] + => binding_elision_element($0, $1) + +@returns BindingProperty +BindingProperty[Yield, Await] : + SingleNameBinding[?Yield, ?Await] + => binding_property_shorthand($0) + PropertyName[?Yield, ?Await] `:` BindingElement[?Yield, ?Await] + => binding_property($0, $2) + +@returns Parameter +BindingElement[Yield, Await] : + SingleNameBinding[?Yield, ?Await] + => $0 + BindingPattern[?Yield, ?Await] Initializer[+In, ?Yield, ?Await]? + => binding_element_pattern($0, $1) + +@returns Parameter +SingleNameBinding[Yield, Await] : + BindingIdentifier[?Yield, ?Await] Initializer[+In, ?Yield, ?Await]? 
+ => single_name_binding($0, $1) + +@returns Binding +BindingRestElement[Yield, Await] : + `...` BindingIdentifier[?Yield, ?Await] + => binding_identifier_to_binding($1) + `...` BindingPattern[?Yield, ?Await] + => $1 + + +@returns Statement +EmptyStatement : + `;` + => empty_statement($0) + +@returns Statement +ExpressionStatement[Yield, Await] : + [lookahead <! {`{`, `function`, `async`, `class`, `let`}] Expression[+In, ?Yield, ?Await] `;` + => expression_statement($0) + + +@returns Statement +IfStatement[Yield, Await, Return] : + `if` `(` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] `else` Statement[?Yield, ?Await, ?Return] + => if_statement($0, $2, $4, Some($6)) + `if` `(` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] [lookahead != `else`] + => if_statement($0, $2, $4, None) + `if` `(` Expression[+In, ?Yield, ?Await] `)` FunctionDeclaration[?Yield, ?Await, ~Default] `else` Statement[?Yield, ?Await, ?Return] + => if_statement($0, $2, make_block_stmt_from_function_decl($4), Some($6)) + `if` `(` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] `else` FunctionDeclaration[?Yield, ?Await, ~Default] + => if_statement($0, $2, $4, Some(make_block_stmt_from_function_decl($6))) + `if` `(` Expression[+In, ?Yield, ?Await] `)` FunctionDeclaration[?Yield, ?Await, ~Default] `else` FunctionDeclaration[?Yield, ?Await, ~Default] + => if_statement($0, $2, make_block_stmt_from_function_decl($4), Some(make_block_stmt_from_function_decl($6))) + `if` `(` Expression[+In, ?Yield, ?Await] `)` FunctionDeclaration[?Yield, ?Await, ~Default] [lookahead != `else`] + => if_statement($0, $2, make_block_stmt_from_function_decl($4), None) + + +@returns Statement +IterationStatement[Yield, Await, Return] : + `do` Statement[?Yield, ?Await, ?Return] `while` `(` Expression[+In, ?Yield, ?Await] `)` `;` + => do_while_statement($0, $1, $4, $5) + `while` `(` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + => while_statement($0, $2, $4) + `for` `(` [lookahead != `let`] Expression[~In, ?Yield, ?Await]? `;` Expression[+In, ?Yield, ?Await]? `;` Expression[+In, ?Yield, ?Await]? `)` Statement[?Yield, ?Await, ?Return] + => for_statement($0, for_expression($2), $4, $6, $8) + `for` `(` `var` VariableDeclarationList[~In, ?Yield, ?Await] `;` Expression[+In, ?Yield, ?Await]? `;` Expression[+In, ?Yield, ?Await]? `)` Statement[?Yield, ?Await, ?Return] + => for_statement($0, Some(for_var_declaration($2, $3)), $5, $7, $9) + `for` `(` ForLexicalDeclaration[~In, ?Yield, ?Await] Expression[+In, ?Yield, ?Await]? `;` Expression[+In, ?Yield, ?Await]? 
`)` Statement[?Yield, ?Await, ?Return] + => for_statement_lexical($0, unbox_for_lexical_declaration($2), $3, $5, $7) + `for` `(` [lookahead != `let`] LeftHandSideExpression[?Yield, ?Await] `in` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + => for_in_statement($0, for_assignment_target($2), $4, $6) + `for` `(` `var` ForBinding[?Yield, ?Await] `in` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + => for_in_statement($0, for_in_or_of_var_declaration($2, $3, None), $5, $7) + `for` `(` `var` BindingIdentifier[?Yield, ?Await] Initializer[~In, ?Yield, ?Await] `in` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + => for_in_statement($0, for_in_or_of_var_declaration($2, binding_identifier_to_binding($3), Some($4)), $6, $8) + `for` `(` ForDeclaration[?Yield, ?Await] `in` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + => for_in_statement_lexical($0, unbox_for_declaration($2), $4, $6) + `for` `(` [lookahead <! {`async`, `let`} ] LeftHandSideExpression[?Yield, ?Await] `of` AssignmentExpression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + => for_of_statement($0, for_assignment_target($2), $4, $6) + `for` `(` `var` ForBinding[?Yield, ?Await] `of` AssignmentExpression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + => for_of_statement($0, for_in_or_of_var_declaration($2, $3, None), $5, $7) + `for` `(` ForDeclaration[?Yield, ?Await] `of` AssignmentExpression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + => for_of_statement_lexical($0, unbox_for_declaration($2), $4, $6) + [+Await] `for` `await` `(` [lookahead <! {`async`, `let`} ] LeftHandSideExpression[?Yield, ?Await] `of` AssignmentExpression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + => for_await_of_statement($0, for_assignment_target($3), $5, $7) + [+Await] `for` `await` `(` `var` ForBinding[?Yield, ?Await] `of` AssignmentExpression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + => for_await_of_statement($0, for_in_or_of_var_declaration($3, $4, None), $6, $8) + [+Await] `for` `await` `(` ForDeclaration[?Yield, ?Await] `of` AssignmentExpression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + => for_await_of_statement_lexical($0, unbox_for_declaration($3), $5, $7) + +@returns VariableDeclarationOrAssignmentTarget +ForDeclaration[Yield, Await] : + LetOrConst ForBinding[?Yield, ?Await] + => for_declaration($0, $1) + +@returns Binding +ForBinding[Yield, Await] : + BindingIdentifier[?Yield, ?Await] + => binding_identifier_to_binding($0) + BindingPattern[?Yield, ?Await] + => $0 + + +@returns Statement +ContinueStatement[Yield, Await] : + `continue` `;` + => continue_statement($0, None) + `continue` [no LineTerminator here] LabelIdentifier[?Yield, ?Await] `;` + => continue_statement($0, Some($1)) + + +@returns Statement +BreakStatement[Yield, Await] : + `break` `;` + => break_statement($0, None) + `break` [no LineTerminator here] LabelIdentifier[?Yield, ?Await] `;` + => break_statement($0, Some($1)) + + +@returns Statement +ReturnStatement[Yield, Await] : + `return` `;` + => return_statement($0, None) + `return` [no LineTerminator here] Expression[+In, ?Yield, ?Await] `;` + => return_statement($0, Some($1)) + + +@returns Statement +WithStatement[Yield, Await, Return] : + `with` `(` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + => with_statement($0, $2, $4) + + +@returns Statement +SwitchStatement[Yield, Await, Return] : + `switch` `(` 
Expression[+In, ?Yield, ?Await] `)` CaseBlock[?Yield, ?Await, ?Return] + => switch_statement($0, $2, $4) + +@returns Statement +CaseBlock[Yield, Await, Return] : + `{` CaseClauses[?Yield, ?Await, ?Return]? `}` + => case_block($0, $1, $2) + `{` CaseClauses[?Yield, ?Await, ?Return]? DefaultClause[?Yield, ?Await, ?Return] CaseClauses[?Yield, ?Await, ?Return]? `}` + => case_block_with_default($0, $1, $2, $3, $4) + +@returns Vec<SwitchCase> +CaseClauses[Yield, Await, Return] : + CaseClause[?Yield, ?Await, ?Return] + => case_clauses_single($0) + CaseClauses[?Yield, ?Await, ?Return] CaseClause[?Yield, ?Await, ?Return] + => case_clauses_append($0, $1) + +@returns SwitchCase +CaseClause[Yield, Await, Return] : + `case` Expression[+In, ?Yield, ?Await] `:` StatementList[?Yield, ?Await, ?Return]? + => case_clause($0, $1, $2, $3) + +@returns SwitchDefault +DefaultClause[Yield, Await, Return] : + `default` `:` StatementList[?Yield, ?Await, ?Return]? + => default_clause($0, $1, $2) + + +@returns Statement +LabelledStatement[Yield, Await, Return] : + LabelIdentifier[?Yield, ?Await] `:` LabelledItem[?Yield, ?Await, ?Return] + => labelled_statement($0, $2) + +@returns Statement +LabelledItem[Yield, Await, Return] : + Statement[?Yield, ?Await, ?Return] + => $0 + FunctionDeclaration[?Yield, ?Await, ~Default] + => $0 + + +@returns Statement +ThrowStatement[Yield, Await] : + `throw` [no LineTerminator here] Expression[+In, ?Yield, ?Await] `;` + => throw_statement($0, $1) + +@returns Statement +TryStatement[Yield, Await, Return] : + `try` Block[?Yield, ?Await, ?Return] Catch[?Yield, ?Await, ?Return] + => try_statement($0, $1, Some($2), None) + `try` Block[?Yield, ?Await, ?Return] Finally[?Yield, ?Await, ?Return] + => try_statement($0, $1, None, Some($2)) + `try` Block[?Yield, ?Await, ?Return] Catch[?Yield, ?Await, ?Return] Finally[?Yield, ?Await, ?Return] + => try_statement($0, $1, Some($2), Some($3)) + +@returns CatchClause +Catch[Yield, Await, Return] : + `catch` `(` CatchParameter[?Yield, ?Await] `)` CatchBlock[?Yield, ?Await, ?Return] + => catch($0, $2, $4) + `catch` Block[?Yield, ?Await, ?Return] + => catch_no_param($0, $1) + +@returns Block +CatchBlock[Yield, Await, Return] : + `{` StatementList[?Yield, ?Await, ?Return]? `}` + => catch_block($0, $1, $2) + +@returns Block +Finally[Yield, Await, Return] : + `finally` Block[?Yield, ?Await, ?Return] + => $1 + +@returns Binding +CatchParameter[Yield, Await] : + BindingIdentifier[?Yield, ?Await] + => binding_identifier_to_binding($0) + BindingPattern[?Yield, ?Await] + => $0 + + +@returns Statement +DebuggerStatement : + `debugger` `;` + => debugger_statement($0) + + +@returns Statement +FunctionDeclaration[Yield, Await, Default] : + `function` BindingIdentifier[?Yield, ?Await] `(` FormalParameters[~Yield, ~Await] `)` `{` FunctionBody[~Yield, ~Await] `}` + => function_decl(function($0, Some($1), $2, $3, $4, $5, $6, $7)) + [+Default] `function` `(` FormalParameters[~Yield, ~Await] `)` `{` FunctionBody[~Yield, ~Await] `}` + => function_decl(function($0, None, $1, $2, $3, $4, $5, $6)) + +@returns Expression +FunctionExpression : + `function` BindingIdentifier[~Yield, ~Await]? 
`(` FormalParameters[~Yield, ~Await] `)` `{` FunctionBody[~Yield, ~Await] `}` + => function_expr(function($0, $1, $2, $3, $4, $5, $6, $7)) + +@returns FormalParameters +UniqueFormalParameters[Yield, Await] : + FormalParameters[?Yield, ?Await] + => unique_formal_parameters($0) + +@returns FormalParameters +FormalParameters[Yield, Await] : + [empty] + => empty_formal_parameters() + FunctionRestParameter[?Yield, ?Await] + => with_rest_parameter(empty_formal_parameters(), $0) + FormalParameterList[?Yield, ?Await] + => $0 + FormalParameterList[?Yield, ?Await] `,` + => $0 + FormalParameterList[?Yield, ?Await] `,` FunctionRestParameter[?Yield, ?Await] + => with_rest_parameter($0, $2) + +@returns FormalParameters +FormalParameterList[Yield, Await] : + FormalParameter[?Yield, ?Await] + => formal_parameter_list_single($0) + FormalParameterList[?Yield, ?Await] `,` FormalParameter[?Yield, ?Await] + => formal_parameter_list_append($0, $2) + +@returns Binding +FunctionRestParameter[Yield, Await] : + BindingRestElement[?Yield, ?Await] + => $0 + +@returns Parameter +FormalParameter[Yield, Await] : + BindingElement[?Yield, ?Await] + => $0 + +@returns FunctionBody +FunctionBody[Yield, Await] : + FunctionStatementList[?Yield, ?Await] + => function_body($0) + +@returns Vec<Statement> +FunctionStatementList[Yield, Await] : + StatementList[?Yield, ?Await, +Return]? + => function_statement_list($0) + + +@returns Expression +ArrowFunction[In, Yield, Await] : + ArrowParameters[?Yield, ?Await] [no LineTerminator here] `=>` ConciseBody[?In] + => arrow_function($0, $2) + +@returns FormalParameters +ArrowParameters[Yield, Await] : + BindingIdentifier[?Yield, ?Await] + => arrow_parameters_bare($0) + CoverParenthesizedExpressionAndArrowParameterList[?Yield, ?Await] + => uncover_arrow_parameters($0) + +@returns ArrowExpressionBody +ConciseBody[In] : + [lookahead != `{` ] AssignmentExpression[?In, ~Yield, ~Await] + => concise_body_expression($0) + `{` FunctionBody[~Yield, ~Await] `}` + => concise_body_block($0, $1, $2) + + +ArrowFormalParameters[Yield, Await] : + `(` UniqueFormalParameters[?Yield, ?Await] `)` + + +@returns MethodDefinition +MethodDefinition[Yield, Await] : + ClassElementName[?Yield, ?Await] `(` UniqueFormalParameters[~Yield, ~Await] `)` `{` FunctionBody[~Yield, ~Await] `}` + => method_definition($0, $1, $2, $3, $4, $5, $6) + GeneratorMethod[?Yield, ?Await] + => $0 + AsyncMethod[?Yield, ?Await] + => $0 + AsyncGeneratorMethod[?Yield, ?Await] + => $0 + `get` ClassElementName[?Yield, ?Await] `(` `)` `{` FunctionBody[~Yield, ~Await] `}` + => getter($0, $1, $4, $5, $6) + `set` ClassElementName[?Yield, ?Await] `(` PropertySetParameterList `)` `{` FunctionBody[~Yield, ~Await] `}` + => setter($0, $1, $2, $3, $4, $5, $6, $7) + +@returns Parameter +PropertySetParameterList : + FormalParameter[~Yield, ~Await] + => $0 + + +@returns MethodDefinition +GeneratorMethod[Yield, Await] : + `*` ClassElementName[?Yield, ?Await] `(` UniqueFormalParameters[+Yield, ~Await] `)` `{` GeneratorBody `}` + => generator_method($0, $1, $2, $3, $4, $5, $6, $7) + +@returns Statement +GeneratorDeclaration[Yield, Await, Default] : + `function` `*` BindingIdentifier[?Yield, ?Await] `(` FormalParameters[+Yield, ~Await] `)` `{` GeneratorBody `}` + => function_decl(generator($0, Some($2), $3, $4, $5, $6, $7, $8)) + [+Default] `function` `*` `(` FormalParameters[+Yield, ~Await] `)` `{` GeneratorBody `}` + => function_decl(generator($0, None, $2, $3, $4, $5, $6, $7)) + +@returns Expression +GeneratorExpression : + `function` `*` 
BindingIdentifier[+Yield, ~Await]? `(` FormalParameters[+Yield, ~Await] `)` `{` GeneratorBody `}` + => function_expr(generator($0, $2, $3, $4, $5, $6, $7, $8)) + +@returns FunctionBody +GeneratorBody : + FunctionBody[+Yield, ~Await] + => $0 + +@returns Expression +YieldExpression[In, Await] : + `yield` + => yield_expr($0, None) + `yield` [no LineTerminator here] AssignmentExpression[?In, +Yield, ?Await] + => yield_expr($0, Some($1)) + `yield` [no LineTerminator here] `*` AssignmentExpression[?In, +Yield, ?Await] + => yield_star_expr($0, $2) + + +@returns MethodDefinition +AsyncGeneratorMethod[Yield, Await] : + `async` [no LineTerminator here] `*` ClassElementName[?Yield, ?Await] `(` UniqueFormalParameters[+Yield, +Await] `)` `{` AsyncGeneratorBody `}` + => async_generator_method($0, $2, $3, $4, $5, $6, $7, $8) + +@returns Statement +AsyncGeneratorDeclaration[Yield, Await, Default] : + `async` [no LineTerminator here] `function` `*` BindingIdentifier[?Yield, ?Await] `(` FormalParameters[+Yield, +Await] `)` `{` AsyncGeneratorBody `}` + => function_decl(async_generator($0, Some($3), $4, $5, $6, $7, $8, $9)) + [+Default] `async` [no LineTerminator here] `function` `*` `(` FormalParameters[+Yield, +Await] `)` `{` AsyncGeneratorBody `}` + => function_decl(async_generator($0, None, $3, $4, $5, $6, $7, $8)) + +@returns Expression +AsyncGeneratorExpression : + `async` [no LineTerminator here] `function` `*` BindingIdentifier[+Yield, +Await]? `(` FormalParameters[+Yield, +Await] `)` `{` AsyncGeneratorBody `}` + => function_expr(async_function($0, $3, $4, $5, $6, $7, $8, $9)) + +@returns FunctionBody +AsyncGeneratorBody : + FunctionBody[+Yield, +Await] + => $0 + + +@returns Statement +ClassDeclaration[Yield, Await, Default] : + `class` BindingIdentifier[?Yield, ?Await] ClassTail[?Yield, ?Await] + => class_declaration($0, Some($1), $2) + [+Default] `class` ClassTail[?Yield, ?Await] + => class_declaration($0, None, $1) + +@returns Expression +ClassExpression[Yield, Await] : + `class` BindingIdentifier[?Yield, ?Await]? ClassTail[?Yield, ?Await] + => class_expression($0, $1, $2) + +@returns ClassExpression +ClassTail[Yield, Await] : + ClassHeritage[?Yield, ?Await]? `{` ClassBody[?Yield, ?Await]? `}` + => class_tail($0, $2, $3) + +@returns Expression +ClassHeritage[Yield, Await] : + `extends` LeftHandSideExpression[?Yield, ?Await] + => $1 + +@returns Vec<Box<ClassElement>> +ClassBody[Yield, Await] : + ClassElementList[?Yield, ?Await] + => $0 + +@returns Vec<Box<ClassElement>> +ClassElementList[Yield, Await] : + ClassElement[?Yield, ?Await] + => $0 + ClassElementList[?Yield, ?Await] ClassElement[?Yield, ?Await] + => class_element_list_append($0, $1) + +@returns ClassElement +FieldDefinition[Yield, Await] : + ClassElementName[?Yield, ?Await] Initializer[+In, ~Yield, ~Await]? 
+ => class_field_definition($0, $1) + +@returns ClassElementName +ClassElementName[Yield, Await] : + PropertyName[?Yield, ?Await] + => property_name_to_class_element_name($0) + PrivateIdentifier + => class_element_name_private($0) + +@returns Vec<Box<ClassElement>> +ClassElement[Yield, Await] : + MethodDefinition[?Yield, ?Await] + => class_element($0) + `static` MethodDefinition[?Yield, ?Await] + => class_element_static($0, $1) + FieldDefinition[?Yield, ?Await] `;` + => class_element_to_vec($0) + `static` FieldDefinition[?Yield, ?Await] `;` + => class_element_static_field($0, $1) + `;` + => class_element_empty() + + +@returns Statement +AsyncFunctionDeclaration[Yield, Await, Default] : + `async` [no LineTerminator here] `function` BindingIdentifier[?Yield, ?Await] `(` FormalParameters[~Yield, +Await] `)` `{` AsyncFunctionBody `}` + => function_decl(async_function($0, Some($2), $3, $4, $5, $6, $7, $8)) + [+Default] `async` [no LineTerminator here] `function` `(` FormalParameters[~Yield, +Await] `)` `{` AsyncFunctionBody `}` + => function_decl(async_function($0, None, $2, $3, $4, $5, $6, $7)) + +@returns Expression +AsyncFunctionExpression : + `async` [no LineTerminator here] `function` `(` FormalParameters[~Yield, +Await] `)` `{` AsyncFunctionBody `}` + => function_expr(async_function($0, None, $2, $3, $4, $5, $6, $7)) + `async` [no LineTerminator here] `function` BindingIdentifier[~Yield, +Await] `(` FormalParameters[~Yield, +Await] `)` `{` AsyncFunctionBody `}` + => function_expr(async_function($0, Some($2), $3, $4, $5, $6, $7, $8)) + +@returns MethodDefinition +AsyncMethod[Yield, Await] : + `async` [no LineTerminator here] PropertyName[?Yield, ?Await] `(` UniqueFormalParameters[~Yield, +Await] `)` `{` AsyncFunctionBody `}` + => async_method($0, $1, $2, $3, $4, $5, $6, $7) + +@returns FunctionBody +AsyncFunctionBody : + FunctionBody[~Yield, +Await] + => $0 + +@returns Expression +AwaitExpression[Yield] : + `await` UnaryExpression[?Yield, +Await] + => await_expr($0, $1) + + +@returns Expression +AsyncArrowFunction[In, Yield, Await] : + `async` [no LineTerminator here] AsyncArrowBindingIdentifier[?Yield] [no LineTerminator here] `=>` AsyncConciseBody[?In] + => async_arrow_function_bare($0, $1, $3) + CoverCallExpressionAndAsyncArrowHead[?Yield, ?Await] [no LineTerminator here] `=>` AsyncConciseBody[?In] + => async_arrow_function($0, $2) + +@returns ArrowExpressionBody +AsyncConciseBody[In] : + [lookahead != `{`] AssignmentExpression[?In, ~Yield, +Await] + => concise_body_expression($0) + `{` AsyncFunctionBody `}` + => concise_body_block($0, $1, $2) + +@returns BindingIdentifier +AsyncArrowBindingIdentifier[Yield] : + BindingIdentifier[?Yield, +Await] + => $0 + +@returns Expression +CoverCallExpressionAndAsyncArrowHead[Yield, Await] : + MemberExpression[?Yield, ?Await] Arguments[?Yield, ?Await] + => call_expr($0, $1) + + +AsyncArrowHead : + `async` [no LineTerminator here] ArrowFormalParameters[~Yield, +Await] + + +@returns Script +Script : + ScriptBody? + => script($0) + +@returns Script +ScriptBody : + StatementList[~Yield, ~Await, ~Return] + => script_body($0) + +@returns Vec<Statement> +Module : + ModuleBody? 
+ => module($0) + +@returns Vec<Statement> +ModuleBody : + ModuleItemList + => $0 + +@returns Vec<Statement> +ModuleItemList : + ModuleItem + => module_item_list_single($0) + ModuleItemList ModuleItem + => module_item_list_append($0, $1) + +@returns Statement +ModuleItem : + ImportDeclaration + => $0 + ExportDeclaration + => $0 + StatementListItem[~Yield, ~Await, ~Return] + => $0 + + +@returns Statement +ImportDeclaration : + `import` ImportClause FromClause `;` + => import_declaration(Some($1), $2) + `import` ModuleSpecifier `;` + => import_declaration(None, $1) + +@returns Void +ImportClause : + ImportedDefaultBinding + => import_clause(Some($0), None, None) + NameSpaceImport + => import_clause(None, Some($0), None) + NamedImports + => import_clause(None, None, Some($0)) + ImportedDefaultBinding `,` NameSpaceImport + => import_clause(Some($0), Some($2), None) + ImportedDefaultBinding `,` NamedImports + => import_clause(Some($0), None, Some($2)) + +@returns BindingIdentifier +ImportedDefaultBinding : + ImportedBinding + => $0 + +@returns Void +NameSpaceImport : + `*` `as` ImportedBinding + => name_space_import($2) + +@returns Void +NamedImports : + `{` `}` + => imports_list_empty() + `{` ImportsList `}` + => $1 + `{` ImportsList `,` `}` + => $1 + +@returns Token +FromClause : + `from` ModuleSpecifier + => $1 + +@returns Void +ImportsList : + ImportSpecifier + => imports_list_append(imports_list_empty(), $0) + ImportsList `,` ImportSpecifier + => imports_list_append($0, $2) + +@returns Void +ImportSpecifier : + ImportedBinding + => import_specifier($0) + IdentifierName `as` ImportedBinding + => import_specifier_renaming($0, $2) + +@returns Token +ModuleSpecifier : + StringLiteral + => module_specifier($0) + +@returns BindingIdentifier +ImportedBinding : + BindingIdentifier[~Yield, ~Await] + => $0 + + +@returns Statement +ExportDeclaration : + `export` `*` FromClause `;` + => export_all_from($2) + `export` ExportClause FromClause `;` + => export_set_from($1, $2) + `export` ExportClause `;` + => export_set($1) + `export` VariableStatement[~Yield, ~Await] + => export_vars($1) + `export` Declaration[~Yield, ~Await] + => export_declaration($1) + `export` `default` HoistableDeclaration[~Yield, ~Await, +Default] + => export_default_hoistable($2) + `export` `default` ClassDeclaration[~Yield, ~Await, +Default] + => export_default_class($2) + `export` `default` [lookahead <! {`function`, `async`, `class`}] AssignmentExpression[+In, ~Yield, ~Await] `;` + => export_default_value($2) + +@returns Void +ExportClause : + `{` `}` + => exports_list_empty() + `{` ExportsList `}` + => $1 + `{` ExportsList `,` `}` + => $1 + +@returns Void +ExportsList : + ExportSpecifier + => exports_list_append(exports_list_empty(), $0) + ExportsList `,` ExportSpecifier + => exports_list_append($0, $2) + +@returns Void +ExportSpecifier : + IdentifierName + => export_specifier($0) + IdentifierName `as` IdentifierName + => export_specifier_renaming($0, $2) + diff --git a/third_party/rust/jsparagus/js_parser/es.esgrammar b/third_party/rust/jsparagus/js_parser/es.esgrammar new file mode 100644 index 0000000000..955fbdfe00 --- /dev/null +++ b/third_party/rust/jsparagus/js_parser/es.esgrammar @@ -0,0 +1,1668 @@ + +StringNumericLiteral ::: + StrWhiteSpace? + StrWhiteSpace? StrNumericLiteral StrWhiteSpace? + +StrWhiteSpace ::: + StrWhiteSpaceChar StrWhiteSpace? 
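
As an illustration (not part of the vendored grammar file), StringNumericLiteral is the grammar the specification applies when a string is converted to a number, for example by Number() or unary `+`; that is why leading and trailing StrWhiteSpace and the empty string are accepted. A few conversions it describes, assuming a standard JavaScript engine:

    Number("   0b1010   ");  // 10        (StrWhiteSpace? StrNumericLiteral StrWhiteSpace?)
    Number("-.5e1");         // -5        (`-` StrUnsignedDecimalLiteral)
    Number("Infinity");      // Infinity  (`Infinity` alternative)
    Number("");              // 0         (StrWhiteSpace? alone)
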
+ +StrWhiteSpaceChar ::: + WhiteSpace + LineTerminator + +StrNumericLiteral ::: + StrDecimalLiteral + BinaryIntegerLiteral + OctalIntegerLiteral + HexIntegerLiteral + +StrDecimalLiteral ::: + StrUnsignedDecimalLiteral + `+` StrUnsignedDecimalLiteral + `-` StrUnsignedDecimalLiteral + +StrUnsignedDecimalLiteral ::: + `Infinity` + DecimalDigits `.` DecimalDigits? ExponentPart? + `.` DecimalDigits ExponentPart? + DecimalDigits ExponentPart? + + +SourceCharacter :: + > any Unicode code point + + +InputElementDiv :: + WhiteSpace + LineTerminator + Comment + CommonToken + DivPunctuator + RightBracePunctuator + +InputElementRegExp :: + WhiteSpace + LineTerminator + Comment + CommonToken + RightBracePunctuator + RegularExpressionLiteral + +InputElementRegExpOrTemplateTail :: + WhiteSpace + LineTerminator + Comment + CommonToken + RegularExpressionLiteral + TemplateSubstitutionTail + +InputElementTemplateTail :: + WhiteSpace + LineTerminator + Comment + CommonToken + DivPunctuator + TemplateSubstitutionTail + + +WhiteSpace :: + <TAB> + <VT> + <FF> + <SP> + <NBSP> + <ZWNBSP> + <USP> + + +LineTerminator :: + <LF> + <CR> + <LS> + <PS> + +LineTerminatorSequence :: + <LF> + <CR> [lookahead != <LF> ] + <LS> + <PS> + <CR> <LF> + + +Comment :: + MultiLineComment + SingleLineComment + +MultiLineComment :: + `/*` MultiLineCommentChars? `*/` + +MultiLineCommentChars :: + MultiLineNotAsteriskChar MultiLineCommentChars? + `*` PostAsteriskCommentChars? + +PostAsteriskCommentChars :: + MultiLineNotForwardSlashOrAsteriskChar MultiLineCommentChars? + `*` PostAsteriskCommentChars? + +MultiLineNotAsteriskChar :: + SourceCharacter but not `*` + +MultiLineNotForwardSlashOrAsteriskChar :: + SourceCharacter but not one of `/` or `*` + +SingleLineComment :: + `//` SingleLineCommentChars? + +SingleLineCommentChars :: + SingleLineCommentChar SingleLineCommentChars? + +SingleLineCommentChar :: + SourceCharacter but not LineTerminator + + +CommonToken :: + IdentifierName + Punctuator + NumericLiteral + StringLiteral + Template + + +IdentifierName :: + IdentifierStart + IdentifierName IdentifierPart + +IdentifierStart :: + UnicodeIDStart + `$` + `_` + `\` UnicodeEscapeSequence + +IdentifierPart :: + UnicodeIDContinue + `$` + `\` UnicodeEscapeSequence + <ZWNJ> + <ZWJ> + +UnicodeIDStart :: + > any Unicode code point with the Unicode property “ID_Start” + +UnicodeIDContinue :: + > any Unicode code point with the Unicode property “ID_Continue” + + +ReservedWord :: + Keyword + FutureReservedWord + NullLiteral + BooleanLiteral + + +Keyword :: one of + `await` + `break` + `case` `catch` `class` `const` `continue` + `debugger` `default` `delete` `do` + `else` `export` `extends` + `finally` `for` `function` + `if` `import` `in` `instanceof` + `new` + `return` + `super` `switch` + `this` `throw` `try` `typeof` + `var` `void` + `while` `with` + `yield` + + +FutureReservedWord :: + `enum` + + +Punctuator :: one of + `{` `(` `)` `[` `]` + `.` `...` `;` `,` + `<` `>` `<=` `>=` + `==` `!=` `===` `!==` + `+` `-` `*` `%` `**` + `++` `--` + `<<` `>>` `>>>` + `&` `|` `^` + `!` `~` + `&&` `||` + `?` `:` + `=` `+=` `-=` `*=` `%=` `**=` `<<=` `>>=` `>>>=` `&=` `|=` `^=` + `=>` + +DivPunctuator :: + `/` + `/=` + +RightBracePunctuator :: + `}` + + +NullLiteral :: + `null` + + +BooleanLiteral :: + `true` + `false` + + +NumericLiteral :: + DecimalLiteral + BinaryIntegerLiteral + OctalIntegerLiteral + HexIntegerLiteral + +DecimalLiteral :: + DecimalIntegerLiteral `.` DecimalDigits? ExponentPart? + `.` DecimalDigits ExponentPart? 
+ DecimalIntegerLiteral ExponentPart? + +DecimalIntegerLiteral :: + `0` + NonZeroDigit DecimalDigits? + +DecimalDigits :: + DecimalDigit + DecimalDigits DecimalDigit + +DecimalDigit :: one of + `0` `1` `2` `3` `4` `5` `6` `7` `8` `9` + +NonZeroDigit :: one of + `1` `2` `3` `4` `5` `6` `7` `8` `9` + +ExponentPart :: + ExponentIndicator SignedInteger + +ExponentIndicator :: one of + `e` `E` + +SignedInteger :: + DecimalDigits + `+` DecimalDigits + `-` DecimalDigits + +BinaryIntegerLiteral :: + `0b` BinaryDigits + `0B` BinaryDigits + +BinaryDigits :: + BinaryDigit + BinaryDigits BinaryDigit + +BinaryDigit :: one of + `0` `1` + +OctalIntegerLiteral :: + `0o` OctalDigits + `0O` OctalDigits + +OctalDigits :: + OctalDigit + OctalDigits OctalDigit + +OctalDigit :: one of + `0` `1` `2` `3` `4` `5` `6` `7` + +HexIntegerLiteral :: + `0x` HexDigits + `0X` HexDigits + +HexDigits :: + HexDigit + HexDigits HexDigit + +HexDigit :: one of + `0` `1` `2` `3` `4` `5` `6` `7` `8` `9` `a` `b` `c` `d` `e` `f` `A` `B` `C` `D` `E` `F` + + +StringLiteral :: + `"` DoubleStringCharacters? `"` + `'` SingleStringCharacters? `'` + +DoubleStringCharacters :: + DoubleStringCharacter DoubleStringCharacters? + +SingleStringCharacters :: + SingleStringCharacter SingleStringCharacters? + +DoubleStringCharacter :: + SourceCharacter but not one of `"` or `\` or LineTerminator + <LS> + <PS> + `\` EscapeSequence + LineContinuation + +SingleStringCharacter :: + SourceCharacter but not one of `'` or `\` or LineTerminator + <LS> + <PS> + `\` EscapeSequence + LineContinuation + +LineContinuation :: + `\` LineTerminatorSequence + +EscapeSequence :: + CharacterEscapeSequence + `0` [lookahead <! DecimalDigit] + HexEscapeSequence + UnicodeEscapeSequence + + +CharacterEscapeSequence :: + SingleEscapeCharacter + NonEscapeCharacter + +SingleEscapeCharacter :: one of + `'` `"` `\` `b` `f` `n` `r` `t` `v` + +NonEscapeCharacter :: + SourceCharacter but not one of EscapeCharacter or LineTerminator + +EscapeCharacter :: + SingleEscapeCharacter + DecimalDigit + `x` + `u` + +HexEscapeSequence :: + `x` HexDigit HexDigit + +UnicodeEscapeSequence :: + `u` Hex4Digits + `u{` CodePoint `}` + +Hex4Digits :: + HexDigit HexDigit HexDigit HexDigit + + +RegularExpressionLiteral :: + `/` RegularExpressionBody `/` RegularExpressionFlags + +RegularExpressionBody :: + RegularExpressionFirstChar RegularExpressionChars + +RegularExpressionChars :: + [empty] + RegularExpressionChars RegularExpressionChar + +RegularExpressionFirstChar :: + RegularExpressionNonTerminator but not one of `*` or `\` or `/` or `[` + RegularExpressionBackslashSequence + RegularExpressionClass + +RegularExpressionChar :: + RegularExpressionNonTerminator but not one of `\` or `/` or `[` + RegularExpressionBackslashSequence + RegularExpressionClass + +RegularExpressionBackslashSequence :: + `\` RegularExpressionNonTerminator + +RegularExpressionNonTerminator :: + SourceCharacter but not LineTerminator + +RegularExpressionClass :: + `[` RegularExpressionClassChars `]` + +RegularExpressionClassChars :: + [empty] + RegularExpressionClassChars RegularExpressionClassChar + +RegularExpressionClassChar :: + RegularExpressionNonTerminator but not one of `]` or `\` + RegularExpressionBackslashSequence + +RegularExpressionFlags :: + [empty] + RegularExpressionFlags IdentifierPart + + +Template :: + NoSubstitutionTemplate + TemplateHead + +NoSubstitutionTemplate :: + ``` TemplateCharacters? ``` + +TemplateHead :: + ``` TemplateCharacters? 
`${` + +TemplateSubstitutionTail :: + TemplateMiddle + TemplateTail + +TemplateMiddle :: + `}` TemplateCharacters? `${` + +TemplateTail :: + `}` TemplateCharacters? ``` + +TemplateCharacters :: + TemplateCharacter TemplateCharacters? + +TemplateCharacter :: + `$` [lookahead != `{` ] + `\` EscapeSequence + `\` NotEscapeSequence + LineContinuation + LineTerminatorSequence + SourceCharacter but not one of ``` or `\` or `$` or LineTerminator + +NotEscapeSequence :: + `0` DecimalDigit + DecimalDigit but not `0` + `x` [lookahead <! HexDigit] + `x` HexDigit [lookahead <! HexDigit] + `u` [lookahead <! HexDigit] [lookahead != `{`] + `u` HexDigit [lookahead <! HexDigit] + `u` HexDigit HexDigit [lookahead <! HexDigit] + `u` HexDigit HexDigit HexDigit [lookahead <! HexDigit] + `u` `{` [lookahead <! HexDigit] + `u` `{` NotCodePoint [lookahead <! HexDigit] + `u` `{` CodePoint [lookahead <! HexDigit] [lookahead != `}`] + +NotCodePoint :: + HexDigits [> but only if MV of |HexDigits| > 0x10FFFF ] + +CodePoint :: + HexDigits [> but only if MV of |HexDigits| ≤ 0x10FFFF ] + + +IdentifierReference[Yield, Await] : + Identifier + [~Yield] `yield` + [~Await] `await` + +BindingIdentifier[Yield, Await] : + Identifier + `yield` + `await` + +LabelIdentifier[Yield, Await] : + Identifier + [~Yield] `yield` + [~Await] `await` + +Identifier : + IdentifierName but not ReservedWord + + +PrimaryExpression[Yield, Await] : + `this` + IdentifierReference[?Yield, ?Await] + Literal + ArrayLiteral[?Yield, ?Await] + ObjectLiteral[?Yield, ?Await] + FunctionExpression + ClassExpression[?Yield, ?Await] + GeneratorExpression + AsyncFunctionExpression + AsyncGeneratorExpression + RegularExpressionLiteral + TemplateLiteral[?Yield, ?Await, ~Tagged] + CoverParenthesizedExpressionAndArrowParameterList[?Yield, ?Await] #parencover + +CoverParenthesizedExpressionAndArrowParameterList[Yield, Await] : + `(` Expression[+In, ?Yield, ?Await] `)` + `(` Expression[+In, ?Yield, ?Await] `,` `)` + `(` `)` + `(` `...` BindingIdentifier[?Yield, ?Await] `)` + `(` `...` BindingPattern[?Yield, ?Await] `)` + `(` Expression[+In, ?Yield, ?Await] `,` `...` BindingIdentifier[?Yield, ?Await] `)` + `(` Expression[+In, ?Yield, ?Await] `,` `...` BindingPattern[?Yield, ?Await] `)` + + +ParenthesizedExpression[Yield, Await] : + `(` Expression[+In, ?Yield, ?Await] `)` + + +Literal : + NullLiteral + BooleanLiteral + NumericLiteral + StringLiteral + + +ArrayLiteral[Yield, Await] : + `[` Elision? `]` + `[` ElementList[?Yield, ?Await] `]` + `[` ElementList[?Yield, ?Await] `,` Elision? `]` + +ElementList[Yield, Await] : + Elision? AssignmentExpression[+In, ?Yield, ?Await] + Elision? SpreadElement[?Yield, ?Await] + ElementList[?Yield, ?Await] `,` Elision? AssignmentExpression[+In, ?Yield, ?Await] + ElementList[?Yield, ?Await] `,` Elision? 
SpreadElement[?Yield, ?Await] + +Elision : + `,` + Elision `,` + +SpreadElement[Yield, Await] : + `...` AssignmentExpression[+In, ?Yield, ?Await] + + +ObjectLiteral[Yield, Await] : + `{` `}` + `{` PropertyDefinitionList[?Yield, ?Await] `}` + `{` PropertyDefinitionList[?Yield, ?Await] `,` `}` + +PropertyDefinitionList[Yield, Await] : + PropertyDefinition[?Yield, ?Await] + PropertyDefinitionList[?Yield, ?Await] `,` PropertyDefinition[?Yield, ?Await] + +PropertyDefinition[Yield, Await] : + IdentifierReference[?Yield, ?Await] + CoverInitializedName[?Yield, ?Await] + PropertyName[?Yield, ?Await] `:` AssignmentExpression[+In, ?Yield, ?Await] + MethodDefinition[?Yield, ?Await] + `...` AssignmentExpression[+In, ?Yield, ?Await] + +PropertyName[Yield, Await] : + LiteralPropertyName + ComputedPropertyName[?Yield, ?Await] + +LiteralPropertyName : + IdentifierName + StringLiteral + NumericLiteral + +ComputedPropertyName[Yield, Await] : + `[` AssignmentExpression[+In, ?Yield, ?Await] `]` + +CoverInitializedName[Yield, Await] : + IdentifierReference[?Yield, ?Await] Initializer[+In, ?Yield, ?Await] + +Initializer[In, Yield, Await] : + `=` AssignmentExpression[?In, ?Yield, ?Await] + + +TemplateLiteral[Yield, Await, Tagged] : + NoSubstitutionTemplate + SubstitutionTemplate[?Yield, ?Await, ?Tagged] + +SubstitutionTemplate[Yield, Await, Tagged] : + TemplateHead Expression[+In, ?Yield, ?Await] TemplateSpans[?Yield, ?Await, ?Tagged] + +TemplateSpans[Yield, Await, Tagged] : + TemplateTail + TemplateMiddleList[?Yield, ?Await, ?Tagged] TemplateTail + +TemplateMiddleList[Yield, Await, Tagged] : + TemplateMiddle Expression[+In, ?Yield, ?Await] + TemplateMiddleList[?Yield, ?Await, ?Tagged] TemplateMiddle Expression[+In, ?Yield, ?Await] + + +MemberExpression[Yield, Await] : + PrimaryExpression[?Yield, ?Await] + MemberExpression[?Yield, ?Await] `[` Expression[+In, ?Yield, ?Await] `]` + MemberExpression[?Yield, ?Await] `.` IdentifierName + MemberExpression[?Yield, ?Await] TemplateLiteral[?Yield, ?Await, +Tagged] + SuperProperty[?Yield, ?Await] + MetaProperty + `new` MemberExpression[?Yield, ?Await] Arguments[?Yield, ?Await] + +SuperProperty[Yield, Await] : + `super` `[` Expression[+In, ?Yield, ?Await] `]` + `super` `.` IdentifierName + +MetaProperty : + NewTarget + +NewTarget : + `new` `.` `target` + +NewExpression[Yield, Await] : + MemberExpression[?Yield, ?Await] + `new` NewExpression[?Yield, ?Await] + +CallExpression[Yield, Await] : + CoverCallExpressionAndAsyncArrowHead[?Yield, ?Await] #callcover + SuperCall[?Yield, ?Await] + ImportCall[?Yield, ?Await] + CallExpression[?Yield, ?Await] Arguments[?Yield, ?Await] + CallExpression[?Yield, ?Await] `[` Expression[+In, ?Yield, ?Await] `]` + CallExpression[?Yield, ?Await] `.` IdentifierName + CallExpression[?Yield, ?Await] TemplateLiteral[?Yield, ?Await, +Tagged] + +SuperCall[Yield, Await] : + `super` Arguments[?Yield, ?Await] + +ImportCall[Yield, Await] : + `import` `(` AssignmentExpression[+In, ?Yield, ?Await] `)` + +Arguments[Yield, Await] : + `(` `)` + `(` ArgumentList[?Yield, ?Await] `)` + `(` ArgumentList[?Yield, ?Await] `,` `)` + +ArgumentList[Yield, Await] : + AssignmentExpression[+In, ?Yield, ?Await] + `...` AssignmentExpression[+In, ?Yield, ?Await] + ArgumentList[?Yield, ?Await] `,` AssignmentExpression[+In, ?Yield, ?Await] + ArgumentList[?Yield, ?Await] `,` `...` AssignmentExpression[+In, ?Yield, ?Await] + +LeftHandSideExpression[Yield, Await] : + NewExpression[?Yield, ?Await] + CallExpression[?Yield, ?Await] + + +CallMemberExpression[Yield, Await] : + 
MemberExpression[?Yield, ?Await] Arguments[?Yield, ?Await] + + +UpdateExpression[Yield, Await] : + LeftHandSideExpression[?Yield, ?Await] + LeftHandSideExpression[?Yield, ?Await] [no LineTerminator here] `++` + LeftHandSideExpression[?Yield, ?Await] [no LineTerminator here] `--` + `++` UnaryExpression[?Yield, ?Await] + `--` UnaryExpression[?Yield, ?Await] + + +UnaryExpression[Yield, Await] : + UpdateExpression[?Yield, ?Await] + `delete` UnaryExpression[?Yield, ?Await] + `void` UnaryExpression[?Yield, ?Await] + `typeof` UnaryExpression[?Yield, ?Await] + `+` UnaryExpression[?Yield, ?Await] + `-` UnaryExpression[?Yield, ?Await] + `~` UnaryExpression[?Yield, ?Await] + `!` UnaryExpression[?Yield, ?Await] + [+Await] AwaitExpression[?Yield] + + +ExponentiationExpression[Yield, Await] : + UnaryExpression[?Yield, ?Await] + UpdateExpression[?Yield, ?Await] `**` ExponentiationExpression[?Yield, ?Await] + + +MultiplicativeExpression[Yield, Await] : + ExponentiationExpression[?Yield, ?Await] + MultiplicativeExpression[?Yield, ?Await] MultiplicativeOperator ExponentiationExpression[?Yield, ?Await] + +MultiplicativeOperator : one of + `*` `/` `%` + + +AdditiveExpression[Yield, Await] : + MultiplicativeExpression[?Yield, ?Await] + AdditiveExpression[?Yield, ?Await] `+` MultiplicativeExpression[?Yield, ?Await] + AdditiveExpression[?Yield, ?Await] `-` MultiplicativeExpression[?Yield, ?Await] + + +ShiftExpression[Yield, Await] : + AdditiveExpression[?Yield, ?Await] + ShiftExpression[?Yield, ?Await] `<<` AdditiveExpression[?Yield, ?Await] + ShiftExpression[?Yield, ?Await] `>>` AdditiveExpression[?Yield, ?Await] + ShiftExpression[?Yield, ?Await] `>>>` AdditiveExpression[?Yield, ?Await] + + +RelationalExpression[In, Yield, Await] : + ShiftExpression[?Yield, ?Await] + RelationalExpression[?In, ?Yield, ?Await] `<` ShiftExpression[?Yield, ?Await] + RelationalExpression[?In, ?Yield, ?Await] `>` ShiftExpression[?Yield, ?Await] + RelationalExpression[?In, ?Yield, ?Await] `<=` ShiftExpression[?Yield, ?Await] + RelationalExpression[?In, ?Yield, ?Await] `>=` ShiftExpression[?Yield, ?Await] + RelationalExpression[?In, ?Yield, ?Await] `instanceof` ShiftExpression[?Yield, ?Await] + [+In] RelationalExpression[+In, ?Yield, ?Await] `in` ShiftExpression[?Yield, ?Await] + + +EqualityExpression[In, Yield, Await] : + RelationalExpression[?In, ?Yield, ?Await] + EqualityExpression[?In, ?Yield, ?Await] `==` RelationalExpression[?In, ?Yield, ?Await] + EqualityExpression[?In, ?Yield, ?Await] `!=` RelationalExpression[?In, ?Yield, ?Await] + EqualityExpression[?In, ?Yield, ?Await] `===` RelationalExpression[?In, ?Yield, ?Await] + EqualityExpression[?In, ?Yield, ?Await] `!==` RelationalExpression[?In, ?Yield, ?Await] + + +BitwiseANDExpression[In, Yield, Await] : + EqualityExpression[?In, ?Yield, ?Await] + BitwiseANDExpression[?In, ?Yield, ?Await] `&` EqualityExpression[?In, ?Yield, ?Await] + +BitwiseXORExpression[In, Yield, Await] : + BitwiseANDExpression[?In, ?Yield, ?Await] + BitwiseXORExpression[?In, ?Yield, ?Await] `^` BitwiseANDExpression[?In, ?Yield, ?Await] + +BitwiseORExpression[In, Yield, Await] : + BitwiseXORExpression[?In, ?Yield, ?Await] + BitwiseORExpression[?In, ?Yield, ?Await] `|` BitwiseXORExpression[?In, ?Yield, ?Await] + + +LogicalANDExpression[In, Yield, Await] : + BitwiseORExpression[?In, ?Yield, ?Await] + LogicalANDExpression[?In, ?Yield, ?Await] `&&` BitwiseORExpression[?In, ?Yield, ?Await] + +LogicalORExpression[In, Yield, Await] : + LogicalANDExpression[?In, ?Yield, ?Await] + LogicalORExpression[?In, ?Yield, 
?Await] `||` LogicalANDExpression[?In, ?Yield, ?Await] + + +ConditionalExpression[In, Yield, Await] : + LogicalORExpression[?In, ?Yield, ?Await] + LogicalORExpression[?In, ?Yield, ?Await] `?` AssignmentExpression[+In, ?Yield, ?Await] `:` AssignmentExpression[?In, ?Yield, ?Await] + + +AssignmentExpression[In, Yield, Await] : + ConditionalExpression[?In, ?Yield, ?Await] + [+Yield] YieldExpression[?In, ?Await] + ArrowFunction[?In, ?Yield, ?Await] + AsyncArrowFunction[?In, ?Yield, ?Await] + LeftHandSideExpression[?Yield, ?Await] `=` AssignmentExpression[?In, ?Yield, ?Await] #assignment + LeftHandSideExpression[?Yield, ?Await] AssignmentOperator AssignmentExpression[?In, ?Yield, ?Await] + +AssignmentOperator : one of + `*=` `/=` `%=` `+=` `-=` `<<=` `>>=` `>>>=` `&=` `^=` `|=` `**=` + + +AssignmentPattern[Yield, Await] : + ObjectAssignmentPattern[?Yield, ?Await] + ArrayAssignmentPattern[?Yield, ?Await] + +ObjectAssignmentPattern[Yield, Await] : + `{` `}` + `{` AssignmentRestProperty[?Yield, ?Await] `}` + `{` AssignmentPropertyList[?Yield, ?Await] `}` + `{` AssignmentPropertyList[?Yield, ?Await] `,` AssignmentRestProperty[?Yield, ?Await]? `}` + +ArrayAssignmentPattern[Yield, Await] : + `[` Elision? AssignmentRestElement[?Yield, ?Await]? `]` + `[` AssignmentElementList[?Yield, ?Await] `]` + `[` AssignmentElementList[?Yield, ?Await] `,` Elision? AssignmentRestElement[?Yield, ?Await]? `]` + +AssignmentRestProperty[Yield, Await] : + `...` DestructuringAssignmentTarget[?Yield, ?Await] + +AssignmentPropertyList[Yield, Await] : + AssignmentProperty[?Yield, ?Await] + AssignmentPropertyList[?Yield, ?Await] `,` AssignmentProperty[?Yield, ?Await] + +AssignmentElementList[Yield, Await] : + AssignmentElisionElement[?Yield, ?Await] + AssignmentElementList[?Yield, ?Await] `,` AssignmentElisionElement[?Yield, ?Await] + +AssignmentElisionElement[Yield, Await] : + Elision? AssignmentElement[?Yield, ?Await] + +AssignmentProperty[Yield, Await] : + IdentifierReference[?Yield, ?Await] Initializer[+In, ?Yield, ?Await]? + PropertyName[?Yield, ?Await] `:` AssignmentElement[?Yield, ?Await] + +AssignmentElement[Yield, Await] : + DestructuringAssignmentTarget[?Yield, ?Await] Initializer[+In, ?Yield, ?Await]? 
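
As an illustration (not part of the vendored grammar file), the AssignmentPattern productions describe destructuring on the left-hand side of a plain assignment, as opposed to BindingPattern, which appears in declarations and parameters. A short JavaScript sketch of targets covered by ObjectAssignmentPattern and ArrayAssignmentPattern (names invented for the example):

    let a, rest, first, second, remaining;
    const payload = { a: 1, items: [10, 20, 30] };
    ({ a = 0, ...rest } = payload);                        // AssignmentProperty with Initializer, AssignmentRestProperty
    [first, , second = 2, ...remaining] = payload.items;   // Elision, Initializer, AssignmentRestElement

An object pattern used as a whole expression statement must be parenthesized so that the leading `{` is not taken as the start of a Block.
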
+ +AssignmentRestElement[Yield, Await] : + `...` DestructuringAssignmentTarget[?Yield, ?Await] + +DestructuringAssignmentTarget[Yield, Await] : + LeftHandSideExpression[?Yield, ?Await] + + +Expression[In, Yield, Await] : + AssignmentExpression[?In, ?Yield, ?Await] + Expression[?In, ?Yield, ?Await] `,` AssignmentExpression[?In, ?Yield, ?Await] + + +Statement[Yield, Await, Return] : + BlockStatement[?Yield, ?Await, ?Return] + VariableStatement[?Yield, ?Await] + EmptyStatement + ExpressionStatement[?Yield, ?Await] + IfStatement[?Yield, ?Await, ?Return] + BreakableStatement[?Yield, ?Await, ?Return] + ContinueStatement[?Yield, ?Await] + BreakStatement[?Yield, ?Await] + [+Return] ReturnStatement[?Yield, ?Await] + WithStatement[?Yield, ?Await, ?Return] + LabelledStatement[?Yield, ?Await, ?Return] + ThrowStatement[?Yield, ?Await] + TryStatement[?Yield, ?Await, ?Return] + DebuggerStatement + +Declaration[Yield, Await] : + HoistableDeclaration[?Yield, ?Await, ~Default] + ClassDeclaration[?Yield, ?Await, ~Default] + LexicalDeclaration[+In, ?Yield, ?Await] + +HoistableDeclaration[Yield, Await, Default] : + FunctionDeclaration[?Yield, ?Await, ?Default] + GeneratorDeclaration[?Yield, ?Await, ?Default] + AsyncFunctionDeclaration[?Yield, ?Await, ?Default] + AsyncGeneratorDeclaration[?Yield, ?Await, ?Default] + +BreakableStatement[Yield, Await, Return] : + IterationStatement[?Yield, ?Await, ?Return] + SwitchStatement[?Yield, ?Await, ?Return] + + +BlockStatement[Yield, Await, Return] : + Block[?Yield, ?Await, ?Return] + +Block[Yield, Await, Return] : + `{` StatementList[?Yield, ?Await, ?Return]? `}` + +StatementList[Yield, Await, Return] : + StatementListItem[?Yield, ?Await, ?Return] + StatementList[?Yield, ?Await, ?Return] StatementListItem[?Yield, ?Await, ?Return] + +StatementListItem[Yield, Await, Return] : + Statement[?Yield, ?Await, ?Return] + Declaration[?Yield, ?Await] + + +LexicalDeclaration[In, Yield, Await] : + LetOrConst BindingList[?In, ?Yield, ?Await] `;` + +LetOrConst : + `let` + `const` + +BindingList[In, Yield, Await] : + LexicalBinding[?In, ?Yield, ?Await] + BindingList[?In, ?Yield, ?Await] `,` LexicalBinding[?In, ?Yield, ?Await] + +LexicalBinding[In, Yield, Await] : + BindingIdentifier[?Yield, ?Await] Initializer[?In, ?Yield, ?Await]? + BindingPattern[?Yield, ?Await] Initializer[?In, ?Yield, ?Await] + + +VariableStatement[Yield, Await] : + `var` VariableDeclarationList[+In, ?Yield, ?Await] `;` + +VariableDeclarationList[In, Yield, Await] : + VariableDeclaration[?In, ?Yield, ?Await] + VariableDeclarationList[?In, ?Yield, ?Await] `,` VariableDeclaration[?In, ?Yield, ?Await] + +VariableDeclaration[In, Yield, Await] : + BindingIdentifier[?Yield, ?Await] Initializer[?In, ?Yield, ?Await]? + BindingPattern[?Yield, ?Await] Initializer[?In, ?Yield, ?Await] + + +BindingPattern[Yield, Await] : + ObjectBindingPattern[?Yield, ?Await] + ArrayBindingPattern[?Yield, ?Await] + +ObjectBindingPattern[Yield, Await] : + `{` `}` + `{` BindingRestProperty[?Yield, ?Await] `}` + `{` BindingPropertyList[?Yield, ?Await] `}` + `{` BindingPropertyList[?Yield, ?Await] `,` BindingRestProperty[?Yield, ?Await]? `}` + +ArrayBindingPattern[Yield, Await] : + `[` Elision? BindingRestElement[?Yield, ?Await]? `]` + `[` BindingElementList[?Yield, ?Await] `]` + `[` BindingElementList[?Yield, ?Await] `,` Elision? BindingRestElement[?Yield, ?Await]? 
`]` + +BindingRestProperty[Yield, Await] : + `...` BindingIdentifier[?Yield, ?Await] + +BindingPropertyList[Yield, Await] : + BindingProperty[?Yield, ?Await] + BindingPropertyList[?Yield, ?Await] `,` BindingProperty[?Yield, ?Await] + +BindingElementList[Yield, Await] : + BindingElisionElement[?Yield, ?Await] + BindingElementList[?Yield, ?Await] `,` BindingElisionElement[?Yield, ?Await] + +BindingElisionElement[Yield, Await] : + Elision? BindingElement[?Yield, ?Await] + +BindingProperty[Yield, Await] : + SingleNameBinding[?Yield, ?Await] + PropertyName[?Yield, ?Await] `:` BindingElement[?Yield, ?Await] + +BindingElement[Yield, Await] : + SingleNameBinding[?Yield, ?Await] + BindingPattern[?Yield, ?Await] Initializer[+In, ?Yield, ?Await]? + +SingleNameBinding[Yield, Await] : + BindingIdentifier[?Yield, ?Await] Initializer[+In, ?Yield, ?Await]? + +BindingRestElement[Yield, Await] : + `...` BindingIdentifier[?Yield, ?Await] + `...` BindingPattern[?Yield, ?Await] + + +EmptyStatement : + `;` + + +ExpressionStatement[Yield, Await] : + [lookahead <! {`{`, `function`, `async` [no |LineTerminator| here] `function`, `class`, `let` `[`}] Expression[+In, ?Yield, ?Await] `;` + + +IfStatement[Yield, Await, Return] : + `if` `(` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] `else` Statement[?Yield, ?Await, ?Return] + `if` `(` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + + +IterationStatement[Yield, Await, Return] : + `do` Statement[?Yield, ?Await, ?Return] `while` `(` Expression[+In, ?Yield, ?Await] `)` `;` + `while` `(` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + `for` `(` [lookahead != `let` `[`] Expression[~In, ?Yield, ?Await]? `;` Expression[+In, ?Yield, ?Await]? `;` Expression[+In, ?Yield, ?Await]? `)` Statement[?Yield, ?Await, ?Return] + `for` `(` `var` VariableDeclarationList[~In, ?Yield, ?Await] `;` Expression[+In, ?Yield, ?Await]? `;` Expression[+In, ?Yield, ?Await]? `)` Statement[?Yield, ?Await, ?Return] + `for` `(` LexicalDeclaration[~In, ?Yield, ?Await] Expression[+In, ?Yield, ?Await]? `;` Expression[+In, ?Yield, ?Await]? 
`)` Statement[?Yield, ?Await, ?Return] + `for` `(` [lookahead != `let` `[`] LeftHandSideExpression[?Yield, ?Await] `in` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + `for` `(` `var` ForBinding[?Yield, ?Await] `in` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + `for` `(` ForDeclaration[?Yield, ?Await] `in` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + `for` `(` [lookahead != `let` ] LeftHandSideExpression[?Yield, ?Await] `of` AssignmentExpression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + `for` `(` `var` ForBinding[?Yield, ?Await] `of` AssignmentExpression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + `for` `(` ForDeclaration[?Yield, ?Await] `of` AssignmentExpression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + [+Await] `for` `await` `(` [lookahead != `let` ] LeftHandSideExpression[?Yield, ?Await] `of` AssignmentExpression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + [+Await] `for` `await` `(` `var` ForBinding[?Yield, ?Await] `of` AssignmentExpression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + [+Await] `for` `await` `(` ForDeclaration[?Yield, ?Await] `of` AssignmentExpression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + +ForDeclaration[Yield, Await] : + LetOrConst ForBinding[?Yield, ?Await] + +ForBinding[Yield, Await] : + BindingIdentifier[?Yield, ?Await] + BindingPattern[?Yield, ?Await] + + +ContinueStatement[Yield, Await] : + `continue` `;` + `continue` [no LineTerminator here] LabelIdentifier[?Yield, ?Await] `;` + + +BreakStatement[Yield, Await] : + `break` `;` + `break` [no LineTerminator here] LabelIdentifier[?Yield, ?Await] `;` + + +ReturnStatement[Yield, Await] : + `return` `;` + `return` [no LineTerminator here] Expression[+In, ?Yield, ?Await] `;` + + +WithStatement[Yield, Await, Return] : + `with` `(` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + + +SwitchStatement[Yield, Await, Return] : + `switch` `(` Expression[+In, ?Yield, ?Await] `)` CaseBlock[?Yield, ?Await, ?Return] + +CaseBlock[Yield, Await, Return] : + `{` CaseClauses[?Yield, ?Await, ?Return]? `}` + `{` CaseClauses[?Yield, ?Await, ?Return]? DefaultClause[?Yield, ?Await, ?Return] CaseClauses[?Yield, ?Await, ?Return]? `}` + +CaseClauses[Yield, Await, Return] : + CaseClause[?Yield, ?Await, ?Return] + CaseClauses[?Yield, ?Await, ?Return] CaseClause[?Yield, ?Await, ?Return] + +CaseClause[Yield, Await, Return] : + `case` Expression[+In, ?Yield, ?Await] `:` StatementList[?Yield, ?Await, ?Return]? + +DefaultClause[Yield, Await, Return] : + `default` `:` StatementList[?Yield, ?Await, ?Return]? 
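The `[lookahead != `let` `[`]` and `[lookahead != `let`]` restrictions on the `for` heads above commit a head that begins with `let` to the declaration alternatives: when the upcoming tokens are `let` `[`, the Expression and LeftHandSideExpression alternatives are simply unavailable, so `let` cannot be read there as an identifier followed by an element access. A small sketch of that check over a token list; the function name is invented for illustration.

    def c_style_head_may_be_expression(tokens):
        # Models the `[lookahead != `let` `[`]` restriction on the
        # Expression-headed `for` alternatives shown above.
        return tokens[:2] != ['let', '[']

    print(c_style_head_may_be_expression(['let', '[', 'a', ']']))  # False: the declaration alternative applies
    print(c_style_head_may_be_expression(['counter', '=', '0']))   # True: a plain Expression head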
+ + +LabelledStatement[Yield, Await, Return] : + LabelIdentifier[?Yield, ?Await] `:` LabelledItem[?Yield, ?Await, ?Return] + +LabelledItem[Yield, Await, Return] : + Statement[?Yield, ?Await, ?Return] + FunctionDeclaration[?Yield, ?Await, ~Default] + + +ThrowStatement[Yield, Await] : + `throw` [no LineTerminator here] Expression[+In, ?Yield, ?Await] `;` + + +TryStatement[Yield, Await, Return] : + `try` Block[?Yield, ?Await, ?Return] Catch[?Yield, ?Await, ?Return] + `try` Block[?Yield, ?Await, ?Return] Finally[?Yield, ?Await, ?Return] + `try` Block[?Yield, ?Await, ?Return] Catch[?Yield, ?Await, ?Return] Finally[?Yield, ?Await, ?Return] + +Catch[Yield, Await, Return] : + `catch` `(` CatchParameter[?Yield, ?Await] `)` Block[?Yield, ?Await, ?Return] + `catch` Block[?Yield, ?Await, ?Return] + +Finally[Yield, Await, Return] : + `finally` Block[?Yield, ?Await, ?Return] + +CatchParameter[Yield, Await] : + BindingIdentifier[?Yield, ?Await] + BindingPattern[?Yield, ?Await] + + +DebuggerStatement : + `debugger` `;` + + +FunctionDeclaration[Yield, Await, Default] : + `function` BindingIdentifier[?Yield, ?Await] `(` FormalParameters[~Yield, ~Await] `)` `{` FunctionBody[~Yield, ~Await] `}` + [+Default] `function` `(` FormalParameters[~Yield, ~Await] `)` `{` FunctionBody[~Yield, ~Await] `}` + +FunctionExpression : + `function` BindingIdentifier[~Yield, ~Await]? `(` FormalParameters[~Yield, ~Await] `)` `{` FunctionBody[~Yield, ~Await] `}` + +UniqueFormalParameters[Yield, Await] : + FormalParameters[?Yield, ?Await] + +FormalParameters[Yield, Await] : + [empty] + FunctionRestParameter[?Yield, ?Await] + FormalParameterList[?Yield, ?Await] + FormalParameterList[?Yield, ?Await] `,` + FormalParameterList[?Yield, ?Await] `,` FunctionRestParameter[?Yield, ?Await] + +FormalParameterList[Yield, Await] : + FormalParameter[?Yield, ?Await] + FormalParameterList[?Yield, ?Await] `,` FormalParameter[?Yield, ?Await] + +FunctionRestParameter[Yield, Await] : + BindingRestElement[?Yield, ?Await] + +FormalParameter[Yield, Await] : + BindingElement[?Yield, ?Await] + +FunctionBody[Yield, Await] : + FunctionStatementList[?Yield, ?Await] + +FunctionStatementList[Yield, Await] : + StatementList[?Yield, ?Await, +Return]? 
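`FunctionStatementList` is where the `Return` parameter is switched on: `ReturnStatement` is reachable only through the `[+Return]` alternative of `Statement`, `FunctionStatementList` above passes `+Return`, and `ScriptBody` later in this file passes `~Return`, which is why `return` at the top level of a Script is a syntax error. A toy recursive-descent fragment showing the same flag threading; the function names are hypothetical and unrelated to the generated parser.

    def parse_statement(tok, return_allowed):
        if tok == 'return':
            if not return_allowed:   # the [+Return] guard in the Statement production
                raise SyntaxError("'return' outside of a function body")
            return 'ReturnStatement'
        return 'ExpressionStatement'

    def parse_function_body(tok):
        # FunctionStatementList : StatementList[?Yield, ?Await, +Return]?
        return parse_statement(tok, return_allowed=True)

    print(parse_function_body('return'))   # ReturnStatement
    try:
        parse_statement('return', return_allowed=False)   # Script top level
    except SyntaxError as exc:
        print(exc)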
+ + +ArrowFunction[In, Yield, Await] : + ArrowParameters[?Yield, ?Await] [no LineTerminator here] `=>` ConciseBody[?In] + +ArrowParameters[Yield, Await] : + BindingIdentifier[?Yield, ?Await] + CoverParenthesizedExpressionAndArrowParameterList[?Yield, ?Await] #parencover + +ConciseBody[In] : + [lookahead != `{` ] AssignmentExpression[?In, ~Yield, ~Await] + `{` FunctionBody[~Yield, ~Await] `}` + + +ArrowFormalParameters[Yield, Await] : + `(` UniqueFormalParameters[?Yield, ?Await] `)` + + +MethodDefinition[Yield, Await] : + PropertyName[?Yield, ?Await] `(` UniqueFormalParameters[~Yield, ~Await] `)` `{` FunctionBody[~Yield, ~Await] `}` + GeneratorMethod[?Yield, ?Await] + AsyncMethod[?Yield, ?Await] + AsyncGeneratorMethod[?Yield, ?Await] + `get` PropertyName[?Yield, ?Await] `(` `)` `{` FunctionBody[~Yield, ~Await] `}` + `set` PropertyName[?Yield, ?Await] `(` PropertySetParameterList `)` `{` FunctionBody[~Yield, ~Await] `}` + +PropertySetParameterList : + FormalParameter[~Yield, ~Await] + + +GeneratorMethod[Yield, Await] : + `*` PropertyName[?Yield, ?Await] `(` UniqueFormalParameters[+Yield, ~Await] `)` `{` GeneratorBody `}` + +GeneratorDeclaration[Yield, Await, Default] : + `function` `*` BindingIdentifier[?Yield, ?Await] `(` FormalParameters[+Yield, ~Await] `)` `{` GeneratorBody `}` + [+Default] `function` `*` `(` FormalParameters[+Yield, ~Await] `)` `{` GeneratorBody `}` + +GeneratorExpression : + `function` `*` BindingIdentifier[+Yield, ~Await]? `(` FormalParameters[+Yield, ~Await] `)` `{` GeneratorBody `}` + +GeneratorBody : + FunctionBody[+Yield, ~Await] + +YieldExpression[In, Await] : + `yield` + `yield` [no LineTerminator here] AssignmentExpression[?In, +Yield, ?Await] + `yield` [no LineTerminator here] `*` AssignmentExpression[?In, +Yield, ?Await] + + +AsyncGeneratorMethod[Yield, Await] : + `async` [no LineTerminator here] `*` PropertyName[?Yield, ?Await] `(` UniqueFormalParameters[+Yield, +Await] `)` `{` AsyncGeneratorBody `}` + +AsyncGeneratorDeclaration[Yield, Await, Default] : + `async` [no LineTerminator here] `function` `*` BindingIdentifier[?Yield, ?Await] `(` FormalParameters[+Yield, +Await] `)` `{` AsyncGeneratorBody `}` + [+Default] `async` [no LineTerminator here] `function` `*` `(` FormalParameters[+Yield, +Await] `)` `{` AsyncGeneratorBody `}` + +AsyncGeneratorExpression : + `async` [no LineTerminator here] `function` `*` BindingIdentifier[+Yield, +Await]? `(` FormalParameters[+Yield, +Await] `)` `{` AsyncGeneratorBody `}` + +AsyncGeneratorBody : + FunctionBody[+Yield, +Await] + + +ClassDeclaration[Yield, Await, Default] : + `class` BindingIdentifier[?Yield, ?Await] ClassTail[?Yield, ?Await] + [+Default] `class` ClassTail[?Yield, ?Await] + +ClassExpression[Yield, Await] : + `class` BindingIdentifier[?Yield, ?Await]? ClassTail[?Yield, ?Await] + +ClassTail[Yield, Await] : + ClassHeritage[?Yield, ?Await]? `{` ClassBody[?Yield, ?Await]? 
`}` + +ClassHeritage[Yield, Await] : + `extends` LeftHandSideExpression[?Yield, ?Await] + +ClassBody[Yield, Await] : + ClassElementList[?Yield, ?Await] + +ClassElementList[Yield, Await] : + ClassElement[?Yield, ?Await] + ClassElementList[?Yield, ?Await] ClassElement[?Yield, ?Await] + +ClassElement[Yield, Await] : + MethodDefinition[?Yield, ?Await] + `static` MethodDefinition[?Yield, ?Await] + `;` + + +AsyncFunctionDeclaration[Yield, Await, Default] : + `async` [no LineTerminator here] `function` BindingIdentifier[?Yield, ?Await] `(` FormalParameters[~Yield, +Await] `)` `{` AsyncFunctionBody `}` + [+Default] `async` [no LineTerminator here] `function` `(` FormalParameters[~Yield, +Await] `)` `{` AsyncFunctionBody `}` + +AsyncFunctionExpression : + `async` [no LineTerminator here] `function` `(` FormalParameters[~Yield, +Await] `)` `{` AsyncFunctionBody `}` + `async` [no LineTerminator here] `function` BindingIdentifier[~Yield, +Await] `(` FormalParameters[~Yield, +Await] `)` `{` AsyncFunctionBody `}` + +AsyncMethod[Yield, Await] : + `async` [no LineTerminator here] PropertyName[?Yield, ?Await] `(` UniqueFormalParameters[~Yield, +Await] `)` `{` AsyncFunctionBody `}` + +AsyncFunctionBody : + FunctionBody[~Yield, +Await] + +AwaitExpression[Yield] : + `await` UnaryExpression[?Yield, +Await] + + +AsyncArrowFunction[In, Yield, Await] : + `async` [no LineTerminator here] AsyncArrowBindingIdentifier[?Yield] [no LineTerminator here] `=>` AsyncConciseBody[?In] + CoverCallExpressionAndAsyncArrowHead[?Yield, ?Await] [no LineTerminator here] `=>` AsyncConciseBody[?In] #callcover + +AsyncConciseBody[In] : + [lookahead != `{`] AssignmentExpression[?In, ~Yield, +Await] + `{` AsyncFunctionBody `}` + +AsyncArrowBindingIdentifier[Yield] : + BindingIdentifier[?Yield, +Await] + +CoverCallExpressionAndAsyncArrowHead[Yield, Await] : + MemberExpression[?Yield, ?Await] Arguments[?Yield, ?Await] + + +AsyncArrowHead : + `async` [no LineTerminator here] ArrowFormalParameters[~Yield, +Await] + + +Script : + ScriptBody? + +ScriptBody : + StatementList[~Yield, ~Await, ~Return] + + +Module : + ModuleBody? + +ModuleBody : + ModuleItemList + +ModuleItemList : + ModuleItem + ModuleItemList ModuleItem + +ModuleItem : + ImportDeclaration + ExportDeclaration + StatementListItem[~Yield, ~Await, ~Return] + + +ImportDeclaration : + `import` ImportClause FromClause `;` + `import` ModuleSpecifier `;` + +ImportClause : + ImportedDefaultBinding + NameSpaceImport + NamedImports + ImportedDefaultBinding `,` NameSpaceImport + ImportedDefaultBinding `,` NamedImports + +ImportedDefaultBinding : + ImportedBinding + +NameSpaceImport : + `*` `as` ImportedBinding + +NamedImports : + `{` `}` + `{` ImportsList `}` + `{` ImportsList `,` `}` + +FromClause : + `from` ModuleSpecifier + +ImportsList : + ImportSpecifier + ImportsList `,` ImportSpecifier + +ImportSpecifier : + ImportedBinding + IdentifierName `as` ImportedBinding + +ModuleSpecifier : + StringLiteral + +ImportedBinding : + BindingIdentifier[~Yield, ~Await] + + +ExportDeclaration : + `export` `*` FromClause `;` + `export` ExportClause FromClause `;` + `export` ExportClause `;` + `export` VariableStatement[~Yield, ~Await] + `export` Declaration[~Yield, ~Await] + `export` `default` HoistableDeclaration[~Yield, ~Await, +Default] + `export` `default` ClassDeclaration[~Yield, ~Await, +Default] + `export` `default` [lookahead <! 
{`function`, `async` [no |LineTerminator| here] `function`, `class`}] AssignmentExpression[+In, ~Yield, ~Await] `;` + +ExportClause : + `{` `}` + `{` ExportsList `}` + `{` ExportsList `,` `}` + +ExportsList : + ExportSpecifier + ExportsList `,` ExportSpecifier + +ExportSpecifier : + IdentifierName + IdentifierName `as` IdentifierName + + +uri ::: + uriCharacters? + +uriCharacters ::: + uriCharacter uriCharacters? + +uriCharacter ::: + uriReserved + uriUnescaped + uriEscaped + +uriReserved ::: one of + `;` `/` `?` `:` `@` `&` `=` `+` `$` `,` + +uriUnescaped ::: + uriAlpha + DecimalDigit + uriMark + +uriEscaped ::: + `%` HexDigit HexDigit + +uriAlpha ::: one of + `a` `b` `c` `d` `e` `f` `g` `h` `i` `j` `k` `l` `m` `n` `o` `p` `q` `r` `s` `t` `u` `v` `w` `x` `y` `z` + `A` `B` `C` `D` `E` `F` `G` `H` `I` `J` `K` `L` `M` `N` `O` `P` `Q` `R` `S` `T` `U` `V` `W` `X` `Y` `Z` + +uriMark ::: one of + `-` `_` `.` `!` `~` `*` `'` `(` `)` + + +NativeFunction : + `function` PropertyName[~Yield, ~Await]? `(` FormalParameters[~Yield, ~Await] `)` `{` `[` `native` `code` `]` `}` + + +Pattern[U, N] :: + Disjunction[?U, ?N] + +Disjunction[U, N] :: + Alternative[?U, ?N] + Alternative[?U, ?N] `|` Disjunction[?U, ?N] + +Alternative[U, N] :: + [empty] + Alternative[?U, ?N] Term[?U, ?N] + +Term[U, N] :: + Assertion[?U, ?N] + Atom[?U, ?N] + Atom[?U, ?N] Quantifier + +Assertion[U, N] :: + `^` + `$` + `\` `b` + `\` `B` + `(` `?` `=` Disjunction[?U, ?N] `)` + `(` `?` `!` Disjunction[?U, ?N] `)` + `(` `?` `<=` Disjunction[?U, ?N] `)` + `(` `?` `<!` Disjunction[?U, ?N] `)` + +Quantifier :: + QuantifierPrefix + QuantifierPrefix `?` + +QuantifierPrefix :: + `*` + `+` + `?` + `{` DecimalDigits `}` + `{` DecimalDigits `,` `}` + `{` DecimalDigits `,` DecimalDigits `}` + +Atom[U, N] :: + PatternCharacter + `.` + `\` AtomEscape[?U, ?N] + CharacterClass[?U] + `(` GroupSpecifier[?U] Disjunction[?U, ?N] `)` + `(` `?` `:` Disjunction[?U, ?N] `)` + +SyntaxCharacter :: one of + `^` `$` `\` `.` `*` `+` `?` `(` `)` `[` `]` `{` `}` `|` + +PatternCharacter :: + SourceCharacter but not SyntaxCharacter + +AtomEscape[U, N] :: + DecimalEscape + CharacterClassEscape[?U] + CharacterEscape[?U] + [+N] `k` GroupName[?U] + +CharacterEscape[U] :: + ControlEscape + `c` ControlLetter + `0` [lookahead <! 
DecimalDigit] + HexEscapeSequence + RegExpUnicodeEscapeSequence[?U] + IdentityEscape[?U] + +ControlEscape :: one of + `f` `n` `r` `t` `v` + +ControlLetter :: one of + `a` `b` `c` `d` `e` `f` `g` `h` `i` `j` `k` `l` `m` `n` `o` `p` `q` `r` `s` `t` `u` `v` `w` `x` `y` `z` + `A` `B` `C` `D` `E` `F` `G` `H` `I` `J` `K` `L` `M` `N` `O` `P` `Q` `R` `S` `T` `U` `V` `W` `X` `Y` `Z` + +GroupSpecifier[U] :: + [empty] + `?` GroupName[?U] + +GroupName[U] :: + `<` RegExpIdentifierName[?U] `>` + +RegExpIdentifierName[U] :: + RegExpIdentifierStart[?U] + RegExpIdentifierName[?U] RegExpIdentifierPart[?U] + +RegExpIdentifierStart[U] :: + UnicodeIDStart + `$` + `_` + `\` RegExpUnicodeEscapeSequence[?U] + +RegExpIdentifierPart[U] :: + UnicodeIDContinue + `$` + `\` RegExpUnicodeEscapeSequence[?U] + <ZWNJ> + <ZWJ> + +RegExpUnicodeEscapeSequence[U] :: + [+U] `u` LeadSurrogate `\u` TrailSurrogate + [+U] `u` LeadSurrogate + [+U] `u` TrailSurrogate + [+U] `u` NonSurrogate + [~U] `u` Hex4Digits + [+U] `u{` CodePoint `}` + + +LeadSurrogate :: + Hex4Digits [> but only if the SV of |Hex4Digits| is in the inclusive range 0xD800 to 0xDBFF] + +TrailSurrogate :: + Hex4Digits [> but only if the SV of |Hex4Digits| is in the inclusive range 0xDC00 to 0xDFFF] + +NonSurrogate :: + Hex4Digits [> but only if the SV of |Hex4Digits| is not in the inclusive range 0xD800 to 0xDFFF] + +IdentityEscape[U] :: + [+U] SyntaxCharacter + [+U] `/` + [~U] SourceCharacter but not UnicodeIDContinue + +DecimalEscape :: + NonZeroDigit DecimalDigits? [lookahead <! DecimalDigit] + +CharacterClassEscape[U] :: + `d` + `D` + `s` + `S` + `w` + `W` + [+U] `p{` UnicodePropertyValueExpression `}` + [+U] `P{` UnicodePropertyValueExpression `}` + +UnicodePropertyValueExpression :: + UnicodePropertyName `=` UnicodePropertyValue + LoneUnicodePropertyNameOrValue + +UnicodePropertyName :: + UnicodePropertyNameCharacters + +UnicodePropertyNameCharacters :: + UnicodePropertyNameCharacter UnicodePropertyNameCharacters? + +UnicodePropertyValue :: + UnicodePropertyValueCharacters + +LoneUnicodePropertyNameOrValue :: + UnicodePropertyValueCharacters + +UnicodePropertyValueCharacters :: + UnicodePropertyValueCharacter UnicodePropertyValueCharacters? + +UnicodePropertyValueCharacter :: + UnicodePropertyNameCharacter + `0` + `1` + `2` + `3` + `4` + `5` + `6` + `7` + `8` + `9` + +UnicodePropertyNameCharacter :: + ControlLetter + `_` + +CharacterClass[U] :: + `[` [lookahead != `^`] ClassRanges[?U] `]` + `[` `^` ClassRanges[?U] `]` + +ClassRanges[U] :: + [empty] + NonemptyClassRanges[?U] + +NonemptyClassRanges[U] :: + ClassAtom[?U] + ClassAtom[?U] NonemptyClassRangesNoDash[?U] + ClassAtom[?U] `-` ClassAtom[?U] ClassRanges[?U] + +NonemptyClassRangesNoDash[U] :: + ClassAtom[?U] + ClassAtomNoDash[?U] NonemptyClassRangesNoDash[?U] + ClassAtomNoDash[?U] `-` ClassAtom[?U] ClassRanges[?U] + +ClassAtom[U] :: + `-` + ClassAtomNoDash[?U] + +ClassAtomNoDash[U] :: + SourceCharacter but not one of `\` or `]` or `-` + `\` ClassEscape[?U] + +ClassEscape[U] :: + `b` + [+U] `-` + CharacterClassEscape[?U] + CharacterEscape[?U] + + +NumericLiteral :: + DecimalLiteral + BinaryIntegerLiteral + OctalIntegerLiteral + HexIntegerLiteral + LegacyOctalIntegerLiteral + +LegacyOctalIntegerLiteral :: + `0` OctalDigit + LegacyOctalIntegerLiteral OctalDigit + +DecimalIntegerLiteral :: + `0` + NonZeroDigit DecimalDigits? 
+ NonOctalDecimalIntegerLiteral + +NonOctalDecimalIntegerLiteral :: + `0` NonOctalDigit + LegacyOctalLikeDecimalIntegerLiteral NonOctalDigit + NonOctalDecimalIntegerLiteral DecimalDigit + +LegacyOctalLikeDecimalIntegerLiteral :: + `0` OctalDigit + LegacyOctalLikeDecimalIntegerLiteral OctalDigit + +NonOctalDigit :: one of + `8` `9` + + +EscapeSequence :: + CharacterEscapeSequence + LegacyOctalEscapeSequence + HexEscapeSequence + UnicodeEscapeSequence + +LegacyOctalEscapeSequence :: + OctalDigit [lookahead <! OctalDigit] + ZeroToThree OctalDigit [lookahead <! OctalDigit] + FourToSeven OctalDigit + ZeroToThree OctalDigit OctalDigit + +ZeroToThree :: one of + `0` `1` `2` `3` + +FourToSeven :: one of + `4` `5` `6` `7` + + +Comment :: + MultiLineComment + SingleLineComment + SingleLineHTMLOpenComment + SingleLineHTMLCloseComment + SingleLineDelimitedComment + +MultiLineComment :: + `/*` FirstCommentLine? LineTerminator MultiLineCommentChars? `*/` HTMLCloseComment? + +FirstCommentLine :: + SingleLineDelimitedCommentChars + +SingleLineHTMLOpenComment :: + `<!--` SingleLineCommentChars? + +SingleLineHTMLCloseComment :: + LineTerminatorSequence HTMLCloseComment + +SingleLineDelimitedComment :: + `/*` SingleLineDelimitedCommentChars? `*/` + +HTMLCloseComment :: + WhiteSpaceSequence? SingleLineDelimitedCommentSequence? `-->` SingleLineCommentChars? + +SingleLineDelimitedCommentChars :: + SingleLineNotAsteriskChar SingleLineDelimitedCommentChars? + `*` SingleLinePostAsteriskCommentChars? + +SingleLineNotAsteriskChar :: + SourceCharacter but not one of `*` or LineTerminator + +SingleLinePostAsteriskCommentChars :: + SingleLineNotForwardSlashOrAsteriskChar SingleLineDelimitedCommentChars? + `*` SingleLinePostAsteriskCommentChars? + +SingleLineNotForwardSlashOrAsteriskChar :: + SourceCharacter but not one of `/` or `*` or LineTerminator + +WhiteSpaceSequence :: + WhiteSpace WhiteSpaceSequence? + +SingleLineDelimitedCommentSequence :: + SingleLineDelimitedComment WhiteSpaceSequence? SingleLineDelimitedCommentSequence? + + +Term[U, N] :: + [+U] Assertion[+U, ?N] + [+U] Atom[+U, ?N] + [+U] Atom[+U, ?N] Quantifier + [~U] QuantifiableAssertion[?N] Quantifier + [~U] Assertion[~U, ?N] + [~U] ExtendedAtom[?N] Quantifier + [~U] ExtendedAtom[?N] + +Assertion[U, N] :: + `^` + `$` + `\` `b` + `\` `B` + [+U] `(` `?` `=` Disjunction[+U, ?N] `)` + [+U] `(` `?` `!` Disjunction[+U, ?N] `)` + [~U] QuantifiableAssertion[?N] + `(` `?` `<=` Disjunction[?U, ?N] `)` + `(` `?` `<!` Disjunction[?U, ?N] `)` + +QuantifiableAssertion[N] :: + `(` `?` `=` Disjunction[~U, ?N] `)` + `(` `?` `!` Disjunction[~U, ?N] `)` + +ExtendedAtom[N] :: + `.` + `\` AtomEscape[~U, ?N] + `\` [lookahead == `c`] + CharacterClass[~U] + `(` Disjunction[~U, ?N] `)` + `(` `?` `:` Disjunction[~U, ?N] `)` + InvalidBracedQuantifier + ExtendedPatternCharacter + +InvalidBracedQuantifier :: + `{` DecimalDigits `}` + `{` DecimalDigits `,` `}` + `{` DecimalDigits `,` DecimalDigits `}` + +ExtendedPatternCharacter :: + SourceCharacter but not one of `^` `$` `\` `.` `*` `+` `?` `(` `)` `[` `|` + +AtomEscape[U, N] :: + [+U] DecimalEscape + [~U] DecimalEscape [> but only if the CapturingGroupNumber of |DecimalEscape| is <= _NcapturingParens_] + CharacterClassEscape[?U] + CharacterEscape[~U, ?N] + [+N] `k` GroupName[?U] + +CharacterEscape[U, N] :: + ControlEscape + `c` ControlLetter + `0` [lookahead <! 
DecimalDigit] + HexEscapeSequence + RegExpUnicodeEscapeSequence[?U] + [~U] LegacyOctalEscapeSequence + IdentityEscape[?U, ?N] + +IdentityEscape[U, N] :: + [+U] SyntaxCharacter + [+U] `/` + [~U] SourceCharacterIdentityEscape[?N] + +SourceCharacterIdentityEscape[N] :: + [~N] SourceCharacter but not `c` + [+N] SourceCharacter but not one of `c` or `k` + +ClassAtomNoDash[U, N] :: + SourceCharacter but not one of `\` or `]` or `-` + `\` ClassEscape[?U, ?N] + `\` [lookahead == `c`] + +ClassEscape[U, N] :: + `b` + [+U] `-` + [~U] `c` ClassControlLetter + CharacterClassEscape[?U] + CharacterEscape[?U, ?N] + +ClassControlLetter :: + DecimalDigit + `_` + + +IfStatement[Yield, Await, Return] : + `if` `(` Expression[+In, ?Yield, ?Await] `)` FunctionDeclaration[?Yield, ?Await, ~Default] `else` Statement[?Yield, ?Await, ?Return] + `if` `(` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] `else` FunctionDeclaration[?Yield, ?Await, ~Default] + `if` `(` Expression[+In, ?Yield, ?Await] `)` FunctionDeclaration[?Yield, ?Await, ~Default] `else` FunctionDeclaration[?Yield, ?Await, ~Default] + `if` `(` Expression[+In, ?Yield, ?Await] `)` FunctionDeclaration[?Yield, ?Await, ~Default] + + +IterationStatement[Yield, Await, Return] : + `for` `(` `var` BindingIdentifier[?Yield, ?Await] Initializer[~In, ?Yield, ?Await] `in` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + diff --git a/third_party/rust/jsparagus/js_parser/esgrammar.pgen b/third_party/rust/jsparagus/js_parser/esgrammar.pgen new file mode 100644 index 0000000000..7a0b4cb7b9 --- /dev/null +++ b/third_party/rust/jsparagus/js_parser/esgrammar.pgen @@ -0,0 +1,290 @@ +# Grammar for ECMArkup grammar descriptions + +var token CHR; # <NL> +var token EQ; # :: +var token MATCH_REF; # $0 +var token NL; # (actual newline character) +var token NT; # IdentifierName +var token NTALT; # |LineTerminator| +var token NTCALL; # Expression (when followed by `[` or `<`) +var token PRODID; # #comment +var token PROSE; # > any text following a greater than sign +var token T; # `var` +var token WPROSE; # [> any text after greater than sign, wrapped in brackets] +var token RUSTCOMMENT;# //comment + +token But = "but"; +token Empty = "empty"; +token Here = "here"; +token Lookahead = "lookahead"; +token No = "no"; +token Not = "not"; +token Of = "of"; +token One = "one"; +token Or = "or"; +token Through = "through"; +token Returns = "returns"; +token Some = "Some"; +token None = "None"; +token Arrow = "=>"; +token Comma = ","; +token OpenBracket = "["; +token CloseBracket = "]"; +token QuestionMark = "?"; +token Tilde = "~"; +token PlusSign = "+"; +token Equal = "="; +token Equals = "=="; +token IsNotEqualTo = "!="; +token IsNotIn = "<!"; +token OpenBrace = "{"; +token CloseBrace = "}"; +token OpenParen = "("; +token CloseParen = ")"; +token AtSign = "@"; +token OpenAngle = "<"; +token CloseAngle = ">"; +token Impl = "impl"; +token For = "for"; +token Let = "let"; +token SemiColon = ";"; +token Lifetime = "'"; + +// Entry point for grammar_extension! macro's content. 
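The `var token` and `token` declarations above name the token shapes this grammar is written in terms of, and they line up with the regular expressions that `ESGrammarLexer` defines in parse_esgrammar.py later in this patch. A short Python illustration of a few of those shapes, reusing regexes copied from that lexer, shown here only as an example.

    import re

    T = re.compile(r'`[^` \n]+`|```')               # quoted terminals such as `var`
    PRODID = re.compile(r'#[A-Za-z]\w*')            # production names such as #assignment
    NTCALL = re.compile(r'[A-Za-z]\w*(?=[\[<])')    # nonterminals followed by parameters
    MATCH_REF = re.compile(r'\$(?:0|[1-9][0-9]*)')  # reducer references such as $0

    line = 'LeftHandSideExpression[?Yield] `=` AssignmentExpression[?In] #assignment'
    print(T.findall(line))        # ['`=`']
    print(NTCALL.findall(line))   # ['LeftHandSideExpression', 'AssignmentExpression']
    print(PRODID.findall(line))   # ['#assignment']
    print(MATCH_REF.findall('assignment($0, $2)'))  # ['$0', '$2']

The `goal nt rust_edsl` rule that follows is the entry point referred to by the comment above.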
+goal nt rust_edsl { + rust_impl rust_nt_def_list => rust_edsl($0, $1); +} + +nt rust_impl { + "impl" nt_type "for" nt_type "{" "}" ";" => rust_impl($1, $3); + "impl" "<" nt_type_params ">" nt_type "for" nt_type "{" "}" ";" => rust_param_impl($4, $6, $2); +} + +nt rust_nt_def_list { + rust_nt_def_or_blank_line; + rust_nt_def_list rust_nt_def_or_blank_line => concat($0, $1); +} + +nt rust_nt_def_or_blank_line { + NL => blank_line(); + RUSTCOMMENT => blank_line(); + rust_nt_def => nt_def_to_list($0); +} + +// grammar extension syntax for adding/removing/extending the productions. +nt rust_nt_def { + "let" nt_lhs "=" "{" rust_rhs_line "}" ";" => rust_nt_def($1, $4); +} + +nt rust_rhs_line { + rust_symbols => rust_rhs_line($0); +} + +nt rust_symbols { + rust_symbol; + rust_symbols rust_symbol => concat($0, $1); +} + +nt rust_symbol { + "{" rust_expr "}" => single($1); + symbol => single($0); + NL => empty(); +} + +nt rust_expr { + expr => rust_expr($0); +} + +// Entry point for esgrammar files. +goal nt grammar { + nt_def_list; +} + +nt nt_def_list { + nt_def_or_blank_line; + nt_def_list nt_def_or_blank_line => concat($0, $1); +} + +nt nt_def_or_blank_line { + NL => blank_line(); + nt_def => nt_def_to_list($0); +} + +nt nt_def { + nt_type_line? nt_lhs EQ NL rhs_lines NL => nt_def($0, $1, $2, $4); + nt_type_line? nt_lhs EQ "one" "of" NL t_list_lines NL => nt_def_one_of($0, $1, $2, $6); +} + +nt nt_lhs { + NT => nt_lhs_no_params($0); + NTCALL "[" params "]" => nt_lhs_with_params($0, $2); +} + +nt params { + param => single($0); + params "," param => append($0, $2); +} + +nt param { + NT; +} + +nt nt_type_line { + "@" "returns" nt_type NL => $2; +} + +// Define a type as understood by Rust type-system. +nt nt_type { + NT => simple_type($0); + NTCALL "<" nt_type_params ">" => parameterized_type($0, $2); +} + +nt nt_type_params { + nt_type_param => single($0); + nt_type_params "," nt_type_param => append($0, $2); +} + +nt nt_type_param { + nt_type; + "'" NT => lifetime_type($0); +} + +nt t_list_lines { + t_list_line; + t_list_lines t_list_line => concat($0, $1); +} + +nt t_list_line { + terminal_seq NL => t_list_line($0); +} + +nt terminal_seq { + terminal => single($0); + terminal_seq terminal => append($0, $1); +} + +nt terminal { + T => terminal($0); + CHR => chr($0); +} + +nt rhs_lines { + rhs_line => single($0); + rhs_lines rhs_line => append($0, $1); +} + +nt rhs_line { + ifdef? rhs reducer? PRODID? NL => rhs_line($0, $1, $2, $3); + PROSE NL => rhs_line_prose($0); +} + +nt rhs { + symbols; + "[" "empty" "]" => empty_rhs(); +} + +nt reducer { + NL? "=>" expr => $2; +} + +nt expr { + MATCH_REF => expr_match_ref($0); + NT "(" expr_args? ")" expr_try? => expr_call($0, $2, $4); + "Some" "(" expr ")" => expr_some($2); + "None" => expr_none(); +} + +nt expr_try { + "?"; +} + +nt expr_args { + expr => single($0); + expr_args "," expr => append($0, $2); +} + +nt ifdef { + "[" definite_sigil NT "]" => ifdef($1, $2); +} + +nt symbols { + symbol => single($0); + symbols symbol => append($0, $1); +} + +nt symbol { + terminal; + nonterminal; + nonterminal "?" 
=> optional($0); + nonterminal "but" "not" exclusion => but_not($0, $3); + nonterminal "but" "not" "one" "of" exclusion_list => but_not_one_of($0, $5); + "[" "lookahead" lookahead_assertion "]" => $2; + no_line_terminator_here; + WPROSE => $0; +} + +nt no_line_terminator_here { + "[" "no" line_terminator "here" "]" => no_line_terminator_here($2); +} + +nt nonterminal { + NT => nonterminal($0); + NTCALL "[" args "]" => nonterminal_apply($0, $2); +} + +nt args { + arg => single($0); + args "," arg => append($0, $2); +} + +nt arg { + sigil NT => arg_expr($0, $1); +} + +nt sigil { + definite_sigil; + "?"; +} + +nt definite_sigil { + "~" => sigil_false(); + "+" => sigil_true(); +} + +nt exclusion_list { + exclusion => single($0); + exclusion_list "or" exclusion => append($0, $2); +} + +nt exclusion { + terminal => exclusion_terminal($0); + nonterminal => exclusion_nonterminal($0); + CHR "through" CHR => exclusion_chr_range($0, $2); +} + +nt lookahead_assertion { + "==" terminal => la_eq($1); + "!=" terminal => la_ne($1); + "<!" NT => la_not_in_nonterminal($1); + "<!" "{" lookahead_exclusions "}" => la_not_in_set($2); +} + +nt lookahead_exclusions { + lookahead_exclusion => single($0); + lookahead_exclusions "," lookahead_exclusion => append($0, $2); +} + +nt lookahead_exclusion { + lookahead_exclusion_element => single($0); + lookahead_exclusion lookahead_exclusion_element => append($0, $1); +} + +nt lookahead_exclusion_element { + terminal; + no_line_terminator_here; +} + +nt line_terminator { + NT; + NTALT; +} diff --git a/third_party/rust/jsparagus/js_parser/extract_es_grammar.py b/third_party/rust/jsparagus/js_parser/extract_es_grammar.py new file mode 100755 index 0000000000..b1fc35473f --- /dev/null +++ b/third_party/rust/jsparagus/js_parser/extract_es_grammar.py @@ -0,0 +1,567 @@ +"""extract_es_grammar.py - Extract the grammar from the ECMAScript spec + +To run this script, you first need to get the source of the version of +the ECMAScript spec you're interested in. + + cd ../.. + mkdir tc39 + cd tc39 + git clone git@github.com:tc39/ecma262.git + +Then: + + make js_parser/es.esgrammar + +You can also use this script on a random HTTPS URL, like: + + URL=https://raw.githubusercontent.com/tc39/proposal-class-fields/master/spec.html + python extract_esgrammar.py $URL + +""" + +import argparse +import urllib +import html5lib # type: ignore +import re +from textwrap import dedent + + +HTML = "{http://www.w3.org/1999/xhtml}" +INS_TAG = HTML + "ins" +DEL_TAG = HTML + "del" + +INS = '+' +DEL = '-' +KEEP = ' ' + + +def pre_with_code_filter_factory(e): + """Checks if the <pre> is used in the following pattern: + + ``` + <pre><code> + </code></pre> + ``` + + If so, return a filter that formats the content, removing extra spaces. + line-wrap, and backquote added by <code>. 
+ + """ + if e.text and e.text.strip() != '': + return False + + if len(e) != 1: + return False + + if e[0].tag != '{http://www.w3.org/1999/xhtml}code': + return False + + if e[0].tail and e[0].tail.strip() != '': + return False + + def children_filter(texts): + while len(texts) > 0 and texts[0].strip() == '': + texts.pop(0) + if len(texts) > 0 and texts[0].strip() == '`': + texts.pop(0) + while len(texts) > 0 and texts[0].strip() == '': + texts.pop(0) + + while len(texts) > 0 and texts[-1].strip() == '': + texts.pop() + if len(texts) > 0 and texts[-1].strip() == '`': + texts.pop() + while len(texts) > 0 and texts[-1].strip() == '': + texts.pop() + + is_first = True + for text in texts: + for line in text.split('\n'): + line = line.strip() + if line == '': + continue + + if not is_first: + yield '\n' + is_first = False + + yield line + + return children_filter + + +# Rules for extracting text, used by extracting Early Errors. +EXTRA_RULES_FOR_EE = { + 'b': {}, + 'br': {}, + 'code': { + 'prefix': '`', + 'postfix': '`', + }, + 'emu-alg': {}, + 'emu-grammar': {}, + 'emu-note': { + 'prefix': ['NOTE', '\n'], + 'strip': True, + }, + 'emu-xref': { + 'prefix': lambda e: e.attrib.get('href'), + }, + 'ins': { + 'ignore_highlighted': True, + }, + 'p': { + 'strip': True, + }, + 'pre': { + 'prefix': ['\n', '\n', '```', '\n'], + 'postfix': ['\n', '```', '\n', '\n'], + 'strip': True, + 'children_filter_factroy': pre_with_code_filter_factory, + }, + 'sub': { + 'prefix': '_', + }, + 'sup': { + 'prefix': '^', + }, +} + + +def apply_prefix_postfix_rule(e, rule, name): + """If rule is provided, apply prefix/postfix rule to the element `e`. + """ + if not rule: + return + + fix = rule.get(name) + if callable(fix): + yield fix(e) + elif isinstance(fix, list): + for item in fix: + yield item + elif fix: + yield fix + + +def apply_strip_rule(text, rule): + """If rule is provided, apply strip rule to the text. + """ + if not text: + return + + if not rule: + yield text + return + + strip = rule.get('strip') + if strip: + yield text.strip() + else: + yield text + + +def fragment_child_chunks(e, extra_rules={}): + """Partly interpret the content of `e`, yielding `text`, + applying extra_rules. + + Concatenating the yielded `text` values gives the full text of `e`. + """ + rule = extra_rules[e.tag.replace(HTML, '')] + + children_filter = None + factroy = rule.get('children_filter_factroy') + if factroy: + children_filter = factroy(e) + + yield from apply_prefix_postfix_rule(e, rule, 'prefix') + yield from apply_strip_rule(e.text, rule) + + for child in e: + if child.tag.replace(HTML, '') not in extra_rules: + raise ValueError("unrecognized element: " + child.tag) + + texts = [] + for text in fragment_child_chunks(child, extra_rules): + if children_filter: + texts.append(text) + else: + yield text + + if children_filter: + for text in children_filter(texts): + yield text + + yield from apply_strip_rule(e.tail, rule) + yield from apply_prefix_postfix_rule(e, rule, 'postfix') + + +def is_highlighted_ins(e): + """Returns True if e matches the following pattern: + + <ins>highlighted</ins> text: + + See `fragment_chunks` comment for the details + """ + if len(e) != 0: + return False + + if not e.text: + return False + + if e.text != 'highlighted': + return False + + if not e.tail: + return False + + if not e.tail.startswith(' text:'): + return False + + return True + + +def is_negligible_ins(e, extra_rules): + """Returns True if the 'ignore_highlighted' rule is defined for <ins>, + and it matches to the negligible pattern. 
+ + See `fragment_chunks` comment for the details + """ + + rule = extra_rules.get(e.tag.replace(HTML, '')) + if not rule: + return False + + if rule.get('ignore_highlighted'): + if is_highlighted_ins(e): + return True + + return False + + +def fragment_chunks(e, extra_rules={}): + """Partly interpret the content of `e`, yielding pairs (ty, text). + + If `extra_rules` isn't provided, the content of `e` must be text with 0 + or more <ins>/<del> elements. + + The goal is to turn the tree `e` into a simple series of tagged strings. + + Yields pairs (ty, text) where ty in (INS, DEL, KEEP). Concatenating the + yielded `text` values gives the full text of `e`. + + `extra_rules` is a dictionary that defines extra elements that is allowed + as the content of `e`. + Each item defines a rule for the tag, with the following: + * prefix + Put a prefix before the text + Possible values: + * string + * list of string + * function + receives `Element` and returns a prefix string + * postfix + Put a postfix after the text + value uses the same format as prefix + * strip + True to strip whitespaces before/after element's text + * children_filter_factroy + A function that receives `Element`, and returns a filter function or None + The filter function receives a list of texts for child nodes, and + returns a list of filtered text + * ignore_highlighted + Effective only with <ins> + Do not treat <ins> as an insertion if it matches the following pattern: + + <ins>highlighted</ins> text: + + This pattern is used in Annex B description. + """ + + rule = extra_rules.get(e.tag.replace(HTML, '')) + + for text in apply_prefix_postfix_rule(e, rule, 'prefix'): + yield KEEP, text + for text in apply_strip_rule(e.text, rule): + yield KEEP, text + + for child in e: + if child.tag == INS_TAG and not is_negligible_ins(child, extra_rules): + ty = INS + elif child.tag == DEL_TAG: + ty = DEL + else: + if child.tag.replace(HTML, '') not in extra_rules: + raise ValueError("unrecognized element: " + child.tag) + + for text in fragment_child_chunks(child, extra_rules): + yield KEEP, text + continue + + if child.text: + yield ty, child.text + if len(child) != 0: + for grandchild in child: + if grandchild.tag.replace(HTML, '') not in extra_rules: + raise ValueError("unsupported nested element {} in {}" + .format(grandchild.tag, child.tag)) + + for text in fragment_child_chunks(grandchild, extra_rules): + yield ty, text + if child.tail: + yield KEEP, child.tail + + for text in apply_strip_rule(e.tail, rule): + yield KEEP, text + for text in apply_prefix_postfix_rule(e, rule, 'postfix'): + yield KEEP, text + + +def fragment_parts(e, **kwargs): + """Like fragment_chunks, but with two fixups. + + 1. Break up pairs that include both a newline and any other text. + + 2. Move newlines inside of a preceding INS or DEL element that spans its + whole line. + """ + line_has_ins = False + line_has_del = False + for chunk_ty, text in fragment_chunks(e, **kwargs): + for piece in re.split(r'(\n)', text): + ty = chunk_ty + if piece != '': + if piece == '\n': + # Possibly move newline inside preceding INS or DEL. 
+ if line_has_ins and not line_has_del: + ty = INS + elif line_has_del and not line_has_ins: + ty = DEL + else: + ty = KEEP + line_has_del = False + line_has_ins = False + elif piece.strip() != '': + if ty in (INS, KEEP): + line_has_ins = True + if ty in (DEL, KEEP): + line_has_del = True + yield ty, piece + + +def generate_fragment_patch(e, **kwargs): + line_before = '' + line_after = '' + + def end_line(ty): + nonlocal line_before, line_after + if line_before.rstrip() == line_after.rstrip(): + yield " ", line_after + else: + if line_before.strip() != '' or ty != INS: + yield "-", line_before + if line_after.strip() != '' or ty != DEL: + yield "+", line_after + line_before = '' + line_after = '' + + for ty, text in fragment_parts(e, **kwargs): + if text == '\n': + yield from end_line(ty) + else: + if ty in (KEEP, DEL): + line_before += text + if ty in (KEEP, INS): + line_after += text + if line_before or line_after: + yield from end_line(KEEP) + + +def dedent_pairs(pairs): + """Dedent the `pairs`'s `text` part + """ + pairs = list(pairs) + + # Using textwrap.dedent on this requires a few lines of hackery. + types = [ty for ty, _line in pairs] + dedented_lines = dedent(''.join(line + '\n' for ty, line in pairs)).splitlines() + assert len(dedented_lines) == len(pairs) + + return zip(types, dedented_lines) + + +def print_pairs(pairs): + last_line_was_empty = False + + for ty, line in pairs: + if ty == KEEP and line == '': + if last_line_was_empty: + continue + last_line_was_empty = True + else: + last_line_was_empty = False + + print(ty + line) + + +def print_fragment_patch(e): + print_pairs(dedent_pairs(generate_fragment_patch(e))) + + +def is_annex_early_errors(e): + """Returns True if the <emu-annex> element contains Early Errors. + """ + h1 = e.find('{http://www.w3.org/1999/xhtml}h1') + if 'Early Errors' in h1.text: + return True + + p = e.find('{http://www.w3.org/1999/xhtml}p') + if p: + if 'Early Error' in html5lib.serializer.serialize(p): + return True + + return False + + +def get_parent_map(document): + """Returns a map from a element to parent element. + This is necessary because `xml.etree.ElementTree.Element` doesn't have + a reference to parent element. + """ + parent_map = dict() + for parent in document.iter(): + for child in parent: + parent_map[child] = parent + return parent_map + + +def get_titles(parent_map, e): + """Returns a list of section titles for a section. 
+ """ + titles = [] + while e.tag != '{http://www.w3.org/1999/xhtml}body': + h1 = e.find('{http://www.w3.org/1999/xhtml}h1') + titles.insert(0, h1.text) + e = parent_map[e] + + return titles + + +def generate_ul_fragment_patch(e, depth): + """Similar to generate_fragment_patch, but for <ul> + """ + first_line_prefix = '{}* '.format(' ' * depth) + other_line_prefix = '{} '.format(' ' * depth) + + for item in e: + if item.tag != '{http://www.w3.org/1999/xhtml}li': + raise ValueError("unrecognized element: " + item.tag) + + pairs = generate_fragment_patch(item, + extra_rules=EXTRA_RULES_FOR_EE) + + is_first_line = True + + for ty, line in dedent_pairs(pairs): + if is_first_line and line.strip() == '': + continue + + if is_first_line: + is_first_line = False + yield ty, '{}{}'.format(first_line_prefix, line.strip()) + else: + yield ty, '{}{}'.format(other_line_prefix, line.strip()) + + +def generate_early_errors_fragment_patch(parent_map, e): + for t in get_titles(parent_map, e): + yield KEEP, '# {}'.format(t) + yield KEEP, '# #{}'.format(e.attrib.get('id')) + yield KEEP, '' + + for child in e: + if child.tag == '{http://www.w3.org/1999/xhtml}h1': + continue + + if child.tag == '{http://www.w3.org/1999/xhtml}emu-grammar': + pairs = generate_fragment_patch(child) + yield from dedent_pairs(pairs) + yield KEEP, '' + elif child.tag == '{http://www.w3.org/1999/xhtml}ul': + yield from generate_ul_fragment_patch(child, 0) + elif child.tag == '{http://www.w3.org/1999/xhtml}emu-note': + pairs = generate_fragment_patch(child, + extra_rules=EXTRA_RULES_FOR_EE) + yield from dedent_pairs(pairs) + yield KEEP, '' + elif child.tag == '{http://www.w3.org/1999/xhtml}p': + pairs = generate_fragment_patch(child, + extra_rules=EXTRA_RULES_FOR_EE) + yield from dedent_pairs(pairs) + yield KEEP, '' + elif (child.tag == '{http://www.w3.org/1999/xhtml}emu-alg' + and e.attrib.get('id') == 'sec-__proto__-property-names-in-object-initializers'): + # "__proto__ Property Names in Object Initializers" section + # contains changes both for early errors and algorithm. + # Ignore algorithm part. 
+ pass + else: + raise ValueError('unsupported element in early errors section: {}' + .format(child.tag)) + + +def print_early_errors(parent_map, e): + pairs = generate_early_errors_fragment_patch(parent_map, e) + print_pairs(dedent_pairs(pairs)) + + +def extract(filename, unfiltered, target): + if filename.startswith("https:"): + file_obj = urllib.request.urlopen(filename) + else: + file_obj = open(filename, "rb") + + with file_obj: + document = html5lib.parse(file_obj) + + if target == 'grammar': + for e in document.iter("{http://www.w3.org/1999/xhtml}emu-grammar"): + if unfiltered or e.attrib.get("type") == "definition": + print_fragment_patch(e) + elif target == 'ee': + parent_map = get_parent_map(document) + for e in document.iter("{http://www.w3.org/1999/xhtml}emu-clause"): + if e.attrib.get("id").endswith("-early-errors"): + print_early_errors(parent_map, e) + elif target == 'ee-annex': + parent_map = get_parent_map(document) + for e in document.iter("{http://www.w3.org/1999/xhtml}emu-annex"): + if is_annex_early_errors(e): + print_early_errors(parent_map, e) + else: + raise ValueError('Unknown target: {}'.format(target)) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description="Extract esgrammar from ECMAScript specifications.") + parser.add_argument( + 'url', + nargs=1, + help="the https: url or local filename of an HTML file containing <emu-grammar> tags") + parser.add_argument( + '--unfiltered', + action='store_true', + help="Include even <emu-grammar> elements that don't have `type=definition`") + parser.add_argument( + '--target', + default='grammar', + choices=['grammar', 'ee', 'ee-annex'], + help="What to extract (\ + grammar = esgrammar, \ + ee = early errors, \ + ee-annex = early errors in Annex\ + )") + + args = parser.parse_args() + extract(args.url[0], args.unfiltered, args.target) diff --git a/third_party/rust/jsparagus/js_parser/generate_js_parser_tables.py b/third_party/rust/jsparagus/js_parser/generate_js_parser_tables.py new file mode 100755 index 0000000000..f85f7aca86 --- /dev/null +++ b/third_party/rust/jsparagus/js_parser/generate_js_parser_tables.py @@ -0,0 +1,140 @@ +"""generate_js_parser_tables.py - Generate tables from the ES grammar.""" + +import argparse +import os +import jsparagus.gen +import jsparagus.grammar +from . import load_es_grammar + + +def hack_grammar(g): + # We throw away most of the boolean parameters in the grammar, as the + # current parser generator's approach of fully expanding them is a huge + # pain. + + PARAM_WHITELIST = ['In', 'Default'] + + def filter_params(params): + return tuple(p for p in params if p in PARAM_WHITELIST) + + def filter_args(args): + return tuple(pair for pair in args if pair[0] in PARAM_WHITELIST) + + def filter_element(e): + """ Strip nt arguments. """ + if isinstance(e, jsparagus.grammar.Nt): + return jsparagus.grammar.Nt(e.name, filter_args(e.args)) + elif isinstance(e, jsparagus.grammar.Optional): + return jsparagus.grammar.Optional(filter_element(e.inner)) + else: + return e + + def filter_condition(c): + if c is None or c[0] not in PARAM_WHITELIST: + return None + return c + + def filter_production(p): + """ Discard production conditions and nt arguments. 
""" + body = [filter_element(e) for e in p.body] + return jsparagus.grammar.Production(body, p.reducer, + condition=filter_condition(p.condition)) + + nonterminals = {} + for nt, nt_def in g.nonterminals.items(): + params = tuple(filter_params(nt_def.params)) + rhs_list = [filter_production(p) for p in nt_def.rhs_list] + nonterminals[nt] = jsparagus.grammar.NtDef(params, rhs_list, nt_def.type) + return g.with_nonterminals(nonterminals) + + +def main(): + # Read command-line options. + parser = argparse.ArgumentParser( + description='Ponder the ECMAScript grammar.', + allow_abbrev=False) + default_filename = os.path.join(os.path.dirname(__file__), + "es-simplified.esgrammar") + parser.add_argument( + 'filename', metavar='FILE', nargs='?', default=default_filename, + help=".esgrammar (or .jsparagus_dump) input file") + parser.add_argument( + 'handler_info', metavar='HANDLER_INFO', nargs='?', + help="JSON file that contains information about handler") + parser.add_argument( + '-e', '--extend', action='append', default=[], + help="name of a files which contains a grammar_extension Rust macro.") + parser.add_argument( + '-o', '--output', metavar='FILE', default='/dev/stdout', + help="output filename for parser tables") + parser.add_argument( + '-v', '--verbose', action='store_true', + help="print some debug output") + parser.add_argument( + '--progress', action='store_true', + help="print a dot each time a state is analyzed (thousands of them)") + parser.add_argument( + '--debug', action='store_true', + help="annotate the generated code with grammar productions") + args = parser.parse_args() + + # Check filenames. + in_filename = args.filename + if in_filename.endswith('.esgrammar'): + from_source = True + elif in_filename.endswith('.jsparagus_dump'): + from_source = False + else: + raise ValueError("input file extension should be .esgrammar or .jsparagus_dump") + + out_filename = args.output + if out_filename.endswith('.py'): + target = 'python' + elif out_filename.endswith('.rs'): + target = 'rust' + elif out_filename.endswith('.jsparagus_dump'): + target = 'dump' + else: + raise ValueError("-o file extension should be .py, .rs, or .jsparagus_dump") + + in_extend = args.extend + if from_source: + assert all(f.endswith('.rs') for f in in_extend), "Extension are only supposed to be Rust files." + else: + assert in_extend == [], "Cannot add extensions to the generated parse table." + + # Load input and analyze it. + if from_source: + grammar = load_es_grammar.load_syntactic_grammar(in_filename, in_extend) + grammar = hack_grammar(grammar) + if args.verbose: + grammar.dump() + + states = jsparagus.gen.generate_parser_states( + grammar, verbose=args.verbose, progress=args.progress) + else: + states = jsparagus.gen.ParseTable.load(in_filename) + + # Generate output. + try: + if target in ('python', 'rust'): + with open(out_filename, 'w') as f: + jsparagus.gen.generate_parser(f, states, + target=target, + verbose=args.verbose, + debug=args.debug, + handler_info=args.handler_info) + else: + assert target == 'dump' + states.save(out_filename) + except Exception: + # On failure, don't leave a partial output file lying around. 
+ try: + os.remove(out_filename) + except Exception: + pass + raise + + +if __name__ == '__main__': + main() diff --git a/third_party/rust/jsparagus/js_parser/lexer.py b/third_party/rust/jsparagus/js_parser/lexer.py new file mode 100644 index 0000000000..2d8ed530ed --- /dev/null +++ b/third_party/rust/jsparagus/js_parser/lexer.py @@ -0,0 +1,315 @@ +"""Vague approximation of an ECMAScript lexer. + +A parser has two levels: the *lexer* scans bytes to produce tokens. The +*parser* consumes tokens and produces ASTs. + +In a traditional design, the parser drives the process. It *pulls* one token at +a time from the lexer. However, for a parser that can accept arbitrary slabs of +data, scan them, then keep going, it makes more sense for the user to feed +those slabs to the lexer, which then *pushes* tokens to the parser. So that's +what we do. + +Usage: + + from js_parser.lexer import JSLexer + from js_parser.parser import JSParser + + lexer = JSLexer(JSParser()) + lexer.write(some_source_text) + lexer.write(some_more_source_text) + ast = lexer.close() +""" + +import re +import jsparagus.lexer + + +def _get_punctuators(): + punctuators = ''' + &&= ||= ??= + { ( ) [ ] . ... ; , < > <= >= == != === !== + - * % ** ++ -- + << >> >>> & | ^ ! ~ && || ? : = += -= *= %= + **= ><<= >>= >>>= &= |= ^= => + '''.split() + + return '|'.join( + re.escape(token) + for token in sorted(punctuators, key=len, reverse=True)) + + +TOKEN_RE = re.compile(r'''(?x) + (?: + # WhiteSpace + [\ \t\v\r\n\u00a0\u2028\u2029\ufeff] + # SingleLineComment + | // [^\r\n\u2028\u2029]* (?= [\r\n\u2028\u2029] | \Z ) + # MultiLineComment + | /\* (?: [^*] | \*+[^/] )* \*+/ + )* + ( + # Incomplete MultiLineComment + /\* (?: [^*] | \*+[^/] )* \** + | # Incomplete SingleLineComment + // [^\r\n\u2028\u2029]* + | # IdentifierName + (?: [$_A-Za-z] | \\ u [0-9A-Fa-f]{4} | \\ u \{ [0-9A-Fa-f]+ \}) + (?: [$_0-9A-Za-z] | \\ u [0-9A-Fa-f]{4} | \\ u \{ [0-9A-Fa-f]+ \})* + | # NumericLiteral + [0-9][0-9A-Za-z]*(?:\.[0-9A-Za-z]*)? + | \.[0-9][0-9A-Za-z]* + | # Punctuator + <INSERT_PUNCTUATORS> + | # The slash special case + / + | # The curly brace special case + } + | # StringLiteral + ' + # SingleStringCharacters + (?: + # SourceCharacter but not one of ' or \\ or LineTerminator + # but also allow LINE SEPARATOR or PARAGRAPH SEPARATOR + [^'\\\r\n] + | \\ [^0-9xu\r\n\u2028\u2029] # CharacterEscapeSequence + | \\ x [0-9A-Fa-f]{2} # HexEscapeSequence + | \\ u [0-9A-Fa-f]{4} # UnicodeEscapeSequence + | \\ u \{ [0-9A-Fa-f]+ \} + | \\\r\n? # LineContinuation + | \\[\n\u2028\u2029] + )* + ' + | " + # DoubleStringCharacters + (?: + # SourceCharacter but not one of " or \\ or LineTerminator + # but also allow LINE SEPARATOR or PARAGRAPH SEPARATOR + [^"\\\r\n] + | \\ [^0-9xu\r\n\u2028\u2029] # CharacterEscapeSequence + | \\ x [0-9A-Fa-f]{2} # HexEscapeSequence + | \\ u [0-9A-Fa-f]{4} # UnicodeEscapeSequence + | \\ u \{ [0-9A-Fa-f]+ \} + | \\\r\n? # LineContinuation + | \\[\n\u2028\u2029] + )* + " + | # Template + ` (?: [^`\\$] | \\. )* (?: \${ | ` ) + | # illegal character or end of input (this branch matches no characters) + ) +'''.replace("<INSERT_PUNCTUATORS>", _get_punctuators())) + +DIV_RE = re.compile(r'(/=?)') + +REGEXP_RE = re.compile(r'''(?x) +( + / + (?: + # RegularExpressionFirstChar - implemented using + # RegularExpressionChars on the theory that we have already + # ruled out the possibility of a comment. 
+ # RegularExpressionChars + (?: + # RegularExpressionNonTerminator but not one of \\ or / or [ + [^/\\\[\r\n\u2028\u2029] + | # RegularExpressionBackslashSequence + \\ [^\r\n\u2028\u2029] + | # RegularExpressionClass + \[ + # RegularExpressionClassChars + (?: + # RegularExpressionNonTerminator but not one of ] or \\ + [^]\\\r\n\u2028\u2029] + | # RegularExpressionBackslashSequence + \\ [^\r\n\u2028\u2029] + )* + \] + )+ + ) + / + (?: \w* ) +) +''') + +# Words that never match Identifier. (`await` and `yield` nonetheless +# conditionally match IdentifierReference, BindingIdentifier, and +# LabelIdentifier.) +# +# Technically the term for these is "reserved word", not "keyword", but +# whatever. +ECMASCRIPT_FULL_KEYWORDS = [ + 'await', + 'break', + 'case', + 'catch', + 'class', + 'const', + 'continue', + 'debugger', + 'default', + 'delete', + 'do', + 'else', + 'enum', + 'export', + 'extends', + 'finally', + 'for', + 'function', + 'if', + 'import', + 'in', + 'instanceof', + 'new', + 'null', + 'return', + 'super', + 'switch', + 'this', + 'throw', + 'true', + 'false', + 'try', + 'typeof', + 'var', + 'void', + 'while', + 'with', + 'yield', +] + +ECMASCRIPT_CONDITIONAL_KEYWORDS = [ + # Words that are identifiers except in strict mode + 'let', # this one is also banned at the beginning of an ExpressionStatement + 'static', + 'implements', + 'interface', + 'package', + 'private', + 'protected', + 'public', + + # Words that are always allowed as identifiers, but are also keywords in + # other contexts. + 'as', + 'async', + 'from', + 'get', + 'of', + 'set', + 'target', +] + +# Technically this set includes a reserved word that isn't currently being used +# as a keyword in the grammar: `enum`. +ALL_KEYWORDS = set(ECMASCRIPT_FULL_KEYWORDS + ECMASCRIPT_CONDITIONAL_KEYWORDS) + + +class JSLexer(jsparagus.lexer.FlatStringLexer): + """Vague approximation of an ECMAScript lexer. """ + def __init__(self, parser, filename=None): + super().__init__(parser, filename) + + def _match(self, closing): + match = TOKEN_RE.match(self.src, self.point) + assert match is not None + + if match.end() == len(self.src) and not closing: + # The current token runs right up against the end of the current + # chunk of source and thus might continue in the next chunk. Do not + # move self.point. + return None + + token = match.group(1) + if token == '': + # Whitespace followed by end of input or illegal character. + if match.end() == len(self.src): + # End of input. Success! + assert closing + self.point = match.end() + return None + else: + c = self.src[match.end()] + self.throw("unexpected character: {!r}".format(c)) + + c = token[0] + t = None + if c.isdigit() or c == '.' and token != '.': + t = 'NumericLiteral' + elif c.isalpha() or c in '$_': + if token in ALL_KEYWORDS: # TODO support strict mode + if token == 'null': + t = 'NullLiteral' + elif token in ('true', 'false'): + t = 'BooleanLiteral' + else: + t = token + else: + t = 'Name' + elif c == '/': + if token.startswith(('/*', '//')): + # Incomplete comment. (In non-closing mode, this is handled + # above, immediately after the match.) + assert match.end() == len(self.src) + assert closing + self.point = len(self.src) + self.throw("incomplete comment at end of source") + + # We choose RegExp vs. division based on what the parser can + # accept, a literal implementation of the spec. 
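            # For example, after the `b` in `a = b / c` no production can
            # accept a RegularExpressionLiteral, so `/` lexes as division;
            # immediately after the `=` in `a = /c/i`, one can, so `/c/i`
            # lexes as a single RegExp token. That is the question the
            # can_accept_terminal call below asks.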
+ # + # To make this correct in combination with end-of-line ASI, make + # the parser rewind the lexer one token and ask for it again in + # that case, so that the lexer asks the can-accept question again. + point = match.start(1) + if self.parser.can_accept_terminal(self, 'RegularExpressionLiteral'): + match = REGEXP_RE.match(self.src, point) + if match is None: + if closing: + self.throw("unterminated regexp literal") + else: + return None + token = 'RegularExpressionLiteral' + else: + match = DIV_RE.match(self.src, point) + token = match.group(1) + + if not closing and match.end() == len(self.src): + # At the end of a chunk, `/a*b/` could be the start of + # `/a*b/g`, and `/` could be the start of `/=`. + return None + + t = token + elif c == '`': + if token.endswith('`'): + t = 'NoSubstitutionTemplate' + else: + t = 'TemplateHead' + elif c == '"' or c == "'": + t = 'StringLiteral' + elif c == '}': + # TODO: TemplateTail + t = token + elif c in '{()[];,~?:.<>=!+-*%&|^': + t = token + else: + assert False + + self._current_match = match + self.previous_token_end = self.point + self.current_token_start = match.start(1) + self.point = match.end() + return t + + def take(self): + return self._current_match.group(1) + + def saw_line_terminator(self): + """True if there's a LineTerminator before the current token.""" + i = self.previous_token_end + j = self.current_token_start + ws_between = self.src[i:j] + return any(c in ws_between for c in '\r\n\u2028\u2029') + + def can_close(self): + match = TOKEN_RE.match(self.src) + return match.group(1) == '' and self.parser.can_close() diff --git a/third_party/rust/jsparagus/js_parser/load_es_grammar.py b/third_party/rust/jsparagus/js_parser/load_es_grammar.py new file mode 100644 index 0000000000..c50688534d --- /dev/null +++ b/third_party/rust/jsparagus/js_parser/load_es_grammar.py @@ -0,0 +1,129 @@ +""" Functions for loading the ECMAScript lexical and syntactic grammars. """ + +from jsparagus.ordered import OrderedSet, OrderedFrozenSet +from jsparagus import gen, grammar +from .lexer import ECMASCRIPT_FULL_KEYWORDS, ECMASCRIPT_CONDITIONAL_KEYWORDS +from .parse_esgrammar import parse_esgrammar + + +ECMASCRIPT_LEXICAL_SYNTHETIC_TERMINALS: grammar.SyntheticTerminalsDict = { + # Theoretically, this should be the set of all Unicode characters, but that + # would take a lot of memory, and in practice, the set is not used. + 'SourceCharacter': OrderedFrozenSet([]), +} + +ECMASCRIPT_LEXICAL_GOAL_NTS = [ + 'WhiteSpace', + 'InputElementDiv', + 'InputElementRegExp', +] + + +def load_lexical_grammar(filename): + """Load the ECMAScript lexical grammar.""" + with open(filename) as f: + grammar_text = f.read() + g = parse_esgrammar( + grammar_text, + filename=filename, + goals=ECMASCRIPT_LEXICAL_GOAL_NTS, + synthetic_terminals=ECMASCRIPT_LEXICAL_SYNTHETIC_TERMINALS, + terminal_names=ECMASCRIPT_LEXICAL_SYNTHETIC_TERMINALS.keys()) + return gen.expand_parameterized_nonterminals(g) + + +ECMASCRIPT_SYNTACTIC_GOAL_NTS = [ + 'Script', + 'Module', + # 'FormalParameters', + # 'FunctionBody', +] + +# Identifiers are complicated. A "synthetic terminal" is a shorthand symbol +# that stands for any one of a set of terminals. For example, *IdentifierName* +# stands for any token that looks like an identifier, including keywords. +# +# These sets must use the names of the terminals produced by the lexer. Except +# for `Name`, our lexer output uses the terminal symbols of the syntactic +# grammar, which include some nonterminals of the lexical grammar. 
The +# syntactic grammar uses `BooleanLiteral`, not `true` and `false`; and it uses +# `NullLiteral` instead of `null`. +ECMASCRIPT_SYNTHETIC_TERMINALS = { + 'IdentifierName': OrderedSet([ + 'Name', + 'BooleanLiteral', + 'NullLiteral', + 'NameWithEscape', + *ECMASCRIPT_FULL_KEYWORDS, + *ECMASCRIPT_CONDITIONAL_KEYWORDS + ]) - OrderedSet(['true', 'false', 'null']), + 'Identifier': OrderedSet([ + 'Name', + 'NameWithEscape', + *ECMASCRIPT_CONDITIONAL_KEYWORDS + ]), +} + +# Lexical nonterminals that are used as terminals in the syntactic grammar. +ECMASCRIPT_TOKEN_NAMES = [ + 'BooleanLiteral', + 'IdentifierName', + 'PrivateIdentifier', + 'NoSubstitutionTemplate', + 'NullLiteral', + 'NumericLiteral', + 'BigIntLiteral', + 'RegularExpressionLiteral', + 'StringLiteral', + 'TemplateHead', + 'TemplateMiddle', + 'TemplateTail', +] + +# List of all terminals, other than keywords, that our (hand-coded) lexer +# produces. +# +# (What our lexer implements for IdentifierName and friends is a slight +# variation on the spec. See `ECMASCRIPT_SYNTHETIC_TERMINALS` above.) +TERMINAL_NAMES_FOR_SYNTACTIC_GRAMMAR = ECMASCRIPT_TOKEN_NAMES + [ + 'Identifier', + 'Name', +] + + +def load_syntactic_grammar(filename, extensions): + """Load the ECMAScript syntactic grammar.""" + with open(filename) as f: + grammar_text = f.read() + + extensions_content = [] + for ext_filename in extensions: + # Extract grammar_extension! macro content, and store in a list. + with open(ext_filename) as ext_file: + content = None + start_line = 0 + for lineno, line in enumerate(ext_file): + if line.startswith("grammar_extension!"): + assert line.endswith("{\n") + content = "" + # +2: enumerate starts at 0, while the first line is 1. + # Also, the first line added to the content variable is the + # next one. + start_line = lineno + 2 + continue + if line.startswith("}") and content: + extensions_content.append((ext_filename, start_line, content)) + content = None + continue + if content is not None: + content += line + + g = parse_esgrammar( + grammar_text, + filename=filename, + extensions=extensions_content, + goals=ECMASCRIPT_SYNTACTIC_GOAL_NTS, + synthetic_terminals=ECMASCRIPT_SYNTHETIC_TERMINALS, + terminal_names=TERMINAL_NAMES_FOR_SYNTACTIC_GRAMMAR) + + return g diff --git a/third_party/rust/jsparagus/js_parser/parse_esgrammar.py b/third_party/rust/jsparagus/js_parser/parse_esgrammar.py new file mode 100644 index 0000000000..efcb640406 --- /dev/null +++ b/third_party/rust/jsparagus/js_parser/parse_esgrammar.py @@ -0,0 +1,545 @@ +"""Parse a grammar written in ECMArkup.""" + +from __future__ import annotations +# mypy: no-implicit-optional + +import os +import collections +from typing import Dict, Iterable, Optional, Tuple + +from jsparagus import parse_pgen, gen, grammar, extension, types +from jsparagus.lexer import LexicalGrammar +from jsparagus.ordered import OrderedSet, OrderedFrozenSet + + +ESGrammarLexer = LexicalGrammar( + # the operators and keywords: + "[ ] { } , ~ + ? <! 
= == != => ( ) @ < > ' ; " + "but empty here lookahead no not of one or returns through Some None impl for let", + + NL="\n", + + # any number of colons together + EQ=r':+', + + # terminals of the ES grammar, quoted with backticks + T=r'`[^` \n]+`|```', + + # also terminals, denoting control characters + CHR=r'<[A-Z ]+>|U\+[0-9A-f]{4}', + + # nonterminals/types that will be followed by parameters + NTCALL=r'[A-Za-z]\w*(?=[\[<])', + + # nonterminals (also, boolean parameters and type names) + NT=r'[A-Za-z]\w*', + + # nonterminals wrapped in vertical bars for no apparent reason + NTALT=r'\|[A-Z]\w+\|', + + # the spec also gives a few productions names + PRODID=r'#[A-Za-z]\w*', + + # prose not wrapped in square brackets + # To avoid conflict with the `>` token, this is recognized only after a space. + PROSE=r'(?<= )>[^\n]*', + + # prose wrapped in square brackets + WPROSE=r'\[>[^]]*\]', + + # expression denoting a matched terminal or nonterminal + MATCH_REF=r'\$(?:0|[1-9][0-9]*)', + + # the spec also gives a few productions names + RUSTCOMMENT=r'//.*\n', +) + + +ESGrammarParser = gen.compile( + parse_pgen.load_grammar( + os.path.join(os.path.dirname(__file__), "esgrammar.pgen"))) + + +SIGIL_FALSE = '~' +SIGIL_TRUE = '+' + +# Abbreviations for single-character terminals, used in the lexical grammar. +ECMASCRIPT_CODE_POINTS = { + # From <https://tc39.es/ecma262/#table-31> + '<ZWNJ>': grammar.Literal('\u200c'), + '<ZWJ>': grammar.Literal('\u200d'), + '<ZWNBSP>': grammar.Literal('\ufeff'), + + # From <https://tc39.es/ecma262/#table-32> + '<TAB>': grammar.Literal('\t'), + '<VT>': grammar.Literal('\u000b'), + '<FF>': grammar.Literal('\u000c'), + '<SP>': grammar.Literal(' '), + '<NBSP>': grammar.Literal('\u00a0'), + # <ZWNBSP> already defined above + '<USP>': grammar.UnicodeCategory('Zs'), + + # From <https://tc39.es/ecma262/#table-33> + '<LF>': grammar.Literal('\u000a'), + '<CR>': grammar.Literal('\u000d'), + '<LS>': grammar.Literal('\u2028'), + '<PS>': grammar.Literal('\u2028'), +} + + +class ESGrammarBuilder: + def __init__(self, terminal_names): + # Names of terminals that are written as nonterminals in the grammar. + # For example, "BooleanLiteral" is a terminal name when parsing the + # syntactic grammar. + if terminal_names is None: + terminal_names = frozenset() + self.terminal_names = frozenset(terminal_names) + self.reset() + + def reset(self): + self.lexer = None + # This is how full-parsing and lazy-parsing are implemented, using + # different traits. + # + # This field contains the Rust's trait used for calling the method. + # When a CallMethod is generated, it is assumed to be a function of + # this trait. The trait is used by the Rust backend to generate + # multiple backends which are implementing different set of traits. + # Having the trait on the function call is useful as a way to filter + # functions calls at code-generation time. + # + # This field is updated by the `rust_param_impl`, which is used in + # grammar extensions, and visited before producing any CallMethod. 
+ self.method_trait = "AstBuilder" + + def rust_edsl(self, impl, grammar): + return extension.GrammarExtension(impl, grammar, self.lexer.filename) + + def rust_param_impl(self, trait, for_type, param): + self.method_trait = trait + return extension.ImplFor(param, trait, for_type) + + def rust_impl(self, trait, impl_type): + return self.rust_param_impl(trait, impl_type, []) + + def rust_nt_def(self, lhs, rhs_line): + # Right now, only focus on the syntactic grammar, and assume that all + # rules are patching existing grammar production by adding code. + return extension.ExtPatch(self.nt_def(None, lhs, ':', [rhs_line])) + + def rust_rhs_line(self, symbols): + return self.rhs_line(None, symbols, None, None) + + def rust_expr(self, expr): + assert isinstance(expr, grammar.CallMethod) + return expr + + def empty(self): + return [] + + def single(self, x): + return [x] + + def append(self, x, y): + return x + [y] + + def concat(self, x, y): + return x + y + + def blank_line(self): + return [] + + def nt_def_to_list(self, nt_def): + return [nt_def] + + def to_production(self, lhs, i, rhs, is_sole_production): + """Wrap a list of grammar symbols `rhs` in a Production object.""" + body, reducer, condition = rhs + if reducer is None: + reducer = self.default_reducer(lhs, i, body, is_sole_production) + return grammar.Production(body, reducer, condition=condition) + + def default_reducer(self, lhs, i, body, is_sole_production): + assert isinstance(lhs, grammar.Nt) + nt_name = lhs.name + + nargs = sum(1 for e in body if grammar.is_concrete_element(e)) + if is_sole_production: + method_name = nt_name + else: + method_name = '{} {}'.format(nt_name, i) + return self.expr_call(method_name, tuple(range(nargs)), None) + + def needs_asi(self, lhs, p): + """True if p is a production in which ASI can happen.""" + # The purpose of the fake ForLexicalDeclaration production is to have a + # copy of LexicalDeclaration that does not trigger ASI. + # + # Two productions have body == [";"] -- one for EmptyStatement and one + # for ClassMember. Neither should trigger ASI. + # + # The only other semicolons that should not trigger ASI are the ones in + # `for` statement productions, which happen to be exactly those + # semicolons that are not at the end of a production. + return (not (isinstance(lhs, grammar.Nt) + and lhs.name == 'ForLexicalDeclaration') + and len(p.body) > 1 + and p.body[-1] == ';') + + def apply_asi(self, p, reducer_was_autogenerated): + """Return two rules based on p, so that ASI can be applied.""" + assert isinstance(p.reducer, grammar.CallMethod) + + if reducer_was_autogenerated: + # Don't pass the semicolon to the method. + reducer = self.expr_call(p.reducer.method, + p.reducer.args[:-1], + None) + else: + reducer = p.reducer + + # Except for do-while loops, check at runtime that ASI occurs only at + # the end of a line. + if (len(p.body) == 7 + and p.body[0] == 'do' + and p.body[2] == 'while' + and p.body[3] == '(' + and p.body[5] == ')' + and p.body[6] == ';'): + code = "do_while_asi" + else: + code = "asi" + + return [ + # The preferred production, with the semicolon in. + p.copy_with(body=p.body[:], + reducer=reducer), + # The fallback production, performing ASI. 
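+ # The trailing `;` is dropped and replaced with an ErrorSymbol, so the parser can recover from a missing semicolon here; JSParser.on_recover() in parser.py then checks that ASI is really allowed at that point.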
+ p.copy_with(body=p.body[:-1] + [grammar.ErrorSymbol(code)], + reducer=reducer), + ] + + def expand_lexical_rhs(self, rhs): + body, reducer, condition = rhs + out = [] + for e in body: + if isinstance(e, str): + # The terminal symbols of the lexical grammar are characters, so + # add each character of this string as a separate element. + out += [grammar.Literal(ch) for ch in e] + else: + out.append(e) + return [out, reducer, condition] + + def nt_def(self, nt_type, lhs, eq, rhs_list): + has_sole_production = (len(rhs_list) == 1) + production_list = [] + for i, rhs in enumerate(rhs_list): + if eq == ':': + # Syntactic grammar. A hack is needed for ASI. + reducer_was_autogenerated = rhs[1] is None + p = self.to_production(lhs, i, rhs, has_sole_production) + if self.needs_asi(lhs, p): + production_list += self.apply_asi(p, reducer_was_autogenerated) + else: + production_list.append(p) + elif eq == '::': + # Lexical grammar. A hack is needed to replace multicharacter + # terminals like `!==` into sequences of character terminals. + rhs = self.expand_lexical_rhs(rhs) + p = self.to_production(lhs, i, rhs, has_sole_production) + production_list.append(p) + return (lhs.name, eq, grammar.NtDef(lhs.args, production_list, nt_type)) + + def nt_def_one_of(self, nt_type, nt_lhs, eq, terminals): + return self.nt_def(nt_type, nt_lhs, eq, [([t], None, None) for t in terminals]) + + def nt_lhs_no_params(self, name): + return grammar.Nt(name, ()) + + def nt_lhs_with_params(self, name, params): + return grammar.Nt(name, tuple(params)) + + def simple_type(self, name): + return types.Type(name) + + def lifetime_type(self, name): + return types.Lifetime(name) + + def parameterized_type(self, name, args): + return types.Type(name, tuple(args)) + + def t_list_line(self, terminals): + return terminals + + def terminal(self, t): + assert t[0] == "`" + assert t[-1] == "`" + return t[1:-1] + + def terminal_chr(self, chr): + raise ValueError("FAILED: %r" % chr) + + def rhs_line(self, ifdef, rhs, reducer, _prodid): + return (rhs, reducer, ifdef) + + def rhs_line_prose(self, prose): + return ([prose], None, None) + + def empty_rhs(self): + return [] + + def expr_match_ref(self, token): + assert token.startswith('$') + return int(token[1:]) + + def expr_call(self, method, args, fallible): + # NOTE: Currently "AstBuilder" functions are made fallible using the + # fallible_methods taken from some Rust code which extract this + # information to produce a JSON file. 
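+ # Consequently the `fallible` flag written in the grammar is ignored for AstBuilder methods; their fallibility comes from that extracted list instead.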
+ if self.method_trait == "AstBuilder": + fallible = None + return grammar.CallMethod(method, args or (), types.Type(self.method_trait), + fallible is not None) + + def expr_some(self, expr): + return grammar.Some(expr) + + def expr_none(self): + return None + + def ifdef(self, value, nt): + return nt, value + + def optional(self, nt): + return grammar.Optional(nt) + + def but_not(self, nt, exclusion): + _, exclusion = exclusion + return grammar.Exclude(nt, [exclusion]) + # return ('-', nt, exclusion) + + def but_not_one_of(self, nt, exclusion_list): + exclusion_list = [exclusion for _, exclusion in exclusion_list] + return grammar.Exclude(nt, exclusion_list) + # return ('-', nt, exclusion_list) + + def no_line_terminator_here(self, lt): + if lt not in ('LineTerminator', '|LineTerminator|'): + raise ValueError("unrecognized directive " + repr("[no " + lt + " here]")) + return grammar.NoLineTerminatorHere + + def nonterminal(self, name): + if name in self.terminal_names: + return name + return grammar.Nt(name, ()) + + def nonterminal_apply(self, name, args): + if name in self.terminal_names: + raise ValueError("parameters applied to terminal {!r}".format(name)) + if len(set(k for k, expr in args)) != len(args): + raise ValueError("parameter passed multiple times") + return grammar.Nt(name, tuple(args)) + + def arg_expr(self, sigil, argname): + if sigil == '?': + return (argname, grammar.Var(argname)) + else: + return (argname, sigil) + + def sigil_false(self): + return False + + def sigil_true(self): + return True + + def exclusion_terminal(self, t): + return ("t", t) + + def exclusion_nonterminal(self, nt): + return ("nt", nt) + + def exclusion_chr_range(self, c1, c2): + return ("range", c1, c2) + + def la_eq(self, t): + return grammar.LookaheadRule(OrderedFrozenSet([t]), True) + + def la_ne(self, t): + return grammar.LookaheadRule(OrderedFrozenSet([t]), False) + + def la_not_in_nonterminal(self, nt): + return grammar.LookaheadRule(OrderedFrozenSet([nt]), False) + + def la_not_in_set(self, lookahead_exclusions): + if all(len(excl) == 1 for excl in lookahead_exclusions): + return grammar.LookaheadRule( + OrderedFrozenSet(excl[0] for excl in lookahead_exclusions), + False) + raise ValueError("unsupported: lookahead > 1 token, {!r}" + .format(lookahead_exclusions)) + + def chr(self, t): + assert t[0] == "<" or t[0] == 'U' + if t[0] == "<": + assert t[-1] == ">" + if t not in ECMASCRIPT_CODE_POINTS: + raise ValueError("unrecognized character abbreviation {!r}".format(t)) + return ECMASCRIPT_CODE_POINTS[t] + else: + assert t[1] == "+" + return grammar.Literal(chr(int(t[2:], base=16))) + + +def finish_grammar(nt_defs, goals, variable_terminals, synthetic_terminals, + single_grammar=True, extensions=[]): + nt_grammars = {} + for nt_name, eq, _ in nt_defs: + if nt_name in nt_grammars: + raise ValueError( + "duplicate definitions for nonterminal {!r}" + .format(nt_name)) + nt_grammars[nt_name] = eq + + # Figure out which grammar we were trying to get (":" for syntactic, + # "::" for lexical) based on the goal symbols. 
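+ # (When single_grammar is true, every goal nonterminal must belong to that one grammar; mixing ":" and "::" goals is rejected below.)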
+ goals = list(goals) + if len(goals) == 0: + raise ValueError("no goal nonterminals specified") + if single_grammar: + selected_grammars = set(nt_grammars[goal] for goal in goals) + assert len(selected_grammars) != 0 + if len(selected_grammars) > 1: + raise ValueError( + "all goal nonterminals must be part of the same grammar; " + "got {!r} (matching these grammars: {!r})" + .format(set(goals), set(selected_grammars))) + [selected_grammar] = selected_grammars + + terminal_set = set() + + def hack_production(p): + for i, e in enumerate(p.body): + if isinstance(e, str) and e[:1] == "`": + if len(e) < 3 or e[-1:] != "`": + raise ValueError( + "Unrecognized grammar symbol: {!r} (in {!r})" + .format(e, p)) + p[i] = token = e[1:-1] + terminal_set.add(token) + + nonterminals = {} + for nt_name, eq, rhs_list_or_lambda in nt_defs: + if single_grammar and eq != selected_grammar: + continue + + if isinstance(rhs_list_or_lambda, grammar.NtDef): + nonterminals[nt_name] = rhs_list_or_lambda + else: + rhs_list = rhs_list_or_lambda + for p in rhs_list: + if not isinstance(p, grammar.Production): + raise ValueError( + "invalid grammar: ifdef in non-function-call context") + hack_production(p) + if nt_name in nonterminals: + raise ValueError( + "unsupported: multiple definitions for nt " + nt_name) + nonterminals[nt_name] = rhs_list + + for t in terminal_set: + if t in nonterminals: + raise ValueError( + "grammar contains both a terminal `{}` and nonterminal {}" + .format(t, t)) + + # Add execution modes to generate the various functions needed to handle + # syntax parsing and full parsing execution modes. + exec_modes = collections.defaultdict(OrderedSet) + noop_parser = types.Type("ParserTrait", (types.Lifetime("alloc"), types.UnitType)) + token_parser = types.Type("ParserTrait", ( + types.Lifetime("alloc"), types.Type("StackValue", (types.Lifetime("alloc"),)))) + ast_builder = types.Type("AstBuilderDelegate", (types.Lifetime("alloc"),)) + + # Full parsing takes token as input and build an AST. + exec_modes["full_actions"].extend([token_parser, ast_builder]) + + # Syntax parsing takes token as input but skip building the AST. + # TODO: The syntax parser is commented out for now, as we need something to + # be produced when we cannot call the AstBuilder for producing the values. + + # No-op parsing is used for the simulator, which is so far used for + # querying whether we can end the incremental input and lookup if a state + # can accept some kind of tokens. + exec_modes["noop_actions"].add(noop_parser) + + # Extensions are using an equivalent of Rust types to define the kind of + # parsers to be used, this map is used to convert these type names to the + # various execution modes. 
+ full_parser = types.Type("FullParser") + syntax_parser = types.Type("SyntaxParser") + noop_parser = types.Type("NoopParser") + type_to_modes = { + noop_parser: ["noop_actions", "full_actions"], + syntax_parser: ["full_actions"], + full_parser: ["full_actions"], + } + + result = grammar.Grammar( + nonterminals, + goal_nts=goals, + variable_terminals=variable_terminals, + synthetic_terminals=synthetic_terminals, + exec_modes=exec_modes, + type_to_modes=type_to_modes) + result.patch(extensions) + return result + + +def parse_esgrammar( + text: str, + *, + filename: Optional[str] = None, + extensions: Iterable[Tuple[os.PathLike, int, str]] = (), + goals: Optional[Iterable[str]] = None, + terminal_names: Iterable[str] = (), + synthetic_terminals: Optional[Dict[str, OrderedSet[str]]] = None, + single_grammar: bool = True +) -> grammar.Grammar: + if not text.endswith("\n\n"): + # Horrible hack: add a blank line at the end of the document so that + # the esgrammar grammar can use newlines as delimiters. :-P + text += "\n" + + terminal_names = frozenset(terminal_names) + if synthetic_terminals is None: + synthetic_terminals = {} + + builder = ESGrammarBuilder(terminal_names) + parser = ESGrammarParser(builder=builder, goal="grammar") + lexer = ESGrammarLexer(parser, filename=filename) + lexer.write(text) + nt_defs = lexer.close() + grammar_extensions = [] + for ext_filename, start_lineno, content in extensions: + builder.reset() + parser = ESGrammarParser(builder=builder, goal="rust_edsl") + lexer = ESGrammarLexer(parser, filename=ext_filename) + builder.lexer = lexer + lexer.start_lineno = start_lineno + lexer.write(content) + result = lexer.close() + grammar_extensions.append(result) + + if goals is None: + # Default to the first nonterminal in the input. + goals = [nt_defs[0][0]] + + return finish_grammar( + nt_defs, + goals=goals, + variable_terminals=terminal_names - frozenset(synthetic_terminals), + synthetic_terminals=synthetic_terminals, + single_grammar=single_grammar, + extensions=grammar_extensions) diff --git a/third_party/rust/jsparagus/js_parser/parser.py b/third_party/rust/jsparagus/js_parser/parser.py new file mode 100644 index 0000000000..f67708a9cc --- /dev/null +++ b/third_party/rust/jsparagus/js_parser/parser.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python + +"""parser.py - A JavaScript parser, currently with many bugs. + +See README.md for instructions. +""" + +from . import parser_tables +from .lexer import JSLexer + + +# "type: ignore" because mypy can't see inside js_parser.parser_tables. +class JSParser(parser_tables.Parser): # type: ignore + def __init__(self, goal='Script', builder=None): + super().__init__(goal, builder) + self._goal = goal + + def clone(self): + return JSParser(self._goal, self.methods) + + def on_recover(self, error_code, lexer, stv): + """Check that ASI error recovery is really acceptable.""" + if error_code == 'asi': + # ASI is allowed in three places: + # - at the end of the source text + # - before a close brace `}` + # - after a LineTerminator + # Hence the three-part if-condition below. + # + # The other quirks of ASI are implemented by massaging the syntax, + # in parse_esgrammar.py. + if not self.closed and stv.term != '}' and not lexer.saw_line_terminator(): + lexer.throw("missing semicolon") + else: + # ASI is always allowed in this one state. 
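+ # (The ASI rules allow the `;` terminating `do Statement while ( Expression )` to be inserted even when the next token is not on a new line.)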
+ assert error_code == 'do_while_asi' + + +def parse_Script(text): + lexer = JSLexer(JSParser('Script')) + lexer.write(text) + return lexer.close() diff --git a/third_party/rust/jsparagus/js_parser/slash.esgrammar b/third_party/rust/jsparagus/js_parser/slash.esgrammar new file mode 100644 index 0000000000..60bad3f660 --- /dev/null +++ b/third_party/rust/jsparagus/js_parser/slash.esgrammar @@ -0,0 +1,1683 @@ + +StringNumericLiteral ::: + StrWhiteSpace? + StrWhiteSpace? StrNumericLiteral StrWhiteSpace? + +StrWhiteSpace ::: + StrWhiteSpaceChar StrWhiteSpace? + +StrWhiteSpaceChar ::: + WhiteSpace + LineTerminator + +StrNumericLiteral ::: + StrDecimalLiteral + BinaryIntegerLiteral + OctalIntegerLiteral + HexIntegerLiteral + +StrDecimalLiteral ::: + StrUnsignedDecimalLiteral + `+` StrUnsignedDecimalLiteral + `-` StrUnsignedDecimalLiteral + +StrUnsignedDecimalLiteral ::: + `Infinity` + DecimalDigits `.` DecimalDigits? ExponentPart? + `.` DecimalDigits ExponentPart? + DecimalDigits ExponentPart? + + +SourceCharacter :: + {} + + +InputElementDiv :: + WhiteSpace + LineTerminator + Comment + CommonToken + DivPunctuator + RightBracePunctuator + +InputElementRegExp :: + WhiteSpace + LineTerminator + Comment + CommonToken + RightBracePunctuator + RegularExpressionLiteral + +InputElementRegExpOrTemplateTail :: + WhiteSpace + LineTerminator + Comment + CommonToken + RegularExpressionLiteral + TemplateSubstitutionTail + +InputElementTemplateTail :: + WhiteSpace + LineTerminator + Comment + CommonToken + DivPunctuator + TemplateSubstitutionTail + + +WhiteSpace :: + <TAB> + <VT> + <FF> + <SP> + <NBSP> + <ZWNBSP> + {Zs} + + +LineTerminator :: + <LF> + <CR> + <LINE SEPARATOR> + <PARAGRAPH SEPARATOR> + +LineTerminatorSequence :: + <LF> + <CR> [lookahead != <LF> ] + <LINE SEPARATOR> + <PARAGRAPH SEPARATOR> + <CR> <LF> + + +Comment :: + MultiLineComment + SingleLineComment + +MultiLineComment :: + `/*` MultiLineCommentChars? `*/` + +MultiLineCommentChars :: + MultiLineNotAsteriskChar MultiLineCommentChars? + `*` PostAsteriskCommentChars? + +PostAsteriskCommentChars :: + MultiLineNotForwardSlashOrAsteriskChar MultiLineCommentChars? + `*` PostAsteriskCommentChars? + +MultiLineNotAsteriskChar :: + SourceCharacter but not `*` + +MultiLineNotForwardSlashOrAsteriskChar :: + SourceCharacter but not one of `/` or `*` + +SingleLineComment :: + `//` SingleLineCommentChars? + +SingleLineCommentChars :: + SingleLineCommentChar SingleLineCommentChars? 
+ +SingleLineCommentChar :: + SourceCharacter but not LineTerminator + + +CommonToken :: + IdentifierName + Punctuator + NumericLiteral + StringLiteral + Template + + +IdentifierName :: + IdentifierStart + IdentifierName IdentifierPart + +IdentifierStart :: + UnicodeIDStart + `$` + `_` + `\` UnicodeEscapeSequence + +IdentifierPart :: + UnicodeIDContinue + `$` + `\` UnicodeEscapeSequence + <ZWNJ> + <ZWJ> + +UnicodeIDStart :: + {L} + {NI} + U+2118 + U+212E + U+309B + U+309C + +UnicodeIDContinue :: + UnicodeIDStart + {Mn} + {Mc} + {Nd} + {Pc} + U+1369 + U+1370 + U+1371 + U+00B7 + U+0387 + U+19DA + + +ReservedWord :: + Keyword + FutureReservedWord + NullLiteral + BooleanLiteral + + +Keyword :: one of + `await` + `break` + `case` `catch` `class` `const` `continue` + `debugger` `default` `delete` `do` + `else` `export` `extends` + `finally` `for` `function` + `if` `import` `in` `instanceof` + `new` + `return` + `super` `switch` + `this` `throw` `try` `typeof` + `var` `void` + `while` `with` + `yield` + + +FutureReservedWord :: + `enum` + + +Punctuator :: one of + `{` `(` `)` `[` `]` + `.` `...` `;` `,` + `<` `>` `<=` `>=` + `==` `!=` `===` `!==` + `+` `-` `*` `%` `**` + `++` `--` + `<<` `>>` `>>>` + `&` `|` `^` + `!` `~` + `&&` `||` + `?` `:` + `=` `+=` `-=` `*=` `%=` `**=` `<<=` `>>=` `>>>=` `&=` `|=` `^=` + `=>` + +DivPunctuator :: + `/` + `/=` + +RightBracePunctuator :: + `}` + + +NullLiteral :: + `null` + + +BooleanLiteral :: + `true` + `false` + + +NumericLiteral :: + DecimalLiteral + BinaryIntegerLiteral + OctalIntegerLiteral + HexIntegerLiteral + +DecimalLiteral :: + DecimalIntegerLiteral `.` DecimalDigits? ExponentPart? + `.` DecimalDigits ExponentPart? + DecimalIntegerLiteral ExponentPart? + +DecimalIntegerLiteral :: + `0` + NonZeroDigit DecimalDigits? + +DecimalDigits :: + DecimalDigit + DecimalDigits DecimalDigit + +DecimalDigit :: one of + `0` `1` `2` `3` `4` `5` `6` `7` `8` `9` + +NonZeroDigit :: one of + `1` `2` `3` `4` `5` `6` `7` `8` `9` + +ExponentPart :: + ExponentIndicator SignedInteger + +ExponentIndicator :: one of + `e` `E` + +SignedInteger :: + DecimalDigits + `+` DecimalDigits + `-` DecimalDigits + +BinaryIntegerLiteral :: + `0b` BinaryDigits + `0B` BinaryDigits + +BinaryDigits :: + BinaryDigit + BinaryDigits BinaryDigit + +BinaryDigit :: one of + `0` `1` + +OctalIntegerLiteral :: + `0o` OctalDigits + `0O` OctalDigits + +OctalDigits :: + OctalDigit + OctalDigits OctalDigit + +OctalDigit :: one of + `0` `1` `2` `3` `4` `5` `6` `7` + +HexIntegerLiteral :: + `0x` HexDigits + `0X` HexDigits + +HexDigits :: + HexDigit + HexDigits HexDigit + +HexDigit :: one of + `0` `1` `2` `3` `4` `5` `6` `7` `8` `9` `a` `b` `c` `d` `e` `f` `A` `B` `C` `D` `E` `F` + + +StringLiteral :: + `"` DoubleStringCharacters? `"` + `'` SingleStringCharacters? `'` + +DoubleStringCharacters :: + DoubleStringCharacter DoubleStringCharacters? + +SingleStringCharacters :: + SingleStringCharacter SingleStringCharacters? + +DoubleStringCharacter :: + SourceCharacter but not one of `"` or `\` or LineTerminator + <LINE SEPARATOR> + <PARAGRAPH SEPARATOR> + `\` EscapeSequence + LineContinuation + +SingleStringCharacter :: + SourceCharacter but not one of `'` or `\` or LineTerminator + <LINE SEPARATOR> + <PARAGRAPH SEPARATOR> + `\` EscapeSequence + LineContinuation + +LineContinuation :: + `\` LineTerminatorSequence + +EscapeSequence :: + CharacterEscapeSequence + `0` [lookahead <! 
DecimalDigit] + HexEscapeSequence + UnicodeEscapeSequence + + +CharacterEscapeSequence :: + SingleEscapeCharacter + NonEscapeCharacter + +SingleEscapeCharacter :: one of + `'` `"` `\` `b` `f` `n` `r` `t` `v` + +NonEscapeCharacter :: + SourceCharacter but not one of EscapeCharacter or LineTerminator + +EscapeCharacter :: + SingleEscapeCharacter + DecimalDigit + `x` + `u` + +HexEscapeSequence :: + `x` HexDigit HexDigit + +UnicodeEscapeSequence :: + `u` Hex4Digits + `u{` CodePoint `}` + +Hex4Digits :: + HexDigit HexDigit HexDigit HexDigit + + +RegularExpressionLiteral :: + `/` RegularExpressionBody `/` RegularExpressionFlags + +RegularExpressionBody :: + RegularExpressionFirstChar RegularExpressionChars + +RegularExpressionChars :: + [empty] + RegularExpressionChars RegularExpressionChar + +RegularExpressionFirstChar :: + RegularExpressionNonTerminator but not one of `*` or `\` or `/` or `[` + RegularExpressionBackslashSequence + RegularExpressionClass + +RegularExpressionChar :: + RegularExpressionNonTerminator but not one of `\` or `/` or `[` + RegularExpressionBackslashSequence + RegularExpressionClass + +RegularExpressionBackslashSequence :: + `\` RegularExpressionNonTerminator + +RegularExpressionNonTerminator :: + SourceCharacter but not LineTerminator + +RegularExpressionClass :: + `[` RegularExpressionClassChars `]` + +RegularExpressionClassChars :: + [empty] + RegularExpressionClassChars RegularExpressionClassChar + +RegularExpressionClassChar :: + RegularExpressionNonTerminator but not one of `]` or `\` + RegularExpressionBackslashSequence + +RegularExpressionFlags :: + [empty] + RegularExpressionFlags IdentifierPart + + +Template :: + NoSubstitutionTemplate + TemplateHead + +NoSubstitutionTemplate :: + ``` TemplateCharacters? ``` + +TemplateHead :: + ``` TemplateCharacters? `${` + +TemplateSubstitutionTail :: + TemplateMiddle + TemplateTail + +TemplateMiddle :: + `}` TemplateCharacters? `${` + +TemplateTail :: + `}` TemplateCharacters? ``` + +TemplateCharacters :: + TemplateCharacter TemplateCharacters? + +TemplateCharacter :: + `$` [lookahead != `{` ] + `\` EscapeSequence + `\` NotEscapeSequence + LineContinuation + LineTerminatorSequence + SourceCharacter but not one of ``` or `\` or `$` or LineTerminator + +NotEscapeSequence :: + `0` DecimalDigit + DecimalDigit but not `0` + `x` [lookahead <! HexDigit] + `x` HexDigit [lookahead <! HexDigit] + `u` [lookahead <! HexDigit] [lookahead != `{`] + `u` HexDigit [lookahead <! HexDigit] + `u` HexDigit HexDigit [lookahead <! HexDigit] + `u` HexDigit HexDigit HexDigit [lookahead <! HexDigit] + `u` `{` [lookahead <! HexDigit] + `u` `{` NotCodePoint [lookahead <! HexDigit] + `u` `{` CodePoint [lookahead <! 
HexDigit] [lookahead != `}`] + +NotCodePoint :: + HexDigits [> but only if MV of |HexDigits| > 0x10FFFF ] + +CodePoint :: + HexDigits [> but only if MV of |HexDigits| โค 0x10FFFF ] + + +IdentifierReference[Yield, Await] : + Identifier + [~Yield] `yield` + [~Await] `await` + +BindingIdentifier[Yield, Await] : + Identifier + `yield` + `await` + +LabelIdentifier[Yield, Await] : + Identifier + [~Yield] `yield` + [~Await] `await` + +Identifier : + IdentifierName but not ReservedWord + + +PrimaryExpression[Yield, Await] : + `this` + IdentifierReference[?Yield, ?Await] + Literal + ArrayLiteral[?Yield, ?Await] + ObjectLiteral[?Yield, ?Await] + FunctionExpression + ClassExpression[?Yield, ?Await] + GeneratorExpression + AsyncFunctionExpression + AsyncGeneratorExpression + RegularExpressionLiteral + TemplateLiteral[?Yield, ?Await, ~Tagged] + CoverParenthesizedExpressionAndArrowParameterList[?Yield, ?Await] #parencover + +CoverParenthesizedExpressionAndArrowParameterList[Yield, Await] : + `(` Expression[+In, ?Yield, ?Await] `)` + `(` Expression[+In, ?Yield, ?Await] `,` `)` + `(` `)` + `(` `...` BindingIdentifier[?Yield, ?Await] `)` + `(` `...` BindingPattern[?Yield, ?Await] `)` + `(` Expression[+In, ?Yield, ?Await] `,` `...` BindingIdentifier[?Yield, ?Await] `)` + `(` Expression[+In, ?Yield, ?Await] `,` `...` BindingPattern[?Yield, ?Await] `)` + + +ParenthesizedExpression[Yield, Await] : + `(` Expression[+In, ?Yield, ?Await] `)` + + +Literal : + NullLiteral + BooleanLiteral + NumericLiteral + StringLiteral + + +ArrayLiteral[Yield, Await] : + `[` Elision? `]` + `[` ElementList[?Yield, ?Await] `]` + `[` ElementList[?Yield, ?Await] `,` Elision? `]` + +ElementList[Yield, Await] : + Elision? AssignmentExpression[+In, ?Yield, ?Await] + Elision? SpreadElement[?Yield, ?Await] + ElementList[?Yield, ?Await] `,` Elision? AssignmentExpression[+In, ?Yield, ?Await] + ElementList[?Yield, ?Await] `,` Elision? 
SpreadElement[?Yield, ?Await] + +Elision : + `,` + Elision `,` + +SpreadElement[Yield, Await] : + `...` AssignmentExpression[+In, ?Yield, ?Await] + + +ObjectLiteral[Yield, Await] : + `{` `}` + `{` PropertyDefinitionList[?Yield, ?Await] `}` + `{` PropertyDefinitionList[?Yield, ?Await] `,` `}` + +PropertyDefinitionList[Yield, Await] : + PropertyDefinition[?Yield, ?Await] + PropertyDefinitionList[?Yield, ?Await] `,` PropertyDefinition[?Yield, ?Await] + +PropertyDefinition[Yield, Await] : + IdentifierReference[?Yield, ?Await] + CoverInitializedName[?Yield, ?Await] + PropertyName[?Yield, ?Await] `:` AssignmentExpression[+In, ?Yield, ?Await] + MethodDefinition[?Yield, ?Await] + `...` AssignmentExpression[+In, ?Yield, ?Await] + +PropertyName[Yield, Await] : + LiteralPropertyName + ComputedPropertyName[?Yield, ?Await] + +LiteralPropertyName : + IdentifierName + StringLiteral + NumericLiteral + +ComputedPropertyName[Yield, Await] : + `[` AssignmentExpression[+In, ?Yield, ?Await] `]` + +CoverInitializedName[Yield, Await] : + IdentifierReference[?Yield, ?Await] Initializer[+In, ?Yield, ?Await] + +Initializer[In, Yield, Await] : + `=` AssignmentExpression[?In, ?Yield, ?Await] + + +TemplateLiteral[Yield, Await, Tagged] : + NoSubstitutionTemplate + SubstitutionTemplate[?Yield, ?Await, ?Tagged] + +SubstitutionTemplate[Yield, Await, Tagged] : + TemplateHead Expression[+In, ?Yield, ?Await] TemplateSpans[?Yield, ?Await, ?Tagged] + +TemplateSpans[Yield, Await, Tagged] : + TemplateTail + TemplateMiddleList[?Yield, ?Await, ?Tagged] TemplateTail + +TemplateMiddleList[Yield, Await, Tagged] : + TemplateMiddle Expression[+In, ?Yield, ?Await] + TemplateMiddleList[?Yield, ?Await, ?Tagged] TemplateMiddle Expression[+In, ?Yield, ?Await] + + +MemberExpression[Yield, Await] : + PrimaryExpression[?Yield, ?Await] + MemberExpression[?Yield, ?Await] `[` Expression[+In, ?Yield, ?Await] `]` + MemberExpression[?Yield, ?Await] `.` IdentifierName + MemberExpression[?Yield, ?Await] TemplateLiteral[?Yield, ?Await, +Tagged] + SuperProperty[?Yield, ?Await] + MetaProperty + `new` MemberExpression[?Yield, ?Await] Arguments[?Yield, ?Await] + +SuperProperty[Yield, Await] : + `super` `[` Expression[+In, ?Yield, ?Await] `]` + `super` `.` IdentifierName + +MetaProperty : + NewTarget + +NewTarget : + `new` `.` `target` + +NewExpression[Yield, Await] : + MemberExpression[?Yield, ?Await] + `new` NewExpression[?Yield, ?Await] + +CallExpression[Yield, Await] : + CoverCallExpressionAndAsyncArrowHead[?Yield, ?Await] #callcover + SuperCall[?Yield, ?Await] + ImportCall[?Yield, ?Await] + CallExpression[?Yield, ?Await] Arguments[?Yield, ?Await] + CallExpression[?Yield, ?Await] `[` Expression[+In, ?Yield, ?Await] `]` + CallExpression[?Yield, ?Await] `.` IdentifierName + CallExpression[?Yield, ?Await] TemplateLiteral[?Yield, ?Await, +Tagged] + +SuperCall[Yield, Await] : + `super` Arguments[?Yield, ?Await] + +ImportCall[Yield, Await] : + `import` `(` AssignmentExpression[+In, ?Yield, ?Await] `)` + +Arguments[Yield, Await] : + `(` `)` + `(` ArgumentList[?Yield, ?Await] `)` + `(` ArgumentList[?Yield, ?Await] `,` `)` + +ArgumentList[Yield, Await] : + AssignmentExpression[+In, ?Yield, ?Await] + `...` AssignmentExpression[+In, ?Yield, ?Await] + ArgumentList[?Yield, ?Await] `,` AssignmentExpression[+In, ?Yield, ?Await] + ArgumentList[?Yield, ?Await] `,` `...` AssignmentExpression[+In, ?Yield, ?Await] + +LeftHandSideExpression[Yield, Await] : + NewExpression[?Yield, ?Await] + CallExpression[?Yield, ?Await] + + +CallMemberExpression[Yield, Await] : + 
MemberExpression[?Yield, ?Await] Arguments[?Yield, ?Await] + + +UpdateExpression[Yield, Await] : + LeftHandSideExpression[?Yield, ?Await] + LeftHandSideExpression[?Yield, ?Await] `++` + LeftHandSideExpression[?Yield, ?Await] `--` + `++` UnaryExpression[?Yield, ?Await] + `--` UnaryExpression[?Yield, ?Await] + + +UnaryExpression[Yield, Await] : + UpdateExpression[?Yield, ?Await] + `delete` UnaryExpression[?Yield, ?Await] + `void` UnaryExpression[?Yield, ?Await] + `typeof` UnaryExpression[?Yield, ?Await] + `+` UnaryExpression[?Yield, ?Await] + `-` UnaryExpression[?Yield, ?Await] + `~` UnaryExpression[?Yield, ?Await] + `!` UnaryExpression[?Yield, ?Await] + [+Await] AwaitExpression[?Yield] + + +ExponentiationExpression[Yield, Await] : + UnaryExpression[?Yield, ?Await] + UpdateExpression[?Yield, ?Await] `**` ExponentiationExpression[?Yield, ?Await] + + +MultiplicativeExpression[Yield, Await] : + ExponentiationExpression[?Yield, ?Await] + MultiplicativeExpression[?Yield, ?Await] MultiplicativeOperator ExponentiationExpression[?Yield, ?Await] + +MultiplicativeOperator : one of + `*` `/` `%` + + +AdditiveExpression[Yield, Await] : + MultiplicativeExpression[?Yield, ?Await] + AdditiveExpression[?Yield, ?Await] `+` MultiplicativeExpression[?Yield, ?Await] + AdditiveExpression[?Yield, ?Await] `-` MultiplicativeExpression[?Yield, ?Await] + + +ShiftExpression[Yield, Await] : + AdditiveExpression[?Yield, ?Await] + ShiftExpression[?Yield, ?Await] `<<` AdditiveExpression[?Yield, ?Await] + ShiftExpression[?Yield, ?Await] `>>` AdditiveExpression[?Yield, ?Await] + ShiftExpression[?Yield, ?Await] `>>>` AdditiveExpression[?Yield, ?Await] + + +RelationalExpression[In, Yield, Await] : + ShiftExpression[?Yield, ?Await] + RelationalExpression[?In, ?Yield, ?Await] `<` ShiftExpression[?Yield, ?Await] + RelationalExpression[?In, ?Yield, ?Await] `>` ShiftExpression[?Yield, ?Await] + RelationalExpression[?In, ?Yield, ?Await] `<=` ShiftExpression[?Yield, ?Await] + RelationalExpression[?In, ?Yield, ?Await] `>=` ShiftExpression[?Yield, ?Await] + RelationalExpression[?In, ?Yield, ?Await] `instanceof` ShiftExpression[?Yield, ?Await] + [+In] RelationalExpression[+In, ?Yield, ?Await] `in` ShiftExpression[?Yield, ?Await] + + +EqualityExpression[In, Yield, Await] : + RelationalExpression[?In, ?Yield, ?Await] + EqualityExpression[?In, ?Yield, ?Await] `==` RelationalExpression[?In, ?Yield, ?Await] + EqualityExpression[?In, ?Yield, ?Await] `!=` RelationalExpression[?In, ?Yield, ?Await] + EqualityExpression[?In, ?Yield, ?Await] `===` RelationalExpression[?In, ?Yield, ?Await] + EqualityExpression[?In, ?Yield, ?Await] `!==` RelationalExpression[?In, ?Yield, ?Await] + + +BitwiseANDExpression[In, Yield, Await] : + EqualityExpression[?In, ?Yield, ?Await] + BitwiseANDExpression[?In, ?Yield, ?Await] `&` EqualityExpression[?In, ?Yield, ?Await] + +BitwiseXORExpression[In, Yield, Await] : + BitwiseANDExpression[?In, ?Yield, ?Await] + BitwiseXORExpression[?In, ?Yield, ?Await] `^` BitwiseANDExpression[?In, ?Yield, ?Await] + +BitwiseORExpression[In, Yield, Await] : + BitwiseXORExpression[?In, ?Yield, ?Await] + BitwiseORExpression[?In, ?Yield, ?Await] `|` BitwiseXORExpression[?In, ?Yield, ?Await] + + +LogicalANDExpression[In, Yield, Await] : + BitwiseORExpression[?In, ?Yield, ?Await] + LogicalANDExpression[?In, ?Yield, ?Await] `&&` BitwiseORExpression[?In, ?Yield, ?Await] + +LogicalORExpression[In, Yield, Await] : + LogicalANDExpression[?In, ?Yield, ?Await] + LogicalORExpression[?In, ?Yield, ?Await] `||` LogicalANDExpression[?In, ?Yield, ?Await] 
+ + +ConditionalExpression[In, Yield, Await] : + LogicalORExpression[?In, ?Yield, ?Await] + LogicalORExpression[?In, ?Yield, ?Await] `?` AssignmentExpression[+In, ?Yield, ?Await] `:` AssignmentExpression[?In, ?Yield, ?Await] + + +AssignmentExpression[In, Yield, Await] : + ConditionalExpression[?In, ?Yield, ?Await] + [+Yield] YieldExpression[?In, ?Await] + ArrowFunction[?In, ?Yield, ?Await] + AsyncArrowFunction[?In, ?Yield, ?Await] + LeftHandSideExpression[?Yield, ?Await] `=` AssignmentExpression[?In, ?Yield, ?Await] #assignment + LeftHandSideExpression[?Yield, ?Await] AssignmentOperator AssignmentExpression[?In, ?Yield, ?Await] + +AssignmentOperator : one of + `*=` `/=` `%=` `+=` `-=` `<<=` `>>=` `>>>=` `&=` `^=` `|=` `**=` + + +AssignmentPattern[Yield, Await] : + ObjectAssignmentPattern[?Yield, ?Await] + ArrayAssignmentPattern[?Yield, ?Await] + +ObjectAssignmentPattern[Yield, Await] : + `{` `}` + `{` AssignmentRestProperty[?Yield, ?Await] `}` + `{` AssignmentPropertyList[?Yield, ?Await] `}` + `{` AssignmentPropertyList[?Yield, ?Await] `,` AssignmentRestProperty[?Yield, ?Await]? `}` + +ArrayAssignmentPattern[Yield, Await] : + `[` Elision? AssignmentRestElement[?Yield, ?Await]? `]` + `[` AssignmentElementList[?Yield, ?Await] `]` + `[` AssignmentElementList[?Yield, ?Await] `,` Elision? AssignmentRestElement[?Yield, ?Await]? `]` + +AssignmentRestProperty[Yield, Await] : + `...` DestructuringAssignmentTarget[?Yield, ?Await] + +AssignmentPropertyList[Yield, Await] : + AssignmentProperty[?Yield, ?Await] + AssignmentPropertyList[?Yield, ?Await] `,` AssignmentProperty[?Yield, ?Await] + +AssignmentElementList[Yield, Await] : + AssignmentElisionElement[?Yield, ?Await] + AssignmentElementList[?Yield, ?Await] `,` AssignmentElisionElement[?Yield, ?Await] + +AssignmentElisionElement[Yield, Await] : + Elision? AssignmentElement[?Yield, ?Await] + +AssignmentProperty[Yield, Await] : + IdentifierReference[?Yield, ?Await] Initializer[+In, ?Yield, ?Await]? + PropertyName[?Yield, ?Await] `:` AssignmentElement[?Yield, ?Await] + +AssignmentElement[Yield, Await] : + DestructuringAssignmentTarget[?Yield, ?Await] Initializer[+In, ?Yield, ?Await]? 
+ +AssignmentRestElement[Yield, Await] : + `...` DestructuringAssignmentTarget[?Yield, ?Await] + +DestructuringAssignmentTarget[Yield, Await] : + LeftHandSideExpression[?Yield, ?Await] + + +Expression[In, Yield, Await] : + AssignmentExpression[?In, ?Yield, ?Await] + Expression[?In, ?Yield, ?Await] `,` AssignmentExpression[?In, ?Yield, ?Await] + + +Statement[Yield, Await, Return] : + BlockStatement[?Yield, ?Await, ?Return] + VariableStatement[?Yield, ?Await] + EmptyStatement + ExpressionStatement[?Yield, ?Await] + IfStatement[?Yield, ?Await, ?Return] + BreakableStatement[?Yield, ?Await, ?Return] + ContinueStatement[?Yield, ?Await] + BreakStatement[?Yield, ?Await] + [+Return] ReturnStatement[?Yield, ?Await] + WithStatement[?Yield, ?Await, ?Return] + LabelledStatement[?Yield, ?Await, ?Return] + ThrowStatement[?Yield, ?Await] + TryStatement[?Yield, ?Await, ?Return] + DebuggerStatement + +Declaration[Yield, Await] : + HoistableDeclaration[?Yield, ?Await, ~Default] + ClassDeclaration[?Yield, ?Await, ~Default] + LexicalDeclaration[+In, ?Yield, ?Await] + +HoistableDeclaration[Yield, Await, Default] : + FunctionDeclaration[?Yield, ?Await, ?Default] + GeneratorDeclaration[?Yield, ?Await, ?Default] + AsyncFunctionDeclaration[?Yield, ?Await, ?Default] + AsyncGeneratorDeclaration[?Yield, ?Await, ?Default] + +BreakableStatement[Yield, Await, Return] : + IterationStatement[?Yield, ?Await, ?Return] + SwitchStatement[?Yield, ?Await, ?Return] + + +BlockStatement[Yield, Await, Return] : + Block[?Yield, ?Await, ?Return] + +Block[Yield, Await, Return] : + `{` StatementList[?Yield, ?Await, ?Return]? `}` + +StatementList[Yield, Await, Return] : + StatementListItem[?Yield, ?Await, ?Return] + StatementList[?Yield, ?Await, ?Return] StatementListItem[?Yield, ?Await, ?Return] + +StatementListItem[Yield, Await, Return] : + Statement[?Yield, ?Await, ?Return] + Declaration[?Yield, ?Await] + + +LexicalDeclaration[In, Yield, Await] : + LetOrConst BindingList[?In, ?Yield, ?Await] `;` + +LetOrConst : + `let` + `const` + +BindingList[In, Yield, Await] : + LexicalBinding[?In, ?Yield, ?Await] + BindingList[?In, ?Yield, ?Await] `,` LexicalBinding[?In, ?Yield, ?Await] + +LexicalBinding[In, Yield, Await] : + BindingIdentifier[?Yield, ?Await] Initializer[?In, ?Yield, ?Await]? + BindingPattern[?Yield, ?Await] Initializer[?In, ?Yield, ?Await] + + +VariableStatement[Yield, Await] : + `var` VariableDeclarationList[+In, ?Yield, ?Await] `;` + +VariableDeclarationList[In, Yield, Await] : + VariableDeclaration[?In, ?Yield, ?Await] + VariableDeclarationList[?In, ?Yield, ?Await] `,` VariableDeclaration[?In, ?Yield, ?Await] + +VariableDeclaration[In, Yield, Await] : + BindingIdentifier[?Yield, ?Await] Initializer[?In, ?Yield, ?Await]? + BindingPattern[?Yield, ?Await] Initializer[?In, ?Yield, ?Await] + + +BindingPattern[Yield, Await] : + ObjectBindingPattern[?Yield, ?Await] + ArrayBindingPattern[?Yield, ?Await] + +ObjectBindingPattern[Yield, Await] : + `{` `}` + `{` BindingRestProperty[?Yield, ?Await] `}` + `{` BindingPropertyList[?Yield, ?Await] `}` + `{` BindingPropertyList[?Yield, ?Await] `,` BindingRestProperty[?Yield, ?Await]? `}` + +ArrayBindingPattern[Yield, Await] : + `[` Elision? BindingRestElement[?Yield, ?Await]? `]` + `[` BindingElementList[?Yield, ?Await] `]` + `[` BindingElementList[?Yield, ?Await] `,` Elision? BindingRestElement[?Yield, ?Await]? 
`]` + +BindingRestProperty[Yield, Await] : + `...` BindingIdentifier[?Yield, ?Await] + +BindingPropertyList[Yield, Await] : + BindingProperty[?Yield, ?Await] + BindingPropertyList[?Yield, ?Await] `,` BindingProperty[?Yield, ?Await] + +BindingElementList[Yield, Await] : + BindingElisionElement[?Yield, ?Await] + BindingElementList[?Yield, ?Await] `,` BindingElisionElement[?Yield, ?Await] + +BindingElisionElement[Yield, Await] : + Elision? BindingElement[?Yield, ?Await] + +BindingProperty[Yield, Await] : + SingleNameBinding[?Yield, ?Await] + PropertyName[?Yield, ?Await] `:` BindingElement[?Yield, ?Await] + +BindingElement[Yield, Await] : + SingleNameBinding[?Yield, ?Await] + BindingPattern[?Yield, ?Await] Initializer[+In, ?Yield, ?Await]? + +SingleNameBinding[Yield, Await] : + BindingIdentifier[?Yield, ?Await] Initializer[+In, ?Yield, ?Await]? + +BindingRestElement[Yield, Await] : + `...` BindingIdentifier[?Yield, ?Await] + `...` BindingPattern[?Yield, ?Await] + + +EmptyStatement : + `;` + + +ExpressionStatement[Yield, Await] : + [lookahead <! {`{`, `function`, `async`, `class`, `let`}] Expression[+In, ?Yield, ?Await] `;` + + +IfStatement[Yield, Await, Return] : + `if` `(` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] `else` Statement[?Yield, ?Await, ?Return] + `if` `(` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + + +IterationStatement[Yield, Await, Return] : + `do` Statement[?Yield, ?Await, ?Return] `while` `(` Expression[+In, ?Yield, ?Await] `)` `;` + `while` `(` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + `for` `(` [lookahead != `let`] Expression[~In, ?Yield, ?Await]? `;` Expression[+In, ?Yield, ?Await]? `;` Expression[+In, ?Yield, ?Await]? `)` Statement[?Yield, ?Await, ?Return] + `for` `(` `var` VariableDeclarationList[~In, ?Yield, ?Await] `;` Expression[+In, ?Yield, ?Await]? `;` Expression[+In, ?Yield, ?Await]? `)` Statement[?Yield, ?Await, ?Return] + `for` `(` ForLexicalDeclaration[~In, ?Yield, ?Await] Expression[+In, ?Yield, ?Await]? `;` Expression[+In, ?Yield, ?Await]? `)` Statement[?Yield, ?Await, ?Return] + `for` `(` [lookahead != `let`] LeftHandSideExpression[?Yield, ?Await] `in` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + `for` `(` `var` ForBinding[?Yield, ?Await] `in` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + `for` `(` ForDeclaration[?Yield, ?Await] `in` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + `for` `(` [lookahead <! {`async`, `let`} ] LeftHandSideExpression[?Yield, ?Await] `of` AssignmentExpression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + `for` `(` `var` ForBinding[?Yield, ?Await] `of` AssignmentExpression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + `for` `(` ForDeclaration[?Yield, ?Await] `of` AssignmentExpression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + [+Await] `for` `await` `(` [lookahead <! 
{`async`, `let`} ] LeftHandSideExpression[?Yield, ?Await] `of` AssignmentExpression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + [+Await] `for` `await` `(` `var` ForBinding[?Yield, ?Await] `of` AssignmentExpression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + [+Await] `for` `await` `(` ForDeclaration[?Yield, ?Await] `of` AssignmentExpression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + +ForDeclaration[Yield, Await] : + LetOrConst ForBinding[?Yield, ?Await] + +ForBinding[Yield, Await] : + BindingIdentifier[?Yield, ?Await] + BindingPattern[?Yield, ?Await] + + +ContinueStatement[Yield, Await] : + `continue` `;` + `continue` LabelIdentifier[?Yield, ?Await] `;` + + +BreakStatement[Yield, Await] : + `break` `;` + `break` LabelIdentifier[?Yield, ?Await] `;` + + +ReturnStatement[Yield, Await] : + `return` `;` + `return` Expression[+In, ?Yield, ?Await] `;` + + +WithStatement[Yield, Await, Return] : + `with` `(` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + + +SwitchStatement[Yield, Await, Return] : + `switch` `(` Expression[+In, ?Yield, ?Await] `)` CaseBlock[?Yield, ?Await, ?Return] + +CaseBlock[Yield, Await, Return] : + `{` CaseClauses[?Yield, ?Await, ?Return]? `}` + `{` CaseClauses[?Yield, ?Await, ?Return]? DefaultClause[?Yield, ?Await, ?Return] CaseClauses[?Yield, ?Await, ?Return]? `}` + +CaseClauses[Yield, Await, Return] : + CaseClause[?Yield, ?Await, ?Return] + CaseClauses[?Yield, ?Await, ?Return] CaseClause[?Yield, ?Await, ?Return] + +CaseClause[Yield, Await, Return] : + `case` Expression[+In, ?Yield, ?Await] `:` StatementList[?Yield, ?Await, ?Return]? + +DefaultClause[Yield, Await, Return] : + `default` `:` StatementList[?Yield, ?Await, ?Return]? + + +LabelledStatement[Yield, Await, Return] : + LabelIdentifier[?Yield, ?Await] `:` LabelledItem[?Yield, ?Await, ?Return] + +LabelledItem[Yield, Await, Return] : + Statement[?Yield, ?Await, ?Return] + FunctionDeclaration[?Yield, ?Await, ~Default] + + +ThrowStatement[Yield, Await] : + `throw` Expression[+In, ?Yield, ?Await] `;` + + +TryStatement[Yield, Await, Return] : + `try` Block[?Yield, ?Await, ?Return] Catch[?Yield, ?Await, ?Return] + `try` Block[?Yield, ?Await, ?Return] Finally[?Yield, ?Await, ?Return] + `try` Block[?Yield, ?Await, ?Return] Catch[?Yield, ?Await, ?Return] Finally[?Yield, ?Await, ?Return] + +Catch[Yield, Await, Return] : + `catch` `(` CatchParameter[?Yield, ?Await] `)` Block[?Yield, ?Await, ?Return] + `catch` Block[?Yield, ?Await, ?Return] + +Finally[Yield, Await, Return] : + `finally` Block[?Yield, ?Await, ?Return] + +CatchParameter[Yield, Await] : + BindingIdentifier[?Yield, ?Await] + BindingPattern[?Yield, ?Await] + + +DebuggerStatement : + `debugger` `;` + + +FunctionDeclaration[Yield, Await, Default] : + `function` BindingIdentifier[?Yield, ?Await] `(` FormalParameters[~Yield, ~Await] `)` `{` FunctionBody[~Yield, ~Await] `}` + [+Default] `function` `(` FormalParameters[~Yield, ~Await] `)` `{` FunctionBody[~Yield, ~Await] `}` + +FunctionExpression : + `function` BindingIdentifier[~Yield, ~Await]? 
`(` FormalParameters[~Yield, ~Await] `)` `{` FunctionBody[~Yield, ~Await] `}` + +UniqueFormalParameters[Yield, Await] : + FormalParameters[?Yield, ?Await] + +FormalParameters[Yield, Await] : + [empty] + FunctionRestParameter[?Yield, ?Await] + FormalParameterList[?Yield, ?Await] + FormalParameterList[?Yield, ?Await] `,` + FormalParameterList[?Yield, ?Await] `,` FunctionRestParameter[?Yield, ?Await] + +FormalParameterList[Yield, Await] : + FormalParameter[?Yield, ?Await] + FormalParameterList[?Yield, ?Await] `,` FormalParameter[?Yield, ?Await] + +FunctionRestParameter[Yield, Await] : + BindingRestElement[?Yield, ?Await] + +FormalParameter[Yield, Await] : + BindingElement[?Yield, ?Await] + +FunctionBody[Yield, Await] : + FunctionStatementList[?Yield, ?Await] + +FunctionStatementList[Yield, Await] : + StatementList[?Yield, ?Await, +Return]? + + +ArrowFunction[In, Yield, Await] : + ArrowParameters[?Yield, ?Await] `=>` ConciseBody[?In] + +ArrowParameters[Yield, Await] : + BindingIdentifier[?Yield, ?Await] + CoverParenthesizedExpressionAndArrowParameterList[?Yield, ?Await] #parencover + +ConciseBody[In] : + [lookahead != `{` ] AssignmentExpression[?In, ~Yield, ~Await] + `{` FunctionBody[~Yield, ~Await] `}` + + +ArrowFormalParameters[Yield, Await] : + `(` UniqueFormalParameters[?Yield, ?Await] `)` + + +MethodDefinition[Yield, Await] : + PropertyName[?Yield, ?Await] `(` UniqueFormalParameters[~Yield, ~Await] `)` `{` FunctionBody[~Yield, ~Await] `}` + GeneratorMethod[?Yield, ?Await] + AsyncMethod[?Yield, ?Await] + AsyncGeneratorMethod[?Yield, ?Await] + `get` PropertyName[?Yield, ?Await] `(` `)` `{` FunctionBody[~Yield, ~Await] `}` + `set` PropertyName[?Yield, ?Await] `(` PropertySetParameterList `)` `{` FunctionBody[~Yield, ~Await] `}` + +PropertySetParameterList : + FormalParameter[~Yield, ~Await] + + +GeneratorMethod[Yield, Await] : + `*` PropertyName[?Yield, ?Await] `(` UniqueFormalParameters[+Yield, ~Await] `)` `{` GeneratorBody `}` + +GeneratorDeclaration[Yield, Await, Default] : + `function` `*` BindingIdentifier[?Yield, ?Await] `(` FormalParameters[+Yield, ~Await] `)` `{` GeneratorBody `}` + [+Default] `function` `*` `(` FormalParameters[+Yield, ~Await] `)` `{` GeneratorBody `}` + +GeneratorExpression : + `function` `*` BindingIdentifier[+Yield, ~Await]? `(` FormalParameters[+Yield, ~Await] `)` `{` GeneratorBody `}` + +GeneratorBody : + FunctionBody[+Yield, ~Await] + +YieldExpression[In, Await] : + `yield` + `yield` AssignmentExpression[?In, +Yield, ?Await] + `yield` `*` AssignmentExpression[?In, +Yield, ?Await] + + +AsyncGeneratorMethod[Yield, Await] : + `async` `*` PropertyName[?Yield, ?Await] `(` UniqueFormalParameters[+Yield, +Await] `)` `{` AsyncGeneratorBody `}` + +AsyncGeneratorDeclaration[Yield, Await, Default] : + `async` `function` `*` BindingIdentifier[?Yield, ?Await] `(` FormalParameters[+Yield, +Await] `)` `{` AsyncGeneratorBody `}` + [+Default] `async` `function` `*` `(` FormalParameters[+Yield, +Await] `)` `{` AsyncGeneratorBody `}` + +AsyncGeneratorExpression : + `async` `function` `*` BindingIdentifier[+Yield, +Await]? `(` FormalParameters[+Yield, +Await] `)` `{` AsyncGeneratorBody `}` + +AsyncGeneratorBody : + FunctionBody[+Yield, +Await] + + +ClassDeclaration[Yield, Await, Default] : + `class` BindingIdentifier[?Yield, ?Await] ClassTail[?Yield, ?Await] + [+Default] `class` ClassTail[?Yield, ?Await] + +ClassExpression[Yield, Await] : + `class` BindingIdentifier[?Yield, ?Await]? ClassTail[?Yield, ?Await] + +ClassTail[Yield, Await] : + ClassHeritage[?Yield, ?Await]? 
`{` ClassBody[?Yield, ?Await]? `}` + +ClassHeritage[Yield, Await] : + `extends` LeftHandSideExpression[?Yield, ?Await] + +ClassBody[Yield, Await] : + ClassElementList[?Yield, ?Await] + +ClassElementList[Yield, Await] : + ClassElement[?Yield, ?Await] + ClassElementList[?Yield, ?Await] ClassElement[?Yield, ?Await] + +ClassElement[Yield, Await] : + MethodDefinition[?Yield, ?Await] + `static` MethodDefinition[?Yield, ?Await] + `;` + + +AsyncFunctionDeclaration[Yield, Await, Default] : + `async` `function` BindingIdentifier[?Yield, ?Await] `(` FormalParameters[~Yield, +Await] `)` `{` AsyncFunctionBody `}` + [+Default] `async` `function` `(` FormalParameters[~Yield, +Await] `)` `{` AsyncFunctionBody `}` + +AsyncFunctionExpression : + `async` `function` `(` FormalParameters[~Yield, +Await] `)` `{` AsyncFunctionBody `}` + `async` `function` BindingIdentifier[~Yield, +Await] `(` FormalParameters[~Yield, +Await] `)` `{` AsyncFunctionBody `}` + +AsyncMethod[Yield, Await] : + `async` PropertyName[?Yield, ?Await] `(` UniqueFormalParameters[~Yield, +Await] `)` `{` AsyncFunctionBody `}` + +AsyncFunctionBody : + FunctionBody[~Yield, +Await] + +AwaitExpression[Yield] : + `await` UnaryExpression[?Yield, +Await] + + +AsyncArrowFunction[In, Yield, Await] : + `async` AsyncArrowBindingIdentifier[?Yield] `=>` AsyncConciseBody[?In] + CoverCallExpressionAndAsyncArrowHead[?Yield, ?Await] `=>` AsyncConciseBody[?In] #callcover + +AsyncConciseBody[In] : + [lookahead != `{`] AssignmentExpression[?In, ~Yield, +Await] + `{` AsyncFunctionBody `}` + +AsyncArrowBindingIdentifier[Yield] : + BindingIdentifier[?Yield, +Await] + +CoverCallExpressionAndAsyncArrowHead[Yield, Await] : + MemberExpression[?Yield, ?Await] Arguments[?Yield, ?Await] + + +AsyncArrowHead : + `async` ArrowFormalParameters[~Yield, +Await] + + +Script : + ScriptBody? + +ScriptBody : + StatementList[~Yield, ~Await, ~Return] + + +Module : + ModuleBody? + +ModuleBody : + ModuleItemList + +ModuleItemList : + ModuleItem + ModuleItemList ModuleItem + +ModuleItem : + ImportDeclaration + ExportDeclaration + StatementListItem[~Yield, ~Await, ~Return] + + +ImportDeclaration : + `import` ImportClause FromClause `;` + `import` ModuleSpecifier `;` + +ImportClause : + ImportedDefaultBinding + NameSpaceImport + NamedImports + ImportedDefaultBinding `,` NameSpaceImport + ImportedDefaultBinding `,` NamedImports + +ImportedDefaultBinding : + ImportedBinding + +NameSpaceImport : + `*` `as` ImportedBinding + +NamedImports : + `{` `}` + `{` ImportsList `}` + `{` ImportsList `,` `}` + +FromClause : + `from` ModuleSpecifier + +ImportsList : + ImportSpecifier + ImportsList `,` ImportSpecifier + +ImportSpecifier : + ImportedBinding + IdentifierName `as` ImportedBinding + +ModuleSpecifier : + StringLiteral + +ImportedBinding : + BindingIdentifier[~Yield, ~Await] + + +ExportDeclaration : + `export` `*` FromClause `;` + `export` ExportClause FromClause `;` + `export` ExportClause `;` + `export` VariableStatement[~Yield, ~Await] + `export` Declaration[~Yield, ~Await] + `export` `default` HoistableDeclaration[~Yield, ~Await, +Default] + `export` `default` ClassDeclaration[~Yield, ~Await, +Default] + `export` `default` [lookahead <! {`function`, `async`, `class`}] AssignmentExpression[+In, ~Yield, ~Await] `;` + +ExportClause : + `{` `}` + `{` ExportsList `}` + `{` ExportsList `,` `}` + +ExportsList : + ExportSpecifier + ExportsList `,` ExportSpecifier + +ExportSpecifier : + IdentifierName + IdentifierName `as` IdentifierName + + +uri ::: + uriCharacters? 
+ +uriCharacters ::: + uriCharacter uriCharacters? + +uriCharacter ::: + uriReserved + uriUnescaped + uriEscaped + +uriReserved ::: one of + `;` `/` `?` `:` `@` `&` `=` `+` `$` `,` + +uriUnescaped ::: + uriAlpha + DecimalDigit + uriMark + +uriEscaped ::: + `%` HexDigit HexDigit + +uriAlpha ::: one of + `a` `b` `c` `d` `e` `f` `g` `h` `i` `j` `k` `l` `m` `n` `o` `p` `q` `r` `s` `t` `u` `v` `w` `x` `y` `z` + `A` `B` `C` `D` `E` `F` `G` `H` `I` `J` `K` `L` `M` `N` `O` `P` `Q` `R` `S` `T` `U` `V` `W` `X` `Y` `Z` + +uriMark ::: one of + `-` `_` `.` `!` `~` `*` `'` `(` `)` + + +NativeFunction : + `function` PropertyName[~Yield, ~Await]? `(` FormalParameters[~Yield, ~Await] `)` `{` `[` `native` `code` `]` `}` + + +Pattern[U, N] :: + Disjunction[?U, ?N] + +Disjunction[U, N] :: + Alternative[?U, ?N] + Alternative[?U, ?N] `|` Disjunction[?U, ?N] + +Alternative[U, N] :: + [empty] + Alternative[?U, ?N] Term[?U, ?N] + +Term[U, N] :: + Assertion[?U, ?N] + Atom[?U, ?N] + Atom[?U, ?N] Quantifier + +Assertion[U, N] :: + `^` + `$` + `\` `b` + `\` `B` + `(` `?` `=` Disjunction[?U, ?N] `)` + `(` `?` `!` Disjunction[?U, ?N] `)` + `(` `?` `<=` Disjunction[?U, ?N] `)` + `(` `?` `<!` Disjunction[?U, ?N] `)` + +Quantifier :: + QuantifierPrefix + QuantifierPrefix `?` + +QuantifierPrefix :: + `*` + `+` + `?` + `{` DecimalDigits `}` + `{` DecimalDigits `,` `}` + `{` DecimalDigits `,` DecimalDigits `}` + +Atom[U, N] :: + PatternCharacter + `.` + `\` AtomEscape[?U, ?N] + CharacterClass[?U] + `(` GroupSpecifier[?U] Disjunction[?U, ?N] `)` + `(` `?` `:` Disjunction[?U, ?N] `)` + +SyntaxCharacter :: one of + `^` `$` `\` `.` `*` `+` `?` `(` `)` `[` `]` `{` `}` `|` + +PatternCharacter :: + SourceCharacter but not SyntaxCharacter + +AtomEscape[U, N] :: + DecimalEscape + CharacterClassEscape[?U] + CharacterEscape[?U] + [+N] `k` GroupName[?U] + +CharacterEscape[U] :: + ControlEscape + `c` ControlLetter + `0` [lookahead <! DecimalDigit] + HexEscapeSequence + RegExpUnicodeEscapeSequence[?U] + IdentityEscape[?U] + +ControlEscape :: one of + `f` `n` `r` `t` `v` + +ControlLetter :: one of + `a` `b` `c` `d` `e` `f` `g` `h` `i` `j` `k` `l` `m` `n` `o` `p` `q` `r` `s` `t` `u` `v` `w` `x` `y` `z` + `A` `B` `C` `D` `E` `F` `G` `H` `I` `J` `K` `L` `M` `N` `O` `P` `Q` `R` `S` `T` `U` `V` `W` `X` `Y` `Z` + +GroupSpecifier[U] :: + [empty] + `?` GroupName[?U] + +GroupName[U] :: + `<` RegExpIdentifierName[?U] `>` + +RegExpIdentifierName[U] :: + RegExpIdentifierStart[?U] + RegExpIdentifierName[?U] RegExpIdentifierPart[?U] + +RegExpIdentifierStart[U] :: + UnicodeIDStart + `$` + `_` + `\` RegExpUnicodeEscapeSequence[?U] + +RegExpIdentifierPart[U] :: + UnicodeIDContinue + `$` + `\` RegExpUnicodeEscapeSequence[?U] + <ZWNJ> + <ZWJ> + +RegExpUnicodeEscapeSequence[U] :: + [+U] `u` LeadSurrogate `\u` TrailSurrogate + [+U] `u` LeadSurrogate + [+U] `u` TrailSurrogate + [+U] `u` NonSurrogate + [~U] `u` Hex4Digits + [+U] `u{` CodePoint `}` + + +LeadSurrogate :: + Hex4Digits [> but only if the SV of |Hex4Digits| is in the inclusive range 0xD800 to 0xDBFF] + +TrailSurrogate :: + Hex4Digits [> but only if the SV of |Hex4Digits| is in the inclusive range 0xDC00 to 0xDFFF] + +NonSurrogate :: + Hex4Digits [> but only if the SV of |Hex4Digits| is not in the inclusive range 0xD800 to 0xDFFF] + +IdentityEscape[U] :: + [+U] SyntaxCharacter + [+U] `/` + [~U] SourceCharacter but not UnicodeIDContinue + +DecimalEscape :: + NonZeroDigit DecimalDigits? [lookahead <! 
DecimalDigit] + +CharacterClassEscape[U] :: + `d` + `D` + `s` + `S` + `w` + `W` + [+U] `p{` UnicodePropertyValueExpression `}` + [+U] `P{` UnicodePropertyValueExpression `}` + +UnicodePropertyValueExpression :: + UnicodePropertyName `=` UnicodePropertyValue + LoneUnicodePropertyNameOrValue + +UnicodePropertyName :: + UnicodePropertyNameCharacters + +UnicodePropertyNameCharacters :: + UnicodePropertyNameCharacter UnicodePropertyNameCharacters? + +UnicodePropertyValue :: + UnicodePropertyValueCharacters + +LoneUnicodePropertyNameOrValue :: + UnicodePropertyValueCharacters + +UnicodePropertyValueCharacters :: + UnicodePropertyValueCharacter UnicodePropertyValueCharacters? + +UnicodePropertyValueCharacter :: + UnicodePropertyNameCharacter + `0` + `1` + `2` + `3` + `4` + `5` + `6` + `7` + `8` + `9` + +UnicodePropertyNameCharacter :: + ControlLetter + `_` + +CharacterClass[U] :: + `[` [lookahead != `^`] ClassRanges[?U] `]` + `[` `^` ClassRanges[?U] `]` + +ClassRanges[U] :: + [empty] + NonemptyClassRanges[?U] + +NonemptyClassRanges[U] :: + ClassAtom[?U] + ClassAtom[?U] NonemptyClassRangesNoDash[?U] + ClassAtom[?U] `-` ClassAtom[?U] ClassRanges[?U] + +NonemptyClassRangesNoDash[U] :: + ClassAtom[?U] + ClassAtomNoDash[?U] NonemptyClassRangesNoDash[?U] + ClassAtomNoDash[?U] `-` ClassAtom[?U] ClassRanges[?U] + +ClassAtom[U] :: + `-` + ClassAtomNoDash[?U] + +ClassAtomNoDash[U] :: + SourceCharacter but not one of `\` or `]` or `-` + `\` ClassEscape[?U] + +ClassEscape[U] :: + `b` + [+U] `-` + CharacterClassEscape[?U] + CharacterEscape[?U] + + +NumericLiteral :: + DecimalLiteral + BinaryIntegerLiteral + OctalIntegerLiteral + HexIntegerLiteral + LegacyOctalIntegerLiteral + +LegacyOctalIntegerLiteral :: + `0` OctalDigit + LegacyOctalIntegerLiteral OctalDigit + +DecimalIntegerLiteral :: + `0` + NonZeroDigit DecimalDigits? + NonOctalDecimalIntegerLiteral + +NonOctalDecimalIntegerLiteral :: + `0` NonOctalDigit + LegacyOctalLikeDecimalIntegerLiteral NonOctalDigit + NonOctalDecimalIntegerLiteral DecimalDigit + +LegacyOctalLikeDecimalIntegerLiteral :: + `0` OctalDigit + LegacyOctalLikeDecimalIntegerLiteral OctalDigit + +NonOctalDigit :: one of + `8` `9` + + +EscapeSequence :: + CharacterEscapeSequence + LegacyOctalEscapeSequence + HexEscapeSequence + UnicodeEscapeSequence + +LegacyOctalEscapeSequence :: + OctalDigit [lookahead <! OctalDigit] + ZeroToThree OctalDigit [lookahead <! OctalDigit] + FourToSeven OctalDigit + ZeroToThree OctalDigit OctalDigit + +ZeroToThree :: one of + `0` `1` `2` `3` + +FourToSeven :: one of + `4` `5` `6` `7` + + +Comment :: + MultiLineComment + SingleLineComment + SingleLineHTMLOpenComment + SingleLineHTMLCloseComment + SingleLineDelimitedComment + +MultiLineComment :: + `/*` FirstCommentLine? LineTerminator MultiLineCommentChars? `*/` HTMLCloseComment? + +FirstCommentLine :: + SingleLineDelimitedCommentChars + +SingleLineHTMLOpenComment :: + `<!--` SingleLineCommentChars? + +SingleLineHTMLCloseComment :: + LineTerminatorSequence HTMLCloseComment + +SingleLineDelimitedComment :: + `/*` SingleLineDelimitedCommentChars? `*/` + +HTMLCloseComment :: + WhiteSpaceSequence? SingleLineDelimitedCommentSequence? `-->` SingleLineCommentChars? + +SingleLineDelimitedCommentChars :: + SingleLineNotAsteriskChar SingleLineDelimitedCommentChars? + `*` SingleLinePostAsteriskCommentChars? + +SingleLineNotAsteriskChar :: + SourceCharacter but not one of `*` or LineTerminator + +SingleLinePostAsteriskCommentChars :: + SingleLineNotForwardSlashOrAsteriskChar SingleLineDelimitedCommentChars? 
+ `*` SingleLinePostAsteriskCommentChars? + +SingleLineNotForwardSlashOrAsteriskChar :: + SourceCharacter but not one of `/` or `*` or LineTerminator + +WhiteSpaceSequence :: + WhiteSpace WhiteSpaceSequence? + +SingleLineDelimitedCommentSequence :: + SingleLineDelimitedComment WhiteSpaceSequence? SingleLineDelimitedCommentSequence? + + +Term[U, N] :: + [+U] Assertion[+U, ?N] + [+U] Atom[+U, ?N] + [+U] Atom[+U, ?N] Quantifier + [~U] QuantifiableAssertion[?N] Quantifier + [~U] Assertion[~U, ?N] + [~U] ExtendedAtom[?N] Quantifier + [~U] ExtendedAtom[?N] + +Assertion[U, N] :: + `^` + `$` + `\` `b` + `\` `B` + [+U] `(` `?` `=` Disjunction[+U, ?N] `)` + [+U] `(` `?` `!` Disjunction[+U, ?N] `)` + [~U] QuantifiableAssertion[?N] + `(` `?` `<=` Disjunction[?U, ?N] `)` + `(` `?` `<!` Disjunction[?U, ?N] `)` + +QuantifiableAssertion[N] :: + `(` `?` `=` Disjunction[~U, ?N] `)` + `(` `?` `!` Disjunction[~U, ?N] `)` + +ExtendedAtom[N] :: + `.` + `\` AtomEscape[~U, ?N] + `\` [lookahead == `c`] + CharacterClass[~U] + `(` Disjunction[~U, ?N] `)` + `(` `?` `:` Disjunction[~U, ?N] `)` + InvalidBracedQuantifier + ExtendedPatternCharacter + +InvalidBracedQuantifier :: + `{` DecimalDigits `}` + `{` DecimalDigits `,` `}` + `{` DecimalDigits `,` DecimalDigits `}` + +ExtendedPatternCharacter :: + SourceCharacter but not one of `^` `$` `\` `.` `*` `+` `?` `(` `)` `[` `|` + +AtomEscape[U, N] :: + [+U] DecimalEscape + [~U] DecimalEscape [> but only if the CapturingGroupNumber of |DecimalEscape| is <= _NcapturingParens_] + CharacterClassEscape[?U] + CharacterEscape[~U, ?N] + [+N] `k` GroupName[?U] + +CharacterEscape[U, N] :: + ControlEscape + `c` ControlLetter + `0` [lookahead <! DecimalDigit] + HexEscapeSequence + RegExpUnicodeEscapeSequence[?U] + [~U] LegacyOctalEscapeSequence + IdentityEscape[?U, ?N] + +IdentityEscape[U, N] :: + [+U] SyntaxCharacter + [+U] `/` + [~U] SourceCharacterIdentityEscape[?N] + +SourceCharacterIdentityEscape[N] :: + [~N] SourceCharacter but not `c` + [+N] SourceCharacter but not one of `c` or `k` + +ClassAtomNoDash[U] :: + SourceCharacter but not one of `\` or `]` or `-` + `\` ClassEscape[?U, ~N] + `\` [lookahead == `c`] + +ClassEscape[U, N] :: + `b` + [+U] `-` + [~U] `c` ClassControlLetter + CharacterClassEscape[?U] + CharacterEscape[?U, ?N] + +ClassControlLetter :: + DecimalDigit + `_` + + +IfStatement[Yield, Await, Return] : + `if` `(` Expression[+In, ?Yield, ?Await] `)` FunctionDeclaration[?Yield, ?Await, ~Default] `else` Statement[?Yield, ?Await, ?Return] + `if` `(` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] `else` FunctionDeclaration[?Yield, ?Await, ~Default] + `if` `(` Expression[+In, ?Yield, ?Await] `)` FunctionDeclaration[?Yield, ?Await, ~Default] `else` FunctionDeclaration[?Yield, ?Await, ~Default] + `if` `(` Expression[+In, ?Yield, ?Await] `)` FunctionDeclaration[?Yield, ?Await, ~Default] + + +IterationStatement[Yield, Await, Return] : + `for` `(` `var` BindingIdentifier[?Yield, ?Await] Initializer[~In, ?Yield, ?Await] `in` Expression[+In, ?Yield, ?Await] `)` Statement[?Yield, ?Await, ?Return] + diff --git a/third_party/rust/jsparagus/js_parser/try_it.py b/third_party/rust/jsparagus/js_parser/try_it.py new file mode 100755 index 0000000000..d8cb89457a --- /dev/null +++ b/third_party/rust/jsparagus/js_parser/try_it.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python + +"""js.py - Repl-like toy to explore parsing of lines of JS. + +See README.md for instructions. 
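+
+A rough sketch of an interactive session (run as a module from the repository
+root; the exact AST printed depends on the generated parser, so the output
+shown below is only illustrative):
+
+    $ python -m js_parser.try_it
+    js> var x = 1;
+    <parsed Script AST printed here>
+    js> function f() {
+    ..> }
+    <parsed Script AST printed here>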
+""" + +import argparse +import traceback +from .lexer import JSLexer +from .parser import JSParser +from jsparagus.lexer import SyntaxError + + +def interactive_input(lexer, prompt="js> "): + while True: + line = input(prompt) + lexer.write(line + "\n") + if lexer.can_close(): + return lexer.close() + prompt = "..> " + + +def rpl(): + """Read-print loop.""" + while True: + parser = JSLexer(JSParser(), filename="<stdin>") + try: + result = interactive_input(parser) + except EOFError: + print() + break + except SyntaxError: + traceback.print_exc(limit=0) + continue + print(result) + + +def main(): + parser = argparse.ArgumentParser(description="Try out the JS parser.") + parser.add_argument('input_file', metavar='FILE', nargs='?', + help=".js file to parse") + options = parser.parse_args() + + if options.input_file is not None: + with open(options.input_file) as f: + source = f.readlines() + lexer = JSLexer(JSParser()) + for line in source: + print(line.rstrip()) + lexer.write(line) + ast = lexer.close() + print(ast) + else: + rpl() + + +if __name__ == '__main__': + main() diff --git a/third_party/rust/jsparagus/jsparagus/README.md b/third_party/rust/jsparagus/jsparagus/README.md new file mode 100644 index 0000000000..24bb5632ca --- /dev/null +++ b/third_party/rust/jsparagus/jsparagus/README.md @@ -0,0 +1,65 @@ +# jsparagus parser generator + +This directory contains an LALR parser generator called "jsparagus", +written in Python. + +This is used to build parts of the jsparagus JS parser in +[crates/generated_parser](https://github.com/mozilla-spidermonkey/jsparagus/blob/master/crates/generated_parser/). + +jsparagus generates parsers only; it's "bring your own lexer". + +Parser generators are complicated. Here's how this works. + +1. **Input.** jsparagus can load **grammar files** that describe languages. + It's designed, in particular, to handle + [this `.esgrammar` file that describes JavaScript](https://github.com/jorendorff/jsparagus/blob/master/js_parser/es-simplified.esgrammar), + but it can handle a variety of languages. + (For example, the code we use to parse the `esgrammar` file itself + is generated by jsparagus.) + + To understand what a grammar is, see the comments in + [grammar.py](https://github.com/jorendorff/jsparagus/blob/master/jsparagus/grammar.py). + +2. **LALR.** jsparagus runs the + [LALR](https://en.wikipedia.org/wiki/LALR_parser) parser generator + algorithm to generate parser tables. See + [gen.py](https://github.com/jorendorff/jsparagus/blob/master/jsparagus/gen.py). + This code also rejects invalid or ambiguous grammars and so on. + + There are a few comments in gen.py, but they assume a pretty solid + background understanding of parser theory. If you're starting from + scratch, check out: + + * [Crafting + Interpreters](http://craftinginterpreters.com/contents.html); or + + * [Stanfordโs CS1 Compilers](https://lagunita.stanford.edu/courses/Engineering/Compilers/Fall2014/about), + an excellent, challenging, free course with exercises. + + * [The Dragon + Book](https://en.wikipedia.org/wiki/Compilers:_Principles,_Techniques,_and_Tools) + by Aho et al. Often hard to follow, but it contains a + complete description of LR and LALR. + + jsparagus has a few special features geared toward being able to parse + JavaScript, which [has an idiosyncratic syntax](https://github.com/mozilla-spidermonkey/jsparagus/blob/master/js-quirks.md#js-syntactic-quirks). + See [js_parser/README.md](https://github.com/mozilla-spidermonkey/jsparagus/tree/master/js_parser) + for more. + +3. 
**Output.** The `emit` directory contains code for dumping the parser tables as + code in [Rust](https://github.com/jorendorff/jsparagus/blob/master/jsparagus/emit/rust.py) + or [Python](https://github.com/jorendorff/jsparagus/blob/master/jsparagus/emit/python.py). + +4. **Run time support.** Since the output is mostly just tables, there + has to be some code to actually look at tokens and the parser tables + and decide what to do. + + For Python, that's in + [runtime.py](https://github.com/jorendorff/jsparagus/blob/master/jsparagus/runtime.py). + + For Rust, it's in + [crates/parser/src/parser.rs](https://github.com/jorendorff/jsparagus/blob/master/crates/parser/src/parser.rs). + Because this code is currently tightly coupled to the JS lexer, + jsparagus is not a fully general Rust parser generator yet. + The Python code is more flexible. + diff --git a/third_party/rust/jsparagus/jsparagus/__init__.py b/third_party/rust/jsparagus/jsparagus/__init__.py new file mode 100644 index 0000000000..c0132320cc --- /dev/null +++ b/third_party/rust/jsparagus/jsparagus/__init__.py @@ -0,0 +1 @@ +"""A parser generator ____ enough to cope with JavaScript.""" diff --git a/third_party/rust/jsparagus/jsparagus/actions.py b/third_party/rust/jsparagus/jsparagus/actions.py new file mode 100644 index 0000000000..29d810edbb --- /dev/null +++ b/third_party/rust/jsparagus/jsparagus/actions.py @@ -0,0 +1,651 @@ +from __future__ import annotations +# mypy: disallow-untyped-defs, disallow-incomplete-defs, disallow-untyped-calls + +import typing +import dataclasses + +from .ordered import OrderedFrozenSet +from .grammar import Element, ErrorSymbol, InitNt, Nt +from . import types, grammar + +# Avoid circular reference between this module and parse_table.py +if typing.TYPE_CHECKING: + from .parse_table import StateId + + +@dataclasses.dataclass(frozen=True) +class StackDiff: + """StackDiff represent stack mutations which have to be performed when executing an action. + """ + __slots__ = ['pop', 'nt', 'replay'] + + # Example: We have shifted `b * c X Y`. We want to reduce `b * c` to Mul. + # + # In the initial LR0 pass over the grammar, this produces a Reduce edge. + # + # action pop replay + # -------------- ------ --------- + # Reduce 3 (`b * c`) 2 (`X Y`) + # The parser moves `X Y` to the replay queue, pops `b * c`, creates the + # new `Mul` nonterminal, consults the stack and parse table to determine + # the new state id, then replays the new nonterminal. Reduce leaves `X Y` + # on the runtime replay queue. It's the runtime's responsibility to + # notice that they are there and replay them. + # + # Later, the Reduce edge might be lowered into an [Unwind; FilterState; + # Replay] sequence, which encode both the Reduce action, and the expected + # behavior of the runtime. + # + # action pop replay + # -------------- ------ --------- + # Unwind 3 2 + # The parser moves `X Y` to the replay queue, pops `b * c`, creates the + # new `Mul` nonterminal, and inserts it at the front of the replay queue. + # + # FilterState --- --- + # Determines the new state id, if it's context-dependent. + # This doesn't touch the stack, so no StackDiff. + # + # Replay 0 -3 + # Shift the three elements we left on the replay queue back to the stack: + # `(b*c) X Y`. + + # Number of elements to be popped from the stack, this is used when + # reducing the stack with a non-terminal. + # + # This number is always positive or zero. + pop: int + + # When reducing, a non-terminal is pushed after removing all replayed and + # popped elements. 
If not None, this is the non-terminal which is produced + # by reducing the action. + nt: typing.Union[Nt, ErrorSymbol, None] + + # Number of terms this action moves from the stack to the runtime replay + # queue (not counting `self.nt`), or from the replay queue to the stack if + # negative. + # + # When executing actions, some lookahead might have been used to make the + # parse table consistent. Replayed terms are popped before popping any + # elements from the stack, and they are added in reversed order in the + # replay list, such that they would be shifted after shifting the `reduced` + # non-terminal. + # + # This number might also be negative, in which case some lookahead terms + # are expected to exists in the replay list, and they are shifted back. + # This must happen only follow_edge is True. + replay: int + + def reduce_stack(self) -> bool: + """Returns whether the action is reducing the stack by replacing popped + elements by a non-terminal. Note, this test is simpler than checking + for instances, as Reduce / Unwind might either be present, or present + as part of the last element of a Seq action. """ + return self.nt is not None + + +class Action: + __slots__ = ["_hash"] + + # Cached hash. + _hash: typing.Optional[int] + + def __init__(self) -> None: + self._hash = None + + def is_inconsistent(self) -> bool: + """Returns True if this action is inconsistent. An action can be + inconsistent if the parameters it is given cannot be evaluated given + its current location in the parse table. Such as CheckNotOnNewLine. + """ + return False + + def is_condition(self) -> bool: + "Unordered condition, which accept or not to reach the next state." + return False + + def condition(self) -> Action: + "Return the conditional action." + raise TypeError("Action.condition not implemented") + + def check_same_variable(self, other: Action) -> bool: + "Return whether both conditionals are checking the same variable." + assert self.is_condition() + raise TypeError("Action.check_same_variable not implemented") + + def check_different_values(self, other: Action) -> bool: + "Return whether these 2 conditions are mutually exclusive." + assert self.is_condition() + raise TypeError("Action.check_different_values not implemented") + + def follow_edge(self) -> bool: + """Whether the execution of this action resume following the epsilon transition + (True) or if it breaks the graph epsilon transition (False) and returns + at a different location, defined by the top of the stack.""" + return True + + def update_stack(self) -> bool: + """Whether the execution of this action changes the parser stack.""" + return False + + def update_stack_with(self) -> StackDiff: + """Returns a StackDiff which represents the mutation to be applied to the + parser stack.""" + assert self.update_stack() + raise TypeError("Action.update_stack_with not implemented") + + def unshift_action(self, num: int) -> Action: + """When manipulating stack operation, we have the option to unshift some + replayed token which were shifted to disambiguate the grammar. However, + they might no longer be needed in some cases.""" + raise TypeError("{} cannot be unshifted".format(self.__class__.__name__)) + + def shifted_action(self, shifted_term: Element) -> ShiftedAction: + """Transpose this action with shifting the given terminal or Nt. 
+ + That is, the sequence of: + - performing the action `self`, then + - shifting `shifted_term` + has the same effect as: + - shifting `shifted_term`, then + - performing the action `self.shifted_action(shifted_term)`. + + If the resulting shifted action would be a no-op, instead return True. + + If this is a conditional action and `shifted_term` indicates that the + condition wasn't met, return False. + """ + return self + + def contains_accept(self) -> bool: + "Returns whether the current action stops the parser." + return False + + def rewrite_state_indexes(self, state_map: typing.Dict[StateId, StateId]) -> Action: + """If the action contains any state index, use the map to map the old index to + the new indexes""" + return self + + def fold_by_destination(self, actions: typing.List[Action]) -> typing.List[Action]: + """If after rewriting state indexes, multiple condition are reaching the same + destination state, we attempt to fold them by destination. Not + implementing this function can lead to the introduction of inconsistent + states, as the conditions might be redundant. """ + + # By default do nothing. + return actions + + def state_refs(self) -> typing.List[StateId]: + """List of states which are referenced by this action.""" + # By default do nothing. + return [] + + def __eq__(self, other: object) -> bool: + if self.__class__ != other.__class__: + return False + assert isinstance(other, Action) + for s in self.__slots__: + if getattr(self, s) != getattr(other, s): + return False + return True + + def __hash__(self) -> int: + if self._hash is not None: + return self._hash + + def hashed_content() -> typing.Iterator[object]: + yield self.__class__ + for s in self.__slots__: + yield repr(getattr(self, s)) + + self._hash = hash(tuple(hashed_content())) + return self._hash + + def __lt__(self, other: Action) -> bool: + return hash(self) < hash(other) + + def __repr__(self) -> str: + return str(self) + + def stable_str(self, states: typing.Any) -> str: + return str(self) + + +ShiftedAction = typing.Union[Action, bool] + + +class Replay(Action): + """Replay a term which was previously saved by the Unwind function. Note that + this does not Shift a term given as argument as the replay action should + always be garanteed and that we want to maximize the sharing of code when + possible.""" + __slots__ = ['replay_steps'] + + replay_steps: typing.Tuple[StateId, ...] + + def __init__(self, replay_steps: typing.Iterable[StateId]): + super().__init__() + self.replay_steps = tuple(replay_steps) + + def update_stack(self) -> bool: + return True + + def update_stack_with(self) -> StackDiff: + return StackDiff(0, None, -len(self.replay_steps)) + + def rewrite_state_indexes(self, state_map: typing.Dict[StateId, StateId]) -> Replay: + return Replay(map(lambda s: state_map[s], self.replay_steps)) + + def state_refs(self) -> typing.List[StateId]: + return list(self.replay_steps) + + def __str__(self) -> str: + return "Replay({})".format(str(self.replay_steps)) + + +class Unwind(Action): + """Define an unwind operation which pops N elements of the stack and pushes one + non-terminal. 
The replay argument of an unwind action corresponds to the + number of stack elements which would have to be popped and pushed again + using the parser table after executing this operation.""" + __slots__ = ['nt', 'replay', 'pop'] + + nt: Nt + pop: int + replay: int + + def __init__(self, nt: Nt, pop: int, replay: int = 0) -> None: + super().__init__() + self.nt = nt # Non-terminal which is reduced + self.pop = pop # Number of stack elements which should be replayed. + self.replay = replay # List of terms to shift back + + def __str__(self) -> str: + return "Unwind({}, {}, {})".format(self.nt, self.pop, self.replay) + + def update_stack(self) -> bool: + return True + + def update_stack_with(self) -> StackDiff: + return StackDiff(self.pop, self.nt, self.replay) + + def unshift_action(self, num: int) -> Unwind: + return Unwind(self.nt, self.pop, replay=self.replay - num) + + def shifted_action(self, shifted_term: Element) -> Unwind: + return Unwind(self.nt, self.pop, replay=self.replay + 1) + + +class Reduce(Action): + """Prevent the fall-through to the epsilon transition and returns to the shift + table execution to resume shifting or replaying terms.""" + __slots__ = ['unwind'] + + unwind: Unwind + + def __init__(self, unwind: Unwind) -> None: + nt_name = unwind.nt.name + if isinstance(nt_name, InitNt): + name = "Start_" + str(nt_name.goal.name) + else: + name = nt_name + super().__init__() + self.unwind = unwind + + def __str__(self) -> str: + return "Reduce({})".format(str(self.unwind)) + + def follow_edge(self) -> bool: + return False + + def update_stack(self) -> bool: + return self.unwind.update_stack() + + def update_stack_with(self) -> StackDiff: + return self.unwind.update_stack_with() + + def unshift_action(self, num: int) -> Reduce: + unwind = self.unwind.unshift_action(num) + return Reduce(unwind) + + def shifted_action(self, shifted_term: Element) -> Reduce: + unwind = self.unwind.shifted_action(shifted_term) + return Reduce(unwind) + + +class Accept(Action): + """This state terminate the parser by accepting the content consumed until + now.""" + __slots__: typing.List[str] = [] + + def __init__(self) -> None: + super().__init__() + + def __str__(self) -> str: + return "Accept()" + + def contains_accept(self) -> bool: + "Returns whether the current action stops the parser." + return True + + def shifted_action(self, shifted_term: Element) -> Accept: + return Accept() + + +class Lookahead(Action): + """Define a Lookahead assertion which is meant to either accept or reject + sequences of terminal/non-terminals sequences.""" + __slots__ = ['terms', 'accept'] + + terms: typing.FrozenSet[str] + accept: bool + + def __init__(self, terms: typing.FrozenSet[str], accept: bool): + super().__init__() + self.terms = terms + self.accept = accept + + def is_inconsistent(self) -> bool: + # A lookahead restriction cannot be encoded in code, it has to be + # solved using fix_with_lookahead, which encodes the lookahead + # resolution in the generated parse table. 
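+        # For instance, grammar restrictions such as [lookahead != `{` ] on
+        # ConciseBody or [lookahead <! {`function`, `async`, `class`}] on
+        # `export default` are the kind of thing a Lookahead action stands
+        # for; they get compiled away into the parse table rather than
+        # emitted as runtime checks.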
+ return True + + def is_condition(self) -> bool: + return True + + def condition(self) -> Lookahead: + return self + + def check_same_variable(self, other: Action) -> bool: + raise TypeError("Lookahead.check_same_variables: Lookahead are always inconsistent") + + def check_different_values(self, other: Action) -> bool: + raise TypeError("Lookahead.check_different_values: Lookahead are always inconsistent") + + def __str__(self) -> str: + return "Lookahead({}, {})".format(self.terms, self.accept) + + def shifted_action(self, shifted_term: Element) -> ShiftedAction: + if isinstance(shifted_term, Nt): + return True + if shifted_term in self.terms: + return self.accept + return not self.accept + + +class CheckNotOnNewLine(Action): + """Check whether the terminal at the given stack offset is on a new line or + not. If not this would produce an Error, otherwise this rule would be + shifted.""" + __slots__ = ['offset'] + + offset: int + + def __init__(self, offset: int = 0) -> None: + # assert offset >= -1 and "Smaller offsets are not supported on all backends." + super().__init__() + self.offset = offset + + def is_inconsistent(self) -> bool: + # We can only look at stacked terminals. Having an offset of 0 implies + # that we are looking for the next terminal, which is not yet shifted. + # Therefore this action is inconsistent as long as the terminal is not + # on the stack. + return self.offset >= 0 + + def is_condition(self) -> bool: + return True + + def condition(self) -> CheckNotOnNewLine: + return self + + def check_same_variable(self, other: Action) -> bool: + return isinstance(other, CheckNotOnNewLine) and self.offset == other.offset + + def check_different_values(self, other: Action) -> bool: + return False + + def shifted_action(self, shifted_term: Element) -> ShiftedAction: + if isinstance(shifted_term, Nt): + return True + return CheckNotOnNewLine(self.offset - 1) + + def __str__(self) -> str: + return "CheckNotOnNewLine({})".format(self.offset) + + +class FilterStates(Action): + """Check whether the stack at a given depth match the state value, if so + transition to the destination, otherwise check other states.""" + __slots__ = ['states'] + + states: OrderedFrozenSet[StateId] + + def __init__(self, states: typing.Iterable[StateId]): + super().__init__() + # Set of states which can follow this transition. + self.states = OrderedFrozenSet(sorted(states)) + + def is_condition(self) -> bool: + return True + + def condition(self) -> FilterStates: + return self + + def check_same_variable(self, other: Action) -> bool: + return isinstance(other, FilterStates) + + def check_different_values(self, other: Action) -> bool: + assert isinstance(other, FilterStates) + return self.states.is_disjoint(other.states) + + def rewrite_state_indexes(self, state_map: typing.Dict[StateId, StateId]) -> FilterStates: + states = list(state_map[s] for s in self.states) + return FilterStates(states) + + def fold_by_destination(self, actions: typing.List[Action]) -> typing.List[Action]: + states: typing.List[StateId] = [] + for a in actions: + if not isinstance(a, FilterStates): + # Do nothing in case the state is inconsistent. 
+ return actions + states.extend(a.states) + return [FilterStates(states)] + + def state_refs(self) -> typing.List[StateId]: + return list(self.states) + + def __str__(self) -> str: + return "FilterStates({})".format(self.states) + + +class FilterFlag(Action): + """Define a filter which check for one value of the flag, and continue to the + next state if the top of the flag stack matches the expected value.""" + __slots__ = ['flag', 'value'] + + flag: str + value: object + + def __init__(self, flag: str, value: object) -> None: + super().__init__() + self.flag = flag + self.value = value + + def is_condition(self) -> bool: + return True + + def condition(self) -> FilterFlag: + return self + + def check_same_variable(self, other: Action) -> bool: + return isinstance(other, FilterFlag) and self.flag == other.flag + + def check_different_values(self, other: Action) -> bool: + assert isinstance(other, FilterFlag) + return self.value != other.value + + def __str__(self) -> str: + return "FilterFlag({}, {})".format(self.flag, self.value) + + +class PushFlag(Action): + """Define an action which pushes a value on a stack dedicated to the flag. This + other stack correspond to another parse stack which live next to the + default state machine and is popped by PopFlag, as-if this was another + reduce action. This is particularly useful to raise the parse table from a + LR(0) to an LR(k) without needing as much state duplications.""" + __slots__ = ['flag', 'value'] + + flag: str + value: object + + def __init__(self, flag: str, value: object) -> None: + super().__init__() + self.flag = flag + self.value = value + + def __str__(self) -> str: + return "PushFlag({}, {})".format(self.flag, self.value) + + +class PopFlag(Action): + """Define an action which pops a flag from the flag bit stack.""" + __slots__ = ['flag'] + + flag: str + + def __init__(self, flag: str) -> None: + super().__init__() + self.flag = flag + + def __str__(self) -> str: + return "PopFlag({})".format(self.flag) + + +# OutputExpr: An expression mini-language that compiles very directly to code +# in the output language (Rust or Python). An OutputExpr is one of: +# +# str - an identifier in the generated code +# int - an index into the runtime stack +# None or Some(FunCallArg) - an optional value +# +OutputExpr = typing.Union[str, int, None, grammar.Some] + + +class FunCall(Action): + """Define a call method operation which reads N elements of he stack and + pushpathne non-terminal. The replay attribute of a reduce action correspond + to the number of stack elements which would have to be popped and pushed + again using the parser table after reducing this operation. """ + __slots__ = ['trait', 'method', 'offset', 'args', 'fallible', 'set_to'] + + trait: types.Type + method: str + offset: int + args: typing.Tuple[OutputExpr, ...] + fallible: bool + set_to: str + + def __init__( + self, + method: str, + args: typing.Tuple[OutputExpr, ...], + trait: types.Type = types.Type("AstBuilder"), + fallible: bool = False, + set_to: str = "val", + offset: int = 0, + ) -> None: + super().__init__() + self.trait = trait # Trait on which this method is implemented. + self.method = method # Method and argument to be read for calling it. + self.fallible = fallible # Whether the function call can fail. + self.offset = offset # Offset to add to each argument offset. + self.args = args # Tuple of arguments offsets. + self.set_to = set_to # Temporary variable name to set with the result. 
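+
+    # Illustrative sketch only (the method name and offsets are hypothetical,
+    # not taken from the real emitted tables): reducing a binary expression
+    # might be represented as
+    #
+    #     FunCall("binary_expr", (2, 1, 0), fallible=False, set_to="val")
+    #
+    # i.e. call the AstBuilder method `binary_expr` with the values found at
+    # the given parser-stack offsets (adjusted by `offset`) and store the
+    # result in the temporary variable `val`.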
+ + def __str__(self) -> str: + return "{} = {}::{}({}){} [off: {}]".format( + self.set_to, self.trait, self.method, + ", ".join(map(str, self.args)), + self.fallible and '?' or '', + self.offset) + + def __repr__(self) -> str: + return "FunCall({})".format(', '.join(map(repr, [ + self.trait, self.method, self.fallible, + self.args, self.set_to, self.offset + ]))) + + def unshift_action(self, num: int) -> FunCall: + return FunCall(self.method, self.args, + trait=self.trait, + fallible=self.fallible, + set_to=self.set_to, + offset=self.offset - num) + + def shifted_action(self, shifted_term: Element) -> FunCall: + return FunCall(self.method, + self.args, + trait=self.trait, + fallible=self.fallible, + set_to=self.set_to, + offset=self.offset + 1) + + +class Seq(Action): + """Aggregate multiple actions in one statement. Note, that the aggregated + actions should not contain any condition or action which are mutating the + state. Only the last action aggregated can update the parser stack""" + __slots__ = ['actions'] + + actions: typing.Tuple[Action, ...] + + def __init__(self, actions: typing.Sequence[Action]) -> None: + super().__init__() + self.actions = tuple(actions) # Ordered list of actions to execute. + assert all([not a.is_condition() for a in actions]) + assert all([not isinstance(a, Seq) for a in actions]) + assert all([a.follow_edge() for a in actions[:-1]]) + assert all([not a.update_stack() for a in actions[:-1]]) + + def __str__(self) -> str: + return "{{ {} }}".format("; ".join(map(str, self.actions))) + + def __repr__(self) -> str: + return "Seq({})".format(repr(self.actions)) + + def follow_edge(self) -> bool: + return self.actions[-1].follow_edge() + + def update_stack(self) -> bool: + return self.actions[-1].update_stack() + + def update_stack_with(self) -> StackDiff: + return self.actions[-1].update_stack_with() + + def unshift_action(self, num: int) -> Seq: + actions = list(map(lambda a: a.unshift_action(num), self.actions)) + return Seq(actions) + + def shifted_action(self, shift: Element) -> ShiftedAction: + actions: typing.List[Action] = [] + for a in self.actions: + b = a.shifted_action(shift) + if isinstance(b, bool): + if b is False: + return False + else: + actions.append(b) + return Seq(actions) + + def contains_accept(self) -> bool: + return any(a.contains_accept() for a in self.actions) + + def rewrite_state_indexes(self, state_map: typing.Dict[StateId, StateId]) -> Seq: + actions = list(map(lambda a: a.rewrite_state_indexes(state_map), self.actions)) + return Seq(actions) + + def state_refs(self) -> typing.List[StateId]: + return [s for a in self.actions for s in a.state_refs()] diff --git a/third_party/rust/jsparagus/jsparagus/aps.py b/third_party/rust/jsparagus/jsparagus/aps.py new file mode 100644 index 0000000000..18bb7f84a7 --- /dev/null +++ b/third_party/rust/jsparagus/jsparagus/aps.py @@ -0,0 +1,422 @@ +from __future__ import annotations +# mypy: disallow-untyped-defs, disallow-incomplete-defs, disallow-untyped-calls + +import typing +from dataclasses import dataclass +from .lr0 import ShiftedTerm, Term +from .actions import Action, FilterStates, Replay + +# Avoid circular reference between this module and parse_table.py +if typing.TYPE_CHECKING: + from .parse_table import StateId, StateAndTransitions, ParseTable + + +def shifted_path_to(pt: ParseTable, n: int, right_of: Path) -> typing.Iterator[Path]: + "Compute all paths with n shifted terms, ending with right_of." 
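+    # Rough illustration (hypothetical state numbers): with n == 1 and
+    # right_of == [Edge(7, None)], each backedge of state 7 labelled with a
+    # stacked term t contributes a path [Edge(p, t), Edge(7, None)]; backedges
+    # labelled with non-stacked terms do not count toward n and extend the
+    # search further backwards.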
+ assert isinstance(right_of, list) and len(right_of) >= 1 + if n == 0: + yield right_of + state = right_of[0].src + assert isinstance(state, int) + for edge in pt.states[state].backedges: + if isinstance(edge.term, Action) and edge.term.update_stack(): + # Some Action such as Unwind and Replay are actions which are + # forking the execution state from the parse stable state. + # While computing the shifted_path_to, we only iterate over the + # parse table states. + continue + assert pt.term_is_shifted(edge.term) + if pt.term_is_stacked(edge.term): + s_n = n - 1 + if n == 0: + continue + else: + s_n = n + if n == 0 and not pt.assume_inconsistent: + # If the parse table is no longer inconsistent, then there is + # no point on walking back on actions as they are expected to + # be resolved. Thus we cannot have the restrictions issue that + # we have on inconsistent parse tables. + continue + from_edge = Edge(edge.src, edge.term) + for path in shifted_path_to(pt, s_n, [from_edge] + right_of): + yield path + + +def reduce_path(pt: ParseTable, shifted: Path) -> typing.Iterator[Path]: + """Compute all paths which might be reduced by a given action. This function + assumes that the state is reachable from the starting goals, and that + the depth which is being queried has valid answers.""" + assert len(shifted) >= 1 + action = shifted[-1].term + assert isinstance(action, Action) + assert action.update_stack() + stack_diff = action.update_stack_with() + nt = stack_diff.nt + assert nt is not None + depth = stack_diff.pop + stack_diff.replay + assert depth >= 0 + if depth > 0: + # We are reducing at least one element from the stack. + stacked = [i for i, e in enumerate(shifted) if pt.term_is_stacked(e.term)] + if len(stacked) < depth: + # We have not shifted enough elements to cover the full reduce + # rule, start looking for context using backedges. + shifted_from = 0 + depth -= len(stacked) + else: + # We shifted much more elements than necessary for reducing, + # just start from the first stacked element which correspond to + # consuming all stack element reduced. + shifted_from = stacked[-depth] + depth = 0 + shifted_end = shifted[shifted_from:] + else: + # We are reducing no element from the stack. + shifted_end = shifted[-1:] + for path in shifted_path_to(pt, depth, shifted_end): + # NOTE: When reducing, we might be tempted to verify that the + # reduced non-terminal is part of the state we are reducing to, and + # it surely is for one of the shifted path. However, this would be + # an error in an inconsistent grammar. (see issue #464) + # + # Thus, we might yield plenty of path which are not reducing the + # expected non-terminal, but these are expected to be filtered out + # by the APS, as the inability of shifting these non-terminals + # would remove these cases. + assert pt.assume_inconsistent or nt in pt.states[path[0].src].nonterminals + yield path + + +@dataclass(frozen=True) +class Edge: + """An edge in a Parse table is a tuple of a source state and the term followed + to exit this state. The destination is not saved here as it can easily be + inferred by looking it up in the parse table. + + Note, the term might be `None` if no term is specified yet. This is useful + for specifying the last state in a Path. + + src: Index of the state from which this directed edge is coming from. + + term: Edge transition value, this can be a terminal, non-terminal or an + action to be executed on an epsilon transition. 
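+
+    For example (hypothetical numbering), Edge(12, 'ident') would be the
+    transition out of state 12 on the terminal `ident`, while Edge(12, None)
+    marks the open end of a Path whose outgoing term has not been chosen yet.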
+ """ + src: StateId + term: typing.Optional[Term] + + def stable_str(self, states: typing.List[StateAndTransitions]) -> str: + return "{} -- {} -->".format(states[self.src].stable_hash, str(self.term)) + + def __str__(self) -> str: + return "{} -- {} -->".format(self.src, str(self.term)) + + +# A path through the state graph. +# +# `e.src for e in path` is the sequence of states visited, and +# `e.term for e in path[:-1]` is the sequence of edges traversed. +# `path[-1].term` should be ignored and is often None. +Path = typing.List[Edge] + + +@dataclass(frozen=True) +class APS: + # To fix inconsistencies of the grammar, we have to traverse the grammar + # both forward by using the lookahead and backward by using the state + # recovered from following unwind actions. + # + # To do so we define the notion of abstract parser state (APS), which is a + # class which represents the known state of the parser, relative to its + # starting point. + # + # An APS does not exclusively start at the parser entry point, but starts + # from any state of the parse table by calling `APS.start`. Then we walk + # the parse table forward, as-if we were shifting tokens or epsilon edges + # in the parse table. The function `aps.shift_next(parse_table)` will + # explore all possible futures reachable from the starting point. + # + # As the parse table is explored, new APS are produced by + # `aps.shift_next(parse_table)`, which are containing the new state of the + # parser and the history which has been seen by the APS since it started. + slots = [ + 'state', + 'stack', + 'shift', + 'lookahead', + 'replay', + 'history', + 'reducing' + ] + + # This the state at which we are at, and from which edges would be listed. + # In most cases, this corresponds to the source of last edge of the shift + # list. However, it differs only after executing actions which are mutating + # the parser state while following the out-going edge such as Unwind and + # Replay. + state: StateId + + # This is the known stack at the location where we started investigating. + # As more history is discovered by resolving unwind actions, this stack + # would be filled with the predecessors which have been visited before + # reaching the starting state. + stack: Path + + # This is the stack as manipulated by an LR parser. States are shifted to + # it, including actions, and popped from it when visiting a unwind action. + shift: Path + + # This is the list of terminals and non-terminals encountered by shifting + # edges which are not replying tokens. + lookahead: typing.List[ShiftedTerm] + + # This is the list of lookahead terminals and non-terminals which remains + # to be shifted. This list corresponds to terminals and non-terminals which + # were necessary for removing inconsistencies, but have to be replayed + # after shifting the reduced non-terminals. + replay: typing.List[ShiftedTerm] + + # This is the list of edges visited since the starting state. + history: Path + + # This is a flag which is used to distinguish whether the next term to be + # replayed is the result of a Reduce action or not. When reducing, epsilon + # transitions should be ignored. This flag is useful to implement Unwind + # and Reduce as 2 different actions. 
+ reducing: bool + + @staticmethod + def start(state: StateId) -> APS: + "Return an Abstract Parser State starting at a given state of a parse table" + edge = Edge(state, None) + return APS(state, [edge], [edge], [], [], [], False) + + def shift_next(self, pt: ParseTable) -> typing.Iterator[APS]: + """Yield an APS for each state reachable from this APS in a single step, + by handling a single term (terminal, nonterminal, or action). + + All yielded APS are representing context information around the same + starting state as `self`, either by having additional lookahead terms, + or a larger stack representing the path taken to reach the starting + state. + + For each outgoing edge, it builds a new APS which represents the state + of the Parser if we were to have taken this edge. Only valid APS are + yielded given the context provided by `self`. + + For example, we cannot reduce to a path which is different than what is + already present in the `shift` list, or shift a term different than the + next term to be shifted from the `replay` list. + """ + + # The actual type of parameter `pt` is ParseTable, but this would + # require a cyclic dependency, so we bail out of the type system using + # typing.Any. + + st, sh, la, rp, hs = self.stack, self.shift, self.lookahead, self.replay, self.history + last_edge = sh[-1] + state = pt.states[last_edge.src] + state_match_shift_end = self.state == self.shift[-1].src + if self.replay == []: + assert state_match_shift_end + for term, to in state.shifted_edges(): + edge = Edge(self.state, term) + new_sh = self.shift[:-1] + [edge] + edge_to = Edge(to, None) + yield APS(to, st, new_sh + [edge_to], la + [term], rp, hs + [edge], False) + elif state_match_shift_end: + term = self.replay[0] + rp = self.replay[1:] + if term in state: + edge = Edge(self.state, term) + new_sh = self.shift[:-1] + [edge] + to = state[term] + edge_to = Edge(to, None) + yield APS(to, st, new_sh + [edge_to], la, rp, hs + [edge], False) + + rp = self.replay + for a, to in state.epsilon: + edge = Edge(self.state, a) + prev_sh = self.shift[:-1] + [edge] + # TODO: Add support for Lookahead and flag manipulation rules, as + # both of these would invalide potential reduce paths. + if a.update_stack(): + new_rp: typing.List[ShiftedTerm] + stack_diff = a.update_stack_with() + if isinstance(a, Replay): + assert stack_diff.pop == 0 + assert stack_diff.nt is None + assert stack_diff.replay < 0 + num_replay = -stack_diff.replay + assert len(self.replay) >= num_replay + new_rp = self.replay[:] + new_sh = self.shift[:] + while num_replay > 0: + num_replay -= 1 + term = new_rp[0] + del new_rp[0] + sh_state = new_sh[-1].src + sh_edge = Edge(sh_state, term) + sh_to = pt.states[sh_state][term] + sh_edge_to = Edge(sh_to, None) + del new_sh[-1] + new_sh = new_sh + [sh_edge, sh_edge_to] + yield APS(to, st, new_sh, la, new_rp, hs + [edge], False) + continue + + if self.reducing: + # When reducing, do not attempt to execute any actions + # which might update the stack. Without this restriction, + # we might loop on Optional rules. Which would not match + # the expected behaviour of the parser. + continue + + reducing = not a.follow_edge() + assert stack_diff.pop >= 0 + assert stack_diff.nt is not None + for path in reduce_path(pt, prev_sh): + # path contains the chains of state shifted, including + # epsilon transitions. The head of the path should be able + # to shift the reduced nonterminal or any state reachable + # through an epsilon state after it. 
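+                # Tiny worked illustration (echoing the `b * c` -> Mul example
+                # from actions.py): if prev_sh ends with edges that shifted
+                # `b`, `*`, `c` and `a` unwinds those three elements into Mul,
+                # each `path` yielded here is a backward walk covering those
+                # three stacked terms, extended through backedges whenever
+                # this APS has shifted fewer than three of them itself.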
+ + # print( + # "Compare shifted path, with reduced path:\n" + # "\tshifted = {},\n" + # "\treduced = {},\n" + # "\taction = {},\n" + # "\tnew_path = {}\n" + # .format( + # " ".join(edge_str(e) for e in prev_sh), + # " ".join(edge_str(e) for e in path), + # str(a), + # " ".join(edge_str(e) for e in reduced_path))) + if prev_sh[-len(path):] != path[-len(prev_sh):]: + # If the reduced production does not match the shifted + # state, then this reduction does not apply. This is + # the equivalent result as splitting the parse table + # based on the predecessor. + continue + + # The stack corresponds to the stack present at the + # starting point. The shift list correspond to the actual + # parser stack as we iterate through the state machine. + # Each time we consume all the shift list, this implies + # that we had extra stack elements which were not present + # initially, and therefore we are learning about the + # context. + new_st = path[:max(len(path) - len(prev_sh), 0)] + st + assert pt.is_valid_path(new_st) + + # The shift list corresponds to the stack which is used in + # an LR parser, in addition to all the states which are + # epsilon transitions. We pop from this list the reduced + # path, as long as it matches. Then all popped elements are + # replaced by the state that we visit after replaying the + # non-terminal reduced by this action. + new_sh = prev_sh[:-len(path)] + [Edge(path[0].src, None)] + assert pt.is_valid_path(new_sh) + + # Update the replay list of the new APS, starting with the + # reduced non-terminal and followed by the lookahead terms + # which have to be replayed and/or the truncated replay + # list, if any are consumed while reducing. + replay = stack_diff.replay + nt = stack_diff.nt + assert nt is not None + new_rp = [nt] + if replay > 0: + # Move previously shifted terms to the replay list, as + # they would have to be replayed after reducing the + # non-terminal. + stacked_terms = [ + typing.cast(ShiftedTerm, edge.term) + for edge in path if pt.term_is_stacked(edge.term) + ] + new_rp = new_rp + stacked_terms[-replay:] + rp + elif replay == 0: + new_rp = new_rp + rp + elif replay < 0: + # Remove the replayed tokens from the front of the + # replay list as they are consumed by this Unwind + # action. + assert len(rp) >= -replay + new_rp = new_rp + rp[-replay:] + new_la = la[:max(len(la) - replay, 0)] + + # If we are reducing, this implies that we are not + # following the edge of the reducing action, and resume the + # execution at the last edge of the shift action. At this + # point the execution and the stack diverge from standard + # LR parser. However, the stack is still manipulated + # through Unwind and Replay actions but the state which is + # executed no longer matches the last element of the + # shifted term or action. + if reducing: + to = new_sh[-1].src + yield APS(to, new_st, new_sh, new_la, new_rp, hs + [edge], reducing) + elif isinstance(a, FilterStates): + # FilterStates is added by the graph transformation and is + # expected to be added after the replacement of + # Reduce(Unwind(...)) by Unwind, FilterStates and Replay + # actions. Thus, at the time when FilterStates is encountered, + # we do not expect `self.states` to match the last element of + # the `shift` list to match. + assert not state_match_shift_end + + # Emulate FilterStates condition, which is to branch to the + # destination if the state value from the top of the stack is + # in the list of states of this condition. 
+ if self.shift[-1].src in a.states: + yield APS(to, st, sh, la, rp, hs + [edge], False) + else: + edge_to = Edge(to, None) + yield APS(to, st, prev_sh + [edge_to], la, rp, hs + [edge], self.reducing) + + def stable_str(self, states: typing.List[StateAndTransitions], name: str = "aps") -> str: + return """{}.state = {} +{}.stack = [{}] +{}.shift = [{}] +{}.lookahead = [{}] +{}.replay = [{}] +{}.history = [{}] +{}.reducing = {} + """.format( + name, self.state, + name, " ".join(e.stable_str(states) for e in self.stack), + name, " ".join(e.stable_str(states) for e in self.shift), + name, ", ".join(repr(e) for e in self.lookahead), + name, ", ".join(repr(e) for e in self.replay), + name, " ".join(e.stable_str(states) for e in self.history), + name, self.reducing + ) + + def string(self, name: str = "aps") -> str: + return """{}.state = {} +{}.stack = [{}] +{}.shift = [{}] +{}.lookahead = [{}] +{}.replay = [{}] +{}.history = [{}] +{}.reducing = {} + """.format( + name, self.state, + name, " ".join(str(e) for e in self.stack), + name, " ".join(str(e) for e in self.shift), + name, ", ".join(repr(e) for e in self.lookahead), + name, ", ".join(repr(e) for e in self.replay), + name, " ".join(str(e) for e in self.history), + name, self.reducing + ) + + def __str__(self) -> str: + return self.string() + + +def stable_aps_lanes_str( + aps_lanes: typing.List[APS], + states: typing.List[StateAndTransitions], + header: str = "lanes:", + name: str = "\taps" +) -> str: + return "{}\n{}".format(header, "\n".join(aps.stable_str(states, name) for aps in aps_lanes)) diff --git a/third_party/rust/jsparagus/jsparagus/emit/__init__.py b/third_party/rust/jsparagus/jsparagus/emit/__init__.py new file mode 100644 index 0000000000..198195244a --- /dev/null +++ b/third_party/rust/jsparagus/jsparagus/emit/__init__.py @@ -0,0 +1,6 @@ +"""Emit code and parser tables in Python and Rust.""" + +__all__ = ['write_python_parse_table', 'write_rust_parse_table'] + +from .python import write_python_parse_table +from .rust import write_rust_parse_table diff --git a/third_party/rust/jsparagus/jsparagus/emit/python.py b/third_party/rust/jsparagus/jsparagus/emit/python.py new file mode 100644 index 0000000000..c974d371cd --- /dev/null +++ b/third_party/rust/jsparagus/jsparagus/emit/python.py @@ -0,0 +1,228 @@ +"""Emit code and parser tables in Python.""" + +from __future__ import annotations + +import io +import typing + +from ..grammar import ErrorSymbol, Nt, Some +from ..actions import (Accept, Action, CheckNotOnNewLine, FilterFlag, FilterStates, FunCall, + Lookahead, OutputExpr, PopFlag, PushFlag, Reduce, Replay, Seq, Unwind) +from ..runtime import ErrorToken, ErrorTokenClass +from ..ordered import OrderedSet +from ..lr0 import Term +from ..parse_table import StateId, ParseTable + + +def method_name_to_python(name: str) -> str: + return name.replace(" ", "_") + + +def write_python_parse_table(out: io.TextIOBase, parse_table: ParseTable) -> None: + # Disable MyPy type checking for everything in this module. 
+ out.write("# type: ignore\n\n") + + out.write("from jsparagus import runtime\n") + if any(isinstance(key, Nt) for key in parse_table.nonterminals): + out.write( + "from jsparagus.runtime import (Nt, InitNt, End, ErrorToken, StateTermValue,\n" + " ShiftError, ShiftAccept)\n") + out.write("\n") + + methods: OrderedSet[typing.Tuple[str, int]] = OrderedSet() + + def write_epsilon_transition(indent: str, dest_idx: StateId): + dest = parse_table.states[dest_idx] + if dest.epsilon != []: + assert dest.index < len(parse_table.states) + # This is a transition to an action. + args = "" + for i in range(dest.arguments): + out.write("{}r{} = parser.replay.pop()\n".format(indent, i)) + args += ", r{}".format(i) + out.write("{}state_{}_actions(parser, lexer{})\n".format(indent, dest.index, args)) + else: + # This is a transition to a shift. + assert dest.arguments == 0 + out.write("{}top = parser.stack.pop()\n".format(indent)) + out.write("{}top = StateTermValue({}, top.term, top.value, top.new_line)\n" + .format(indent, dest.index)) + out.write("{}parser.stack.append(top)\n".format(indent)) + + def write_action(act: Action, indent: str = "") -> typing.Tuple[str, bool]: + assert not act.is_inconsistent() + if isinstance(act, Replay): + for s in act.replay_steps: + out.write("{}parser.replay_action({})\n".format(indent, s)) + return indent, True + if isinstance(act, (Unwind, Reduce)): + stack_diff = act.update_stack_with() + replay = stack_diff.replay + out.write("{}replay = []\n".format(indent)) + while replay > 0: + replay -= 1 + out.write("{}replay.append(parser.stack.pop())\n".format(indent)) + out.write("{}replay.append(StateTermValue(0, {}, value, False))\n" + .format(indent, repr(stack_diff.nt))) + if stack_diff.pop > 0: + out.write("{}del parser.stack[-{}:]\n".format(indent, stack_diff.pop)) + out.write("{}parser.replay.extend(replay)\n".format(indent)) + return indent, act.follow_edge() + if isinstance(act, Accept): + out.write("{}raise ShiftAccept()\n".format(indent)) + return indent, False + if isinstance(act, Lookahead): + raise ValueError("Unexpected Lookahead action") + if isinstance(act, CheckNotOnNewLine): + out.write("{}if not parser.check_not_on_new_line(lexer, {}):\n".format(indent, -act.offset)) + out.write("{} return\n".format(indent)) + return indent, True + if isinstance(act, FilterStates): + out.write("{}if parser.top_state() in [{}]:\n".format(indent, ", ".join(map(str, act.states)))) + return indent + " ", True + if isinstance(act, FilterFlag): + out.write("{}if parser.flags[{}][-1] == {}:\n".format(indent, act.flag, act.value)) + return indent + " ", True + if isinstance(act, PushFlag): + out.write("{}parser.flags[{}].append({})\n".format(indent, act.flag, act.value)) + return indent, True + if isinstance(act, PopFlag): + out.write("{}parser.flags[{}].pop()\n".format(indent, act.flag)) + return indent, True + if isinstance(act, FunCall): + enclosing_call_offset = act.offset + if enclosing_call_offset < 0: + # When replayed terms are given as function arguments, they are + # not part of the stack. However, we cheat the system by + # replaying all terms necessary to pop them uniformly. Thus, the + # naming of variable for negative offsets will always match the + # naming of when the offset is 0. 
+ enclosing_call_offset = 0 + + def map_with_offset(args: typing.Iterable[OutputExpr]) -> typing.Iterator[str]: + get_value = "parser.stack[{}].value" + for a in args: + if isinstance(a, int): + yield get_value.format(-(a + enclosing_call_offset)) + elif isinstance(a, str): + yield a + elif isinstance(a, Some): + # `typing.cast` because Some isn't generic, unfortunately. + yield next(map_with_offset([typing.cast(OutputExpr, a.inner)])) + elif a is None: + yield "None" + else: + raise ValueError(a) + + if act.method == "id": + assert len(act.args) == 1 + out.write("{}{} = {}\n".format(indent, act.set_to, next(map_with_offset(act.args)))) + else: + methods.add((act.method, len(act.args))) + out.write("{}{} = parser.methods.{}({})\n".format( + indent, act.set_to, method_name_to_python(act.method), + ", ".join(map_with_offset(act.args)) + )) + return indent, True + if isinstance(act, Seq): + for a in act.actions: + indent, fallthrough = write_action(a, indent) + return indent, fallthrough + raise ValueError("Unknown action type") + + # Write code correspond to each action which has to be performed. + for i, state in enumerate(parse_table.states): + assert i == state.index + if state.epsilon == []: + continue + args = [] + for j in range(state.arguments): + args.append("a{}".format(j)) + out.write("def state_{}_actions(parser, lexer{}):\n".format( + i, "".join(map(lambda s: ", " + s, args)))) + if state.arguments > 0: + out.write(" parser.replay.extend([{}])\n".format(", ".join(reversed(args)))) + term, dest = next(iter(state.epsilon)) + if term.update_stack(): + # If we Unwind, make sure all elements are replayed on the stack before starting. + out.write(" # {}\n".format(term)) + stack_diff = term.update_stack_with() + replay = stack_diff.replay + if stack_diff.pop + replay >= 0: + while replay < 0: + replay += 1 + out.write(" parser.stack.append(parser.replay.pop())\n") + out.write("{}\n".format(parse_table.debug_context(i, "\n", " # "))) + out.write(" value = None\n") + for action, dest in state.edges(): + assert isinstance(action, Action) + try: + indent, fallthrough = write_action(action, " ") + except Exception: + print("Error while writing code for {}\n\n".format(state)) + parse_table.debug_info = True + print(parse_table.debug_context(state.index, "\n", "# ")) + raise + if fallthrough: + write_epsilon_transition(indent, dest) + out.write("{}return\n".format(indent)) + out.write("\n") + + out.write("actions = [\n") + for i, state in enumerate(parse_table.states): + assert i == state.index + out.write(" # {}.\n{}\n".format(i, parse_table.debug_context(i, "\n", " # "))) + if state.epsilon == []: + row: typing.Dict[typing.Union[Term, ErrorTokenClass], StateId] + row = {term: dest for term, dest in state.edges()} + for err, dest in state.errors.items(): + del row[err] + row[ErrorToken] = dest + out.write(" " + repr(row) + ",\n") + else: + out.write(" state_{}_actions,\n".format(i)) + out.write("\n") + out.write("]\n\n") + + out.write("error_codes = [\n") + + def repr_code(symb: typing.Optional[ErrorSymbol]) -> str: + if isinstance(symb, ErrorSymbol): + return repr(symb.error_code) + return repr(symb) + + SLICE_LEN = 16 + for i in range(0, len(parse_table.states), SLICE_LEN): + states_slice = parse_table.states[i:i + SLICE_LEN] + out.write(" {}\n".format( + " ".join(repr_code(state.get_error_symbol()) + "," + for state in states_slice))) + out.write("]\n\n") + + out.write("goal_nt_to_init_state = {}\n\n".format( + repr({nt.name: goal for nt, goal in parse_table.named_goals}) + )) + + if 
len(parse_table.named_goals) == 1: + init_nt = parse_table.named_goals[0][0] + default_goal = '=' + repr(init_nt.name) + else: + default_goal = '' + + # Class used to provide default methods when not defined by the caller. + out.write("class DefaultMethods:\n") + for method, arglen in methods: + act_args = ", ".join("x{}".format(i) for i in range(arglen)) + name = method_name_to_python(method) + out.write(" def {}(self, {}):\n".format(name, act_args)) + out.write(" return ({}, {})\n".format(repr(name), act_args)) + if not methods: + out.write(" pass\n") + out.write("\n") + + out.write("class Parser(runtime.Parser):\n") + out.write(" def __init__(self, goal{}, builder=None):\n".format(default_goal)) + out.write(" if builder is None:\n") + out.write(" builder = DefaultMethods()\n") + out.write(" super().__init__(actions, error_codes, goal_nt_to_init_state[goal], builder)\n") + out.write("\n") diff --git a/third_party/rust/jsparagus/jsparagus/emit/rust.py b/third_party/rust/jsparagus/jsparagus/emit/rust.py new file mode 100644 index 0000000000..ca8382954e --- /dev/null +++ b/third_party/rust/jsparagus/jsparagus/emit/rust.py @@ -0,0 +1,903 @@ +"""Emit code and parser tables in Rust.""" + +import json +import re +import unicodedata +import sys +import itertools +import collections +from contextlib import contextmanager + +from ..runtime import (ERROR, ErrorToken, SPECIAL_CASE_TAG) +from ..ordered import OrderedSet + +from ..grammar import (Some, Nt, InitNt, End, ErrorSymbol) +from ..actions import (Accept, Action, Replay, Unwind, Reduce, CheckNotOnNewLine, FilterStates, + PushFlag, PopFlag, FunCall, Seq) + +from .. import types + + +TERMINAL_NAMES = { + '&&=': 'LogicalAndAssign', + '||=': 'LogicalOrAssign', + '??=': 'CoalesceAssign', + '{': 'OpenBrace', + '}': 'CloseBrace', + '(': 'OpenParenthesis', + ')': 'CloseParenthesis', + '[': 'OpenBracket', + ']': 'CloseBracket', + '+': 'Plus', + '-': 'Minus', + '~': 'BitwiseNot', + '!': 'LogicalNot', + '++': 'Increment', + '--': 'Decrement', + ':': 'Colon', + '=>': 'Arrow', + '=': 'EqualSign', + '*=': 'MultiplyAssign', + '/=': 'DivideAssign', + '%=': 'RemainderAssign', + '+=': 'AddAssign', + '-=': 'SubtractAssign', + '<<=': 'LeftShiftAssign', + '>>=': 'SignedRightShiftAssign', + '>>>=': 'UnsignedRightShiftAssign', + '&=': 'BitwiseAndAssign', + '^=': 'BitwiseXorAssign', + '|=': 'BitwiseOrAssign', + '**=': 'ExponentiateAssign', + '.': 'Dot', + '**': 'Exponentiate', + '?.': 'OptionalChain', + '?': 'QuestionMark', + '??': 'Coalesce', + '*': 'Star', + '/': 'Divide', + '%': 'Remainder', + '<<': 'LeftShift', + '>>': 'SignedRightShift', + '>>>': 'UnsignedRightShift', + '<': 'LessThan', + '>': 'GreaterThan', + '<=': 'LessThanOrEqualTo', + '>=': 'GreaterThanOrEqualTo', + '==': 'LaxEqual', + '!=': 'LaxNotEqual', + '===': 'StrictEqual', + '!==': 'StrictNotEqual', + '&': 'BitwiseAnd', + '^': 'BitwiseXor', + '|': 'BitwiseOr', + '&&': 'LogicalAnd', + '||': 'LogicalOr', + ',': 'Comma', + '...': 'Ellipsis', +} + + +@contextmanager +def indent(writer): + """This function is meant to be used with the `with` keyword of python, and + allow the user of it to add an indentation level to the code which is + enclosed in the `with` statement. + + This has the advantage that the indentation of the python code is reflected + to the generated code when `with indent(self):` is used. 
""" + writer.indent += 1 + yield None + writer.indent -= 1 + +def extract_ranges(iterator): + """Given a sorted iterator of integer, yield the contiguous ranges""" + # Identify contiguous ranges of states. + ranges = collections.defaultdict(list) + # A sorted list of contiguous integers implies that elements are separated + # by 1, as well as their indexes. Thus we can categorize them into buckets + # of contiguous integers using the base, which is the value v from which we + # remove the index i. + for i, v in enumerate(iterator): + ranges[v - i].append(v) + for l in ranges.values(): + yield (l[0], l[-1]) + +def rust_range(riter): + """Prettify a list of tuple of (min, max) of matched ranges into Rust + syntax.""" + def minmax_join(rmin, rmax): + if rmin == rmax: + return str(rmin) + else: + return "{}..={}".format(rmin, rmax) + return " | ".join(minmax_join(rmin, rmax) for rmin, rmax in riter) + +class RustActionWriter: + """Write epsilon state transitions for a given action function.""" + ast_builder = types.Type("AstBuilderDelegate", (types.Lifetime("alloc"),)) + + def __init__(self, writer, mode, traits, indent): + self.states = writer.states + self.writer = writer + self.mode = mode + self.traits = traits + self.indent = indent + self.has_ast_builder = self.ast_builder in traits + self.used_variables = set() + self.replay_args = [] + + def implement_trait(self, funcall): + "Returns True if this function call should be encoded" + ty = funcall.trait + if ty.name == "AstBuilder": + return "AstBuilderDelegate<'alloc>" in map(str, self.traits) + if ty in self.traits: + return True + if len(ty.args) == 0: + return ty.name in map(lambda t: t.name, self.traits) + return False + + def reset(self, act): + "Traverse all action to collect preliminary information." + self.used_variables = set(self.collect_uses(act)) + + def collect_uses(self, act): + "Generator which visit all used variables." + assert isinstance(act, Action) + if isinstance(act, (Reduce, Unwind)): + yield "value" + elif isinstance(act, FunCall): + arg_offset = act.offset + if arg_offset < 0: + # See write_funcall. + arg_offset = 0 + def map_with_offset(args): + for a in args: + if isinstance(a, int): + yield a + arg_offset + if isinstance(a, str): + yield a + elif isinstance(a, Some): + for offset in map_with_offset([a.inner]): + yield offset + if self.implement_trait(act): + for var in map_with_offset(act.args): + yield var + elif isinstance(act, Seq): + for a in act.actions: + for var in self.collect_uses(a): + yield var + + def write(self, string, *format_args): + "Delegate to the RustParserWriter.write function" + self.writer.write(self.indent, string, *format_args) + + def write_state_transitions(self, state, replay_args): + "Given a state, generate the code corresponding to all outgoing epsilon edges." 
+ try: + self.replay_args = replay_args + assert not state.is_inconsistent() + assert len(list(state.shifted_edges())) == 0 + for ctx in self.writer.parse_table.debug_context(state.index, None): + self.write("// {}", ctx) + first, dest = next(state.edges(), (None, None)) + if first is None: + return + self.reset(first) + if first.is_condition(): + self.write_condition(state, first) + else: + assert len(list(state.edges())) == 1 + self.write_action(first, dest) + except Exception as exc: + print("Error while writing code for {}\n\n".format(state)) + self.writer.parse_table.debug_info = True + print(self.writer.parse_table.debug_context(state.index, "\n", "# ")) + raise exc + + def write_replay_args(self, n): + rp_args = self.replay_args[:n] + rp_stck = self.replay_args[n:] + for tv in rp_stck: + self.write("parser.replay({});", tv) + return rp_args + + + def write_epsilon_transition(self, dest): + # Replay arguments which are not accepted as input of the next state. + dest = self.states[dest] + rp_args = self.write_replay_args(dest.arguments) + self.write("// --> {}", dest.index) + if dest.index >= self.writer.shift_count: + self.write("{}_{}(parser{})", self.mode, dest.index, "".join(map(lambda v: ", " + v, rp_args))) + else: + assert dest.arguments == 0 + self.write("parser.epsilon({});", dest.index) + self.write("Ok(false)") + + def write_condition(self, state, first_act): + "Write code to test a conditions, and dispatch to the matching destination" + # NOTE: we already asserted that this state is consistent, this implies + # that the first state check the same variables as all remaining + # states. Thus we use the first action to produce the match statement. + assert isinstance(first_act, Action) + assert first_act.is_condition() + if isinstance(first_act, CheckNotOnNewLine): + # TODO: At the moment this is Action is implemented as a single + # operation with a single destination. However, we should implement + # it in the future as 2 branches, one which is verifying the lack + # of new lines, and one which is shifting an extra error token. + # This might help remove the overhead of backtracking in addition + # to make this backtracking visible through APS. + assert len(list(state.edges())) == 1 + act, dest = next(state.edges()) + assert len(self.replay_args) == 0 + assert -act.offset > 0 + self.write("// {}", str(act)) + self.write("if !parser.check_not_on_new_line({})? {{", -act.offset) + with indent(self): + self.write("return Ok(false);") + self.write("}") + self.write_epsilon_transition(dest) + elif isinstance(first_act, FilterStates): + if len(state.epsilon) == 1: + # This is an attempt to avoid huge unending compilations. + _, dest = next(iter(state.epsilon), (None, None)) + pattern = rust_range(extract_ranges(first_act.states)) + self.write("// parser.top_state() in ({})", pattern) + self.write_epsilon_transition(dest) + else: + self.write("match parser.top_state() {") + with indent(self): + # Consider the branch which has the largest number of + # potential top-states to be most likely, and therefore the + # default branch to go to if all other fail to match. + default_weight = max(len(act.states) for act, dest in state.edges()) + default_states = [] + default_dest = None + for act, dest in state.edges(): + assert first_act.check_same_variable(act) + if default_dest is None and default_weight == len(act.states): + # This range has the same weight as the default + # branch. Ignore it and use it as the default + # branch which would be generated at the end. 
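+ # For example, if the outgoing edges filter on the state sets {1, 2, 3},
+ # {10} and {20, 21}, the 3-element set becomes the trailing `_ => ...` arm
+ # while the others are emitted as `10 => ...` and `20..=21 => ...` arms.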
+ default_states = act.states + default_dest = dest + continue + pattern = rust_range(extract_ranges(act.states)) + self.write("{} => {{", pattern) + with indent(self): + self.write_epsilon_transition(dest) + self.write("}") + # Generate code for the default branch, which got skipped + # while producing the loop. + self.write("_ => {") + with indent(self): + pattern = rust_range(extract_ranges(default_states)) + self.write("// {}", pattern) + self.write_epsilon_transition(default_dest) + self.write("}") + self.write("}") + else: + raise ValueError("Unexpected action type") + + def write_action(self, act, dest): + assert isinstance(act, Action) + assert not act.is_condition() + is_packed = {} + + # Do not pop any of the stack elements if the reduce action has an + # accept function call. Ideally we should be returning the result + # instead of keeping it on the parser stack. + if act.update_stack() and not act.contains_accept(): + stack_diff = act.update_stack_with() + start = 0 + depth = stack_diff.pop + args = len(self.replay_args) + replay = stack_diff.replay + if replay < 0: + # At the moment, we do not handle having more arguments than + # what is being popped and replay, thus write back the extra + # arguments and continue. + if stack_diff.pop + replay < 0: + self.replay_args = self.write_replay_args(replay) + replay = 0 + if replay + stack_diff.pop - args > 0: + assert (replay >= 0 and args == 0) or \ + (replay == 0 and args >= 0) + if replay > 0: + # At the moment, assume that arguments are only added once we + # consumed all replayed terms. Thus the replay_args can only be + # non-empty once replay is 0. Otherwise some of the replay_args + # would have to be replayed. + assert args == 0 + self.write("parser.rewind({});", replay) + start = replay + depth += start + + inputs = [] + for i in range(start, depth): + name = 's{}'.format(i + 1) + if i + 1 not in self.used_variables: + name = '_' + name + inputs.append(name) + if stack_diff.pop > 0: + args_pop = min(len(self.replay_args), stack_diff.pop) + # Pop by moving arguments of the action function. + for i, name in enumerate(inputs[:args_pop]): + self.write("let {} = {};", name, self.replay_args[-i - 1]) + # Pop by removing elements from the parser stack. + for name in inputs[args_pop:]: + self.write("let {} = parser.pop();", name) + if args_pop > 0: + del self.replay_args[-args_pop:] + + if isinstance(act, Seq): + for a in act.actions: + self.write_single_action(a, is_packed) + if a.contains_accept(): + break + else: + self.write_single_action(act, is_packed) + + # If we fallthrough the execution of the action, then generate an + # epsilon transition. 
+ if act.follow_edge() and not act.contains_accept(): + assert 0 <= dest < self.writer.shift_count + self.writer.action_count + self.write_epsilon_transition(dest) + + def write_single_action(self, act, is_packed): + self.write("// {}", str(act)) + if isinstance(act, Replay): + self.write_replay(act) + elif isinstance(act, (Reduce, Unwind)): + self.write_reduce(act, is_packed) + elif isinstance(act, Accept): + self.write_accept() + elif isinstance(act, PushFlag): + raise ValueError("NYI: PushFlag action") + elif isinstance(act, PopFlag): + raise ValueError("NYI: PopFlag action") + elif isinstance(act, FunCall): + self.write_funcall(act, is_packed) + else: + raise ValueError("Unexpected action type") + + def write_replay(self, act): + assert len(self.replay_args) == 0 + for shift_state in act.replay_steps: + self.write("parser.shift_replayed({});", shift_state) + + def write_reduce(self, act, is_packed): + value = "value" + if value in is_packed: + packed = is_packed[value] + else: + packed = False + value = "None" + + if packed: + # Extract the StackValue from the packed TermValue + value = "{}.value".format(value) + elif self.has_ast_builder: + # Convert into a StackValue + value = "TryIntoStack::try_into_stack({})?".format(value) + else: + # Convert into a StackValue (when no ast-builder) + value = "value" + + stack_diff = act.update_stack_with() + assert stack_diff.nt is not None + self.write("let term = NonterminalId::{}.into();", + self.writer.nonterminal_to_camel(stack_diff.nt)) + if value != "value": + self.write("let value = {};", value) + self.write("let reduced = TermValue { term, value };") + self.replay_args.append("reduced") + + def write_accept(self): + self.write("return Ok(true);") + + def write_funcall(self, act, is_packed): + arg_offset = act.offset + if arg_offset < 0: + # NOTE: When replacing replayed stack elements by arguments, the + # offset is reduced by -1, and can become negative for cases where + # we read the value associated with an argument instead of the + # value read from the stack. However, write_action shift everything + # as-if we had replayed all the necessary terms, and therefore + # variables are named as-if the offset were 0. + arg_offset = 0 + + def no_unpack(val): + return val + + def unpack(val): + if val in is_packed: + packed = is_packed[val] + else: + packed = True + if packed: + return "{}.value.to_ast()?".format(val) + return val + + def map_with_offset(args, unpack): + get_value = "s{}" + for a in args: + if isinstance(a, int): + yield unpack(get_value.format(a + arg_offset)) + elif isinstance(a, str): + yield unpack(a) + elif isinstance(a, Some): + yield "Some({})".format(next(map_with_offset([a.inner], unpack))) + elif a is None: + yield "None" + else: + raise ValueError(a) + + packed = False + # If the variable is used, then generate the let binding. + set_var = "" + if act.set_to in self.used_variables: + set_var = "let {} = ".format(act.set_to) + + # If the function cannot be call as the generated action function does + # not use the trait on which this function is implemented, then replace + # the value by `()`. + if not self.implement_trait(act): + self.write("{}();", set_var) + return + + # NOTE: Currently "AstBuilder" is implemented through the + # AstBuilderDelegate which returns a mutable reference to the + # AstBuilder. This would call the specific special case method to get + # the actual AstBuilder. + delegate = "" + if str(act.trait) == "AstBuilder": + delegate = "ast_builder_refmut()." 
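+ # With the delegate in place, an AstBuilder call (illustrative method name)
+ # over stack slots 3 and 1 is written out below roughly as:
+ #     parser.ast_builder_refmut().binary_expr(s3.value.to_ast()?, s1.value.to_ast()?)?;
+ # prefixed with `let <set_to> = ` when the result is used.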
+ + # NOTE: Currently "AstBuilder" functions are made fallible + # using the fallible_methods taken from some Rust code + # which extract this information to produce a JSON file. + forward_errors = "" + if act.fallible or act.method in self.writer.fallible_methods: + forward_errors = "?" + + # By default generate a method call, with the method name. However, + # there is a special case for the "id" function which is an artifact, + # which does not have to unpack the content of its argument. + value = "parser.{}{}({})".format( + delegate, act.method, + ", ".join(map_with_offset(act.args, unpack))) + packed = False + if act.method == "id": + assert len(act.args) == 1 + value = next(map_with_offset(act.args, no_unpack)) + if isinstance(act.args[0], str): + packed = is_packed[act.args[0]] + else: + assert isinstance(act.args[0], int) + packed = True + + self.write("{}{}{};", set_var, value, forward_errors) + is_packed[act.set_to] = packed + + +class RustParserWriter: + def __init__(self, out, pt, fallible_methods): + self.out = out + self.fallible_methods = fallible_methods + assert pt.exec_modes is not None + self.parse_table = pt + self.states = pt.states + self.shift_count = pt.count_shift_states() + self.action_count = pt.count_action_states() + self.action_from_shift_count = pt.count_action_from_shift_states() + self.init_state_map = pt.named_goals + self.terminals = list(OrderedSet(pt.terminals)) + # This extra terminal is used to represent any ErrorySymbol transition, + # knowing that we assert that there is only one ErrorSymbol kind per + # state. + self.terminals.append("ErrorToken") + self.nonterminals = list(OrderedSet(pt.nonterminals)) + + def emit(self): + self.header() + self.terms_id() + self.shift() + self.error_codes() + self.check_camel_case() + self.actions() + self.entry() + + def write(self, indentation, string, *format_args): + if len(format_args) == 0: + formatted = string + else: + formatted = string.format(*format_args) + self.out.write(" " * indentation + formatted + "\n") + + def header(self): + self.write(0, "// WARNING: This file is autogenerated.") + self.write(0, "") + self.write(0, "use crate::ast_builder::AstBuilderDelegate;") + self.write(0, "use crate::stack_value_generated::{StackValue, TryIntoStack};") + self.write(0, "use crate::traits::{TermValue, ParserTrait};") + self.write(0, "use crate::error::Result;") + traits = OrderedSet() + for mode_traits in self.parse_table.exec_modes.values(): + traits |= mode_traits + traits = list(traits) + traits = [ty for ty in traits if ty.name != "AstBuilderDelegate"] + traits = [ty for ty in traits if ty.name != "ParserTrait"] + if traits == []: + pass + elif len(traits) == 1: + self.write(0, "use crate::traits::{};", traits[0].name) + else: + self.write(0, "use crate::traits::{{{}}};", ", ".join(ty.name for ty in traits)) + self.write(0, "") + self.write(0, "const ERROR: i64 = {};", hex(ERROR)) + self.write(0, "") + + def terminal_name(self, value): + if isinstance(value, End) or value is None: + return "End" + elif isinstance(value, ErrorSymbol) or value is ErrorToken: + return "ErrorToken" + elif value in TERMINAL_NAMES: + return TERMINAL_NAMES[value] + elif value.isalpha(): + if value.islower(): + return value.capitalize() + else: + return value + else: + raw_name = " ".join((unicodedata.name(c) for c in value)) + snake_case = raw_name.replace("-", " ").replace(" ", "_").lower() + camel_case = self.to_camel_case(snake_case) + return camel_case + + def terminal_name_camel(self, value): + return 
self.to_camel_case(self.terminal_name(value)) + + def terms_id(self): + self.write(0, "#[derive(Copy, Clone, Debug, PartialEq)]") + self.write(0, "#[repr(u32)]") + self.write(0, "pub enum TerminalId {") + for i, t in enumerate(self.terminals): + name = self.terminal_name(t) + self.write(1, "{} = {}, // {}", name, i, repr(t)) + self.write(0, "}") + self.write(0, "") + self.write(0, "#[derive(Clone, Copy, Debug, PartialEq)]") + self.write(0, "#[repr(u32)]") + self.write(0, "pub enum NonterminalId {") + offset = len(self.terminals) + for i, nt in enumerate(self.nonterminals): + self.write(1, "{} = {},", self.nonterminal_to_camel(nt), i + offset) + self.write(0, "}") + self.write(0, "") + self.write(0, "#[derive(Clone, Copy, Debug, PartialEq)]") + self.write(0, "pub struct Term(u32);") + self.write(0, "") + self.write(0, "impl Term {") + self.write(1, "pub fn is_terminal(&self) -> bool {") + self.write(2, "self.0 < {}", offset) + self.write(1, "}") + self.write(1, "pub fn to_terminal(&self) -> TerminalId {") + self.write(2, "assert!(self.is_terminal());") + self.write(2, "unsafe { std::mem::transmute(self.0) }") + self.write(1, "}") + self.write(0, "}") + self.write(0, "") + self.write(0, "impl From<TerminalId> for Term {") + self.write(1, "fn from(t: TerminalId) -> Self {") + self.write(2, "Term(t as _)") + self.write(1, "}") + self.write(0, "}") + self.write(0, "") + self.write(0, "impl From<NonterminalId> for Term {") + self.write(1, "fn from(nt: NonterminalId) -> Self {") + self.write(2, "Term(nt as _)") + self.write(1, "}") + self.write(0, "}") + self.write(0, "") + self.write(0, "impl From<Term> for usize {") + self.write(1, "fn from(term: Term) -> Self {") + self.write(2, "term.0 as _") + self.write(1, "}") + self.write(0, "}") + self.write(0, "") + self.write(0, "impl From<Term> for &'static str {") + self.write(1, "fn from(term: Term) -> Self {") + self.write(2, "match term.0 {") + for i, t in enumerate(self.terminals): + self.write(3, "{} => &\"{}\",", i, repr(t)) + for j, nt in enumerate(self.nonterminals): + i = j + offset + self.write(3, "{} => &\"{}\",", i, str(nt.name)) + self.write(3, "_ => panic!(\"unknown Term\")", i, str(nt.name)) + self.write(2, "}") + self.write(1, "}") + self.write(0, "}") + self.write(0, "") + + def shift(self): + self.write(0, "#[rustfmt::skip]") + width = len(self.terminals) + len(self.nonterminals) + num_shifted_edges = 0 + + def state_get(state, t): + nonlocal num_shifted_edges + res = state.get(t, "ERROR") + if res == "ERROR": + error_symbol = state.get_error_symbol() + if t == "ErrorToken" and error_symbol: + res = state[error_symbol] + num_shifted_edges += 1 + else: + num_shifted_edges += 1 + return res + + self.write(0, "static SHIFT: [i64; {}] = [", self.shift_count * width) + assert self.terminals[-1] == "ErrorToken" + for i, state in enumerate(self.states[:self.shift_count]): + num_shifted_edges = 0 + self.write(1, "// {}.", i) + for ctx in self.parse_table.debug_context(state.index, None): + self.write(1, "// {}", ctx) + self.write(1, "{}", + ' '.join("{},".format(state_get(state, t)) for t in self.terminals)) + self.write(1, "{}", + ' '.join("{},".format(state_get(state, t)) for t in self.nonterminals)) + try: + assert sum(1 for _ in state.shifted_edges()) == num_shifted_edges + except Exception: + print("Some edges are not encoded.") + print("List of terminals: {}".format(', '.join(map(repr, self.terminals)))) + print("List of nonterminals: {}".format(', '.join(map(repr, self.nonterminals)))) + print("State having the issue: 
{}".format(str(state))) + raise + self.write(0, "];") + self.write(0, "") + + def render_action(self, action): + if isinstance(action, tuple): + if action[0] == 'IfSameLine': + _, a1, a2 = action + if a1 is None: + a1 = 'ERROR' + if a2 is None: + a2 = 'ERROR' + index = self.add_special_case( + "if token.is_on_new_line { %s } else { %s }" + % (a2, a1)) + else: + raise ValueError("unrecognized kind of special case: {!r}".format(action)) + return SPECIAL_CASE_TAG + index + elif action == 'ERROR': + return action + else: + assert isinstance(action, int) + return action + + def emit_special_cases(self): + self.write(0, "static SPECIAL_CASES: [fn(&Token) -> i64; {}] = [", + len(self.special_cases)) + for i, code in enumerate(self.special_cases): + self.write(1, "|token| {{ {} }},", code) + self.write(0, "];") + self.write(0, "") + + def error_codes(self): + self.write(0, "#[derive(Clone, Copy, Debug, PartialEq)]") + self.write(0, "pub enum ErrorCode {") + error_symbols = (s.get_error_symbol() for s in self.states[:self.shift_count]) + error_codes = (e.error_code for e in error_symbols if e is not None) + for error_code in OrderedSet(error_codes): + self.write(1, "{},", self.to_camel_case(error_code)) + self.write(0, "}") + self.write(0, "") + + self.write(0, "static STATE_TO_ERROR_CODE: [Option<ErrorCode>; {}] = [", + self.shift_count) + for i, state in enumerate(self.states[:self.shift_count]): + error_symbol = state.get_error_symbol() + if error_symbol is None: + self.write(1, "None,") + else: + self.write(1, "// {}.", i) + for ctx in self.parse_table.debug_context(state.index, None): + self.write(1, "// {}", ctx) + self.write(1, "Some(ErrorCode::{}),", + self.to_camel_case(error_symbol.error_code)) + self.write(0, "];") + self.write(0, "") + + def nonterminal_to_snake(self, ident): + if isinstance(ident, Nt): + if isinstance(ident.name, InitNt): + name = "Start" + ident.name.goal.name + else: + name = ident.name + base_name = self.to_snek_case(name) + args = ''.join((("_" + self.to_snek_case(name)) + for name, value in ident.args if value)) + return base_name + args + else: + assert isinstance(ident, str) + return self.to_snek_case(ident) + + def nonterminal_to_camel(self, nt): + return self.to_camel_case(self.nonterminal_to_snake(nt)) + + def to_camel_case(self, ident): + if '_' in ident: + return ''.join(word.capitalize() for word in ident.split('_')) + elif ident.islower(): + return ident.capitalize() + else: + return ident + + def check_camel_case(self): + seen = {} + for nt in self.nonterminals: + cc = self.nonterminal_to_camel(nt) + if cc in seen: + raise ValueError("{} and {} have the same camel-case spelling ({})".format( + seen[cc], nt, cc)) + seen[cc] = nt + + def to_snek_case(self, ident): + # https://stackoverflow.com/questions/1175208 + s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', ident) + return re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower() + + def type_to_rust(self, ty, namespace="", boxed=False): + """ + Convert a jsparagus type (see types.py) to Rust. + + Pass boxed=True if the type needs to be boxed. + """ + if isinstance(ty, types.Lifetime): + assert not boxed + rty = "'" + ty.name + elif ty == types.UnitType: + assert not boxed + rty = '()' + elif ty == types.TokenType: + rty = "Token" + elif ty.name == 'Option' and len(ty.args) == 1: + # We auto-translate `Box<Option<T>>` to `Option<Box<T>>` since + # that's basically the same thing but more efficient. 
+ [arg] = ty.args + return 'Option<{}>'.format(self.type_to_rust(arg, namespace, boxed)) + elif ty.name == 'Vec' and len(ty.args) == 1: + [arg] = ty.args + rty = "Vec<'alloc, {}>".format(self.type_to_rust(arg, namespace, boxed=False)) + else: + if namespace == "": + rty = ty.name + else: + rty = namespace + '::' + ty.name + if ty.args: + rty += '<{}>'.format(', '.join(self.type_to_rust(arg, namespace, boxed) + for arg in ty.args)) + if boxed: + return "Box<'alloc, {}>".format(rty) + else: + return rty + + def actions(self): + # For each execution mode, add a corresponding function which + # implements various traits. The trait list is used for filtering which + # function is added in the generated code. + for mode, traits in self.parse_table.exec_modes.items(): + action_writer = RustActionWriter(self, mode, traits, 2) + start_at = self.shift_count + end_at = start_at + self.action_from_shift_count + assert len(self.states[self.shift_count:]) == self.action_count + traits_text = ' + '.join(map(self.type_to_rust, traits)) + table_holder_name = self.to_camel_case(mode) + table_holder_type = table_holder_name + "<'alloc, Handler>" + # As we do not have default associated types yet in Rust + # (rust-lang#29661), we have to peak from the parameter of the + # ParserTrait. + assert list(traits)[0].name == "ParserTrait" + arg_type = "TermValue<" + self.type_to_rust(list(traits)[0].args[1]) + ">" + self.write(0, "struct {} {{", table_holder_type) + self.write(1, "fns: [fn(&mut Handler) -> Result<'alloc, bool>; {}]", + self.action_from_shift_count) + self.write(0, "}") + self.write(0, "impl<'alloc, Handler> {}", table_holder_type) + self.write(0, "where") + self.write(1, "Handler: {}", traits_text) + self.write(0, "{") + self.write(1, "const TABLE : {} = {} {{", table_holder_type, table_holder_name) + self.write(2, "fns: [") + for state in self.states[start_at:end_at]: + assert state.arguments == 0 + self.write(3, "{}_{},", mode, state.index) + self.write(2, "],") + self.write(1, "};") + self.write(0, "}") + self.write(0, "") + self.write(0, + "pub fn {}<'alloc, Handler>(parser: &mut Handler, state: usize) " + "-> Result<'alloc, bool>", + mode) + self.write(0, "where") + self.write(1, "Handler: {}", traits_text) + self.write(0, "{") + self.write(1, "{}::<'alloc, Handler>::TABLE.fns[state - {}](parser)", + table_holder_name, start_at) + self.write(0, "}") + self.write(0, "") + for state in self.states[self.shift_count:]: + state_args = "" + for i in range(state.arguments): + state_args += ", v{}: {}".format(i, arg_type) + replay_args = ["v{}".format(i) for i in range(state.arguments)] + self.write(0, "#[inline]") + self.write(0, "#[allow(unused)]") + self.write(0, + "pub fn {}_{}<'alloc, Handler>(parser: &mut Handler{}) " + "-> Result<'alloc, bool>", + mode, state.index, state_args) + self.write(0, "where") + self.write(1, "Handler: {}", ' + '.join(map(self.type_to_rust, traits))) + self.write(0, "{") + action_writer.write_state_transitions(state, replay_args) + self.write(0, "}") + + def entry(self): + self.write(0, "#[derive(Clone, Copy)]") + self.write(0, "pub struct ParseTable<'a> {") + self.write(1, "pub shift_count: usize,") + self.write(1, "pub action_count: usize,") + self.write(1, "pub action_from_shift_count: usize,") + self.write(1, "pub shift_table: &'a [i64],") + self.write(1, "pub shift_width: usize,") + self.write(1, "pub error_codes: &'a [Option<ErrorCode>],") + self.write(0, "}") + self.write(0, "") + + self.write(0, "impl<'a> ParseTable<'a> {") + self.write(1, "pub fn check(&self) 
{") + self.write(2, "assert_eq!(") + self.write(3, "self.shift_table.len(),") + self.write(3, "(self.shift_count * self.shift_width) as usize") + self.write(2, ");") + self.write(1, "}") + self.write(0, "}") + self.write(0, "") + + self.write(0, "pub static TABLES: ParseTable<'static> = ParseTable {") + self.write(1, "shift_count: {},", self.shift_count) + self.write(1, "action_count: {},", self.action_count) + self.write(1, "action_from_shift_count: {},", self.action_from_shift_count) + self.write(1, "shift_table: &SHIFT,") + self.write(1, "shift_width: {},", len(self.terminals) + len(self.nonterminals)) + self.write(1, "error_codes: &STATE_TO_ERROR_CODE,") + self.write(0, "};") + self.write(0, "") + + for init_nt, index in self.init_state_map: + assert init_nt.args == () + self.write(0, "pub static START_STATE_{}: usize = {};", + self.nonterminal_to_snake(init_nt).upper(), index) + self.write(0, "") + + +def write_rust_parse_table(out, parse_table, handler_info): + if not handler_info: + print("WARNING: info.json is not provided", file=sys.stderr) + fallible_methods = [] + else: + with open(handler_info, "r") as json_file: + handler_info_json = json.load(json_file) + fallible_methods = handler_info_json["fallible-methods"] + + RustParserWriter(out, parse_table, fallible_methods).emit() diff --git a/third_party/rust/jsparagus/jsparagus/extension.py b/third_party/rust/jsparagus/jsparagus/extension.py new file mode 100644 index 0000000000..515fc68c1a --- /dev/null +++ b/third_party/rust/jsparagus/jsparagus/extension.py @@ -0,0 +1,108 @@ +"""Data structure extracted from parsing the EDSL which are added within the +Rust code.""" + +from __future__ import annotations +# mypy: disallow-untyped-defs, disallow-incomplete-defs, disallow-untyped-calls + +import typing +import os + +from dataclasses import dataclass +from .utils import keep_until +from .grammar import Element, Grammar, LenientNt, NtDef, Production + + +@dataclass(frozen=True) +class ImplFor: + __slots__ = ['param', 'trait', 'for_type'] + param: str + trait: str + for_type: str + + +def eq_productions(grammar: Grammar, prod1: Production, prod2: Production) -> bool: + s1 = tuple(e for e in prod1.body if grammar.is_shifted_element(e)) + s2 = tuple(e for e in prod2.body if grammar.is_shifted_element(e)) + return s1 == s2 + + +def merge_productions(grammar: Grammar, prod1: Production, prod2: Production) -> Production: + # Consider all shifted elements as non-moveable elements, and insert other + # around these. + assert eq_productions(grammar, prod1, prod2) + l1 = list(prod1.body) + l2 = list(prod2.body) + body: typing.List[Element] = [] + while l1 != [] and l2 != []: + front1 = list(keep_until(l1, grammar.is_shifted_element)) + front2 = list(keep_until(l2, grammar.is_shifted_element)) + assert front1[-1] == front2[-1] + l1 = l1[len(front1):] + l2 = l2[len(front2):] + if len(front1) == 1: + body = body + front2 + elif len(front2) == 1: + body = body + front1 + else: + raise ValueError("We do not know how to sort operations yet.") + return prod1.copy_with(body=body) + + +@dataclass(frozen=True) +class ExtPatch: + "Patch an existing grammar rule by adding Code" + + prod: typing.Tuple[LenientNt, str, NtDef] + + def apply_patch( + self, + filename: os.PathLike, + grammar: Grammar, + nonterminals: typing.Dict[LenientNt, NtDef] + ) -> None: + # - name: non-terminal. + # - namespace: ":" for syntactic or "::" for lexical. Always ":" as + # defined by rust_nt_def. + # - nt_def: A single non-terminal definition with a single production. 
+ (name, namespace, nt_def) = self.prod + gnt_def = nonterminals[name] + # Find a matching production in the grammar. + assert nt_def.params == gnt_def.params + new_rhs_list = [] + assert len(nt_def.rhs_list) == 1 + patch_prod = nt_def.rhs_list[0] + applied = False + for grammar_prod in gnt_def.rhs_list: + if eq_productions(grammar, grammar_prod, patch_prod): + grammar_prod = merge_productions(grammar, grammar_prod, patch_prod) + applied = True + new_rhs_list.append(grammar_prod) + if not applied: + raise ValueError("{}: Unable to find a matching production for {} in the grammar:\n {}" + .format(filename, name, grammar.production_to_str(name, patch_prod))) + result = gnt_def.with_rhs_list(new_rhs_list) + nonterminals[name] = result + + +@dataclass +class GrammarExtension: + """A collection of grammar extensions, with added code, added traits for the + action functions. + + """ + + target: None + grammar: typing.List[ExtPatch] + filename: os.PathLike + + def apply_patch( + self, + grammar: Grammar, + nonterminals: typing.Dict[LenientNt, NtDef] + ) -> None: + # A grammar extension is composed of multiple production patches. + for ext in self.grammar: + if isinstance(ext, ExtPatch): + ext.apply_patch(self.filename, grammar, nonterminals) + else: + raise ValueError("Extension of type {} not yet supported.".format(ext.__class__)) diff --git a/third_party/rust/jsparagus/jsparagus/gen.py b/third_party/rust/jsparagus/jsparagus/gen.py new file mode 100755 index 0000000000..72f8d1dd58 --- /dev/null +++ b/third_party/rust/jsparagus/jsparagus/gen.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 + +"""gen.py - Fifth stab at a parser generator. + +**Grammars.** +A grammar is a dictionary {str: [[symbol]]} mapping names of nonterminals to +lists of right-hand sides. Each right-hand side is a list of symbols. There +are several kinds of symbols; see grammar.py to learn more. + +Instead of a list of right-hand sides, the value of a grammar entry may be a +function; see grammar.Nt for details. + +**Token streams.** +The user passes to each method an object representing the input sequence. +This object must support two methods: + +* `src.peek()` returns the kind of the next token, or `None` at the end of + input. + +* `src.take(kind)` throws an exception if `src.peek() != kind`; + otherwise, it removes the next token from the input stream and returns it. + The special case `src.take(None)` checks that the input stream is empty: + if so, it returns None; if not, it throws. + +For very basic needs, see `lexer.LexicalGrammar`. + +""" + +from __future__ import annotations + +import io +import typing + +from .grammar import Grammar +from . 
import emit +from .rewrites import CanonicalGrammar +from .parse_table import ParseTable + + +# *** Parser generation ******************************************************* + +def generate_parser_states( + grammar: Grammar, + *, + verbose: bool = False, + progress: bool = False, + debug: bool = False +) -> ParseTable: + parse_table = ParseTable(CanonicalGrammar(grammar), verbose, progress, debug) + return parse_table + + +def generate_parser( + out: io.TextIOBase, + source: Grammar, + *, + verbose: bool = False, + progress: bool = False, + debug: bool = False, + target: str = 'python', + handler_info: typing.Any = None +) -> None: + assert target in ('python', 'rust') + + if isinstance(source, Grammar): + parser_data = generate_parser_states( + source, verbose=verbose, progress=progress, debug=debug) + elif isinstance(source, ParseTable): + parser_data = source + parser_data.debug_info = debug + else: + raise TypeError("unrecognized source: {!r}".format(source)) + + if target == 'rust': + if isinstance(parser_data, ParseTable): + emit.write_rust_parse_table(out, parser_data, handler_info) + else: + raise ValueError("Unexpected parser_data kind") + else: + if isinstance(parser_data, ParseTable): + emit.write_python_parse_table(out, parser_data) + else: + raise ValueError("Unexpected parser_data kind") + + +def compile(grammar, verbose=False, debug=False): + assert isinstance(grammar, Grammar) + out = io.StringIO() + generate_parser(out, grammar, verbose=verbose, debug=debug) + scope = {} + if verbose: + with open("parse_with_python.py", "w") as f: + f.write(out.getvalue()) + exec(out.getvalue(), scope) + return scope['Parser'] + + +# *** Fun demo **************************************************************** + +def demo(): + from .grammar import example_grammar + grammar = example_grammar() + + from . import lexer + tokenize = lexer.LexicalGrammar( + "+ - * / ( )", NUM=r'0|[1-9][0-9]*', VAR=r'[_A-Za-z]\w+') + + import io + out = io.StringIO() + generate_parser(out, grammar) + code = out.getvalue() + print(code) + print("----") + + sandbox = {} + exec(code, sandbox) + Parser = sandbox['Parser'] + + while True: + try: + line = input('> ') + except EOFError: + break + + try: + parser = Parser() + lexer = tokenize(parser) + lexer.write(line) + result = lexer.close() + except Exception as exc: + print(exc.__class__.__name__ + ": " + str(exc)) + else: + print(result) + + +if __name__ == '__main__': + demo() diff --git a/third_party/rust/jsparagus/jsparagus/grammar.py b/third_party/rust/jsparagus/jsparagus/grammar.py new file mode 100644 index 0000000000..bcaf5a02ae --- /dev/null +++ b/third_party/rust/jsparagus/jsparagus/grammar.py @@ -0,0 +1,1248 @@ +""" Data structures for representing grammars. """ + +from __future__ import annotations +# mypy: disallow-untyped-defs, disallow-incomplete-defs, disallow-untyped-calls + +import copy +import typing +import dataclasses +from dataclasses import dataclass +from .ordered import OrderedSet, OrderedFrozenSet +from . import types + + +# *** What is a grammar? ****************************************************** +# +# A grammar is a dictionary mapping nonterminal names to lists of right-hand +# sides. Each right-hand side (also called a "production") is a list whose +# elements can include terminals, nonterminals, Optional elements, LookaheadRules, +# and NoLineTerminatorHere. 
+# +# The most common elements are terminals and nonterminals, so a grammar usually +# looks something like this: +def example_grammar() -> Grammar: + rules: typing.Dict[typing.Union[str, InitNt, Nt], LenientNtDef] = { + 'expr': [ + ['term'], + ['expr', '+', 'term'], + ['expr', '-', 'term'], + ], + 'term': [ + ['unary'], + ['term', '*', 'unary'], + ['term', '/', 'unary'], + ], + 'unary': [ + ['prim'], + ['-', 'unary'], + ], + 'prim': [ + ['NUM'], + ['VAR'], + ['(', 'expr', ')'], + ], + } + + # The goal nonterminals are the nonterminals we're actually interested in + # parsing. Here we want to parse expressions; all the other nonterminals + # are interesting only as the building blocks of expressions. + # + # Variable terminals are terminal symbols that can have several different + # values, like a VAR token that could be any identifier, or a NUM token + # that could be any number. + return Grammar(rules, goal_nts=['expr'], variable_terminals=['NUM', 'VAR']) + + +Condition = typing.Tuple[str, bool] + + +# A production consists of a left side, an optional condition, a right side, +# and a reducer. A `Production` object includes everything except the left +# side. Incorporating reducers lets us transform a grammar while preserving +# behavior. +# +# The production `expr ::= term` is represented by +# `Production(["term"], 0)`. +# +# The production `expr ::= expr + term => add($0, $2)` is represented by +# `Production(["expr", "+", "term"], CallMethod("add", (0, 2))`. +# +@dataclass +class Production: + __slots__ = ['body', 'reducer', 'condition'] + body: typing.List[Element] + reducer: ReduceExprOrAccept + condition: typing.Optional[Condition] + + def __init__(self, + body: typing.List[Element], + reducer: ReduceExprOrAccept, + *, + condition: typing.Optional[Condition] = None): + self.body = body + self.reducer = reducer + self.condition = condition + + def __repr__(self) -> str: + if self.condition is None: + return "Production({!r}, reducer={!r})".format(self.body, self.reducer) + else: + return ("Production({!r}, reducer={!r}, condition={!r})" + .format(self.body, self.reducer, self.condition)) + + def copy_with(self, **kwargs: typing.Any) -> Production: + return dataclasses.replace(self, **kwargs) + + +# *** Reduce expressions ****************************************************** +# +# Reduce expressions ("reducers" for short) are a little language used to +# specify what happens when a production is matched. A reduce expression is +# one of these: +# +# * An integer evaluates to one of the syntactic components of the +# production. For example, if the production we're reducing is +# `sum ::= sum + term`, the integer `0` evaluates the `sum` to the left of +# the plus sign, and `2` means the `term` on the right. (The integer +# `1` would refer to the `+` token itself, but that's not super useful.) +# +# These integers are not quite indexes into `production.body`, because +# LookaheadRule, ErrorSymbol, and NoLineTerminatorHere elements don't +# count: in the production `stmt ::= [lookahead != "let"] expr ";"`, `0` is +# the expr, and `1` is the semicolon token. See `is_concrete_element(e)`. +# +# * CallMethod objects pass values to a builder method and return the result. +# The `args` are nested reduce expressions. +# +# * None is an expression used as a placeholder when an optional symbol is +# omitted. +# +# * Some(expr) is used when an optional symbol is found and parsed. 
+# In Python, this just expands to the same thing as `expr`, but in Rust +# this expands to a use of `Option::Some()`. +# +# In addition, the special reducer 'accept' means stop parsing. This is +# used only in productions for init nonterminals, created automatically by +# Grammar.__init__(). It's not a reduce expression, so it can't be nested. + + +@dataclass(frozen=True) +class CallMethod: + """Express a method call, and give it a given set of arguments. A trait is + added as the parser should implement this trait to call this method.""" + + method: str + args: typing.Tuple[ReduceExpr, ...] + trait: types.Type = types.Type("AstBuilder") + fallible: bool = False + + +@dataclass(frozen=True) +class Some: + inner: ReduceExpr + + +def expr_to_str(expr: ReduceExprOrAccept) -> str: + if isinstance(expr, int): + return "${}".format(expr) + elif isinstance(expr, CallMethod): + return "{}::{}({}){}".format( + expr.trait, expr.method, + ', '.join(expr_to_str(arg) for arg in expr.args), + expr.fallible and '?' or '') + elif expr is None: + return "None" + elif isinstance(expr, Some): + return "Some({})".format(expr_to_str(expr.inner)) + elif expr == "accept": + return "<accept>" + else: + raise ValueError("unrecognized expression: {!r}".format(expr)) + + +# Type parameter for Grammar.intern(). +Internable = typing.TypeVar("Internable") + +SyntheticTerminalsDict = typing.Dict[str, OrderedFrozenSet[str]] + + +class Grammar: + """A collection of productions. + + * self.variable_terminals - OrderedFrozenSet[str] - Terminals that carry + data, like (in JS) numeric literals and RegExps. + + * self.terminals - OrderedFrozenSet[str] - All terminals used in the + language, including those in self.variable_terminals and + self.synthetic_terminals. + + * self.synthetic_terminals - {str: OrderedFrozenSet[str]} - Maps terminals + to sets of terminals. An entry `name: set` in this dictionary means + that `name` is shorthand for "one of the terminals in `set`". + + * self.nonterminals - {LenientNt: NtDef} - Keys are either (str|InitNt), early + in the pipeline, or Nt objects later on. Values are the NtDef objects + that contain the actual Productions. + + * self.methods - {str: MethodType} - Type information for methods. + + * self.init_nts - [InitNt or Nt] - The list of all elements of + self.nonterminals.keys() that are init nonterminals. + + * self.exec_modes - DefaultDict{str, OrderedSet[Type]} or None - ? + + * self.type_to_mods - {Type: [str]} or None - ? + + * self._cache - {Any: Any} - Cache of immutable objects used by + Grammar.intern(). 
+ """ + + variable_terminals: OrderedFrozenSet[str] + terminals: OrderedFrozenSet[typing.Union[str, End]] + synthetic_terminals: SyntheticTerminalsDict + nonterminals: typing.Dict[LenientNt, NtDef] + methods: typing.Dict[str, types.MethodType] + init_nts: typing.List[typing.Union[Nt, InitNt]] + exec_modes: typing.Optional[typing.DefaultDict[str, OrderedSet[types.Type]]] + type_to_modes: typing.Optional[typing.Mapping[types.Type, typing.List[str]]] + _cache: typing.Dict[typing.Any, typing.Any] + + def __init__( + self, + nonterminals: typing.Mapping[LenientNt, LenientNtDef], + *, + goal_nts: typing.Optional[typing.Iterable[LenientNt]] = None, + variable_terminals: typing.Iterable[str] = (), + synthetic_terminals: SyntheticTerminalsDict = None, + method_types: typing.Optional[typing.Dict[str, types.MethodType]] = None, + exec_modes: typing.Optional[typing.DefaultDict[str, OrderedSet[types.Type]]] = None, + type_to_modes: typing.Optional[typing.Mapping[types.Type, typing.List[str]]] = None): + + # This constructor supports passing in a sort of jumbled blob of + # strings, lists, and actual objects, and normalizes it all to a more + # typeful structure. Being able to interpret simple + # "list-of-lists-of-strings" input is super useful for tests. + # + # We don't check here that the grammar is LR, that it's cycle-free, or + # any other nice properties. + + # Copy/infer the arguments. + my_nonterminals: typing.Dict[LenientNt, LenientNtDef] = dict(nonterminals.items()) + if goal_nts is None: + # Default to the first nonterminal in the dictionary. + my_goal_nts = [] + for name in my_nonterminals: + my_goal_nts.append(name) + break + else: + my_goal_nts = list(goal_nts) + self.variable_terminals = OrderedFrozenSet(variable_terminals) + if synthetic_terminals is None: + synthetic_terminals = {} + else: + synthetic_terminals = { + name: OrderedFrozenSet(set) + for name, set in synthetic_terminals.items() + } + for synthetic_key, values in synthetic_terminals.items(): + if synthetic_key in my_nonterminals: + raise ValueError( + "invalid grammar: {!r} is both a terminal and a nonterminal" + .format(synthetic_key)) + for t in values: + if t in my_nonterminals: + raise ValueError( + "invalid grammar: {!r} is both ".format(t) + + "a representation of a synthetic terminal and a nonterminal") + if t in synthetic_terminals: + # Nested synthetic terminals. Throw, for now. (This + # should pretty much just work, except expand_terminal + # doesn't support it; and we don't check for cycles + # where a synthetic terminal includes itself directly + # or indirectly.) + raise ValueError( + "unsupported: synthetic terminals can't include other " + "synthetic terminals; {!r} includes {!r}" + .format(synthetic_key, t)) + # self.synthetic_terminals = synthetic_terminals + self.synthetic_terminals = {} + + keys_are_nt = isinstance(next(iter(my_nonterminals)), Nt) + key_type: typing.Union[typing.Type, typing.Tuple[typing.Type, ...]] + key_type = Nt if keys_are_nt else (str, InitNt) + + self._cache = {} + + # Gather some information just by looking at keys (without examining + # every production). + # + # str_to_nt maps the name of each non-parameterized + # nonterminal to `Nt(name)`, a cache. + str_to_nt: typing.Dict[typing.Union[str, InitNt], Nt] = {} + # nt_params lists the names of each nonterminal's parameters (empty + # tuple for non-parameterized nts). 
+ nt_params: typing.Dict[typing.Union[str, InitNt], typing.Tuple[str, ...]] = {} + for key in my_nonterminals: + if not isinstance(key, key_type): + raise ValueError( + "invalid grammar: conflicting key types in nonterminals dict - " + "expected either all str or all Nt, got {!r}" + .format(key.__class__.__name__)) + nt_name: typing.Union[str, InitNt] + param_names: typing.Tuple[str, ...] + if keys_are_nt: + assert isinstance(key, Nt) + nt_name = key.name + param_names = tuple(name for name, value in key.args) + else: + assert isinstance(key, (str, InitNt)) + nt_name = key + param_names = () + my_nt = my_nonterminals[key] + if isinstance(my_nt, NtDef): + param_names = tuple(my_nt.params) + if nt_name not in nt_params: + nt_params[nt_name] = param_names + else: + if nt_params[nt_name] != param_names: + raise ValueError( + "conflicting parameter name lists for nt {!r}: " + "both {!r} and {!r}" + .format(nt_name, nt_params[nt_name], param_names)) + if param_names == () and nt_name not in str_to_nt: + str_to_nt[nt_name] = self.intern(Nt(nt_name)) + + # Validate, desugar, and copy the grammar. As a side effect, calling + # validate_element on every element of the grammar populates + # all_terminals. + all_terminals: OrderedSet[typing.Union[str, End]] = OrderedSet(self.variable_terminals) + all_terminals.add(End()) + + def note_terminal(t: str) -> None: + """Add t (and all representations of it, if synthetic) to all_terminals.""" + if t not in all_terminals: + all_terminals.add(t) + if t in self.synthetic_terminals: + for k in self.synthetic_terminals[t]: + note_terminal(k) + + # Note: i and j are normally list indexes, but they are sometimes the + # special string '?'. It's OK because they are used only in error + # messages. + def validate_element( + nt: LenientNt, + i: typing.Union[int, str], + j: typing.Union[int, str], + e: Element, + context_params: typing.Tuple[str, ...] 
+ ) -> Element: + if isinstance(e, str): + if e in nt_params: + if nt_params[e] != (): + raise ValueError( + "invalid grammar: missing parameters for {!r} " + "in production `grammar[{!r}][{}][{}].inner`: {!r}" + .format(e, nt, i, j, nt_params[e])) + return str_to_nt[e] + else: + note_terminal(e) + return e + elif isinstance(e, Optional): + if not isinstance(e.inner, (str, Nt)): + raise TypeError( + "invalid grammar: unrecognized element " + "in production `grammar[{!r}][{}][{}].inner`: {!r}" + .format(nt, i, j, e.inner)) + inner = validate_element(nt, i, j, e.inner, context_params) + return self.intern(Optional(inner)) + elif isinstance(e, Literal): + if not isinstance(e.text, str): + raise TypeError( + "invalid grammar: unrecognized element " + "in production `grammar[{!r}][{}][{}].text`: {!r}" + .format(nt, i, j, e.text)) + return self.intern(e) + elif isinstance(e, UnicodeCategory): + if not isinstance(e.cat_prefix, str): + raise TypeError( + "invalid grammar: unrecognized element " + "in production `grammar[{!r}][{}][{}].cat_prefix`: {!r}" + .format(nt, i, j, e.cat_prefix)) + return self.intern(e) + elif isinstance(e, Exclude): + if not isinstance(e.inner, (str, Nt)): + raise TypeError( + "invalid grammar: unrecognized element " + "in production `grammar[{!r}][{}][{}].inner`: {!r}" + .format(nt, i, j, e.inner)) + exclusion_list = [] + for value in e.exclusion_list: + if not isinstance(value, (str, Nt)): + raise TypeError( + "invalid grammar: unrecognized element " + "in production `grammar[{!r}][{}][{}].exclusion_list`: {!r}" + .format(nt, i, j, value)) + value = validate_element(nt, i, j, value, context_params) + exclusion_list.append(value) + inner = validate_element(nt, i, j, e.inner, context_params) + return self.intern(Exclude(inner, tuple(exclusion_list))) + elif isinstance(e, Nt): + # Either the application or the original parameterized + # production must be present in the dictionary. 
+ if e not in my_nonterminals and e.name not in my_nonterminals: + raise ValueError( + "invalid grammar: unrecognized nonterminal " + "in production `grammar[{!r}][{}][{}]`: {!r}" + .format(nt, i, j, e.name)) + args = tuple(pair[0] for pair in e.args) + if e.name in nt_params and args != nt_params[e.name]: + raise ValueError( + "invalid grammar: wrong arguments passed to {!r} " + "in production `grammar[{!r}][{}][{}]`: " + "passed {!r}, expected {!r}" + .format(e.name, nt, i, j, + args, nt_params[e.name])) + for param_name, arg_expr in e.args: + if isinstance(arg_expr, Var): + if arg_expr.name not in context_params: + raise ValueError( + "invalid grammar: undefined variable {!r} " + "in production `grammar[{!r}][{}][{}]`" + .format(arg_expr.name, nt, i, j)) + return self.intern(e) + elif isinstance(e, (LookaheadRule, End, ErrorSymbol)): + return self.intern(e) + elif e is NoLineTerminatorHere: + return e + elif isinstance(e, CallMethod): + return self.intern(e) + else: + raise TypeError( + "invalid grammar: unrecognized element in production " + "`grammar[{!r}][{}][{}]`: {!r}" + .format(nt, i, j, e)) + assert False, "unreachable" + + def check_reduce_expr( + nt: LenientNt, + i: int, + rhs: Production, + expr: ReduceExprOrAccept) -> None: + if isinstance(expr, int): + concrete_len = sum(1 for e in rhs.body + if is_concrete_element(e)) + if not (0 <= expr < concrete_len): + raise ValueError( + "invalid grammar: element number {} out of range for " + "production {!r} in grammar[{!r}][{}].reducer ({!r})" + .format(expr, nt, rhs.body, i, rhs.reducer)) + elif isinstance(expr, CallMethod): + if not isinstance(expr.method, str): + raise TypeError( + "invalid grammar: method names must be strings, " + "not {!r}, in grammar[{!r}[{}].reducer" + .format(expr.method, nt, i)) + if not expr.method.isidentifier(): + name, space, pn = expr.method.partition(' ') + if space == ' ' and name.isidentifier() and pn.isdigit(): + pass + else: + raise ValueError( + "invalid grammar: invalid method name {!r} " + "(not an identifier), in grammar[{!r}[{}].reducer" + .format(expr.method, nt, i)) + for arg_expr in expr.args: + check_reduce_expr(nt, i, rhs, arg_expr) + elif expr is None: + pass + elif isinstance(expr, Some): + check_reduce_expr(nt, i, rhs, expr.inner) + else: + raise TypeError( + "invalid grammar: unrecognized reduce expression {!r} " + "in grammar[{!r}][{}].reducer" + .format(expr, nt, i)) + + def copy_rhs( + nt: LenientNt, + i: int, + sole_production: bool, + rhs: LenientProduction, + context_params: typing.Tuple[str, ...]) -> Production: + if isinstance(rhs, list): + # Bare list, no reducer. Desugar to a Production, inferring a + # reasonable default reducer. + nargs = sum(1 for e in rhs if is_concrete_element(e)) + reducer: ReduceExpr + if len(rhs) == 1 and nargs == 1: + reducer = 0 # don't call a method, just propagate the value + else: + # Call a method named after the production. If the + # nonterminal has exactly one production, there's no need + # to include the production index `i` to the method name. 
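+ # e.g. with the bare-list productions of example_grammar(), 'term' has
+ # three right-hand sides, so ['term', '*', 'unary'] (index 1) gets the
+ # inferred reducer CallMethod('term_1', (0, 1, 2)), while the single-symbol
+ # production ['unary'] just propagates its value with reducer 0.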
+ if sole_production: + method = str(nt) + else: + method = '{}_{}'.format(nt, i) + reducer = CallMethod(method, tuple(range(nargs))) + rhs = Production(rhs, reducer) + + if not isinstance(rhs, Production): + raise TypeError( + "invalid grammar: grammar[{!r}][{}] should be " + "a Production or list of grammar symbols, not {!r}" + .format(nt, i, rhs)) + + if rhs.condition is not None: + param, value = rhs.condition + if param not in context_params: + raise TypeError( + "invalid grammar: undefined parameter {!r} " + "in conditional for grammar[{!r}][{}]" + .format(param, nt, i)) + if rhs.reducer != 'accept': + check_reduce_expr(nt, i, rhs, rhs.reducer) + assert isinstance(rhs.body, list) + return rhs.copy_with(body=[ + validate_element(nt, i, j, e, context_params) + for j, e in enumerate(rhs.body) + ]) + + def copy_nt_def( + nt: LenientNt, + nt_def: typing.Union[NtDef, typing.List[LenientProduction]], + ) -> NtDef: + rhs_list: typing.Sequence[LenientProduction] + if isinstance(nt_def, NtDef): + for i, param in enumerate(nt_def.params): + if not isinstance(param, str): + raise TypeError( + "invalid grammar: parameter {} of {} should be " + "a string, not {!r}" + .format(i + 1, nt, param)) + params = nt_def.params + rhs_list = nt_def.rhs_list + ty = nt_def.type + else: + params = () + rhs_list = nt_def + ty = None + + if not isinstance(rhs_list, list): + raise TypeError( + "invalid grammar: grammar[{!r}] should be either a " + "list of right-hand sides or NtDef, not {!r}" + .format(nt, type(rhs_list).__name__)) + + sole_production = len(rhs_list) == 1 + productions = [copy_rhs(nt, i, sole_production, rhs, params) + for i, rhs in enumerate(rhs_list)] + return NtDef(params, productions, ty) + + def check_nt_key(nt: LenientNt) -> None: + if isinstance(nt, str): + if not nt.isidentifier(): + raise ValueError( + "invalid grammar: nonterminal names must be identifiers, not {!r}" + .format(nt)) + if nt in self.variable_terminals or nt in self.synthetic_terminals: + raise TypeError( + "invalid grammar: {!r} is both a nonterminal and a variable terminal" + .format(nt)) + elif isinstance(nt, Nt): + assert keys_are_nt # checked earlier + if not (isinstance(nt.name, (str, InitNt)) + and isinstance(nt.args, tuple)): + raise TypeError( + "invalid grammar: expected str or Nt(name=str, " + "args=tuple) keys in nonterminals dict, got {!r}" + .format(nt)) + check_nt_key(nt.name) + for pair in nt.args: + if (not isinstance(pair, tuple) + or len(pair) != 2 + or not isinstance(pair[0], str) + or not isinstance(pair[1], bool)): + raise TypeError( + "invalid grammar: expected tuple((str, bool)) args, got {!r}" + .format(nt)) + elif isinstance(nt, InitNt): + # Users don't include init nonterminals when initially creating + # a Grammar. They are automatically added below. But if this + # Grammar is being created by hacking on a previous Grammar, it + # will already have them. + if not isinstance(nt.goal, Nt): + raise TypeError( + "invalid grammar: InitNt.goal should be a nonterminal, " + "got {!r}" + .format(nt)) + # nt.goal is a "use", not a "def". Check it like a use. 
+ # Bogus question marks appear in error messages :-| + validate_element(nt, '?', '?', nt.goal, ()) + if nt.goal not in my_goal_nts: + raise TypeError( + "invalid grammar: nonterminal referenced by InitNt " + "is not in the list of goals: {!r}" + .format(nt)) + else: + raise TypeError( + "invalid grammar: expected string keys in nonterminals dict, got {!r}" + .format(nt)) + + def validate_nt( + nt: LenientNt, + nt_def: LenientNtDef + ) -> typing.Tuple[LenientNt, NtDef]: + check_nt_key(nt) + if isinstance(nt, InitNt): + # Check the form of init productions. Initially these look like + # [[goal]], but after the pipeline goes to work, they can be + # [[Optional(goal)]] or [[], [goal]]. + if not isinstance(nt_def, NtDef): + raise TypeError( + "invalid grammar: key {!r} must map to " + "value of type NtDef, not {!r}" + .format(nt, nt_def)) + rhs_list = nt_def.rhs_list + g = nt.goal + if (rhs_list != [Production([g], 0), + Production([Nt(nt, ()), End()], 'accept')] + and rhs_list != [Production([Optional(g)], 0), + Production([Nt(nt, ()), End()], 'accept')] + and rhs_list != [Production([End()], 'accept'), + Production([g, End()], 'accept'), + Production([Nt(nt, ()), End()], 'accept')]): + raise ValueError( + "invalid grammar: grammar[{!r}] is not one of " + "the expected forms: got {!r}" + .format(nt, rhs_list)) + + return nt, copy_nt_def(nt, nt_def) + + self.nonterminals = {} + for nt1, nt_def1 in my_nonterminals.items(): + nt, nt_def = validate_nt(nt1, nt_def1) + self.nonterminals[nt] = nt_def + for syn_term_name, t_set in synthetic_terminals.items(): + nt_def = NtDef((syn_term_name,), [Production([e], 0) for e in t_set], None) + nt, nt_def = validate_nt(syn_term_name, nt_def) + self.nonterminals[nt] = nt_def + # Remove synthetic terminals from the list of terminals. + all_terminals.remove(syn_term_name) + + self.terminals = OrderedFrozenSet(all_terminals) + + # Check types of reduce expressions and infer method types. But if the + # caller passed in precalculated type info, skip it -- otherwise we + # would redo type checking many times as we make minor changes to the + # Grammar along the pipeline. + if method_types is None: + types.infer_types(self) + else: + for nt, nt_def in self.nonterminals.items(): + assert isinstance(nt_def, NtDef) + assert isinstance(nt_def.type, types.Type) + self.methods = method_types + + # Synthesize "init" nonterminals. + self.init_nts = [] + for goal in my_goal_nts: + # Convert str goals to Nt objects and validate. + if isinstance(goal, str): + ok = goal in str_to_nt + if ok: + goal = str_to_nt[goal] + elif isinstance(goal, Nt): + if keys_are_nt: + ok = goal in my_nonterminals + else: + ok = goal.name in my_nonterminals + if not ok: + raise ValueError( + "goal nonterminal {!r} is undefined".format(goal)) + assert isinstance(goal, Nt) + + # Weird, but the key of an init nonterminal really is + # `Nt(InitNt(Nt(goal_name, goal_args)), ())`. It takes no arguments, + # but it refers to a goal that might take arguments. + init_nt = InitNt(goal) + init_key: LenientNt = init_nt + goal_nt = Nt(init_nt, ()) + if keys_are_nt: + init_key = goal_nt + if init_key not in self.nonterminals: + self.nonterminals[init_key] = NtDef( + (), + [Production([goal], 0), + Production([goal_nt, End()], 'accept')], + types.NoReturnType) + self.init_nts.append(goal_nt) + + # Add the various execution backends which would rely on the same parse table. + self.exec_modes = exec_modes + self.type_to_modes = type_to_modes + + # The argument is a list of extension.GrammarExtensions. 
The annotation is + # vague because this module does not import the extension module. It would + # be a cyclic dependency. + def patch(self, extensions: typing.List) -> None: + assert self.type_to_modes is not None + assert self.exec_modes is not None + if extensions == []: + return + # Copy of nonterminals which would be mutated by the patches. + nonterminals = copy.copy(self.nonterminals) + for ext in extensions: + # Add the given trait to the execution mode, depending on which + # type it got implemented for. + for mode in self.type_to_modes[ext.target.for_type]: + self.exec_modes[mode].add(ext.target.trait) + # Apply grammar transformations. + ext.apply_patch(self, nonterminals) + # Replace with the modified version of nonterminals + self.nonterminals = nonterminals + + def intern(self, obj: Internable) -> Internable: + """Return a shared copy of the immutable object `obj`. + + This saves memory and consistent use allows code to use `is` for + equality testing. + """ + try: + return self._cache[obj] + except KeyError: + self._cache[obj] = obj + return obj + + # Terminals are tokens that must appear verbatim in the input wherever they + # appear in the grammar, like the operators '+' '-' *' '/' and brackets '(' ')' + # in the example grammar. + def is_terminal(self, element: object) -> bool: + return type(element) is str + + def expand_set_of_terminals( + self, + terminals: typing.Iterable[typing.Union[str, None, ErrorSymbol]] + ) -> OrderedSet[typing.Union[str, None, ErrorSymbol]]: + """Copy a set of terminals, replacing any synthetic terminals with their representations. + + Returns a new OrderedSet. + + terminals - an iterable of terminals and/or other unique elements like + None or ErrorSymbol. + """ + result: OrderedSet[typing.Union[str, None, ErrorSymbol]] = OrderedSet() + for t in terminals: + if isinstance(t, str) and t in self.synthetic_terminals: + result |= self.expand_set_of_terminals(self.synthetic_terminals[t]) + else: + result.add(t) + return result + + def goals(self) -> typing.List[Nt]: + """Return a list of this grammar's goal nonterminals.""" + return [init_nt.name.goal for init_nt in self.init_nts] # type: ignore + + def with_nonterminals( + self, + nonterminals: typing.Mapping[LenientNt, LenientNtDef] + ) -> Grammar: + """Return a copy of self with the same attributes except different nonterminals.""" + if self.methods is not None: + for nt_def in nonterminals.values(): + assert isinstance(nt_def, NtDef) + assert nt_def.type is not None + return Grammar( + nonterminals, + goal_nts=self.goals(), + variable_terminals=self.variable_terminals, + synthetic_terminals=self.synthetic_terminals, + method_types=self.methods, + exec_modes=self.exec_modes, + type_to_modes=self.type_to_modes) + + # === A few methods for dumping pieces of grammar. + + def element_to_str(self, e: Element) -> str: + if isinstance(e, Nt): + return e.pretty() + elif self.is_terminal(e): + assert isinstance(e, str) + if e in self.variable_terminals or e in self.synthetic_terminals: + return e + return '"' + repr(e)[1:-1] + '"' + elif isinstance(e, Optional): + return self.element_to_str(e.inner) + "?" 
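+        # Illustrative sample outputs (assumed inputs): Optional(Nt('Expr'))
+        # renders as `Expr?`, the terminal '+' renders as `"+"`, and a
+        # variable terminal such as 'IDENT' renders unquoted.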
+ elif isinstance(e, LookaheadRule): + if len(e.set) == 1: + op = "==" if e.positive else "!=" + s = repr(list(e.set)[0]) + else: + op = "in" if e.positive else "not in" + s = '{' + repr(list(e.set))[1:-1] + '}' + return "[lookahead {} {}]".format(op, s) + elif isinstance(e, End): + return "<END>" + elif e is NoLineTerminatorHere: + return "[no LineTerminator here]" + elif isinstance(e, CallMethod): + return "{{ {} }}".format(expr_to_str(e)) + else: + return str(e) + + def symbols_to_str(self, rhs: typing.Iterable[Element]) -> str: + return " ".join(self.element_to_str(e) for e in rhs) + + def rhs_to_str(self, rhs: LenientProduction) -> str: + if isinstance(rhs, Production): + if rhs.condition is None: + prefix = '' + else: + param, value = rhs.condition + if value is True: + condition = "+" + param + elif value is False: + condition = "~" + param + else: + condition = "{} == {!r}".format(param, value) + prefix = "#[if {}] ".format(condition) + return prefix + self.rhs_to_str(rhs.body) + elif len(rhs) == 0: + return "[empty]" + else: + return self.symbols_to_str(rhs) + + def nt_to_str(self, nt: LenientNt) -> str: + if isinstance(nt, Nt): + return self.element_to_str(nt) + else: + return str(nt) + + def production_to_str( + self, + nt: LenientNt, + rhs: LenientProduction, + *reducer: ReduceExpr + ) -> str: + # As we have two ways of representing productions at the moment, just + # take multiple arguments :( + return "{} ::= {}{}".format( + self.nt_to_str(nt), + self.rhs_to_str(rhs), + "".join(" => " + expr_to_str(expr) for expr in reducer)) + + # The type of `item` is `lr0.LRItem`. No annotation because this module + # does not import `lr0`. It would be a cyclic dependency. + def lr_item_to_str(self, prods: typing.List, item: typing.Any) -> str: + prod = prods[item.prod_index] + if item.lookahead is None: + la = [] + else: + la = [self.element_to_str(item.lookahead)] + return "{} ::= {} >> {{{}}}".format( + self.element_to_str(prod.nt), + " ".join([self.element_to_str(e) for e in prod.rhs[:item.offset]] + + ["\N{MIDDLE DOT}"] + + la + + [self.element_to_str(e) for e in prod.rhs[item.offset:]]), + ", ".join( + "$" if t is None else self.element_to_str(t) + for t in item.followed_by) + ) + + def item_set_to_str( + self, + prods: typing.List, + item_set: OrderedFrozenSet + ) -> str: + return "{{{}}}".format( + ", ".join(self.lr_item_to_str(prods, item) for item in item_set) + ) + + def expand_terminal(self, t: str) -> OrderedFrozenSet[str]: + return self.synthetic_terminals.get(t) or OrderedFrozenSet([t]) + + def compatible_elements(self, e1: Element, e2: Element) -> bool: + # "type: ignore" because mypy doesn't know that `self.is_terminal(e1)` + # means `e1` is a terminal, and thus `self.expand_terminal(e1)` is OK. 
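+        # Illustrative example (assumed synthetic terminal): if
+        # synthetic_terminals maps 'IdentifierName' to {'let', 'async'},
+        # then compatible_elements('IdentifierName', 'let') is True, because
+        # the two expansions overlap.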
+        return (e1 == e2
+                or (self.is_terminal(e1)
+                    and self.is_terminal(e2)
+                    and len(self.expand_terminal(e1)           # type: ignore
+                            & self.expand_terminal(e2)) > 0))  # type: ignore
+
+    def compatible_sequences(
+            self,
+            seq1: typing.Sequence[Element],
+            seq2: typing.Sequence[Element]) -> bool:
+        """True if the two sequences could be the same terminals."""
+        return (len(seq1) == len(seq2)
+                and all(self.compatible_elements(e1, e2) for e1, e2 in zip(seq1, seq2)))
+
+    def dump(self) -> None:
+        for nt, nt_def in self.nonterminals.items():
+            left_side = self.nt_to_str(nt)
+            if nt_def.params:
+                left_side += "[" + ", ".join(nt_def.params) + "]"
+            print(left_side + " ::=")
+            for rhs in nt_def.rhs_list:
+                print(" ", self.rhs_to_str(rhs))
+            print()
+
+    def dump_type_info(self) -> None:
+        for nt, nt_def in self.nonterminals.items():
+            print(nt, nt_def.type)
+        for name, mty in self.methods.items():
+            print("fn {}({}) -> {}"
+                  .format(name,
+                          ", ".join(str(ty) for ty in mty.argument_types),
+                          str(mty.return_type)))
+
+    def is_shifted_element(self, e: Element) -> bool:
+        if isinstance(e, Nt):
+            return True
+        elif self.is_terminal(e):
+            return True
+        elif isinstance(e, Optional):
+            return True
+        elif isinstance(e, LookaheadRule):
+            return False
+        elif isinstance(e, End):
+            return True
+        elif e is NoLineTerminatorHere:
+            return True
+        return False
+
+
+@dataclass(frozen=True)
+class InitNt:
+    """InitNt(goal) is the name of the init nonterminal for the given goal.
+
+    One init nonterminal is created internally for each goal symbol in the grammar.
+
+    The idea is to have a nonterminal that the user has no control over, that is
+    never used in any production, but *only* as an entry point for the grammar,
+    that always has a single production "init_nt ::= goal_nt". This predictable
+    structure makes it easier to get into and out of parsing at run time.
+
+    When an init nonterminal is matched, we take the "accept" action rather than
+    a "reduce" action.
+    """
+    goal: Nt
+
+
+# *** Elements ****************************************************************
+#
+# Elements are the things that can appear in the .body list of a Production:
+#
+# * Strings represent terminals (see `Grammar.is_terminal`)
+#
+# * `Nt` objects refer to nonterminals.
+#
+# * `Optional` objects represent optional elements.
+#
+# * `LookaheadRule` objects are like lookahead assertions in regular
+#   expressions.
+#
+# * The `NoLineTerminatorHere` singleton object can appear between two other
+#   symbols to rule out line breaks between them.
+#
+# * `ErrorSymbol` objects never match anything produced by the lexer. Instead
+#   they match an ErrorToken that's artificially injected into the token
+#   stream at runtime, by the parser itself, just before a token that does
+#   not match anything else.
+
+
+def is_concrete_element(e: Element) -> bool:
+    """True if parsing the element `e` produces a value.
+
+    A production's concrete elements can be used in reduce expressions.
+    """
+    return not isinstance(e, (LookaheadRule, ErrorSymbol, NoLineTerminatorHereClass))
+
+
+# Nonterminals in the ECMAScript grammar can be parameterized; NtParameter is
+# the type of the parameters.
+#
+# For example, `BindingIdentifier[?Yield, ?Await]` is represented as
+# `Nt('BindingIdentifier', (('Yield', Var('Yield')), ('Await', Var('Await'))))`.
+#
+# A nonterminal-parameter-expression is represented by either a Var object or
+# the actual value, a boolean. (In theory, parameters don't *have* to be
+# boolean; all the code would probably work for anything hashable.
In practice, +# all parameters in the ECMAScript grammar are boolean.) +NtParameter = typing.Hashable + + +class Nt: + """Nt(name, ((param0, arg0), ...)) - An invocation of a nonterminal. + + Nonterminals are like lambdas. Each nonterminal in a grammar is defined by an + NtDef which has 0 or more parameters. + + Parameter names `param0...` are strings. The actual arguments `arg0...` are + NtParameters (see above). + """ + + __slots__ = ['name', 'args'] + + name: typing.Union[str, InitNt] + args: typing.Tuple[typing.Tuple[str, NtParameter], ...] + + def __init__(self, + name: typing.Union[str, InitNt], + args: typing.Tuple[typing.Tuple[str, NtParameter], ...] = ()): + self.name = name + self.args = args + + def __hash__(self) -> int: + return hash(('nt', self.name, self.args)) + + def __eq__(self, other: object) -> bool: + return (isinstance(other, Nt) + and (self.name, self.args) == (other.name, other.args)) + + def __repr__(self) -> str: + if self.args: + return 'Nt({!r}, {!r})'.format(self.name, self.args) + else: + return 'Nt({!r})'.format(self.name) + + def pretty(self) -> str: + """Unique version of this Nt to use in the Python runtime. + + Also used in debug/verbose output. + """ + def arg_to_str(name: str, value: NtParameter) -> str: + if value is True: + return '+' + name + elif value is False: + return '~' + name + elif isinstance(value, Var): + if value.name == name: + return '?' + value.name + return name + "=" + value.name + else: + return name + "=" + repr(value) + + if isinstance(self.name, InitNt): + return "Start_" + self.name.goal.pretty() + if len(self.args) == 0: + return self.name + return "{}[{}]".format(self.name, + ", ".join(arg_to_str(name, value) + for name, value in self.args)) + + +@dataclass(frozen=True) +class Optional: + """Optional(nt) matches either nothing or the given nt. + + Optional elements are expanded out before states are calculated, so the + core of the algorithm never sees them. + """ + inner: Element + + +@dataclass(frozen=True) +class Literal: + """Literal(str) matches a sequence of characters. + + Literal elements are sequences of characters which are expected to appear + verbatim in the input. + """ + text: str + + +@dataclass(frozen=True) +class UnicodeCategory: + """UnicodeCategory(str) matches any character with a category matching + the cat_prefix. + + UnicodeCategory elements are a set of literal elements which correspond to a + given unicode cat_prefix. + """ + cat_prefix: str + + +@dataclass(frozen=True) +class LookaheadRule: + """LookaheadRule(set, pos) imposes a lookahead restriction on whatever follows. + + It never consumes any tokens itself. Instead, the right-hand side + [LookaheadRule(frozenset(['a', 'b']), False), 'Thing'] + matches a Thing that does not start with the token `a` or `b`. + """ + set: typing.FrozenSet[str] + positive: bool + + +# A lookahead restriction really just specifies a set of allowed terminals. +# +# - No lookahead restriction at all is equivalent to a rule specifying all terminals. +# +# - A positive lookahead restriction explicitly lists all allowed tokens. +# +# - A negative lookahead restriction instead specifies the set of all tokens +# except a few. 
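+#
+# Illustrative example (not part of the original comment): intersecting the
+# positive restriction {'a', 'b'} with the negative restriction {'b'} keeps
+# only 'a', so
+#
+#     lookahead_intersect(LookaheadRule(frozenset({'a', 'b'}), True),
+#                         LookaheadRule(frozenset({'b'}), False))
+#
+# returns LookaheadRule(frozenset({'a'}), True).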
+# +def lookahead_contains(rule: typing.Optional[LookaheadRule], t: str) -> bool: + """True if the given lookahead restriction `rule` allows the terminal `t`.""" + return (rule is None + or (t in rule.set if rule.positive + else t not in rule.set)) + + +def lookahead_intersect( + a: typing.Optional[LookaheadRule], + b: typing.Optional[LookaheadRule] +) -> typing.Optional[LookaheadRule]: + """Returns a single rule enforcing both `a` and `b`, allowing only terminals that pass both.""" + if a is None: + return b + elif b is None: + return a + elif a.positive: + if b.positive: + return LookaheadRule(a.set & b.set, True) + else: + return LookaheadRule(a.set - b.set, True) + else: + if b.positive: + return LookaheadRule(b.set - a.set, True) + else: + return LookaheadRule(a.set | b.set, False) + + +class NoLineTerminatorHereClass: + def __str__(self) -> str: + return 'NoLineTerminatorHere' + + +NoLineTerminatorHere = NoLineTerminatorHereClass() + + +# Optional elements. These are expanded out before states are calculated, +# so the core of the algorithm never sees them. +@dataclass(frozen=True) +class Exclude: + """Exclude(nt1, nt2) matches if nt1 matches and nt2 does not.""" + inner: Element + exclusion_list: typing.Tuple[Element, ...] + + +# End. This is used to represent the terminal which is infinitely produced by +# the lexer when input end is reached. +@dataclass(frozen=True) +class End: + """End() represents the end of the input content.""" + + +# Special grammar symbol that can be consumed to handle a syntax error. +# +# The error code is passed to an error-handling routine at run time which +# decides if the error is recoverable or not. +@dataclass(frozen=True) +class ErrorSymbol: + """Special grammar symbol that can be consumed to handle a syntax error.""" + error_code: int + + +Element = typing.Union[ + str, + Optional, + Literal, + UnicodeCategory, + Exclude, + Nt, + LookaheadRule, + End, + ErrorSymbol, + NoLineTerminatorHereClass, + CallMethod] + + +@dataclass +class NtDef: + """Definition of a nonterminal. + + Instances have three attributes: + + .params - Tuple of strings, the names of the parameters. + + .rhs_list - List of Production objects. Arguments to Nt elements in the + productions can be Var(s) where `s in params`, indicating that parameter + should be passed through unchanged. + + .type - The type of runtime value produced by parsing an instance of this + nonterminal, or None. + + An NtDef is a sort of lambda. + + Some langauges have constructs that are allowed or disallowed in particular + situations. For example, in many languages `return` statements are allowed + only inside functions or methods. The ECMAScript standard (5.1.5 "Grammar + Notation") offers this example of the notation it uses to specify this sort + of thing: + + StatementList [Return] : + [+Return] ReturnStatement + ExpressionStatement + + This is an abbreviation for: + + StatementList : + ExpressionStatement + + StatementList_Return : + ReturnStatement + ExpressionStatement + + We offer NtDef.params as a way of representing this in our system. + + "StatementList": NtDef(("Return",), [ + Production(["ReturnStatement"], condition=("Return", True)), + ["ExpressionStatement"], + ], None), + + This is an abbreviation for: + + "StatementList_0": [ + ["ExpressionStatement"], + ], + "StatementList_1": [ + ["ReturnStatement"], + ["ExpressionStatement"], + ], + + """ + + __slots__ = ['params', 'rhs_list', 'type'] + + params: typing.Tuple[str, ...] 
+    rhs_list: typing.List[Production]
+    type: typing.Optional[types.Type]
+
+    def with_rhs_list(self, new_rhs_list: typing.List[Production]) -> NtDef:
+        return dataclasses.replace(self, rhs_list=new_rhs_list)
+
+
+@dataclass(frozen=True)
+class Var:
+    """Var(name) represents the run-time value of the parameter with the given name."""
+
+    name: str
+
+
+ReduceExpr = typing.Union[int, CallMethod, None, Some]
+ReduceExprOrAccept = typing.Union[ReduceExpr, str]
+
+# The Grammar constructor is very lax about the types you pass to it. It can
+# accept a `Dict[str, List[List[str]]]`, for example; it copies the data into
+# NtDef and Production objects.
+#
+# The `Lenient` types below are the relaxed types that Grammar() actually
+# accepts.
+LenientNt = typing.Union[Nt, str, InitNt]
+LenientProduction = typing.Union[Production, typing.List[Element]]
+LenientNtDef = typing.Union[NtDef, typing.List[LenientProduction]]
diff --git a/third_party/rust/jsparagus/jsparagus/lexer.py b/third_party/rust/jsparagus/jsparagus/lexer.py
new file mode 100644
index 0000000000..865068e9c7
--- /dev/null
+++ b/third_party/rust/jsparagus/jsparagus/lexer.py
@@ -0,0 +1,219 @@
+""" Lexical analysis is the breaking of a string into tokens. """
+
+import re
+import linecache
+from builtins import SyntaxError as BaseSyntaxError
+
+
+class SyntaxError(BaseSyntaxError):
+    pass
+
+
+class UnexpectedEndError(SyntaxError):
+    pass
+
+
+class LexicalGrammar:
+    """Quick and dirty lexer implementation.
+
+    In order to support multi-part lexing (multiple calls to .write()),
+    both 1. the `ignore` regular expression; and 2. the union of the family of
+    regular expressions given by `tokens` and `regexps`; must have the
+    following property: if they match a string s, they also match every prefix
+    of that string.
+
+    This requirement is not enforced by assertions; if it's not met, the
+    tokenizer will just have bugs when sent multiple chunks of data.
+    """
+    def __init__(self, tokens, ignore=r'[ \t]*', **regexps):
+        def token_to_re(token):
+            s = re.escape(token)
+            if s.isalpha():
+                s += r'\b'
+            return s
+
+        token_list = sorted(tokens.split(), key=len, reverse=True)
+        self.ignore_re = re.compile(ignore)
+        self.token_re = re.compile("|".join(token_to_re(token) for token in token_list))
+        self.parser_pairs = [(k, re.compile(v)) for k, v in regexps.items()]
+
+    def __call__(self, parser, filename=None):
+        return Tokenizer(self, parser, filename)
+
+
+class FlatStringLexer:
+    def __init__(self, parser, filename=None):
+        self.parser = parser
+        self.src = ''
+        self.previous_token_end = 0
+        self.current_token_start = 0
+        self.start_lineno = 1
+        self.start_column = 0
+        self.point = 0
+        self.filename = filename
+        self.closed = False
+
+    def write(self, text):
+        assert not self.closed
+        self.src += text
+        self._drain()
+
+    def close(self):
+        assert not self.closed
+        self.closed = True
+        self._drain()
+        assert self.src == ''
+        return self.parser.close(self)
+
+    def _drain(self):
+        assert self.previous_token_end == 0
+        assert self.current_token_start == 0
+        assert self.point == 0
+        closing = self.closed
+
+        terminal_id = self._match(closing)
+        while terminal_id is not None:
+            self.parser.write_terminal(self, terminal_id)
+            terminal_id = self._match(closing)
+
+        # Update position info.
+ discarded_text = self.src[:self.point] + newline_count = self.src[:self.point].count('\n') + self.start_lineno += newline_count + if newline_count > 0: + self.start_column = self.point - discarded_text.rindex('\n') + else: + self.start_column += self.point + + # Drop the parsed text and reset counters. Note that setting + # self.previous_token_end to 0 really is correct. Setting + # self.current_token_start to 0 is as good as anything else, because + # there is no current token. + self.src = self.src[self.point:] + self.point = 0 + self.previous_token_end = 0 + self.current_token_start = 0 + + def current_token_position(self): + src_pre = self.src[:self.current_token_start] + lineno = self.start_lineno + src_pre.count("\n") + if '\n' in src_pre: + line_start_index = src_pre.rfind("\n") + 1 + column = self.current_token_start - line_start_index # can be zero + else: + column = self.start_column + self.current_token_start + return lineno, column + + def current_line(self): + # OK, this is gruesome, but we return the current line if we have the + # whole thing and otherwise we ... try loading it from disk. + if '\n' in self.src[:self.current_token_start]: + line_start = self.src.rindex('\n', 0, self.current_token_start) + 1 + elif self.start_column == 0: + line_start = 0 + else: + line_start = -1 + + if line_start != -1: + line_end = self.src.find('\n', line_start) + if line_end == -1: + if self.closed: + return self.src[line_start:] + '\n' + else: + return self.src[line_start:line_end] + '\n' + + # Fallback case. Python's linecache.getline() deliberately silences all + # errors. + lineno = self.current_token_position()[0] + return linecache.getline(self.filename, lineno) + + def throw(self, msg_or_exception): + lineno, column = self.current_token_position() + if isinstance(msg_or_exception, Exception): + e = msg_or_exception + e.filename = self.filename + e.lineno = lineno + e.offset = column + 1 + else: + # Apparently this is the secret handshake to create a Python + # SyntaxError and get a good error message when Python prints it. + line = self.current_line() + args = (self.filename, lineno, column + 1, line) + e = SyntaxError(msg_or_exception, args) + raise e + + def throw_unexpected_end(self): + self.throw(UnexpectedEndError("unexpected end of input")) + + +class Tokenizer(FlatStringLexer): + def __init__(self, lexical_grammar, parser, filename=None): + super().__init__(parser, filename) + self.ignore_re = lexical_grammar.ignore_re + self.token_re = lexical_grammar.token_re + self.parser_pairs = lexical_grammar.parser_pairs + self.src = '' + self.filename = filename + self.last_point = 0 + self.point = 0 + self._current_match = None + + def take(self): + return self._current_match.group() + + def saw_line_terminator(self): + """True if there's a LineTerminator before the current token.""" + i = self.previous_token_end + j = self.current_token_start + ws_between = self.src[i:j] + return any(c in ws_between for c in '\r\n\u2028\u2029') + + def _match(self, closing): + # Advance over text matching ignore_re. + ignore_match = self.ignore_re.match(self.src, self.point) + if ignore_match is None: + raise ValueError("ignore_re should always match") + point = ignore_match.end() + if point == len(self.src): + if closing: + self.point = point + self._current_match = None + return None + + # Try the token_re. + token_match = self.token_re.match(self.src, point) + + # Try all the parser_pairs. 
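+        # Note (illustrative): parser_pairs comes from the keyword arguments
+        # to LexicalGrammar, e.g. IDENT=r'[A-Za-z_]\w*' becomes the pair
+        # ('IDENT', re.compile(r'[A-Za-z_]\w*')). Below, such a match is
+        # preferred over a token_re match only when it is strictly longer.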
+        for name, pattern in self.parser_pairs:
+            match = pattern.match(self.src, point)
+            if match is not None:
+                break
+        else:
+            name = match = None
+
+        if match is not None and token_match is not None and match.end() > token_match.end():
+            pass
+        elif token_match is not None:
+            name, match = token_match.group(0), token_match
+        elif match is not None:
+            pass
+        else:
+            self.throw("unexpected characters {!r}"
+                       .format(self.src[point:point + 12]))
+
+        # But how do we know subsequent .write() calls won't provide more text,
+        # extending this token? Here we take advantage of the odd requirement
+        # LexicalGrammar imposes on its users. Every prefix of a match is a
+        # match. So if this hypothetical "extended" token would match, then the
+        # entire remainder of self.src is a match.
+        if not closing and match.end() == len(self.src):
+            # This token might be extensible. Refuse to match.
+            self._current_match = None
+            return None
+
+        # This token definitely is not extensible.
+        self.previous_token_end = self.point
+        self.current_token_start = match.start()
+        self.point = match.end()
+        self._current_match = match
+        return name
diff --git a/third_party/rust/jsparagus/jsparagus/lr0.py b/third_party/rust/jsparagus/jsparagus/lr0.py
new file mode 100644
index 0000000000..5e4364df3c
--- /dev/null
+++ b/third_party/rust/jsparagus/jsparagus/lr0.py
@@ -0,0 +1,385 @@
+"""Generate a simple LR0 state graph from a CanonicalGrammar.
+
+The resulting graph may contain inconsistent states, which must be resolved by the
+ParseTable before a parser can be generated.
+"""
+
+from __future__ import annotations
+# mypy: disallow-untyped-defs, disallow-incomplete-defs, disallow-untyped-calls
+
+import collections
+from dataclasses import dataclass
+import typing
+
+from .actions import (Accept, Action, CheckNotOnNewLine, FunCall, Lookahead,
+                      OutputExpr, Unwind, Reduce, Seq)
+from .ordered import OrderedFrozenSet
+from .grammar import (CallMethod, Element, End, ErrorSymbol, Grammar,
+                      LookaheadRule, NoLineTerminatorHere, Nt, ReduceExpr,
+                      ReduceExprOrAccept, Some)
+from .rewrites import CanonicalGrammar, Prod
+from . import types
+
+
+# ## LR parsers: Why?
+#
+# Consider a single production `expr ::= expr "+" term` being parsed in a
+# recursive descent parser. As we read the source left to right, our parser's
+# internal state looks like this (marking our place with a dot):
+#
+#     expr ::= · expr "+" term
+#     expr ::= expr · "+" term
+#     expr ::= expr "+" · term
+#     expr ::= expr "+" term ·
+#
+# As we go, we build an AST. First we parse an *expr* and temporarily set it
+# aside. Then we expect to see a `+` operator. Then we parse a *term*. Then,
+# having got to the end, we create an AST node for the whole addition
+# expression.
+#
+# Since the grammar is nested, at run time we really have a stack of these
+# intermediate states.
+#
+# But how do we decide which production we should be matching? Often the first
+# token just tells us: the `while` keyword means there's a `while` statement
+# coming up. Grammars in which this is always the case are called LL(1). But
+# while it's possible to wrangle *most* of the ES grammar into an LL(1) form,
+# not everything works out. For example, here's the ES assignment syntax (much
+# simplified):
+#
+#     assignment ::= sum
+#     assignment ::= primitive "=" assignment
+#     sum ::= primitive
+#     sum ::= sum "+" primitive
+#     primitive ::= VAR
+#
+# Note that the bogus assignment `a + b = c` doesn't parse because `a + b`
+# isn't a primitive.
+#
+# Suppose we want to parse an expression, and the first token is `a`. We don't
+# know yet which *assignment* production to use. So this grammar is not in
+# LL(1).
+#
+#
+# ## LR parsers: How
+#
+# An LR parser generator allows for a *superposition* of states. While parsing,
+# we can sometimes have multiple productions at once that might match. It's
+# like how in quantum theory, Schrödinger's cat can tentatively be both alive
+# and dead until decisive information is observed.
+#
+# As we read `a = b + c`, our parser's internal state is like this
+# (eliding a few steps, like how we recognize that `a` is a primitive):
+#
+#     current point in input     superposed parser state
+#     ----------------------     -----------------------
+#     · a = b + c                assignment ::= · sum
+#                                assignment ::= · primitive "=" assignment
+#
+#     (Then, after recognizing that `a` is a *primitive*...)
+#
+#     a · = b + c                sum ::= primitive ·
+#                                assignment ::= primitive · "=" assignment
+#
+#     (The next token, `=`, rules out the first alternative,
+#     collapsing the waveform...)
+#
+#     a = · b + c                assignment ::= primitive "=" · assignment
+#
+#     (After recognizing that `b` is a primitive, we again have options:)
+#
+#     a = b · + c                sum ::= primitive ·
+#                                assignment ::= primitive · "=" assignment
+#
+# And so on. We call each dotted production an "LR item", and the superposition
+# of several LR items is called a "state". (It is not meant to be clear yet
+# just *how* the parser knows which rules might match.)
+#
+# Since the grammar is nested, at run time we'll have a stack of these parser
+# state superpositions.
+#
+# The uncertainty in LR parsing means that code for an LR parser written by
+# hand, in the style of recursive descent, would read like gibberish. What we
+# can do instead is generate a parser table.
+
+
+@dataclass(frozen=True, order=True)
+class LRItem:
+    """A snapshot of progress through a single specific production.
+
+    * `prod_index` identifies the production. (Every production in the grammar
+      gets a unique index; see the loop that computes
+      prods_with_indexes_by_nt.)
+
+    * `offset` is the position of the cursor within the production.
+
+    `lookahead` and `followed_by` are two totally different kinds of lookahead.
+
+    * `lookahead` is the LookaheadRule, if any, that applies to the immediately
+      upcoming input. It is present only if this LRItem is subject to a
+      `[lookahead]` restriction; otherwise it's None. These restrictions can't
+      extend beyond the end of a production, or else the grammar is invalid.
+      This implements the lookahead restrictions in the ECMAScript grammar.
+      It is not part of any account of LR I've seen.
+
+    * `followed_by` is a completely different kind of lookahead restriction.
+      This is the kind of lookahead that is a central part of canonical LR
+      table generation. It applies to the token *after* the whole current
+      production, so `followed_by` always applies to completely different and
+      later tokens than `lookahead`. `followed_by` is a set of terminals; if
+      `None` is in this set, it means `END`, not that the LRItem is
+      unrestricted.
+    """
+
+    prod_index: int
+    offset: int
+    lookahead: typing.Optional[LookaheadRule]
+    followed_by: OrderedFrozenSet[typing.Optional[str]]
+
+
+# A Term is the label on an edge from one state to another. It's normally a
+# terminal, nonterminal, or epsilon action. A state can also have a special
+# catchall edge, labeled with an ErrorSymbol.
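+#
+# Illustrative examples (assumed, not from the original comment): a Term may
+# be the terminal '+', the nonterminal Nt('sum'), an ErrorSymbol, or an
+# action such as Lookahead(frozenset({'a'}), True).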
+ShiftedTerm = typing.Union[str, Nt, ErrorSymbol] +Term = typing.Union[ShiftedTerm, Action] + + +def on_stack(grammar: Grammar, term: Element) -> bool: + """Returns whether an element of a production is consuming stack space or + not.""" + if isinstance(term, Nt): + return True + elif grammar.is_terminal(term): + return True + elif isinstance(term, LookaheadRule): + return False + elif isinstance(term, ErrorSymbol): + return True + elif isinstance(term, End): + return True + elif term is NoLineTerminatorHere: + # No line terminator is a property of the next token being shifted. It + # is implemented as an action which once shifted past the next term, + # will check whether the previous term shifted is on a new line. + return False + elif isinstance(term, CallMethod): + return False + raise ValueError(term) + + +def callmethods_to_funcalls( + expr: ReduceExprOrAccept, + pop: int, + ret: str, + depth: int, + funcalls: typing.List[Action] +) -> OutputExpr: + """Lower a reduce-expression to the OutputExpr language. + + CallMethod expressions are replaced with FunCalls; all new FunCalls created + in this way are appended to `funcalls`. + """ + + if isinstance(expr, int): + stack_index = pop - expr + if depth == 0: + call = FunCall("id", (stack_index,), fallible=False, + trait=types.Type("AstBuilder"), set_to=ret) + funcalls.append(call) + return ret + else: + return stack_index + elif isinstance(expr, Some): + res = callmethods_to_funcalls(expr.inner, pop, ret, depth, funcalls) + # "type: ignore" because Some is not generic, unfortunately. + return Some(res) # type: ignore + elif expr is None: + return None + elif isinstance(expr, CallMethod): + def convert_args(args: typing.Iterable[ReduceExpr]) -> typing.Iterator[OutputExpr]: + for i, arg in enumerate(args): + yield callmethods_to_funcalls(arg, pop, ret + "_{}".format(i), depth + 1, funcalls) + args = tuple(convert_args(expr.args)) + call = FunCall(expr.method, args, + trait=expr.trait, + fallible=expr.fallible, + set_to=ret) + funcalls.append(call) + return ret + elif expr == "accept": + funcalls.append(Accept()) + return ret + else: + raise ValueError(expr) + + +class LR0Generator: + """Provide a way to iterate over the grammar, given a set of LR items.""" + __slots__ = [ + "grammar", + "lr_items", + "key", + "_hash", + ] + + grammar: CanonicalGrammar + lr_items: OrderedFrozenSet[LRItem] + key: str + _hash: int + + def __init__( + self, + grammar: CanonicalGrammar, + lr_items: typing.Iterable[LRItem] = () + ) -> None: + self.grammar = grammar + self.lr_items = OrderedFrozenSet(lr_items) + # This is used to reuse states which have already been encoded. 
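+        # Illustrative (assumed indices): an item set containing items for
+        # (prod_index=3, offset=1) and (prod_index=7, offset=0) gets the key
+        # "(3, 1)\n(7, 0)\n", so two generators built from the same items
+        # compare and hash equal.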
+        self.key = "".join(repr((item.prod_index, item.offset)) + "\n"
+                           for item in sorted(self.lr_items))
+        self._hash = hash(self.key)
+
+    def __eq__(self, other: object) -> bool:
+        return isinstance(other, LR0Generator) and self.key == other.key
+
+    def __hash__(self) -> int:
+        return self._hash
+
+    def __str__(self) -> str:
+        s = ""
+        for lr_item in self.lr_items:
+            s += self.grammar.grammar.lr_item_to_str(self.grammar.prods, lr_item)
+            s += "\n"
+        return s
+
+    def stable_locations(self) -> OrderedFrozenSet[str]:
+        locations = []
+        for lr_item in self.lr_items:
+            locations.append(self.grammar.grammar.lr_item_to_str(self.grammar.prods, lr_item))
+        return OrderedFrozenSet(sorted(locations))
+
+    @staticmethod
+    def start(grammar: CanonicalGrammar, nt: Nt) -> LR0Generator:
+        lr_items: typing.List[LRItem] = []
+        # Visit the initial non-terminal, as well as all the non-terminals
+        # which appear at the left of each production.
+        todo: typing.Deque[Nt] = collections.deque()
+        visited_nts = []
+        todo.append(nt)
+        while todo:
+            nt = todo.popleft()
+            if nt in visited_nts:
+                continue
+            visited_nts.append(nt)
+            for prod_index, _ in grammar.prods_with_indexes_by_nt[nt]:
+                assert isinstance(prod_index, int)
+                lr_items.append(LRItem(
+                    prod_index=prod_index,
+                    offset=0,
+                    lookahead=None,
+                    followed_by=OrderedFrozenSet(),
+                ))
+
+                prod = grammar.prods[prod_index]
+                assert isinstance(prod, Prod)
+                try:
+                    term = prod.rhs[0]
+                    if isinstance(term, Nt):
+                        todo.append(term)
+                except IndexError:
+                    pass
+        return LR0Generator(grammar, lr_items)
+
+    def transitions(self) -> typing.Dict[Term, LR0Generator]:
+        """Returns the dictionary which maps state transitions to the next
+        LR0Generators. This can be used to generate the states and the
+        transitions between the states of an LR0 parse table."""
+        followed_by: typing.DefaultDict[Term, typing.List[LRItem]]
+        followed_by = collections.defaultdict(list)
+        for lr_item in self.lr_items:
+            self.item_transitions(lr_item, followed_by)
+
+        return {k: LR0Generator(self.grammar, lr_items)
+                for k, lr_items in followed_by.items()}
+
+    def item_transitions(
+            self,
+            lr_item: LRItem,
+            followed_by: typing.DefaultDict[Term, typing.List[LRItem]]
+    ) -> None:
+        """Given one LRItem, register all the transitions and LR Items reachable
+        through these transitions."""
+        prod = self.grammar.prods[lr_item.prod_index]
+        assert isinstance(prod, Prod)
+
+        # Read the term located at the offset in the production.
+        if lr_item.offset < len(prod.rhs):
+            term = prod.rhs[lr_item.offset]
+            if isinstance(term, Nt):
+                pass
+            elif self.grammar.grammar.is_terminal(term):
+                pass
+            elif isinstance(term, LookaheadRule):
+                term = Lookahead(term.set, term.positive)
+            elif isinstance(term, ErrorSymbol):
+                # An ErrorSymbol is treated as a terminal. These terminals are
+                # produced on demand by the error handling code.
+                pass
+            elif isinstance(term, End):
+                # End is considered a terminal which is produced once by the
+                # lexer upon reaching the end. However, the parser might
+                # finish without consuming the End terminal, if there is no
+                # ambiguity on whether the End is expected.
+                pass
+            elif term is NoLineTerminatorHere:
+                # Check whether the following terminal is on a new line. If
+                # not, this would produce a syntax error. The argument is the
+                # terminal offset.
+ term = CheckNotOnNewLine() + elif isinstance(term, CallMethod): + funcalls: typing.List[Action] = [] + pop = sum(1 for e in prod.rhs[:lr_item.offset] if on_stack(self.grammar.grammar, e)) + callmethods_to_funcalls(term, pop, "expr", 0, funcalls) + term = Seq(funcalls) + + elif lr_item.offset == len(prod.rhs): + # Add the reduce operation as a state transition in the generated + # parse table. (TODO: this supposed that the canonical form did not + # move the reduce action to be part of the production) + pop = sum(1 for e in prod.rhs if on_stack(self.grammar.grammar, e)) + term = Reduce(Unwind(prod.nt, pop)) + expr = prod.reducer + if expr is not None: + funcalls = [] + callmethods_to_funcalls(expr, pop, "value", 0, funcalls) + term = Seq(funcalls + [term]) + else: + # No edges after the reduce operation. + return + + # Add terminals, non-terminals and lookahead actions, as transitions to + # the next LR Item. + new_transition = term not in followed_by + followed_by[term].append(LRItem( + prod_index=lr_item.prod_index, + offset=lr_item.offset + 1, + lookahead=None, + followed_by=OrderedFrozenSet(), + )) + + # If the term is a non-terminal, then recursively add transitions from + # the beginning of all the productions which are matching this + # non-terminal. + # + # Only do it once per non-terminal to avoid infinite recursion on + # left-recursive grammars. + if isinstance(term, Nt) and new_transition: + for prod_index, _ in self.grammar.prods_with_indexes_by_nt[term]: + assert isinstance(prod_index, int) + self.item_transitions(LRItem( + prod_index=prod_index, + offset=0, + lookahead=None, + followed_by=OrderedFrozenSet(), + ), followed_by) diff --git a/third_party/rust/jsparagus/jsparagus/main.py b/third_party/rust/jsparagus/jsparagus/main.py new file mode 100755 index 0000000000..f54827099f --- /dev/null +++ b/third_party/rust/jsparagus/jsparagus/main.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 + +"""jsparagus/main.py - Generate a parser from a pgen grammar. + +(This is for testing. pgen will likely go away. Ignore this for now.) +""" + +import sys +import argparse +from . import parse_pgen +from . import gen + + +def main(): + parser = argparse.ArgumentParser(description="Generate a parser.") + parser.add_argument('--target', choices=['python', 'rust'], default='rust', + help="target language to use when printing the parser tables") + parser.add_argument('grammar', metavar='FILE', nargs=1, + help=".pgen file containing the grammar") + options = parser.parse_args() + + [pgen_filename] = options.grammar + grammar = parse_pgen.load_grammar(pgen_filename) + gen.generate_parser(sys.stdout, grammar, target=options.target) + + +if __name__ == '__main__': + main() diff --git a/third_party/rust/jsparagus/jsparagus/ordered.py b/third_party/rust/jsparagus/jsparagus/ordered.py new file mode 100644 index 0000000000..93a46ae2bf --- /dev/null +++ b/third_party/rust/jsparagus/jsparagus/ordered.py @@ -0,0 +1,157 @@ +""" Deterministic data structures. """ + +from __future__ import annotations +# mypy: disallow-untyped-defs, disallow-incomplete-defs, disallow-untyped-calls + +from typing import (AbstractSet, Dict, Generic, Iterable, Iterator, List, + MutableSet, Optional, TypeVar, Union) + + +__all__ = ['OrderedSet', 'OrderedFrozenSet'] + + +# Type parameters for OrderedSet[T] and OrderedFrozenSet[T]. +# +# These should be `bound=Hashable`, but I gather MyPy has some issues with +# hashability. It doesn't enforce hashability on Dict and Set. 
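+# Illustrative behaviour of the classes below (not in the original
+# docstrings): OrderedSet('cab') iterates as 'c', 'a', 'b', while
+# OrderedSet('cab') == OrderedSet('abc') because equality ignores order.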
+T = TypeVar('T') +T_co = TypeVar('T_co', covariant=True) + +S = TypeVar('S') + + +class OrderedSet(Generic[T], MutableSet[T]): + """Like set(), but iteration order is insertion order. + + Two OrderedSets, x and y, that have different insertion order are still + considered equal (x == y) if they contain the same elements. + """ + _data: Dict[T, int] + + def __init__(self, values: Iterable[T] = ()): + self._data = {} + for v in values: + self.add(v) + + def __repr__(self) -> str: + return self.__class__.__name__ + "(" + repr(list(self)) + ")" + + def add(self, v: T) -> None: + self._data[v] = 1 + + def extend(self, iterable: Iterable[T]) -> None: + for v in iterable: + self.add(v) + + def remove(self, v: T) -> None: + del self._data[v] + + def discard(self, v: T) -> None: + if v in self._data: + del self._data[v] + + def __eq__(self, other: object) -> bool: + return isinstance(other, OrderedSet) and self._data == other._data + + def __hash__(self) -> int: + raise TypeError("unhashable type: " + self.__class__.__name__) + + def __len__(self) -> int: + return len(self._data) + + def __bool__(self) -> bool: + return bool(self._data) + + def __contains__(self, v: object) -> bool: + return v in self._data + + def __iter__(self) -> Iterator[T]: + return iter(self._data) + + def __ior__(self, other: AbstractSet[S]) -> OrderedSet[Union[T, S]]: + for v in other: + self.add(v) # type: ignore + return self # type: ignore + + def __or__(self, other: AbstractSet[S]) -> OrderedSet[Union[T, S]]: + u: OrderedSet[Union[T, S]] = OrderedSet(self) + u |= other + return u + + def __iand__(self, other: AbstractSet[T]) -> OrderedSet[T]: + self._data = {v: 1 for v in self if v in other} + return self + + def __and__(self, other: AbstractSet[T]) -> OrderedSet[T]: + return OrderedSet(v for v in self if v in other) + + def __sub__(self, other: AbstractSet[T]) -> OrderedSet[T]: + return OrderedSet(v for v in self if v not in other) + + def __isub__(self, other: AbstractSet[T]) -> OrderedSet[T]: + for v in other: + if v in self: + self.remove(v) + return self + + def is_disjoint(self, other: AbstractSet[T]) -> bool: + for v in self: + if v in other: + return False + return True + + +class OrderedFrozenSet(Generic[T_co], AbstractSet[T_co]): + """Like frozenset(), but iteration order is insertion order. + + Two OrderedFrozenSets, x and y, that have different insertion order are + still considered equal (x == y) if they contain the same elements. 
+ """ + __slots__ = ['_data', '_hash'] + + _data: Dict[T_co, int] + _hash: Optional[int] + + def __init__(self, values: Iterable[T_co] = ()): + self._data = {v: 1 for v in values} + self._hash = None + + def __repr__(self) -> str: + return self.__class__.__name__ + "(" + repr(list(self)) + ")" + + def __len__(self) -> int: + return len(self._data) + + def __bool__(self) -> bool: + return bool(self._data) + + def __contains__(self, v: object) -> bool: + return v in self._data + + def __iter__(self) -> Iterator[T_co]: + return iter(self._data) + + def __eq__(self, other: object) -> bool: + return isinstance(other, OrderedFrozenSet) and self._data == other._data + + def __hash__(self) -> int: + if self._hash is None: + self._hash = hash(frozenset(self._data)) + return self._hash + + def __and__(self, other: AbstractSet[T_co]) -> OrderedFrozenSet[T_co]: + return OrderedFrozenSet(v for v in self._data if v in other) + + def __or__(self, other: AbstractSet[S]) -> OrderedFrozenSet[Union[T_co, S]]: + values: List[Union[T_co, S]] = list(self) + values += list(other) + return OrderedFrozenSet(values) + + def __sub__(self, other: AbstractSet[T_co]) -> OrderedFrozenSet[T_co]: + return OrderedFrozenSet(v for v in self._data if v not in other) + + def is_disjoint(self, other: AbstractSet[T_co]) -> bool: + for v in self: + if v in other: + return False + return True diff --git a/third_party/rust/jsparagus/jsparagus/parse_pgen.py b/third_party/rust/jsparagus/jsparagus/parse_pgen.py new file mode 100755 index 0000000000..99545b6804 --- /dev/null +++ b/third_party/rust/jsparagus/jsparagus/parse_pgen.py @@ -0,0 +1,280 @@ +#!/usr/bin/env python + +"""parse_pgen.py - Parse grammars written in the pgen parser specification language. + +I'm not sure I want to keep this pgen mini-language around; ignore this for now. +""" + +import sys +from collections import namedtuple + +from .lexer import LexicalGrammar +from .grammar import Grammar, Production, CallMethod, is_concrete_element, Optional +from . import gen +from . import parse_pgen_generated + + +pgen_lexer = LexicalGrammar( + "goal nt var token { } ; ? 
= => ( ) ,", + r'([ \t\r\n]|#.*)*', + IDENT=r'[A-Za-z_](?:\w|[_-])*', + STR=r'"[^\\\n"]*"', + MATCH=r'\$(?:0|[1-9][0-9]*)', + COMMENT=r'//.*', +) + + +def list_of(e, allow_comments=False): + nt = e + 's' + prods = [ + Production([e], CallMethod('single', (0,))), + Production([nt, e], CallMethod('append', (0, 1))), + ] + if allow_comments: + prods.append(Production(['COMMENT'], CallMethod('empty', (0,)))) + return prods + + +def call_method(name, body): + arg_indexes = [] + current = 0 + for e in body: + if is_concrete_element(e): + if e not in discards: + arg_indexes.append(current) + current += 1 + + return CallMethod(name, tuple(arg_indexes)) + + +def prod(body, reducer): + if isinstance(reducer, str): + reducer = call_method(reducer, body) + return Production(body, reducer) + + +discards = set('token var nt goal Some None = => ; ( ) { } , ?'.split()) + +pgen_grammar = Grammar( + { + 'grammar': [ + [Optional('token_defs'), 'nt_defs'] + ], + 'token_defs': list_of('token_def'), + 'token_def': [ + prod(['token', 'IDENT', '=', 'STR', ';'], 'const_token'), + prod(['var', 'token', 'IDENT', ';'], 'var_token'), + ], + 'nt_defs': [ + prod(['nt_def'], 'nt_defs_single'), + prod(['nt_defs', 'nt_def'], 'nt_defs_append'), + ], + 'nt_def': [ + prod([Optional('COMMENT'), Optional('goal'), 'nt', 'IDENT', '{', + Optional('prods'), '}'], 'nt_def'), + ], + 'prods': list_of('prod', allow_comments=True), + 'prod': [ + prod(['terms', Optional('reducer'), ';'], 'prod'), + ], + 'terms': list_of('term'), + 'term': [ + ['symbol'], + prod(['symbol', '?'], 'optional'), + ], + 'symbol': [ + prod(['IDENT'], 'ident'), + prod(['STR'], 'str'), + ], + 'reducer': [ + prod(['=>', 'expr'], 1) + ], + 'expr': [ + prod(['MATCH'], 'expr_match'), + prod(['IDENT', '(', Optional('expr_args'), ')'], 'expr_call'), + prod(['Some', '(', 'expr', ')'], 'expr_some'), + prod(['None'], 'expr_none'), + ], + 'expr_args': [ + prod(['expr'], 'args_single'), + prod(['expr_args', ',', 'expr'], 'args_append'), + ], + }, + goal_nts=['grammar'], + variable_terminals=['IDENT', 'STR', 'MATCH', 'COMMENT'] +) + + +Literal = namedtuple("Literal", "chars") + +default_token_list = [ + ("Var", "var"), + ("Token", "token"), + ("Goal", "goal"), + ("Nt", "nt"), + ("IDENT", None), + ("STR", None), + ("OpenBrace", "{"), + ("CloseBrace", "}"), + ("OpenParenthesis", "("), + ("CloseParenthesis", ")"), + ("Colon", ":"), + ("EqualSign", "="), + ("Asterisk", "*"), + ("PlusSign", "+"), + ("MinusSign", "-"), + ("Slash", "/"), + ("Semicolon", ";"), + ("QuestionMark", "?"), + ("RightArrow", "->"), + ("Comma", ","), +] + + +class AstBuilder: + def grammar(self, token_defs, nt_defs): + nonterminals, goal_nts = nt_defs + return (token_defs or default_token_list, nonterminals, goal_nts) + + def empty(self, value): + return [] + + def single(self, value): + return [value] + + def append(self, values, value): + values.append(value) + return values + + def const_token(self, name, picture): + assert picture[0] == '"' + assert picture[-1] == '"' + return (name, picture[1:-1]) + + def var_token(self, name): + return (name, None) + + def comment(self, comment): + pass + + def nt_defs_single(self, nt_def): + return self.nt_defs_append(({}, []), nt_def) + + def nt_defs_append(self, grammar_in, nt_def): + is_goal, nt, prods = nt_def + grammar, goal_nts = grammar_in + if nt in grammar: + raise ValueError("multiple definitions for nt {}".format(nt)) + grammar[nt] = prods + if is_goal: + goal_nts.append(nt) + return grammar, goal_nts + + def nt_def(self, _comment, goal_kw, ident, prods): 
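+        # Illustrative (assumed pgen source): for `goal nt grammar { ... }`,
+        # goal_kw is the string "goal", so is_goal below is True, and the
+        # result is (True, 'grammar', [Production objects built from prods]).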
+ is_goal = goal_kw == "goal" + prods = [Production(body, reducer) for body, reducer in prods] + return (is_goal, ident, prods) + + def prod(self, symbols, reducer): + if reducer is None: + if sum(1 for e in symbols if is_concrete_element(e)) == 1: + reducer = 0 + else: + raise ValueError("reducer required for {!r}".format(symbols)) + return (symbols, reducer) + + def optional(self, sym): + return Optional(sym) + + def ident(self, sym): + return sym + + def str(self, sym): + assert len(sym) > 1 + assert sym[0] == '"' + assert sym[-1] == '"' + chars = sym[1:-1] # This is a bit sloppy. + return Literal(chars) + + def expr_match(self, match): + assert match.startswith('$') + return int(match[1:]) + + def expr_call(self, ident, args): + return CallMethod(ident, tuple(args or ())) + + def args_single(self, expr): + return [expr] + + def args_append(self, args, arg): + args.append(arg) + return args + + +def check_grammar(result): + tokens, nonterminals, goal_nts = result + tokens_by_name = {} + tokens_by_image = {} + for name, image in tokens: + if name in tokens_by_name: + raise ValueError("token `{}` redeclared".format(name)) + tokens_by_name[name] = image + if image is not None and image in tokens_by_image: + raise ValueError("multiple tokens look like \"{}\"".format(image)) + tokens_by_image[image] = name + if name in nonterminals: + raise ValueError("`{}` is declared as both a token and a nonterminal (pick one)".format(name)) + + def check_element(nt, i, e): + if isinstance(e, Optional): + return Optional(check_element(nt, i, e.inner)) + elif isinstance(e, Literal): + if e.chars not in tokens_by_image: + raise ValueError("in {} production {}: undeclared token \"{}\"".format(nt, i, e.chars)) + return e.chars + else: + assert isinstance(e, str), e.__class__.__name__ + if e in nonterminals: + return e + elif e in tokens_by_name: + image = tokens_by_name[e] + if image is not None: + return image + return e + else: + raise ValueError("in {} production {}: undeclared symbol {}".format(nt, i, e)) + + out = {nt: [] for nt in nonterminals} + for nt, rhs_list in nonterminals.items(): + for i, p in enumerate(rhs_list): + out_rhs = [check_element(nt, i, e) for e in p.body] + out[nt].append(p.copy_with(body=out_rhs)) + + return (tokens, out, goal_nts) + + +def load_grammar(filename): + with open(filename) as f: + text = f.read() + parser = parse_pgen_generated.Parser(builder=AstBuilder()) + lexer = pgen_lexer(parser, filename=filename) + lexer.write(text) + result = lexer.close() + tokens, nonterminals, goals = check_grammar(result) + variable_terminals = [name for name, image in tokens if image is None] + return Grammar(nonterminals, + goal_nts=goals, + variable_terminals=variable_terminals) + + +def regenerate(): + import sys + gen.generate_parser(sys.stdout, pgen_grammar) + + +if __name__ == '__main__': + if sys.argv[1:] == ['--regenerate']: + regenerate() + else: + print("usage: python -m jsparagus.parse_pgen --regenerate") + sys.exit(1) diff --git a/third_party/rust/jsparagus/jsparagus/parse_pgen_generated.py b/third_party/rust/jsparagus/jsparagus/parse_pgen_generated.py new file mode 100644 index 0000000000..0c14f9a7c1 --- /dev/null +++ b/third_party/rust/jsparagus/jsparagus/parse_pgen_generated.py @@ -0,0 +1,1429 @@ +# type: ignore + +from jsparagus import runtime +from jsparagus.runtime import (Nt, InitNt, End, ErrorToken, StateTermValue, + ShiftError, ShiftAccept) + +def state_43_actions(parser, lexer): + # { value = AstBuilder::id(1) [off: 0]; Unwind(Nt(InitNt(goal=Nt('grammar'))), 1, 0) } + 
+ value = None + value = parser.stack[-1].value + replay = [] + replay.append(StateTermValue(0, Nt(InitNt(goal=Nt('grammar'))), value, False)) + del parser.stack[-1:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_84_actions(parser, lexer, r0) + return + +def state_44_actions(parser, lexer): + # { value = AstBuilder::nt_defs_single(1) [off: 0]; Unwind(Nt('nt_defs'), 1, 0) } + + value = None + value = parser.methods.nt_defs_single(parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('nt_defs'), value, False)) + del parser.stack[-1:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_82_actions(parser, lexer, r0) + return + +def state_45_actions(parser, lexer): + # { value = AstBuilder::single(1) [off: 0]; Unwind(Nt('token_defs'), 1, 0) } + + value = None + value = parser.methods.single(parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('token_defs'), value, False)) + del parser.stack[-1:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_83_actions(parser, lexer, r0) + return + +def state_46_actions(parser, lexer): + # { value = AstBuilder::nt_defs_append(2, 1) [off: 0]; Unwind(Nt('nt_defs'), 2, 0) } + + value = None + value = parser.methods.nt_defs_append(parser.stack[-2].value, parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('nt_defs'), value, False)) + del parser.stack[-2:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_82_actions(parser, lexer, r0) + return + +def state_47_actions(parser, lexer): + # { value = AstBuilder::append(2, 1) [off: 0]; Unwind(Nt('token_defs'), 2, 0) } + + value = None + value = parser.methods.append(parser.stack[-2].value, parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('token_defs'), value, False)) + del parser.stack[-2:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_83_actions(parser, lexer, r0) + return + +def state_48_actions(parser, lexer): + # { Accept(); Unwind(Nt(InitNt(goal=Nt('grammar'))), 2, 0) } + + value = None + raise ShiftAccept() + replay = [] + replay.append(StateTermValue(0, Nt(InitNt(goal=Nt('grammar'))), value, False)) + del parser.stack[-2:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_84_actions(parser, lexer, r0) + return + +def state_49_actions(parser, lexer): + # { value = AstBuilder::nt_def(None, None, 3, None) [off: 0]; Unwind(Nt('nt_def'), 4, 0) } + + value = None + value = parser.methods.nt_def(None, None, parser.stack[-3].value, None) + replay = [] + replay.append(StateTermValue(0, Nt('nt_def'), value, False)) + del parser.stack[-4:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_107_actions(parser, lexer, r0) + return + +def state_50_actions(parser, lexer): + # { value = AstBuilder::single(1) [off: 0]; Unwind(Nt('prods'), 1, 0) } + + value = None + value = parser.methods.single(parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('prods'), value, False)) + del parser.stack[-1:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_94_actions(parser, lexer, r0) + return + +def state_51_actions(parser, lexer): + # { value = AstBuilder::single(1) [off: 0]; Unwind(Nt('terms'), 1, 0) } + + value = None + value = parser.methods.single(parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('terms'), value, False)) + del parser.stack[-1:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_97_actions(parser, lexer, r0) + return + +def 
state_52_actions(parser, lexer): + # { value = AstBuilder::ident(1) [off: 0]; Unwind(Nt('symbol'), 1, 0) } + + value = None + value = parser.methods.ident(parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('symbol'), value, False)) + del parser.stack[-1:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_91_actions(parser, lexer, r0) + return + +def state_53_actions(parser, lexer): + # { value = AstBuilder::str(1) [off: 0]; Unwind(Nt('symbol'), 1, 0) } + + value = None + value = parser.methods.str(parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('symbol'), value, False)) + del parser.stack[-1:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_91_actions(parser, lexer, r0) + return + +def state_54_actions(parser, lexer): + # { value = AstBuilder::empty(1) [off: 0]; Unwind(Nt('prods'), 1, 0) } + + value = None + value = parser.methods.empty(parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('prods'), value, False)) + del parser.stack[-1:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_94_actions(parser, lexer, r0) + return + +def state_55_actions(parser, lexer): + # { value = AstBuilder::var_token(2) [off: 0]; Unwind(Nt('token_def'), 4, 0) } + + value = None + value = parser.methods.var_token(parser.stack[-2].value) + replay = [] + replay.append(StateTermValue(0, Nt('token_def'), value, False)) + del parser.stack[-4:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_100_actions(parser, lexer, r0) + return + +def state_56_actions(parser, lexer): + # { value = AstBuilder::nt_def(None, None, 4, Some(inner=2)) [off: 0]; Unwind(Nt('nt_def'), 5, 0) } + + value = None + value = parser.methods.nt_def(None, None, parser.stack[-4].value, parser.stack[-2].value) + replay = [] + replay.append(StateTermValue(0, Nt('nt_def'), value, False)) + del parser.stack[-5:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_107_actions(parser, lexer, r0) + return + +def state_57_actions(parser, lexer): + # { value = AstBuilder::append(2, 1) [off: 0]; Unwind(Nt('prods'), 2, 0) } + + value = None + value = parser.methods.append(parser.stack[-2].value, parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('prods'), value, False)) + del parser.stack[-2:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_94_actions(parser, lexer, r0) + return + +def state_58_actions(parser, lexer): + # { value = AstBuilder::prod(2, None) [off: 0]; Unwind(Nt('prod'), 2, 0) } + + value = None + value = parser.methods.prod(parser.stack[-2].value, None) + replay = [] + replay.append(StateTermValue(0, Nt('prod'), value, False)) + del parser.stack[-2:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_101_actions(parser, lexer, r0) + return + +def state_59_actions(parser, lexer): + # { value = AstBuilder::append(2, 1) [off: 0]; Unwind(Nt('terms'), 2, 0) } + + value = None + value = parser.methods.append(parser.stack[-2].value, parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('terms'), value, False)) + del parser.stack[-2:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_97_actions(parser, lexer, r0) + return + +def state_60_actions(parser, lexer): + # { value = AstBuilder::optional(2) [off: 0]; Unwind(Nt('term'), 2, 0) } + + value = None + value = parser.methods.optional(parser.stack[-2].value) + replay = [] + replay.append(StateTermValue(0, Nt('term'), value, False)) + del parser.stack[-2:] 
+ parser.replay.extend(replay) + r0 = parser.replay.pop() + state_111_actions(parser, lexer, r0) + return + +def state_61_actions(parser, lexer): + # { value = AstBuilder::nt_def(Some(inner=5), None, 3, None) [off: 0]; Unwind(Nt('nt_def'), 5, 0) } + + value = None + value = parser.methods.nt_def(parser.stack[-5].value, None, parser.stack[-3].value, None) + replay = [] + replay.append(StateTermValue(0, Nt('nt_def'), value, False)) + del parser.stack[-5:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_107_actions(parser, lexer, r0) + return + +def state_62_actions(parser, lexer): + # { value = AstBuilder::nt_def(None, Some(inner=5), 3, None) [off: 0]; Unwind(Nt('nt_def'), 5, 0) } + + value = None + value = parser.methods.nt_def(None, parser.stack[-5].value, parser.stack[-3].value, None) + replay = [] + replay.append(StateTermValue(0, Nt('nt_def'), value, False)) + del parser.stack[-5:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_107_actions(parser, lexer, r0) + return + +def state_63_actions(parser, lexer): + # { value = AstBuilder::const_token(4, 2) [off: 0]; Unwind(Nt('token_def'), 5, 0) } + + value = None + value = parser.methods.const_token(parser.stack[-4].value, parser.stack[-2].value) + replay = [] + replay.append(StateTermValue(0, Nt('token_def'), value, False)) + del parser.stack[-5:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_100_actions(parser, lexer, r0) + return + +def state_64_actions(parser, lexer): + # { value = AstBuilder::prod(3, Some(inner=2)) [off: 0]; Unwind(Nt('prod'), 3, 0) } + + value = None + value = parser.methods.prod(parser.stack[-3].value, parser.stack[-2].value) + replay = [] + replay.append(StateTermValue(0, Nt('prod'), value, False)) + del parser.stack[-3:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_101_actions(parser, lexer, r0) + return + +def state_65_actions(parser, lexer): + # { value = AstBuilder::id(1) [off: 0]; Unwind(Nt('reducer'), 2, 0) } + + value = None + value = parser.stack[-1].value + replay = [] + replay.append(StateTermValue(0, Nt('reducer'), value, False)) + del parser.stack[-2:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_102_actions(parser, lexer, r0) + return + +def state_66_actions(parser, lexer): + # { value = AstBuilder::expr_match(1) [off: 0]; Unwind(Nt('expr'), 1, 0) } + + value = None + value = parser.methods.expr_match(parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('expr'), value, False)) + del parser.stack[-1:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_108_actions(parser, lexer, r0) + return + +def state_67_actions(parser, lexer): + # { value = AstBuilder::expr_none() [off: 0]; Unwind(Nt('expr'), 1, 0) } + + value = None + value = parser.methods.expr_none() + replay = [] + replay.append(StateTermValue(0, Nt('expr'), value, False)) + del parser.stack[-1:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_108_actions(parser, lexer, r0) + return + +def state_68_actions(parser, lexer): + # { value = AstBuilder::nt_def(Some(inner=6), None, 4, Some(inner=2)) [off: 0]; Unwind(Nt('nt_def'), 6, 0) } + + value = None + value = parser.methods.nt_def(parser.stack[-6].value, None, parser.stack[-4].value, parser.stack[-2].value) + replay = [] + replay.append(StateTermValue(0, Nt('nt_def'), value, False)) + del parser.stack[-6:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_107_actions(parser, lexer, r0) + return + +def state_69_actions(parser, lexer): 
+ # { value = AstBuilder::nt_def(Some(inner=6), Some(inner=5), 3, None) [off: 0]; Unwind(Nt('nt_def'), 6, 0) } + + value = None + value = parser.methods.nt_def(parser.stack[-6].value, parser.stack[-5].value, parser.stack[-3].value, None) + replay = [] + replay.append(StateTermValue(0, Nt('nt_def'), value, False)) + del parser.stack[-6:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_107_actions(parser, lexer, r0) + return + +def state_70_actions(parser, lexer): + # { value = AstBuilder::nt_def(None, Some(inner=6), 4, Some(inner=2)) [off: 0]; Unwind(Nt('nt_def'), 6, 0) } + + value = None + value = parser.methods.nt_def(None, parser.stack[-6].value, parser.stack[-4].value, parser.stack[-2].value) + replay = [] + replay.append(StateTermValue(0, Nt('nt_def'), value, False)) + del parser.stack[-6:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_107_actions(parser, lexer, r0) + return + +def state_71_actions(parser, lexer): + # { value = AstBuilder::nt_def(Some(inner=7), Some(inner=6), 4, Some(inner=2)) [off: 0]; Unwind(Nt('nt_def'), 7, 0) } + + value = None + value = parser.methods.nt_def(parser.stack[-7].value, parser.stack[-6].value, parser.stack[-4].value, parser.stack[-2].value) + replay = [] + replay.append(StateTermValue(0, Nt('nt_def'), value, False)) + del parser.stack[-7:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_107_actions(parser, lexer, r0) + return + +def state_72_actions(parser, lexer): + # { value = AstBuilder::expr_call(3, None) [off: 0]; Unwind(Nt('expr'), 3, 0) } + + value = None + value = parser.methods.expr_call(parser.stack[-3].value, None) + replay = [] + replay.append(StateTermValue(0, Nt('expr'), value, False)) + del parser.stack[-3:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_108_actions(parser, lexer, r0) + return + +def state_73_actions(parser, lexer): + # { value = AstBuilder::args_single(1) [off: 0]; Unwind(Nt('expr_args'), 1, 0) } + + value = None + value = parser.methods.args_single(parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('expr_args'), value, False)) + del parser.stack[-1:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_109_actions(parser, lexer, r0) + return + +def state_74_actions(parser, lexer): + # { value = AstBuilder::expr_call(4, Some(inner=2)) [off: 0]; Unwind(Nt('expr'), 4, 0) } + + value = None + value = parser.methods.expr_call(parser.stack[-4].value, parser.stack[-2].value) + replay = [] + replay.append(StateTermValue(0, Nt('expr'), value, False)) + del parser.stack[-4:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_108_actions(parser, lexer, r0) + return + +def state_75_actions(parser, lexer): + # { value = AstBuilder::expr_some(2) [off: 0]; Unwind(Nt('expr'), 4, 0) } + + value = None + value = parser.methods.expr_some(parser.stack[-2].value) + replay = [] + replay.append(StateTermValue(0, Nt('expr'), value, False)) + del parser.stack[-4:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_108_actions(parser, lexer, r0) + return + +def state_76_actions(parser, lexer): + # { value = AstBuilder::args_append(3, 1) [off: 0]; Unwind(Nt('expr_args'), 3, 0) } + + value = None + value = parser.methods.args_append(parser.stack[-3].value, parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('expr_args'), value, False)) + del parser.stack[-3:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_109_actions(parser, lexer, r0) + return + +def 
state_77_actions(parser, lexer): + # { value = AstBuilder::grammar(None, 1) [off: 1]; Unwind(Nt('grammar'), 1, 1) } + + value = None + value = parser.methods.grammar(None, parser.stack[-2].value) + replay = [] + replay.append(parser.stack.pop()) + replay.append(StateTermValue(0, Nt('grammar'), value, False)) + del parser.stack[-1:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_110_actions(parser, lexer, r0) + return + +def state_78_actions(parser, lexer): + # { value = AstBuilder::grammar(Some(inner=2), 1) [off: 1]; Unwind(Nt('grammar'), 2, 1) } + + value = None + value = parser.methods.grammar(parser.stack[-3].value, parser.stack[-2].value) + replay = [] + replay.append(parser.stack.pop()) + replay.append(StateTermValue(0, Nt('grammar'), value, False)) + del parser.stack[-2:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_110_actions(parser, lexer, r0) + return + +def state_79_actions(parser, lexer): + # { value = AstBuilder::id(1) [off: 1]; Unwind(Nt('term'), 1, 1) } + + value = None + value = parser.stack[-2].value + replay = [] + replay.append(parser.stack.pop()) + replay.append(StateTermValue(0, Nt('term'), value, False)) + del parser.stack[-1:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_111_actions(parser, lexer, r0) + return + +def state_80_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # Replay((12,)) + + value = None + parser.replay_action(12) + top = parser.stack.pop() + top = StateTermValue(12, top.term, top.value, top.new_line) + parser.stack.append(top) + return + +def state_81_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # Replay((13,)) + + value = None + parser.replay_action(13) + top = parser.stack.pop() + top = StateTermValue(13, top.term, top.value, top.new_line) + parser.stack.append(top) + return + +def state_82_actions(parser, lexer, a0): + parser.replay.extend([a0]) + + value = None + if parser.top_state() in [10]: + r0 = parser.replay.pop() + state_80_actions(parser, lexer, r0) + return + if parser.top_state() in [11]: + r0 = parser.replay.pop() + state_81_actions(parser, lexer, r0) + return + +def state_83_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # Replay((11,)) + + value = None + parser.replay_action(11) + top = parser.stack.pop() + top = StateTermValue(11, top.term, top.value, top.new_line) + parser.stack.append(top) + return + +def state_84_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # Replay((23,)) + + value = None + parser.replay_action(23) + top = parser.stack.pop() + top = StateTermValue(23, top.term, top.value, top.new_line) + parser.stack.append(top) + return + +def state_85_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # { value = AstBuilder::nt_defs_single(1) [off: -1]; Unwind(Nt('nt_defs'), 1, -1) } + parser.stack.append(parser.replay.pop()) + + value = None + value = parser.methods.nt_defs_single(parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('nt_defs'), value, False)) + del parser.stack[-1:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_82_actions(parser, lexer, r0) + return + +def state_86_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # { value = AstBuilder::nt_defs_append(2, 1) [off: -1]; Unwind(Nt('nt_defs'), 2, -1) } + parser.stack.append(parser.replay.pop()) + + value = None + value = parser.methods.nt_defs_append(parser.stack[-2].value, parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('nt_defs'), value, False)) + del 
parser.stack[-2:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_82_actions(parser, lexer, r0) + return + +def state_87_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # Replay((4,)) + + value = None + parser.replay_action(4) + top = parser.stack.pop() + top = StateTermValue(4, top.term, top.value, top.new_line) + parser.stack.append(top) + return + +def state_88_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # Replay((5,)) + + value = None + parser.replay_action(5) + top = parser.stack.pop() + top = StateTermValue(5, top.term, top.value, top.new_line) + parser.stack.append(top) + return + +def state_89_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # Replay((6,)) + + value = None + parser.replay_action(6) + top = parser.stack.pop() + top = StateTermValue(6, top.term, top.value, top.new_line) + parser.stack.append(top) + return + +def state_90_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # Replay((7,)) + + value = None + parser.replay_action(7) + top = parser.stack.pop() + top = StateTermValue(7, top.term, top.value, top.new_line) + parser.stack.append(top) + return + +def state_91_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # Replay((9,)) + + value = None + parser.replay_action(9) + top = parser.stack.pop() + top = StateTermValue(9, top.term, top.value, top.new_line) + parser.stack.append(top) + return + +def state_92_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # { value = AstBuilder::single(1) [off: -1]; Unwind(Nt('token_defs'), 1, -1) } + parser.stack.append(parser.replay.pop()) + + value = None + value = parser.methods.single(parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('token_defs'), value, False)) + del parser.stack[-1:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_83_actions(parser, lexer, r0) + return + +def state_93_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # { value = AstBuilder::append(2, 1) [off: -1]; Unwind(Nt('token_defs'), 2, -1) } + parser.stack.append(parser.replay.pop()) + + value = None + value = parser.methods.append(parser.stack[-2].value, parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('token_defs'), value, False)) + del parser.stack[-2:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_83_actions(parser, lexer, r0) + return + +def state_94_actions(parser, lexer, a0): + parser.replay.extend([a0]) + + value = None + if parser.top_state() in [0]: + r0 = parser.replay.pop() + state_87_actions(parser, lexer, r0) + return + if parser.top_state() in [1]: + r0 = parser.replay.pop() + state_88_actions(parser, lexer, r0) + return + if parser.top_state() in [2]: + r0 = parser.replay.pop() + state_89_actions(parser, lexer, r0) + return + if parser.top_state() in [3]: + r0 = parser.replay.pop() + state_90_actions(parser, lexer, r0) + return + +def state_95_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # { value = AstBuilder::single(1) [off: -1]; Unwind(Nt('prods'), 1, -1) } + parser.stack.append(parser.replay.pop()) + + value = None + value = parser.methods.single(parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('prods'), value, False)) + del parser.stack[-1:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_94_actions(parser, lexer, r0) + return + +def state_96_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # { value = AstBuilder::append(2, 1) [off: -1]; Unwind(Nt('prods'), 2, -1) } + 
parser.stack.append(parser.replay.pop()) + + value = None + value = parser.methods.append(parser.stack[-2].value, parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('prods'), value, False)) + del parser.stack[-2:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_94_actions(parser, lexer, r0) + return + +def state_97_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # Replay((8,)) + + value = None + parser.replay_action(8) + top = parser.stack.pop() + top = StateTermValue(8, top.term, top.value, top.new_line) + parser.stack.append(top) + return + +def state_98_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # { value = AstBuilder::single(1) [off: -1]; Unwind(Nt('terms'), 1, -1) } + parser.stack.append(parser.replay.pop()) + + value = None + value = parser.methods.single(parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('terms'), value, False)) + del parser.stack[-1:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_97_actions(parser, lexer, r0) + return + +def state_99_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # { value = AstBuilder::append(2, 1) [off: -1]; Unwind(Nt('terms'), 2, -1) } + parser.stack.append(parser.replay.pop()) + + value = None + value = parser.methods.append(parser.stack[-2].value, parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('terms'), value, False)) + del parser.stack[-2:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_97_actions(parser, lexer, r0) + return + +def state_100_actions(parser, lexer, a0): + parser.replay.extend([a0]) + + value = None + if parser.top_state() in [10]: + r0 = parser.replay.pop() + state_92_actions(parser, lexer, r0) + return + if parser.top_state() in [11]: + r0 = parser.replay.pop() + state_93_actions(parser, lexer, r0) + return + +def state_101_actions(parser, lexer, a0): + parser.replay.extend([a0]) + + value = None + if parser.top_state() in [0, 1, 2, 3]: + r0 = parser.replay.pop() + state_95_actions(parser, lexer, r0) + return + if parser.top_state() in [4, 5, 6, 7]: + r0 = parser.replay.pop() + state_96_actions(parser, lexer, r0) + return + +def state_102_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # Replay((38,)) + + value = None + parser.replay_action(38) + top = parser.stack.pop() + top = StateTermValue(38, top.term, top.value, top.new_line) + parser.stack.append(top) + return + +def state_103_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # { value = AstBuilder::id(1) [off: -1]; Unwind(Nt('reducer'), 2, -1) } + parser.stack.append(parser.replay.pop()) + + value = None + value = parser.stack[-1].value + replay = [] + replay.append(StateTermValue(0, Nt('reducer'), value, False)) + del parser.stack[-2:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_102_actions(parser, lexer, r0) + return + +def state_104_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # { value = AstBuilder::args_single(1) [off: -1]; Unwind(Nt('expr_args'), 1, -1) } + parser.stack.append(parser.replay.pop()) + + value = None + value = parser.methods.args_single(parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('expr_args'), value, False)) + del parser.stack[-1:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_109_actions(parser, lexer, r0) + return + +def state_105_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # Replay((42,)) + + value = None + parser.replay_action(42) + top = 
parser.stack.pop() + top = StateTermValue(42, top.term, top.value, top.new_line) + parser.stack.append(top) + return + +def state_106_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # { value = AstBuilder::args_append(3, 1) [off: -1]; Unwind(Nt('expr_args'), 3, -1) } + parser.stack.append(parser.replay.pop()) + + value = None + value = parser.methods.args_append(parser.stack[-3].value, parser.stack[-1].value) + replay = [] + replay.append(StateTermValue(0, Nt('expr_args'), value, False)) + del parser.stack[-3:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_109_actions(parser, lexer, r0) + return + +def state_107_actions(parser, lexer, a0): + parser.replay.extend([a0]) + + value = None + if parser.top_state() in [10, 11]: + r0 = parser.replay.pop() + state_85_actions(parser, lexer, r0) + return + if parser.top_state() in [12, 13]: + r0 = parser.replay.pop() + state_86_actions(parser, lexer, r0) + return + +def state_108_actions(parser, lexer, a0): + parser.replay.extend([a0]) + + value = None + if parser.top_state() in [15]: + r0 = parser.replay.pop() + state_103_actions(parser, lexer, r0) + return + if parser.top_state() in [14]: + r0 = parser.replay.pop() + state_104_actions(parser, lexer, r0) + return + if parser.top_state() in [16]: + r0 = parser.replay.pop() + state_105_actions(parser, lexer, r0) + return + if parser.top_state() in [17]: + r0 = parser.replay.pop() + state_106_actions(parser, lexer, r0) + return + +def state_109_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # Replay((41,)) + + value = None + parser.replay_action(41) + top = parser.stack.pop() + top = StateTermValue(41, top.term, top.value, top.new_line) + parser.stack.append(top) + return + +def state_110_actions(parser, lexer, a0): + parser.replay.extend([a0]) + # { value = AstBuilder::id(1) [off: -1]; Unwind(Nt(InitNt(goal=Nt('grammar'))), 1, -1) } + parser.stack.append(parser.replay.pop()) + + value = None + value = parser.stack[-1].value + replay = [] + replay.append(StateTermValue(0, Nt(InitNt(goal=Nt('grammar'))), value, False)) + del parser.stack[-1:] + parser.replay.extend(replay) + r0 = parser.replay.pop() + state_84_actions(parser, lexer, r0) + return + +def state_111_actions(parser, lexer, a0): + parser.replay.extend([a0]) + + value = None + if parser.top_state() in [0, 1, 2, 3, 4, 5, 6, 7]: + r0 = parser.replay.pop() + state_98_actions(parser, lexer, r0) + return + if parser.top_state() in [8]: + r0 = parser.replay.pop() + state_99_actions(parser, lexer, r0) + return + +actions = [ + # 0. + + {'}': 49, 'IDENT': 52, 'STR': 53, 'COMMENT': 54, Nt('prods'): 4, Nt('prod'): 50, Nt('terms'): 8, Nt('term'): 51, Nt('symbol'): 9}, + + # 1. + + {'}': 61, 'IDENT': 52, 'STR': 53, 'COMMENT': 54, Nt('prods'): 5, Nt('prod'): 50, Nt('terms'): 8, Nt('term'): 51, Nt('symbol'): 9}, + + # 2. + + {'}': 62, 'IDENT': 52, 'STR': 53, 'COMMENT': 54, Nt('prods'): 6, Nt('prod'): 50, Nt('terms'): 8, Nt('term'): 51, Nt('symbol'): 9}, + + # 3. + + {'}': 69, 'IDENT': 52, 'STR': 53, 'COMMENT': 54, Nt('prods'): 7, Nt('prod'): 50, Nt('terms'): 8, Nt('term'): 51, Nt('symbol'): 9}, + + # 4. + + {'}': 56, 'IDENT': 52, 'STR': 53, Nt('prod'): 57, Nt('terms'): 8, Nt('term'): 51, Nt('symbol'): 9}, + + # 5. + + {'}': 68, 'IDENT': 52, 'STR': 53, Nt('prod'): 57, Nt('terms'): 8, Nt('term'): 51, Nt('symbol'): 9}, + + # 6. + + {'}': 70, 'IDENT': 52, 'STR': 53, Nt('prod'): 57, Nt('terms'): 8, Nt('term'): 51, Nt('symbol'): 9}, + + # 7. 
+ + {'}': 71, 'IDENT': 52, 'STR': 53, Nt('prod'): 57, Nt('terms'): 8, Nt('term'): 51, Nt('symbol'): 9}, + + # 8. + + {';': 58, 'IDENT': 52, 'STR': 53, '=>': 15, Nt('term'): 59, Nt('symbol'): 9, Nt('reducer'): 38}, + + # 9. + + {'=>': 79, 'STR': 79, 'IDENT': 79, ';': 79, '?': 60, Nt('reducer'): 79, Nt('symbol'): 79, Nt('term'): 79}, + + # 10. + + {'nt': 18, 'COMMENT': 19, 'goal': 20, 'token': 21, 'var': 22, Nt('grammar'): 43, Nt('nt_defs'): 12, Nt('nt_def'): 44, Nt('token_defs'): 11, Nt('token_def'): 45, Nt(InitNt(goal=Nt('grammar'))): 23}, + + # 11. + + {'nt': 18, 'COMMENT': 19, 'goal': 20, 'token': 21, 'var': 22, Nt('nt_defs'): 13, Nt('nt_def'): 44, Nt('token_def'): 47}, + + # 12. + + {End(): 77, 'goal': 20, 'COMMENT': 19, 'nt': 18, Nt('nt_def'): 46}, + + # 13. + + {End(): 78, 'goal': 20, 'COMMENT': 19, 'nt': 18, Nt('nt_def'): 46}, + + # 14. + + {')': 72, 'MATCH': 66, 'IDENT': 39, 'Some': 40, 'None': 67, Nt('expr_args'): 41, Nt('expr'): 73}, + + # 15. + + {'MATCH': 66, 'IDENT': 39, 'Some': 40, 'None': 67, Nt('expr'): 65}, + + # 16. + + {'MATCH': 66, 'IDENT': 39, 'Some': 40, 'None': 67, Nt('expr'): 42}, + + # 17. + + {'MATCH': 66, 'IDENT': 39, 'Some': 40, 'None': 67, Nt('expr'): 76}, + + # 18. + + {'IDENT': 25}, + + # 19. + + {'nt': 26, 'goal': 27}, + + # 20. + + {'nt': 28}, + + # 21. + + {'IDENT': 29}, + + # 22. + + {'token': 30}, + + # 23. + + {End(): 48}, + + # 24. + + {}, + + # 25. + + {'{': 0}, + + # 26. + + {'IDENT': 31}, + + # 27. + + {'nt': 32}, + + # 28. + + {'IDENT': 33}, + + # 29. + + {'=': 34}, + + # 30. + + {'IDENT': 35}, + + # 31. + + {'{': 1}, + + # 32. + + {'IDENT': 36}, + + # 33. + + {'{': 2}, + + # 34. + + {'STR': 37}, + + # 35. + + {';': 55}, + + # 36. + + {'{': 3}, + + # 37. + + {';': 63}, + + # 38. + + {';': 64}, + + # 39. + + {'(': 14}, + + # 40. + + {'(': 16}, + + # 41. + + {')': 74, ',': 17}, + + # 42. + + {')': 75}, + + # 43. + + state_43_actions, + + # 44. + + state_44_actions, + + # 45. + + state_45_actions, + + # 46. + + state_46_actions, + + # 47. + + state_47_actions, + + # 48. + + state_48_actions, + + # 49. + + state_49_actions, + + # 50. + + state_50_actions, + + # 51. + + state_51_actions, + + # 52. + + state_52_actions, + + # 53. + + state_53_actions, + + # 54. + + state_54_actions, + + # 55. + + state_55_actions, + + # 56. + + state_56_actions, + + # 57. + + state_57_actions, + + # 58. + + state_58_actions, + + # 59. + + state_59_actions, + + # 60. + + state_60_actions, + + # 61. + + state_61_actions, + + # 62. + + state_62_actions, + + # 63. + + state_63_actions, + + # 64. + + state_64_actions, + + # 65. + + state_65_actions, + + # 66. + + state_66_actions, + + # 67. + + state_67_actions, + + # 68. + + state_68_actions, + + # 69. + + state_69_actions, + + # 70. + + state_70_actions, + + # 71. + + state_71_actions, + + # 72. + + state_72_actions, + + # 73. + + state_73_actions, + + # 74. + + state_74_actions, + + # 75. + + state_75_actions, + + # 76. + + state_76_actions, + + # 77. + + state_77_actions, + + # 78. + + state_78_actions, + + # 79. + + state_79_actions, + + # 80. + + state_80_actions, + + # 81. + + state_81_actions, + + # 82. + + state_82_actions, + + # 83. + + state_83_actions, + + # 84. + + state_84_actions, + + # 85. + + state_85_actions, + + # 86. + + state_86_actions, + + # 87. + + state_87_actions, + + # 88. + + state_88_actions, + + # 89. + + state_89_actions, + + # 90. + + state_90_actions, + + # 91. + + state_91_actions, + + # 92. + + state_92_actions, + + # 93. + + state_93_actions, + + # 94. + + state_94_actions, + + # 95. 
+ + state_95_actions, + + # 96. + + state_96_actions, + + # 97. + + state_97_actions, + + # 98. + + state_98_actions, + + # 99. + + state_99_actions, + + # 100. + + state_100_actions, + + # 101. + + state_101_actions, + + # 102. + + state_102_actions, + + # 103. + + state_103_actions, + + # 104. + + state_104_actions, + + # 105. + + state_105_actions, + + # 106. + + state_106_actions, + + # 107. + + state_107_actions, + + # 108. + + state_108_actions, + + # 109. + + state_109_actions, + + # 110. + + state_110_actions, + + # 111. + + state_111_actions, + +] + +error_codes = [ + None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, + None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, + None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, + None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, + None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, + None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, + None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, +] + +goal_nt_to_init_state = {'grammar': 10} + +class DefaultMethods: + def nt_defs_single(self, x0): + return ('nt_defs_single', x0) + def single(self, x0): + return ('single', x0) + def nt_defs_append(self, x0, x1): + return ('nt_defs_append', x0, x1) + def append(self, x0, x1): + return ('append', x0, x1) + def nt_def(self, x0, x1, x2, x3): + return ('nt_def', x0, x1, x2, x3) + def ident(self, x0): + return ('ident', x0) + def str(self, x0): + return ('str', x0) + def empty(self, x0): + return ('empty', x0) + def var_token(self, x0): + return ('var_token', x0) + def prod(self, x0, x1): + return ('prod', x0, x1) + def optional(self, x0): + return ('optional', x0) + def const_token(self, x0, x1): + return ('const_token', x0, x1) + def expr_match(self, x0): + return ('expr_match', x0) + def expr_none(self, ): + return ('expr_none', ) + def expr_call(self, x0, x1): + return ('expr_call', x0, x1) + def args_single(self, x0): + return ('args_single', x0) + def expr_some(self, x0): + return ('expr_some', x0) + def args_append(self, x0, x1): + return ('args_append', x0, x1) + def grammar(self, x0, x1): + return ('grammar', x0, x1) + +class Parser(runtime.Parser): + def __init__(self, goal='grammar', builder=None): + if builder is None: + builder = DefaultMethods() + super().__init__(actions, error_codes, goal_nt_to_init_state[goal], builder) + diff --git a/third_party/rust/jsparagus/jsparagus/parse_table.py b/third_party/rust/jsparagus/jsparagus/parse_table.py new file mode 100644 index 0000000000..ebf548c861 --- /dev/null +++ b/third_party/rust/jsparagus/jsparagus/parse_table.py @@ -0,0 +1,1820 @@ +from __future__ import annotations +# mypy: disallow-untyped-defs, disallow-incomplete-defs, disallow-untyped-calls + +import collections +import hashlib +import os +import pickle +import typing +import itertools + +from . 
import types +from .utils import consume, keep_until, split, default_id_dict, default_fwd_dict +from .ordered import OrderedSet, OrderedFrozenSet +from .actions import Action, Replay, Reduce, FilterStates, Seq +from .grammar import End, ErrorSymbol, InitNt, Nt +from .rewrites import CanonicalGrammar +from .lr0 import LR0Generator, Term +from .aps import APS, Edge, Path + +# StateAndTransitions objects are indexed using a StateId which is an integer. +StateId = int + +# Action or ordered sequence of action which have to be performed. +DelayedAction = typing.Union[Action, typing.Tuple[Action, ...]] + + +class StateAndTransitions: + """This is one state of the parse table, which has transitions based on + terminals (text), non-terminals (grammar rules) and epsilon (reduce). + + In this model epsilon transitions are used to represent code to be executed + such as reduce actions and any others actions. + """ + + __slots__ = ["index", "locations", "terminals", "nonterminals", "errors", + "epsilon", "delayed_actions", "arguments", "backedges", "_hash", + "stable_hash"] + + # Numerical index of this state. + index: StateId + + # The stable_str of each LRItem we could be parsing in this state: the + # places in grammar productions that tell what we've already parsed, + # i.e. how we got to this state. + locations: OrderedFrozenSet[str] + + # Ordered set of Actions which are pushed to the next state after a + # conflict. + delayed_actions: OrderedFrozenSet[DelayedAction] + + # Number of argument of an action state. + # + # Instead of having action states with a non-empty replay list of terms, we + # have a non-empty list of argument which size is described by this + # variable. + arguments: int + + # Outgoing edges taken when shifting terminals. + terminals: typing.Dict[str, StateId] + + # Outgoing edges taken when shifting nonterminals after reducing. + nonterminals: typing.Dict[Nt, StateId] + + # Error symbol transitions. + errors: typing.Dict[ErrorSymbol, StateId] + + # List of epsilon transitions with associated actions. + epsilon: typing.List[typing.Tuple[Action, StateId]] + + # Set of edges that lead to this state. + backedges: OrderedSet[Edge] + + # Cached hash code. This class implements __hash__ and __eq__ in order to + # help detect equivalent states (which must be merged, for correctness). + _hash: int + + # A hash code computed the same way as _hash, but used only for + # human-readable output. The stability is useful for debugging, to match + # states across multiple runs of the parser generator. + stable_hash: str + + def __init__( + self, + index: StateId, + locations: OrderedFrozenSet[str], + delayed_actions: OrderedFrozenSet[DelayedAction] = OrderedFrozenSet(), + arguments: int = 0 + ) -> None: + assert isinstance(locations, OrderedFrozenSet) + assert isinstance(delayed_actions, OrderedFrozenSet) + self.index = index + self.terminals = {} + self.nonterminals = {} + self.errors = {} + self.epsilon = [] + self.locations = locations + self.delayed_actions = delayed_actions + self.arguments = arguments + self.backedges = OrderedSet() + + # NOTE: The hash of a state depends on its location in the LR0 + # parse-table, as well as the actions which have not yet been executed. 
+ def hashed_content() -> typing.Iterator[object]: + for item in sorted(self.locations): + yield item + yield "\n" + yield "delayed_actions" + for action in self.delayed_actions: + yield hash(action) + yield "arguments" + yield arguments + + self._hash = hash(tuple(hashed_content())) + h = hashlib.md5() + h.update("".join(map(str, hashed_content())).encode()) + self.stable_hash = h.hexdigest()[:6] + + def is_inconsistent(self) -> bool: + "Returns True if the state transitions are inconsistent." + # TODO: We could easily allow having a state with non-terminal + # transition and other epsilon transitions, as the non-terminal shift + # transitions are a form of condition based on the fact that a + # non-terminal, produced by a reduce action is consumed by the + # automaton. + if len(self.terminals) + len(self.nonterminals) + len(self.errors) > 0 and len(self.epsilon) > 0: + return True + elif len(self.epsilon) == 1: + if any(k.is_inconsistent() for k, s in self.epsilon): + return True + elif len(self.epsilon) > 1: + if any(k.is_inconsistent() for k, s in self.epsilon): + return True + # NOTE: We can accept multiple conditions as epsilon transitions + # iff they are checking the same variable with non-overlapping + # values. This implies that we can implement these conditions as a + # deterministic switch statement in the code emitter. + if any(not k.is_condition() for k, s in self.epsilon): + return True + iterator = iter(self.epsilon) + first, _ = next(iterator) + if any(not first.check_same_variable(k) for k, s in iterator): + return True + # "type: ignore" because mypy does not see that the preceding if-statement + # means all k.condition() actions are FilterFlags. + pairs = itertools.combinations((k for k, s in self.epsilon), 2) + if any(not k1.check_different_values(k2) for k1, k2 in pairs): + return True + else: + try: + self.get_error_symbol() + except ValueError: + return True + return False + + def shifted_edges(self) -> typing.Iterator[ + typing.Tuple[typing.Union[str, Nt, ErrorSymbol], StateId] + ]: + k: Term + s: StateId + for k, s in self.terminals.items(): + yield (k, s) + for k, s in self.nonterminals.items(): + yield (k, s) + for k, s in self.errors.items(): + yield (k, s) + + def edges(self) -> typing.Iterator[typing.Tuple[Term, StateId]]: + k: Term + s: StateId + for k, s in self.terminals.items(): + yield (k, s) + for k, s in self.nonterminals.items(): + yield (k, s) + for k, s in self.errors.items(): + yield (k, s) + for k, s in self.epsilon: + yield (k, s) + + def rewrite_state_indexes( + self, + state_map: typing.Dict[StateId, StateId] + ) -> None: + def apply_on_term(term: typing.Union[Term, None]) -> Term: + assert term is not None + if isinstance(term, Action): + return term.rewrite_state_indexes(state_map) + return term + + self.index = state_map[self.index] + self.terminals = { + k: state_map[s] for k, s in self.terminals.items() + } + self.nonterminals = { + k: state_map[s] for k, s in self.nonterminals.items() + } + self.errors = { + k: state_map[s] for k, s in self.errors.items() + } + self.epsilon = [ + (k.rewrite_state_indexes(state_map), state_map[s]) + for k, s in self.epsilon + ] + # We cannot have multiple identical actions jumping to different locations. 
+ assert len(self.epsilon) == len(set(k for k, _ in self.epsilon)) + self.backedges = OrderedSet( + Edge(state_map[edge.src], apply_on_term(edge.term)) + for edge in self.backedges + ) + + def get_error_symbol(self) -> typing.Optional[ErrorSymbol]: + if len(self.errors) > 1: + raise ValueError("More than one error symbol on the same state.") + else: + return next(iter(self.errors), None) + + def __contains__(self, term: object) -> bool: + if isinstance(term, Action): + for t, s in self.epsilon: + if t == term: + return True + return False + elif isinstance(term, Nt): + return term in self.nonterminals + elif isinstance(term, ErrorSymbol): + return term in self.errors + else: + return term in self.terminals + + def __getitem__(self, term: Term) -> StateId: + if isinstance(term, Action): + for t, s in self.epsilon: + if t == term: + return s + raise KeyError(term) + elif isinstance(term, Nt): + return self.nonterminals[term] + if isinstance(term, ErrorSymbol): + return self.errors[term] + else: + return self.terminals[term] + + def get(self, term: Term, default: object) -> object: + try: + return self.__getitem__(term) + except KeyError: + return default + + def stable_str(self, states: typing.List[StateAndTransitions]) -> str: + conflict = "" + if self.is_inconsistent(): + conflict = " (inconsistent)" + return "{}{}:\n{}".format(self.stable_hash, conflict, "\n".join([ + "\t{} --> {}".format(k, states[s].stable_hash) for k, s in self.edges()])) + + def __str__(self) -> str: + conflict = "" + if self.is_inconsistent(): + conflict = " (inconsistent)" + return "{}{}:\n{}".format(self.index, conflict, "\n".join([ + "\t{} --> {}".format(k, s) for k, s in self.edges()])) + + def __eq__(self, other: object) -> bool: + return (isinstance(other, StateAndTransitions) + and sorted(self.locations) == sorted(other.locations) + and sorted(self.delayed_actions) == sorted(other.delayed_actions) + and self.arguments == other.arguments) + + def __hash__(self) -> int: + return self._hash + + +DebugInfo = typing.Dict[StateId, int] + + +class ParseTable: + """The parser can be represented as a matrix of state transitions where on one + side we have the current state, and on the other we have the expected + terminal, non-terminal or epsilon transition. + + a b c A B C #1 #2 #3 + +---+---+---+---+---+---+----+----+----+ + s1 | | | | | | | | | | + s2 | | | | | | | | | | + s3 | | | | | | | | | | + . | | | | | | | | | | + . | | | | | | | | | | + . | | | | | | | | | | + s67 | | | | | | | | | | + s68 | | | | | | | | | | + s69 | | | | | | | | | | + +---+---+---+---+---+---+----+----+----+ + + The terminals `a` are the token which are read from the input. The + non-terminals `A` are the token which are pushed by the reduce actions of + the epsilon transitions. The epsilon transitions `#1` are the actions which + have to be executed as code by the parser. + + A parse table is inconsistent if there is any state which has an epsilon + transitions and terminals/non-terminals transitions (shift-reduce + conflict), or a state with more than one epsilon transitions (reduce-reduce + conflict). This is equivalent to having a non deterministic state machine. + + """ + + __slots__ = [ + "actions", "states", "state_cache", "named_goals", "terminals", + "nonterminals", "debug_info", "exec_modes", "assume_inconsistent" + ] + + # Map of actions identifier to the corresponding object. + actions: typing.List[Action] + + # Map of state identifier to the corresponding object. 
+ states: typing.List[StateAndTransitions] + + # Hash table of state objects, ensuring we never have two equal states. + state_cache: typing.Dict[StateAndTransitions, StateAndTransitions] + + # List of (Nt, states) tuples which are the entry point of the state + # machine. + named_goals: typing.List[typing.Tuple[Nt, StateId]] + + # Set of all terminals. + terminals: OrderedFrozenSet[typing.Union[str, End]] + + # List of non-terminals. + nonterminals: typing.List[Nt] + + # Carry the info to be used when generating debug_context. If False, + # then no debug_context is ever produced. + debug_info: typing.Union[bool, DebugInfo] + + # Execution modes are used by the code generator to decide which + # function is executed when. This is a dictionary of OrderedSet, where + # the keys are the various parsing modes, and the mapped set contains + # the list of traits which have to be implemented, and consequently + # which functions would be encoded. + exec_modes: typing.Optional[typing.DefaultDict[str, OrderedSet[types.Type]]] + + # True if the parse table might be inconsistent. When this is False, we add + # extra assertions when computing the reduce path. + assume_inconsistent: bool + + def __init__( + self, + grammar: CanonicalGrammar, + verbose: bool = False, + progress: bool = False, + debug: bool = False + ) -> None: + self.actions = [] + self.states = [] + self.state_cache = {} + self.named_goals = [] + self.terminals = grammar.grammar.terminals + self.nonterminals = typing.cast( + typing.List[Nt], + list(grammar.grammar.nonterminals.keys())) + + # typing.cast() doesn't actually check at run time, so let's do that: + assert all(isinstance(nt, Nt) for nt in self.nonterminals) + + self.debug_info = debug + self.exec_modes = grammar.grammar.exec_modes + self.assume_inconsistent = True + self.create_lr0_table(grammar, verbose, progress) + self.fix_inconsistent_table(verbose, progress) + # TODO: Optimize chains of actions into sequences. + # Optimize by removing unused states. + self.remove_all_unreachable_state(verbose, progress) + # TODO: Statically compute replayed terms. (maybe?) + # Replace reduce actions by programmatic stack manipulation. + self.lower_reduce_actions(verbose, progress) + # Fold Replay followed by Unwind instruction. + self.fold_replay_unwind(verbose, progress) + # Fold paths which have the same ending. + self.fold_identical_endings(verbose, progress) + # Group state with similar non-terminal edges close-by, to improve the + # generated Rust code by grouping matched state numbers. + self.group_nonterminal_states(verbose, progress) + # Split shift states from epsilon states. + # self.group_epsilon_states(verbose, progress) + + def save(self, filename: os.PathLike) -> None: + with open(filename, 'wb') as f: + pickle.dump(self, f) + + @classmethod + def load(cls, filename: os.PathLike) -> ParseTable: + with open(filename, 'rb') as f: + obj = pickle.load(f) + if len(f.read()) != 0: + raise ValueError("file has unexpected extra bytes at end") + if not isinstance(obj, cls): + raise TypeError("file contains wrong kind of object: expected {}, got {}" + .format(cls.__name__, obj.__class__.__name__)) + return obj + + def is_inconsistent(self) -> bool: + "Returns True if the grammar contains any inconsistent state." 
+ for s in self.states: + if s is not None and s.is_inconsistent(): + return True + return False + + def rewrite_state_indexes(self, state_map: typing.Dict[StateId, StateId]) -> None: + for s in self.states: + if s is not None: + s.rewrite_state_indexes(state_map) + self.named_goals = [ + (nt, state_map[s]) for nt, s in self.named_goals + ] + + # After a rewrite, multiple actions (conditions) might jump to the same + # target, attempt to fold these conditions based on having the same + # target. If we can merge them, then remove previous edges (updating + # the backedges of successor states) and replace them by the newly + # created edges. + for s in self.states: + if s is not None and len(s.epsilon) != 0: + epsilon_by_dest = collections.defaultdict(list) + for k, d in s.epsilon: + epsilon_by_dest[d].append(k) + for d, ks in epsilon_by_dest.items(): + if len(ks) == 1: + continue + new_ks = ks[0].fold_by_destination(ks) + if new_ks == ks: + continue + # This collection is required by `remove_edge`, but in this + # particular case we know for sure that at least one edge + # would be added back. Therefore no need to use the content + # of the set. + maybe_unreachable_set: OrderedSet[StateId] = OrderedSet() + assert len(new_ks) > 0 + for k in ks: + self.remove_edge(s, k, maybe_unreachable_set) + for k in new_ks: + self.add_edge(s, k, d) + + self.assert_table_invariants() + + def rewrite_reordered_state_indexes(self) -> None: + state_map = { + s.index: i + for i, s in enumerate(self.states) + if s is not None + } + self.rewrite_state_indexes(state_map) + + def new_state( + self, + locations: OrderedFrozenSet[str], + delayed_actions: OrderedFrozenSet[DelayedAction] = OrderedFrozenSet(), + arguments: int = 0 + ) -> typing.Tuple[bool, StateAndTransitions]: + """Get or create state with an LR0 location and delayed actions. Returns a tuple + where the first element is whether the element is newly created, and + the second element is the State object.""" + index = len(self.states) + state = StateAndTransitions(index, locations, delayed_actions, arguments) + try: + return False, self.state_cache[state] + except KeyError: + self.state_cache[state] = state + self.states.append(state) + return True, state + + def get_state( + self, + locations: OrderedFrozenSet[str], + delayed_actions: OrderedFrozenSet[DelayedAction] = OrderedFrozenSet(), + arguments: int = 0 + ) -> StateAndTransitions: + """Like new_state(), but only returns the state without returning whether it is + newly created or not.""" + _, state = self.new_state(locations, delayed_actions, arguments) + return state + + def remove_state(self, s: StateId, maybe_unreachable_set: OrderedSet[StateId]) -> None: + state = self.states[s] + self.clear_edges(state, maybe_unreachable_set) + del self.state_cache[state] + + # "type: ignore" because the type annotation on `states` doesn't allow + # entries to be `None`. 
+ self.states[s] = None # type: ignore + + def add_edge( + self, + src: StateAndTransitions, + term: Term, + dest: StateId + ) -> None: + assert term not in src + assert dest < len(self.states) + if isinstance(term, Action): + src.epsilon.append((term, dest)) + elif isinstance(term, Nt): + src.nonterminals[term] = dest + elif isinstance(term, ErrorSymbol): + src.errors[term] = dest + else: + src.terminals[term] = dest + self.states[dest].backedges.add(Edge(src.index, term)) + + def remove_backedge( + self, + src: StateAndTransitions, + term: Term, + dest: StateId, + maybe_unreachable_set: OrderedSet[StateId] + ) -> None: + self.states[dest].backedges.remove(Edge(src.index, term)) + maybe_unreachable_set.add(dest) + + def replace_edge( + self, + src: StateAndTransitions, + term: Term, + dest: StateId, + maybe_unreachable_set: OrderedSet[StateId] + ) -> None: + assert isinstance(dest, int) and dest < len(self.states) + + edge_existed = term in src + if edge_existed: + old_dest = src[term] + self.remove_backedge(src, term, old_dest, maybe_unreachable_set) + + if isinstance(term, Action): + src.epsilon = [(t, d) for t, d in src.epsilon if t != term] + src.epsilon.append((term, dest)) + elif isinstance(term, Nt): + src.nonterminals[term] = dest + elif isinstance(term, ErrorSymbol): + src.errors[term] = dest + else: + src.terminals[term] = dest + self.states[dest].backedges.add(Edge(src.index, term)) + + self.assert_state_invariants(src) + self.assert_state_invariants(dest) + if edge_existed: + self.assert_state_invariants(old_dest) + + def remove_edge( + self, + src: StateAndTransitions, + term: Term, + maybe_unreachable_set: OrderedSet[StateId] + ) -> None: + edge_existed = term in src + if edge_existed: + old_dest = src[term] + self.remove_backedge(src, term, old_dest, maybe_unreachable_set) + if isinstance(term, Action): + src.epsilon = [(t, d) for t, d in src.epsilon if t != term] + elif isinstance(term, Nt): + del src.nonterminals[term] + elif isinstance(term, ErrorSymbol): + del src.errors[term] + else: + del src.terminals[term] + + self.assert_state_invariants(src) + if edge_existed: + self.assert_state_invariants(old_dest) + + def clear_edges( + self, + src: StateAndTransitions, + maybe_unreachable_set: OrderedSet[StateId] + ) -> None: + """Remove all existing edges, in order to replace them by new one. 
This is used + when resolving shift-reduce conflicts.""" + assert isinstance(src, StateAndTransitions) + old_dest = [] + for term, dest in src.edges(): + self.remove_backedge(src, term, dest, maybe_unreachable_set) + old_dest.append(dest) + src.terminals = {} + src.nonterminals = {} + src.errors = {} + src.epsilon = [] + self.assert_state_invariants(src) + for dest in old_dest: + self.assert_state_invariants(dest) + + def assert_table_invariants(self) -> None: + for s in self.states: + if s is not None: + self.assert_state_invariants(s) + + def assert_state_invariants(self, src: typing.Union[StateId, StateAndTransitions]) -> None: + if not self.debug_info: + return + if isinstance(src, int): + src = self.states[src] + assert isinstance(src, StateAndTransitions) + try: + for term, dest in src.edges(): + assert Edge(src.index, term) in self.states[dest].backedges + for e in src.backedges: + assert e.term is not None + assert self.states[e.src][e.term] == src.index + if not self.assume_inconsistent: + assert not src.is_inconsistent() + except AssertionError as exc: + print("assert_state_inveriants for {}\n".format(src)) + for e in src.backedges: + print("backedge {} from {}\n".format(e, self.states[e.src])) + raise exc + + def remove_unreachable_states( + self, + maybe_unreachable_set: OrderedSet[StateId] + ) -> None: + # TODO: This function is incomplete in case of loops, some cycle might + # remain isolated while not being reachable from the init states. We + # should maintain a notion of depth per-state, such that we can + # identify loops by noticing the all backedges have a larger depth than + # the current state. + init: OrderedSet[StateId] + init = OrderedSet(goal for name, goal in self.named_goals) + while maybe_unreachable_set: + next_set: OrderedSet[StateId] = OrderedSet() + for s in maybe_unreachable_set: + # Check if the state is reachable, if not remove the state and + # fill the next_set with all outgoing edges. + if len(self.states[s].backedges) == 0 and s not in init: + self.remove_state(s, next_set) + maybe_unreachable_set = next_set + + def is_reachable_state(self, s: StateId) -> bool: + """Check whether the current state is reachable or not.""" + if self.states[s] is None: + return False + reachable_back: OrderedSet[StateId] = OrderedSet() + todo = [s] + while todo: + s = todo.pop() + reachable_back.add(s) + for edge in self.states[s].backedges: + if edge.src not in reachable_back: + todo.append(edge.src) + for _, s in self.named_goals: + if s in reachable_back: + return True + return False + + def debug_dump(self) -> None: + # Sort the grammar by state hash, such that it can be compared + # before/after grammar modifications. + temp = [s for s in self.states if s is not None] + temp = sorted(temp, key=lambda s: s.stable_hash) + for s in temp: + print(s.stable_str(self.states)) + + def create_lr0_table( + self, + grammar: CanonicalGrammar, + verbose: bool, + progress: bool + ) -> None: + if verbose or progress: + print("Create LR(0) parse table.") + + goals = grammar.grammar.goals() + self.named_goals = [] + + # Temporary work queue. + todo: typing.Deque[typing.Tuple[LR0Generator, StateAndTransitions]] + todo = collections.deque() + + # Record the starting goals in the todo list. 
+ for nt in goals: + init_nt = Nt(InitNt(nt), ()) + it = LR0Generator.start(grammar, init_nt) + s = self.get_state(it.stable_locations()) + todo.append((it, s)) + self.named_goals.append((nt, s.index)) + + # Iterate the grammar with sets of LR Items abstracted by the + # LR0Generator, and create new states in the parse table as long as new + # sets of LR Items are discovered. + def visit_grammar() -> typing.Iterator[None]: + while todo: + yield # progress bar. + # TODO: Compare stack / queue, for the traversal of the states. + s_it, s = todo.popleft() + if verbose: + print("\nMapping state {} to LR0:\n{}".format(s.stable_hash, s_it)) + for k, sk_it in s_it.transitions().items(): + locations = sk_it.stable_locations() + if not self.term_is_shifted(k): + locations = OrderedFrozenSet() + is_new, sk = self.new_state(locations) + if is_new: + todo.append((sk_it, sk)) + + # Add the edge from s to sk with k. + self.add_edge(s, k, sk.index) + + consume(visit_grammar(), progress) + + if verbose: + print("Create LR(0) Table Result:") + self.debug_dump() + + def term_is_shifted(self, term: typing.Optional[Term]) -> bool: + return not isinstance(term, Action) or term.follow_edge() + + def is_valid_path( + self, + path: typing.Sequence[Edge], + state: typing.Optional[StateId] = None + ) -> bool: + """This function is used to check a list of edges and returns whether it + corresponds to a valid path within the parse table. This is useful when + merging sequences of edges from various locations.""" + if not state and path != []: + state = path[0].src + while path: + edge = path[0] + path = path[1:] + if state != edge.src: + return False + assert isinstance(state, StateId) + + term = edge.term + if term is None and len(path) == 0: + return True + + row = self.states[state] + if term not in row: + return False + assert term is not None + state = row[term] + return True + + def term_is_stacked(self, term: typing.Optional[Term]) -> bool: + # The `term` argument is annotated as Optional because `Edge.term` is a + # common argument. If it's ever None in practice, the caller has a bug. + assert term is not None + + return not isinstance(term, Action) + + def aps_visitor(self, aps: APS, visit: typing.Callable[[APS], bool]) -> None: + """Visit all the states of the parse table, as-if we were running a + Generalized LR parser. + + However, instead parsing content, we use this algorithm to generate + both the content which remains to be parsed as well as the context + which might lead us to be in the state which from which we started. + + This algorithm takes an APS (Abstract Parser State) and a callback, and + consider all edges of the parse table, unless restricted by one of the + previously encountered actions. These restrictions, such as replayed + lookahead or the path which might be reduced are used for filtering out + states which are not handled by this parse table. + + For each edge, this functions calls the visit functions to know whether + to stop or continue. The visit function might capture APS given as + argument to be used for other analysis. + + """ + todo = [aps] + while todo: + aps = todo.pop() + cont = visit(aps) + if not cont: + continue + todo.extend(aps.shift_next(self)) + + def context_lanes(self, state: StateId) -> typing.Tuple[bool, typing.List[APS]]: + """Compute lanes, such that each reduce action can have set of unique stack to + reach the given state. The stacks are assumed to be loop-free by + reducing edges at most once. 
+ + In order to avoid attempting to eagerly solve everything using context + information, we break this loop as soon as we have one token of + lookahead in a case which does not have enough context. + + The return value is a tuple where the first element is a boolean which + is True if we should fallback on solving this issue with more + lookahead, and the second is the list of APS lanes which are providing + enough context to disambiguate the inconsistency of the given state.""" + + def not_interesting(aps: APS) -> bool: + reduce_list = [e for e in aps.history if self.term_is_shifted(e.term)] + has_reduce_loop = len(reduce_list) != len(set(reduce_list)) + return has_reduce_loop + + # The context is a dictionary which maps all stack suffixes from an APS + # stack. It is mapped to a list of tuples, where the each tuple is the + # index with the APS stack and the APS action used to follow this path. + context: typing.DefaultDict[typing.Tuple[Edge, ...], typing.List[Edge]] + context = collections.defaultdict(lambda: []) + + def has_enough_context(aps: APS) -> bool: + try: + assert aps.history[0] in context[tuple(aps.stack)] + # Check the number of different actions which can reach this + # location. If there is more than 1, then we do not have enough + # context. + return len(set(context[tuple(aps.stack)])) <= 1 + except IndexError: + return False + + collect = [APS.start(state)] + enough_context = False + while not enough_context: + # print("collect.len = {}".format(len(collect))) + # Fill the context dictionary with all the sub-stack which might be + # encountered by other APS. + recurse = [] + context = collections.defaultdict(lambda: []) + while collect: + aps = collect.pop() + recurse.append(aps) + if aps.history == []: + continue + for i in range(len(aps.stack)): + context[tuple(aps.stack[i:])].append(aps.history[0]) + assert collect == [] + + # print("recurse.len = {}".format(len(recurse))) + # Iterate over APS which do not yet have enough context information + # to uniquely identify a single action. + enough_context = True + while recurse: + aps = recurse.pop() + if not_interesting(aps): + # print("discard uninteresting context lane:") + # print(aps.string("\tcontext")) + continue + if has_enough_context(aps): + collect.append(aps) + continue + # If we have not enough context but some lookahead is + # available, attempt to first solve this issue using more + # lookahead before attempting to use context information. + if len(aps.lookahead) >= 1: + # print("discard context_lanes due to lookahead:") + # for aps in itertools.chain(collect, recurse, [aps]): + # print(aps.string("\tcontext")) + return True, [] + enough_context = False + # print("extend starting at:\n{}".format(aps.string("\tcontext"))) + collect.extend(aps.shift_next(self)) + assert recurse == [] + + # print("context_lanes:") + # for aps in collect: + # print(aps.string("\tcontext")) + + return False, collect + + def lookahead_lanes(self, state: StateId) -> typing.List[APS]: + """Compute lanes to collect all lookahead symbols available. After each reduce + action, there is no need to consider the same non-terminal multiple + times, we are only interested in lookahead token and not in the context + information provided by reducing action.""" + + record = [] + # After the first reduce action, we do not want to spend too much + # resource visiting edges which would give us the same information. + # Therefore, if we already reduce an action to a given state, then we + # skip looking for lookahead that we already visited. 
+ # + # Set of (first-reduce-edge, reducing-base, last-reduce-edge) + seen_edge_after_reduce: typing.Set[typing.Tuple[Edge, StateId, typing.Optional[Term]]] + seen_edge_after_reduce = set() + + def find_first_reduce( + edges: Path + ) -> typing.Tuple[int, typing.Optional[Edge]]: + for i, edge in enumerate(edges): + if not self.term_is_shifted(edge.term): + return i, edge + return 0, None + + def find_last_reduce( + edges: Path + ) -> typing.Tuple[int, typing.Optional[Edge]]: + for i, edge in zip(reversed(range(len(edges))), reversed(edges)): + if not self.term_is_shifted(edge.term): + return i, edge + return 0, None + + def visit(aps: APS) -> bool: + # Note, this suppose that we are not considering flags when + # computing, as flag might prevent some lookahead investigations. + reduce_key = None + first_index, first_reduce = find_first_reduce(aps.history) + last_index, last_reduce = find_last_reduce(aps.history) + if first_index != last_index and first_reduce and last_reduce: + if not isinstance(aps.history[-1].term, Action): + reduce_key = (first_reduce, aps.shift[0].src, last_reduce.term) + has_seen_edge_after_reduce = reduce_key and reduce_key in seen_edge_after_reduce + has_lookahead = len(aps.lookahead) >= 1 + stop = has_seen_edge_after_reduce or has_lookahead + # print("stop: {}, size lookahead: {}, seen_edge_after_reduce: {}".format( + # stop, len(aps.lookahead), repr(reduce_key) + # )) + # print(aps.string("\tvisitor")) + if stop: + if has_lookahead: + record.append(aps) + if reduce_key: + seen_edge_after_reduce.add(reduce_key) + return not stop + + self.aps_visitor(APS.start(state), visit) + return record + + def fix_with_context(self, s: StateId, aps_lanes: typing.List[APS]) -> None: + raise ValueError("fix_with_context: Not Implemented") + # # This strategy is about using context information. By using chains of + # # reduce actions, we are able to increase the knowledge of the stack + # # content. The stack content is the context which can be used to + # # determine how to consider a reduction. The stack content is also + # # called a lane, as defined in the Lane Table algorithm. + # # + # # To add context information to the current graph, we add flags + # # manipulation actions. + # # + # # Consider each edge as having an implicit function which can map one + # # flag value to another. The following implements a unification + # # algorithm which is attempting to solve the question of what is the + # # flag value, and where it should be changed. + # # + # # NOTE: (nbp) I would not be surprised if there is a more specialized + # # algorithm, but I failed to find one so far, and this problem + # # definitely looks like a unification problem. + # Id = collections.namedtuple("Id", "edge") + # Eq = collections.namedtuple("Eq", "flag_in edge flag_out") + # Var = collections.namedtuple("Var", "n") + # SubSt = collections.namedtuple("SubSt", "var by") + # + # # Unify expression, and return one substitution if both expressions + # # can be unified. + # def unify_expr(expr1, expr2, swapped=False): + # if isinstance(expr1, Eq) and isinstance(expr2, Id): + # if expr1.edge != expr2.edge: + # # Different edges are ok, but produce no substituions. + # return True + # if isinstance(expr1.flag_in, Var): + # return SubSt(expr1.flag_in, expr1.flag_out) + # if isinstance(expr1.flag_out, Var): + # return SubSt(expr1.flag_out, expr1.flag_in) + # # We are unifying with a relation which consider the current + # # function as an identity function. 
Having different values as + # # input and output fails the unification rule. + # return expr1.flag_out == expr1.flag_in + # if isinstance(expr1, Eq) and isinstance(expr2, Eq): + # if expr1.edge != expr2.edge: + # # Different edges are ok, but produce no substituions. + # return True + # if expr1.flag_in is None and isinstance(expr2.flag_in, Var): + # return SubSt(expr2.flag_in, None) + # if expr1.flag_out is None and isinstance(expr2.flag_out, Var): + # return SubSt(expr2.flag_out, None) + # if expr1.flag_in == expr2.flag_in: + # if isinstance(expr1.flag_out, Var): + # return SubSt(expr1.flag_out, expr2.flag_out) + # elif isinstance(expr2.flag_out, Var): + # return SubSt(expr2.flag_out, expr1.flag_out) + # # Reject solutions which are not deterministic. We do not + # # want the same input flag to have multiple outputs. + # return expr1.flag_out == expr2.flag_out + # if expr1.flag_out == expr2.flag_out: + # if isinstance(expr1.flag_in, Var): + # return SubSt(expr1.flag_in, expr2.flag_in) + # elif isinstance(expr2.flag_in, Var): + # return SubSt(expr2.flag_in, expr1.flag_in) + # return True + # if not swapped: + # return unify_expr(expr2, expr1, True) + # return True + # + # # Apply substituion rule to an expression. + # def subst_expr(subst, expr): + # if expr == subst.var: + # return True, subst.by + # if isinstance(expr, Eq): + # subst1, flag_in = subst_expr(subst, expr.flag_in) + # subst2, flag_out = subst_expr(subst, expr.flag_out) + # return subst1 or subst2, Eq(flag_in, expr.edge, flag_out) + # return False, expr + # + # # Add an expression to an existing knowledge based which is relying on + # # a set of free variables. + # def unify_with(expr, knowledge, free_vars): + # old_knowledge = knowledge + # old_free_Vars = free_vars + # while True: + # subst = None + # for rel in knowledge: + # subst = unify_expr(rel, expr) + # if subst is False: + # raise Error("Failed to find a coherent solution") + # if subst is True: + # continue + # break + # else: + # return knowledge + [expr], free_vars + # free_vars = [fv for fv in free_vars if fv != subst.var] + # # Substitue variables, and re-add rules which have substituted + # # vars to check the changes to other rules, in case 2 rules are + # # now in conflict or in case we can propagate more variable + # # changes. + # subst_rules = [subst_expr(subst, k) for k in knowledge] + # knowledge = [rule for changed, rule in subst_rule if not changed] + # for changed, rule in subst_rule: + # if not changed: + # continue + # knowledge, free_vars = unify_with(rule, knowledge, free_vars) + # + # # Register boundary conditions as part of the knowledge based, i-e that + # # reduce actions are expecting to see the flag value matching the + # # reduced non-terminal, and that we have no flag value at the start of + # # every lane head. + # # + # # TODO: Catch exceptions from the unify function in case we do not yet + # # have enough context to disambiguate. 
+ # rules = [] + # free_vars = [] + # last_free = 0 + # maybe_id_edges = set() + # nts = set() + # for aps in aps_lanes: + # assert len(aps.stack) >= 1 + # flag_in = None + # for edge in aps.stack[-1]: + # i = last_free + # last_free += 1 + # free_vars.append(Var(i)) + # rule = Eq(flag_in, edge, Var(i)) + # rules, free_vars = unify_with(rule, rules, free_vars) + # flag_in = Var(i) + # if flag_in is not None: + # maybe_id_edges.add(Id(edge)) + # edge = aps.stack[-1] + # nt = edge.term.update_stack_with().nt + # rule = Eq(nt, edge, None) + # rules, free_vars = unify_with(rule, rules, free_vars) + # nts.add(nt) + # + # # We want to produce a parse table where most of the node are ignoring + # # the content of the flag which is being added. Thus we want to find a + # # solution where most edges are the identical function. + # def fill_with_id_functions(rules, free_vars, maybe_id_edges): + # min_rules, min_vars = rules, free_vars + # for num_id_edges in reversed(range(len(maybe_id_edges))): + # for id_edges in itertools.combinations(edges, num_id_edges): + # for edge in id_edges: + # new_rules, new_free_vars = unify_with(rule, rules, free_vars) + # if new_free_vars == []: + # return new_rules, new_free_vars + # if len(new_free_vars) < len(min_free_vars): + # min_vars = new_free_vars + # min_rules = new_rules + # return rules, free_vars + # + # rules, free_vars = fill_with_id_functions(rules, free_vars, maybe_id_edges) + # if free_vars != []: + # raise Error("Hum โฆ maybe we can try to iterate over the remaining free-variable.") + # print("debug: Great we found a solution for a reduce-reduce conflict") + # + # # The set of rules describe the function that each edge is expected to + # # support. If there is an Id(edge), then we know that we do not have to + # # change the graph for the given edge. If the rule is Eq(A, edge, B), + # # then we have to (filter A & pop) and push B, except if A or B is + # # None. + # # + # # For each edge, collect the set of rules concerning the edge to + # # determine which edges have to be transformed to add the filter&pop + # # and push actions. + # edge_rules = collections.defaultdict(lambda: []) + # for rule in rules: + # if isinstance(rule, Id): + # edge_rules[rule.edge] = None + # elif isinstance(rule, Eq): + # if edge_rules[rule.edge] is not None: + # edge_rules[rule.edge].append(rule) + # + # maybe_unreachable_set = set() + # flag_name = self.get_flag_for(nts) + # for edge, rules in edge_rules.items(): + # # If the edge is an identity function, then skip doing any + # # modifications on it. + # if rules is None: + # continue + # # Otherwise, create a new state and transition for each mapping. + # src = self.states[edge.src] + # dest = src[edge.term] + # dest_state = self.states[dest] + # # TODO: Add some information to avoid having identical hashes as + # # the destination. + # actions = [] + # for rule in OrderedFrozenSet(rules): + # assert isinstance(rule, Eq) + # seq = [] + # if rule.flag_in is not None: + # seq.append(FilterFlag(flag_name, True)) + # if rule.flag_in != rule.flag_out: + # seq.append(PopFlag(flag_name)) + # if rule.flag_out is not None and rule.flag_in != rule.flag_out: + # seq.append(PushFlag(flag_name, rule.flag_out)) + # actions.append(Seq(seq)) + # # Assert that we do not map flag_in more than once. + # assert len(set(eq.flag_in for eq in rules)) < len(rules) + # # Create the new state and add edges. 
+ # is_new, switch = self.new_state(dest.locations, OrderedFrozenSet(actions)) + # assert is_new + # for seq in actions: + # self.add_edge(switch, seq, dest) + # + # # Replace the edge from src to dest, by an edge from src to the + # # newly created switch state, which then decide which flag to set + # # before going to the destination target. + # self.replace_edge(src, edge.term, switch, maybe_unreachable_set) + # + # self.remove_unreachable_states(maybe_unreachable_set) + # pass + + def fix_with_lookahead(self, s: StateId, aps_lanes: typing.List[APS]) -> None: + # Find the list of terminals following each actions (even reduce + # actions). + assert all(len(aps.lookahead) >= 1 for aps in aps_lanes) + if self.debug_info: + for aps in aps_lanes: + print(str(aps)) + maybe_unreachable_set: OrderedSet[StateId] = OrderedSet() + + # For each shifted term, associate a set of state and actions which + # would have to be executed. + shift_map: typing.DefaultDict[ + Term, + typing.List[typing.Tuple[StateAndTransitions, typing.List[Edge]]] + ] + shift_map = collections.defaultdict(lambda: []) + for aps in aps_lanes: + actions = aps.history + assert isinstance(actions[-1], Edge) + src = actions[-1].src + term = actions[-1].term + assert term == aps.lookahead[0] + assert isinstance(term, (str, End, ErrorSymbol, Nt)) + + # No need to consider any action beyind the first reduced action + # since the reduced action is in charge of replaying the lookahead + # terms. + actions = list(keep_until(actions[:-1], lambda edge: not self.term_is_shifted(edge.term))) + assert all(isinstance(edge.term, Action) for edge in actions) + + # Change the order of the shifted term, shift all actions by 1 with + # the given lookahead term, in order to match the newly generated + # state machine. + # + # Shifting actions with the list of shifted terms is used to record + # the number of terms to be replayed, as well as verifying whether + # Lookahead filter actions should accept or reject this lane. + new_actions = [] + accept = True + for edge in actions: + edge_term = edge.term + assert isinstance(edge_term, Action) + new_term = edge_term.shifted_action(term) + if isinstance(new_term, bool): + if new_term is False: + accept = False + break + else: + continue + new_actions.append(Edge(edge.src, new_term)) + if accept: + target_id = self.states[src][term] + target = self.states[target_id] + shift_map[term].append((target, new_actions)) + + # Restore the new state machine based on a given state to use as a base + # and the shift_map corresponding to edges. + def restore_edges( + state: StateAndTransitions, + shift_map: typing.DefaultDict[ + Term, + typing.List[typing.Tuple[StateAndTransitions, typing.List[Edge]]] + ], + depth: str + ) -> None: + # print("{}starting with {}\n".format(depth, state)) + edges = {} + for term, actions_list in shift_map.items(): + # print("{}term: {}, lists: {}\n".format(depth, repr(term), repr(actions_list))) + # Collect all the states reachable after shifting the term. + # Compute the unique name, based on the locations and actions + # which are delayed. + locations: OrderedSet[str] = OrderedSet() + delayed: OrderedSet[DelayedAction] = OrderedSet() + new_shift_map: typing.DefaultDict[ + Term, + typing.List[typing.Tuple[StateAndTransitions, typing.List[Edge]]] + ] + new_shift_map = collections.defaultdict(lambda: []) + recurse = False + if not self.term_is_shifted(term): + # There is no more target after a reduce action. 
+ actions_list = [] + for target, actions in actions_list: + assert isinstance(target, StateAndTransitions) + locations |= target.locations + delayed |= target.delayed_actions + if actions != []: + # Pull edges, with delayed actions. + edge = actions[0] + assert isinstance(edge, Edge) + for action in actions: + action_term = action.term + assert isinstance(action_term, Action) + delayed.add(action_term) + edge_term = edge.term + assert edge_term is not None + new_shift_map[edge_term].append((target, actions[1:])) + recurse = True + else: + # Pull edges, as a copy of existing edges. + for next_term, next_dest_id in target.edges(): + next_dest = self.states[next_dest_id] + new_shift_map[next_term].append((next_dest, [])) + + is_new, new_target = self.new_state( + OrderedFrozenSet(locations), OrderedFrozenSet(delayed)) + edges[term] = new_target.index + if self.debug_info: + print("{}is_new = {}, index = {}".format(depth, is_new, new_target.index)) + print("{}Add: {} -- {} --> {}".format(depth, state.index, str(term), new_target.index)) + print("{}continue: (is_new: {}) or (recurse: {})".format(depth, is_new, recurse)) + if is_new or recurse: + restore_edges(new_target, new_shift_map, depth + " ") + + self.clear_edges(state, maybe_unreachable_set) + for term, target_id in edges.items(): + self.add_edge(state, term, target_id) + if self.debug_info: + print("{}replaced by {}\n".format(depth, state)) + + state = self.states[s] + restore_edges(state, shift_map, "") + self.remove_unreachable_states(maybe_unreachable_set) + + def fix_inconsistent_state(self, s: StateId, verbose: bool) -> bool: + # Fixing inconsistent states works one state at a time. The goal is to + # achieve the same result as the Lane Tracer, but instead of building a + # table to then mutate the parse state, we mutate the parse state + # directly. + # + # This strategy is simpler, and should be able to reproduce the same + # graph mutations as seen with the Lane Table algorithm. One of the problems + # with the Lane Table algorithm is that it assumes reduce operations, + # and as such it does not apply simply to epsilon transitions, which are + # used as conditions on the parse table. + # + # By using push-flag and filter-flag actions, we are able to + # decompose the Lane Table transformation of the parse table into + # multiple steps which are applied one at a time, and with less + # table state duplication. + + state = self.states[s] + if state is None or not state.is_inconsistent(): + return False + + all_reduce = all(a.update_stack() for a, _ in state.epsilon) + any_shift = (len(state.terminals) + len(state.nonterminals) + len(state.errors)) > 0 + try_with_context = all_reduce and not any_shift + try_with_lookahead = not try_with_context + # if verbose: + # print(aps_lanes_str(aps_lanes, "fix_inconsistent_state:", "\taps")) + if try_with_context: + if verbose: + print("\tFix with context.") + try_with_lookahead, aps_lanes = self.context_lanes(s) + if not try_with_lookahead: + assert aps_lanes != [] + self.fix_with_context(s, aps_lanes) + elif verbose: + print("\tFallback on fixing with lookahead.") + if try_with_lookahead: + if verbose: + print("\tFix with lookahead.") + aps_lanes = self.lookahead_lanes(s) + assert aps_lanes != [] + self.fix_with_lookahead(s, aps_lanes) + return True + + def fix_inconsistent_table(self, verbose: bool, progress: bool) -> None: + """The parse table might be inconsistent. We fix the parse table by looking + around the inconsistent states for more context,
either by looking at the + potential stack states which might lead to the inconsistent state, or by + increasing the lookahead.""" + self.assume_inconsistent = True + if verbose or progress: + print("Fix parse table inconsistencies.") + + todo: typing.Deque[StateId] = collections.deque() + for state in self.states: + if state.is_inconsistent(): + todo.append(state.index) + + if verbose and todo: + print("\n".join([ + "\nGrammar is inconsistent.", + "\tNumber of States = {}", + "\tNumber of inconsistencies found = {}"]).format( + len(self.states), len(todo))) + + count = 0 + + def visit_table() -> typing.Iterator[None]: + nonlocal count + unreachable = [] + while todo: + while todo: + yield # progress bar. + # TODO: Compare stack / queue, for the traversal of the states. + s = todo.popleft() + if not self.is_reachable_state(s): + # NOTE: We do not fix unreachable states, as we might + # not be able to compute the reduce actions. However, + # we should not clean edges nor backedges, as the state + # might become reachable later on, since states are + # shared if they have the same locations. + unreachable.append(s) + continue + assert self.states[s].is_inconsistent() + start_len = len(self.states) + if verbose: + count = count + 1 + print("Fixing state {}\n".format(self.states[s].stable_str(self.states))) + try: + self.fix_inconsistent_state(s, verbose) + except Exception as exc: + self.debug_info = True + raise ValueError( + "Error while fixing conflict in state {}\n\n" + "In the following grammar productions:\n{}" + .format(self.states[s].stable_str(self.states), + self.debug_context(s, "\n", "\t")) + ) from exc + new_inconsistent_states = [ + s.index for s in self.states[start_len:] + if s.is_inconsistent() + ] + if verbose: + print("\tAdding {} states".format(len(self.states[start_len:]))) + print("\tWith {} inconsistent states".format(len(new_inconsistent_states))) + todo.extend(new_inconsistent_states) + + # Check whether any of the previously inconsistent and + # unreachable states became reachable. If so, add them back to the + # todo list. + still_unreachable = [] + for s in unreachable: + if self.is_reachable_state(s): + todo.append(s) + else: + still_unreachable.append(s) + unreachable = still_unreachable + + consume(visit_table(), progress) + if verbose: + print("\n".join([ + "\nGrammar is now consistent.", + "\tNumber of States = {}", + "\tNumber of inconsistencies solved = {}"]).format( + len(self.states), count)) + assert not self.is_inconsistent() + self.assume_inconsistent = False + + if verbose: + print("Fix Inconsistent Table Result:") + self.debug_dump() + + def remove_all_unreachable_state(self, verbose: bool, progress: bool) -> None: + self.states = [s for s in self.states if s is not None] + self.rewrite_reordered_state_indexes() + + def lower_reduce_actions(self, verbose: bool, progress: bool) -> None: + # Remove Reduce actions and replace them by the programmatic + # equivalent. + # + # This transformation preserves the stack manipulations of the parse + # table. It only changes them from being implicitly executed by the LR + # parser, to being explicitly executed with actions. + # + # This transformation converts the hard-to-predict load of the shift + # table into a branch which is potentially easier to + # predict. + # + # A side-effect of this transformation is that it removes the need for + # replaying non-terminals, thus the backends can safely ignore the + # ability of the shift function to handle non-terminals.
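+ # + # As a purely illustrative sketch (the production named here is + # hypothetical): a state whose only action is Reduce(Expr ::= Expr "+" Term) + # ends up with an Unwind edge performing the stack manipulation, leading to + # a state whose FilterStates actions branch on the state left on top of the + # stack, each followed by a Replay edge which re-shifts the replayed terms + # toward the destination state.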
+ if verbose or progress: + print("Lower Reduce actions.") + + maybe_unreachable_set: OrderedSet[StateId] = OrderedSet() + + def transform() -> typing.Iterator[None]: + for s in self.states: + term, _ = next(iter(s.epsilon), (None, None)) + if self.term_is_shifted(term): + continue + assert len(s.epsilon) == 1 + yield # progress bar. + reduce_state = s + if verbose: + print("Inlining shift-operation for state {}".format(str(reduce_state))) + + # The reduced_aps should contain all reduced path of the single + # Reduce action which is present on this state. However, as + # state of the graph are shared, some reduced paths might follow + # the same path and reach the same state. + # + # This code collect for each replayed path, the tops of the + # stack on top of which these states are replayed. + aps = APS.start(s.index) + states_by_replay_term = collections.defaultdict(list) + # print("Start:\n{}".format(aps.string(name="\titer_aps"))) + # print(s.stable_str(self.states)) + for reduced_aps in aps.shift_next(self): + # As long as we have elements to replay, we should only + # have a single path for each reduced path. If the next + # state contains an action, then we stop here. + iter_aps = reduced_aps + next_is_action = self.states[iter_aps.state].epsilon != [] + has_replay = iter_aps.replay != [] + assert next_is_action is False and has_replay is True + while (not next_is_action) and has_replay: + # print("Step {}:\n{}".format(len(iter_aps.history), + # iter_aps.string(name="\titer_aps"))) + next_aps = list(iter_aps.shift_next(self)) + if len(next_aps) == 0: + # Note, this might happen as we are adding + # lookahead tokens from any successor, we might not + # always have a way to replay all tokens, in such + # case an error should be produced, but in the mean + # time, let's use the shift function as usual. + break + assert len(next_aps) == 1 + iter_aps = next_aps[0] + next_is_action = self.states[iter_aps.state].epsilon != [] + has_replay = iter_aps.replay != [] + # print("End at {}:\n{}".format(len(iter_aps.history), + # iter_aps.string(name="\titer_aps"))) + replay_list = [e.src for e in iter_aps.shift] + assert len(replay_list) >= 2 + replay_term = Replay(replay_list[1:]) + states_by_replay_term[replay_term].append(replay_list[0]) + + # Create FilterStates actions. + filter_by_replay_term = { + replay_term: FilterStates(states) + for replay_term, states in states_by_replay_term.items() + } + + # Convert the Reduce action to an Unwind action. + reduce_term, _ = next(iter(s.epsilon)) + if isinstance(reduce_term, Reduce): + unwind_term: Action = reduce_term.unwind + else: + assert isinstance(reduce_term, Seq) + assert isinstance(reduce_term.actions[-1], Reduce) + unwind_term = Seq(list(reduce_term.actions[:-1]) + [reduce_term.actions[-1].unwind]) + + # Remove the old Reduce edge if still present. + # print("Before:\n{}".format(reduce_state.stable_str(self.states))) + self.remove_edge(reduce_state, reduce_term, maybe_unreachable_set) + + # Add Unwind action. + # print("After:\n") + locations = reduce_state.locations + delayed: OrderedFrozenSet[DelayedAction] = OrderedFrozenSet(filter_by_replay_term.items()) + replay_size = 1 # Replay the unwound non-terminal + is_new, filter_state = self.new_state(locations, delayed, replay_size) + self.add_edge(reduce_state, unwind_term, filter_state.index) + if not is_new: + # The destination state already exists. Assert that all + # outgoing edges are matching what we would have generated. 
+ if len(filter_by_replay_term) == 1: + # There is only one predecessor, no need for a + # FilterState condition. + replay_term = next(iter(filter_by_replay_term)) + assert replay_term in filter_state + continue + + for replay_term, filter_term in filter_by_replay_term.items(): + assert filter_term in filter_state + replay_state = self.states[filter_state[filter_term]] + assert replay_term in replay_state + continue + + if len(filter_by_replay_term) == 1: + replay_term = next(iter(filter_by_replay_term)) + dest_idx = replay_term.replay_steps[-1] + # Do not add the FilterStates action, as there is only one. + # Add Replay actions from the filter_state to the destination. + self.add_edge(filter_state, replay_term, dest_idx) + else: + for replay_term, filter_term in filter_by_replay_term.items(): + dest_idx = replay_term.replay_steps[-1] + dest = self.states[dest_idx] + + # Add FilterStates action from the filter_state to the replay_state. + locations = dest.locations + delayed = OrderedFrozenSet(itertools.chain(dest.delayed_actions, [replay_term])) + is_new, replay_state = self.new_state(locations, delayed, replay_size) + self.add_edge(filter_state, filter_term, replay_state.index) + assert (not is_new) == (replay_term in replay_state) + + # Add Replay actions from the replay_state to the destination. + if is_new: + dest_idx = replay_term.replay_steps[-1] + self.add_edge(replay_state, replay_term, dest_idx) + # print(replay_state.stable_str(self.states)) + assert not replay_state.is_inconsistent() + + # print(filter_state.stable_str(self.states)) + # print(reduce_state.stable_str(self.states)) + assert not reduce_state.is_inconsistent() + assert not filter_state.is_inconsistent() + + consume(transform(), progress) + + def fold_replay_unwind(self, verbose: bool, progress: bool) -> None: + """Convert Replay action falling into Unwind action to an Unwind action which + replay less terms.""" + if verbose or progress: + print("Fold Replay followed by Unwind actions.") + + maybe_unreachable_set: OrderedSet[StateId] = OrderedSet() + + def try_transform(s: StateAndTransitions) -> bool: + if len(s.epsilon) != 1: + return False + replay_term, replay_dest_idx = next(iter(s.epsilon)) + if not isinstance(replay_term, Replay): + return False + replay_dest = self.states[replay_dest_idx] + if len(replay_dest.epsilon) != 1: + return False + unwind_term, unwind_dest_idx = next(iter(replay_dest.epsilon)) + if not unwind_term.update_stack(): + return False + stack_diff = unwind_term.update_stack_with() + if not stack_diff.reduce_stack(): + return False + if stack_diff.pop + stack_diff.replay <= 0: + return False + + # Remove replayed terms from the Unwind action. + replayed = replay_term.replay_steps + unshifted = min(stack_diff.replay + min(s.arguments, stack_diff.pop), len(replayed)) + if unshifted < len(replayed): + # We do not have all replayed terms as arguments, thus do not + # consume arguments + unshifted = min(stack_diff.replay, len(replayed)) + if unshifted == 0: + return False + new_unwind_term = unwind_term.unshift_action(unshifted) + new_replay = new_unwind_term.update_stack_with().replay + + # Replace the replay_term and unwind_term by terms which are + # avoiding extra replay actions. + self.remove_edge(s, replay_term, maybe_unreachable_set) + if len(replayed) == unshifted: + # The Unwind action replay more terms than what we originally + # had. The replay term is replaced by an Unwind edge instead. 
+ assert s.arguments >= -new_replay + self.add_edge(s, new_unwind_term, unwind_dest_idx) + else: + # The Unwind action replays and pops fewer terms than what we + # originally had. Thus the replay action is shortened and a new + # state is created to accommodate the new Unwind action. + assert unshifted >= 1 + new_replay_term = Replay(replayed[:-unshifted]) + implicit_replay_term = Replay(replayed[-unshifted:]) + locations = replay_dest.locations + delayed: OrderedFrozenSet[DelayedAction] + delayed = OrderedFrozenSet( + itertools.chain(replay_dest.delayed_actions, [implicit_replay_term])) + is_new, unwind_state = self.new_state(locations, delayed) + assert (not is_new) == (new_unwind_term in unwind_state) + + # Add new Replay and new Unwind actions. + self.add_edge(s, new_replay_term, unwind_state.index) + if is_new: + assert unwind_state.arguments >= -new_replay + self.add_edge(unwind_state, new_unwind_term, unwind_dest_idx) + assert not unwind_state.is_inconsistent() + assert not s.is_inconsistent() + return True + + def transform() -> typing.Iterator[None]: + for s in self.states: + if try_transform(s): + yield # progress bar + + consume(transform(), progress) + self.remove_unreachable_states(maybe_unreachable_set) + + def fold_identical_endings(self, verbose: bool, progress: bool) -> None: + # If 2 states have the same outgoing edges, then we can merge the 2 + # states into a single state, and rewrite all the backedges leading to + # these states to be replaced by edges going to the reference state. + if verbose or progress: + print("Fold identical endings.") + + def rewrite_backedges(state_list: typing.List[StateAndTransitions], + state_map: typing.Dict[StateId, StateId], + backrefs: typing.Dict[StateId, + typing.List[typing.Tuple[StateId, Action, StateId]]], + maybe_unreachable: OrderedSet[StateId]) -> bool: + all_backrefs = [] + new_backrefs = set() + for s in state_list: + all_backrefs.extend(backrefs[s.index]) + # All states have the same outgoing edges. Thus we replace all of + # them by a single state. We do that by replacing the edges which + # are targeting the states in the state_list by edges targeting the + # ref state. + ref = state_list.pop() + tmp_state_map = default_fwd_dict(state_map) + for s in state_list: + tmp_state_map[s.index] = ref.index + + for ref_s, ref_t, _d in all_backrefs: + new_backrefs.add((ref_s, ref_t.rewrite_state_indexes(tmp_state_map))) + if len(all_backrefs) != len(new_backrefs): + # Skip this rewrite if, when rewriting, we would cause + # some aliasing to happen between actions which are going to + # different states. + return False + + replace_edges = [e for s in state_list for e in s.backedges] + hit = False + for edge in replace_edges: + edge_term = edge.term + assert edge_term is not None + src = self.states[edge.src] + old_dest = src[edge_term] + # print("replace {} -- {} --> {}, by {} -- {} --> {}" + # .format(src.index, edge_term, src[edge_term], src.index, edge_term, ref.index)) + self.replace_edge(src, edge_term, ref.index, maybe_unreachable) + state_map[old_dest] = ref.index + hit = True + return hit + + def rewrite_if_same_outedges(state_list: typing.List[StateAndTransitions]) -> bool: + maybe_unreachable: OrderedSet[StateId] = OrderedSet() + backrefs = collections.defaultdict(list) + outedges = collections.defaultdict(list) + for s in state_list: + # Iterate first over actions, then over ordinary states.
+ self.assert_state_invariants(s) + outedges[tuple(s.edges())].append(s) + if s.epsilon == []: + continue + for t, d in s.edges(): + if not isinstance(t, Action): + continue + for r in t.state_refs(): + backrefs[r].append((s.index, t, d)) + hit = False + state_map: typing.Dict[StateId, StateId] = default_id_dict() + for same in outedges.values(): + if len(same) > 1: + hit = rewrite_backedges(same, state_map, backrefs, maybe_unreachable) or hit + if hit: + self.remove_unreachable_states(maybe_unreachable) + self.rewrite_state_indexes(state_map) + self.remove_all_unreachable_state(verbose, progress) + return hit + + def visit_table() -> typing.Iterator[None]: + hit = True + while hit: + yield # progress bar. + hit = rewrite_if_same_outedges(self.states) + + consume(visit_table(), progress) + + def group_epsilon_states(self, verbose: bool, progress: bool) -> None: + def all_action_inedges(s: StateAndTransitions) -> bool: + return all(isinstance(e.term, Action) for e in s.backedges) + shift_states, action_states = split(self.states, lambda s: len(s.epsilon) == 0) + from_act_action_states, from_shf_action_states = split(action_states, all_action_inedges) + self.states = [] + self.states.extend(shift_states) + self.states.extend(from_shf_action_states) + self.states.extend(from_act_action_states) + self.rewrite_reordered_state_indexes() + + def group_nonterminal_states(self, verbose: bool, progress: bool) -> None: + # This function is used to reduce the range of FilterStates values, + # such that the Rust compiler can compile FilterStates match statements + # to a table-switch. + freq_count = collections.Counter(nt for s in self.states for nt in s.nonterminals) + freq_nt, _ = zip(*freq_count.most_common()) + + def state_value(s: StateAndTransitions) -> float: + value = 0.0 + if len(s.epsilon) != 0: + return 4.0 + if len(s.nonterminals) == 0: + return 2.0 + i = 1.0 + for nt in freq_nt: + if nt in s: + value += i + i /= 2.0 + return -value + self.states.sort(key=state_value) + self.rewrite_reordered_state_indexes() + + def count_shift_states(self) -> int: + return sum(1 for s in self.states if s is not None and len(s.epsilon) == 0) + + def count_action_states(self) -> int: + return sum(1 for s in self.states if s is not None and len(s.epsilon) > 0) + + def count_action_from_shift_states(self) -> int: + def from_shift_states(s: StateAndTransitions) -> bool: + return any(not isinstance(e.term, Action) for e in s.backedges) + + return sum(1 for s in self.states if len(s.epsilon) > 0 and from_shift_states(s)) + + def prepare_debug_context(self) -> DebugInfo: + """To better filter the traversal of the grammar in debug context, we + pre-compute for each state the maximal depth it has within a + production. Therefore, if visiting a state does not increase the reducing + depth beyond the ability to shrink the shift list to 0, then we can + stop going deeper, as we have entered a different production.
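+ + The returned mapping is consumed by debug_context, which stops exploring an + APS as soon as the recorded depth of the last shifted state is smaller than + the number of shifted edges.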
""" + depths = collections.defaultdict(lambda: []) + for s in self.states: + if s is None or not s.epsilon: + continue + aps = APS.start(s.index) + for aps_next in aps.shift_next(self): + if not aps_next.reducing: + continue + for i, edge in enumerate(aps_next.stack): + depths[edge.src].append(i + 1) + return {s: max(ds) for s, ds in depths.items()} + + def debug_context( + self, + state: StateId, + split_txt: str = "; ", + prefix: str = "" + ) -> str: + """Reconstruct the grammar production by traversing the parse table.""" + if self.debug_info is False: + return "" + if self.debug_info is True: + self.debug_info = self.prepare_debug_context() + debug_info = typing.cast(typing.Dict[StateId, int], self.debug_info) + + record = [] + + def visit(aps: APS) -> bool: + # Stop after reducing once. + if aps.history == []: + return True + last = aps.history[-1].term + is_unwind = isinstance(last, Action) and last.update_stack() + has_shift_loop = len(aps.shift) != 1 + len(set(zip(aps.shift, aps.shift[1:]))) + can_reduce_later = True + try: + can_reduce_later = debug_info[aps.shift[-1].src] >= len(aps.shift) + except KeyError: + can_reduce_later = False + stop = is_unwind or has_shift_loop or not can_reduce_later + # Record state which are reducing at most all the shifted states. + save = stop and len(aps.shift) == 1 + save = save and is_unwind + if save: + assert isinstance(last, Action) + save = last.update_stack_with().nt in self.states[aps.shift[0].src] + if save: + record.append(aps) + return not stop + + self.aps_visitor(APS.start(state), visit) + + context: OrderedSet[str] = OrderedSet() + for aps in record: + assert aps.history != [] + action = aps.history[-1].term + assert isinstance(action, Action) + assert action.update_stack() + stack_diff = action.update_stack_with() + replay = stack_diff.replay + before = [repr(e.term) for e in aps.stack[:-1]] + after = [repr(e.term) for e in aps.history[:-1]] + prod = before + ["\N{MIDDLE DOT}"] + after + if replay < len(after) and replay > 0: + del prod[-replay:] + replay = 0 + if replay > len(after): + replay += 1 + if replay > 0: + prod = prod[:-replay] + ["[lookahead:"] + prod[-replay:] + ["]"] + txt = "{}{} ::= {}".format(prefix, repr(stack_diff.nt), " ".join(prod)) + context.add(txt) + + if split_txt is None: + return context + return split_txt.join(txt for txt in sorted(context)) diff --git a/third_party/rust/jsparagus/jsparagus/rewrites.py b/third_party/rust/jsparagus/jsparagus/rewrites.py new file mode 100644 index 0000000000..632ad78c85 --- /dev/null +++ b/third_party/rust/jsparagus/jsparagus/rewrites.py @@ -0,0 +1,735 @@ +"""Early-pipeline operations that error-check and lower grammars.""" + +from __future__ import annotations +# mypy: disallow-untyped-defs, disallow-incomplete-defs, disallow-untyped-calls + +import collections +import dataclasses +from dataclasses import dataclass +import typing + +from .grammar import (CallMethod, Element, End, ErrorSymbol, Exclude, Grammar, + LenientNt, Literal, LookaheadRule, Optional, + NoLineTerminatorHere, Nt, NtDef, NtParameter, Production, + ReduceExpr, ReduceExprOrAccept, Some, UnicodeCategory, + Var, is_concrete_element) +from .ordered import OrderedFrozenSet, OrderedSet +from .runtime import ErrorToken, ErrorTokenClass + + +# *** Checking for errors ***************************************************** + +T = typing.TypeVar("T") + + +def fix(f: typing.Callable[[T], T], start: T) -> T: + """Compute a fixed point of `f`, the hard way, starting from `start`.""" + prev, current = start, f(start) + 
while current != prev: + prev, current = current, f(current) + return current + + +def empty_nt_set(grammar: Grammar) -> typing.Dict[LenientNt, ReduceExprOrAccept]: + """Determine which nonterminals in `grammar` can produce the empty string. + + Return a dict {nt: expr} that maps each such nonterminal to the expr + that should be evaluated when reducing the empty string to nt. + So, for example, if we have a production + + a ::= b? c? => CallMethod("a", [0, 1]) + + then the resulting dictionary will contain the entry + `("a", CallMethod("a", [None, None]))`. + """ + + empties: typing.Dict[LenientNt, ReduceExprOrAccept] = {} + + def production_is_empty(p: Production) -> bool: + return all(isinstance(e, LookaheadRule) + or isinstance(e, Optional) + or (isinstance(e, Nt) and e in empties) + or e is NoLineTerminatorHere + for e in p.body) + + def evaluate_reducer_with_empty_matches(p: Production) -> ReduceExprOrAccept: + # partial evaluation of p.reducer + stack = [e for e in p.body if is_concrete_element(e)] + + Expr = typing.TypeVar("Expr", ReduceExpr, ReduceExprOrAccept) + + def eval(expr: Expr) -> Expr: + if expr is None: + return None + elif isinstance(expr, Some): + return Some(eval(expr.inner)) + elif isinstance(expr, CallMethod): + return dataclasses.replace( + expr, + args=tuple(eval(arg_expr) for arg_expr in expr.args) + ) + elif isinstance(expr, int): + e = stack[expr] + if isinstance(e, Optional): + return None + else: + assert isinstance(e, Nt) + result = empties[e] + assert not isinstance(result, str) + return result + elif expr == 'accept': + # Hmm, this is not ideal! Maybe 'accept' needs to take an + # argument so that the normal case is Accept(0) and this case + # is Accept(eval(expr.args[0])). + return 'accept' + else: + raise TypeError( + "internal error: unhandled reduce expression type {!r}" + .format(expr)) + + return eval(p.reducer) + + done = False + while not done: + done = True + for nt, nt_def in grammar.nonterminals.items(): + if nt not in empties: + for p in nt_def.rhs_list: + if production_is_empty(p): + if nt in empties: + raise ValueError( + "ambiguous grammar: multiple productions for " + "{!r} match the empty string" + .format(nt)) + done = False + empties[nt] = evaluate_reducer_with_empty_matches(p) + return empties + + +def check_cycle_free(grammar: Grammar) -> None: + """Throw an exception if any nonterminal in `grammar` produces itself + via a cycle of 1 or more productions. + """ + empties = empty_nt_set(grammar) + + # OK, first find out which nonterminals directly produce which other + # nonterminals (after possibly erasing some optional/empty nts). + direct_produces: typing.Dict[LenientNt, typing.Set[Nt]] = {} + for orig in grammar.nonterminals: + direct_produces[orig] = set() + for source_production in grammar.nonterminals[orig].rhs_list: + for rhs, _r in expand_optional_symbols_in_rhs(source_production.body, grammar, empties): + result: typing.List[Nt] = [] + all_possibly_empty_so_far = True + # If we break out of the following loop, that means it turns + # out that this production does not produce *any* strings that + # are just a single nonterminal. + for e in rhs: + if grammar.is_terminal(e): + break # no good, this production contains a terminal + elif isinstance(e, Nt): + if e in empties: + if all_possibly_empty_so_far: + result.append(e) + else: + if not all_possibly_empty_so_far: + # Give up - we have 2+ nonterminals that can't + # be empty. 
+ break + all_possibly_empty_so_far = False + result = [e] + elif isinstance(e, Exclude): + if isinstance(e.inner, Nt): + result.append(e.inner) + elif isinstance(e, LookaheadRule): + # Ignore the restriction. We lose a little precision + # here, and could report a cycle where there isn't one, + # but it's unlikely in real-world grammars. + pass + elif e is NoLineTerminatorHere: + # This doesn't affect the property we're checking. + pass + elif isinstance(e, Literal): + if e.text != "": + # This production contains a non-empty character, + # therefore it cannot correspond to an empty cycle. + break + elif isinstance(e, UnicodeCategory): + # This production consume a class of character, + # therefore it cannot correspond to an empty cycle. + break + elif isinstance(e, End): + # This production consume the End meta-character, + # therefore it cannot correspond to an empty cycle, + # even if this character is expect to be produced + # infinitely once the end is reached. + break + elif isinstance(e, CallMethod): + # This production execute code, but does not consume + # any input. + pass + else: + # Optional is not possible because we called + # expand_optional_symbols_in_rhs. ErrorSymbol + # effectively matches the empty string (though only if + # nothing else matches). + assert isinstance(e, ErrorSymbol) + else: + # If we get here, we didn't break, so our results are good! + # nt can definitely produce all the nonterminals in result. + direct_produces[orig] |= set(result) + + def step( + produces: typing.Dict[LenientNt, typing.Set[Nt]] + ) -> typing.Dict[LenientNt, typing.Set[Nt]]: + return { + orig: dest | set(b for a in dest for b in produces[a]) + for orig, dest in produces.items() + } + produces = fix(step, direct_produces) + + for nt in grammar.nonterminals: + if nt in produces[nt]: + raise ValueError( + "invalid grammar: nonterminal {} can produce itself" + .format(nt)) + + +def check_lookahead_rules(grammar: Grammar) -> None: + """Check that no LookaheadRule appears at the end of a production (or before + elements that can produce the empty string). + + If there are any offending lookahead rules, throw a ValueError. + """ + + empties = empty_nt_set(grammar) + + check_cycle_free(grammar) + for nt in grammar.nonterminals: + for source_production in grammar.nonterminals[nt].rhs_list: + body = source_production.body + for rhs, _r in expand_optional_symbols_in_rhs(body, grammar, empties): + # XXX BUG: The next if-condition is insufficient, since it + # fails to detect a lookahead restriction followed by a + # nonterminal that can match the empty string. 
+ if rhs and isinstance(rhs[-1], LookaheadRule): + raise ValueError( + "invalid grammar: lookahead restriction " + "at end of production: {}" + .format(grammar.production_to_str(nt, body))) + + +def check_no_line_terminator_here(grammar: Grammar) -> None: + empties = empty_nt_set(grammar) + + def check(e: Element, nt: LenientNt, body: typing.List[Element]) -> None: + if grammar.is_terminal(e): + pass + elif isinstance(e, Nt): + if e in empties: + raise ValueError( + "invalid grammar: [no LineTerminator here] cannot appear next to " + "a nonterminal that matches the empty string\n" + "in production: {}".format(grammar.production_to_str(nt, body))) + else: + raise ValueError( + "invalid grammar: [no LineTerminator here] must appear only " + "between terminals and/or nonterminals\n" + "in production: {}".format(grammar.production_to_str(nt, body))) + + for nt in grammar.nonterminals: + for production in grammar.nonterminals[nt].rhs_list: + body = production.body + for i, e in enumerate(body): + if e is NoLineTerminatorHere: + if i == 0 or i == len(body) - 1: + raise ValueError( + "invalid grammar: [no LineTerminator here] must be between two symbols\n" + "in production: {}".format(grammar.production_to_str(nt, body))) + check(body[i - 1], nt, body) + check(body[i + 1], nt, body) + + +def expand_parameterized_nonterminals(grammar: Grammar) -> Grammar: + """Replace parameterized nonterminals with specialized copies. + + For example, a single pair `nt_name: NtDef(params=('A', 'B'), ...)` in + `grammar.nonterminals` will be replaced with (assuming A and B are boolean + parameters) up to four pairs, each having an Nt object as the key and an + NtDef with no parameters as the value. + + `grammar.nonterminals` must have string keys. + + Returns a new copy of `grammar` with Nt keys, whose NtDefs all have + `nt_def.params == []`. + """ + + todo = collections.deque(grammar.goals()) + new_nonterminals = {} + + def expand(nt: Nt) -> NtDef: + """Expand grammar.nonterminals[nt](**args). + + Returns the expanded NtDef, which contains no conditional + productions or Nt objects. 
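+ + As a purely illustrative example (the nonterminal and parameter are + hypothetical): expanding Nt('Stmt', (('Yield', True),)) keeps only the + productions whose condition is None or ('Yield', True), and resolves + Var('Yield') arguments in the bodies to True.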
+ """ + + if nt.args is None: + args_dict = None + else: + args_dict = dict(nt.args) + + def evaluate_arg(arg: NtParameter) -> NtParameter: + if isinstance(arg, Var): + return args_dict[arg.name] + else: + return arg + + def expand_element(e: Element) -> Element: + if isinstance(e, Optional): + return Optional(expand_element(e.inner)) + elif isinstance(e, Exclude): + return Exclude(expand_element(e.inner), tuple(map(expand_element, e.exclusion_list))) + elif isinstance(e, Nt): + args = tuple((name, evaluate_arg(arg)) + for name, arg in e.args) + e = Nt(e.name, args) + if e not in new_nonterminals: + todo.append(e) + return e + else: + return e + + def expand_production(p: Production) -> Production: + return p.copy_with( + body=[expand_element(e) for e in p.body], + condition=None) + + def expand_productions(nt_def: NtDef) -> NtDef: + result = [] + for p in nt_def.rhs_list: + if p.condition is None: + included = True + else: + param, value = p.condition + included = (args_dict[param] == value) + if included: + result.append(expand_production(p)) + return NtDef((), result, nt_def.type) + + nt_def = grammar.nonterminals[nt.name] + assert tuple(name for name, value in nt.args) == nt_def.params + return expand_productions(nt_def) + + while todo: + nt = todo.popleft() + if nt not in new_nonterminals: # not already expanded + new_nonterminals[nt] = expand(nt) + + # "type: ignore" because this runs afoul of Python's covariance rules for + # Mapping; but it conforms to the intended type. + return grammar.with_nonterminals(new_nonterminals) # type: ignore + + +# *** Start sets and follow sets ********************************************** + +EMPTY = "(empty)" +END = None + +TerminalOrEmpty = str +TerminalOrEmptyOrErrorToken = typing.Union[str, ErrorTokenClass] +StartSets = typing.Dict[Nt, OrderedFrozenSet[TerminalOrEmptyOrErrorToken]] + + +def start_sets(grammar: Grammar) -> StartSets: + """Compute the start sets for nonterminals in a grammar. + + A nonterminal's start set is the set of tokens that a match for that + nonterminal may start with, plus EMPTY if it can match the empty string + and ErrorToken if it can start with an error. + """ + + # How this works: Note that we can replace the words "match" and "start + # with" in the definition above with more queries about start sets. + # + # 1. A nonterminal's start set contains a terminal `t` if any of its + # productions contains either `t` or a nonterminal with `t` in *its* + # start set, preceded only by zero or more nonterminals that have EMPTY + # in *their* start sets. Plus: + # + # 2. A nonterminal's start set contains EMPTY if any of its productions + # consists entirely of nonterminals that have EMPTY in *their* start + # sets. + # + # This definition is rather circular. We want the smallest collection of + # start sets satisfying these rules, and we get that by iterating to a + # fixed point. + + assert all(isinstance(nt, Nt) for nt in grammar.nonterminals) + start: StartSets + start = {typing.cast(Nt, nt): OrderedFrozenSet() for nt in grammar.nonterminals} + done = False + while not done: + done = True + for nt, nt_def in grammar.nonterminals.items(): + assert isinstance(nt, Nt) + # Compute start set for each `prod` based on `start` so far. + # Could be incomplete, but we'll ratchet up as we iterate. 
+ nt_start = OrderedFrozenSet( + t for p in nt_def.rhs_list for t in seq_start(grammar, start, p.body)) + if nt_start != start[nt]: + start[nt] = nt_start + done = False + return start + + +def seq_start( + grammar: Grammar, + start: StartSets, + seq: typing.List[Element] +) -> OrderedFrozenSet[TerminalOrEmptyOrErrorToken]: + """Compute the start set for a sequence of elements.""" + s: OrderedSet[TerminalOrEmptyOrErrorToken] = OrderedSet([EMPTY]) + for i, e in enumerate(seq): + if EMPTY not in s: # preceding elements never match the empty string + break + s.remove(EMPTY) + if grammar.is_terminal(e): + assert isinstance(e, str) + s.add(e) + elif isinstance(e, ErrorSymbol): + s.add(ErrorToken) + elif isinstance(e, Nt): + s |= start[e] + elif e is NoLineTerminatorHere: + s.add(EMPTY) + else: + assert isinstance(e, LookaheadRule) + future = seq_start(grammar, start, seq[i + 1:]) + if e.positive: + future &= e.set + else: + future -= e.set + return OrderedFrozenSet(future) + return OrderedFrozenSet(s) + + +StartSetCache = typing.List[typing.List[OrderedFrozenSet[TerminalOrEmptyOrErrorToken]]] + + +def make_start_set_cache( + grammar: Grammar, + prods: typing.List[Prod], + start: StartSets +) -> StartSetCache: + """Compute start sets for all suffixes of productions in the grammar. + + Returns a list of lists `cache` such that + `cache[n][i] == seq_start(grammar, start, prods[n][i:])`. + + (The cache is for speed, since seq_start was being called millions of + times.) + """ + + def suffix_start_list( + rhs: typing.List[Element] + ) -> typing.List[OrderedFrozenSet[TerminalOrEmptyOrErrorToken]]: + sets: typing.List[OrderedFrozenSet[TerminalOrEmptyOrErrorToken]] + sets = [OrderedFrozenSet([EMPTY])] + for e in reversed(rhs): + s: OrderedFrozenSet[TerminalOrEmptyOrErrorToken] + if grammar.is_terminal(e): + assert isinstance(e, str) + s = OrderedFrozenSet([e]) + elif isinstance(e, ErrorSymbol): + s = OrderedFrozenSet([ErrorToken]) + elif isinstance(e, Nt): + s = start[e] + if EMPTY in s: + s = OrderedFrozenSet((s - {EMPTY}) | sets[-1]) + elif e is NoLineTerminatorHere: + s = sets[-1] + else: + assert isinstance(e, LookaheadRule) + if e.positive: + s = OrderedFrozenSet(sets[-1] & e.set) + else: + s = OrderedFrozenSet(sets[-1] - e.set) + assert isinstance(s, OrderedFrozenSet) + assert s == seq_start(grammar, start, rhs[len(rhs) - len(sets):]) + sets.append(s) + sets.reverse() + assert sets == [seq_start(grammar, start, rhs[i:]) + for i in range(len(rhs) + 1)] + return sets + + return [suffix_start_list(prod.rhs) for prod in prods] + + +FollowSet = OrderedSet[typing.Union[TerminalOrEmptyOrErrorToken, None]] +FollowSets = typing.DefaultDict[Nt, FollowSet] + + +def follow_sets( + grammar: Grammar, + prods_with_indexes_by_nt: typing.DefaultDict[ + LenientNt, + typing.List[typing.Tuple[int, typing.List[Element]]] + ], + start_set_cache: StartSetCache +) -> FollowSets: + """Compute all follow sets for nonterminals in a grammar. + + The follow set for a nonterminal `A`, as defined in the book, is "the set + of terminals that can appear immediately to the right of `A` in some + sentential form"; plus, "If `A` can be the rightmost symbol in some + sentential form, then $ is in FOLLOW(A)." + + Returns a default-dictionary mapping nts to follow sets. + """ + + # Set of nonterminals already seen, including those we are in the middle of + # analyzing. The algorithm starts at `goal` and walks all reachable + # nonterminals, recursively. + visited = set() + + # The results. 
By definition, nonterminals that are not reachable from the + # goal nt have empty follow sets. + follow: FollowSets = collections.defaultdict(OrderedSet) + + # If `(x, y) in subsumes_relation`, then x can appear at the end of a + # production of y, and therefore follow[x] should be <= follow[y]. + # (We could maintain that invariant throughout, but at present we + # brute-force iterate to a fixed point at the end.) + subsumes_relation: OrderedSet[typing.Tuple[Nt, Nt]] + subsumes_relation = OrderedSet() + + # `END` is $. It is, of course, in follow[each goal nonterminal]. It gets + # into other nonterminals' follow sets through the subsumes relation. + for init_nt in grammar.init_nts: + assert isinstance(init_nt, Nt) + follow[init_nt].add(END) + + def visit(nt: Nt) -> None: + if nt in visited: + return + visited.add(nt) + for prod_index, rhs in prods_with_indexes_by_nt[nt]: + for i, symbol in enumerate(rhs): + if isinstance(symbol, Nt): + visit(symbol) + after = start_set_cache[prod_index][i + 1] + if EMPTY in after: + after -= {EMPTY} + subsumes_relation.add((symbol, nt)) + follow[symbol] |= after + + for nt in grammar.init_nts: + assert isinstance(nt, Nt) + visit(nt) + + # Now iterate to a fixed point on the subsumes relation. + done = False + while not done: + done = True # optimistically + for target, source in subsumes_relation: + if follow[source] - follow[target]: + follow[target] |= follow[source] + done = False + + return follow + + +# *** Lowering **************************************************************** + +# At this point, lowered productions start getting farther from the original +# source. We need to associate them with the original grammar in order to +# produce correct output, so we use Prod values to represent productions. +# +# - `nt` is the name of the nonterminal as it appears in the original +# grammar. +# +# - `index` is the index of the source production, within nt's productions, +# in the original grammar. +# +# - `rhs` is the fully lowered/expanded right-hand-side of the production. +# +# There may be many productions in a grammar that all have the same `nt` and +# `index` because they were all produced from the same source production. +@dataclass +class Prod: + nt: Nt + index: int + rhs: typing.List + reducer: ReduceExprOrAccept + + +def expand_optional_symbols_in_rhs( + rhs: typing.List[Element], + grammar: Grammar, + empties: typing.Dict[LenientNt, ReduceExprOrAccept], + start_index: int = 0 +) -> typing.Iterable[typing.Tuple[typing.List[Element], typing.Dict[int, ReduceExpr]]]: + """Expand a sequence with optional symbols into sequences that have none. + + rhs is a list of symbols, possibly containing optional elements. This + yields every list that can be made by replacing each optional element + either with its .inner value, or with nothing. + + Each list is accompanied by the list of the indices of optional elements in + `rhs` that were dropped. + + For example, `expand_optional_symbols_in_rhs(["if", Optional("else")])` + yields the two pairs `(["if"], [1])` and `["if", "else"], []`. + """ + + replacement: ReduceExpr + for i in range(start_index, len(rhs)): + e = rhs[i] + if isinstance(e, Optional): + if isinstance(e.inner, Nt) and e.inner in empties: + # If this is already possibly-empty in the input grammar, it's an + # error! The grammar is ambiguous. 
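+ # (For instance, an Optional(B) element where the nonterminal B can + # already match the empty string on its own.)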
+ raise ValueError( + "ambiguous grammar: {} is ambiguous because {} can match " + "the empty string" + .format(grammar.element_to_str(e), + grammar.element_to_str(e.inner))) + replacement = None + break + elif isinstance(e, Nt) and e in empties: + empty_expr = empties[e] + # The replacement can't be 'accept' because that only happens with + # InitNt nonterminals, which are never used in productions. + assert not isinstance(empty_expr, str) + replacement = empty_expr + break + else: + yield rhs[start_index:], {} + return + + for expanded, r in expand_optional_symbols_in_rhs(rhs, grammar, empties, i + 1): + e = rhs[i] + rhs_inner = e.inner if isinstance(e, Optional) else e + # without rhs[i] + r2 = r.copy() + r2[i] = replacement + yield rhs[start_index:i] + expanded, r2 + # with rhs[i] + yield rhs[start_index:i] + [rhs_inner] + expanded, r + + +def expand_all_optional_elements(grammar: Grammar) -> typing.Tuple[ + Grammar, + typing.List[Prod], + typing.DefaultDict[LenientNt, typing.List[typing.Tuple[int, typing.List[Element]]]] +]: + """Expand optional elements in the grammar. + + We replace each production that contains an optional element with two + productions: one with and one without. Downstream of this step, we can + ignore the possibility of optional elements. + """ + expanded_grammar: typing.Dict[LenientNt, NtDef] = {} + + # This was capturing the set of empty production to simplify the work of + # the previous algorithm which was trying to determine the lookahead. + # However, with the LR0Generator this is no longer needed as we are + # generating deliberatly inconsistent parse table states, which are then + # properly fixed by adding lookahead information where needed, and without + # bugs! + # empties = empty_nt_set(grammar) + empties: typing.Dict[LenientNt, ReduceExprOrAccept] = {} + + # Put all the productions in one big list, so each one has an index. We + # will use the indices in the action table (as reduce action payloads). + prods: typing.List[Prod] = [] + prods_with_indexes_by_nt: \ + typing.DefaultDict[LenientNt, typing.List[typing.Tuple[int, typing.List[Element]]]] = \ + collections.defaultdict(list) + + for nt, nt_def in grammar.nonterminals.items(): + assert isinstance(nt, Nt) + prods_expanded = [] + for prod_index, p in enumerate(nt_def.rhs_list): + # Aggravatingly, a reduce-expression that's an int is not + # simply an offset into p.body. It only counts "concrete" + # elements. Make a mapping for converting reduce-expressions to + # offsets. + reduce_expr_to_offset = [ + i + for i, e in enumerate(p.body) + if is_concrete_element(e) + ] + + for pair in expand_optional_symbols_in_rhs(p.body, grammar, empties): + expanded_rhs, removals = pair + + Expr = typing.TypeVar("Expr", ReduceExpr, ReduceExprOrAccept) + + def adjust_reduce_expr(expr: Expr) -> Expr: + if isinstance(expr, int): + i = reduce_expr_to_offset[expr] + if i in removals: + return removals[i] + was_optional = isinstance(p.body[i], Optional) + expr -= sum(1 for r in removals if r < i) + if was_optional: + return Some(expr) + else: + return expr + elif expr is None: + return None + elif isinstance(expr, Some): + return Some(adjust_reduce_expr(expr.inner)) + elif isinstance(expr, CallMethod): + return dataclasses.replace( + expr, + args=tuple(adjust_reduce_expr(arg) + for arg in expr.args) + ) + elif expr == 'accept': + # doesn't need to be adjusted because 'accept' isn't + # turned into code downstream. 
+ return 'accept' + else: + raise TypeError( + "internal error: unrecognized element {!r}" + .format(expr)) + + adjusted_reducer = adjust_reduce_expr(p.reducer) + prods_expanded.append( + Production(body=expanded_rhs, + reducer=adjusted_reducer)) + prods.append(Prod(nt, prod_index, expanded_rhs, + adjusted_reducer)) + prods_with_indexes_by_nt[nt].append( + (len(prods) - 1, expanded_rhs)) + expanded_grammar[nt] = nt_def.with_rhs_list(prods_expanded) + + return (grammar.with_nonterminals(expanded_grammar), + prods, + prods_with_indexes_by_nt) + + +class CanonicalGrammar: + __slots__ = ["prods", "prods_with_indexes_by_nt", "grammar"] + + prods: typing.List[Prod] + prods_with_indexes_by_nt: typing.Mapping[ + LenientNt, + typing.List[typing.Tuple[int, typing.List[Element]]]] + grammar: Grammar + + def __init__(self, grammar: Grammar) -> None: + # Step by step, we check the grammar and lower it to a more primitive form. + grammar = expand_parameterized_nonterminals(grammar) + check_cycle_free(grammar) + # check_lookahead_rules(grammar) + check_no_line_terminator_here(grammar) + grammar, prods, prods_with_indexes_by_nt = \ + expand_all_optional_elements(grammar) + + self.prods = prods + self.prods_with_indexes_by_nt = prods_with_indexes_by_nt + self.grammar = grammar diff --git a/third_party/rust/jsparagus/jsparagus/runtime.py b/third_party/rust/jsparagus/jsparagus/runtime.py new file mode 100644 index 0000000000..cf295dc52c --- /dev/null +++ b/third_party/rust/jsparagus/jsparagus/runtime.py @@ -0,0 +1,317 @@ +"""Runtime support for jsparagus-generated parsers.""" + +# Nt is unused here, but we re-export it. +from .grammar import Nt, InitNt, End +from .lexer import UnexpectedEndError +import collections +from dataclasses import dataclass + + +__all__ = ['ACCEPT', 'ERROR', 'Nt', 'InitNt', 'End', 'Parser', 'ErrorToken'] + +# Actions are encoded as 64-bit signed integers, with the following meanings: +# - n in range(0, 0x8000_0000_0000_0000) - shift to state n +# - n in range(0x8000_0000_0000_0000, 0xc000_0000_0000_0000) - call special_case(n & SPECIAL_CASE_MASK) +# - n == ERROR (0xbfff_ffff_ffff_fffe) +# - n == ACCEPT (0xbfff_ffff_ffff_ffff) +# - n in range(0xc000_0000_0000_0000, 0x1_0000_0000_0000_0000) - reduce by production -n - 1 + +SPECIAL_CASE_MASK = 0x3fff_ffff_ffff_ffff +SPECIAL_CASE_TAG = -0x8000_0000_0000_0000 +ACCEPT = 0x_bfff_ffff_ffff_ffff - (1 << 64) +ERROR = ACCEPT - 1 + + +@dataclass(frozen=True) +class ErrorTokenClass: + def __repr__(self): + return 'ErrorToken' + + +ErrorToken = ErrorTokenClass() + + +def throw_syntax_error(actions, state, t, tokens): + assert t is not None + if isinstance(state, StateTermValue): + state = state.state + expected = set(actions[state].keys()) + expected = set(e for e in expected if not isinstance(e, Nt)) + + # Tidy up the `expected` set a bit. + if End() in expected: + expected.remove(End()) + expected.add("end of input") + if ErrorToken in expected: + # This is possible because we restore the stack in _try_error_handling + # after reducing and then failing to find a recovery rule after all. + # But don't tell people in error messages that an error is one of the + # things we expect. It makes no sense. 
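+        # After dropping ErrorToken, the remaining entries are ordinary
+        # terminals (plus possibly "end of input"), so the message built
+        # below reads roughly like "expected one of [';', '}'], got 'if'"
+        # (the tokens shown here are only illustrative).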
+ expected.remove(ErrorToken) + + if len(expected) < 2: + tokens.throw("expected {!r}, got {!r}".format(list(expected)[0], t)) + else: + tokens.throw("expected one of {!r}, got {!r}" + .format(sorted(expected), t)) + + +StateTermValue = collections.namedtuple("StateTermValue", "state term value new_line") + + +class ShiftError(Exception): + pass + + +class ShiftAccept(Exception): + pass + + +class Parser: + """Parser using jsparagus-generated tables. + + The usual design is, a parser object consumes a token iterator. + This Parser is not like that. Instead, the lexer feeds tokens to it + by calling `parser.write_terminal(lexer, token)` repeatedly, then + `parser.close(lexer)`. + + The parser uses these methods of the lexer object: + + * lexer.take() - Return data associated with a token, like the + numeric value of an int literal token. + + * lexer.throw(message) - Throw a syntax error. (This is on the lexer + because the lexer has the current position.) + + * lexer.throw_unexpected_end() - Throw a syntax error after we + successfully parsed the whole file except more tokens were expected at + the end. + + """ + + def __init__(self, actions, error_codes, entry_state, methods): + self.actions = actions + self.stack = [StateTermValue(entry_state, None, None, False)] + self.replay = [] + self.flags = collections.defaultdict(lambda: []) + self.error_codes = error_codes + self.methods = methods + self.closed = False + self.debug = False + self.is_simulator = False + self.last_shift = None + + def clone(self): + return Parser(self.actions, self.error_codes, 0, self.methods) + + def simulator_clone(self): + """Make a copy of this parser for simulation. + + The copy has a version of the self.reductions table that never actually + does anything. + + This is absurdly expensive and is for very odd and special use cases. + """ + p = self.clone() + p.stack = self.stack[:] + p.replay = self.replay[:] + p.debug = self.debug + p.is_simulator = True + return p + + def _str_stv(self, stv): + # NOTE: replace this function by repr(), to inspect wrong computations. + val = '' + if stv.value: + val = '*' + return "-- {} {}--> {}".format(stv.term, val, stv.state) + + def _dbg_where(self, t=""): + name = "stack" + if self.is_simulator: + name = "simulator" + print("{}: {}; {}\nexpect one of: {}".format( + name, + " ".join(self._str_stv(s) for s in self.stack), t, + repr(self.actions[self.stack[-1].state]) + )) + + def _shift(self, stv, lexer): + state = self.stack[-1].state + if self.debug: + self._dbg_where("shift: {}".format(str(stv.term))) + if not isinstance(self.actions[state], dict): + # This happens after raising a ShiftAccept error. + if stv.term == End(): + raise ShiftAccept() + raise ShiftError() + self.last_shift = (state, stv) + while True: + goto = self.actions[state].get(stv.term, ERROR) + if goto == ERROR: + if self.debug: + self._dbg_where("(error)") + self._try_error_handling(lexer, stv) + stv = self.replay.pop() + if self.debug: + self._dbg_where("error: {}".format(str(stv.term))) + continue + state = goto + self.stack.append(StateTermValue(state, stv.term, stv.value, stv.new_line)) + action = self.actions[state] + if not isinstance(action, dict): # Action + if self.debug: + self._dbg_where("(action {})".format(state)) + action(self, lexer) + state = self.stack[-1].state + action = self.actions[state] + # Actions should always unwind or do an epsilon transition to a + # shift state. 
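+                # (Each entry of self.actions is either a dict mapping terminals
+                # and nonterminals to destination states, or a callable action
+                # taking (parser, lexer); the loop above treats anything that is
+                # not a dict as such an action and runs it.)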
+ assert isinstance(action, dict) + if self.replay != []: + stv = self.replay.pop() + if self.debug: + self._dbg_where("replay: {}".format(repr(stv.term))) + else: + break + + def replay_action(self, dest): + # This code emulates the code which would be executed by the shift + # function, if we were to return to this shift function instead of + # staying within the action functions. The destination provided as + # argument should match the content of the parse table, otherwise this + # would imply that the replay action does not encode a transition from + # the parse table. + state = self.stack[-1].state + stv = self.replay.pop() + if self.debug: + self._dbg_where("(inline-replay: {})".format(repr(stv.term))) + goto = self.actions[state].get(stv.term, ERROR) + assert goto == dest + self.stack.append(StateTermValue(dest, stv.term, stv.value, stv.new_line)) + + def shift_list(self, stv_list, lexer): + self.replay.extend(reversed(stv_list)) + + def write_terminal(self, lexer, t): + assert not self.closed + try: + stv = StateTermValue(0, t, lexer.take(), lexer.saw_line_terminator()) + self._shift(stv, lexer) + except ShiftAccept: + if self.debug: + self._dbg_where("(write_terminal accept)") + if self.replay != []: + state, stv = self.last_shift + throw_syntax_error(self.actions, state, lexer.take(), lexer) + except ShiftError: + state, stv = self.last_shift + throw_syntax_error(self.actions, state, lexer.take(), lexer) + + def close(self, lexer): + assert not self.closed + self.closed = True + try: + self._shift(StateTermValue(0, End(), End(), False), lexer) + except ShiftAccept: + if self.debug: + self._dbg_where("(close accept)") + print(repr(self.stack)) + while self.stack[-1].term == End(): + self.stack.pop() + assert len(self.stack) == 2 + assert self.stack[0].term is None + assert isinstance(self.stack[1].term, Nt) + return self.stack[1].value + + def top_state(self): + return self.stack[-1].state + + def check_not_on_new_line(self, lexer, peek): + if peek <= 0: + raise ValueError("check_not_on_new_line got an impossible peek offset") + if not self.stack[-peek].new_line: + return True + for _ in range(peek - 1): + self.replay.append(self.stack.pop()) + stv = self.stack.pop() + self._try_error_handling(lexer, stv) + return False + + def _try_error_handling(self, lexer, stv): + # Error recovery version of the code in write_terminal. Three differences + # between this and write_terminal are commented below. + if stv.term is ErrorToken: + if stv.value == End(): + lexer.throw_unexpected_end() + raise + throw_syntax_error(self.actions, self.stack[-1], stv.value, lexer) + raise + + state = self.stack[-1].state + error_code = self.error_codes[state] + if error_code is not None: + self.on_recover(error_code, lexer, stv) + self.replay.append(stv) + self.replay.append(StateTermValue(0, ErrorToken, stv.value, stv.new_line)) + elif stv.term == End(): + lexer.throw_unexpected_end() + raise + else: + throw_syntax_error(self.actions, self.stack[-1], stv.value, lexer) + raise + + def on_recover(self, error_code, lexer, stv): + """Called when the grammar says to recover from a parse error. + + Subclasses can override this to add custom code when an ErrorSymbol in + a production is matched. This base-class implementation does nothing, + allowing the parser to recover from the error silently. + """ + pass + + def can_accept_terminal(self, lexer, t): + """Return True if the terminal `t` is OK next. + + False if it's an error. `t` can be None, querying if we can accept + end-of-input. 
+ """ + class BogusLexer: + def throw_unexpected_end(self): + raise UnexpectedEndError("") + + def throw(self, message): + raise SyntaxError(message) + + def take(self): + return str(t) + + def saw_line_terminator(self): + return lexer.saw_line_terminator() + + sim = self.simulator_clone() + try: + sim.write_terminal(BogusLexer(), t) + except Exception: + return False + return True + + def can_close(self): + """Return True if self.close() would succeed.""" + + # The easy case: no error, parsing just succeeds. + # The hard case: maybe error-handling would succeed? + # The easiest thing is simply to run the method. + class BogusLexer: + def throw_unexpected_end(self): + raise UnexpectedEndError("") + + def throw(self, message): + raise SyntaxError(message) + + sim = self.simulator_clone() + try: + sim.close(BogusLexer()) + except SyntaxError: + return False + return True diff --git a/third_party/rust/jsparagus/jsparagus/types.py b/third_party/rust/jsparagus/jsparagus/types.py new file mode 100644 index 0000000000..f62f199572 --- /dev/null +++ b/third_party/rust/jsparagus/jsparagus/types.py @@ -0,0 +1,326 @@ +"""Type inference for reduce expressions. + +The nonterminals and reduce expressions in a grammar can have types, to support +generating parsers in typeful languages. Types are represented by `Type` objects. + +A `TypeVar` is a type variable that might be bound to any type. This is used +only during inference. So during inference, a type is either a `Type` or a +`TypeVar` + +In addition, MethodType simply gathers together a return type and a list of +argument types. + +See infer_types() for more. +""" + +from __future__ import annotations +# mypy: disallow-untyped-defs, disallow-incomplete-defs, disallow-untyped-calls + +from dataclasses import dataclass +import typing +from . import grammar + + +@dataclass(frozen=True) +class Lifetime: + name: str + + def __str__(self) -> str: + return "'" + self.name + + +_all_types = {} + + +@dataclass(frozen=True) +class Type: + name: str + args: typing.Tuple[TypeParameter, ...] = () + + def __new__(cls, name: str, args: typing.Tuple[TypeParameter, ...] = ()) -> Type: + assert isinstance(args, tuple) + + # caching + key = name, args + if key not in _all_types: + obj = super().__new__(cls) + _all_types[key] = obj + return _all_types[key] + + def __getnewargs__(self) -> typing.Tuple[str, typing.Tuple[TypeParameter, ...]]: + return (self.name, self.args) + + def __str__(self) -> str: + if self.args: + return '{}<{}>'.format(self.name, ', '.join(map(str, self.args))) + else: + return self.name + + def __repr__(self) -> str: + if self.args: + return 'Type({!r}, {!r})'.format(self.name, self.args) + else: + return 'Type({!r})'.format(self.name) + + +UnitType = Type('Unit') +TokenType = Type('Token') + +# The type of expressions that can't be fully evaluated, like Rust `panic!()`; +# likewise, the return type of functions that don't return. +NoReturnType = Type('NoReturn') + + +class TypeVar: + """A type variable, used only during type inference. + + The point of type inference is to assign each method and each nonterminal a + return type; we start by assigning each one a type variable and then do + unification, a la Hindley-Milner. + + Each type variable may be given a str `name` and a positive int + `precedence`. These are used at the end of type inference, if we still + don't know a concrete type for this variable--which is often the case for + nonterminals. + + The precedence is used when two type variables are unified, to choose the + better name. 
(Nonterminal names should take precedence over method names.) + Greater precedence numbers mean higher precedence. + """ + __slots__ = ['name', 'precedence', 'value'] + + name: typing.Optional[str] + precedence: int + value: typing.Optional[TypeOrTypeVar] + + def __init__( + self, + name: typing.Optional[str] = None, + precedence: int = 0 + ) -> None: + assert (precedence > 0) == (name is not None) + self.name = name + self.precedence = precedence + self.value = None + + def __str__(self) -> str: + return 'TypeVar({!r})'.format(self.name) + + +TypeOrTypeVar = typing.Union[Type, TypeVar] +TypeParameter = typing.Union[Type, TypeVar, Lifetime] + + +class JsparagusTypeError(Exception): + def annotate(self, line: str) -> None: + message, *rest = self.args + message = line + "\n" + message + self.args = message, *rest + + @classmethod + def clash(cls, expected: TypeParameter, actual: TypeParameter) -> JsparagusTypeError: + return cls("expected type {}, got type {}".format(expected, actual)) + + +def deref(t: TypeOrTypeVar) -> TypeOrTypeVar: + if isinstance(t, TypeVar): + if t.value is not None: + t.value = deref(t.value) + return t.value + return t + + +def final_deref_parameter(ty: TypeParameter) -> TypeParameter: + if isinstance(ty, Lifetime): + return ty + else: + return final_deref(ty) + + +def final_deref(ty: TypeOrTypeVar) -> Type: + """ Like deref(), but also replace any remaining unresolved type variables with + synthesized Types. + """ + ty = deref(ty) + if isinstance(ty, TypeVar): + assert ty.name is not None, "internal error: no way to assign a type to variable" + # ty becomes an nt type. + assert ty.name != 'Unit' + ty.value = Type(ty.name) + return ty.value + else: + assert isinstance(ty, Type) + if ty.args: + assert ty.name != 'Unit' + return Type(ty.name, tuple(final_deref_parameter(arg) for arg in ty.args)) + return ty + + +def unify(actual: TypeParameter, expected: TypeParameter) -> None: + if isinstance(actual, Lifetime) or isinstance(expected, Lifetime): + if actual is expected: + return + else: + raise JsparagusTypeError.clash(expected, actual) + + actual = deref(actual) + expected = deref(expected) + + if actual is expected: + pass + elif isinstance(actual, Type) and isinstance(expected, Type): + if actual.name != expected.name or len(actual.args) != len(expected.args): + raise JsparagusTypeError.clash(expected, actual) + + for i, (actual_arg, expected_arg) in enumerate(zip(actual.args, expected.args)): + try: + unify(actual_arg, expected_arg) + except JsparagusTypeError as exc: + # The error message has to do with the parameter, but we want + # to provide the complete problem types. + raise JsparagusTypeError.clash(expected, actual) from exc + + elif isinstance(expected, TypeVar): + assert expected.value is None + if (isinstance(actual, TypeVar) + and actual.precedence <= expected.precedence): + actual.value = expected + else: + expected.value = actual + else: + assert isinstance(actual, TypeVar) + assert actual.value is None + if actual is not expected: + actual.value = expected + + +@dataclass +class MethodType: + __slots__ = ['argument_types', 'return_type'] + + argument_types: typing.List[TypeOrTypeVar] + return_type: TypeOrTypeVar + + def resolve(self) -> MethodType: + return MethodType( + [final_deref(t) for t in self.argument_types], + final_deref(self.return_type)) + + +def infer_types(g: grammar.Grammar) -> None: + """Assign a type to each nonterminal and each method in a grammar. + + The type system is pretty rigid. We don't have any polymorphism or union + types. 
If two of a nonterminal's productions have different types, this + will typically just unify the two types, which can result in mysterious + output. If it can't do that (e.g. if one of the types is `str`) then it + throws a JsparagusTypeError. + + This mutates the Grammar `g` in place, assigning to the `.type` field of + each NtDef in `g.nonterminals` and to `g.methods`. + """ + + def type_of_nt(nt: typing.Union[grammar.Nt, str], nt_def: grammar.NtDef) -> TypeOrTypeVar: + if nt_def.type is not None: + return nt_def.type + else: + nt_name = nt if isinstance(nt, str) else nt.name + assert isinstance(nt_name, str) + return TypeVar(nt_name, 2) + + nt_types = { + nt: type_of_nt(nt, nt_def) + for nt, nt_def in g.nonterminals.items() + if not isinstance(nt, grammar.InitNt) + } + + method_types: typing.Dict[str, MethodType] = {} + + def element_type(e: grammar.Element) -> TypeOrTypeVar: + if isinstance(e, str): + if e in g.nonterminals: + return nt_types[e] + else: + return TokenType + elif isinstance(e, grammar.Optional): + return Type('Option', (element_type(e.inner),)) + elif isinstance(e, grammar.Literal): + return TokenType + elif isinstance(e, grammar.UnicodeCategory): + return TokenType + elif isinstance(e, grammar.Exclude): + return Type('Exclude', + (element_type(e.inner),) + + tuple(element_type(v) for v in e.exclusion_list)) + elif isinstance(e, grammar.Nt): + # Cope with the awkward fact that g.nonterminals keys may be either + # strings or Nt objects. + return nt_types[e if e in nt_types else e.name] # type: ignore + else: + assert False, "unexpected element type: {!r}".format(e) + + concrete_element_types: typing.List[TypeOrTypeVar] + + def expr_type(expr: grammar.ReduceExprOrAccept) -> TypeOrTypeVar: + if isinstance(expr, int): + return concrete_element_types[expr] + elif expr is None: + return Type('Option', (TypeVar(),)) + elif isinstance(expr, grammar.Some): + return Type('Option', (expr_type(expr.inner),)) + elif isinstance(expr, grammar.CallMethod): + arg_types = [expr_type(arg) for arg in expr.args] + if expr.method in method_types: + mtype = method_types[expr.method] + if len(expr.args) != len(mtype.argument_types): + raise JsparagusTypeError( + "method {!r} is called with {} argument(s) and with {} argument(s)" + .format(expr.method, len(expr.args), len(mtype.argument_types))) + for i, (actual_type, expected_type) in enumerate( + zip(arg_types, mtype.argument_types)): + try: + unify(actual_type, expected_type) + except JsparagusTypeError as exc: + exc.annotate( + "error passing {} as argument {} to method {!r}:" + .format( + grammar.expr_to_str(expr.args[i]), + i + 1, + expr.method)) + raise + else: + # Use method name as fallback type name (but low + # precedence--this should be unified with something better). 
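+                # Precedence 1 is lower than the precedence 2 given to
+                # nonterminal type variables above, so if this variable later
+                # unifies with a nonterminal's variable, the nonterminal's name
+                # is the one kept. Method names may carry a disambiguating
+                # suffix after a space, hence the split below.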
+ name = expr.method + if ' ' in name: + name = name.split(' ')[0] + + mtype = MethodType(arg_types, TypeVar(name, 1)) + method_types[expr.method] = mtype + return mtype.return_type + elif expr == 'accept': + return NoReturnType + else: + raise TypeError("unrecognized reduce expr: {!r}".format(expr)) + + for nt, nt_def in g.nonterminals.items(): + if isinstance(nt, grammar.InitNt): + continue + nt_type = nt_types[nt] + for i, p in enumerate(nt_def.rhs_list): + concrete_element_types = [ + element_type(e) + for e in p.body + if grammar.is_concrete_element(e) + ] + try: + unify(nt_type, expr_type(p.reducer)) + except JsparagusTypeError as exc: + exc.annotate( + "in nonterminal {!r}, production {}:" + .format(nt, i + 1)) + raise + + for nt, ty in nt_types.items(): + g.nonterminals[nt].type = final_deref(ty) + g.methods = {name: mtype.resolve() for name, mtype in method_types.items()} diff --git a/third_party/rust/jsparagus/jsparagus/utils.py b/third_party/rust/jsparagus/jsparagus/utils.py new file mode 100644 index 0000000000..f3ecba2472 --- /dev/null +++ b/third_party/rust/jsparagus/jsparagus/utils.py @@ -0,0 +1,74 @@ +"""List of functions which are useful in many places.""" + +import sys +import typing + + +T = typing.TypeVar("T") +U = typing.TypeVar("U") + + +def keep_until( + iterable: typing.Iterable[T], + pred: typing.Callable[[T], bool] +) -> typing.Iterable[T]: + """Filter an iterable generator or list and keep all elements until the first + time the predicate becomes true, including the element where the predicate + is true. All elements after are skipped.""" + for e in iterable: + yield e + if pred(e): + return + + +def split( + iterable: typing.Iterable[T], + pred: typing.Callable[[T], bool] +) -> typing.Tuple[typing.List[T], typing.List[T]]: + """Given a predicate, split the input iterator into a tuple of 2 list. A first + list which satisfy the predicate and a second list which does not.""" + yes = [] + no = [] + for e in iterable: + if pred(e): + yes.append(e) + else: + no.append(e) + return yes, no + + +def consume(iterator: typing.Iterable[T], progress: bool) -> None: + """Drain the iterator. 
If progress is true, print dots on stdout.""" + i = 0 + to_feed = str(i) + try: + for _ in iterator: + if progress: + if len(to_feed) > 0: + sys.stdout.write(to_feed[0]) + to_feed = to_feed[1:] + else: + sys.stdout.write(".") + i += 1 + if i % 100 == 0: + sys.stdout.write("\n") + to_feed = str(i) + sys.stdout.flush() + finally: + if progress and i != 0: + sys.stdout.write("\n") + sys.stdout.flush() + + +class default_id_dict(dict, typing.Mapping[T, T]): + def __missing__(self, key) -> T: + return key + + +class default_fwd_dict(dict, typing.Mapping[T, U]): + def __init__(self, fwd: typing.Mapping[T, U]): + super().__init__() + self.fwd = fwd + + def __missing__(self, key: T) -> U: + return self.fwd[key] diff --git a/third_party/rust/jsparagus/mozconfigs/smoosh-debug b/third_party/rust/jsparagus/mozconfigs/smoosh-debug new file mode 100644 index 0000000000..6685fcabcf --- /dev/null +++ b/third_party/rust/jsparagus/mozconfigs/smoosh-debug @@ -0,0 +1,6 @@ +ac_add_options --enable-application=js +ac_add_options --disable-optimize +ac_add_options --enable-debug +ac_add_options --enable-smoosh +mk_add_options MOZ_OBJDIR=@TOPSRCDIR@/obj-smoosh-debug +mk_add_options AUTOCLOBBER=1 diff --git a/third_party/rust/jsparagus/mozconfigs/smoosh-opt b/third_party/rust/jsparagus/mozconfigs/smoosh-opt new file mode 100644 index 0000000000..f1cfd789e5 --- /dev/null +++ b/third_party/rust/jsparagus/mozconfigs/smoosh-opt @@ -0,0 +1,7 @@ +ac_add_options --enable-application=js +ac_add_options --enable-optimize +ac_add_options --disable-debug +ac_add_options --enable-release +ac_add_options --enable-smoosh +mk_add_options MOZ_OBJDIR=@TOPSRCDIR@/obj-smoosh-opt +mk_add_options AUTOCLOBBER=1 diff --git a/third_party/rust/jsparagus/pgen.pgen b/third_party/rust/jsparagus/pgen.pgen new file mode 100644 index 0000000000..36b7483bb0 --- /dev/null +++ b/third_party/rust/jsparagus/pgen.pgen @@ -0,0 +1,89 @@ +# Grammar for the pgen parser specification language +# +# This was used to bootstrap the parser for the emu-grammar parser +# specification language, and it's not clear why we should have two things. +# Ignore this for now. + +var token IDENT; +var token STR; +var token MATCH; +var token COMMENT; +token Eq = "="; +token Arrow = "=>"; +token Semi = ";"; +token Token = "token"; +token Var = "var"; +token Nt = "nt"; +token Goal = "goal"; +token Some = "Some"; +token None = "None"; +token OpenBrace = "{"; +token CloseBrace = "}"; +token OpenParen = "("; +token CloseParen = ")"; +token Comma = ","; +token QuestionMark = "?"; + +goal nt grammar { + token_defs? nt_defs => grammar($0, $1); +} + +nt token_defs { + token_def => single($0); + token_defs token_def => append($0, $1); +} + +nt token_def { + "token" IDENT "=" STR ";" => const_token($1, $3); + "var" "token" IDENT ";" => var_token($2); +} + +nt nt_defs { + nt_def => nt_defs_single($0); + nt_defs nt_def => nt_defs_append($0, $1); +} + +nt nt_def { + COMMENT? "goal"? "nt" IDENT "{" prods? "}" => nt_def($0, $1, $3, $5); +} + +nt prods { + prod => single($0); + prods prod => append($0, $1); + COMMENT => empty($0); +} + +nt prod { + terms reducer? ";" => prod($0, $1); +} + +nt terms { + term => single($0); + terms term => append($0, $1); +} + +nt term { + symbol; + symbol "?" => optional($0); +} + +nt symbol { + IDENT => ident($0); + STR => str($0); +} + +nt reducer { + "=>" expr => $1; +} + +nt expr { + MATCH => expr_match($0); + IDENT "(" expr_args? 
")" => expr_call($0, $2); + "Some" "(" expr ")" => expr_some($2); + "None" => expr_none(); +} + +nt expr_args { + expr => args_single($0); + expr_args "," expr => args_append($0, $2); +} diff --git a/third_party/rust/jsparagus/requirements.txt b/third_party/rust/jsparagus/requirements.txt new file mode 100644 index 0000000000..6115bfe34a --- /dev/null +++ b/third_party/rust/jsparagus/requirements.txt @@ -0,0 +1,13 @@ +## The following requirements were added by pip freeze: +entrypoints==0.3 +flake8==3.7.9 +html5lib==1.0.1 +mccabe==0.6.1 +mypy==0.770 +mypy-extensions==0.4.3 +pycodestyle==2.5.0 +pyflakes==2.1.1 +six==1.11.0 +typed-ast==1.4.1 +typing-extensions==3.7.4.1 +webencodings==0.5.1 diff --git a/third_party/rust/jsparagus/smoosh_status.py b/third_party/rust/jsparagus/smoosh_status.py new file mode 100644 index 0000000000..63aeb3e0fd --- /dev/null +++ b/third_party/rust/jsparagus/smoosh_status.py @@ -0,0 +1,193 @@ +#!/usr/bin/env python3 +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this file, +# You can obtain one at http://mozilla.org/MPL/2.0/. + +import pathlib +import json +import urllib.request +import re +import subprocess +import sys + + +class Logger: + @classmethod + def info(cls, s): + print('[INFO]', s) + + # Flush to make it apeear immediately in automation log. + sys.stdout.flush() + + @classmethod + def fetch(cls, url): + cls.info(f'Fetching {url}') + + @classmethod + def cmd(cls, cmd): + def format_cmd(s): + if ' ' in s: + escaped = s.replace('"', '\"') + return f'"{escaped}"' + return s + + formatted_command = ' '.join(list(map(format_cmd, cmd))) + cls.info(f'$ {formatted_command}') + + +class GitRepository: + def __init__(self, path): + self.path = path + + self.git_dir = self.path / '.git' + if not self.git_dir.exists(): + print(f'{self.path} is not a Git repository.', file=sys.stderr) + sys.exit(1) + + def get_output(self, *args): + cmd = ['git'] + list(args) + Logger.cmd(cmd) + output = subprocess.run(cmd, + capture_output=True, + cwd=self.path) + + return output.stdout.decode() + + def run(self, *args): + cmd = ['git'] + list(args) + Logger.cmd(cmd) + subprocess.run(cmd, + check=True, + cwd=self.path) + + def commit_message(self, rev): + return self.get_output('log', '-1', '--pretty=format:%s%n', rev) + + +class MCRemoteRepository: + HG_API_URL = 'https://hg.mozilla.org/mozilla-central/' + + @classmethod + def call(cls, name, path): + url = f'{cls.HG_API_URL}{name}{path}' + Logger.fetch(url) + req = urllib.request.Request(url, None, {}) + response = urllib.request.urlopen(req) + return response.read() + + @classmethod + def call_json(cls, name, path): + return json.loads(cls.call(name, path)) + + @classmethod + def file(cls, rev, path): + return cls.call('raw-file', f'/{rev}{path}') + + +class TreeHerder: + API_URL = 'https://treeherder.mozilla.org/api/' + + @classmethod + def call(cls, name): + url = f'{cls.API_URL}{name}' + Logger.fetch(url) + req = urllib.request.Request(url, None, { + 'User-Agent': 'smoosh-tools', + }) + response = urllib.request.urlopen(req) + return response.read() + + @classmethod + def call_json(cls, name): + return json.loads(cls.call(name)) + + @classmethod + def push_id(cls, rev): + push = cls.call_json(f'project/mozilla-central/push/?full=true&format=json&count=1&revision={rev}') + return push['results'][0]['id'] + + @classmethod + def jobs(cls, push_id): + push = cls.call_json(f'jobs/?push_id={push_id}&format=json') + count = push['count'] + 
results = [] + results += push['results'] + + page = 2 + while len(results) < count: + push = cls.call_json(f'jobs/?push_id={push_id}&format=json&page={page}') + results += push['results'] + page += 1 + + return results + + +class Status: + def run(is_ci): + Logger.info('Fetching ci_generated branch') + + jsparagus = GitRepository(pathlib.Path('./')) + jsparagus.run('fetch', 'origin', 'ci_generated') + + Logger.info('Checking mozilla-central tip revision') + + m_c_rev = MCRemoteRepository.call_json('json-log', '/tip/')['node'] + cargo_file = MCRemoteRepository.file( + m_c_rev, + '/js/src/frontend/smoosh/Cargo.toml' + ).decode() + m = re.search('rev = "(.+)"', cargo_file) + ci_generated_rev = m.group(1) + + Logger.info('Checking jsparagus referred by mozilla-central') + + message = jsparagus.commit_message(ci_generated_rev) + m = re.search('for ([A-Fa-f0-9]+)', message) + master_rev = m.group(1) + + Logger.info('Checking build status') + + push_id = TreeHerder.push_id(m_c_rev) + jobs = TreeHerder.jobs(push_id) + nonunified_job = None + smoosh_job = None + for job in jobs: + if 'spidermonkey-sm-nonunified-linux64/debug' in job: + nonunified_job = job + if 'spidermonkey-sm-smoosh-linux64/debug' in job: + smoosh_job = job + + def get_result(job): + if job: + if 'completed' in job: + if 'success' in job: + return 'OK' + else: + return 'NG' + else: + return 'not yet finished' + else: + return 'unknown' + + nonunified_result = get_result(nonunified_job) + smoosh_result = get_result(smoosh_job) + + if is_ci: + print(f'##[set-output name=mc;]{m_c_rev}') + print(f'##[set-output name=jsparagus;]{master_rev}') + print(f'##[set-output name=build;]{nonunified_result}') + print(f'##[set-output name=test;]{smoosh_result}') + else: + print(f'mozilla-central tip: {m_c_rev}') + print(f'referred jsparagus revision: {master_rev}') + print(f'Build status:') + print(f' Build with --enable-smoosh: {nonunified_result}') + print(f' Test with --smoosh: {smoosh_result}') + + +is_ci = False +if len(sys.argv) > 1: + if sys.argv[1] == 'ci': + is_ci = True + +Status.run(is_ci) diff --git a/third_party/rust/jsparagus/src/bin/smoosh_tools.rs b/third_party/rust/jsparagus/src/bin/smoosh_tools.rs new file mode 100644 index 0000000000..3e814237e3 --- /dev/null +++ b/third_party/rust/jsparagus/src/bin/smoosh_tools.rs @@ -0,0 +1,964 @@ +use std::collections::HashMap; +use std::env::{self, Args}; +use std::fs::{create_dir_all, File}; +use std::io::{Read, Write}; +use std::path::{Path, PathBuf}; +use std::process::{self, exit, Command}; +use std::str::FromStr; + +static USAGE_STRING: &'static str = r#"Tools for jsparagus + SmooshMonkey development + +USAGE: + cargo run --bin smoosh_tools [COMMAND] [OPTIONS] + +COMMAND: + build [--opt] [MOZILLA_CENTRAL] + Build SpiderMonkey JS shell with SmooshMonkey enabled, using this + jsparagus clone instead of vendored one + shell [--opt] [MOZILLA_CENTRAL] + Run SpiderMonkey JS shell binary built by "build" command + test [--opt] [MOZILLA_CENTRAL] + Run jstests/jit-test with SpiderMonkey JS shell binary built by + "build" command + bench [--opt] [--samples-dir=REAL_JS_SAMPLES/DATE] [MOZILLA_CENTRAL] + Compare SpiderMonkey parser performance against SmooshMonkey on a + collection of JavaScript files, using the JS shell binary built by + "build" command. 
+ bump [MOZILLA_CENTRAL] + Bump jsparagus version referred by mozilla-central to the latest + "ci_generated" branch HEAD, and re-vendor jsparagus + try [--remote=REMOTE] [MOZILLA_CENTRAL] + Push to try with current jsparagus branch + This pushes current jsparagus branch to "generated" branch, and + modifies the reference in mozilla-central to it, and pushes to try + This requires L1 Commit Access for hg.mozilla.org, + and mozilla-central should be a Git repository + gen [--remote=REMOTE] + Push current jsparagus branch to "generated" branch, with generated + files included, to refer from mozilla-central + +OPTIONS: + MOZILLA_CENTRAL Path to mozilla-central or mozilla-unified clone + This can be omitted if mozilla-central or mozilla-unified + is placed next to jsparagus clone directory + --opt Use optimized build configuration, instead of debug build + --remote=REMOTE The name of remote to push the generated branch to + Defaults to "origin" + --concat-mozconfig For building mozilla-central, concatenates the content + of the MOZCONFIG environment variable with the content of + smoosh_tools mozconfig. + --samples-dir=DIR Directory containing thousands of JavaScripts to be used + for measuring the performance of SmooshMonkey. +"#; + +macro_rules! try_finally { + ({$($t: tt)*} {$($f: tt)*}) => { + let result = (|| -> Result<(), Error> { + $($t)* + Ok(()) + })(); + $($f)* + result? + } +} + +/// Simple wrapper for logging. +/// +/// Do not use env_logger etc, to avoid adding extra dependency to library. +/// See https://github.com/rust-lang/rfcs/pull/2887 +macro_rules! log_info { + ($($t: tt)*) => { + print!("[INFO] "); + println!($($t)*); + } +} + +#[derive(Debug)] +enum Error { + Generic(String), + SubProcessError(String, Option<i32>), + IO(String, std::io::Error), + Encode(String, std::str::Utf8Error), + EnvVar(&'static str, std::env::VarError), +} + +impl Error { + fn dump(&self) { + match self { + Error::Generic(message) => { + println!("{}", message); + } + Error::SubProcessError(message, code) => { + println!("{}", message); + match code { + Some(code) => println!("Subprocess exit with exit status: {}", code), + None => println!("Subprocess terminated by signal"), + } + } + Error::IO(message, e) => { + println!("{}", message); + println!("{}", e); + } + Error::Encode(message, e) => { + println!("{}", message); + println!("{}", e); + } + Error::EnvVar(var, e) => { + println!("Error while reading {}:", var); + println!("{}", e); + } + } + } +} + +#[derive(Debug, Copy, Clone)] +enum CommandType { + Build, + Shell, + Test, + Bench, + Bump, + Gen, + Try, +} + +#[derive(Debug, Copy, Clone)] +enum BuildType { + Opt, + Debug, +} + +/// Parse command line arguments. +/// +/// Do not use `clap` here, to avoid adding extra dependency to library. +/// See https://github.com/rust-lang/rfcs/pull/2887 +#[derive(Debug)] +struct SimpleArgs { + command: CommandType, + build_type: BuildType, + moz_path: String, + realjs_path: String, + remote: String, + concat_mozconfig: bool, +} + +impl SimpleArgs { + fn parse(mut args: Args) -> Self { + // Skip binary path. 
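+        // (std::env::args() yields the program path as its first element, so
+        // it is dropped before reading the subcommand below.)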
+ let _ = args.next().unwrap(); + + let command = match args.next() { + Some(command) => match command.as_str() { + "build" => CommandType::Build, + "test" => CommandType::Test, + "shell" => CommandType::Shell, + "bench" => CommandType::Bench, + "bump" => CommandType::Bump, + "gen" => CommandType::Gen, + "try" => CommandType::Try, + _ => Self::show_usage(), + }, + None => Self::show_usage(), + }; + + let mut plain_args = Vec::new(); + + let mut remote = "origin".to_string(); + let mut moz_path = Self::guess_moz(); + let mut realjs_path = Self::guess_realjs(); + let mut build_type = BuildType::Debug; + let mut concat_mozconfig = false; + + for arg in args { + if arg.starts_with("-") { + if arg.contains("=") { + let mut split = arg.split("="); + let name = match split.next() { + Some(s) => s, + None => Self::show_usage(), + }; + let value = match split.next() { + Some(s) => s, + None => Self::show_usage(), + }; + + match name { + "--remote" => { + remote = value.to_string(); + } + "--samples-dir" => { + realjs_path = value.to_string(); + } + _ => { + Self::show_usage(); + } + } + } else { + match arg.as_str() { + "--opt" => { + build_type = BuildType::Opt; + } + "--concat-mozconfig" => { + concat_mozconfig = true; + } + _ => { + Self::show_usage(); + } + } + } + } else { + plain_args.push(arg); + } + } + + if !plain_args.is_empty() { + moz_path = plain_args.remove(0); + } + + if !plain_args.is_empty() { + Self::show_usage(); + } + + SimpleArgs { + command, + build_type, + moz_path, + realjs_path, + remote, + concat_mozconfig, + } + } + + fn show_usage() -> ! { + print!("{}", USAGE_STRING); + process::exit(-1) + } + + fn guess_moz() -> String { + let cwd = match env::current_dir() { + Ok(cwd) => cwd, + _ => return "../mozilla-central".to_string(), + }; + + for path in vec!["../mozilla-central", "../mozilla-unified"] { + let topsrcdir = Path::new(&cwd).join(path); + if topsrcdir.exists() { + return path.to_string(); + } + } + + return "../mozilla-central".to_string(); + } + + fn guess_realjs() -> String { + return "../real-js-samples/20190416".to_string(); + } +} + +#[derive(Debug)] +struct MozillaTree { + topsrcdir: PathBuf, + smoosh_cargo: PathBuf, +} + +impl MozillaTree { + fn try_new(path: &String) -> Result<Self, Error> { + let rel_topsrcdir = Path::new(path); + let cwd = env::current_dir().unwrap(); + let topsrcdir = Path::new(&cwd).join(rel_topsrcdir); + if !topsrcdir.exists() { + return Err(Error::Generic(format!( + "{:?} doesn't exist. Please specify a path to mozilla-central\n +For more information, see https://github.com/mozilla-spidermonkey/jsparagus/wiki/SpiderMonkey", + topsrcdir + ))); + } + let topsrcdir = topsrcdir.canonicalize().unwrap(); + let cargo = topsrcdir + .join("js") + .join("src") + .join("frontend") + .join("smoosh") + .join("Cargo.toml"); + if !cargo.exists() { + return Err(Error::Generic(format!( + "{:?} doesn't exist. Please specify a path to mozilla-central", + cargo + ))); + } + + Ok(Self { + topsrcdir: topsrcdir.to_path_buf(), + smoosh_cargo: cargo.to_path_buf(), + }) + } +} + +#[derive(Debug)] +struct JsparagusTree { + topsrcdir: PathBuf, + mozconfigs: PathBuf, +} + +impl JsparagusTree { + fn try_new() -> Result<Self, Error> { + let cwd = env::current_dir().unwrap(); + let topsrcdir = Path::new(&cwd); + let cargo = topsrcdir.join("Cargo.toml"); + if !cargo.exists() { + return Err(Error::Generic(format!( + "{:?} doesn't exist. 
Please run smoosh_tools in jsparagus top level directory", + cargo + ))); + } + + let mozconfigs = topsrcdir.join("mozconfigs"); + if !mozconfigs.exists() { + return Err(Error::Generic(format!( + "{:?} doesn't exist. Please run smoosh_tools in jsparagus top level directory", + mozconfigs + ))); + } + + Ok(Self { + topsrcdir: topsrcdir.to_path_buf(), + mozconfigs: mozconfigs.to_path_buf(), + }) + } + + fn mozconfig(&self, build_type: BuildType) -> PathBuf { + self.mozconfigs.join(match build_type { + BuildType::Opt => "smoosh-opt", + BuildType::Debug => "smoosh-debug", + }) + } + + fn compare_parsers_js(&self) -> PathBuf { + self.topsrcdir + .join("benchmarks") + .join("compare-spidermonkey-parsers.js") + } +} + +struct ObjDir(String); +impl FromStr for ObjDir { + type Err = Error; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + let header = "mk_add_options"; + let s = match s.starts_with(header) { + true => &s[header.len()..], + false => return Err(Error::Generic("unexpected start".into())), + }; + if Some(0) != s.find(char::is_whitespace) { + return Err(Error::Generic( + "expected whitespace after mk_add_options".into(), + )); + } + let s = s.trim_start(); + let eq_idx = s.find('=').ok_or(Error::Generic( + "equal sign not found after mk_add_option".into(), + ))?; + let var_name = &s[..eq_idx]; + if var_name != "MOZ_OBJDIR" { + return Err(Error::Generic(format!( + "{}: unexpected variable, expected MOZ_OBJDIR", + var_name + ))); + } + let s = &s[(eq_idx + 1)..]; + let s = s.trim(); + + Ok(ObjDir(s.into())) + } +} + +#[derive(Debug)] +struct BuildTree { + moz: MozillaTree, + #[allow(dead_code)] + jsp: JsparagusTree, + mozconfig: PathBuf, +} + +impl BuildTree { + fn try_new(args: &SimpleArgs) -> Result<Self, Error> { + let moz = MozillaTree::try_new(&args.moz_path)?; + let jsp = JsparagusTree::try_new()?; + + let jsp_mozconfig = jsp.mozconfig(args.build_type); + let mozconfig = if args.concat_mozconfig { + // Create a MOZCONFIG file which concatenate the content of the + // environmenet variable with the content provided by jsparagus. + // This is useful to add additional compilation variants for + // mozilla-central. + let env = env::var("MOZCONFIG").map_err(|e| Error::EnvVar("MOZCONFIG", e))?; + let env_config = read_file(&env.into())?; + let jsp_config = read_file(&jsp_mozconfig)?; + let config = env_config + &jsp_config; + + // Extract the object directory, in which the mozconfig file would + // be created. + let mut objdir = None; + for line in config.lines() { + match line.parse() { + Ok(ObjDir(meta_path)) => objdir = Some(meta_path), + Err(_error) => (), + } + } + let objdir = objdir.ok_or(Error::Generic("MOZ_OBJDIR must exists".into()))?; + let topsrcdir = moz + .topsrcdir + .to_str() + .ok_or(()) + .map_err(|_| Error::Generic("topsrcdir cannot be encoded in UTF-8.".into()))?; + let objdir = objdir.replace("@TOPSRCDIR@", topsrcdir); + + // Create the object direcotry. + let objdir: PathBuf = objdir.into(); + if !objdir.is_dir() { + create_dir_all(&objdir).map_err(|e| { + Error::IO(format!("Failed to create directory {:?}", objdir), e) + })?; + } + + // Create MOZCONFIG file. + let mozconfig = objdir.join("mozconfig"); + write_file(&mozconfig, config)?; + + mozconfig + } else { + jsp_mozconfig + }; + + Ok(Self { + moz, + jsp, + mozconfig, + }) + } +} + +/// Run `command`, and check if the exit code is successful. +/// Returns Err if failed to run the command, or the exit code is non-zero. 
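+/// A non-zero exit status is reported as `Error::SubProcessError` (with the
+/// status code, if any), and failure to spawn the process at all is reported
+/// as `Error::IO`.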
+fn check_command(command: &mut Command) -> Result<(), Error> { + log_info!("$ {:?}", command); + let status = command + .status() + .map_err(|e| Error::IO(format!("Failed to run {:?}", command), e))?; + if !status.success() { + return Err(Error::SubProcessError( + format!("Failed to run {:?}", command), + status.code(), + )); + } + + Ok(()) +} + +/// Run `command`, and returns its status code. +/// Returns Err if failed to run the command, or the subprocess is terminated +/// by signal. +fn get_retcode(command: &mut Command) -> Result<i32, Error> { + log_info!("$ {:?}", command); + let status = command + .status() + .map_err(|e| Error::IO(format!("Failed to run {:?}", command), e))?; + if !status.success() { + match status.code() { + Some(code) => return Ok(code), + None => { + return Err(Error::SubProcessError( + format!("Failed to run {:?}", command), + None, + )) + } + } + } + + Ok(0) +} + +/// Run `command`, and returns its stdout +/// Returns Err if failed to run the command. +fn get_output(command: &mut Command) -> Result<String, Error> { + log_info!("$ {:?}", command); + let output = command + .output() + .map_err(|e| Error::IO(format!("Failed to run {:?}", command), e))?; + let stdout = std::str::from_utf8(output.stdout.as_slice()) + .map_err(|e| Error::Encode(format!("Failed to decode the output of {:?}", command), e))? + .to_string(); + Ok(stdout) +} + +struct GitRepository { + topsrcdir: PathBuf, +} + +impl GitRepository { + fn try_new(topsrcdir: PathBuf) -> Result<Self, Error> { + if !topsrcdir.join(".git").as_path().exists() { + return Err(Error::Generic(format!( + "{:?} is not Git repository", + topsrcdir + ))); + } + + Ok(Self { topsrcdir }) + } + + fn run(&self, args: &[&str]) -> Result<(), Error> { + check_command( + Command::new("git") + .args(args) + .current_dir(self.topsrcdir.clone()), + ) + } + + fn get_retcode(&self, args: &[&str]) -> Result<i32, Error> { + get_retcode( + Command::new("git") + .args(args) + .current_dir(self.topsrcdir.clone()), + ) + } + + fn get_output(&self, args: &[&str]) -> Result<String, Error> { + get_output( + Command::new("git") + .args(args) + .current_dir(self.topsrcdir.clone()), + ) + } + + /// Checks if there's no uncommitted changes. + fn assert_clean(&self) -> Result<(), Error> { + log_info!("Checking {} is clean", self.topsrcdir.to_str().unwrap()); + let code = self.get_retcode(&["diff-index", "--quiet", "HEAD", "--"])?; + if code != 0 { + return Err(Error::Generic(format!( + "Uncommitted changes found in {}", + self.topsrcdir.to_str().unwrap() + ))); + } + + let code = self.get_retcode(&["diff-index", "--cached", "--quiet", "HEAD", "--"])?; + if code != 0 { + return Err(Error::Generic(format!( + "Uncommitted changes found in {}", + self.topsrcdir.to_str().unwrap() + ))); + } + + Ok(()) + } + + /// Returns the current branch, or "HEAD" if it's detached head.. + fn branch(&self) -> Result<String, Error> { + Ok(self + .get_output(&["rev-parse", "--abbrev-ref", "HEAD"])? + .trim() + .to_string()) + } + + /// Ensure a remote with `name` exists. + /// If it doesn't exist, add remote with `name` and `url`. + fn ensure_remote(&self, name: &'static str, url: &'static str) -> Result<(), Error> { + for line in self.get_output(&["remote"])?.split("\n") { + if line == name { + return Ok(()); + } + } + + self.run(&["remote", "add", name, url])?; + + Ok(()) + } + + /// Returns a map of remote branches. 
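+    /// Keys are fully-qualified ref names and values are commit SHAs, as
+    /// printed by `git ls-remote` (for example, `bump` below looks up the
+    /// `refs/heads/ci_generated` ref in this map).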
+ fn ls_remote(&self, remote: &'static str) -> Result<HashMap<String, String>, Error> { + let mut map = HashMap::new(); + for line in self.get_output(&["ls-remote", remote])?.split("\n") { + let mut split = line.split("\t"); + let sha = match split.next() { + Some(s) => s, + None => continue, + }; + let ref_name = match split.next() { + Some(s) => s, + None => continue, + }; + map.insert(ref_name.to_string(), sha.to_string()); + } + + Ok(map) + } +} + +/// Trait for replacing dependencies in Cargo.toml. +trait DependencyLineReplacer { + /// Receives `line` for official jsparagus reference, + /// and adds modified jsparagus reference to `lines`. + fn on_official(&self, line: &str, lines: &mut Vec<String>); +} + +/// Replace jsparagus reference to `sha` in official ci_generated branch. +struct OfficialDependencyLineReplacer { + sha: String, +} + +impl DependencyLineReplacer for OfficialDependencyLineReplacer { + fn on_official(&self, _line: &str, lines: &mut Vec<String>) { + let newline = format!("jsparagus = {{ git = \"https://github.com/mozilla-spidermonkey/jsparagus\", rev = \"{}\" }}", self.sha); + log_info!("Rewriting jsparagus reference: {}", newline); + lines.push(newline); + } +} + +/// Replace jsparagus reference to local clone. +struct LocalDependencyLineReplacer { + jsparagus: PathBuf, +} + +impl DependencyLineReplacer for LocalDependencyLineReplacer { + fn on_official(&self, line: &str, lines: &mut Vec<String>) { + lines.push(format!("# {}", line)); + let newline = format!( + "jsparagus = {{ path = \"{}\" }}", + self.jsparagus.to_str().unwrap() + ); + log_info!("Rewriting jsparagus reference: {}", newline); + lines.push(newline); + } +} + +/// Replace jsparagus reference to a remote branch in forked repository. +struct ForkDependencyLineReplacer { + github_user: String, + branch: String, +} + +impl DependencyLineReplacer for ForkDependencyLineReplacer { + fn on_official(&self, line: &str, lines: &mut Vec<String>) { + lines.push(format!("# {}", line)); + let newline = format!( + "jsparagus = {{ git = \"https://github.com/{}/jsparagus\", branch = \"{}\" }}", + self.github_user, self.branch + ); + log_info!("Rewriting jsparagus reference: {}", newline); + lines.push(newline); + } +} + +fn read_file(path: &PathBuf) -> Result<String, Error> { + let mut file = File::open(path.as_path()) + .map_err(|e| Error::IO(format!("Couldn't open {}", path.to_str().unwrap()), e))?; + let mut content = String::new(); + file.read_to_string(&mut content) + .map_err(|e| Error::IO(format!("Couldn't read {}", path.to_str().unwrap()), e))?; + + Ok(content) +} + +fn write_file(path: &PathBuf, content: String) -> Result<(), Error> { + let mut file = File::create(path.as_path()).map_err(|e| { + Error::IO( + format!("Couldn't open {} in write mode", path.to_str().unwrap()), + e, + ) + })?; + file.write_all(content.as_bytes()) + .map_err(|e| Error::IO(format!("Couldn't write {}", path.to_str().unwrap()), e))?; + + Ok(()) +} + +fn update_cargo<T>(cargo: &PathBuf, replacer: T) -> Result<(), Error> +where + T: DependencyLineReplacer, +{ + let content = read_file(cargo)?; + let mut filtered_lines = Vec::new(); + for line in content.split("\n") { + if line.starts_with( + "# jsparagus = { git = \"https://github.com/mozilla-spidermonkey/jsparagus\",", + ) || line.starts_with( + "jsparagus = { git = \"https://github.com/mozilla-spidermonkey/jsparagus\",", + ) { + let orig_line = if line.starts_with("# ") { + &line[2..] 
+ } else { + line + }; + replacer.on_official(orig_line, &mut filtered_lines) + } else if line.starts_with("jsparagus = ") { + } else { + filtered_lines.push(line.to_string()); + } + } + write_file(cargo, filtered_lines.join("\n")) +} + +fn update_gkrust(args: &SimpleArgs) -> Result<(), Error> { + log_info!("Updating gkrust-shared"); + + let build = BuildTree::try_new(args)?; + + check_command( + Command::new("cargo") + .args(["update", "-p", "gkrust-shared"]) + .current_dir(build.moz.topsrcdir), + ) +} + +fn run_mach(command_args: &[&str], args: &SimpleArgs) -> Result<(), Error> { + let build = BuildTree::try_new(args)?; + + check_command( + Command::new(build.moz.topsrcdir.join("mach").to_str().unwrap()) + .args(command_args) + .current_dir(build.moz.topsrcdir) + .env("MOZCONFIG", build.mozconfig.to_str().unwrap()), + ) +} + +fn build(args: &SimpleArgs) -> Result<(), Error> { + let moz = MozillaTree::try_new(&args.moz_path)?; + let jsparagus = JsparagusTree::try_new()?; + + update_cargo( + &moz.smoosh_cargo, + LocalDependencyLineReplacer { + jsparagus: jsparagus.topsrcdir, + }, + )?; + + update_gkrust(args)?; + + run_mach(&["build"], args) +} + +fn shell(args: &SimpleArgs) -> Result<(), Error> { + run_mach(&["run", "--smoosh"], args) +} + +fn bench(args: &SimpleArgs) -> Result<(), Error> { + let jsparagus = JsparagusTree::try_new()?; + let cmp_parsers = jsparagus.compare_parsers_js(); + let cmp_parsers: &str = cmp_parsers.to_str().ok_or(Error::Generic( + "Unable to serialize benchmark script path".into(), + ))?; + let realjs_path = jsparagus.topsrcdir.join(&args.realjs_path); + let realjs_path: &str = realjs_path.to_str().ok_or(Error::Generic( + "Unable to serialize benchmark script path".into(), + ))?; + + run_mach( + &["run", "-f", cmp_parsers, "--", "--", "--dir", realjs_path], + args, + ) +} + +fn test(args: &SimpleArgs) -> Result<(), Error> { + run_mach(&["jstests", "--args=--smoosh"], args)?; + run_mach(&["jit-test", "--args=--smoosh"], args) +} + +fn vendor(moz: &MozillaTree) -> Result<(), Error> { + check_command( + Command::new(moz.topsrcdir.join("mach").to_str().unwrap()) + .arg("vendor") + .arg("rust") + .current_dir(moz.topsrcdir.clone()), + ) +} + +fn bump(args: &SimpleArgs) -> Result<(), Error> { + let moz = MozillaTree::try_new(&args.moz_path)?; + let jsparagus = JsparagusTree::try_new()?; + + let jsparagus_repo = GitRepository::try_new(jsparagus.topsrcdir.clone())?; + + log_info!("Checking ci_generated branch HEAD"); + + let remotes = + jsparagus_repo.ls_remote("https://github.com/mozilla-spidermonkey/jsparagus.git")?; + + let branch = "refs/heads/ci_generated"; + + let ci_generated_sha = match remotes.get(branch) { + Some(sha) => sha, + None => { + return Err(Error::Generic(format!("{} not found in upstream", branch))); + } + }; + + log_info!("ci_generated branch HEAD = {}", ci_generated_sha); + + update_cargo( + &moz.smoosh_cargo, + OfficialDependencyLineReplacer { + sha: ci_generated_sha.clone(), + }, + )?; + + vendor(&moz)?; + + log_info!("Please add updated files and commit them."); + + Ok(()) +} + +/// Parse remote string and get GitHub username. +/// Currently this supports only SSH format. 
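+/// For example, an SSH remote such as `git@github.com:alice/jsparagus.git`
+/// (illustrative username) yields `alice`; other URL forms return an error.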
+fn parse_github_username(remote: String) -> Result<String, Error> { + let git_prefix = "git@github.com:"; + let git_suffix = "/jsparagus.git"; + + if remote.starts_with(git_prefix) && remote.ends_with(git_suffix) { + return Ok(remote.replace(git_prefix, "").replace(git_suffix, "")); + } + + Err(Error::Generic(format!( + "Failed to get GitHub username: {}", + remote + ))) +} + +struct BranchInfo { + github_user: String, + branch: String, +} + +/// Create "generated" branch and push to remote, and returns +/// GitHub username and branch name. +fn push_to_gen_branch(args: &SimpleArgs) -> Result<BranchInfo, Error> { + let jsparagus = JsparagusTree::try_new()?; + + let jsparagus_repo = GitRepository::try_new(jsparagus.topsrcdir.clone())?; + jsparagus_repo.assert_clean()?; + + log_info!("Getting GitHub username and current branch"); + + let origin = jsparagus_repo + .get_output(&["remote", "get-url", args.remote.as_str()])? + .trim() + .to_string(); + let github_user = parse_github_username(origin)?; + + let branch = jsparagus_repo.branch()?; + if branch == "HEAD" { + return Err(Error::Generic(format!( + "Detached HEAD is not supported. Please checkout a branch" + ))); + } + + let gen_branch = format!("{}-generated-branch", branch); + + log_info!("Creating {} branch", gen_branch); + + jsparagus_repo.run(&["checkout", "-b", gen_branch.as_str()])?; + + try_finally!({ + log_info!("Updating generated files"); + + check_command( + Command::new("make") + .arg("all") + .current_dir(jsparagus.topsrcdir.clone()), + )?; + + log_info!("Committing generated files"); + + jsparagus_repo.run(&["add", "--force", "*_generated.rs"])?; + + try_finally!({ + jsparagus_repo.run(&["commit", "-m", "Add generated files"])?; + + try_finally!({ + log_info!("Pushing to {}", gen_branch); + jsparagus_repo.run(&["push", "-f", args.remote.as_str(), gen_branch.as_str()])?; + } { + // Revert the commit, wihtout removing *_generated.rs. + jsparagus_repo.run(&["reset", "--soft", "HEAD^"])?; + }); + } { + // Forget *_generated.rs files. 
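+            // (A plain `git reset` only unstages the generated files, leaving
+            // them on disk; the outer cleanup blocks below then switch back to
+            // the original branch and delete the temporary generated branch,
+            // so the repository is returned to its starting branch even when
+            // an earlier step fails.)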
+ jsparagus_repo.run(&["reset"])?; + }); + } { + jsparagus_repo.run(&["checkout", branch.as_str()])?; + jsparagus_repo.run(&["branch", "-D", gen_branch.as_str()])?; + }); + + Ok(BranchInfo { + github_user, + branch: gen_branch, + }) +} + +fn gen_branch(args: &SimpleArgs) -> Result<(), Error> { + push_to_gen_branch(args)?; + + Ok(()) +} + +fn push_try(args: &SimpleArgs) -> Result<(), Error> { + let moz = MozillaTree::try_new(&args.moz_path)?; + + let moz_repo = GitRepository::try_new(moz.topsrcdir.clone())?; + moz_repo.assert_clean()?; + + let branch_info = push_to_gen_branch(args)?; + + moz_repo.ensure_remote("try", "hg::https://hg.mozilla.org/try")?; + + update_cargo( + &moz.smoosh_cargo, + ForkDependencyLineReplacer { + github_user: branch_info.github_user, + branch: branch_info.branch, + }, + )?; + + vendor(&moz)?; + + moz_repo.run(&["add", "."])?; + moz_repo.run(&["commit", "-m", "Update vendored crates for jsparagus"])?; + try_finally!({ + let syntax = "try: -b do -p sm-smoosh-linux64,sm-nonunified-linux64 -u none -t none"; + moz_repo.run(&["commit", "--allow-empty", "-m", syntax])?; + try_finally!({ + moz_repo.run(&["push", "try"])?; + } { + moz_repo.run(&["reset", "--hard", "HEAD^"])?; + }); + } { + moz_repo.run(&["reset", "--hard", "HEAD^"])?; + }); + + Ok(()) +} + +fn main() { + let args = SimpleArgs::parse(env::args()); + + let result = match args.command { + CommandType::Build => build(&args), + CommandType::Shell => shell(&args), + CommandType::Test => test(&args), + CommandType::Bench => bench(&args), + CommandType::Bump => bump(&args), + CommandType::Gen => gen_branch(&args), + CommandType::Try => push_try(&args), + }; + + match result { + Ok(_) => {} + Err(e) => { + e.dump(); + exit(1) + } + } +} diff --git a/third_party/rust/jsparagus/src/lib.rs b/third_party/rust/jsparagus/src/lib.rs new file mode 100644 index 0000000000..abaa91b7a0 --- /dev/null +++ b/third_party/rust/jsparagus/src/lib.rs @@ -0,0 +1,24 @@ +pub mod ast { + extern crate jsparagus_ast; + pub use self::jsparagus_ast::*; +} + +pub mod emitter { + extern crate jsparagus_emitter; + pub use self::jsparagus_emitter::*; +} + +pub mod parser { + extern crate jsparagus_parser; + pub use self::jsparagus_parser::*; +} + +pub mod scope { + extern crate jsparagus_scope; + pub use self::jsparagus_scope::*; +} + +pub mod stencil { + extern crate jsparagus_stencil; + pub use self::jsparagus_stencil::*; +} diff --git a/third_party/rust/jsparagus/test.sh b/third_party/rust/jsparagus/test.sh new file mode 100755 index 0000000000..0fdd0f6b3b --- /dev/null +++ b/third_party/rust/jsparagus/test.sh @@ -0,0 +1,31 @@ +#!/bin/sh + +# test.sh - Run some tests. + +set -eu + +# announce what you're doing before you do it +verbosely() { + echo "$*" + $* +} + +wtf() { + exitcode="$?" + if [ $(which python3 | cut -b -4) == "/usr" ]; then + echo >&2 + echo "WARNING: venv is not activated. See README.md." >&2 + fi + exit $exitcode +} + +warn_update() { + exitcode="$?" + echo >&2 + echo "NOTE: Test failed. This may just mean you need to run update.sh." 
>&2 + exit $exitcode +} + +verbosely python3 -m tests.test || wtf +verbosely python3 -m tests.test_js +verbosely python3 -m tests.test_parse_pgen || warn_update diff --git a/third_party/rust/jsparagus/tests/__init__.py b/third_party/rust/jsparagus/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/third_party/rust/jsparagus/tests/__init__.py diff --git a/third_party/rust/jsparagus/tests/test.py b/third_party/rust/jsparagus/tests/test.py new file mode 100755 index 0000000000..2d064098b5 --- /dev/null +++ b/third_party/rust/jsparagus/tests/test.py @@ -0,0 +1,1204 @@ +#!/usr/bin/env python3 + +import io +import re +import unittest +import typing + +import jsparagus +from jsparagus import gen, lexer, rewrites +from jsparagus.grammar import (Grammar, Production, CallMethod, Nt, + Optional, LookaheadRule, NtDef, Var) +from js_parser.parse_esgrammar import parse_esgrammar + + +LispTokenizer = lexer.LexicalGrammar("( )", SYMBOL=r'[!%&*+:<=>?@A-Z^_a-z~]+') + + +def prod(body, method_name): + return Production(body, CallMethod(method_name, list(range(len(body))))) + + +class GenTestCase(unittest.TestCase): + def compile(self, tokenize, grammar, **kwargs): + """Compile a grammar. Use this when you expect compilation to + succeed.""" + self.tokenize = tokenize + self.parser_class = gen.compile(grammar, **kwargs) + + def parse(self, text, goal=None): + if goal is None: + parser = self.parser_class() + else: + parser = self.parser_class(goal=goal) + lexer = self.tokenize(parser) + lexer.write(text) + return lexer.close() + + def compile_multi(self, tokenize, grammar): + self.tokenize = tokenize + obj = gen.compile_multi(grammar) + for attr in dir(obj): + if attr.startswith("parse_"): + setattr(self, attr, getattr(obj, attr)) + + def assertParse(self, s, expected=None, *, goal=None): + result = self.parse(s, goal=goal) + if expected is not None: + self.assertEqual(expected, result) + + def assertNoParse(self, s, *, goal=None, message="banana"): + if goal is None: + kwargs = {} + else: + kwargs = {"goal": goal} + self.assertRaisesRegex( + SyntaxError, + re.escape(message), + lambda: self.parse(s, **kwargs)) + + def testSimple(self): + grammar = parse_esgrammar( + """ + expr : + SYMBOL => $0 + `(` tail + + tail : + `)` => $0 + expr tail + """, + terminal_names=["SYMBOL"] + ) + self.compile(LispTokenizer, grammar) + + self.assertParse( + "(lambda (x) (* x x))", + ('expr_1', + '(', + ('tail_1', + 'lambda', + ('tail_1', + ('expr_1', '(', ('tail_1', 'x', ')')), + ('tail_1', + ('expr_1', + '(', + ('tail_1', + '*', + ('tail_1', + 'x', + ('tail_1', 'x', ')')))), + ')'))))) + + def testEnd(self): + self.compile( + lexer.LexicalGrammar("ONE TWO"), + Grammar({ + 'goal': [ + ['ONE', 'TWO'] + ] + }) + ) + self.assertNoParse("ONE TWO TWO", + message="expected 'end of input', got 'TWO'") + + def testList(self): + list_grammar = Grammar({ + 'prelist': [ + ['word', 'list'] + ], + 'list': [ + ['word'], + ['list', 'word'], + ], + 'word': [ + ['SYMBOL'] + ], + }) + self.compile(LispTokenizer, list_grammar) + self.assertParse( + "the quick brown fox jumped over the lazy dog", + ('prelist', + 'the', + ('list_1', + ('list_1', + ('list_1', + ('list_1', + ('list_1', + ('list_1', + ('list_1', + 'quick', + 'brown'), + 'fox'), + 'jumped'), + 'over'), + 'the'), + 'lazy'), + 'dog'))) + + def testArithmetic(self): + tokenize = lexer.LexicalGrammar( + "+ - * / ( )", + NUM=r'[0-9]\w*', + VAR=r'[A-Za-z]\w*') + arith_grammar = Grammar({ + 'expr': [ + ['term'], + ['expr', '+', 'term'], + ['expr', '-', 
'term'], + ], + 'term': [ + ['prim'], + ['term', '*', 'prim'], + ['term', '/', 'prim'], + ], + 'prim': [ + ['NUM'], + ['VAR'], + ['(', 'expr', ')'], + ], + }) + self.compile(tokenize, arith_grammar) + + self.assertParse( + '2 * 3 + 4 * (5 + 7)', + ('expr_1', + ('term_1', '2', '*', '3'), + '+', + ('term_1', + '4', + '*', + ('prim_2', + '(', + ('expr_1', '5', '+', '7'), + ')')))) + + self.assertNoParse( + "(", + message="unexpected end of input") + self.assertNoParse( + ")", + message="expected one of ['(', 'NUM', 'VAR'], got ')'") + + def testAmbiguous(self): + # This grammar should fail verification. + # It's ambiguous: is ABC s(A)y(BC) or s(AB)y(C)? + grammar = Grammar({ + 'goal': [ + ['s', 'y'], + ], + 's': [ + ['A'], + ['s', 'B'], + ], + 'y': [ + ['C'], + ['B', 'C'], + ], + }) + + out = io.StringIO() + self.assertRaisesRegex(ValueError, r"conflict", + lambda: gen.generate_parser(out, grammar)) + + def testAmbiguousEmpty(self): + """Reject grammars that are ambiguous due to empty productions. + + (Empty productions are ones that match the empty string.)""" + + def check(rules): + grammar = Grammar(rules, goal_nts=['goal']) + out = io.StringIO() + self.assertRaisesRegex( + ValueError, + r"ambiguous grammar|conflict", + lambda: gen.generate_parser(out, grammar)) + + check({'goal': [[], []]}) + check({'goal': [[Optional('X')], []]}) + check({'goal': [[Optional('X')], [Optional('Y')]]}) + check({'goal': [[Optional('X'), Optional('Y')], [Optional('Z')]]}) + + # Issue #3: This also has an abiguity; empty string matches either + # `goal ::= [empty]` or `goal ::= phrase, phrase ::= [empty]`. + check({ + 'goal': [[Optional('phrase')]], + 'phrase': [[Optional('X')]], + }) + + # Input "X" is ambiguous, could be ('goal', ('a', None), ('a', 'X')) + # or the other 'a' could be the one that's missing. + check({ + 'goal': [['a', 'a']], + 'a': [[Optional('X')]], + }) + + def testLeftFactor(self): + """Most basic left-factoring test.""" + tokenize = lexer.LexicalGrammar("A B") + grammar = Grammar({ + 'goal': [ + ['A'], + ['A', 'B'], + ], + }) + + self.compile(tokenize, grammar) + self.assertParse("A", 'A') + self.assertParse("A B", ('goal_1', 'A', 'B')) + + def testLeftFactorMulti(self): + """Test left-factoring with common prefix of length >1.""" + tokenize = lexer.LexicalGrammar("A B C D E") + grammar = Grammar({ + 'goal': [ + ['A', 'B', 'C', 'D'], + ['A', 'B', 'C', 'E'], + ], + }) + self.compile(tokenize, grammar) + self.assertParse( + "A B C D", + ('goal_0', 'A', 'B', 'C', 'D')) + self.assertParse( + "A B C E", + ('goal_1', 'A', 'B', 'C', 'E')) + + def testLeftFactorMultiLevel(self): + """Test left-factoring again on a nonterminal introduced by + left-factoring.""" + tokenize = lexer.LexicalGrammar("FOR IN TO BY ( ) = ;", + VAR=r'[A-Za-z]+') + + # The first left-factoring pass on `stmt` will left-factor `FOR ( VAR`. + # A second pass is needed to left-factor `= expr TO expr`. 
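+ # (Left-factoring hoists a shared prefix out of several productions so the choice between them can wait until after the prefix has been parsed.)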
+ grammar = Grammar({ + 'stmt': [ + ['expr', ';'], + ['FOR', '(', 'VAR', 'IN', 'expr', ')', 'stmt'], + ['FOR', '(', 'VAR', '=', 'expr', 'TO', 'expr', ')', 'stmt'], + ['FOR', '(', 'VAR', '=', 'expr', 'TO', 'expr', + 'BY', 'expr', ')', 'stmt'], + ['IF', '(', 'expr', ')', 'stmt'], + ], + 'expr': [ + ['VAR'], + ], + }) + self.compile(tokenize, grammar) + self.assertParse( + "FOR (x IN y) z;", + ('stmt_1', 'FOR', '(', 'x', 'IN', 'y', ')', + ('stmt_0', 'z', ';'))) + self.assertParse( + "FOR (x = y TO z) x;", + ('stmt_2', 'FOR', '(', 'x', '=', 'y', 'TO', 'z', ')', + ('stmt_0', 'x', ';'))) + self.assertParse( + "FOR (x = y TO z BY w) x;", + ('stmt_3', 'FOR', '(', 'x', '=', 'y', 'TO', 'z', 'BY', 'w', ')', + ('stmt_0', 'x', ';'))) + + def testFirstFirstConflict(self): + """This grammar is unambiguous, but is not LL(1) due to a first/first conflict. + + Cribbed from: https://stackoverflow.com/a/17047370/94977 + """ + + tokenize = lexer.LexicalGrammar("A B C") + grammar = Grammar({ + 's': [ + ['x', 'B'], + ['y', 'C'], + ], + 'x': [ + prod(['A'], "x"), + ], + 'y': [ + prod(['A'], "y"), + ], + }) + self.compile(tokenize, grammar) + + self.assertParse("A B", ('s_0', ('x', 'A'), 'B')) + self.assertParse("A C", ('s_1', ('y', 'A'), 'C')) + + def testLeftHandSideExpression(self): + """Example of a grammar that's in SLR(1) but hard to smoosh into an LL(1) form. + + This is taken from the ECMAScript grammar. + + ...Of course, it's not really possible to enforce the desired syntactic + restrictions in LR(k) either; the ES grammar matches `(x + y) = z` and + an additional attribute grammar (IsValidSimpleAssignmentTarget) is + necessary to rule it out. + """ + self.compile( + lexer.LexicalGrammar("= +", VAR=r'[a-z]+\b'), + Grammar({ + 'AssignmentExpression': [ + ['AdditiveExpression'], + ['LeftHandSideExpression', '=', 'AssignmentExpression'], + ], + 'AdditiveExpression': [ + ['LeftHandSideExpression'], + ['AdditiveExpression', '+', 'LeftHandSideExpression'], + ], + 'LeftHandSideExpression': [ + ['VAR'], + ] + }) + ) + self.assertParse("z = x + y") + self.assertNoParse( + "x + y = z", + message="expected one of ['+', 'end of input'], got '='") + + def testDeepRecursion(self): + grammar = Grammar({ + 'expr': [ + ['SYMBOL'], + ['(', ')'], + ['(', 'exprs', ')'], + ], + 'exprs': [ + ['expr'], + ['exprs', 'expr'], + ], + }) + self.compile(LispTokenizer, grammar) + + N = 3000 + s = "x" + t = ('expr_0', 'x') + for i in range(N): + s = "(" + s + ")" + t = ('expr_2', '(', t, ')') + + result = self.parse(s) + + # Python can't check that result == t; it causes a RecursionError. + # Testing that repr(result) == repr(t), same deal. 
So: + for i in range(N): + self.assertIsInstance(result, tuple) + self.assertEqual(len(result), 4) + self.assertEqual(result[0], 'expr_2') + self.assertEqual(result[1], '(') + self.assertEqual(result[3], ')') + result = result[2] + + def testExpandOptional(self): + grammar = Grammar({'goal': [[]]}) + empties = {} + # Unit test for rewrites.expand_optional_symbols_in_rhs + self.assertEqual( + list(rewrites.expand_optional_symbols_in_rhs(['ONE', 'TWO', '3'], + grammar, empties)), + [(['ONE', 'TWO', '3'], {})]) + self.assertEqual( + list(rewrites.expand_optional_symbols_in_rhs( + ['a', 'b', Optional('c')], grammar, empties)), + [(['a', 'b'], {2: None}), + (['a', 'b', 'c'], {})]) + self.assertEqual( + list(rewrites.expand_optional_symbols_in_rhs( + [Optional('a'), Optional('b')], grammar, empties)), + [([], {0: None, 1: None}), + (['a'], {1: None}), + (['b'], {0: None}), + (['a', 'b'], {})]) + + def testEmptyGrammar(self): + tokenize = lexer.LexicalGrammar("X") + self.compile(tokenize, Grammar({'goal': [[]]})) + self.assertParse("", ('goal',)) + self.assertNoParse( + "X", + message="expected 'end of input', got 'X' (line 1)") + + def testOptionalEmpty(self): + tokenize = lexer.LexicalGrammar("X Y") + grammar = Grammar({ + 'a': [ + [Optional('b'), Optional('c')], + ], + 'b': [ + prod(['X'], 'b'), + ], + 'c': [ + prod(['Y'], 'c'), + ] + }) + self.compile(tokenize, grammar) + self.assertParse("", ('a', None, None)) + self.assertParse("X", ('a', ('b', 'X'), None)) + self.assertParse("Y", ('a', None, ('c', 'Y'))) + self.assertParse("X Y", ('a', ('b', 'X'), ('c', 'Y'))) + + def testOptional(self): + tokenize = lexer.LexicalGrammar('[ ] , X') + grammar = Grammar({ + 'array': [ + ['[', Optional('elision'), ']'], + ['[', 'elements', ']'], + ['[', 'elements', ',', Optional('elision'), ']'] + ], + 'elements': [ + [Optional('elision'), 'X'], + ['elements', ',', Optional('elision'), 'X'] + ], + 'elision': [ + [','], + ['elision', ','] + ] + }) + self.compile(tokenize, grammar) + self.assertParse("[]", + ('array_0', '[', None, ']')) + self.assertParse("[,]", + ('array_0', '[', ',', ']')) + self.assertParse( + "[,,X,,X,]", + ('array_2', + '[', + ('elements_1', + ('elements_0', + ('elision_1', + ',', + ','), + 'X'), + ',', + ',', + 'X'), + ',', + None, + ']')) + + def testPositiveLookahead(self): + self.compile( + lexer.LexicalGrammar('A B + ( )'), + Grammar({ + 'goal': [ + [LookaheadRule(frozenset({'A', 'B'}), True), 'expr'], + ], + 'expr': [ + ['term'], + ['expr', '+', 'term'], + ], + 'term': [ + ['A'], + ['B'], + ['(', 'expr', ')'], + ] + }) + ) + self.assertNoParse( + "(A)", + message="expected one of ['A', 'B'], got '('") + self.assertParse("A + B") + + def testNegativeLookahead(self): + tokenize = lexer.LexicalGrammar('a b') + rules = { + 'goal': [ + [LookaheadRule(frozenset({'a'}), False), 'abs'], + ], + 'abs': [ + ['a'], + ['b'], + ['abs', 'a'], + ['abs', 'b'], + ], + } + + self.compile(tokenize, Grammar(rules)) + self.assertNoParse("a b", message="expected 'b', got 'a'") + self.assertParse( + 'b a', + ('goal', ('abs_2', 'b', 'a'))) + + # In simple cases like this, the lookahead restriction can even + # disambiguate a grammar that would otherwise be ambiguous. 
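+ # Without that negative lookahead on 'a', the one-token input "a" below could reduce either via `abs` or via the new 'goal_a' production.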
+ rules['goal'].append(prod(['a'], 'goal_a')) + self.compile(tokenize, Grammar(rules)) + self.assertParse('a', ('goal_a', 'a')) + + def disabledNegativeLookaheadDisambiguation(self): + tokenize = lexer.LexicalGrammar( + '( ) { } ; function =', + IDENT=r'[A-Za-z_][A-Za-z_0-9]*') + grammar = Grammar({ + 'stmts': [ + ['stmt'], + ['stmts', 'stmt'], + ], + 'stmt': [ + [LookaheadRule(set=frozenset({'function'}), positive=False), + 'expr', ';'], + ['fndecl'], + ], + 'fndecl': [ + ['function', 'IDENT', '(', ')', '{', Optional('stmt'), '}'], + ], + 'expr': [ + ['term'], + ['IDENT', '=', 'expr'], + ], + 'term': [ + ['(', 'expr', ')'], + ['fndecl'], + ['term', '(', 'expr', ')'], + ], + }) + self.compile(tokenize, grammar) + + # Test that without the lookahead restriction, we reject this grammar + # (it's ambiguous): + del grammar['stmt'][0][0] + self.assertRaisesRegex(ValueError, + 'banana', + lambda: gen.compile(grammar)) + + self.assertParse( + 'function f() { x = function y() {}; }', + ('stmt', 1, + ('fndecl', + 'function', 'f', '(', ')', '{', + ('stmt', 0, + ('expr', 1, + 'x', + '=', + ('expr', 0, + ('term', 1, + ('fndecl', + 'function', 'y', '(', ')', + '{', None, '}')))), + ';')))) + + self.assertParse( + '(function g(){});', + ('stmts', 0, + ('stmt', 0, + ('term', 1, + ('fndecl', + 'function', 'g', '(', ')', '{', None, '}')), + ';'))) + + def testTrailingLookahead(self): + """Lookahead at the end of a production is banned.""" + tokenize = lexer.LexicalGrammar('IF ( X ) ELSE OTHER ;') + grammar = gen.Grammar({ + 'goal': [['stmt']], + 'stmt': [ + ['OTHER', ';'], + ['IF', '(', 'X', ')', 'stmt', + LookaheadRule(frozenset({'ELSE'}), False)], + ['IF', '(', 'X', ')', 'stmt', 'ELSE', 'stmt'], + ], + }) + + def stmt_0(): + return ('stmt_0', 'OTHER', ';') + + def stmt_1(t): + return ('stmt_1', 'IF', '(', 'X', ')', t) + + def stmt_2(t, e): + return ('stmt_2', 'IF', '(', 'X', ')', t, 'ELSE', e) + + self.compile(tokenize, grammar) + self.assertParse('IF(X) OTHER;', stmt_1(stmt_0())) + self.assertParse('IF(X) OTHER; ELSE OTHER;', + stmt_2(stmt_0(), stmt_0())) + self.assertParse('IF(X) IF(X) OTHER; ELSE OTHER; ELSE OTHER;', + stmt_2(stmt_2(stmt_0(), stmt_0()), stmt_0())) + self.assertParse('IF(X) OTHER; ELSE IF(X) OTHER; ELSE OTHER;', + stmt_2(stmt_0(), stmt_2(stmt_0(), stmt_0()))) + self.assertParse('IF(X) IF(X) OTHER; ELSE OTHER;', + stmt_1(stmt_2(stmt_0(), stmt_0()))) + + def testLookaheadBeforeOptional(self): + self.compile( + lexer.LexicalGrammar( + '= : _', + PUBLIC=r'public\b', + IDENT=r'[a-z]+\b', + NUM=r'[0-9]\b'), + Grammar({ + 'decl': [ + [ + LookaheadRule(frozenset({'IDENT'}), True), + Optional('attrs'), + 'pat', '=', 'NUM' + ], + ], + 'attrs': [ + ['attr'], + ['attrs', 'attr'], + ], + 'attr': [ + ['PUBLIC', ':'], + ['IDENT', ':'], + ], + 'pat': [ + ['IDENT'], + ['_'], + ], + }) + ) + self.assertEqual( + self.parse("x = 0"), + ("decl", None, "x", "=", "0")) + self.assertParse("thread: x = 0") + self.assertNoParse( + "public: x = 0", + message="expected 'IDENT', got 'public'") + self.assertNoParse("_ = 0", message="expected 'IDENT', got '_'") + self.assertParse("funny: public: x = 0") + self.assertParse("funny: _ = 0") + + def testForLookahead(self): + grammar = Grammar({ + 'Stmt': [ + [';'], + ['ForStmt'], + ], + 'ForStmt': [ + ["for", "(", LookaheadRule(frozenset({"let"}), False), + "Expr", ";", ";", ")", "Stmt"], + ], + 'Expr': [ + ["0"], + ["let"], + ], + }) + self.compile(lexer.LexicalGrammar("for ( let ; ) 0"), grammar) + self.assertParse("for (0;;) ;") + self.assertNoParse("for (let;;) ;", 
message="expected '0', got 'let'") + + def testLookaheadDisambiguation(self): + """A lookahead restriction should be able to rule out certain nonterminals entirely.""" + + grammar = Grammar({ + 'Script': [ + ['Statement'], + ['Statement', 'Statement'], + ], + 'Statement': [ + [LookaheadRule(frozenset({'function'}), False), 'Expression', ';'], + ['Function'], + ], + 'Function': [ + ['function', 'x', '(', ')', '{', '}'], + ], + 'Expression': [ + ['Primary'], + ['++', 'Primary'], + ['Primary', '++'], + ], + 'Primary': [ + ['Function'], + ['x'], + ], + }) + + self.compile(lexer.LexicalGrammar("function x ( ) { } ++ ;"), grammar) + self.assertParse("function x() {}") + self.assertParse("++function x() {};") + self.assertNoParse("++function x() {}", message="unexpected end") + # TODO: The parser generator fails to handle this case because it does + # not forward the restriction from producting a Function to the + # Primitive rule. Therefore, `Function [lookahead: ;]` is incorrectly + # reduced to a `Primitive [lookahead: ;]` + # self.assertNoParse("function x() {}++;", message="got ';'") + self.assertParse("function x() {} ++x;") + + # XXX to test: combination of lookaheads, ++, +-, -+, -- + # XXX todo: find an example where lookahead canonicalization matters + + def testHugeExample(self): + grammar = Grammar( + { + 'grammar': [['nt_def_or_blank_line'], + ['grammar', 'nt_def_or_blank_line']], + 'arg': [['sigil', 'NT']], + 'args': [['arg'], ['args', ',', 'arg']], + 'definite_sigil': [['~'], ['+']], + 'exclusion': [['terminal'], + ['nonterminal'], + ['CHR', 'through', 'CHR']], + 'exclusion_list': [['exclusion'], + ['exclusion_list', 'or', 'exclusion']], + 'ifdef': [['[', 'definite_sigil', 'NT', ']']], + 'line_terminator': [['NT'], ['NTALT']], + 'lookahead_assertion': [ + ['==', 'terminal'], + ['!=', 'terminal'], + ['<!', 'NT'], + ['<!', '{', 'lookahead_exclusions', '}']], + 'lookahead_exclusion': [['lookahead_exclusion_element'], + ['lookahead_exclusion', + 'lookahead_exclusion_element']], + 'lookahead_exclusion_element': [['terminal'], + ['no_line_terminator_here']], + 'lookahead_exclusions': [['lookahead_exclusion'], + ['lookahead_exclusions', ',', + 'lookahead_exclusion']], + 'no_line_terminator_here': [ + ['[', 'no', 'line_terminator', 'here', ']']], + 'nonterminal': [['NT'], ['NTCALL', '[', 'args', ']']], + 'nt_def': [['nt_lhs', 'EQ', 'NL', 'rhs_lines', 'NL'], + ['nt_lhs', 'EQ', 'one', 'of', 'NL', + 't_list_lines', 'NL']], + 'nt_def_or_blank_line': [['NL'], ['nt_def']], + 'nt_lhs': [['NT'], ['NTCALL', '[', 'params', ']']], + 'param': [['NT']], + 'params': [['param'], ['params', ',', 'param']], + 'rhs': [['symbols'], ['[', 'empty', ']']], + 'rhs_line': [[Optional(inner='ifdef'), 'rhs', + Optional(inner='PRODID'), 'NL'], + ['PROSE', 'NL']], + 'rhs_lines': [['rhs_line'], ['rhs_lines', 'rhs_line']], + 'sigil': [['definite_sigil'], ['?']], + 'symbol': [['terminal'], + ['nonterminal'], + ['nonterminal', '?'], + ['nonterminal', 'but', 'not', 'exclusion'], + ['nonterminal', 'but', 'not', 'one', 'of', + 'exclusion_list'], + ['[', 'lookahead', 'lookahead_assertion', ']'], + ['no_line_terminator_here'], + ['WPROSE']], + 'symbols': [['symbol'], ['symbols', 'symbol']], + 't_list_line': [['terminal_seq', 'NL']], + 't_list_lines': [['t_list_line'], + ['t_list_lines', 't_list_line']], + 'terminal': [['T'], ['CHR']], + 'terminal_seq': [['terminal'], ['terminal_seq', 'terminal']] + }, + variable_terminals='EQ T CHR NTCALL NT NTALT ' + 'PRODID PROSE WPROSE'.split() + ) + + # Note: This lexical grammar is not 
suitable for use with incremental + # parsing. + emu_grammar_lexer = lexer.LexicalGrammar( + # the operators and keywords: + "[ ] { } , ~ + ? <! == != " + "but empty here lookahead no not of one or through", + NL="\n", + # any number of colons together + EQ=r':+', + # terminals of the ES grammar, quoted with backticks + T=r'`[^` \n]+`|```', + # also terminals, denoting control characters + CHR=r'<[A-Z]+>|U\+[0-9A-f]{4}', + # nonterminals that will be followed by boolean parameters + NTCALL=r'(?:uri|[A-Z])\w*(?=\[)', + # nonterminals (also, boolean parameters) + NT=r'(?:uri|[A-Z])\w*', + # nonterminals wrapped in vertical bars for no apparent reason + NTALT=r'\|[A-Z]\w+\|', + # the spec also gives a few productions names + PRODID=r'#[A-Za-z]\w*', + # prose to the end of the line + PROSE=r'>.*', + # prose wrapped in square brackets + WPROSE=r'\[>[^]]*\]', + ) + + self.compile(emu_grammar_lexer, grammar) + + source = """\ + IdentifierReference[Yield, Await] : + Identifier + [~Yield] `yield` + [~Await] `await` + + """ + + self.assertParse(source) + + def testParameterizedProductions(self): + passthru = ('Yield', Var('Yield')), + name = Nt("name", passthru) + stmt = Nt("stmt", passthru) + stmts = Nt("stmts", passthru) + grammar = Grammar({ + 'script': [ + ['def'], + ['script', 'def'], + ], + 'def': [ + [ + 'function', 'IDENT', '(', ')', '{', + Nt('stmts', (('Yield', False),)), '}' + ], + [ + 'function', '*', 'IDENT', '(', ')', '{', + Nt('stmts', (('Yield', True),)), '}' + ], + ], + 'stmts': NtDef(('Yield',), [ + [stmt], + [stmts, stmt], + ], None), + 'stmt': NtDef(('Yield',), [ + [name, "(", ")", ";"], + [name, "=", name, ";"], + Production(["yield", name, ";"], + reducer=CallMethod("yield_stmt", [1]), + condition=('Yield', True)), + ], None), + 'name': NtDef(('Yield',), [ + ["IDENT"], + # Specifically ask for a method here, because otherwise we + # wouldn't get one and then type checking would fail. 
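+ # (Its condition, ('Yield', False), means `yield` can be used as a plain name only outside generators.)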
+ Production(["yield"], + CallMethod("yield_as_name", []), + condition=('Yield', False)), + ], None), + }, variable_terminals=["IDENT"]) + self.compile(lexer.LexicalGrammar("( ) { } ; * = function yield", + IDENT=r'[A-Za-z]\w*'), + grammar) + self.assertParse("function* farm() { cow = pig; yield cow; }") + self.assertNoParse( + "function city() { yield toOncomingTraffic; }", + message="expected one of ['(', '='], got 'toOncomingTraffic'") + self.assertNoParse( + "function* farm() { yield = corn; yield yield; }", + message="expected 'IDENT', got '='") + + def testMissingParameterError(self): + grammar = { + 'Foo': [ + ['Bar'], + ], + 'Bar': NtDef(('Arg',), [ + ['NUM'], + Production(['STR'], + reducer=0, + condition=('Arg', True)), + ], None), + } + + self.assertRaisesRegex(ValueError, "missing parameters for 'Bar'", + lambda: Grammar(grammar)) + + def testCanonicalLR(self): + """Example 4.39 (grammar 4.20) from the book.""" + + # Modified as marked below + grammar = Grammar({ + "S": [ + ["L", "=", "R"], + ["R"], + ], + "L": [ + ["*", "R"], + ["id"], + ], + "R": [ + ["L"], + # added so we can have a negative test, showing that + # `R = R` is not an S: + ["7"], + ], + }) + self.compile(lexer.LexicalGrammar("id = * 7"), grammar) + self.assertParse("id = *id") + self.assertParse("*id = id") + self.assertParse("id = 7") + self.assertNoParse("7 = id", + message="expected 'end of input', got '='") + + def testLookaheadWithCanonicalLR(self): + """Only a lookahead assertion makes this grammar unambiguous.""" + tokenize = lexer.LexicalGrammar("async => { } ;", Identifier=r'\w+') + grammar = Grammar({ + "script": [ + ["Expression", ";"], + ], + "Expression": [ + ["PrimaryExpression"], + ["async", "Identifier", "=>", "AsyncConciseBody"], + ], + "AsyncConciseBody": [ + [LookaheadRule(set=frozenset(["{"]), positive=False), + "Expression"], + ["{", "}"], + ], + "PrimaryExpression": [ + ["{", "}"], + ], + }) + + self.compile(tokenize, grammar) + self.assertParse("{};") + self.assertParse("async x => {};") + self.assertParse("async x => async y => {};") + + def testMultiGoal(self): + tokenize = lexer.LexicalGrammar("WHILE DEF FN { } ( ) -> ;", ID=r'\w+') + grammar = Grammar({ + "stmt": [ + ["expr", ";"], + ["{", "stmts", "}"], + ["WHILE", "(", "expr", ")", "stmt"], + ["DEF", "ID", "(", "ID", ")", "{", Optional("stmts"), "}"], + ], + "stmts": [ + ["stmt"], + ["stmts", "stmt"], + ], + "expr": [ + ["FN", "ID", "->", "expr"], + ["call_expr"], + ], + "call_expr": [ + ["ID"], + ["call_expr", "(", "expr", ")"], + ["(", "expr", ")"], + ], + }, goal_nts=["stmts", "expr"]) + self.compile(tokenize, grammar) + self.assertParse("WHILE ( x ) { decx ( x ) ; }", goal="stmts") + self.assertNoParse( + "WHILE ( x ) { decx ( x ) ; }", goal="expr", + message="expected one of ['(', 'FN', 'ID'], got 'WHILE'") + self.assertParse("f(x);", goal="stmts") + self.assertNoParse("f(x);", goal="expr", + message="expected 'end of input', got ';'") + self.assertParse("(FN x -> f ( x ))(x)", goal="expr") + self.assertNoParse("(FN x -> f ( x ))(x)", goal="stmts", + message="unexpected end of input") + + def testStaggeredItems(self): + """Items in a state can have different amounts of leading context.""" + # In this example grammar, after "A" "B", we're in a state that + # contains these two items (ignoring lookahead): + # goal ::= "A" "B" ยท y + # x ::= "B" ยท stars "X" + # + # Likewise, after `"A" "B" stars`, we have: + # x ::= "B" stars ยท "X" + # y ::= stars ยท "Y" + # stars ::= stars ยท "*" + tokenize = lexer.LexicalGrammar("A B * X Y") + 
grammar = Grammar({ + "goal": [ + ["A", "x"], + ["A", "B", "y"], + ], + "x": [ + ["B", "stars", "X"], + ], + "y": [ + ["stars", "Y"], + ], + "stars": [ + ["*"], + ["stars", "*"], + ], + }) + self.compile(tokenize, grammar) + self.assertParse("A B * * * X") + self.assertParse("A B * * * Y") + + def testCheckCycleFree(self): + tokenize = lexer.LexicalGrammar("!") + grammar = Grammar({ + "problem": [ + ["one", "two"], + ], + "one": [ + ["!"], + ], + "two": [ + [Optional("problem")], + ], + }) + self.compile(tokenize, grammar) + self.assertParse("! ! ! ! !") + + def testReduceActions(self): + tokenize = lexer.LexicalGrammar("+ - * / ( )", + NUM=r'[0-9]\w*', + VAR=r'[A-Za-z]\w*') + grammar = Grammar({ + "expr": [ + ["term"], + prod(["expr", "+", "term"], "add"), + prod(["expr", "-", "term"], "sub"), + ], + "term": [ + ["unary"], + prod(["term", "*", "unary"], "mul"), + prod(["term", "/", "unary"], "div"), + ], + "unary": [ + ["prim"], + prod(["-", "prim"], "neg"), + ], + "prim": [ + prod(["(", "expr", ")"], "parens"), + prod(["NUM"], "num"), + prod(["VAR"], "var"), + ], + }, goal_nts=['expr']) + + self.compile(tokenize, grammar) + self.assertParse("X", ('var', 'X')) + self.assertParse("3 + 4", ('add', ('num', '3'), '+', ('num', '4'))) + self.assertParse( + "2 * 3 + 4 * (5 + 7)", + ( + 'add', + ('mul', ('num', '2'), '*', ('num', '3')), + '+', + ( + 'mul', + ('num', '4'), + '*', + ('parens', '(', + ('add', ('num', '5'), '+', ('num', '7')), ')')))) + self.assertParse( + "1 / (1 + 1 / (1 + 1 / (1 + 1)))", + ( + 'div', ('num', '1'), '/', ( + 'parens', '(', ( + 'add', ('num', '1'), '+', ( + 'div', ('num', '1'), '/', ( + 'parens', '(', ( + 'add', ('num', '1'), '+', ( + 'div', ('num', '1'), '/', ( + 'parens', '(', ( + 'add', ('num', '1'), '+', + ('num', '1')), + ')'))), + ')'))), + ')'))) + + def testConvenienceMethodTypeInference(self): + """A method can be called only in an intermediate reduce expression.""" + + # The reduce expression `f(g($0))`. + reducer = CallMethod("f", [CallMethod("g", [0])]) + + # The grammar `goal ::= NAME => f(g($1))`. + grammar = Grammar( + { + 'goal': [Production(['NAME'], reducer)], + }, + variable_terminals=['NAME']) + + # Since the return value of f() is used as the value of a `goal`, + # we infer that f() returns a goal. + self.assertEqual( + grammar.methods['f'].return_type, + jsparagus.types.Type('goal')) + + # Since the return value of g() isn't used except as an argument, we + # just give it the type `g`. + self.assertEqual( + grammar.methods['g'].return_type, + jsparagus.types.Type('g')) + + # Since g() is passed to f(), we infer this: + self.assertEqual( + grammar.methods['f'].argument_types, + [jsparagus.types.Type('g')]) + + def testEpsilonFreeTransform(self): + tokenize = lexer.LexicalGrammar('{ } X') + grammar = Grammar({ + 'goal': [ + ['{', 'xlist', '}'], + ], + 'xlist': [ + [], + ['xlist', 'X'], + ], + }) + self.compile(tokenize, grammar) + self.assertParse("{}", ('goal', '{', ('xlist_0',), '}')) + + def compile_as_js( + self, + grammar_source: str, + goals: typing.Optional[typing.Iterable[str]] = None, + verbose: bool = False, + ) -> None: + """Like self.compile(), but generate a parser from ESGrammar, + with ASI support, using the JS lexer. 
+ """ + from js_parser.lexer import JSLexer + from js_parser import load_es_grammar + from js_parser import generate_js_parser_tables + + grammar = parse_esgrammar( + grammar_source, + filename="es-simplified.esgrammar", + extensions=[], + goals=goals, + synthetic_terminals=load_es_grammar.ECMASCRIPT_SYNTHETIC_TERMINALS, + terminal_names=load_es_grammar.TERMINAL_NAMES_FOR_SYNTACTIC_GRAMMAR) + grammar = generate_js_parser_tables.hack_grammar(grammar) + base_parser_class = gen.compile(grammar, verbose=verbose) + + # "type: ignore" because poor mypy can't cope with the runtime codegen + # we're doing here. + class JSParser(base_parser_class): # type: ignore + def __init__(self, goal='Script', builder=None): + super().__init__(goal, builder) + self._goal = goal + # self.debug = True + + def clone(self): + return JSParser(self._goal, self.methods) + + def on_recover(self, error_code, lexer, stv): + """Check that ASI error recovery is really acceptable.""" + if error_code == 'asi': + if not self.closed and stv.term != '}' and not lexer.saw_line_terminator(): + lexer.throw("missing semicolon") + else: + assert error_code == 'do_while_asi' + + self.tokenize = JSLexer + self.parser_class = JSParser + + def testExtraGoal(self): + + grammar_source = """ +StuffToIgnore_ThisWorksAroundAnUnrelatedBug: + Identifier + IdentifierName + +Hat : + `^` + +ArrowFunction : + `^` `=>` + Hat `*` `=>` + +Script : + `?` `?` ArrowFunction + `?` `?` [lookahead <! {`async`} ] Hat `of` + +LazyArrowFunction : + ArrowFunction + """ + + def try_it(goals): + self.compile_as_js(grammar_source, goals=goals) + self.assertParse("? ? ^ =>", goal='Script') + self.assertParse("? ? ^ of", goal='Script') + + try_it(['Script', 'LazyArrowFunction']) + try_it(['Script']) + + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/rust/jsparagus/tests/test_js.py b/third_party/rust/jsparagus/tests/test_js.py new file mode 100644 index 0000000000..571232f77a --- /dev/null +++ b/third_party/rust/jsparagus/tests/test_js.py @@ -0,0 +1,207 @@ +""" Tests for the JS parser. """ + +import unittest +import jsparagus.lexer +from js_parser.parser import parse_Script, JSParser +from js_parser.lexer import JSLexer + + +class ESTestCase(unittest.TestCase): + def parse(self, s): + if isinstance(s, list): + f = JSLexer(JSParser()) + for chunk in s: + f.write(chunk) + return f.close() + else: + return parse_Script(s) + + def assert_parses(self, s): + self.parse(s) + + def assert_incomplete(self, s): + """Assert that s fails to parse with UnexpectedEndError. + + (This should be the case if `s` is a prefix of a valid Script.) + """ + self.assertRaises(jsparagus.lexer.UnexpectedEndError, + lambda: parse_Script(s)) + + def assert_syntax_error(self, s): + """Assert that s fails to parse.""" + with self.assertRaises(jsparagus.lexer.SyntaxError): + parse_Script(s) + + def assert_can_close_after(self, s): + parser = JSParser() + lexer = JSLexer(parser) + if isinstance(s, list): + for chunk in s: + lexer.write(chunk) + else: + lexer.write(s) + self.assertTrue(lexer.can_close()) + + # === Tests! 
+ + def test_asi_at_end(self): + self.assert_parses("3 + 4") + self.assert_syntax_error("3 4") + self.assert_incomplete("3 +") + self.assert_incomplete("{") + self.assert_incomplete("{;") + + def test_asi_at_block_end(self): + self.assert_parses("{ doCrimes() }") + self.assert_parses("function f() { ok }") + + def test_asi_after_line_terminator(self): + self.assert_parses('''\ + switch (value) { + case 1: break + case 2: console.log('2'); + } + ''') + self.assert_syntax_error( + "switch (value) { case 1: break case 2: console.log('2'); }") + + def test_asi_after_no_line_terminator_here(self): + self.assert_parses('''\ + function f() { + return + x; + } + ''') + + def test_asi_suppressed(self): + # The specification says ASI does not happen in the production + # EmptyStatement : `;`. + self.assert_syntax_error("if (true)") + self.assert_syntax_error("{ for (;;) }") + + # ASI does not happen in for(;;) loops. + self.assert_syntax_error("for ( \n ; ) {}") + self.assert_syntax_error("for ( ; \n ) {}") + self.assert_syntax_error("for ( \n \n ) {}") + self.assert_syntax_error("for (var i = 0 \n i < 9; i++) {}") + self.assert_syntax_error("for (var i = 0; i < 9 \n i++) {}") + self.assert_syntax_error("for (i = 0 \n i < 9; i++) {}") + self.assert_syntax_error("for (i = 0; i < 9 \n i++) {}") + self.assert_syntax_error("for (let i = 0 \n i < 9; i++) {}") + + # ASI is suppressed in the production ClassElement[Yield, Await] : `;` + # to prevent an infinite loop of ASI. lol + self.assert_syntax_error("class Fail { \n +1; }") + + def test_if_else(self): + self.assert_parses("if (x) f();") + self.assert_incomplete("if (x)") + self.assert_parses("if (x) f(); else g();") + self.assert_incomplete("if (x) f(); else") + self.assert_parses("if (x) if (y) g(); else h();") + self.assert_parses("if (x) if (y) g(); else h(); else j();") + + def test_lexer_decimal(self): + self.assert_parses("0.") + self.assert_parses(".5") + self.assert_syntax_error(".") + + def test_arrow(self): + self.assert_parses("x => x") + self.assert_parses("f = x => x;") + self.assert_parses("(x, y) => [y, x]") + self.assert_parses("f = (x, y) => {}") + self.assert_syntax_error("(x, y) => {x: x, y: y}") + + def test_invalid_character(self): + self.assert_syntax_error("\0") + self.assert_syntax_error("โx;") + self.assert_syntax_error("const ONE_THIRD = 1 รท 3;") + + def test_regexp(self): + self.assert_parses(r"/\w/") + self.assert_parses("/[A-Z]/") + self.assert_parses("/[//]/") + self.assert_parses("/a*a/") + self.assert_parses("/**//x*/") + self.assert_parses("{} /x/") + self.assert_parses("of / 2") + + def test_incomplete_comments(self): + self.assert_syntax_error("/*") + self.assert_syntax_error("/* hello world") + self.assert_syntax_error("/* hello world *") + self.assert_parses(["/* hello\n", " world */"]) + self.assert_parses(["// oawfeoiawj", "ioawefoawjie"]) + self.assert_parses(["// oawfeoiawj", "ioawefoawjie\n ok();"]) + self.assert_parses(["// oawfeoiawj", "ioawefoawjie", "jiowaeawojefiw"]) + self.assert_parses(["// oawfeoiawj", "ioawefoawjie", "jiowaeawojefiw\n ok();"]) + + def test_awkward_chunks(self): + self.assert_parses(["let", "ter.head = 1;"]) + self.assert_parses(["let", " x = 1;"]) + + # `list()` here explodes the string into a list of one-character strings. 
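+ # Feeding one-character chunks exercises the incremental lexer/parser at every possible split point.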
+ self.assert_parses(list("function f() { ok(); }")) + + self.assertEqual( + self.parse(["/xyzzy/", "g;"]), + ('script', + ('script_body', + ('statement_list_single', + ('expression_statement', + ('regexp_literal', '/xyzzy/g')))))) + + self.assertEqual( + self.parse(['x/', '=2;']), + ('script', + ('script_body', + ('statement_list_single', + ('expression_statement', + ('compound_assignment_expr', + ('identifier_expr', ('identifier_reference', 'x')), + ('box_assign_op', ('div_assign_op', '/=')), + ('numeric_literal', '2'))))))) + + def test_can_close(self): + self.assert_can_close_after([]) + self.assert_can_close_after("") + self.assert_can_close_after("2 + 2;\n") + self.assert_can_close_after("// seems ok\n") + + def test_can_close_with_asi(self): + self.assert_can_close_after("2 + 2\n") + + def test_conditional_keywords(self): + # property names + self.assert_parses("let obj = {if: 3, function: 4};") + self.assert_parses("assert(obj.if == 3);") + + # method names + self.assert_parses(""" + class C { + if() {} + function() {} + } + """) + + self.assert_parses("var let = [new Date];") # let as identifier + self.assert_parses("let v = let;") # let as keyword, then identifier + # Next line would fail because the multitoken `let [` lookahead isn't implemented yet. + # self.assert_parses("let.length;") # `let .` -> ExpressionStatement + self.assert_syntax_error("let[0].getYear();") # `let [` -> LexicalDeclaration + + self.assert_parses(""" + var of = [1, 2, 3]; + for (of of of) console.log(of); // logs 1, 2, 3 + """) + self.assert_parses("var of, let, private, target;") + self.assert_parses("class X { get y() {} }") + self.assert_parses("async: { break async; }") + self.assert_parses("var get = { get get() {}, set get(v) {}, set: 3 };") + self.assert_parses("for (async of => {};;) {}") + # self.assert_parses("for (async of []) {}") # would fail + + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/rust/jsparagus/tests/test_parse_pgen.py b/third_party/rust/jsparagus/tests/test_parse_pgen.py new file mode 100644 index 0000000000..5052f9069c --- /dev/null +++ b/third_party/rust/jsparagus/tests/test_parse_pgen.py @@ -0,0 +1,33 @@ +import unittest + +import jsparagus.gen +from jsparagus import parse_pgen, parse_pgen_generated + + +class ParsePgenTestCase(unittest.TestCase): + def test_self(self): + import os + filename = os.path.join(os.path.dirname(parse_pgen.__file__), "..", + "pgen.pgen") + grammar = parse_pgen.load_grammar(filename) + self.maxDiff = None + pgen_grammar = parse_pgen.pgen_grammar + self.assertEqual(pgen_grammar.nonterminals, grammar.nonterminals) + self.assertEqual(pgen_grammar.variable_terminals, + grammar.variable_terminals) + self.assertEqual(pgen_grammar.goals(), grammar.goals()) + + with open(parse_pgen_generated.__file__) as f: + pre_generated = f.read() + + import io + out = io.StringIO() + jsparagus.gen.generate_parser(out, grammar) + generated_from_file = out.getvalue() + + self.maxDiff = None + self.assertEqual(pre_generated, generated_from_file) + + +if __name__ == '__main__': + unittest.main() diff --git a/third_party/rust/jsparagus/update.sh b/third_party/rust/jsparagus/update.sh new file mode 100755 index 0000000000..7d01d70ab9 --- /dev/null +++ b/third_party/rust/jsparagus/update.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# update.sh - Rebuild generated files from parse_pgen.py and pgen.pgen. +# +# These generated files are not actually used to generate themselves, +# so the process isn't as tricky as it could otherwise be. 
(They are used +# for testing and benchmarking.) +# +# How to change the pgen syntax: +# +# 1. Update the pgen_grammar and ASTBuilder in parse_pgen.py, +# and other downstream Python and Rust code appropriately. +# 2. Make the corresponding edits to pgen.pgen. You can change it to +# use the new syntax that you're adding. +# 3. Run this script. +# +# Even if something fails, fear not! It's usually pretty easy to fix stuff and +# get to a fixpoint where everything passes. + +set -eu + +cd $(dirname "$0") +python3 -m jsparagus.parse_pgen --regenerate > jsparagus/parse_pgen_generated_NEW.py +mv jsparagus/parse_pgen_generated_NEW.py jsparagus/parse_pgen_generated.py + +./test.sh diff --git a/third_party/rust/jsparagus/update_stencil.py b/third_party/rust/jsparagus/update_stencil.py new file mode 100755 index 0000000000..7350bb76e3 --- /dev/null +++ b/third_party/rust/jsparagus/update_stencil.py @@ -0,0 +1,722 @@ +#!/usr/bin/env python3 + +""" Extract stencil data/struct from C++ header. +""" + +import argparse +import os +import re +import shutil +import subprocess +import sys +from textwrap import dedent + +parser = argparse.ArgumentParser(description='Update stencil data/struct') +parser.add_argument('PATH_TO_MOZILLA_CENTRAL', + help='Path to mozilla-central') +parser.add_argument('PATH_TO_JSPARAGUS', + help='Path to jsparagus') +args = parser.parse_args() + + +def ensure_exists(path): + if not os.path.exists(path): + print(f'{path} does not exist', file=sys.stderr) + sys.exit(1) + + +def ensure_input_files(files): + paths = {} + for (parent, name) in files: + path = os.path.join(parent, name) + ensure_exists(path) + paths[name] = path + + return paths + + +js_dir = os.path.join(args.PATH_TO_MOZILLA_CENTRAL, 'js') +frontend_dir = os.path.join(js_dir, 'src', 'frontend') +vm_dir = os.path.join(js_dir, 'src', 'vm') +public_dir = os.path.join(js_dir, 'public') + +input_paths = ensure_input_files([ + (frontend_dir, 'SourceNotes.h'), + (public_dir, 'Symbol.h'), + (vm_dir, 'AsyncFunctionResolveKind.h'), + (vm_dir, 'BytecodeFormatFlags.h'), + (vm_dir, 'CheckIsObjectKind.h'), + (vm_dir, 'CompletionKind.h'), + (vm_dir, 'FunctionFlags.h'), + (vm_dir, 'FunctionPrefixKind.h'), + (vm_dir, 'GeneratorAndAsyncKind.h'), + (vm_dir, 'GeneratorResumeKind.h'), + (vm_dir, 'Opcodes.h'), + (vm_dir, 'ThrowMsgKind.h'), + (vm_dir, 'StencilEnums.h'), +]) + + +def get_source_path(crate, name): + path = os.path.join(args.PATH_TO_JSPARAGUS, + 'crates', crate, 'src', name) + ensure_exists(path) + return path + + +opcode_dest_path = get_source_path('stencil', 'opcode.rs') +emitter_dest_path = get_source_path('emitter', 'emitter.rs') +function_dest_path = get_source_path('stencil', 'function.rs') +script_dest_path = get_source_path('stencil', 'script.rs') + +copy_dir = os.path.join(args.PATH_TO_JSPARAGUS, + 'crates', 'stencil', 'src', 'copy') +if not os.path.exists(copy_dir): + os.makedirs(copy_dir) + + +def extract_opcodes(paths): + opcodes = [] + + with open(paths['Opcodes.h'], 'r') as f: + for line in f: + line = line.strip() + + if line.startswith('IF_RECORD_TUPLE('): + # Ignore Record and Tuple opcodes + continue + + if line.startswith('MACRO(') and ',' in line: + line = line[5:] + if line.endswith(' \\'): + line = line[:-2] + assert line.endswith(')') + opcodes.append((" " * 16) + line + ",") + + return opcodes + + +def extract_opcode_flags(paths): + pat = re.compile(r'(JOF_[A-Z0-9_]+)\s=\s([^,]+),\s*/\*\s*(.*?)\s*\*/', + re.DOTALL) + + flags = [] + + with open(paths['BytecodeFormatFlags.h'], 'r') as f: + content = 
f.read() + + for m in pat.finditer(content): + name = m.group(1) + value = m.group(2) + comment = m.group(3) + + comment = re.sub('\s*\n\s*', ' ', comment) + + if name == 'JOF_MODEMASK': + continue + + flags.append({ + 'name': name, + 'value': value, + 'comment': comment, + }) + + return flags + + +def remove_comment(body): + block_comment_pat = re.compile(r'/\*.+?\*/', re.M) + line_comment_pat = re.compile(r'//.*') + + result = '' + for line in block_comment_pat.sub('', body).split('\n'): + line = line_comment_pat.sub('', line) + result += line + + return result + + +def filter_enum_body(body): + space_pat = re.compile(r'\s*') + return space_pat.sub('', body) + + +size_types = { + 'bool': 'bool', + 'int8_t': 'i8', + 'uint8_t': 'u8', + 'uint16_t': 'u16', + 'uint24_t': 'u24', + 'int32_t': 'i32', + 'uint32_t': 'u32', +} + + +def extract_enum(types, paths, ty, filename=None, custom_handler=None): + variants_pat = re.compile( + r'enum(?:\s+class)?\s*' + ty + r'\s*:\s*([A-Za-z0-9_]+)\s*\{([^}]+)\}', re.M) + simple_init_pat = re.compile(r'^([A-Za-z0-9_]+)=((:?0x)?[A-Fa-f0-9+]+)$') + bits_init_pat = re.compile(r'^([A-Za-z0-9_]+)=(\d+)<<(\d+)$') + + if not filename: + filename = f'{ty}.h' + with open(paths[filename], 'r') as f: + content = f.read() + content = remove_comment(content) + + m = variants_pat.search(content) + assert m, f'enum {ty} is not found' + + size_type = m.group(1) + body = m.group(2) + + if size_type not in size_types: + print(f'{size_types} is not supported', file=sys.stderr) + sys.exit(1) + + size = size_types[size_type] + + body = filter_enum_body(body) + + variants = [] + i = 0 + for variant in body.split(','): + if variant == '': + # After trailing comma + continue + + m = simple_init_pat.search(variant) + if m: + name = m.group(1) + value = m.group(2) + + variants.append((name, value)) + if value.startswith('0x'): + i = int(value, 16) + 1 + else: + i = int(value) + 1 + continue + + m = bits_init_pat.search(variant) + if m: + name = m.group(1) + bits = m.group(2) + shift = m.group(3) + + value = f'{bits} << {shift}' + + variants.append((name, value)) + + # If a bit pattern is inside the variant, + # do not support any variant without initializer. 
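+ # (i = None below makes any later variant that lacks an explicit initializer hit the "should have initializer" error.)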
+ i = None + continue + + if custom_handler: + if custom_handler(variants, variant): + i = None + continue + + if i is None: + raise Exception(f'All variants should have initializer : {variant}') + + name = variant + value = i + variants.append((name, value)) + i += 1 + + types[ty] = { + 'dead': False, + 'size': size, + 'variants': variants + } + + +def extract_function_flags(paths): + kind_init_pat = re.compile(r'^([A-Za-z0-9_]+)=([A-Za-z0-9_]+)<<([A-Za-z0-9_]+)$') + combined_init_pat = re.compile(r'^([A-Za-z0-9_]+)=([A-Za-z0-9_]+(\|[A-Za-z0-9_]+)*)$') + + def custom_handler(variants, variant): + m = kind_init_pat.search(variant) + if m: + name = m.group(1) + bits = m.group(2) + shift = m.group(3) + + value = f'(FunctionKind::{bits} as u16) << {shift}' + + variants.append((name, value)) + return True + + m = combined_init_pat.search(variant) + if m: + name = m.group(1) + value = m.group(2) + + variants.append((name, value)) + return True + + raise Exception(f'unhandled variant {variant}') + + types = {} + extract_enum(types, paths, 'Flags', 'FunctionFlags.h', + custom_handler) + + assert types['Flags']['size'] == 'u16' + return types['Flags']['variants'] + + +def extract_types(paths): + types = {} + + def extract_symbols(): + pat = re.compile(r'MACRO\((.+)\)') + + ty = 'SymbolCode' + variants = [] + i = 0 + + found = False + state = 'before' + with open(paths['Symbol.h'], 'r') as f: + for line in f: + if 'enum class SymbolCode : uint32_t {' in line: + found = True + + if state == 'before': + if 'JS_FOR_EACH_WELL_KNOWN_SYMBOL' in line: + state = 'macro' + elif state == 'macro': + m = pat.search(line) + if m: + sym = m.group(1) + sym = sym[0].upper() + sym[1:] + variants.append((sym, i)) + i += 1 + + if not line.strip().endswith('\\'): + state = 'after' + + if not found: + print('SymbolCode : uint32_t is not found', + file=sys.stderr) + sys.exit(1) + + types[ty] = { + 'dead': False, + 'size': 'u32', + 'variants': variants + } + + def extract_source_notes(): + pat = re.compile(r'M\((.+),(.+),(.+)\)') + + ty = 'SrcNoteType' + variants = [] + i = 0 + + found = False + state = 'before' + with open(paths['SourceNotes.h'], 'r') as f: + for line in f: + if 'enum class SrcNoteType : uint8_t {' in line: + found = True + + if state == 'before': + if 'FOR_EACH_SRC_NOTE_TYPE' in line: + state = 'macro' + elif state == 'macro': + m = pat.search(line) + if m: + variants.append((m.group(1), i)) + i += 1 + + if not line.strip().endswith('\\'): + state = 'after' + + if not found: + print('SrcNoteType : uint8_t is not found', + file=sys.stderr) + sys.exit(1) + + types[ty] = { + 'dead': False, + 'size': 'u8', + 'variants': variants + } + + extract_enum(types, paths, 'AsyncFunctionResolveKind') + extract_enum(types, paths, 'CheckIsObjectKind') + extract_enum(types, paths, 'CompletionKind') + extract_enum(types, paths, 'FunctionPrefixKind') + extract_enum(types, paths, 'GeneratorResumeKind') + extract_enum(types, paths, 'ThrowMsgKind') + extract_enum(types, paths, 'ThrowCondition', 'ThrowMsgKind.h') + extract_enum(types, paths, 'TryNoteKind', 'StencilEnums.h') + + extract_symbols() + + extract_source_notes() + + return types + + +def extract_script_types(paths): + types = {} + + extract_enum(types, paths, 'ImmutableScriptFlagsEnum', 'StencilEnums.h') + types['ImmutableScriptFlagsEnum']['dead'] = True + extract_enum(types, paths, 'MutableScriptFlagsEnum', 'StencilEnums.h') + types['MutableScriptFlagsEnum']['dead'] = True + + # Remove unused mask that doesn't follow the naming convention. 
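+ # (filter() returns a lazy iterator, which is fine here because generate_types walks each variant list only once.)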
+ types['MutableScriptFlagsEnum']['variants'] = \ + filter(lambda item: item[0] != 'WarmupResets_MASK', + types['MutableScriptFlagsEnum']['variants']) + + return types + + +def extract_function_types(paths): + types = {} + + extract_enum(types, paths, 'FunctionKind', filename='FunctionFlags.h') + + return types + + +def format_opcodes(out, opcodes): + for opcode in opcodes: + out.write(f'{opcode}\n') + + +def format_opcode_flags(out, flags): + for flag in flags: + out.write(dedent(f"""\ + /// {flag['comment']} + const {flag['name']}: u32 = {flag['value']}; + + """)) + + +def rustfmt(path): + subprocess.run(['rustfmt', path], check=True) + + +def update_opcode(path, opcodes, flags): + tmppath = f'{path}.tmp' + + with open(path, 'r') as in_f: + with open(tmppath, 'w') as out_f: + state = 'normal' + for line in in_f: + if '@@@@ BEGIN OPCODES @@@@' in line: + state = 'opcodes' + out_f.write(line) + format_opcodes(out_f, opcodes) + elif '@@@@ END OPCODES @@@@' in line: + assert state == 'opcodes' + state = 'normal' + out_f.write(line) + elif '@@@@ BEGIN FLAGS @@@@' in line: + state = 'flags' + out_f.write(line) + format_opcode_flags(out_f, flags) + elif '@@@@ END FLAGS @@@@' in line: + assert state == 'flags' + state = 'normal' + out_f.write(line) + elif state == 'normal': + out_f.write(line) + assert state == 'normal' + + os.replace(tmppath, path) + rustfmt(path) + + +def to_snake_case(s): + return re.sub(r'(?<!^)(?=[A-Z])', '_', s).lower() + + +def parse_operands(opcode): + params = [] + + copied_types = [ + 'AsyncFunctionResolveKind', + 'CheckIsObjectKind', + 'CompletionKind', + 'FunctionPrefixKind', + 'GeneratorResumeKind', + 'ThrowMsgKind', + 'ThrowCondition', + ] + + for operand in opcode.operands_array: + tmp = operand.split(' ') + ty = tmp[0] + name = to_snake_case(tmp[1]) + + if ty in size_types: + ty = size_types[ty] + elif ty == 'double': + ty = 'f64' + elif ty in copied_types: + pass + else: + print(f'Unsupported operand type {ty}', file=sys.stderr) + sys.exit(1) + + if 'JOF_ATOM' in opcode.format_: + assert ty == 'u32' + ty = 'GCThingIndex' + + if 'JOF_STRING' in opcode.format_: + assert ty == 'u32' + ty = 'GCThingIndex' + + if 'JOF_ICINDEX' in opcode.format_ or 'JOF_LOOPHEAD' in opcode.format_: + if ty == 'u32' and name == 'ic_index': + ty = 'IcIndex' + name = '' + else: + assert 'JOF_LOOPHEAD' in opcode.format_ and name == 'depth_hint' + + # FIXME: Stronger typing for Opcode::CheckIsObj kind parameter. 
+ + params.append((ty, name)) + + return params + + +def generate_types(out_f, types): + for ty in types: + variants = [] + for variant, i in types[ty]['variants']: + if types[ty]['dead']: + variants.append(dedent(f"""\ + #[allow(dead_code)] + """)) + + variants.append(dedent(f"""\ + {variant} = {i}, + """)) + + out_f.write(dedent(f"""\ + #[derive(Debug, Clone, Copy)] + pub enum {ty} {{ + {''.join(variants)}}} + + """)) + + +def format_function_flags(out_f, function_flags): + for name, value in function_flags: + out_f.write(dedent(f"""\ + #[allow(dead_code)] + const {name} : u16 = {value}; + """)) + + +def generate_emit_methods(out_f, opcodes, types): + for op, opcode in opcodes.items(): + if op in ['True', 'False']: + # done by `boolean` method + continue + + if op in ['Void', 'Pos', 'Neg', 'Pos', 'BitNot', 'Not']: + # done by `emit_unary_op` method + continue + + if op in ['BitOr', 'BitXor', 'BitAnd', + 'Eq', 'Ne', 'StrictEq', 'StrictNe', + 'Lt', 'Gt', 'Le', 'Ge', + 'Instanceof', 'In', + 'Lsh', 'Rsh', 'Ursh', + 'Add', 'Sub', 'Mul', 'Div', 'Mod', 'Pow']: + # done by `emit_binary_op` method + continue + + if op == 'TableSwitch': + # Unsupported + continue + + op_snake = opcode.op_snake + if op_snake in ['yield', 'await']: + op_snake = f'{op_snake}_' + + params = parse_operands(opcode) + + method = 'emit_op' + extra_args = '' + + if 'JOF_ARGC' in opcode.format_: + assert int(opcode.nuses) == -1 + method = 'emit_argc_op' + extra_args = f', {params[0][1]}' + elif op == 'PopN': + assert int(opcode.nuses) == -1 + method = 'emit_pop_n_op' + extra_args = f', {params[0][1]}' + elif op == 'RegExp': + assert len(params) == 1 + assert params[0][0] == 'u32' + params[0] = ('GCThingIndex', params[0][1]) + elif 'JOF_OBJECT' in opcode.format_ or 'JOF_SCOPE' in opcode.format_ or 'JOF_SHAPE' in opcode.format_: + assert len(params) == 1 + assert params[0][0] == 'u32' + params[0] = ('GCThingIndex', params[0][1]) + elif 'JOF_JUMP' in opcode.format_: + assert params[0][0] == 'i32' + params[0] = ('BytecodeOffsetDiff', params[0][1]) + else: + assert int(opcode.nuses) != -1 + + assert int(opcode.ndefs) != -1 + + method_params = [] + for ty, name in params: + if ty == 'IcIndex': + continue + method_params.append(f', {name}: {ty}') + + out_f.write(dedent(f"""\ + pub fn {op_snake}(&mut self{''.join(method_params)}) {{ + self.{method}(Opcode::{op}{extra_args}); + """)) + + for (ty, name) in params: + if ty in types: + size_ty = types[ty]['size'] + out_f.write(dedent(f"""\ + self.write_{size_ty}({name} as {size_ty}); + """)) + else: + out_f.write(dedent(f"""\ + self.write_{to_snake_case(ty)}({name}); + """)) + + out_f.write(dedent(f"""\ + }} + + """)) + + +def get_filtered_opcodes(): + sys.path.append(vm_dir) + from jsopcode import get_opcodes + + _, opcodes = get_opcodes(args.PATH_TO_MOZILLA_CENTRAL) + + filtered_opcodes = {} + for op, opcode in opcodes.items(): + if opcode.type_name in ['Record literals', 'Tuple literals']: + continue + + filtered_opcodes[op] = opcode + + return filtered_opcodes + + +def update_emitter(path, types): + opcodes = get_filtered_opcodes() + + tmppath = f'{path}.tmp' + + with open(path, 'r') as in_f: + with open(tmppath, 'w') as out_f: + state = 'normal' + for line in in_f: + if '@@@@ BEGIN METHODS @@@@' in line: + state = 'methods' + out_f.write(line) + generate_emit_methods(out_f, opcodes, types) + elif '@@@@ END METHODS @@@@' in line: + assert state == 'methods' + state = 'normal' + out_f.write(line) + elif '@@@@ BEGIN TYPES @@@@' in line: + state = 'types' + out_f.write(line) + 
generate_types(out_f, types) + elif '@@@@ END TYPES @@@@' in line: + assert state == 'types' + state = 'normal' + out_f.write(line) + elif state == 'normal': + out_f.write(line) + assert state == 'normal' + + os.replace(tmppath, path) + rustfmt(path) + + +def update_function(path, types, flags): + opcodes = get_filtered_opcodes() + + tmppath = f'{path}.tmp' + + with open(path, 'r') as in_f: + with open(tmppath, 'w') as out_f: + state = 'normal' + for line in in_f: + if '@@@@ BEGIN TYPES @@@@' in line: + state = 'types' + out_f.write(line) + generate_types(out_f, types) + format_function_flags(out_f, flags) + elif '@@@@ END TYPES @@@@' in line: + assert state == 'types' + state = 'normal' + out_f.write(line) + elif state == 'normal': + out_f.write(line) + assert state == 'normal' + + os.replace(tmppath, path) + rustfmt(path) + + +def update_script(path, types): + tmppath = f'{path}.tmp' + + with open(path, 'r') as in_f: + with open(tmppath, 'w') as out_f: + state = 'normal' + for line in in_f: + if '@@@@ BEGIN TYPES @@@@' in line: + state = 'types' + out_f.write(line) + generate_types(out_f, types) + elif '@@@@ END TYPES @@@@' in line: + assert state == 'types' + state = 'normal' + out_f.write(line) + elif state == 'normal': + out_f.write(line) + assert state == 'normal' + + os.replace(tmppath, path) + rustfmt(path) + + +def copy_input(paths): + for name, path in paths.items(): + shutil.copyfile(path, + os.path.join(copy_dir, name)) + + +opcodes = extract_opcodes(input_paths) +opcode_flags = extract_opcode_flags(input_paths) +emitter_types = extract_types(input_paths) + +function_flags = extract_function_flags(input_paths) +function_types = extract_function_types(input_paths) + +script_types = extract_script_types(input_paths) + +update_opcode(opcode_dest_path, opcodes, opcode_flags) +update_emitter(emitter_dest_path, emitter_types) +update_function(function_dest_path, function_types, function_flags) +update_script(script_dest_path, script_types) + +copy_input(input_paths) diff --git a/third_party/rust/jsparagus/update_unicode.py b/third_party/rust/jsparagus/update_unicode.py new file mode 100644 index 0000000000..3f2bba0956 --- /dev/null +++ b/third_party/rust/jsparagus/update_unicode.py @@ -0,0 +1,354 @@ +#!/usr/bin/env python3 + +""" Generate Unicode data table for parser +""" + +import argparse +import io +import re +import sys +from contextlib import closing +from itertools import tee, zip_longest +from urllib.request import urlopen +from zipfile import ZipFile + + +# These are also part of IdentifierPart ยง11.6 Names and Keywords +compatibility_identifier_part = [ + ord(u'\N{ZERO WIDTH NON-JOINER}'), + ord(u'\N{ZERO WIDTH JOINER}'), +] + +FLAG_ID_START = 1 << 0 +FLAG_ID_CONTINUE = 1 << 1 + + +def download_derived_core_properties(version): + """Downloads UCD.zip for given version, and return the content of + DerivedCoreProperties.txt. """ + + baseurl = 'https://unicode.org/Public' + if version == 'UNIDATA': + url = '%s/%s' % (baseurl, version) + else: + url = '%s/%s/ucd' % (baseurl, version) + + request_url = '{}/UCD.zip'.format(url) + with closing(urlopen(request_url)) as downloaded_file: + downloaded_data = io.BytesIO(downloaded_file.read()) + + with ZipFile(downloaded_data) as zip_file: + return zip_file.read('DerivedCoreProperties.txt').decode() + + +def read_derived_core_properties(derived_core_properties): + """Read DerivedCoreProperties.txt content and yield each item. 
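+ Each item is a (code point, property name) pair; a range such as "0041..005A" is expanded into one pair per code point.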
""" + for line in derived_core_properties.split('\n'): + if line == '' or line.startswith('#'): + continue + row = line.split('#')[0].split(';') + char_range = row[0].strip() + char_property = row[1].strip() + if '..' not in char_range: + yield (int(char_range, 16), char_property) + else: + [start, end] = char_range.split('..') + for char in range(int(start, 16), int(end, 16) + 1): + yield (char, char_property) + + +def process_derived_core_properties(derived_core_properties): + """Parse DerivedCoreProperties.txt and returns its version, + and set of characters with ID_Start and ID_Continue. """ + id_start = set() + id_continue = set() + + m = re.match('# DerivedCoreProperties-([0-9\.]+).txt', derived_core_properties) + assert m + version = m.group(1) + + for (char, prop) in read_derived_core_properties(derived_core_properties): + if prop == 'ID_Start': + id_start.add(char) + if prop == 'ID_Continue': + id_continue.add(char) + + return (version, id_start, id_continue) + + +def int_ranges(ints): + """ Yields consecutive ranges (inclusive) from integer values. """ + (a, b) = tee(sorted(ints)) + start = next(b) + for (curr, succ) in zip_longest(a, b): + if curr + 1 != succ: + yield (start, curr) + start = succ + + +def process_unicode_data(derived_core_properties): + MAX_BMP = 0xffff + + dummy = 0 + table = [dummy] + cache = {dummy: 0} + index = [0] * (MAX_BMP + 1) + non_bmp_id_start_set = {} + non_bmp_id_continue_set = {} + + (version, id_start, id_continue) = process_derived_core_properties(derived_core_properties) + codes = id_start.union(id_continue) + + for code in codes: + if code > MAX_BMP: + if code in id_start: + non_bmp_id_start_set[code] = 1 + if code in id_continue: + non_bmp_id_continue_set[code] = 1 + continue + + flags = 0 + if code in id_start: + flags |= FLAG_ID_START + if code in id_continue or code in compatibility_identifier_part: + flags |= FLAG_ID_CONTINUE + + i = cache.get(flags) + if i is None: + assert flags not in table + cache[flags] = i = len(table) + table.append(flags) + index[code] = i + + return ( + version, + table, + index, + id_start, + id_continue, + non_bmp_id_start_set, + non_bmp_id_continue_set, + ) + + +def getsize(data): + """ return smallest possible integer size for the given array """ + maxdata = max(data) + assert maxdata < 2**32 + + if maxdata < 256: + return 1 + elif maxdata < 65536: + return 2 + else: + return 4 + + +def splitbins(t): + """t -> (t1, t2, shift). Split a table to save space. + + t is a sequence of ints. This function can be useful to save space if + many of the ints are the same. t1 and t2 are lists of ints, and shift + is an int, chosen to minimize the combined size of t1 and t2 (in C + code), and where for each i in range(len(t)), + t[i] == t2[(t1[i >> shift] << shift) + (i & mask)] + where mask is a bitmask isolating the last "shift" bits. 
+ """ + + def dump(t1, t2, shift, bytes): + print("%d+%d bins at shift %d; %d bytes" % ( + len(t1), len(t2), shift, bytes), file=sys.stderr) + print("Size of original table:", len(t) * getsize(t), + "bytes", file=sys.stderr) + + + n = len(t)-1 # last valid index + maxshift = 0 # the most we can shift n and still have something left + if n > 0: + while n >> 1: + n >>= 1 + maxshift += 1 + del n + bytes = sys.maxsize # smallest total size so far + t = tuple(t) # so slices can be dict keys + for shift in range(maxshift + 1): + t1 = [] + t2 = [] + size = 2**shift + bincache = {} + + for i in range(0, len(t), size): + bin = t[i:i + size] + + index = bincache.get(bin) + if index is None: + index = len(t2) + bincache[bin] = index + t2.extend(bin) + t1.append(index >> shift) + + # determine memory size + b = len(t1) * getsize(t1) + len(t2) * getsize(t2) + if b < bytes: + best = t1, t2, shift + bytes = b + t1, t2, shift = best + + print("Best:", end=' ', file=sys.stderr) + dump(t1, t2, shift, bytes) + + # exhaustively verify that the decomposition is correct + mask = 2**shift - 1 + for i in range(len(t)): + assert t[i] == t2[(t1[i >> shift] << shift) + (i & mask)] + return best + + +def write_table(f, name, type, table, formatter, per_line): + f.write(f""" +pub const {name}: &'static [{type}] = &[ +""") + + i = 0 + for item in table: + if i == 0: + f.write(' ') + f.write(f'{formatter(item)},') + i += 1 + if i == per_line: + i = 0 + f.write(""" +""") + + f.write("""\ +]; +""") + + +def write_func(f, name, group_set): + f.write(f""" +pub fn {name}(c: char) -> bool {{""") + + for (from_code, to_code) in int_ranges(group_set.keys()): + f.write(f""" + if c >= \'\\u{{{from_code:X}}}\' && c <= \'\\u{{{to_code:X}}}\' {{ + return true; + }}""") + + f.write(""" + false +} +""") + + +def make_unicode_file(version, table, index, + id_start, id_continue, + non_bmp_id_start_set, non_bmp_id_continue_set): + index1, index2, shift = splitbins(index) + + # verify correctness + for char in index: + test = table[index[char]] + + idx = index1[char >> shift] + idx = index2[(idx << shift) + (char & ((1 << shift) - 1))] + + assert test == table[idx] + + with open('crates/parser/src/unicode_data.rs', 'w') as f: + f.write(f"""\ +// Generated by update_unicode.py DO NOT MODIFY +// Unicode version: {version} +""") + + f.write(f""" +const FLAG_ID_START: u8 = {FLAG_ID_START}; +const FLAG_ID_CONTINUE: u8 = {FLAG_ID_CONTINUE}; +""") + + f.write(""" +pub struct CharInfo { + flags: u8, +} + +impl CharInfo { + pub fn is_id_start(&self) -> bool { + self.flags & FLAG_ID_START != 0 + } + + pub fn is_id_continue(&self) -> bool { + self.flags & FLAG_ID_CONTINUE != 0 + } +} +""") + + write_table(f, 'CHAR_INFO_TABLE', 'CharInfo', table, + lambda flag: f"CharInfo {{ flags: {flag} }}", + 1) + write_table(f, 'INDEX1', 'u8', index1, + lambda i: f'{i:4d}', 8) + write_table(f, 'INDEX2', 'u8', index2, + lambda i: f'{i:4d}', 8) + + f.write(f""" +const SHIFT: usize = {shift}; +""") + + f.write(""" +pub fn char_info(c: char) -> &'static CharInfo { + let code = c as usize; + let index = INDEX1[code >> SHIFT] as usize; + let index = INDEX2[(index << SHIFT) + (code & ((1 << SHIFT) - 1))] as usize; + + &CHAR_INFO_TABLE[index] +} +""") + + def format_bool(b): + if b: + return 'true ' + else: + return 'false' + + write_table(f, 'IS_ID_START_TABLE', 'bool', range(0, 128), + lambda code: format_bool(code in id_start), 8) + write_table(f, 'IS_ID_CONTINUE_TABLE', 'bool', range(0, 128), + lambda code: format_bool(code in id_continue), 8) + + write_func(f, 
+
+
+def write_table(f, name, type, table, formatter, per_line):
+    f.write(f"""
+pub const {name}: &'static [{type}] = &[
+""")
+
+    i = 0
+    for item in table:
+        if i == 0:
+            f.write('    ')
+        f.write(f'{formatter(item)},')
+        i += 1
+        if i == per_line:
+            i = 0
+            f.write("""
+""")
+
+    f.write("""\
+];
+""")
+
+
+def write_func(f, name, group_set):
+    f.write(f"""
+pub fn {name}(c: char) -> bool {{""")
+
+    for (from_code, to_code) in int_ranges(group_set.keys()):
+        f.write(f"""
+    if c >= \'\\u{{{from_code:X}}}\' && c <= \'\\u{{{to_code:X}}}\' {{
+        return true;
+    }}""")
+
+    f.write("""
+    false
+}
+""")
+
+
+def make_unicode_file(version, table, index,
+                      id_start, id_continue,
+                      non_bmp_id_start_set, non_bmp_id_continue_set):
+    index1, index2, shift = splitbins(index)
+
+    # verify correctness
+    for char in range(len(index)):
+        test = table[index[char]]
+
+        idx = index1[char >> shift]
+        idx = index2[(idx << shift) + (char & ((1 << shift) - 1))]
+
+        assert test == table[idx]
+
+    with open('crates/parser/src/unicode_data.rs', 'w') as f:
+        f.write(f"""\
+// Generated by update_unicode.py DO NOT MODIFY
+// Unicode version: {version}
+""")
+
+        f.write(f"""
+const FLAG_ID_START: u8 = {FLAG_ID_START};
+const FLAG_ID_CONTINUE: u8 = {FLAG_ID_CONTINUE};
+""")
+
+        f.write("""
+pub struct CharInfo {
+    flags: u8,
+}
+
+impl CharInfo {
+    pub fn is_id_start(&self) -> bool {
+        self.flags & FLAG_ID_START != 0
+    }
+
+    pub fn is_id_continue(&self) -> bool {
+        self.flags & FLAG_ID_CONTINUE != 0
+    }
+}
+""")
+
+        write_table(f, 'CHAR_INFO_TABLE', 'CharInfo', table,
+                    lambda flag: f"CharInfo {{ flags: {flag} }}",
+                    1)
+        write_table(f, 'INDEX1', 'u8', index1,
+                    lambda i: f'{i:4d}', 8)
+        write_table(f, 'INDEX2', 'u8', index2,
+                    lambda i: f'{i:4d}', 8)
+
+        f.write(f"""
+const SHIFT: usize = {shift};
+""")
+
+        f.write("""
+pub fn char_info(c: char) -> &'static CharInfo {
+    let code = c as usize;
+    let index = INDEX1[code >> SHIFT] as usize;
+    let index = INDEX2[(index << SHIFT) + (code & ((1 << SHIFT) - 1))] as usize;
+
+    &CHAR_INFO_TABLE[index]
+}
+""")
+
+        def format_bool(b):
+            if b:
+                return 'true '
+            else:
+                return 'false'
+
+        write_table(f, 'IS_ID_START_TABLE', 'bool', range(0, 128),
+                    lambda code: format_bool(code in id_start), 8)
+        write_table(f, 'IS_ID_CONTINUE_TABLE', 'bool', range(0, 128),
+                    lambda code: format_bool(code in id_continue), 8)
+
+        write_func(f,
+                   'is_id_start_non_bmp', non_bmp_id_start_set)
+        write_func(f, 'is_id_continue_non_bmp', non_bmp_id_continue_set)
+
+
+
+parser = argparse.ArgumentParser(description='Generate Unicode data table for parser')
+parser.add_argument('VERSION',
+                    help='Unicode version number to download from\
+ <https://unicode.org/Public>. The number must match\
+ a published Unicode version, e.g. use\
+ "8.0.0" to download Unicode 8 files. Alternatively use\
+ "UNIDATA" to download the latest published version.')
+parser.add_argument('PATH_TO_JSPARAGUS',
+                    help='Path to jsparagus')
+args = parser.parse_args()
+
+derived_core_properties = download_derived_core_properties(args.VERSION)
+
+(
+    version,
+    table,
+    index,
+    id_start,
+    id_continue,
+    non_bmp_id_start_set,
+    non_bmp_id_continue_set,
+) = process_unicode_data(derived_core_properties)
+
+make_unicode_file(
+    version,
+    table,
+    index,
+    id_start,
+    id_continue,
+    non_bmp_id_start_set,
+    non_bmp_id_continue_set,
+)
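The char_info() function emitted into unicode_data.rs is simply the two-level lookup that splitbins makes possible. Below is a self-contained Python mirror of that lookup, using tiny made-up tables rather than the real generated data, purely to show the index arithmetic; the flag constants match the ones defined at the top of this script.

    FLAG_ID_START = 1 << 0
    FLAG_ID_CONTINUE = 1 << 1

    SHIFT = 1
    INDEX1 = [0, 1]        # one entry per block of 1 << SHIFT code points
    INDEX2 = [0, 0, 0, 1]  # per-code-point indices into TABLE
    TABLE = [0, FLAG_ID_START | FLAG_ID_CONTINUE]

    def char_flags(code):
        idx = INDEX1[code >> SHIFT]
        idx = INDEX2[(idx << SHIFT) + (code & ((1 << SHIFT) - 1))]
        return TABLE[idx]

    assert char_flags(0) == 0
    assert char_flags(3) == FLAG_ID_START | FLAG_ID_CONTINUE

Invoked as, for example, "python3 update_unicode.py UNIDATA path/to/jsparagus" (the version argument may also be a published release number such as 8.0.0), the script downloads the UCD, builds these tables, and writes crates/parser/src/unicode_data.rs.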