diff options
Diffstat (limited to 'third_party/rust/fluent-syntax')
25 files changed, 4501 insertions, 0 deletions
diff --git a/third_party/rust/fluent-syntax/.cargo-checksum.json b/third_party/rust/fluent-syntax/.cargo-checksum.json new file mode 100644 index 0000000000..d1941314cd --- /dev/null +++ b/third_party/rust/fluent-syntax/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"Cargo.lock":"3fd2bd8414b6f818747e28ac2e78d0d99795946f2b4c74ca5e5ca9ce1bc8f8e2","Cargo.toml":"a98f67a3a7a2c050115fffb1863b9eb7ff0de131f2125b25753aa5eb3793bef8","LICENSE-APACHE":"5db2b182453ff32ed40f7da63589c9667a3f8bd8b16b1471b152caae56f77e45","LICENSE-MIT":"49c0b000c03731d9e3970dc059ad4ca345d773681f4a612b0024435b663e0220","README.md":"0ceb23fff33406f7663d967601e1ad7650a4664565c42d618961cc3a8fc81ea5","benches/contexts/README.md":"562d317f507caedd62cbe00e6b2bb350cb970168e5ebd5b3c497e68696e21e8b","benches/parser.rs":"b207acac3cc05025a323646dc72bc32b675740ebdcc602fc37f2ca42df9c6fb3","benches/parser_iai.rs":"10253310d8abbe979ae0833df4bb0d1e2b770d1e94fe99416dec7fcd23aa3f16","src/ast/helper.rs":"39187bbc97823ba0f3a9baadd76ddc59140bf09cddeb92997dd872e41b305375","src/ast/mod.rs":"389032943da7c1809d6b4bc903ab13662d715945bbd3df974a47b29e0884715f","src/bin/parser.rs":"06aa90e1b78f5f845c46b8abba86028c4c157555b580ea59b8a08b7ccba9907c","src/bin/update_fixtures.rs":"387ca0e6331942369a3a3aa6c5ae013aa1c87c612215ce20f486ca37d4acd55d","src/lib.rs":"b4a11659d3d233073a1d820872a1d48141d3343401295032f494f251af591d78","src/parser/comment.rs":"dff6546043538e3573eea98ea7eda3fd55d5846e4dbcb38a0595844862d979af","src/parser/core.rs":"4ef1a455ef50235bb3ab8da0c8da90caf73fd346f1b5e96d67ee8b3fc039ad04","src/parser/errors.rs":"7ac92c323c15f9efa10a37a5ba3894f7a348cadbd5bfc053afef901bbe53ad4b","src/parser/expression.rs":"737b6670e849e8f04b06146ebddb59b8da9f082660f427a564e7eb52ec674326","src/parser/helper.rs":"26ba1d0d64455220cc054a2c7c23051854b1915bf7d1028f4db7ec8a8dc9114b","src/parser/macros.rs":"d5927d8e6757b50bbdeb122eb3a9bde7ab939a7ba41c9dfeff216d3839a5162b","src/parser/mod.rs":"27378f63f5e98890b0928d54e0a7edf4828179bf37f9ed9e5e8c2eaac1
11bd75","src/parser/pattern.rs":"4566796f34b5b47f76aa682355b664001e370ff313537e32649d815e4d5edd5b","src/parser/runtime.rs":"280aefcd960a4e1d402132c38c6764582e471853aa0ef143b099edbdd41f4b6c","src/parser/slice.rs":"aad45bc35ecc3ff68bcd5b2671fecb134a4535d6ee463092422dfc5cc8b25a1d","src/unicode.rs":"799299d1895e0123dc837648bdd5adb2d16f9b8312d1f070570a03d40c2d4a07"},"package":"c0abed97648395c902868fee9026de96483933faa54ea3b40d652f7dfe61ca78"}
\ No newline at end of file diff --git a/third_party/rust/fluent-syntax/Cargo.lock b/third_party/rust/fluent-syntax/Cargo.lock new file mode 100644 index 0000000000..46ad03deaf --- /dev/null +++ b/third_party/rust/fluent-syntax/Cargo.lock @@ -0,0 +1,672 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" + +[[package]] +name = "bitflags" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" + +[[package]] +name = "bstr" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a40b47ad93e1a5404e6c18dec46b628214fee441c70f4ab5d6942142cc268a3d" +dependencies = [ + "lazy_static", + "memchr", + "regex-automata", + "serde", +] + +[[package]] +name = "bumpalo" +version = "3.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "099e596ef14349721d9016f6b80dd3419ea1bf289ab9b44df8e4dfd3a005d5d9" + +[[package]] +name = "byteorder" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae44d1a3d5a19df61dd0c8beb138458ac2a53a7ac09eba97d55592540004306b" + +[[package]] +name = "cast" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b9434b9a5aa1450faa3f9cb14ea0e8c53bb5d2b3c1bfd1ab4fc03e9f33fbfb0" +dependencies = [ + "rustc_version", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "2.33.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" +dependencies = [ + "bitflags", + "textwrap", + "unicode-width", +] + +[[package]] +name = "const_fn" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28b9d6de7f49e22cf97ad17fc4036ece69300032f45f78f30b4a4482cdc3f4a6" + +[[package]] +name = "criterion" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab327ed7354547cc2ef43cbe20ef68b988e70b4b593cbd66a2a61733123a3d23" +dependencies = [ + "atty", + "cast", + "clap", + "criterion-plot", + "csv", + "itertools 0.10.0", + "lazy_static", + "num-traits", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_cbor", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e022feadec601fba1649cfa83586381a4ad31c6bf3a9ab7d408118b05dd9889d" +dependencies = [ + "cast", + "itertools 0.9.0", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dca26ee1f8d361640700bde38b2c37d8c22b3ce2d360e1fc1c74ea4b0aa7d775" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94af6efb46fef72616855b036a624cf27ba656ffc9be1b9a3c931cfc7749a9a9" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "a1aaa739f95311c2c7887a76863f500026092fb1dce0161dab577e559ef3569d" +dependencies = [ + "cfg-if", + "const_fn", + "crossbeam-utils", + "lazy_static", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d96d1e189ef58269ebe5b97953da3274d83a93af647c2ddd6f9dab28cedb8d" +dependencies = [ + "autocfg", + "cfg-if", + "lazy_static", +] + +[[package]] +name = "csv" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d58633299b24b515ac72a3f869f8b91306a3cec616a602843a383acd6f9e97" +dependencies = [ + "bstr", + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" +dependencies = [ + "memchr", +] + +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + +[[package]] +name = "fluent-syntax" +version = "0.11.0" +dependencies = [ + "criterion", + "glob", + "iai", + "serde", + "serde_json", + "thiserror", +] + +[[package]] +name = "glob" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" + +[[package]] +name = "half" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62aca2aba2d62b4a7f5b33f3712cb1b0692779a56fb510499d5c0aa594daeaf3" + +[[package]] +name = "hermit-abi" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "322f4de77956e22ed0e5032c359a0f1273f1f7f0d79bfa3b8ffbc730d7fbcc5c" +dependencies = [ + "libc", +] + +[[package]] +name = "iai" +version = "0.1.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "71a816c97c42258aa5834d07590b718b4c9a598944cd39a52dc25b351185d678" + +[[package]] +name = "itertools" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37d572918e350e82412fe766d24b15e6682fb2ed2bbe018280caa810397cb319" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736" + +[[package]] +name = "js-sys" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cfb73131c35423a367daf8cbd24100af0d077668c8c2943f0e7dd775fef0f65" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7282d924be3275cec7f6756ff4121987bc6481325397dde6ba3e7802b1a8b1c" + +[[package]] +name = "log" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "memchr" +version = "2.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525" + +[[package]] +name = "memoffset" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"157b4208e3059a8f9e78d559edc658e13df41410cb3ae03979c83130067fdd87" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num-traits" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "oorandom" +version = "11.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" + +[[package]] +name = "plotters" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45ca0ae5f169d0917a7c7f5a9c1a3d3d9598f18f529dd2b8373ed988efea307a" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b07fffcddc1cb3a1de753caa4e4df03b79922ba43cf882acc1bdd7e8df9f4590" + +[[package]] +name = "plotters-svg" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b38a02e23bd9604b842a812063aec4ef702b57989c37b655254bb61c471ad211" +dependencies = [ + "plotters-backend", +] + +[[package]] +name = "proc-macro2" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "991431c3519a3f36861882da93630ce66b52918dcf1b8e2fd66b397fc96f28df" +dependencies = [ + 
"proc-macro2", +] + +[[package]] +name = "rayon" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b0d8e0819fadc20c74ea8373106ead0600e3a67ef1fe8da56e39b9ae7275674" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ab346ac5921dc62ffa9f89b7a773907511cdfa5490c572ae9be1be33e8afa4a" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "lazy_static", + "num_cpus", +] + +[[package]] +name = "regex" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9251239e129e16308e70d853559389de218ac275b515068abc96829d05b948a" +dependencies = [ + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4" +dependencies = [ + "byteorder", +] + +[[package]] +name = "regex-syntax" +version = "0.6.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5eb417147ba9860a96cfe72a0b93bf88fee1744b5636ec99ab20c1aa9376581" + +[[package]] +name = "rustc_version" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +dependencies = [ + "semver", +] + +[[package]] +name = "ryu" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" 
+version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "semver" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" +dependencies = [ + "semver-parser", +] + +[[package]] +name = "semver-parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" + +[[package]] +name = "serde" +version = "1.0.123" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92d5161132722baa40d802cc70b15262b98258453e85e5d1d365c757c73869ae" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_cbor" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e18acfa2f90e8b735b2836ab8d538de304cbb6729a7360729ea5a895d15a622" +dependencies = [ + "half", + "serde", +] + +[[package]] +name = "serde_derive" +version = "1.0.123" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9391c295d64fc0abb2c556bad848f33cb8296276b1ad2677d1ae1ace4f258f31" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.62" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea1c6153794552ea7cf7cf63b1231a25de00ec90db326ba6264440fa08e31486" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "1.0.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c700597eca8a5a762beb35753ef6b94df201c81cca676604f547495a0d7f0081" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "thiserror" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76cc616c6abf8c8928e2fdcc0dbfab37175edd8fb49a4641066ad1364fdab146" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9be73a2caec27583d0046ef3796c3794f868a5bc813db689eed00c7631275cd1" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tinytemplate" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2ada8616fad06a2d0c455adc530de4ef57605a8120cc65da9653e0e9623ca74" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "unicode-width" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3" + +[[package]] +name = "unicode-xid" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564" + +[[package]] +name = "walkdir" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "777182bc735b6424e1a57516d35ed72cb8019d85c8c9bf536dccb3445c1a2f7d" +dependencies = [ + "same-file", + "winapi", + "winapi-util", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55c0f7123de74f0dab9b7d00fd614e7b19349cd1e2f5252bbe9b1754b59433be" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7bc45447f0d4573f3d65720f636bbcc3dd6ce920ed704670118650bcd47764c7" 
+dependencies = [ + "bumpalo", + "lazy_static", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b8853882eef39593ad4174dd26fc9865a64e84026d223f63bb2c42affcbba2c" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4133b5e7f2a531fa413b3a1695e925038a05a71cf67e87dafa295cb645a01385" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd4945e4943ae02d15c13962b38a5b1e81eadd4b71214eee75af64a4d6a4fd64" + +[[package]] +name = "web-sys" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c40dc691fc48003eba817c38da7113c15698142da971298003cac3ef175680b3" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/third_party/rust/fluent-syntax/Cargo.toml b/third_party/rust/fluent-syntax/Cargo.toml new file mode 100644 index 0000000000..cc6851d45f --- /dev/null +++ b/third_party/rust/fluent-syntax/Cargo.toml @@ -0,0 +1,78 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies +# +# If you believe there's an error in this file please file an +# issue against the rust-lang/cargo repository. If you're +# editing this file be aware that the upstream Cargo.toml +# will likely look very different (and much more reasonable) + +[package] +edition = "2018" +name = "fluent-syntax" +version = "0.11.0" +authors = ["Zibi Braniecki <gandalf@mozilla.com>", "Staś Małolepszy <stas@mozilla.com>"] +include = ["src/**/*", "benches/*.rs", "Cargo.toml", "README.md", "LICENSE-APACHE", "LICENSE-MIT"] +description = "Parser/Serializer tools for Fluent Syntax. 
\n" +homepage = "http://www.projectfluent.org" +readme = "README.md" +keywords = ["localization", "l10n", "i18n", "intl", "internationalization"] +categories = ["localization", "internationalization"] +license = "Apache-2.0/MIT" +repository = "https://github.com/projectfluent/fluent-rs" + +[[bin]] +name = "parser" +path = "src/bin/parser.rs" + +[[bin]] +name = "update_fixtures" +path = "src/bin/update_fixtures.rs" +required-features = ["json"] + +[[test]] +name = "parser_fixtures" +path = "tests/parser_fixtures.rs" +required-features = ["json"] + +[[bench]] +name = "parser" +harness = false + +[[bench]] +name = "parser_iai" +harness = false +[dependencies.serde] +version = "1.0" +features = ["derive"] +optional = true + +[dependencies.serde_json] +version = "1.0" +optional = true + +[dependencies.thiserror] +version = "1.0" +[dev-dependencies.criterion] +version = "0.3" + +[dev-dependencies.glob] +version = "0.3" + +[dev-dependencies.iai] +version = "0.1" + +[dev-dependencies.serde] +version = "1.0" +features = ["derive"] + +[dev-dependencies.serde_json] +version = "1.0" + +[features] +all-benchmarks = [] +default = [] +json = ["serde", "serde_json"] diff --git a/third_party/rust/fluent-syntax/LICENSE-APACHE b/third_party/rust/fluent-syntax/LICENSE-APACHE new file mode 100644 index 0000000000..35582f166b --- /dev/null +++ b/third_party/rust/fluent-syntax/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2017 Mozilla + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/third_party/rust/fluent-syntax/LICENSE-MIT b/third_party/rust/fluent-syntax/LICENSE-MIT new file mode 100644 index 0000000000..5655fa311c --- /dev/null +++ b/third_party/rust/fluent-syntax/LICENSE-MIT @@ -0,0 +1,19 @@ +Copyright 2017 Mozilla + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/third_party/rust/fluent-syntax/README.md b/third_party/rust/fluent-syntax/README.md new file mode 100644 index 0000000000..f7214aaed5 --- /dev/null +++ b/third_party/rust/fluent-syntax/README.md @@ -0,0 +1,63 @@ +# Fluent Syntax + +`fluent-syntax` is a parser/serializer API for the Fluent Syntax, part of the [Project Fluent](https://projectfluent.org/), a localization +framework designed to unleash the entire expressive power of natural language translations. + +[![crates.io](https://meritbadge.herokuapp.com/fluent-syntax)](https://crates.io/crates/fluent-syntax) +[![Build and test](https://github.com/projectfluent/fluent-rs/workflows/Build%20and%20test/badge.svg)](https://github.com/projectfluent/fluent-rs/actions?query=branch%3Amaster+workflow%3A%22Build+and+test%22) +[![Coverage Status](https://coveralls.io/repos/github/projectfluent/fluent-rs/badge.svg?branch=master)](https://coveralls.io/github/projectfluent/fluent-rs?branch=master) + +Status +------ + +The crate currently provides just a parser, which is tracking Fluent Syntax on its way to 1.0. + +Local Development +----------------- + + cargo build + cargo test + cargo bench + +When submitting a PR please use [`cargo fmt`][] (nightly). + +[`cargo fmt`]: https://github.com/rust-lang-nursery/rustfmt + + +Learn the FTL syntax +-------------------- + +FTL is a localization file format used for describing translation resources. +FTL stands for _Fluent Translation List_. 
+ +FTL is designed to be simple to read, but at the same time allows you to represent +complex concepts from natural languages like gender, plurals, conjugations, and +others. + + hello-user = Hello, { $username }! + +[Read the Fluent Syntax Guide][] in order to learn more about the syntax. If +you're a tool author you may be interested in the formal [EBNF grammar][]. + +[Read the Fluent Syntax Guide]: http://projectfluent.org/fluent/guide/ +[EBNF grammar]: https://github.com/projectfluent/fluent/tree/master/spec + + +Get Involved +------------ + +`fluent-rs` is open-source, licensed under the Apache License, Version 2.0. We +encourage everyone to take a look at our code and we'll listen to your +feedback. + + +Discuss +------- + +We'd love to hear your thoughts on Project Fluent! Whether you're a localizer +looking for a better way to express yourself in your language, or a developer +trying to make your app localizable and multilingual, or a hacker looking for +a project to contribute to, please do get in touch on Discourse and the IRC channel. + + - Discourse: https://discourse.mozilla.org/c/fluent + - IRC channel: [irc://irc.mozilla.org/l20n](irc://irc.mozilla.org/l20n) diff --git a/third_party/rust/fluent-syntax/benches/contexts/README.md b/third_party/rust/fluent-syntax/benches/contexts/README.md new file mode 100644 index 0000000000..7d37ac0fe5 --- /dev/null +++ b/third_party/rust/fluent-syntax/benches/contexts/README.md @@ -0,0 +1,4 @@ +The following context is extracted from +the `browser.xhtml` localization context +from mozilla-central rev 51efc4b931f7 +from 2020-03-03. 
diff --git a/third_party/rust/fluent-syntax/benches/parser.rs b/third_party/rust/fluent-syntax/benches/parser.rs new file mode 100644 index 0000000000..f14725a512 --- /dev/null +++ b/third_party/rust/fluent-syntax/benches/parser.rs @@ -0,0 +1,141 @@ +use criterion::criterion_group; +use criterion::criterion_main; +use criterion::Criterion; +use std::collections::HashMap; +use std::fs; +use std::io; + +use fluent_syntax::parser::parse_runtime; + +fn read_file(path: &str) -> Result<String, io::Error> { + fs::read_to_string(path) +} + +#[cfg(feature = "all-benchmarks")] +fn get_resources(tests: &[&'static str]) -> HashMap<&'static str, String> { + let mut ftl_strings = HashMap::new(); + for test in tests { + let path = format!("./benches/{}", test); + ftl_strings.insert(*test, read_file(&path).expect("Couldn't load file")); + } + return ftl_strings; +} + +fn get_ctxs(tests: &[&'static str]) -> HashMap<&'static str, Vec<String>> { + let mut ftl_strings = HashMap::new(); + for test in tests { + let paths = fs::read_dir(format!("./benches/contexts/{}", test)).unwrap(); + let strings = paths + .into_iter() + .map(|p| { + let p = p.unwrap().path(); + let path = p.to_str().unwrap(); + read_file(path).unwrap() + }) + .collect::<Vec<_>>(); + ftl_strings.insert(*test, strings); + } + return ftl_strings; +} + +fn parse_bench(c: &mut Criterion) { + #[cfg(feature = "all-benchmarks")] + { + let tests = &["simple.ftl", "preferences.ftl", "menubar.ftl"]; + + let mut group = c.benchmark_group("parse_resource"); + + for (name, resource) in get_resources(tests) { + group.bench_with_input(name, &resource, |b, source| { + b.iter(|| parse_runtime(source.as_str()).expect("Parsing of the FTL failed.")) + }); + } + + group.finish(); + } + + let ctx_names = &["browser", "preferences"]; + + #[cfg(feature = "all-benchmarks")] + { + use fluent_syntax::parser::parse; + + let mut group = c.benchmark_group("parse_ctx"); + + for (name, ctx) in get_ctxs(ctx_names) { + group.bench_with_input(name, 
&ctx, |b, ctx| { + b.iter(|| { + for source in ctx { + parse(source.as_str()).expect("Parsing of the FTL failed."); + } + }) + }); + } + + group.finish(); + } + + { + let mut group = c.benchmark_group("parse_ctx_runtime"); + + for (name, ctx) in get_ctxs(ctx_names) { + group.bench_with_input(name, &ctx, |b, ctx| { + b.iter(|| { + for source in ctx { + parse_runtime(source.as_str()).expect("Parsing of the FTL failed."); + } + }) + }); + } + + group.finish(); + } + + #[cfg(feature = "all-benchmarks")] + { + use fluent_syntax::unicode::{unescape_unicode, unescape_unicode_to_string}; + + let strings = &[ + "foo", + "This is an example value", + "Hello \\u00e3\\u00e9 World", + "\\u004c\\u006f\\u0072\\u0065\\u006d \\u0069\\u0070\\u0073\\u0075\\u006d \\u0064\\u006f\\u006c\\u006f\\u0072 \\u0073\\u0069\\u0074 \\u0061\\u006d\\u0065\\u0074", + "Let me introduce \\\"The\\\" Fluent", + "And here's an example of \\\\ a character to be escaped", + "But this message is completely unescape free", + "And so is this one", + "Maybe this one is as well completely escape free", + "Welcome to Mozilla Firefox", + "\\u0054\\u0068\\u0065\\u0073\\u0065 \\u0073\\u0065\\u0074\\u0074\\u0069\\u006e\\u0067\\u0073 \\u0061\\u0072\\u0065 \\u0074\\u0061\\u0069\\u006c\\u006f\\u0072\\u0065\\u0064 \\u0074\\u006f \\u0079\\u006f\\u0075\\u0072 \\u0063\\u006f\\u006d\\u0070\\u0075\\u0074\\u0065\\u0072\\u2019\\u0073 \\u0068\\u0061\\u0072\\u0064\\u0077\\u0061\\u0072\\u0065 \\u0061\\u006e\\u0064 \\u006f\\u0070\\u0065\\u0072\\u0061\\u0074\\u0069\\u006e\\u0067 \\u0073\\u0079\\u0073\\u0074\\u0065\\u006d\\u002e", + "These settings are tailored to your computer’s hardware and operating system", + "Use recommended performance settings", + "\\u0041\\u0064\\u0064\\u0069\\u0074\\u0069\\u006f\\u006e\\u0061\\u006c \\u0063\\u006f\\u006e\\u0074\\u0065\\u006e\\u0074 \\u0070\\u0072\\u006f\\u0063\\u0065\\u0073\\u0073\\u0065\\u0073 \\u0063\\u0061\\u006e \\u0069\\u006d\\u0070\\u0072\\u006f\\u0076\\u0065 
\\u0070\\u0065\\u0072\\u0066\\u006f\\u0072\\u006d\\u0061\\u006e\\u0063\\u0065 \\u0077\\u0068\\u0065\\u006e \\u0075\\u0073\\u0069\\u006e\\u0067 \\u006d\\u0075\\u006c\\u0074\\u0069\\u0070\\u006c\\u0065 \\u0074\\u0061\\u0062\\u0073\\u002c \\u0062\\u0075\\u0074 \\u0077\\u0069\\u006c\\u006c \\u0061\\u006c\\u0073\\u006f \\u0075\\u0073\\u0065 \\u006d\\u006f\\u0072\\u0065 \\u006d\\u0065\\u006d\\u006f\\u0072\\u0079\\u002e", + "Additional content processes can improve performance when using multiple tabs, but will also use more memory.", + ]; + + let mut group = c.benchmark_group("unicode"); + + group.bench_function("writer", |b| { + b.iter(|| { + let mut result = String::new(); + for s in strings { + unescape_unicode(&mut result, s).unwrap(); + result.clear(); + } + }) + }); + + group.bench_function("to_string", |b| { + b.iter(|| { + for s in strings { + let _ = unescape_unicode_to_string(s); + } + }) + }); + + group.finish(); + } +} + +criterion_group!(benches, parse_bench,); +criterion_main!(benches); diff --git a/third_party/rust/fluent-syntax/benches/parser_iai.rs b/third_party/rust/fluent-syntax/benches/parser_iai.rs new file mode 100644 index 0000000000..f2eed5e5b1 --- /dev/null +++ b/third_party/rust/fluent-syntax/benches/parser_iai.rs @@ -0,0 +1,15 @@ +use fluent_syntax::parser::parse_runtime; + +fn iai_parse_ctx_runtime() { + let files = &[ + include_str!("contexts/browser/appmenu.ftl"), + include_str!("contexts/browser/browser.ftl"), + include_str!("contexts/browser/menubar.ftl"), + include_str!("contexts/preferences/preferences.ftl"), + ]; + for source in files { + parse_runtime(*source).expect("Parsing of the FTL failed."); + } +} + +iai::main!(iai_parse_ctx_runtime); diff --git a/third_party/rust/fluent-syntax/src/ast/helper.rs b/third_party/rust/fluent-syntax/src/ast/helper.rs new file mode 100644 index 0000000000..923437d23b --- /dev/null +++ b/third_party/rust/fluent-syntax/src/ast/helper.rs @@ -0,0 +1,25 @@ +#[cfg(feature = "serde")] +use 
serde::{Deserialize, Serialize}; + +use super::Comment; +// This is a helper struct used to properly deserialize referential +// JSON comments which are a single continuous String, into a vec of +// content slices. +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(untagged))] +pub enum CommentDef<S> { + Single { content: S }, + Multi { content: Vec<S> }, +} + +impl<'s, S> From<CommentDef<S>> for Comment<S> { + fn from(input: CommentDef<S>) -> Self { + match input { + CommentDef::Single { content } => Self { + content: vec![content], + }, + CommentDef::Multi { content } => Self { content }, + } + } +} diff --git a/third_party/rust/fluent-syntax/src/ast/mod.rs b/third_party/rust/fluent-syntax/src/ast/mod.rs new file mode 100644 index 0000000000..5b79bb3e02 --- /dev/null +++ b/third_party/rust/fluent-syntax/src/ast/mod.rs @@ -0,0 +1,1446 @@ +//! Abstract Syntax Tree representation of the Fluent Translation List. +//! +//! The AST of Fluent contains all the node structures needed for a complete +//! representation of the FTL resource. +//! +//! The tree preserves all semantic information and allows for round-trip +//! of a canonically written FTL resource. +//! +//! The root node is called [`Resource`] and contains a list of [`Entry`] nodes +//! representing all possible entries in the Fluent Translation List. +//! +//! # Example +//! +//! ``` +//! use fluent_syntax::parser; +//! use fluent_syntax::ast; +//! +//! let ftl = r#" +//! +//! ## This is a message comment +//! hello-world = Hello World! +//! .tooltip = Tooltip for you, { $userName }. +//! +//! "#; +//! +//! let resource = parser::parse(ftl) +//! .expect("Failed to parse an FTL resource."); +//! +//! assert_eq!( +//! resource.body[0], +//! ast::Entry::Message( +//! ast::Message { +//! id: ast::Identifier { +//! name: "hello-world" +//! }, +//! value: Some(ast::Pattern { +//! elements: vec![ +//! 
ast::PatternElement::TextElement { +//! value: "Hello World!" +//! }, +//! ] +//! }), +//! attributes: vec![ +//! ast::Attribute { +//! id: ast::Identifier { +//! name: "tooltip" +//! }, +//! value: ast::Pattern { +//! elements: vec![ +//! ast::PatternElement::TextElement { +//! value: "Tooltip for you, " +//! }, +//! ast::PatternElement::Placeable { +//! expression: ast::Expression::Inline( +//! ast::InlineExpression::VariableReference { +//! id: ast::Identifier { +//! name: "userName" +//! } +//! } +//! ) +//! }, +//! ast::PatternElement::TextElement { +//! value: "." +//! }, +//! ] +//! } +//! } +//! ], +//! comment: Some( +//! ast::Comment { +//! content: vec!["This is a message comment"] +//! } +//! ) +//! } +//! ), +//! ); +//! ``` +//! +//! ## Errors +//! +//! Fluent AST preserves blocks containing invalid syntax as [`Entry::Junk`]. +//! +//! ## White space +//! +//! At the moment, AST does not preserve white space. As a result, only a +//! canonical form of the AST is suitable for a round-trip. +mod helper; + +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +/// Root node of a Fluent Translation List. +/// +/// A [`Resource`] contains a body with a list of [`Entry`] nodes. +/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = ""; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Resource<S> { + pub body: Vec<Entry<S>>, +} + +/// A top-level node representing an entry of a [`Resource`]. +/// +/// Every [`Entry`] is a standalone element and the parser is capable +/// of recovering from errors by identifying the beginning of the next entry. 
+/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// key = Value +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message( +/// ast::Message { +/// id: ast::Identifier { +/// name: "key" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Value" +/// }, +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// } +/// ) +/// ] +/// } +/// ); +/// ``` +/// +/// # Junk Entry +/// +/// If FTL source contains invalid FTL content, it will be preserved +/// in form of [`Entry::Junk`] nodes. +/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// g@rb@ge En!ry +/// +/// "#; +/// +/// let (resource, _) = parser::parse(ftl) +/// .expect_err("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Junk { +/// content: "g@rb@ge En!ry\n\n" +/// } +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(tag = "type"))] +pub enum Entry<S> { + Message(Message<S>), + Term(Term<S>), + Comment(Comment<S>), + GroupComment(Comment<S>), + ResourceComment(Comment<S>), + Junk { content: S }, +} + +/// Message node represents the most common [`Entry`] in an FTL [`Resource`]. +/// +/// A message is a localization unit with a [`Identifier`] unique within a given +/// [`Resource`], and a value or attributes with associated [`Pattern`]. +/// +/// A message can contain a simple text value, or a compound combination of value +/// and attributes which together can be used to localize a complex User Interface +/// element. 
+/// +/// Finally, each [`Message`] may have an associated [`Comment`]. +/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// hello-world = Hello, World! +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message(ast::Message { +/// id: ast::Identifier { +/// name: "hello-world" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Hello, World!" +/// } +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// }) +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Message<S> { + pub id: Identifier<S>, + pub value: Option<Pattern<S>>, + pub attributes: Vec<Attribute<S>>, + pub comment: Option<Comment<S>>, +} + +/// A Fluent [`Term`]. +/// +/// Terms are semantically similar to [`Message`] nodes, but +/// they represent a separate concept in Fluent system. +/// +/// Every term has to have a value, and the parser will +/// report errors when term references are used in wrong positions. 
+/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// -brand-name = Nightly +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Term(ast::Term { +/// id: ast::Identifier { +/// name: "brand-name" +/// }, +/// value: ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Nightly" +/// } +/// ] +/// }, +/// attributes: vec![], +/// comment: None, +/// }) +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Term<S> { + pub id: Identifier<S>, + pub value: Pattern<S>, + pub attributes: Vec<Attribute<S>>, + pub comment: Option<Comment<S>>, +} + +/// Pattern contains a value of a [`Message`], [`Term`] or an [`Attribute`]. +/// +/// Each pattern is a list of [`PatternElement`] nodes representing +/// either a simple textual value, or a combination of text literals +/// and placeholder [`Expression`] nodes. +/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// hello-world = Hello, World! +/// +/// welcome = Welcome, { $userName }. +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message(ast::Message { +/// id: ast::Identifier { +/// name: "hello-world" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Hello, World!" 
+/// } +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// }), +/// ast::Entry::Message(ast::Message { +/// id: ast::Identifier { +/// name: "welcome" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Welcome, " +/// }, +/// ast::PatternElement::Placeable { +/// expression: ast::Expression::Inline( +/// ast::InlineExpression::VariableReference { +/// id: ast::Identifier { +/// name: "userName" +/// } +/// } +/// ) +/// }, +/// ast::PatternElement::TextElement { +/// value: "." +/// } +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// }), +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Pattern<S> { + pub elements: Vec<PatternElement<S>>, +} + +/// PatternElement is an element of a [`Pattern`]. +/// +/// Each [`PatternElement`] node represents +/// either a simple textual value, or a combination of text literals +/// and placeholder [`Expression`] nodes. +/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// hello-world = Hello, World! +/// +/// welcome = Welcome, { $userName }. +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message(ast::Message { +/// id: ast::Identifier { +/// name: "hello-world" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Hello, World!" 
+/// } +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// }), +/// ast::Entry::Message(ast::Message { +/// id: ast::Identifier { +/// name: "welcome" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Welcome, " +/// }, +/// ast::PatternElement::Placeable { +/// expression: ast::Expression::Inline( +/// ast::InlineExpression::VariableReference { +/// id: ast::Identifier { +/// name: "userName" +/// } +/// } +/// ) +/// }, +/// ast::PatternElement::TextElement { +/// value: "." +/// } +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// }), +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(tag = "type"))] +pub enum PatternElement<S> { + TextElement { value: S }, + Placeable { expression: Expression<S> }, +} + +/// Attribute represents a part of a [`Message`] or [`Term`]. +/// +/// Attributes are used to express a compound list of keyed +/// [`Pattern`] elements on an entry. 
+/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// hello-world = +/// .title = This is a title +/// .accesskey = T +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message(ast::Message { +/// id: ast::Identifier { +/// name: "hello-world" +/// }, +/// value: None, +/// attributes: vec![ +/// ast::Attribute { +/// id: ast::Identifier { +/// name: "title" +/// }, +/// value: ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "This is a title" +/// }, +/// ] +/// } +/// }, +/// ast::Attribute { +/// id: ast::Identifier { +/// name: "accesskey" +/// }, +/// value: ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "T" +/// }, +/// ] +/// } +/// } +/// ], +/// comment: None, +/// }), +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Attribute<S> { + pub id: Identifier<S>, + pub value: Pattern<S>, +} + +/// Identifier is part of nodes such as [`Message`], [`Term`] and [`Attribute`]. +/// +/// It is used to associate a unique key with an [`Entry`] or an [`Attribute`] +/// and in [`Expression`] nodes to refer to another entry. 
+/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// hello-world = Value +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message(ast::Message { +/// id: ast::Identifier { +/// name: "hello-world" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Value" +/// } +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// }), +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Identifier<S> { + pub name: S, +} + +/// Variant is a single branch of a value in a [`Select`](Expression::Select) expression. +/// +/// It's a pair of [`VariantKey`] and [`Pattern`]. If the selector matches the +/// key, then the value of the variant is returned as the value of the expression. 
+/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// hello-world = { $var -> +/// [key1] Value 1 +/// *[other] Value 2 +/// } +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message(ast::Message { +/// id: ast::Identifier { +/// name: "hello-world" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::Placeable { +/// expression: ast::Expression::Select { +/// selector: ast::InlineExpression::VariableReference { +/// id: ast::Identifier { name: "var" }, +/// }, +/// variants: vec![ +/// ast::Variant { +/// key: ast::VariantKey::Identifier { +/// name: "key1" +/// }, +/// value: ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Value 1", +/// } +/// ] +/// }, +/// default: false, +/// }, +/// ast::Variant { +/// key: ast::VariantKey::Identifier { +/// name: "other" +/// }, +/// value: ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Value 2", +/// } +/// ] +/// }, +/// default: true, +/// }, +/// ] +/// } +/// } +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// }), +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(tag = "type"))] +pub struct Variant<S> { + pub key: VariantKey<S>, + pub value: Pattern<S>, + pub default: bool, +} + +/// A key of a [`Variant`]. +/// +/// Variant key can be either an identifier or a number. 
+/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// hello-world = { $var -> +/// [0] Value 1 +/// *[other] Value 2 +/// } +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message(ast::Message { +/// id: ast::Identifier { +/// name: "hello-world" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::Placeable { +/// expression: ast::Expression::Select { +/// selector: ast::InlineExpression::VariableReference { +/// id: ast::Identifier { name: "var" }, +/// }, +/// variants: vec![ +/// ast::Variant { +/// key: ast::VariantKey::NumberLiteral { +/// value: "0" +/// }, +/// value: ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Value 1", +/// } +/// ] +/// }, +/// default: false, +/// }, +/// ast::Variant { +/// key: ast::VariantKey::Identifier { +/// name: "other" +/// }, +/// value: ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Value 2", +/// } +/// ] +/// }, +/// default: true, +/// }, +/// ] +/// } +/// } +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// }), +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(tag = "type"))] +pub enum VariantKey<S> { + Identifier { name: S }, + NumberLiteral { value: S }, +} + +/// Fluent [`Comment`]. +/// +/// In Fluent, comments may be standalone, or associated with +/// an entry such as [`Term`] or [`Message`]. 
+/// +/// When used as a standalone [`Entry`], comments may appear in one of +/// three levels: +/// +/// * Standalone comment +/// * Group comment associated with a group of messages +/// * Resource comment associated with the whole resource +/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// ## A standalone level comment +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Comment(ast::Comment { +/// content: vec![ +/// "A standalone level comment" +/// ] +/// }) +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(from = "helper::CommentDef<S>"))] +pub struct Comment<S> { + pub content: Vec<S>, +} + +/// List of arguments for a [`FunctionReference`](InlineExpression::FunctionReference) or a +/// [`TermReference`](InlineExpression::TermReference). +/// +/// Function and Term reference may contain a list of positional and +/// named arguments passed to them. 
+/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// key = { FUNC($var1, "literal", style: "long") } +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message( +/// ast::Message { +/// id: ast::Identifier { +/// name: "key" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::Placeable { +/// expression: ast::Expression::Inline( +/// ast::InlineExpression::FunctionReference { +/// id: ast::Identifier { +/// name: "FUNC" +/// }, +/// arguments: ast::CallArguments { +/// positional: vec![ +/// ast::InlineExpression::VariableReference { +/// id: ast::Identifier { +/// name: "var1" +/// } +/// }, +/// ast::InlineExpression::StringLiteral { +/// value: "literal", +/// } +/// ], +/// named: vec![ +/// ast::NamedArgument { +/// name: ast::Identifier { +/// name: "style" +/// }, +/// value: ast::InlineExpression::StringLiteral +/// { +/// value: "long" +/// } +/// } +/// ], +/// } +/// } +/// ) +/// }, +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// } +/// ) +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone, Default)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(tag = "type"))] +pub struct CallArguments<S> { + pub positional: Vec<InlineExpression<S>>, + pub named: Vec<NamedArgument<S>>, +} + +/// A key-value pair used in [`CallArguments`]. 
+/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// key = { FUNC(style: "long") } +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message( +/// ast::Message { +/// id: ast::Identifier { +/// name: "key" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::Placeable { +/// expression: ast::Expression::Inline( +/// ast::InlineExpression::FunctionReference { +/// id: ast::Identifier { +/// name: "FUNC" +/// }, +/// arguments: ast::CallArguments { +/// positional: vec![], +/// named: vec![ +/// ast::NamedArgument { +/// name: ast::Identifier { +/// name: "style" +/// }, +/// value: ast::InlineExpression::StringLiteral +/// { +/// value: "long" +/// } +/// } +/// ], +/// } +/// } +/// ) +/// }, +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// } +/// ) +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(tag = "type"))] +pub struct NamedArgument<S> { + pub name: Identifier<S>, + pub value: InlineExpression<S>, +} + +/// A subset of expressions which can be used as [`Placeable`](PatternElement::Placeable), +/// [`selector`](Expression::Select), or in [`CallArguments`]. 
+/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// +/// key = { $emailCount } +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource, +/// ast::Resource { +/// body: vec![ +/// ast::Entry::Message( +/// ast::Message { +/// id: ast::Identifier { +/// name: "key" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::Placeable { +/// expression: ast::Expression::Inline( +/// ast::InlineExpression::VariableReference { +/// id: ast::Identifier { +/// name: "emailCount" +/// }, +/// } +/// ) +/// }, +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// } +/// ) +/// ] +/// } +/// ); +/// ``` +#[derive(Debug, PartialEq, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "serde", serde(tag = "type"))] +pub enum InlineExpression<S> { + /// Single line string literal enclosed in `"`. + /// + /// # Example + /// + /// ``` + /// use fluent_syntax::parser; + /// use fluent_syntax::ast; + /// + /// let ftl = r#" + /// + /// key = { "this is a literal" } + /// + /// "#; + /// + /// let resource = parser::parse(ftl) + /// .expect("Failed to parse an FTL resource."); + /// + /// assert_eq!( + /// resource, + /// ast::Resource { + /// body: vec![ + /// ast::Entry::Message( + /// ast::Message { + /// id: ast::Identifier { + /// name: "key" + /// }, + /// value: Some(ast::Pattern { + /// elements: vec![ + /// ast::PatternElement::Placeable { + /// expression: ast::Expression::Inline( + /// ast::InlineExpression::StringLiteral { + /// value: "this is a literal", + /// } + /// ) + /// }, + /// ] + /// }), + /// attributes: vec![], + /// comment: None, + /// } + /// ) + /// ] + /// } + /// ); + /// ``` + StringLiteral { value: S }, + /// A number literal. 
+ /// + /// # Example + /// + /// ``` + /// use fluent_syntax::parser; + /// use fluent_syntax::ast; + /// + /// let ftl = r#" + /// + /// key = { -0.5 } + /// + /// "#; + /// + /// let resource = parser::parse(ftl) + /// .expect("Failed to parse an FTL resource."); + /// + /// assert_eq!( + /// resource, + /// ast::Resource { + /// body: vec![ + /// ast::Entry::Message( + /// ast::Message { + /// id: ast::Identifier { + /// name: "key" + /// }, + /// value: Some(ast::Pattern { + /// elements: vec![ + /// ast::PatternElement::Placeable { + /// expression: ast::Expression::Inline( + /// ast::InlineExpression::NumberLiteral { + /// value: "-0.5", + /// } + /// ) + /// }, + /// ] + /// }), + /// attributes: vec![], + /// comment: None, + /// } + /// ) + /// ] + /// } + /// ); + /// ``` + NumberLiteral { value: S }, + /// A function reference. + /// + /// # Example + /// + /// ``` + /// use fluent_syntax::parser; + /// use fluent_syntax::ast; + /// + /// let ftl = r#" + /// + /// key = { FUNC() } + /// + /// "#; + /// + /// let resource = parser::parse(ftl) + /// .expect("Failed to parse an FTL resource."); + /// + /// assert_eq!( + /// resource, + /// ast::Resource { + /// body: vec![ + /// ast::Entry::Message( + /// ast::Message { + /// id: ast::Identifier { + /// name: "key" + /// }, + /// value: Some(ast::Pattern { + /// elements: vec![ + /// ast::PatternElement::Placeable { + /// expression: ast::Expression::Inline( + /// ast::InlineExpression::FunctionReference { + /// id: ast::Identifier { + /// name: "FUNC" + /// }, + /// arguments: ast::CallArguments::default(), + /// } + /// ) + /// }, + /// ] + /// }), + /// attributes: vec![], + /// comment: None, + /// } + /// ) + /// ] + /// } + /// ); + /// ``` + FunctionReference { + id: Identifier<S>, + arguments: CallArguments<S>, + }, + /// A reference to another message. 
+ /// + /// # Example + /// + /// ``` + /// use fluent_syntax::parser; + /// use fluent_syntax::ast; + /// + /// let ftl = r#" + /// + /// key = { key2 } + /// + /// "#; + /// + /// let resource = parser::parse(ftl) + /// .expect("Failed to parse an FTL resource."); + /// + /// assert_eq!( + /// resource, + /// ast::Resource { + /// body: vec![ + /// ast::Entry::Message( + /// ast::Message { + /// id: ast::Identifier { + /// name: "key" + /// }, + /// value: Some(ast::Pattern { + /// elements: vec![ + /// ast::PatternElement::Placeable { + /// expression: ast::Expression::Inline( + /// ast::InlineExpression::MessageReference { + /// id: ast::Identifier { + /// name: "key2" + /// }, + /// attribute: None, + /// } + /// ) + /// }, + /// ] + /// }), + /// attributes: vec![], + /// comment: None, + /// } + /// ) + /// ] + /// } + /// ); + /// ``` + MessageReference { + id: Identifier<S>, + attribute: Option<Identifier<S>>, + }, + /// A reference to a term. + /// + /// # Example + /// + /// ``` + /// use fluent_syntax::parser; + /// use fluent_syntax::ast; + /// + /// let ftl = r#" + /// + /// key = { -brand-name } + /// + /// "#; + /// + /// let resource = parser::parse(ftl) + /// .expect("Failed to parse an FTL resource."); + /// + /// assert_eq!( + /// resource, + /// ast::Resource { + /// body: vec![ + /// ast::Entry::Message( + /// ast::Message { + /// id: ast::Identifier { + /// name: "key" + /// }, + /// value: Some(ast::Pattern { + /// elements: vec![ + /// ast::PatternElement::Placeable { + /// expression: ast::Expression::Inline( + /// ast::InlineExpression::TermReference { + /// id: ast::Identifier { + /// name: "brand-name" + /// }, + /// attribute: None, + /// arguments: None, + /// } + /// ) + /// }, + /// ] + /// }), + /// attributes: vec![], + /// comment: None, + /// } + /// ) + /// ] + /// } + /// ); + /// ``` + TermReference { + id: Identifier<S>, + attribute: Option<Identifier<S>>, + arguments: Option<CallArguments<S>>, + }, + /// A reference to a 
/// An expression that is either a select expression or an inline expression.
///
/// With the `serde` feature enabled the enum is serialized untagged, i.e. the
/// variant is not wrapped in an extra layer naming it.
///
/// # Example
///
/// ```
/// use fluent_syntax::parser;
/// use fluent_syntax::ast;
///
/// let ftl = r#"
///
/// key = { $var ->
///     [key1] Value 1
///    *[other] Value 2
/// }
///
/// "#;
///
/// let resource = parser::parse(ftl)
///     .expect("Failed to parse an FTL resource.");
///
/// assert_eq!(
///     resource,
///     ast::Resource {
///         body: vec![
///             ast::Entry::Message(ast::Message {
///                 id: ast::Identifier {
///                     name: "key"
///                 },
///                 value: Some(ast::Pattern {
///                     elements: vec![
///                         ast::PatternElement::Placeable {
///                             expression: ast::Expression::Select {
///                                 selector: ast::InlineExpression::VariableReference {
///                                     id: ast::Identifier { name: "var" },
///                                 },
///                                 variants: vec![
///                                     ast::Variant {
///                                         key: ast::VariantKey::Identifier {
///                                             name: "key1"
///                                         },
///                                         value: ast::Pattern {
///                                             elements: vec![
///                                                 ast::PatternElement::TextElement {
///                                                     value: "Value 1",
///                                                 }
///                                             ]
///                                         },
///                                         default: false,
///                                     },
///                                     ast::Variant {
///                                         key: ast::VariantKey::Identifier {
///                                             name: "other"
///                                         },
///                                         value: ast::Pattern {
///                                             elements: vec![
///                                                 ast::PatternElement::TextElement {
///                                                     value: "Value 2",
///                                                 }
///                                             ]
///                                         },
///                                         default: true,
///                                     },
///                                 ]
///                             }
///                         }
///                     ]
///                 }),
///                 attributes: vec![],
///                 comment: None,
///             }),
///         ]
///     }
/// );
/// ```
#[derive(Debug, PartialEq, Clone)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "serde", serde(untagged))]
pub enum Expression<S> {
    /// A select expression, written `{ selector -> [key] value ... *[key] value }`.
    Select {
        /// The inline expression written before `->`.
        selector: InlineExpression<S>,
        /// The variants listed after `->`, in source order; the one written
        /// with a leading `*` has `default: true`.
        variants: Vec<Variant<S>>,
    },
    /// Any expression that is not a select expression.
    Inline(InlineExpression<S>),
}
/// Reads the entire file at `path` into a freshly allocated `String`.
///
/// # Errors
///
/// Returns the `io::Error` produced while opening the file or while reading
/// its contents as text.
fn read_file(path: &str) -> Result<String, io::Error> {
    let mut contents = String::new();
    File::open(path).and_then(|mut file| file.read_to_string(&mut contents))?;
    Ok(contents)
}
for path in paths.into_iter() { + let p = path.unwrap().path(); + let file_name = p.file_name().unwrap().to_str().unwrap(); + let path = p.to_str().unwrap(); + let source = read_file(path).unwrap(); + let ast = parse(source).unwrap(); + let target_json = serde_json::to_string_pretty(&ast).unwrap(); + let new_path = format!( + "./tests/fixtures/benches/contexts/{}/{}", + test, + file_name.replace(".ftl", ".json") + ); + write_file(&new_path, &target_json).unwrap(); + } + } +} diff --git a/third_party/rust/fluent-syntax/src/lib.rs b/third_party/rust/fluent-syntax/src/lib.rs new file mode 100644 index 0000000000..5b9cbbfe7f --- /dev/null +++ b/third_party/rust/fluent-syntax/src/lib.rs @@ -0,0 +1,51 @@ +//! Fluent is a modern localization system designed to improve how software is translated. +//! +//! `fluent-syntax` is the lowest level component of the [Fluent Localization +//! System](https://www.projectfluent.org). +//! +//! It exposes components necessary for parsing and tooling operations on Fluent Translation Lists ("FTL"). +//! +//! The crate provides a [`parser`] module which allows for parsing of an +//! input string to an Abstract Syntax Tree defined in the [`ast`] module. +//! +//! The [`unicode`] module exposes a set of helper functions used to decode +//! escaped unicode literals according to Fluent specification. +//! +//! # Example +//! +//! ``` +//! use fluent_syntax::parser; +//! use fluent_syntax::ast; +//! +//! let ftl = r#" +//! +//! hello-world = Hello World! +//! +//! "#; +//! +//! let resource = parser::parse(ftl) +//! .expect("Failed to parse an FTL resource."); +//! +//! assert_eq!( +//! resource.body[0], +//! ast::Entry::Message( +//! ast::Message { +//! id: ast::Identifier { +//! name: "hello-world" +//! }, +//! value: Some(ast::Pattern { +//! elements: vec![ +//! ast::PatternElement::TextElement { +//! value: "Hello World!" +//! }, +//! ] +//! }), +//! attributes: vec![], +//! comment: None, +//! } +//! ), +//! ); +//! 
/// Parses a run of consecutive comment lines that all share the same level.
///
/// Returns the collected line contents together with the level of the run
/// (the number of leading `#` characters, see [`Level`]).
///
/// # Errors
///
/// Returns the error from `expect_byte(b' ')` when the very first line has
/// its `#` marker followed by something other than a space, a line feed or
/// the end of input.
pub(super) fn get_comment(&mut self) -> Result<(ast::Comment<S>, Level)> {
    let mut level = Level::None;
    let mut content = vec![];

    while self.ptr < self.length {
        let line_level = self.get_comment_level();
        if line_level == Level::None {
            // Not a comment line: step back one byte and stop.
            // Presumably this restores the line feed consumed by `skip_eol`
            // at the end of the previous iteration.
            // NOTE(review): only one byte is restored even though `skip_eol`
            // consumes two for a CRLF pair; confirm this is intended.
            self.ptr -= 1;
            break;
        } else if level != Level::None && line_level != level {
            // A comment of a different level starts here. Un-consume its `#`
            // characters (the discriminant of `Level` equals their count) so
            // the caller can parse it as a separate entry.
            self.ptr -= line_level as usize;
            break;
        }

        level = line_level;

        if self.ptr == self.length {
            // The marker was the last thing in the input.
            break;
        } else if self.is_current_byte(b'\n') {
            // Marker immediately followed by a line feed.
            content.push(self.get_comment_line());
        } else {
            // The marker must be separated from the text by a single space.
            if let Err(e) = self.expect_byte(b' ') {
                if content.is_empty() {
                    return Err(e);
                } else {
                    // Lines were already collected: rewind over the `#`
                    // characters of this malformed line and return what we have.
                    self.ptr -= line_level as usize;
                    break;
                }
            }
            content.push(self.get_comment_line());
        }
        self.skip_eol();
    }

    Ok((ast::Comment { content }, level))
}
/// Consumes the parser and parses the whole source into an [`ast::Resource`].
///
/// Parsing never aborts on a syntax error: the offending span is stored as an
/// [`ast::Entry::Junk`] entry, the error is recorded, and parsing resumes at
/// the position found by `skip_to_next_entry_start`.
///
/// # Errors
///
/// If at least one entry failed to parse, returns `Err` carrying both the
/// resource (including the junk entries) and the list of errors, in source
/// order. Each error has its `slice` set to the span of the junk entry.
pub fn parse(
    mut self,
) -> std::result::Result<ast::Resource<S>, (ast::Resource<S>, Vec<ParserError>)> {
    let mut errors = vec![];

    let mut body = vec![];

    self.skip_blank_block();
    // A regular (`#`) comment is held back for one iteration so it can be
    // attached to the message or term that follows it.
    let mut last_comment = None;
    // Number of line breaks skipped by `skip_blank_block` after the previous entry.
    let mut last_blank_count = 0;

    while self.ptr < self.length {
        let entry_start = self.ptr;
        let mut entry = self.get_entry(entry_start);

        if let Some(comment) = last_comment.take() {
            match entry {
                // Attach the pending comment only when fewer than two line
                // breaks were skipped between it and this entry.
                Ok(ast::Entry::Message(ref mut msg)) if last_blank_count < 2 => {
                    msg.comment = Some(comment);
                }
                Ok(ast::Entry::Term(ref mut term)) if last_blank_count < 2 => {
                    term.comment = Some(comment);
                }
                // Otherwise the comment becomes a standalone entry.
                _ => {
                    body.push(ast::Entry::Comment(comment));
                }
            }
        }

        match entry {
            Ok(ast::Entry::Comment(comment)) => {
                // Hold it back; see `last_comment` above.
                last_comment = Some(comment);
            }
            Ok(entry) => {
                body.push(entry);
            }
            Err(mut err) => {
                // Recover: everything from the start of the failed entry up to
                // the next plausible entry start becomes junk.
                self.skip_to_next_entry_start();
                err.slice = Some(entry_start..self.ptr);
                errors.push(err);
                let content = self.source.slice(entry_start..self.ptr);
                body.push(ast::Entry::Junk { content });
            }
        }
        last_blank_count = self.skip_blank_block();
    }

    // A comment at the very end of the input has nothing to attach to.
    if let Some(last_comment) = last_comment.take() {
        body.push(ast::Entry::Comment(last_comment));
    }
    if errors.is_empty() {
        Ok(ast::Resource { body })
    } else {
        Err((ast::Resource { body }, errors))
    }
}
self.ptr = line_start; + break; + } + + if let Ok(attr) = self.get_attribute() { + attributes.push(attr); + } else { + self.ptr = line_start; + break; + } + } + attributes + } + + fn get_attribute(&mut self) -> Result<ast::Attribute<S>> { + let id = self.get_identifier()?; + self.skip_blank_inline(); + self.expect_byte(b'=')?; + let pattern = self.get_pattern()?; + + match pattern { + Some(pattern) => Ok(ast::Attribute { id, value: pattern }), + None => error!(ErrorKind::MissingValue, self.ptr), + } + } + + pub(super) fn get_identifier_unchecked(&mut self) -> ast::Identifier<S> { + let mut ptr = self.ptr; + + while matches!(get_byte!(self, ptr), Some(b) if b.is_ascii_alphanumeric() || *b == b'-' || *b == b'_') + { + ptr += 1; + } + + let name = self.source.slice(self.ptr - 1..ptr); + self.ptr = ptr; + + ast::Identifier { name } + } + + pub(super) fn get_identifier(&mut self) -> Result<ast::Identifier<S>> { + if !self.is_identifier_start() { + return error!( + ErrorKind::ExpectedCharRange { + range: "a-zA-Z".to_string() + }, + self.ptr + ); + } + self.ptr += 1; + Ok(self.get_identifier_unchecked()) + } + + pub(super) fn get_attribute_accessor(&mut self) -> Result<Option<ast::Identifier<S>>> { + if self.take_byte_if(b'.') { + let ident = self.get_identifier()?; + Ok(Some(ident)) + } else { + Ok(None) + } + } + + fn get_variant_key(&mut self) -> Result<ast::VariantKey<S>> { + self.skip_blank(); + + let key = if self.is_number_start() { + ast::VariantKey::NumberLiteral { + value: self.get_number_literal()?, + } + } else { + ast::VariantKey::Identifier { + name: self.get_identifier()?.name, + } + }; + + self.skip_blank(); + + self.expect_byte(b']')?; + + Ok(key) + } + + pub(super) fn get_variants(&mut self) -> Result<Vec<ast::Variant<S>>> { + let mut variants = Vec::with_capacity(2); + let mut has_default = false; + + loop { + let default = self.take_byte_if(b'*'); + if default { + if has_default { + return error!(ErrorKind::MultipleDefaultVariants, self.ptr); + } else 
{ + has_default = true; + } + } + + if !self.take_byte_if(b'[') { + break; + } + + let key = self.get_variant_key()?; + + let value = self.get_pattern()?; + + if let Some(value) = value { + variants.push(ast::Variant { + key, + value, + default, + }); + self.skip_blank(); + } else { + return error!(ErrorKind::MissingValue, self.ptr); + } + } + + if has_default { + Ok(variants) + } else { + error!(ErrorKind::MissingDefaultVariant, self.ptr) + } + } + + pub(super) fn get_placeable(&mut self) -> Result<ast::Expression<S>> { + self.skip_blank(); + let exp = self.get_expression()?; + self.skip_blank_inline(); + self.expect_byte(b'}')?; + + let invalid_expression_found = match &exp { + ast::Expression::Inline(ast::InlineExpression::TermReference { + ref attribute, .. + }) => attribute.is_some(), + _ => false, + }; + if invalid_expression_found { + return error!(ErrorKind::TermAttributeAsPlaceable, self.ptr); + } + + Ok(exp) + } +} diff --git a/third_party/rust/fluent-syntax/src/parser/errors.rs b/third_party/rust/fluent-syntax/src/parser/errors.rs new file mode 100644 index 0000000000..2c29f97bbf --- /dev/null +++ b/third_party/rust/fluent-syntax/src/parser/errors.rs @@ -0,0 +1,169 @@ +use std::ops::Range; +use thiserror::Error; + +/// Error containing information about an error encountered by the Fluent Parser. +/// +/// Errors in Fluent Parser are non-fatal, and the syntax has been +/// designed to allow for strong recovery. +/// +/// In result [`ParserError`] is designed to point at the slice of +/// the input that is most likely to be a complete fragment from after +/// the end of a valid entry, to the start of the next valid entry, with +/// the invalid syntax in the middle. 
+/// +/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// key1 = Value 1 +/// +/// g@Rb@ge = #2y ds +/// +/// key2 = Value 2 +/// +/// "#; +/// +/// let (resource, errors) = parser::parse_runtime(ftl) +/// .expect_err("Resource should contain errors."); +/// +/// assert_eq!( +/// errors, +/// vec![ +/// parser::ParserError { +/// pos: 18..19, +/// slice: Some(17..35), +/// kind: parser::ErrorKind::ExpectedToken('=') +/// } +/// ] +/// ); +/// +/// assert_eq!( +/// resource.body[0], +/// ast::Entry::Message( +/// ast::Message { +/// id: ast::Identifier { +/// name: "key1" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Value 1" +/// }, +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// } +/// ), +/// ); +/// +/// assert_eq!( +/// resource.body[1], +/// ast::Entry::Junk { +/// content: "g@Rb@ge = #2y ds\n\n" +/// } +/// ); +/// +/// assert_eq!( +/// resource.body[2], +/// ast::Entry::Message( +/// ast::Message { +/// id: ast::Identifier { +/// name: "key2" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Value 2" +/// }, +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// } +/// ), +/// ); +/// ``` +/// +/// The information contained in the `ParserError` should allow the tooling +/// to display rich contextual annotations of the error slice, using +/// crates such as `annotate-snippers`. +#[derive(Error, Debug, PartialEq, Clone)] +#[error("{}", self.kind)] +pub struct ParserError { + /// Precise location of where the parser encountered the error. + pub pos: Range<usize>, + /// Slice of the input from the end of the last valid entry to the beginning + /// of the next valid entry with the invalid syntax in the middle. + pub slice: Option<Range<usize>>, + /// The type of the error that the parser encountered. 
+ pub kind: ErrorKind, +} + +macro_rules! error { + ($kind:expr, $start:expr) => {{ + Err(ParserError { + pos: $start..$start + 1, + slice: None, + kind: $kind, + }) + }}; + ($kind:expr, $start:expr, $end:expr) => {{ + Err(ParserError { + pos: $start..$end, + slice: None, + kind: $kind, + }) + }}; +} + +/// Kind of an error associated with the [`ParserError`]. +#[derive(Error, Debug, PartialEq, Clone)] +pub enum ErrorKind { + #[error("Expected a token starting with \"{0}\"")] + ExpectedToken(char), + #[error("Expected one of \"{range}\"")] + ExpectedCharRange { range: String }, + #[error("Expected a message field for \"{entry_id}\"")] + ExpectedMessageField { entry_id: String }, + #[error("Expected a term field for \"{entry_id}\"")] + ExpectedTermField { entry_id: String }, + #[error("Callee is not allowed here")] + ForbiddenCallee, + #[error("The select expression must have a default variant")] + MissingDefaultVariant, + #[error("Expected a value")] + MissingValue, + #[error("A select expression can only have one default variant")] + MultipleDefaultVariants, + #[error("Message references can't be used as a selector")] + MessageReferenceAsSelector, + #[error("Term references can't be used as a selector")] + TermReferenceAsSelector, + #[error("Message attributes can't be used as a selector")] + MessageAttributeAsSelector, + #[error("Term attributes can't be used as a selector")] + TermAttributeAsPlaceable, + #[error("Unterminated string literal")] + UnterminatedStringLiteral, + #[error("Positional arguments must come before named arguments")] + PositionalArgumentFollowsNamed, + #[error("The \"{0}\" argument appears twice")] + DuplicatedNamedArgument(String), + #[error("Unknown escape sequence")] + UnknownEscapeSequence(String), + #[error("Invalid unicode escape sequence, \"{0}\"")] + InvalidUnicodeEscapeSequence(String), + #[error("Unbalanced closing brace")] + UnbalancedClosingBrace, + #[error("Expected an inline expression")] + ExpectedInlineExpression, + 
#[error("Expected a simple expression as selector")] + ExpectedSimpleExpressionAsSelector, + #[error("Expected a string or number literal")] + ExpectedLiteral, +} diff --git a/third_party/rust/fluent-syntax/src/parser/expression.rs b/third_party/rust/fluent-syntax/src/parser/expression.rs new file mode 100644 index 0000000000..c5ccb32bf4 --- /dev/null +++ b/third_party/rust/fluent-syntax/src/parser/expression.rs @@ -0,0 +1,224 @@ +use super::errors::{ErrorKind, ParserError}; +use super::{core::Parser, core::Result, slice::Slice}; +use crate::ast; + +impl<'s, S> Parser<S> +where + S: Slice<'s>, +{ + pub(super) fn get_expression(&mut self) -> Result<ast::Expression<S>> { + let exp = self.get_inline_expression(false)?; + + self.skip_blank(); + + if !self.is_current_byte(b'-') || !self.is_byte_at(b'>', self.ptr + 1) { + if let ast::InlineExpression::TermReference { ref attribute, .. } = exp { + if attribute.is_some() { + return error!(ErrorKind::TermAttributeAsPlaceable, self.ptr); + } + } + return Ok(ast::Expression::Inline(exp)); + } + + match exp { + ast::InlineExpression::MessageReference { ref attribute, .. } => { + if attribute.is_none() { + return error!(ErrorKind::MessageReferenceAsSelector, self.ptr); + } else { + return error!(ErrorKind::MessageAttributeAsSelector, self.ptr); + } + } + ast::InlineExpression::TermReference { ref attribute, .. } => { + if attribute.is_none() { + return error!(ErrorKind::TermReferenceAsSelector, self.ptr); + } + } + ast::InlineExpression::StringLiteral { .. } + | ast::InlineExpression::NumberLiteral { .. } + | ast::InlineExpression::VariableReference { .. } + | ast::InlineExpression::FunctionReference { .. 
} => {} + _ => { + return error!(ErrorKind::ExpectedSimpleExpressionAsSelector, self.ptr); + } + }; + + self.ptr += 2; // -> + + self.skip_blank_inline(); + if !self.skip_eol() { + return error!( + ErrorKind::ExpectedCharRange { + range: "\n | \r\n".to_string() + }, + self.ptr + ); + } + self.skip_blank(); + + let variants = self.get_variants()?; + + Ok(ast::Expression::Select { + selector: exp, + variants, + }) + } + + pub(super) fn get_inline_expression( + &mut self, + only_literal: bool, + ) -> Result<ast::InlineExpression<S>> { + match get_current_byte!(self) { + Some(b'"') => { + self.ptr += 1; // " + let start = self.ptr; + while let Some(b) = get_current_byte!(self) { + match b { + b'\\' => match get_byte!(self, self.ptr + 1) { + Some(b'\\') | Some(b'{') | Some(b'"') => self.ptr += 2, + Some(b'u') => { + self.ptr += 2; + self.skip_unicode_escape_sequence(4)?; + } + Some(b'U') => { + self.ptr += 2; + self.skip_unicode_escape_sequence(6)?; + } + b => { + let seq = b.unwrap_or(&b' ').to_string(); + return error!(ErrorKind::UnknownEscapeSequence(seq), self.ptr); + } + }, + b'"' => { + break; + } + b'\n' => { + return error!(ErrorKind::UnterminatedStringLiteral, self.ptr); + } + _ => self.ptr += 1, + } + } + + self.expect_byte(b'"')?; + let slice = self.source.slice(start..self.ptr - 1); + Ok(ast::InlineExpression::StringLiteral { value: slice }) + } + Some(b) if b.is_ascii_digit() => { + let num = self.get_number_literal()?; + Ok(ast::InlineExpression::NumberLiteral { value: num }) + } + Some(b'-') if !only_literal => { + self.ptr += 1; // - + if self.is_identifier_start() { + self.ptr += 1; + let id = self.get_identifier_unchecked(); + let attribute = self.get_attribute_accessor()?; + let arguments = self.get_call_arguments()?; + Ok(ast::InlineExpression::TermReference { + id, + attribute, + arguments, + }) + } else { + self.ptr -= 1; + let num = self.get_number_literal()?; + Ok(ast::InlineExpression::NumberLiteral { value: num }) + } + } + Some(b'$') if 
!only_literal => { + self.ptr += 1; // $ + let id = self.get_identifier()?; + Ok(ast::InlineExpression::VariableReference { id }) + } + Some(b) if b.is_ascii_alphabetic() => { + self.ptr += 1; + let id = self.get_identifier_unchecked(); + let arguments = self.get_call_arguments()?; + if let Some(arguments) = arguments { + if !Self::is_callee(&id.name) { + return error!(ErrorKind::ForbiddenCallee, self.ptr); + } + + Ok(ast::InlineExpression::FunctionReference { id, arguments }) + } else { + let attribute = self.get_attribute_accessor()?; + Ok(ast::InlineExpression::MessageReference { id, attribute }) + } + } + Some(b'{') if !only_literal => { + self.ptr += 1; // { + let exp = self.get_placeable()?; + Ok(ast::InlineExpression::Placeable { + expression: Box::new(exp), + }) + } + _ if only_literal => error!(ErrorKind::ExpectedLiteral, self.ptr), + _ => error!(ErrorKind::ExpectedInlineExpression, self.ptr), + } + } + + pub fn get_call_arguments(&mut self) -> Result<Option<ast::CallArguments<S>>> { + self.skip_blank(); + if !self.take_byte_if(b'(') { + return Ok(None); + } + + let mut positional = vec![]; + let mut named = vec![]; + let mut argument_names = vec![]; + + self.skip_blank(); + + while self.ptr < self.length { + if self.is_current_byte(b')') { + break; + } + + let expr = self.get_inline_expression(false)?; + + if let ast::InlineExpression::MessageReference { + ref id, + attribute: None, + } = expr + { + self.skip_blank(); + if self.is_current_byte(b':') { + if argument_names.contains(&id.name) { + return error!( + ErrorKind::DuplicatedNamedArgument(id.name.as_ref().to_owned()), + self.ptr + ); + } + self.ptr += 1; + self.skip_blank(); + let val = self.get_inline_expression(true)?; + + argument_names.push(id.name.clone()); + named.push(ast::NamedArgument { + name: ast::Identifier { + name: id.name.clone(), + }, + value: val, + }); + } else { + if !argument_names.is_empty() { + return error!(ErrorKind::PositionalArgumentFollowsNamed, self.ptr); + } + 
positional.push(expr); + } + } else { + if !argument_names.is_empty() { + return error!(ErrorKind::PositionalArgumentFollowsNamed, self.ptr); + } + positional.push(expr); + } + + self.skip_blank(); + self.take_byte_if(b','); + self.skip_blank(); + } + + self.expect_byte(b')')?; + + Ok(Some(ast::CallArguments { positional, named })) + } +} diff --git a/third_party/rust/fluent-syntax/src/parser/helper.rs b/third_party/rust/fluent-syntax/src/parser/helper.rs new file mode 100644 index 0000000000..11544d6855 --- /dev/null +++ b/third_party/rust/fluent-syntax/src/parser/helper.rs @@ -0,0 +1,169 @@ +use super::errors::{ErrorKind, ParserError}; +use super::{core::Parser, core::Result, slice::Slice}; + +impl<'s, S> Parser<S> +where + S: Slice<'s>, +{ + pub(super) fn is_current_byte(&self, b: u8) -> bool { + get_current_byte!(self) == Some(&b) + } + + pub(super) fn is_byte_at(&self, b: u8, pos: usize) -> bool { + get_byte!(self, pos) == Some(&b) + } + + pub(super) fn skip_to_next_entry_start(&mut self) { + while let Some(b) = get_current_byte!(self) { + let new_line = self.ptr == 0 || get_byte!(self, self.ptr - 1) == Some(&b'\n'); + + if new_line && (b.is_ascii_alphabetic() || [b'-', b'#'].contains(b)) { + break; + } + + self.ptr += 1; + } + } + + pub(super) fn skip_eol(&mut self) -> bool { + match get_current_byte!(self) { + Some(b'\n') => { + self.ptr += 1; + true + } + Some(b'\r') if self.is_byte_at(b'\n', self.ptr + 1) => { + self.ptr += 2; + true + } + _ => false, + } + } + + pub(super) fn skip_unicode_escape_sequence(&mut self, length: usize) -> Result<()> { + let start = self.ptr; + for _ in 0..length { + match get_current_byte!(self) { + Some(b) if b.is_ascii_hexdigit() => self.ptr += 1, + _ => break, + } + } + if self.ptr - start != length { + let end = if self.ptr >= self.length { + self.ptr + } else { + self.ptr + 1 + }; + let seq = self.source.slice(start..end).as_ref().to_owned(); + return error!(ErrorKind::InvalidUnicodeEscapeSequence(seq), self.ptr); + } + 
Ok(()) + } + + pub(super) fn is_identifier_start(&self) -> bool { + matches!(get_current_byte!(self), Some(b) if b.is_ascii_alphabetic()) + } + + pub(super) fn take_byte_if(&mut self, b: u8) -> bool { + if self.is_current_byte(b) { + self.ptr += 1; + true + } else { + false + } + } + + pub(super) fn skip_blank_block(&mut self) -> usize { + let mut count = 0; + loop { + let start = self.ptr; + self.skip_blank_inline(); + if !self.skip_eol() { + self.ptr = start; + break; + } + count += 1; + } + count + } + + pub(super) fn skip_blank(&mut self) { + loop { + match get_current_byte!(self) { + Some(b' ') | Some(b'\n') => self.ptr += 1, + Some(b'\r') if get_byte!(self, self.ptr + 1) == Some(&b'\n') => self.ptr += 2, + _ => break, + } + } + } + + pub(super) fn skip_blank_inline(&mut self) -> usize { + let start = self.ptr; + while let Some(b' ') = get_current_byte!(self) { + self.ptr += 1; + } + self.ptr - start + } + + pub(super) fn is_byte_pattern_continuation(b: u8) -> bool { + !matches!(b, b'.' 
| b'}' | b'[' | b'*') + } + + pub(super) fn is_callee(name: &S) -> bool { + name.as_ref() + .as_bytes() + .iter() + .all(|c| c.is_ascii_uppercase() || c.is_ascii_digit() || *c == b'_' || *c == b'-') + } + + pub(super) fn expect_byte(&mut self, b: u8) -> Result<()> { + if !self.is_current_byte(b) { + return error!(ErrorKind::ExpectedToken(b as char), self.ptr); + } + self.ptr += 1; + Ok(()) + } + + pub(super) fn is_number_start(&self) -> bool { + matches!(get_current_byte!(self), Some(b) if b.is_ascii_digit() || b == &b'-') + } + + pub(super) fn is_eol(&self) -> bool { + match get_current_byte!(self) { + Some(b'\n') => true, + Some(b'\r') if self.is_byte_at(b'\n', self.ptr + 1) => true, + None => true, + _ => false, + } + } + + pub(super) fn skip_digits(&mut self) -> Result<()> { + let start = self.ptr; + loop { + match get_current_byte!(self) { + Some(b) if b.is_ascii_digit() => self.ptr += 1, + _ => break, + } + } + if start == self.ptr { + error!( + ErrorKind::ExpectedCharRange { + range: "0-9".to_string() + }, + self.ptr + ) + } else { + Ok(()) + } + } + + pub(super) fn get_number_literal(&mut self) -> Result<S> { + let start = self.ptr; + self.take_byte_if(b'-'); + self.skip_digits()?; + if self.take_byte_if(b'.') { + self.skip_digits()?; + } + + Ok(self.source.slice(start..self.ptr)) + } +} diff --git a/third_party/rust/fluent-syntax/src/parser/macros.rs b/third_party/rust/fluent-syntax/src/parser/macros.rs new file mode 100644 index 0000000000..671d543285 --- /dev/null +++ b/third_party/rust/fluent-syntax/src/parser/macros.rs @@ -0,0 +1,11 @@ +macro_rules! get_byte { + ($s:expr, $idx:expr) => { + $s.source.as_ref().as_bytes().get($idx) + }; +} + +macro_rules! 
get_current_byte { + ($s:expr) => { + $s.source.as_ref().as_bytes().get($s.ptr) + }; +} diff --git a/third_party/rust/fluent-syntax/src/parser/mod.rs b/third_party/rust/fluent-syntax/src/parser/mod.rs new file mode 100644 index 0000000000..52edfdc37a --- /dev/null +++ b/third_party/rust/fluent-syntax/src/parser/mod.rs @@ -0,0 +1,278 @@ +//! Fluent Translation List parsing utilities +//! +//! FTL resources can be parsed using one of two methods: +//! * [`parse`] - parses an input into a complete Abstract Syntax Tree representation with all source information preserved. +//! * [`parse_runtime`] - parses an input into a runtime optimized Abstract Syntax Tree +//! representation with comments stripped. +//! +//! # Example +//! +//! ``` +//! use fluent_syntax::parser; +//! use fluent_syntax::ast; +//! +//! let ftl = r#" +//! #### Resource Level Comment +//! +//! ## This is a message comment +//! hello-world = Hello World! +//! +//! "#; +//! +//! let resource = parser::parse(ftl) +//! .expect("Failed to parse an FTL resource."); +//! +//! assert_eq!( +//! resource.body[0], +//! ast::Entry::ResourceComment( +//! ast::Comment { +//! content: vec![ +//! "Resource Level Comment" +//! ] +//! } +//! ) +//! ); +//! assert_eq!( +//! resource.body[1], +//! ast::Entry::Message( +//! ast::Message { +//! id: ast::Identifier { +//! name: "hello-world" +//! }, +//! value: Some(ast::Pattern { +//! elements: vec![ +//! ast::PatternElement::TextElement { +//! value: "Hello World!" +//! }, +//! ] +//! }), +//! attributes: vec![], +//! comment: Some( +//! ast::Comment { +//! content: vec!["This is a message comment"] +//! } +//! ) +//! } +//! ), +//! ); +//! ``` +//! +//! # Error Recovery +//! +//! In both modes the parser is lenient, attempting to recover from errors. +//! +//! The [`Result`] returns the resulting AST in both scenarios, and in the +//! error scenario a vector of [`ParserError`] elements is returned as well. +//!
Any unparsed parts of the input are returned as [`ast::Entry::Junk`] elements. +#[macro_use] +mod errors; +#[macro_use] +mod macros; +mod comment; +mod core; +mod expression; +mod helper; +mod pattern; +mod runtime; +mod slice; + +use crate::ast; +pub use errors::{ErrorKind, ParserError}; +pub use slice::Slice; + +/// Parser result always returns an AST representation of the input, +/// and if parsing errors were encountered, a list of [`ParserError`] elements +/// is also returned. +/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// key1 = Value 1 +/// +/// g@Rb@ge = #2y ds +/// +/// key2 = Value 2 +/// +/// "#; +/// +/// let (resource, errors) = parser::parse_runtime(ftl) +/// .expect_err("Resource should contain errors."); +/// +/// assert_eq!( +/// errors, +/// vec![ +/// parser::ParserError { +/// pos: 18..19, +/// slice: Some(17..35), +/// kind: parser::ErrorKind::ExpectedToken('=') +/// } +/// ] +/// ); +/// +/// assert_eq!( +/// resource.body[0], +/// ast::Entry::Message( +/// ast::Message { +/// id: ast::Identifier { +/// name: "key1" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Value 1" +/// }, +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// } +/// ), +/// ); +/// +/// assert_eq!( +/// resource.body[1], +/// ast::Entry::Junk { +/// content: "g@Rb@ge = #2y ds\n\n" +/// } +/// ); +/// +/// assert_eq!( +/// resource.body[2], +/// ast::Entry::Message( +/// ast::Message { +/// id: ast::Identifier { +/// name: "key2" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Value 2" +/// }, +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// } +/// ), +/// ); +/// ``` +pub type Result<S> = std::result::Result<ast::Resource<S>, (ast::Resource<S>, Vec<ParserError>)>; + +/// Parses an input into a complete Abstract Syntax Tree 
representation with +/// all source information preserved. +/// +/// This mode is intended for tooling, linters and other scenarios where +/// complete representation, with comments, is preferred over speed and memory +/// utilization. +/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// #### Resource Level Comment +/// +/// ## This is a message comment +/// hello-world = Hello World! +/// +/// "#; +/// +/// let resource = parser::parse(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource.body[0], +/// ast::Entry::ResourceComment( +/// ast::Comment { +/// content: vec![ +/// "Resource Level Comment" +/// ] +/// } +/// ) +/// ); +/// assert_eq!( +/// resource.body[1], +/// ast::Entry::Message( +/// ast::Message { +/// id: ast::Identifier { +/// name: "hello-world" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Hello World!" +/// }, +/// ] +/// }), +/// attributes: vec![], +/// comment: Some( +/// ast::Comment { +/// content: vec!["This is a message comment"] +/// } +/// ) +/// } +/// ), +/// ); +/// ``` +pub fn parse<'s, S>(input: S) -> Result<S> +where + S: Slice<'s>, +{ + core::Parser::new(input).parse() +} + +/// Parses an input into an Abstract Syntax Tree representation with comments stripped. +/// +/// This mode is intended for runtime use of Fluent. It currently strips all +/// comments improving parsing performance and reducing the size of the AST tree. +/// +/// # Example +/// +/// ``` +/// use fluent_syntax::parser; +/// use fluent_syntax::ast; +/// +/// let ftl = r#" +/// #### Resource Level Comment +/// +/// ## This is a message comment +/// hello-world = Hello World! 
+/// +/// "#; +/// +/// let resource = parser::parse_runtime(ftl) +/// .expect("Failed to parse an FTL resource."); +/// +/// assert_eq!( +/// resource.body[0], +/// ast::Entry::Message( +/// ast::Message { +/// id: ast::Identifier { +/// name: "hello-world" +/// }, +/// value: Some(ast::Pattern { +/// elements: vec![ +/// ast::PatternElement::TextElement { +/// value: "Hello World!" +/// }, +/// ] +/// }), +/// attributes: vec![], +/// comment: None, +/// } +/// ), +/// ); +/// ``` +pub fn parse_runtime<'s, S>(input: S) -> Result<S> +where + S: Slice<'s>, +{ + core::Parser::new(input).parse_runtime() +} diff --git a/third_party/rust/fluent-syntax/src/parser/pattern.rs b/third_party/rust/fluent-syntax/src/parser/pattern.rs new file mode 100644 index 0000000000..516326d761 --- /dev/null +++ b/third_party/rust/fluent-syntax/src/parser/pattern.rs @@ -0,0 +1,207 @@ +use super::errors::{ErrorKind, ParserError}; +use super::{core::Parser, core::Result, slice::Slice}; +use crate::ast; + +#[derive(Debug, PartialEq)] +enum TextElementTermination { + LineFeed, + CRLF, + PlaceableStart, + EOF, +} + +// This enum tracks the placement of the text element in the pattern, which is needed for +// dedentation logic. +#[derive(Debug, PartialEq)] +enum TextElementPosition { + InitialLineStart, + LineStart, + Continuation, +} + +// This enum allows us to mark pointers in the source which will later become text elements +// but without slicing them out of the source string. This makes the indentation adjustments +// cheaper since they'll happen on the pointers, rather than extracted slices. +#[derive(Debug)] +enum PatternElementPlaceholders<S> { + Placeable(ast::Expression<S>), + // (start, end, indent, position) + TextElement(usize, usize, usize, TextElementPosition), +} + +// This enum tracks whether the text element is blank or not. +// This is important to identify text elements which should not be taken into account +// when calculating common indent. 
+#[derive(Debug, PartialEq)] +enum TextElementType { + Blank, + NonBlank, +} + +impl<'s, S> Parser<S> +where + S: Slice<'s>, +{ + pub(super) fn get_pattern(&mut self) -> Result<Option<ast::Pattern<S>>> { + let mut elements = vec![]; + let mut last_non_blank = None; + let mut common_indent = None; + + self.skip_blank_inline(); + + let mut text_element_role = if self.skip_eol() { + self.skip_blank_block(); + TextElementPosition::LineStart + } else { + TextElementPosition::InitialLineStart + }; + + while self.ptr < self.length { + if self.take_byte_if(b'{') { + if text_element_role == TextElementPosition::LineStart { + common_indent = Some(0); + } + let exp = self.get_placeable()?; + last_non_blank = Some(elements.len()); + elements.push(PatternElementPlaceholders::Placeable(exp)); + text_element_role = TextElementPosition::Continuation; + } else { + let slice_start = self.ptr; + let mut indent = 0; + if text_element_role == TextElementPosition::LineStart { + indent = self.skip_blank_inline(); + if let Some(b) = get_current_byte!(self) { + if indent == 0 { + if b != &b'\r' && b != &b'\n' { + break; + } + } else if !Self::is_byte_pattern_continuation(*b) { + self.ptr = slice_start; + break; + } + } else { + break; + } + } + let (start, end, text_element_type, termination_reason) = self.get_text_slice()?; + if start != end { + if text_element_role == TextElementPosition::LineStart + && text_element_type == TextElementType::NonBlank + { + if let Some(common) = common_indent { + if indent < common { + common_indent = Some(indent); + } + } else { + common_indent = Some(indent); + } + } + if text_element_role != TextElementPosition::LineStart + || text_element_type == TextElementType::NonBlank + || termination_reason == TextElementTermination::LineFeed + { + if text_element_type == TextElementType::NonBlank { + last_non_blank = Some(elements.len()); + } + elements.push(PatternElementPlaceholders::TextElement( + slice_start, + end, + indent, + text_element_role, + )); + } + 
} + + text_element_role = match termination_reason { + TextElementTermination::LineFeed => TextElementPosition::LineStart, + TextElementTermination::CRLF => TextElementPosition::LineStart, + TextElementTermination::PlaceableStart => TextElementPosition::Continuation, + TextElementTermination::EOF => TextElementPosition::Continuation, + }; + } + } + + if let Some(last_non_blank) = last_non_blank { + let elements = elements + .into_iter() + .take(last_non_blank + 1) + .enumerate() + .map(|(i, elem)| match elem { + PatternElementPlaceholders::Placeable(expression) => { + ast::PatternElement::Placeable { expression } + } + PatternElementPlaceholders::TextElement(start, end, indent, role) => { + let start = if role == TextElementPosition::LineStart { + common_indent.map_or_else( + || start + indent, + |common_indent| start + std::cmp::min(indent, common_indent), + ) + } else { + start + }; + let mut value = self.source.slice(start..end); + if last_non_blank == i { + value.trim(); + } + ast::PatternElement::TextElement { value } + } + }) + .collect(); + return Ok(Some(ast::Pattern { elements })); + } + + Ok(None) + } + + fn get_text_slice( + &mut self, + ) -> Result<(usize, usize, TextElementType, TextElementTermination)> { + let start_pos = self.ptr; + let mut text_element_type = TextElementType::Blank; + + while let Some(b) = get_current_byte!(self) { + match b { + b' ' => self.ptr += 1, + b'\n' => { + self.ptr += 1; + return Ok(( + start_pos, + self.ptr, + text_element_type, + TextElementTermination::LineFeed, + )); + } + b'\r' if self.is_byte_at(b'\n', self.ptr + 1) => { + self.ptr += 1; + return Ok(( + start_pos, + self.ptr - 1, + text_element_type, + TextElementTermination::CRLF, + )); + } + b'{' => { + return Ok(( + start_pos, + self.ptr, + text_element_type, + TextElementTermination::PlaceableStart, + )); + } + b'}' => { + return error!(ErrorKind::UnbalancedClosingBrace, self.ptr); + } + _ => { + text_element_type = TextElementType::NonBlank; + self.ptr += 1 + } 
+ } + } + Ok(( + start_pos, + self.ptr, + text_element_type, + TextElementTermination::EOF, + )) + } +} diff --git a/third_party/rust/fluent-syntax/src/parser/runtime.rs b/third_party/rust/fluent-syntax/src/parser/runtime.rs new file mode 100644 index 0000000000..e116ceaeed --- /dev/null +++ b/third_party/rust/fluent-syntax/src/parser/runtime.rs @@ -0,0 +1,61 @@ +use super::{ + core::{Parser, Result}, + errors::ParserError, + slice::Slice, +}; +use crate::ast; + +impl<'s, S> Parser<S> +where + S: Slice<'s>, +{ + pub fn parse_runtime( + mut self, + ) -> std::result::Result<ast::Resource<S>, (ast::Resource<S>, Vec<ParserError>)> { + let mut errors = vec![]; + + // That default allocation gives the lowest + // number of instructions and cycles in iai. + let mut body = Vec::with_capacity(6); + + self.skip_blank_block(); + + while self.ptr < self.length { + let entry_start = self.ptr; + let entry = self.get_entry_runtime(entry_start); + + match entry { + Ok(Some(entry)) => { + body.push(entry); + } + Ok(None) => {} + Err(mut err) => { + self.skip_to_next_entry_start(); + err.slice = Some(entry_start..self.ptr); + errors.push(err); + let content = self.source.slice(entry_start..self.ptr); + body.push(ast::Entry::Junk { content }); + } + } + self.skip_blank_block(); + } + + if errors.is_empty() { + Ok(ast::Resource { body }) + } else { + Err((ast::Resource { body }, errors)) + } + } + + fn get_entry_runtime(&mut self, entry_start: usize) -> Result<Option<ast::Entry<S>>> { + let entry = match get_current_byte!(self) { + Some(b'#') => { + self.skip_comment(); + None + } + Some(b'-') => Some(ast::Entry::Term(self.get_term(entry_start)?)), + _ => Some(ast::Entry::Message(self.get_message(entry_start)?)), + }; + Ok(entry) + } +} diff --git a/third_party/rust/fluent-syntax/src/parser/slice.rs b/third_party/rust/fluent-syntax/src/parser/slice.rs new file mode 100644 index 0000000000..d44f8251fe --- /dev/null +++ b/third_party/rust/fluent-syntax/src/parser/slice.rs @@ -0,0 +1,25
@@ +use std::ops::Range; +pub trait Slice<'s>: AsRef<str> + Clone + PartialEq { + fn slice(&self, range: Range<usize>) -> Self; + fn trim(&mut self); +} + +impl<'s> Slice<'s> for String { + fn slice(&self, range: Range<usize>) -> Self { + self[range].to_string() + } + + fn trim(&mut self) { + *self = self.trim_end().to_string(); + } +} + +impl<'s> Slice<'s> for &'s str { + fn slice(&self, range: Range<usize>) -> Self { + &self[range] + } + + fn trim(&mut self) { + *self = self.trim_end(); + } +} diff --git a/third_party/rust/fluent-syntax/src/unicode.rs b/third_party/rust/fluent-syntax/src/unicode.rs new file mode 100644 index 0000000000..ab95a86884 --- /dev/null +++ b/third_party/rust/fluent-syntax/src/unicode.rs @@ -0,0 +1,159 @@ +//! A set of helper functions for unescaping Fluent unicode escape sequences. +//! +//! # Unicode +//! +//! Fluent supports UTF-8 in all FTL resources, but it also allows +//! unicode sequences to be escaped in [`String +//! Literals`](super::ast::InlineExpression::StringLiteral). +//! +//! Four byte sequences are encoded with `\u` and six byte +//! sequences using `\U`. +//! ## Example +//! +//! ``` +//! use fluent_syntax::unicode::unescape_unicode_to_string; +//! +//! assert_eq!( +//! unescape_unicode_to_string("Foo \\u5bd2 Bar"), +//! "Foo 寒 Bar" +//! ); +//! +//! assert_eq!( +//! unescape_unicode_to_string("Foo \\U01F68A Bar"), +//! "Foo 🚊 Bar" +//! ); +//! ``` +//! +//! # Other unescapes +//! +//! This also allows for a char `"` to be present inside an FTL string literal, +//! and for `\` itself to be escaped. +//! +//! ## Example +//! +//! ``` +//! use fluent_syntax::unicode::unescape_unicode_to_string; +//! +//! assert_eq!( +//! unescape_unicode_to_string("Foo \\\" Bar"), +//! "Foo \" Bar" +//! ); +//! assert_eq!( +//! unescape_unicode_to_string("Foo \\\\ Bar"), +//! "Foo \\ Bar" +//! ); +//!
``` +use std::borrow::Cow; +use std::char; +use std::fmt; + +const UNKNOWN_CHAR: char = '�'; + +fn encode_unicode(s: Option<&str>) -> char { + s.and_then(|s| u32::from_str_radix(s, 16).ok().and_then(char::from_u32)) + .unwrap_or(UNKNOWN_CHAR) +} + +/// Unescapes to a writer without allocating. +/// +/// ## Example +/// +/// ``` +/// use fluent_syntax::unicode::unescape_unicode; +/// +/// let mut s = String::new(); +/// unescape_unicode(&mut s, "Foo \\U01F60A Bar"); +/// assert_eq!(s, "Foo 😊 Bar"); +/// ``` +pub fn unescape_unicode<W>(w: &mut W, input: &str) -> fmt::Result +where + W: fmt::Write, +{ + let bytes = input.as_bytes(); + + let mut start = 0; + let mut ptr = 0; + + while let Some(b) = bytes.get(ptr) { + if b != &b'\\' { + ptr += 1; + continue; + } + if start != ptr { + w.write_str(&input[start..ptr])?; + } + + ptr += 1; + + let new_char = match bytes.get(ptr) { + Some(b'\\') => '\\', + Some(b'"') => '"', + Some(u @ b'u') | Some(u @ b'U') => { + let seq_start = ptr + 1; + let len = if u == &b'u' { 4 } else { 6 }; + ptr += len; + encode_unicode(input.get(seq_start..seq_start + len)) + } + _ => UNKNOWN_CHAR, + }; + ptr += 1; + w.write_char(new_char)?; + start = ptr; + } + if start != ptr { + w.write_str(&input[start..ptr])?; + } + Ok(()) +} + +/// Unescapes to a `Cow<str>` optionally allocating. 
+/// +/// ## Example +/// +/// ``` +/// use fluent_syntax::unicode::unescape_unicode_to_string; +/// +/// assert_eq!( +/// unescape_unicode_to_string("Foo \\U01F60A Bar"), +/// "Foo 😊 Bar" +/// ); +/// ``` +pub fn unescape_unicode_to_string(input: &str) -> Cow<str> { + let bytes = input.as_bytes(); + let mut result = Cow::from(input); + + let mut ptr = 0; + + while let Some(b) = bytes.get(ptr) { + if b != &b'\\' { + if let Cow::Owned(ref mut s) = result { + s.push(*b as char); + } + ptr += 1; + continue; + } + + if let Cow::Borrowed(_) = result { + result = Cow::from(&input[0..ptr]); + } + + ptr += 1; + + let new_char = match bytes.get(ptr) { + Some(b'\\') => '\\', + Some(b'"') => '"', + Some(u @ b'u') | Some(u @ b'U') => { + let start = ptr + 1; + let len = if u == &b'u' { 4 } else { 6 }; + ptr += len; + input + .get(start..(start + len)) + .map_or(UNKNOWN_CHAR, |slice| encode_unicode(Some(slice))) + } + _ => UNKNOWN_CHAR, + }; + result.to_mut().push(new_char); + ptr += 1; + } + result +} |