diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-19 00:47:55 +0000 |
commit | 26a029d407be480d791972afb5975cf62c9360a6 (patch) | |
tree | f435a8308119effd964b339f76abb83a57c29483 /third_party/rust/nom | |
parent | Initial commit. (diff) | |
download | firefox-26a029d407be480d791972afb5975cf62c9360a6.tar.xz firefox-26a029d407be480d791972afb5975cf62c9360a6.zip |
Adding upstream version 124.0.1.upstream/124.0.1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/nom')
50 files changed, 21645 insertions, 0 deletions
diff --git a/third_party/rust/nom/.cargo-checksum.json b/third_party/rust/nom/.cargo-checksum.json new file mode 100644 index 0000000000..6ff1a3e60c --- /dev/null +++ b/third_party/rust/nom/.cargo-checksum.json @@ -0,0 +1 @@ +{"files":{"CHANGELOG.md":"fd79f8ab367d50fd57136744fa2e0a198c279c9707c2a45f00ad1b62d0a637ab","Cargo.lock":"3b95f09a31e1fc281ef890f9623ac84f0a58bc3454a96b652d2f0682963f29b5","Cargo.toml":"9d07fd5cc339d8f7c0c164b2568164b808dee1d441b17631cb7b7d44d39ed778","LICENSE":"4dbda04344456f09a7a588140455413a9ac59b6b26a1ef7cdf9c800c012d87f0","README.md":"3cbdcb07f9ef189ad617f40423a17f1e48ee5aba3f118f261886c139d64d26ae","doc/nom_recipes.md":"a903a6d8f9e5c935f2a4cbd632f67bc46e41934d1cc8517da9b9e7f3840c9955","src/bits/complete.rs":"640bdcad311a05d94e4b3a1a8b2105c540f80864edb75de0a570f03d4055e5ed","src/bits/mod.rs":"1c6aa26887d379384c1c823684e6f8e91b59df72664eefd8ddf0b6ca5517e669","src/bits/streaming.rs":"304cc5d565cfa30075827c1d6cb173b7cb92b101d8ebe9bc50476a00c92fd5dc","src/branch/mod.rs":"dbe1ed1bb0230310adf8e8d15e6afcf8826c7a111f8429e13fe3e9ebd3fbeae0","src/branch/tests.rs":"9a4a7b0c38fc28881d904f8ad6757a23e543f47a4c2d6fd6a9589eeb97209088","src/bytes/complete.rs":"666fa037c63292b1616cbc04c5b414a53c705d0d2ccd8d84399bbe78f573b7e9","src/bytes/mod.rs":"055e264f71a9fa2245be652cc747cfb2c3e34c3c2ba3b75e9611be51fcebea0b","src/bytes/streaming.rs":"e716e6555fbde14bfc2d7358a3edc2191df0522bc55b1f7735f9809ceb235829","src/bytes/tests.rs":"f0d9eb90d72873346e47e5592d30093eb38cbbb5fbf2e769cda776ccfff4f887","src/character/complete.rs":"7eeb5f00baab7aeaf7f392e5872d141d352951a146c0396721dab71e29b4c87b","src/character/mod.rs":"2fc6a3b19b766a3c37328d62eedbc0c9cb9612aa1d38ececd5cc579b61725fa2","src/character/streaming.rs":"de67ec5663475bc5ffa16f12d121ce9181353b16656b90216704197fca3010fc","src/character/tests.rs":"38958a709f96f077f93a72b32d8ded0a2ad6e488d9aadbe3cf1cfd8adaec06c8","src/combinator/mod.rs":"f7b9c35734f10a4b46d2e2ae874628d48fa1fe0bfc9f44325a89a14b3cfaea02","src/combinator/tests.rs":"1e56e2c1263d93bfbd244d24160a0bea41731e5158d57382e69c215427770b94","src/error.rs":"9d9bf87e76b47cfd9170f8ae50b6deeb02ff1c296aac3eb4f71ee1474dc0fba5","src/internal.rs":"5e670e0f5955af13da4b9a98d444fc721d38835b4637fe8170871fefef4e04cb","src/lib.rs":"9e05f2447ef1e5e9418953300c97d297f26f8f33c0528733a7681d8cb458346e","src/macros.rs":"11ac3dee346e5bf61015f328975cf6ea6f438683eb3b8b6c7c8e6383b0c5b7af","src/multi/mod.rs":"6093bd5909ddae76309442eba3a33716d279d219e5933a0dedef977312d6c5f8","src/multi/tests.rs":"806f89f5f347978c22e9b8cc7f8a49ad1d1fe23bff5974725b643a2ceffe8cb0","src/number/complete.rs":"a4f312c200710a446986142d19ebc78727265cf2c3b07b70dd84339051040bdd","src/number/mod.rs":"ba6eb439ee0befcc0872be7ce43b4836622af45c3dc2fc003b0d909ee42d7b20","src/number/streaming.rs":"1c2137235f093857e545069d687f50712ea457fac03961f3f6ac15c0f7e40c2a","src/sequence/mod.rs":"2dff114a950965e321cafdb215a441990f35d28756b12b2891179f348268fca2","src/sequence/tests.rs":"8dc4ca519b274d4e0694b373b2710c2e947e6074d43ec6a738a74113a09379f5","src/str.rs":"f26aa11f43e8a4a300ea0f310d599fab3f809102cfb29033ddf84f299ee8010c","src/traits.rs":"01934f8a61fc3cc5a03438a10751d3b074c89e5f3bcc96df8e43cf6b09be2308","tests/arithmetic.rs":"725efba4fc6cc811f542f3bcc8c7afd52702a66f64319d4f2796225e2e75d0ca","tests/arithmetic_ast.rs":"c7c28c988640405dd250c86045bbda75fc6ead2a769fb05eafbfbe74d97e0485","tests/css.rs":"36a2198e42e601efc611ebd6b3c6861f3ccb6a63525829ae6a2603bcdc4c2b11","tests/custom_errors.rs":"354d5a82a4f5a24b97901a3b411b4eab038c4d034047971956b9cdc12538e50d","tests/escaped.rs":"c25987ea6d9a7dde74d58a49c332d223da3a495569cb79e3fe921bce51729ead","tests/float.rs":"cdac92fb14afb75cba9d6b8f568e272a630b2cfb9f096b76c91909a3cd016869","tests/fnmut.rs":"dc9b6140eb3405d1497b05675fc4d3050785771a2afa81990d684b2edd0c9746","tests/ini.rs":"f0ce38b90057e9e0fd2329819395c420cbf1400457f9c4279414301faa38b19c","tests/ini_str.rs":"4c8f6ce3a2a245e8365837b873c25d2d8f24887313b791e2edd09a76a2d98947","tests/issues.rs":"1322ffc270ba1bedf39b295eb622ead2715ab7d60db0181af5305a0429c7819e","tests/json.rs":"8672fca70b889d6243a2f0f4c99389e22200e4363f253e83a3f26620b92f765e","tests/mp4.rs":"db6568ee9ccad70a7b567295831b961b369011f66dc2dd406851208007588600","tests/multiline.rs":"aef9768beaf5042b8629599b2094712646abb23eb11fa662b5a9bf3dfa432547","tests/overflow.rs":"a249ebeebfc5228faf9bfd5241a54a8181df476c4699ef87bb7d8a2161b9fc72","tests/reborrow_fold.rs":"66230bacd8d36e1559f1dc919ae8eab3515963c4aef85a079ec56218c9a6e676"},"package":"d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"}
\ No newline at end of file diff --git a/third_party/rust/nom/CHANGELOG.md b/third_party/rust/nom/CHANGELOG.md new file mode 100644 index 0000000000..a8f4c02c27 --- /dev/null +++ b/third_party/rust/nom/CHANGELOG.md @@ -0,0 +1,1551 @@ +# Change Log + +## [Unreleased][unreleased] + +### Thanks + +### Changed + +## 7.1.3 - 2023-01-15 + +### Thanks + +- @Shadow53 + +### Fixed + +- panic in `many` and `count` combinators when the output type is zero sized + +## 7.1.2 - 2023-01-01 + +### Thanks + +- @joubs +- @Fyko +- @LoganDark +- @darnuria +- @jkugelman +- @barower +- @puzzlewolf +- @epage +- @cky +- @wolthom +- @w1ll-i-code + +### Changed + +- documentation fixes +- tests fixes +- limit the initial capacity of the result vector of `many_m_n` to 64kiB +- bits parser now accept `Parser` implementors instead of only functions + +### Added + +- implement `Tuple` parsing for the unit type as a special case +- implement `ErrorConvert` on the unit type to make it usable as error type for bits parsers +- bool parser for bits input + +## 7.1.1 - 2022-03-14 + +### Thanks + +- @ThomasdenH +- @@SphinxKnight +- @irevoire +- @doehyunbaek +- @pxeger +- @punkeel +- @max-sixty +- @Xiretza +- @5c077m4n +- @erihsu +- @TheNeikos +- @LoganDark +- @nickelc +- @chotchki +- @ctrlcctrlv + + +### Changed + +- documentation fixes +- more examples + +## 7.1.0 - 2021-11-04 + +### Thanks + +- @nickelc +- @Stargateur +- @NilsIrl +- @clonejo +- @Strytyp +- @schubart +- @jihchi +- @nipunn1313 +- @Gungy2 +- @Drumato +- @Alexhuszagh +- @Aehmlo +- @homersimpsons +- @dne +- @epage +- @saiintbrisson +- @pymongo + +### Changed + +- documentation fixes +- Ci fixes +- the move to minimal-lexical for float parsing introduced bugs that cannot be resolved right now, so this version moves back to using the standard lib' parser. *This is a performance regression**. If you have specific requirements around float parsing, you are strongly encouraged to use [recognize_float](https://docs.rs/nom/latest/nom/number/complete/fn.recognize_float.html) and another library to convert to a f32 or f64 + +### Added + +- alt now works with 1 elment tuples + +## 7.0.0 - 2021-08-21 + +This release fixes dependency compilation issues and strengthen the minimum supported Rust version (MSRV) policy. This is also the first release without the macros that were used since nom's beginning. + +### Thanks + +- @djc +- @homersimpsons +- @lo48576 +- @myrrlyn +- @RalXYZ +- @nickelc +- @cenodis + +### Added + +- `take_until1` combinator +- more `to_owned` implementations +- `fail`: a parser that always fail, useful as default condition in other combinators +- text to number parsers: in the `character::streaming` and `character::complete` modules, there are parsers named `i8, u16, u32, u64, u128` and `u8 ,u16, u32, u64, u128` that recognize decimal digits and directly convert to a number in the target size (checking for max int size) + +### Removed + +- now that function combinators are the main way to write parsers, the old macro combinators are confusing newcomers. THey have been removed +- the `BitSlice` input type from bitvec has been moved into the [nom-bitvec](https://crates.io/crates/nom-bitvec) crate. nom does not depend on bitvec now +- regex parsers have been moved into the [nom-regex](https://crates.io/crates/nom-regex) crate. nom does not depend on regex now +- `ErrorKind::PArseTo` was not needed anymore + +### Changed + +- relax trait bounds +- some performance fixes +- `split_at_position*` functions should now be guaranteed panic free +- the `lexical-core` crate used for float parsing has now been replaced with `minimal-lexical`: the new crate is faster to compile, faster to parse, and has no dependencies + +### Fixed + +- infinite loop in `escaped` combinator +- `many_m_n` now fails if min > max + + +## 6.2.1 - 2021-06-23 + +### Thanks + +This release was done thanks to the hard work of (by order of appearance in the commit list): + +- @homersimpsons + +### Fixed + +- fix documentation building + +## 6.2.0 - 2021-02-15 + +### Thanks + +This release was done thanks to the hard work of (by order of appearance in the commit list): + +- @DavidKorczynski +- @homersimpsons +- @kornelski +- @lf- +- @lewisbelcher +- @ronan-d +- @weirane +- @heymind +- @marcianx +- @Nukesor + +### Added + +- nom is now regularly fuzzed through the OSSFuzz project + +### Changed + +- lots of documentation fixes +- relax trait bounds +- workarounds for dependency issues with bitvec and memchr + +## 6.1.2 - 2021-02-15 + +### Changed + +- Fix cargo feature usage in previous release + +## 6.1.1 - 2021-02-15 + +### Thanks + +This release was done thanks to the hard work of (by order of appearance in the commit list): + +- @nickelc + +### Changed + +- Fix dependenciy incompatibilities: Restrict the bitvec->funty dependency to <=1.1 + +## 6.1.0 - 2021-01-23 + +### Thanks + +This release was done thanks to the hard work of (by order of appearance in the commit list): + +- @sachaarbonel +- @vallentin +- @Lucretiel +- @meiomorphism +- @jufajardini +- @neithernut +- @drwilco + +### Changed + +- readme and documentation fixes +- rewrite of fold_many_m_n +- relax trait bounds on some parsers +- implement `std::error::Error` on `VerboseError` + + +## 6.0.1 - 2020-11-24 + +### Thanks + +This release was done thanks to the hard work of (by order of appearance in the commit list): + +- @Leonqn +- @nickelc +- @toshokan +- @juchiast +- @shssoichiro +- @jlkiri +- @chifflier +- @fkloiber +- @Kaoet +- @Matthew Plant + +### Added + +- `ErrorConvert` implementation for `VerboseError` + +### Changed + +- CI fixes +- `fold_many*` now accept `FnMut` for the accumulation function +- relaxed input bounds on `length_count` + +# Fixed + +- documentation fixes +- the `#[deprecated]` attribute was removed from traits because it does not compile anymore on nightly +- bits and bytes combinators from the bits modules are now converted to use `FnMut` + +## 6.0.0 - 2020-10-31 + +### Thanks + +This release was done thanks to the hard work of (by order of appearance in the commit list): +- @chifflier +- @shepmaster +- @amerelo +- @razican +- @Palladinium +- @0ndorio +- Sebastian Zivota +- @keruspe +- @devonhollowood +- @parasyte +- @nnt0 +- @AntoineCezar +- @GuillaumeGomez +- @eijebong +- @stadelmanma +- @sphynx +- @snawaz +- @fosskers +- @JamesHarrison +- @calebsander +- @jthornber +- @ahmedcharles +- @rljacobson +- @benkay86 +- @georgeclaghorn +- @TianyiShi2001 +- @shnewto +- @alfriadox +- @resistor +- @myrrlyn +- @chipsenkbeil +- @ruza-net +- @fanf2 +- @jameysharp +- @FallenWarrior2k +- @jmg-duarte +- @ericseppanen +- @hbina +- Andreas Molzer +- @nickelc +- @bgourlie + +## Notable changes + +This release is a more polished version of nom 5, that came with a focus on +function parsers, by relaxing the requirements: combinators will return a +`impl FnMut` instead of `impl Fn`, allowing closures that change their context, +and parsers can be any type now, as long as they implement the new `Parser` trait. +That parser trait also comes with a few helper methods. + +Error management was often a pain point, so a lot of work went into making it easier. +Now it integrates with `std:error::Error`, the `IResult::finish()` method allows you +to convert to a more usable type, the `into` combinator can convert the error type +if there's a `From` implementation, and there are more specific error traits like +`ContextError` for the `context` combinator, and `FromExternalError` for `map_res`. +While the `VerboseError` type and its `convert_error` function saw some changes, +not many features ill be added to it, instead you are encouraged to build the error +type that corresponds to your needs if you are building a language parser. + +This version also integrates with the excellent [bitvec](https://crates.io/crates/bitvec) +crate for better bit level parsing. This part of nom was not great and a bit of a hack, +so this will give better options for those parsers. + +At last, documentation! There are now more code examples, functions and macros that require +specific cargo features are now clearly indicated, and there's a new `recipes` module +containing example patterns. + +### Breaking changes + +- the minimal Rust version is now 1.44 (1.37 if building without the `alloc` or `std` features) +- streaming parsers return the number of additional bytes they need, not the total. This was supposed to be the case everywhere, but some parsers were forgotten +- removed the `regexp_macros` cargo feature +- the `context` combinator is not linked to `ParseError` anymore, instead it come with its own `ContextError` trait +- `Needed::Size` now contains a `NonZeroUsize`, so we can reduce the structure's size by 8 bytes. When upgrading, `Needed::Size(number)` can be replaced with `Needed::new(number)` +- there is now a more general `Parser` trait, so parsers can be something else than a function. This trait also comes with combinator methods like `map`, `flat_map`, `or`. Since it is implemented on `Fn*` traits, it should not affect existing code too much +- combinators that returned a `impl Fn` now return a `impl FnMut` to allow parser closures that capture some mutable value from the context +- `separated_list` is now `separated_list0` +- removed the deprecated `methods` module +- removed the deprecated `whitespace` module +- the default error type is now a struct (`nom::error::Error`) instead of a tuple +- the `FromExternalError` allows wrapping the error returned by the function in the `map_res` combinator +- renamed the `dbg!` macro to avoid conflicts with `std::dbg!` +- `separated_list` now allows empty elements + + +### Added + +- function version of regex parsers +- `fill`: attempts to fill the output slice passed as argument +- `success`: returns a value without consuming the input +- `satisfy`: checks a predicate over the next character +- `eof` function combinator +- `consumed`: returns the produced value and the consumed input +- `length_count` function combinator +- `into`: converts a parser's output and error values if `From` implementations are available +- `IResult::finish()`: converts a parser's result to `Result<(I, O), E>` by removing the distinction between `Error` and `Failure` and panicking on `Incomplete` +- non macro versions of `u16`, `i32`, etc, with configurable endianness +- `is_newline` function +- `std::error::Error` implementation for nom's error types +- recipes section of the documentation, outlining common patterns in nom +- custom errors example +- bitstream parsing with the `BitSlice` type from the bitvec crate +- native endianness parsers +- github actions for CI + +### Changed + +- allows lexical-core 0.7 +- number parsers are now generic over the input type +- stabilized the `alloc` feature +- `convert_error` accepts a type that derefs to `&str` +- the JSON example now follows the spec better + +### Fixed +- use `fold_many0c` in the `fold_many0` macro + +## 5.1.1 - 2020-02-24 + +### Thanks + +- @Alexhuszagh for float fixes +- @AlexanderEkdahl, @JoshOrndorff, @akitsu-sanae for docs fixes +- @ignatenkobrain: dependency update +- @derekdreery: `map` implementation for errors +- @Lucretiel for docs fixes and compilation fixes +- adytzu2007: warning fixes +- @lo48576: error management fixes + +### Fixed + +- C symbols compilation errors due to old lexical-core version + +### Added + +- `Err` now has a `map` function + +### Changed + +- Make `error::context()` available without `alloc` feature + +## 5.1.0 - 2020-01-07 + +### Thanks + +- @Hywan, @nickmooney, @jplatte, @ngortheone, @ejmg, @SirWindfield, @demurgos, @spazm, @nyarly, @guedou, @adamnemecek, for docs fixes +- @Alxandr for error management bugfixes +- @Lucretiel for example fixes and optimizations +- @adytzu2007 for optimizations +- @audunhalland for utf8 fixes + +### Fixed + +- panic in `convert_error` +- `compile_error` macro usage + +### Added + +- `std::error::Error`, `std::fmt::Display`, `Eq`, `ToOwned` implementations for errors +- inline attribute for `ToUsize` + +### Changed + +- `convert_error` optimization +- `alt` optimization + +## 5.0.1 - 2019-08-22 + +### Thanks + +- @waywardmonkeys, @phaazon, @dalance for docs fixes +- @kali for `many0_m_n` fixes +- @ia0 for macros fixes + +### Fixed + +- `many0_m_n` now supports the n=1 case +- relaxed trait requirements in `cut` +- `peek!` macro reimplementation +- type inference in `value!` + +## 5.0.0 - 2019-06-24 + +This version comes with a complete rewrite of nom internals to use functions as a base +for parsers, instead of macros. Macros have been updated to use functions under +the hood, so that most existing parsers will work directly or require minimal changes. + +The `CompleteByteSlice` and `CompleteStr` input types were removed. To get different +behaviour related to streaming or complete input, there are different versions of some +parsers in different submodules, like `nom::character::streaming::alpha0` and +`nom::character::complete::alpha0`. + +The `verbose-errors` feature is gone, now the error type is decided through a generic +bound. To get equivalent behaviour to `verbose-errors`, check out `nom::error::VerboseError` + +### Thanks + +- @lowenheim helped in refactoring and error management +- @Keruspe helped in refactoring and fixing tests +- @pingiun, @Songbird0, @jeremystucki, @BeatButton, @NamsooCho, @Waelwindows, @rbtcollins, @MarkMcCaskey for a lot of help in rewriting the documentation and adding code examples +- @GuillaumeGomez for documentation rewriting and checking +- @iosmanthus for bug fixes +- @lo48576 for error management fixes +- @vaffeine for macros visibility fixes +- @webholik and @Havvy for `escaped` and `escaped_transform` fixes +- @proman21 for help on porting bits parsers + +### Added + +- the `VerboseError` type accumulates position info and error codes, and can generate a trace with span information +- the `lexical-core` crate is now used by default (through the `lexical` compilation feature) to parse floats from text +- documentation and code examples for all functions and macros + +### Changed + +- nom now uses functions instead of macros to generate parsers +- macros now use the functions under the hood +- the minimal Rust version is now 1.31 +- the verify combinator's condition function now takes its argument by reference +- `cond` will now return the error of the parser instead of None +- `alpha*`, `digit*`, `hex_digit*`, `alphanumeric*` now recognize only ASCII characters + +### Removed + +- deprecated string parsers (with the `_s` suffix), the normal version can be used instead +- `verbose-errors` is not needed anymore, now the error type can be decided when writing the parsers, and parsers provided by nom are generic over the error type +- `AtEof`, `CompleteByteSlice` and `CompleteStr` are gone, instead some parsers are specialized to work on streaming or complete input, and provided in different modules +- character parsers that were aliases to their `*1` version: eol, alpha, digit, hex_digit, oct_digit, alphanumeric, space, multispace +- `count_fixed` macro +- `whitespace::sp` can be replaced by `character::complete::multispace0` +- method combinators are now in the nom-methods crate +- `take_until_either`, `take_until_either1`, `take_until_either_and_consume` and `take_until_either_and_consume1`: they can be replaced with `is_not` (possibly combined with something else) +- `take_until_and_consume`, `take_until_and_consume1`: they can be replaced with `take_until` combined with `take` +- `sized_buffer` and `length_bytes!`: they can be replaced with the `length_data` function +- `non_empty`, `begin` and `rest_s` function +- `cond_reduce!`, `cond_with_error!`, `closure!`, `apply`, `map_res_err!`, `expr_opt!`, `expr_res!` +- `alt_complete`, `separated_list_complete`, `separated_nonempty_list_complete` + +## 4.2.3 - 2019-03-23 + +### Fixed + +- add missing `build.rs` file to the package +- fix code comparison links in changelog + +## 4.2.2 - 2019-03-04 + +### Fixed + +- regression in do_parse macro import for edition 2018 + +## 4.2.1 - 2019-02-27 + +### Fixed + +- macro expansion error in `do_parse` due to `compile_error` macro usage + +## 4.2.0 - 2019-01-29 + +### Thanks + +- @JoshMcguigan for unit test fixes +- @oza for documentation fixes +- @wackywendell for better error conversion +- @Zebradil for documentation fixes +- @tsraom for new combinators +- @hcpl for minimum Rust version tests +- @KellerFuchs for removing some unsafe uses in float parsing + +### Changed + +- macro import in edition 2018 code should work without importing internal macros now +- the regex parsers do not require the calling code to have imported the regex crate anymore +- error conversions are more ergonomic +- method combinators are now deprecated. They might be moved to a separate crate +- nom now specifies Rust 1.24.1 as minimum version. This was already the case before, now it is made explicit + +### Added + +- `many0_count` and `many1_count` to count applications of a parser instead of +accumulating its results in a `Vec` + +### Fixed + +- overflow in the byte wrapper for bit level parsers +- `f64` parsing does not use `transmute` anymore + +## 4.1.1 - 2018-10-14 + +### Fixed + +- compilation issue in verbose-errors mode for `add_return_error` + +## 4.1.0 - 2018-10-06 + +### Thanks + +- @xfix for fixing warnings, simplifying examples and performance fixes +- @dvberkel for documentation fixes +- @chifflier for fixing warnings +- @myrrlyn for dead code elimination +- @petrochenkov for removing redundant test macros +- @tbelaire for documentation fixes +- @khernyo for fixing warnings +- @linkmauve for documentation fixes +- @ProgVal for documentation fixes, warning fixes and error management +- @Nemo157 for compilation fixes +- @RReverser for documentation fixes +- @xpayn for fixing warnings +- Blas Rodriguez Irizar for documentation fixes +- @badboy for documentation fixes +- @kyrias for compilation fixes +- @kurnevsky for the `rest_len` parser +- @hjr3 for new documentation examples +- @fengalin for error management +- @ithinuel for the pcap example project +- @phaazon for documentation fixes +- @juchiast for documentation fixes +- @jrakow for the `u128` and `i128` parsers +- @smarnach for documentation fixes +- @derekdreery for `pub(crate)` support +- @YaLTeR for `map_res_err!` + +### Added + +- `rest_len` parser, returns the length of the remaining input +- `parse_to` has its own error code now +- `u128` and `i128` parsers in big and little endian modes +- support for `pub(crate)` syntax +- `map_res_err!` combinator that appends the error of its argument function in verbose errors mode + +### Fixed + +- lots of unused imports warnings were removed +- the `bytes` combinator was not compiling in some cases +- the big and little endian combinators now work without external imports +- CI is now faster and uses less cache +- in `add_return_error`, the provided error code is now evaluated only once + +### Changed + +- `fold_many1` will now transmit a `Failure` instead of transforming it to an `Error` +- `float` and `double` now work on all of nom's input types (`&[u8]`, `&str`, `CompleteByteSlice`, `CompleteStr` and any type that implements the required traits). `float_s` and `double_s` got the same modification, but are now deprecated +- `CompleteByteSlice` and `CompleteStr` get a small optimization by inlining some functions + + +## 4.0.0 - 2018-05-14 + +### Thanks + +- @jsgf for the new `AtEof` trait +- @tmccombs for fixes on `escaped*` combinators +- @s3bk for fixes around non Copy input types and documentation help +- @kamarkiewicz for fixes to no_std and CI +- @bheisler for documentation and examples +- @target-san for simplifying the `InputIter` trait for `&[u8]` +- @willmurphyscode for documentation and examples +- @Chaitanya1416 for typo fixes +- @fflorent for `input_len()` usage fixes +- @dbrgn for typo fixes +- @iBelieve for no_std fixes +- @kpp for warning fixes and clippy fixes +- @keruspe for fixes on FindToken +- @dtrebbien for fixes on take_until_and_consume1 +- @Henning-K for typo fixes +- @vthriller for documentation fixes +- @federicomenaquintero and @veprbl for their help fixing the float parsers +- @vmchale for new named_args versions +- @hywan for documentation fixes +- @fbenkstein for typo fixes +- @CAD97 for catching missing trait implementations +- @goldenlentils for &str optimizations +- @passy for typo fixes +- @ayrat555 for typo fixes +- @GuillaumeGomez for documentation fixes +- @jrakow for documentation fixes and fixes for `switch!` +- @phlosioneer for documentation fixes +- @creativcoder for typo fixes +- @derekdreery for typo fixes +- @lucasem for implementing `Deref` on `CompleteStr` and `CompleteByteSlice` +- @lowenheim for `parse_to!` fixes +- @myrrlyn for trait fixes around `CompleteStr` and `CompleteByteSlice` +- @NotBad4U for fixing code coverage analysis +- @murarth for code formatting +- @glandium for fixing build in no_std +- @csharad for regex compatibility with `CompleteStr` +- @FauxFaux for implementing `AsRef<str>` on `CompleteStr` +- @jaje for implementing `std::Error` on `nom:Err` +- @fengalin for warning fixes +- @@khernyo for doc formatting + +Special thanks to @corkami for the logo :) + +### Breaking changes + +- the `IResult` type now becomes a `Result` from the standard library +- `Incomplete` now returns the additional data size needed, not the total data size needed +- verbose-errors is now a superset of basic errors +- all the errors now include the related input slice +- the arguments from `error_position` and other such macros were swapped to be more consistent with the rest of nom +- automatic error conversion: to fix error type inference issues, a custom error type must now implement `std::convert::From<u32>` +- the `not!` combinator returns unit `()` +- FindToken's calling convention was swapped +- the `take_*` combinators are now more coherent and stricter, see commit 484f6724ea3ccb for more information +- `many0` and other related parsers will now return `Incomplete` if the reach the end of input without an error of the child parser. They will also return `Incomplete` on an empty input +- the `sep!` combinator for whitespace only consumes whitespace in the prefix, while the `ws!` combinator takes care of consuming the remaining whitespace + +### Added + +- the `AtEof` trait for input type: indicate if we can get more input data later (related to streaming parsers and `Incomplete` handling) +- the `escaped*` parsers now support the `&str`input type +- the `Failure` error variant represents an unrecoverable error, for which `alt` and other combinators will not try other branches. This error means we got in the right part of the code (like, a prefix was checked correctly), but there was an error in the following parts +- the `CompleteByteSlice` and `CompleteStr` input types consider there will be no more refill of the input. They fixed the `Incomplete` related issues when we have all of the data +- the `exact!()` combinator will fail if we did not consume the whole input +- the `take_while_m_n!` combinator will match a specified number of characters +- `ErrorKind::TakeUntilAndConsume1` +- the `recognize_float` parser will match a float number's characters, but will not transform to a `f32` or `f64` +- `alpha` and other basic parsers are now much stricter about partial inputs. We also introduce the `*0` and `*1` versions of those parsers +- `named_args` can now specify the input type as well +- `HexDisplay` is now implemented for `&str` +- `alloc` feature +- the `InputTakeAtposition` trait allows specialized implementations of parsers like `take_while!` + +### Removed + +- the producers and consumers were removed +- the `error_code` and `error_node` macros are not used anymore + +### Fixed + +- `anychar!` now works correctly with multibyte characters +- `take_until_and_consume1!` no longer results in "no method named \`find_substring\`" and "no method named \`slice\`" compilation errors +- `take_until_and_consume1!` returns the correct Incomplete(Needed) amount +- `no_std` compiles properly, and nom can work with `alloc` too +- `parse_to!` now consumes its input + +### Changed + +- `alt` and other combinators will now clone the input if necessary. If the input is already `Copy` there is no performance impact +- the `rest` parser now works on various input types +- `InputIter::Item` for `&[u8]` is now a `u8` directly, not a reference +- we now use the `compile_error` macro to return a compile time error if there was a syntax issue +- the permutation combinator now supports optional child parsers +- the float numbers parsers have been refactored to use one common implementation that is nearly 2 times faster than the previous one +- the float number parsers now accept more variants + + +## 3.2.1 - 2017-10-27 + +### Thanks + +- @ordian for `alt_complete` fixes +- @friedm for documentation fixes +- @kali for improving error management + +### Fixed + +- there were cases where `alt_complete` could return `Incomplete` + +### Added + +- an `into_error_kind` method can be used to transform any error to a common value. This helps when the library is included multiple times as dependency with different feature sets + + +## 3.2.0 - 2017-07-24 + +### Thanks + +- @jedireza for documentation fixes +- @gmorenz for the `bytes` combinator +- @meh for character combinator fixes for UTF-8 +- @jethrogb for avoiding move issues in `separated_list` + +### Changed + +- new layout for the main page of documentation +- `anychar` can now work on any input type +- `length_bytes` is now an alias for `length_data` + +### Fixed + +- `one_of`, `none_of` and `char` will now index correctly UTF-8 characters +- the `compiler_error` macro is now correctly exported + + +### Added + +- the `bytes` combinator transforms a bit stream back to a byte slice for child parsers + +## 3.1.0 - 2017-06-16 + +### Thanks + +- @sdroege: implementing be_i24 and le_i24 +- @Hywan: integrating faster substring search using memchr +- @nizox: fixing type issues in bit stream parsing +- @grissiom: documentation fixes +- @doomrobo: implementing separated_list_complete and separated_nonempty_list_complete +- @CWood1: fixing memchr integration in no_std +- @lu_zero: integrating the compiler_error crate +- @dtolnay: helping debug a type inference issue in map + +### Changed + +- memchr is used for substring search if possible +- if building on nightly, some common syntax errors will display a specific error message. If building no stable, display the documentation to activate those messages +- `count` no longer preallocates its vector + +### Fixed + +- better type inference in alt_complete +- `alt` should now work with whitespace parsing +- `map` should not make type inference errors anymore + +### Added + +- be_i24 and le_i24, parsing big endian and little endian signed 24 bit integers +- `separated_list_complete` and `separated_nonempty_list_complete` will treat incomplete from sub parsers as error + +## 3.0.0 - 2017-05-12 + +### Thanks + +- Chris Pick for some `Incomplete` related refactors +- @dbrgn for documentation fixes +- @valarauca for adding `be_u24` +- @ithinuel for usability fixes +- @evuez for README readability fixes and improvements to `IResult` +- @s3bk for allowing non-`Copy` types as input +- @keruspe for documentation fixes +- @0xd34d10cc for trait fixes on `InputIter` +- @sdleffler for lifetime shenanigans on `named_args` +- @chengsun for type inference fixes in `alt` +- @iBelieve for adding str to no_std +- @Hywan for simplifying code in input traits +- @azerupi for extensive documentation of `alt` and `alt_complete` + +### Breaking Changes + +- `escaped`, `separated_list` and `separated_nonempty_list` can now return `Incomplete` when necessary +- `InputIter` does not require `AsChar` on its `Item` type anymore +- the `core` feature that was putting nom in `no_std` mode has been removed. There is now a `std` feature, activated by default. If it is not activated, nom is in `no_std` +- in `verbose-errors` mode, the error list is now stored in a `Vec` instead of a box based linked list +- `chain!` has finally been removed + +### Changed + +- `Endianness` now implements `Debug`, `PartialEq`, `Eq`, `Clone` and `Copy` +- custom input types can now be cloned if they're not `Copy` +- the infamous 'Cannot infer type for E' error should happen less often now +- `str` is now available in `no_std` mode + +### Fixed + +- `FileProducer` will be marked as `Eof` on full buffer +- `named_args!` now has lifetimes that cannot conflict with the lifetimes from other arguments + +### Added + +- `be_u24`: big endian 24 bit unsigned integer parsing +- `IResult` now has a `unwrap_or` method + + +## 2.2.1 - 2017-04-03 + +### Thanks + +- @Victor-Savu for formatting fixes in the README +- @chifflier for detecting and fixing integer overflows +- @utkarshkukreti for some performance improvements in benchmarks + +### Changed + +- when calculating how much data is needed in `IResult::Incomplete`, the addition could overflow (it is stored as a usize). This would apparently not result in any security vulnerability on release code + +## 2.2.0 - 2017-03-20 + +### Thanks + +- @seppo0010 for fixing `named_args` +- @keruspe for implementing or() on `IResult`, adding the option of default cases in `switch!`, adding support for `cargo-travis` +- @timlyo for documentation fixes +- @JayKickliter for extending `hex_u32` +- @1011X for fixing regex integration +- @Kerollmops for actually marking `chain!` as deprecated +- @joliss for documentation fixes +- @utkarshkukreti for tests refactoring and performance improvement +- @tmccombs for documentation fixes + +### Added + +- `IResult` gets an `or()` method +- `take_until1`, `take_until_and_consume1`, `take_till1!` and `take_till1_s!` require at least 1 character + +### Changed + +- `hex_u32` accepts uppercase digits as well +- the character based combinators leverage the input traits +- the whitespace parsers now work on &str and other types +- `take_while1` returns `Incomplete` on empty input +- `switch!` can now take a default case + +### Fixed + +- `named_args!` now imports `IResult` directly +- the upgrade to regex 0.2 broke the regex combinators, they work now + +## 2.1.0 - 2017-01-27 + +### Thanks + +- @nickbabcock for documentation fixes +- @derekdreery for documentation fixes +- @DirkyJerky for documentation fixes +- @saschagrunert for documentation fixes +- @lucab for documentation fixes +- @hyone for documentation fixes +- @tstorch for factoring `Slice` +- @shepmaster for adding crate categories +- @antoyo for adding `named_args!` + +### Added + +- `verify!` uses a first parser, then applies a function to check that its result satisfies some conditions +- `named_args!` creates a parser function that can accept other arguments along with the input +- `parse_to!` will use the `parse` method from `FromStr` to parse a value. It will automatically translate the input to a string if necessary +- `float`, `float_s`, `double`, `double_s` can recognize floating point numbers in text + +### Changed + +- `escaped!` will now return `Incomplete` if needed +- `permutation!` supports up to 20 child parsers + +## 2.0.1 - 2016-12-10 + +Bugfix release + +*Warning*: there is a small breaking change, `add_error!` is renamed to `add_return_error!`. This was planned for the 2.0 release but was forgotten. This is a small change in a feature that not many people use, for a release that is not yet widely in use, so there will be no 3.0 release for that change. + +### Thanks + +- @nickbabcock for catching and fixing the `add_error!` mixup +- @lucab for documentation fixes +- @jtdowney for noticing that `tag_no_case!` was not working at all for byte slices + +### Fixed + +- `add_error!` has been renamed to `add_return_error!` +- the `not!` combinator now accepts functions +- `tag_no_case!` is now working as accepted (before, it accepted everything) + + +## 2.0 - 2016-11-25 + +The 2.0 release is one of the biggest yet. It was a good opportunity to clean up some badly named combinators and fix invalid behaviours. + +Since this version introduces a few breaking changes, an [upgrade documentation](https://github.com/Geal/nom/blob/main/doc/upgrading_to_nom_2.md) is available, detailing the steps to fix the most common migration issues. After testing on a set of 30 crates, most of them will build directly, a large part will just need to activate the "verbose-errors" compilation feature. The remaining fixes are documented. + +This version also adds a lot of interesting features, like the permutation combinator or whitespace separated formats support. + +### Thanks + +- @lu-zero for license help +- @adamgreig for type inference fixes +- @keruspe for documentation and example fixes, for the `IResult => Result` conversion work, making `AsChar`'s method more consistent, and adding `many_till!` +- @jdeeny for implementing `Offset` on `&str` +- @vickenty for documentation fixes and his refactoring of `length_value!` and `length_bytes!` +- @overdrivenpotato for refactoring some combinators +- @taralx for documentation fixes +- @keeperofdakeys for fixing eol behaviour, writing documentation and adding `named_attr!` +- @jturner314 for writing documentation +- @bozaro for fixing compilation errors +- @uniphil for adding a `crates.io` badge +- @badboy for documentation fixes +- @jugglerchris for fixing `take_s!` +- @AndyShiue for implementing `Error` and `Display` on `ErrorKind` and detecting incorrect UTF-8 string indexing + +### Added + +- the "simple" error management system does not accumulates errors when backtracking. This is a big perf gain, and is activated by default in nom 2.0 +- nom can now work on any type that implement the traits defined in `src/traits.rs`: `InputLength`, `InputIter`, `InputTake`, `Compare`, `FindToken`, `FindSubstring`, `Slice` +- the documentation from Github's wiki has been moved to the `doc/` directory. They are markdown files that you can build with [cargo-external-doc](https://crates.io/crates/cargo-external-doc) +- whitespace separated format support: with the `ws!` combinator, you can automatically introduce whitespace parsers between all parsers and combinators +- the `permutation!` combinator applies its child parsers in any order, as long as they all succeed once, and return a tuple of the results +- `do_parse!` is a simpler alternative to `chain!`, which is now deprecated +- you can now transform an `IResult` in a `std::result::Result` +- `length_data!` parses a length, and returns a subslice of that length +- `tag_no_case!` provides case independent comparison. It works nicely, without any allocation, for ASCII strings, but for UTF-8 strings, it defaults to an unsatisfying (and incorrect) comparison by lowercasing both strings +- `named_attr!` creates functions like `named!` but can add attributes like documentation +- `many_till!` applies repeatedly its first child parser until the second succeeds + +### Changed + +- the "verbose" error management that was available in previous versions is now activated by the "verbose-errors" compilation feature +- code reorganization: most of the parsers were moved in separate files to make the source easier to navigate +- most of the combinators are now independent from the input type +- the `eof` function was replaced with the `eof!` macro +- `error!` and `add_error!` were replaced with `return_error!` and `add_return_error!` to fix the name conflict with the log crate +- the `offset()` method is now in the `Offset` trait +- `length_value!` has been renamed to `length_count!`. The new `length_value!` selects a slice and applies the second parser once on that slice +- `AsChar::is_0_to_9` is now `AsChar::is_dec_digit` +- the combinators with configurable endianness now take an enum instead of a boolean as parameter + +### Fixed +- the `count!`, `count_fixed!` and `length_*!` combinator calculate incomplete data needs correctly +- `eol`, `line_ending` and `not_line_ending` now have a consistent behaviour that works correctly with incomplete data +- `take_s!` didn't correctly handle the case when the slice is exactly the right length + +## 1.2.4 - 2016-07-20 + +### Thanks +- @Phlosioneer for documentation fixes +- @sourrust for fixing offsets in `take_bits!` +- @ChrisMacNaughton for the XFS crate +- @pwoolcoc for `rest_s` +- @fitzgen for more `IResult` methods +- @gtors for the negative lookahead feature +- @frk1 and @jeandudey for little endian float parsing +- @jethrogb for fixing input usage in `many1` +- @acatton for beating me at nom golf :D + +### Added +- the `rest_s` method on `IResult` returns the remaining `&str` input +- `unwrap_err` and `unwrap_inc` methods on `IResult` +- `not!` will peek at the input and return `Done` if the underlying parser returned `Error` or `Incomplete`, without consuming the input +- `le_f32` and `le_f64` parse little endian floating point numbers (IEEE 754) +- + +### Fixed +- documentation fixes +- `take_bits!` is now more precise +- `many1` inccorectly used the `len` function instead of `input_len` +- the INI parser is simpler +- `recognize!` had an early `return` that is removed now + +## 1.2.3 - 2016-05-10 + +### Thanks +- @lu-zero for the contribution guidelines +- @GuillaumeGomez for fixes on `length_bytes` and some documentation +- @Hywan for documentation and test fixes +- @Xirdus for correct trait import issues +- @mspiegel for the new AST example +- @cholcombe973 for adding the `cond_with_error!` combinator +- @tstorch for refactoring `many0!` +- @panicbit for the folding combinators +- @evestera for `separated_list!` fixes +- @DanielKeep for correcting some enum imports + +### Added +- Regular expression combinators starting with `re_bytes_` work on byte slices +- example parsing arithmetic expressions to an AST +- `cond_with_error!` works like `cond!` but will return `None` if the condition is false, and `Some(value)` if the underlying parser succeeded +- `fold_many0!`, `fold_many1!` and `fold_many_m_n!` will take a parser, an initial value and a combining function, and fold over the successful applications of the parser + +### Fixed +- `length_bytes!` converts the result of its child parser to usize +- `take_till!` now imports `InputLength` instead of assuming it's in scope +- `separated_list!` and `separated_nonempty_list!` will not consume the separator if there's no following successfully parsed value +- no more warnings on build + +### Changed +- simpler implementation of `many0!` + +## 1.2.2 - 2016-03-09 + +### Thanks +- @conradev for fixing `take_until_s!` +- @GuillaumeGomez for some documentation fixes +- @frewsxcv for some documentation fixes +- @tstorch for some test refactorings + +### Added +- `nom::Err` now implements `std::error::Error` + +### Fixed +- `hex_u32` does not parses more than 8 chars now +- `take_while!` and `take_while1!` will not perturb the behaviour of `recognize!` anymore + +## 1.2.1 - 2016-02-23 + +### Thanks +- @sourrust for adding methods to `IResult` +- @tstorch for the test refactoring, and for adding methods to `IResult` and `Needed` +- @joelself for fixing the method system + +### Added + +- mapping methods over `IResult` and `Needed` + +### Changed + +- `apply_rf` is renamed to `apply_m`. This will not warrant a major version, since it is part missing from the methods feture added in the 1.2.0 release +- the `regexp_macros` feature that used `regex!` to precompile regular expressions has been replaced by the normal regex engine combined with `lazy_static` + +### Fixed + +- when a parser or combinator was returning an empty buffer as remaining part, it was generating one from a static empty string. This was messing with buffer offset calculation. Now, that empty slice is taken like this: `&input[input.len()..]`. +- The `regexp_macros` and `no_std` feature build again and are now tested with Travis CI + +## 1.2.0 - 2016-02-08 + +### Thanks +- @zentner-kyle for type inference fixes +- @joelself for his work on `&str` parsing and method parsers +- @GuillaumeGomez for implementing methods on `IResult` +- @dirk for the `alt_complete!` combinator +- @tstorch for a lot of refactoring work and unit tests additions +- @jansegre for the hex digit parsers +- @belgum for some documentation fixes +- @lwandrebeck for some documentation fixes and code fixes in `hex_digit` + +### Added +- `take_until_and_consume_s!` for consumption of string data until a tag +- more function patterns in `named!`. The error type can now be specified +- `alt_complete!` works like the `alt!` combinator, but tries the next branch if the current one returned `Incomplete`, instead of returning directly +- more unit tests for a lot of combinators +- hexadecimal digit parsers +- the `tuple!` combinator takes a list of parsers as argument, and applies them serially on the input. If all of them are successful, it willr eturn a tuple accumulating all the values. This combinator will (hopefully) replace most uses of `chain!` +- parsers can now be implemented as a method for a struct thanks to the `method!`, `call_m!` and `apply_rf!` combinators + +### Fixed +- there were type inference issues in a few combinators. They will now be easier to compile +- `peek!` compilation with bare functions +- `&str` parsers were splitting data at the byte level, not at the char level, which can result in inconsistencies in parsing UTF-8 characters. They now use character indexes +- some method implementations were missing on `IResult<I,O,E>` (with specified error type instead of implicit) + +## 1.1.0 - 2016-01-01 + +This release adds a lot of features related to `&str` parsing. The previous versions +were focused on `&[u8]` and bit streams parsing, but there's a need for more text +parsing with nom. The parsing functions like `alpha`, `digit` and others will now +accept either a `&[u8]` or a `&str`, so there is no breaking change on that part. + +There are also a few performance improvements and documentation fixes. + +### Thanks +- @Binero for pushing the work on `&str` parsing +- @meh for fixing `Option` and `Vec` imports +- @hoodie for a documentation fix +- @joelself for some documentation fixes +- @vberger for his traits magic making nom functions more generic + +### Added + +- string related parsers: `tag_s!`, `take_s!`, `is_a_s!`, `is_not_s!`, `take_while_s!`, `take_while1_s!`, `take_till_s!` +- `value!` is a combinator that always returns the same value. If a child parser is passed as second argument, that value is returned when the child parser succeeds + +### Changed + +- `tag!` will now compare even on partial input. If it expects "abcd" but receives "ef", it will now return an `Error` instead of `Incomplete` +- `many0!` and others will preallocate a larger vector to avoid some copies and reallocations +- `alpha`, `digit`, `alphanumeric`, `space` and `multispace` now accept as input a `&[u8]` or a `&str`. Additionally, they return an error if they receive an empty input +- `take_while!`, `take_while1!`, `take_while_s!`, `take_while1_s!` wilreturn an error on empty input + +### Fixed + +- if the child parser of `many0!` or `many1!` returns `Incomplete`, it will return `Incomplete` too, possibly updating the needed size +- `Option,` `Some`, `None` and `Vec` are now used with full path imports + +## 1.0.1 - 2015-11-22 + +This releases makes the 1.0 version compatible with Rust 1.2 and 1.3 + +### Thanks +- @steveklabnik for fixing lifetime issues in Producers and Consumers + +## 1.0.0 - 2015-11-16 + +Stable release for nom. A lot of new features, a few breaking changes + +### Thanks +- @ahenry for macro fixes +- @bluss for fixing documentation +- @sourrust for cleaning code and debugging the new streaming utilities +- @meh for inline optimizations +- @ccmtaylor for fixing function imports +- @soro for improvements to the streaming utilities +- @breard-r for catching my typos +- @nelsonjchen for catching my typos too +- @divarvel for hex string parsers +- @mrordinaire for the `length_bytes!` combinator + +### Breaking changes +- `IResult::Error` can now use custom error types, and is generic over the input type +- Producers and consumers have been replaced. The new implementation uses less memory and integrates more with parsers +- `nom::ErrorCode` is now `nom::ErrorKind` +- `filter!` has been renamed to `take_while!` +- `chain!` will count how much data is consumed and use that number to calculate how much data is needed if a parser returned `Incomplete` +- `alt!` returns `Incomplete` if a child parser returned `Incomplete`, instead of skipping to the next parser +- `IResult` does not require a lifetime tag anymore, yay! + +### Added + +- `complete!` will return an error if the child parser returned `Incomplete` +- `add_error!` will wrap an error, but allow backtracking +- `hex_u32` parser + +### Fixed +- the behaviour around `Incomplete` is better for most parsers now + +## 0.5.0 - 2015-10-16 + +This release fixes a few issues and stabilizes the code. + +### Thanks +- @nox for documentation fixes +- @daboross for linting fixes +- @ahenry for fixing `tap!` and extending `dbg!` and `dbg_dmp!` +- @bluss for tracking down and fixing issues with unsafe code +- @meh for inlining parser functions +- @ccmtaylor for fixing import of `str::from_utf8` + +### Fixed +- `tap!`, `dbg!` and `dbg_dmp!` now accept function parameters + +### Changed +- the type used in `count_fixed!` must be `Copy` +- `chain!` calculates how much data is needed if one of the parsers returns `Incomplete +- optional parsers in `chain!` can return `Incomplete` + +## 0.4.0 - 2015-09-08 + +Considering the number of changes since the last release, this version can contain breaking changes, so the version number becomes 0.4.0. A lot of new features and performance improvements! + +### Thanks +- @frewsxcv for documentation fixes +- @ngrewe for his work on producers and consumers +- @meh for fixes on `chain!` and for the `rest` parser +- @daboross for refactoring `many0!` and `many1!` +- @aleksander for the `switch!` combinator idea +- @TechnoMancer for his help with bit level parsing +- @sxeraverx for pointing out a bug in `is_a!` + +### Fixed +- `count_fixed!` must take an explicit type as argument to generate the fixed-size array +- optional parsing behaviour in `chain!` +- `count!` can take 0 elements +- `is_a!` and `is_not!` can now consume the whole input + +### Added +- it is now possible to seek to the end of a `MemProducer` +- `opt!` returns `Done(input, None)` if `the child parser returned `Incomplete` +- `rest` will return the remaining input +- consumers can now seek to and from the end of input +- `switch!` applies a first parser then matches on its result to choose the next parser +- bit-level parsers +- character-level parsers +- regular expression parsers +- implementation of `take_till!`, `take_while!` and `take_while1!` + +### Changed +- `alt!` can return `Incomplete` +- the error analysis functions will now take references to functions instead of moving them +- performance improvements on producers +- performance improvement for `filter!` +- performance improvement for `count!`: a `Vec` of the right size is directly allocated + +## 0.3.11 - 2015-08-04 + +### Thanks +- @bluss for remarking that the crate included random junk lying non committed in my local repository + +### Fixed +- cleanup of my local repository will ship less files in the crates, resulting in a smaller download + +## 0.3.10 - 2015-08-03 + +### Added + +- `bits!` for bit level parsing. It indicates that all child parsers will take a `(&[u8], usize)`as input, with the second parameter indicating the bit offset in the first byte. This allows viewing a byte slice as a bit stream. Most combinators can be used directly under `bits!` +- `take_bits!` takes an integer type and a number of bits, consumes that number of bits and updates the offset, possibly by crossing byte boundaries +- bit level parsers are all written in `src/bits.rs` + +### Changed + +- Parsers that specifically handle bytes have been moved to src/bytes.rs`. This applies to `tag!`, `is_not!`, `is_a!`, `filter!`, `take!`, `take_str!`, `take_until_and_consume!`, `take_until!`, `take_until_either_and_consume!`, `take_until_either!` + +## 0.3.9 - 2015-07-20 + +### Thanks +- @badboy for fixing `filter!` +- @idmit for some documentation fixes + +### Added +- `opt_res!` applies a parser and transform its result in a Result. This parser never fails +- `cond_reduce!` takes an expression as parameter, applies the parser if the expression is true, and returns an error if the expression is false +- `tap!` pass the result of a parser to a block to manipulate it, but do not affect the parser's result +- `AccReader` is a Read+BufRead that supports data accumulation and partial consumption. The `consume` method must be called afterwardsto indicate how much was consumed +- Arithmetic expression evaluation and parsing example +- `u16!`, `u32!`, `u64!`, `i16!`, `i32!`, `i64!` take an expression as parameter, if the expression is true, apply the big endian integer parser, if false, the little endian version +- type information for combinators. This will make the documentation a bit easier to navigate + +### Fixed +- `map_opt!` and `map_res!` had issues with argument order due to bad macros +- `delimited!` did not compile for certain combinations of arguments +- `filter!` did not return a byte slice but a fixed array + +## 0.3.8 - 2015-07-03 + +### Added +- code coverage is now calculated automatically on Travis CI +- `Stepper`: wrap a `Producer`, and call the method `step` with a parser. This method will buffer data if there is not enough, apply the parser if there is, and keep the rest of the input in memory for the next call +- `ReadProducer`: takes something implementing `Read`, and makes a `Producer` out of it + +### Fixed +- the combinators `separated_pair!` and `delimited!` did not work because an implementation macro was not exported +- if a `MemProducer` reached its end, it should always return `Eof` +- `map!` had issues with argument matching + +## 0.3.7 - 2015-06-24 + +### Added +- `expr_res!` and `expr_opt!` evaluate an expression returning a Result or Opt and convert it to IResult +- `AsBytes` is implemented for fixed size arrays. This allows `tag!([41u8, 42u8])` + +### Fixed +- `count_fixed!` argument parsing works again + +## 0.3.6 - 2015-06-15 + +### Added +- documentation for a few functions +- the consumer trait now requires the `failed(&self, error_code)` method in case of parsing error +- `named!` now handles the alternative `named!(pub fun_name<OutputType>, ...)` + +### Fixed +- `filter!` now returns the whole input if the filter function never returned false +- `take!` casts its argument as usize, so it can accepts any integer type now + +## 0.3.5 - 2015-06-10 + +### Thanks +- @cmr for some documentation fixes + +### Added +- `count_fixed!` returns a fixed array + +### Fixed +- `count!` is back to the previous behaviour, returning a `Vec` for sizes known at runtime + +### Changed +- functions and traits exported from `nom::util` are now directly in `nom::` + +## 0.3.4 - 2015-06-09 + +### Thanks +- @andrew-d for fixes on `cond!` +- @keruspe for features in `chain!` + +### Added +- `chain!` can now have mutable fields + +### Fixed +- `cond!` had an infinite macro recursion + +### Changed +- `chain!` generates less code now. No apprent compilation time improvement + +## 0.3.3 - 2015-06-09 + +### Thanks +- @andrew-d for the little endian signed integer parsers +- @keruspe for fixes on `count!` + +### Added +- `le_i8`, `le_i16`, `le_i32`, `le_i64`: little endian signed integer parsers + +### Changed +- the `alt!` parser compiles much faster, even with more than 8 branches +- `count!` can now return a fixed size array instead of a growable vector + +## 0.3.2 - 2015-05-31 + +### Thanks +- @keruspe for the `take_str` parser and the function application combinator + +### Added +- `take_str!`: takes the specified number of bytes and return a UTF-8 string +- `apply!`: do partial application on the parameters of a function + +### Changed +- `Needed::Size` now contains a `usize` instead of a `u32` + +## 0.3.1 - 2015-05-21 + +### Thanks +- @divarvel for the big endian signed integer parsers + +### Added +- `be_i8`, `be_i16`, `be_i32`, `be_i64`: big endian signed integer parsers +- the `core` feature can be passed to cargo to build with `no_std` +- colored hexdump can be generated from error chains + +## 0.3.0 - 2015-05-07 + +### Thanks +- @filipegoncalves for some documentation and the new eof parser +- @CrimsonVoid for putting fully qualified types in the macros +- @lu_zero for some documentation fixes + +### Added +- new error types that can contain an error code, an input slice, and a list of following errors +- `error!` will cut backtracking and return directly from the parser, with a specified error code +- `eof` parser, successful if there is no more input +- specific error codes for the parsers provided by nom + +### Changed +- fully qualified types in macros. A lot of imports are not needed anymore + +### Removed +- `FlatMap`, `FlatpMapOpt` and `Functor` traits (replaced by `map!`, `map_opt!` and `map_res!`) + +## 0.2.2 - 2015-04-12 + +### Thanks +- @filipegoncalves and @thehydroimpulse for debugging an infinite loop in many0 and many1 +- @thehydroimpulse for suggesting public named parsers +- @skade for removing the dependency on the collections gate + +### Added +- `named!` can now declare public functions like this: `named!(pub tst, tag!("abcd"));` +- `pair!(X,Y)` returns a tuple `(x, y)` +- `separated_pair!(X, sep, Y)` returns a tuple `(x, y)` +- `preceded!(opening, X)` returns `x` +- `terminated!(X, closing)` returns `x` +- `delimited(opening, X, closing)` returns `x` +- `separated_list(sep, X)` returns a `Vec<X>` +- `separated_nonempty_list(sep, X)` returns a `Vec<X>` of at list one element + +### Changed +- `many0!` and `many1!` forbid parsers that do not consume input +- `is_a!`, `is_not!`, `alpha`, `digit`, `space`, `multispace` will now return an error if they do not consume at least one byte + +## 0.2.1 - 2015-04-04 + +### Thanks +- @mtsr for catching the remaining debug println! +- @jag426 who killed a lot of warnings +- @skade for removing the dependency on the core feature gate + + +### Added +- little endian unsigned int parsers le_u8, le_u16, le_u32, le_u64 +- `count!` to apply a parser a specified number of times +- `cond!` applies a parser if the condition is met +- more parser development tools in `util::*` + +### Fixed +- in one case, `opt!` would not compile + +### Removed +- most of the feature gates are now removed. The only one still needed is `collections` + +## 0.2.0 - 2015-03-24 +*works with `rustc 1.0.0-dev (81e2396c7 2015-03-19) (built 2015-03-19)`* + +### Thanks +- Ryman for the AsBytes implementation +- jag426 and jaredly for documentation fixes +- eternaleye on #rust IRC for his help on the new macro syntax + +### Changed +- the AsBytes trait improves readability, no more b"...", but "..." instead +- Incomplete will now hold either Needed;;Unknown, or Needed::Size(u32). Matching on Incomplete without caring for the value is done with `Incomplete(_)`, but if more granularity is mandatory, `Needed` can be matched too +- `alt!` can pass the result of the parser to a closure +- the `take_*` macros changed behaviour, the default case is now not to consume the separator. The macros have been renamed as follows: `take_until!` -> `take_until_and_consume!`, `take_until_and_leave!` -> `take_until!`, `take_until_either_and_leave!` -> `take_until_either!`, `take_until_either!` -> `take_until_either_and_consume!` + +### Added +- `peek!` macro: matches the future input but does not consume it +- `length_value!` macro: the first argument is a parser returning a `n` that can cast to usize, then applies the second parser `n` times. The macro has a variant with a third argument indicating the expected input size for the second parser +- benchmarks are available at https://github.com/Geal/nom_benchmarks +- more documentation +- **Unnamed parser syntax**: warning, this is a breaking change. With this new syntax, the macro combinators do not generate functions anymore, they create blocks. That way, they can be nested, for better readability. The `named!` macro is provided to create functions from parsers. Please be aware that nesting parsers comes with a small cost of compilation time, negligible in most cases, but can quickly get to the minutes scale if not careful. If this happens, separate your parsers in multiple subfunctions. +- `named!`, `closure!` and `call!` macros used to support the unnamed syntax +- `map!`, `map_opt!` and `map_res!` to combine a parser with a normal function, transforming the input directly, or returning an `Option` or `Result` + +### Fixed +- `is_a!` is now working properly + +### Removed +- the `o!` macro does less than `chain!`, so it has been removed +- the `fold0!` and `fold1!` macros were too complex and awkward to use, the `many*` combinators will be useful for most uses for now + +## 0.1.6 - 2015-02-24 +### Changed +- consumers must have an end method that will be called after parsing + +### Added +- big endian unsigned int and float parsers: be_u8, be_u16, be_u32, be_u64, be_f32, be_f64 +- producers can seek +- function and macros documentation +- README documentation +### Fixed +- lifetime declarations +- tag! can return Incomplete + +## 0.1.5 - 2015-02-17 +### Changed +- traits were renamed: FlatMapper -> FlatMap, Mapper -> FlatMapOpt, Mapper2 -> Functor + +### Fixed +- woeks with rustc f1bb6c2f4 + +## 0.1.4 - 2015-02-17 +### Changed +- the chaining macro can take optional arguments with '?' + +## 0.1.3 - 2015-02-16 +### Changed +- the chaining macro now takes the closure at the end of the argument list + +## 0.1.2 - 2015-02-16 +### Added +- flat_map implementation for <&[u8], &[u8]> +- chaining macro +- partial MP4 parser example + + +## 0.1.1 - 2015-02-06 +### Fixed +- closure syntax change + +## Compare code + +* [unreleased](https://github.com/Geal/nom/compare/7.1.3...HEAD) +* [7.1.2](https://github.com/Geal/nom/compare/7.1.2...7.1.3) +* [7.1.2](https://github.com/Geal/nom/compare/7.1.1...7.1.2) +* [7.1.1](https://github.com/Geal/nom/compare/7.1.0...7.1.1) +* [7.1.0](https://github.com/Geal/nom/compare/7.0.0...7.1.0) +* [7.0.0](https://github.com/Geal/nom/compare/6.2.1...7.0.0) +* [6.2.1](https://github.com/Geal/nom/compare/6.2.0...6.2.1) +* [6.2.0](https://github.com/Geal/nom/compare/6.1.2...6.2.0) +* [6.1.2](https://github.com/Geal/nom/compare/6.1.1...6.1.2) +* [6.1.1](https://github.com/Geal/nom/compare/6.1.0...6.1.1) +* [6.1.0](https://github.com/Geal/nom/compare/6.0.1...6.1.0) +* [6.0.1](https://github.com/Geal/nom/compare/6.0.0...6.0.1) +* [6.0.0](https://github.com/Geal/nom/compare/5.1.1...6.0.0) +* [5.1.1](https://github.com/Geal/nom/compare/5.1.0...5.1.1) +* [5.1.0](https://github.com/Geal/nom/compare/5.0.1...5.1.0) +* [5.0.1](https://github.com/Geal/nom/compare/5.0.0...5.0.1) +* [5.0.0](https://github.com/Geal/nom/compare/4.2.3...5.0.0) +* [4.2.3](https://github.com/Geal/nom/compare/4.2.2...4.2.3) +* [4.2.2](https://github.com/Geal/nom/compare/4.2.1...4.2.2) +* [4.2.1](https://github.com/Geal/nom/compare/4.2.0...4.2.1) +* [4.2.0](https://github.com/Geal/nom/compare/4.1.1...4.2.0) +* [4.1.1](https://github.com/Geal/nom/compare/4.1.0...4.1.1) +* [4.1.0](https://github.com/Geal/nom/compare/4.0.0...4.1.0) +* [4.0.0](https://github.com/Geal/nom/compare/3.2.1...4.0.0) +* [3.2.1](https://github.com/Geal/nom/compare/3.2.0...3.2.1) +* [3.2.0](https://github.com/Geal/nom/compare/3.1.0...3.2.0) +* [3.1.0](https://github.com/Geal/nom/compare/3.0.0...3.1.0) +* [3.0.0](https://github.com/Geal/nom/compare/2.2.1...3.0.0) +* [2.2.1](https://github.com/Geal/nom/compare/2.2.0...2.2.1) +* [2.2.0](https://github.com/Geal/nom/compare/2.1.0...2.2.0) +* [2.1.0](https://github.com/Geal/nom/compare/2.0.1...2.1.0) +* [2.0.1](https://github.com/Geal/nom/compare/2.0.0...2.0.1) +* [2.0.0](https://github.com/Geal/nom/compare/1.2.4...2.0.0) +* [1.2.4](https://github.com/Geal/nom/compare/1.2.3...1.2.4) +* [1.2.3](https://github.com/Geal/nom/compare/1.2.2...1.2.3) +* [1.2.2](https://github.com/Geal/nom/compare/1.2.1...1.2.2) +* [1.2.1](https://github.com/Geal/nom/compare/1.2.0...1.2.1) +* [1.2.0](https://github.com/Geal/nom/compare/1.1.0...1.2.0) +* [1.1.0](https://github.com/Geal/nom/compare/1.0.1...1.1.0) +* [1.0.1](https://github.com/Geal/nom/compare/1.0.0...1.0.1) +* [1.0.0](https://github.com/Geal/nom/compare/0.5.0...1.0.0) +* [0.5.0](https://github.com/geal/nom/compare/0.4.0...0.5.0) +* [0.4.0](https://github.com/geal/nom/compare/0.3.11...0.4.0) +* [0.3.11](https://github.com/geal/nom/compare/0.3.10...0.3.11) +* [0.3.10](https://github.com/geal/nom/compare/0.3.9...0.3.10) +* [0.3.9](https://github.com/geal/nom/compare/0.3.8...0.3.9) +* [0.3.8](https://github.com/Geal/nom/compare/0.3.7...0.3.8) +* [0.3.7](https://github.com/Geal/nom/compare/0.3.6...0.3.7) +* [0.3.6](https://github.com/Geal/nom/compare/0.3.5...0.3.6) +* [0.3.5](https://github.com/Geal/nom/compare/0.3.4...0.3.5) +* [0.3.4](https://github.com/Geal/nom/compare/0.3.3...0.3.4) +* [0.3.3](https://github.com/Geal/nom/compare/0.3.2...0.3.3) +* [0.3.2](https://github.com/Geal/nom/compare/0.3.1...0.3.2) +* [0.3.1](https://github.com/Geal/nom/compare/0.3.0...0.3.1) +* [0.3.0](https://github.com/Geal/nom/compare/0.2.2...0.3.0) +* [0.2.2](https://github.com/Geal/nom/compare/0.2.1...0.2.2) +* [0.2.1](https://github.com/Geal/nom/compare/0.2.0...0.2.1) +* [0.2.0](https://github.com/Geal/nom/compare/0.1.6...0.2.0) +* [0.1.6](https://github.com/Geal/nom/compare/0.1.5...0.1.6) +* [0.1.5](https://github.com/Geal/nom/compare/0.1.4...0.1.5) +* [0.1.4](https://github.com/Geal/nom/compare/0.1.3...0.1.4) +* [0.1.3](https://github.com/Geal/nom/compare/0.1.2...0.1.3) +* [0.1.2](https://github.com/Geal/nom/compare/0.1.1...0.1.2) +* [0.1.1](https://github.com/Geal/nom/compare/0.1.0...0.1.1) diff --git a/third_party/rust/nom/Cargo.lock b/third_party/rust/nom/Cargo.lock new file mode 100644 index 0000000000..c7168342e7 --- /dev/null +++ b/third_party/rust/nom/Cargo.lock @@ -0,0 +1,282 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "autocfg" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" + +[[package]] +name = "bit-set" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e11e16035ea35e4e5997b393eacbf6f63983188f7a2ad25bfb13465f5ad59de" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "getrandom" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a60553f9a9e039a333b4e9b20573b9e9b9c0bb3a11e201ccc48ef4283456d673" + +[[package]] +name = "memchr" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" + +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nom" +version = "7.1.3" +dependencies = [ + "doc-comment", + "memchr", + "minimal-lexical", + "proptest", +] + +[[package]] +name = "num-traits" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +dependencies = [ + "autocfg", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed0cfbc8191465bed66e1718596ee0b0b35d5ee1f41c5df2189d0fe8bde535ba" + +[[package]] +name = "proptest" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e0d9cc07f18492d879586c92b485def06bc850da3118075cd45d50e9c95b0e5" +dependencies = [ + "bit-set", + "bitflags", + "byteorder", + "lazy_static", + "num-traits", + "quick-error 2.0.1", + "rand", + "rand_chacha", + "rand_xorshift", + "regex-syntax", + "rusty-fork", + "tempfile", +] + +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + +[[package]] +name = "quick-error" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3" + +[[package]] +name = "rand" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", + "rand_hc", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_hc" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rand_xorshift" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d25bf25ec5ae4a3f1b92f929810509a2f53d7dca2f50b794ff57e3face536c8f" +dependencies = [ + "rand_core", +] + +[[package]] +name = "redox_syscall" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex-syntax" +version = "0.6.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" + +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + +[[package]] +name = "rusty-fork" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb3dcc6e454c328bb824492db107ab7c0ae8fcffe4ad210136ef014458c1bc4f" +dependencies = [ + "fnv", + "quick-error 1.2.3", + "tempfile", + "wait-timeout", +] + +[[package]] +name = "tempfile" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" +dependencies = [ + "cfg-if", + "libc", + "rand", + "redox_syscall", + "remove_dir_all", + "winapi", +] + +[[package]] +name = "wait-timeout" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6" +dependencies = [ + "libc", +] + +[[package]] +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/third_party/rust/nom/Cargo.toml b/third_party/rust/nom/Cargo.toml new file mode 100644 index 0000000000..2388b4ceea --- /dev/null +++ b/third_party/rust/nom/Cargo.toml @@ -0,0 +1,168 @@ +# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO +# +# When uploading crates to the registry Cargo will automatically +# "normalize" Cargo.toml files for maximal compatibility +# with all versions of Cargo and also rewrite `path` dependencies +# to registry (e.g., crates.io) dependencies. +# +# If you are reading this file be aware that the original Cargo.toml +# will likely look very different (and much more reasonable). +# See Cargo.toml.orig for the original contents. + +[package] +edition = "2018" +rust-version = "1.48" +name = "nom" +version = "7.1.3" +authors = ["contact@geoffroycouprie.com"] +include = [ + "CHANGELOG.md", + "LICENSE", + "README.md", + ".gitignore", + "Cargo.toml", + "src/*.rs", + "src/*/*.rs", + "tests/*.rs", + "doc/nom_recipes.md", +] +autoexamples = false +description = "A byte-oriented, zero-copy, parser combinators library" +documentation = "https://docs.rs/nom" +readme = "README.md" +keywords = [ + "parser", + "parser-combinators", + "parsing", + "streaming", + "bit", +] +categories = ["parsing"] +license = "MIT" +repository = "https://github.com/Geal/nom" + +[package.metadata.docs.rs] +features = [ + "alloc", + "std", + "docsrs", +] +all-features = true + +[profile.bench] +lto = true +codegen-units = 1 +debug = true + +[[example]] +name = "custom_error" +path = "examples/custom_error.rs" +required-features = ["alloc"] + +[[example]] +name = "json" +path = "examples/json.rs" +required-features = ["alloc"] + +[[example]] +name = "json_iterator" +path = "examples/json_iterator.rs" +required-features = ["alloc"] + +[[example]] +name = "iterator" +path = "examples/iterator.rs" + +[[example]] +name = "s_expression" +path = "examples/s_expression.rs" +required-features = ["alloc"] + +[[example]] +name = "string" +path = "examples/string.rs" +required-features = ["alloc"] + +[[test]] +name = "arithmetic" + +[[test]] +name = "arithmetic_ast" +required-features = ["alloc"] + +[[test]] +name = "css" + +[[test]] +name = "custom_errors" + +[[test]] +name = "float" + +[[test]] +name = "ini" +required-features = ["alloc"] + +[[test]] +name = "ini_str" +required-features = ["alloc"] + +[[test]] +name = "issues" +required-features = ["alloc"] + +[[test]] +name = "json" + +[[test]] +name = "mp4" +required-features = ["alloc"] + +[[test]] +name = "multiline" +required-features = ["alloc"] + +[[test]] +name = "overflow" + +[[test]] +name = "reborrow_fold" + +[[test]] +name = "fnmut" +required-features = ["alloc"] + +[dependencies.memchr] +version = "2.3" +default-features = false + +[dependencies.minimal-lexical] +version = "0.2.0" +default-features = false + +[dev-dependencies.doc-comment] +version = "0.3" + +[dev-dependencies.proptest] +version = "1.0.0" + +[features] +alloc = [] +default = ["std"] +docsrs = [] +std = [ + "alloc", + "memchr/std", + "minimal-lexical/std", +] + +[badges.coveralls] +branch = "main" +repository = "Geal/nom" +service = "github" + +[badges.maintenance] +status = "actively-developed" + +[badges.travis-ci] +repository = "Geal/nom" diff --git a/third_party/rust/nom/LICENSE b/third_party/rust/nom/LICENSE new file mode 100644 index 0000000000..88557e44e3 --- /dev/null +++ b/third_party/rust/nom/LICENSE @@ -0,0 +1,20 @@ +Copyright (c) 2014-2019 Geoffroy Couprie + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/third_party/rust/nom/README.md b/third_party/rust/nom/README.md new file mode 100644 index 0000000000..f2c1b05286 --- /dev/null +++ b/third_party/rust/nom/README.md @@ -0,0 +1,331 @@ +# nom, eating data byte by byte + +[![LICENSE](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) +[![Join the chat at https://gitter.im/Geal/nom](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/Geal/nom?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) +[![Build Status](https://github.com/Geal/nom/actions/workflows/ci.yml/badge.svg)](https://github.com/Geal/nom/actions/workflows/ci.yml) +[![Coverage Status](https://coveralls.io/repos/github/Geal/nom/badge.svg?branch=main)](https://coveralls.io/github/Geal/nom?branch=main) +[![Crates.io Version](https://img.shields.io/crates/v/nom.svg)](https://crates.io/crates/nom) +[![Minimum rustc version](https://img.shields.io/badge/rustc-1.48.0+-lightgray.svg)](#rust-version-requirements-msrv) + +nom is a parser combinators library written in Rust. Its goal is to provide tools +to build safe parsers without compromising the speed or memory consumption. To +that end, it uses extensively Rust's *strong typing* and *memory safety* to produce +fast and correct parsers, and provides functions, macros and traits to abstract most of the +error prone plumbing. + +![nom logo in CC0 license, by Ange Albertini](https://raw.githubusercontent.com/Geal/nom/main/assets/nom.png) + +*nom will happily take a byte out of your files :)* + +<!-- toc --> + +- [Example](#example) +- [Documentation](#documentation) +- [Why use nom?](#why-use-nom) + - [Binary format parsers](#binary-format-parsers) + - [Text format parsers](#text-format-parsers) + - [Programming language parsers](#programming-language-parsers) + - [Streaming formats](#streaming-formats) +- [Parser combinators](#parser-combinators) +- [Technical features](#technical-features) +- [Rust version requirements](#rust-version-requirements-msrv) +- [Installation](#installation) +- [Related projects](#related-projects) +- [Parsers written with nom](#parsers-written-with-nom) +- [Contributors](#contributors) + +<!-- tocstop --> + +## Example + +[Hexadecimal color](https://developer.mozilla.org/en-US/docs/Web/CSS/color) parser: + +```rust +extern crate nom; +use nom::{ + IResult, + bytes::complete::{tag, take_while_m_n}, + combinator::map_res, + sequence::tuple +}; + +#[derive(Debug,PartialEq)] +pub struct Color { + pub red: u8, + pub green: u8, + pub blue: u8, +} + +fn from_hex(input: &str) -> Result<u8, std::num::ParseIntError> { + u8::from_str_radix(input, 16) +} + +fn is_hex_digit(c: char) -> bool { + c.is_digit(16) +} + +fn hex_primary(input: &str) -> IResult<&str, u8> { + map_res( + take_while_m_n(2, 2, is_hex_digit), + from_hex + )(input) +} + +fn hex_color(input: &str) -> IResult<&str, Color> { + let (input, _) = tag("#")(input)?; + let (input, (red, green, blue)) = tuple((hex_primary, hex_primary, hex_primary))(input)?; + + Ok((input, Color { red, green, blue })) +} + +fn main() {} + +#[test] +fn parse_color() { + assert_eq!(hex_color("#2F14DF"), Ok(("", Color { + red: 47, + green: 20, + blue: 223, + }))); +} +``` + +## Documentation + +- [Reference documentation](https://docs.rs/nom) +- [Various design documents and tutorials](https://github.com/Geal/nom/tree/main/doc) +- [List of combinators and their behaviour](https://github.com/Geal/nom/blob/main/doc/choosing_a_combinator.md) + +If you need any help developing your parsers, please ping `geal` on IRC (libera, geeknode, oftc), go to `#nom-parsers` on Libera IRC, or on the [Gitter chat room](https://gitter.im/Geal/nom). + +## Why use nom + +If you want to write: + +### Binary format parsers + +nom was designed to properly parse binary formats from the beginning. Compared +to the usual handwritten C parsers, nom parsers are just as fast, free from +buffer overflow vulnerabilities, and handle common patterns for you: + +- [TLV](https://en.wikipedia.org/wiki/Type-length-value) +- Bit level parsing +- Hexadecimal viewer in the debugging macros for easy data analysis +- Streaming parsers for network formats and huge files + +Example projects: + +- [FLV parser](https://github.com/rust-av/flavors) +- [Matroska parser](https://github.com/rust-av/matroska) +- [tar parser](https://github.com/Keruspe/tar-parser.rs) + +### Text format parsers + +While nom was made for binary format at first, it soon grew to work just as +well with text formats. From line based formats like CSV, to more complex, nested +formats such as JSON, nom can manage it, and provides you with useful tools: + +- Fast case insensitive comparison +- Recognizers for escaped strings +- Regular expressions can be embedded in nom parsers to represent complex character patterns succinctly +- Special care has been given to managing non ASCII characters properly + +Example projects: + +- [HTTP proxy](https://github.com/sozu-proxy/sozu/tree/main/lib/src/protocol/http/parser) +- [TOML parser](https://github.com/joelself/tomllib) + +### Programming language parsers + +While programming language parsers are usually written manually for more +flexibility and performance, nom can be (and has been successfully) used +as a prototyping parser for a language. + +nom will get you started quickly with powerful custom error types, that you +can leverage with [nom_locate](https://github.com/fflorent/nom_locate) to +pinpoint the exact line and column of the error. No need for separate +tokenizing, lexing and parsing phases: nom can automatically handle whitespace +parsing, and construct an AST in place. + +Example projects: + +- [PHP VM](https://github.com/tagua-vm/parser) +- eve language prototype +- [xshade shading language](https://github.com/xshade-lang/xshade/) + +### Streaming formats + +While a lot of formats (and the code handling them) assume that they can fit +the complete data in memory, there are formats for which we only get a part +of the data at once, like network formats, or huge files. +nom has been designed for a correct behaviour with partial data: If there is +not enough data to decide, nom will tell you it needs more instead of silently +returning a wrong result. Whether your data comes entirely or in chunks, the +result should be the same. + +It allows you to build powerful, deterministic state machines for your protocols. + +Example projects: + +- [HTTP proxy](https://github.com/sozu-proxy/sozu/tree/main/lib/src/protocol/http/parser) +- [Using nom with generators](https://github.com/Geal/generator_nom) + +## Parser combinators + +Parser combinators are an approach to parsers that is very different from +software like [lex](https://en.wikipedia.org/wiki/Lex_(software)) and +[yacc](https://en.wikipedia.org/wiki/Yacc). Instead of writing the grammar +in a separate file and generating the corresponding code, you use very +small functions with very specific purpose, like "take 5 bytes", or +"recognize the word 'HTTP'", and assemble them in meaningful patterns +like "recognize 'HTTP', then a space, then a version". +The resulting code is small, and looks like the grammar you would have +written with other parser approaches. + +This has a few advantages: + +- The parsers are small and easy to write +- The parsers components are easy to reuse (if they're general enough, please add them to nom!) +- The parsers components are easy to test separately (unit tests and property-based tests) +- The parser combination code looks close to the grammar you would have written +- You can build partial parsers, specific to the data you need at the moment, and ignore the rest + +## Technical features + +nom parsers are for: +- [x] **byte-oriented**: The basic type is `&[u8]` and parsers will work as much as possible on byte array slices (but are not limited to them) +- [x] **bit-oriented**: nom can address a byte slice as a bit stream +- [x] **string-oriented**: The same kind of combinators can apply on UTF-8 strings as well +- [x] **zero-copy**: If a parser returns a subset of its input data, it will return a slice of that input, without copying +- [x] **streaming**: nom can work on partial data and detect when it needs more data to produce a correct result +- [x] **descriptive errors**: The parsers can aggregate a list of error codes with pointers to the incriminated input slice. Those error lists can be pattern matched to provide useful messages. +- [x] **custom error types**: You can provide a specific type to improve errors returned by parsers +- [x] **safe parsing**: nom leverages Rust's safe memory handling and powerful types, and parsers are routinely fuzzed and tested with real world data. So far, the only flaws found by fuzzing were in code written outside of nom +- [x] **speed**: Benchmarks have shown that nom parsers often outperform many parser combinators library like Parsec and attoparsec, some regular expression engines and even handwritten C parsers + +Some benchmarks are available on [Github](https://github.com/Geal/nom_benchmarks). + +## Rust version requirements (MSRV) + +The 7.0 series of nom supports **Rustc version 1.48 or greater**. It is known to work properly on Rust 1.41.1 but there is no guarantee it will stay the case through this major release. + +The current policy is that this will only be updated in the next major nom release. + +## Installation + +nom is available on [crates.io](https://crates.io/crates/nom) and can be included in your Cargo enabled project like this: + +```toml +[dependencies] +nom = "7" +``` + +There are a few compilation features: + +* `alloc`: (activated by default) if disabled, nom can work in `no_std` builds without memory allocators. If enabled, combinators that allocate (like `many0`) will be available +* `std`: (activated by default, activates `alloc` too) if disabled, nom can work in `no_std` builds + +You can configure those features like this: + +```toml +[dependencies.nom] +version = "7" +default-features = false +features = ["alloc"] +``` + +# Related projects + +- [Get line and column info in nom's input type](https://github.com/fflorent/nom_locate) +- [Using nom as lexer and parser](https://github.com/Rydgel/monkey-rust) + +# Parsers written with nom + +Here is a (non exhaustive) list of known projects using nom: + +- Text file formats: [Ceph Crush](https://github.com/cholcombe973/crushtool), +[Cronenberg](https://github.com/ayrat555/cronenberg), +[XFS Runtime Stats](https://github.com/ChrisMacNaughton/xfs-rs), +[CSV](https://github.com/GuillaumeGomez/csv-parser), +[FASTA](https://github.com/TianyiShi2001/nom-fasta), +[FASTQ](https://github.com/elij/fastq.rs), +[INI](https://github.com/Geal/nom/blob/main/tests/ini.rs), +[ISO 8601 dates](https://github.com/badboy/iso8601), +[libconfig-like configuration file format](https://github.com/filipegoncalves/rust-config), +[Web archive](https://github.com/sbeckeriv/warc_nom_parser), +[PDB](https://github.com/TianyiShi2001/nom-pdb), +[proto files](https://github.com/tafia/protobuf-parser), +[Fountain screenplay markup](https://github.com/adamchalmers/fountain-rs), +[vimwiki](https://github.com/chipsenkbeil/vimwiki-server/tree/master/vimwiki) & [vimwiki_macros](https://github.com/chipsenkbeil/vimwiki-server/tree/master/vimwiki_macros) +- Programming languages: +[PHP](https://github.com/tagua-vm/parser), +[Basic Calculator](https://github.com/balajisivaraman/basic_calculator_rs), +[GLSL](https://github.com/phaazon/glsl), +[Lua](https://github.com/doomrobo/nom-lua53), +[Python](https://github.com/ProgVal/rust-python-parser), +[SQL](https://github.com/ms705/nom-sql), +[Elm](https://github.com/cout970/Elm-interpreter), +[SystemVerilog](https://github.com/dalance/sv-parser), +[Turtle](https://github.com/vandenoever/rome/tree/master/src/io/turtle), +[CSML](https://github.com/CSML-by-Clevy/csml-interpreter), +[Wasm](https://github.com/Strytyp/wasm-nom), +[Pseudocode](https://github.com/Gungy2/pseudocode) +[Filter for MeiliSearch](https://github.com/meilisearch/meilisearch) +- Interface definition formats: [Thrift](https://github.com/thehydroimpulse/thrust) +- Audio, video and image formats: +[GIF](https://github.com/Geal/gif.rs), +[MagicaVoxel .vox](https://github.com/davidedmonds/dot_vox), +[midi](https://github.com/derekdreery/nom-midi-rs), +[SWF](https://github.com/open-flash/swf-parser), +[WAVE](http://github.com/noise-Labs/wave), +[Matroska (MKV)](https://github.com/rust-av/matroska) +- Document formats: +[TAR](https://github.com/Keruspe/tar-parser.rs), +[GZ](https://github.com/nharward/nom-gzip), +[GDSII](https://github.com/erihsu/gds2-io) +- Cryptographic formats: +[X.509](https://github.com/rusticata/x509-parser) +- Network protocol formats: +[Bencode](https://github.com/jbaum98/bencode.rs), +[D-Bus](https://github.com/toshokan/misato), +[DHCP](https://github.com/rusticata/dhcp-parser), +[HTTP](https://github.com/sozu-proxy/sozu/tree/main/lib/src/protocol/http), +[URI](https://github.com/santifa/rrp/blob/master/src/uri.rs), +[IMAP](https://github.com/djc/tokio-imap), +[IRC](https://github.com/Detegr/RBot-parser), +[Pcap-NG](https://github.com/richo/pcapng-rs), +[Pcap](https://github.com/ithinuel/pcap-rs), +[Pcap + PcapNG](https://github.com/rusticata/pcap-parser), +[IKEv2](https://github.com/rusticata/ipsec-parser), +[NTP](https://github.com/rusticata/ntp-parser), +[SNMP](https://github.com/rusticata/snmp-parser), +[Kerberos v5](https://github.com/rusticata/kerberos-parser), +[DER](https://github.com/rusticata/der-parser), +[TLS](https://github.com/rusticata/tls-parser), +[IPFIX / Netflow v10](https://github.com/dominotree/rs-ipfix), +[GTP](https://github.com/fuerstenau/gorrosion-gtp), +[SIP](https://github.com/armatusmiles/sipcore/tree/master/crates/sipmsg), +[Prometheus](https://github.com/timberio/vector/blob/master/lib/prometheus-parser/src/line.rs) +- Language specifications: +[BNF](https://github.com/snewt/bnf) +- Misc formats: +[Gameboy ROM](https://github.com/MarkMcCaskey/gameboy-rom-parser), +[ANT FIT](https://github.com/stadelmanma/fitparse-rs), +[Version Numbers](https://github.com/fosskers/rs-versions), +[Telcordia/Bellcore SR-4731 SOR OTDR files](https://github.com/JamesHarrison/otdrs), +[MySQL binary log](https://github.com/PrivateRookie/boxercrab), +[URI](https://github.com/Skasselbard/nom-uri), +[Furigana](https://github.com/sachaarbonel/furigana.rs), +[Wordle Result](https://github.com/Fyko/wordle-stats/tree/main/parser) + +Want to create a new parser using `nom`? A list of not yet implemented formats is available [here](https://github.com/Geal/nom/issues/14). + +Want to add your parser here? Create a pull request for it! + +# Contributors + +nom is the fruit of the work of many contributors over the years, many thanks for your help! + +<a href="https://github.com/geal/nom/graphs/contributors"> + <img src="https://contributors-img.web.app/image?repo=geal/nom" /> +</a> diff --git a/third_party/rust/nom/doc/nom_recipes.md b/third_party/rust/nom/doc/nom_recipes.md new file mode 100644 index 0000000000..e8626344a7 --- /dev/null +++ b/third_party/rust/nom/doc/nom_recipes.md @@ -0,0 +1,395 @@ +# Nom Recipes + +These are short recipes for accomplishing common tasks with nom. + +* [Whitespace](#whitespace) + + [Wrapper combinators that eat whitespace before and after a parser](#wrapper-combinators-that-eat-whitespace-before-and-after-a-parser) +* [Comments](#comments) + + [`// C++/EOL-style comments`](#-ceol-style-comments) + + [`/* C-style comments */`](#-c-style-comments-) +* [Identifiers](#identifiers) + + [`Rust-Style Identifiers`](#rust-style-identifiers) +* [Literal Values](#literal-values) + + [Escaped Strings](#escaped-strings) + + [Integers](#integers) + - [Hexadecimal](#hexadecimal) + - [Octal](#octal) + - [Binary](#binary) + - [Decimal](#decimal) + + [Floating Point Numbers](#floating-point-numbers) + +## Whitespace + + + +### Wrapper combinators that eat whitespace before and after a parser + +```rust +use nom::{ + IResult, + error::ParseError, + combinator::value, + sequence::delimited, + character::complete::multispace0, +}; + +/// A combinator that takes a parser `inner` and produces a parser that also consumes both leading and +/// trailing whitespace, returning the output of `inner`. +fn ws<'a, F: 'a, O, E: ParseError<&'a str>>(inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O, E> + where + F: Fn(&'a str) -> IResult<&'a str, O, E>, +{ + delimited( + multispace0, + inner, + multispace0 + ) +} +``` + +To eat only trailing whitespace, replace `delimited(...)` with `terminated(&inner, multispace0)`. +Likewise, the eat only leading whitespace, replace `delimited(...)` with `preceded(multispace0, +&inner)`. You can use your own parser instead of `multispace0` if you want to skip a different set +of lexemes. + +## Comments + +### `// C++/EOL-style comments` + +This version uses `%` to start a comment, does not consume the newline character, and returns an +output of `()`. + +```rust +use nom::{ + IResult, + error::ParseError, + combinator::value, + sequence::pair, + bytes::complete::is_not, + character::complete::char, +}; + +pub fn peol_comment<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, (), E> +{ + value( + (), // Output is thrown away. + pair(char('%'), is_not("\n\r")) + )(i) +} +``` + +### `/* C-style comments */` + +Inline comments surrounded with sentinel tags `(*` and `*)`. This version returns an output of `()` +and does not handle nested comments. + +```rust +use nom::{ + IResult, + error::ParseError, + combinator::value, + sequence::tuple, + bytes::complete::{tag, take_until}, +}; + +pub fn pinline_comment<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, (), E> { + value( + (), // Output is thrown away. + tuple(( + tag("(*"), + take_until("*)"), + tag("*)") + )) + )(i) +} +``` + +## Identifiers + +### `Rust-Style Identifiers` + +Parsing identifiers that may start with a letter (or underscore) and may contain underscores, +letters and numbers may be parsed like this: + +```rust +use nom::{ + IResult, + branch::alt, + multi::many0_count, + combinator::recognize, + sequence::pair, + character::complete::{alpha1, alphanumeric1}, + bytes::complete::tag, +}; + +pub fn identifier(input: &str) -> IResult<&str, &str> { + recognize( + pair( + alt((alpha1, tag("_"))), + many0_count(alt((alphanumeric1, tag("_")))) + ) + )(input) +} +``` + +Let's say we apply this to the identifier `hello_world123abc`. The first `alt` parser would +recognize `h`. The `pair` combinator ensures that `ello_world123abc` will be piped to the next +`alphanumeric0` parser, which recognizes every remaining character. However, the `pair` combinator +returns a tuple of the results of its sub-parsers. The `recognize` parser produces a `&str` of the +input text that was parsed, which in this case is the entire `&str` `hello_world123abc`. + +## Literal Values + +### Escaped Strings + +This is [one of the examples](https://github.com/Geal/nom/blob/main/examples/string.rs) in the +examples directory. + +### Integers + +The following recipes all return string slices rather than integer values. How to obtain an +integer value instead is demonstrated for hexadecimal integers. The others are similar. + +The parsers allow the grouping character `_`, which allows one to group the digits by byte, for +example: `0xA4_3F_11_28`. If you prefer to exclude the `_` character, the lambda to convert from a +string slice to an integer value is slightly simpler. You can also strip the `_` from the string +slice that is returned, which is demonstrated in the second hexdecimal number parser. + +If you wish to limit the number of digits in a valid integer literal, replace `many1` with +`many_m_n` in the recipes. + +#### Hexadecimal + +The parser outputs the string slice of the digits without the leading `0x`/`0X`. + +```rust +use nom::{ + IResult, + branch::alt, + multi::{many0, many1}, + combinator::recognize, + sequence::{preceded, terminated}, + character::complete::{char, one_of}, + bytes::complete::tag, +}; + +fn hexadecimal(input: &str) -> IResult<&str, &str> { // <'a, E: ParseError<&'a str>> + preceded( + alt((tag("0x"), tag("0X"))), + recognize( + many1( + terminated(one_of("0123456789abcdefABCDEF"), many0(char('_'))) + ) + ) + )(input) +} +``` + +If you want it to return the integer value instead, use map: + +```rust +use nom::{ + IResult, + branch::alt, + multi::{many0, many1}, + combinator::{map_res, recognize}, + sequence::{preceded, terminated}, + character::complete::{char, one_of}, + bytes::complete::tag, +}; + +fn hexadecimal_value(input: &str) -> IResult<&str, i64> { + map_res( + preceded( + alt((tag("0x"), tag("0X"))), + recognize( + many1( + terminated(one_of("0123456789abcdefABCDEF"), many0(char('_'))) + ) + ) + ), + |out: &str| i64::from_str_radix(&str::replace(&out, "_", ""), 16) + )(input) +} +``` + +#### Octal + +```rust +use nom::{ + IResult, + branch::alt, + multi::{many0, many1}, + combinator::recognize, + sequence::{preceded, terminated}, + character::complete::{char, one_of}, + bytes::complete::tag, +}; + +fn octal(input: &str) -> IResult<&str, &str> { + preceded( + alt((tag("0o"), tag("0O"))), + recognize( + many1( + terminated(one_of("01234567"), many0(char('_'))) + ) + ) + )(input) +} +``` + +#### Binary + +```rust +use nom::{ + IResult, + branch::alt, + multi::{many0, many1}, + combinator::recognize, + sequence::{preceded, terminated}, + character::complete::{char, one_of}, + bytes::complete::tag, +}; + +fn binary(input: &str) -> IResult<&str, &str> { + preceded( + alt((tag("0b"), tag("0B"))), + recognize( + many1( + terminated(one_of("01"), many0(char('_'))) + ) + ) + )(input) +} +``` + +#### Decimal + +```rust +use nom::{ + IResult, + multi::{many0, many1}, + combinator::recognize, + sequence::terminated, + character::complete::{char, one_of}, +}; + +fn decimal(input: &str) -> IResult<&str, &str> { + recognize( + many1( + terminated(one_of("0123456789"), many0(char('_'))) + ) + )(input) +} +``` + +### Floating Point Numbers + +The following is adapted from [the Python parser by Valentin Lorentz (ProgVal)](https://github.com/ProgVal/rust-python-parser/blob/master/src/numbers.rs). + +```rust +use nom::{ + IResult, + branch::alt, + multi::{many0, many1}, + combinator::{opt, recognize}, + sequence::{preceded, terminated, tuple}, + character::complete::{char, one_of}, +}; + +fn float(input: &str) -> IResult<&str, &str> { + alt(( + // Case one: .42 + recognize( + tuple(( + char('.'), + decimal, + opt(tuple(( + one_of("eE"), + opt(one_of("+-")), + decimal + ))) + )) + ) + , // Case two: 42e42 and 42.42e42 + recognize( + tuple(( + decimal, + opt(preceded( + char('.'), + decimal, + )), + one_of("eE"), + opt(one_of("+-")), + decimal + )) + ) + , // Case three: 42. and 42.42 + recognize( + tuple(( + decimal, + char('.'), + opt(decimal) + )) + ) + ))(input) +} + +fn decimal(input: &str) -> IResult<&str, &str> { + recognize( + many1( + terminated(one_of("0123456789"), many0(char('_'))) + ) + )(input) +} +``` + +# implementing FromStr + +The [FromStr trait](https://doc.rust-lang.org/std/str/trait.FromStr.html) provides +a common interface to parse from a string. + +```rust +use nom::{ + IResult, Finish, error::Error, + bytes::complete::{tag, take_while}, +}; +use std::str::FromStr; + +// will recognize the name in "Hello, name!" +fn parse_name(input: &str) -> IResult<&str, &str> { + let (i, _) = tag("Hello, ")(input)?; + let (i, name) = take_while(|c:char| c.is_alphabetic())(i)?; + let (i, _) = tag("!")(i)?; + + Ok((i, name)) +} + +// with FromStr, the result cannot be a reference to the input, it must be owned +#[derive(Debug)] +pub struct Name(pub String); + +impl FromStr for Name { + // the error must be owned as well + type Err = Error<String>; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + match parse_name(s).finish() { + Ok((_remaining, name)) => Ok(Name(name.to_string())), + Err(Error { input, code }) => Err(Error { + input: input.to_string(), + code, + }) + } + } +} + +fn main() { + // parsed: Ok(Name("nom")) + println!("parsed: {:?}", "Hello, nom!".parse::<Name>()); + + // parsed: Err(Error { input: "123!", code: Tag }) + println!("parsed: {:?}", "Hello, 123!".parse::<Name>()); +} +``` + diff --git a/third_party/rust/nom/src/bits/complete.rs b/third_party/rust/nom/src/bits/complete.rs new file mode 100644 index 0000000000..bf36dcc2aa --- /dev/null +++ b/third_party/rust/nom/src/bits/complete.rs @@ -0,0 +1,197 @@ +//! Bit level parsers +//! + +use crate::error::{ErrorKind, ParseError}; +use crate::internal::{Err, IResult}; +use crate::lib::std::ops::{AddAssign, Div, RangeFrom, Shl, Shr}; +use crate::traits::{InputIter, InputLength, Slice, ToUsize}; + +/// Generates a parser taking `count` bits +/// +/// # Example +/// ```rust +/// # use nom::bits::complete::take; +/// # use nom::IResult; +/// # use nom::error::{Error, ErrorKind}; +/// // Input is a tuple of (input: I, bit_offset: usize) +/// fn parser(input: (&[u8], usize), count: usize)-> IResult<(&[u8], usize), u8> { +/// take(count)(input) +/// } +/// +/// // Consumes 0 bits, returns 0 +/// assert_eq!(parser(([0b00010010].as_ref(), 0), 0), Ok((([0b00010010].as_ref(), 0), 0))); +/// +/// // Consumes 4 bits, returns their values and increase offset to 4 +/// assert_eq!(parser(([0b00010010].as_ref(), 0), 4), Ok((([0b00010010].as_ref(), 4), 0b00000001))); +/// +/// // Consumes 4 bits, offset is 4, returns their values and increase offset to 0 of next byte +/// assert_eq!(parser(([0b00010010].as_ref(), 4), 4), Ok((([].as_ref(), 0), 0b00000010))); +/// +/// // Tries to consume 12 bits but only 8 are available +/// assert_eq!(parser(([0b00010010].as_ref(), 0), 12), Err(nom::Err::Error(Error{input: ([0b00010010].as_ref(), 0), code: ErrorKind::Eof }))); +/// ``` +pub fn take<I, O, C, E: ParseError<(I, usize)>>( + count: C, +) -> impl Fn((I, usize)) -> IResult<(I, usize), O, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, + C: ToUsize, + O: From<u8> + AddAssign + Shl<usize, Output = O> + Shr<usize, Output = O>, +{ + let count = count.to_usize(); + move |(input, bit_offset): (I, usize)| { + if count == 0 { + Ok(((input, bit_offset), 0u8.into())) + } else { + let cnt = (count + bit_offset).div(8); + if input.input_len() * 8 < count + bit_offset { + Err(Err::Error(E::from_error_kind( + (input, bit_offset), + ErrorKind::Eof, + ))) + } else { + let mut acc: O = 0_u8.into(); + let mut offset: usize = bit_offset; + let mut remaining: usize = count; + let mut end_offset: usize = 0; + + for byte in input.iter_elements().take(cnt + 1) { + if remaining == 0 { + break; + } + let val: O = if offset == 0 { + byte.into() + } else { + ((byte << offset) as u8 >> offset).into() + }; + + if remaining < 8 - offset { + acc += val >> (8 - offset - remaining); + end_offset = remaining + offset; + break; + } else { + acc += val << (remaining - (8 - offset)); + remaining -= 8 - offset; + offset = 0; + } + } + Ok(((input.slice(cnt..), end_offset), acc)) + } + } + } +} + +/// Generates a parser taking `count` bits and comparing them to `pattern` +pub fn tag<I, O, C, E: ParseError<(I, usize)>>( + pattern: O, + count: C, +) -> impl Fn((I, usize)) -> IResult<(I, usize), O, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength + Clone, + C: ToUsize, + O: From<u8> + AddAssign + Shl<usize, Output = O> + Shr<usize, Output = O> + PartialEq, +{ + let count = count.to_usize(); + move |input: (I, usize)| { + let inp = input.clone(); + + take(count)(input).and_then(|(i, o)| { + if pattern == o { + Ok((i, o)) + } else { + Err(Err::Error(error_position!(inp, ErrorKind::TagBits))) + } + }) + } +} + +/// Parses one specific bit as a bool. +/// +/// # Example +/// ```rust +/// # use nom::bits::complete::bool; +/// # use nom::IResult; +/// # use nom::error::{Error, ErrorKind}; +/// +/// fn parse(input: (&[u8], usize)) -> IResult<(&[u8], usize), bool> { +/// bool(input) +/// } +/// +/// assert_eq!(parse(([0b10000000].as_ref(), 0)), Ok((([0b10000000].as_ref(), 1), true))); +/// assert_eq!(parse(([0b10000000].as_ref(), 1)), Ok((([0b10000000].as_ref(), 2), false))); +/// ``` +pub fn bool<I, E: ParseError<(I, usize)>>(input: (I, usize)) -> IResult<(I, usize), bool, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let (res, bit): (_, u32) = take(1usize)(input)?; + Ok((res, bit != 0)) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_take_0() { + let input = [0b00010010].as_ref(); + let count = 0usize; + assert_eq!(count, 0usize); + let offset = 0usize; + + let result: crate::IResult<(&[u8], usize), usize> = take(count)((input, offset)); + + assert_eq!(result, Ok(((input, offset), 0))); + } + + #[test] + fn test_take_eof() { + let input = [0b00010010].as_ref(); + + let result: crate::IResult<(&[u8], usize), usize> = take(1usize)((input, 8)); + + assert_eq!( + result, + Err(crate::Err::Error(crate::error::Error { + input: (input, 8), + code: ErrorKind::Eof + })) + ) + } + + #[test] + fn test_take_span_over_multiple_bytes() { + let input = [0b00010010, 0b00110100, 0b11111111, 0b11111111].as_ref(); + + let result: crate::IResult<(&[u8], usize), usize> = take(24usize)((input, 4)); + + assert_eq!( + result, + Ok((([0b11111111].as_ref(), 4), 0b1000110100111111111111)) + ); + } + + #[test] + fn test_bool_0() { + let input = [0b10000000].as_ref(); + + let result: crate::IResult<(&[u8], usize), bool> = bool((input, 0)); + + assert_eq!(result, Ok(((input, 1), true))); + } + + #[test] + fn test_bool_eof() { + let input = [0b10000000].as_ref(); + + let result: crate::IResult<(&[u8], usize), bool> = bool((input, 8)); + + assert_eq!( + result, + Err(crate::Err::Error(crate::error::Error { + input: (input, 8), + code: ErrorKind::Eof + })) + ); + } +} diff --git a/third_party/rust/nom/src/bits/mod.rs b/third_party/rust/nom/src/bits/mod.rs new file mode 100644 index 0000000000..0d3f73db25 --- /dev/null +++ b/third_party/rust/nom/src/bits/mod.rs @@ -0,0 +1,179 @@ +//! Bit level parsers +//! + +pub mod complete; +pub mod streaming; + +use crate::error::{ErrorKind, ParseError}; +use crate::internal::{Err, IResult, Needed, Parser}; +use crate::lib::std::ops::RangeFrom; +use crate::traits::{ErrorConvert, Slice}; + +/// Converts a byte-level input to a bit-level input, for consumption by a parser that uses bits. +/// +/// Afterwards, the input is converted back to a byte-level parser, with any remaining bits thrown +/// away. +/// +/// # Example +/// ``` +/// use nom::bits::{bits, streaming::take}; +/// use nom::error::Error; +/// use nom::sequence::tuple; +/// use nom::IResult; +/// +/// fn parse(input: &[u8]) -> IResult<&[u8], (u8, u8)> { +/// bits::<_, _, Error<(&[u8], usize)>, _, _>(tuple((take(4usize), take(8usize))))(input) +/// } +/// +/// let input = &[0x12, 0x34, 0xff, 0xff]; +/// +/// let output = parse(input).expect("We take 1.5 bytes and the input is longer than 2 bytes"); +/// +/// // The first byte is consumed, the second byte is partially consumed and dropped. +/// let remaining = output.0; +/// assert_eq!(remaining, [0xff, 0xff]); +/// +/// let parsed = output.1; +/// assert_eq!(parsed.0, 0x01); +/// assert_eq!(parsed.1, 0x23); +/// ``` +pub fn bits<I, O, E1, E2, P>(mut parser: P) -> impl FnMut(I) -> IResult<I, O, E2> +where + E1: ParseError<(I, usize)> + ErrorConvert<E2>, + E2: ParseError<I>, + I: Slice<RangeFrom<usize>>, + P: Parser<(I, usize), O, E1>, +{ + move |input: I| match parser.parse((input, 0)) { + Ok(((rest, offset), result)) => { + // If the next byte has been partially read, it will be sliced away as well. + // The parser functions might already slice away all fully read bytes. + // That's why `offset / 8` isn't necessarily needed at all times. + let remaining_bytes_index = offset / 8 + if offset % 8 == 0 { 0 } else { 1 }; + Ok((rest.slice(remaining_bytes_index..), result)) + } + Err(Err::Incomplete(n)) => Err(Err::Incomplete(n.map(|u| u.get() / 8 + 1))), + Err(Err::Error(e)) => Err(Err::Error(e.convert())), + Err(Err::Failure(e)) => Err(Err::Failure(e.convert())), + } +} + +/// Counterpart to `bits`, `bytes` transforms its bit stream input into a byte slice for the underlying +/// parser, allowing byte-slice parsers to work on bit streams. +/// +/// A partial byte remaining in the input will be ignored and the given parser will start parsing +/// at the next full byte. +/// +/// ``` +/// use nom::bits::{bits, bytes, streaming::take}; +/// use nom::combinator::rest; +/// use nom::error::Error; +/// use nom::sequence::tuple; +/// use nom::IResult; +/// +/// fn parse(input: &[u8]) -> IResult<&[u8], (u8, u8, &[u8])> { +/// bits::<_, _, Error<(&[u8], usize)>, _, _>(tuple(( +/// take(4usize), +/// take(8usize), +/// bytes::<_, _, Error<&[u8]>, _, _>(rest) +/// )))(input) +/// } +/// +/// let input = &[0x12, 0x34, 0xff, 0xff]; +/// +/// assert_eq!(parse( input ), Ok(( &[][..], (0x01, 0x23, &[0xff, 0xff][..]) ))); +/// ``` +pub fn bytes<I, O, E1, E2, P>(mut parser: P) -> impl FnMut((I, usize)) -> IResult<(I, usize), O, E2> +where + E1: ParseError<I> + ErrorConvert<E2>, + E2: ParseError<(I, usize)>, + I: Slice<RangeFrom<usize>> + Clone, + P: Parser<I, O, E1>, +{ + move |(input, offset): (I, usize)| { + let inner = if offset % 8 != 0 { + input.slice((1 + offset / 8)..) + } else { + input.slice((offset / 8)..) + }; + let i = (input, offset); + match parser.parse(inner) { + Ok((rest, res)) => Ok(((rest, 0), res)), + Err(Err::Incomplete(Needed::Unknown)) => Err(Err::Incomplete(Needed::Unknown)), + Err(Err::Incomplete(Needed::Size(sz))) => Err(match sz.get().checked_mul(8) { + Some(v) => Err::Incomplete(Needed::new(v)), + None => Err::Failure(E2::from_error_kind(i, ErrorKind::TooLarge)), + }), + Err(Err::Error(e)) => Err(Err::Error(e.convert())), + Err(Err::Failure(e)) => Err(Err::Failure(e.convert())), + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::bits::streaming::take; + use crate::error::Error; + use crate::sequence::tuple; + + #[test] + /// Take the `bits` function and assert that remaining bytes are correctly returned, if the + /// previous bytes are fully consumed + fn test_complete_byte_consumption_bits() { + let input = &[0x12, 0x34, 0x56, 0x78]; + + // Take 3 bit slices with sizes [4, 8, 4]. + let result: IResult<&[u8], (u8, u8, u8)> = + bits::<_, _, Error<(&[u8], usize)>, _, _>(tuple((take(4usize), take(8usize), take(4usize))))( + input, + ); + + let output = result.expect("We take 2 bytes and the input is longer than 2 bytes"); + + let remaining = output.0; + assert_eq!(remaining, [0x56, 0x78]); + + let parsed = output.1; + assert_eq!(parsed.0, 0x01); + assert_eq!(parsed.1, 0x23); + assert_eq!(parsed.2, 0x04); + } + + #[test] + /// Take the `bits` function and assert that remaining bytes are correctly returned, if the + /// previous bytes are NOT fully consumed. Partially consumed bytes are supposed to be dropped. + /// I.e. if we consume 1.5 bytes of 4 bytes, 2 bytes will be returned, bits 13-16 will be + /// dropped. + fn test_partial_byte_consumption_bits() { + let input = &[0x12, 0x34, 0x56, 0x78]; + + // Take bit slices with sizes [4, 8]. + let result: IResult<&[u8], (u8, u8)> = + bits::<_, _, Error<(&[u8], usize)>, _, _>(tuple((take(4usize), take(8usize))))(input); + + let output = result.expect("We take 1.5 bytes and the input is longer than 2 bytes"); + + let remaining = output.0; + assert_eq!(remaining, [0x56, 0x78]); + + let parsed = output.1; + assert_eq!(parsed.0, 0x01); + assert_eq!(parsed.1, 0x23); + } + + #[test] + #[cfg(feature = "std")] + /// Ensure that in Incomplete error is thrown, if too few bytes are passed for a given parser. + fn test_incomplete_bits() { + let input = &[0x12]; + + // Take bit slices with sizes [4, 8]. + let result: IResult<&[u8], (u8, u8)> = + bits::<_, _, Error<(&[u8], usize)>, _, _>(tuple((take(4usize), take(8usize))))(input); + + assert!(result.is_err()); + let error = result.err().unwrap(); + assert_eq!("Parsing requires 2 bytes/chars", error.to_string()); + } +} diff --git a/third_party/rust/nom/src/bits/streaming.rs b/third_party/rust/nom/src/bits/streaming.rs new file mode 100644 index 0000000000..a7c8d0a67b --- /dev/null +++ b/third_party/rust/nom/src/bits/streaming.rs @@ -0,0 +1,170 @@ +//! Bit level parsers +//! + +use crate::error::{ErrorKind, ParseError}; +use crate::internal::{Err, IResult, Needed}; +use crate::lib::std::ops::{AddAssign, Div, RangeFrom, Shl, Shr}; +use crate::traits::{InputIter, InputLength, Slice, ToUsize}; + +/// Generates a parser taking `count` bits +pub fn take<I, O, C, E: ParseError<(I, usize)>>( + count: C, +) -> impl Fn((I, usize)) -> IResult<(I, usize), O, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, + C: ToUsize, + O: From<u8> + AddAssign + Shl<usize, Output = O> + Shr<usize, Output = O>, +{ + let count = count.to_usize(); + move |(input, bit_offset): (I, usize)| { + if count == 0 { + Ok(((input, bit_offset), 0u8.into())) + } else { + let cnt = (count + bit_offset).div(8); + if input.input_len() * 8 < count + bit_offset { + Err(Err::Incomplete(Needed::new(count as usize))) + } else { + let mut acc: O = 0_u8.into(); + let mut offset: usize = bit_offset; + let mut remaining: usize = count; + let mut end_offset: usize = 0; + + for byte in input.iter_elements().take(cnt + 1) { + if remaining == 0 { + break; + } + let val: O = if offset == 0 { + byte.into() + } else { + ((byte << offset) as u8 >> offset).into() + }; + + if remaining < 8 - offset { + acc += val >> (8 - offset - remaining); + end_offset = remaining + offset; + break; + } else { + acc += val << (remaining - (8 - offset)); + remaining -= 8 - offset; + offset = 0; + } + } + Ok(((input.slice(cnt..), end_offset), acc)) + } + } + } +} + +/// Generates a parser taking `count` bits and comparing them to `pattern` +pub fn tag<I, O, C, E: ParseError<(I, usize)>>( + pattern: O, + count: C, +) -> impl Fn((I, usize)) -> IResult<(I, usize), O, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength + Clone, + C: ToUsize, + O: From<u8> + AddAssign + Shl<usize, Output = O> + Shr<usize, Output = O> + PartialEq, +{ + let count = count.to_usize(); + move |input: (I, usize)| { + let inp = input.clone(); + + take(count)(input).and_then(|(i, o)| { + if pattern == o { + Ok((i, o)) + } else { + Err(Err::Error(error_position!(inp, ErrorKind::TagBits))) + } + }) + } +} + +/// Parses one specific bit as a bool. +/// +/// # Example +/// ```rust +/// # use nom::bits::complete::bool; +/// # use nom::IResult; +/// # use nom::error::{Error, ErrorKind}; +/// +/// fn parse(input: (&[u8], usize)) -> IResult<(&[u8], usize), bool> { +/// bool(input) +/// } +/// +/// assert_eq!(parse(([0b10000000].as_ref(), 0)), Ok((([0b10000000].as_ref(), 1), true))); +/// assert_eq!(parse(([0b10000000].as_ref(), 1)), Ok((([0b10000000].as_ref(), 2), false))); +/// ``` +pub fn bool<I, E: ParseError<(I, usize)>>(input: (I, usize)) -> IResult<(I, usize), bool, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let (res, bit): (_, u32) = take(1usize)(input)?; + Ok((res, bit != 0)) +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_take_0() { + let input = [].as_ref(); + let count = 0usize; + assert_eq!(count, 0usize); + let offset = 0usize; + + let result: crate::IResult<(&[u8], usize), usize> = take(count)((input, offset)); + + assert_eq!(result, Ok(((input, offset), 0))); + } + + #[test] + fn test_tag_ok() { + let input = [0b00011111].as_ref(); + let offset = 0usize; + let bits_to_take = 4usize; + let value_to_tag = 0b0001; + + let result: crate::IResult<(&[u8], usize), usize> = + tag(value_to_tag, bits_to_take)((input, offset)); + + assert_eq!(result, Ok(((input, bits_to_take), value_to_tag))); + } + + #[test] + fn test_tag_err() { + let input = [0b00011111].as_ref(); + let offset = 0usize; + let bits_to_take = 4usize; + let value_to_tag = 0b1111; + + let result: crate::IResult<(&[u8], usize), usize> = + tag(value_to_tag, bits_to_take)((input, offset)); + + assert_eq!( + result, + Err(crate::Err::Error(crate::error::Error { + input: (input, offset), + code: ErrorKind::TagBits + })) + ); + } + + #[test] + fn test_bool_0() { + let input = [0b10000000].as_ref(); + + let result: crate::IResult<(&[u8], usize), bool> = bool((input, 0)); + + assert_eq!(result, Ok(((input, 1), true))); + } + + #[test] + fn test_bool_eof() { + let input = [0b10000000].as_ref(); + + let result: crate::IResult<(&[u8], usize), bool> = bool((input, 8)); + + assert_eq!(result, Err(crate::Err::Incomplete(Needed::new(1)))); + } +} diff --git a/third_party/rust/nom/src/branch/mod.rs b/third_party/rust/nom/src/branch/mod.rs new file mode 100644 index 0000000000..e03622cb0c --- /dev/null +++ b/third_party/rust/nom/src/branch/mod.rs @@ -0,0 +1,267 @@ +//! Choice combinators + +#[cfg(test)] +mod tests; + +use crate::error::ErrorKind; +use crate::error::ParseError; +use crate::internal::{Err, IResult, Parser}; + +/// Helper trait for the [alt()] combinator. +/// +/// This trait is implemented for tuples of up to 21 elements +pub trait Alt<I, O, E> { + /// Tests each parser in the tuple and returns the result of the first one that succeeds + fn choice(&mut self, input: I) -> IResult<I, O, E>; +} + +/// Tests a list of parsers one by one until one succeeds. +/// +/// It takes as argument a tuple of parsers. There is a maximum of 21 +/// parsers. If you need more, it is possible to nest them in other `alt` calls, +/// like this: `alt(parser_a, alt(parser_b, parser_c))` +/// +/// ```rust +/// # use nom::error_position; +/// # use nom::{Err,error::ErrorKind, Needed, IResult}; +/// use nom::character::complete::{alpha1, digit1}; +/// use nom::branch::alt; +/// # fn main() { +/// fn parser(input: &str) -> IResult<&str, &str> { +/// alt((alpha1, digit1))(input) +/// }; +/// +/// // the first parser, alpha1, recognizes the input +/// assert_eq!(parser("abc"), Ok(("", "abc"))); +/// +/// // the first parser returns an error, so alt tries the second one +/// assert_eq!(parser("123456"), Ok(("", "123456"))); +/// +/// // both parsers failed, and with the default error type, alt will return the last error +/// assert_eq!(parser(" "), Err(Err::Error(error_position!(" ", ErrorKind::Digit)))); +/// # } +/// ``` +/// +/// With a custom error type, it is possible to have alt return the error of the parser +/// that went the farthest in the input data +pub fn alt<I: Clone, O, E: ParseError<I>, List: Alt<I, O, E>>( + mut l: List, +) -> impl FnMut(I) -> IResult<I, O, E> { + move |i: I| l.choice(i) +} + +/// Helper trait for the [permutation()] combinator. +/// +/// This trait is implemented for tuples of up to 21 elements +pub trait Permutation<I, O, E> { + /// Tries to apply all parsers in the tuple in various orders until all of them succeed + fn permutation(&mut self, input: I) -> IResult<I, O, E>; +} + +/// Applies a list of parsers in any order. +/// +/// Permutation will succeed if all of the child parsers succeeded. +/// It takes as argument a tuple of parsers, and returns a +/// tuple of the parser results. +/// +/// ```rust +/// # use nom::{Err,error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::character::complete::{alpha1, digit1}; +/// use nom::branch::permutation; +/// # fn main() { +/// fn parser(input: &str) -> IResult<&str, (&str, &str)> { +/// permutation((alpha1, digit1))(input) +/// } +/// +/// // permutation recognizes alphabetic characters then digit +/// assert_eq!(parser("abc123"), Ok(("", ("abc", "123")))); +/// +/// // but also in inverse order +/// assert_eq!(parser("123abc"), Ok(("", ("abc", "123")))); +/// +/// // it will fail if one of the parsers failed +/// assert_eq!(parser("abc;"), Err(Err::Error(Error::new(";", ErrorKind::Digit)))); +/// # } +/// ``` +/// +/// The parsers are applied greedily: if there are multiple unapplied parsers +/// that could parse the next slice of input, the first one is used. +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, IResult}; +/// use nom::branch::permutation; +/// use nom::character::complete::{anychar, char}; +/// +/// fn parser(input: &str) -> IResult<&str, (char, char)> { +/// permutation((anychar, char('a')))(input) +/// } +/// +/// // anychar parses 'b', then char('a') parses 'a' +/// assert_eq!(parser("ba"), Ok(("", ('b', 'a')))); +/// +/// // anychar parses 'a', then char('a') fails on 'b', +/// // even though char('a') followed by anychar would succeed +/// assert_eq!(parser("ab"), Err(Err::Error(Error::new("b", ErrorKind::Char)))); +/// ``` +/// +pub fn permutation<I: Clone, O, E: ParseError<I>, List: Permutation<I, O, E>>( + mut l: List, +) -> impl FnMut(I) -> IResult<I, O, E> { + move |i: I| l.permutation(i) +} + +macro_rules! alt_trait( + ($first:ident $second:ident $($id: ident)+) => ( + alt_trait!(__impl $first $second; $($id)+); + ); + (__impl $($current:ident)*; $head:ident $($id: ident)+) => ( + alt_trait_impl!($($current)*); + + alt_trait!(__impl $($current)* $head; $($id)+); + ); + (__impl $($current:ident)*; $head:ident) => ( + alt_trait_impl!($($current)*); + alt_trait_impl!($($current)* $head); + ); +); + +macro_rules! alt_trait_impl( + ($($id:ident)+) => ( + impl< + Input: Clone, Output, Error: ParseError<Input>, + $($id: Parser<Input, Output, Error>),+ + > Alt<Input, Output, Error> for ( $($id),+ ) { + + fn choice(&mut self, input: Input) -> IResult<Input, Output, Error> { + match self.0.parse(input.clone()) { + Err(Err::Error(e)) => alt_trait_inner!(1, self, input, e, $($id)+), + res => res, + } + } + } + ); +); + +macro_rules! alt_trait_inner( + ($it:tt, $self:expr, $input:expr, $err:expr, $head:ident $($id:ident)+) => ( + match $self.$it.parse($input.clone()) { + Err(Err::Error(e)) => { + let err = $err.or(e); + succ!($it, alt_trait_inner!($self, $input, err, $($id)+)) + } + res => res, + } + ); + ($it:tt, $self:expr, $input:expr, $err:expr, $head:ident) => ( + Err(Err::Error(Error::append($input, ErrorKind::Alt, $err))) + ); +); + +alt_trait!(A B C D E F G H I J K L M N O P Q R S T U); + +// Manually implement Alt for (A,), the 1-tuple type +impl<Input, Output, Error: ParseError<Input>, A: Parser<Input, Output, Error>> + Alt<Input, Output, Error> for (A,) +{ + fn choice(&mut self, input: Input) -> IResult<Input, Output, Error> { + self.0.parse(input) + } +} + +macro_rules! permutation_trait( + ( + $name1:ident $ty1:ident $item1:ident + $name2:ident $ty2:ident $item2:ident + $($name3:ident $ty3:ident $item3:ident)* + ) => ( + permutation_trait!(__impl $name1 $ty1 $item1, $name2 $ty2 $item2; $($name3 $ty3 $item3)*); + ); + ( + __impl $($name:ident $ty:ident $item:ident),+; + $name1:ident $ty1:ident $item1:ident $($name2:ident $ty2:ident $item2:ident)* + ) => ( + permutation_trait_impl!($($name $ty $item),+); + permutation_trait!(__impl $($name $ty $item),+ , $name1 $ty1 $item1; $($name2 $ty2 $item2)*); + ); + (__impl $($name:ident $ty:ident $item:ident),+;) => ( + permutation_trait_impl!($($name $ty $item),+); + ); +); + +macro_rules! permutation_trait_impl( + ($($name:ident $ty:ident $item:ident),+) => ( + impl< + Input: Clone, $($ty),+ , Error: ParseError<Input>, + $($name: Parser<Input, $ty, Error>),+ + > Permutation<Input, ( $($ty),+ ), Error> for ( $($name),+ ) { + + fn permutation(&mut self, mut input: Input) -> IResult<Input, ( $($ty),+ ), Error> { + let mut res = ($(Option::<$ty>::None),+); + + loop { + let mut err: Option<Error> = None; + permutation_trait_inner!(0, self, input, res, err, $($name)+); + + // If we reach here, every iterator has either been applied before, + // or errored on the remaining input + if let Some(err) = err { + // There are remaining parsers, and all errored on the remaining input + return Err(Err::Error(Error::append(input, ErrorKind::Permutation, err))); + } + + // All parsers were applied + match res { + ($(Some($item)),+) => return Ok((input, ($($item),+))), + _ => unreachable!(), + } + } + } + } + ); +); + +macro_rules! permutation_trait_inner( + ($it:tt, $self:expr, $input:ident, $res:expr, $err:expr, $head:ident $($id:ident)*) => ( + if $res.$it.is_none() { + match $self.$it.parse($input.clone()) { + Ok((i, o)) => { + $input = i; + $res.$it = Some(o); + continue; + } + Err(Err::Error(e)) => { + $err = Some(match $err { + Some(err) => err.or(e), + None => e, + }); + } + Err(e) => return Err(e), + }; + } + succ!($it, permutation_trait_inner!($self, $input, $res, $err, $($id)*)); + ); + ($it:tt, $self:expr, $input:ident, $res:expr, $err:expr,) => (); +); + +permutation_trait!( + FnA A a + FnB B b + FnC C c + FnD D d + FnE E e + FnF F f + FnG G g + FnH H h + FnI I i + FnJ J j + FnK K k + FnL L l + FnM M m + FnN N n + FnO O o + FnP P p + FnQ Q q + FnR R r + FnS S s + FnT T t + FnU U u +); diff --git a/third_party/rust/nom/src/branch/tests.rs b/third_party/rust/nom/src/branch/tests.rs new file mode 100644 index 0000000000..ecd44407e9 --- /dev/null +++ b/third_party/rust/nom/src/branch/tests.rs @@ -0,0 +1,142 @@ +use crate::branch::{alt, permutation}; +use crate::bytes::streaming::tag; +use crate::error::ErrorKind; +use crate::internal::{Err, IResult, Needed}; +#[cfg(feature = "alloc")] +use crate::{ + error::ParseError, + lib::std::{ + fmt::Debug, + string::{String, ToString}, + }, +}; + +#[cfg(feature = "alloc")] +#[derive(Debug, Clone, PartialEq)] +pub struct ErrorStr(String); + +#[cfg(feature = "alloc")] +impl From<u32> for ErrorStr { + fn from(i: u32) -> Self { + ErrorStr(format!("custom error code: {}", i)) + } +} + +#[cfg(feature = "alloc")] +impl<'a> From<&'a str> for ErrorStr { + fn from(i: &'a str) -> Self { + ErrorStr(format!("custom error message: {}", i)) + } +} + +#[cfg(feature = "alloc")] +impl<I: Debug> ParseError<I> for ErrorStr { + fn from_error_kind(input: I, kind: ErrorKind) -> Self { + ErrorStr(format!("custom error message: ({:?}, {:?})", input, kind)) + } + + fn append(input: I, kind: ErrorKind, other: Self) -> Self { + ErrorStr(format!( + "custom error message: ({:?}, {:?}) - {:?}", + input, kind, other + )) + } +} + +#[cfg(feature = "alloc")] +#[test] +fn alt_test() { + fn work(input: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { + Ok((&b""[..], input)) + } + + #[allow(unused_variables)] + fn dont_work(input: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { + Err(Err::Error(ErrorStr("abcd".to_string()))) + } + + fn work2(input: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { + Ok((input, &b""[..])) + } + + fn alt1(i: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { + alt((dont_work, dont_work))(i) + } + fn alt2(i: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { + alt((dont_work, work))(i) + } + fn alt3(i: &[u8]) -> IResult<&[u8], &[u8], ErrorStr> { + alt((dont_work, dont_work, work2, dont_work))(i) + } + //named!(alt1, alt!(dont_work | dont_work)); + //named!(alt2, alt!(dont_work | work)); + //named!(alt3, alt!(dont_work | dont_work | work2 | dont_work)); + + let a = &b"abcd"[..]; + assert_eq!( + alt1(a), + Err(Err::Error(error_node_position!( + a, + ErrorKind::Alt, + ErrorStr("abcd".to_string()) + ))) + ); + assert_eq!(alt2(a), Ok((&b""[..], a))); + assert_eq!(alt3(a), Ok((a, &b""[..]))); + + fn alt4(i: &[u8]) -> IResult<&[u8], &[u8]> { + alt((tag("abcd"), tag("efgh")))(i) + } + let b = &b"efgh"[..]; + assert_eq!(alt4(a), Ok((&b""[..], a))); + assert_eq!(alt4(b), Ok((&b""[..], b))); +} + +#[test] +fn alt_incomplete() { + fn alt1(i: &[u8]) -> IResult<&[u8], &[u8]> { + alt((tag("a"), tag("bc"), tag("def")))(i) + } + + let a = &b""[..]; + assert_eq!(alt1(a), Err(Err::Incomplete(Needed::new(1)))); + let a = &b"b"[..]; + assert_eq!(alt1(a), Err(Err::Incomplete(Needed::new(1)))); + let a = &b"bcd"[..]; + assert_eq!(alt1(a), Ok((&b"d"[..], &b"bc"[..]))); + let a = &b"cde"[..]; + assert_eq!(alt1(a), Err(Err::Error(error_position!(a, ErrorKind::Tag)))); + let a = &b"de"[..]; + assert_eq!(alt1(a), Err(Err::Incomplete(Needed::new(1)))); + let a = &b"defg"[..]; + assert_eq!(alt1(a), Ok((&b"g"[..], &b"def"[..]))); +} + +#[test] +fn permutation_test() { + fn perm(i: &[u8]) -> IResult<&[u8], (&[u8], &[u8], &[u8])> { + permutation((tag("abcd"), tag("efg"), tag("hi")))(i) + } + + let expected = (&b"abcd"[..], &b"efg"[..], &b"hi"[..]); + + let a = &b"abcdefghijk"[..]; + assert_eq!(perm(a), Ok((&b"jk"[..], expected))); + let b = &b"efgabcdhijk"[..]; + assert_eq!(perm(b), Ok((&b"jk"[..], expected))); + let c = &b"hiefgabcdjk"[..]; + assert_eq!(perm(c), Ok((&b"jk"[..], expected))); + + let d = &b"efgxyzabcdefghi"[..]; + assert_eq!( + perm(d), + Err(Err::Error(error_node_position!( + &b"efgxyzabcdefghi"[..], + ErrorKind::Permutation, + error_position!(&b"xyzabcdefghi"[..], ErrorKind::Tag) + ))) + ); + + let e = &b"efgabc"[..]; + assert_eq!(perm(e), Err(Err::Incomplete(Needed::new(1)))); +} diff --git a/third_party/rust/nom/src/bytes/complete.rs b/third_party/rust/nom/src/bytes/complete.rs new file mode 100644 index 0000000000..a5442b53f7 --- /dev/null +++ b/third_party/rust/nom/src/bytes/complete.rs @@ -0,0 +1,756 @@ +//! Parsers recognizing bytes streams, complete input version + +use crate::error::ErrorKind; +use crate::error::ParseError; +use crate::internal::{Err, IResult, Parser}; +use crate::lib::std::ops::RangeFrom; +use crate::lib::std::result::Result::*; +use crate::traits::{ + Compare, CompareResult, FindSubstring, FindToken, InputIter, InputLength, InputTake, + InputTakeAtPosition, Slice, ToUsize, +}; + +/// Recognizes a pattern +/// +/// The input data will be compared to the tag combinator's argument and will return the part of +/// the input that matches the argument +/// +/// It will return `Err(Err::Error((_, ErrorKind::Tag)))` if the input doesn't match the pattern +/// # Example +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::bytes::complete::tag; +/// +/// fn parser(s: &str) -> IResult<&str, &str> { +/// tag("Hello")(s) +/// } +/// +/// assert_eq!(parser("Hello, World!"), Ok((", World!", "Hello"))); +/// assert_eq!(parser("Something"), Err(Err::Error(Error::new("Something", ErrorKind::Tag)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag)))); +/// ``` +pub fn tag<T, Input, Error: ParseError<Input>>( + tag: T, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputTake + Compare<T>, + T: InputLength + Clone, +{ + move |i: Input| { + let tag_len = tag.input_len(); + let t = tag.clone(); + let res: IResult<_, _, Error> = match i.compare(t) { + CompareResult::Ok => Ok(i.take_split(tag_len)), + _ => { + let e: ErrorKind = ErrorKind::Tag; + Err(Err::Error(Error::from_error_kind(i, e))) + } + }; + res + } +} + +/// Recognizes a case insensitive pattern. +/// +/// The input data will be compared to the tag combinator's argument and will return the part of +/// the input that matches the argument with no regard to case. +/// +/// It will return `Err(Err::Error((_, ErrorKind::Tag)))` if the input doesn't match the pattern. +/// # Example +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::bytes::complete::tag_no_case; +/// +/// fn parser(s: &str) -> IResult<&str, &str> { +/// tag_no_case("hello")(s) +/// } +/// +/// assert_eq!(parser("Hello, World!"), Ok((", World!", "Hello"))); +/// assert_eq!(parser("hello, World!"), Ok((", World!", "hello"))); +/// assert_eq!(parser("HeLlO, World!"), Ok((", World!", "HeLlO"))); +/// assert_eq!(parser("Something"), Err(Err::Error(Error::new("Something", ErrorKind::Tag)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag)))); +/// ``` +pub fn tag_no_case<T, Input, Error: ParseError<Input>>( + tag: T, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputTake + Compare<T>, + T: InputLength + Clone, +{ + move |i: Input| { + let tag_len = tag.input_len(); + let t = tag.clone(); + + let res: IResult<_, _, Error> = match (i).compare_no_case(t) { + CompareResult::Ok => Ok(i.take_split(tag_len)), + _ => { + let e: ErrorKind = ErrorKind::Tag; + Err(Err::Error(Error::from_error_kind(i, e))) + } + }; + res + } +} + +/// Parse till certain characters are met. +/// +/// The parser will return the longest slice till one of the characters of the combinator's argument are met. +/// +/// It doesn't consume the matched character. +/// +/// It will return a `Err::Error(("", ErrorKind::IsNot))` if the pattern wasn't met. +/// # Example +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::bytes::complete::is_not; +/// +/// fn not_space(s: &str) -> IResult<&str, &str> { +/// is_not(" \t\r\n")(s) +/// } +/// +/// assert_eq!(not_space("Hello, World!"), Ok((" World!", "Hello,"))); +/// assert_eq!(not_space("Sometimes\t"), Ok(("\t", "Sometimes"))); +/// assert_eq!(not_space("Nospace"), Ok(("", "Nospace"))); +/// assert_eq!(not_space(""), Err(Err::Error(Error::new("", ErrorKind::IsNot)))); +/// ``` +pub fn is_not<T, Input, Error: ParseError<Input>>( + arr: T, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputTakeAtPosition, + T: FindToken<<Input as InputTakeAtPosition>::Item>, +{ + move |i: Input| { + let e: ErrorKind = ErrorKind::IsNot; + i.split_at_position1_complete(|c| arr.find_token(c), e) + } +} + +/// Returns the longest slice of the matches the pattern. +/// +/// The parser will return the longest slice consisting of the characters in provided in the +/// combinator's argument. +/// +/// It will return a `Err(Err::Error((_, ErrorKind::IsA)))` if the pattern wasn't met. +/// # Example +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::bytes::complete::is_a; +/// +/// fn hex(s: &str) -> IResult<&str, &str> { +/// is_a("1234567890ABCDEF")(s) +/// } +/// +/// assert_eq!(hex("123 and voila"), Ok((" and voila", "123"))); +/// assert_eq!(hex("DEADBEEF and others"), Ok((" and others", "DEADBEEF"))); +/// assert_eq!(hex("BADBABEsomething"), Ok(("something", "BADBABE"))); +/// assert_eq!(hex("D15EA5E"), Ok(("", "D15EA5E"))); +/// assert_eq!(hex(""), Err(Err::Error(Error::new("", ErrorKind::IsA)))); +/// ``` +pub fn is_a<T, Input, Error: ParseError<Input>>( + arr: T, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputTakeAtPosition, + T: FindToken<<Input as InputTakeAtPosition>::Item>, +{ + move |i: Input| { + let e: ErrorKind = ErrorKind::IsA; + i.split_at_position1_complete(|c| !arr.find_token(c), e) + } +} + +/// Returns the longest input slice (if any) that matches the predicate. +/// +/// The parser will return the longest slice that matches the given predicate *(a function that +/// takes the input and returns a bool)*. +/// # Example +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed, IResult}; +/// use nom::bytes::complete::take_while; +/// use nom::character::is_alphabetic; +/// +/// fn alpha(s: &[u8]) -> IResult<&[u8], &[u8]> { +/// take_while(is_alphabetic)(s) +/// } +/// +/// assert_eq!(alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..]))); +/// assert_eq!(alpha(b"12345"), Ok((&b"12345"[..], &b""[..]))); +/// assert_eq!(alpha(b"latin"), Ok((&b""[..], &b"latin"[..]))); +/// assert_eq!(alpha(b""), Ok((&b""[..], &b""[..]))); +/// ``` +pub fn take_while<F, Input, Error: ParseError<Input>>( + cond: F, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputTakeAtPosition, + F: Fn(<Input as InputTakeAtPosition>::Item) -> bool, +{ + move |i: Input| i.split_at_position_complete(|c| !cond(c)) +} + +/// Returns the longest (at least 1) input slice that matches the predicate. +/// +/// The parser will return the longest slice that matches the given predicate *(a function that +/// takes the input and returns a bool)*. +/// +/// It will return an `Err(Err::Error((_, ErrorKind::TakeWhile1)))` if the pattern wasn't met. +/// # Example +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::bytes::complete::take_while1; +/// use nom::character::is_alphabetic; +/// +/// fn alpha(s: &[u8]) -> IResult<&[u8], &[u8]> { +/// take_while1(is_alphabetic)(s) +/// } +/// +/// assert_eq!(alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..]))); +/// assert_eq!(alpha(b"latin"), Ok((&b""[..], &b"latin"[..]))); +/// assert_eq!(alpha(b"12345"), Err(Err::Error(Error::new(&b"12345"[..], ErrorKind::TakeWhile1)))); +/// ``` +pub fn take_while1<F, Input, Error: ParseError<Input>>( + cond: F, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputTakeAtPosition, + F: Fn(<Input as InputTakeAtPosition>::Item) -> bool, +{ + move |i: Input| { + let e: ErrorKind = ErrorKind::TakeWhile1; + i.split_at_position1_complete(|c| !cond(c), e) + } +} + +/// Returns the longest (m <= len <= n) input slice that matches the predicate. +/// +/// The parser will return the longest slice that matches the given predicate *(a function that +/// takes the input and returns a bool)*. +/// +/// It will return an `Err::Error((_, ErrorKind::TakeWhileMN))` if the pattern wasn't met or is out +/// of range (m <= len <= n). +/// # Example +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::bytes::complete::take_while_m_n; +/// use nom::character::is_alphabetic; +/// +/// fn short_alpha(s: &[u8]) -> IResult<&[u8], &[u8]> { +/// take_while_m_n(3, 6, is_alphabetic)(s) +/// } +/// +/// assert_eq!(short_alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..]))); +/// assert_eq!(short_alpha(b"lengthy"), Ok((&b"y"[..], &b"length"[..]))); +/// assert_eq!(short_alpha(b"latin"), Ok((&b""[..], &b"latin"[..]))); +/// assert_eq!(short_alpha(b"ed"), Err(Err::Error(Error::new(&b"ed"[..], ErrorKind::TakeWhileMN)))); +/// assert_eq!(short_alpha(b"12345"), Err(Err::Error(Error::new(&b"12345"[..], ErrorKind::TakeWhileMN)))); +/// ``` +pub fn take_while_m_n<F, Input, Error: ParseError<Input>>( + m: usize, + n: usize, + cond: F, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputTake + InputIter + InputLength + Slice<RangeFrom<usize>>, + F: Fn(<Input as InputIter>::Item) -> bool, +{ + move |i: Input| { + let input = i; + + match input.position(|c| !cond(c)) { + Some(idx) => { + if idx >= m { + if idx <= n { + let res: IResult<_, _, Error> = if let Ok(index) = input.slice_index(idx) { + Ok(input.take_split(index)) + } else { + Err(Err::Error(Error::from_error_kind( + input, + ErrorKind::TakeWhileMN, + ))) + }; + res + } else { + let res: IResult<_, _, Error> = if let Ok(index) = input.slice_index(n) { + Ok(input.take_split(index)) + } else { + Err(Err::Error(Error::from_error_kind( + input, + ErrorKind::TakeWhileMN, + ))) + }; + res + } + } else { + let e = ErrorKind::TakeWhileMN; + Err(Err::Error(Error::from_error_kind(input, e))) + } + } + None => { + let len = input.input_len(); + if len >= n { + match input.slice_index(n) { + Ok(index) => Ok(input.take_split(index)), + Err(_needed) => Err(Err::Error(Error::from_error_kind( + input, + ErrorKind::TakeWhileMN, + ))), + } + } else if len >= m && len <= n { + let res: IResult<_, _, Error> = Ok((input.slice(len..), input)); + res + } else { + let e = ErrorKind::TakeWhileMN; + Err(Err::Error(Error::from_error_kind(input, e))) + } + } + } + } +} + +/// Returns the longest input slice (if any) till a predicate is met. +/// +/// The parser will return the longest slice till the given predicate *(a function that +/// takes the input and returns a bool)*. +/// # Example +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed, IResult}; +/// use nom::bytes::complete::take_till; +/// +/// fn till_colon(s: &str) -> IResult<&str, &str> { +/// take_till(|c| c == ':')(s) +/// } +/// +/// assert_eq!(till_colon("latin:123"), Ok((":123", "latin"))); +/// assert_eq!(till_colon(":empty matched"), Ok((":empty matched", ""))); //allowed +/// assert_eq!(till_colon("12345"), Ok(("", "12345"))); +/// assert_eq!(till_colon(""), Ok(("", ""))); +/// ``` +pub fn take_till<F, Input, Error: ParseError<Input>>( + cond: F, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputTakeAtPosition, + F: Fn(<Input as InputTakeAtPosition>::Item) -> bool, +{ + move |i: Input| i.split_at_position_complete(|c| cond(c)) +} + +/// Returns the longest (at least 1) input slice till a predicate is met. +/// +/// The parser will return the longest slice till the given predicate *(a function that +/// takes the input and returns a bool)*. +/// +/// It will return `Err(Err::Error((_, ErrorKind::TakeTill1)))` if the input is empty or the +/// predicate matches the first input. +/// # Example +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::bytes::complete::take_till1; +/// +/// fn till_colon(s: &str) -> IResult<&str, &str> { +/// take_till1(|c| c == ':')(s) +/// } +/// +/// assert_eq!(till_colon("latin:123"), Ok((":123", "latin"))); +/// assert_eq!(till_colon(":empty matched"), Err(Err::Error(Error::new(":empty matched", ErrorKind::TakeTill1)))); +/// assert_eq!(till_colon("12345"), Ok(("", "12345"))); +/// assert_eq!(till_colon(""), Err(Err::Error(Error::new("", ErrorKind::TakeTill1)))); +/// ``` +pub fn take_till1<F, Input, Error: ParseError<Input>>( + cond: F, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputTakeAtPosition, + F: Fn(<Input as InputTakeAtPosition>::Item) -> bool, +{ + move |i: Input| { + let e: ErrorKind = ErrorKind::TakeTill1; + i.split_at_position1_complete(|c| cond(c), e) + } +} + +/// Returns an input slice containing the first N input elements (Input[..N]). +/// +/// It will return `Err(Err::Error((_, ErrorKind::Eof)))` if the input is shorter than the argument. +/// # Example +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::bytes::complete::take; +/// +/// fn take6(s: &str) -> IResult<&str, &str> { +/// take(6usize)(s) +/// } +/// +/// assert_eq!(take6("1234567"), Ok(("7", "123456"))); +/// assert_eq!(take6("things"), Ok(("", "things"))); +/// assert_eq!(take6("short"), Err(Err::Error(Error::new("short", ErrorKind::Eof)))); +/// assert_eq!(take6(""), Err(Err::Error(Error::new("", ErrorKind::Eof)))); +/// ``` +/// +/// The units that are taken will depend on the input type. For example, for a +/// `&str` it will take a number of `char`'s, whereas for a `&[u8]` it will +/// take that many `u8`'s: +/// +/// ```rust +/// use nom::error::Error; +/// use nom::bytes::complete::take; +/// +/// assert_eq!(take::<_, _, Error<_>>(1usize)("💙"), Ok(("", "💙"))); +/// assert_eq!(take::<_, _, Error<_>>(1usize)("💙".as_bytes()), Ok((b"\x9F\x92\x99".as_ref(), b"\xF0".as_ref()))); +/// ``` +pub fn take<C, Input, Error: ParseError<Input>>( + count: C, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputIter + InputTake, + C: ToUsize, +{ + let c = count.to_usize(); + move |i: Input| match i.slice_index(c) { + Err(_needed) => Err(Err::Error(Error::from_error_kind(i, ErrorKind::Eof))), + Ok(index) => Ok(i.take_split(index)), + } +} + +/// Returns the input slice up to the first occurrence of the pattern. +/// +/// It doesn't consume the pattern. It will return `Err(Err::Error((_, ErrorKind::TakeUntil)))` +/// if the pattern wasn't met. +/// # Example +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::bytes::complete::take_until; +/// +/// fn until_eof(s: &str) -> IResult<&str, &str> { +/// take_until("eof")(s) +/// } +/// +/// assert_eq!(until_eof("hello, worldeof"), Ok(("eof", "hello, world"))); +/// assert_eq!(until_eof("hello, world"), Err(Err::Error(Error::new("hello, world", ErrorKind::TakeUntil)))); +/// assert_eq!(until_eof(""), Err(Err::Error(Error::new("", ErrorKind::TakeUntil)))); +/// assert_eq!(until_eof("1eof2eof"), Ok(("eof2eof", "1"))); +/// ``` +pub fn take_until<T, Input, Error: ParseError<Input>>( + tag: T, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputTake + FindSubstring<T>, + T: InputLength + Clone, +{ + move |i: Input| { + let t = tag.clone(); + let res: IResult<_, _, Error> = match i.find_substring(t) { + None => Err(Err::Error(Error::from_error_kind(i, ErrorKind::TakeUntil))), + Some(index) => Ok(i.take_split(index)), + }; + res + } +} + +/// Returns the non empty input slice up to the first occurrence of the pattern. +/// +/// It doesn't consume the pattern. It will return `Err(Err::Error((_, ErrorKind::TakeUntil)))` +/// if the pattern wasn't met. +/// # Example +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::bytes::complete::take_until1; +/// +/// fn until_eof(s: &str) -> IResult<&str, &str> { +/// take_until1("eof")(s) +/// } +/// +/// assert_eq!(until_eof("hello, worldeof"), Ok(("eof", "hello, world"))); +/// assert_eq!(until_eof("hello, world"), Err(Err::Error(Error::new("hello, world", ErrorKind::TakeUntil)))); +/// assert_eq!(until_eof(""), Err(Err::Error(Error::new("", ErrorKind::TakeUntil)))); +/// assert_eq!(until_eof("1eof2eof"), Ok(("eof2eof", "1"))); +/// assert_eq!(until_eof("eof"), Err(Err::Error(Error::new("eof", ErrorKind::TakeUntil)))); +/// ``` +pub fn take_until1<T, Input, Error: ParseError<Input>>( + tag: T, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputTake + FindSubstring<T>, + T: InputLength + Clone, +{ + move |i: Input| { + let t = tag.clone(); + let res: IResult<_, _, Error> = match i.find_substring(t) { + None => Err(Err::Error(Error::from_error_kind(i, ErrorKind::TakeUntil))), + Some(0) => Err(Err::Error(Error::from_error_kind(i, ErrorKind::TakeUntil))), + Some(index) => Ok(i.take_split(index)), + }; + res + } +} + +/// Matches a byte string with escaped characters. +/// +/// * The first argument matches the normal characters (it must not accept the control character) +/// * The second argument is the control character (like `\` in most languages) +/// * The third argument matches the escaped characters +/// # Example +/// ``` +/// # use nom::{Err, error::ErrorKind, Needed, IResult}; +/// # use nom::character::complete::digit1; +/// use nom::bytes::complete::escaped; +/// use nom::character::complete::one_of; +/// +/// fn esc(s: &str) -> IResult<&str, &str> { +/// escaped(digit1, '\\', one_of(r#""n\"#))(s) +/// } +/// +/// assert_eq!(esc("123;"), Ok((";", "123"))); +/// assert_eq!(esc(r#"12\"34;"#), Ok((";", r#"12\"34"#))); +/// ``` +/// +pub fn escaped<'a, Input: 'a, Error, F, G, O1, O2>( + mut normal: F, + control_char: char, + mut escapable: G, +) -> impl FnMut(Input) -> IResult<Input, Input, Error> +where + Input: Clone + + crate::traits::Offset + + InputLength + + InputTake + + InputTakeAtPosition + + Slice<RangeFrom<usize>> + + InputIter, + <Input as InputIter>::Item: crate::traits::AsChar, + F: Parser<Input, O1, Error>, + G: Parser<Input, O2, Error>, + Error: ParseError<Input>, +{ + use crate::traits::AsChar; + + move |input: Input| { + let mut i = input.clone(); + + while i.input_len() > 0 { + let current_len = i.input_len(); + + match normal.parse(i.clone()) { + Ok((i2, _)) => { + // return if we consumed everything or if the normal parser + // does not consume anything + if i2.input_len() == 0 { + return Ok((input.slice(input.input_len()..), input)); + } else if i2.input_len() == current_len { + let index = input.offset(&i2); + return Ok(input.take_split(index)); + } else { + i = i2; + } + } + Err(Err::Error(_)) => { + // unwrap() should be safe here since index < $i.input_len() + if i.iter_elements().next().unwrap().as_char() == control_char { + let next = control_char.len_utf8(); + if next >= i.input_len() { + return Err(Err::Error(Error::from_error_kind( + input, + ErrorKind::Escaped, + ))); + } else { + match escapable.parse(i.slice(next..)) { + Ok((i2, _)) => { + if i2.input_len() == 0 { + return Ok((input.slice(input.input_len()..), input)); + } else { + i = i2; + } + } + Err(e) => return Err(e), + } + } + } else { + let index = input.offset(&i); + if index == 0 { + return Err(Err::Error(Error::from_error_kind( + input, + ErrorKind::Escaped, + ))); + } + return Ok(input.take_split(index)); + } + } + Err(e) => { + return Err(e); + } + } + } + + Ok((input.slice(input.input_len()..), input)) + } +} + +/// Matches a byte string with escaped characters. +/// +/// * The first argument matches the normal characters (it must not match the control character) +/// * The second argument is the control character (like `\` in most languages) +/// * The third argument matches the escaped characters and transforms them +/// +/// As an example, the chain `abc\tdef` could be `abc def` (it also consumes the control character) +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, Needed, IResult}; +/// # use std::str::from_utf8; +/// use nom::bytes::complete::{escaped_transform, tag}; +/// use nom::character::complete::alpha1; +/// use nom::branch::alt; +/// use nom::combinator::value; +/// +/// fn parser(input: &str) -> IResult<&str, String> { +/// escaped_transform( +/// alpha1, +/// '\\', +/// alt(( +/// value("\\", tag("\\")), +/// value("\"", tag("\"")), +/// value("\n", tag("n")), +/// )) +/// )(input) +/// } +/// +/// assert_eq!(parser("ab\\\"cd"), Ok(("", String::from("ab\"cd")))); +/// assert_eq!(parser("ab\\ncd"), Ok(("", String::from("ab\ncd")))); +/// ``` +#[cfg(feature = "alloc")] +#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] +pub fn escaped_transform<Input, Error, F, G, O1, O2, ExtendItem, Output>( + mut normal: F, + control_char: char, + mut transform: G, +) -> impl FnMut(Input) -> IResult<Input, Output, Error> +where + Input: Clone + + crate::traits::Offset + + InputLength + + InputTake + + InputTakeAtPosition + + Slice<RangeFrom<usize>> + + InputIter, + Input: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>, + O1: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>, + O2: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>, + <Input as InputIter>::Item: crate::traits::AsChar, + F: Parser<Input, O1, Error>, + G: Parser<Input, O2, Error>, + Error: ParseError<Input>, +{ + use crate::traits::AsChar; + + move |input: Input| { + let mut index = 0; + let mut res = input.new_builder(); + + let i = input.clone(); + + while index < i.input_len() { + let current_len = i.input_len(); + let remainder = i.slice(index..); + match normal.parse(remainder.clone()) { + Ok((i2, o)) => { + o.extend_into(&mut res); + if i2.input_len() == 0 { + return Ok((i.slice(i.input_len()..), res)); + } else if i2.input_len() == current_len { + return Ok((remainder, res)); + } else { + index = input.offset(&i2); + } + } + Err(Err::Error(_)) => { + // unwrap() should be safe here since index < $i.input_len() + if remainder.iter_elements().next().unwrap().as_char() == control_char { + let next = index + control_char.len_utf8(); + let input_len = input.input_len(); + + if next >= input_len { + return Err(Err::Error(Error::from_error_kind( + remainder, + ErrorKind::EscapedTransform, + ))); + } else { + match transform.parse(i.slice(next..)) { + Ok((i2, o)) => { + o.extend_into(&mut res); + if i2.input_len() == 0 { + return Ok((i.slice(i.input_len()..), res)); + } else { + index = input.offset(&i2); + } + } + Err(e) => return Err(e), + } + } + } else { + if index == 0 { + return Err(Err::Error(Error::from_error_kind( + remainder, + ErrorKind::EscapedTransform, + ))); + } + return Ok((remainder, res)); + } + } + Err(e) => return Err(e), + } + } + Ok((input.slice(index..), res)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn complete_take_while_m_n_utf8_all_matching() { + let result: IResult<&str, &str> = + super::take_while_m_n(1, 4, |c: char| c.is_alphabetic())("øn"); + assert_eq!(result, Ok(("", "øn"))); + } + + #[test] + fn complete_take_while_m_n_utf8_all_matching_substring() { + let result: IResult<&str, &str> = + super::take_while_m_n(1, 1, |c: char| c.is_alphabetic())("øn"); + assert_eq!(result, Ok(("n", "ø"))); + } + + // issue #1336 "escaped hangs if normal parser accepts empty" + fn escaped_string(input: &str) -> IResult<&str, &str> { + use crate::character::complete::{alpha0, one_of}; + escaped(alpha0, '\\', one_of("n"))(input) + } + + // issue #1336 "escaped hangs if normal parser accepts empty" + #[test] + fn escaped_hang() { + escaped_string("7").unwrap(); + escaped_string("a7").unwrap(); + } + + // issue ##1118 escaped does not work with empty string + fn unquote<'a>(input: &'a str) -> IResult<&'a str, &'a str> { + use crate::bytes::complete::*; + use crate::character::complete::*; + use crate::combinator::opt; + use crate::sequence::delimited; + + delimited( + char('"'), + escaped(opt(none_of(r#"\""#)), '\\', one_of(r#"\"rnt"#)), + char('"'), + )(input) + } + + #[test] + fn escaped_hang_1118() { + assert_eq!(unquote(r#""""#), Ok(("", ""))); + } +} diff --git a/third_party/rust/nom/src/bytes/mod.rs b/third_party/rust/nom/src/bytes/mod.rs new file mode 100644 index 0000000000..7bc2d15a79 --- /dev/null +++ b/third_party/rust/nom/src/bytes/mod.rs @@ -0,0 +1,6 @@ +//! Parsers recognizing bytes streams + +pub mod complete; +pub mod streaming; +#[cfg(test)] +mod tests; diff --git a/third_party/rust/nom/src/bytes/streaming.rs b/third_party/rust/nom/src/bytes/streaming.rs new file mode 100644 index 0000000000..e972760e21 --- /dev/null +++ b/third_party/rust/nom/src/bytes/streaming.rs @@ -0,0 +1,700 @@ +//! Parsers recognizing bytes streams, streaming version + +use crate::error::ErrorKind; +use crate::error::ParseError; +use crate::internal::{Err, IResult, Needed, Parser}; +use crate::lib::std::ops::RangeFrom; +use crate::lib::std::result::Result::*; +use crate::traits::{ + Compare, CompareResult, FindSubstring, FindToken, InputIter, InputLength, InputTake, + InputTakeAtPosition, Slice, ToUsize, +}; + +/// Recognizes a pattern. +/// +/// The input data will be compared to the tag combinator's argument and will return the part of +/// the input that matches the argument. +/// # Example +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::bytes::streaming::tag; +/// +/// fn parser(s: &str) -> IResult<&str, &str> { +/// tag("Hello")(s) +/// } +/// +/// assert_eq!(parser("Hello, World!"), Ok((", World!", "Hello"))); +/// assert_eq!(parser("Something"), Err(Err::Error(Error::new("Something", ErrorKind::Tag)))); +/// assert_eq!(parser("S"), Err(Err::Error(Error::new("S", ErrorKind::Tag)))); +/// assert_eq!(parser("H"), Err(Err::Incomplete(Needed::new(4)))); +/// ``` +pub fn tag<T, Input, Error: ParseError<Input>>( + tag: T, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputTake + InputLength + Compare<T>, + T: InputLength + Clone, +{ + move |i: Input| { + let tag_len = tag.input_len(); + let t = tag.clone(); + + let res: IResult<_, _, Error> = match i.compare(t) { + CompareResult::Ok => Ok(i.take_split(tag_len)), + CompareResult::Incomplete => Err(Err::Incomplete(Needed::new(tag_len - i.input_len()))), + CompareResult::Error => { + let e: ErrorKind = ErrorKind::Tag; + Err(Err::Error(Error::from_error_kind(i, e))) + } + }; + res + } +} + +/// Recognizes a case insensitive pattern. +/// +/// The input data will be compared to the tag combinator's argument and will return the part of +/// the input that matches the argument with no regard to case. +/// # Example +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::bytes::streaming::tag_no_case; +/// +/// fn parser(s: &str) -> IResult<&str, &str> { +/// tag_no_case("hello")(s) +/// } +/// +/// assert_eq!(parser("Hello, World!"), Ok((", World!", "Hello"))); +/// assert_eq!(parser("hello, World!"), Ok((", World!", "hello"))); +/// assert_eq!(parser("HeLlO, World!"), Ok((", World!", "HeLlO"))); +/// assert_eq!(parser("Something"), Err(Err::Error(Error::new("Something", ErrorKind::Tag)))); +/// assert_eq!(parser(""), Err(Err::Incomplete(Needed::new(5)))); +/// ``` +pub fn tag_no_case<T, Input, Error: ParseError<Input>>( + tag: T, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputTake + InputLength + Compare<T>, + T: InputLength + Clone, +{ + move |i: Input| { + let tag_len = tag.input_len(); + let t = tag.clone(); + + let res: IResult<_, _, Error> = match (i).compare_no_case(t) { + CompareResult::Ok => Ok(i.take_split(tag_len)), + CompareResult::Incomplete => Err(Err::Incomplete(Needed::new(tag_len - i.input_len()))), + CompareResult::Error => { + let e: ErrorKind = ErrorKind::Tag; + Err(Err::Error(Error::from_error_kind(i, e))) + } + }; + res + } +} + +/// Parse till certain characters are met. +/// +/// The parser will return the longest slice till one of the characters of the combinator's argument are met. +/// +/// It doesn't consume the matched character. +/// +/// It will return a `Err::Incomplete(Needed::new(1))` if the pattern wasn't met. +/// # Example +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed, IResult}; +/// use nom::bytes::streaming::is_not; +/// +/// fn not_space(s: &str) -> IResult<&str, &str> { +/// is_not(" \t\r\n")(s) +/// } +/// +/// assert_eq!(not_space("Hello, World!"), Ok((" World!", "Hello,"))); +/// assert_eq!(not_space("Sometimes\t"), Ok(("\t", "Sometimes"))); +/// assert_eq!(not_space("Nospace"), Err(Err::Incomplete(Needed::new(1)))); +/// assert_eq!(not_space(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn is_not<T, Input, Error: ParseError<Input>>( + arr: T, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputTakeAtPosition, + T: FindToken<<Input as InputTakeAtPosition>::Item>, +{ + move |i: Input| { + let e: ErrorKind = ErrorKind::IsNot; + i.split_at_position1(|c| arr.find_token(c), e) + } +} + +/// Returns the longest slice of the matches the pattern. +/// +/// The parser will return the longest slice consisting of the characters in provided in the +/// combinator's argument. +/// +/// # Streaming specific +/// *Streaming version* will return a `Err::Incomplete(Needed::new(1))` if the pattern wasn't met +/// or if the pattern reaches the end of the input. +/// # Example +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed, IResult}; +/// use nom::bytes::streaming::is_a; +/// +/// fn hex(s: &str) -> IResult<&str, &str> { +/// is_a("1234567890ABCDEF")(s) +/// } +/// +/// assert_eq!(hex("123 and voila"), Ok((" and voila", "123"))); +/// assert_eq!(hex("DEADBEEF and others"), Ok((" and others", "DEADBEEF"))); +/// assert_eq!(hex("BADBABEsomething"), Ok(("something", "BADBABE"))); +/// assert_eq!(hex("D15EA5E"), Err(Err::Incomplete(Needed::new(1)))); +/// assert_eq!(hex(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn is_a<T, Input, Error: ParseError<Input>>( + arr: T, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputTakeAtPosition, + T: FindToken<<Input as InputTakeAtPosition>::Item>, +{ + move |i: Input| { + let e: ErrorKind = ErrorKind::IsA; + i.split_at_position1(|c| !arr.find_token(c), e) + } +} + +/// Returns the longest input slice (if any) that matches the predicate. +/// +/// The parser will return the longest slice that matches the given predicate *(a function that +/// takes the input and returns a bool)*. +/// +/// # Streaming Specific +/// *Streaming version* will return a `Err::Incomplete(Needed::new(1))` if the pattern reaches the end of the input. +/// # Example +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed, IResult}; +/// use nom::bytes::streaming::take_while; +/// use nom::character::is_alphabetic; +/// +/// fn alpha(s: &[u8]) -> IResult<&[u8], &[u8]> { +/// take_while(is_alphabetic)(s) +/// } +/// +/// assert_eq!(alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..]))); +/// assert_eq!(alpha(b"12345"), Ok((&b"12345"[..], &b""[..]))); +/// assert_eq!(alpha(b"latin"), Err(Err::Incomplete(Needed::new(1)))); +/// assert_eq!(alpha(b""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn take_while<F, Input, Error: ParseError<Input>>( + cond: F, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputTakeAtPosition, + F: Fn(<Input as InputTakeAtPosition>::Item) -> bool, +{ + move |i: Input| i.split_at_position(|c| !cond(c)) +} + +/// Returns the longest (at least 1) input slice that matches the predicate. +/// +/// The parser will return the longest slice that matches the given predicate *(a function that +/// takes the input and returns a bool)*. +/// +/// It will return an `Err(Err::Error((_, ErrorKind::TakeWhile1)))` if the pattern wasn't met. +/// +/// # Streaming Specific +/// *Streaming version* will return a `Err::Incomplete(Needed::new(1))` or if the pattern reaches the end of the input. +/// +/// # Example +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::bytes::streaming::take_while1; +/// use nom::character::is_alphabetic; +/// +/// fn alpha(s: &[u8]) -> IResult<&[u8], &[u8]> { +/// take_while1(is_alphabetic)(s) +/// } +/// +/// assert_eq!(alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..]))); +/// assert_eq!(alpha(b"latin"), Err(Err::Incomplete(Needed::new(1)))); +/// assert_eq!(alpha(b"12345"), Err(Err::Error(Error::new(&b"12345"[..], ErrorKind::TakeWhile1)))); +/// ``` +pub fn take_while1<F, Input, Error: ParseError<Input>>( + cond: F, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputTakeAtPosition, + F: Fn(<Input as InputTakeAtPosition>::Item) -> bool, +{ + move |i: Input| { + let e: ErrorKind = ErrorKind::TakeWhile1; + i.split_at_position1(|c| !cond(c), e) + } +} + +/// Returns the longest (m <= len <= n) input slice that matches the predicate. +/// +/// The parser will return the longest slice that matches the given predicate *(a function that +/// takes the input and returns a bool)*. +/// +/// It will return an `Err::Error((_, ErrorKind::TakeWhileMN))` if the pattern wasn't met. +/// # Streaming Specific +/// *Streaming version* will return a `Err::Incomplete(Needed::new(1))` if the pattern reaches the end of the input or is too short. +/// +/// # Example +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::bytes::streaming::take_while_m_n; +/// use nom::character::is_alphabetic; +/// +/// fn short_alpha(s: &[u8]) -> IResult<&[u8], &[u8]> { +/// take_while_m_n(3, 6, is_alphabetic)(s) +/// } +/// +/// assert_eq!(short_alpha(b"latin123"), Ok((&b"123"[..], &b"latin"[..]))); +/// assert_eq!(short_alpha(b"lengthy"), Ok((&b"y"[..], &b"length"[..]))); +/// assert_eq!(short_alpha(b"latin"), Err(Err::Incomplete(Needed::new(1)))); +/// assert_eq!(short_alpha(b"ed"), Err(Err::Incomplete(Needed::new(1)))); +/// assert_eq!(short_alpha(b"12345"), Err(Err::Error(Error::new(&b"12345"[..], ErrorKind::TakeWhileMN)))); +/// ``` +pub fn take_while_m_n<F, Input, Error: ParseError<Input>>( + m: usize, + n: usize, + cond: F, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputTake + InputIter + InputLength, + F: Fn(<Input as InputIter>::Item) -> bool, +{ + move |i: Input| { + let input = i; + + match input.position(|c| !cond(c)) { + Some(idx) => { + if idx >= m { + if idx <= n { + let res: IResult<_, _, Error> = if let Ok(index) = input.slice_index(idx) { + Ok(input.take_split(index)) + } else { + Err(Err::Error(Error::from_error_kind( + input, + ErrorKind::TakeWhileMN, + ))) + }; + res + } else { + let res: IResult<_, _, Error> = if let Ok(index) = input.slice_index(n) { + Ok(input.take_split(index)) + } else { + Err(Err::Error(Error::from_error_kind( + input, + ErrorKind::TakeWhileMN, + ))) + }; + res + } + } else { + let e = ErrorKind::TakeWhileMN; + Err(Err::Error(Error::from_error_kind(input, e))) + } + } + None => { + let len = input.input_len(); + if len >= n { + match input.slice_index(n) { + Ok(index) => Ok(input.take_split(index)), + Err(_needed) => Err(Err::Error(Error::from_error_kind( + input, + ErrorKind::TakeWhileMN, + ))), + } + } else { + let needed = if m > len { m - len } else { 1 }; + Err(Err::Incomplete(Needed::new(needed))) + } + } + } + } +} + +/// Returns the longest input slice (if any) till a predicate is met. +/// +/// The parser will return the longest slice till the given predicate *(a function that +/// takes the input and returns a bool)*. +/// +/// # Streaming Specific +/// *Streaming version* will return a `Err::Incomplete(Needed::new(1))` if the match reaches the +/// end of input or if there was not match. +/// +/// # Example +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed, IResult}; +/// use nom::bytes::streaming::take_till; +/// +/// fn till_colon(s: &str) -> IResult<&str, &str> { +/// take_till(|c| c == ':')(s) +/// } +/// +/// assert_eq!(till_colon("latin:123"), Ok((":123", "latin"))); +/// assert_eq!(till_colon(":empty matched"), Ok((":empty matched", ""))); //allowed +/// assert_eq!(till_colon("12345"), Err(Err::Incomplete(Needed::new(1)))); +/// assert_eq!(till_colon(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn take_till<F, Input, Error: ParseError<Input>>( + cond: F, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputTakeAtPosition, + F: Fn(<Input as InputTakeAtPosition>::Item) -> bool, +{ + move |i: Input| i.split_at_position(|c| cond(c)) +} + +/// Returns the longest (at least 1) input slice till a predicate is met. +/// +/// The parser will return the longest slice till the given predicate *(a function that +/// takes the input and returns a bool)*. +/// +/// # Streaming Specific +/// *Streaming version* will return a `Err::Incomplete(Needed::new(1))` if the match reaches the +/// end of input or if there was not match. +/// # Example +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::bytes::streaming::take_till1; +/// +/// fn till_colon(s: &str) -> IResult<&str, &str> { +/// take_till1(|c| c == ':')(s) +/// } +/// +/// assert_eq!(till_colon("latin:123"), Ok((":123", "latin"))); +/// assert_eq!(till_colon(":empty matched"), Err(Err::Error(Error::new(":empty matched", ErrorKind::TakeTill1)))); +/// assert_eq!(till_colon("12345"), Err(Err::Incomplete(Needed::new(1)))); +/// assert_eq!(till_colon(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn take_till1<F, Input, Error: ParseError<Input>>( + cond: F, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputTakeAtPosition, + F: Fn(<Input as InputTakeAtPosition>::Item) -> bool, +{ + move |i: Input| { + let e: ErrorKind = ErrorKind::TakeTill1; + i.split_at_position1(|c| cond(c), e) + } +} + +/// Returns an input slice containing the first N input elements (Input[..N]). +/// +/// # Streaming Specific +/// *Streaming version* if the input has less than N elements, `take` will +/// return a `Err::Incomplete(Needed::new(M))` where M is the number of +/// additional bytes the parser would need to succeed. +/// It is well defined for `&[u8]` as the number of elements is the byte size, +/// but for types like `&str`, we cannot know how many bytes correspond for +/// the next few chars, so the result will be `Err::Incomplete(Needed::Unknown)` +/// +/// # Example +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed, IResult}; +/// use nom::bytes::streaming::take; +/// +/// fn take6(s: &str) -> IResult<&str, &str> { +/// take(6usize)(s) +/// } +/// +/// assert_eq!(take6("1234567"), Ok(("7", "123456"))); +/// assert_eq!(take6("things"), Ok(("", "things"))); +/// assert_eq!(take6("short"), Err(Err::Incomplete(Needed::Unknown))); +/// ``` +pub fn take<C, Input, Error: ParseError<Input>>( + count: C, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputIter + InputTake + InputLength, + C: ToUsize, +{ + let c = count.to_usize(); + move |i: Input| match i.slice_index(c) { + Err(i) => Err(Err::Incomplete(i)), + Ok(index) => Ok(i.take_split(index)), + } +} + +/// Returns the input slice up to the first occurrence of the pattern. +/// +/// It doesn't consume the pattern. +/// +/// # Streaming Specific +/// *Streaming version* will return a `Err::Incomplete(Needed::new(N))` if the input doesn't +/// contain the pattern or if the input is smaller than the pattern. +/// # Example +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed, IResult}; +/// use nom::bytes::streaming::take_until; +/// +/// fn until_eof(s: &str) -> IResult<&str, &str> { +/// take_until("eof")(s) +/// } +/// +/// assert_eq!(until_eof("hello, worldeof"), Ok(("eof", "hello, world"))); +/// assert_eq!(until_eof("hello, world"), Err(Err::Incomplete(Needed::Unknown))); +/// assert_eq!(until_eof("hello, worldeo"), Err(Err::Incomplete(Needed::Unknown))); +/// assert_eq!(until_eof("1eof2eof"), Ok(("eof2eof", "1"))); +/// ``` +pub fn take_until<T, Input, Error: ParseError<Input>>( + tag: T, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputTake + InputLength + FindSubstring<T>, + T: Clone, +{ + move |i: Input| { + let t = tag.clone(); + + let res: IResult<_, _, Error> = match i.find_substring(t) { + None => Err(Err::Incomplete(Needed::Unknown)), + Some(index) => Ok(i.take_split(index)), + }; + res + } +} + +/// Returns the non empty input slice up to the first occurrence of the pattern. +/// +/// It doesn't consume the pattern. +/// +/// # Streaming Specific +/// *Streaming version* will return a `Err::Incomplete(Needed::new(N))` if the input doesn't +/// contain the pattern or if the input is smaller than the pattern. +/// # Example +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::bytes::streaming::take_until1; +/// +/// fn until_eof(s: &str) -> IResult<&str, &str> { +/// take_until1("eof")(s) +/// } +/// +/// assert_eq!(until_eof("hello, worldeof"), Ok(("eof", "hello, world"))); +/// assert_eq!(until_eof("hello, world"), Err(Err::Incomplete(Needed::Unknown))); +/// assert_eq!(until_eof("hello, worldeo"), Err(Err::Incomplete(Needed::Unknown))); +/// assert_eq!(until_eof("1eof2eof"), Ok(("eof2eof", "1"))); +/// assert_eq!(until_eof("eof"), Err(Err::Error(Error::new("eof", ErrorKind::TakeUntil)))); +/// ``` +pub fn take_until1<T, Input, Error: ParseError<Input>>( + tag: T, +) -> impl Fn(Input) -> IResult<Input, Input, Error> +where + Input: InputTake + InputLength + FindSubstring<T>, + T: Clone, +{ + move |i: Input| { + let t = tag.clone(); + + let res: IResult<_, _, Error> = match i.find_substring(t) { + None => Err(Err::Incomplete(Needed::Unknown)), + Some(0) => Err(Err::Error(Error::from_error_kind(i, ErrorKind::TakeUntil))), + Some(index) => Ok(i.take_split(index)), + }; + res + } +} + +/// Matches a byte string with escaped characters. +/// +/// * The first argument matches the normal characters (it must not accept the control character) +/// * The second argument is the control character (like `\` in most languages) +/// * The third argument matches the escaped characters +/// # Example +/// ``` +/// # use nom::{Err, error::ErrorKind, Needed, IResult}; +/// # use nom::character::complete::digit1; +/// use nom::bytes::streaming::escaped; +/// use nom::character::streaming::one_of; +/// +/// fn esc(s: &str) -> IResult<&str, &str> { +/// escaped(digit1, '\\', one_of("\"n\\"))(s) +/// } +/// +/// assert_eq!(esc("123;"), Ok((";", "123"))); +/// assert_eq!(esc("12\\\"34;"), Ok((";", "12\\\"34"))); +/// ``` +/// +pub fn escaped<Input, Error, F, G, O1, O2>( + mut normal: F, + control_char: char, + mut escapable: G, +) -> impl FnMut(Input) -> IResult<Input, Input, Error> +where + Input: Clone + + crate::traits::Offset + + InputLength + + InputTake + + InputTakeAtPosition + + Slice<RangeFrom<usize>> + + InputIter, + <Input as InputIter>::Item: crate::traits::AsChar, + F: Parser<Input, O1, Error>, + G: Parser<Input, O2, Error>, + Error: ParseError<Input>, +{ + use crate::traits::AsChar; + + move |input: Input| { + let mut i = input.clone(); + + while i.input_len() > 0 { + let current_len = i.input_len(); + + match normal.parse(i.clone()) { + Ok((i2, _)) => { + if i2.input_len() == 0 { + return Err(Err::Incomplete(Needed::Unknown)); + } else if i2.input_len() == current_len { + let index = input.offset(&i2); + return Ok(input.take_split(index)); + } else { + i = i2; + } + } + Err(Err::Error(_)) => { + // unwrap() should be safe here since index < $i.input_len() + if i.iter_elements().next().unwrap().as_char() == control_char { + let next = control_char.len_utf8(); + if next >= i.input_len() { + return Err(Err::Incomplete(Needed::new(1))); + } else { + match escapable.parse(i.slice(next..)) { + Ok((i2, _)) => { + if i2.input_len() == 0 { + return Err(Err::Incomplete(Needed::Unknown)); + } else { + i = i2; + } + } + Err(e) => return Err(e), + } + } + } else { + let index = input.offset(&i); + return Ok(input.take_split(index)); + } + } + Err(e) => { + return Err(e); + } + } + } + + Err(Err::Incomplete(Needed::Unknown)) + } +} + +/// Matches a byte string with escaped characters. +/// +/// * The first argument matches the normal characters (it must not match the control character) +/// * The second argument is the control character (like `\` in most languages) +/// * The third argument matches the escaped characters and transforms them +/// +/// As an example, the chain `abc\tdef` could be `abc def` (it also consumes the control character) +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, Needed, IResult}; +/// # use std::str::from_utf8; +/// use nom::bytes::streaming::{escaped_transform, tag}; +/// use nom::character::streaming::alpha1; +/// use nom::branch::alt; +/// use nom::combinator::value; +/// +/// fn parser(input: &str) -> IResult<&str, String> { +/// escaped_transform( +/// alpha1, +/// '\\', +/// alt(( +/// value("\\", tag("\\")), +/// value("\"", tag("\"")), +/// value("\n", tag("n")), +/// )) +/// )(input) +/// } +/// +/// assert_eq!(parser("ab\\\"cd\""), Ok(("\"", String::from("ab\"cd")))); +/// ``` +#[cfg(feature = "alloc")] +#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] +pub fn escaped_transform<Input, Error, F, G, O1, O2, ExtendItem, Output>( + mut normal: F, + control_char: char, + mut transform: G, +) -> impl FnMut(Input) -> IResult<Input, Output, Error> +where + Input: Clone + + crate::traits::Offset + + InputLength + + InputTake + + InputTakeAtPosition + + Slice<RangeFrom<usize>> + + InputIter, + Input: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>, + O1: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>, + O2: crate::traits::ExtendInto<Item = ExtendItem, Extender = Output>, + <Input as InputIter>::Item: crate::traits::AsChar, + F: Parser<Input, O1, Error>, + G: Parser<Input, O2, Error>, + Error: ParseError<Input>, +{ + use crate::traits::AsChar; + + move |input: Input| { + let mut index = 0; + let mut res = input.new_builder(); + + let i = input.clone(); + + while index < i.input_len() { + let current_len = i.input_len(); + let remainder = i.slice(index..); + match normal.parse(remainder.clone()) { + Ok((i2, o)) => { + o.extend_into(&mut res); + if i2.input_len() == 0 { + return Err(Err::Incomplete(Needed::Unknown)); + } else if i2.input_len() == current_len { + return Ok((remainder, res)); + } else { + index = input.offset(&i2); + } + } + Err(Err::Error(_)) => { + // unwrap() should be safe here since index < $i.input_len() + if remainder.iter_elements().next().unwrap().as_char() == control_char { + let next = index + control_char.len_utf8(); + let input_len = input.input_len(); + + if next >= input_len { + return Err(Err::Incomplete(Needed::Unknown)); + } else { + match transform.parse(i.slice(next..)) { + Ok((i2, o)) => { + o.extend_into(&mut res); + if i2.input_len() == 0 { + return Err(Err::Incomplete(Needed::Unknown)); + } else { + index = input.offset(&i2); + } + } + Err(e) => return Err(e), + } + } + } else { + return Ok((remainder, res)); + } + } + Err(e) => return Err(e), + } + } + Err(Err::Incomplete(Needed::Unknown)) + } +} diff --git a/third_party/rust/nom/src/bytes/tests.rs b/third_party/rust/nom/src/bytes/tests.rs new file mode 100644 index 0000000000..159c4b4ffc --- /dev/null +++ b/third_party/rust/nom/src/bytes/tests.rs @@ -0,0 +1,636 @@ +use crate::character::is_alphabetic; +use crate::character::streaming::{ + alpha1 as alpha, alphanumeric1 as alphanumeric, digit1 as digit, hex_digit1 as hex_digit, + multispace1 as multispace, oct_digit1 as oct_digit, space1 as space, +}; +use crate::error::ErrorKind; +use crate::internal::{Err, IResult, Needed}; +#[cfg(feature = "alloc")] +use crate::{ + branch::alt, + bytes::complete::{escaped, escaped_transform, tag}, + combinator::{map, value}, + lib::std::string::String, + lib::std::vec::Vec, +}; + +#[test] +fn is_a() { + use crate::bytes::streaming::is_a; + + fn a_or_b(i: &[u8]) -> IResult<&[u8], &[u8]> { + is_a("ab")(i) + } + + let a = &b"abcd"[..]; + assert_eq!(a_or_b(a), Ok((&b"cd"[..], &b"ab"[..]))); + + let b = &b"bcde"[..]; + assert_eq!(a_or_b(b), Ok((&b"cde"[..], &b"b"[..]))); + + let c = &b"cdef"[..]; + assert_eq!( + a_or_b(c), + Err(Err::Error(error_position!(c, ErrorKind::IsA))) + ); + + let d = &b"bacdef"[..]; + assert_eq!(a_or_b(d), Ok((&b"cdef"[..], &b"ba"[..]))); +} + +#[test] +fn is_not() { + use crate::bytes::streaming::is_not; + + fn a_or_b(i: &[u8]) -> IResult<&[u8], &[u8]> { + is_not("ab")(i) + } + + let a = &b"cdab"[..]; + assert_eq!(a_or_b(a), Ok((&b"ab"[..], &b"cd"[..]))); + + let b = &b"cbde"[..]; + assert_eq!(a_or_b(b), Ok((&b"bde"[..], &b"c"[..]))); + + let c = &b"abab"[..]; + assert_eq!( + a_or_b(c), + Err(Err::Error(error_position!(c, ErrorKind::IsNot))) + ); + + let d = &b"cdefba"[..]; + assert_eq!(a_or_b(d), Ok((&b"ba"[..], &b"cdef"[..]))); + + let e = &b"e"[..]; + assert_eq!(a_or_b(e), Err(Err::Incomplete(Needed::new(1)))); +} + +#[cfg(feature = "alloc")] +#[allow(unused_variables)] +#[test] +fn escaping() { + use crate::character::streaming::one_of; + + fn esc(i: &[u8]) -> IResult<&[u8], &[u8]> { + escaped(alpha, '\\', one_of("\"n\\"))(i) + } + assert_eq!(esc(&b"abcd;"[..]), Ok((&b";"[..], &b"abcd"[..]))); + assert_eq!(esc(&b"ab\\\"cd;"[..]), Ok((&b";"[..], &b"ab\\\"cd"[..]))); + assert_eq!(esc(&b"\\\"abcd;"[..]), Ok((&b";"[..], &b"\\\"abcd"[..]))); + assert_eq!(esc(&b"\\n;"[..]), Ok((&b";"[..], &b"\\n"[..]))); + assert_eq!(esc(&b"ab\\\"12"[..]), Ok((&b"12"[..], &b"ab\\\""[..]))); + assert_eq!( + esc(&b"AB\\"[..]), + Err(Err::Error(error_position!( + &b"AB\\"[..], + ErrorKind::Escaped + ))) + ); + assert_eq!( + esc(&b"AB\\A"[..]), + Err(Err::Error(error_node_position!( + &b"AB\\A"[..], + ErrorKind::Escaped, + error_position!(&b"A"[..], ErrorKind::OneOf) + ))) + ); + + fn esc2(i: &[u8]) -> IResult<&[u8], &[u8]> { + escaped(digit, '\\', one_of("\"n\\"))(i) + } + assert_eq!(esc2(&b"12\\nnn34"[..]), Ok((&b"nn34"[..], &b"12\\n"[..]))); +} + +#[cfg(feature = "alloc")] +#[test] +fn escaping_str() { + use crate::character::streaming::one_of; + + fn esc(i: &str) -> IResult<&str, &str> { + escaped(alpha, '\\', one_of("\"n\\"))(i) + } + assert_eq!(esc("abcd;"), Ok((";", "abcd"))); + assert_eq!(esc("ab\\\"cd;"), Ok((";", "ab\\\"cd"))); + assert_eq!(esc("\\\"abcd;"), Ok((";", "\\\"abcd"))); + assert_eq!(esc("\\n;"), Ok((";", "\\n"))); + assert_eq!(esc("ab\\\"12"), Ok(("12", "ab\\\""))); + assert_eq!( + esc("AB\\"), + Err(Err::Error(error_position!("AB\\", ErrorKind::Escaped))) + ); + assert_eq!( + esc("AB\\A"), + Err(Err::Error(error_node_position!( + "AB\\A", + ErrorKind::Escaped, + error_position!("A", ErrorKind::OneOf) + ))) + ); + + fn esc2(i: &str) -> IResult<&str, &str> { + escaped(digit, '\\', one_of("\"n\\"))(i) + } + assert_eq!(esc2("12\\nnn34"), Ok(("nn34", "12\\n"))); + + fn esc3(i: &str) -> IResult<&str, &str> { + escaped(alpha, '\u{241b}', one_of("\"n"))(i) + } + assert_eq!(esc3("ab␛ncd;"), Ok((";", "ab␛ncd"))); +} + +#[cfg(feature = "alloc")] +fn to_s(i: Vec<u8>) -> String { + String::from_utf8_lossy(&i).into_owned() +} + +#[cfg(feature = "alloc")] +#[test] +fn escape_transform() { + fn esc(i: &[u8]) -> IResult<&[u8], String> { + map( + escaped_transform( + alpha, + '\\', + alt(( + value(&b"\\"[..], tag("\\")), + value(&b"\""[..], tag("\"")), + value(&b"\n"[..], tag("n")), + )), + ), + to_s, + )(i) + } + + assert_eq!(esc(&b"abcd;"[..]), Ok((&b";"[..], String::from("abcd")))); + assert_eq!( + esc(&b"ab\\\"cd;"[..]), + Ok((&b";"[..], String::from("ab\"cd"))) + ); + assert_eq!( + esc(&b"\\\"abcd;"[..]), + Ok((&b";"[..], String::from("\"abcd"))) + ); + assert_eq!(esc(&b"\\n;"[..]), Ok((&b";"[..], String::from("\n")))); + assert_eq!( + esc(&b"ab\\\"12"[..]), + Ok((&b"12"[..], String::from("ab\""))) + ); + assert_eq!( + esc(&b"AB\\"[..]), + Err(Err::Error(error_position!( + &b"\\"[..], + ErrorKind::EscapedTransform + ))) + ); + assert_eq!( + esc(&b"AB\\A"[..]), + Err(Err::Error(error_node_position!( + &b"AB\\A"[..], + ErrorKind::EscapedTransform, + error_position!(&b"A"[..], ErrorKind::Tag) + ))) + ); + + fn esc2(i: &[u8]) -> IResult<&[u8], String> { + map( + escaped_transform( + alpha, + '&', + alt(( + value("è".as_bytes(), tag("egrave;")), + value("à".as_bytes(), tag("agrave;")), + )), + ), + to_s, + )(i) + } + assert_eq!( + esc2(&b"abèDEF;"[..]), + Ok((&b";"[..], String::from("abèDEF"))) + ); + assert_eq!( + esc2(&b"abèDàEF;"[..]), + Ok((&b";"[..], String::from("abèDàEF"))) + ); +} + +#[cfg(feature = "std")] +#[test] +fn escape_transform_str() { + fn esc(i: &str) -> IResult<&str, String> { + escaped_transform( + alpha, + '\\', + alt(( + value("\\", tag("\\")), + value("\"", tag("\"")), + value("\n", tag("n")), + )), + )(i) + } + + assert_eq!(esc("abcd;"), Ok((";", String::from("abcd")))); + assert_eq!(esc("ab\\\"cd;"), Ok((";", String::from("ab\"cd")))); + assert_eq!(esc("\\\"abcd;"), Ok((";", String::from("\"abcd")))); + assert_eq!(esc("\\n;"), Ok((";", String::from("\n")))); + assert_eq!(esc("ab\\\"12"), Ok(("12", String::from("ab\"")))); + assert_eq!( + esc("AB\\"), + Err(Err::Error(error_position!( + "\\", + ErrorKind::EscapedTransform + ))) + ); + assert_eq!( + esc("AB\\A"), + Err(Err::Error(error_node_position!( + "AB\\A", + ErrorKind::EscapedTransform, + error_position!("A", ErrorKind::Tag) + ))) + ); + + fn esc2(i: &str) -> IResult<&str, String> { + escaped_transform( + alpha, + '&', + alt((value("è", tag("egrave;")), value("à", tag("agrave;")))), + )(i) + } + assert_eq!(esc2("abèDEF;"), Ok((";", String::from("abèDEF")))); + assert_eq!( + esc2("abèDàEF;"), + Ok((";", String::from("abèDàEF"))) + ); + + fn esc3(i: &str) -> IResult<&str, String> { + escaped_transform( + alpha, + '␛', + alt((value("\0", tag("0")), value("\n", tag("n")))), + )(i) + } + assert_eq!(esc3("a␛0bc␛n"), Ok(("", String::from("a\0bc\n")))); +} + +#[test] +fn take_until_incomplete() { + use crate::bytes::streaming::take_until; + fn y(i: &[u8]) -> IResult<&[u8], &[u8]> { + take_until("end")(i) + } + assert_eq!(y(&b"nd"[..]), Err(Err::Incomplete(Needed::Unknown))); + assert_eq!(y(&b"123"[..]), Err(Err::Incomplete(Needed::Unknown))); + assert_eq!(y(&b"123en"[..]), Err(Err::Incomplete(Needed::Unknown))); +} + +#[test] +fn take_until_incomplete_s() { + use crate::bytes::streaming::take_until; + fn ys(i: &str) -> IResult<&str, &str> { + take_until("end")(i) + } + assert_eq!(ys("123en"), Err(Err::Incomplete(Needed::Unknown))); +} + +#[test] +fn recognize() { + use crate::bytes::streaming::{tag, take}; + use crate::combinator::recognize; + use crate::sequence::delimited; + + fn x(i: &[u8]) -> IResult<&[u8], &[u8]> { + recognize(delimited(tag("<!--"), take(5_usize), tag("-->")))(i) + } + let r = x(&b"<!-- abc --> aaa"[..]); + assert_eq!(r, Ok((&b" aaa"[..], &b"<!-- abc -->"[..]))); + + let semicolon = &b";"[..]; + + fn ya(i: &[u8]) -> IResult<&[u8], &[u8]> { + recognize(alpha)(i) + } + let ra = ya(&b"abc;"[..]); + assert_eq!(ra, Ok((semicolon, &b"abc"[..]))); + + fn yd(i: &[u8]) -> IResult<&[u8], &[u8]> { + recognize(digit)(i) + } + let rd = yd(&b"123;"[..]); + assert_eq!(rd, Ok((semicolon, &b"123"[..]))); + + fn yhd(i: &[u8]) -> IResult<&[u8], &[u8]> { + recognize(hex_digit)(i) + } + let rhd = yhd(&b"123abcDEF;"[..]); + assert_eq!(rhd, Ok((semicolon, &b"123abcDEF"[..]))); + + fn yod(i: &[u8]) -> IResult<&[u8], &[u8]> { + recognize(oct_digit)(i) + } + let rod = yod(&b"1234567;"[..]); + assert_eq!(rod, Ok((semicolon, &b"1234567"[..]))); + + fn yan(i: &[u8]) -> IResult<&[u8], &[u8]> { + recognize(alphanumeric)(i) + } + let ran = yan(&b"123abc;"[..]); + assert_eq!(ran, Ok((semicolon, &b"123abc"[..]))); + + fn ys(i: &[u8]) -> IResult<&[u8], &[u8]> { + recognize(space)(i) + } + let rs = ys(&b" \t;"[..]); + assert_eq!(rs, Ok((semicolon, &b" \t"[..]))); + + fn yms(i: &[u8]) -> IResult<&[u8], &[u8]> { + recognize(multispace)(i) + } + let rms = yms(&b" \t\r\n;"[..]); + assert_eq!(rms, Ok((semicolon, &b" \t\r\n"[..]))); +} + +#[test] +fn take_while() { + use crate::bytes::streaming::take_while; + + fn f(i: &[u8]) -> IResult<&[u8], &[u8]> { + take_while(is_alphabetic)(i) + } + let a = b""; + let b = b"abcd"; + let c = b"abcd123"; + let d = b"123"; + + assert_eq!(f(&a[..]), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(f(&b[..]), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(f(&c[..]), Ok((&d[..], &b[..]))); + assert_eq!(f(&d[..]), Ok((&d[..], &a[..]))); +} + +#[test] +fn take_while1() { + use crate::bytes::streaming::take_while1; + + fn f(i: &[u8]) -> IResult<&[u8], &[u8]> { + take_while1(is_alphabetic)(i) + } + let a = b""; + let b = b"abcd"; + let c = b"abcd123"; + let d = b"123"; + + assert_eq!(f(&a[..]), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(f(&b[..]), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(f(&c[..]), Ok((&b"123"[..], &b[..]))); + assert_eq!( + f(&d[..]), + Err(Err::Error(error_position!(&d[..], ErrorKind::TakeWhile1))) + ); +} + +#[test] +fn take_while_m_n() { + use crate::bytes::streaming::take_while_m_n; + + fn x(i: &[u8]) -> IResult<&[u8], &[u8]> { + take_while_m_n(2, 4, is_alphabetic)(i) + } + let a = b""; + let b = b"a"; + let c = b"abc"; + let d = b"abc123"; + let e = b"abcde"; + let f = b"123"; + + assert_eq!(x(&a[..]), Err(Err::Incomplete(Needed::new(2)))); + assert_eq!(x(&b[..]), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(x(&c[..]), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(x(&d[..]), Ok((&b"123"[..], &c[..]))); + assert_eq!(x(&e[..]), Ok((&b"e"[..], &b"abcd"[..]))); + assert_eq!( + x(&f[..]), + Err(Err::Error(error_position!(&f[..], ErrorKind::TakeWhileMN))) + ); +} + +#[test] +fn take_till() { + use crate::bytes::streaming::take_till; + + fn f(i: &[u8]) -> IResult<&[u8], &[u8]> { + take_till(is_alphabetic)(i) + } + let a = b""; + let b = b"abcd"; + let c = b"123abcd"; + let d = b"123"; + + assert_eq!(f(&a[..]), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(f(&b[..]), Ok((&b"abcd"[..], &b""[..]))); + assert_eq!(f(&c[..]), Ok((&b"abcd"[..], &b"123"[..]))); + assert_eq!(f(&d[..]), Err(Err::Incomplete(Needed::new(1)))); +} + +#[test] +fn take_till1() { + use crate::bytes::streaming::take_till1; + + fn f(i: &[u8]) -> IResult<&[u8], &[u8]> { + take_till1(is_alphabetic)(i) + } + let a = b""; + let b = b"abcd"; + let c = b"123abcd"; + let d = b"123"; + + assert_eq!(f(&a[..]), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!( + f(&b[..]), + Err(Err::Error(error_position!(&b[..], ErrorKind::TakeTill1))) + ); + assert_eq!(f(&c[..]), Ok((&b"abcd"[..], &b"123"[..]))); + assert_eq!(f(&d[..]), Err(Err::Incomplete(Needed::new(1)))); +} + +#[test] +fn take_while_utf8() { + use crate::bytes::streaming::take_while; + + fn f(i: &str) -> IResult<&str, &str> { + take_while(|c| c != '點')(i) + } + + assert_eq!(f(""), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(f("abcd"), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(f("abcd點"), Ok(("點", "abcd"))); + assert_eq!(f("abcd點a"), Ok(("點a", "abcd"))); + + fn g(i: &str) -> IResult<&str, &str> { + take_while(|c| c == '點')(i) + } + + assert_eq!(g(""), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(g("點abcd"), Ok(("abcd", "點"))); + assert_eq!(g("點點點a"), Ok(("a", "點點點"))); +} + +#[test] +fn take_till_utf8() { + use crate::bytes::streaming::take_till; + + fn f(i: &str) -> IResult<&str, &str> { + take_till(|c| c == '點')(i) + } + + assert_eq!(f(""), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(f("abcd"), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(f("abcd點"), Ok(("點", "abcd"))); + assert_eq!(f("abcd點a"), Ok(("點a", "abcd"))); + + fn g(i: &str) -> IResult<&str, &str> { + take_till(|c| c != '點')(i) + } + + assert_eq!(g(""), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(g("點abcd"), Ok(("abcd", "點"))); + assert_eq!(g("點點點a"), Ok(("a", "點點點"))); +} + +#[test] +fn take_utf8() { + use crate::bytes::streaming::{take, take_while}; + + fn f(i: &str) -> IResult<&str, &str> { + take(3_usize)(i) + } + + assert_eq!(f(""), Err(Err::Incomplete(Needed::Unknown))); + assert_eq!(f("ab"), Err(Err::Incomplete(Needed::Unknown))); + assert_eq!(f("點"), Err(Err::Incomplete(Needed::Unknown))); + assert_eq!(f("ab點cd"), Ok(("cd", "ab點"))); + assert_eq!(f("a點bcd"), Ok(("cd", "a點b"))); + assert_eq!(f("a點b"), Ok(("", "a點b"))); + + fn g(i: &str) -> IResult<&str, &str> { + take_while(|c| c == '點')(i) + } + + assert_eq!(g(""), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(g("點abcd"), Ok(("abcd", "點"))); + assert_eq!(g("點點點a"), Ok(("a", "點點點"))); +} + +#[test] +fn take_while_m_n_utf8() { + use crate::bytes::streaming::take_while_m_n; + + fn parser(i: &str) -> IResult<&str, &str> { + take_while_m_n(1, 1, |c| c == 'A' || c == '😃')(i) + } + assert_eq!(parser("A!"), Ok(("!", "A"))); + assert_eq!(parser("😃!"), Ok(("!", "😃"))); +} + +#[test] +fn take_while_m_n_utf8_full_match() { + use crate::bytes::streaming::take_while_m_n; + + fn parser(i: &str) -> IResult<&str, &str> { + take_while_m_n(1, 1, |c: char| c.is_alphabetic())(i) + } + assert_eq!(parser("øn"), Ok(("n", "ø"))); +} + +#[test] +#[cfg(feature = "std")] +fn recognize_take_while() { + use crate::bytes::streaming::take_while; + use crate::character::is_alphanumeric; + use crate::combinator::recognize; + + fn x(i: &[u8]) -> IResult<&[u8], &[u8]> { + take_while(is_alphanumeric)(i) + } + fn y(i: &[u8]) -> IResult<&[u8], &[u8]> { + recognize(x)(i) + } + assert_eq!(x(&b"ab."[..]), Ok((&b"."[..], &b"ab"[..]))); + println!("X: {:?}", x(&b"ab"[..])); + assert_eq!(y(&b"ab."[..]), Ok((&b"."[..], &b"ab"[..]))); +} + +#[test] +fn length_bytes() { + use crate::{bytes::streaming::tag, multi::length_data, number::streaming::le_u8}; + + fn x(i: &[u8]) -> IResult<&[u8], &[u8]> { + length_data(le_u8)(i) + } + assert_eq!(x(b"\x02..>>"), Ok((&b">>"[..], &b".."[..]))); + assert_eq!(x(b"\x02.."), Ok((&[][..], &b".."[..]))); + assert_eq!(x(b"\x02."), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(x(b"\x02"), Err(Err::Incomplete(Needed::new(2)))); + + fn y(i: &[u8]) -> IResult<&[u8], &[u8]> { + let (i, _) = tag("magic")(i)?; + length_data(le_u8)(i) + } + assert_eq!(y(b"magic\x02..>>"), Ok((&b">>"[..], &b".."[..]))); + assert_eq!(y(b"magic\x02.."), Ok((&[][..], &b".."[..]))); + assert_eq!(y(b"magic\x02."), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(y(b"magic\x02"), Err(Err::Incomplete(Needed::new(2)))); +} + +#[cfg(feature = "alloc")] +#[test] +fn case_insensitive() { + use crate::bytes::streaming::tag_no_case; + + fn test(i: &[u8]) -> IResult<&[u8], &[u8]> { + tag_no_case("ABcd")(i) + } + assert_eq!(test(&b"aBCdefgh"[..]), Ok((&b"efgh"[..], &b"aBCd"[..]))); + assert_eq!(test(&b"abcdefgh"[..]), Ok((&b"efgh"[..], &b"abcd"[..]))); + assert_eq!(test(&b"ABCDefgh"[..]), Ok((&b"efgh"[..], &b"ABCD"[..]))); + assert_eq!(test(&b"ab"[..]), Err(Err::Incomplete(Needed::new(2)))); + assert_eq!( + test(&b"Hello"[..]), + Err(Err::Error(error_position!(&b"Hello"[..], ErrorKind::Tag))) + ); + assert_eq!( + test(&b"Hel"[..]), + Err(Err::Error(error_position!(&b"Hel"[..], ErrorKind::Tag))) + ); + + fn test2(i: &str) -> IResult<&str, &str> { + tag_no_case("ABcd")(i) + } + assert_eq!(test2("aBCdefgh"), Ok(("efgh", "aBCd"))); + assert_eq!(test2("abcdefgh"), Ok(("efgh", "abcd"))); + assert_eq!(test2("ABCDefgh"), Ok(("efgh", "ABCD"))); + assert_eq!(test2("ab"), Err(Err::Incomplete(Needed::new(2)))); + assert_eq!( + test2("Hello"), + Err(Err::Error(error_position!(&"Hello"[..], ErrorKind::Tag))) + ); + assert_eq!( + test2("Hel"), + Err(Err::Error(error_position!(&"Hel"[..], ErrorKind::Tag))) + ); +} + +#[test] +fn tag_fixed_size_array() { + use crate::bytes::streaming::tag; + + fn test(i: &[u8]) -> IResult<&[u8], &[u8]> { + tag([0x42])(i) + } + fn test2(i: &[u8]) -> IResult<&[u8], &[u8]> { + tag(&[0x42])(i) + } + let input = [0x42, 0x00]; + assert_eq!(test(&input), Ok((&b"\x00"[..], &b"\x42"[..]))); + assert_eq!(test2(&input), Ok((&b"\x00"[..], &b"\x42"[..]))); +} diff --git a/third_party/rust/nom/src/character/complete.rs b/third_party/rust/nom/src/character/complete.rs new file mode 100644 index 0000000000..7cb760a683 --- /dev/null +++ b/third_party/rust/nom/src/character/complete.rs @@ -0,0 +1,1227 @@ +//! Character specific parsers and combinators, complete input version. +//! +//! Functions recognizing specific characters. + +use crate::branch::alt; +use crate::combinator::opt; +use crate::error::ErrorKind; +use crate::error::ParseError; +use crate::internal::{Err, IResult}; +use crate::lib::std::ops::{Range, RangeFrom, RangeTo}; +use crate::traits::{ + AsChar, FindToken, InputIter, InputLength, InputTake, InputTakeAtPosition, Slice, +}; +use crate::traits::{Compare, CompareResult}; + +/// Recognizes one character. +/// +/// *Complete version*: Will return an error if there's not enough input data. +/// # Example +/// +/// ``` +/// # use nom::{Err, error::{ErrorKind, Error}, IResult}; +/// # use nom::character::complete::char; +/// fn parser(i: &str) -> IResult<&str, char> { +/// char('a')(i) +/// } +/// assert_eq!(parser("abc"), Ok(("bc", 'a'))); +/// assert_eq!(parser(" abc"), Err(Err::Error(Error::new(" abc", ErrorKind::Char)))); +/// assert_eq!(parser("bc"), Err(Err::Error(Error::new("bc", ErrorKind::Char)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Char)))); +/// ``` +pub fn char<I, Error: ParseError<I>>(c: char) -> impl Fn(I) -> IResult<I, char, Error> +where + I: Slice<RangeFrom<usize>> + InputIter, + <I as InputIter>::Item: AsChar, +{ + move |i: I| match (i).iter_elements().next().map(|t| { + let b = t.as_char() == c; + (&c, b) + }) { + Some((c, true)) => Ok((i.slice(c.len()..), c.as_char())), + _ => Err(Err::Error(Error::from_char(i, c))), + } +} + +/// Recognizes one character and checks that it satisfies a predicate +/// +/// *Complete version*: Will return an error if there's not enough input data. +/// # Example +/// +/// ``` +/// # use nom::{Err, error::{ErrorKind, Error}, Needed, IResult}; +/// # use nom::character::complete::satisfy; +/// fn parser(i: &str) -> IResult<&str, char> { +/// satisfy(|c| c == 'a' || c == 'b')(i) +/// } +/// assert_eq!(parser("abc"), Ok(("bc", 'a'))); +/// assert_eq!(parser("cd"), Err(Err::Error(Error::new("cd", ErrorKind::Satisfy)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Satisfy)))); +/// ``` +pub fn satisfy<F, I, Error: ParseError<I>>(cond: F) -> impl Fn(I) -> IResult<I, char, Error> +where + I: Slice<RangeFrom<usize>> + InputIter, + <I as InputIter>::Item: AsChar, + F: Fn(char) -> bool, +{ + move |i: I| match (i).iter_elements().next().map(|t| { + let c = t.as_char(); + let b = cond(c); + (c, b) + }) { + Some((c, true)) => Ok((i.slice(c.len()..), c)), + _ => Err(Err::Error(Error::from_error_kind(i, ErrorKind::Satisfy))), + } +} + +/// Recognizes one of the provided characters. +/// +/// *Complete version*: Will return an error if there's not enough input data. +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind}; +/// # use nom::character::complete::one_of; +/// assert_eq!(one_of::<_, _, (&str, ErrorKind)>("abc")("b"), Ok(("", 'b'))); +/// assert_eq!(one_of::<_, _, (&str, ErrorKind)>("a")("bc"), Err(Err::Error(("bc", ErrorKind::OneOf)))); +/// assert_eq!(one_of::<_, _, (&str, ErrorKind)>("a")(""), Err(Err::Error(("", ErrorKind::OneOf)))); +/// ``` +pub fn one_of<I, T, Error: ParseError<I>>(list: T) -> impl Fn(I) -> IResult<I, char, Error> +where + I: Slice<RangeFrom<usize>> + InputIter, + <I as InputIter>::Item: AsChar + Copy, + T: FindToken<<I as InputIter>::Item>, +{ + move |i: I| match (i).iter_elements().next().map(|c| (c, list.find_token(c))) { + Some((c, true)) => Ok((i.slice(c.len()..), c.as_char())), + _ => Err(Err::Error(Error::from_error_kind(i, ErrorKind::OneOf))), + } +} + +/// Recognizes a character that is not in the provided characters. +/// +/// *Complete version*: Will return an error if there's not enough input data. +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind}; +/// # use nom::character::complete::none_of; +/// assert_eq!(none_of::<_, _, (&str, ErrorKind)>("abc")("z"), Ok(("", 'z'))); +/// assert_eq!(none_of::<_, _, (&str, ErrorKind)>("ab")("a"), Err(Err::Error(("a", ErrorKind::NoneOf)))); +/// assert_eq!(none_of::<_, _, (&str, ErrorKind)>("a")(""), Err(Err::Error(("", ErrorKind::NoneOf)))); +/// ``` +pub fn none_of<I, T, Error: ParseError<I>>(list: T) -> impl Fn(I) -> IResult<I, char, Error> +where + I: Slice<RangeFrom<usize>> + InputIter, + <I as InputIter>::Item: AsChar + Copy, + T: FindToken<<I as InputIter>::Item>, +{ + move |i: I| match (i).iter_elements().next().map(|c| (c, !list.find_token(c))) { + Some((c, true)) => Ok((i.slice(c.len()..), c.as_char())), + _ => Err(Err::Error(Error::from_error_kind(i, ErrorKind::NoneOf))), + } +} + +/// Recognizes the string "\r\n". +/// +/// *Complete version*: Will return an error if there's not enough input data. +/// # Example +/// +/// ``` +/// # use nom::{Err, error::{Error, ErrorKind}, IResult}; +/// # use nom::character::complete::crlf; +/// fn parser(input: &str) -> IResult<&str, &str> { +/// crlf(input) +/// } +/// +/// assert_eq!(parser("\r\nc"), Ok(("c", "\r\n"))); +/// assert_eq!(parser("ab\r\nc"), Err(Err::Error(Error::new("ab\r\nc", ErrorKind::CrLf)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::CrLf)))); +/// ``` +pub fn crlf<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: Slice<Range<usize>> + Slice<RangeFrom<usize>>, + T: InputIter, + T: Compare<&'static str>, +{ + match input.compare("\r\n") { + //FIXME: is this the right index? + CompareResult::Ok => Ok((input.slice(2..), input.slice(0..2))), + _ => { + let e: ErrorKind = ErrorKind::CrLf; + Err(Err::Error(E::from_error_kind(input, e))) + } + } +} + +//FIXME: there's still an incomplete +/// Recognizes a string of any char except '\r\n' or '\n'. +/// +/// *Complete version*: Will return an error if there's not enough input data. +/// # Example +/// +/// ``` +/// # use nom::{Err, error::{Error, ErrorKind}, IResult, Needed}; +/// # use nom::character::complete::not_line_ending; +/// fn parser(input: &str) -> IResult<&str, &str> { +/// not_line_ending(input) +/// } +/// +/// assert_eq!(parser("ab\r\nc"), Ok(("\r\nc", "ab"))); +/// assert_eq!(parser("ab\nc"), Ok(("\nc", "ab"))); +/// assert_eq!(parser("abc"), Ok(("", "abc"))); +/// assert_eq!(parser(""), Ok(("", ""))); +/// assert_eq!(parser("a\rb\nc"), Err(Err::Error(Error { input: "a\rb\nc", code: ErrorKind::Tag }))); +/// assert_eq!(parser("a\rbc"), Err(Err::Error(Error { input: "a\rbc", code: ErrorKind::Tag }))); +/// ``` +pub fn not_line_ending<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: Slice<Range<usize>> + Slice<RangeFrom<usize>> + Slice<RangeTo<usize>>, + T: InputIter + InputLength, + T: Compare<&'static str>, + <T as InputIter>::Item: AsChar, + <T as InputIter>::Item: AsChar, +{ + match input.position(|item| { + let c = item.as_char(); + c == '\r' || c == '\n' + }) { + None => Ok((input.slice(input.input_len()..), input)), + Some(index) => { + let mut it = input.slice(index..).iter_elements(); + let nth = it.next().unwrap().as_char(); + if nth == '\r' { + let sliced = input.slice(index..); + let comp = sliced.compare("\r\n"); + match comp { + //FIXME: calculate the right index + CompareResult::Ok => Ok((input.slice(index..), input.slice(..index))), + _ => { + let e: ErrorKind = ErrorKind::Tag; + Err(Err::Error(E::from_error_kind(input, e))) + } + } + } else { + Ok((input.slice(index..), input.slice(..index))) + } + } + } +} + +/// Recognizes an end of line (both '\n' and '\r\n'). +/// +/// *Complete version*: Will return an error if there's not enough input data. +/// # Example +/// +/// ``` +/// # use nom::{Err, error::{Error, ErrorKind}, IResult, Needed}; +/// # use nom::character::complete::line_ending; +/// fn parser(input: &str) -> IResult<&str, &str> { +/// line_ending(input) +/// } +/// +/// assert_eq!(parser("\r\nc"), Ok(("c", "\r\n"))); +/// assert_eq!(parser("ab\r\nc"), Err(Err::Error(Error::new("ab\r\nc", ErrorKind::CrLf)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::CrLf)))); +/// ``` +pub fn line_ending<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: Slice<Range<usize>> + Slice<RangeFrom<usize>> + Slice<RangeTo<usize>>, + T: InputIter + InputLength, + T: Compare<&'static str>, +{ + match input.compare("\n") { + CompareResult::Ok => Ok((input.slice(1..), input.slice(0..1))), + CompareResult::Incomplete => Err(Err::Error(E::from_error_kind(input, ErrorKind::CrLf))), + CompareResult::Error => { + match input.compare("\r\n") { + //FIXME: is this the right index? + CompareResult::Ok => Ok((input.slice(2..), input.slice(0..2))), + _ => Err(Err::Error(E::from_error_kind(input, ErrorKind::CrLf))), + } + } + } +} + +/// Matches a newline character '\n'. +/// +/// *Complete version*: Will return an error if there's not enough input data. +/// # Example +/// +/// ``` +/// # use nom::{Err, error::{Error, ErrorKind}, IResult, Needed}; +/// # use nom::character::complete::newline; +/// fn parser(input: &str) -> IResult<&str, char> { +/// newline(input) +/// } +/// +/// assert_eq!(parser("\nc"), Ok(("c", '\n'))); +/// assert_eq!(parser("\r\nc"), Err(Err::Error(Error::new("\r\nc", ErrorKind::Char)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Char)))); +/// ``` +pub fn newline<I, Error: ParseError<I>>(input: I) -> IResult<I, char, Error> +where + I: Slice<RangeFrom<usize>> + InputIter, + <I as InputIter>::Item: AsChar, +{ + char('\n')(input) +} + +/// Matches a tab character '\t'. +/// +/// *Complete version*: Will return an error if there's not enough input data. +/// # Example +/// +/// ``` +/// # use nom::{Err, error::{Error, ErrorKind}, IResult, Needed}; +/// # use nom::character::complete::tab; +/// fn parser(input: &str) -> IResult<&str, char> { +/// tab(input) +/// } +/// +/// assert_eq!(parser("\tc"), Ok(("c", '\t'))); +/// assert_eq!(parser("\r\nc"), Err(Err::Error(Error::new("\r\nc", ErrorKind::Char)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Char)))); +/// ``` +pub fn tab<I, Error: ParseError<I>>(input: I) -> IResult<I, char, Error> +where + I: Slice<RangeFrom<usize>> + InputIter, + <I as InputIter>::Item: AsChar, +{ + char('\t')(input) +} + +/// Matches one byte as a character. Note that the input type will +/// accept a `str`, but not a `&[u8]`, unlike many other nom parsers. +/// +/// *Complete version*: Will return an error if there's not enough input data. +/// # Example +/// +/// ``` +/// # use nom::{character::complete::anychar, Err, error::{Error, ErrorKind}, IResult}; +/// fn parser(input: &str) -> IResult<&str, char> { +/// anychar(input) +/// } +/// +/// assert_eq!(parser("abc"), Ok(("bc",'a'))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Eof)))); +/// ``` +pub fn anychar<T, E: ParseError<T>>(input: T) -> IResult<T, char, E> +where + T: InputIter + InputLength + Slice<RangeFrom<usize>>, + <T as InputIter>::Item: AsChar, +{ + let mut it = input.iter_indices(); + match it.next() { + None => Err(Err::Error(E::from_error_kind(input, ErrorKind::Eof))), + Some((_, c)) => match it.next() { + None => Ok((input.slice(input.input_len()..), c.as_char())), + Some((idx, _)) => Ok((input.slice(idx..), c.as_char())), + }, + } +} + +/// Recognizes zero or more lowercase and uppercase ASCII alphabetic characters: a-z, A-Z +/// +/// *Complete version*: Will return the whole input if no terminating token is found (a non +/// alphabetic character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::complete::alpha0; +/// fn parser(input: &str) -> IResult<&str, &str> { +/// alpha0(input) +/// } +/// +/// assert_eq!(parser("ab1c"), Ok(("1c", "ab"))); +/// assert_eq!(parser("1c"), Ok(("1c", ""))); +/// assert_eq!(parser(""), Ok(("", ""))); +/// ``` +pub fn alpha0<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + input.split_at_position_complete(|item| !item.is_alpha()) +} + +/// Recognizes one or more lowercase and uppercase ASCII alphabetic characters: a-z, A-Z +/// +/// *Complete version*: Will return an error if there's not enough input data, +/// or the whole input if no terminating token is found (a non alphabetic character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::{Error, ErrorKind}, IResult, Needed}; +/// # use nom::character::complete::alpha1; +/// fn parser(input: &str) -> IResult<&str, &str> { +/// alpha1(input) +/// } +/// +/// assert_eq!(parser("aB1c"), Ok(("1c", "aB"))); +/// assert_eq!(parser("1c"), Err(Err::Error(Error::new("1c", ErrorKind::Alpha)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Alpha)))); +/// ``` +pub fn alpha1<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + input.split_at_position1_complete(|item| !item.is_alpha(), ErrorKind::Alpha) +} + +/// Recognizes zero or more ASCII numerical characters: 0-9 +/// +/// *Complete version*: Will return an error if there's not enough input data, +/// or the whole input if no terminating token is found (a non digit character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::complete::digit0; +/// fn parser(input: &str) -> IResult<&str, &str> { +/// digit0(input) +/// } +/// +/// assert_eq!(parser("21c"), Ok(("c", "21"))); +/// assert_eq!(parser("21"), Ok(("", "21"))); +/// assert_eq!(parser("a21c"), Ok(("a21c", ""))); +/// assert_eq!(parser(""), Ok(("", ""))); +/// ``` +pub fn digit0<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + input.split_at_position_complete(|item| !item.is_dec_digit()) +} + +/// Recognizes one or more ASCII numerical characters: 0-9 +/// +/// *Complete version*: Will return an error if there's not enough input data, +/// or the whole input if no terminating token is found (a non digit character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::{Error, ErrorKind}, IResult, Needed}; +/// # use nom::character::complete::digit1; +/// fn parser(input: &str) -> IResult<&str, &str> { +/// digit1(input) +/// } +/// +/// assert_eq!(parser("21c"), Ok(("c", "21"))); +/// assert_eq!(parser("c1"), Err(Err::Error(Error::new("c1", ErrorKind::Digit)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Digit)))); +/// ``` +/// +/// ## Parsing an integer +/// You can use `digit1` in combination with [`map_res`] to parse an integer: +/// +/// ``` +/// # use nom::{Err, error::{Error, ErrorKind}, IResult, Needed}; +/// # use nom::combinator::map_res; +/// # use nom::character::complete::digit1; +/// fn parser(input: &str) -> IResult<&str, u32> { +/// map_res(digit1, str::parse)(input) +/// } +/// +/// assert_eq!(parser("416"), Ok(("", 416))); +/// assert_eq!(parser("12b"), Ok(("b", 12))); +/// assert!(parser("b").is_err()); +/// ``` +/// +/// [`map_res`]: crate::combinator::map_res +pub fn digit1<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + input.split_at_position1_complete(|item| !item.is_dec_digit(), ErrorKind::Digit) +} + +/// Recognizes zero or more ASCII hexadecimal numerical characters: 0-9, A-F, a-f +/// +/// *Complete version*: Will return the whole input if no terminating token is found (a non hexadecimal digit character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::complete::hex_digit0; +/// fn parser(input: &str) -> IResult<&str, &str> { +/// hex_digit0(input) +/// } +/// +/// assert_eq!(parser("21cZ"), Ok(("Z", "21c"))); +/// assert_eq!(parser("Z21c"), Ok(("Z21c", ""))); +/// assert_eq!(parser(""), Ok(("", ""))); +/// ``` +pub fn hex_digit0<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + input.split_at_position_complete(|item| !item.is_hex_digit()) +} +/// Recognizes one or more ASCII hexadecimal numerical characters: 0-9, A-F, a-f +/// +/// *Complete version*: Will return an error if there's not enough input data, +/// or the whole input if no terminating token is found (a non hexadecimal digit character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::{Error, ErrorKind}, IResult, Needed}; +/// # use nom::character::complete::hex_digit1; +/// fn parser(input: &str) -> IResult<&str, &str> { +/// hex_digit1(input) +/// } +/// +/// assert_eq!(parser("21cZ"), Ok(("Z", "21c"))); +/// assert_eq!(parser("H2"), Err(Err::Error(Error::new("H2", ErrorKind::HexDigit)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::HexDigit)))); +/// ``` +pub fn hex_digit1<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + input.split_at_position1_complete(|item| !item.is_hex_digit(), ErrorKind::HexDigit) +} + +/// Recognizes zero or more octal characters: 0-7 +/// +/// *Complete version*: Will return the whole input if no terminating token is found (a non octal +/// digit character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::complete::oct_digit0; +/// fn parser(input: &str) -> IResult<&str, &str> { +/// oct_digit0(input) +/// } +/// +/// assert_eq!(parser("21cZ"), Ok(("cZ", "21"))); +/// assert_eq!(parser("Z21c"), Ok(("Z21c", ""))); +/// assert_eq!(parser(""), Ok(("", ""))); +/// ``` +pub fn oct_digit0<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + input.split_at_position_complete(|item| !item.is_oct_digit()) +} + +/// Recognizes one or more octal characters: 0-7 +/// +/// *Complete version*: Will return an error if there's not enough input data, +/// or the whole input if no terminating token is found (a non octal digit character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::{Error, ErrorKind}, IResult, Needed}; +/// # use nom::character::complete::oct_digit1; +/// fn parser(input: &str) -> IResult<&str, &str> { +/// oct_digit1(input) +/// } +/// +/// assert_eq!(parser("21cZ"), Ok(("cZ", "21"))); +/// assert_eq!(parser("H2"), Err(Err::Error(Error::new("H2", ErrorKind::OctDigit)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::OctDigit)))); +/// ``` +pub fn oct_digit1<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + input.split_at_position1_complete(|item| !item.is_oct_digit(), ErrorKind::OctDigit) +} + +/// Recognizes zero or more ASCII numerical and alphabetic characters: 0-9, a-z, A-Z +/// +/// *Complete version*: Will return the whole input if no terminating token is found (a non +/// alphanumerical character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::complete::alphanumeric0; +/// fn parser(input: &str) -> IResult<&str, &str> { +/// alphanumeric0(input) +/// } +/// +/// assert_eq!(parser("21cZ%1"), Ok(("%1", "21cZ"))); +/// assert_eq!(parser("&Z21c"), Ok(("&Z21c", ""))); +/// assert_eq!(parser(""), Ok(("", ""))); +/// ``` +pub fn alphanumeric0<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + input.split_at_position_complete(|item| !item.is_alphanum()) +} + +/// Recognizes one or more ASCII numerical and alphabetic characters: 0-9, a-z, A-Z +/// +/// *Complete version*: Will return an error if there's not enough input data, +/// or the whole input if no terminating token is found (a non alphanumerical character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::{Error, ErrorKind}, IResult, Needed}; +/// # use nom::character::complete::alphanumeric1; +/// fn parser(input: &str) -> IResult<&str, &str> { +/// alphanumeric1(input) +/// } +/// +/// assert_eq!(parser("21cZ%1"), Ok(("%1", "21cZ"))); +/// assert_eq!(parser("&H2"), Err(Err::Error(Error::new("&H2", ErrorKind::AlphaNumeric)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::AlphaNumeric)))); +/// ``` +pub fn alphanumeric1<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + input.split_at_position1_complete(|item| !item.is_alphanum(), ErrorKind::AlphaNumeric) +} + +/// Recognizes zero or more spaces and tabs. +/// +/// *Complete version*: Will return the whole input if no terminating token is found (a non space +/// character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::complete::space0; +/// fn parser(input: &str) -> IResult<&str, &str> { +/// space0(input) +/// } +/// +/// assert_eq!(parser(" \t21c"), Ok(("21c", " \t"))); +/// assert_eq!(parser("Z21c"), Ok(("Z21c", ""))); +/// assert_eq!(parser(""), Ok(("", ""))); +/// ``` +pub fn space0<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar + Clone, +{ + input.split_at_position_complete(|item| { + let c = item.as_char(); + !(c == ' ' || c == '\t') + }) +} + +/// Recognizes one or more spaces and tabs. +/// +/// *Complete version*: Will return an error if there's not enough input data, +/// or the whole input if no terminating token is found (a non space character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::{Error, ErrorKind}, IResult, Needed}; +/// # use nom::character::complete::space1; +/// fn parser(input: &str) -> IResult<&str, &str> { +/// space1(input) +/// } +/// +/// assert_eq!(parser(" \t21c"), Ok(("21c", " \t"))); +/// assert_eq!(parser("H2"), Err(Err::Error(Error::new("H2", ErrorKind::Space)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Space)))); +/// ``` +pub fn space1<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar + Clone, +{ + input.split_at_position1_complete( + |item| { + let c = item.as_char(); + !(c == ' ' || c == '\t') + }, + ErrorKind::Space, + ) +} + +/// Recognizes zero or more spaces, tabs, carriage returns and line feeds. +/// +/// *Complete version*: will return the whole input if no terminating token is found (a non space +/// character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::complete::multispace0; +/// fn parser(input: &str) -> IResult<&str, &str> { +/// multispace0(input) +/// } +/// +/// assert_eq!(parser(" \t\n\r21c"), Ok(("21c", " \t\n\r"))); +/// assert_eq!(parser("Z21c"), Ok(("Z21c", ""))); +/// assert_eq!(parser(""), Ok(("", ""))); +/// ``` +pub fn multispace0<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar + Clone, +{ + input.split_at_position_complete(|item| { + let c = item.as_char(); + !(c == ' ' || c == '\t' || c == '\r' || c == '\n') + }) +} + +/// Recognizes one or more spaces, tabs, carriage returns and line feeds. +/// +/// *Complete version*: will return an error if there's not enough input data, +/// or the whole input if no terminating token is found (a non space character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::{Error, ErrorKind}, IResult, Needed}; +/// # use nom::character::complete::multispace1; +/// fn parser(input: &str) -> IResult<&str, &str> { +/// multispace1(input) +/// } +/// +/// assert_eq!(parser(" \t\n\r21c"), Ok(("21c", " \t\n\r"))); +/// assert_eq!(parser("H2"), Err(Err::Error(Error::new("H2", ErrorKind::MultiSpace)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::MultiSpace)))); +/// ``` +pub fn multispace1<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar + Clone, +{ + input.split_at_position1_complete( + |item| { + let c = item.as_char(); + !(c == ' ' || c == '\t' || c == '\r' || c == '\n') + }, + ErrorKind::MultiSpace, + ) +} + +pub(crate) fn sign<T, E: ParseError<T>>(input: T) -> IResult<T, bool, E> +where + T: Clone + InputTake, + T: for<'a> Compare<&'a [u8]>, +{ + use crate::bytes::complete::tag; + use crate::combinator::value; + + let (i, opt_sign) = opt(alt(( + value(false, tag(&b"-"[..])), + value(true, tag(&b"+"[..])), + )))(input)?; + let sign = opt_sign.unwrap_or(true); + + Ok((i, sign)) +} + +#[doc(hidden)] +macro_rules! ints { + ($($t:tt)+) => { + $( + /// will parse a number in text form to a number + /// + /// *Complete version*: can parse until the end of input. + pub fn $t<T, E: ParseError<T>>(input: T) -> IResult<T, $t, E> + where + T: InputIter + Slice<RangeFrom<usize>> + InputLength + InputTake + Clone, + <T as InputIter>::Item: AsChar, + T: for <'a> Compare<&'a[u8]>, + { + let (i, sign) = sign(input.clone())?; + + if i.input_len() == 0 { + return Err(Err::Error(E::from_error_kind(input, ErrorKind::Digit))); + } + + let mut value: $t = 0; + if sign { + for (pos, c) in i.iter_indices() { + match c.as_char().to_digit(10) { + None => { + if pos == 0 { + return Err(Err::Error(E::from_error_kind(input, ErrorKind::Digit))); + } else { + return Ok((i.slice(pos..), value)); + } + }, + Some(d) => match value.checked_mul(10).and_then(|v| v.checked_add(d as $t)) { + None => return Err(Err::Error(E::from_error_kind(input, ErrorKind::Digit))), + Some(v) => value = v, + } + } + } + } else { + for (pos, c) in i.iter_indices() { + match c.as_char().to_digit(10) { + None => { + if pos == 0 { + return Err(Err::Error(E::from_error_kind(input, ErrorKind::Digit))); + } else { + return Ok((i.slice(pos..), value)); + } + }, + Some(d) => match value.checked_mul(10).and_then(|v| v.checked_sub(d as $t)) { + None => return Err(Err::Error(E::from_error_kind(input, ErrorKind::Digit))), + Some(v) => value = v, + } + } + } + } + + Ok((i.slice(i.input_len()..), value)) + } + )+ + } +} + +ints! { i8 i16 i32 i64 i128 } + +#[doc(hidden)] +macro_rules! uints { + ($($t:tt)+) => { + $( + /// will parse a number in text form to a number + /// + /// *Complete version*: can parse until the end of input. + pub fn $t<T, E: ParseError<T>>(input: T) -> IResult<T, $t, E> + where + T: InputIter + Slice<RangeFrom<usize>> + InputLength, + <T as InputIter>::Item: AsChar, + { + let i = input; + + if i.input_len() == 0 { + return Err(Err::Error(E::from_error_kind(i, ErrorKind::Digit))); + } + + let mut value: $t = 0; + for (pos, c) in i.iter_indices() { + match c.as_char().to_digit(10) { + None => { + if pos == 0 { + return Err(Err::Error(E::from_error_kind(i, ErrorKind::Digit))); + } else { + return Ok((i.slice(pos..), value)); + } + }, + Some(d) => match value.checked_mul(10).and_then(|v| v.checked_add(d as $t)) { + None => return Err(Err::Error(E::from_error_kind(i, ErrorKind::Digit))), + Some(v) => value = v, + } + } + } + + Ok((i.slice(i.input_len()..), value)) + } + )+ + } +} + +uints! { u8 u16 u32 u64 u128 } + +#[cfg(test)] +mod tests { + use super::*; + use crate::internal::Err; + use crate::traits::ParseTo; + use proptest::prelude::*; + + macro_rules! assert_parse( + ($left: expr, $right: expr) => { + let res: $crate::IResult<_, _, (_, ErrorKind)> = $left; + assert_eq!(res, $right); + }; + ); + + #[test] + fn character() { + let empty: &[u8] = b""; + let a: &[u8] = b"abcd"; + let b: &[u8] = b"1234"; + let c: &[u8] = b"a123"; + let d: &[u8] = "azé12".as_bytes(); + let e: &[u8] = b" "; + let f: &[u8] = b" ;"; + //assert_eq!(alpha1::<_, (_, ErrorKind)>(a), Err(Err::Incomplete(Needed::Size(1)))); + assert_parse!(alpha1(a), Ok((empty, a))); + assert_eq!(alpha1(b), Err(Err::Error((b, ErrorKind::Alpha)))); + assert_eq!(alpha1::<_, (_, ErrorKind)>(c), Ok((&c[1..], &b"a"[..]))); + assert_eq!( + alpha1::<_, (_, ErrorKind)>(d), + Ok(("é12".as_bytes(), &b"az"[..])) + ); + assert_eq!(digit1(a), Err(Err::Error((a, ErrorKind::Digit)))); + assert_eq!(digit1::<_, (_, ErrorKind)>(b), Ok((empty, b))); + assert_eq!(digit1(c), Err(Err::Error((c, ErrorKind::Digit)))); + assert_eq!(digit1(d), Err(Err::Error((d, ErrorKind::Digit)))); + assert_eq!(hex_digit1::<_, (_, ErrorKind)>(a), Ok((empty, a))); + assert_eq!(hex_digit1::<_, (_, ErrorKind)>(b), Ok((empty, b))); + assert_eq!(hex_digit1::<_, (_, ErrorKind)>(c), Ok((empty, c))); + assert_eq!( + hex_digit1::<_, (_, ErrorKind)>(d), + Ok(("zé12".as_bytes(), &b"a"[..])) + ); + assert_eq!(hex_digit1(e), Err(Err::Error((e, ErrorKind::HexDigit)))); + assert_eq!(oct_digit1(a), Err(Err::Error((a, ErrorKind::OctDigit)))); + assert_eq!(oct_digit1::<_, (_, ErrorKind)>(b), Ok((empty, b))); + assert_eq!(oct_digit1(c), Err(Err::Error((c, ErrorKind::OctDigit)))); + assert_eq!(oct_digit1(d), Err(Err::Error((d, ErrorKind::OctDigit)))); + assert_eq!(alphanumeric1::<_, (_, ErrorKind)>(a), Ok((empty, a))); + //assert_eq!(fix_error!(b,(), alphanumeric), Ok((empty, b))); + assert_eq!(alphanumeric1::<_, (_, ErrorKind)>(c), Ok((empty, c))); + assert_eq!( + alphanumeric1::<_, (_, ErrorKind)>(d), + Ok(("é12".as_bytes(), &b"az"[..])) + ); + assert_eq!(space1::<_, (_, ErrorKind)>(e), Ok((empty, e))); + assert_eq!(space1::<_, (_, ErrorKind)>(f), Ok((&b";"[..], &b" "[..]))); + } + + #[cfg(feature = "alloc")] + #[test] + fn character_s() { + let empty = ""; + let a = "abcd"; + let b = "1234"; + let c = "a123"; + let d = "azé12"; + let e = " "; + assert_eq!(alpha1::<_, (_, ErrorKind)>(a), Ok((empty, a))); + assert_eq!(alpha1(b), Err(Err::Error((b, ErrorKind::Alpha)))); + assert_eq!(alpha1::<_, (_, ErrorKind)>(c), Ok((&c[1..], &"a"[..]))); + assert_eq!(alpha1::<_, (_, ErrorKind)>(d), Ok(("é12", &"az"[..]))); + assert_eq!(digit1(a), Err(Err::Error((a, ErrorKind::Digit)))); + assert_eq!(digit1::<_, (_, ErrorKind)>(b), Ok((empty, b))); + assert_eq!(digit1(c), Err(Err::Error((c, ErrorKind::Digit)))); + assert_eq!(digit1(d), Err(Err::Error((d, ErrorKind::Digit)))); + assert_eq!(hex_digit1::<_, (_, ErrorKind)>(a), Ok((empty, a))); + assert_eq!(hex_digit1::<_, (_, ErrorKind)>(b), Ok((empty, b))); + assert_eq!(hex_digit1::<_, (_, ErrorKind)>(c), Ok((empty, c))); + assert_eq!(hex_digit1::<_, (_, ErrorKind)>(d), Ok(("zé12", &"a"[..]))); + assert_eq!(hex_digit1(e), Err(Err::Error((e, ErrorKind::HexDigit)))); + assert_eq!(oct_digit1(a), Err(Err::Error((a, ErrorKind::OctDigit)))); + assert_eq!(oct_digit1::<_, (_, ErrorKind)>(b), Ok((empty, b))); + assert_eq!(oct_digit1(c), Err(Err::Error((c, ErrorKind::OctDigit)))); + assert_eq!(oct_digit1(d), Err(Err::Error((d, ErrorKind::OctDigit)))); + assert_eq!(alphanumeric1::<_, (_, ErrorKind)>(a), Ok((empty, a))); + //assert_eq!(fix_error!(b,(), alphanumeric), Ok((empty, b))); + assert_eq!(alphanumeric1::<_, (_, ErrorKind)>(c), Ok((empty, c))); + assert_eq!(alphanumeric1::<_, (_, ErrorKind)>(d), Ok(("é12", "az"))); + assert_eq!(space1::<_, (_, ErrorKind)>(e), Ok((empty, e))); + } + + use crate::traits::Offset; + #[test] + fn offset() { + let a = &b"abcd;"[..]; + let b = &b"1234;"[..]; + let c = &b"a123;"[..]; + let d = &b" \t;"[..]; + let e = &b" \t\r\n;"[..]; + let f = &b"123abcDEF;"[..]; + + match alpha1::<_, (_, ErrorKind)>(a) { + Ok((i, _)) => { + assert_eq!(a.offset(i) + i.len(), a.len()); + } + _ => panic!("wrong return type in offset test for alpha"), + } + match digit1::<_, (_, ErrorKind)>(b) { + Ok((i, _)) => { + assert_eq!(b.offset(i) + i.len(), b.len()); + } + _ => panic!("wrong return type in offset test for digit"), + } + match alphanumeric1::<_, (_, ErrorKind)>(c) { + Ok((i, _)) => { + assert_eq!(c.offset(i) + i.len(), c.len()); + } + _ => panic!("wrong return type in offset test for alphanumeric"), + } + match space1::<_, (_, ErrorKind)>(d) { + Ok((i, _)) => { + assert_eq!(d.offset(i) + i.len(), d.len()); + } + _ => panic!("wrong return type in offset test for space"), + } + match multispace1::<_, (_, ErrorKind)>(e) { + Ok((i, _)) => { + assert_eq!(e.offset(i) + i.len(), e.len()); + } + _ => panic!("wrong return type in offset test for multispace"), + } + match hex_digit1::<_, (_, ErrorKind)>(f) { + Ok((i, _)) => { + assert_eq!(f.offset(i) + i.len(), f.len()); + } + _ => panic!("wrong return type in offset test for hex_digit"), + } + match oct_digit1::<_, (_, ErrorKind)>(f) { + Ok((i, _)) => { + assert_eq!(f.offset(i) + i.len(), f.len()); + } + _ => panic!("wrong return type in offset test for oct_digit"), + } + } + + #[test] + fn is_not_line_ending_bytes() { + let a: &[u8] = b"ab12cd\nefgh"; + assert_eq!( + not_line_ending::<_, (_, ErrorKind)>(a), + Ok((&b"\nefgh"[..], &b"ab12cd"[..])) + ); + + let b: &[u8] = b"ab12cd\nefgh\nijkl"; + assert_eq!( + not_line_ending::<_, (_, ErrorKind)>(b), + Ok((&b"\nefgh\nijkl"[..], &b"ab12cd"[..])) + ); + + let c: &[u8] = b"ab12cd\r\nefgh\nijkl"; + assert_eq!( + not_line_ending::<_, (_, ErrorKind)>(c), + Ok((&b"\r\nefgh\nijkl"[..], &b"ab12cd"[..])) + ); + + let d: &[u8] = b"ab12cd"; + assert_eq!( + not_line_ending::<_, (_, ErrorKind)>(d), + Ok((&[][..], &d[..])) + ); + } + + #[test] + fn is_not_line_ending_str() { + /* + let a: &str = "ab12cd\nefgh"; + assert_eq!(not_line_ending(a), Ok((&"\nefgh"[..], &"ab12cd"[..]))); + + let b: &str = "ab12cd\nefgh\nijkl"; + assert_eq!(not_line_ending(b), Ok((&"\nefgh\nijkl"[..], &"ab12cd"[..]))); + + let c: &str = "ab12cd\r\nefgh\nijkl"; + assert_eq!(not_line_ending(c), Ok((&"\r\nefgh\nijkl"[..], &"ab12cd"[..]))); + + let d = "βèƒôřè\nÂßÇáƒƭèř"; + assert_eq!(not_line_ending(d), Ok((&"\nÂßÇáƒƭèř"[..], &"βèƒôřè"[..]))); + + let e = "βèƒôřè\r\nÂßÇáƒƭèř"; + assert_eq!(not_line_ending(e), Ok((&"\r\nÂßÇáƒƭèř"[..], &"βèƒôřè"[..]))); + */ + + let f = "βèƒôřè\rÂßÇáƒƭèř"; + assert_eq!(not_line_ending(f), Err(Err::Error((f, ErrorKind::Tag)))); + + let g2: &str = "ab12cd"; + assert_eq!(not_line_ending::<_, (_, ErrorKind)>(g2), Ok(("", g2))); + } + + #[test] + fn hex_digit_test() { + let i = &b"0123456789abcdefABCDEF;"[..]; + assert_parse!(hex_digit1(i), Ok((&b";"[..], &i[..i.len() - 1]))); + + let i = &b"g"[..]; + assert_parse!( + hex_digit1(i), + Err(Err::Error(error_position!(i, ErrorKind::HexDigit))) + ); + + let i = &b"G"[..]; + assert_parse!( + hex_digit1(i), + Err(Err::Error(error_position!(i, ErrorKind::HexDigit))) + ); + + assert!(crate::character::is_hex_digit(b'0')); + assert!(crate::character::is_hex_digit(b'9')); + assert!(crate::character::is_hex_digit(b'a')); + assert!(crate::character::is_hex_digit(b'f')); + assert!(crate::character::is_hex_digit(b'A')); + assert!(crate::character::is_hex_digit(b'F')); + assert!(!crate::character::is_hex_digit(b'g')); + assert!(!crate::character::is_hex_digit(b'G')); + assert!(!crate::character::is_hex_digit(b'/')); + assert!(!crate::character::is_hex_digit(b':')); + assert!(!crate::character::is_hex_digit(b'@')); + assert!(!crate::character::is_hex_digit(b'\x60')); + } + + #[test] + fn oct_digit_test() { + let i = &b"01234567;"[..]; + assert_parse!(oct_digit1(i), Ok((&b";"[..], &i[..i.len() - 1]))); + + let i = &b"8"[..]; + assert_parse!( + oct_digit1(i), + Err(Err::Error(error_position!(i, ErrorKind::OctDigit))) + ); + + assert!(crate::character::is_oct_digit(b'0')); + assert!(crate::character::is_oct_digit(b'7')); + assert!(!crate::character::is_oct_digit(b'8')); + assert!(!crate::character::is_oct_digit(b'9')); + assert!(!crate::character::is_oct_digit(b'a')); + assert!(!crate::character::is_oct_digit(b'A')); + assert!(!crate::character::is_oct_digit(b'/')); + assert!(!crate::character::is_oct_digit(b':')); + assert!(!crate::character::is_oct_digit(b'@')); + assert!(!crate::character::is_oct_digit(b'\x60')); + } + + #[test] + fn full_line_windows() { + use crate::sequence::pair; + fn take_full_line(i: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> { + pair(not_line_ending, line_ending)(i) + } + let input = b"abc\r\n"; + let output = take_full_line(input); + assert_eq!(output, Ok((&b""[..], (&b"abc"[..], &b"\r\n"[..])))); + } + + #[test] + fn full_line_unix() { + use crate::sequence::pair; + fn take_full_line(i: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> { + pair(not_line_ending, line_ending)(i) + } + let input = b"abc\n"; + let output = take_full_line(input); + assert_eq!(output, Ok((&b""[..], (&b"abc"[..], &b"\n"[..])))); + } + + #[test] + fn check_windows_lineending() { + let input = b"\r\n"; + let output = line_ending(&input[..]); + assert_parse!(output, Ok((&b""[..], &b"\r\n"[..]))); + } + + #[test] + fn check_unix_lineending() { + let input = b"\n"; + let output = line_ending(&input[..]); + assert_parse!(output, Ok((&b""[..], &b"\n"[..]))); + } + + #[test] + fn cr_lf() { + assert_parse!(crlf(&b"\r\na"[..]), Ok((&b"a"[..], &b"\r\n"[..]))); + assert_parse!( + crlf(&b"\r"[..]), + Err(Err::Error(error_position!(&b"\r"[..], ErrorKind::CrLf))) + ); + assert_parse!( + crlf(&b"\ra"[..]), + Err(Err::Error(error_position!(&b"\ra"[..], ErrorKind::CrLf))) + ); + + assert_parse!(crlf("\r\na"), Ok(("a", "\r\n"))); + assert_parse!( + crlf("\r"), + Err(Err::Error(error_position!(&"\r"[..], ErrorKind::CrLf))) + ); + assert_parse!( + crlf("\ra"), + Err(Err::Error(error_position!("\ra", ErrorKind::CrLf))) + ); + } + + #[test] + fn end_of_line() { + assert_parse!(line_ending(&b"\na"[..]), Ok((&b"a"[..], &b"\n"[..]))); + assert_parse!(line_ending(&b"\r\na"[..]), Ok((&b"a"[..], &b"\r\n"[..]))); + assert_parse!( + line_ending(&b"\r"[..]), + Err(Err::Error(error_position!(&b"\r"[..], ErrorKind::CrLf))) + ); + assert_parse!( + line_ending(&b"\ra"[..]), + Err(Err::Error(error_position!(&b"\ra"[..], ErrorKind::CrLf))) + ); + + assert_parse!(line_ending("\na"), Ok(("a", "\n"))); + assert_parse!(line_ending("\r\na"), Ok(("a", "\r\n"))); + assert_parse!( + line_ending("\r"), + Err(Err::Error(error_position!(&"\r"[..], ErrorKind::CrLf))) + ); + assert_parse!( + line_ending("\ra"), + Err(Err::Error(error_position!("\ra", ErrorKind::CrLf))) + ); + } + + fn digit_to_i16(input: &str) -> IResult<&str, i16> { + let i = input; + let (i, opt_sign) = opt(alt((char('+'), char('-'))))(i)?; + let sign = match opt_sign { + Some('+') => true, + Some('-') => false, + _ => true, + }; + + let (i, s) = match digit1::<_, crate::error::Error<_>>(i) { + Ok((i, s)) => (i, s), + Err(_) => { + return Err(Err::Error(crate::error::Error::from_error_kind( + input, + ErrorKind::Digit, + ))) + } + }; + + match s.parse_to() { + Some(n) => { + if sign { + Ok((i, n)) + } else { + Ok((i, -n)) + } + } + None => Err(Err::Error(crate::error::Error::from_error_kind( + i, + ErrorKind::Digit, + ))), + } + } + + fn digit_to_u32(i: &str) -> IResult<&str, u32> { + let (i, s) = digit1(i)?; + match s.parse_to() { + Some(n) => Ok((i, n)), + None => Err(Err::Error(crate::error::Error::from_error_kind( + i, + ErrorKind::Digit, + ))), + } + } + + proptest! { + #[test] + fn ints(s in "\\PC*") { + let res1 = digit_to_i16(&s); + let res2 = i16(s.as_str()); + assert_eq!(res1, res2); + } + + #[test] + fn uints(s in "\\PC*") { + let res1 = digit_to_u32(&s); + let res2 = u32(s.as_str()); + assert_eq!(res1, res2); + } + } +} diff --git a/third_party/rust/nom/src/character/mod.rs b/third_party/rust/nom/src/character/mod.rs new file mode 100644 index 0000000000..2c5d3bc4ad --- /dev/null +++ b/third_party/rust/nom/src/character/mod.rs @@ -0,0 +1,116 @@ +//! Character specific parsers and combinators +//! +//! Functions recognizing specific characters + +#[cfg(test)] +mod tests; + +pub mod complete; +pub mod streaming; + +/// Tests if byte is ASCII alphabetic: A-Z, a-z +/// +/// # Example +/// +/// ``` +/// # use nom::character::is_alphabetic; +/// assert_eq!(is_alphabetic(b'9'), false); +/// assert_eq!(is_alphabetic(b'a'), true); +/// ``` +#[inline] +pub fn is_alphabetic(chr: u8) -> bool { + (chr >= 0x41 && chr <= 0x5A) || (chr >= 0x61 && chr <= 0x7A) +} + +/// Tests if byte is ASCII digit: 0-9 +/// +/// # Example +/// +/// ``` +/// # use nom::character::is_digit; +/// assert_eq!(is_digit(b'a'), false); +/// assert_eq!(is_digit(b'9'), true); +/// ``` +#[inline] +pub fn is_digit(chr: u8) -> bool { + chr >= 0x30 && chr <= 0x39 +} + +/// Tests if byte is ASCII hex digit: 0-9, A-F, a-f +/// +/// # Example +/// +/// ``` +/// # use nom::character::is_hex_digit; +/// assert_eq!(is_hex_digit(b'a'), true); +/// assert_eq!(is_hex_digit(b'9'), true); +/// assert_eq!(is_hex_digit(b'A'), true); +/// assert_eq!(is_hex_digit(b'x'), false); +/// ``` +#[inline] +pub fn is_hex_digit(chr: u8) -> bool { + (chr >= 0x30 && chr <= 0x39) || (chr >= 0x41 && chr <= 0x46) || (chr >= 0x61 && chr <= 0x66) +} + +/// Tests if byte is ASCII octal digit: 0-7 +/// +/// # Example +/// +/// ``` +/// # use nom::character::is_oct_digit; +/// assert_eq!(is_oct_digit(b'a'), false); +/// assert_eq!(is_oct_digit(b'9'), false); +/// assert_eq!(is_oct_digit(b'6'), true); +/// ``` +#[inline] +pub fn is_oct_digit(chr: u8) -> bool { + chr >= 0x30 && chr <= 0x37 +} + +/// Tests if byte is ASCII alphanumeric: A-Z, a-z, 0-9 +/// +/// # Example +/// +/// ``` +/// # use nom::character::is_alphanumeric; +/// assert_eq!(is_alphanumeric(b'-'), false); +/// assert_eq!(is_alphanumeric(b'a'), true); +/// assert_eq!(is_alphanumeric(b'9'), true); +/// assert_eq!(is_alphanumeric(b'A'), true); +/// ``` +#[inline] +pub fn is_alphanumeric(chr: u8) -> bool { + is_alphabetic(chr) || is_digit(chr) +} + +/// Tests if byte is ASCII space or tab +/// +/// # Example +/// +/// ``` +/// # use nom::character::is_space; +/// assert_eq!(is_space(b'\n'), false); +/// assert_eq!(is_space(b'\r'), false); +/// assert_eq!(is_space(b' '), true); +/// assert_eq!(is_space(b'\t'), true); +/// ``` +#[inline] +pub fn is_space(chr: u8) -> bool { + chr == b' ' || chr == b'\t' +} + +/// Tests if byte is ASCII newline: \n +/// +/// # Example +/// +/// ``` +/// # use nom::character::is_newline; +/// assert_eq!(is_newline(b'\n'), true); +/// assert_eq!(is_newline(b'\r'), false); +/// assert_eq!(is_newline(b' '), false); +/// assert_eq!(is_newline(b'\t'), false); +/// ``` +#[inline] +pub fn is_newline(chr: u8) -> bool { + chr == b'\n' +} diff --git a/third_party/rust/nom/src/character/streaming.rs b/third_party/rust/nom/src/character/streaming.rs new file mode 100644 index 0000000000..88aabba356 --- /dev/null +++ b/third_party/rust/nom/src/character/streaming.rs @@ -0,0 +1,1182 @@ +//! Character specific parsers and combinators, streaming version +//! +//! Functions recognizing specific characters + +use crate::branch::alt; +use crate::combinator::opt; +use crate::error::ErrorKind; +use crate::error::ParseError; +use crate::internal::{Err, IResult, Needed}; +use crate::lib::std::ops::{Range, RangeFrom, RangeTo}; +use crate::traits::{ + AsChar, FindToken, InputIter, InputLength, InputTake, InputTakeAtPosition, Slice, +}; +use crate::traits::{Compare, CompareResult}; + +/// Recognizes one character. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data. +/// # Example +/// +/// ``` +/// # use nom::{Err, error::{ErrorKind, Error}, Needed, IResult}; +/// # use nom::character::streaming::char; +/// fn parser(i: &str) -> IResult<&str, char> { +/// char('a')(i) +/// } +/// assert_eq!(parser("abc"), Ok(("bc", 'a'))); +/// assert_eq!(parser("bc"), Err(Err::Error(Error::new("bc", ErrorKind::Char)))); +/// assert_eq!(parser(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn char<I, Error: ParseError<I>>(c: char) -> impl Fn(I) -> IResult<I, char, Error> +where + I: Slice<RangeFrom<usize>> + InputIter + InputLength, + <I as InputIter>::Item: AsChar, +{ + move |i: I| match (i).iter_elements().next().map(|t| { + let b = t.as_char() == c; + (&c, b) + }) { + None => Err(Err::Incomplete(Needed::new(c.len() - i.input_len()))), + Some((_, false)) => Err(Err::Error(Error::from_char(i, c))), + Some((c, true)) => Ok((i.slice(c.len()..), c.as_char())), + } +} + +/// Recognizes one character and checks that it satisfies a predicate +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data. +/// # Example +/// +/// ``` +/// # use nom::{Err, error::{ErrorKind, Error}, Needed, IResult}; +/// # use nom::character::streaming::satisfy; +/// fn parser(i: &str) -> IResult<&str, char> { +/// satisfy(|c| c == 'a' || c == 'b')(i) +/// } +/// assert_eq!(parser("abc"), Ok(("bc", 'a'))); +/// assert_eq!(parser("cd"), Err(Err::Error(Error::new("cd", ErrorKind::Satisfy)))); +/// assert_eq!(parser(""), Err(Err::Incomplete(Needed::Unknown))); +/// ``` +pub fn satisfy<F, I, Error: ParseError<I>>(cond: F) -> impl Fn(I) -> IResult<I, char, Error> +where + I: Slice<RangeFrom<usize>> + InputIter, + <I as InputIter>::Item: AsChar, + F: Fn(char) -> bool, +{ + move |i: I| match (i).iter_elements().next().map(|t| { + let c = t.as_char(); + let b = cond(c); + (c, b) + }) { + None => Err(Err::Incomplete(Needed::Unknown)), + Some((_, false)) => Err(Err::Error(Error::from_error_kind(i, ErrorKind::Satisfy))), + Some((c, true)) => Ok((i.slice(c.len()..), c)), + } +} + +/// Recognizes one of the provided characters. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data. +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::character::streaming::one_of; +/// assert_eq!(one_of::<_, _, (_, ErrorKind)>("abc")("b"), Ok(("", 'b'))); +/// assert_eq!(one_of::<_, _, (_, ErrorKind)>("a")("bc"), Err(Err::Error(("bc", ErrorKind::OneOf)))); +/// assert_eq!(one_of::<_, _, (_, ErrorKind)>("a")(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn one_of<I, T, Error: ParseError<I>>(list: T) -> impl Fn(I) -> IResult<I, char, Error> +where + I: Slice<RangeFrom<usize>> + InputIter, + <I as InputIter>::Item: AsChar + Copy, + T: FindToken<<I as InputIter>::Item>, +{ + move |i: I| match (i).iter_elements().next().map(|c| (c, list.find_token(c))) { + None => Err(Err::Incomplete(Needed::new(1))), + Some((_, false)) => Err(Err::Error(Error::from_error_kind(i, ErrorKind::OneOf))), + Some((c, true)) => Ok((i.slice(c.len()..), c.as_char())), + } +} + +/// Recognizes a character that is not in the provided characters. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data. +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::character::streaming::none_of; +/// assert_eq!(none_of::<_, _, (_, ErrorKind)>("abc")("z"), Ok(("", 'z'))); +/// assert_eq!(none_of::<_, _, (_, ErrorKind)>("ab")("a"), Err(Err::Error(("a", ErrorKind::NoneOf)))); +/// assert_eq!(none_of::<_, _, (_, ErrorKind)>("a")(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn none_of<I, T, Error: ParseError<I>>(list: T) -> impl Fn(I) -> IResult<I, char, Error> +where + I: Slice<RangeFrom<usize>> + InputIter, + <I as InputIter>::Item: AsChar + Copy, + T: FindToken<<I as InputIter>::Item>, +{ + move |i: I| match (i).iter_elements().next().map(|c| (c, !list.find_token(c))) { + None => Err(Err::Incomplete(Needed::new(1))), + Some((_, false)) => Err(Err::Error(Error::from_error_kind(i, ErrorKind::NoneOf))), + Some((c, true)) => Ok((i.slice(c.len()..), c.as_char())), + } +} + +/// Recognizes the string "\r\n". +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data. +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::streaming::crlf; +/// assert_eq!(crlf::<_, (_, ErrorKind)>("\r\nc"), Ok(("c", "\r\n"))); +/// assert_eq!(crlf::<_, (_, ErrorKind)>("ab\r\nc"), Err(Err::Error(("ab\r\nc", ErrorKind::CrLf)))); +/// assert_eq!(crlf::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(2)))); +/// ``` +pub fn crlf<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: Slice<Range<usize>> + Slice<RangeFrom<usize>> + Slice<RangeTo<usize>>, + T: InputIter, + T: Compare<&'static str>, +{ + match input.compare("\r\n") { + //FIXME: is this the right index? + CompareResult::Ok => Ok((input.slice(2..), input.slice(0..2))), + CompareResult::Incomplete => Err(Err::Incomplete(Needed::new(2))), + CompareResult::Error => { + let e: ErrorKind = ErrorKind::CrLf; + Err(Err::Error(E::from_error_kind(input, e))) + } + } +} + +/// Recognizes a string of any char except '\r\n' or '\n'. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data. +/// # Example +/// +/// ``` +/// # use nom::{Err, error::{Error, ErrorKind}, IResult, Needed}; +/// # use nom::character::streaming::not_line_ending; +/// assert_eq!(not_line_ending::<_, (_, ErrorKind)>("ab\r\nc"), Ok(("\r\nc", "ab"))); +/// assert_eq!(not_line_ending::<_, (_, ErrorKind)>("abc"), Err(Err::Incomplete(Needed::Unknown))); +/// assert_eq!(not_line_ending::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::Unknown))); +/// assert_eq!(not_line_ending::<_, (_, ErrorKind)>("a\rb\nc"), Err(Err::Error(("a\rb\nc", ErrorKind::Tag )))); +/// assert_eq!(not_line_ending::<_, (_, ErrorKind)>("a\rbc"), Err(Err::Error(("a\rbc", ErrorKind::Tag )))); +/// ``` +pub fn not_line_ending<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: Slice<Range<usize>> + Slice<RangeFrom<usize>> + Slice<RangeTo<usize>>, + T: InputIter + InputLength, + T: Compare<&'static str>, + <T as InputIter>::Item: AsChar, + <T as InputIter>::Item: AsChar, +{ + match input.position(|item| { + let c = item.as_char(); + c == '\r' || c == '\n' + }) { + None => Err(Err::Incomplete(Needed::Unknown)), + Some(index) => { + let mut it = input.slice(index..).iter_elements(); + let nth = it.next().unwrap().as_char(); + if nth == '\r' { + let sliced = input.slice(index..); + let comp = sliced.compare("\r\n"); + match comp { + //FIXME: calculate the right index + CompareResult::Incomplete => Err(Err::Incomplete(Needed::Unknown)), + CompareResult::Error => { + let e: ErrorKind = ErrorKind::Tag; + Err(Err::Error(E::from_error_kind(input, e))) + } + CompareResult::Ok => Ok((input.slice(index..), input.slice(..index))), + } + } else { + Ok((input.slice(index..), input.slice(..index))) + } + } + } +} + +/// Recognizes an end of line (both '\n' and '\r\n'). +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data. +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::streaming::line_ending; +/// assert_eq!(line_ending::<_, (_, ErrorKind)>("\r\nc"), Ok(("c", "\r\n"))); +/// assert_eq!(line_ending::<_, (_, ErrorKind)>("ab\r\nc"), Err(Err::Error(("ab\r\nc", ErrorKind::CrLf)))); +/// assert_eq!(line_ending::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn line_ending<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: Slice<Range<usize>> + Slice<RangeFrom<usize>> + Slice<RangeTo<usize>>, + T: InputIter + InputLength, + T: Compare<&'static str>, +{ + match input.compare("\n") { + CompareResult::Ok => Ok((input.slice(1..), input.slice(0..1))), + CompareResult::Incomplete => Err(Err::Incomplete(Needed::new(1))), + CompareResult::Error => { + match input.compare("\r\n") { + //FIXME: is this the right index? + CompareResult::Ok => Ok((input.slice(2..), input.slice(0..2))), + CompareResult::Incomplete => Err(Err::Incomplete(Needed::new(2))), + CompareResult::Error => Err(Err::Error(E::from_error_kind(input, ErrorKind::CrLf))), + } + } + } +} + +/// Matches a newline character '\\n'. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data. +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::streaming::newline; +/// assert_eq!(newline::<_, (_, ErrorKind)>("\nc"), Ok(("c", '\n'))); +/// assert_eq!(newline::<_, (_, ErrorKind)>("\r\nc"), Err(Err::Error(("\r\nc", ErrorKind::Char)))); +/// assert_eq!(newline::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn newline<I, Error: ParseError<I>>(input: I) -> IResult<I, char, Error> +where + I: Slice<RangeFrom<usize>> + InputIter + InputLength, + <I as InputIter>::Item: AsChar, +{ + char('\n')(input) +} + +/// Matches a tab character '\t'. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data. +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::streaming::tab; +/// assert_eq!(tab::<_, (_, ErrorKind)>("\tc"), Ok(("c", '\t'))); +/// assert_eq!(tab::<_, (_, ErrorKind)>("\r\nc"), Err(Err::Error(("\r\nc", ErrorKind::Char)))); +/// assert_eq!(tab::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn tab<I, Error: ParseError<I>>(input: I) -> IResult<I, char, Error> +where + I: Slice<RangeFrom<usize>> + InputIter + InputLength, + <I as InputIter>::Item: AsChar, +{ + char('\t')(input) +} + +/// Matches one byte as a character. Note that the input type will +/// accept a `str`, but not a `&[u8]`, unlike many other nom parsers. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data. +/// # Example +/// +/// ``` +/// # use nom::{character::streaming::anychar, Err, error::ErrorKind, IResult, Needed}; +/// assert_eq!(anychar::<_, (_, ErrorKind)>("abc"), Ok(("bc",'a'))); +/// assert_eq!(anychar::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn anychar<T, E: ParseError<T>>(input: T) -> IResult<T, char, E> +where + T: InputIter + InputLength + Slice<RangeFrom<usize>>, + <T as InputIter>::Item: AsChar, +{ + let mut it = input.iter_indices(); + match it.next() { + None => Err(Err::Incomplete(Needed::new(1))), + Some((_, c)) => match it.next() { + None => Ok((input.slice(input.input_len()..), c.as_char())), + Some((idx, _)) => Ok((input.slice(idx..), c.as_char())), + }, + } +} + +/// Recognizes zero or more lowercase and uppercase ASCII alphabetic characters: a-z, A-Z +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non alphabetic character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::streaming::alpha0; +/// assert_eq!(alpha0::<_, (_, ErrorKind)>("ab1c"), Ok(("1c", "ab"))); +/// assert_eq!(alpha0::<_, (_, ErrorKind)>("1c"), Ok(("1c", ""))); +/// assert_eq!(alpha0::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn alpha0<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + input.split_at_position(|item| !item.is_alpha()) +} + +/// Recognizes one or more lowercase and uppercase ASCII alphabetic characters: a-z, A-Z +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non alphabetic character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::streaming::alpha1; +/// assert_eq!(alpha1::<_, (_, ErrorKind)>("aB1c"), Ok(("1c", "aB"))); +/// assert_eq!(alpha1::<_, (_, ErrorKind)>("1c"), Err(Err::Error(("1c", ErrorKind::Alpha)))); +/// assert_eq!(alpha1::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn alpha1<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + input.split_at_position1(|item| !item.is_alpha(), ErrorKind::Alpha) +} + +/// Recognizes zero or more ASCII numerical characters: 0-9 +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non digit character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::streaming::digit0; +/// assert_eq!(digit0::<_, (_, ErrorKind)>("21c"), Ok(("c", "21"))); +/// assert_eq!(digit0::<_, (_, ErrorKind)>("a21c"), Ok(("a21c", ""))); +/// assert_eq!(digit0::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn digit0<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + input.split_at_position(|item| !item.is_dec_digit()) +} + +/// Recognizes one or more ASCII numerical characters: 0-9 +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non digit character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::streaming::digit1; +/// assert_eq!(digit1::<_, (_, ErrorKind)>("21c"), Ok(("c", "21"))); +/// assert_eq!(digit1::<_, (_, ErrorKind)>("c1"), Err(Err::Error(("c1", ErrorKind::Digit)))); +/// assert_eq!(digit1::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn digit1<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + input.split_at_position1(|item| !item.is_dec_digit(), ErrorKind::Digit) +} + +/// Recognizes zero or more ASCII hexadecimal numerical characters: 0-9, A-F, a-f +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non hexadecimal digit character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::streaming::hex_digit0; +/// assert_eq!(hex_digit0::<_, (_, ErrorKind)>("21cZ"), Ok(("Z", "21c"))); +/// assert_eq!(hex_digit0::<_, (_, ErrorKind)>("Z21c"), Ok(("Z21c", ""))); +/// assert_eq!(hex_digit0::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn hex_digit0<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + input.split_at_position(|item| !item.is_hex_digit()) +} + +/// Recognizes one or more ASCII hexadecimal numerical characters: 0-9, A-F, a-f +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non hexadecimal digit character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::streaming::hex_digit1; +/// assert_eq!(hex_digit1::<_, (_, ErrorKind)>("21cZ"), Ok(("Z", "21c"))); +/// assert_eq!(hex_digit1::<_, (_, ErrorKind)>("H2"), Err(Err::Error(("H2", ErrorKind::HexDigit)))); +/// assert_eq!(hex_digit1::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn hex_digit1<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + input.split_at_position1(|item| !item.is_hex_digit(), ErrorKind::HexDigit) +} + +/// Recognizes zero or more octal characters: 0-7 +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non octal digit character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::streaming::oct_digit0; +/// assert_eq!(oct_digit0::<_, (_, ErrorKind)>("21cZ"), Ok(("cZ", "21"))); +/// assert_eq!(oct_digit0::<_, (_, ErrorKind)>("Z21c"), Ok(("Z21c", ""))); +/// assert_eq!(oct_digit0::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn oct_digit0<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + input.split_at_position(|item| !item.is_oct_digit()) +} + +/// Recognizes one or more octal characters: 0-7 +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non octal digit character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::streaming::oct_digit1; +/// assert_eq!(oct_digit1::<_, (_, ErrorKind)>("21cZ"), Ok(("cZ", "21"))); +/// assert_eq!(oct_digit1::<_, (_, ErrorKind)>("H2"), Err(Err::Error(("H2", ErrorKind::OctDigit)))); +/// assert_eq!(oct_digit1::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn oct_digit1<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + input.split_at_position1(|item| !item.is_oct_digit(), ErrorKind::OctDigit) +} + +/// Recognizes zero or more ASCII numerical and alphabetic characters: 0-9, a-z, A-Z +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non alphanumerical character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::streaming::alphanumeric0; +/// assert_eq!(alphanumeric0::<_, (_, ErrorKind)>("21cZ%1"), Ok(("%1", "21cZ"))); +/// assert_eq!(alphanumeric0::<_, (_, ErrorKind)>("&Z21c"), Ok(("&Z21c", ""))); +/// assert_eq!(alphanumeric0::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn alphanumeric0<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + input.split_at_position(|item| !item.is_alphanum()) +} + +/// Recognizes one or more ASCII numerical and alphabetic characters: 0-9, a-z, A-Z +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non alphanumerical character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::streaming::alphanumeric1; +/// assert_eq!(alphanumeric1::<_, (_, ErrorKind)>("21cZ%1"), Ok(("%1", "21cZ"))); +/// assert_eq!(alphanumeric1::<_, (_, ErrorKind)>("&H2"), Err(Err::Error(("&H2", ErrorKind::AlphaNumeric)))); +/// assert_eq!(alphanumeric1::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn alphanumeric1<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + input.split_at_position1(|item| !item.is_alphanum(), ErrorKind::AlphaNumeric) +} + +/// Recognizes zero or more spaces and tabs. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non space character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::streaming::space0; +/// assert_eq!(space0::<_, (_, ErrorKind)>(" \t21c"), Ok(("21c", " \t"))); +/// assert_eq!(space0::<_, (_, ErrorKind)>("Z21c"), Ok(("Z21c", ""))); +/// assert_eq!(space0::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn space0<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar + Clone, +{ + input.split_at_position(|item| { + let c = item.as_char(); + !(c == ' ' || c == '\t') + }) +} +/// Recognizes one or more spaces and tabs. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non space character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::streaming::space1; +/// assert_eq!(space1::<_, (_, ErrorKind)>(" \t21c"), Ok(("21c", " \t"))); +/// assert_eq!(space1::<_, (_, ErrorKind)>("H2"), Err(Err::Error(("H2", ErrorKind::Space)))); +/// assert_eq!(space1::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn space1<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar + Clone, +{ + input.split_at_position1( + |item| { + let c = item.as_char(); + !(c == ' ' || c == '\t') + }, + ErrorKind::Space, + ) +} + +/// Recognizes zero or more spaces, tabs, carriage returns and line feeds. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non space character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::streaming::multispace0; +/// assert_eq!(multispace0::<_, (_, ErrorKind)>(" \t\n\r21c"), Ok(("21c", " \t\n\r"))); +/// assert_eq!(multispace0::<_, (_, ErrorKind)>("Z21c"), Ok(("Z21c", ""))); +/// assert_eq!(multispace0::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn multispace0<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar + Clone, +{ + input.split_at_position(|item| { + let c = item.as_char(); + !(c == ' ' || c == '\t' || c == '\r' || c == '\n') + }) +} + +/// Recognizes one or more spaces, tabs, carriage returns and line feeds. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non space character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::streaming::multispace1; +/// assert_eq!(multispace1::<_, (_, ErrorKind)>(" \t\n\r21c"), Ok(("21c", " \t\n\r"))); +/// assert_eq!(multispace1::<_, (_, ErrorKind)>("H2"), Err(Err::Error(("H2", ErrorKind::MultiSpace)))); +/// assert_eq!(multispace1::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn multispace1<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar + Clone, +{ + input.split_at_position1( + |item| { + let c = item.as_char(); + !(c == ' ' || c == '\t' || c == '\r' || c == '\n') + }, + ErrorKind::MultiSpace, + ) +} + +pub(crate) fn sign<T, E: ParseError<T>>(input: T) -> IResult<T, bool, E> +where + T: Clone + InputTake + InputLength, + T: for<'a> Compare<&'a [u8]>, +{ + use crate::bytes::streaming::tag; + use crate::combinator::value; + + let (i, opt_sign) = opt(alt(( + value(false, tag(&b"-"[..])), + value(true, tag(&b"+"[..])), + )))(input)?; + let sign = opt_sign.unwrap_or(true); + + Ok((i, sign)) +} + +#[doc(hidden)] +macro_rules! ints { + ($($t:tt)+) => { + $( + /// will parse a number in text form to a number + /// + /// *Complete version*: can parse until the end of input. + pub fn $t<T, E: ParseError<T>>(input: T) -> IResult<T, $t, E> + where + T: InputIter + Slice<RangeFrom<usize>> + InputLength + InputTake + Clone, + <T as InputIter>::Item: AsChar, + T: for <'a> Compare<&'a[u8]>, + { + let (i, sign) = sign(input.clone())?; + + if i.input_len() == 0 { + return Err(Err::Incomplete(Needed::new(1))); + } + + let mut value: $t = 0; + if sign { + for (pos, c) in i.iter_indices() { + match c.as_char().to_digit(10) { + None => { + if pos == 0 { + return Err(Err::Error(E::from_error_kind(input, ErrorKind::Digit))); + } else { + return Ok((i.slice(pos..), value)); + } + }, + Some(d) => match value.checked_mul(10).and_then(|v| v.checked_add(d as $t)) { + None => return Err(Err::Error(E::from_error_kind(input, ErrorKind::Digit))), + Some(v) => value = v, + } + } + } + } else { + for (pos, c) in i.iter_indices() { + match c.as_char().to_digit(10) { + None => { + if pos == 0 { + return Err(Err::Error(E::from_error_kind(input, ErrorKind::Digit))); + } else { + return Ok((i.slice(pos..), value)); + } + }, + Some(d) => match value.checked_mul(10).and_then(|v| v.checked_sub(d as $t)) { + None => return Err(Err::Error(E::from_error_kind(input, ErrorKind::Digit))), + Some(v) => value = v, + } + } + } + } + + Err(Err::Incomplete(Needed::new(1))) + } + )+ + } +} + +ints! { i8 i16 i32 i64 i128 } + +#[doc(hidden)] +macro_rules! uints { + ($($t:tt)+) => { + $( + /// will parse a number in text form to a number + /// + /// *Complete version*: can parse until the end of input. + pub fn $t<T, E: ParseError<T>>(input: T) -> IResult<T, $t, E> + where + T: InputIter + Slice<RangeFrom<usize>> + InputLength, + <T as InputIter>::Item: AsChar, + { + let i = input; + + if i.input_len() == 0 { + return Err(Err::Incomplete(Needed::new(1))); + } + + let mut value: $t = 0; + for (pos, c) in i.iter_indices() { + match c.as_char().to_digit(10) { + None => { + if pos == 0 { + return Err(Err::Error(E::from_error_kind(i, ErrorKind::Digit))); + } else { + return Ok((i.slice(pos..), value)); + } + }, + Some(d) => match value.checked_mul(10).and_then(|v| v.checked_add(d as $t)) { + None => return Err(Err::Error(E::from_error_kind(i, ErrorKind::Digit))), + Some(v) => value = v, + } + } + } + + Err(Err::Incomplete(Needed::new(1))) + } + )+ + } +} + +uints! { u8 u16 u32 u64 u128 } + +#[cfg(test)] +mod tests { + use super::*; + use crate::error::ErrorKind; + use crate::internal::{Err, Needed}; + use crate::sequence::pair; + use crate::traits::ParseTo; + use proptest::prelude::*; + + macro_rules! assert_parse( + ($left: expr, $right: expr) => { + let res: $crate::IResult<_, _, (_, ErrorKind)> = $left; + assert_eq!(res, $right); + }; + ); + + #[test] + fn anychar_str() { + use super::anychar; + assert_eq!(anychar::<_, (&str, ErrorKind)>("Ә"), Ok(("", 'Ә'))); + } + + #[test] + fn character() { + let a: &[u8] = b"abcd"; + let b: &[u8] = b"1234"; + let c: &[u8] = b"a123"; + let d: &[u8] = "azé12".as_bytes(); + let e: &[u8] = b" "; + let f: &[u8] = b" ;"; + //assert_eq!(alpha1::<_, (_, ErrorKind)>(a), Err(Err::Incomplete(Needed::new(1)))); + assert_parse!(alpha1(a), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(alpha1(b), Err(Err::Error((b, ErrorKind::Alpha)))); + assert_eq!(alpha1::<_, (_, ErrorKind)>(c), Ok((&c[1..], &b"a"[..]))); + assert_eq!( + alpha1::<_, (_, ErrorKind)>(d), + Ok(("é12".as_bytes(), &b"az"[..])) + ); + assert_eq!(digit1(a), Err(Err::Error((a, ErrorKind::Digit)))); + assert_eq!( + digit1::<_, (_, ErrorKind)>(b), + Err(Err::Incomplete(Needed::new(1))) + ); + assert_eq!(digit1(c), Err(Err::Error((c, ErrorKind::Digit)))); + assert_eq!(digit1(d), Err(Err::Error((d, ErrorKind::Digit)))); + assert_eq!( + hex_digit1::<_, (_, ErrorKind)>(a), + Err(Err::Incomplete(Needed::new(1))) + ); + assert_eq!( + hex_digit1::<_, (_, ErrorKind)>(b), + Err(Err::Incomplete(Needed::new(1))) + ); + assert_eq!( + hex_digit1::<_, (_, ErrorKind)>(c), + Err(Err::Incomplete(Needed::new(1))) + ); + assert_eq!( + hex_digit1::<_, (_, ErrorKind)>(d), + Ok(("zé12".as_bytes(), &b"a"[..])) + ); + assert_eq!(hex_digit1(e), Err(Err::Error((e, ErrorKind::HexDigit)))); + assert_eq!(oct_digit1(a), Err(Err::Error((a, ErrorKind::OctDigit)))); + assert_eq!( + oct_digit1::<_, (_, ErrorKind)>(b), + Err(Err::Incomplete(Needed::new(1))) + ); + assert_eq!(oct_digit1(c), Err(Err::Error((c, ErrorKind::OctDigit)))); + assert_eq!(oct_digit1(d), Err(Err::Error((d, ErrorKind::OctDigit)))); + assert_eq!( + alphanumeric1::<_, (_, ErrorKind)>(a), + Err(Err::Incomplete(Needed::new(1))) + ); + //assert_eq!(fix_error!(b,(), alphanumeric1), Ok((empty, b))); + assert_eq!( + alphanumeric1::<_, (_, ErrorKind)>(c), + Err(Err::Incomplete(Needed::new(1))) + ); + assert_eq!( + alphanumeric1::<_, (_, ErrorKind)>(d), + Ok(("é12".as_bytes(), &b"az"[..])) + ); + assert_eq!( + space1::<_, (_, ErrorKind)>(e), + Err(Err::Incomplete(Needed::new(1))) + ); + assert_eq!(space1::<_, (_, ErrorKind)>(f), Ok((&b";"[..], &b" "[..]))); + } + + #[cfg(feature = "alloc")] + #[test] + fn character_s() { + let a = "abcd"; + let b = "1234"; + let c = "a123"; + let d = "azé12"; + let e = " "; + assert_eq!( + alpha1::<_, (_, ErrorKind)>(a), + Err(Err::Incomplete(Needed::new(1))) + ); + assert_eq!(alpha1(b), Err(Err::Error((b, ErrorKind::Alpha)))); + assert_eq!(alpha1::<_, (_, ErrorKind)>(c), Ok((&c[1..], &"a"[..]))); + assert_eq!(alpha1::<_, (_, ErrorKind)>(d), Ok(("é12", &"az"[..]))); + assert_eq!(digit1(a), Err(Err::Error((a, ErrorKind::Digit)))); + assert_eq!( + digit1::<_, (_, ErrorKind)>(b), + Err(Err::Incomplete(Needed::new(1))) + ); + assert_eq!(digit1(c), Err(Err::Error((c, ErrorKind::Digit)))); + assert_eq!(digit1(d), Err(Err::Error((d, ErrorKind::Digit)))); + assert_eq!( + hex_digit1::<_, (_, ErrorKind)>(a), + Err(Err::Incomplete(Needed::new(1))) + ); + assert_eq!( + hex_digit1::<_, (_, ErrorKind)>(b), + Err(Err::Incomplete(Needed::new(1))) + ); + assert_eq!( + hex_digit1::<_, (_, ErrorKind)>(c), + Err(Err::Incomplete(Needed::new(1))) + ); + assert_eq!(hex_digit1::<_, (_, ErrorKind)>(d), Ok(("zé12", &"a"[..]))); + assert_eq!(hex_digit1(e), Err(Err::Error((e, ErrorKind::HexDigit)))); + assert_eq!(oct_digit1(a), Err(Err::Error((a, ErrorKind::OctDigit)))); + assert_eq!( + oct_digit1::<_, (_, ErrorKind)>(b), + Err(Err::Incomplete(Needed::new(1))) + ); + assert_eq!(oct_digit1(c), Err(Err::Error((c, ErrorKind::OctDigit)))); + assert_eq!(oct_digit1(d), Err(Err::Error((d, ErrorKind::OctDigit)))); + assert_eq!( + alphanumeric1::<_, (_, ErrorKind)>(a), + Err(Err::Incomplete(Needed::new(1))) + ); + //assert_eq!(fix_error!(b,(), alphanumeric1), Ok((empty, b))); + assert_eq!( + alphanumeric1::<_, (_, ErrorKind)>(c), + Err(Err::Incomplete(Needed::new(1))) + ); + assert_eq!(alphanumeric1::<_, (_, ErrorKind)>(d), Ok(("é12", "az"))); + assert_eq!( + space1::<_, (_, ErrorKind)>(e), + Err(Err::Incomplete(Needed::new(1))) + ); + } + + use crate::traits::Offset; + #[test] + fn offset() { + let a = &b"abcd;"[..]; + let b = &b"1234;"[..]; + let c = &b"a123;"[..]; + let d = &b" \t;"[..]; + let e = &b" \t\r\n;"[..]; + let f = &b"123abcDEF;"[..]; + + match alpha1::<_, (_, ErrorKind)>(a) { + Ok((i, _)) => { + assert_eq!(a.offset(i) + i.len(), a.len()); + } + _ => panic!("wrong return type in offset test for alpha"), + } + match digit1::<_, (_, ErrorKind)>(b) { + Ok((i, _)) => { + assert_eq!(b.offset(i) + i.len(), b.len()); + } + _ => panic!("wrong return type in offset test for digit"), + } + match alphanumeric1::<_, (_, ErrorKind)>(c) { + Ok((i, _)) => { + assert_eq!(c.offset(i) + i.len(), c.len()); + } + _ => panic!("wrong return type in offset test for alphanumeric"), + } + match space1::<_, (_, ErrorKind)>(d) { + Ok((i, _)) => { + assert_eq!(d.offset(i) + i.len(), d.len()); + } + _ => panic!("wrong return type in offset test for space"), + } + match multispace1::<_, (_, ErrorKind)>(e) { + Ok((i, _)) => { + assert_eq!(e.offset(i) + i.len(), e.len()); + } + _ => panic!("wrong return type in offset test for multispace"), + } + match hex_digit1::<_, (_, ErrorKind)>(f) { + Ok((i, _)) => { + assert_eq!(f.offset(i) + i.len(), f.len()); + } + _ => panic!("wrong return type in offset test for hex_digit"), + } + match oct_digit1::<_, (_, ErrorKind)>(f) { + Ok((i, _)) => { + assert_eq!(f.offset(i) + i.len(), f.len()); + } + _ => panic!("wrong return type in offset test for oct_digit"), + } + } + + #[test] + fn is_not_line_ending_bytes() { + let a: &[u8] = b"ab12cd\nefgh"; + assert_eq!( + not_line_ending::<_, (_, ErrorKind)>(a), + Ok((&b"\nefgh"[..], &b"ab12cd"[..])) + ); + + let b: &[u8] = b"ab12cd\nefgh\nijkl"; + assert_eq!( + not_line_ending::<_, (_, ErrorKind)>(b), + Ok((&b"\nefgh\nijkl"[..], &b"ab12cd"[..])) + ); + + let c: &[u8] = b"ab12cd\r\nefgh\nijkl"; + assert_eq!( + not_line_ending::<_, (_, ErrorKind)>(c), + Ok((&b"\r\nefgh\nijkl"[..], &b"ab12cd"[..])) + ); + + let d: &[u8] = b"ab12cd"; + assert_eq!( + not_line_ending::<_, (_, ErrorKind)>(d), + Err(Err::Incomplete(Needed::Unknown)) + ); + } + + #[test] + fn is_not_line_ending_str() { + /* + let a: &str = "ab12cd\nefgh"; + assert_eq!(not_line_ending(a), Ok((&"\nefgh"[..], &"ab12cd"[..]))); + + let b: &str = "ab12cd\nefgh\nijkl"; + assert_eq!(not_line_ending(b), Ok((&"\nefgh\nijkl"[..], &"ab12cd"[..]))); + + let c: &str = "ab12cd\r\nefgh\nijkl"; + assert_eq!(not_line_ending(c), Ok((&"\r\nefgh\nijkl"[..], &"ab12cd"[..]))); + + let d = "βèƒôřè\nÂßÇáƒƭèř"; + assert_eq!(not_line_ending(d), Ok((&"\nÂßÇáƒƭèř"[..], &"βèƒôřè"[..]))); + + let e = "βèƒôřè\r\nÂßÇáƒƭèř"; + assert_eq!(not_line_ending(e), Ok((&"\r\nÂßÇáƒƭèř"[..], &"βèƒôřè"[..]))); + */ + + let f = "βèƒôřè\rÂßÇáƒƭèř"; + assert_eq!(not_line_ending(f), Err(Err::Error((f, ErrorKind::Tag)))); + + let g2: &str = "ab12cd"; + assert_eq!( + not_line_ending::<_, (_, ErrorKind)>(g2), + Err(Err::Incomplete(Needed::Unknown)) + ); + } + + #[test] + fn hex_digit_test() { + let i = &b"0123456789abcdefABCDEF;"[..]; + assert_parse!(hex_digit1(i), Ok((&b";"[..], &i[..i.len() - 1]))); + + let i = &b"g"[..]; + assert_parse!( + hex_digit1(i), + Err(Err::Error(error_position!(i, ErrorKind::HexDigit))) + ); + + let i = &b"G"[..]; + assert_parse!( + hex_digit1(i), + Err(Err::Error(error_position!(i, ErrorKind::HexDigit))) + ); + + assert!(crate::character::is_hex_digit(b'0')); + assert!(crate::character::is_hex_digit(b'9')); + assert!(crate::character::is_hex_digit(b'a')); + assert!(crate::character::is_hex_digit(b'f')); + assert!(crate::character::is_hex_digit(b'A')); + assert!(crate::character::is_hex_digit(b'F')); + assert!(!crate::character::is_hex_digit(b'g')); + assert!(!crate::character::is_hex_digit(b'G')); + assert!(!crate::character::is_hex_digit(b'/')); + assert!(!crate::character::is_hex_digit(b':')); + assert!(!crate::character::is_hex_digit(b'@')); + assert!(!crate::character::is_hex_digit(b'\x60')); + } + + #[test] + fn oct_digit_test() { + let i = &b"01234567;"[..]; + assert_parse!(oct_digit1(i), Ok((&b";"[..], &i[..i.len() - 1]))); + + let i = &b"8"[..]; + assert_parse!( + oct_digit1(i), + Err(Err::Error(error_position!(i, ErrorKind::OctDigit))) + ); + + assert!(crate::character::is_oct_digit(b'0')); + assert!(crate::character::is_oct_digit(b'7')); + assert!(!crate::character::is_oct_digit(b'8')); + assert!(!crate::character::is_oct_digit(b'9')); + assert!(!crate::character::is_oct_digit(b'a')); + assert!(!crate::character::is_oct_digit(b'A')); + assert!(!crate::character::is_oct_digit(b'/')); + assert!(!crate::character::is_oct_digit(b':')); + assert!(!crate::character::is_oct_digit(b'@')); + assert!(!crate::character::is_oct_digit(b'\x60')); + } + + #[test] + fn full_line_windows() { + fn take_full_line(i: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> { + pair(not_line_ending, line_ending)(i) + } + let input = b"abc\r\n"; + let output = take_full_line(input); + assert_eq!(output, Ok((&b""[..], (&b"abc"[..], &b"\r\n"[..])))); + } + + #[test] + fn full_line_unix() { + fn take_full_line(i: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> { + pair(not_line_ending, line_ending)(i) + } + let input = b"abc\n"; + let output = take_full_line(input); + assert_eq!(output, Ok((&b""[..], (&b"abc"[..], &b"\n"[..])))); + } + + #[test] + fn check_windows_lineending() { + let input = b"\r\n"; + let output = line_ending(&input[..]); + assert_parse!(output, Ok((&b""[..], &b"\r\n"[..]))); + } + + #[test] + fn check_unix_lineending() { + let input = b"\n"; + let output = line_ending(&input[..]); + assert_parse!(output, Ok((&b""[..], &b"\n"[..]))); + } + + #[test] + fn cr_lf() { + assert_parse!(crlf(&b"\r\na"[..]), Ok((&b"a"[..], &b"\r\n"[..]))); + assert_parse!(crlf(&b"\r"[..]), Err(Err::Incomplete(Needed::new(2)))); + assert_parse!( + crlf(&b"\ra"[..]), + Err(Err::Error(error_position!(&b"\ra"[..], ErrorKind::CrLf))) + ); + + assert_parse!(crlf("\r\na"), Ok(("a", "\r\n"))); + assert_parse!(crlf("\r"), Err(Err::Incomplete(Needed::new(2)))); + assert_parse!( + crlf("\ra"), + Err(Err::Error(error_position!("\ra", ErrorKind::CrLf))) + ); + } + + #[test] + fn end_of_line() { + assert_parse!(line_ending(&b"\na"[..]), Ok((&b"a"[..], &b"\n"[..]))); + assert_parse!(line_ending(&b"\r\na"[..]), Ok((&b"a"[..], &b"\r\n"[..]))); + assert_parse!( + line_ending(&b"\r"[..]), + Err(Err::Incomplete(Needed::new(2))) + ); + assert_parse!( + line_ending(&b"\ra"[..]), + Err(Err::Error(error_position!(&b"\ra"[..], ErrorKind::CrLf))) + ); + + assert_parse!(line_ending("\na"), Ok(("a", "\n"))); + assert_parse!(line_ending("\r\na"), Ok(("a", "\r\n"))); + assert_parse!(line_ending("\r"), Err(Err::Incomplete(Needed::new(2)))); + assert_parse!( + line_ending("\ra"), + Err(Err::Error(error_position!("\ra", ErrorKind::CrLf))) + ); + } + + fn digit_to_i16(input: &str) -> IResult<&str, i16> { + let i = input; + let (i, opt_sign) = opt(alt((char('+'), char('-'))))(i)?; + let sign = match opt_sign { + Some('+') => true, + Some('-') => false, + _ => true, + }; + + let (i, s) = match digit1::<_, crate::error::Error<_>>(i) { + Ok((i, s)) => (i, s), + Err(Err::Incomplete(i)) => return Err(Err::Incomplete(i)), + Err(_) => { + return Err(Err::Error(crate::error::Error::from_error_kind( + input, + ErrorKind::Digit, + ))) + } + }; + match s.parse_to() { + Some(n) => { + if sign { + Ok((i, n)) + } else { + Ok((i, -n)) + } + } + None => Err(Err::Error(crate::error::Error::from_error_kind( + i, + ErrorKind::Digit, + ))), + } + } + + fn digit_to_u32(i: &str) -> IResult<&str, u32> { + let (i, s) = digit1(i)?; + match s.parse_to() { + Some(n) => Ok((i, n)), + None => Err(Err::Error(crate::error::Error::from_error_kind( + i, + ErrorKind::Digit, + ))), + } + } + + proptest! { + #[test] + fn ints(s in "\\PC*") { + let res1 = digit_to_i16(&s); + let res2 = i16(s.as_str()); + assert_eq!(res1, res2); + } + + #[test] + fn uints(s in "\\PC*") { + let res1 = digit_to_u32(&s); + let res2 = u32(s.as_str()); + assert_eq!(res1, res2); + } + } +} diff --git a/third_party/rust/nom/src/character/tests.rs b/third_party/rust/nom/src/character/tests.rs new file mode 100644 index 0000000000..64c2a1c8a7 --- /dev/null +++ b/third_party/rust/nom/src/character/tests.rs @@ -0,0 +1,62 @@ +use super::streaming::*; +use crate::error::ErrorKind; +use crate::internal::{Err, IResult}; + +#[test] +fn one_of_test() { + fn f(i: &[u8]) -> IResult<&[u8], char> { + one_of("ab")(i) + } + + let a = &b"abcd"[..]; + assert_eq!(f(a), Ok((&b"bcd"[..], 'a'))); + + let b = &b"cde"[..]; + assert_eq!(f(b), Err(Err::Error(error_position!(b, ErrorKind::OneOf)))); + + fn utf8(i: &str) -> IResult<&str, char> { + one_of("+\u{FF0B}")(i) + } + + assert!(utf8("+").is_ok()); + assert!(utf8("\u{FF0B}").is_ok()); +} + +#[test] +fn none_of_test() { + fn f(i: &[u8]) -> IResult<&[u8], char> { + none_of("ab")(i) + } + + let a = &b"abcd"[..]; + assert_eq!(f(a), Err(Err::Error(error_position!(a, ErrorKind::NoneOf)))); + + let b = &b"cde"[..]; + assert_eq!(f(b), Ok((&b"de"[..], 'c'))); +} + +#[test] +fn char_byteslice() { + fn f(i: &[u8]) -> IResult<&[u8], char> { + char('c')(i) + } + + let a = &b"abcd"[..]; + assert_eq!(f(a), Err(Err::Error(error_position!(a, ErrorKind::Char)))); + + let b = &b"cde"[..]; + assert_eq!(f(b), Ok((&b"de"[..], 'c'))); +} + +#[test] +fn char_str() { + fn f(i: &str) -> IResult<&str, char> { + char('c')(i) + } + + let a = &"abcd"[..]; + assert_eq!(f(a), Err(Err::Error(error_position!(a, ErrorKind::Char)))); + + let b = &"cde"[..]; + assert_eq!(f(b), Ok((&"de"[..], 'c'))); +} diff --git a/third_party/rust/nom/src/combinator/mod.rs b/third_party/rust/nom/src/combinator/mod.rs new file mode 100644 index 0000000000..fe08d4a105 --- /dev/null +++ b/third_party/rust/nom/src/combinator/mod.rs @@ -0,0 +1,809 @@ +//! General purpose combinators + +#![allow(unused_imports)] + +#[cfg(feature = "alloc")] +use crate::lib::std::boxed::Box; + +use crate::error::{ErrorKind, FromExternalError, ParseError}; +use crate::internal::*; +use crate::lib::std::borrow::Borrow; +use crate::lib::std::convert::Into; +#[cfg(feature = "std")] +use crate::lib::std::fmt::Debug; +use crate::lib::std::mem::transmute; +use crate::lib::std::ops::{Range, RangeFrom, RangeTo}; +use crate::traits::{AsChar, InputIter, InputLength, InputTakeAtPosition, ParseTo}; +use crate::traits::{Compare, CompareResult, Offset, Slice}; + +#[cfg(test)] +mod tests; + +/// Return the remaining input. +/// +/// ```rust +/// # use nom::error::ErrorKind; +/// use nom::combinator::rest; +/// assert_eq!(rest::<_,(_, ErrorKind)>("abc"), Ok(("", "abc"))); +/// assert_eq!(rest::<_,(_, ErrorKind)>(""), Ok(("", ""))); +/// ``` +#[inline] +pub fn rest<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: Slice<RangeFrom<usize>>, + T: InputLength, +{ + Ok((input.slice(input.input_len()..), input)) +} + +/// Return the length of the remaining input. +/// +/// ```rust +/// # use nom::error::ErrorKind; +/// use nom::combinator::rest_len; +/// assert_eq!(rest_len::<_,(_, ErrorKind)>("abc"), Ok(("abc", 3))); +/// assert_eq!(rest_len::<_,(_, ErrorKind)>(""), Ok(("", 0))); +/// ``` +#[inline] +pub fn rest_len<T, E: ParseError<T>>(input: T) -> IResult<T, usize, E> +where + T: InputLength, +{ + let len = input.input_len(); + Ok((input, len)) +} + +/// Maps a function on the result of a parser. +/// +/// ```rust +/// use nom::{Err,error::ErrorKind, IResult,Parser}; +/// use nom::character::complete::digit1; +/// use nom::combinator::map; +/// # fn main() { +/// +/// let mut parser = map(digit1, |s: &str| s.len()); +/// +/// // the parser will count how many characters were returned by digit1 +/// assert_eq!(parser.parse("123456"), Ok(("", 6))); +/// +/// // this will fail if digit1 fails +/// assert_eq!(parser.parse("abc"), Err(Err::Error(("abc", ErrorKind::Digit)))); +/// # } +/// ``` +pub fn map<I, O1, O2, E, F, G>(mut parser: F, mut f: G) -> impl FnMut(I) -> IResult<I, O2, E> +where + F: Parser<I, O1, E>, + G: FnMut(O1) -> O2, +{ + move |input: I| { + let (input, o1) = parser.parse(input)?; + Ok((input, f(o1))) + } +} + +/// Applies a function returning a `Result` over the result of a parser. +/// +/// ```rust +/// # use nom::{Err,error::ErrorKind, IResult}; +/// use nom::character::complete::digit1; +/// use nom::combinator::map_res; +/// # fn main() { +/// +/// let mut parse = map_res(digit1, |s: &str| s.parse::<u8>()); +/// +/// // the parser will convert the result of digit1 to a number +/// assert_eq!(parse("123"), Ok(("", 123))); +/// +/// // this will fail if digit1 fails +/// assert_eq!(parse("abc"), Err(Err::Error(("abc", ErrorKind::Digit)))); +/// +/// // this will fail if the mapped function fails (a `u8` is too small to hold `123456`) +/// assert_eq!(parse("123456"), Err(Err::Error(("123456", ErrorKind::MapRes)))); +/// # } +/// ``` +pub fn map_res<I: Clone, O1, O2, E: FromExternalError<I, E2>, E2, F, G>( + mut parser: F, + mut f: G, +) -> impl FnMut(I) -> IResult<I, O2, E> +where + F: Parser<I, O1, E>, + G: FnMut(O1) -> Result<O2, E2>, +{ + move |input: I| { + let i = input.clone(); + let (input, o1) = parser.parse(input)?; + match f(o1) { + Ok(o2) => Ok((input, o2)), + Err(e) => Err(Err::Error(E::from_external_error(i, ErrorKind::MapRes, e))), + } + } +} + +/// Applies a function returning an `Option` over the result of a parser. +/// +/// ```rust +/// # use nom::{Err,error::ErrorKind, IResult}; +/// use nom::character::complete::digit1; +/// use nom::combinator::map_opt; +/// # fn main() { +/// +/// let mut parse = map_opt(digit1, |s: &str| s.parse::<u8>().ok()); +/// +/// // the parser will convert the result of digit1 to a number +/// assert_eq!(parse("123"), Ok(("", 123))); +/// +/// // this will fail if digit1 fails +/// assert_eq!(parse("abc"), Err(Err::Error(("abc", ErrorKind::Digit)))); +/// +/// // this will fail if the mapped function fails (a `u8` is too small to hold `123456`) +/// assert_eq!(parse("123456"), Err(Err::Error(("123456", ErrorKind::MapOpt)))); +/// # } +/// ``` +pub fn map_opt<I: Clone, O1, O2, E: ParseError<I>, F, G>( + mut parser: F, + mut f: G, +) -> impl FnMut(I) -> IResult<I, O2, E> +where + F: Parser<I, O1, E>, + G: FnMut(O1) -> Option<O2>, +{ + move |input: I| { + let i = input.clone(); + let (input, o1) = parser.parse(input)?; + match f(o1) { + Some(o2) => Ok((input, o2)), + None => Err(Err::Error(E::from_error_kind(i, ErrorKind::MapOpt))), + } + } +} + +/// Applies a parser over the result of another one. +/// +/// ```rust +/// # use nom::{Err,error::ErrorKind, IResult}; +/// use nom::character::complete::digit1; +/// use nom::bytes::complete::take; +/// use nom::combinator::map_parser; +/// # fn main() { +/// +/// let mut parse = map_parser(take(5u8), digit1); +/// +/// assert_eq!(parse("12345"), Ok(("", "12345"))); +/// assert_eq!(parse("123ab"), Ok(("", "123"))); +/// assert_eq!(parse("123"), Err(Err::Error(("123", ErrorKind::Eof)))); +/// # } +/// ``` +pub fn map_parser<I, O1, O2, E: ParseError<I>, F, G>( + mut parser: F, + mut applied_parser: G, +) -> impl FnMut(I) -> IResult<I, O2, E> +where + F: Parser<I, O1, E>, + G: Parser<O1, O2, E>, +{ + move |input: I| { + let (input, o1) = parser.parse(input)?; + let (_, o2) = applied_parser.parse(o1)?; + Ok((input, o2)) + } +} + +/// Creates a new parser from the output of the first parser, then apply that parser over the rest of the input. +/// +/// ```rust +/// # use nom::{Err,error::ErrorKind, IResult}; +/// use nom::bytes::complete::take; +/// use nom::number::complete::u8; +/// use nom::combinator::flat_map; +/// # fn main() { +/// +/// let mut parse = flat_map(u8, take); +/// +/// assert_eq!(parse(&[2, 0, 1, 2][..]), Ok((&[2][..], &[0, 1][..]))); +/// assert_eq!(parse(&[4, 0, 1, 2][..]), Err(Err::Error((&[0, 1, 2][..], ErrorKind::Eof)))); +/// # } +/// ``` +pub fn flat_map<I, O1, O2, E: ParseError<I>, F, G, H>( + mut parser: F, + mut applied_parser: G, +) -> impl FnMut(I) -> IResult<I, O2, E> +where + F: Parser<I, O1, E>, + G: FnMut(O1) -> H, + H: Parser<I, O2, E>, +{ + move |input: I| { + let (input, o1) = parser.parse(input)?; + applied_parser(o1).parse(input) + } +} + +/// Optional parser, will return `None` on [`Err::Error`]. +/// +/// To chain an error up, see [`cut`]. +/// +/// ```rust +/// # use nom::{Err,error::ErrorKind, IResult}; +/// use nom::combinator::opt; +/// use nom::character::complete::alpha1; +/// # fn main() { +/// +/// fn parser(i: &str) -> IResult<&str, Option<&str>> { +/// opt(alpha1)(i) +/// } +/// +/// assert_eq!(parser("abcd;"), Ok((";", Some("abcd")))); +/// assert_eq!(parser("123;"), Ok(("123;", None))); +/// # } +/// ``` +pub fn opt<I: Clone, O, E: ParseError<I>, F>(mut f: F) -> impl FnMut(I) -> IResult<I, Option<O>, E> +where + F: Parser<I, O, E>, +{ + move |input: I| { + let i = input.clone(); + match f.parse(input) { + Ok((i, o)) => Ok((i, Some(o))), + Err(Err::Error(_)) => Ok((i, None)), + Err(e) => Err(e), + } + } +} + +/// Calls the parser if the condition is met. +/// +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, IResult}; +/// use nom::combinator::cond; +/// use nom::character::complete::alpha1; +/// # fn main() { +/// +/// fn parser(b: bool, i: &str) -> IResult<&str, Option<&str>> { +/// cond(b, alpha1)(i) +/// } +/// +/// assert_eq!(parser(true, "abcd;"), Ok((";", Some("abcd")))); +/// assert_eq!(parser(false, "abcd;"), Ok(("abcd;", None))); +/// assert_eq!(parser(true, "123;"), Err(Err::Error(Error::new("123;", ErrorKind::Alpha)))); +/// assert_eq!(parser(false, "123;"), Ok(("123;", None))); +/// # } +/// ``` +pub fn cond<I, O, E: ParseError<I>, F>( + b: bool, + mut f: F, +) -> impl FnMut(I) -> IResult<I, Option<O>, E> +where + F: Parser<I, O, E>, +{ + move |input: I| { + if b { + match f.parse(input) { + Ok((i, o)) => Ok((i, Some(o))), + Err(e) => Err(e), + } + } else { + Ok((input, None)) + } + } +} + +/// Tries to apply its parser without consuming the input. +/// +/// ```rust +/// # use nom::{Err,error::ErrorKind, IResult}; +/// use nom::combinator::peek; +/// use nom::character::complete::alpha1; +/// # fn main() { +/// +/// let mut parser = peek(alpha1); +/// +/// assert_eq!(parser("abcd;"), Ok(("abcd;", "abcd"))); +/// assert_eq!(parser("123;"), Err(Err::Error(("123;", ErrorKind::Alpha)))); +/// # } +/// ``` +pub fn peek<I: Clone, O, E: ParseError<I>, F>(mut f: F) -> impl FnMut(I) -> IResult<I, O, E> +where + F: Parser<I, O, E>, +{ + move |input: I| { + let i = input.clone(); + match f.parse(input) { + Ok((_, o)) => Ok((i, o)), + Err(e) => Err(e), + } + } +} + +/// returns its input if it is at the end of input data +/// +/// When we're at the end of the data, this combinator +/// will succeed +/// +/// ``` +/// # use std::str; +/// # use nom::{Err, error::ErrorKind, IResult}; +/// # use nom::combinator::eof; +/// +/// # fn main() { +/// let parser = eof; +/// assert_eq!(parser("abc"), Err(Err::Error(("abc", ErrorKind::Eof)))); +/// assert_eq!(parser(""), Ok(("", ""))); +/// # } +/// ``` +pub fn eof<I: InputLength + Clone, E: ParseError<I>>(input: I) -> IResult<I, I, E> { + if input.input_len() == 0 { + let clone = input.clone(); + Ok((input, clone)) + } else { + Err(Err::Error(E::from_error_kind(input, ErrorKind::Eof))) + } +} + +/// Transforms Incomplete into `Error`. +/// +/// ```rust +/// # use nom::{Err,error::ErrorKind, IResult}; +/// use nom::bytes::streaming::take; +/// use nom::combinator::complete; +/// # fn main() { +/// +/// let mut parser = complete(take(5u8)); +/// +/// assert_eq!(parser("abcdefg"), Ok(("fg", "abcde"))); +/// assert_eq!(parser("abcd"), Err(Err::Error(("abcd", ErrorKind::Complete)))); +/// # } +/// ``` +pub fn complete<I: Clone, O, E: ParseError<I>, F>(mut f: F) -> impl FnMut(I) -> IResult<I, O, E> +where + F: Parser<I, O, E>, +{ + move |input: I| { + let i = input.clone(); + match f.parse(input) { + Err(Err::Incomplete(_)) => Err(Err::Error(E::from_error_kind(i, ErrorKind::Complete))), + rest => rest, + } + } +} + +/// Succeeds if all the input has been consumed by its child parser. +/// +/// ```rust +/// # use nom::{Err,error::ErrorKind, IResult}; +/// use nom::combinator::all_consuming; +/// use nom::character::complete::alpha1; +/// # fn main() { +/// +/// let mut parser = all_consuming(alpha1); +/// +/// assert_eq!(parser("abcd"), Ok(("", "abcd"))); +/// assert_eq!(parser("abcd;"),Err(Err::Error((";", ErrorKind::Eof)))); +/// assert_eq!(parser("123abcd;"),Err(Err::Error(("123abcd;", ErrorKind::Alpha)))); +/// # } +/// ``` +pub fn all_consuming<I, O, E: ParseError<I>, F>(mut f: F) -> impl FnMut(I) -> IResult<I, O, E> +where + I: InputLength, + F: Parser<I, O, E>, +{ + move |input: I| { + let (input, res) = f.parse(input)?; + if input.input_len() == 0 { + Ok((input, res)) + } else { + Err(Err::Error(E::from_error_kind(input, ErrorKind::Eof))) + } + } +} + +/// Returns the result of the child parser if it satisfies a verification function. +/// +/// The verification function takes as argument a reference to the output of the +/// parser. +/// +/// ```rust +/// # use nom::{Err,error::ErrorKind, IResult}; +/// use nom::combinator::verify; +/// use nom::character::complete::alpha1; +/// # fn main() { +/// +/// let mut parser = verify(alpha1, |s: &str| s.len() == 4); +/// +/// assert_eq!(parser("abcd"), Ok(("", "abcd"))); +/// assert_eq!(parser("abcde"), Err(Err::Error(("abcde", ErrorKind::Verify)))); +/// assert_eq!(parser("123abcd;"),Err(Err::Error(("123abcd;", ErrorKind::Alpha)))); +/// # } +/// ``` +pub fn verify<I: Clone, O1, O2, E: ParseError<I>, F, G>( + mut first: F, + second: G, +) -> impl FnMut(I) -> IResult<I, O1, E> +where + F: Parser<I, O1, E>, + G: Fn(&O2) -> bool, + O1: Borrow<O2>, + O2: ?Sized, +{ + move |input: I| { + let i = input.clone(); + let (input, o) = first.parse(input)?; + + if second(o.borrow()) { + Ok((input, o)) + } else { + Err(Err::Error(E::from_error_kind(i, ErrorKind::Verify))) + } + } +} + +/// Returns the provided value if the child parser succeeds. +/// +/// ```rust +/// # use nom::{Err,error::ErrorKind, IResult}; +/// use nom::combinator::value; +/// use nom::character::complete::alpha1; +/// # fn main() { +/// +/// let mut parser = value(1234, alpha1); +/// +/// assert_eq!(parser("abcd"), Ok(("", 1234))); +/// assert_eq!(parser("123abcd;"), Err(Err::Error(("123abcd;", ErrorKind::Alpha)))); +/// # } +/// ``` +pub fn value<I, O1: Clone, O2, E: ParseError<I>, F>( + val: O1, + mut parser: F, +) -> impl FnMut(I) -> IResult<I, O1, E> +where + F: Parser<I, O2, E>, +{ + move |input: I| parser.parse(input).map(|(i, _)| (i, val.clone())) +} + +/// Succeeds if the child parser returns an error. +/// +/// ```rust +/// # use nom::{Err,error::ErrorKind, IResult}; +/// use nom::combinator::not; +/// use nom::character::complete::alpha1; +/// # fn main() { +/// +/// let mut parser = not(alpha1); +/// +/// assert_eq!(parser("123"), Ok(("123", ()))); +/// assert_eq!(parser("abcd"), Err(Err::Error(("abcd", ErrorKind::Not)))); +/// # } +/// ``` +pub fn not<I: Clone, O, E: ParseError<I>, F>(mut parser: F) -> impl FnMut(I) -> IResult<I, (), E> +where + F: Parser<I, O, E>, +{ + move |input: I| { + let i = input.clone(); + match parser.parse(input) { + Ok(_) => Err(Err::Error(E::from_error_kind(i, ErrorKind::Not))), + Err(Err::Error(_)) => Ok((i, ())), + Err(e) => Err(e), + } + } +} + +/// If the child parser was successful, return the consumed input as produced value. +/// +/// ```rust +/// # use nom::{Err,error::ErrorKind, IResult}; +/// use nom::combinator::recognize; +/// use nom::character::complete::{char, alpha1}; +/// use nom::sequence::separated_pair; +/// # fn main() { +/// +/// let mut parser = recognize(separated_pair(alpha1, char(','), alpha1)); +/// +/// assert_eq!(parser("abcd,efgh"), Ok(("", "abcd,efgh"))); +/// assert_eq!(parser("abcd;"),Err(Err::Error((";", ErrorKind::Char)))); +/// # } +/// ``` +pub fn recognize<I: Clone + Offset + Slice<RangeTo<usize>>, O, E: ParseError<I>, F>( + mut parser: F, +) -> impl FnMut(I) -> IResult<I, I, E> +where + F: Parser<I, O, E>, +{ + move |input: I| { + let i = input.clone(); + match parser.parse(i) { + Ok((i, _)) => { + let index = input.offset(&i); + Ok((i, input.slice(..index))) + } + Err(e) => Err(e), + } + } +} + +/// if the child parser was successful, return the consumed input with the output +/// as a tuple. Functions similarly to [recognize](fn.recognize.html) except it +/// returns the parser output as well. +/// +/// This can be useful especially in cases where the output is not the same type +/// as the input, or the input is a user defined type. +/// +/// Returned tuple is of the format `(consumed input, produced output)`. +/// +/// ```rust +/// # use nom::{Err,error::ErrorKind, IResult}; +/// use nom::combinator::{consumed, value, recognize, map}; +/// use nom::character::complete::{char, alpha1}; +/// use nom::bytes::complete::tag; +/// use nom::sequence::separated_pair; +/// +/// fn inner_parser(input: &str) -> IResult<&str, bool> { +/// value(true, tag("1234"))(input) +/// } +/// +/// # fn main() { +/// +/// let mut consumed_parser = consumed(value(true, separated_pair(alpha1, char(','), alpha1))); +/// +/// assert_eq!(consumed_parser("abcd,efgh1"), Ok(("1", ("abcd,efgh", true)))); +/// assert_eq!(consumed_parser("abcd;"),Err(Err::Error((";", ErrorKind::Char)))); +/// +/// +/// // the first output (representing the consumed input) +/// // should be the same as that of the `recognize` parser. +/// let mut recognize_parser = recognize(inner_parser); +/// let mut consumed_parser = map(consumed(inner_parser), |(consumed, output)| consumed); +/// +/// assert_eq!(recognize_parser("1234"), consumed_parser("1234")); +/// assert_eq!(recognize_parser("abcd"), consumed_parser("abcd")); +/// # } +/// ``` +pub fn consumed<I, O, F, E>(mut parser: F) -> impl FnMut(I) -> IResult<I, (I, O), E> +where + I: Clone + Offset + Slice<RangeTo<usize>>, + E: ParseError<I>, + F: Parser<I, O, E>, +{ + move |input: I| { + let i = input.clone(); + match parser.parse(i) { + Ok((remaining, result)) => { + let index = input.offset(&remaining); + let consumed = input.slice(..index); + Ok((remaining, (consumed, result))) + } + Err(e) => Err(e), + } + } +} + +/// Transforms an [`Err::Error`] (recoverable) to [`Err::Failure`] (unrecoverable) +/// +/// This commits the parse result, preventing alternative branch paths like with +/// [`nom::branch::alt`][crate::branch::alt]. +/// +/// # Example +/// +/// Without `cut`: +/// ```rust +/// # use nom::{Err,error::ErrorKind, IResult}; +/// # use nom::character::complete::{one_of, digit1}; +/// # use nom::combinator::rest; +/// # use nom::branch::alt; +/// # use nom::sequence::preceded; +/// # fn main() { +/// +/// fn parser(input: &str) -> IResult<&str, &str> { +/// alt(( +/// preceded(one_of("+-"), digit1), +/// rest +/// ))(input) +/// } +/// +/// assert_eq!(parser("+10 ab"), Ok((" ab", "10"))); +/// assert_eq!(parser("ab"), Ok(("", "ab"))); +/// assert_eq!(parser("+"), Ok(("", "+"))); +/// # } +/// ``` +/// +/// With `cut`: +/// ```rust +/// # use nom::{Err,error::ErrorKind, IResult, error::Error}; +/// # use nom::character::complete::{one_of, digit1}; +/// # use nom::combinator::rest; +/// # use nom::branch::alt; +/// # use nom::sequence::preceded; +/// use nom::combinator::cut; +/// # fn main() { +/// +/// fn parser(input: &str) -> IResult<&str, &str> { +/// alt(( +/// preceded(one_of("+-"), cut(digit1)), +/// rest +/// ))(input) +/// } +/// +/// assert_eq!(parser("+10 ab"), Ok((" ab", "10"))); +/// assert_eq!(parser("ab"), Ok(("", "ab"))); +/// assert_eq!(parser("+"), Err(Err::Failure(Error { input: "", code: ErrorKind::Digit }))); +/// # } +/// ``` +pub fn cut<I, O, E: ParseError<I>, F>(mut parser: F) -> impl FnMut(I) -> IResult<I, O, E> +where + F: Parser<I, O, E>, +{ + move |input: I| match parser.parse(input) { + Err(Err::Error(e)) => Err(Err::Failure(e)), + rest => rest, + } +} + +/// automatically converts the child parser's result to another type +/// +/// it will be able to convert the output value and the error value +/// as long as the `Into` implementations are available +/// +/// ```rust +/// # use nom::IResult; +/// use nom::combinator::into; +/// use nom::character::complete::alpha1; +/// # fn main() { +/// +/// fn parser1(i: &str) -> IResult<&str, &str> { +/// alpha1(i) +/// } +/// +/// let mut parser2 = into(parser1); +/// +/// // the parser converts the &str output of the child parser into a Vec<u8> +/// let bytes: IResult<&str, Vec<u8>> = parser2("abcd"); +/// assert_eq!(bytes, Ok(("", vec![97, 98, 99, 100]))); +/// # } +/// ``` +pub fn into<I, O1, O2, E1, E2, F>(mut parser: F) -> impl FnMut(I) -> IResult<I, O2, E2> +where + O1: Into<O2>, + E1: Into<E2>, + E1: ParseError<I>, + E2: ParseError<I>, + F: Parser<I, O1, E1>, +{ + //map(parser, Into::into) + move |input: I| match parser.parse(input) { + Ok((i, o)) => Ok((i, o.into())), + Err(Err::Error(e)) => Err(Err::Error(e.into())), + Err(Err::Failure(e)) => Err(Err::Failure(e.into())), + Err(Err::Incomplete(e)) => Err(Err::Incomplete(e)), + } +} + +/// Creates an iterator from input data and a parser. +/// +/// Call the iterator's [ParserIterator::finish] method to get the remaining input if successful, +/// or the error value if we encountered an error. +/// +/// On [`Err::Error`], iteration will stop. To instead chain an error up, see [`cut`]. +/// +/// ```rust +/// use nom::{combinator::iterator, IResult, bytes::complete::tag, character::complete::alpha1, sequence::terminated}; +/// use std::collections::HashMap; +/// +/// let data = "abc|defg|hijkl|mnopqr|123"; +/// let mut it = iterator(data, terminated(alpha1, tag("|"))); +/// +/// let parsed = it.map(|v| (v, v.len())).collect::<HashMap<_,_>>(); +/// let res: IResult<_,_> = it.finish(); +/// +/// assert_eq!(parsed, [("abc", 3usize), ("defg", 4), ("hijkl", 5), ("mnopqr", 6)].iter().cloned().collect()); +/// assert_eq!(res, Ok(("123", ()))); +/// ``` +pub fn iterator<Input, Output, Error, F>(input: Input, f: F) -> ParserIterator<Input, Error, F> +where + F: Parser<Input, Output, Error>, + Error: ParseError<Input>, +{ + ParserIterator { + iterator: f, + input, + state: Some(State::Running), + } +} + +/// Main structure associated to the [iterator] function. +pub struct ParserIterator<I, E, F> { + iterator: F, + input: I, + state: Option<State<E>>, +} + +impl<I: Clone, E, F> ParserIterator<I, E, F> { + /// Returns the remaining input if parsing was successful, or the error if we encountered an error. + pub fn finish(mut self) -> IResult<I, (), E> { + match self.state.take().unwrap() { + State::Running | State::Done => Ok((self.input, ())), + State::Failure(e) => Err(Err::Failure(e)), + State::Incomplete(i) => Err(Err::Incomplete(i)), + } + } +} + +impl<'a, Input, Output, Error, F> core::iter::Iterator for &'a mut ParserIterator<Input, Error, F> +where + F: FnMut(Input) -> IResult<Input, Output, Error>, + Input: Clone, +{ + type Item = Output; + + fn next(&mut self) -> Option<Self::Item> { + if let State::Running = self.state.take().unwrap() { + let input = self.input.clone(); + + match (self.iterator)(input) { + Ok((i, o)) => { + self.input = i; + self.state = Some(State::Running); + Some(o) + } + Err(Err::Error(_)) => { + self.state = Some(State::Done); + None + } + Err(Err::Failure(e)) => { + self.state = Some(State::Failure(e)); + None + } + Err(Err::Incomplete(i)) => { + self.state = Some(State::Incomplete(i)); + None + } + } + } else { + None + } + } +} + +enum State<E> { + Running, + Done, + Failure(E), + Incomplete(Needed), +} + +/// a parser which always succeeds with given value without consuming any input. +/// +/// It can be used for example as the last alternative in `alt` to +/// specify the default case. +/// +/// ```rust +/// # use nom::{Err,error::ErrorKind, IResult}; +/// use nom::branch::alt; +/// use nom::combinator::{success, value}; +/// use nom::character::complete::char; +/// # fn main() { +/// +/// let mut parser = success::<_,_,(_,ErrorKind)>(10); +/// assert_eq!(parser("xyz"), Ok(("xyz", 10))); +/// +/// let mut sign = alt((value(-1, char('-')), value(1, char('+')), success::<_,_,(_,ErrorKind)>(1))); +/// assert_eq!(sign("+10"), Ok(("10", 1))); +/// assert_eq!(sign("-10"), Ok(("10", -1))); +/// assert_eq!(sign("10"), Ok(("10", 1))); +/// # } +/// ``` +pub fn success<I, O: Clone, E: ParseError<I>>(val: O) -> impl Fn(I) -> IResult<I, O, E> { + move |input: I| Ok((input, val.clone())) +} + +/// A parser which always fails. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, IResult}; +/// use nom::combinator::fail; +/// +/// let s = "string"; +/// assert_eq!(fail::<_, &str, _>(s), Err(Err::Error((s, ErrorKind::Fail)))); +/// ``` +pub fn fail<I, O, E: ParseError<I>>(i: I) -> IResult<I, O, E> { + Err(Err::Error(E::from_error_kind(i, ErrorKind::Fail))) +} diff --git a/third_party/rust/nom/src/combinator/tests.rs b/third_party/rust/nom/src/combinator/tests.rs new file mode 100644 index 0000000000..15d32b8aae --- /dev/null +++ b/third_party/rust/nom/src/combinator/tests.rs @@ -0,0 +1,275 @@ +use super::*; +use crate::bytes::complete::take; +use crate::bytes::streaming::tag; +use crate::error::ErrorKind; +use crate::error::ParseError; +use crate::internal::{Err, IResult, Needed}; +#[cfg(feature = "alloc")] +use crate::lib::std::boxed::Box; +use crate::number::complete::u8; + +macro_rules! assert_parse( + ($left: expr, $right: expr) => { + let res: $crate::IResult<_, _, (_, ErrorKind)> = $left; + assert_eq!(res, $right); + }; +); + +/*#[test] +fn t1() { + let v1:Vec<u8> = vec![1,2,3]; + let v2:Vec<u8> = vec![4,5,6]; + let d = Ok((&v1[..], &v2[..])); + let res = d.flat_map(print); + assert_eq!(res, Ok((&v2[..], ()))); +}*/ + +#[test] +fn eof_on_slices() { + let not_over: &[u8] = &b"Hello, world!"[..]; + let is_over: &[u8] = &b""[..]; + + let res_not_over = eof(not_over); + assert_parse!( + res_not_over, + Err(Err::Error(error_position!(not_over, ErrorKind::Eof))) + ); + + let res_over = eof(is_over); + assert_parse!(res_over, Ok((is_over, is_over))); +} + +#[test] +fn eof_on_strs() { + let not_over: &str = "Hello, world!"; + let is_over: &str = ""; + + let res_not_over = eof(not_over); + assert_parse!( + res_not_over, + Err(Err::Error(error_position!(not_over, ErrorKind::Eof))) + ); + + let res_over = eof(is_over); + assert_parse!(res_over, Ok((is_over, is_over))); +} + +/* +#[test] +fn end_of_input() { + let not_over = &b"Hello, world!"[..]; + let is_over = &b""[..]; + named!(eof_test, eof!()); + + let res_not_over = eof_test(not_over); + assert_eq!(res_not_over, Err(Err::Error(error_position!(not_over, ErrorKind::Eof)))); + + let res_over = eof_test(is_over); + assert_eq!(res_over, Ok((is_over, is_over))); +} +*/ + +#[test] +fn rest_on_slices() { + let input: &[u8] = &b"Hello, world!"[..]; + let empty: &[u8] = &b""[..]; + assert_parse!(rest(input), Ok((empty, input))); +} + +#[test] +fn rest_on_strs() { + let input: &str = "Hello, world!"; + let empty: &str = ""; + assert_parse!(rest(input), Ok((empty, input))); +} + +#[test] +fn rest_len_on_slices() { + let input: &[u8] = &b"Hello, world!"[..]; + assert_parse!(rest_len(input), Ok((input, input.len()))); +} + +use crate::lib::std::convert::From; +impl From<u32> for CustomError { + fn from(_: u32) -> Self { + CustomError + } +} + +impl<I> ParseError<I> for CustomError { + fn from_error_kind(_: I, _: ErrorKind) -> Self { + CustomError + } + + fn append(_: I, _: ErrorKind, _: CustomError) -> Self { + CustomError + } +} + +struct CustomError; +#[allow(dead_code)] +fn custom_error(input: &[u8]) -> IResult<&[u8], &[u8], CustomError> { + //fix_error!(input, CustomError, alphanumeric) + crate::character::streaming::alphanumeric1(input) +} + +#[test] +fn test_flat_map() { + let input: &[u8] = &[3, 100, 101, 102, 103, 104][..]; + assert_parse!( + flat_map(u8, take)(input), + Ok((&[103, 104][..], &[100, 101, 102][..])) + ); +} + +#[test] +fn test_map_opt() { + let input: &[u8] = &[50][..]; + assert_parse!( + map_opt(u8, |u| if u < 20 { Some(u) } else { None })(input), + Err(Err::Error((&[50][..], ErrorKind::MapOpt))) + ); + assert_parse!( + map_opt(u8, |u| if u > 20 { Some(u) } else { None })(input), + Ok((&[][..], 50)) + ); +} + +#[test] +fn test_map_parser() { + let input: &[u8] = &[100, 101, 102, 103, 104][..]; + assert_parse!( + map_parser(take(4usize), take(2usize))(input), + Ok((&[104][..], &[100, 101][..])) + ); +} + +#[test] +fn test_all_consuming() { + let input: &[u8] = &[100, 101, 102][..]; + assert_parse!( + all_consuming(take(2usize))(input), + Err(Err::Error((&[102][..], ErrorKind::Eof))) + ); + assert_parse!( + all_consuming(take(3usize))(input), + Ok((&[][..], &[100, 101, 102][..])) + ); +} + +#[test] +#[allow(unused)] +fn test_verify_ref() { + use crate::bytes::complete::take; + + let mut parser1 = verify(take(3u8), |s: &[u8]| s == &b"abc"[..]); + + assert_eq!(parser1(&b"abcd"[..]), Ok((&b"d"[..], &b"abc"[..]))); + assert_eq!( + parser1(&b"defg"[..]), + Err(Err::Error((&b"defg"[..], ErrorKind::Verify))) + ); + + fn parser2(i: &[u8]) -> IResult<&[u8], u32> { + verify(crate::number::streaming::be_u32, |val: &u32| *val < 3)(i) + } +} + +#[test] +#[cfg(feature = "alloc")] +fn test_verify_alloc() { + use crate::bytes::complete::take; + let mut parser1 = verify(map(take(3u8), |s: &[u8]| s.to_vec()), |s: &[u8]| { + s == &b"abc"[..] + }); + + assert_eq!(parser1(&b"abcd"[..]), Ok((&b"d"[..], (&b"abc").to_vec()))); + assert_eq!( + parser1(&b"defg"[..]), + Err(Err::Error((&b"defg"[..], ErrorKind::Verify))) + ); +} + +#[test] +#[cfg(feature = "std")] +fn test_into() { + use crate::bytes::complete::take; + use crate::{ + error::{Error, ParseError}, + Err, + }; + + let mut parser = into(take::<_, _, Error<_>>(3u8)); + let result: IResult<&[u8], Vec<u8>> = parser(&b"abcdefg"[..]); + + assert_eq!(result, Ok((&b"defg"[..], vec![97, 98, 99]))); +} + +#[test] +fn opt_test() { + fn opt_abcd(i: &[u8]) -> IResult<&[u8], Option<&[u8]>> { + opt(tag("abcd"))(i) + } + + let a = &b"abcdef"[..]; + let b = &b"bcdefg"[..]; + let c = &b"ab"[..]; + assert_eq!(opt_abcd(a), Ok((&b"ef"[..], Some(&b"abcd"[..])))); + assert_eq!(opt_abcd(b), Ok((&b"bcdefg"[..], None))); + assert_eq!(opt_abcd(c), Err(Err::Incomplete(Needed::new(2)))); +} + +#[test] +fn peek_test() { + fn peek_tag(i: &[u8]) -> IResult<&[u8], &[u8]> { + peek(tag("abcd"))(i) + } + + assert_eq!(peek_tag(&b"abcdef"[..]), Ok((&b"abcdef"[..], &b"abcd"[..]))); + assert_eq!(peek_tag(&b"ab"[..]), Err(Err::Incomplete(Needed::new(2)))); + assert_eq!( + peek_tag(&b"xxx"[..]), + Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag))) + ); +} + +#[test] +fn not_test() { + fn not_aaa(i: &[u8]) -> IResult<&[u8], ()> { + not(tag("aaa"))(i) + } + + assert_eq!( + not_aaa(&b"aaa"[..]), + Err(Err::Error(error_position!(&b"aaa"[..], ErrorKind::Not))) + ); + assert_eq!(not_aaa(&b"aa"[..]), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(not_aaa(&b"abcd"[..]), Ok((&b"abcd"[..], ()))); +} + +#[test] +fn verify_test() { + use crate::bytes::streaming::take; + + fn test(i: &[u8]) -> IResult<&[u8], &[u8]> { + verify(take(5u8), |slice: &[u8]| slice[0] == b'a')(i) + } + assert_eq!(test(&b"bcd"[..]), Err(Err::Incomplete(Needed::new(2)))); + assert_eq!( + test(&b"bcdefg"[..]), + Err(Err::Error(error_position!( + &b"bcdefg"[..], + ErrorKind::Verify + ))) + ); + assert_eq!(test(&b"abcdefg"[..]), Ok((&b"fg"[..], &b"abcde"[..]))); +} + +#[test] +fn fail_test() { + let a = "string"; + let b = "another string"; + + assert_eq!(fail::<_, &str, _>(a), Err(Err::Error((a, ErrorKind::Fail)))); + assert_eq!(fail::<_, &str, _>(b), Err(Err::Error((b, ErrorKind::Fail)))); +} diff --git a/third_party/rust/nom/src/error.rs b/third_party/rust/nom/src/error.rs new file mode 100644 index 0000000000..498b5e135a --- /dev/null +++ b/third_party/rust/nom/src/error.rs @@ -0,0 +1,831 @@ +//! Error management +//! +//! Parsers are generic over their error type, requiring that it implements +//! the `error::ParseError<Input>` trait. + +use crate::internal::Parser; +use crate::lib::std::fmt; + +/// This trait must be implemented by the error type of a nom parser. +/// +/// There are already implementations of it for `(Input, ErrorKind)` +/// and `VerboseError<Input>`. +/// +/// It provides methods to create an error from some combinators, +/// and combine existing errors in combinators like `alt`. +pub trait ParseError<I>: Sized { + /// Creates an error from the input position and an [ErrorKind] + fn from_error_kind(input: I, kind: ErrorKind) -> Self; + + /// Combines an existing error with a new one created from the input + /// position and an [ErrorKind]. This is useful when backtracking + /// through a parse tree, accumulating error context on the way + fn append(input: I, kind: ErrorKind, other: Self) -> Self; + + /// Creates an error from an input position and an expected character + fn from_char(input: I, _: char) -> Self { + Self::from_error_kind(input, ErrorKind::Char) + } + + /// Combines two existing errors. This function is used to compare errors + /// generated in various branches of `alt`. + fn or(self, other: Self) -> Self { + other + } +} + +/// This trait is required by the `context` combinator to add a static string +/// to an existing error +pub trait ContextError<I>: Sized { + /// Creates a new error from an input position, a static string and an existing error. + /// This is used mainly in the [context] combinator, to add user friendly information + /// to errors when backtracking through a parse tree + fn add_context(_input: I, _ctx: &'static str, other: Self) -> Self { + other + } +} + +/// This trait is required by the `map_res` combinator to integrate +/// error types from external functions, like [std::str::FromStr] +pub trait FromExternalError<I, E> { + /// Creates a new error from an input position, an [ErrorKind] indicating the + /// wrapping parser, and an external error + fn from_external_error(input: I, kind: ErrorKind, e: E) -> Self; +} + +/// default error type, only contains the error' location and code +#[derive(Debug, PartialEq)] +pub struct Error<I> { + /// position of the error in the input data + pub input: I, + /// nom error code + pub code: ErrorKind, +} + +impl<I> Error<I> { + /// creates a new basic error + pub fn new(input: I, code: ErrorKind) -> Error<I> { + Error { input, code } + } +} + +impl<I> ParseError<I> for Error<I> { + fn from_error_kind(input: I, kind: ErrorKind) -> Self { + Error { input, code: kind } + } + + fn append(_: I, _: ErrorKind, other: Self) -> Self { + other + } +} + +impl<I> ContextError<I> for Error<I> {} + +impl<I, E> FromExternalError<I, E> for Error<I> { + /// Create a new error from an input position and an external error + fn from_external_error(input: I, kind: ErrorKind, _e: E) -> Self { + Error { input, code: kind } + } +} + +/// The Display implementation allows the std::error::Error implementation +impl<I: fmt::Display> fmt::Display for Error<I> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "error {:?} at: {}", self.code, self.input) + } +} + +#[cfg(feature = "std")] +impl<I: fmt::Debug + fmt::Display> std::error::Error for Error<I> {} + +// for backward compatibility, keep those trait implementations +// for the previously used error type +impl<I> ParseError<I> for (I, ErrorKind) { + fn from_error_kind(input: I, kind: ErrorKind) -> Self { + (input, kind) + } + + fn append(_: I, _: ErrorKind, other: Self) -> Self { + other + } +} + +impl<I> ContextError<I> for (I, ErrorKind) {} + +impl<I, E> FromExternalError<I, E> for (I, ErrorKind) { + fn from_external_error(input: I, kind: ErrorKind, _e: E) -> Self { + (input, kind) + } +} + +impl<I> ParseError<I> for () { + fn from_error_kind(_: I, _: ErrorKind) -> Self {} + + fn append(_: I, _: ErrorKind, _: Self) -> Self {} +} + +impl<I> ContextError<I> for () {} + +impl<I, E> FromExternalError<I, E> for () { + fn from_external_error(_input: I, _kind: ErrorKind, _e: E) -> Self {} +} + +/// Creates an error from the input position and an [ErrorKind] +pub fn make_error<I, E: ParseError<I>>(input: I, kind: ErrorKind) -> E { + E::from_error_kind(input, kind) +} + +/// Combines an existing error with a new one created from the input +/// position and an [ErrorKind]. This is useful when backtracking +/// through a parse tree, accumulating error context on the way +pub fn append_error<I, E: ParseError<I>>(input: I, kind: ErrorKind, other: E) -> E { + E::append(input, kind, other) +} + +/// This error type accumulates errors and their position when backtracking +/// through a parse tree. With some post processing (cf `examples/json.rs`), +/// it can be used to display user friendly error messages +#[cfg(feature = "alloc")] +#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] +#[derive(Clone, Debug, PartialEq)] +pub struct VerboseError<I> { + /// List of errors accumulated by `VerboseError`, containing the affected + /// part of input data, and some context + pub errors: crate::lib::std::vec::Vec<(I, VerboseErrorKind)>, +} + +#[cfg(feature = "alloc")] +#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] +#[derive(Clone, Debug, PartialEq)] +/// Error context for `VerboseError` +pub enum VerboseErrorKind { + /// Static string added by the `context` function + Context(&'static str), + /// Indicates which character was expected by the `char` function + Char(char), + /// Error kind given by various nom parsers + Nom(ErrorKind), +} + +#[cfg(feature = "alloc")] +#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] +impl<I> ParseError<I> for VerboseError<I> { + fn from_error_kind(input: I, kind: ErrorKind) -> Self { + VerboseError { + errors: vec![(input, VerboseErrorKind::Nom(kind))], + } + } + + fn append(input: I, kind: ErrorKind, mut other: Self) -> Self { + other.errors.push((input, VerboseErrorKind::Nom(kind))); + other + } + + fn from_char(input: I, c: char) -> Self { + VerboseError { + errors: vec![(input, VerboseErrorKind::Char(c))], + } + } +} + +#[cfg(feature = "alloc")] +#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] +impl<I> ContextError<I> for VerboseError<I> { + fn add_context(input: I, ctx: &'static str, mut other: Self) -> Self { + other.errors.push((input, VerboseErrorKind::Context(ctx))); + other + } +} + +#[cfg(feature = "alloc")] +#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] +impl<I, E> FromExternalError<I, E> for VerboseError<I> { + /// Create a new error from an input position and an external error + fn from_external_error(input: I, kind: ErrorKind, _e: E) -> Self { + Self::from_error_kind(input, kind) + } +} + +#[cfg(feature = "alloc")] +impl<I: fmt::Display> fmt::Display for VerboseError<I> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "Parse error:")?; + for (input, error) in &self.errors { + match error { + VerboseErrorKind::Nom(e) => writeln!(f, "{:?} at: {}", e, input)?, + VerboseErrorKind::Char(c) => writeln!(f, "expected '{}' at: {}", c, input)?, + VerboseErrorKind::Context(s) => writeln!(f, "in section '{}', at: {}", s, input)?, + } + } + + Ok(()) + } +} + +#[cfg(feature = "std")] +impl<I: fmt::Debug + fmt::Display> std::error::Error for VerboseError<I> {} + +use crate::internal::{Err, IResult}; + +/// Create a new error from an input position, a static string and an existing error. +/// This is used mainly in the [context] combinator, to add user friendly information +/// to errors when backtracking through a parse tree +pub fn context<I: Clone, E: ContextError<I>, F, O>( + context: &'static str, + mut f: F, +) -> impl FnMut(I) -> IResult<I, O, E> +where + F: Parser<I, O, E>, +{ + move |i: I| match f.parse(i.clone()) { + Ok(o) => Ok(o), + Err(Err::Incomplete(i)) => Err(Err::Incomplete(i)), + Err(Err::Error(e)) => Err(Err::Error(E::add_context(i, context, e))), + Err(Err::Failure(e)) => Err(Err::Failure(E::add_context(i, context, e))), + } +} + +/// Transforms a `VerboseError` into a trace with input position information +#[cfg(feature = "alloc")] +#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] +pub fn convert_error<I: core::ops::Deref<Target = str>>( + input: I, + e: VerboseError<I>, +) -> crate::lib::std::string::String { + use crate::lib::std::fmt::Write; + use crate::traits::Offset; + + let mut result = crate::lib::std::string::String::new(); + + for (i, (substring, kind)) in e.errors.iter().enumerate() { + let offset = input.offset(substring); + + if input.is_empty() { + match kind { + VerboseErrorKind::Char(c) => { + write!(&mut result, "{}: expected '{}', got empty input\n\n", i, c) + } + VerboseErrorKind::Context(s) => write!(&mut result, "{}: in {}, got empty input\n\n", i, s), + VerboseErrorKind::Nom(e) => write!(&mut result, "{}: in {:?}, got empty input\n\n", i, e), + } + } else { + let prefix = &input.as_bytes()[..offset]; + + // Count the number of newlines in the first `offset` bytes of input + let line_number = prefix.iter().filter(|&&b| b == b'\n').count() + 1; + + // Find the line that includes the subslice: + // Find the *last* newline before the substring starts + let line_begin = prefix + .iter() + .rev() + .position(|&b| b == b'\n') + .map(|pos| offset - pos) + .unwrap_or(0); + + // Find the full line after that newline + let line = input[line_begin..] + .lines() + .next() + .unwrap_or(&input[line_begin..]) + .trim_end(); + + // The (1-indexed) column number is the offset of our substring into that line + let column_number = line.offset(substring) + 1; + + match kind { + VerboseErrorKind::Char(c) => { + if let Some(actual) = substring.chars().next() { + write!( + &mut result, + "{i}: at line {line_number}:\n\ + {line}\n\ + {caret:>column$}\n\ + expected '{expected}', found {actual}\n\n", + i = i, + line_number = line_number, + line = line, + caret = '^', + column = column_number, + expected = c, + actual = actual, + ) + } else { + write!( + &mut result, + "{i}: at line {line_number}:\n\ + {line}\n\ + {caret:>column$}\n\ + expected '{expected}', got end of input\n\n", + i = i, + line_number = line_number, + line = line, + caret = '^', + column = column_number, + expected = c, + ) + } + } + VerboseErrorKind::Context(s) => write!( + &mut result, + "{i}: at line {line_number}, in {context}:\n\ + {line}\n\ + {caret:>column$}\n\n", + i = i, + line_number = line_number, + context = s, + line = line, + caret = '^', + column = column_number, + ), + VerboseErrorKind::Nom(e) => write!( + &mut result, + "{i}: at line {line_number}, in {nom_err:?}:\n\ + {line}\n\ + {caret:>column$}\n\n", + i = i, + line_number = line_number, + nom_err = e, + line = line, + caret = '^', + column = column_number, + ), + } + } + // Because `write!` to a `String` is infallible, this `unwrap` is fine. + .unwrap(); + } + + result +} + +/// Indicates which parser returned an error +#[rustfmt::skip] +#[derive(Debug,PartialEq,Eq,Hash,Clone,Copy)] +#[allow(deprecated,missing_docs)] +pub enum ErrorKind { + Tag, + MapRes, + MapOpt, + Alt, + IsNot, + IsA, + SeparatedList, + SeparatedNonEmptyList, + Many0, + Many1, + ManyTill, + Count, + TakeUntil, + LengthValue, + TagClosure, + Alpha, + Digit, + HexDigit, + OctDigit, + AlphaNumeric, + Space, + MultiSpace, + LengthValueFn, + Eof, + Switch, + TagBits, + OneOf, + NoneOf, + Char, + CrLf, + RegexpMatch, + RegexpMatches, + RegexpFind, + RegexpCapture, + RegexpCaptures, + TakeWhile1, + Complete, + Fix, + Escaped, + EscapedTransform, + NonEmpty, + ManyMN, + Not, + Permutation, + Verify, + TakeTill1, + TakeWhileMN, + TooLarge, + Many0Count, + Many1Count, + Float, + Satisfy, + Fail, +} + +#[rustfmt::skip] +#[allow(deprecated)] +/// Converts an ErrorKind to a number +pub fn error_to_u32(e: &ErrorKind) -> u32 { + match *e { + ErrorKind::Tag => 1, + ErrorKind::MapRes => 2, + ErrorKind::MapOpt => 3, + ErrorKind::Alt => 4, + ErrorKind::IsNot => 5, + ErrorKind::IsA => 6, + ErrorKind::SeparatedList => 7, + ErrorKind::SeparatedNonEmptyList => 8, + ErrorKind::Many1 => 9, + ErrorKind::Count => 10, + ErrorKind::TakeUntil => 12, + ErrorKind::LengthValue => 15, + ErrorKind::TagClosure => 16, + ErrorKind::Alpha => 17, + ErrorKind::Digit => 18, + ErrorKind::AlphaNumeric => 19, + ErrorKind::Space => 20, + ErrorKind::MultiSpace => 21, + ErrorKind::LengthValueFn => 22, + ErrorKind::Eof => 23, + ErrorKind::Switch => 27, + ErrorKind::TagBits => 28, + ErrorKind::OneOf => 29, + ErrorKind::NoneOf => 30, + ErrorKind::Char => 40, + ErrorKind::CrLf => 41, + ErrorKind::RegexpMatch => 42, + ErrorKind::RegexpMatches => 43, + ErrorKind::RegexpFind => 44, + ErrorKind::RegexpCapture => 45, + ErrorKind::RegexpCaptures => 46, + ErrorKind::TakeWhile1 => 47, + ErrorKind::Complete => 48, + ErrorKind::Fix => 49, + ErrorKind::Escaped => 50, + ErrorKind::EscapedTransform => 51, + ErrorKind::NonEmpty => 56, + ErrorKind::ManyMN => 57, + ErrorKind::HexDigit => 59, + ErrorKind::OctDigit => 61, + ErrorKind::Many0 => 62, + ErrorKind::Not => 63, + ErrorKind::Permutation => 64, + ErrorKind::ManyTill => 65, + ErrorKind::Verify => 66, + ErrorKind::TakeTill1 => 67, + ErrorKind::TakeWhileMN => 69, + ErrorKind::TooLarge => 70, + ErrorKind::Many0Count => 71, + ErrorKind::Many1Count => 72, + ErrorKind::Float => 73, + ErrorKind::Satisfy => 74, + ErrorKind::Fail => 75, + } +} + +impl ErrorKind { + #[rustfmt::skip] + #[allow(deprecated)] + /// Converts an ErrorKind to a text description + pub fn description(&self) -> &str { + match *self { + ErrorKind::Tag => "Tag", + ErrorKind::MapRes => "Map on Result", + ErrorKind::MapOpt => "Map on Option", + ErrorKind::Alt => "Alternative", + ErrorKind::IsNot => "IsNot", + ErrorKind::IsA => "IsA", + ErrorKind::SeparatedList => "Separated list", + ErrorKind::SeparatedNonEmptyList => "Separated non empty list", + ErrorKind::Many0 => "Many0", + ErrorKind::Many1 => "Many1", + ErrorKind::Count => "Count", + ErrorKind::TakeUntil => "Take until", + ErrorKind::LengthValue => "Length followed by value", + ErrorKind::TagClosure => "Tag closure", + ErrorKind::Alpha => "Alphabetic", + ErrorKind::Digit => "Digit", + ErrorKind::AlphaNumeric => "AlphaNumeric", + ErrorKind::Space => "Space", + ErrorKind::MultiSpace => "Multiple spaces", + ErrorKind::LengthValueFn => "LengthValueFn", + ErrorKind::Eof => "End of file", + ErrorKind::Switch => "Switch", + ErrorKind::TagBits => "Tag on bitstream", + ErrorKind::OneOf => "OneOf", + ErrorKind::NoneOf => "NoneOf", + ErrorKind::Char => "Char", + ErrorKind::CrLf => "CrLf", + ErrorKind::RegexpMatch => "RegexpMatch", + ErrorKind::RegexpMatches => "RegexpMatches", + ErrorKind::RegexpFind => "RegexpFind", + ErrorKind::RegexpCapture => "RegexpCapture", + ErrorKind::RegexpCaptures => "RegexpCaptures", + ErrorKind::TakeWhile1 => "TakeWhile1", + ErrorKind::Complete => "Complete", + ErrorKind::Fix => "Fix", + ErrorKind::Escaped => "Escaped", + ErrorKind::EscapedTransform => "EscapedTransform", + ErrorKind::NonEmpty => "NonEmpty", + ErrorKind::ManyMN => "Many(m, n)", + ErrorKind::HexDigit => "Hexadecimal Digit", + ErrorKind::OctDigit => "Octal digit", + ErrorKind::Not => "Negation", + ErrorKind::Permutation => "Permutation", + ErrorKind::ManyTill => "ManyTill", + ErrorKind::Verify => "predicate verification", + ErrorKind::TakeTill1 => "TakeTill1", + ErrorKind::TakeWhileMN => "TakeWhileMN", + ErrorKind::TooLarge => "Needed data size is too large", + ErrorKind::Many0Count => "Count occurrence of >=0 patterns", + ErrorKind::Many1Count => "Count occurrence of >=1 patterns", + ErrorKind::Float => "Float", + ErrorKind::Satisfy => "Satisfy", + ErrorKind::Fail => "Fail", + } + } +} + +/// Creates a parse error from a `nom::ErrorKind` +/// and the position in the input +#[allow(unused_variables)] +#[macro_export(local_inner_macros)] +macro_rules! error_position( + ($input:expr, $code:expr) => ({ + $crate::error::make_error($input, $code) + }); +); + +/// Creates a parse error from a `nom::ErrorKind`, +/// the position in the input and the next error in +/// the parsing tree +#[allow(unused_variables)] +#[macro_export(local_inner_macros)] +macro_rules! error_node_position( + ($input:expr, $code:expr, $next:expr) => ({ + $crate::error::append_error($input, $code, $next) + }); +); + +/// Prints a message and the input if the parser fails. +/// +/// The message prints the `Error` or `Incomplete` +/// and the parser's calling code. +/// +/// It also displays the input in hexdump format +/// +/// ```rust +/// use nom::{IResult, error::dbg_dmp, bytes::complete::tag}; +/// +/// fn f(i: &[u8]) -> IResult<&[u8], &[u8]> { +/// dbg_dmp(tag("abcd"), "tag")(i) +/// } +/// +/// let a = &b"efghijkl"[..]; +/// +/// // Will print the following message: +/// // Error(Position(0, [101, 102, 103, 104, 105, 106, 107, 108])) at l.5 by ' tag ! ( "abcd" ) ' +/// // 00000000 65 66 67 68 69 6a 6b 6c efghijkl +/// f(a); +/// ``` +#[cfg(feature = "std")] +#[cfg_attr(feature = "docsrs", doc(cfg(feature = "std")))] +pub fn dbg_dmp<'a, F, O, E: std::fmt::Debug>( + f: F, + context: &'static str, +) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], O, E> +where + F: Fn(&'a [u8]) -> IResult<&'a [u8], O, E>, +{ + use crate::HexDisplay; + move |i: &'a [u8]| match f(i) { + Err(e) => { + println!("{}: Error({:?}) at:\n{}", context, e, i.to_hex(8)); + Err(e) + } + a => a, + } +} + +#[cfg(test)] +#[cfg(feature = "alloc")] +mod tests { + use super::*; + use crate::character::complete::char; + + #[test] + fn convert_error_panic() { + let input = ""; + + let _result: IResult<_, _, VerboseError<&str>> = char('x')(input); + } +} + +/* +#[cfg(feature = "alloc")] +use lib::std::{vec::Vec, collections::HashMap}; + +#[cfg(feature = "std")] +use lib::std::hash::Hash; + +#[cfg(feature = "std")] +pub fn add_error_pattern<'a, I: Clone + Hash + Eq, O, E: Clone + Hash + Eq>( + h: &mut HashMap<VerboseError<I>, &'a str>, + e: VerboseError<I>, + message: &'a str, +) -> bool { + h.insert(e, message); + true +} + +pub fn slice_to_offsets(input: &[u8], s: &[u8]) -> (usize, usize) { + let start = input.as_ptr(); + let off1 = s.as_ptr() as usize - start as usize; + let off2 = off1 + s.len(); + (off1, off2) +} + +#[cfg(feature = "std")] +pub fn prepare_errors<O, E: Clone>(input: &[u8], e: VerboseError<&[u8]>) -> Option<Vec<(ErrorKind, usize, usize)>> { + let mut v: Vec<(ErrorKind, usize, usize)> = Vec::new(); + + for (p, kind) in e.errors.drain(..) { + let (o1, o2) = slice_to_offsets(input, p); + v.push((kind, o1, o2)); + } + + v.reverse(); + Some(v) +} + +#[cfg(feature = "std")] +pub fn print_error<O, E: Clone>(input: &[u8], res: VerboseError<&[u8]>) { + if let Some(v) = prepare_errors(input, res) { + let colors = generate_colors(&v); + println!("parser codes: {}", print_codes(&colors, &HashMap::new())); + println!("{}", print_offsets(input, 0, &v)); + } else { + println!("not an error"); + } +} + +#[cfg(feature = "std")] +pub fn generate_colors<E>(v: &[(ErrorKind, usize, usize)]) -> HashMap<u32, u8> { + let mut h: HashMap<u32, u8> = HashMap::new(); + let mut color = 0; + + for &(ref c, _, _) in v.iter() { + h.insert(error_to_u32(c), color + 31); + color = color + 1 % 7; + } + + h +} + +pub fn code_from_offset(v: &[(ErrorKind, usize, usize)], offset: usize) -> Option<u32> { + let mut acc: Option<(u32, usize, usize)> = None; + for &(ref ek, s, e) in v.iter() { + let c = error_to_u32(ek); + if s <= offset && offset <= e { + if let Some((_, start, end)) = acc { + if start <= s && e <= end { + acc = Some((c, s, e)); + } + } else { + acc = Some((c, s, e)); + } + } + } + if let Some((code, _, _)) = acc { + return Some(code); + } else { + return None; + } +} + +#[cfg(feature = "alloc")] +pub fn reset_color(v: &mut Vec<u8>) { + v.push(0x1B); + v.push(b'['); + v.push(0); + v.push(b'm'); +} + +#[cfg(feature = "alloc")] +pub fn write_color(v: &mut Vec<u8>, color: u8) { + v.push(0x1B); + v.push(b'['); + v.push(1); + v.push(b';'); + let s = color.to_string(); + let bytes = s.as_bytes(); + v.extend(bytes.iter().cloned()); + v.push(b'm'); +} + +#[cfg(feature = "std")] +#[cfg_attr(feature = "cargo-clippy", allow(implicit_hasher))] +pub fn print_codes(colors: &HashMap<u32, u8>, names: &HashMap<u32, &str>) -> String { + let mut v = Vec::new(); + for (code, &color) in colors { + if let Some(&s) = names.get(code) { + let bytes = s.as_bytes(); + write_color(&mut v, color); + v.extend(bytes.iter().cloned()); + } else { + let s = code.to_string(); + let bytes = s.as_bytes(); + write_color(&mut v, color); + v.extend(bytes.iter().cloned()); + } + reset_color(&mut v); + v.push(b' '); + } + reset_color(&mut v); + + String::from_utf8_lossy(&v[..]).into_owned() +} + +#[cfg(feature = "std")] +pub fn print_offsets(input: &[u8], from: usize, offsets: &[(ErrorKind, usize, usize)]) -> String { + let mut v = Vec::with_capacity(input.len() * 3); + let mut i = from; + let chunk_size = 8; + let mut current_code: Option<u32> = None; + let mut current_code2: Option<u32> = None; + + let colors = generate_colors(&offsets); + + for chunk in input.chunks(chunk_size) { + let s = format!("{:08x}", i); + for &ch in s.as_bytes().iter() { + v.push(ch); + } + v.push(b'\t'); + + let mut k = i; + let mut l = i; + for &byte in chunk { + if let Some(code) = code_from_offset(&offsets, k) { + if let Some(current) = current_code { + if current != code { + reset_color(&mut v); + current_code = Some(code); + if let Some(&color) = colors.get(&code) { + write_color(&mut v, color); + } + } + } else { + current_code = Some(code); + if let Some(&color) = colors.get(&code) { + write_color(&mut v, color); + } + } + } + v.push(CHARS[(byte >> 4) as usize]); + v.push(CHARS[(byte & 0xf) as usize]); + v.push(b' '); + k = k + 1; + } + + reset_color(&mut v); + + if chunk_size > chunk.len() { + for _ in 0..(chunk_size - chunk.len()) { + v.push(b' '); + v.push(b' '); + v.push(b' '); + } + } + v.push(b'\t'); + + for &byte in chunk { + if let Some(code) = code_from_offset(&offsets, l) { + if let Some(current) = current_code2 { + if current != code { + reset_color(&mut v); + current_code2 = Some(code); + if let Some(&color) = colors.get(&code) { + write_color(&mut v, color); + } + } + } else { + current_code2 = Some(code); + if let Some(&color) = colors.get(&code) { + write_color(&mut v, color); + } + } + } + if (byte >= 32 && byte <= 126) || byte >= 128 { + v.push(byte); + } else { + v.push(b'.'); + } + l = l + 1; + } + reset_color(&mut v); + + v.push(b'\n'); + i = i + chunk_size; + } + + String::from_utf8_lossy(&v[..]).into_owned() +} +*/ diff --git a/third_party/rust/nom/src/internal.rs b/third_party/rust/nom/src/internal.rs new file mode 100644 index 0000000000..b7572fbd0a --- /dev/null +++ b/third_party/rust/nom/src/internal.rs @@ -0,0 +1,489 @@ +//! Basic types to build the parsers + +use self::Needed::*; +use crate::error::{self, ErrorKind}; +use crate::lib::std::fmt; +use core::num::NonZeroUsize; + +/// Holds the result of parsing functions +/// +/// It depends on the input type `I`, the output type `O`, and the error type `E` +/// (by default `(I, nom::ErrorKind)`) +/// +/// The `Ok` side is a pair containing the remainder of the input (the part of the data that +/// was not parsed) and the produced value. The `Err` side contains an instance of `nom::Err`. +/// +/// Outside of the parsing code, you can use the [Finish::finish] method to convert +/// it to a more common result type +pub type IResult<I, O, E = error::Error<I>> = Result<(I, O), Err<E>>; + +/// Helper trait to convert a parser's result to a more manageable type +pub trait Finish<I, O, E> { + /// converts the parser's result to a type that is more consumable by error + /// management libraries. It keeps the same `Ok` branch, and merges `Err::Error` + /// and `Err::Failure` into the `Err` side. + /// + /// *warning*: if the result is `Err(Err::Incomplete(_))`, this method will panic. + /// - "complete" parsers: It will not be an issue, `Incomplete` is never used + /// - "streaming" parsers: `Incomplete` will be returned if there's not enough data + /// for the parser to decide, and you should gather more data before parsing again. + /// Once the parser returns either `Ok(_)`, `Err(Err::Error(_))` or `Err(Err::Failure(_))`, + /// you can get out of the parsing loop and call `finish()` on the parser's result + fn finish(self) -> Result<(I, O), E>; +} + +impl<I, O, E> Finish<I, O, E> for IResult<I, O, E> { + fn finish(self) -> Result<(I, O), E> { + match self { + Ok(res) => Ok(res), + Err(Err::Error(e)) | Err(Err::Failure(e)) => Err(e), + Err(Err::Incomplete(_)) => { + panic!("Cannot call `finish()` on `Err(Err::Incomplete(_))`: this result means that the parser does not have enough data to decide, you should gather more data and try to reapply the parser instead") + } + } + } +} + +/// Contains information on needed data if a parser returned `Incomplete` +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub enum Needed { + /// Needs more data, but we do not know how much + Unknown, + /// Contains the required data size in bytes + Size(NonZeroUsize), +} + +impl Needed { + /// Creates `Needed` instance, returns `Needed::Unknown` if the argument is zero + pub fn new(s: usize) -> Self { + match NonZeroUsize::new(s) { + Some(sz) => Needed::Size(sz), + None => Needed::Unknown, + } + } + + /// Indicates if we know how many bytes we need + pub fn is_known(&self) -> bool { + *self != Unknown + } + + /// Maps a `Needed` to `Needed` by applying a function to a contained `Size` value. + #[inline] + pub fn map<F: Fn(NonZeroUsize) -> usize>(self, f: F) -> Needed { + match self { + Unknown => Unknown, + Size(n) => Needed::new(f(n)), + } + } +} + +/// The `Err` enum indicates the parser was not successful +/// +/// It has three cases: +/// +/// * `Incomplete` indicates that more data is needed to decide. The `Needed` enum +/// can contain how many additional bytes are necessary. If you are sure your parser +/// is working on full data, you can wrap your parser with the `complete` combinator +/// to transform that case in `Error` +/// * `Error` means some parser did not succeed, but another one might (as an example, +/// when testing different branches of an `alt` combinator) +/// * `Failure` indicates an unrecoverable error. As an example, if you recognize a prefix +/// to decide on the next parser to apply, and that parser fails, you know there's no need +/// to try other parsers, you were already in the right branch, so the data is invalid +/// +#[derive(Debug, Clone, PartialEq)] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub enum Err<E> { + /// There was not enough data + Incomplete(Needed), + /// The parser had an error (recoverable) + Error(E), + /// The parser had an unrecoverable error: we got to the right + /// branch and we know other branches won't work, so backtrack + /// as fast as possible + Failure(E), +} + +impl<E> Err<E> { + /// Tests if the result is Incomplete + pub fn is_incomplete(&self) -> bool { + if let Err::Incomplete(_) = self { + true + } else { + false + } + } + + /// Applies the given function to the inner error + pub fn map<E2, F>(self, f: F) -> Err<E2> + where + F: FnOnce(E) -> E2, + { + match self { + Err::Incomplete(n) => Err::Incomplete(n), + Err::Failure(t) => Err::Failure(f(t)), + Err::Error(t) => Err::Error(f(t)), + } + } + + /// Automatically converts between errors if the underlying type supports it + pub fn convert<F>(e: Err<F>) -> Self + where + E: From<F>, + { + e.map(crate::lib::std::convert::Into::into) + } +} + +impl<T> Err<(T, ErrorKind)> { + /// Maps `Err<(T, ErrorKind)>` to `Err<(U, ErrorKind)>` with the given `F: T -> U` + pub fn map_input<U, F>(self, f: F) -> Err<(U, ErrorKind)> + where + F: FnOnce(T) -> U, + { + match self { + Err::Incomplete(n) => Err::Incomplete(n), + Err::Failure((input, k)) => Err::Failure((f(input), k)), + Err::Error((input, k)) => Err::Error((f(input), k)), + } + } +} + +impl<T> Err<error::Error<T>> { + /// Maps `Err<error::Error<T>>` to `Err<error::Error<U>>` with the given `F: T -> U` + pub fn map_input<U, F>(self, f: F) -> Err<error::Error<U>> + where + F: FnOnce(T) -> U, + { + match self { + Err::Incomplete(n) => Err::Incomplete(n), + Err::Failure(error::Error { input, code }) => Err::Failure(error::Error { + input: f(input), + code, + }), + Err::Error(error::Error { input, code }) => Err::Error(error::Error { + input: f(input), + code, + }), + } + } +} + +#[cfg(feature = "alloc")] +use crate::lib::std::{borrow::ToOwned, string::String, vec::Vec}; +#[cfg(feature = "alloc")] +impl Err<(&[u8], ErrorKind)> { + /// Obtaining ownership + #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] + pub fn to_owned(self) -> Err<(Vec<u8>, ErrorKind)> { + self.map_input(ToOwned::to_owned) + } +} + +#[cfg(feature = "alloc")] +impl Err<(&str, ErrorKind)> { + /// Obtaining ownership + #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] + pub fn to_owned(self) -> Err<(String, ErrorKind)> { + self.map_input(ToOwned::to_owned) + } +} + +#[cfg(feature = "alloc")] +impl Err<error::Error<&[u8]>> { + /// Obtaining ownership + #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] + pub fn to_owned(self) -> Err<error::Error<Vec<u8>>> { + self.map_input(ToOwned::to_owned) + } +} + +#[cfg(feature = "alloc")] +impl Err<error::Error<&str>> { + /// Obtaining ownership + #[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] + pub fn to_owned(self) -> Err<error::Error<String>> { + self.map_input(ToOwned::to_owned) + } +} + +impl<E: Eq> Eq for Err<E> {} + +impl<E> fmt::Display for Err<E> +where + E: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Err::Incomplete(Needed::Size(u)) => write!(f, "Parsing requires {} bytes/chars", u), + Err::Incomplete(Needed::Unknown) => write!(f, "Parsing requires more data"), + Err::Failure(c) => write!(f, "Parsing Failure: {:?}", c), + Err::Error(c) => write!(f, "Parsing Error: {:?}", c), + } + } +} + +#[cfg(feature = "std")] +use std::error::Error; + +#[cfg(feature = "std")] +impl<E> Error for Err<E> +where + E: fmt::Debug, +{ + fn source(&self) -> Option<&(dyn Error + 'static)> { + None // no underlying error + } +} + +/// All nom parsers implement this trait +pub trait Parser<I, O, E> { + /// A parser takes in input type, and returns a `Result` containing + /// either the remaining input and the output value, or an error + fn parse(&mut self, input: I) -> IResult<I, O, E>; + + /// Maps a function over the result of a parser + fn map<G, O2>(self, g: G) -> Map<Self, G, O> + where + G: Fn(O) -> O2, + Self: core::marker::Sized, + { + Map { + f: self, + g, + phantom: core::marker::PhantomData, + } + } + + /// Creates a second parser from the output of the first one, then apply over the rest of the input + fn flat_map<G, H, O2>(self, g: G) -> FlatMap<Self, G, O> + where + G: FnMut(O) -> H, + H: Parser<I, O2, E>, + Self: core::marker::Sized, + { + FlatMap { + f: self, + g, + phantom: core::marker::PhantomData, + } + } + + /// Applies a second parser over the output of the first one + fn and_then<G, O2>(self, g: G) -> AndThen<Self, G, O> + where + G: Parser<O, O2, E>, + Self: core::marker::Sized, + { + AndThen { + f: self, + g, + phantom: core::marker::PhantomData, + } + } + + /// Applies a second parser after the first one, return their results as a tuple + fn and<G, O2>(self, g: G) -> And<Self, G> + where + G: Parser<I, O2, E>, + Self: core::marker::Sized, + { + And { f: self, g } + } + + /// Applies a second parser over the input if the first one failed + fn or<G>(self, g: G) -> Or<Self, G> + where + G: Parser<I, O, E>, + Self: core::marker::Sized, + { + Or { f: self, g } + } + + /// automatically converts the parser's output and error values to another type, as long as they + /// implement the `From` trait + fn into<O2: From<O>, E2: From<E>>(self) -> Into<Self, O, O2, E, E2> + where + Self: core::marker::Sized, + { + Into { + f: self, + phantom_out1: core::marker::PhantomData, + phantom_err1: core::marker::PhantomData, + phantom_out2: core::marker::PhantomData, + phantom_err2: core::marker::PhantomData, + } + } +} + +impl<'a, I, O, E, F> Parser<I, O, E> for F +where + F: FnMut(I) -> IResult<I, O, E> + 'a, +{ + fn parse(&mut self, i: I) -> IResult<I, O, E> { + self(i) + } +} + +#[cfg(feature = "alloc")] +use alloc::boxed::Box; + +#[cfg(feature = "alloc")] +impl<'a, I, O, E> Parser<I, O, E> for Box<dyn Parser<I, O, E> + 'a> { + fn parse(&mut self, input: I) -> IResult<I, O, E> { + (**self).parse(input) + } +} + +/// Implementation of `Parser::map` +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct Map<F, G, O1> { + f: F, + g: G, + phantom: core::marker::PhantomData<O1>, +} + +impl<'a, I, O1, O2, E, F: Parser<I, O1, E>, G: Fn(O1) -> O2> Parser<I, O2, E> for Map<F, G, O1> { + fn parse(&mut self, i: I) -> IResult<I, O2, E> { + match self.f.parse(i) { + Err(e) => Err(e), + Ok((i, o)) => Ok((i, (self.g)(o))), + } + } +} + +/// Implementation of `Parser::flat_map` +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct FlatMap<F, G, O1> { + f: F, + g: G, + phantom: core::marker::PhantomData<O1>, +} + +impl<'a, I, O1, O2, E, F: Parser<I, O1, E>, G: Fn(O1) -> H, H: Parser<I, O2, E>> Parser<I, O2, E> + for FlatMap<F, G, O1> +{ + fn parse(&mut self, i: I) -> IResult<I, O2, E> { + let (i, o1) = self.f.parse(i)?; + (self.g)(o1).parse(i) + } +} + +/// Implementation of `Parser::and_then` +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct AndThen<F, G, O1> { + f: F, + g: G, + phantom: core::marker::PhantomData<O1>, +} + +impl<'a, I, O1, O2, E, F: Parser<I, O1, E>, G: Parser<O1, O2, E>> Parser<I, O2, E> + for AndThen<F, G, O1> +{ + fn parse(&mut self, i: I) -> IResult<I, O2, E> { + let (i, o1) = self.f.parse(i)?; + let (_, o2) = self.g.parse(o1)?; + Ok((i, o2)) + } +} + +/// Implementation of `Parser::and` +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct And<F, G> { + f: F, + g: G, +} + +impl<'a, I, O1, O2, E, F: Parser<I, O1, E>, G: Parser<I, O2, E>> Parser<I, (O1, O2), E> + for And<F, G> +{ + fn parse(&mut self, i: I) -> IResult<I, (O1, O2), E> { + let (i, o1) = self.f.parse(i)?; + let (i, o2) = self.g.parse(i)?; + Ok((i, (o1, o2))) + } +} + +/// Implementation of `Parser::or` +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct Or<F, G> { + f: F, + g: G, +} + +impl<'a, I: Clone, O, E: crate::error::ParseError<I>, F: Parser<I, O, E>, G: Parser<I, O, E>> + Parser<I, O, E> for Or<F, G> +{ + fn parse(&mut self, i: I) -> IResult<I, O, E> { + match self.f.parse(i.clone()) { + Err(Err::Error(e1)) => match self.g.parse(i) { + Err(Err::Error(e2)) => Err(Err::Error(e1.or(e2))), + res => res, + }, + res => res, + } + } +} + +/// Implementation of `Parser::into` +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +pub struct Into<F, O1, O2: From<O1>, E1, E2: From<E1>> { + f: F, + phantom_out1: core::marker::PhantomData<O1>, + phantom_err1: core::marker::PhantomData<E1>, + phantom_out2: core::marker::PhantomData<O2>, + phantom_err2: core::marker::PhantomData<E2>, +} + +impl< + 'a, + I: Clone, + O1, + O2: From<O1>, + E1, + E2: crate::error::ParseError<I> + From<E1>, + F: Parser<I, O1, E1>, + > Parser<I, O2, E2> for Into<F, O1, O2, E1, E2> +{ + fn parse(&mut self, i: I) -> IResult<I, O2, E2> { + match self.f.parse(i) { + Ok((i, o)) => Ok((i, o.into())), + Err(Err::Error(e)) => Err(Err::Error(e.into())), + Err(Err::Failure(e)) => Err(Err::Failure(e.into())), + Err(Err::Incomplete(e)) => Err(Err::Incomplete(e)), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::error::ErrorKind; + + #[doc(hidden)] + #[macro_export] + macro_rules! assert_size ( + ($t:ty, $sz:expr) => ( + assert_eq!(crate::lib::std::mem::size_of::<$t>(), $sz); + ); + ); + + #[test] + #[cfg(target_pointer_width = "64")] + fn size_test() { + assert_size!(IResult<&[u8], &[u8], (&[u8], u32)>, 40); + //FIXME: since rust 1.65, this is now 32 bytes, likely thanks to https://github.com/rust-lang/rust/pull/94075 + // deactivating that test for now because it'll have different values depending on the rust version + // assert_size!(IResult<&str, &str, u32>, 40); + assert_size!(Needed, 8); + assert_size!(Err<u32>, 16); + assert_size!(ErrorKind, 1); + } + + #[test] + fn err_map_test() { + let e = Err::Error(1); + assert_eq!(e.map(|v| v + 1), Err::Error(2)); + } +} diff --git a/third_party/rust/nom/src/lib.rs b/third_party/rust/nom/src/lib.rs new file mode 100644 index 0000000000..3beb2f4179 --- /dev/null +++ b/third_party/rust/nom/src/lib.rs @@ -0,0 +1,464 @@ +//! # nom, eating data byte by byte +//! +//! nom is a parser combinator library with a focus on safe parsing, +//! streaming patterns, and as much as possible zero copy. +//! +//! ## Example +//! +//! ```rust +//! use nom::{ +//! IResult, +//! bytes::complete::{tag, take_while_m_n}, +//! combinator::map_res, +//! sequence::tuple}; +//! +//! #[derive(Debug,PartialEq)] +//! pub struct Color { +//! pub red: u8, +//! pub green: u8, +//! pub blue: u8, +//! } +//! +//! fn from_hex(input: &str) -> Result<u8, std::num::ParseIntError> { +//! u8::from_str_radix(input, 16) +//! } +//! +//! fn is_hex_digit(c: char) -> bool { +//! c.is_digit(16) +//! } +//! +//! fn hex_primary(input: &str) -> IResult<&str, u8> { +//! map_res( +//! take_while_m_n(2, 2, is_hex_digit), +//! from_hex +//! )(input) +//! } +//! +//! fn hex_color(input: &str) -> IResult<&str, Color> { +//! let (input, _) = tag("#")(input)?; +//! let (input, (red, green, blue)) = tuple((hex_primary, hex_primary, hex_primary))(input)?; +//! +//! Ok((input, Color { red, green, blue })) +//! } +//! +//! fn main() { +//! assert_eq!(hex_color("#2F14DF"), Ok(("", Color { +//! red: 47, +//! green: 20, +//! blue: 223, +//! }))); +//! } +//! ``` +//! +//! The code is available on [Github](https://github.com/Geal/nom) +//! +//! There are a few [guides](https://github.com/Geal/nom/tree/main/doc) with more details +//! about [how to write parsers](https://github.com/Geal/nom/blob/main/doc/making_a_new_parser_from_scratch.md), +//! or the [error management system](https://github.com/Geal/nom/blob/main/doc/error_management.md). +//! You can also check out the [recipes] module that contains examples of common patterns. +//! +//! **Looking for a specific combinator? Read the +//! ["choose a combinator" guide](https://github.com/Geal/nom/blob/main/doc/choosing_a_combinator.md)** +//! +//! If you are upgrading to nom 5.0, please read the +//! [migration document](https://github.com/Geal/nom/blob/main/doc/upgrading_to_nom_5.md). +//! +//! ## Parser combinators +//! +//! Parser combinators are an approach to parsers that is very different from +//! software like [lex](https://en.wikipedia.org/wiki/Lex_(software)) and +//! [yacc](https://en.wikipedia.org/wiki/Yacc). Instead of writing the grammar +//! in a separate syntax and generating the corresponding code, you use very small +//! functions with very specific purposes, like "take 5 bytes", or "recognize the +//! word 'HTTP'", and assemble them in meaningful patterns like "recognize +//! 'HTTP', then a space, then a version". +//! The resulting code is small, and looks like the grammar you would have +//! written with other parser approaches. +//! +//! This gives us a few advantages: +//! +//! - The parsers are small and easy to write +//! - The parsers components are easy to reuse (if they're general enough, please add them to nom!) +//! - The parsers components are easy to test separately (unit tests and property-based tests) +//! - The parser combination code looks close to the grammar you would have written +//! - You can build partial parsers, specific to the data you need at the moment, and ignore the rest +//! +//! Here is an example of one such parser, to recognize text between parentheses: +//! +//! ```rust +//! use nom::{ +//! IResult, +//! sequence::delimited, +//! // see the "streaming/complete" paragraph lower for an explanation of these submodules +//! character::complete::char, +//! bytes::complete::is_not +//! }; +//! +//! fn parens(input: &str) -> IResult<&str, &str> { +//! delimited(char('('), is_not(")"), char(')'))(input) +//! } +//! ``` +//! +//! It defines a function named `parens` which will recognize a sequence of the +//! character `(`, the longest byte array not containing `)`, then the character +//! `)`, and will return the byte array in the middle. +//! +//! Here is another parser, written without using nom's combinators this time: +//! +//! ```rust +//! use nom::{IResult, Err, Needed}; +//! +//! # fn main() { +//! fn take4(i: &[u8]) -> IResult<&[u8], &[u8]>{ +//! if i.len() < 4 { +//! Err(Err::Incomplete(Needed::new(4))) +//! } else { +//! Ok((&i[4..], &i[0..4])) +//! } +//! } +//! # } +//! ``` +//! +//! This function takes a byte array as input, and tries to consume 4 bytes. +//! Writing all the parsers manually, like this, is dangerous, despite Rust's +//! safety features. There are still a lot of mistakes one can make. That's why +//! nom provides a list of functions to help in developing parsers. +//! +//! With functions, you would write it like this: +//! +//! ```rust +//! use nom::{IResult, bytes::streaming::take}; +//! fn take4(input: &str) -> IResult<&str, &str> { +//! take(4u8)(input) +//! } +//! ``` +//! +//! A parser in nom is a function which, for an input type `I`, an output type `O` +//! and an optional error type `E`, will have the following signature: +//! +//! ```rust,compile_fail +//! fn parser(input: I) -> IResult<I, O, E>; +//! ``` +//! +//! Or like this, if you don't want to specify a custom error type (it will be `(I, ErrorKind)` by default): +//! +//! ```rust,compile_fail +//! fn parser(input: I) -> IResult<I, O>; +//! ``` +//! +//! `IResult` is an alias for the `Result` type: +//! +//! ```rust +//! use nom::{Needed, error::Error}; +//! +//! type IResult<I, O, E = Error<I>> = Result<(I, O), Err<E>>; +//! +//! enum Err<E> { +//! Incomplete(Needed), +//! Error(E), +//! Failure(E), +//! } +//! ``` +//! +//! It can have the following values: +//! +//! - A correct result `Ok((I,O))` with the first element being the remaining of the input (not parsed yet), and the second the output value; +//! - An error `Err(Err::Error(c))` with `c` an error that can be built from the input position and a parser specific error +//! - An error `Err(Err::Incomplete(Needed))` indicating that more input is necessary. `Needed` can indicate how much data is needed +//! - An error `Err(Err::Failure(c))`. It works like the `Error` case, except it indicates an unrecoverable error: We cannot backtrack and test another parser +//! +//! Please refer to the ["choose a combinator" guide](https://github.com/Geal/nom/blob/main/doc/choosing_a_combinator.md) for an exhaustive list of parsers. +//! See also the rest of the documentation [here](https://github.com/Geal/nom/blob/main/doc). +//! +//! ## Making new parsers with function combinators +//! +//! nom is based on functions that generate parsers, with a signature like +//! this: `(arguments) -> impl Fn(Input) -> IResult<Input, Output, Error>`. +//! The arguments of a combinator can be direct values (like `take` which uses +//! a number of bytes or character as argument) or even other parsers (like +//! `delimited` which takes as argument 3 parsers, and returns the result of +//! the second one if all are successful). +//! +//! Here are some examples: +//! +//! ```rust +//! use nom::IResult; +//! use nom::bytes::complete::{tag, take}; +//! fn abcd_parser(i: &str) -> IResult<&str, &str> { +//! tag("abcd")(i) // will consume bytes if the input begins with "abcd" +//! } +//! +//! fn take_10(i: &[u8]) -> IResult<&[u8], &[u8]> { +//! take(10u8)(i) // will consume and return 10 bytes of input +//! } +//! ``` +//! +//! ## Combining parsers +//! +//! There are higher level patterns, like the **`alt`** combinator, which +//! provides a choice between multiple parsers. If one branch fails, it tries +//! the next, and returns the result of the first parser that succeeds: +//! +//! ```rust +//! use nom::IResult; +//! use nom::branch::alt; +//! use nom::bytes::complete::tag; +//! +//! let mut alt_tags = alt((tag("abcd"), tag("efgh"))); +//! +//! assert_eq!(alt_tags(&b"abcdxxx"[..]), Ok((&b"xxx"[..], &b"abcd"[..]))); +//! assert_eq!(alt_tags(&b"efghxxx"[..]), Ok((&b"xxx"[..], &b"efgh"[..]))); +//! assert_eq!(alt_tags(&b"ijklxxx"[..]), Err(nom::Err::Error((&b"ijklxxx"[..], nom::error::ErrorKind::Tag)))); +//! ``` +//! +//! The **`opt`** combinator makes a parser optional. If the child parser returns +//! an error, **`opt`** will still succeed and return None: +//! +//! ```rust +//! use nom::{IResult, combinator::opt, bytes::complete::tag}; +//! fn abcd_opt(i: &[u8]) -> IResult<&[u8], Option<&[u8]>> { +//! opt(tag("abcd"))(i) +//! } +//! +//! assert_eq!(abcd_opt(&b"abcdxxx"[..]), Ok((&b"xxx"[..], Some(&b"abcd"[..])))); +//! assert_eq!(abcd_opt(&b"efghxxx"[..]), Ok((&b"efghxxx"[..], None))); +//! ``` +//! +//! **`many0`** applies a parser 0 or more times, and returns a vector of the aggregated results: +//! +//! ```rust +//! # #[cfg(feature = "alloc")] +//! # fn main() { +//! use nom::{IResult, multi::many0, bytes::complete::tag}; +//! use std::str; +//! +//! fn multi(i: &str) -> IResult<&str, Vec<&str>> { +//! many0(tag("abcd"))(i) +//! } +//! +//! let a = "abcdef"; +//! let b = "abcdabcdef"; +//! let c = "azerty"; +//! assert_eq!(multi(a), Ok(("ef", vec!["abcd"]))); +//! assert_eq!(multi(b), Ok(("ef", vec!["abcd", "abcd"]))); +//! assert_eq!(multi(c), Ok(("azerty", Vec::new()))); +//! # } +//! # #[cfg(not(feature = "alloc"))] +//! # fn main() {} +//! ``` +//! +//! Here are some basic combinators available: +//! +//! - **`opt`**: Will make the parser optional (if it returns the `O` type, the new parser returns `Option<O>`) +//! - **`many0`**: Will apply the parser 0 or more times (if it returns the `O` type, the new parser returns `Vec<O>`) +//! - **`many1`**: Will apply the parser 1 or more times +//! +//! There are more complex (and more useful) parsers like `tuple`, which is +//! used to apply a series of parsers then assemble their results. +//! +//! Example with `tuple`: +//! +//! ```rust +//! # fn main() { +//! use nom::{error::ErrorKind, Needed, +//! number::streaming::be_u16, +//! bytes::streaming::{tag, take}, +//! sequence::tuple}; +//! +//! let mut tpl = tuple((be_u16, take(3u8), tag("fg"))); +//! +//! assert_eq!( +//! tpl(&b"abcdefgh"[..]), +//! Ok(( +//! &b"h"[..], +//! (0x6162u16, &b"cde"[..], &b"fg"[..]) +//! )) +//! ); +//! assert_eq!(tpl(&b"abcde"[..]), Err(nom::Err::Incomplete(Needed::new(2)))); +//! let input = &b"abcdejk"[..]; +//! assert_eq!(tpl(input), Err(nom::Err::Error((&input[5..], ErrorKind::Tag)))); +//! # } +//! ``` +//! +//! But you can also use a sequence of combinators written in imperative style, +//! thanks to the `?` operator: +//! +//! ```rust +//! # fn main() { +//! use nom::{IResult, bytes::complete::tag}; +//! +//! #[derive(Debug, PartialEq)] +//! struct A { +//! a: u8, +//! b: u8 +//! } +//! +//! fn ret_int1(i:&[u8]) -> IResult<&[u8], u8> { Ok((i,1)) } +//! fn ret_int2(i:&[u8]) -> IResult<&[u8], u8> { Ok((i,2)) } +//! +//! fn f(i: &[u8]) -> IResult<&[u8], A> { +//! // if successful, the parser returns `Ok((remaining_input, output_value))` that we can destructure +//! let (i, _) = tag("abcd")(i)?; +//! let (i, a) = ret_int1(i)?; +//! let (i, _) = tag("efgh")(i)?; +//! let (i, b) = ret_int2(i)?; +//! +//! Ok((i, A { a, b })) +//! } +//! +//! let r = f(b"abcdefghX"); +//! assert_eq!(r, Ok((&b"X"[..], A{a: 1, b: 2}))); +//! # } +//! ``` +//! +//! ## Streaming / Complete +//! +//! Some of nom's modules have `streaming` or `complete` submodules. They hold +//! different variants of the same combinators. +//! +//! A streaming parser assumes that we might not have all of the input data. +//! This can happen with some network protocol or large file parsers, where the +//! input buffer can be full and need to be resized or refilled. +//! +//! A complete parser assumes that we already have all of the input data. +//! This will be the common case with small files that can be read entirely to +//! memory. +//! +//! Here is how it works in practice: +//! +//! ```rust +//! use nom::{IResult, Err, Needed, error::{Error, ErrorKind}, bytes, character}; +//! +//! fn take_streaming(i: &[u8]) -> IResult<&[u8], &[u8]> { +//! bytes::streaming::take(4u8)(i) +//! } +//! +//! fn take_complete(i: &[u8]) -> IResult<&[u8], &[u8]> { +//! bytes::complete::take(4u8)(i) +//! } +//! +//! // both parsers will take 4 bytes as expected +//! assert_eq!(take_streaming(&b"abcde"[..]), Ok((&b"e"[..], &b"abcd"[..]))); +//! assert_eq!(take_complete(&b"abcde"[..]), Ok((&b"e"[..], &b"abcd"[..]))); +//! +//! // if the input is smaller than 4 bytes, the streaming parser +//! // will return `Incomplete` to indicate that we need more data +//! assert_eq!(take_streaming(&b"abc"[..]), Err(Err::Incomplete(Needed::new(1)))); +//! +//! // but the complete parser will return an error +//! assert_eq!(take_complete(&b"abc"[..]), Err(Err::Error(Error::new(&b"abc"[..], ErrorKind::Eof)))); +//! +//! // the alpha0 function recognizes 0 or more alphabetic characters +//! fn alpha0_streaming(i: &str) -> IResult<&str, &str> { +//! character::streaming::alpha0(i) +//! } +//! +//! fn alpha0_complete(i: &str) -> IResult<&str, &str> { +//! character::complete::alpha0(i) +//! } +//! +//! // if there's a clear limit to the recognized characters, both parsers work the same way +//! assert_eq!(alpha0_streaming("abcd;"), Ok((";", "abcd"))); +//! assert_eq!(alpha0_complete("abcd;"), Ok((";", "abcd"))); +//! +//! // but when there's no limit, the streaming version returns `Incomplete`, because it cannot +//! // know if more input data should be recognized. The whole input could be "abcd;", or +//! // "abcde;" +//! assert_eq!(alpha0_streaming("abcd"), Err(Err::Incomplete(Needed::new(1)))); +//! +//! // while the complete version knows that all of the data is there +//! assert_eq!(alpha0_complete("abcd"), Ok(("", "abcd"))); +//! ``` +//! **Going further:** Read the [guides](https://github.com/Geal/nom/tree/main/doc), +//! check out the [recipes]! +#![cfg_attr(not(feature = "std"), no_std)] +#![cfg_attr(feature = "cargo-clippy", allow(clippy::doc_markdown))] +#![cfg_attr(feature = "docsrs", feature(doc_cfg))] +#![cfg_attr(feature = "docsrs", feature(extended_key_value_attributes))] +#![deny(missing_docs)] +#[cfg_attr(nightly, warn(rustdoc::missing_doc_code_examples))] +#[cfg(feature = "alloc")] +#[macro_use] +extern crate alloc; +#[cfg(doctest)] +extern crate doc_comment; + +#[cfg(doctest)] +doc_comment::doctest!("../README.md"); + +/// Lib module to re-export everything needed from `std` or `core`/`alloc`. This is how `serde` does +/// it, albeit there it is not public. +#[cfg_attr(nightly, allow(rustdoc::missing_doc_code_examples))] +pub mod lib { + /// `std` facade allowing `std`/`core` to be interchangeable. Reexports `alloc` crate optionally, + /// as well as `core` or `std` + #[cfg(not(feature = "std"))] + #[cfg_attr(nightly, allow(rustdoc::missing_doc_code_examples))] + /// internal std exports for no_std compatibility + pub mod std { + #[doc(hidden)] + #[cfg(not(feature = "alloc"))] + pub use core::borrow; + + #[cfg(feature = "alloc")] + #[doc(hidden)] + pub use alloc::{borrow, boxed, string, vec}; + + #[doc(hidden)] + pub use core::{cmp, convert, fmt, iter, mem, ops, option, result, slice, str}; + + /// internal reproduction of std prelude + #[doc(hidden)] + pub mod prelude { + pub use core::prelude as v1; + } + } + + #[cfg(feature = "std")] + #[cfg_attr(nightly, allow(rustdoc::missing_doc_code_examples))] + /// internal std exports for no_std compatibility + pub mod std { + #[doc(hidden)] + pub use std::{ + alloc, borrow, boxed, cmp, collections, convert, fmt, hash, iter, mem, ops, option, result, + slice, str, string, vec, + }; + + /// internal reproduction of std prelude + #[doc(hidden)] + pub mod prelude { + pub use std::prelude as v1; + } + } +} + +pub use self::bits::*; +pub use self::internal::*; +pub use self::traits::*; + +pub use self::str::*; + +#[macro_use] +mod macros; +#[macro_use] +pub mod error; + +pub mod branch; +pub mod combinator; +mod internal; +pub mod multi; +pub mod sequence; +mod traits; + +pub mod bits; +pub mod bytes; + +pub mod character; + +mod str; + +pub mod number; + +#[cfg(feature = "docsrs")] +#[cfg_attr(feature = "docsrs", cfg_attr(feature = "docsrs", doc = include_str!("../doc/nom_recipes.md")))] +pub mod recipes {} diff --git a/third_party/rust/nom/src/macros.rs b/third_party/rust/nom/src/macros.rs new file mode 100644 index 0000000000..980d2d90ed --- /dev/null +++ b/third_party/rust/nom/src/macros.rs @@ -0,0 +1,23 @@ +macro_rules! succ ( + (0, $submac:ident ! ($($rest:tt)*)) => ($submac!(1, $($rest)*)); + (1, $submac:ident ! ($($rest:tt)*)) => ($submac!(2, $($rest)*)); + (2, $submac:ident ! ($($rest:tt)*)) => ($submac!(3, $($rest)*)); + (3, $submac:ident ! ($($rest:tt)*)) => ($submac!(4, $($rest)*)); + (4, $submac:ident ! ($($rest:tt)*)) => ($submac!(5, $($rest)*)); + (5, $submac:ident ! ($($rest:tt)*)) => ($submac!(6, $($rest)*)); + (6, $submac:ident ! ($($rest:tt)*)) => ($submac!(7, $($rest)*)); + (7, $submac:ident ! ($($rest:tt)*)) => ($submac!(8, $($rest)*)); + (8, $submac:ident ! ($($rest:tt)*)) => ($submac!(9, $($rest)*)); + (9, $submac:ident ! ($($rest:tt)*)) => ($submac!(10, $($rest)*)); + (10, $submac:ident ! ($($rest:tt)*)) => ($submac!(11, $($rest)*)); + (11, $submac:ident ! ($($rest:tt)*)) => ($submac!(12, $($rest)*)); + (12, $submac:ident ! ($($rest:tt)*)) => ($submac!(13, $($rest)*)); + (13, $submac:ident ! ($($rest:tt)*)) => ($submac!(14, $($rest)*)); + (14, $submac:ident ! ($($rest:tt)*)) => ($submac!(15, $($rest)*)); + (15, $submac:ident ! ($($rest:tt)*)) => ($submac!(16, $($rest)*)); + (16, $submac:ident ! ($($rest:tt)*)) => ($submac!(17, $($rest)*)); + (17, $submac:ident ! ($($rest:tt)*)) => ($submac!(18, $($rest)*)); + (18, $submac:ident ! ($($rest:tt)*)) => ($submac!(19, $($rest)*)); + (19, $submac:ident ! ($($rest:tt)*)) => ($submac!(20, $($rest)*)); + (20, $submac:ident ! ($($rest:tt)*)) => ($submac!(21, $($rest)*)); +); diff --git a/third_party/rust/nom/src/multi/mod.rs b/third_party/rust/nom/src/multi/mod.rs new file mode 100644 index 0000000000..73129084e2 --- /dev/null +++ b/third_party/rust/nom/src/multi/mod.rs @@ -0,0 +1,1049 @@ +//! Combinators applying their child parser multiple times + +#[cfg(test)] +mod tests; + +use crate::error::ErrorKind; +use crate::error::ParseError; +use crate::internal::{Err, IResult, Needed, Parser}; +#[cfg(feature = "alloc")] +use crate::lib::std::vec::Vec; +use crate::traits::{InputLength, InputTake, ToUsize}; +use core::num::NonZeroUsize; + +/// Don't pre-allocate more than 64KiB when calling `Vec::with_capacity`. +/// +/// Pre-allocating memory is a nice optimization but count fields can't +/// always be trusted. We should clamp initial capacities to some reasonable +/// amount. This reduces the risk of a bogus count value triggering a panic +/// due to an OOM error. +/// +/// This does not affect correctness. Nom will always read the full number +/// of elements regardless of the capacity cap. +#[cfg(feature = "alloc")] +const MAX_INITIAL_CAPACITY_BYTES: usize = 65536; + +/// Repeats the embedded parser, gathering the results in a `Vec`. +/// +/// This stops on [`Err::Error`] and returns the results that were accumulated. To instead chain an error up, see +/// [`cut`][crate::combinator::cut]. +/// +/// # Arguments +/// * `f` The parser to apply. +/// +/// *Note*: if the parser passed in accepts empty inputs (like `alpha0` or `digit0`), `many0` will +/// return an error, to prevent going into an infinite loop +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed, IResult}; +/// use nom::multi::many0; +/// use nom::bytes::complete::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// many0(tag("abc"))(s) +/// } +/// +/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); +/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); +/// assert_eq!(parser("123123"), Ok(("123123", vec![]))); +/// assert_eq!(parser(""), Ok(("", vec![]))); +/// ``` +#[cfg(feature = "alloc")] +#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] +pub fn many0<I, O, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, Vec<O>, E> +where + I: Clone + InputLength, + F: Parser<I, O, E>, + E: ParseError<I>, +{ + move |mut i: I| { + let mut acc = crate::lib::std::vec::Vec::with_capacity(4); + loop { + let len = i.input_len(); + match f.parse(i.clone()) { + Err(Err::Error(_)) => return Ok((i, acc)), + Err(e) => return Err(e), + Ok((i1, o)) => { + // infinite loop check: the parser must always consume + if i1.input_len() == len { + return Err(Err::Error(E::from_error_kind(i, ErrorKind::Many0))); + } + + i = i1; + acc.push(o); + } + } + } + } +} + +/// Runs the embedded parser, gathering the results in a `Vec`. +/// +/// This stops on [`Err::Error`] if there is at least one result, and returns the results that were accumulated. To instead chain an error up, +/// see [`cut`][crate::combinator::cut]. +/// +/// # Arguments +/// * `f` The parser to apply. +/// +/// *Note*: If the parser passed to `many1` accepts empty inputs +/// (like `alpha0` or `digit0`), `many1` will return an error, +/// to prevent going into an infinite loop. +/// +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::multi::many1; +/// use nom::bytes::complete::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// many1(tag("abc"))(s) +/// } +/// +/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); +/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); +/// assert_eq!(parser("123123"), Err(Err::Error(Error::new("123123", ErrorKind::Tag)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag)))); +/// ``` +#[cfg(feature = "alloc")] +#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] +pub fn many1<I, O, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, Vec<O>, E> +where + I: Clone + InputLength, + F: Parser<I, O, E>, + E: ParseError<I>, +{ + move |mut i: I| match f.parse(i.clone()) { + Err(Err::Error(err)) => Err(Err::Error(E::append(i, ErrorKind::Many1, err))), + Err(e) => Err(e), + Ok((i1, o)) => { + let mut acc = crate::lib::std::vec::Vec::with_capacity(4); + acc.push(o); + i = i1; + + loop { + let len = i.input_len(); + match f.parse(i.clone()) { + Err(Err::Error(_)) => return Ok((i, acc)), + Err(e) => return Err(e), + Ok((i1, o)) => { + // infinite loop check: the parser must always consume + if i1.input_len() == len { + return Err(Err::Error(E::from_error_kind(i, ErrorKind::Many1))); + } + + i = i1; + acc.push(o); + } + } + } + } + } +} + +/// Applies the parser `f` until the parser `g` produces a result. +/// +/// Returns a tuple of the results of `f` in a `Vec` and the result of `g`. +/// +/// `f` keeps going so long as `g` produces [`Err::Error`]. To instead chain an error up, see [`cut`][crate::combinator::cut]. +/// +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::multi::many_till; +/// use nom::bytes::complete::tag; +/// +/// fn parser(s: &str) -> IResult<&str, (Vec<&str>, &str)> { +/// many_till(tag("abc"), tag("end"))(s) +/// }; +/// +/// assert_eq!(parser("abcabcend"), Ok(("", (vec!["abc", "abc"], "end")))); +/// assert_eq!(parser("abc123end"), Err(Err::Error(Error::new("123end", ErrorKind::Tag)))); +/// assert_eq!(parser("123123end"), Err(Err::Error(Error::new("123123end", ErrorKind::Tag)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag)))); +/// assert_eq!(parser("abcendefg"), Ok(("efg", (vec!["abc"], "end")))); +/// ``` +#[cfg(feature = "alloc")] +#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] +pub fn many_till<I, O, P, E, F, G>( + mut f: F, + mut g: G, +) -> impl FnMut(I) -> IResult<I, (Vec<O>, P), E> +where + I: Clone + InputLength, + F: Parser<I, O, E>, + G: Parser<I, P, E>, + E: ParseError<I>, +{ + move |mut i: I| { + let mut res = crate::lib::std::vec::Vec::new(); + loop { + let len = i.input_len(); + match g.parse(i.clone()) { + Ok((i1, o)) => return Ok((i1, (res, o))), + Err(Err::Error(_)) => { + match f.parse(i.clone()) { + Err(Err::Error(err)) => return Err(Err::Error(E::append(i, ErrorKind::ManyTill, err))), + Err(e) => return Err(e), + Ok((i1, o)) => { + // infinite loop check: the parser must always consume + if i1.input_len() == len { + return Err(Err::Error(E::from_error_kind(i1, ErrorKind::ManyTill))); + } + + res.push(o); + i = i1; + } + } + } + Err(e) => return Err(e), + } + } + } +} + +/// Alternates between two parsers to produce a list of elements. +/// +/// This stops when either parser returns [`Err::Error`] and returns the results that were accumulated. To instead chain an error up, see +/// [`cut`][crate::combinator::cut]. +/// +/// # Arguments +/// * `sep` Parses the separator between list elements. +/// * `f` Parses the elements of the list. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed, IResult}; +/// use nom::multi::separated_list0; +/// use nom::bytes::complete::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// separated_list0(tag("|"), tag("abc"))(s) +/// } +/// +/// assert_eq!(parser("abc|abc|abc"), Ok(("", vec!["abc", "abc", "abc"]))); +/// assert_eq!(parser("abc123abc"), Ok(("123abc", vec!["abc"]))); +/// assert_eq!(parser("abc|def"), Ok(("|def", vec!["abc"]))); +/// assert_eq!(parser(""), Ok(("", vec![]))); +/// assert_eq!(parser("def|abc"), Ok(("def|abc", vec![]))); +/// ``` +#[cfg(feature = "alloc")] +#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] +pub fn separated_list0<I, O, O2, E, F, G>( + mut sep: G, + mut f: F, +) -> impl FnMut(I) -> IResult<I, Vec<O>, E> +where + I: Clone + InputLength, + F: Parser<I, O, E>, + G: Parser<I, O2, E>, + E: ParseError<I>, +{ + move |mut i: I| { + let mut res = Vec::new(); + + match f.parse(i.clone()) { + Err(Err::Error(_)) => return Ok((i, res)), + Err(e) => return Err(e), + Ok((i1, o)) => { + res.push(o); + i = i1; + } + } + + loop { + let len = i.input_len(); + match sep.parse(i.clone()) { + Err(Err::Error(_)) => return Ok((i, res)), + Err(e) => return Err(e), + Ok((i1, _)) => { + // infinite loop check: the parser must always consume + if i1.input_len() == len { + return Err(Err::Error(E::from_error_kind(i1, ErrorKind::SeparatedList))); + } + + match f.parse(i1.clone()) { + Err(Err::Error(_)) => return Ok((i, res)), + Err(e) => return Err(e), + Ok((i2, o)) => { + res.push(o); + i = i2; + } + } + } + } + } + } +} + +/// Alternates between two parsers to produce a list of elements until [`Err::Error`]. +/// +/// Fails if the element parser does not produce at least one element.$ +/// +/// This stops when either parser returns [`Err::Error`] and returns the results that were accumulated. To instead chain an error up, see +/// [`cut`][crate::combinator::cut]. +/// +/// # Arguments +/// * `sep` Parses the separator between list elements. +/// * `f` Parses the elements of the list. +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::multi::separated_list1; +/// use nom::bytes::complete::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// separated_list1(tag("|"), tag("abc"))(s) +/// } +/// +/// assert_eq!(parser("abc|abc|abc"), Ok(("", vec!["abc", "abc", "abc"]))); +/// assert_eq!(parser("abc123abc"), Ok(("123abc", vec!["abc"]))); +/// assert_eq!(parser("abc|def"), Ok(("|def", vec!["abc"]))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag)))); +/// assert_eq!(parser("def|abc"), Err(Err::Error(Error::new("def|abc", ErrorKind::Tag)))); +/// ``` +#[cfg(feature = "alloc")] +#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] +pub fn separated_list1<I, O, O2, E, F, G>( + mut sep: G, + mut f: F, +) -> impl FnMut(I) -> IResult<I, Vec<O>, E> +where + I: Clone + InputLength, + F: Parser<I, O, E>, + G: Parser<I, O2, E>, + E: ParseError<I>, +{ + move |mut i: I| { + let mut res = Vec::new(); + + // Parse the first element + match f.parse(i.clone()) { + Err(e) => return Err(e), + Ok((i1, o)) => { + res.push(o); + i = i1; + } + } + + loop { + let len = i.input_len(); + match sep.parse(i.clone()) { + Err(Err::Error(_)) => return Ok((i, res)), + Err(e) => return Err(e), + Ok((i1, _)) => { + // infinite loop check: the parser must always consume + if i1.input_len() == len { + return Err(Err::Error(E::from_error_kind(i1, ErrorKind::SeparatedList))); + } + + match f.parse(i1.clone()) { + Err(Err::Error(_)) => return Ok((i, res)), + Err(e) => return Err(e), + Ok((i2, o)) => { + res.push(o); + i = i2; + } + } + } + } + } + } +} + +/// Repeats the embedded parser `m..=n` times +/// +/// This stops before `n` when the parser returns [`Err::Error`] and returns the results that were accumulated. To instead chain an error up, see +/// [`cut`][crate::combinator::cut]. +/// +/// # Arguments +/// * `m` The minimum number of iterations. +/// * `n` The maximum number of iterations. +/// * `f` The parser to apply. +/// +/// *Note*: If the parser passed to `many1` accepts empty inputs +/// (like `alpha0` or `digit0`), `many1` will return an error, +/// to prevent going into an infinite loop. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed, IResult}; +/// use nom::multi::many_m_n; +/// use nom::bytes::complete::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// many_m_n(0, 2, tag("abc"))(s) +/// } +/// +/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); +/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); +/// assert_eq!(parser("123123"), Ok(("123123", vec![]))); +/// assert_eq!(parser(""), Ok(("", vec![]))); +/// assert_eq!(parser("abcabcabc"), Ok(("abc", vec!["abc", "abc"]))); +/// ``` +#[cfg(feature = "alloc")] +#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] +pub fn many_m_n<I, O, E, F>( + min: usize, + max: usize, + mut parse: F, +) -> impl FnMut(I) -> IResult<I, Vec<O>, E> +where + I: Clone + InputLength, + F: Parser<I, O, E>, + E: ParseError<I>, +{ + move |mut input: I| { + if min > max { + return Err(Err::Failure(E::from_error_kind(input, ErrorKind::ManyMN))); + } + + let max_initial_capacity = + MAX_INITIAL_CAPACITY_BYTES / crate::lib::std::mem::size_of::<O>().max(1); + let mut res = crate::lib::std::vec::Vec::with_capacity(min.min(max_initial_capacity)); + for count in 0..max { + let len = input.input_len(); + match parse.parse(input.clone()) { + Ok((tail, value)) => { + // infinite loop check: the parser must always consume + if tail.input_len() == len { + return Err(Err::Error(E::from_error_kind(input, ErrorKind::ManyMN))); + } + + res.push(value); + input = tail; + } + Err(Err::Error(e)) => { + if count < min { + return Err(Err::Error(E::append(input, ErrorKind::ManyMN, e))); + } else { + return Ok((input, res)); + } + } + Err(e) => { + return Err(e); + } + } + } + + Ok((input, res)) + } +} + +/// Repeats the embedded parser, counting the results +/// +/// This stops on [`Err::Error`]. To instead chain an error up, see +/// [`cut`][crate::combinator::cut]. +/// +/// # Arguments +/// * `f` The parser to apply. +/// +/// *Note*: if the parser passed in accepts empty inputs (like `alpha0` or `digit0`), `many0` will +/// return an error, to prevent going into an infinite loop +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed, IResult}; +/// use nom::multi::many0_count; +/// use nom::bytes::complete::tag; +/// +/// fn parser(s: &str) -> IResult<&str, usize> { +/// many0_count(tag("abc"))(s) +/// } +/// +/// assert_eq!(parser("abcabc"), Ok(("", 2))); +/// assert_eq!(parser("abc123"), Ok(("123", 1))); +/// assert_eq!(parser("123123"), Ok(("123123", 0))); +/// assert_eq!(parser(""), Ok(("", 0))); +/// ``` +pub fn many0_count<I, O, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, usize, E> +where + I: Clone + InputLength, + F: Parser<I, O, E>, + E: ParseError<I>, +{ + move |i: I| { + let mut input = i; + let mut count = 0; + + loop { + let input_ = input.clone(); + let len = input.input_len(); + match f.parse(input_) { + Ok((i, _)) => { + // infinite loop check: the parser must always consume + if i.input_len() == len { + return Err(Err::Error(E::from_error_kind(input, ErrorKind::Many0Count))); + } + + input = i; + count += 1; + } + + Err(Err::Error(_)) => return Ok((input, count)), + + Err(e) => return Err(e), + } + } + } +} + +/// Runs the embedded parser, counting the results. +/// +/// This stops on [`Err::Error`] if there is at least one result. To instead chain an error up, +/// see [`cut`][crate::combinator::cut]. +/// +/// # Arguments +/// * `f` The parser to apply. +/// +/// *Note*: If the parser passed to `many1` accepts empty inputs +/// (like `alpha0` or `digit0`), `many1` will return an error, +/// to prevent going into an infinite loop. +/// +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::multi::many1_count; +/// use nom::bytes::complete::tag; +/// +/// fn parser(s: &str) -> IResult<&str, usize> { +/// many1_count(tag("abc"))(s) +/// } +/// +/// assert_eq!(parser("abcabc"), Ok(("", 2))); +/// assert_eq!(parser("abc123"), Ok(("123", 1))); +/// assert_eq!(parser("123123"), Err(Err::Error(Error::new("123123", ErrorKind::Many1Count)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Many1Count)))); +/// ``` +pub fn many1_count<I, O, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, usize, E> +where + I: Clone + InputLength, + F: Parser<I, O, E>, + E: ParseError<I>, +{ + move |i: I| { + let i_ = i.clone(); + match f.parse(i_) { + Err(Err::Error(_)) => Err(Err::Error(E::from_error_kind(i, ErrorKind::Many1Count))), + Err(i) => Err(i), + Ok((i1, _)) => { + let mut count = 1; + let mut input = i1; + + loop { + let len = input.input_len(); + let input_ = input.clone(); + match f.parse(input_) { + Err(Err::Error(_)) => return Ok((input, count)), + Err(e) => return Err(e), + Ok((i, _)) => { + // infinite loop check: the parser must always consume + if i.input_len() == len { + return Err(Err::Error(E::from_error_kind(i, ErrorKind::Many1Count))); + } + + count += 1; + input = i; + } + } + } + } + } + } +} + +/// Runs the embedded parser `count` times, gathering the results in a `Vec` +/// +/// # Arguments +/// * `f` The parser to apply. +/// * `count` How often to apply the parser. +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::multi::count; +/// use nom::bytes::complete::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// count(tag("abc"), 2)(s) +/// } +/// +/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); +/// assert_eq!(parser("abc123"), Err(Err::Error(Error::new("123", ErrorKind::Tag)))); +/// assert_eq!(parser("123123"), Err(Err::Error(Error::new("123123", ErrorKind::Tag)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag)))); +/// assert_eq!(parser("abcabcabc"), Ok(("abc", vec!["abc", "abc"]))); +/// ``` +#[cfg(feature = "alloc")] +#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] +pub fn count<I, O, E, F>(mut f: F, count: usize) -> impl FnMut(I) -> IResult<I, Vec<O>, E> +where + I: Clone + PartialEq, + F: Parser<I, O, E>, + E: ParseError<I>, +{ + move |i: I| { + let mut input = i.clone(); + let max_initial_capacity = + MAX_INITIAL_CAPACITY_BYTES / crate::lib::std::mem::size_of::<O>().max(1); + let mut res = crate::lib::std::vec::Vec::with_capacity(count.min(max_initial_capacity)); + + for _ in 0..count { + let input_ = input.clone(); + match f.parse(input_) { + Ok((i, o)) => { + res.push(o); + input = i; + } + Err(Err::Error(e)) => { + return Err(Err::Error(E::append(i, ErrorKind::Count, e))); + } + Err(e) => { + return Err(e); + } + } + } + + Ok((input, res)) + } +} + +/// Runs the embedded parser repeatedly, filling the given slice with results. +/// +/// This parser fails if the input runs out before the given slice is full. +/// +/// # Arguments +/// * `f` The parser to apply. +/// * `buf` The slice to fill +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::multi::fill; +/// use nom::bytes::complete::tag; +/// +/// fn parser(s: &str) -> IResult<&str, [&str; 2]> { +/// let mut buf = ["", ""]; +/// let (rest, ()) = fill(tag("abc"), &mut buf)(s)?; +/// Ok((rest, buf)) +/// } +/// +/// assert_eq!(parser("abcabc"), Ok(("", ["abc", "abc"]))); +/// assert_eq!(parser("abc123"), Err(Err::Error(Error::new("123", ErrorKind::Tag)))); +/// assert_eq!(parser("123123"), Err(Err::Error(Error::new("123123", ErrorKind::Tag)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Tag)))); +/// assert_eq!(parser("abcabcabc"), Ok(("abc", ["abc", "abc"]))); +/// ``` +pub fn fill<'a, I, O, E, F>(f: F, buf: &'a mut [O]) -> impl FnMut(I) -> IResult<I, (), E> + 'a +where + I: Clone + PartialEq, + F: Fn(I) -> IResult<I, O, E> + 'a, + E: ParseError<I>, +{ + move |i: I| { + let mut input = i.clone(); + + for elem in buf.iter_mut() { + let input_ = input.clone(); + match f(input_) { + Ok((i, o)) => { + *elem = o; + input = i; + } + Err(Err::Error(e)) => { + return Err(Err::Error(E::append(i, ErrorKind::Count, e))); + } + Err(e) => { + return Err(e); + } + } + } + + Ok((input, ())) + } +} + +/// Repeats the embedded parser, calling `g` to gather the results. +/// +/// This stops on [`Err::Error`]. To instead chain an error up, see +/// [`cut`][crate::combinator::cut]. +/// +/// # Arguments +/// * `f` The parser to apply. +/// * `init` A function returning the initial value. +/// * `g` The function that combines a result of `f` with +/// the current accumulator. +/// +/// *Note*: if the parser passed in accepts empty inputs (like `alpha0` or `digit0`), `many0` will +/// return an error, to prevent going into an infinite loop +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed, IResult}; +/// use nom::multi::fold_many0; +/// use nom::bytes::complete::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// fold_many0( +/// tag("abc"), +/// Vec::new, +/// |mut acc: Vec<_>, item| { +/// acc.push(item); +/// acc +/// } +/// )(s) +/// } +/// +/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); +/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); +/// assert_eq!(parser("123123"), Ok(("123123", vec![]))); +/// assert_eq!(parser(""), Ok(("", vec![]))); +/// ``` +pub fn fold_many0<I, O, E, F, G, H, R>( + mut f: F, + mut init: H, + mut g: G, +) -> impl FnMut(I) -> IResult<I, R, E> +where + I: Clone + InputLength, + F: Parser<I, O, E>, + G: FnMut(R, O) -> R, + H: FnMut() -> R, + E: ParseError<I>, +{ + move |i: I| { + let mut res = init(); + let mut input = i; + + loop { + let i_ = input.clone(); + let len = input.input_len(); + match f.parse(i_) { + Ok((i, o)) => { + // infinite loop check: the parser must always consume + if i.input_len() == len { + return Err(Err::Error(E::from_error_kind(input, ErrorKind::Many0))); + } + + res = g(res, o); + input = i; + } + Err(Err::Error(_)) => { + return Ok((input, res)); + } + Err(e) => { + return Err(e); + } + } + } + } +} + +/// Repeats the embedded parser, calling `g` to gather the results. +/// +/// This stops on [`Err::Error`] if there is at least one result. To instead chain an error up, +/// see [`cut`][crate::combinator::cut]. +/// +/// # Arguments +/// * `f` The parser to apply. +/// * `init` A function returning the initial value. +/// * `g` The function that combines a result of `f` with +/// the current accumulator. +/// +/// *Note*: If the parser passed to `many1` accepts empty inputs +/// (like `alpha0` or `digit0`), `many1` will return an error, +/// to prevent going into an infinite loop. +/// +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::multi::fold_many1; +/// use nom::bytes::complete::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// fold_many1( +/// tag("abc"), +/// Vec::new, +/// |mut acc: Vec<_>, item| { +/// acc.push(item); +/// acc +/// } +/// )(s) +/// } +/// +/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); +/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); +/// assert_eq!(parser("123123"), Err(Err::Error(Error::new("123123", ErrorKind::Many1)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::Many1)))); +/// ``` +pub fn fold_many1<I, O, E, F, G, H, R>( + mut f: F, + mut init: H, + mut g: G, +) -> impl FnMut(I) -> IResult<I, R, E> +where + I: Clone + InputLength, + F: Parser<I, O, E>, + G: FnMut(R, O) -> R, + H: FnMut() -> R, + E: ParseError<I>, +{ + move |i: I| { + let _i = i.clone(); + let init = init(); + match f.parse(_i) { + Err(Err::Error(_)) => Err(Err::Error(E::from_error_kind(i, ErrorKind::Many1))), + Err(e) => Err(e), + Ok((i1, o1)) => { + let mut acc = g(init, o1); + let mut input = i1; + + loop { + let _input = input.clone(); + let len = input.input_len(); + match f.parse(_input) { + Err(Err::Error(_)) => { + break; + } + Err(e) => return Err(e), + Ok((i, o)) => { + // infinite loop check: the parser must always consume + if i.input_len() == len { + return Err(Err::Failure(E::from_error_kind(i, ErrorKind::Many1))); + } + + acc = g(acc, o); + input = i; + } + } + } + + Ok((input, acc)) + } + } + } +} + +/// Repeats the embedded parser `m..=n` times, calling `g` to gather the results +/// +/// This stops before `n` when the parser returns [`Err::Error`]. To instead chain an error up, see +/// [`cut`][crate::combinator::cut]. +/// +/// # Arguments +/// * `m` The minimum number of iterations. +/// * `n` The maximum number of iterations. +/// * `f` The parser to apply. +/// * `init` A function returning the initial value. +/// * `g` The function that combines a result of `f` with +/// the current accumulator. +/// +/// *Note*: If the parser passed to `many1` accepts empty inputs +/// (like `alpha0` or `digit0`), `many1` will return an error, +/// to prevent going into an infinite loop. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed, IResult}; +/// use nom::multi::fold_many_m_n; +/// use nom::bytes::complete::tag; +/// +/// fn parser(s: &str) -> IResult<&str, Vec<&str>> { +/// fold_many_m_n( +/// 0, +/// 2, +/// tag("abc"), +/// Vec::new, +/// |mut acc: Vec<_>, item| { +/// acc.push(item); +/// acc +/// } +/// )(s) +/// } +/// +/// assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); +/// assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); +/// assert_eq!(parser("123123"), Ok(("123123", vec![]))); +/// assert_eq!(parser(""), Ok(("", vec![]))); +/// assert_eq!(parser("abcabcabc"), Ok(("abc", vec!["abc", "abc"]))); +/// ``` +pub fn fold_many_m_n<I, O, E, F, G, H, R>( + min: usize, + max: usize, + mut parse: F, + mut init: H, + mut fold: G, +) -> impl FnMut(I) -> IResult<I, R, E> +where + I: Clone + InputLength, + F: Parser<I, O, E>, + G: FnMut(R, O) -> R, + H: FnMut() -> R, + E: ParseError<I>, +{ + move |mut input: I| { + if min > max { + return Err(Err::Failure(E::from_error_kind(input, ErrorKind::ManyMN))); + } + + let mut acc = init(); + for count in 0..max { + let len = input.input_len(); + match parse.parse(input.clone()) { + Ok((tail, value)) => { + // infinite loop check: the parser must always consume + if tail.input_len() == len { + return Err(Err::Error(E::from_error_kind(tail, ErrorKind::ManyMN))); + } + + acc = fold(acc, value); + input = tail; + } + //FInputXMError: handle failure properly + Err(Err::Error(err)) => { + if count < min { + return Err(Err::Error(E::append(input, ErrorKind::ManyMN, err))); + } else { + break; + } + } + Err(e) => return Err(e), + } + } + + Ok((input, acc)) + } +} + +/// Gets a number from the parser and returns a +/// subslice of the input of that size. +/// If the parser returns `Incomplete`, +/// `length_data` will return an error. +/// # Arguments +/// * `f` The parser to apply. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed, IResult}; +/// use nom::number::complete::be_u16; +/// use nom::multi::length_data; +/// use nom::bytes::complete::tag; +/// +/// fn parser(s: &[u8]) -> IResult<&[u8], &[u8]> { +/// length_data(be_u16)(s) +/// } +/// +/// assert_eq!(parser(b"\x00\x03abcefg"), Ok((&b"efg"[..], &b"abc"[..]))); +/// assert_eq!(parser(b"\x00\x03a"), Err(Err::Incomplete(Needed::new(2)))); +/// ``` +pub fn length_data<I, N, E, F>(mut f: F) -> impl FnMut(I) -> IResult<I, I, E> +where + I: InputLength + InputTake, + N: ToUsize, + F: Parser<I, N, E>, + E: ParseError<I>, +{ + move |i: I| { + let (i, length) = f.parse(i)?; + + let length: usize = length.to_usize(); + + if let Some(needed) = length + .checked_sub(i.input_len()) + .and_then(NonZeroUsize::new) + { + Err(Err::Incomplete(Needed::Size(needed))) + } else { + Ok(i.take_split(length)) + } + } +} + +/// Gets a number from the first parser, +/// takes a subslice of the input of that size, +/// then applies the second parser on that subslice. +/// If the second parser returns `Incomplete`, +/// `length_value` will return an error. +/// # Arguments +/// * `f` The parser to apply. +/// * `g` The parser to apply on the subslice. +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::number::complete::be_u16; +/// use nom::multi::length_value; +/// use nom::bytes::complete::tag; +/// +/// fn parser(s: &[u8]) -> IResult<&[u8], &[u8]> { +/// length_value(be_u16, tag("abc"))(s) +/// } +/// +/// assert_eq!(parser(b"\x00\x03abcefg"), Ok((&b"efg"[..], &b"abc"[..]))); +/// assert_eq!(parser(b"\x00\x03123123"), Err(Err::Error(Error::new(&b"123"[..], ErrorKind::Tag)))); +/// assert_eq!(parser(b"\x00\x03a"), Err(Err::Incomplete(Needed::new(2)))); +/// ``` +pub fn length_value<I, O, N, E, F, G>(mut f: F, mut g: G) -> impl FnMut(I) -> IResult<I, O, E> +where + I: Clone + InputLength + InputTake, + N: ToUsize, + F: Parser<I, N, E>, + G: Parser<I, O, E>, + E: ParseError<I>, +{ + move |i: I| { + let (i, length) = f.parse(i)?; + + let length: usize = length.to_usize(); + + if let Some(needed) = length + .checked_sub(i.input_len()) + .and_then(NonZeroUsize::new) + { + Err(Err::Incomplete(Needed::Size(needed))) + } else { + let (rest, i) = i.take_split(length); + match g.parse(i.clone()) { + Err(Err::Incomplete(_)) => Err(Err::Error(E::from_error_kind(i, ErrorKind::Complete))), + Err(e) => Err(e), + Ok((_, o)) => Ok((rest, o)), + } + } + } +} + +/// Gets a number from the first parser, +/// then applies the second parser that many times. +/// # Arguments +/// * `f` The parser to apply to obtain the count. +/// * `g` The parser to apply repeatedly. +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, Needed, IResult}; +/// use nom::number::complete::u8; +/// use nom::multi::length_count; +/// use nom::bytes::complete::tag; +/// use nom::combinator::map; +/// +/// fn parser(s: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { +/// length_count(map(u8, |i| { +/// println!("got number: {}", i); +/// i +/// }), tag("abc"))(s) +/// } +/// +/// assert_eq!(parser(&b"\x02abcabcabc"[..]), Ok(((&b"abc"[..], vec![&b"abc"[..], &b"abc"[..]])))); +/// assert_eq!(parser(b"\x03123123123"), Err(Err::Error(Error::new(&b"123123123"[..], ErrorKind::Tag)))); +/// ``` +#[cfg(feature = "alloc")] +pub fn length_count<I, O, N, E, F, G>(mut f: F, mut g: G) -> impl FnMut(I) -> IResult<I, Vec<O>, E> +where + I: Clone, + N: ToUsize, + F: Parser<I, N, E>, + G: Parser<I, O, E>, + E: ParseError<I>, +{ + move |i: I| { + let (i, count) = f.parse(i)?; + let mut input = i.clone(); + let mut res = Vec::new(); + + for _ in 0..count.to_usize() { + let input_ = input.clone(); + match g.parse(input_) { + Ok((i, o)) => { + res.push(o); + input = i; + } + Err(Err::Error(e)) => { + return Err(Err::Error(E::append(i, ErrorKind::Count, e))); + } + Err(e) => { + return Err(e); + } + } + } + + Ok((input, res)) + } +} diff --git a/third_party/rust/nom/src/multi/tests.rs b/third_party/rust/nom/src/multi/tests.rs new file mode 100644 index 0000000000..96a6518176 --- /dev/null +++ b/third_party/rust/nom/src/multi/tests.rs @@ -0,0 +1,534 @@ +use super::{length_data, length_value, many0_count, many1_count}; +use crate::{ + bytes::streaming::tag, + character::streaming::digit1 as digit, + error::{ErrorKind, ParseError}, + internal::{Err, IResult, Needed}, + lib::std::str::{self, FromStr}, + number::streaming::{be_u16, be_u8}, + sequence::{pair, tuple}, +}; +#[cfg(feature = "alloc")] +use crate::{ + lib::std::vec::Vec, + multi::{ + count, fold_many0, fold_many1, fold_many_m_n, length_count, many0, many1, many_m_n, many_till, + separated_list0, separated_list1, + }, +}; + +#[test] +#[cfg(feature = "alloc")] +fn separated_list0_test() { + fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + separated_list0(tag(","), tag("abcd"))(i) + } + fn multi_empty(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + separated_list0(tag(","), tag(""))(i) + } + fn empty_sep(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + separated_list0(tag(""), tag("abc"))(i) + } + fn multi_longsep(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + separated_list0(tag(".."), tag("abcd"))(i) + } + + let a = &b"abcdef"[..]; + let b = &b"abcd,abcdef"[..]; + let c = &b"azerty"[..]; + let d = &b",,abc"[..]; + let e = &b"abcd,abcd,ef"[..]; + let f = &b"abc"[..]; + let g = &b"abcd."[..]; + let h = &b"abcd,abc"[..]; + let i = &b"abcabc"[..]; + + let res1 = vec![&b"abcd"[..]]; + assert_eq!(multi(a), Ok((&b"ef"[..], res1))); + let res2 = vec![&b"abcd"[..], &b"abcd"[..]]; + assert_eq!(multi(b), Ok((&b"ef"[..], res2))); + assert_eq!(multi(c), Ok((&b"azerty"[..], Vec::new()))); + let res3 = vec![&b""[..], &b""[..], &b""[..]]; + assert_eq!(multi_empty(d), Ok((&b"abc"[..], res3))); + let i_err_pos = &i[3..]; + assert_eq!( + empty_sep(i), + Err(Err::Error(error_position!( + i_err_pos, + ErrorKind::SeparatedList + ))) + ); + let res4 = vec![&b"abcd"[..], &b"abcd"[..]]; + assert_eq!(multi(e), Ok((&b",ef"[..], res4))); + + assert_eq!(multi(f), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(multi_longsep(g), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(multi(h), Err(Err::Incomplete(Needed::new(1)))); +} + +#[test] +#[cfg(feature = "alloc")] +fn separated_list1_test() { + fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + separated_list1(tag(","), tag("abcd"))(i) + } + fn multi_longsep(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + separated_list1(tag(".."), tag("abcd"))(i) + } + + let a = &b"abcdef"[..]; + let b = &b"abcd,abcdef"[..]; + let c = &b"azerty"[..]; + let d = &b"abcd,abcd,ef"[..]; + + let f = &b"abc"[..]; + let g = &b"abcd."[..]; + let h = &b"abcd,abc"[..]; + + let res1 = vec![&b"abcd"[..]]; + assert_eq!(multi(a), Ok((&b"ef"[..], res1))); + let res2 = vec![&b"abcd"[..], &b"abcd"[..]]; + assert_eq!(multi(b), Ok((&b"ef"[..], res2))); + assert_eq!( + multi(c), + Err(Err::Error(error_position!(c, ErrorKind::Tag))) + ); + let res3 = vec![&b"abcd"[..], &b"abcd"[..]]; + assert_eq!(multi(d), Ok((&b",ef"[..], res3))); + + assert_eq!(multi(f), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(multi_longsep(g), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(multi(h), Err(Err::Incomplete(Needed::new(1)))); +} + +#[test] +#[cfg(feature = "alloc")] +fn many0_test() { + fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + many0(tag("abcd"))(i) + } + fn multi_empty(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + many0(tag(""))(i) + } + + assert_eq!(multi(&b"abcdef"[..]), Ok((&b"ef"[..], vec![&b"abcd"[..]]))); + assert_eq!( + multi(&b"abcdabcdefgh"[..]), + Ok((&b"efgh"[..], vec![&b"abcd"[..], &b"abcd"[..]])) + ); + assert_eq!(multi(&b"azerty"[..]), Ok((&b"azerty"[..], Vec::new()))); + assert_eq!(multi(&b"abcdab"[..]), Err(Err::Incomplete(Needed::new(2)))); + assert_eq!(multi(&b"abcd"[..]), Err(Err::Incomplete(Needed::new(4)))); + assert_eq!(multi(&b""[..]), Err(Err::Incomplete(Needed::new(4)))); + assert_eq!( + multi_empty(&b"abcdef"[..]), + Err(Err::Error(error_position!( + &b"abcdef"[..], + ErrorKind::Many0 + ))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn many1_test() { + fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + many1(tag("abcd"))(i) + } + + let a = &b"abcdef"[..]; + let b = &b"abcdabcdefgh"[..]; + let c = &b"azerty"[..]; + let d = &b"abcdab"[..]; + + let res1 = vec![&b"abcd"[..]]; + assert_eq!(multi(a), Ok((&b"ef"[..], res1))); + let res2 = vec![&b"abcd"[..], &b"abcd"[..]]; + assert_eq!(multi(b), Ok((&b"efgh"[..], res2))); + assert_eq!( + multi(c), + Err(Err::Error(error_position!(c, ErrorKind::Tag))) + ); + assert_eq!(multi(d), Err(Err::Incomplete(Needed::new(2)))); +} + +#[test] +#[cfg(feature = "alloc")] +fn many_till_test() { + fn multi(i: &[u8]) -> IResult<&[u8], (Vec<&[u8]>, &[u8])> { + many_till(tag("abcd"), tag("efgh"))(i) + } + + let a = b"abcdabcdefghabcd"; + let b = b"efghabcd"; + let c = b"azerty"; + + let res_a = (vec![&b"abcd"[..], &b"abcd"[..]], &b"efgh"[..]); + let res_b: (Vec<&[u8]>, &[u8]) = (Vec::new(), &b"efgh"[..]); + assert_eq!(multi(&a[..]), Ok((&b"abcd"[..], res_a))); + assert_eq!(multi(&b[..]), Ok((&b"abcd"[..], res_b))); + assert_eq!( + multi(&c[..]), + Err(Err::Error(error_node_position!( + &c[..], + ErrorKind::ManyTill, + error_position!(&c[..], ErrorKind::Tag) + ))) + ); +} + +#[test] +#[cfg(feature = "std")] +fn infinite_many() { + fn tst(input: &[u8]) -> IResult<&[u8], &[u8]> { + println!("input: {:?}", input); + Err(Err::Error(error_position!(input, ErrorKind::Tag))) + } + + // should not go into an infinite loop + fn multi0(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + many0(tst)(i) + } + let a = &b"abcdef"[..]; + assert_eq!(multi0(a), Ok((a, Vec::new()))); + + fn multi1(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + many1(tst)(i) + } + let a = &b"abcdef"[..]; + assert_eq!( + multi1(a), + Err(Err::Error(error_position!(a, ErrorKind::Tag))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn many_m_n_test() { + fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + many_m_n(2, 4, tag("Abcd"))(i) + } + + let a = &b"Abcdef"[..]; + let b = &b"AbcdAbcdefgh"[..]; + let c = &b"AbcdAbcdAbcdAbcdefgh"[..]; + let d = &b"AbcdAbcdAbcdAbcdAbcdefgh"[..]; + let e = &b"AbcdAb"[..]; + + assert_eq!( + multi(a), + Err(Err::Error(error_position!(&b"ef"[..], ErrorKind::Tag))) + ); + let res1 = vec![&b"Abcd"[..], &b"Abcd"[..]]; + assert_eq!(multi(b), Ok((&b"efgh"[..], res1))); + let res2 = vec![&b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..]]; + assert_eq!(multi(c), Ok((&b"efgh"[..], res2))); + let res3 = vec![&b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..]]; + assert_eq!(multi(d), Ok((&b"Abcdefgh"[..], res3))); + assert_eq!(multi(e), Err(Err::Incomplete(Needed::new(2)))); +} + +#[test] +#[cfg(feature = "alloc")] +fn count_test() { + const TIMES: usize = 2; + fn cnt_2(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + count(tag("abc"), TIMES)(i) + } + + assert_eq!( + cnt_2(&b"abcabcabcdef"[..]), + Ok((&b"abcdef"[..], vec![&b"abc"[..], &b"abc"[..]])) + ); + assert_eq!(cnt_2(&b"ab"[..]), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(cnt_2(&b"abcab"[..]), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!( + cnt_2(&b"xxx"[..]), + Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag))) + ); + assert_eq!( + cnt_2(&b"xxxabcabcdef"[..]), + Err(Err::Error(error_position!( + &b"xxxabcabcdef"[..], + ErrorKind::Tag + ))) + ); + assert_eq!( + cnt_2(&b"abcxxxabcdef"[..]), + Err(Err::Error(error_position!( + &b"xxxabcdef"[..], + ErrorKind::Tag + ))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn count_zero() { + const TIMES: usize = 0; + fn counter_2(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + count(tag("abc"), TIMES)(i) + } + + let done = &b"abcabcabcdef"[..]; + let parsed_done = Vec::new(); + let rest = done; + let incomplete_1 = &b"ab"[..]; + let parsed_incompl_1 = Vec::new(); + let incomplete_2 = &b"abcab"[..]; + let parsed_incompl_2 = Vec::new(); + let error = &b"xxx"[..]; + let error_remain = &b"xxx"[..]; + let parsed_err = Vec::new(); + let error_1 = &b"xxxabcabcdef"[..]; + let parsed_err_1 = Vec::new(); + let error_1_remain = &b"xxxabcabcdef"[..]; + let error_2 = &b"abcxxxabcdef"[..]; + let parsed_err_2 = Vec::new(); + let error_2_remain = &b"abcxxxabcdef"[..]; + + assert_eq!(counter_2(done), Ok((rest, parsed_done))); + assert_eq!( + counter_2(incomplete_1), + Ok((incomplete_1, parsed_incompl_1)) + ); + assert_eq!( + counter_2(incomplete_2), + Ok((incomplete_2, parsed_incompl_2)) + ); + assert_eq!(counter_2(error), Ok((error_remain, parsed_err))); + assert_eq!(counter_2(error_1), Ok((error_1_remain, parsed_err_1))); + assert_eq!(counter_2(error_2), Ok((error_2_remain, parsed_err_2))); +} + +#[derive(Debug, Clone, PartialEq)] +pub struct NilError; + +impl<I> From<(I, ErrorKind)> for NilError { + fn from(_: (I, ErrorKind)) -> Self { + NilError + } +} + +impl<I> ParseError<I> for NilError { + fn from_error_kind(_: I, _: ErrorKind) -> NilError { + NilError + } + fn append(_: I, _: ErrorKind, _: NilError) -> NilError { + NilError + } +} + +fn number(i: &[u8]) -> IResult<&[u8], u32> { + use crate::combinator::map_res; + + map_res(map_res(digit, str::from_utf8), FromStr::from_str)(i) +} + +#[test] +#[cfg(feature = "alloc")] +fn length_count_test() { + fn cnt(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + length_count(number, tag("abc"))(i) + } + + assert_eq!( + cnt(&b"2abcabcabcdef"[..]), + Ok((&b"abcdef"[..], vec![&b"abc"[..], &b"abc"[..]])) + ); + assert_eq!(cnt(&b"2ab"[..]), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(cnt(&b"3abcab"[..]), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!( + cnt(&b"xxx"[..]), + Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Digit))) + ); + assert_eq!( + cnt(&b"2abcxxx"[..]), + Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag))) + ); +} + +#[test] +fn length_data_test() { + fn take(i: &[u8]) -> IResult<&[u8], &[u8]> { + length_data(number)(i) + } + + assert_eq!( + take(&b"6abcabcabcdef"[..]), + Ok((&b"abcdef"[..], &b"abcabc"[..])) + ); + assert_eq!(take(&b"3ab"[..]), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!( + take(&b"xxx"[..]), + Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Digit))) + ); + assert_eq!(take(&b"2abcxxx"[..]), Ok((&b"cxxx"[..], &b"ab"[..]))); +} + +#[test] +fn length_value_test() { + fn length_value_1(i: &[u8]) -> IResult<&[u8], u16> { + length_value(be_u8, be_u16)(i) + } + fn length_value_2(i: &[u8]) -> IResult<&[u8], (u8, u8)> { + length_value(be_u8, tuple((be_u8, be_u8)))(i) + } + + let i1 = [0, 5, 6]; + assert_eq!( + length_value_1(&i1), + Err(Err::Error(error_position!(&b""[..], ErrorKind::Complete))) + ); + assert_eq!( + length_value_2(&i1), + Err(Err::Error(error_position!(&b""[..], ErrorKind::Complete))) + ); + + let i2 = [1, 5, 6, 3]; + assert_eq!( + length_value_1(&i2), + Err(Err::Error(error_position!(&i2[1..2], ErrorKind::Complete))) + ); + assert_eq!( + length_value_2(&i2), + Err(Err::Error(error_position!(&i2[1..2], ErrorKind::Complete))) + ); + + let i3 = [2, 5, 6, 3, 4, 5, 7]; + assert_eq!(length_value_1(&i3), Ok((&i3[3..], 1286))); + assert_eq!(length_value_2(&i3), Ok((&i3[3..], (5, 6)))); + + let i4 = [3, 5, 6, 3, 4, 5]; + assert_eq!(length_value_1(&i4), Ok((&i4[4..], 1286))); + assert_eq!(length_value_2(&i4), Ok((&i4[4..], (5, 6)))); +} + +#[test] +#[cfg(feature = "alloc")] +fn fold_many0_test() { + fn fold_into_vec<T>(mut acc: Vec<T>, item: T) -> Vec<T> { + acc.push(item); + acc + } + fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + fold_many0(tag("abcd"), Vec::new, fold_into_vec)(i) + } + fn multi_empty(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + fold_many0(tag(""), Vec::new, fold_into_vec)(i) + } + + assert_eq!(multi(&b"abcdef"[..]), Ok((&b"ef"[..], vec![&b"abcd"[..]]))); + assert_eq!( + multi(&b"abcdabcdefgh"[..]), + Ok((&b"efgh"[..], vec![&b"abcd"[..], &b"abcd"[..]])) + ); + assert_eq!(multi(&b"azerty"[..]), Ok((&b"azerty"[..], Vec::new()))); + assert_eq!(multi(&b"abcdab"[..]), Err(Err::Incomplete(Needed::new(2)))); + assert_eq!(multi(&b"abcd"[..]), Err(Err::Incomplete(Needed::new(4)))); + assert_eq!(multi(&b""[..]), Err(Err::Incomplete(Needed::new(4)))); + assert_eq!( + multi_empty(&b"abcdef"[..]), + Err(Err::Error(error_position!( + &b"abcdef"[..], + ErrorKind::Many0 + ))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn fold_many1_test() { + fn fold_into_vec<T>(mut acc: Vec<T>, item: T) -> Vec<T> { + acc.push(item); + acc + } + fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + fold_many1(tag("abcd"), Vec::new, fold_into_vec)(i) + } + + let a = &b"abcdef"[..]; + let b = &b"abcdabcdefgh"[..]; + let c = &b"azerty"[..]; + let d = &b"abcdab"[..]; + + let res1 = vec![&b"abcd"[..]]; + assert_eq!(multi(a), Ok((&b"ef"[..], res1))); + let res2 = vec![&b"abcd"[..], &b"abcd"[..]]; + assert_eq!(multi(b), Ok((&b"efgh"[..], res2))); + assert_eq!( + multi(c), + Err(Err::Error(error_position!(c, ErrorKind::Many1))) + ); + assert_eq!(multi(d), Err(Err::Incomplete(Needed::new(2)))); +} + +#[test] +#[cfg(feature = "alloc")] +fn fold_many_m_n_test() { + fn fold_into_vec<T>(mut acc: Vec<T>, item: T) -> Vec<T> { + acc.push(item); + acc + } + fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + fold_many_m_n(2, 4, tag("Abcd"), Vec::new, fold_into_vec)(i) + } + + let a = &b"Abcdef"[..]; + let b = &b"AbcdAbcdefgh"[..]; + let c = &b"AbcdAbcdAbcdAbcdefgh"[..]; + let d = &b"AbcdAbcdAbcdAbcdAbcdefgh"[..]; + let e = &b"AbcdAb"[..]; + + assert_eq!( + multi(a), + Err(Err::Error(error_position!(&b"ef"[..], ErrorKind::Tag))) + ); + let res1 = vec![&b"Abcd"[..], &b"Abcd"[..]]; + assert_eq!(multi(b), Ok((&b"efgh"[..], res1))); + let res2 = vec![&b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..]]; + assert_eq!(multi(c), Ok((&b"efgh"[..], res2))); + let res3 = vec![&b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..], &b"Abcd"[..]]; + assert_eq!(multi(d), Ok((&b"Abcdefgh"[..], res3))); + assert_eq!(multi(e), Err(Err::Incomplete(Needed::new(2)))); +} + +#[test] +fn many0_count_test() { + fn count0_nums(i: &[u8]) -> IResult<&[u8], usize> { + many0_count(pair(digit, tag(",")))(i) + } + + assert_eq!(count0_nums(&b"123,junk"[..]), Ok((&b"junk"[..], 1))); + + assert_eq!(count0_nums(&b"123,45,junk"[..]), Ok((&b"junk"[..], 2))); + + assert_eq!( + count0_nums(&b"1,2,3,4,5,6,7,8,9,0,junk"[..]), + Ok((&b"junk"[..], 10)) + ); + + assert_eq!(count0_nums(&b"hello"[..]), Ok((&b"hello"[..], 0))); +} + +#[test] +fn many1_count_test() { + fn count1_nums(i: &[u8]) -> IResult<&[u8], usize> { + many1_count(pair(digit, tag(",")))(i) + } + + assert_eq!(count1_nums(&b"123,45,junk"[..]), Ok((&b"junk"[..], 2))); + + assert_eq!( + count1_nums(&b"1,2,3,4,5,6,7,8,9,0,junk"[..]), + Ok((&b"junk"[..], 10)) + ); + + assert_eq!( + count1_nums(&b"hello"[..]), + Err(Err::Error(error_position!( + &b"hello"[..], + ErrorKind::Many1Count + ))) + ); +} diff --git a/third_party/rust/nom/src/number/complete.rs b/third_party/rust/nom/src/number/complete.rs new file mode 100644 index 0000000000..98b8b3abf8 --- /dev/null +++ b/third_party/rust/nom/src/number/complete.rs @@ -0,0 +1,2126 @@ +//! Parsers recognizing numbers, complete input version + +use crate::branch::alt; +use crate::bytes::complete::tag; +use crate::character::complete::{char, digit1, sign}; +use crate::combinator::{cut, map, opt, recognize}; +use crate::error::ParseError; +use crate::error::{make_error, ErrorKind}; +use crate::internal::*; +use crate::lib::std::ops::{Range, RangeFrom, RangeTo}; +use crate::sequence::{pair, tuple}; +use crate::traits::{ + AsBytes, AsChar, Compare, InputIter, InputLength, InputTake, InputTakeAtPosition, Offset, Slice, +}; + +/// Recognizes an unsigned 1 byte integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::be_u8; +/// +/// let parser = |s| { +/// be_u8(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x03abcefg"[..]), Ok((&b"\x03abcefg"[..], 0x00))); +/// assert_eq!(parser(&b""[..]), Err(Err::Error((&[][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn be_u8<I, E: ParseError<I>>(input: I) -> IResult<I, u8, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 1; + if input.input_len() < bound { + Err(Err::Error(make_error(input, ErrorKind::Eof))) + } else { + let res = input.iter_elements().next().unwrap(); + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a big endian unsigned 2 bytes integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::be_u16; +/// +/// let parser = |s| { +/// be_u16(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x03abcefg"[..]), Ok((&b"abcefg"[..], 0x0003))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn be_u16<I, E: ParseError<I>>(input: I) -> IResult<I, u16, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 2; + if input.input_len() < bound { + Err(Err::Error(make_error(input, ErrorKind::Eof))) + } else { + let mut res = 0u16; + for byte in input.iter_elements().take(bound) { + res = (res << 8) + byte as u16; + } + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a big endian unsigned 3 byte integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::be_u24; +/// +/// let parser = |s| { +/// be_u24(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x03\x05abcefg"[..]), Ok((&b"abcefg"[..], 0x000305))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn be_u24<I, E: ParseError<I>>(input: I) -> IResult<I, u32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 3; + if input.input_len() < bound { + Err(Err::Error(make_error(input, ErrorKind::Eof))) + } else { + let mut res = 0u32; + for byte in input.iter_elements().take(bound) { + res = (res << 8) + byte as u32; + } + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a big endian unsigned 4 bytes integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::be_u32; +/// +/// let parser = |s| { +/// be_u32(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x03\x05\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x00030507))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn be_u32<I, E: ParseError<I>>(input: I) -> IResult<I, u32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 4; + if input.input_len() < bound { + Err(Err::Error(make_error(input, ErrorKind::Eof))) + } else { + let mut res = 0u32; + for byte in input.iter_elements().take(bound) { + res = (res << 8) + byte as u32; + } + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a big endian unsigned 8 bytes integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::be_u64; +/// +/// let parser = |s| { +/// be_u64(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x0001020304050607))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn be_u64<I, E: ParseError<I>>(input: I) -> IResult<I, u64, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 8; + if input.input_len() < bound { + Err(Err::Error(make_error(input, ErrorKind::Eof))) + } else { + let mut res = 0u64; + for byte in input.iter_elements().take(bound) { + res = (res << 8) + byte as u64; + } + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a big endian unsigned 16 bytes integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::be_u128; +/// +/// let parser = |s| { +/// be_u128(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x00010203040506070001020304050607))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn be_u128<I, E: ParseError<I>>(input: I) -> IResult<I, u128, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 16; + if input.input_len() < bound { + Err(Err::Error(make_error(input, ErrorKind::Eof))) + } else { + let mut res = 0u128; + for byte in input.iter_elements().take(bound) { + res = (res << 8) + byte as u128; + } + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a signed 1 byte integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::be_i8; +/// +/// let parser = |s| { +/// be_i8(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x03abcefg"[..]), Ok((&b"\x03abcefg"[..], 0x00))); +/// assert_eq!(parser(&b""[..]), Err(Err::Error((&[][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn be_i8<I, E: ParseError<I>>(input: I) -> IResult<I, i8, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + be_u8.map(|x| x as i8).parse(input) +} + +/// Recognizes a big endian signed 2 bytes integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::be_i16; +/// +/// let parser = |s| { +/// be_i16(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x03abcefg"[..]), Ok((&b"abcefg"[..], 0x0003))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn be_i16<I, E: ParseError<I>>(input: I) -> IResult<I, i16, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + be_u16.map(|x| x as i16).parse(input) +} + +/// Recognizes a big endian signed 3 bytes integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::be_i24; +/// +/// let parser = |s| { +/// be_i24(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x03\x05abcefg"[..]), Ok((&b"abcefg"[..], 0x000305))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn be_i24<I, E: ParseError<I>>(input: I) -> IResult<I, i32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + // Same as the unsigned version but we need to sign-extend manually here + be_u24 + .map(|x| { + if x & 0x80_00_00 != 0 { + (x | 0xff_00_00_00) as i32 + } else { + x as i32 + } + }) + .parse(input) +} + +/// Recognizes a big endian signed 4 bytes integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::be_i32; +/// +/// let parser = |s| { +/// be_i32(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x03\x05\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x00030507))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn be_i32<I, E: ParseError<I>>(input: I) -> IResult<I, i32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + be_u32.map(|x| x as i32).parse(input) +} + +/// Recognizes a big endian signed 8 bytes integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::be_i64; +/// +/// let parser = |s| { +/// be_i64(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x0001020304050607))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn be_i64<I, E: ParseError<I>>(input: I) -> IResult<I, i64, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + be_u64.map(|x| x as i64).parse(input) +} + +/// Recognizes a big endian signed 16 bytes integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::be_i128; +/// +/// let parser = |s| { +/// be_i128(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x00010203040506070001020304050607))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn be_i128<I, E: ParseError<I>>(input: I) -> IResult<I, i128, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + be_u128.map(|x| x as i128).parse(input) +} + +/// Recognizes an unsigned 1 byte integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::le_u8; +/// +/// let parser = |s| { +/// le_u8(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x03abcefg"[..]), Ok((&b"\x03abcefg"[..], 0x00))); +/// assert_eq!(parser(&b""[..]), Err(Err::Error((&[][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn le_u8<I, E: ParseError<I>>(input: I) -> IResult<I, u8, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 1; + if input.input_len() < bound { + Err(Err::Error(make_error(input, ErrorKind::Eof))) + } else { + let res = input.iter_elements().next().unwrap(); + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a little endian unsigned 2 bytes integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::le_u16; +/// +/// let parser = |s| { +/// le_u16(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x03abcefg"[..]), Ok((&b"abcefg"[..], 0x0300))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn le_u16<I, E: ParseError<I>>(input: I) -> IResult<I, u16, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 2; + if input.input_len() < bound { + Err(Err::Error(make_error(input, ErrorKind::Eof))) + } else { + let mut res = 0u16; + for (index, byte) in input.iter_indices().take(bound) { + res += (byte as u16) << (8 * index); + } + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a little endian unsigned 3 byte integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::le_u24; +/// +/// let parser = |s| { +/// le_u24(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x03\x05abcefg"[..]), Ok((&b"abcefg"[..], 0x050300))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn le_u24<I, E: ParseError<I>>(input: I) -> IResult<I, u32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 3; + if input.input_len() < bound { + Err(Err::Error(make_error(input, ErrorKind::Eof))) + } else { + let mut res = 0u32; + for (index, byte) in input.iter_indices().take(bound) { + res += (byte as u32) << (8 * index); + } + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a little endian unsigned 4 bytes integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::le_u32; +/// +/// let parser = |s| { +/// le_u32(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x03\x05\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x07050300))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn le_u32<I, E: ParseError<I>>(input: I) -> IResult<I, u32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 4; + if input.input_len() < bound { + Err(Err::Error(make_error(input, ErrorKind::Eof))) + } else { + let mut res = 0u32; + for (index, byte) in input.iter_indices().take(bound) { + res += (byte as u32) << (8 * index); + } + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a little endian unsigned 8 bytes integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::le_u64; +/// +/// let parser = |s| { +/// le_u64(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x0706050403020100))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn le_u64<I, E: ParseError<I>>(input: I) -> IResult<I, u64, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 8; + if input.input_len() < bound { + Err(Err::Error(make_error(input, ErrorKind::Eof))) + } else { + let mut res = 0u64; + for (index, byte) in input.iter_indices().take(bound) { + res += (byte as u64) << (8 * index); + } + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a little endian unsigned 16 bytes integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::le_u128; +/// +/// let parser = |s| { +/// le_u128(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x07060504030201000706050403020100))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn le_u128<I, E: ParseError<I>>(input: I) -> IResult<I, u128, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 16; + if input.input_len() < bound { + Err(Err::Error(make_error(input, ErrorKind::Eof))) + } else { + let mut res = 0u128; + for (index, byte) in input.iter_indices().take(bound) { + res += (byte as u128) << (8 * index); + } + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a signed 1 byte integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::le_i8; +/// +/// let parser = |s| { +/// le_i8(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x03abcefg"[..]), Ok((&b"\x03abcefg"[..], 0x00))); +/// assert_eq!(parser(&b""[..]), Err(Err::Error((&[][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn le_i8<I, E: ParseError<I>>(input: I) -> IResult<I, i8, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + be_u8.map(|x| x as i8).parse(input) +} + +/// Recognizes a little endian signed 2 bytes integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::le_i16; +/// +/// let parser = |s| { +/// le_i16(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x03abcefg"[..]), Ok((&b"abcefg"[..], 0x0300))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn le_i16<I, E: ParseError<I>>(input: I) -> IResult<I, i16, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + le_u16.map(|x| x as i16).parse(input) +} + +/// Recognizes a little endian signed 3 bytes integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::le_i24; +/// +/// let parser = |s| { +/// le_i24(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x03\x05abcefg"[..]), Ok((&b"abcefg"[..], 0x050300))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn le_i24<I, E: ParseError<I>>(input: I) -> IResult<I, i32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + // Same as the unsigned version but we need to sign-extend manually here + le_u24 + .map(|x| { + if x & 0x80_00_00 != 0 { + (x | 0xff_00_00_00) as i32 + } else { + x as i32 + } + }) + .parse(input) +} + +/// Recognizes a little endian signed 4 bytes integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::le_i32; +/// +/// let parser = |s| { +/// le_i32(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x03\x05\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x07050300))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn le_i32<I, E: ParseError<I>>(input: I) -> IResult<I, i32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + le_u32.map(|x| x as i32).parse(input) +} + +/// Recognizes a little endian signed 8 bytes integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::le_i64; +/// +/// let parser = |s| { +/// le_i64(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x0706050403020100))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn le_i64<I, E: ParseError<I>>(input: I) -> IResult<I, i64, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + le_u64.map(|x| x as i64).parse(input) +} + +/// Recognizes a little endian signed 16 bytes integer. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::le_i128; +/// +/// let parser = |s| { +/// le_i128(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x07060504030201000706050403020100))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn le_i128<I, E: ParseError<I>>(input: I) -> IResult<I, i128, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + le_u128.map(|x| x as i128).parse(input) +} + +/// Recognizes an unsigned 1 byte integer +/// +/// Note that endianness does not apply to 1 byte numbers. +/// *complete version*: returns an error if there is not enough input data +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::u8; +/// +/// let parser = |s| { +/// u8(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x03abcefg"[..]), Ok((&b"\x03abcefg"[..], 0x00))); +/// assert_eq!(parser(&b""[..]), Err(Err::Error((&[][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn u8<I, E: ParseError<I>>(input: I) -> IResult<I, u8, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 1; + if input.input_len() < bound { + Err(Err::Error(make_error(input, ErrorKind::Eof))) + } else { + let res = input.iter_elements().next().unwrap(); + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes an unsigned 2 bytes integer +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian u16 integer, +/// otherwise if `nom::number::Endianness::Little` parse a little endian u16 integer. +/// *complete version*: returns an error if there is not enough input data +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::u16; +/// +/// let be_u16 = |s| { +/// u16(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_u16(&b"\x00\x03abcefg"[..]), Ok((&b"abcefg"[..], 0x0003))); +/// assert_eq!(be_u16(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// +/// let le_u16 = |s| { +/// u16(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_u16(&b"\x00\x03abcefg"[..]), Ok((&b"abcefg"[..], 0x0300))); +/// assert_eq!(le_u16(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn u16<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, u16, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_u16, + crate::number::Endianness::Little => le_u16, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_u16, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_u16, + } +} + +/// Recognizes an unsigned 3 byte integer +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian u24 integer, +/// otherwise if `nom::number::Endianness::Little` parse a little endian u24 integer. +/// *complete version*: returns an error if there is not enough input data +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::u24; +/// +/// let be_u24 = |s| { +/// u24(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_u24(&b"\x00\x03\x05abcefg"[..]), Ok((&b"abcefg"[..], 0x000305))); +/// assert_eq!(be_u24(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// +/// let le_u24 = |s| { +/// u24(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_u24(&b"\x00\x03\x05abcefg"[..]), Ok((&b"abcefg"[..], 0x050300))); +/// assert_eq!(le_u24(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn u24<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, u32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_u24, + crate::number::Endianness::Little => le_u24, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_u24, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_u24, + } +} + +/// Recognizes an unsigned 4 byte integer +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian u32 integer, +/// otherwise if `nom::number::Endianness::Little` parse a little endian u32 integer. +/// *complete version*: returns an error if there is not enough input data +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::u32; +/// +/// let be_u32 = |s| { +/// u32(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_u32(&b"\x00\x03\x05\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x00030507))); +/// assert_eq!(be_u32(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// +/// let le_u32 = |s| { +/// u32(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_u32(&b"\x00\x03\x05\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x07050300))); +/// assert_eq!(le_u32(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn u32<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, u32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_u32, + crate::number::Endianness::Little => le_u32, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_u32, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_u32, + } +} + +/// Recognizes an unsigned 8 byte integer +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian u64 integer, +/// otherwise if `nom::number::Endianness::Little` parse a little endian u64 integer. +/// *complete version*: returns an error if there is not enough input data +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::u64; +/// +/// let be_u64 = |s| { +/// u64(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_u64(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x0001020304050607))); +/// assert_eq!(be_u64(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// +/// let le_u64 = |s| { +/// u64(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_u64(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x0706050403020100))); +/// assert_eq!(le_u64(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn u64<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, u64, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_u64, + crate::number::Endianness::Little => le_u64, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_u64, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_u64, + } +} + +/// Recognizes an unsigned 16 byte integer +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian u128 integer, +/// otherwise if `nom::number::Endianness::Little` parse a little endian u128 integer. +/// *complete version*: returns an error if there is not enough input data +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::u128; +/// +/// let be_u128 = |s| { +/// u128(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_u128(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x00010203040506070001020304050607))); +/// assert_eq!(be_u128(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// +/// let le_u128 = |s| { +/// u128(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_u128(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x07060504030201000706050403020100))); +/// assert_eq!(le_u128(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn u128<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, u128, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_u128, + crate::number::Endianness::Little => le_u128, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_u128, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_u128, + } +} + +/// Recognizes a signed 1 byte integer +/// +/// Note that endianness does not apply to 1 byte numbers. +/// *complete version*: returns an error if there is not enough input data +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::i8; +/// +/// let parser = |s| { +/// i8(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x03abcefg"[..]), Ok((&b"\x03abcefg"[..], 0x00))); +/// assert_eq!(parser(&b""[..]), Err(Err::Error((&[][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn i8<I, E: ParseError<I>>(i: I) -> IResult<I, i8, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + u8.map(|x| x as i8).parse(i) +} + +/// Recognizes a signed 2 byte integer +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian i16 integer, +/// otherwise if `nom::number::Endianness::Little` parse a little endian i16 integer. +/// *complete version*: returns an error if there is not enough input data +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::i16; +/// +/// let be_i16 = |s| { +/// i16(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_i16(&b"\x00\x03abcefg"[..]), Ok((&b"abcefg"[..], 0x0003))); +/// assert_eq!(be_i16(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// +/// let le_i16 = |s| { +/// i16(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_i16(&b"\x00\x03abcefg"[..]), Ok((&b"abcefg"[..], 0x0300))); +/// assert_eq!(le_i16(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn i16<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, i16, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_i16, + crate::number::Endianness::Little => le_i16, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_i16, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_i16, + } +} + +/// Recognizes a signed 3 byte integer +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian i24 integer, +/// otherwise if `nom::number::Endianness::Little` parse a little endian i24 integer. +/// *complete version*: returns an error if there is not enough input data +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::i24; +/// +/// let be_i24 = |s| { +/// i24(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_i24(&b"\x00\x03\x05abcefg"[..]), Ok((&b"abcefg"[..], 0x000305))); +/// assert_eq!(be_i24(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// +/// let le_i24 = |s| { +/// i24(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_i24(&b"\x00\x03\x05abcefg"[..]), Ok((&b"abcefg"[..], 0x050300))); +/// assert_eq!(le_i24(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn i24<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, i32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_i24, + crate::number::Endianness::Little => le_i24, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_i24, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_i24, + } +} + +/// Recognizes a signed 4 byte integer +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian i32 integer, +/// otherwise if `nom::number::Endianness::Little` parse a little endian i32 integer. +/// *complete version*: returns an error if there is not enough input data +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::i32; +/// +/// let be_i32 = |s| { +/// i32(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_i32(&b"\x00\x03\x05\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x00030507))); +/// assert_eq!(be_i32(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// +/// let le_i32 = |s| { +/// i32(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_i32(&b"\x00\x03\x05\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x07050300))); +/// assert_eq!(le_i32(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn i32<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, i32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_i32, + crate::number::Endianness::Little => le_i32, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_i32, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_i32, + } +} + +/// Recognizes a signed 8 byte integer +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian i64 integer, +/// otherwise if `nom::number::Endianness::Little` parse a little endian i64 integer. +/// *complete version*: returns an error if there is not enough input data +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::i64; +/// +/// let be_i64 = |s| { +/// i64(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_i64(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x0001020304050607))); +/// assert_eq!(be_i64(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// +/// let le_i64 = |s| { +/// i64(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_i64(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x0706050403020100))); +/// assert_eq!(le_i64(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn i64<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, i64, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_i64, + crate::number::Endianness::Little => le_i64, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_i64, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_i64, + } +} + +/// Recognizes a signed 16 byte integer +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian i128 integer, +/// otherwise if `nom::number::Endianness::Little` parse a little endian i128 integer. +/// *complete version*: returns an error if there is not enough input data +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::i128; +/// +/// let be_i128 = |s| { +/// i128(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_i128(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x00010203040506070001020304050607))); +/// assert_eq!(be_i128(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// +/// let le_i128 = |s| { +/// i128(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_i128(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x07060504030201000706050403020100))); +/// assert_eq!(le_i128(&b"\x01"[..]), Err(Err::Error((&[0x01][..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn i128<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, i128, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_i128, + crate::number::Endianness::Little => le_i128, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_i128, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_i128, + } +} + +/// Recognizes a big endian 4 bytes floating point number. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::be_f32; +/// +/// let parser = |s| { +/// be_f32(s) +/// }; +/// +/// assert_eq!(parser(&[0x41, 0x48, 0x00, 0x00][..]), Ok((&b""[..], 12.5))); +/// assert_eq!(parser(&b"abc"[..]), Err(Err::Error((&b"abc"[..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn be_f32<I, E: ParseError<I>>(input: I) -> IResult<I, f32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match be_u32(input) { + Err(e) => Err(e), + Ok((i, o)) => Ok((i, f32::from_bits(o))), + } +} + +/// Recognizes a big endian 8 bytes floating point number. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::be_f64; +/// +/// let parser = |s| { +/// be_f64(s) +/// }; +/// +/// assert_eq!(parser(&[0x40, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 12.5))); +/// assert_eq!(parser(&b"abc"[..]), Err(Err::Error((&b"abc"[..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn be_f64<I, E: ParseError<I>>(input: I) -> IResult<I, f64, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match be_u64(input) { + Err(e) => Err(e), + Ok((i, o)) => Ok((i, f64::from_bits(o))), + } +} + +/// Recognizes a little endian 4 bytes floating point number. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::le_f32; +/// +/// let parser = |s| { +/// le_f32(s) +/// }; +/// +/// assert_eq!(parser(&[0x00, 0x00, 0x48, 0x41][..]), Ok((&b""[..], 12.5))); +/// assert_eq!(parser(&b"abc"[..]), Err(Err::Error((&b"abc"[..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn le_f32<I, E: ParseError<I>>(input: I) -> IResult<I, f32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match le_u32(input) { + Err(e) => Err(e), + Ok((i, o)) => Ok((i, f32::from_bits(o))), + } +} + +/// Recognizes a little endian 8 bytes floating point number. +/// +/// *Complete version*: Returns an error if there is not enough input data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::le_f64; +/// +/// let parser = |s| { +/// le_f64(s) +/// }; +/// +/// assert_eq!(parser(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x40][..]), Ok((&b""[..], 12.5))); +/// assert_eq!(parser(&b"abc"[..]), Err(Err::Error((&b"abc"[..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn le_f64<I, E: ParseError<I>>(input: I) -> IResult<I, f64, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match le_u64(input) { + Err(e) => Err(e), + Ok((i, o)) => Ok((i, f64::from_bits(o))), + } +} + +/// Recognizes a 4 byte floating point number +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian f32 float, +/// otherwise if `nom::number::Endianness::Little` parse a little endian f32 float. +/// *complete version*: returns an error if there is not enough input data +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::f32; +/// +/// let be_f32 = |s| { +/// f32(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_f32(&[0x41, 0x48, 0x00, 0x00][..]), Ok((&b""[..], 12.5))); +/// assert_eq!(be_f32(&b"abc"[..]), Err(Err::Error((&b"abc"[..], ErrorKind::Eof)))); +/// +/// let le_f32 = |s| { +/// f32(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_f32(&[0x00, 0x00, 0x48, 0x41][..]), Ok((&b""[..], 12.5))); +/// assert_eq!(le_f32(&b"abc"[..]), Err(Err::Error((&b"abc"[..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn f32<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, f32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_f32, + crate::number::Endianness::Little => le_f32, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_f32, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_f32, + } +} + +/// Recognizes an 8 byte floating point number +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian f64 float, +/// otherwise if `nom::number::Endianness::Little` parse a little endian f64 float. +/// *complete version*: returns an error if there is not enough input data +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::f64; +/// +/// let be_f64 = |s| { +/// f64(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_f64(&[0x40, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 12.5))); +/// assert_eq!(be_f64(&b"abc"[..]), Err(Err::Error((&b"abc"[..], ErrorKind::Eof)))); +/// +/// let le_f64 = |s| { +/// f64(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_f64(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x40][..]), Ok((&b""[..], 12.5))); +/// assert_eq!(le_f64(&b"abc"[..]), Err(Err::Error((&b"abc"[..], ErrorKind::Eof)))); +/// ``` +#[inline] +pub fn f64<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, f64, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_f64, + crate::number::Endianness::Little => le_f64, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_f64, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_f64, + } +} + +/// Recognizes a hex-encoded integer. +/// +/// *Complete version*: Will parse until the end of input if it has less than 8 bytes. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::hex_u32; +/// +/// let parser = |s| { +/// hex_u32(s) +/// }; +/// +/// assert_eq!(parser(&b"01AE"[..]), Ok((&b""[..], 0x01AE))); +/// assert_eq!(parser(&b"abc"[..]), Ok((&b""[..], 0x0ABC))); +/// assert_eq!(parser(&b"ggg"[..]), Err(Err::Error((&b"ggg"[..], ErrorKind::IsA)))); +/// ``` +#[inline] +pub fn hex_u32<'a, E: ParseError<&'a [u8]>>(input: &'a [u8]) -> IResult<&'a [u8], u32, E> { + let (i, o) = crate::bytes::complete::is_a(&b"0123456789abcdefABCDEF"[..])(input)?; + // Do not parse more than 8 characters for a u32 + let (parsed, remaining) = if o.len() <= 8 { + (o, i) + } else { + (&input[..8], &input[8..]) + }; + + let res = parsed + .iter() + .rev() + .enumerate() + .map(|(k, &v)| { + let digit = v as char; + digit.to_digit(16).unwrap_or(0) << (k * 4) + }) + .sum(); + + Ok((remaining, res)) +} + +/// Recognizes floating point number in a byte string and returns the corresponding slice. +/// +/// *Complete version*: Can parse until the end of input. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::recognize_float; +/// +/// let parser = |s| { +/// recognize_float(s) +/// }; +/// +/// assert_eq!(parser("11e-1"), Ok(("", "11e-1"))); +/// assert_eq!(parser("123E-02"), Ok(("", "123E-02"))); +/// assert_eq!(parser("123K-01"), Ok(("K-01", "123"))); +/// assert_eq!(parser("abc"), Err(Err::Error(("abc", ErrorKind::Char)))); +/// ``` +#[rustfmt::skip] +pub fn recognize_float<T, E:ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>>, + T: Clone + Offset, + T: InputIter, + <T as InputIter>::Item: AsChar, + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + recognize( + tuple(( + opt(alt((char('+'), char('-')))), + alt(( + map(tuple((digit1, opt(pair(char('.'), opt(digit1))))), |_| ()), + map(tuple((char('.'), digit1)), |_| ()) + )), + opt(tuple(( + alt((char('e'), char('E'))), + opt(alt((char('+'), char('-')))), + cut(digit1) + ))) + )) + )(input) +} + +// workaround until issues with minimal-lexical are fixed +#[doc(hidden)] +pub fn recognize_float_or_exceptions<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>>, + T: Clone + Offset, + T: InputIter + InputTake + Compare<&'static str>, + <T as InputIter>::Item: AsChar, + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + alt(( + |i: T| { + recognize_float::<_, E>(i.clone()).map_err(|e| match e { + crate::Err::Error(_) => crate::Err::Error(E::from_error_kind(i, ErrorKind::Float)), + crate::Err::Failure(_) => crate::Err::Failure(E::from_error_kind(i, ErrorKind::Float)), + crate::Err::Incomplete(needed) => crate::Err::Incomplete(needed), + }) + }, + |i: T| { + crate::bytes::complete::tag_no_case::<_, _, E>("nan")(i.clone()) + .map_err(|_| crate::Err::Error(E::from_error_kind(i, ErrorKind::Float))) + }, + |i: T| { + crate::bytes::complete::tag_no_case::<_, _, E>("inf")(i.clone()) + .map_err(|_| crate::Err::Error(E::from_error_kind(i, ErrorKind::Float))) + }, + |i: T| { + crate::bytes::complete::tag_no_case::<_, _, E>("infinity")(i.clone()) + .map_err(|_| crate::Err::Error(E::from_error_kind(i, ErrorKind::Float))) + }, + ))(input) +} + +/// Recognizes a floating point number in text format +/// +/// It returns a tuple of (`sign`, `integer part`, `fraction part` and `exponent`) of the input +/// data. +/// +/// *Complete version*: Can parse until the end of input. +/// +pub fn recognize_float_parts<T, E: ParseError<T>>(input: T) -> IResult<T, (bool, T, T, i32), E> +where + T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>> + Slice<Range<usize>>, + T: Clone + Offset, + T: InputIter + InputTake, + <T as InputIter>::Item: AsChar + Copy, + T: InputTakeAtPosition + InputLength, + <T as InputTakeAtPosition>::Item: AsChar, + T: for<'a> Compare<&'a [u8]>, + T: AsBytes, +{ + let (i, sign) = sign(input.clone())?; + + //let (i, zeroes) = take_while(|c: <T as InputTakeAtPosition>::Item| c.as_char() == '0')(i)?; + let (i, zeroes) = match i.as_bytes().iter().position(|c| *c != b'0') { + Some(index) => i.take_split(index), + None => i.take_split(i.input_len()), + }; + //let (i, mut integer) = digit0(i)?; + let (i, mut integer) = match i + .as_bytes() + .iter() + .position(|c| !(*c >= b'0' && *c <= b'9')) + { + Some(index) => i.take_split(index), + None => i.take_split(i.input_len()), + }; + + if integer.input_len() == 0 && zeroes.input_len() > 0 { + // keep the last zero if integer is empty + integer = zeroes.slice(zeroes.input_len() - 1..); + } + + let (i, opt_dot) = opt(tag(&b"."[..]))(i)?; + let (i, fraction) = if opt_dot.is_none() { + let i2 = i.clone(); + (i2, i.slice(..0)) + } else { + // match number, trim right zeroes + let mut zero_count = 0usize; + let mut position = None; + for (pos, c) in i.as_bytes().iter().enumerate() { + if *c >= b'0' && *c <= b'9' { + if *c == b'0' { + zero_count += 1; + } else { + zero_count = 0; + } + } else { + position = Some(pos); + break; + } + } + + let position = position.unwrap_or(i.input_len()); + + let index = if zero_count == 0 { + position + } else if zero_count == position { + position - zero_count + 1 + } else { + position - zero_count + }; + + (i.slice(position..), i.slice(..index)) + }; + + if integer.input_len() == 0 && fraction.input_len() == 0 { + return Err(Err::Error(E::from_error_kind(input, ErrorKind::Float))); + } + + let i2 = i.clone(); + let (i, e) = match i.as_bytes().iter().next() { + Some(b'e') => (i.slice(1..), true), + Some(b'E') => (i.slice(1..), true), + _ => (i, false), + }; + + let (i, exp) = if e { + cut(crate::character::complete::i32)(i)? + } else { + (i2, 0) + }; + + Ok((i, (sign, integer, fraction, exp))) +} + +use crate::traits::ParseTo; + +/// Recognizes floating point number in text format and returns a f32. +/// +/// *Complete version*: Can parse until the end of input. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::float; +/// +/// let parser = |s| { +/// float(s) +/// }; +/// +/// assert_eq!(parser("11e-1"), Ok(("", 1.1))); +/// assert_eq!(parser("123E-02"), Ok(("", 1.23))); +/// assert_eq!(parser("123K-01"), Ok(("K-01", 123.0))); +/// assert_eq!(parser("abc"), Err(Err::Error(("abc", ErrorKind::Float)))); +/// ``` +pub fn float<T, E: ParseError<T>>(input: T) -> IResult<T, f32, E> +where + T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>> + Slice<Range<usize>>, + T: Clone + Offset + ParseTo<f32> + Compare<&'static str>, + T: InputIter + InputLength + InputTake, + <T as InputIter>::Item: AsChar + Copy, + <T as InputIter>::IterElem: Clone, + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + /* + let (i, (sign, integer, fraction, exponent)) = recognize_float_parts(input)?; + + let mut float: f32 = minimal_lexical::parse_float( + integer.as_bytes().iter(), + fraction.as_bytes().iter(), + exponent, + ); + if !sign { + float = -float; + } + + Ok((i, float)) + */ + let (i, s) = recognize_float_or_exceptions(input)?; + match s.parse_to() { + Some(f) => Ok((i, f)), + None => Err(crate::Err::Error(E::from_error_kind( + i, + crate::error::ErrorKind::Float, + ))), + } +} + +/// Recognizes floating point number in text format and returns a f64. +/// +/// *Complete version*: Can parse until the end of input. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::double; +/// +/// let parser = |s| { +/// double(s) +/// }; +/// +/// assert_eq!(parser("11e-1"), Ok(("", 1.1))); +/// assert_eq!(parser("123E-02"), Ok(("", 1.23))); +/// assert_eq!(parser("123K-01"), Ok(("K-01", 123.0))); +/// assert_eq!(parser("abc"), Err(Err::Error(("abc", ErrorKind::Float)))); +/// ``` +pub fn double<T, E: ParseError<T>>(input: T) -> IResult<T, f64, E> +where + T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>> + Slice<Range<usize>>, + T: Clone + Offset + ParseTo<f64> + Compare<&'static str>, + T: InputIter + InputLength + InputTake, + <T as InputIter>::Item: AsChar + Copy, + <T as InputIter>::IterElem: Clone, + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + /* + let (i, (sign, integer, fraction, exponent)) = recognize_float_parts(input)?; + + let mut float: f64 = minimal_lexical::parse_float( + integer.as_bytes().iter(), + fraction.as_bytes().iter(), + exponent, + ); + if !sign { + float = -float; + } + + Ok((i, float)) + */ + let (i, s) = recognize_float_or_exceptions(input)?; + match s.parse_to() { + Some(f) => Ok((i, f)), + None => Err(crate::Err::Error(E::from_error_kind( + i, + crate::error::ErrorKind::Float, + ))), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::error::ErrorKind; + use crate::internal::Err; + use proptest::prelude::*; + + macro_rules! assert_parse( + ($left: expr, $right: expr) => { + let res: $crate::IResult<_, _, (_, ErrorKind)> = $left; + assert_eq!(res, $right); + }; + ); + + #[test] + fn i8_tests() { + assert_parse!(i8(&[0x00][..]), Ok((&b""[..], 0))); + assert_parse!(i8(&[0x7f][..]), Ok((&b""[..], 127))); + assert_parse!(i8(&[0xff][..]), Ok((&b""[..], -1))); + assert_parse!(i8(&[0x80][..]), Ok((&b""[..], -128))); + } + + #[test] + fn be_i8_tests() { + assert_parse!(be_i8(&[0x00][..]), Ok((&b""[..], 0))); + assert_parse!(be_i8(&[0x7f][..]), Ok((&b""[..], 127))); + assert_parse!(be_i8(&[0xff][..]), Ok((&b""[..], -1))); + assert_parse!(be_i8(&[0x80][..]), Ok((&b""[..], -128))); + } + + #[test] + fn be_i16_tests() { + assert_parse!(be_i16(&[0x00, 0x00][..]), Ok((&b""[..], 0))); + assert_parse!(be_i16(&[0x7f, 0xff][..]), Ok((&b""[..], 32_767_i16))); + assert_parse!(be_i16(&[0xff, 0xff][..]), Ok((&b""[..], -1))); + assert_parse!(be_i16(&[0x80, 0x00][..]), Ok((&b""[..], -32_768_i16))); + } + + #[test] + fn be_u24_tests() { + assert_parse!(be_u24(&[0x00, 0x00, 0x00][..]), Ok((&b""[..], 0))); + assert_parse!(be_u24(&[0x00, 0xFF, 0xFF][..]), Ok((&b""[..], 65_535_u32))); + assert_parse!( + be_u24(&[0x12, 0x34, 0x56][..]), + Ok((&b""[..], 1_193_046_u32)) + ); + } + + #[test] + fn be_i24_tests() { + assert_parse!(be_i24(&[0xFF, 0xFF, 0xFF][..]), Ok((&b""[..], -1_i32))); + assert_parse!(be_i24(&[0xFF, 0x00, 0x00][..]), Ok((&b""[..], -65_536_i32))); + assert_parse!( + be_i24(&[0xED, 0xCB, 0xAA][..]), + Ok((&b""[..], -1_193_046_i32)) + ); + } + + #[test] + fn be_i32_tests() { + assert_parse!(be_i32(&[0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 0))); + assert_parse!( + be_i32(&[0x7f, 0xff, 0xff, 0xff][..]), + Ok((&b""[..], 2_147_483_647_i32)) + ); + assert_parse!(be_i32(&[0xff, 0xff, 0xff, 0xff][..]), Ok((&b""[..], -1))); + assert_parse!( + be_i32(&[0x80, 0x00, 0x00, 0x00][..]), + Ok((&b""[..], -2_147_483_648_i32)) + ); + } + + #[test] + fn be_i64_tests() { + assert_parse!( + be_i64(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + Ok((&b""[..], 0)) + ); + assert_parse!( + be_i64(&[0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff][..]), + Ok((&b""[..], 9_223_372_036_854_775_807_i64)) + ); + assert_parse!( + be_i64(&[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff][..]), + Ok((&b""[..], -1)) + ); + assert_parse!( + be_i64(&[0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + Ok((&b""[..], -9_223_372_036_854_775_808_i64)) + ); + } + + #[test] + fn be_i128_tests() { + assert_parse!( + be_i128( + &[ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00 + ][..] + ), + Ok((&b""[..], 0)) + ); + assert_parse!( + be_i128( + &[ + 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff + ][..] + ), + Ok(( + &b""[..], + 170_141_183_460_469_231_731_687_303_715_884_105_727_i128 + )) + ); + assert_parse!( + be_i128( + &[ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff + ][..] + ), + Ok((&b""[..], -1)) + ); + assert_parse!( + be_i128( + &[ + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00 + ][..] + ), + Ok(( + &b""[..], + -170_141_183_460_469_231_731_687_303_715_884_105_728_i128 + )) + ); + } + + #[test] + fn le_i8_tests() { + assert_parse!(le_i8(&[0x00][..]), Ok((&b""[..], 0))); + assert_parse!(le_i8(&[0x7f][..]), Ok((&b""[..], 127))); + assert_parse!(le_i8(&[0xff][..]), Ok((&b""[..], -1))); + assert_parse!(le_i8(&[0x80][..]), Ok((&b""[..], -128))); + } + + #[test] + fn le_i16_tests() { + assert_parse!(le_i16(&[0x00, 0x00][..]), Ok((&b""[..], 0))); + assert_parse!(le_i16(&[0xff, 0x7f][..]), Ok((&b""[..], 32_767_i16))); + assert_parse!(le_i16(&[0xff, 0xff][..]), Ok((&b""[..], -1))); + assert_parse!(le_i16(&[0x00, 0x80][..]), Ok((&b""[..], -32_768_i16))); + } + + #[test] + fn le_u24_tests() { + assert_parse!(le_u24(&[0x00, 0x00, 0x00][..]), Ok((&b""[..], 0))); + assert_parse!(le_u24(&[0xFF, 0xFF, 0x00][..]), Ok((&b""[..], 65_535_u32))); + assert_parse!( + le_u24(&[0x56, 0x34, 0x12][..]), + Ok((&b""[..], 1_193_046_u32)) + ); + } + + #[test] + fn le_i24_tests() { + assert_parse!(le_i24(&[0xFF, 0xFF, 0xFF][..]), Ok((&b""[..], -1_i32))); + assert_parse!(le_i24(&[0x00, 0x00, 0xFF][..]), Ok((&b""[..], -65_536_i32))); + assert_parse!( + le_i24(&[0xAA, 0xCB, 0xED][..]), + Ok((&b""[..], -1_193_046_i32)) + ); + } + + #[test] + fn le_i32_tests() { + assert_parse!(le_i32(&[0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 0))); + assert_parse!( + le_i32(&[0xff, 0xff, 0xff, 0x7f][..]), + Ok((&b""[..], 2_147_483_647_i32)) + ); + assert_parse!(le_i32(&[0xff, 0xff, 0xff, 0xff][..]), Ok((&b""[..], -1))); + assert_parse!( + le_i32(&[0x00, 0x00, 0x00, 0x80][..]), + Ok((&b""[..], -2_147_483_648_i32)) + ); + } + + #[test] + fn le_i64_tests() { + assert_parse!( + le_i64(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + Ok((&b""[..], 0)) + ); + assert_parse!( + le_i64(&[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f][..]), + Ok((&b""[..], 9_223_372_036_854_775_807_i64)) + ); + assert_parse!( + le_i64(&[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff][..]), + Ok((&b""[..], -1)) + ); + assert_parse!( + le_i64(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80][..]), + Ok((&b""[..], -9_223_372_036_854_775_808_i64)) + ); + } + + #[test] + fn le_i128_tests() { + assert_parse!( + le_i128( + &[ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00 + ][..] + ), + Ok((&b""[..], 0)) + ); + assert_parse!( + le_i128( + &[ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x7f + ][..] + ), + Ok(( + &b""[..], + 170_141_183_460_469_231_731_687_303_715_884_105_727_i128 + )) + ); + assert_parse!( + le_i128( + &[ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff + ][..] + ), + Ok((&b""[..], -1)) + ); + assert_parse!( + le_i128( + &[ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x80 + ][..] + ), + Ok(( + &b""[..], + -170_141_183_460_469_231_731_687_303_715_884_105_728_i128 + )) + ); + } + + #[test] + fn be_f32_tests() { + assert_parse!(be_f32(&[0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 0_f32))); + assert_parse!( + be_f32(&[0x4d, 0x31, 0x1f, 0xd8][..]), + Ok((&b""[..], 185_728_392_f32)) + ); + } + + #[test] + fn be_f64_tests() { + assert_parse!( + be_f64(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + Ok((&b""[..], 0_f64)) + ); + assert_parse!( + be_f64(&[0x41, 0xa6, 0x23, 0xfb, 0x10, 0x00, 0x00, 0x00][..]), + Ok((&b""[..], 185_728_392_f64)) + ); + } + + #[test] + fn le_f32_tests() { + assert_parse!(le_f32(&[0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 0_f32))); + assert_parse!( + le_f32(&[0xd8, 0x1f, 0x31, 0x4d][..]), + Ok((&b""[..], 185_728_392_f32)) + ); + } + + #[test] + fn le_f64_tests() { + assert_parse!( + le_f64(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + Ok((&b""[..], 0_f64)) + ); + assert_parse!( + le_f64(&[0x00, 0x00, 0x00, 0x10, 0xfb, 0x23, 0xa6, 0x41][..]), + Ok((&b""[..], 185_728_392_f64)) + ); + } + + #[test] + fn hex_u32_tests() { + assert_parse!( + hex_u32(&b";"[..]), + Err(Err::Error(error_position!(&b";"[..], ErrorKind::IsA))) + ); + assert_parse!(hex_u32(&b"ff;"[..]), Ok((&b";"[..], 255))); + assert_parse!(hex_u32(&b"1be2;"[..]), Ok((&b";"[..], 7138))); + assert_parse!(hex_u32(&b"c5a31be2;"[..]), Ok((&b";"[..], 3_315_801_058))); + assert_parse!(hex_u32(&b"C5A31be2;"[..]), Ok((&b";"[..], 3_315_801_058))); + assert_parse!(hex_u32(&b"00c5a31be2;"[..]), Ok((&b"e2;"[..], 12_952_347))); + assert_parse!( + hex_u32(&b"c5a31be201;"[..]), + Ok((&b"01;"[..], 3_315_801_058)) + ); + assert_parse!(hex_u32(&b"ffffffff;"[..]), Ok((&b";"[..], 4_294_967_295))); + assert_parse!(hex_u32(&b"0x1be2;"[..]), Ok((&b"x1be2;"[..], 0))); + assert_parse!(hex_u32(&b"12af"[..]), Ok((&b""[..], 0x12af))); + } + + #[test] + #[cfg(feature = "std")] + fn float_test() { + let mut test_cases = vec![ + "+3.14", + "3.14", + "-3.14", + "0", + "0.0", + "1.", + ".789", + "-.5", + "1e7", + "-1E-7", + ".3e-2", + "1.e4", + "1.2e4", + "12.34", + "-1.234E-12", + "-1.234e-12", + "0.00000000000000000087", + ]; + + for test in test_cases.drain(..) { + let expected32 = str::parse::<f32>(test).unwrap(); + let expected64 = str::parse::<f64>(test).unwrap(); + + println!("now parsing: {} -> {}", test, expected32); + + let larger = format!("{}", test); + assert_parse!(recognize_float(&larger[..]), Ok(("", test))); + + assert_parse!(float(larger.as_bytes()), Ok((&b""[..], expected32))); + assert_parse!(float(&larger[..]), Ok(("", expected32))); + + assert_parse!(double(larger.as_bytes()), Ok((&b""[..], expected64))); + assert_parse!(double(&larger[..]), Ok(("", expected64))); + } + + let remaining_exponent = "-1.234E-"; + assert_parse!( + recognize_float(remaining_exponent), + Err(Err::Failure(("", ErrorKind::Digit))) + ); + + let (_i, nan) = float::<_, ()>("NaN").unwrap(); + assert!(nan.is_nan()); + + let (_i, inf) = float::<_, ()>("inf").unwrap(); + assert!(inf.is_infinite()); + let (_i, inf) = float::<_, ()>("infinite").unwrap(); + assert!(inf.is_infinite()); + } + + #[test] + fn configurable_endianness() { + use crate::number::Endianness; + + fn be_tst16(i: &[u8]) -> IResult<&[u8], u16> { + u16(Endianness::Big)(i) + } + fn le_tst16(i: &[u8]) -> IResult<&[u8], u16> { + u16(Endianness::Little)(i) + } + assert_eq!(be_tst16(&[0x80, 0x00]), Ok((&b""[..], 32_768_u16))); + assert_eq!(le_tst16(&[0x80, 0x00]), Ok((&b""[..], 128_u16))); + + fn be_tst32(i: &[u8]) -> IResult<&[u8], u32> { + u32(Endianness::Big)(i) + } + fn le_tst32(i: &[u8]) -> IResult<&[u8], u32> { + u32(Endianness::Little)(i) + } + assert_eq!( + be_tst32(&[0x12, 0x00, 0x60, 0x00]), + Ok((&b""[..], 302_014_464_u32)) + ); + assert_eq!( + le_tst32(&[0x12, 0x00, 0x60, 0x00]), + Ok((&b""[..], 6_291_474_u32)) + ); + + fn be_tst64(i: &[u8]) -> IResult<&[u8], u64> { + u64(Endianness::Big)(i) + } + fn le_tst64(i: &[u8]) -> IResult<&[u8], u64> { + u64(Endianness::Little)(i) + } + assert_eq!( + be_tst64(&[0x12, 0x00, 0x60, 0x00, 0x12, 0x00, 0x80, 0x00]), + Ok((&b""[..], 1_297_142_246_100_992_000_u64)) + ); + assert_eq!( + le_tst64(&[0x12, 0x00, 0x60, 0x00, 0x12, 0x00, 0x80, 0x00]), + Ok((&b""[..], 36_028_874_334_666_770_u64)) + ); + + fn be_tsti16(i: &[u8]) -> IResult<&[u8], i16> { + i16(Endianness::Big)(i) + } + fn le_tsti16(i: &[u8]) -> IResult<&[u8], i16> { + i16(Endianness::Little)(i) + } + assert_eq!(be_tsti16(&[0x00, 0x80]), Ok((&b""[..], 128_i16))); + assert_eq!(le_tsti16(&[0x00, 0x80]), Ok((&b""[..], -32_768_i16))); + + fn be_tsti32(i: &[u8]) -> IResult<&[u8], i32> { + i32(Endianness::Big)(i) + } + fn le_tsti32(i: &[u8]) -> IResult<&[u8], i32> { + i32(Endianness::Little)(i) + } + assert_eq!( + be_tsti32(&[0x00, 0x12, 0x60, 0x00]), + Ok((&b""[..], 1_204_224_i32)) + ); + assert_eq!( + le_tsti32(&[0x00, 0x12, 0x60, 0x00]), + Ok((&b""[..], 6_296_064_i32)) + ); + + fn be_tsti64(i: &[u8]) -> IResult<&[u8], i64> { + i64(Endianness::Big)(i) + } + fn le_tsti64(i: &[u8]) -> IResult<&[u8], i64> { + i64(Endianness::Little)(i) + } + assert_eq!( + be_tsti64(&[0x00, 0xFF, 0x60, 0x00, 0x12, 0x00, 0x80, 0x00]), + Ok((&b""[..], 71_881_672_479_506_432_i64)) + ); + assert_eq!( + le_tsti64(&[0x00, 0xFF, 0x60, 0x00, 0x12, 0x00, 0x80, 0x00]), + Ok((&b""[..], 36_028_874_334_732_032_i64)) + ); + } + + #[cfg(feature = "std")] + fn parse_f64(i: &str) -> IResult<&str, f64, ()> { + match recognize_float_or_exceptions(i) { + Err(e) => Err(e), + Ok((i, s)) => { + if s.is_empty() { + return Err(Err::Error(())); + } + match s.parse_to() { + Some(n) => Ok((i, n)), + None => Err(Err::Error(())), + } + } + } + } + + proptest! { + #[test] + #[cfg(feature = "std")] + fn floats(s in "\\PC*") { + println!("testing {}", s); + let res1 = parse_f64(&s); + let res2 = double::<_, ()>(s.as_str()); + assert_eq!(res1, res2); + } + } +} diff --git a/third_party/rust/nom/src/number/mod.rs b/third_party/rust/nom/src/number/mod.rs new file mode 100644 index 0000000000..58c3d51b0b --- /dev/null +++ b/third_party/rust/nom/src/number/mod.rs @@ -0,0 +1,15 @@ +//! Parsers recognizing numbers + +pub mod complete; +pub mod streaming; + +/// Configurable endianness +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum Endianness { + /// Big endian + Big, + /// Little endian + Little, + /// Will match the host's endianness + Native, +} diff --git a/third_party/rust/nom/src/number/streaming.rs b/third_party/rust/nom/src/number/streaming.rs new file mode 100644 index 0000000000..b4e856d298 --- /dev/null +++ b/third_party/rust/nom/src/number/streaming.rs @@ -0,0 +1,2206 @@ +//! Parsers recognizing numbers, streaming version + +use crate::branch::alt; +use crate::bytes::streaming::tag; +use crate::character::streaming::{char, digit1, sign}; +use crate::combinator::{cut, map, opt, recognize}; +use crate::error::{ErrorKind, ParseError}; +use crate::internal::*; +use crate::lib::std::ops::{RangeFrom, RangeTo}; +use crate::sequence::{pair, tuple}; +use crate::traits::{ + AsBytes, AsChar, Compare, InputIter, InputLength, InputTake, InputTakeAtPosition, Offset, Slice, +}; + +/// Recognizes an unsigned 1 byte integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::be_u8; +/// +/// let parser = |s| { +/// be_u8::<_, (_, ErrorKind)>(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01abcd"[..]), Ok((&b"\x01abcd"[..], 0x00))); +/// assert_eq!(parser(&b""[..]), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +#[inline] +pub fn be_u8<I, E: ParseError<I>>(input: I) -> IResult<I, u8, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 1; + if input.input_len() < bound { + Err(Err::Incomplete(Needed::new(1))) + } else { + let res = input.iter_elements().next().unwrap(); + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a big endian unsigned 2 bytes integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::be_u16; +/// +/// let parser = |s| { +/// be_u16::<_, (_, ErrorKind)>(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01abcd"[..]), Ok((&b"abcd"[..], 0x0001))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +#[inline] +pub fn be_u16<I, E: ParseError<I>>(input: I) -> IResult<I, u16, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 2; + if input.input_len() < bound { + Err(Err::Incomplete(Needed::new(bound - input.input_len()))) + } else { + let mut res = 0u16; + for byte in input.iter_elements().take(bound) { + res = (res << 8) + byte as u16; + } + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a big endian unsigned 3 byte integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::be_u24; +/// +/// let parser = |s| { +/// be_u24::<_, (_, ErrorKind)>(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01\x02abcd"[..]), Ok((&b"abcd"[..], 0x000102))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(2)))); +/// ``` +#[inline] +pub fn be_u24<I, E: ParseError<I>>(input: I) -> IResult<I, u32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 3; + if input.input_len() < bound { + Err(Err::Incomplete(Needed::new(bound - input.input_len()))) + } else { + let mut res = 0u32; + for byte in input.iter_elements().take(bound) { + res = (res << 8) + byte as u32; + } + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a big endian unsigned 4 bytes integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::be_u32; +/// +/// let parser = |s| { +/// be_u32::<_, (_, ErrorKind)>(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01\x02\x03abcd"[..]), Ok((&b"abcd"[..], 0x00010203))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(3)))); +/// ``` +#[inline] +pub fn be_u32<I, E: ParseError<I>>(input: I) -> IResult<I, u32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 4; + if input.input_len() < bound { + Err(Err::Incomplete(Needed::new(bound - input.input_len()))) + } else { + let mut res = 0u32; + for byte in input.iter_elements().take(bound) { + res = (res << 8) + byte as u32; + } + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a big endian unsigned 8 bytes integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::be_u64; +/// +/// let parser = |s| { +/// be_u64::<_, (_, ErrorKind)>(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcd"[..]), Ok((&b"abcd"[..], 0x0001020304050607))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(7)))); +/// ``` +#[inline] +pub fn be_u64<I, E: ParseError<I>>(input: I) -> IResult<I, u64, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 8; + if input.input_len() < bound { + Err(Err::Incomplete(Needed::new(bound - input.input_len()))) + } else { + let mut res = 0u64; + for byte in input.iter_elements().take(bound) { + res = (res << 8) + byte as u64; + } + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a big endian unsigned 16 bytes integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::be_u128; +/// +/// let parser = |s| { +/// be_u128::<_, (_, ErrorKind)>(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15abcd"[..]), Ok((&b"abcd"[..], 0x00010203040506070809101112131415))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(15)))); +/// ``` +#[inline] +pub fn be_u128<I, E: ParseError<I>>(input: I) -> IResult<I, u128, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 16; + if input.input_len() < bound { + Err(Err::Incomplete(Needed::new(bound - input.input_len()))) + } else { + let mut res = 0u128; + for byte in input.iter_elements().take(bound) { + res = (res << 8) + byte as u128; + } + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a signed 1 byte integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::be_i8; +/// +/// let parser = be_i8::<_, (_, ErrorKind)>; +/// +/// assert_eq!(parser(&b"\x00\x01abcd"[..]), Ok((&b"\x01abcd"[..], 0x00))); +/// assert_eq!(parser(&b""[..]), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +#[inline] +pub fn be_i8<I, E: ParseError<I>>(input: I) -> IResult<I, i8, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + be_u8.map(|x| x as i8).parse(input) +} + +/// Recognizes a big endian signed 2 bytes integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::be_i16; +/// +/// let parser = be_i16::<_, (_, ErrorKind)>; +/// +/// assert_eq!(parser(&b"\x00\x01abcd"[..]), Ok((&b"abcd"[..], 0x0001))); +/// assert_eq!(parser(&b""[..]), Err(Err::Incomplete(Needed::new(2)))); +/// ``` +#[inline] +pub fn be_i16<I, E: ParseError<I>>(input: I) -> IResult<I, i16, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + be_u16.map(|x| x as i16).parse(input) +} + +/// Recognizes a big endian signed 3 bytes integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::be_i24; +/// +/// let parser = be_i24::<_, (_, ErrorKind)>; +/// +/// assert_eq!(parser(&b"\x00\x01\x02abcd"[..]), Ok((&b"abcd"[..], 0x000102))); +/// assert_eq!(parser(&b""[..]), Err(Err::Incomplete(Needed::new(3)))); +/// ``` +#[inline] +pub fn be_i24<I, E: ParseError<I>>(input: I) -> IResult<I, i32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + // Same as the unsigned version but we need to sign-extend manually here + be_u24 + .map(|x| { + if x & 0x80_00_00 != 0 { + (x | 0xff_00_00_00) as i32 + } else { + x as i32 + } + }) + .parse(input) +} + +/// Recognizes a big endian signed 4 bytes integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::be_i32; +/// +/// let parser = be_i32::<_, (_, ErrorKind)>; +/// +/// assert_eq!(parser(&b"\x00\x01\x02\x03abcd"[..]), Ok((&b"abcd"[..], 0x00010203))); +/// assert_eq!(parser(&b""[..]), Err(Err::Incomplete(Needed::new(4)))); +/// ``` +#[inline] +pub fn be_i32<I, E: ParseError<I>>(input: I) -> IResult<I, i32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + be_u32.map(|x| x as i32).parse(input) +} + +/// Recognizes a big endian signed 8 bytes integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::be_i64; +/// +/// let parser = be_i64::<_, (_, ErrorKind)>; +/// +/// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcd"[..]), Ok((&b"abcd"[..], 0x0001020304050607))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(7)))); +/// ``` +#[inline] +pub fn be_i64<I, E: ParseError<I>>(input: I) -> IResult<I, i64, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + be_u64.map(|x| x as i64).parse(input) +} + +/// Recognizes a big endian signed 16 bytes integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::be_i128; +/// +/// let parser = be_i128::<_, (_, ErrorKind)>; +/// +/// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15abcd"[..]), Ok((&b"abcd"[..], 0x00010203040506070809101112131415))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(15)))); +/// ``` +#[inline] +pub fn be_i128<I, E: ParseError<I>>(input: I) -> IResult<I, i128, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + be_u128.map(|x| x as i128).parse(input) +} + +/// Recognizes an unsigned 1 byte integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::le_u8; +/// +/// let parser = le_u8::<_, (_, ErrorKind)>; +/// +/// assert_eq!(parser(&b"\x00\x01abcd"[..]), Ok((&b"\x01abcd"[..], 0x00))); +/// assert_eq!(parser(&b""[..]), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +#[inline] +pub fn le_u8<I, E: ParseError<I>>(input: I) -> IResult<I, u8, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 1; + if input.input_len() < bound { + Err(Err::Incomplete(Needed::new(1))) + } else { + let res = input.iter_elements().next().unwrap(); + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a little endian unsigned 2 bytes integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::le_u16; +/// +/// let parser = |s| { +/// le_u16::<_, (_, ErrorKind)>(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01abcd"[..]), Ok((&b"abcd"[..], 0x0100))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +#[inline] +pub fn le_u16<I, E: ParseError<I>>(input: I) -> IResult<I, u16, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 2; + if input.input_len() < bound { + Err(Err::Incomplete(Needed::new(bound - input.input_len()))) + } else { + let mut res = 0u16; + for (index, byte) in input.iter_indices().take(bound) { + res += (byte as u16) << (8 * index); + } + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a little endian unsigned 3 bytes integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::le_u24; +/// +/// let parser = |s| { +/// le_u24::<_, (_, ErrorKind)>(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01\x02abcd"[..]), Ok((&b"abcd"[..], 0x020100))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(2)))); +/// ``` +#[inline] +pub fn le_u24<I, E: ParseError<I>>(input: I) -> IResult<I, u32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 3; + if input.input_len() < bound { + Err(Err::Incomplete(Needed::new(bound - input.input_len()))) + } else { + let mut res = 0u32; + for (index, byte) in input.iter_indices().take(bound) { + res += (byte as u32) << (8 * index); + } + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a little endian unsigned 4 bytes integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::le_u32; +/// +/// let parser = |s| { +/// le_u32::<_, (_, ErrorKind)>(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01\x02\x03abcd"[..]), Ok((&b"abcd"[..], 0x03020100))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(3)))); +/// ``` +#[inline] +pub fn le_u32<I, E: ParseError<I>>(input: I) -> IResult<I, u32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 4; + if input.input_len() < bound { + Err(Err::Incomplete(Needed::new(bound - input.input_len()))) + } else { + let mut res = 0u32; + for (index, byte) in input.iter_indices().take(bound) { + res += (byte as u32) << (8 * index); + } + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a little endian unsigned 8 bytes integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::le_u64; +/// +/// let parser = |s| { +/// le_u64::<_, (_, ErrorKind)>(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcd"[..]), Ok((&b"abcd"[..], 0x0706050403020100))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(7)))); +/// ``` +#[inline] +pub fn le_u64<I, E: ParseError<I>>(input: I) -> IResult<I, u64, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 8; + if input.input_len() < bound { + Err(Err::Incomplete(Needed::new(bound - input.input_len()))) + } else { + let mut res = 0u64; + for (index, byte) in input.iter_indices().take(bound) { + res += (byte as u64) << (8 * index); + } + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a little endian unsigned 16 bytes integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::le_u128; +/// +/// let parser = |s| { +/// le_u128::<_, (_, ErrorKind)>(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15abcd"[..]), Ok((&b"abcd"[..], 0x15141312111009080706050403020100))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(15)))); +/// ``` +#[inline] +pub fn le_u128<I, E: ParseError<I>>(input: I) -> IResult<I, u128, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 16; + if input.input_len() < bound { + Err(Err::Incomplete(Needed::new(bound - input.input_len()))) + } else { + let mut res = 0u128; + for (index, byte) in input.iter_indices().take(bound) { + res += (byte as u128) << (8 * index); + } + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes a signed 1 byte integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::le_i8; +/// +/// let parser = le_i8::<_, (_, ErrorKind)>; +/// +/// assert_eq!(parser(&b"\x00\x01abcd"[..]), Ok((&b"\x01abcd"[..], 0x00))); +/// assert_eq!(parser(&b""[..]), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +#[inline] +pub fn le_i8<I, E: ParseError<I>>(input: I) -> IResult<I, i8, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + le_u8.map(|x| x as i8).parse(input) +} + +/// Recognizes a little endian signed 2 bytes integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::le_i16; +/// +/// let parser = |s| { +/// le_i16::<_, (_, ErrorKind)>(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01abcd"[..]), Ok((&b"abcd"[..], 0x0100))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +#[inline] +pub fn le_i16<I, E: ParseError<I>>(input: I) -> IResult<I, i16, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + le_u16.map(|x| x as i16).parse(input) +} + +/// Recognizes a little endian signed 3 bytes integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::le_i24; +/// +/// let parser = |s| { +/// le_i24::<_, (_, ErrorKind)>(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01\x02abcd"[..]), Ok((&b"abcd"[..], 0x020100))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(2)))); +/// ``` +#[inline] +pub fn le_i24<I, E: ParseError<I>>(input: I) -> IResult<I, i32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + // Same as the unsigned version but we need to sign-extend manually here + le_u24 + .map(|x| { + if x & 0x80_00_00 != 0 { + (x | 0xff_00_00_00) as i32 + } else { + x as i32 + } + }) + .parse(input) +} + +/// Recognizes a little endian signed 4 bytes integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::le_i32; +/// +/// let parser = |s| { +/// le_i32::<_, (_, ErrorKind)>(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01\x02\x03abcd"[..]), Ok((&b"abcd"[..], 0x03020100))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(3)))); +/// ``` +#[inline] +pub fn le_i32<I, E: ParseError<I>>(input: I) -> IResult<I, i32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + le_u32.map(|x| x as i32).parse(input) +} + +/// Recognizes a little endian signed 8 bytes integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::le_i64; +/// +/// let parser = |s| { +/// le_i64::<_, (_, ErrorKind)>(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcd"[..]), Ok((&b"abcd"[..], 0x0706050403020100))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(7)))); +/// ``` +#[inline] +pub fn le_i64<I, E: ParseError<I>>(input: I) -> IResult<I, i64, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + le_u64.map(|x| x as i64).parse(input) +} + +/// Recognizes a little endian signed 16 bytes integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::le_i128; +/// +/// let parser = |s| { +/// le_i128::<_, (_, ErrorKind)>(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x10\x11\x12\x13\x14\x15abcd"[..]), Ok((&b"abcd"[..], 0x15141312111009080706050403020100))); +/// assert_eq!(parser(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(15)))); +/// ``` +#[inline] +pub fn le_i128<I, E: ParseError<I>>(input: I) -> IResult<I, i128, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + le_u128.map(|x| x as i128).parse(input) +} + +/// Recognizes an unsigned 1 byte integer +/// +/// Note that endianness does not apply to 1 byte numbers. +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::streaming::u8; +/// +/// let parser = |s| { +/// u8::<_, (_, ErrorKind)>(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x03abcefg"[..]), Ok((&b"\x03abcefg"[..], 0x00))); +/// assert_eq!(parser(&b""[..]), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +#[inline] +pub fn u8<I, E: ParseError<I>>(input: I) -> IResult<I, u8, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + let bound: usize = 1; + if input.input_len() < bound { + Err(Err::Incomplete(Needed::new(1))) + } else { + let res = input.iter_elements().next().unwrap(); + + Ok((input.slice(bound..), res)) + } +} + +/// Recognizes an unsigned 2 bytes integer +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian u16 integer, +/// otherwise if `nom::number::Endianness::Little` parse a little endian u16 integer. +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::streaming::u16; +/// +/// let be_u16 = |s| { +/// u16::<_, (_, ErrorKind)>(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_u16(&b"\x00\x03abcefg"[..]), Ok((&b"abcefg"[..], 0x0003))); +/// assert_eq!(be_u16(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(1)))); +/// +/// let le_u16 = |s| { +/// u16::<_, (_, ErrorKind)>(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_u16(&b"\x00\x03abcefg"[..]), Ok((&b"abcefg"[..], 0x0300))); +/// assert_eq!(le_u16(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +#[inline] +pub fn u16<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, u16, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_u16, + crate::number::Endianness::Little => le_u16, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_u16, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_u16, + } +} + +/// Recognizes an unsigned 3 byte integer +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian u24 integer, +/// otherwise if `nom::number::Endianness::Little` parse a little endian u24 integer. +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::streaming::u24; +/// +/// let be_u24 = |s| { +/// u24::<_,(_, ErrorKind)>(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_u24(&b"\x00\x03\x05abcefg"[..]), Ok((&b"abcefg"[..], 0x000305))); +/// assert_eq!(be_u24(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(2)))); +/// +/// let le_u24 = |s| { +/// u24::<_, (_, ErrorKind)>(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_u24(&b"\x00\x03\x05abcefg"[..]), Ok((&b"abcefg"[..], 0x050300))); +/// assert_eq!(le_u24(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(2)))); +/// ``` +#[inline] +pub fn u24<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, u32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_u24, + crate::number::Endianness::Little => le_u24, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_u24, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_u24, + } +} + +/// Recognizes an unsigned 4 byte integer +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian u32 integer, +/// otherwise if `nom::number::Endianness::Little` parse a little endian u32 integer. +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::streaming::u32; +/// +/// let be_u32 = |s| { +/// u32::<_, (_, ErrorKind)>(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_u32(&b"\x00\x03\x05\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x00030507))); +/// assert_eq!(be_u32(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(3)))); +/// +/// let le_u32 = |s| { +/// u32::<_, (_, ErrorKind)>(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_u32(&b"\x00\x03\x05\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x07050300))); +/// assert_eq!(le_u32(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(3)))); +/// ``` +#[inline] +pub fn u32<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, u32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_u32, + crate::number::Endianness::Little => le_u32, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_u32, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_u32, + } +} + +/// Recognizes an unsigned 8 byte integer +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian u64 integer, +/// otherwise if `nom::number::Endianness::Little` parse a little endian u64 integer. +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::streaming::u64; +/// +/// let be_u64 = |s| { +/// u64::<_, (_, ErrorKind)>(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_u64(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x0001020304050607))); +/// assert_eq!(be_u64(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(7)))); +/// +/// let le_u64 = |s| { +/// u64::<_, (_, ErrorKind)>(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_u64(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x0706050403020100))); +/// assert_eq!(le_u64(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(7)))); +/// ``` +#[inline] +pub fn u64<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, u64, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_u64, + crate::number::Endianness::Little => le_u64, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_u64, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_u64, + } +} + +/// Recognizes an unsigned 16 byte integer +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian u128 integer, +/// otherwise if `nom::number::Endianness::Little` parse a little endian u128 integer. +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::streaming::u128; +/// +/// let be_u128 = |s| { +/// u128::<_, (_, ErrorKind)>(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_u128(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x00010203040506070001020304050607))); +/// assert_eq!(be_u128(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(15)))); +/// +/// let le_u128 = |s| { +/// u128::<_, (_, ErrorKind)>(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_u128(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x07060504030201000706050403020100))); +/// assert_eq!(le_u128(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(15)))); +/// ``` +#[inline] +pub fn u128<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, u128, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_u128, + crate::number::Endianness::Little => le_u128, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_u128, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_u128, + } +} + +/// Recognizes a signed 1 byte integer +/// +/// Note that endianness does not apply to 1 byte numbers. +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::streaming::i8; +/// +/// let parser = |s| { +/// i8::<_, (_, ErrorKind)>(s) +/// }; +/// +/// assert_eq!(parser(&b"\x00\x03abcefg"[..]), Ok((&b"\x03abcefg"[..], 0x00))); +/// assert_eq!(parser(&b""[..]), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +#[inline] +pub fn i8<I, E: ParseError<I>>(i: I) -> IResult<I, i8, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + u8.map(|x| x as i8).parse(i) +} + +/// Recognizes a signed 2 byte integer +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian i16 integer, +/// otherwise if `nom::number::Endianness::Little` parse a little endian i16 integer. +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::streaming::i16; +/// +/// let be_i16 = |s| { +/// i16::<_, (_, ErrorKind)>(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_i16(&b"\x00\x03abcefg"[..]), Ok((&b"abcefg"[..], 0x0003))); +/// assert_eq!(be_i16(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(1)))); +/// +/// let le_i16 = |s| { +/// i16::<_, (_, ErrorKind)>(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_i16(&b"\x00\x03abcefg"[..]), Ok((&b"abcefg"[..], 0x0300))); +/// assert_eq!(le_i16(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +#[inline] +pub fn i16<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, i16, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_i16, + crate::number::Endianness::Little => le_i16, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_i16, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_i16, + } +} + +/// Recognizes a signed 3 byte integer +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian i24 integer, +/// otherwise if `nom::number::Endianness::Little` parse a little endian i24 integer. +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::streaming::i24; +/// +/// let be_i24 = |s| { +/// i24::<_, (_, ErrorKind)>(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_i24(&b"\x00\x03\x05abcefg"[..]), Ok((&b"abcefg"[..], 0x000305))); +/// assert_eq!(be_i24(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(2)))); +/// +/// let le_i24 = |s| { +/// i24::<_, (_, ErrorKind)>(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_i24(&b"\x00\x03\x05abcefg"[..]), Ok((&b"abcefg"[..], 0x050300))); +/// assert_eq!(le_i24(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(2)))); +/// ``` +#[inline] +pub fn i24<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, i32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_i24, + crate::number::Endianness::Little => le_i24, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_i24, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_i24, + } +} + +/// Recognizes a signed 4 byte integer +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian i32 integer, +/// otherwise if `nom::number::Endianness::Little` parse a little endian i32 integer. +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::streaming::i32; +/// +/// let be_i32 = |s| { +/// i32::<_, (_, ErrorKind)>(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_i32(&b"\x00\x03\x05\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x00030507))); +/// assert_eq!(be_i32(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(3)))); +/// +/// let le_i32 = |s| { +/// i32::<_, (_, ErrorKind)>(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_i32(&b"\x00\x03\x05\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x07050300))); +/// assert_eq!(le_i32(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(3)))); +/// ``` +#[inline] +pub fn i32<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, i32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_i32, + crate::number::Endianness::Little => le_i32, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_i32, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_i32, + } +} + +/// Recognizes a signed 8 byte integer +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian i64 integer, +/// otherwise if `nom::number::Endianness::Little` parse a little endian i64 integer. +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::streaming::i64; +/// +/// let be_i64 = |s| { +/// i64::<_, (_, ErrorKind)>(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_i64(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x0001020304050607))); +/// assert_eq!(be_i64(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(7)))); +/// +/// let le_i64 = |s| { +/// i64::<_, (_, ErrorKind)>(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_i64(&b"\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x0706050403020100))); +/// assert_eq!(le_i64(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(7)))); +/// ``` +#[inline] +pub fn i64<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, i64, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_i64, + crate::number::Endianness::Little => le_i64, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_i64, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_i64, + } +} + +/// Recognizes a signed 16 byte integer +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian i128 integer, +/// otherwise if `nom::number::Endianness::Little` parse a little endian i128 integer. +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::streaming::i128; +/// +/// let be_i128 = |s| { +/// i128::<_, (_, ErrorKind)>(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_i128(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x00010203040506070001020304050607))); +/// assert_eq!(be_i128(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(15)))); +/// +/// let le_i128 = |s| { +/// i128::<_, (_, ErrorKind)>(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_i128(&b"\x00\x01\x02\x03\x04\x05\x06\x07\x00\x01\x02\x03\x04\x05\x06\x07abcefg"[..]), Ok((&b"abcefg"[..], 0x07060504030201000706050403020100))); +/// assert_eq!(le_i128(&b"\x01"[..]), Err(Err::Incomplete(Needed::new(15)))); +/// ``` +#[inline] +pub fn i128<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, i128, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_i128, + crate::number::Endianness::Little => le_i128, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_i128, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_i128, + } +} + +/// Recognizes a big endian 4 bytes floating point number. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::be_f32; +/// +/// let parser = |s| { +/// be_f32::<_, (_, ErrorKind)>(s) +/// }; +/// +/// assert_eq!(parser(&[0x40, 0x29, 0x00, 0x00][..]), Ok((&b""[..], 2.640625))); +/// assert_eq!(parser(&[0x01][..]), Err(Err::Incomplete(Needed::new(3)))); +/// ``` +#[inline] +pub fn be_f32<I, E: ParseError<I>>(input: I) -> IResult<I, f32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match be_u32(input) { + Err(e) => Err(e), + Ok((i, o)) => Ok((i, f32::from_bits(o))), + } +} + +/// Recognizes a big endian 8 bytes floating point number. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::be_f64; +/// +/// let parser = |s| { +/// be_f64::<_, (_, ErrorKind)>(s) +/// }; +/// +/// assert_eq!(parser(&[0x40, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 12.5))); +/// assert_eq!(parser(&[0x01][..]), Err(Err::Incomplete(Needed::new(7)))); +/// ``` +#[inline] +pub fn be_f64<I, E: ParseError<I>>(input: I) -> IResult<I, f64, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match be_u64(input) { + Err(e) => Err(e), + Ok((i, o)) => Ok((i, f64::from_bits(o))), + } +} + +/// Recognizes a little endian 4 bytes floating point number. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::le_f32; +/// +/// let parser = |s| { +/// le_f32::<_, (_, ErrorKind)>(s) +/// }; +/// +/// assert_eq!(parser(&[0x00, 0x00, 0x48, 0x41][..]), Ok((&b""[..], 12.5))); +/// assert_eq!(parser(&[0x01][..]), Err(Err::Incomplete(Needed::new(3)))); +/// ``` +#[inline] +pub fn le_f32<I, E: ParseError<I>>(input: I) -> IResult<I, f32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match le_u32(input) { + Err(e) => Err(e), + Ok((i, o)) => Ok((i, f32::from_bits(o))), + } +} + +/// Recognizes a little endian 8 bytes floating point number. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::le_f64; +/// +/// let parser = |s| { +/// le_f64::<_, (_, ErrorKind)>(s) +/// }; +/// +/// assert_eq!(parser(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x41][..]), Ok((&b""[..], 3145728.0))); +/// assert_eq!(parser(&[0x01][..]), Err(Err::Incomplete(Needed::new(7)))); +/// ``` +#[inline] +pub fn le_f64<I, E: ParseError<I>>(input: I) -> IResult<I, f64, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match le_u64(input) { + Err(e) => Err(e), + Ok((i, o)) => Ok((i, f64::from_bits(o))), + } +} + +/// Recognizes a 4 byte floating point number +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian f32 float, +/// otherwise if `nom::number::Endianness::Little` parse a little endian f32 float. +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::streaming::f32; +/// +/// let be_f32 = |s| { +/// f32::<_, (_, ErrorKind)>(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_f32(&[0x41, 0x48, 0x00, 0x00][..]), Ok((&b""[..], 12.5))); +/// assert_eq!(be_f32(&b"abc"[..]), Err(Err::Incomplete(Needed::new(1)))); +/// +/// let le_f32 = |s| { +/// f32::<_, (_, ErrorKind)>(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_f32(&[0x00, 0x00, 0x48, 0x41][..]), Ok((&b""[..], 12.5))); +/// assert_eq!(le_f32(&b"abc"[..]), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +#[inline] +pub fn f32<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, f32, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_f32, + crate::number::Endianness::Little => le_f32, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_f32, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_f32, + } +} + +/// Recognizes an 8 byte floating point number +/// +/// If the parameter is `nom::number::Endianness::Big`, parse a big endian f64 float, +/// otherwise if `nom::number::Endianness::Little` parse a little endian f64 float. +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::streaming::f64; +/// +/// let be_f64 = |s| { +/// f64::<_, (_, ErrorKind)>(nom::number::Endianness::Big)(s) +/// }; +/// +/// assert_eq!(be_f64(&[0x40, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 12.5))); +/// assert_eq!(be_f64(&b"abc"[..]), Err(Err::Incomplete(Needed::new(5)))); +/// +/// let le_f64 = |s| { +/// f64::<_, (_, ErrorKind)>(nom::number::Endianness::Little)(s) +/// }; +/// +/// assert_eq!(le_f64(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x29, 0x40][..]), Ok((&b""[..], 12.5))); +/// assert_eq!(le_f64(&b"abc"[..]), Err(Err::Incomplete(Needed::new(5)))); +/// ``` +#[inline] +pub fn f64<I, E: ParseError<I>>(endian: crate::number::Endianness) -> fn(I) -> IResult<I, f64, E> +where + I: Slice<RangeFrom<usize>> + InputIter<Item = u8> + InputLength, +{ + match endian { + crate::number::Endianness::Big => be_f64, + crate::number::Endianness::Little => le_f64, + #[cfg(target_endian = "big")] + crate::number::Endianness::Native => be_f64, + #[cfg(target_endian = "little")] + crate::number::Endianness::Native => le_f64, + } +} + +/// Recognizes a hex-encoded integer. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::hex_u32; +/// +/// let parser = |s| { +/// hex_u32(s) +/// }; +/// +/// assert_eq!(parser(b"01AE;"), Ok((&b";"[..], 0x01AE))); +/// assert_eq!(parser(b"abc"), Err(Err::Incomplete(Needed::new(1)))); +/// assert_eq!(parser(b"ggg"), Err(Err::Error((&b"ggg"[..], ErrorKind::IsA)))); +/// ``` +#[inline] +pub fn hex_u32<'a, E: ParseError<&'a [u8]>>(input: &'a [u8]) -> IResult<&'a [u8], u32, E> { + let (i, o) = crate::bytes::streaming::is_a(&b"0123456789abcdefABCDEF"[..])(input)?; + + // Do not parse more than 8 characters for a u32 + let (parsed, remaining) = if o.len() <= 8 { + (o, i) + } else { + (&input[..8], &input[8..]) + }; + + let res = parsed + .iter() + .rev() + .enumerate() + .map(|(k, &v)| { + let digit = v as char; + digit.to_digit(16).unwrap_or(0) << (k * 4) + }) + .sum(); + + Ok((remaining, res)) +} + +/// Recognizes a floating point number in text format and returns the corresponding part of the input. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if it reaches the end of input. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// use nom::number::streaming::recognize_float; +/// +/// let parser = |s| { +/// recognize_float(s) +/// }; +/// +/// assert_eq!(parser("11e-1;"), Ok((";", "11e-1"))); +/// assert_eq!(parser("123E-02;"), Ok((";", "123E-02"))); +/// assert_eq!(parser("123K-01"), Ok(("K-01", "123"))); +/// assert_eq!(parser("abc"), Err(Err::Error(("abc", ErrorKind::Char)))); +/// ``` +#[rustfmt::skip] +pub fn recognize_float<T, E:ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>>, + T: Clone + Offset, + T: InputIter, + <T as InputIter>::Item: AsChar, + T: InputTakeAtPosition + InputLength, + <T as InputTakeAtPosition>::Item: AsChar +{ + recognize( + tuple(( + opt(alt((char('+'), char('-')))), + alt(( + map(tuple((digit1, opt(pair(char('.'), opt(digit1))))), |_| ()), + map(tuple((char('.'), digit1)), |_| ()) + )), + opt(tuple(( + alt((char('e'), char('E'))), + opt(alt((char('+'), char('-')))), + cut(digit1) + ))) + )) + )(input) +} + +// workaround until issues with minimal-lexical are fixed +#[doc(hidden)] +pub fn recognize_float_or_exceptions<T, E: ParseError<T>>(input: T) -> IResult<T, T, E> +where + T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>>, + T: Clone + Offset, + T: InputIter + InputTake + InputLength + Compare<&'static str>, + <T as InputIter>::Item: AsChar, + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, +{ + alt(( + |i: T| { + recognize_float::<_, E>(i.clone()).map_err(|e| match e { + crate::Err::Error(_) => crate::Err::Error(E::from_error_kind(i, ErrorKind::Float)), + crate::Err::Failure(_) => crate::Err::Failure(E::from_error_kind(i, ErrorKind::Float)), + crate::Err::Incomplete(needed) => crate::Err::Incomplete(needed), + }) + }, + |i: T| { + crate::bytes::streaming::tag_no_case::<_, _, E>("nan")(i.clone()) + .map_err(|_| crate::Err::Error(E::from_error_kind(i, ErrorKind::Float))) + }, + |i: T| { + crate::bytes::streaming::tag_no_case::<_, _, E>("inf")(i.clone()) + .map_err(|_| crate::Err::Error(E::from_error_kind(i, ErrorKind::Float))) + }, + |i: T| { + crate::bytes::streaming::tag_no_case::<_, _, E>("infinity")(i.clone()) + .map_err(|_| crate::Err::Error(E::from_error_kind(i, ErrorKind::Float))) + }, + ))(input) +} + +/// Recognizes a floating point number in text format +/// +/// It returns a tuple of (`sign`, `integer part`, `fraction part` and `exponent`) of the input +/// data. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// +pub fn recognize_float_parts<T, E: ParseError<T>>(input: T) -> IResult<T, (bool, T, T, i32), E> +where + T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>>, + T: Clone + Offset, + T: InputIter + crate::traits::ParseTo<i32>, + <T as InputIter>::Item: AsChar, + T: InputTakeAtPosition + InputTake + InputLength, + <T as InputTakeAtPosition>::Item: AsChar, + T: for<'a> Compare<&'a [u8]>, + T: AsBytes, +{ + let (i, sign) = sign(input.clone())?; + + //let (i, zeroes) = take_while(|c: <T as InputTakeAtPosition>::Item| c.as_char() == '0')(i)?; + let (i, zeroes) = match i.as_bytes().iter().position(|c| *c != b'0') { + Some(index) => i.take_split(index), + None => i.take_split(i.input_len()), + }; + + //let (i, mut integer) = digit0(i)?; + let (i, mut integer) = match i + .as_bytes() + .iter() + .position(|c| !(*c >= b'0' && *c <= b'9')) + { + Some(index) => i.take_split(index), + None => i.take_split(i.input_len()), + }; + + if integer.input_len() == 0 && zeroes.input_len() > 0 { + // keep the last zero if integer is empty + integer = zeroes.slice(zeroes.input_len() - 1..); + } + + let (i, opt_dot) = opt(tag(&b"."[..]))(i)?; + let (i, fraction) = if opt_dot.is_none() { + let i2 = i.clone(); + (i2, i.slice(..0)) + } else { + // match number, trim right zeroes + let mut zero_count = 0usize; + let mut position = None; + for (pos, c) in i.as_bytes().iter().enumerate() { + if *c >= b'0' && *c <= b'9' { + if *c == b'0' { + zero_count += 1; + } else { + zero_count = 0; + } + } else { + position = Some(pos); + break; + } + } + + let position = match position { + Some(p) => p, + None => return Err(Err::Incomplete(Needed::new(1))), + }; + + let index = if zero_count == 0 { + position + } else if zero_count == position { + position - zero_count + 1 + } else { + position - zero_count + }; + + (i.slice(position..), i.slice(..index)) + }; + + if integer.input_len() == 0 && fraction.input_len() == 0 { + return Err(Err::Error(E::from_error_kind(input, ErrorKind::Float))); + } + + let i2 = i.clone(); + let (i, e) = match i.as_bytes().iter().next() { + Some(b'e') => (i.slice(1..), true), + Some(b'E') => (i.slice(1..), true), + _ => (i, false), + }; + + let (i, exp) = if e { + cut(crate::character::streaming::i32)(i)? + } else { + (i2, 0) + }; + + Ok((i, (sign, integer, fraction, exp))) +} + +/// Recognizes floating point number in text format and returns a f32. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::float; +/// +/// let parser = |s| { +/// float(s) +/// }; +/// +/// assert_eq!(parser("11e-1"), Ok(("", 1.1))); +/// assert_eq!(parser("123E-02"), Ok(("", 1.23))); +/// assert_eq!(parser("123K-01"), Ok(("K-01", 123.0))); +/// assert_eq!(parser("abc"), Err(Err::Error(("abc", ErrorKind::Float)))); +/// ``` +pub fn float<T, E: ParseError<T>>(input: T) -> IResult<T, f32, E> +where + T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake + crate::traits::ParseTo<f32> + Compare<&'static str>, + <T as InputIter>::Item: AsChar, + <T as InputIter>::IterElem: Clone, + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + /* + let (i, (sign, integer, fraction, exponent)) = recognize_float_parts(input)?; + + let mut float: f32 = minimal_lexical::parse_float( + integer.as_bytes().iter(), + fraction.as_bytes().iter(), + exponent, + ); + if !sign { + float = -float; + } + + Ok((i, float)) + */ + let (i, s) = recognize_float_or_exceptions(input)?; + match s.parse_to() { + Some(f) => Ok((i, f)), + None => Err(crate::Err::Error(E::from_error_kind( + i, + crate::error::ErrorKind::Float, + ))), + } +} + +/// Recognizes floating point number in text format and returns a f64. +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there is not enough data. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::number::complete::double; +/// +/// let parser = |s| { +/// double(s) +/// }; +/// +/// assert_eq!(parser("11e-1"), Ok(("", 1.1))); +/// assert_eq!(parser("123E-02"), Ok(("", 1.23))); +/// assert_eq!(parser("123K-01"), Ok(("K-01", 123.0))); +/// assert_eq!(parser("abc"), Err(Err::Error(("abc", ErrorKind::Float)))); +/// ``` +pub fn double<T, E: ParseError<T>>(input: T) -> IResult<T, f64, E> +where + T: Slice<RangeFrom<usize>> + Slice<RangeTo<usize>>, + T: Clone + Offset, + T: InputIter + InputLength + InputTake + crate::traits::ParseTo<f64> + Compare<&'static str>, + <T as InputIter>::Item: AsChar, + <T as InputIter>::IterElem: Clone, + T: InputTakeAtPosition, + <T as InputTakeAtPosition>::Item: AsChar, + T: AsBytes, + T: for<'a> Compare<&'a [u8]>, +{ + /* + let (i, (sign, integer, fraction, exponent)) = recognize_float_parts(input)?; + + let mut float: f64 = minimal_lexical::parse_float( + integer.as_bytes().iter(), + fraction.as_bytes().iter(), + exponent, + ); + if !sign { + float = -float; + } + + Ok((i, float)) + */ + let (i, s) = recognize_float_or_exceptions(input)?; + match s.parse_to() { + Some(f) => Ok((i, f)), + None => Err(crate::Err::Error(E::from_error_kind( + i, + crate::error::ErrorKind::Float, + ))), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::error::ErrorKind; + use crate::internal::{Err, Needed}; + use proptest::prelude::*; + + macro_rules! assert_parse( + ($left: expr, $right: expr) => { + let res: $crate::IResult<_, _, (_, ErrorKind)> = $left; + assert_eq!(res, $right); + }; + ); + + #[test] + fn i8_tests() { + assert_parse!(be_i8(&[0x00][..]), Ok((&b""[..], 0))); + assert_parse!(be_i8(&[0x7f][..]), Ok((&b""[..], 127))); + assert_parse!(be_i8(&[0xff][..]), Ok((&b""[..], -1))); + assert_parse!(be_i8(&[0x80][..]), Ok((&b""[..], -128))); + assert_parse!(be_i8(&[][..]), Err(Err::Incomplete(Needed::new(1)))); + } + + #[test] + fn i16_tests() { + assert_parse!(be_i16(&[0x00, 0x00][..]), Ok((&b""[..], 0))); + assert_parse!(be_i16(&[0x7f, 0xff][..]), Ok((&b""[..], 32_767_i16))); + assert_parse!(be_i16(&[0xff, 0xff][..]), Ok((&b""[..], -1))); + assert_parse!(be_i16(&[0x80, 0x00][..]), Ok((&b""[..], -32_768_i16))); + assert_parse!(be_i16(&[][..]), Err(Err::Incomplete(Needed::new(2)))); + assert_parse!(be_i16(&[0x00][..]), Err(Err::Incomplete(Needed::new(1)))); + } + + #[test] + fn u24_tests() { + assert_parse!(be_u24(&[0x00, 0x00, 0x00][..]), Ok((&b""[..], 0))); + assert_parse!(be_u24(&[0x00, 0xFF, 0xFF][..]), Ok((&b""[..], 65_535_u32))); + assert_parse!( + be_u24(&[0x12, 0x34, 0x56][..]), + Ok((&b""[..], 1_193_046_u32)) + ); + assert_parse!(be_u24(&[][..]), Err(Err::Incomplete(Needed::new(3)))); + assert_parse!(be_u24(&[0x00][..]), Err(Err::Incomplete(Needed::new(2)))); + assert_parse!( + be_u24(&[0x00, 0x00][..]), + Err(Err::Incomplete(Needed::new(1))) + ); + } + + #[test] + fn i24_tests() { + assert_parse!(be_i24(&[0xFF, 0xFF, 0xFF][..]), Ok((&b""[..], -1_i32))); + assert_parse!(be_i24(&[0xFF, 0x00, 0x00][..]), Ok((&b""[..], -65_536_i32))); + assert_parse!( + be_i24(&[0xED, 0xCB, 0xAA][..]), + Ok((&b""[..], -1_193_046_i32)) + ); + assert_parse!(be_i24(&[][..]), Err(Err::Incomplete(Needed::new(3)))); + assert_parse!(be_i24(&[0x00][..]), Err(Err::Incomplete(Needed::new(2)))); + assert_parse!( + be_i24(&[0x00, 0x00][..]), + Err(Err::Incomplete(Needed::new(1))) + ); + } + + #[test] + fn i32_tests() { + assert_parse!(be_i32(&[0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 0))); + assert_parse!( + be_i32(&[0x7f, 0xff, 0xff, 0xff][..]), + Ok((&b""[..], 2_147_483_647_i32)) + ); + assert_parse!(be_i32(&[0xff, 0xff, 0xff, 0xff][..]), Ok((&b""[..], -1))); + assert_parse!( + be_i32(&[0x80, 0x00, 0x00, 0x00][..]), + Ok((&b""[..], -2_147_483_648_i32)) + ); + assert_parse!(be_i32(&[][..]), Err(Err::Incomplete(Needed::new(4)))); + assert_parse!(be_i32(&[0x00][..]), Err(Err::Incomplete(Needed::new(3)))); + assert_parse!( + be_i32(&[0x00, 0x00][..]), + Err(Err::Incomplete(Needed::new(2))) + ); + assert_parse!( + be_i32(&[0x00, 0x00, 0x00][..]), + Err(Err::Incomplete(Needed::new(1))) + ); + } + + #[test] + fn i64_tests() { + assert_parse!( + be_i64(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + Ok((&b""[..], 0)) + ); + assert_parse!( + be_i64(&[0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff][..]), + Ok((&b""[..], 9_223_372_036_854_775_807_i64)) + ); + assert_parse!( + be_i64(&[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff][..]), + Ok((&b""[..], -1)) + ); + assert_parse!( + be_i64(&[0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + Ok((&b""[..], -9_223_372_036_854_775_808_i64)) + ); + assert_parse!(be_i64(&[][..]), Err(Err::Incomplete(Needed::new(8)))); + assert_parse!(be_i64(&[0x00][..]), Err(Err::Incomplete(Needed::new(7)))); + assert_parse!( + be_i64(&[0x00, 0x00][..]), + Err(Err::Incomplete(Needed::new(6))) + ); + assert_parse!( + be_i64(&[0x00, 0x00, 0x00][..]), + Err(Err::Incomplete(Needed::new(5))) + ); + assert_parse!( + be_i64(&[0x00, 0x00, 0x00, 0x00][..]), + Err(Err::Incomplete(Needed::new(4))) + ); + assert_parse!( + be_i64(&[0x00, 0x00, 0x00, 0x00, 0x00][..]), + Err(Err::Incomplete(Needed::new(3))) + ); + assert_parse!( + be_i64(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + Err(Err::Incomplete(Needed::new(2))) + ); + assert_parse!( + be_i64(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + Err(Err::Incomplete(Needed::new(1))) + ); + } + + #[test] + fn i128_tests() { + assert_parse!( + be_i128( + &[ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00 + ][..] + ), + Ok((&b""[..], 0)) + ); + assert_parse!( + be_i128( + &[ + 0x7f, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff + ][..] + ), + Ok(( + &b""[..], + 170_141_183_460_469_231_731_687_303_715_884_105_727_i128 + )) + ); + assert_parse!( + be_i128( + &[ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff + ][..] + ), + Ok((&b""[..], -1)) + ); + assert_parse!( + be_i128( + &[ + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00 + ][..] + ), + Ok(( + &b""[..], + -170_141_183_460_469_231_731_687_303_715_884_105_728_i128 + )) + ); + assert_parse!(be_i128(&[][..]), Err(Err::Incomplete(Needed::new(16)))); + assert_parse!(be_i128(&[0x00][..]), Err(Err::Incomplete(Needed::new(15)))); + assert_parse!( + be_i128(&[0x00, 0x00][..]), + Err(Err::Incomplete(Needed::new(14))) + ); + assert_parse!( + be_i128(&[0x00, 0x00, 0x00][..]), + Err(Err::Incomplete(Needed::new(13))) + ); + assert_parse!( + be_i128(&[0x00, 0x00, 0x00, 0x00][..]), + Err(Err::Incomplete(Needed::new(12))) + ); + assert_parse!( + be_i128(&[0x00, 0x00, 0x00, 0x00, 0x00][..]), + Err(Err::Incomplete(Needed::new(11))) + ); + assert_parse!( + be_i128(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + Err(Err::Incomplete(Needed::new(10))) + ); + assert_parse!( + be_i128(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + Err(Err::Incomplete(Needed::new(9))) + ); + assert_parse!( + be_i128(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + Err(Err::Incomplete(Needed::new(8))) + ); + assert_parse!( + be_i128(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + Err(Err::Incomplete(Needed::new(7))) + ); + assert_parse!( + be_i128(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + Err(Err::Incomplete(Needed::new(6))) + ); + assert_parse!( + be_i128(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + Err(Err::Incomplete(Needed::new(5))) + ); + assert_parse!( + be_i128(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + Err(Err::Incomplete(Needed::new(4))) + ); + assert_parse!( + be_i128(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + Err(Err::Incomplete(Needed::new(3))) + ); + assert_parse!( + be_i128( + &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..] + ), + Err(Err::Incomplete(Needed::new(2))) + ); + assert_parse!( + be_i128( + &[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00] + [..] + ), + Err(Err::Incomplete(Needed::new(1))) + ); + } + + #[test] + fn le_i8_tests() { + assert_parse!(le_i8(&[0x00][..]), Ok((&b""[..], 0))); + assert_parse!(le_i8(&[0x7f][..]), Ok((&b""[..], 127))); + assert_parse!(le_i8(&[0xff][..]), Ok((&b""[..], -1))); + assert_parse!(le_i8(&[0x80][..]), Ok((&b""[..], -128))); + } + + #[test] + fn le_i16_tests() { + assert_parse!(le_i16(&[0x00, 0x00][..]), Ok((&b""[..], 0))); + assert_parse!(le_i16(&[0xff, 0x7f][..]), Ok((&b""[..], 32_767_i16))); + assert_parse!(le_i16(&[0xff, 0xff][..]), Ok((&b""[..], -1))); + assert_parse!(le_i16(&[0x00, 0x80][..]), Ok((&b""[..], -32_768_i16))); + } + + #[test] + fn le_u24_tests() { + assert_parse!(le_u24(&[0x00, 0x00, 0x00][..]), Ok((&b""[..], 0))); + assert_parse!(le_u24(&[0xFF, 0xFF, 0x00][..]), Ok((&b""[..], 65_535_u32))); + assert_parse!( + le_u24(&[0x56, 0x34, 0x12][..]), + Ok((&b""[..], 1_193_046_u32)) + ); + } + + #[test] + fn le_i24_tests() { + assert_parse!(le_i24(&[0xFF, 0xFF, 0xFF][..]), Ok((&b""[..], -1_i32))); + assert_parse!(le_i24(&[0x00, 0x00, 0xFF][..]), Ok((&b""[..], -65_536_i32))); + assert_parse!( + le_i24(&[0xAA, 0xCB, 0xED][..]), + Ok((&b""[..], -1_193_046_i32)) + ); + } + + #[test] + fn le_i32_tests() { + assert_parse!(le_i32(&[0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 0))); + assert_parse!( + le_i32(&[0xff, 0xff, 0xff, 0x7f][..]), + Ok((&b""[..], 2_147_483_647_i32)) + ); + assert_parse!(le_i32(&[0xff, 0xff, 0xff, 0xff][..]), Ok((&b""[..], -1))); + assert_parse!( + le_i32(&[0x00, 0x00, 0x00, 0x80][..]), + Ok((&b""[..], -2_147_483_648_i32)) + ); + } + + #[test] + fn le_i64_tests() { + assert_parse!( + le_i64(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + Ok((&b""[..], 0)) + ); + assert_parse!( + le_i64(&[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f][..]), + Ok((&b""[..], 9_223_372_036_854_775_807_i64)) + ); + assert_parse!( + le_i64(&[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff][..]), + Ok((&b""[..], -1)) + ); + assert_parse!( + le_i64(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80][..]), + Ok((&b""[..], -9_223_372_036_854_775_808_i64)) + ); + } + + #[test] + fn le_i128_tests() { + assert_parse!( + le_i128( + &[ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00 + ][..] + ), + Ok((&b""[..], 0)) + ); + assert_parse!( + le_i128( + &[ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x7f + ][..] + ), + Ok(( + &b""[..], + 170_141_183_460_469_231_731_687_303_715_884_105_727_i128 + )) + ); + assert_parse!( + le_i128( + &[ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff + ][..] + ), + Ok((&b""[..], -1)) + ); + assert_parse!( + le_i128( + &[ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x80 + ][..] + ), + Ok(( + &b""[..], + -170_141_183_460_469_231_731_687_303_715_884_105_728_i128 + )) + ); + } + + #[test] + fn be_f32_tests() { + assert_parse!(be_f32(&[0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 0_f32))); + assert_parse!( + be_f32(&[0x4d, 0x31, 0x1f, 0xd8][..]), + Ok((&b""[..], 185_728_392_f32)) + ); + } + + #[test] + fn be_f64_tests() { + assert_parse!( + be_f64(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + Ok((&b""[..], 0_f64)) + ); + assert_parse!( + be_f64(&[0x41, 0xa6, 0x23, 0xfb, 0x10, 0x00, 0x00, 0x00][..]), + Ok((&b""[..], 185_728_392_f64)) + ); + } + + #[test] + fn le_f32_tests() { + assert_parse!(le_f32(&[0x00, 0x00, 0x00, 0x00][..]), Ok((&b""[..], 0_f32))); + assert_parse!( + le_f32(&[0xd8, 0x1f, 0x31, 0x4d][..]), + Ok((&b""[..], 185_728_392_f32)) + ); + } + + #[test] + fn le_f64_tests() { + assert_parse!( + le_f64(&[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00][..]), + Ok((&b""[..], 0_f64)) + ); + assert_parse!( + le_f64(&[0x00, 0x00, 0x00, 0x10, 0xfb, 0x23, 0xa6, 0x41][..]), + Ok((&b""[..], 185_728_392_f64)) + ); + } + + #[test] + fn hex_u32_tests() { + assert_parse!( + hex_u32(&b";"[..]), + Err(Err::Error(error_position!(&b";"[..], ErrorKind::IsA))) + ); + assert_parse!(hex_u32(&b"ff;"[..]), Ok((&b";"[..], 255))); + assert_parse!(hex_u32(&b"1be2;"[..]), Ok((&b";"[..], 7138))); + assert_parse!(hex_u32(&b"c5a31be2;"[..]), Ok((&b";"[..], 3_315_801_058))); + assert_parse!(hex_u32(&b"C5A31be2;"[..]), Ok((&b";"[..], 3_315_801_058))); + assert_parse!(hex_u32(&b"00c5a31be2;"[..]), Ok((&b"e2;"[..], 12_952_347))); + assert_parse!( + hex_u32(&b"c5a31be201;"[..]), + Ok((&b"01;"[..], 3_315_801_058)) + ); + assert_parse!(hex_u32(&b"ffffffff;"[..]), Ok((&b";"[..], 4_294_967_295))); + assert_parse!(hex_u32(&b"0x1be2;"[..]), Ok((&b"x1be2;"[..], 0))); + assert_parse!(hex_u32(&b"12af"[..]), Err(Err::Incomplete(Needed::new(1)))); + } + + #[test] + #[cfg(feature = "std")] + fn float_test() { + let mut test_cases = vec![ + "+3.14", + "3.14", + "-3.14", + "0", + "0.0", + "1.", + ".789", + "-.5", + "1e7", + "-1E-7", + ".3e-2", + "1.e4", + "1.2e4", + "12.34", + "-1.234E-12", + "-1.234e-12", + "0.00000000000000000087", + ]; + + for test in test_cases.drain(..) { + let expected32 = str::parse::<f32>(test).unwrap(); + let expected64 = str::parse::<f64>(test).unwrap(); + + println!("now parsing: {} -> {}", test, expected32); + + let larger = format!("{};", test); + assert_parse!(recognize_float(&larger[..]), Ok((";", test))); + + assert_parse!(float(larger.as_bytes()), Ok((&b";"[..], expected32))); + assert_parse!(float(&larger[..]), Ok((";", expected32))); + + assert_parse!(double(larger.as_bytes()), Ok((&b";"[..], expected64))); + assert_parse!(double(&larger[..]), Ok((";", expected64))); + } + + let remaining_exponent = "-1.234E-"; + assert_parse!( + recognize_float(remaining_exponent), + Err(Err::Incomplete(Needed::new(1))) + ); + + let (_i, nan) = float::<_, ()>("NaN").unwrap(); + assert!(nan.is_nan()); + + let (_i, inf) = float::<_, ()>("inf").unwrap(); + assert!(inf.is_infinite()); + let (_i, inf) = float::<_, ()>("infinite").unwrap(); + assert!(inf.is_infinite()); + } + + #[test] + fn configurable_endianness() { + use crate::number::Endianness; + + fn be_tst16(i: &[u8]) -> IResult<&[u8], u16> { + u16(Endianness::Big)(i) + } + fn le_tst16(i: &[u8]) -> IResult<&[u8], u16> { + u16(Endianness::Little)(i) + } + assert_eq!(be_tst16(&[0x80, 0x00]), Ok((&b""[..], 32_768_u16))); + assert_eq!(le_tst16(&[0x80, 0x00]), Ok((&b""[..], 128_u16))); + + fn be_tst32(i: &[u8]) -> IResult<&[u8], u32> { + u32(Endianness::Big)(i) + } + fn le_tst32(i: &[u8]) -> IResult<&[u8], u32> { + u32(Endianness::Little)(i) + } + assert_eq!( + be_tst32(&[0x12, 0x00, 0x60, 0x00]), + Ok((&b""[..], 302_014_464_u32)) + ); + assert_eq!( + le_tst32(&[0x12, 0x00, 0x60, 0x00]), + Ok((&b""[..], 6_291_474_u32)) + ); + + fn be_tst64(i: &[u8]) -> IResult<&[u8], u64> { + u64(Endianness::Big)(i) + } + fn le_tst64(i: &[u8]) -> IResult<&[u8], u64> { + u64(Endianness::Little)(i) + } + assert_eq!( + be_tst64(&[0x12, 0x00, 0x60, 0x00, 0x12, 0x00, 0x80, 0x00]), + Ok((&b""[..], 1_297_142_246_100_992_000_u64)) + ); + assert_eq!( + le_tst64(&[0x12, 0x00, 0x60, 0x00, 0x12, 0x00, 0x80, 0x00]), + Ok((&b""[..], 36_028_874_334_666_770_u64)) + ); + + fn be_tsti16(i: &[u8]) -> IResult<&[u8], i16> { + i16(Endianness::Big)(i) + } + fn le_tsti16(i: &[u8]) -> IResult<&[u8], i16> { + i16(Endianness::Little)(i) + } + assert_eq!(be_tsti16(&[0x00, 0x80]), Ok((&b""[..], 128_i16))); + assert_eq!(le_tsti16(&[0x00, 0x80]), Ok((&b""[..], -32_768_i16))); + + fn be_tsti32(i: &[u8]) -> IResult<&[u8], i32> { + i32(Endianness::Big)(i) + } + fn le_tsti32(i: &[u8]) -> IResult<&[u8], i32> { + i32(Endianness::Little)(i) + } + assert_eq!( + be_tsti32(&[0x00, 0x12, 0x60, 0x00]), + Ok((&b""[..], 1_204_224_i32)) + ); + assert_eq!( + le_tsti32(&[0x00, 0x12, 0x60, 0x00]), + Ok((&b""[..], 6_296_064_i32)) + ); + + fn be_tsti64(i: &[u8]) -> IResult<&[u8], i64> { + i64(Endianness::Big)(i) + } + fn le_tsti64(i: &[u8]) -> IResult<&[u8], i64> { + i64(Endianness::Little)(i) + } + assert_eq!( + be_tsti64(&[0x00, 0xFF, 0x60, 0x00, 0x12, 0x00, 0x80, 0x00]), + Ok((&b""[..], 71_881_672_479_506_432_i64)) + ); + assert_eq!( + le_tsti64(&[0x00, 0xFF, 0x60, 0x00, 0x12, 0x00, 0x80, 0x00]), + Ok((&b""[..], 36_028_874_334_732_032_i64)) + ); + } + + #[cfg(feature = "std")] + fn parse_f64(i: &str) -> IResult<&str, f64, ()> { + use crate::traits::ParseTo; + match recognize_float_or_exceptions(i) { + Err(e) => Err(e), + Ok((i, s)) => { + if s.is_empty() { + return Err(Err::Error(())); + } + match s.parse_to() { + Some(n) => Ok((i, n)), + None => Err(Err::Error(())), + } + } + } + } + + proptest! { + #[test] + #[cfg(feature = "std")] + fn floats(s in "\\PC*") { + println!("testing {}", s); + let res1 = parse_f64(&s); + let res2 = double::<_, ()>(s.as_str()); + assert_eq!(res1, res2); + } + } +} diff --git a/third_party/rust/nom/src/sequence/mod.rs b/third_party/rust/nom/src/sequence/mod.rs new file mode 100644 index 0000000000..735ab45cc7 --- /dev/null +++ b/third_party/rust/nom/src/sequence/mod.rs @@ -0,0 +1,279 @@ +//! Combinators applying parsers in sequence + +#[cfg(test)] +mod tests; + +use crate::error::ParseError; +use crate::internal::{IResult, Parser}; + +/// Gets an object from the first parser, +/// then gets another object from the second parser. +/// +/// # Arguments +/// * `first` The first parser to apply. +/// * `second` The second parser to apply. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::sequence::pair; +/// use nom::bytes::complete::tag; +/// +/// let mut parser = pair(tag("abc"), tag("efg")); +/// +/// assert_eq!(parser("abcefg"), Ok(("", ("abc", "efg")))); +/// assert_eq!(parser("abcefghij"), Ok(("hij", ("abc", "efg")))); +/// assert_eq!(parser(""), Err(Err::Error(("", ErrorKind::Tag)))); +/// assert_eq!(parser("123"), Err(Err::Error(("123", ErrorKind::Tag)))); +/// ``` +pub fn pair<I, O1, O2, E: ParseError<I>, F, G>( + mut first: F, + mut second: G, +) -> impl FnMut(I) -> IResult<I, (O1, O2), E> +where + F: Parser<I, O1, E>, + G: Parser<I, O2, E>, +{ + move |input: I| { + let (input, o1) = first.parse(input)?; + second.parse(input).map(|(i, o2)| (i, (o1, o2))) + } +} + +/// Matches an object from the first parser and discards it, +/// then gets an object from the second parser. +/// +/// # Arguments +/// * `first` The opening parser. +/// * `second` The second parser to get object. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::sequence::preceded; +/// use nom::bytes::complete::tag; +/// +/// let mut parser = preceded(tag("abc"), tag("efg")); +/// +/// assert_eq!(parser("abcefg"), Ok(("", "efg"))); +/// assert_eq!(parser("abcefghij"), Ok(("hij", "efg"))); +/// assert_eq!(parser(""), Err(Err::Error(("", ErrorKind::Tag)))); +/// assert_eq!(parser("123"), Err(Err::Error(("123", ErrorKind::Tag)))); +/// ``` +pub fn preceded<I, O1, O2, E: ParseError<I>, F, G>( + mut first: F, + mut second: G, +) -> impl FnMut(I) -> IResult<I, O2, E> +where + F: Parser<I, O1, E>, + G: Parser<I, O2, E>, +{ + move |input: I| { + let (input, _) = first.parse(input)?; + second.parse(input) + } +} + +/// Gets an object from the first parser, +/// then matches an object from the second parser and discards it. +/// +/// # Arguments +/// * `first` The first parser to apply. +/// * `second` The second parser to match an object. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::sequence::terminated; +/// use nom::bytes::complete::tag; +/// +/// let mut parser = terminated(tag("abc"), tag("efg")); +/// +/// assert_eq!(parser("abcefg"), Ok(("", "abc"))); +/// assert_eq!(parser("abcefghij"), Ok(("hij", "abc"))); +/// assert_eq!(parser(""), Err(Err::Error(("", ErrorKind::Tag)))); +/// assert_eq!(parser("123"), Err(Err::Error(("123", ErrorKind::Tag)))); +/// ``` +pub fn terminated<I, O1, O2, E: ParseError<I>, F, G>( + mut first: F, + mut second: G, +) -> impl FnMut(I) -> IResult<I, O1, E> +where + F: Parser<I, O1, E>, + G: Parser<I, O2, E>, +{ + move |input: I| { + let (input, o1) = first.parse(input)?; + second.parse(input).map(|(i, _)| (i, o1)) + } +} + +/// Gets an object from the first parser, +/// then matches an object from the sep_parser and discards it, +/// then gets another object from the second parser. +/// +/// # Arguments +/// * `first` The first parser to apply. +/// * `sep` The separator parser to apply. +/// * `second` The second parser to apply. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::sequence::separated_pair; +/// use nom::bytes::complete::tag; +/// +/// let mut parser = separated_pair(tag("abc"), tag("|"), tag("efg")); +/// +/// assert_eq!(parser("abc|efg"), Ok(("", ("abc", "efg")))); +/// assert_eq!(parser("abc|efghij"), Ok(("hij", ("abc", "efg")))); +/// assert_eq!(parser(""), Err(Err::Error(("", ErrorKind::Tag)))); +/// assert_eq!(parser("123"), Err(Err::Error(("123", ErrorKind::Tag)))); +/// ``` +pub fn separated_pair<I, O1, O2, O3, E: ParseError<I>, F, G, H>( + mut first: F, + mut sep: G, + mut second: H, +) -> impl FnMut(I) -> IResult<I, (O1, O3), E> +where + F: Parser<I, O1, E>, + G: Parser<I, O2, E>, + H: Parser<I, O3, E>, +{ + move |input: I| { + let (input, o1) = first.parse(input)?; + let (input, _) = sep.parse(input)?; + second.parse(input).map(|(i, o2)| (i, (o1, o2))) + } +} + +/// Matches an object from the first parser and discards it, +/// then gets an object from the second parser, +/// and finally matches an object from the third parser and discards it. +/// +/// # Arguments +/// * `first` The first parser to apply and discard. +/// * `second` The second parser to apply. +/// * `third` The third parser to apply and discard. +/// +/// ```rust +/// # use nom::{Err, error::ErrorKind, Needed}; +/// # use nom::Needed::Size; +/// use nom::sequence::delimited; +/// use nom::bytes::complete::tag; +/// +/// let mut parser = delimited(tag("("), tag("abc"), tag(")")); +/// +/// assert_eq!(parser("(abc)"), Ok(("", "abc"))); +/// assert_eq!(parser("(abc)def"), Ok(("def", "abc"))); +/// assert_eq!(parser(""), Err(Err::Error(("", ErrorKind::Tag)))); +/// assert_eq!(parser("123"), Err(Err::Error(("123", ErrorKind::Tag)))); +/// ``` +pub fn delimited<I, O1, O2, O3, E: ParseError<I>, F, G, H>( + mut first: F, + mut second: G, + mut third: H, +) -> impl FnMut(I) -> IResult<I, O2, E> +where + F: Parser<I, O1, E>, + G: Parser<I, O2, E>, + H: Parser<I, O3, E>, +{ + move |input: I| { + let (input, _) = first.parse(input)?; + let (input, o2) = second.parse(input)?; + third.parse(input).map(|(i, _)| (i, o2)) + } +} + +/// Helper trait for the tuple combinator. +/// +/// This trait is implemented for tuples of parsers of up to 21 elements. +pub trait Tuple<I, O, E> { + /// Parses the input and returns a tuple of results of each parser. + fn parse(&mut self, input: I) -> IResult<I, O, E>; +} + +impl<Input, Output, Error: ParseError<Input>, F: Parser<Input, Output, Error>> + Tuple<Input, (Output,), Error> for (F,) +{ + fn parse(&mut self, input: Input) -> IResult<Input, (Output,), Error> { + self.0.parse(input).map(|(i, o)| (i, (o,))) + } +} + +macro_rules! tuple_trait( + ($name1:ident $ty1:ident, $name2: ident $ty2:ident, $($name:ident $ty:ident),*) => ( + tuple_trait!(__impl $name1 $ty1, $name2 $ty2; $($name $ty),*); + ); + (__impl $($name:ident $ty: ident),+; $name1:ident $ty1:ident, $($name2:ident $ty2:ident),*) => ( + tuple_trait_impl!($($name $ty),+); + tuple_trait!(__impl $($name $ty),+ , $name1 $ty1; $($name2 $ty2),*); + ); + (__impl $($name:ident $ty: ident),+; $name1:ident $ty1:ident) => ( + tuple_trait_impl!($($name $ty),+); + tuple_trait_impl!($($name $ty),+, $name1 $ty1); + ); +); + +macro_rules! tuple_trait_impl( + ($($name:ident $ty: ident),+) => ( + impl< + Input: Clone, $($ty),+ , Error: ParseError<Input>, + $($name: Parser<Input, $ty, Error>),+ + > Tuple<Input, ( $($ty),+ ), Error> for ( $($name),+ ) { + + fn parse(&mut self, input: Input) -> IResult<Input, ( $($ty),+ ), Error> { + tuple_trait_inner!(0, self, input, (), $($name)+) + + } + } + ); +); + +macro_rules! tuple_trait_inner( + ($it:tt, $self:expr, $input:expr, (), $head:ident $($id:ident)+) => ({ + let (i, o) = $self.$it.parse($input.clone())?; + + succ!($it, tuple_trait_inner!($self, i, ( o ), $($id)+)) + }); + ($it:tt, $self:expr, $input:expr, ($($parsed:tt)*), $head:ident $($id:ident)+) => ({ + let (i, o) = $self.$it.parse($input.clone())?; + + succ!($it, tuple_trait_inner!($self, i, ($($parsed)* , o), $($id)+)) + }); + ($it:tt, $self:expr, $input:expr, ($($parsed:tt)*), $head:ident) => ({ + let (i, o) = $self.$it.parse($input.clone())?; + + Ok((i, ($($parsed)* , o))) + }); +); + +tuple_trait!(FnA A, FnB B, FnC C, FnD D, FnE E, FnF F, FnG G, FnH H, FnI I, FnJ J, FnK K, FnL L, + FnM M, FnN N, FnO O, FnP P, FnQ Q, FnR R, FnS S, FnT T, FnU U); + +// Special case: implement `Tuple` for `()`, the unit type. +// This can come up in macros which accept a variable number of arguments. +// Literally, `()` is an empty tuple, so it should simply parse nothing. +impl<I, E: ParseError<I>> Tuple<I, (), E> for () { + fn parse(&mut self, input: I) -> IResult<I, (), E> { + Ok((input, ())) + } +} + +///Applies a tuple of parsers one by one and returns their results as a tuple. +///There is a maximum of 21 parsers +/// ```rust +/// # use nom::{Err, error::ErrorKind}; +/// use nom::sequence::tuple; +/// use nom::character::complete::{alpha1, digit1}; +/// let mut parser = tuple((alpha1, digit1, alpha1)); +/// +/// assert_eq!(parser("abc123def"), Ok(("", ("abc", "123", "def")))); +/// assert_eq!(parser("123def"), Err(Err::Error(("123def", ErrorKind::Alpha)))); +/// ``` +pub fn tuple<I, O, E: ParseError<I>, List: Tuple<I, O, E>>( + mut l: List, +) -> impl FnMut(I) -> IResult<I, O, E> { + move |i: I| l.parse(i) +} diff --git a/third_party/rust/nom/src/sequence/tests.rs b/third_party/rust/nom/src/sequence/tests.rs new file mode 100644 index 0000000000..30ad0d6783 --- /dev/null +++ b/third_party/rust/nom/src/sequence/tests.rs @@ -0,0 +1,290 @@ +use super::*; +use crate::bytes::streaming::{tag, take}; +use crate::error::{Error, ErrorKind}; +use crate::internal::{Err, IResult, Needed}; +use crate::number::streaming::be_u16; + +#[test] +fn single_element_tuples() { + use crate::character::complete::alpha1; + use crate::{error::ErrorKind, Err}; + + let mut parser = tuple((alpha1,)); + assert_eq!(parser("abc123def"), Ok(("123def", ("abc",)))); + assert_eq!( + parser("123def"), + Err(Err::Error(("123def", ErrorKind::Alpha))) + ); +} + +#[derive(PartialEq, Eq, Debug)] +struct B { + a: u8, + b: u8, +} + +#[derive(PartialEq, Eq, Debug)] +struct C { + a: u8, + b: Option<u8>, +} + +/*FIXME: convert code examples to new error management +use util::{add_error_pattern, error_to_list, print_error}; + +#[cfg(feature = "std")] +#[rustfmt::skip] +fn error_to_string<P: Clone + PartialEq>(e: &Context<P, u32>) -> &'static str { + let v: Vec<(P, ErrorKind<u32>)> = error_to_list(e); + // do it this way if you can use slice patterns + //match &v[..] { + // [ErrorKind::Custom(42), ErrorKind::Tag] => "missing `ijkl` tag", + // [ErrorKind::Custom(42), ErrorKind::Custom(128), ErrorKind::Tag] => "missing `mnop` tag after `ijkl`", + // _ => "unrecognized error" + //} + + let collected: Vec<ErrorKind<u32>> = v.iter().map(|&(_, ref e)| e.clone()).collect(); + if &collected[..] == [ErrorKind::Custom(42), ErrorKind::Tag] { + "missing `ijkl` tag" + } else if &collected[..] == [ErrorKind::Custom(42), ErrorKind::Custom(128), ErrorKind::Tag] { + "missing `mnop` tag after `ijkl`" + } else { + "unrecognized error" + } +} + +// do it this way if you can use box patterns +//use $crate::lib::std::str; +//fn error_to_string(e:Err) -> String +// match e { +// NodePosition(ErrorKind::Custom(42), i1, box Position(ErrorKind::Tag, i2)) => { +// format!("missing `ijkl` tag, found '{}' instead", str::from_utf8(i2).unwrap()) +// }, +// NodePosition(ErrorKind::Custom(42), i1, box NodePosition(ErrorKind::Custom(128), i2, box Position(ErrorKind::Tag, i3))) => { +// format!("missing `mnop` tag after `ijkl`, found '{}' instead", str::from_utf8(i3).unwrap()) +// }, +// _ => "unrecognized error".to_string() +// } +//} +*/ + +#[test] +fn complete() { + use crate::bytes::complete::tag; + fn err_test(i: &[u8]) -> IResult<&[u8], &[u8]> { + let (i, _) = tag("ijkl")(i)?; + tag("mnop")(i) + } + let a = &b"ijklmn"[..]; + + let res_a = err_test(a); + assert_eq!( + res_a, + Err(Err::Error(error_position!(&b"mn"[..], ErrorKind::Tag))) + ); +} + +#[test] +fn pair_test() { + fn pair_abc_def(i: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> { + pair(tag("abc"), tag("def"))(i) + } + + assert_eq!( + pair_abc_def(&b"abcdefghijkl"[..]), + Ok((&b"ghijkl"[..], (&b"abc"[..], &b"def"[..]))) + ); + assert_eq!( + pair_abc_def(&b"ab"[..]), + Err(Err::Incomplete(Needed::new(1))) + ); + assert_eq!( + pair_abc_def(&b"abcd"[..]), + Err(Err::Incomplete(Needed::new(2))) + ); + assert_eq!( + pair_abc_def(&b"xxx"[..]), + Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag))) + ); + assert_eq!( + pair_abc_def(&b"xxxdef"[..]), + Err(Err::Error(error_position!(&b"xxxdef"[..], ErrorKind::Tag))) + ); + assert_eq!( + pair_abc_def(&b"abcxxx"[..]), + Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag))) + ); +} + +#[test] +fn separated_pair_test() { + fn sep_pair_abc_def(i: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> { + separated_pair(tag("abc"), tag(","), tag("def"))(i) + } + + assert_eq!( + sep_pair_abc_def(&b"abc,defghijkl"[..]), + Ok((&b"ghijkl"[..], (&b"abc"[..], &b"def"[..]))) + ); + assert_eq!( + sep_pair_abc_def(&b"ab"[..]), + Err(Err::Incomplete(Needed::new(1))) + ); + assert_eq!( + sep_pair_abc_def(&b"abc,d"[..]), + Err(Err::Incomplete(Needed::new(2))) + ); + assert_eq!( + sep_pair_abc_def(&b"xxx"[..]), + Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag))) + ); + assert_eq!( + sep_pair_abc_def(&b"xxx,def"[..]), + Err(Err::Error(error_position!(&b"xxx,def"[..], ErrorKind::Tag))) + ); + assert_eq!( + sep_pair_abc_def(&b"abc,xxx"[..]), + Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag))) + ); +} + +#[test] +fn preceded_test() { + fn preceded_abcd_efgh(i: &[u8]) -> IResult<&[u8], &[u8]> { + preceded(tag("abcd"), tag("efgh"))(i) + } + + assert_eq!( + preceded_abcd_efgh(&b"abcdefghijkl"[..]), + Ok((&b"ijkl"[..], &b"efgh"[..])) + ); + assert_eq!( + preceded_abcd_efgh(&b"ab"[..]), + Err(Err::Incomplete(Needed::new(2))) + ); + assert_eq!( + preceded_abcd_efgh(&b"abcde"[..]), + Err(Err::Incomplete(Needed::new(3))) + ); + assert_eq!( + preceded_abcd_efgh(&b"xxx"[..]), + Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag))) + ); + assert_eq!( + preceded_abcd_efgh(&b"xxxxdef"[..]), + Err(Err::Error(error_position!(&b"xxxxdef"[..], ErrorKind::Tag))) + ); + assert_eq!( + preceded_abcd_efgh(&b"abcdxxx"[..]), + Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag))) + ); +} + +#[test] +fn terminated_test() { + fn terminated_abcd_efgh(i: &[u8]) -> IResult<&[u8], &[u8]> { + terminated(tag("abcd"), tag("efgh"))(i) + } + + assert_eq!( + terminated_abcd_efgh(&b"abcdefghijkl"[..]), + Ok((&b"ijkl"[..], &b"abcd"[..])) + ); + assert_eq!( + terminated_abcd_efgh(&b"ab"[..]), + Err(Err::Incomplete(Needed::new(2))) + ); + assert_eq!( + terminated_abcd_efgh(&b"abcde"[..]), + Err(Err::Incomplete(Needed::new(3))) + ); + assert_eq!( + terminated_abcd_efgh(&b"xxx"[..]), + Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag))) + ); + assert_eq!( + terminated_abcd_efgh(&b"xxxxdef"[..]), + Err(Err::Error(error_position!(&b"xxxxdef"[..], ErrorKind::Tag))) + ); + assert_eq!( + terminated_abcd_efgh(&b"abcdxxxx"[..]), + Err(Err::Error(error_position!(&b"xxxx"[..], ErrorKind::Tag))) + ); +} + +#[test] +fn delimited_test() { + fn delimited_abc_def_ghi(i: &[u8]) -> IResult<&[u8], &[u8]> { + delimited(tag("abc"), tag("def"), tag("ghi"))(i) + } + + assert_eq!( + delimited_abc_def_ghi(&b"abcdefghijkl"[..]), + Ok((&b"jkl"[..], &b"def"[..])) + ); + assert_eq!( + delimited_abc_def_ghi(&b"ab"[..]), + Err(Err::Incomplete(Needed::new(1))) + ); + assert_eq!( + delimited_abc_def_ghi(&b"abcde"[..]), + Err(Err::Incomplete(Needed::new(1))) + ); + assert_eq!( + delimited_abc_def_ghi(&b"abcdefgh"[..]), + Err(Err::Incomplete(Needed::new(1))) + ); + assert_eq!( + delimited_abc_def_ghi(&b"xxx"[..]), + Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag))) + ); + assert_eq!( + delimited_abc_def_ghi(&b"xxxdefghi"[..]), + Err(Err::Error(error_position!( + &b"xxxdefghi"[..], + ErrorKind::Tag + ),)) + ); + assert_eq!( + delimited_abc_def_ghi(&b"abcxxxghi"[..]), + Err(Err::Error(error_position!(&b"xxxghi"[..], ErrorKind::Tag))) + ); + assert_eq!( + delimited_abc_def_ghi(&b"abcdefxxx"[..]), + Err(Err::Error(error_position!(&b"xxx"[..], ErrorKind::Tag))) + ); +} + +#[test] +fn tuple_test() { + fn tuple_3(i: &[u8]) -> IResult<&[u8], (u16, &[u8], &[u8])> { + tuple((be_u16, take(3u8), tag("fg")))(i) + } + + assert_eq!( + tuple_3(&b"abcdefgh"[..]), + Ok((&b"h"[..], (0x6162u16, &b"cde"[..], &b"fg"[..]))) + ); + assert_eq!(tuple_3(&b"abcd"[..]), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(tuple_3(&b"abcde"[..]), Err(Err::Incomplete(Needed::new(2)))); + assert_eq!( + tuple_3(&b"abcdejk"[..]), + Err(Err::Error(error_position!(&b"jk"[..], ErrorKind::Tag))) + ); +} + +#[test] +fn unit_type() { + assert_eq!( + tuple::<&'static str, (), Error<&'static str>, ()>(())("abxsbsh"), + Ok(("abxsbsh", ())) + ); + assert_eq!( + tuple::<&'static str, (), Error<&'static str>, ()>(())("sdfjakdsas"), + Ok(("sdfjakdsas", ())) + ); + assert_eq!( + tuple::<&'static str, (), Error<&'static str>, ()>(())(""), + Ok(("", ())) + ); +} diff --git a/third_party/rust/nom/src/str.rs b/third_party/rust/nom/src/str.rs new file mode 100644 index 0000000000..1a8b8ba2d4 --- /dev/null +++ b/third_party/rust/nom/src/str.rs @@ -0,0 +1,536 @@ +#[cfg(test)] +mod test { + #[cfg(feature = "alloc")] + use crate::{branch::alt, bytes::complete::tag_no_case, combinator::recognize, multi::many1}; + use crate::{ + bytes::complete::{is_a, is_not, tag, take, take_till, take_until}, + error::{self, ErrorKind}, + Err, IResult, + }; + + #[test] + fn tagtr_succeed() { + const INPUT: &str = "Hello World!"; + const TAG: &str = "Hello"; + fn test(input: &str) -> IResult<&str, &str> { + tag(TAG)(input) + } + + match test(INPUT) { + Ok((extra, output)) => { + assert!(extra == " World!", "Parser `tag` consumed leftover input."); + assert!( + output == TAG, + "Parser `tag` doesn't return the tag it matched on success. \ + Expected `{}`, got `{}`.", + TAG, + output + ); + } + other => panic!( + "Parser `tag` didn't succeed when it should have. \ + Got `{:?}`.", + other + ), + }; + } + + #[test] + fn tagtr_incomplete() { + use crate::bytes::streaming::tag; + + const INPUT: &str = "Hello"; + const TAG: &str = "Hello World!"; + + let res: IResult<_, _, error::Error<_>> = tag(TAG)(INPUT); + match res { + Err(Err::Incomplete(_)) => (), + other => { + panic!( + "Parser `tag` didn't require more input when it should have. \ + Got `{:?}`.", + other + ); + } + }; + } + + #[test] + fn tagtr_error() { + const INPUT: &str = "Hello World!"; + const TAG: &str = "Random"; // TAG must be closer than INPUT. + + let res: IResult<_, _, error::Error<_>> = tag(TAG)(INPUT); + match res { + Err(Err::Error(_)) => (), + other => { + panic!( + "Parser `tag` didn't fail when it should have. Got `{:?}`.`", + other + ); + } + }; + } + + #[test] + fn take_s_succeed() { + const INPUT: &str = "βèƒôřèÂßÇáƒƭèř"; + const CONSUMED: &str = "βèƒôřèÂßÇ"; + const LEFTOVER: &str = "áƒƭèř"; + + let res: IResult<_, _, error::Error<_>> = take(9_usize)(INPUT); + match res { + Ok((extra, output)) => { + assert!( + extra == LEFTOVER, + "Parser `take_s` consumed leftover input. Leftover `{}`.", + extra + ); + assert!( + output == CONSUMED, + "Parser `take_s` doesn't return the string it consumed on success. Expected `{}`, got `{}`.", + CONSUMED, + output + ); + } + other => panic!( + "Parser `take_s` didn't succeed when it should have. \ + Got `{:?}`.", + other + ), + }; + } + + #[test] + fn take_until_succeed() { + const INPUT: &str = "βèƒôřèÂßÇ∂áƒƭèř"; + const FIND: &str = "ÂßÇ∂"; + const CONSUMED: &str = "βèƒôřè"; + const LEFTOVER: &str = "ÂßÇ∂áƒƭèř"; + + let res: IResult<_, _, (_, ErrorKind)> = take_until(FIND)(INPUT); + match res { + Ok((extra, output)) => { + assert!( + extra == LEFTOVER, + "Parser `take_until`\ + consumed leftover input. Leftover `{}`.", + extra + ); + assert!( + output == CONSUMED, + "Parser `take_until`\ + doesn't return the string it consumed on success. Expected `{}`, got `{}`.", + CONSUMED, + output + ); + } + other => panic!( + "Parser `take_until` didn't succeed when it should have. \ + Got `{:?}`.", + other + ), + }; + } + + #[test] + fn take_s_incomplete() { + use crate::bytes::streaming::take; + + const INPUT: &str = "βèƒôřèÂßÇá"; + + let res: IResult<_, _, (_, ErrorKind)> = take(13_usize)(INPUT); + match res { + Err(Err::Incomplete(_)) => (), + other => panic!( + "Parser `take` didn't require more input when it should have. \ + Got `{:?}`.", + other + ), + } + } + + use crate::internal::Needed; + + fn is_alphabetic(c: char) -> bool { + (c as u8 >= 0x41 && c as u8 <= 0x5A) || (c as u8 >= 0x61 && c as u8 <= 0x7A) + } + + #[test] + fn take_while() { + use crate::bytes::streaming::take_while; + + fn f(i: &str) -> IResult<&str, &str> { + take_while(is_alphabetic)(i) + } + let a = ""; + let b = "abcd"; + let c = "abcd123"; + let d = "123"; + + assert_eq!(f(&a[..]), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(f(&b[..]), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(f(&c[..]), Ok((&d[..], &b[..]))); + assert_eq!(f(&d[..]), Ok((&d[..], &a[..]))); + } + + #[test] + fn take_while1() { + use crate::bytes::streaming::take_while1; + + fn f(i: &str) -> IResult<&str, &str> { + take_while1(is_alphabetic)(i) + } + let a = ""; + let b = "abcd"; + let c = "abcd123"; + let d = "123"; + + assert_eq!(f(&a[..]), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(f(&b[..]), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(f(&c[..]), Ok((&"123"[..], &b[..]))); + assert_eq!( + f(&d[..]), + Err(Err::Error(error_position!(&d[..], ErrorKind::TakeWhile1))) + ); + } + + #[test] + fn take_till_s_succeed() { + const INPUT: &str = "βèƒôřèÂßÇáƒƭèř"; + const CONSUMED: &str = "βèƒôřèÂßÇ"; + const LEFTOVER: &str = "áƒƭèř"; + fn till_s(c: char) -> bool { + c == 'á' + } + fn test(input: &str) -> IResult<&str, &str> { + take_till(till_s)(input) + } + match test(INPUT) { + Ok((extra, output)) => { + assert!( + extra == LEFTOVER, + "Parser `take_till` consumed leftover input." + ); + assert!( + output == CONSUMED, + "Parser `take_till` doesn't return the string it consumed on success. \ + Expected `{}`, got `{}`.", + CONSUMED, + output + ); + } + other => panic!( + "Parser `take_till` didn't succeed when it should have. \ + Got `{:?}`.", + other + ), + }; + } + + #[test] + fn take_while_succeed_none() { + use crate::bytes::complete::take_while; + + const INPUT: &str = "βèƒôřèÂßÇáƒƭèř"; + const CONSUMED: &str = ""; + const LEFTOVER: &str = "βèƒôřèÂßÇáƒƭèř"; + fn while_s(c: char) -> bool { + c == '9' + } + fn test(input: &str) -> IResult<&str, &str> { + take_while(while_s)(input) + } + match test(INPUT) { + Ok((extra, output)) => { + assert!( + extra == LEFTOVER, + "Parser `take_while` consumed leftover input." + ); + assert!( + output == CONSUMED, + "Parser `take_while` doesn't return the string it consumed on success. \ + Expected `{}`, got `{}`.", + CONSUMED, + output + ); + } + other => panic!( + "Parser `take_while` didn't succeed when it should have. \ + Got `{:?}`.", + other + ), + }; + } + + #[test] + fn is_not_succeed() { + const INPUT: &str = "βèƒôřèÂßÇáƒƭèř"; + const AVOID: &str = "£úçƙ¥á"; + const CONSUMED: &str = "βèƒôřèÂßÇ"; + const LEFTOVER: &str = "áƒƭèř"; + fn test(input: &str) -> IResult<&str, &str> { + is_not(AVOID)(input) + } + match test(INPUT) { + Ok((extra, output)) => { + assert!( + extra == LEFTOVER, + "Parser `is_not` consumed leftover input. Leftover `{}`.", + extra + ); + assert!( + output == CONSUMED, + "Parser `is_not` doesn't return the string it consumed on success. Expected `{}`, got `{}`.", + CONSUMED, + output + ); + } + other => panic!( + "Parser `is_not` didn't succeed when it should have. \ + Got `{:?}`.", + other + ), + }; + } + + #[test] + fn take_while_succeed_some() { + use crate::bytes::complete::take_while; + + const INPUT: &str = "βèƒôřèÂßÇáƒƭèř"; + const CONSUMED: &str = "βèƒôřèÂßÇ"; + const LEFTOVER: &str = "áƒƭèř"; + fn while_s(c: char) -> bool { + c == 'β' + || c == 'è' + || c == 'ƒ' + || c == 'ô' + || c == 'ř' + || c == 'è' + || c == 'Â' + || c == 'ß' + || c == 'Ç' + } + fn test(input: &str) -> IResult<&str, &str> { + take_while(while_s)(input) + } + match test(INPUT) { + Ok((extra, output)) => { + assert!( + extra == LEFTOVER, + "Parser `take_while` consumed leftover input." + ); + assert!( + output == CONSUMED, + "Parser `take_while` doesn't return the string it consumed on success. \ + Expected `{}`, got `{}`.", + CONSUMED, + output + ); + } + other => panic!( + "Parser `take_while` didn't succeed when it should have. \ + Got `{:?}`.", + other + ), + }; + } + + #[test] + fn is_not_fail() { + const INPUT: &str = "βèƒôřèÂßÇáƒƭèř"; + const AVOID: &str = "βúçƙ¥"; + fn test(input: &str) -> IResult<&str, &str> { + is_not(AVOID)(input) + } + match test(INPUT) { + Err(Err::Error(_)) => (), + other => panic!( + "Parser `is_not` didn't fail when it should have. Got `{:?}`.", + other + ), + }; + } + + #[test] + fn take_while1_succeed() { + use crate::bytes::complete::take_while1; + + const INPUT: &str = "βèƒôřèÂßÇáƒƭèř"; + const CONSUMED: &str = "βèƒôřèÂßÇ"; + const LEFTOVER: &str = "áƒƭèř"; + fn while1_s(c: char) -> bool { + c == 'β' + || c == 'è' + || c == 'ƒ' + || c == 'ô' + || c == 'ř' + || c == 'è' + || c == 'Â' + || c == 'ß' + || c == 'Ç' + } + fn test(input: &str) -> IResult<&str, &str> { + take_while1(while1_s)(input) + } + match test(INPUT) { + Ok((extra, output)) => { + assert!( + extra == LEFTOVER, + "Parser `take_while1` consumed leftover input." + ); + assert!( + output == CONSUMED, + "Parser `take_while1` doesn't return the string it consumed on success. \ + Expected `{}`, got `{}`.", + CONSUMED, + output + ); + } + other => panic!( + "Parser `take_while1` didn't succeed when it should have. \ + Got `{:?}`.", + other + ), + }; + } + + #[test] + fn take_until_incomplete() { + use crate::bytes::streaming::take_until; + + const INPUT: &str = "βèƒôřè"; + const FIND: &str = "βèƒôřèÂßÇ"; + + let res: IResult<_, _, (_, ErrorKind)> = take_until(FIND)(INPUT); + match res { + Err(Err::Incomplete(_)) => (), + other => panic!( + "Parser `take_until` didn't require more input when it should have. \ + Got `{:?}`.", + other + ), + }; + } + + #[test] + fn is_a_succeed() { + const INPUT: &str = "βèƒôřèÂßÇáƒƭèř"; + const MATCH: &str = "βèƒôřèÂßÇ"; + const CONSUMED: &str = "βèƒôřèÂßÇ"; + const LEFTOVER: &str = "áƒƭèř"; + fn test(input: &str) -> IResult<&str, &str> { + is_a(MATCH)(input) + } + match test(INPUT) { + Ok((extra, output)) => { + assert!( + extra == LEFTOVER, + "Parser `is_a` consumed leftover input. Leftover `{}`.", + extra + ); + assert!( + output == CONSUMED, + "Parser `is_a` doesn't return the string it consumed on success. Expected `{}`, got `{}`.", + CONSUMED, + output + ); + } + other => panic!( + "Parser `is_a` didn't succeed when it should have. \ + Got `{:?}`.", + other + ), + }; + } + + #[test] + fn take_while1_fail() { + use crate::bytes::complete::take_while1; + + const INPUT: &str = "βèƒôřèÂßÇáƒƭèř"; + fn while1_s(c: char) -> bool { + c == '9' + } + fn test(input: &str) -> IResult<&str, &str> { + take_while1(while1_s)(input) + } + match test(INPUT) { + Err(Err::Error(_)) => (), + other => panic!( + "Parser `take_while1` didn't fail when it should have. \ + Got `{:?}`.", + other + ), + }; + } + + #[test] + fn is_a_fail() { + const INPUT: &str = "βèƒôřèÂßÇáƒƭèř"; + const MATCH: &str = "Ûñℓúçƙ¥"; + fn test(input: &str) -> IResult<&str, &str> { + is_a(MATCH)(input) + } + match test(INPUT) { + Err(Err::Error(_)) => (), + other => panic!( + "Parser `is_a` didn't fail when it should have. Got `{:?}`.", + other + ), + }; + } + + #[test] + fn take_until_error() { + use crate::bytes::streaming::take_until; + + const INPUT: &str = "βèƒôřèÂßÇáƒƭèř"; + const FIND: &str = "Ráñδô₥"; + + let res: IResult<_, _, (_, ErrorKind)> = take_until(FIND)(INPUT); + match res { + Err(Err::Incomplete(_)) => (), + other => panic!( + "Parser `take_until` didn't fail when it should have. \ + Got `{:?}`.", + other + ), + }; + } + + #[test] + #[cfg(feature = "alloc")] + fn recognize_is_a() { + let a = "aabbab"; + let b = "ababcd"; + + fn f(i: &str) -> IResult<&str, &str> { + recognize(many1(alt((tag("a"), tag("b")))))(i) + } + + assert_eq!(f(&a[..]), Ok((&a[6..], &a[..]))); + assert_eq!(f(&b[..]), Ok((&b[4..], &b[..4]))); + } + + #[test] + fn utf8_indexing() { + fn dot(i: &str) -> IResult<&str, &str> { + tag(".")(i) + } + + let _ = dot("點"); + } + + #[cfg(feature = "alloc")] + #[test] + fn case_insensitive() { + fn test(i: &str) -> IResult<&str, &str> { + tag_no_case("ABcd")(i) + } + assert_eq!(test("aBCdefgh"), Ok(("efgh", "aBCd"))); + assert_eq!(test("abcdefgh"), Ok(("efgh", "abcd"))); + assert_eq!(test("ABCDefgh"), Ok(("efgh", "ABCD"))); + } +} diff --git a/third_party/rust/nom/src/traits.rs b/third_party/rust/nom/src/traits.rs new file mode 100644 index 0000000000..394e5bc3a5 --- /dev/null +++ b/third_party/rust/nom/src/traits.rs @@ -0,0 +1,1441 @@ +//! Traits input types have to implement to work with nom combinators +use crate::error::{ErrorKind, ParseError}; +use crate::internal::{Err, IResult, Needed}; +use crate::lib::std::iter::{Copied, Enumerate}; +use crate::lib::std::ops::{Range, RangeFrom, RangeFull, RangeTo}; +use crate::lib::std::slice::Iter; +use crate::lib::std::str::from_utf8; +use crate::lib::std::str::CharIndices; +use crate::lib::std::str::Chars; +use crate::lib::std::str::FromStr; + +#[cfg(feature = "alloc")] +use crate::lib::std::string::String; +#[cfg(feature = "alloc")] +use crate::lib::std::vec::Vec; + +/// Abstract method to calculate the input length +pub trait InputLength { + /// Calculates the input length, as indicated by its name, + /// and the name of the trait itself + fn input_len(&self) -> usize; +} + +impl<'a, T> InputLength for &'a [T] { + #[inline] + fn input_len(&self) -> usize { + self.len() + } +} + +impl<'a> InputLength for &'a str { + #[inline] + fn input_len(&self) -> usize { + self.len() + } +} + +impl<'a> InputLength for (&'a [u8], usize) { + #[inline] + fn input_len(&self) -> usize { + //println!("bit input length for ({:?}, {}):", self.0, self.1); + //println!("-> {}", self.0.len() * 8 - self.1); + self.0.len() * 8 - self.1 + } +} + +/// Useful functions to calculate the offset between slices and show a hexdump of a slice +pub trait Offset { + /// Offset between the first byte of self and the first byte of the argument + fn offset(&self, second: &Self) -> usize; +} + +impl Offset for [u8] { + fn offset(&self, second: &Self) -> usize { + let fst = self.as_ptr(); + let snd = second.as_ptr(); + + snd as usize - fst as usize + } +} + +impl<'a> Offset for &'a [u8] { + fn offset(&self, second: &Self) -> usize { + let fst = self.as_ptr(); + let snd = second.as_ptr(); + + snd as usize - fst as usize + } +} + +impl Offset for str { + fn offset(&self, second: &Self) -> usize { + let fst = self.as_ptr(); + let snd = second.as_ptr(); + + snd as usize - fst as usize + } +} + +impl<'a> Offset for &'a str { + fn offset(&self, second: &Self) -> usize { + let fst = self.as_ptr(); + let snd = second.as_ptr(); + + snd as usize - fst as usize + } +} + +/// Helper trait for types that can be viewed as a byte slice +pub trait AsBytes { + /// Casts the input type to a byte slice + fn as_bytes(&self) -> &[u8]; +} + +impl<'a> AsBytes for &'a str { + #[inline(always)] + fn as_bytes(&self) -> &[u8] { + (*self).as_bytes() + } +} + +impl AsBytes for str { + #[inline(always)] + fn as_bytes(&self) -> &[u8] { + self.as_ref() + } +} + +impl<'a> AsBytes for &'a [u8] { + #[inline(always)] + fn as_bytes(&self) -> &[u8] { + *self + } +} + +impl AsBytes for [u8] { + #[inline(always)] + fn as_bytes(&self) -> &[u8] { + self + } +} + +macro_rules! as_bytes_array_impls { + ($($N:expr)+) => { + $( + impl<'a> AsBytes for &'a [u8; $N] { + #[inline(always)] + fn as_bytes(&self) -> &[u8] { + *self + } + } + + impl AsBytes for [u8; $N] { + #[inline(always)] + fn as_bytes(&self) -> &[u8] { + self + } + } + )+ + }; +} + +as_bytes_array_impls! { + 0 1 2 3 4 5 6 7 8 9 + 10 11 12 13 14 15 16 17 18 19 + 20 21 22 23 24 25 26 27 28 29 + 30 31 32 +} + +/// Transforms common types to a char for basic token parsing +pub trait AsChar { + /// makes a char from self + fn as_char(self) -> char; + + /// Tests that self is an alphabetic character + /// + /// Warning: for `&str` it recognizes alphabetic + /// characters outside of the 52 ASCII letters + fn is_alpha(self) -> bool; + + /// Tests that self is an alphabetic character + /// or a decimal digit + fn is_alphanum(self) -> bool; + /// Tests that self is a decimal digit + fn is_dec_digit(self) -> bool; + /// Tests that self is an hex digit + fn is_hex_digit(self) -> bool; + /// Tests that self is an octal digit + fn is_oct_digit(self) -> bool; + /// Gets the len in bytes for self + fn len(self) -> usize; +} + +impl AsChar for u8 { + #[inline] + fn as_char(self) -> char { + self as char + } + #[inline] + fn is_alpha(self) -> bool { + (self >= 0x41 && self <= 0x5A) || (self >= 0x61 && self <= 0x7A) + } + #[inline] + fn is_alphanum(self) -> bool { + self.is_alpha() || self.is_dec_digit() + } + #[inline] + fn is_dec_digit(self) -> bool { + self >= 0x30 && self <= 0x39 + } + #[inline] + fn is_hex_digit(self) -> bool { + (self >= 0x30 && self <= 0x39) + || (self >= 0x41 && self <= 0x46) + || (self >= 0x61 && self <= 0x66) + } + #[inline] + fn is_oct_digit(self) -> bool { + self >= 0x30 && self <= 0x37 + } + #[inline] + fn len(self) -> usize { + 1 + } +} +impl<'a> AsChar for &'a u8 { + #[inline] + fn as_char(self) -> char { + *self as char + } + #[inline] + fn is_alpha(self) -> bool { + (*self >= 0x41 && *self <= 0x5A) || (*self >= 0x61 && *self <= 0x7A) + } + #[inline] + fn is_alphanum(self) -> bool { + self.is_alpha() || self.is_dec_digit() + } + #[inline] + fn is_dec_digit(self) -> bool { + *self >= 0x30 && *self <= 0x39 + } + #[inline] + fn is_hex_digit(self) -> bool { + (*self >= 0x30 && *self <= 0x39) + || (*self >= 0x41 && *self <= 0x46) + || (*self >= 0x61 && *self <= 0x66) + } + #[inline] + fn is_oct_digit(self) -> bool { + *self >= 0x30 && *self <= 0x37 + } + #[inline] + fn len(self) -> usize { + 1 + } +} + +impl AsChar for char { + #[inline] + fn as_char(self) -> char { + self + } + #[inline] + fn is_alpha(self) -> bool { + self.is_ascii_alphabetic() + } + #[inline] + fn is_alphanum(self) -> bool { + self.is_alpha() || self.is_dec_digit() + } + #[inline] + fn is_dec_digit(self) -> bool { + self.is_ascii_digit() + } + #[inline] + fn is_hex_digit(self) -> bool { + self.is_ascii_hexdigit() + } + #[inline] + fn is_oct_digit(self) -> bool { + self.is_digit(8) + } + #[inline] + fn len(self) -> usize { + self.len_utf8() + } +} + +impl<'a> AsChar for &'a char { + #[inline] + fn as_char(self) -> char { + *self + } + #[inline] + fn is_alpha(self) -> bool { + self.is_ascii_alphabetic() + } + #[inline] + fn is_alphanum(self) -> bool { + self.is_alpha() || self.is_dec_digit() + } + #[inline] + fn is_dec_digit(self) -> bool { + self.is_ascii_digit() + } + #[inline] + fn is_hex_digit(self) -> bool { + self.is_ascii_hexdigit() + } + #[inline] + fn is_oct_digit(self) -> bool { + self.is_digit(8) + } + #[inline] + fn len(self) -> usize { + self.len_utf8() + } +} + +/// Abstracts common iteration operations on the input type +pub trait InputIter { + /// The current input type is a sequence of that `Item` type. + /// + /// Example: `u8` for `&[u8]` or `char` for `&str` + type Item; + /// An iterator over the input type, producing the item and its position + /// for use with [Slice]. If we're iterating over `&str`, the position + /// corresponds to the byte index of the character + type Iter: Iterator<Item = (usize, Self::Item)>; + + /// An iterator over the input type, producing the item + type IterElem: Iterator<Item = Self::Item>; + + /// Returns an iterator over the elements and their byte offsets + fn iter_indices(&self) -> Self::Iter; + /// Returns an iterator over the elements + fn iter_elements(&self) -> Self::IterElem; + /// Finds the byte position of the element + fn position<P>(&self, predicate: P) -> Option<usize> + where + P: Fn(Self::Item) -> bool; + /// Get the byte offset from the element's position in the stream + fn slice_index(&self, count: usize) -> Result<usize, Needed>; +} + +/// Abstracts slicing operations +pub trait InputTake: Sized { + /// Returns a slice of `count` bytes. panics if count > length + fn take(&self, count: usize) -> Self; + /// Split the stream at the `count` byte offset. panics if count > length + fn take_split(&self, count: usize) -> (Self, Self); +} + +impl<'a> InputIter for &'a [u8] { + type Item = u8; + type Iter = Enumerate<Self::IterElem>; + type IterElem = Copied<Iter<'a, u8>>; + + #[inline] + fn iter_indices(&self) -> Self::Iter { + self.iter_elements().enumerate() + } + #[inline] + fn iter_elements(&self) -> Self::IterElem { + self.iter().copied() + } + #[inline] + fn position<P>(&self, predicate: P) -> Option<usize> + where + P: Fn(Self::Item) -> bool, + { + self.iter().position(|b| predicate(*b)) + } + #[inline] + fn slice_index(&self, count: usize) -> Result<usize, Needed> { + if self.len() >= count { + Ok(count) + } else { + Err(Needed::new(count - self.len())) + } + } +} + +impl<'a> InputTake for &'a [u8] { + #[inline] + fn take(&self, count: usize) -> Self { + &self[0..count] + } + #[inline] + fn take_split(&self, count: usize) -> (Self, Self) { + let (prefix, suffix) = self.split_at(count); + (suffix, prefix) + } +} + +impl<'a> InputIter for &'a str { + type Item = char; + type Iter = CharIndices<'a>; + type IterElem = Chars<'a>; + #[inline] + fn iter_indices(&self) -> Self::Iter { + self.char_indices() + } + #[inline] + fn iter_elements(&self) -> Self::IterElem { + self.chars() + } + fn position<P>(&self, predicate: P) -> Option<usize> + where + P: Fn(Self::Item) -> bool, + { + for (o, c) in self.char_indices() { + if predicate(c) { + return Some(o); + } + } + None + } + #[inline] + fn slice_index(&self, count: usize) -> Result<usize, Needed> { + let mut cnt = 0; + for (index, _) in self.char_indices() { + if cnt == count { + return Ok(index); + } + cnt += 1; + } + if cnt == count { + return Ok(self.len()); + } + Err(Needed::Unknown) + } +} + +impl<'a> InputTake for &'a str { + #[inline] + fn take(&self, count: usize) -> Self { + &self[..count] + } + + // return byte index + #[inline] + fn take_split(&self, count: usize) -> (Self, Self) { + let (prefix, suffix) = self.split_at(count); + (suffix, prefix) + } +} + +/// Dummy trait used for default implementations (currently only used for `InputTakeAtPosition` and `Compare`). +/// +/// When implementing a custom input type, it is possible to use directly the +/// default implementation: If the input type implements `InputLength`, `InputIter`, +/// `InputTake` and `Clone`, you can implement `UnspecializedInput` and get +/// a default version of `InputTakeAtPosition` and `Compare`. +/// +/// For performance reasons, you might want to write a custom implementation of +/// `InputTakeAtPosition` (like the one for `&[u8]`). +pub trait UnspecializedInput {} + +/// Methods to take as much input as possible until the provided function returns true for the current element. +/// +/// A large part of nom's basic parsers are built using this trait. +pub trait InputTakeAtPosition: Sized { + /// The current input type is a sequence of that `Item` type. + /// + /// Example: `u8` for `&[u8]` or `char` for `&str` + type Item; + + /// Looks for the first element of the input type for which the condition returns true, + /// and returns the input up to this position. + /// + /// *streaming version*: If no element is found matching the condition, this will return `Incomplete` + fn split_at_position<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E> + where + P: Fn(Self::Item) -> bool; + + /// Looks for the first element of the input type for which the condition returns true + /// and returns the input up to this position. + /// + /// Fails if the produced slice is empty. + /// + /// *streaming version*: If no element is found matching the condition, this will return `Incomplete` + fn split_at_position1<P, E: ParseError<Self>>( + &self, + predicate: P, + e: ErrorKind, + ) -> IResult<Self, Self, E> + where + P: Fn(Self::Item) -> bool; + + /// Looks for the first element of the input type for which the condition returns true, + /// and returns the input up to this position. + /// + /// *complete version*: If no element is found matching the condition, this will return the whole input + fn split_at_position_complete<P, E: ParseError<Self>>( + &self, + predicate: P, + ) -> IResult<Self, Self, E> + where + P: Fn(Self::Item) -> bool; + + /// Looks for the first element of the input type for which the condition returns true + /// and returns the input up to this position. + /// + /// Fails if the produced slice is empty. + /// + /// *complete version*: If no element is found matching the condition, this will return the whole input + fn split_at_position1_complete<P, E: ParseError<Self>>( + &self, + predicate: P, + e: ErrorKind, + ) -> IResult<Self, Self, E> + where + P: Fn(Self::Item) -> bool; +} + +impl<T: InputLength + InputIter + InputTake + Clone + UnspecializedInput> InputTakeAtPosition + for T +{ + type Item = <T as InputIter>::Item; + + fn split_at_position<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E> + where + P: Fn(Self::Item) -> bool, + { + match self.position(predicate) { + Some(n) => Ok(self.take_split(n)), + None => Err(Err::Incomplete(Needed::new(1))), + } + } + + fn split_at_position1<P, E: ParseError<Self>>( + &self, + predicate: P, + e: ErrorKind, + ) -> IResult<Self, Self, E> + where + P: Fn(Self::Item) -> bool, + { + match self.position(predicate) { + Some(0) => Err(Err::Error(E::from_error_kind(self.clone(), e))), + Some(n) => Ok(self.take_split(n)), + None => Err(Err::Incomplete(Needed::new(1))), + } + } + + fn split_at_position_complete<P, E: ParseError<Self>>( + &self, + predicate: P, + ) -> IResult<Self, Self, E> + where + P: Fn(Self::Item) -> bool, + { + match self.split_at_position(predicate) { + Err(Err::Incomplete(_)) => Ok(self.take_split(self.input_len())), + res => res, + } + } + + fn split_at_position1_complete<P, E: ParseError<Self>>( + &self, + predicate: P, + e: ErrorKind, + ) -> IResult<Self, Self, E> + where + P: Fn(Self::Item) -> bool, + { + match self.split_at_position1(predicate, e) { + Err(Err::Incomplete(_)) => { + if self.input_len() == 0 { + Err(Err::Error(E::from_error_kind(self.clone(), e))) + } else { + Ok(self.take_split(self.input_len())) + } + } + res => res, + } + } +} + +impl<'a> InputTakeAtPosition for &'a [u8] { + type Item = u8; + + fn split_at_position<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E> + where + P: Fn(Self::Item) -> bool, + { + match self.iter().position(|c| predicate(*c)) { + Some(i) => Ok(self.take_split(i)), + None => Err(Err::Incomplete(Needed::new(1))), + } + } + + fn split_at_position1<P, E: ParseError<Self>>( + &self, + predicate: P, + e: ErrorKind, + ) -> IResult<Self, Self, E> + where + P: Fn(Self::Item) -> bool, + { + match self.iter().position(|c| predicate(*c)) { + Some(0) => Err(Err::Error(E::from_error_kind(self, e))), + Some(i) => Ok(self.take_split(i)), + None => Err(Err::Incomplete(Needed::new(1))), + } + } + + fn split_at_position_complete<P, E: ParseError<Self>>( + &self, + predicate: P, + ) -> IResult<Self, Self, E> + where + P: Fn(Self::Item) -> bool, + { + match self.iter().position(|c| predicate(*c)) { + Some(i) => Ok(self.take_split(i)), + None => Ok(self.take_split(self.input_len())), + } + } + + fn split_at_position1_complete<P, E: ParseError<Self>>( + &self, + predicate: P, + e: ErrorKind, + ) -> IResult<Self, Self, E> + where + P: Fn(Self::Item) -> bool, + { + match self.iter().position(|c| predicate(*c)) { + Some(0) => Err(Err::Error(E::from_error_kind(self, e))), + Some(i) => Ok(self.take_split(i)), + None => { + if self.is_empty() { + Err(Err::Error(E::from_error_kind(self, e))) + } else { + Ok(self.take_split(self.input_len())) + } + } + } + } +} + +impl<'a> InputTakeAtPosition for &'a str { + type Item = char; + + fn split_at_position<P, E: ParseError<Self>>(&self, predicate: P) -> IResult<Self, Self, E> + where + P: Fn(Self::Item) -> bool, + { + match self.find(predicate) { + // find() returns a byte index that is already in the slice at a char boundary + Some(i) => unsafe { Ok((self.get_unchecked(i..), self.get_unchecked(..i))) }, + None => Err(Err::Incomplete(Needed::new(1))), + } + } + + fn split_at_position1<P, E: ParseError<Self>>( + &self, + predicate: P, + e: ErrorKind, + ) -> IResult<Self, Self, E> + where + P: Fn(Self::Item) -> bool, + { + match self.find(predicate) { + Some(0) => Err(Err::Error(E::from_error_kind(self, e))), + // find() returns a byte index that is already in the slice at a char boundary + Some(i) => unsafe { Ok((self.get_unchecked(i..), self.get_unchecked(..i))) }, + None => Err(Err::Incomplete(Needed::new(1))), + } + } + + fn split_at_position_complete<P, E: ParseError<Self>>( + &self, + predicate: P, + ) -> IResult<Self, Self, E> + where + P: Fn(Self::Item) -> bool, + { + match self.find(predicate) { + // find() returns a byte index that is already in the slice at a char boundary + Some(i) => unsafe { Ok((self.get_unchecked(i..), self.get_unchecked(..i))) }, + // the end of slice is a char boundary + None => unsafe { + Ok(( + self.get_unchecked(self.len()..), + self.get_unchecked(..self.len()), + )) + }, + } + } + + fn split_at_position1_complete<P, E: ParseError<Self>>( + &self, + predicate: P, + e: ErrorKind, + ) -> IResult<Self, Self, E> + where + P: Fn(Self::Item) -> bool, + { + match self.find(predicate) { + Some(0) => Err(Err::Error(E::from_error_kind(self, e))), + // find() returns a byte index that is already in the slice at a char boundary + Some(i) => unsafe { Ok((self.get_unchecked(i..), self.get_unchecked(..i))) }, + None => { + if self.is_empty() { + Err(Err::Error(E::from_error_kind(self, e))) + } else { + // the end of slice is a char boundary + unsafe { + Ok(( + self.get_unchecked(self.len()..), + self.get_unchecked(..self.len()), + )) + } + } + } + } + } +} + +/// Indicates whether a comparison was successful, an error, or +/// if more data was needed +#[derive(Debug, PartialEq)] +pub enum CompareResult { + /// Comparison was successful + Ok, + /// We need more data to be sure + Incomplete, + /// Comparison failed + Error, +} + +/// Abstracts comparison operations +pub trait Compare<T> { + /// Compares self to another value for equality + fn compare(&self, t: T) -> CompareResult; + /// Compares self to another value for equality + /// independently of the case. + /// + /// Warning: for `&str`, the comparison is done + /// by lowercasing both strings and comparing + /// the result. This is a temporary solution until + /// a better one appears + fn compare_no_case(&self, t: T) -> CompareResult; +} + +fn lowercase_byte(c: u8) -> u8 { + match c { + b'A'..=b'Z' => c - b'A' + b'a', + _ => c, + } +} + +impl<'a, 'b> Compare<&'b [u8]> for &'a [u8] { + #[inline(always)] + fn compare(&self, t: &'b [u8]) -> CompareResult { + let pos = self.iter().zip(t.iter()).position(|(a, b)| a != b); + + match pos { + Some(_) => CompareResult::Error, + None => { + if self.len() >= t.len() { + CompareResult::Ok + } else { + CompareResult::Incomplete + } + } + } + + /* + let len = self.len(); + let blen = t.len(); + let m = if len < blen { len } else { blen }; + let reduced = &self[..m]; + let b = &t[..m]; + + if reduced != b { + CompareResult::Error + } else if m < blen { + CompareResult::Incomplete + } else { + CompareResult::Ok + } + */ + } + + #[inline(always)] + fn compare_no_case(&self, t: &'b [u8]) -> CompareResult { + if self + .iter() + .zip(t) + .any(|(a, b)| lowercase_byte(*a) != lowercase_byte(*b)) + { + CompareResult::Error + } else if self.len() < t.len() { + CompareResult::Incomplete + } else { + CompareResult::Ok + } + } +} + +impl< + T: InputLength + InputIter<Item = u8> + InputTake + UnspecializedInput, + O: InputLength + InputIter<Item = u8> + InputTake, + > Compare<O> for T +{ + #[inline(always)] + fn compare(&self, t: O) -> CompareResult { + let pos = self + .iter_elements() + .zip(t.iter_elements()) + .position(|(a, b)| a != b); + + match pos { + Some(_) => CompareResult::Error, + None => { + if self.input_len() >= t.input_len() { + CompareResult::Ok + } else { + CompareResult::Incomplete + } + } + } + } + + #[inline(always)] + fn compare_no_case(&self, t: O) -> CompareResult { + if self + .iter_elements() + .zip(t.iter_elements()) + .any(|(a, b)| lowercase_byte(a) != lowercase_byte(b)) + { + CompareResult::Error + } else if self.input_len() < t.input_len() { + CompareResult::Incomplete + } else { + CompareResult::Ok + } + } +} + +impl<'a, 'b> Compare<&'b str> for &'a [u8] { + #[inline(always)] + fn compare(&self, t: &'b str) -> CompareResult { + self.compare(AsBytes::as_bytes(t)) + } + #[inline(always)] + fn compare_no_case(&self, t: &'b str) -> CompareResult { + self.compare_no_case(AsBytes::as_bytes(t)) + } +} + +impl<'a, 'b> Compare<&'b str> for &'a str { + #[inline(always)] + fn compare(&self, t: &'b str) -> CompareResult { + self.as_bytes().compare(t.as_bytes()) + } + + //FIXME: this version is too simple and does not use the current locale + #[inline(always)] + fn compare_no_case(&self, t: &'b str) -> CompareResult { + let pos = self + .chars() + .zip(t.chars()) + .position(|(a, b)| a.to_lowercase().ne(b.to_lowercase())); + + match pos { + Some(_) => CompareResult::Error, + None => { + if self.len() >= t.len() { + CompareResult::Ok + } else { + CompareResult::Incomplete + } + } + } + } +} + +impl<'a, 'b> Compare<&'b [u8]> for &'a str { + #[inline(always)] + fn compare(&self, t: &'b [u8]) -> CompareResult { + AsBytes::as_bytes(self).compare(t) + } + #[inline(always)] + fn compare_no_case(&self, t: &'b [u8]) -> CompareResult { + AsBytes::as_bytes(self).compare_no_case(t) + } +} + +/// Look for a token in self +pub trait FindToken<T> { + /// Returns true if self contains the token + fn find_token(&self, token: T) -> bool; +} + +impl<'a> FindToken<u8> for &'a [u8] { + fn find_token(&self, token: u8) -> bool { + memchr::memchr(token, self).is_some() + } +} + +impl<'a> FindToken<u8> for &'a str { + fn find_token(&self, token: u8) -> bool { + self.as_bytes().find_token(token) + } +} + +impl<'a, 'b> FindToken<&'a u8> for &'b [u8] { + fn find_token(&self, token: &u8) -> bool { + self.find_token(*token) + } +} + +impl<'a, 'b> FindToken<&'a u8> for &'b str { + fn find_token(&self, token: &u8) -> bool { + self.as_bytes().find_token(token) + } +} + +impl<'a> FindToken<char> for &'a [u8] { + fn find_token(&self, token: char) -> bool { + self.iter().any(|i| *i == token as u8) + } +} + +impl<'a> FindToken<char> for &'a str { + fn find_token(&self, token: char) -> bool { + self.chars().any(|i| i == token) + } +} + +impl<'a> FindToken<char> for &'a [char] { + fn find_token(&self, token: char) -> bool { + self.iter().any(|i| *i == token) + } +} + +impl<'a, 'b> FindToken<&'a char> for &'b [char] { + fn find_token(&self, token: &char) -> bool { + self.find_token(*token) + } +} + +/// Look for a substring in self +pub trait FindSubstring<T> { + /// Returns the byte position of the substring if it is found + fn find_substring(&self, substr: T) -> Option<usize>; +} + +impl<'a, 'b> FindSubstring<&'b [u8]> for &'a [u8] { + fn find_substring(&self, substr: &'b [u8]) -> Option<usize> { + if substr.len() > self.len() { + return None; + } + + let (&substr_first, substr_rest) = match substr.split_first() { + Some(split) => split, + // an empty substring is found at position 0 + // This matches the behavior of str.find(""). + None => return Some(0), + }; + + if substr_rest.is_empty() { + return memchr::memchr(substr_first, self); + } + + let mut offset = 0; + let haystack = &self[..self.len() - substr_rest.len()]; + + while let Some(position) = memchr::memchr(substr_first, &haystack[offset..]) { + offset += position; + let next_offset = offset + 1; + if &self[next_offset..][..substr_rest.len()] == substr_rest { + return Some(offset); + } + + offset = next_offset; + } + + None + } +} + +impl<'a, 'b> FindSubstring<&'b str> for &'a [u8] { + fn find_substring(&self, substr: &'b str) -> Option<usize> { + self.find_substring(AsBytes::as_bytes(substr)) + } +} + +impl<'a, 'b> FindSubstring<&'b str> for &'a str { + //returns byte index + fn find_substring(&self, substr: &'b str) -> Option<usize> { + self.find(substr) + } +} + +/// Used to integrate `str`'s `parse()` method +pub trait ParseTo<R> { + /// Succeeds if `parse()` succeeded. The byte slice implementation + /// will first convert it to a `&str`, then apply the `parse()` function + fn parse_to(&self) -> Option<R>; +} + +impl<'a, R: FromStr> ParseTo<R> for &'a [u8] { + fn parse_to(&self) -> Option<R> { + from_utf8(self).ok().and_then(|s| s.parse().ok()) + } +} + +impl<'a, R: FromStr> ParseTo<R> for &'a str { + fn parse_to(&self) -> Option<R> { + self.parse().ok() + } +} + +/// Slicing operations using ranges. +/// +/// This trait is loosely based on +/// `Index`, but can actually return +/// something else than a `&[T]` or `&str` +pub trait Slice<R> { + /// Slices self according to the range argument + fn slice(&self, range: R) -> Self; +} + +macro_rules! impl_fn_slice { + ( $ty:ty ) => { + fn slice(&self, range: $ty) -> Self { + &self[range] + } + }; +} + +macro_rules! slice_range_impl { + ( [ $for_type:ident ], $ty:ty ) => { + impl<'a, $for_type> Slice<$ty> for &'a [$for_type] { + impl_fn_slice!($ty); + } + }; + ( $for_type:ty, $ty:ty ) => { + impl<'a> Slice<$ty> for &'a $for_type { + impl_fn_slice!($ty); + } + }; +} + +macro_rules! slice_ranges_impl { + ( [ $for_type:ident ] ) => { + slice_range_impl! {[$for_type], Range<usize>} + slice_range_impl! {[$for_type], RangeTo<usize>} + slice_range_impl! {[$for_type], RangeFrom<usize>} + slice_range_impl! {[$for_type], RangeFull} + }; + ( $for_type:ty ) => { + slice_range_impl! {$for_type, Range<usize>} + slice_range_impl! {$for_type, RangeTo<usize>} + slice_range_impl! {$for_type, RangeFrom<usize>} + slice_range_impl! {$for_type, RangeFull} + }; +} + +slice_ranges_impl! {str} +slice_ranges_impl! {[T]} + +macro_rules! array_impls { + ($($N:expr)+) => { + $( + impl InputLength for [u8; $N] { + #[inline] + fn input_len(&self) -> usize { + self.len() + } + } + + impl<'a> InputLength for &'a [u8; $N] { + #[inline] + fn input_len(&self) -> usize { + self.len() + } + } + + impl<'a> InputIter for &'a [u8; $N] { + type Item = u8; + type Iter = Enumerate<Self::IterElem>; + type IterElem = Copied<Iter<'a, u8>>; + + fn iter_indices(&self) -> Self::Iter { + (&self[..]).iter_indices() + } + + fn iter_elements(&self) -> Self::IterElem { + (&self[..]).iter_elements() + } + + fn position<P>(&self, predicate: P) -> Option<usize> + where P: Fn(Self::Item) -> bool { + (&self[..]).position(predicate) + } + + fn slice_index(&self, count: usize) -> Result<usize, Needed> { + (&self[..]).slice_index(count) + } + } + + impl<'a> Compare<[u8; $N]> for &'a [u8] { + #[inline(always)] + fn compare(&self, t: [u8; $N]) -> CompareResult { + self.compare(&t[..]) + } + + #[inline(always)] + fn compare_no_case(&self, t: [u8;$N]) -> CompareResult { + self.compare_no_case(&t[..]) + } + } + + impl<'a,'b> Compare<&'b [u8; $N]> for &'a [u8] { + #[inline(always)] + fn compare(&self, t: &'b [u8; $N]) -> CompareResult { + self.compare(&t[..]) + } + + #[inline(always)] + fn compare_no_case(&self, t: &'b [u8;$N]) -> CompareResult { + self.compare_no_case(&t[..]) + } + } + + impl FindToken<u8> for [u8; $N] { + fn find_token(&self, token: u8) -> bool { + memchr::memchr(token, &self[..]).is_some() + } + } + + impl<'a> FindToken<&'a u8> for [u8; $N] { + fn find_token(&self, token: &u8) -> bool { + self.find_token(*token) + } + } + )+ + }; +} + +array_impls! { + 0 1 2 3 4 5 6 7 8 9 + 10 11 12 13 14 15 16 17 18 19 + 20 21 22 23 24 25 26 27 28 29 + 30 31 32 +} + +/// Abstracts something which can extend an `Extend`. +/// Used to build modified input slices in `escaped_transform` +pub trait ExtendInto { + /// The current input type is a sequence of that `Item` type. + /// + /// Example: `u8` for `&[u8]` or `char` for `&str` + type Item; + + /// The type that will be produced + type Extender; + + /// Create a new `Extend` of the correct type + fn new_builder(&self) -> Self::Extender; + /// Accumulate the input into an accumulator + fn extend_into(&self, acc: &mut Self::Extender); +} + +#[cfg(feature = "alloc")] +impl ExtendInto for [u8] { + type Item = u8; + type Extender = Vec<u8>; + + #[inline] + fn new_builder(&self) -> Vec<u8> { + Vec::new() + } + #[inline] + fn extend_into(&self, acc: &mut Vec<u8>) { + acc.extend(self.iter().cloned()); + } +} + +#[cfg(feature = "alloc")] +impl ExtendInto for &[u8] { + type Item = u8; + type Extender = Vec<u8>; + + #[inline] + fn new_builder(&self) -> Vec<u8> { + Vec::new() + } + #[inline] + fn extend_into(&self, acc: &mut Vec<u8>) { + acc.extend_from_slice(self); + } +} + +#[cfg(feature = "alloc")] +impl ExtendInto for str { + type Item = char; + type Extender = String; + + #[inline] + fn new_builder(&self) -> String { + String::new() + } + #[inline] + fn extend_into(&self, acc: &mut String) { + acc.push_str(self); + } +} + +#[cfg(feature = "alloc")] +impl ExtendInto for &str { + type Item = char; + type Extender = String; + + #[inline] + fn new_builder(&self) -> String { + String::new() + } + #[inline] + fn extend_into(&self, acc: &mut String) { + acc.push_str(self); + } +} + +#[cfg(feature = "alloc")] +impl ExtendInto for char { + type Item = char; + type Extender = String; + + #[inline] + fn new_builder(&self) -> String { + String::new() + } + #[inline] + fn extend_into(&self, acc: &mut String) { + acc.push(*self); + } +} + +/// Helper trait to convert numbers to usize. +/// +/// By default, usize implements `From<u8>` and `From<u16>` but not +/// `From<u32>` and `From<u64>` because that would be invalid on some +/// platforms. This trait implements the conversion for platforms +/// with 32 and 64 bits pointer platforms +pub trait ToUsize { + /// converts self to usize + fn to_usize(&self) -> usize; +} + +impl ToUsize for u8 { + #[inline] + fn to_usize(&self) -> usize { + *self as usize + } +} + +impl ToUsize for u16 { + #[inline] + fn to_usize(&self) -> usize { + *self as usize + } +} + +impl ToUsize for usize { + #[inline] + fn to_usize(&self) -> usize { + *self + } +} + +#[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] +impl ToUsize for u32 { + #[inline] + fn to_usize(&self) -> usize { + *self as usize + } +} + +#[cfg(target_pointer_width = "64")] +impl ToUsize for u64 { + #[inline] + fn to_usize(&self) -> usize { + *self as usize + } +} + +/// Equivalent From implementation to avoid orphan rules in bits parsers +pub trait ErrorConvert<E> { + /// Transform to another error type + fn convert(self) -> E; +} + +impl<I> ErrorConvert<(I, ErrorKind)> for ((I, usize), ErrorKind) { + fn convert(self) -> (I, ErrorKind) { + ((self.0).0, self.1) + } +} + +impl<I> ErrorConvert<((I, usize), ErrorKind)> for (I, ErrorKind) { + fn convert(self) -> ((I, usize), ErrorKind) { + ((self.0, 0), self.1) + } +} + +use crate::error; +impl<I> ErrorConvert<error::Error<I>> for error::Error<(I, usize)> { + fn convert(self) -> error::Error<I> { + error::Error { + input: self.input.0, + code: self.code, + } + } +} + +impl<I> ErrorConvert<error::Error<(I, usize)>> for error::Error<I> { + fn convert(self) -> error::Error<(I, usize)> { + error::Error { + input: (self.input, 0), + code: self.code, + } + } +} + +#[cfg(feature = "alloc")] +#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] +impl<I> ErrorConvert<error::VerboseError<I>> for error::VerboseError<(I, usize)> { + fn convert(self) -> error::VerboseError<I> { + error::VerboseError { + errors: self.errors.into_iter().map(|(i, e)| (i.0, e)).collect(), + } + } +} + +#[cfg(feature = "alloc")] +#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] +impl<I> ErrorConvert<error::VerboseError<(I, usize)>> for error::VerboseError<I> { + fn convert(self) -> error::VerboseError<(I, usize)> { + error::VerboseError { + errors: self.errors.into_iter().map(|(i, e)| ((i, 0), e)).collect(), + } + } +} + +impl ErrorConvert<()> for () { + fn convert(self) {} +} + +#[cfg(feature = "std")] +#[cfg_attr(feature = "docsrs", doc(cfg(feature = "std")))] +/// Helper trait to show a byte slice as a hex dump +pub trait HexDisplay { + /// Converts the value of `self` to a hex dump, returning the owned + /// `String`. + fn to_hex(&self, chunk_size: usize) -> String; + + /// Converts the value of `self` to a hex dump beginning at `from` address, returning the owned + /// `String`. + fn to_hex_from(&self, chunk_size: usize, from: usize) -> String; +} + +#[cfg(feature = "std")] +static CHARS: &[u8] = b"0123456789abcdef"; + +#[cfg(feature = "std")] +impl HexDisplay for [u8] { + #[allow(unused_variables)] + fn to_hex(&self, chunk_size: usize) -> String { + self.to_hex_from(chunk_size, 0) + } + + #[allow(unused_variables)] + fn to_hex_from(&self, chunk_size: usize, from: usize) -> String { + let mut v = Vec::with_capacity(self.len() * 3); + let mut i = from; + for chunk in self.chunks(chunk_size) { + let s = format!("{:08x}", i); + for &ch in s.as_bytes().iter() { + v.push(ch); + } + v.push(b'\t'); + + i += chunk_size; + + for &byte in chunk { + v.push(CHARS[(byte >> 4) as usize]); + v.push(CHARS[(byte & 0xf) as usize]); + v.push(b' '); + } + if chunk_size > chunk.len() { + for j in 0..(chunk_size - chunk.len()) { + v.push(b' '); + v.push(b' '); + v.push(b' '); + } + } + v.push(b'\t'); + + for &byte in chunk { + if (byte >= 32 && byte <= 126) || byte >= 128 { + v.push(byte); + } else { + v.push(b'.'); + } + } + v.push(b'\n'); + } + + String::from_utf8_lossy(&v[..]).into_owned() + } +} + +#[cfg(feature = "std")] +impl HexDisplay for str { + #[allow(unused_variables)] + fn to_hex(&self, chunk_size: usize) -> String { + self.to_hex_from(chunk_size, 0) + } + + #[allow(unused_variables)] + fn to_hex_from(&self, chunk_size: usize, from: usize) -> String { + self.as_bytes().to_hex_from(chunk_size, from) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_offset_u8() { + let s = b"abcd123"; + let a = &s[..]; + let b = &a[2..]; + let c = &a[..4]; + let d = &a[3..5]; + assert_eq!(a.offset(b), 2); + assert_eq!(a.offset(c), 0); + assert_eq!(a.offset(d), 3); + } + + #[test] + fn test_offset_str() { + let s = "abcřèÂßÇd123"; + let a = &s[..]; + let b = &a[7..]; + let c = &a[..5]; + let d = &a[5..9]; + assert_eq!(a.offset(b), 7); + assert_eq!(a.offset(c), 0); + assert_eq!(a.offset(d), 5); + } +} diff --git a/third_party/rust/nom/tests/arithmetic.rs b/third_party/rust/nom/tests/arithmetic.rs new file mode 100644 index 0000000000..5b627a97a5 --- /dev/null +++ b/third_party/rust/nom/tests/arithmetic.rs @@ -0,0 +1,94 @@ +use nom::{ + branch::alt, + bytes::complete::tag, + character::complete::char, + character::complete::{digit1 as digit, space0 as space}, + combinator::map_res, + multi::fold_many0, + sequence::{delimited, pair}, + IResult, +}; + +// Parser definition + +use std::str::FromStr; + +// We parse any expr surrounded by parens, ignoring all whitespaces around those +fn parens(i: &str) -> IResult<&str, i64> { + delimited(space, delimited(tag("("), expr, tag(")")), space)(i) +} + +// We transform an integer string into a i64, ignoring surrounding whitespaces +// We look for a digit suite, and try to convert it. +// If either str::from_utf8 or FromStr::from_str fail, +// we fallback to the parens parser defined above +fn factor(i: &str) -> IResult<&str, i64> { + alt(( + map_res(delimited(space, digit, space), FromStr::from_str), + parens, + ))(i) +} + +// We read an initial factor and for each time we find +// a * or / operator followed by another factor, we do +// the math by folding everything +fn term(i: &str) -> IResult<&str, i64> { + let (i, init) = factor(i)?; + + fold_many0( + pair(alt((char('*'), char('/'))), factor), + move || init, + |acc, (op, val): (char, i64)| { + if op == '*' { + acc * val + } else { + acc / val + } + }, + )(i) +} + +fn expr(i: &str) -> IResult<&str, i64> { + let (i, init) = term(i)?; + + fold_many0( + pair(alt((char('+'), char('-'))), term), + move || init, + |acc, (op, val): (char, i64)| { + if op == '+' { + acc + val + } else { + acc - val + } + }, + )(i) +} + +#[test] +fn factor_test() { + assert_eq!(factor("3"), Ok(("", 3))); + assert_eq!(factor(" 12"), Ok(("", 12))); + assert_eq!(factor("537 "), Ok(("", 537))); + assert_eq!(factor(" 24 "), Ok(("", 24))); +} + +#[test] +fn term_test() { + assert_eq!(term(" 12 *2 / 3"), Ok(("", 8))); + assert_eq!(term(" 2* 3 *2 *2 / 3"), Ok(("", 8))); + assert_eq!(term(" 48 / 3/2"), Ok(("", 8))); +} + +#[test] +fn expr_test() { + assert_eq!(expr(" 1 + 2 "), Ok(("", 3))); + assert_eq!(expr(" 12 + 6 - 4+ 3"), Ok(("", 17))); + assert_eq!(expr(" 1 + 2*3 + 4"), Ok(("", 11))); +} + +#[test] +fn parens_test() { + assert_eq!(expr(" ( 2 )"), Ok(("", 2))); + assert_eq!(expr(" 2* ( 3 + 4 ) "), Ok(("", 14))); + assert_eq!(expr(" 2*2 / ( 5 - 1) + 3"), Ok(("", 4))); +} diff --git a/third_party/rust/nom/tests/arithmetic_ast.rs b/third_party/rust/nom/tests/arithmetic_ast.rs new file mode 100644 index 0000000000..ca15110960 --- /dev/null +++ b/third_party/rust/nom/tests/arithmetic_ast.rs @@ -0,0 +1,161 @@ +use std::fmt; +use std::fmt::{Debug, Display, Formatter}; + +use std::str::FromStr; + +use nom::{ + branch::alt, + bytes::complete::tag, + character::complete::{digit1 as digit, multispace0 as multispace}, + combinator::{map, map_res}, + multi::many0, + sequence::{delimited, preceded}, + IResult, +}; + +pub enum Expr { + Value(i64), + Add(Box<Expr>, Box<Expr>), + Sub(Box<Expr>, Box<Expr>), + Mul(Box<Expr>, Box<Expr>), + Div(Box<Expr>, Box<Expr>), + Paren(Box<Expr>), +} + +#[derive(Debug)] +pub enum Oper { + Add, + Sub, + Mul, + Div, +} + +impl Display for Expr { + fn fmt(&self, format: &mut Formatter<'_>) -> fmt::Result { + use self::Expr::*; + match *self { + Value(val) => write!(format, "{}", val), + Add(ref left, ref right) => write!(format, "{} + {}", left, right), + Sub(ref left, ref right) => write!(format, "{} - {}", left, right), + Mul(ref left, ref right) => write!(format, "{} * {}", left, right), + Div(ref left, ref right) => write!(format, "{} / {}", left, right), + Paren(ref expr) => write!(format, "({})", expr), + } + } +} + +impl Debug for Expr { + fn fmt(&self, format: &mut Formatter<'_>) -> fmt::Result { + use self::Expr::*; + match *self { + Value(val) => write!(format, "{}", val), + Add(ref left, ref right) => write!(format, "({:?} + {:?})", left, right), + Sub(ref left, ref right) => write!(format, "({:?} - {:?})", left, right), + Mul(ref left, ref right) => write!(format, "({:?} * {:?})", left, right), + Div(ref left, ref right) => write!(format, "({:?} / {:?})", left, right), + Paren(ref expr) => write!(format, "[{:?}]", expr), + } + } +} + +fn parens(i: &str) -> IResult<&str, Expr> { + delimited( + multispace, + delimited(tag("("), map(expr, |e| Expr::Paren(Box::new(e))), tag(")")), + multispace, + )(i) +} + +fn factor(i: &str) -> IResult<&str, Expr> { + alt(( + map( + map_res(delimited(multispace, digit, multispace), FromStr::from_str), + Expr::Value, + ), + parens, + ))(i) +} + +fn fold_exprs(initial: Expr, remainder: Vec<(Oper, Expr)>) -> Expr { + remainder.into_iter().fold(initial, |acc, pair| { + let (oper, expr) = pair; + match oper { + Oper::Add => Expr::Add(Box::new(acc), Box::new(expr)), + Oper::Sub => Expr::Sub(Box::new(acc), Box::new(expr)), + Oper::Mul => Expr::Mul(Box::new(acc), Box::new(expr)), + Oper::Div => Expr::Div(Box::new(acc), Box::new(expr)), + } + }) +} + +fn term(i: &str) -> IResult<&str, Expr> { + let (i, initial) = factor(i)?; + let (i, remainder) = many0(alt(( + |i| { + let (i, mul) = preceded(tag("*"), factor)(i)?; + Ok((i, (Oper::Mul, mul))) + }, + |i| { + let (i, div) = preceded(tag("/"), factor)(i)?; + Ok((i, (Oper::Div, div))) + }, + )))(i)?; + + Ok((i, fold_exprs(initial, remainder))) +} + +fn expr(i: &str) -> IResult<&str, Expr> { + let (i, initial) = term(i)?; + let (i, remainder) = many0(alt(( + |i| { + let (i, add) = preceded(tag("+"), term)(i)?; + Ok((i, (Oper::Add, add))) + }, + |i| { + let (i, sub) = preceded(tag("-"), term)(i)?; + Ok((i, (Oper::Sub, sub))) + }, + )))(i)?; + + Ok((i, fold_exprs(initial, remainder))) +} + +#[test] +fn factor_test() { + assert_eq!( + factor(" 3 ").map(|(i, x)| (i, format!("{:?}", x))), + Ok(("", String::from("3"))) + ); +} + +#[test] +fn term_test() { + assert_eq!( + term(" 3 * 5 ").map(|(i, x)| (i, format!("{:?}", x))), + Ok(("", String::from("(3 * 5)"))) + ); +} + +#[test] +fn expr_test() { + assert_eq!( + expr(" 1 + 2 * 3 ").map(|(i, x)| (i, format!("{:?}", x))), + Ok(("", String::from("(1 + (2 * 3))"))) + ); + assert_eq!( + expr(" 1 + 2 * 3 / 4 - 5 ").map(|(i, x)| (i, format!("{:?}", x))), + Ok(("", String::from("((1 + ((2 * 3) / 4)) - 5)"))) + ); + assert_eq!( + expr(" 72 / 2 / 3 ").map(|(i, x)| (i, format!("{:?}", x))), + Ok(("", String::from("((72 / 2) / 3)"))) + ); +} + +#[test] +fn parens_test() { + assert_eq!( + expr(" ( 1 + 2 ) * 3 ").map(|(i, x)| (i, format!("{:?}", x))), + Ok(("", String::from("([(1 + 2)] * 3)"))) + ); +} diff --git a/third_party/rust/nom/tests/css.rs b/third_party/rust/nom/tests/css.rs new file mode 100644 index 0000000000..ad3d72b8fa --- /dev/null +++ b/third_party/rust/nom/tests/css.rs @@ -0,0 +1,45 @@ +use nom::bytes::complete::{tag, take_while_m_n}; +use nom::combinator::map_res; +use nom::sequence::tuple; +use nom::IResult; + +#[derive(Debug, PartialEq)] +pub struct Color { + pub red: u8, + pub green: u8, + pub blue: u8, +} + +fn from_hex(input: &str) -> Result<u8, std::num::ParseIntError> { + u8::from_str_radix(input, 16) +} + +fn is_hex_digit(c: char) -> bool { + c.is_digit(16) +} + +fn hex_primary(input: &str) -> IResult<&str, u8> { + map_res(take_while_m_n(2, 2, is_hex_digit), from_hex)(input) +} + +fn hex_color(input: &str) -> IResult<&str, Color> { + let (input, _) = tag("#")(input)?; + let (input, (red, green, blue)) = tuple((hex_primary, hex_primary, hex_primary))(input)?; + + Ok((input, Color { red, green, blue })) +} + +#[test] +fn parse_color() { + assert_eq!( + hex_color("#2F14DF"), + Ok(( + "", + Color { + red: 47, + green: 20, + blue: 223, + } + )) + ); +} diff --git a/third_party/rust/nom/tests/custom_errors.rs b/third_party/rust/nom/tests/custom_errors.rs new file mode 100644 index 0000000000..2021713341 --- /dev/null +++ b/third_party/rust/nom/tests/custom_errors.rs @@ -0,0 +1,48 @@ +#![allow(dead_code)] + +use nom::bytes::streaming::tag; +use nom::character::streaming::digit1 as digit; +use nom::combinator::verify; +use nom::error::{ErrorKind, ParseError}; +#[cfg(feature = "alloc")] +use nom::multi::count; +use nom::sequence::terminated; +use nom::IResult; + +#[derive(Debug)] +pub struct CustomError(String); + +impl<'a> From<(&'a str, ErrorKind)> for CustomError { + fn from(error: (&'a str, ErrorKind)) -> Self { + CustomError(format!("error code was: {:?}", error)) + } +} + +impl<'a> ParseError<&'a str> for CustomError { + fn from_error_kind(_: &'a str, kind: ErrorKind) -> Self { + CustomError(format!("error code was: {:?}", kind)) + } + + fn append(_: &'a str, kind: ErrorKind, other: CustomError) -> Self { + CustomError(format!("{:?}\nerror code was: {:?}", other, kind)) + } +} + +fn test1(input: &str) -> IResult<&str, &str, CustomError> { + //fix_error!(input, CustomError, tag!("abcd")) + tag("abcd")(input) +} + +fn test2(input: &str) -> IResult<&str, &str, CustomError> { + //terminated!(input, test1, fix_error!(CustomError, digit)) + terminated(test1, digit)(input) +} + +fn test3(input: &str) -> IResult<&str, &str, CustomError> { + verify(test1, |s: &str| s.starts_with("abcd"))(input) +} + +#[cfg(feature = "alloc")] +fn test4(input: &str) -> IResult<&str, Vec<&str>, CustomError> { + count(test1, 4)(input) +} diff --git a/third_party/rust/nom/tests/escaped.rs b/third_party/rust/nom/tests/escaped.rs new file mode 100644 index 0000000000..47c6a71e52 --- /dev/null +++ b/third_party/rust/nom/tests/escaped.rs @@ -0,0 +1,28 @@ +use nom::bytes::complete::escaped; +use nom::character::complete::digit1; +use nom::character::complete::one_of; +use nom::{error::ErrorKind, Err, IResult}; + +fn esc(s: &str) -> IResult<&str, &str, (&str, ErrorKind)> { + escaped(digit1, '\\', one_of("\"n\\"))(s) +} + +#[cfg(feature = "alloc")] +fn esc_trans(s: &str) -> IResult<&str, String, (&str, ErrorKind)> { + use nom::bytes::complete::{escaped_transform, tag}; + escaped_transform(digit1, '\\', tag("n"))(s) +} + +#[test] +fn test_escaped() { + assert_eq!(esc("abcd"), Err(Err::Error(("abcd", ErrorKind::Escaped)))); +} + +#[test] +#[cfg(feature = "alloc")] +fn test_escaped_transform() { + assert_eq!( + esc_trans("abcd"), + Err(Err::Error(("abcd", ErrorKind::EscapedTransform))) + ); +} diff --git a/third_party/rust/nom/tests/float.rs b/third_party/rust/nom/tests/float.rs new file mode 100644 index 0000000000..634b189899 --- /dev/null +++ b/third_party/rust/nom/tests/float.rs @@ -0,0 +1,46 @@ +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::character::streaming::digit1 as digit; +use nom::combinator::{map, map_res, opt, recognize}; +use nom::sequence::{delimited, pair}; +use nom::IResult; + +use std::str; +use std::str::FromStr; + +fn unsigned_float(i: &[u8]) -> IResult<&[u8], f32> { + let float_bytes = recognize(alt(( + delimited(digit, tag("."), opt(digit)), + delimited(opt(digit), tag("."), digit), + ))); + let float_str = map_res(float_bytes, str::from_utf8); + map_res(float_str, FromStr::from_str)(i) +} + +fn float(i: &[u8]) -> IResult<&[u8], f32> { + map( + pair(opt(alt((tag("+"), tag("-")))), unsigned_float), + |(sign, value)| { + sign + .and_then(|s| if s[0] == b'-' { Some(-1f32) } else { None }) + .unwrap_or(1f32) + * value + }, + )(i) +} + +#[test] +fn unsigned_float_test() { + assert_eq!(unsigned_float(&b"123.456;"[..]), Ok((&b";"[..], 123.456))); + assert_eq!(unsigned_float(&b"0.123;"[..]), Ok((&b";"[..], 0.123))); + assert_eq!(unsigned_float(&b"123.0;"[..]), Ok((&b";"[..], 123.0))); + assert_eq!(unsigned_float(&b"123.;"[..]), Ok((&b";"[..], 123.0))); + assert_eq!(unsigned_float(&b".123;"[..]), Ok((&b";"[..], 0.123))); +} + +#[test] +fn float_test() { + assert_eq!(float(&b"123.456;"[..]), Ok((&b";"[..], 123.456))); + assert_eq!(float(&b"+123.456;"[..]), Ok((&b";"[..], 123.456))); + assert_eq!(float(&b"-123.456;"[..]), Ok((&b";"[..], -123.456))); +} diff --git a/third_party/rust/nom/tests/fnmut.rs b/third_party/rust/nom/tests/fnmut.rs new file mode 100644 index 0000000000..b1486cbe63 --- /dev/null +++ b/third_party/rust/nom/tests/fnmut.rs @@ -0,0 +1,39 @@ +use nom::{ + bytes::complete::tag, + multi::{many0, many0_count}, +}; + +#[test] +fn parse() { + let mut counter = 0; + + let res = { + let mut parser = many0::<_, _, (), _>(|i| { + counter += 1; + tag("abc")(i) + }); + + parser("abcabcabcabc").unwrap() + }; + + println!("res: {:?}", res); + assert_eq!(counter, 5); +} + +#[test] +fn accumulate() { + let mut v = Vec::new(); + + let (_, count) = { + let mut parser = many0_count::<_, _, (), _>(|i| { + let (i, o) = tag("abc")(i)?; + v.push(o); + Ok((i, ())) + }); + parser("abcabcabcabc").unwrap() + }; + + println!("v: {:?}", v); + assert_eq!(count, 4); + assert_eq!(v.len(), 4); +} diff --git a/third_party/rust/nom/tests/ini.rs b/third_party/rust/nom/tests/ini.rs new file mode 100644 index 0000000000..e556f44a3c --- /dev/null +++ b/third_party/rust/nom/tests/ini.rs @@ -0,0 +1,207 @@ +use nom::{ + bytes::complete::take_while, + character::complete::{ + alphanumeric1 as alphanumeric, char, multispace0 as multispace, space0 as space, + }, + combinator::{map, map_res, opt}, + multi::many0, + sequence::{delimited, pair, separated_pair, terminated, tuple}, + IResult, +}; + +use std::collections::HashMap; +use std::str; + +fn category(i: &[u8]) -> IResult<&[u8], &str> { + map_res( + delimited(char('['), take_while(|c| c != b']'), char(']')), + str::from_utf8, + )(i) +} + +fn key_value(i: &[u8]) -> IResult<&[u8], (&str, &str)> { + let (i, key) = map_res(alphanumeric, str::from_utf8)(i)?; + let (i, _) = tuple((opt(space), char('='), opt(space)))(i)?; + let (i, val) = map_res(take_while(|c| c != b'\n' && c != b';'), str::from_utf8)(i)?; + let (i, _) = opt(pair(char(';'), take_while(|c| c != b'\n')))(i)?; + Ok((i, (key, val))) +} + +fn keys_and_values(i: &[u8]) -> IResult<&[u8], HashMap<&str, &str>> { + map(many0(terminated(key_value, opt(multispace))), |vec| { + vec.into_iter().collect() + })(i) +} + +fn category_and_keys(i: &[u8]) -> IResult<&[u8], (&str, HashMap<&str, &str>)> { + let (i, category) = terminated(category, opt(multispace))(i)?; + let (i, keys) = keys_and_values(i)?; + Ok((i, (category, keys))) +} + +fn categories(i: &[u8]) -> IResult<&[u8], HashMap<&str, HashMap<&str, &str>>> { + map( + many0(separated_pair( + category, + opt(multispace), + map( + many0(terminated(key_value, opt(multispace))), + |vec: Vec<_>| vec.into_iter().collect(), + ), + )), + |vec: Vec<_>| vec.into_iter().collect(), + )(i) +} + +#[test] +fn parse_category_test() { + let ini_file = &b"[category] + +parameter=value +key = value2"[..]; + + let ini_without_category = &b"\n\nparameter=value +key = value2"[..]; + + let res = category(ini_file); + println!("{:?}", res); + match res { + Ok((i, o)) => println!("i: {:?} | o: {:?}", str::from_utf8(i), o), + _ => println!("error"), + } + + assert_eq!(res, Ok((ini_without_category, "category"))); +} + +#[test] +fn parse_key_value_test() { + let ini_file = &b"parameter=value +key = value2"[..]; + + let ini_without_key_value = &b"\nkey = value2"[..]; + + let res = key_value(ini_file); + println!("{:?}", res); + match res { + Ok((i, (o1, o2))) => println!("i: {:?} | o: ({:?},{:?})", str::from_utf8(i), o1, o2), + _ => println!("error"), + } + + assert_eq!(res, Ok((ini_without_key_value, ("parameter", "value")))); +} + +#[test] +fn parse_key_value_with_space_test() { + let ini_file = &b"parameter = value +key = value2"[..]; + + let ini_without_key_value = &b"\nkey = value2"[..]; + + let res = key_value(ini_file); + println!("{:?}", res); + match res { + Ok((i, (o1, o2))) => println!("i: {:?} | o: ({:?},{:?})", str::from_utf8(i), o1, o2), + _ => println!("error"), + } + + assert_eq!(res, Ok((ini_without_key_value, ("parameter", "value")))); +} + +#[test] +fn parse_key_value_with_comment_test() { + let ini_file = &b"parameter=value;abc +key = value2"[..]; + + let ini_without_key_value = &b"\nkey = value2"[..]; + + let res = key_value(ini_file); + println!("{:?}", res); + match res { + Ok((i, (o1, o2))) => println!("i: {:?} | o: ({:?},{:?})", str::from_utf8(i), o1, o2), + _ => println!("error"), + } + + assert_eq!(res, Ok((ini_without_key_value, ("parameter", "value")))); +} + +#[test] +fn parse_multiple_keys_and_values_test() { + let ini_file = &b"parameter=value;abc + +key = value2 + +[category]"[..]; + + let ini_without_key_value = &b"[category]"[..]; + + let res = keys_and_values(ini_file); + println!("{:?}", res); + match res { + Ok((i, ref o)) => println!("i: {:?} | o: {:?}", str::from_utf8(i), o), + _ => println!("error"), + } + + let mut expected: HashMap<&str, &str> = HashMap::new(); + expected.insert("parameter", "value"); + expected.insert("key", "value2"); + assert_eq!(res, Ok((ini_without_key_value, expected))); +} + +#[test] +fn parse_category_then_multiple_keys_and_values_test() { + //FIXME: there can be an empty line or a comment line after a category + let ini_file = &b"[abcd] +parameter=value;abc + +key = value2 + +[category]"[..]; + + let ini_after_parser = &b"[category]"[..]; + + let res = category_and_keys(ini_file); + println!("{:?}", res); + match res { + Ok((i, ref o)) => println!("i: {:?} | o: {:?}", str::from_utf8(i), o), + _ => println!("error"), + } + + let mut expected_h: HashMap<&str, &str> = HashMap::new(); + expected_h.insert("parameter", "value"); + expected_h.insert("key", "value2"); + assert_eq!(res, Ok((ini_after_parser, ("abcd", expected_h)))); +} + +#[test] +fn parse_multiple_categories_test() { + let ini_file = &b"[abcd] + +parameter=value;abc + +key = value2 + +[category] +parameter3=value3 +key4 = value4 +"[..]; + + let ini_after_parser = &b""[..]; + + let res = categories(ini_file); + //println!("{:?}", res); + match res { + Ok((i, ref o)) => println!("i: {:?} | o: {:?}", str::from_utf8(i), o), + _ => println!("error"), + } + + let mut expected_1: HashMap<&str, &str> = HashMap::new(); + expected_1.insert("parameter", "value"); + expected_1.insert("key", "value2"); + let mut expected_2: HashMap<&str, &str> = HashMap::new(); + expected_2.insert("parameter3", "value3"); + expected_2.insert("key4", "value4"); + let mut expected_h: HashMap<&str, HashMap<&str, &str>> = HashMap::new(); + expected_h.insert("abcd", expected_1); + expected_h.insert("category", expected_2); + assert_eq!(res, Ok((ini_after_parser, expected_h))); +} diff --git a/third_party/rust/nom/tests/ini_str.rs b/third_party/rust/nom/tests/ini_str.rs new file mode 100644 index 0000000000..3702303527 --- /dev/null +++ b/third_party/rust/nom/tests/ini_str.rs @@ -0,0 +1,217 @@ +use nom::{ + bytes::complete::{is_a, tag, take_till, take_while}, + character::complete::{alphanumeric1 as alphanumeric, char, space0 as space}, + combinator::opt, + multi::many0, + sequence::{delimited, pair, terminated, tuple}, + IResult, +}; + +use std::collections::HashMap; + +fn is_line_ending_or_comment(chr: char) -> bool { + chr == ';' || chr == '\n' +} + +fn not_line_ending(i: &str) -> IResult<&str, &str> { + take_while(|c| c != '\r' && c != '\n')(i) +} + +fn space_or_line_ending(i: &str) -> IResult<&str, &str> { + is_a(" \r\n")(i) +} + +fn category(i: &str) -> IResult<&str, &str> { + terminated( + delimited(char('['), take_while(|c| c != ']'), char(']')), + opt(is_a(" \r\n")), + )(i) +} + +fn key_value(i: &str) -> IResult<&str, (&str, &str)> { + let (i, key) = alphanumeric(i)?; + let (i, _) = tuple((opt(space), tag("="), opt(space)))(i)?; + let (i, val) = take_till(is_line_ending_or_comment)(i)?; + let (i, _) = opt(space)(i)?; + let (i, _) = opt(pair(tag(";"), not_line_ending))(i)?; + let (i, _) = opt(space_or_line_ending)(i)?; + + Ok((i, (key, val))) +} + +fn keys_and_values_aggregator(i: &str) -> IResult<&str, Vec<(&str, &str)>> { + many0(key_value)(i) +} + +fn keys_and_values(input: &str) -> IResult<&str, HashMap<&str, &str>> { + match keys_and_values_aggregator(input) { + Ok((i, tuple_vec)) => Ok((i, tuple_vec.into_iter().collect())), + Err(e) => Err(e), + } +} + +fn category_and_keys(i: &str) -> IResult<&str, (&str, HashMap<&str, &str>)> { + pair(category, keys_and_values)(i) +} + +fn categories_aggregator(i: &str) -> IResult<&str, Vec<(&str, HashMap<&str, &str>)>> { + many0(category_and_keys)(i) +} + +fn categories(input: &str) -> IResult<&str, HashMap<&str, HashMap<&str, &str>>> { + match categories_aggregator(input) { + Ok((i, tuple_vec)) => Ok((i, tuple_vec.into_iter().collect())), + Err(e) => Err(e), + } +} + +#[test] +fn parse_category_test() { + let ini_file = "[category] + +parameter=value +key = value2"; + + let ini_without_category = "parameter=value +key = value2"; + + let res = category(ini_file); + println!("{:?}", res); + match res { + Ok((i, o)) => println!("i: {} | o: {:?}", i, o), + _ => println!("error"), + } + + assert_eq!(res, Ok((ini_without_category, "category"))); +} + +#[test] +fn parse_key_value_test() { + let ini_file = "parameter=value +key = value2"; + + let ini_without_key_value = "key = value2"; + + let res = key_value(ini_file); + println!("{:?}", res); + match res { + Ok((i, (o1, o2))) => println!("i: {} | o: ({:?},{:?})", i, o1, o2), + _ => println!("error"), + } + + assert_eq!(res, Ok((ini_without_key_value, ("parameter", "value")))); +} + +#[test] +fn parse_key_value_with_space_test() { + let ini_file = "parameter = value +key = value2"; + + let ini_without_key_value = "key = value2"; + + let res = key_value(ini_file); + println!("{:?}", res); + match res { + Ok((i, (o1, o2))) => println!("i: {} | o: ({:?},{:?})", i, o1, o2), + _ => println!("error"), + } + + assert_eq!(res, Ok((ini_without_key_value, ("parameter", "value")))); +} + +#[test] +fn parse_key_value_with_comment_test() { + let ini_file = "parameter=value;abc +key = value2"; + + let ini_without_key_value = "key = value2"; + + let res = key_value(ini_file); + println!("{:?}", res); + match res { + Ok((i, (o1, o2))) => println!("i: {} | o: ({:?},{:?})", i, o1, o2), + _ => println!("error"), + } + + assert_eq!(res, Ok((ini_without_key_value, ("parameter", "value")))); +} + +#[test] +fn parse_multiple_keys_and_values_test() { + let ini_file = "parameter=value;abc + +key = value2 + +[category]"; + + let ini_without_key_value = "[category]"; + + let res = keys_and_values(ini_file); + println!("{:?}", res); + match res { + Ok((i, ref o)) => println!("i: {} | o: {:?}", i, o), + _ => println!("error"), + } + + let mut expected: HashMap<&str, &str> = HashMap::new(); + expected.insert("parameter", "value"); + expected.insert("key", "value2"); + assert_eq!(res, Ok((ini_without_key_value, expected))); +} + +#[test] +fn parse_category_then_multiple_keys_and_values_test() { + //FIXME: there can be an empty line or a comment line after a category + let ini_file = "[abcd] +parameter=value;abc + +key = value2 + +[category]"; + + let ini_after_parser = "[category]"; + + let res = category_and_keys(ini_file); + println!("{:?}", res); + match res { + Ok((i, ref o)) => println!("i: {} | o: {:?}", i, o), + _ => println!("error"), + } + + let mut expected_h: HashMap<&str, &str> = HashMap::new(); + expected_h.insert("parameter", "value"); + expected_h.insert("key", "value2"); + assert_eq!(res, Ok((ini_after_parser, ("abcd", expected_h)))); +} + +#[test] +fn parse_multiple_categories_test() { + let ini_file = "[abcd] + +parameter=value;abc + +key = value2 + +[category] +parameter3=value3 +key4 = value4 +"; + + let res = categories(ini_file); + //println!("{:?}", res); + match res { + Ok((i, ref o)) => println!("i: {} | o: {:?}", i, o), + _ => println!("error"), + } + + let mut expected_1: HashMap<&str, &str> = HashMap::new(); + expected_1.insert("parameter", "value"); + expected_1.insert("key", "value2"); + let mut expected_2: HashMap<&str, &str> = HashMap::new(); + expected_2.insert("parameter3", "value3"); + expected_2.insert("key4", "value4"); + let mut expected_h: HashMap<&str, HashMap<&str, &str>> = HashMap::new(); + expected_h.insert("abcd", expected_1); + expected_h.insert("category", expected_2); + assert_eq!(res, Ok(("", expected_h))); +} diff --git a/third_party/rust/nom/tests/issues.rs b/third_party/rust/nom/tests/issues.rs new file mode 100644 index 0000000000..7985702f67 --- /dev/null +++ b/third_party/rust/nom/tests/issues.rs @@ -0,0 +1,242 @@ +//#![feature(trace_macros)] +#![allow(dead_code)] +#![cfg_attr(feature = "cargo-clippy", allow(redundant_closure))] + +use nom::{error::ErrorKind, Err, IResult, Needed}; + +#[allow(dead_code)] +struct Range { + start: char, + end: char, +} + +pub fn take_char(input: &[u8]) -> IResult<&[u8], char> { + if !input.is_empty() { + Ok((&input[1..], input[0] as char)) + } else { + Err(Err::Incomplete(Needed::new(1))) + } +} + +#[cfg(feature = "std")] +mod parse_int { + use nom::HexDisplay; + use nom::{ + character::streaming::{digit1 as digit, space1 as space}, + combinator::{complete, map, opt}, + multi::many0, + IResult, + }; + use std::str; + + fn parse_ints(input: &[u8]) -> IResult<&[u8], Vec<i32>> { + many0(spaces_or_int)(input) + } + + fn spaces_or_int(input: &[u8]) -> IResult<&[u8], i32> { + println!("{}", input.to_hex(8)); + let (i, _) = opt(complete(space))(input)?; + let (i, res) = map(complete(digit), |x| { + println!("x: {:?}", x); + let result = str::from_utf8(x).unwrap(); + println!("Result: {}", result); + println!("int is empty?: {}", x.is_empty()); + match result.parse() { + Ok(i) => i, + Err(e) => panic!("UH OH! NOT A DIGIT! {:?}", e), + } + })(i)?; + + Ok((i, res)) + } + + #[test] + fn issue_142() { + let subject = parse_ints(&b"12 34 5689a"[..]); + let expected = Ok((&b"a"[..], vec![12, 34, 5689])); + assert_eq!(subject, expected); + + let subject = parse_ints(&b"12 34 5689 "[..]); + let expected = Ok((&b" "[..], vec![12, 34, 5689])); + assert_eq!(subject, expected) + } +} + +#[test] +fn usize_length_bytes_issue() { + use nom::multi::length_data; + use nom::number::streaming::be_u16; + let _: IResult<&[u8], &[u8], (&[u8], ErrorKind)> = length_data(be_u16)(b"012346"); +} + +#[test] +fn take_till_issue() { + use nom::bytes::streaming::take_till; + + fn nothing(i: &[u8]) -> IResult<&[u8], &[u8]> { + take_till(|_| true)(i) + } + + assert_eq!(nothing(b""), Err(Err::Incomplete(Needed::new(1)))); + assert_eq!(nothing(b"abc"), Ok((&b"abc"[..], &b""[..]))); +} + +#[test] +fn issue_655() { + use nom::character::streaming::{line_ending, not_line_ending}; + fn twolines(i: &str) -> IResult<&str, (&str, &str)> { + let (i, l1) = not_line_ending(i)?; + let (i, _) = line_ending(i)?; + let (i, l2) = not_line_ending(i)?; + let (i, _) = line_ending(i)?; + + Ok((i, (l1, l2))) + } + + assert_eq!(twolines("foo\nbar\n"), Ok(("", ("foo", "bar")))); + assert_eq!(twolines("féo\nbar\n"), Ok(("", ("féo", "bar")))); + assert_eq!(twolines("foé\nbar\n"), Ok(("", ("foé", "bar")))); + assert_eq!(twolines("foé\r\nbar\n"), Ok(("", ("foé", "bar")))); +} + +#[cfg(feature = "alloc")] +fn issue_717(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + use nom::bytes::complete::{is_not, tag}; + use nom::multi::separated_list0; + + separated_list0(tag([0x0]), is_not([0x0u8]))(i) +} + +mod issue_647 { + use nom::bytes::streaming::tag; + use nom::combinator::complete; + use nom::multi::separated_list0; + use nom::{error::Error, number::streaming::be_f64, Err, IResult}; + pub type Input<'a> = &'a [u8]; + + #[derive(PartialEq, Debug, Clone)] + struct Data { + c: f64, + v: Vec<f64>, + } + + fn list<'a, 'b>( + input: Input<'a>, + _cs: &'b f64, + ) -> Result<(Input<'a>, Vec<f64>), Err<Error<&'a [u8]>>> { + separated_list0(complete(tag(",")), complete(be_f64))(input) + } + + fn data(input: Input<'_>) -> IResult<Input<'_>, Data> { + let (i, c) = be_f64(input)?; + let (i, _) = tag("\n")(i)?; + let (i, v) = list(i, &c)?; + Ok((i, Data { c, v })) + } +} + +#[test] +fn issue_848_overflow_incomplete_bits_to_bytes() { + fn take(i: &[u8]) -> IResult<&[u8], &[u8]> { + use nom::bytes::streaming::take; + take(0x2000000000000000_usize)(i) + } + fn parser(i: &[u8]) -> IResult<&[u8], &[u8]> { + use nom::bits::{bits, bytes}; + + bits(bytes(take))(i) + } + assert_eq!( + parser(&b""[..]), + Err(Err::Failure(nom::error_position!( + &b""[..], + ErrorKind::TooLarge + ))) + ); +} + +#[test] +fn issue_942() { + use nom::error::{ContextError, ParseError}; + pub fn parser<'a, E: ParseError<&'a str> + ContextError<&'a str>>( + i: &'a str, + ) -> IResult<&'a str, usize, E> { + use nom::{character::complete::char, error::context, multi::many0_count}; + many0_count(context("char_a", char('a')))(i) + } + assert_eq!(parser::<()>("aaa"), Ok(("", 3))); +} + +#[test] +fn issue_many_m_n_with_zeros() { + use nom::character::complete::char; + use nom::multi::many_m_n; + let mut parser = many_m_n::<_, _, (), _>(0, 0, char('a')); + assert_eq!(parser("aaa"), Ok(("aaa", vec![]))); +} + +#[test] +fn issue_1027_convert_error_panic_nonempty() { + use nom::character::complete::char; + use nom::error::{convert_error, VerboseError}; + use nom::sequence::pair; + + let input = "a"; + + let result: IResult<_, _, VerboseError<&str>> = pair(char('a'), char('b'))(input); + let err = match result.unwrap_err() { + Err::Error(e) => e, + _ => unreachable!(), + }; + + let msg = convert_error(input, err); + assert_eq!( + msg, + "0: at line 1:\na\n ^\nexpected \'b\', got end of input\n\n" + ); +} + +#[test] +fn issue_1231_bits_expect_fn_closure() { + use nom::bits::{bits, complete::take}; + use nom::error::Error; + use nom::sequence::tuple; + pub fn example(input: &[u8]) -> IResult<&[u8], (u8, u8)> { + bits::<_, _, Error<_>, _, _>(tuple((take(1usize), take(1usize))))(input) + } + assert_eq!(example(&[0xff]), Ok((&b""[..], (1, 1)))); +} + +#[test] +fn issue_1282_findtoken_char() { + use nom::character::complete::one_of; + use nom::error::Error; + let parser = one_of::<_, _, Error<_>>(&['a', 'b', 'c'][..]); + assert_eq!(parser("aaa"), Ok(("aa", 'a'))); +} + +#[test] +fn issue_1459_clamp_capacity() { + use nom::character::complete::char; + + // shouldn't panic + use nom::multi::many_m_n; + let mut parser = many_m_n::<_, _, (), _>(usize::MAX, usize::MAX, char('a')); + assert_eq!(parser("a"), Err(nom::Err::Error(()))); + + // shouldn't panic + use nom::multi::count; + let mut parser = count::<_, _, (), _>(char('a'), usize::MAX); + assert_eq!(parser("a"), Err(nom::Err::Error(()))); +} + +#[test] +fn issue_1617_count_parser_returning_zero_size() { + use nom::{bytes::complete::tag, combinator::map, error::Error, multi::count}; + + // previously, `count()` panicked if the parser had type `O = ()` + let parser = map(tag::<_, _, Error<&str>>("abc"), |_| ()); + // shouldn't panic + let result = count(parser, 3)("abcabcabcdef").expect("parsing should succeed"); + assert_eq!(result, ("def", vec![(), (), ()])); +} diff --git a/third_party/rust/nom/tests/json.rs b/third_party/rust/nom/tests/json.rs new file mode 100644 index 0000000000..e8a06fd778 --- /dev/null +++ b/third_party/rust/nom/tests/json.rs @@ -0,0 +1,236 @@ +#![cfg(feature = "alloc")] + +use nom::{ + branch::alt, + bytes::complete::{tag, take}, + character::complete::{anychar, char, multispace0, none_of}, + combinator::{map, map_opt, map_res, value, verify}, + error::ParseError, + multi::{fold_many0, separated_list0}, + number::complete::double, + sequence::{delimited, preceded, separated_pair}, + IResult, Parser, +}; + +use std::collections::HashMap; + +#[derive(Debug, PartialEq, Clone)] +pub enum JsonValue { + Null, + Bool(bool), + Str(String), + Num(f64), + Array(Vec<JsonValue>), + Object(HashMap<String, JsonValue>), +} + +fn boolean(input: &str) -> IResult<&str, bool> { + alt((value(false, tag("false")), value(true, tag("true"))))(input) +} + +fn u16_hex(input: &str) -> IResult<&str, u16> { + map_res(take(4usize), |s| u16::from_str_radix(s, 16))(input) +} + +fn unicode_escape(input: &str) -> IResult<&str, char> { + map_opt( + alt(( + // Not a surrogate + map(verify(u16_hex, |cp| !(0xD800..0xE000).contains(cp)), |cp| { + cp as u32 + }), + // See https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF for details + map( + verify( + separated_pair(u16_hex, tag("\\u"), u16_hex), + |(high, low)| (0xD800..0xDC00).contains(high) && (0xDC00..0xE000).contains(low), + ), + |(high, low)| { + let high_ten = (high as u32) - 0xD800; + let low_ten = (low as u32) - 0xDC00; + (high_ten << 10) + low_ten + 0x10000 + }, + ), + )), + // Could be probably replaced with .unwrap() or _unchecked due to the verify checks + std::char::from_u32, + )(input) +} + +fn character(input: &str) -> IResult<&str, char> { + let (input, c) = none_of("\"")(input)?; + if c == '\\' { + alt(( + map_res(anychar, |c| { + Ok(match c { + '"' | '\\' | '/' => c, + 'b' => '\x08', + 'f' => '\x0C', + 'n' => '\n', + 'r' => '\r', + 't' => '\t', + _ => return Err(()), + }) + }), + preceded(char('u'), unicode_escape), + ))(input) + } else { + Ok((input, c)) + } +} + +fn string(input: &str) -> IResult<&str, String> { + delimited( + char('"'), + fold_many0(character, String::new, |mut string, c| { + string.push(c); + string + }), + char('"'), + )(input) +} + +fn ws<'a, O, E: ParseError<&'a str>, F: Parser<&'a str, O, E>>(f: F) -> impl Parser<&'a str, O, E> { + delimited(multispace0, f, multispace0) +} + +fn array(input: &str) -> IResult<&str, Vec<JsonValue>> { + delimited( + char('['), + ws(separated_list0(ws(char(',')), json_value)), + char(']'), + )(input) +} + +fn object(input: &str) -> IResult<&str, HashMap<String, JsonValue>> { + map( + delimited( + char('{'), + ws(separated_list0( + ws(char(',')), + separated_pair(string, ws(char(':')), json_value), + )), + char('}'), + ), + |key_values| key_values.into_iter().collect(), + )(input) +} + +fn json_value(input: &str) -> IResult<&str, JsonValue> { + use JsonValue::*; + + alt(( + value(Null, tag("null")), + map(boolean, Bool), + map(string, Str), + map(double, Num), + map(array, Array), + map(object, Object), + ))(input) +} + +fn json(input: &str) -> IResult<&str, JsonValue> { + ws(json_value).parse(input) +} + +#[test] +fn json_string() { + assert_eq!(string("\"\""), Ok(("", "".to_string()))); + assert_eq!(string("\"abc\""), Ok(("", "abc".to_string()))); + assert_eq!( + string("\"abc\\\"\\\\\\/\\b\\f\\n\\r\\t\\u0001\\u2014\u{2014}def\""), + Ok(("", "abc\"\\/\x08\x0C\n\r\t\x01——def".to_string())), + ); + assert_eq!(string("\"\\uD83D\\uDE10\""), Ok(("", "😐".to_string()))); + + assert!(string("\"").is_err()); + assert!(string("\"abc").is_err()); + assert!(string("\"\\\"").is_err()); + assert!(string("\"\\u123\"").is_err()); + assert!(string("\"\\uD800\"").is_err()); + assert!(string("\"\\uD800\\uD800\"").is_err()); + assert!(string("\"\\uDC00\"").is_err()); +} + +#[test] +fn json_object() { + use JsonValue::*; + + let input = r#"{"a":42,"b":"x"}"#; + + let expected = Object( + vec![ + ("a".to_string(), Num(42.0)), + ("b".to_string(), Str("x".to_string())), + ] + .into_iter() + .collect(), + ); + + assert_eq!(json(input), Ok(("", expected))); +} + +#[test] +fn json_array() { + use JsonValue::*; + + let input = r#"[42,"x"]"#; + + let expected = Array(vec![Num(42.0), Str("x".to_string())]); + + assert_eq!(json(input), Ok(("", expected))); +} + +#[test] +fn json_whitespace() { + use JsonValue::*; + + let input = r#" + { + "null" : null, + "true" :true , + "false": false , + "number" : 123e4 , + "string" : " abc 123 " , + "array" : [ false , 1 , "two" ] , + "object" : { "a" : 1.0 , "b" : "c" } , + "empty_array" : [ ] , + "empty_object" : { } + } + "#; + + assert_eq!( + json(input), + Ok(( + "", + Object( + vec![ + ("null".to_string(), Null), + ("true".to_string(), Bool(true)), + ("false".to_string(), Bool(false)), + ("number".to_string(), Num(123e4)), + ("string".to_string(), Str(" abc 123 ".to_string())), + ( + "array".to_string(), + Array(vec![Bool(false), Num(1.0), Str("two".to_string())]) + ), + ( + "object".to_string(), + Object( + vec![ + ("a".to_string(), Num(1.0)), + ("b".to_string(), Str("c".to_string())), + ] + .into_iter() + .collect() + ) + ), + ("empty_array".to_string(), Array(vec![]),), + ("empty_object".to_string(), Object(HashMap::new()),), + ] + .into_iter() + .collect() + ) + )) + ); +} diff --git a/third_party/rust/nom/tests/mp4.rs b/third_party/rust/nom/tests/mp4.rs new file mode 100644 index 0000000000..852bf29555 --- /dev/null +++ b/third_party/rust/nom/tests/mp4.rs @@ -0,0 +1,320 @@ +#![allow(dead_code)] + +use nom::{ + branch::alt, + bytes::streaming::{tag, take}, + combinator::{map, map_res}, + error::ErrorKind, + multi::many0, + number::streaming::{be_f32, be_u16, be_u32, be_u64}, + Err, IResult, Needed, +}; + +use std::str; + +fn mp4_box(input: &[u8]) -> IResult<&[u8], &[u8]> { + match be_u32(input) { + Ok((i, offset)) => { + let sz: usize = offset as usize; + if i.len() >= sz - 4 { + Ok((&i[(sz - 4)..], &i[0..(sz - 4)])) + } else { + Err(Err::Incomplete(Needed::new(offset as usize + 4))) + } + } + Err(e) => Err(e), + } +} + +#[cfg_attr(rustfmt, rustfmt_skip)] +#[derive(PartialEq,Eq,Debug)] +struct FileType<'a> { + major_brand: &'a str, + major_brand_version: &'a [u8], + compatible_brands: Vec<&'a str> +} + +#[cfg_attr(rustfmt, rustfmt_skip)] +#[allow(non_snake_case)] +#[derive(Debug,Clone)] +pub struct Mvhd32 { + version_flags: u32, // actually: + // version: u8, + // flags: u24 // 3 bytes + created_date: u32, + modified_date: u32, + scale: u32, + duration: u32, + speed: f32, + volume: u16, // actually a 2 bytes decimal + /* 10 bytes reserved */ + scaleA: f32, + rotateB: f32, + angleU: f32, + rotateC: f32, + scaleD: f32, + angleV: f32, + positionX: f32, + positionY: f32, + scaleW: f32, + preview: u64, + poster: u32, + selection: u64, + current_time: u32, + track_id: u32 +} + +#[cfg_attr(rustfmt, rustfmt_skip)] +#[allow(non_snake_case)] +#[derive(Debug,Clone)] +pub struct Mvhd64 { + version_flags: u32, // actually: + // version: u8, + // flags: u24 // 3 bytes + created_date: u64, + modified_date: u64, + scale: u32, + duration: u64, + speed: f32, + volume: u16, // actually a 2 bytes decimal + /* 10 bytes reserved */ + scaleA: f32, + rotateB: f32, + angleU: f32, + rotateC: f32, + scaleD: f32, + angleV: f32, + positionX: f32, + positionY: f32, + scaleW: f32, + preview: u64, + poster: u32, + selection: u64, + current_time: u32, + track_id: u32 +} + +#[cfg_attr(rustfmt, rustfmt_skip)] +fn mvhd32(i: &[u8]) -> IResult<&[u8], MvhdBox> { + let (i, version_flags) = be_u32(i)?; + let (i, created_date) = be_u32(i)?; + let (i, modified_date) = be_u32(i)?; + let (i, scale) = be_u32(i)?; + let (i, duration) = be_u32(i)?; + let (i, speed) = be_f32(i)?; + let (i, volume) = be_u16(i)?; // actually a 2 bytes decimal + let (i, _) = take(10_usize)(i)?; + let (i, scale_a) = be_f32(i)?; + let (i, rotate_b) = be_f32(i)?; + let (i, angle_u) = be_f32(i)?; + let (i, rotate_c) = be_f32(i)?; + let (i, scale_d) = be_f32(i)?; + let (i, angle_v) = be_f32(i)?; + let (i, position_x) = be_f32(i)?; + let (i, position_y) = be_f32(i)?; + let (i, scale_w) = be_f32(i)?; + let (i, preview) = be_u64(i)?; + let (i, poster) = be_u32(i)?; + let (i, selection) = be_u64(i)?; + let (i, current_time) = be_u32(i)?; + let (i, track_id) = be_u32(i)?; + + let mvhd_box = MvhdBox::M32(Mvhd32 { + version_flags, + created_date, + modified_date, + scale, + duration, + speed, + volume, + scaleA: scale_a, + rotateB: rotate_b, + angleU: angle_u, + rotateC: rotate_c, + scaleD: scale_d, + angleV: angle_v, + positionX: position_x, + positionY: position_y, + scaleW: scale_w, + preview, + poster, + selection, + current_time, + track_id, + }); + + Ok((i, mvhd_box)) +} + +#[cfg_attr(rustfmt, rustfmt_skip)] +fn mvhd64(i: &[u8]) -> IResult<&[u8], MvhdBox> { + let (i, version_flags) = be_u32(i)?; + let (i, created_date) = be_u64(i)?; + let (i, modified_date) = be_u64(i)?; + let (i, scale) = be_u32(i)?; + let (i, duration) = be_u64(i)?; + let (i, speed) = be_f32(i)?; + let (i, volume) = be_u16(i)?; // actually a 2 bytes decimal + let (i, _) = take(10_usize)(i)?; + let (i, scale_a) = be_f32(i)?; + let (i, rotate_b) = be_f32(i)?; + let (i, angle_u) = be_f32(i)?; + let (i, rotate_c) = be_f32(i)?; + let (i, scale_d) = be_f32(i)?; + let (i, angle_v) = be_f32(i)?; + let (i, position_x) = be_f32(i)?; + let (i, position_y) = be_f32(i)?; + let (i, scale_w) = be_f32(i)?; + let (i, preview) = be_u64(i)?; + let (i, poster) = be_u32(i)?; + let (i, selection) = be_u64(i)?; + let (i, current_time) = be_u32(i)?; + let (i, track_id) = be_u32(i)?; + + let mvhd_box = MvhdBox::M64(Mvhd64 { + version_flags, + created_date, + modified_date, + scale, + duration, + speed, + volume, + scaleA: scale_a, + rotateB: rotate_b, + angleU: angle_u, + rotateC: rotate_c, + scaleD: scale_d, + angleV: angle_v, + positionX: position_x, + positionY: position_y, + scaleW: scale_w, + preview, + poster, + selection, + current_time, + track_id, + }); + + Ok((i, mvhd_box)) +} + +#[derive(Debug, Clone)] +pub enum MvhdBox { + M32(Mvhd32), + M64(Mvhd64), +} + +#[derive(Debug, Clone)] +pub enum MoovBox { + Mdra, + Dref, + Cmov, + Rmra, + Iods, + Mvhd(MvhdBox), + Clip, + Trak, + Udta, +} + +#[derive(Debug)] +enum MP4BoxType { + Ftyp, + Moov, + Mdat, + Free, + Skip, + Wide, + Mdra, + Dref, + Cmov, + Rmra, + Iods, + Mvhd, + Clip, + Trak, + Udta, + Unknown, +} + +#[derive(Debug)] +struct MP4BoxHeader { + length: u32, + tag: MP4BoxType, +} + +fn brand_name(input: &[u8]) -> IResult<&[u8], &str> { + map_res(take(4_usize), str::from_utf8)(input) +} + +fn filetype_parser(input: &[u8]) -> IResult<&[u8], FileType<'_>> { + let (i, name) = brand_name(input)?; + let (i, version) = take(4_usize)(i)?; + let (i, brands) = many0(brand_name)(i)?; + + let ft = FileType { + major_brand: name, + major_brand_version: version, + compatible_brands: brands, + }; + Ok((i, ft)) +} + +fn mvhd_box(input: &[u8]) -> IResult<&[u8], MvhdBox> { + let res = if input.len() < 100 { + Err(Err::Incomplete(Needed::new(100))) + } else if input.len() == 100 { + mvhd32(input) + } else if input.len() == 112 { + mvhd64(input) + } else { + Err(Err::Error(nom::error_position!(input, ErrorKind::TooLarge))) + }; + println!("res: {:?}", res); + res +} + +fn unknown_box_type(input: &[u8]) -> IResult<&[u8], MP4BoxType> { + Ok((input, MP4BoxType::Unknown)) +} + +fn box_type(input: &[u8]) -> IResult<&[u8], MP4BoxType> { + alt(( + map(tag("ftyp"), |_| MP4BoxType::Ftyp), + map(tag("moov"), |_| MP4BoxType::Moov), + map(tag("mdat"), |_| MP4BoxType::Mdat), + map(tag("free"), |_| MP4BoxType::Free), + map(tag("skip"), |_| MP4BoxType::Skip), + map(tag("wide"), |_| MP4BoxType::Wide), + unknown_box_type, + ))(input) +} + +// warning, an alt combinator with 9 branches containing a tag combinator +// can make the compilation very slow. Use functions as sub parsers, +// or split into multiple alt parsers if it gets slow +fn moov_type(input: &[u8]) -> IResult<&[u8], MP4BoxType> { + alt(( + map(tag("mdra"), |_| MP4BoxType::Mdra), + map(tag("dref"), |_| MP4BoxType::Dref), + map(tag("cmov"), |_| MP4BoxType::Cmov), + map(tag("rmra"), |_| MP4BoxType::Rmra), + map(tag("iods"), |_| MP4BoxType::Iods), + map(tag("mvhd"), |_| MP4BoxType::Mvhd), + map(tag("clip"), |_| MP4BoxType::Clip), + map(tag("trak"), |_| MP4BoxType::Trak), + map(tag("udta"), |_| MP4BoxType::Udta), + ))(input) +} + +fn box_header(input: &[u8]) -> IResult<&[u8], MP4BoxHeader> { + let (i, length) = be_u32(input)?; + let (i, tag) = box_type(i)?; + Ok((i, MP4BoxHeader { length, tag })) +} + +fn moov_header(input: &[u8]) -> IResult<&[u8], MP4BoxHeader> { + let (i, length) = be_u32(input)?; + let (i, tag) = moov_type(i)?; + Ok((i, MP4BoxHeader { length, tag })) +} diff --git a/third_party/rust/nom/tests/multiline.rs b/third_party/rust/nom/tests/multiline.rs new file mode 100644 index 0000000000..7378b9e3b4 --- /dev/null +++ b/third_party/rust/nom/tests/multiline.rs @@ -0,0 +1,31 @@ +use nom::{ + character::complete::{alphanumeric1 as alphanumeric, line_ending as eol}, + multi::many0, + sequence::terminated, + IResult, +}; + +pub fn end_of_line(input: &str) -> IResult<&str, &str> { + if input.is_empty() { + Ok((input, input)) + } else { + eol(input) + } +} + +pub fn read_line(input: &str) -> IResult<&str, &str> { + terminated(alphanumeric, end_of_line)(input) +} + +pub fn read_lines(input: &str) -> IResult<&str, Vec<&str>> { + many0(read_line)(input) +} + +#[cfg(feature = "alloc")] +#[test] +fn read_lines_test() { + let res = Ok(("", vec!["Duck", "Dog", "Cow"])); + + assert_eq!(read_lines("Duck\nDog\nCow\n"), res); + assert_eq!(read_lines("Duck\nDog\nCow"), res); +} diff --git a/third_party/rust/nom/tests/overflow.rs b/third_party/rust/nom/tests/overflow.rs new file mode 100644 index 0000000000..ea513bb395 --- /dev/null +++ b/third_party/rust/nom/tests/overflow.rs @@ -0,0 +1,145 @@ +#![cfg_attr(feature = "cargo-clippy", allow(unreadable_literal))] +#![cfg(target_pointer_width = "64")] + +use nom::bytes::streaming::take; +#[cfg(feature = "alloc")] +use nom::multi::{length_data, many0}; +#[cfg(feature = "alloc")] +use nom::number::streaming::be_u64; +use nom::sequence::tuple; +use nom::{Err, IResult, Needed}; + +// Parser definition + +// We request a length that would trigger an overflow if computing consumed + requested +fn parser02(i: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> { + tuple((take(1_usize), take(18446744073709551615_usize)))(i) +} + +#[test] +fn overflow_incomplete_tuple() { + assert_eq!( + parser02(&b"3"[..]), + Err(Err::Incomplete(Needed::new(18446744073709551615))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn overflow_incomplete_length_bytes() { + fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + many0(length_data(be_u64))(i) + } + + // Trigger an overflow in length_data + assert_eq!( + multi(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xff"[..]), + Err(Err::Incomplete(Needed::new(18446744073709551615))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn overflow_incomplete_many0() { + fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + many0(length_data(be_u64))(i) + } + + // Trigger an overflow in many0 + assert_eq!( + multi(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xef"[..]), + Err(Err::Incomplete(Needed::new(18446744073709551599))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn overflow_incomplete_many1() { + use nom::multi::many1; + + fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + many1(length_data(be_u64))(i) + } + + // Trigger an overflow in many1 + assert_eq!( + multi(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xef"[..]), + Err(Err::Incomplete(Needed::new(18446744073709551599))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn overflow_incomplete_many_till() { + use nom::{bytes::complete::tag, multi::many_till}; + + fn multi(i: &[u8]) -> IResult<&[u8], (Vec<&[u8]>, &[u8])> { + many_till(length_data(be_u64), tag("abc"))(i) + } + + // Trigger an overflow in many_till + assert_eq!( + multi(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xef"[..]), + Err(Err::Incomplete(Needed::new(18446744073709551599))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn overflow_incomplete_many_m_n() { + use nom::multi::many_m_n; + + fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + many_m_n(2, 4, length_data(be_u64))(i) + } + + // Trigger an overflow in many_m_n + assert_eq!( + multi(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xef"[..]), + Err(Err::Incomplete(Needed::new(18446744073709551599))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn overflow_incomplete_count() { + use nom::multi::count; + + fn counter(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + count(length_data(be_u64), 2)(i) + } + + assert_eq!( + counter(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xef"[..]), + Err(Err::Incomplete(Needed::new(18446744073709551599))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn overflow_incomplete_length_count() { + use nom::multi::length_count; + use nom::number::streaming::be_u8; + + fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + length_count(be_u8, length_data(be_u64))(i) + } + + assert_eq!( + multi(&b"\x04\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xee"[..]), + Err(Err::Incomplete(Needed::new(18446744073709551598))) + ); +} + +#[test] +#[cfg(feature = "alloc")] +fn overflow_incomplete_length_data() { + fn multi(i: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { + many0(length_data(be_u64))(i) + } + + assert_eq!( + multi(&b"\x00\x00\x00\x00\x00\x00\x00\x01\xaa\xff\xff\xff\xff\xff\xff\xff\xff"[..]), + Err(Err::Incomplete(Needed::new(18446744073709551615))) + ); +} diff --git a/third_party/rust/nom/tests/reborrow_fold.rs b/third_party/rust/nom/tests/reborrow_fold.rs new file mode 100644 index 0000000000..486617e427 --- /dev/null +++ b/third_party/rust/nom/tests/reborrow_fold.rs @@ -0,0 +1,31 @@ +#![allow(dead_code)] +// #![allow(unused_variables)] + +use std::str; + +use nom::bytes::complete::is_not; +use nom::character::complete::char; +use nom::combinator::{map, map_res}; +use nom::multi::fold_many0; +use nom::sequence::delimited; +use nom::IResult; + +fn atom<'a>(_tomb: &'a mut ()) -> impl FnMut(&'a [u8]) -> IResult<&'a [u8], String> { + move |input| { + map( + map_res(is_not(" \t\r\n"), str::from_utf8), + ToString::to_string, + )(input) + } +} + +// FIXME: should we support the use case of borrowing data mutably in a parser? +fn list<'a>(i: &'a [u8], tomb: &'a mut ()) -> IResult<&'a [u8], String> { + delimited( + char('('), + fold_many0(atom(tomb), String::new, |acc: String, next: String| { + acc + next.as_str() + }), + char(')'), + )(i) +} |