summaryrefslogtreecommitdiffstats
path: root/vendor/winnow/src/_topic/language.rs
blob: 245bab4c7bafc9246aa94d22a50398cc6e1b9229 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
//! # Elements of Programming Languages
//!
//! These are short recipes for accomplishing common tasks.
//!
//! * [Whitespace](#whitespace)
//!   + [Wrapper combinators that eat whitespace before and after a parser](#wrapper-combinators-that-eat-whitespace-before-and-after-a-parser)
//! * [Comments](#comments)
//!   + [`// C++/EOL-style comments`](#-ceol-style-comments)
//!   + [`/* C-style comments */`](#-c-style-comments-)
//! * [Identifiers](#identifiers)
//!   + [`Rust-Style Identifiers`](#rust-style-identifiers)
//! * [Literal Values](#literal-values)
//!   + [Escaped Strings](#escaped-strings)
//!   + [Integers](#integers)
//!     - [Hexadecimal](#hexadecimal)
//!     - [Octal](#octal)
//!     - [Binary](#binary)
//!     - [Decimal](#decimal)
//!   + [Floating Point Numbers](#floating-point-numbers)
//!
//! ## Whitespace
//!
//!
//!
//! ### Wrapper combinators that eat whitespace before and after a parser
//!
//! ```rust
//! use winnow::prelude::*;
//! use winnow::{
//!   error::ParseError,
//!   combinator::delimited,
//!   ascii::multispace0,
//! };
//!
//! /// A combinator that takes a parser `inner` and produces a parser that also consumes both leading and
//! /// trailing whitespace, returning the output of `inner`.
//! fn ws<'a, F, O, E: ParseError<&'a str>>(inner: F) -> impl Parser<&'a str, O, E>
//!   where
//!   F: Parser<&'a str, O, E>,
//! {
//!   delimited(
//!     multispace0,
//!     inner,
//!     multispace0
//!   )
//! }
//! ```
//!
//! To eat only trailing whitespace, replace `delimited(...)` with `terminated(&inner, multispace0)`.
//! Likewise, to eat only leading whitespace, replace `delimited(...)` with `preceded(multispace0,
//! &inner)`. You can use your own parser instead of `multispace0` if you want to skip a different set
//! of lexemes.
//!
//! ## Comments
//!
//! ### `// C++/EOL-style comments`
//!
//! This version uses `%` to start a comment, does not consume the newline character, and returns an
//! output of `()`.
//!
//! ```rust
//! use winnow::prelude::*;
//! use winnow::{
//!   error::ParseError,
//!   token::take_till1,
//! };
//!
//! pub fn peol_comment<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, (), E>
//! {
//!   ('%', take_till1("\n\r"))
//!     .void() // Output is thrown away.
//!     .parse_next(i)
//! }
//! ```
//!
//! ### `/* C-style comments */`
//!
//! Inline comments surrounded with sentinel tags `(*` and `*)`. This version returns an output of `()`
//! and does not handle nested comments.
//!
//! ```rust
//! use winnow::prelude::*;
//! use winnow::{
//!   error::ParseError,
//!   token::{tag, take_until0},
//! };
//!
//! pub fn pinline_comment<'a, E: ParseError<&'a str>>(i: &'a str) -> IResult<&'a str, (), E> {
//!   (
//!     "(*",
//!     take_until0("*)"),
//!     "*)"
//!   )
//!     .void() // Output is thrown away.
//!     .parse_next(i)
//! }
//! ```
//!
//! ## Identifiers
//!
//! ### `Rust-Style Identifiers`
//!
//! Identifiers that must start with a letter or underscore, and may then contain underscores,
//! letters and numbers, can be parsed like this:
//!
//! ```rust
//! use winnow::prelude::*;
//! use winnow::{
//!   stream::AsChar,
//!   token::take_while,
//!   token::one_of,
//! };
//!
//! pub fn identifier(input: &str) -> IResult<&str, &str> {
//!   (
//!       one_of(|c: char| c.is_alpha() || c == '_'),
//!       take_while(0.., |c: char| c.is_alphanum() || c == '_')
//!   )
//!   .recognize()
//!   .parse_next(input)
//! }
//! ```
//!
//! Let's say we apply this to the identifier `hello_world123abc`. The first element of the tuple
//! uses [`one_of`][crate::token::one_of], which recognizes `h`. The tuple ensures that
//! `ello_world123abc` will be piped to the next [`take_while`][crate::token::take_while] parser,
//! which recognizes every remaining character. However, the tuple returns a tuple of the results
//! of its sub-parsers. The [`recognize`][crate::Parser::recognize] parser produces a `&str` of the
//! input text that was parsed, which in this case is the entire `&str` `hello_world123abc`.
//!
//! ## Literal Values
//!
//! ### Escaped Strings
//!
//! ```rust
#![doc = include_str!("../../examples/string/parser.rs")]
//! ```
//!
//! ### Integers
//!
//! The following recipes all return string slices rather than integer values. How to obtain an
//! integer value instead is demonstrated for hexadecimal integers. The others are similar.
//!
//! The parsers allow the grouping character `_`, which allows one to group the digits by byte, for
//! example: `0xA4_3F_11_28`. If you prefer to exclude the `_` character, the lambda to convert from a
//! string slice to an integer value is slightly simpler. You can also strip the `_` from the string
//! slice that is returned, which is demonstrated in the second hexadecimal number parser.
//!
//! #### Hexadecimal
//!
//! The parser outputs the string slice of the digits without the leading `0x`/`0X`.
//!
//! ```rust
//! use winnow::prelude::*;
//! use winnow::{
//!   combinator::alt,
//!   combinator::{repeat},
//!   combinator::{preceded, terminated},
//!   token::one_of,
//!   token::tag,
//! };
//!
//! fn hexadecimal(input: &str) -> IResult<&str, &str> { // <'a, E: ParseError<&'a str>>
//!   preceded(
//!     alt(("0x", "0X")),
//!     repeat(1..,
//!       terminated(one_of("0123456789abcdefABCDEF"), repeat(0.., '_').map(|()| ()))
//!     ).map(|()| ()).recognize()
//!   ).parse_next(input)
//! }
//! ```
//!
//! If you want it to return the integer value instead, use `try_map`, which lets the conversion fail:
//!
//! ```rust
//! use winnow::prelude::*;
//! use winnow::{
//!   combinator::alt,
//!   combinator::{repeat},
//!   combinator::{preceded, terminated},
//!   token::one_of,
//!   token::tag,
//! };
//!
//! fn hexadecimal_value(input: &str) -> IResult<&str, i64> {
//!   preceded(
//!     alt(("0x", "0X")),
//!     repeat(1..,
//!       terminated(one_of("0123456789abcdefABCDEF"), repeat(0.., '_').map(|()| ()))
//!     ).map(|()| ()).recognize()
//!   ).try_map(
//!     |out: &str| i64::from_str_radix(&str::replace(&out, "_", ""), 16)
//!   ).parse_next(input)
//! }
//! ```
//!
//! #### Octal
//!
//! ```rust
//! use winnow::prelude::*;
//! use winnow::{
//!   combinator::alt,
//!   combinator::{repeat},
//!   combinator::{preceded, terminated},
//!   token::one_of,
//!   token::tag,
//! };
//!
//! fn octal(input: &str) -> IResult<&str, &str> {
//!   preceded(
//!     alt(("0o", "0O")),
//!     repeat(1..,
//!       terminated(one_of("01234567"), repeat(0.., '_').map(|()| ()))
//!     ).map(|()| ()).recognize()
//!   ).parse_next(input)
//! }
//! ```
//!
//! #### Binary
//!
//! ```rust
//! use winnow::prelude::*;
//! use winnow::{
//!   combinator::alt,
//!   combinator::{repeat},
//!   combinator::{preceded, terminated},
//!   token::one_of,
//!   token::tag,
//! };
//!
//! fn binary(input: &str) -> IResult<&str, &str> {
//!   preceded(
//!     alt(("0b", "0B")),
//!     repeat(1..,
//!       terminated(one_of("01"), repeat(0.., '_').map(|()| ()))
//!     ).map(|()| ()).recognize()
//!   ).parse_next(input)
//! }
//! ```
//!
//! #### Decimal
//!
//! ```rust
//! use winnow::prelude::*;
//! use winnow::{
//!   IResult,
//!   combinator::{repeat},
//!   combinator::terminated,
//!   token::one_of,
//! };
//!
//! fn decimal(input: &str) -> IResult<&str, &str> {
//!   repeat(1..,
//!     terminated(one_of("0123456789"), repeat(0.., '_').map(|()| ()))
//!   ).map(|()| ())
//!     .recognize()
//!     .parse_next(input)
//! }
//! ```
//!
//! ### Floating Point Numbers
//!
//! The following is adapted from [the Python parser by Valentin Lorentz](https://github.com/ProgVal/rust-python-parser/blob/master/src/numbers.rs).
//!
//! ```rust
//! use winnow::prelude::*;
//! use winnow::{
//!   combinator::alt,
//!   combinator::{repeat},
//!   combinator::opt,
//!   combinator::{preceded, terminated},
//!   token::one_of,
//! };
//!
//! fn float(input: &str) -> IResult<&str, &str> {
//!   alt((
//!     // Case one: .42
//!     (
//!       '.',
//!       decimal,
//!       opt((
//!         one_of("eE"),
//!         opt(one_of("+-")),
//!         decimal
//!       ))
//!     ).recognize()
//!     , // Case two: 42e42 and 42.42e42
//!     (
//!       decimal,
//!       opt(preceded(
//!         '.',
//!         decimal,
//!       )),
//!       one_of("eE"),
//!       opt(one_of("+-")),
//!       decimal
//!     ).recognize()
//!     , // Case three: 42. and 42.42
//!     (
//!       decimal,
//!       '.',
//!       opt(decimal)
//!     ).recognize()
//!   )).parse_next(input)
//! }
//!
//! fn decimal(input: &str) -> IResult<&str, &str> {
//!   repeat(1..,
//!     terminated(one_of("0123456789"), repeat(0.., '_').map(|()| ()))
//!   ).
//!   map(|()| ())
//!     .recognize()
//!     .parse_next(input)
//! }
//! ```