1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
|
// Copyright 2015 Google Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//! CommonMark punctuation set based on spec and Unicode properties.
// Autogenerated by mk_puncttable.py
/// Punctuation bitmasks for the ASCII range, one `u16` per run of 16 code points.
///
/// Entry `c / 16` covers byte `c`; bit `c & 15` of that entry is set when the
/// byte is reported as punctuation by `is_ascii_punctuation`.
const PUNCT_MASKS_ASCII: [u16; 8] = [
0x0000, // U+0000...U+000F
0x0000, // U+0010...U+001F
0xfffe, // U+0020...U+002F
0xfc00, // U+0030...U+003F
0x0001, // U+0040...U+004F
0xf800, // U+0050...U+005F
0x0001, // U+0060...U+006F
0x7800, // U+0070...U+007F
];
/// Row numbers (`code point / 16`) of the non-ASCII 16-code-point rows that
/// have an entry in `PUNCT_MASKS`.
///
/// Kept in ascending order because `is_punctuation` binary-searches it; the
/// index of a match is the index of that row's bitmask in `PUNCT_MASKS`.
const PUNCT_TAB: [u16; 132] = [
10, // U+00A0...U+00AF
11, // U+00B0...U+00BF
55, // U+0370...U+037F
56, // U+0380...U+038F
85, // U+0550...U+055F
88, // U+0580...U+058F
91, // U+05B0...U+05BF
92, // U+05C0...U+05CF
95, // U+05F0...U+05FF
96, // U+0600...U+060F
97, // U+0610...U+061F
102, // U+0660...U+066F
109, // U+06D0...U+06DF
112, // U+0700...U+070F
127, // U+07F0...U+07FF
131, // U+0830...U+083F
133, // U+0850...U+085F
150, // U+0960...U+096F
151, // U+0970...U+097F
175, // U+0AF0...U+0AFF
223, // U+0DF0...U+0DFF
228, // U+0E40...U+0E4F
229, // U+0E50...U+0E5F
240, // U+0F00...U+0F0F
241, // U+0F10...U+0F1F
243, // U+0F30...U+0F3F
248, // U+0F80...U+0F8F
253, // U+0FD0...U+0FDF
260, // U+1040...U+104F
271, // U+10F0...U+10FF
310, // U+1360...U+136F
320, // U+1400...U+140F
358, // U+1660...U+166F
361, // U+1690...U+169F
366, // U+16E0...U+16EF
371, // U+1730...U+173F
381, // U+17D0...U+17DF
384, // U+1800...U+180F
404, // U+1940...U+194F
417, // U+1A10...U+1A1F
426, // U+1AA0...U+1AAF
437, // U+1B50...U+1B5F
438, // U+1B60...U+1B6F
447, // U+1BF0...U+1BFF
451, // U+1C30...U+1C3F
455, // U+1C70...U+1C7F
460, // U+1CC0...U+1CCF
461, // U+1CD0...U+1CDF
513, // U+2010...U+201F
514, // U+2020...U+202F
515, // U+2030...U+203F
516, // U+2040...U+204F
517, // U+2050...U+205F
519, // U+2070...U+207F
520, // U+2080...U+208F
560, // U+2300...U+230F
562, // U+2320...U+232F
630, // U+2760...U+276F
631, // U+2770...U+277F
636, // U+27C0...U+27CF
638, // U+27E0...U+27EF
664, // U+2980...U+298F
665, // U+2990...U+299F
669, // U+29D0...U+29DF
671, // U+29F0...U+29FF
719, // U+2CF0...U+2CFF
727, // U+2D70...U+2D7F
736, // U+2E00...U+2E0F
737, // U+2E10...U+2E1F
738, // U+2E20...U+2E2F
739, // U+2E30...U+2E3F
740, // U+2E40...U+2E4F
768, // U+3000...U+300F
769, // U+3010...U+301F
771, // U+3030...U+303F
778, // U+30A0...U+30AF
783, // U+30F0...U+30FF
2639, // U+A4F0...U+A4FF
2656, // U+A600...U+A60F
2663, // U+A670...U+A67F
2671, // U+A6F0...U+A6FF
2695, // U+A870...U+A87F
2700, // U+A8C0...U+A8CF
2703, // U+A8F0...U+A8FF
2706, // U+A920...U+A92F
2709, // U+A950...U+A95F
2716, // U+A9C0...U+A9CF
2717, // U+A9D0...U+A9DF
2725, // U+AA50...U+AA5F
2733, // U+AAD0...U+AADF
2735, // U+AAF0...U+AAFF
2750, // U+ABE0...U+ABEF
4051, // U+FD30...U+FD3F
4065, // U+FE10...U+FE1F
4067, // U+FE30...U+FE3F
4068, // U+FE40...U+FE4F
4069, // U+FE50...U+FE5F
4070, // U+FE60...U+FE6F
4080, // U+FF00...U+FF0F
4081, // U+FF10...U+FF1F
4082, // U+FF20...U+FF2F
4083, // U+FF30...U+FF3F
4085, // U+FF50...U+FF5F
4086, // U+FF60...U+FF6F
4112, // U+10100...U+1010F
4153, // U+10390...U+1039F
4157, // U+103D0...U+103DF
4182, // U+10560...U+1056F
4229, // U+10850...U+1085F
4241, // U+10910...U+1091F
4243, // U+10930...U+1093F
4261, // U+10A50...U+10A5F
4263, // U+10A70...U+10A7F
4271, // U+10AF0...U+10AFF
4275, // U+10B30...U+10B3F
4281, // U+10B90...U+10B9F
4356, // U+11040...U+1104F
4363, // U+110B0...U+110BF
4364, // U+110C0...U+110CF
4372, // U+11140...U+1114F
4375, // U+11170...U+1117F
4380, // U+111C0...U+111CF
4387, // U+11230...U+1123F
4428, // U+114C0...U+114CF
4444, // U+115C0...U+115CF
4452, // U+11640...U+1164F
4679, // U+12470...U+1247F
5798, // U+16A60...U+16A6F
5807, // U+16AF0...U+16AFF
5811, // U+16B30...U+16B3F
5812, // U+16B40...U+16B4F
7113, // U+1BC90...U+1BC9F
];
/// Punctuation bitmasks for the rows listed in `PUNCT_TAB`, in the same order.
///
/// In each entry, bit `code point & 15` is set when that code point is
/// reported as punctuation by `is_punctuation`.
const PUNCT_MASKS: [u16; 132] = [
0x0882, // U+00A0...U+00AF
0x88c0, // U+00B0...U+00BF
0x4000, // U+0370...U+037F
0x0080, // U+0380...U+038F
0xfc00, // U+0550...U+055F
0x0600, // U+0580...U+058F
0x4000, // U+05B0...U+05BF
0x0049, // U+05C0...U+05CF
0x0018, // U+05F0...U+05FF
0x3600, // U+0600...U+060F
0xc800, // U+0610...U+061F
0x3c00, // U+0660...U+066F
0x0010, // U+06D0...U+06DF
0x3fff, // U+0700...U+070F
0x0380, // U+07F0...U+07FF
0x7fff, // U+0830...U+083F
0x4000, // U+0850...U+085F
0x0030, // U+0960...U+096F
0x0001, // U+0970...U+097F
0x0001, // U+0AF0...U+0AFF
0x0010, // U+0DF0...U+0DFF
0x8000, // U+0E40...U+0E4F
0x0c00, // U+0E50...U+0E5F
0xfff0, // U+0F00...U+0F0F
0x0017, // U+0F10...U+0F1F
0x3c00, // U+0F30...U+0F3F
0x0020, // U+0F80...U+0F8F
0x061f, // U+0FD0...U+0FDF
0xfc00, // U+1040...U+104F
0x0800, // U+10F0...U+10FF
0x01ff, // U+1360...U+136F
0x0001, // U+1400...U+140F
0x6000, // U+1660...U+166F
0x1800, // U+1690...U+169F
0x3800, // U+16E0...U+16EF
0x0060, // U+1730...U+173F
0x0770, // U+17D0...U+17DF
0x07ff, // U+1800...U+180F
0x0030, // U+1940...U+194F
0xc000, // U+1A10...U+1A1F
0x3f7f, // U+1AA0...U+1AAF
0xfc00, // U+1B50...U+1B5F
0x0001, // U+1B60...U+1B6F
0xf000, // U+1BF0...U+1BFF
0xf800, // U+1C30...U+1C3F
0xc000, // U+1C70...U+1C7F
0x00ff, // U+1CC0...U+1CCF
0x0008, // U+1CD0...U+1CDF
0xffff, // U+2010...U+201F
0x00ff, // U+2020...U+202F
0xffff, // U+2030...U+203F
0xffef, // U+2040...U+204F
0x7ffb, // U+2050...U+205F
0x6000, // U+2070...U+207F
0x6000, // U+2080...U+208F
0x0f00, // U+2300...U+230F
0x0600, // U+2320...U+232F
0xff00, // U+2760...U+276F
0x003f, // U+2770...U+277F
0x0060, // U+27C0...U+27CF
0xffc0, // U+27E0...U+27EF
0xfff8, // U+2980...U+298F
0x01ff, // U+2990...U+299F
0x0f00, // U+29D0...U+29DF
0x3000, // U+29F0...U+29FF
0xde00, // U+2CF0...U+2CFF
0x0001, // U+2D70...U+2D7F
0xffff, // U+2E00...U+2E0F
0xffff, // U+2E10...U+2E1F
0x7fff, // U+2E20...U+2E2F
0xffff, // U+2E30...U+2E3F
0x0007, // U+2E40...U+2E4F
0xff0e, // U+3000...U+300F
0xfff3, // U+3010...U+301F
0x2001, // U+3030...U+303F
0x0001, // U+30A0...U+30AF
0x0800, // U+30F0...U+30FF
0xc000, // U+A4F0...U+A4FF
0xe000, // U+A600...U+A60F
0x4008, // U+A670...U+A67F
0x00fc, // U+A6F0...U+A6FF
0x00f0, // U+A870...U+A87F
0xc000, // U+A8C0...U+A8CF
0x0700, // U+A8F0...U+A8FF
0xc000, // U+A920...U+A92F
0x8000, // U+A950...U+A95F
0x3ffe, // U+A9C0...U+A9CF
0xc000, // U+A9D0...U+A9DF
0xf000, // U+AA50...U+AA5F
0xc000, // U+AAD0...U+AADF
0x0003, // U+AAF0...U+AAFF
0x0800, // U+ABE0...U+ABEF
0xc000, // U+FD30...U+FD3F
0x03ff, // U+FE10...U+FE1F
0xffff, // U+FE30...U+FE3F
0xffff, // U+FE40...U+FE4F
0xfff7, // U+FE50...U+FE5F
0x0d0b, // U+FE60...U+FE6F
0xf7ee, // U+FF00...U+FF0F
0x8c00, // U+FF10...U+FF1F
0x0001, // U+FF20...U+FF2F
0xb800, // U+FF30...U+FF3F
0xa800, // U+FF50...U+FF5F
0x003f, // U+FF60...U+FF6F
0x0007, // U+10100...U+1010F
0x8000, // U+10390...U+1039F
0x0001, // U+103D0...U+103DF
0x8000, // U+10560...U+1056F
0x0080, // U+10850...U+1085F
0x8000, // U+10910...U+1091F
0x8000, // U+10930...U+1093F
0x01ff, // U+10A50...U+10A5F
0x8000, // U+10A70...U+10A7F
0x007f, // U+10AF0...U+10AFF
0xfe00, // U+10B30...U+10B3F
0x1e00, // U+10B90...U+10B9F
0x3f80, // U+11040...U+1104F
0xd800, // U+110B0...U+110BF
0x0003, // U+110C0...U+110CF
0x000f, // U+11140...U+1114F
0x0030, // U+11170...U+1117F
0x21e0, // U+111C0...U+111CF
0x3f00, // U+11230...U+1123F
0x0040, // U+114C0...U+114CF
0x03fe, // U+115C0...U+115CF
0x000e, // U+11640...U+1164F
0x001f, // U+12470...U+1247F
0xc000, // U+16A60...U+16A6F
0x0020, // U+16AF0...U+16AFF
0x0f80, // U+16B30...U+16B3F
0x0010, // U+16B40...U+16B4F
0x8000, // U+1BC90...U+1BC9F
];
/// Returns `true` when the byte `c` is flagged as punctuation in
/// `PUNCT_MASKS_ASCII`.
///
/// Bytes outside the ASCII range (`>= 0x80`) are never punctuation; they are
/// rejected before the table is indexed, so the lookup cannot go out of bounds.
pub(crate) fn is_ascii_punctuation(c: u8) -> bool {
    if c >= 0x80 {
        return false;
    }
    // The high nibble selects the 16-code-point row, the low nibble the bit in it.
    let row_mask = PUNCT_MASKS_ASCII[usize::from(c >> 4)];
    let bit = 1u16 << (c & 0x0f);
    (row_mask & bit) != 0
}
/// Returns `true` when `c` is punctuation according to this module's tables.
///
/// ASCII characters are delegated to `is_ascii_punctuation`. Any other
/// character is located by its 16-code-point row in `PUNCT_TAB`; a row that is
/// missing from the table contains no punctuation.
pub(crate) fn is_punctuation(c: char) -> bool {
    let code = u32::from(c);
    if code < 0x80 {
        return is_ascii_punctuation(code as u8);
    }
    // Nothing beyond the last table row is punctuation. This guard also
    // ensures the row number computed below fits in a `u16`.
    if code > 0x1BC9F {
        return false;
    }
    let row = (code >> 4) as u16;
    let bit = 1u16 << (code & 0x0f);
    PUNCT_TAB
        .binary_search(&row)
        .map(|slot| (PUNCT_MASKS[slot] & bit) != 0)
        .unwrap_or(false)
}
#[cfg(test)]
mod tests {
    use super::{is_ascii_punctuation, is_punctuation};

    /// Spot-checks the byte classifier, including a byte outside ASCII.
    #[test]
    fn test_ascii() {
        for &byte in &[b'!', b'@', b'~'] {
            assert!(
                is_ascii_punctuation(byte),
                "0x{:02X} should be punctuation",
                byte
            );
        }
        // 0xA1 lies outside ASCII and must be rejected by the byte classifier.
        for &byte in &[b' ', b'0', b'A', 0xA1] {
            assert!(
                !is_ascii_punctuation(byte),
                "0x{:02X} should not be punctuation",
                byte
            );
        }
    }

    /// Spot-checks the `char` classifier on ASCII, non-ASCII, and the end of the table.
    #[test]
    fn test_unicode() {
        for &ch in &['~', '\u{00A1}', '\u{060C}', '\u{FF65}', '\u{1BC9F}'] {
            assert!(
                is_punctuation(ch),
                "U+{:04X} should be punctuation",
                ch as u32
            );
        }
        // U+1BCA0 is the first code point past the last table row.
        for &ch in &[' ', '\u{1BCA0}'] {
            assert!(
                !is_punctuation(ch),
                "U+{:04X} should not be punctuation",
                ch as u32
            );
        }
    }
}
|