summaryrefslogtreecommitdiffstats
path: root/fluent-bit/lib/onigmo/doc/UnicodeProps.txt
blob: 59124ee28f063b657c3542d1e0c05d13b34083d5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
Onigmo (Oniguruma-mod) Unicode Properties  Version 6.2.2    2022/09/20

* POSIX brackets
    Alpha
    Blank
    Cntrl
    Digit
    Graph
    Lower
    Print
    Punct
    Space
    Upper
    XDigit
    Word
    Alnum
    ASCII
    XPosixPunct

* Special
    Any
    Assigned

* Major and General Categories
    C
    Cc
    Cf
    Cn
    Co
    Cs
    L
    LC
    Ll
    Lm
    Lo
    Lt
    Lu
    M
    Mc
    Me
    Mn
    N
    Nd
    Nl
    No
    P
    Pc
    Pd
    Pe
    Pf
    Pi
    Po
    Ps
    S
    Sc
    Sk
    Sm
    So
    Z
    Zl
    Zp
    Zs

* Scripts
    Adlam
    Ahom
    Anatolian_Hieroglyphs
    Arabic
    Armenian
    Avestan
    Balinese
    Bamum
    Bassa_Vah
    Batak
    Bengali
    Bhaiksuki
    Bopomofo
    Brahmi
    Braille
    Buginese
    Buhid
    Canadian_Aboriginal
    Carian
    Caucasian_Albanian
    Chakma
    Cham
    Cherokee
    Chorasmian
    Common
    Coptic
    Cuneiform
    Cypriot
    Cypro_Minoan
    Cyrillic
    Deseret
    Devanagari
    Dives_Akuru
    Dogra
    Duployan
    Egyptian_Hieroglyphs
    Elbasan
    Elymaic
    Ethiopic
    Georgian
    Glagolitic
    Gothic
    Grantha
    Greek
    Gujarati
    Gunjala_Gondi
    Gurmukhi
    Han
    Hangul
    Hanifi_Rohingya
    Hanunoo
    Hatran
    Hebrew
    Hiragana
    Imperial_Aramaic
    Inherited
    Inscriptional_Pahlavi
    Inscriptional_Parthian
    Javanese
    Kaithi
    Kannada
    Katakana
    Kawi
    Kayah_Li
    Kharoshthi
    Khitan_Small_Script
    Khmer
    Khojki
    Khudawadi
    Lao
    Latin
    Lepcha
    Limbu
    Linear_A
    Linear_B
    Lisu
    Lycian
    Lydian
    Mahajani
    Makasar
    Malayalam
    Mandaic
    Manichaean
    Marchen
    Masaram_Gondi
    Medefaidrin
    Meetei_Mayek
    Mende_Kikakui
    Meroitic_Cursive
    Meroitic_Hieroglyphs
    Miao
    Modi
    Mongolian
    Mro
    Multani
    Myanmar
    Nabataean
    Nag_Mundari
    Nandinagari
    New_Tai_Lue
    Newa
    Nko
    Nushu
    Nyiakeng_Puachue_Hmong
    Ogham
    Ol_Chiki
    Old_Hungarian
    Old_Italic
    Old_North_Arabian
    Old_Permic
    Old_Persian
    Old_Sogdian
    Old_South_Arabian
    Old_Turkic
    Old_Uyghur
    Oriya
    Osage
    Osmanya
    Pahawh_Hmong
    Palmyrene
    Pau_Cin_Hau
    Phags_Pa
    Phoenician
    Psalter_Pahlavi
    Rejang
    Runic
    Samaritan
    Saurashtra
    Sharada
    Shavian
    Siddham
    SignWriting
    Sinhala
    Sogdian
    Sora_Sompeng
    Soyombo
    Sundanese
    Syloti_Nagri
    Syriac
    Tagalog
    Tagbanwa
    Tai_Le
    Tai_Tham
    Tai_Viet
    Takri
    Tamil
    Tangsa
    Tangut
    Telugu
    Thaana
    Thai
    Tibetan
    Tifinagh
    Tirhuta
    Toto
    Ugaritic
    Unknown
    Vai
    Vithkuqi
    Wancho
    Warang_Citi
    Yezidi
    Yi
    Zanabazar_Square

* DerivedCoreProperties
    Alphabetic
    Case_Ignorable
    Cased
    Changes_When_Casefolded
    Changes_When_Casemapped
    Changes_When_Lowercased
    Changes_When_Titlecased
    Changes_When_Uppercased
    Default_Ignorable_Code_Point
    Grapheme_Base
    Grapheme_Extend
    Grapheme_Link
    ID_Continue
    ID_Start
    Lowercase
    Math
    Uppercase
    XID_Continue
    XID_Start

* PropList
    ASCII_Hex_Digit
    Bidi_Control
    Dash
    Deprecated
    Diacritic
    Extender
    Hex_Digit
    Hyphen
    IDS_Binary_Operator
    IDS_Trinary_Operator
    Ideographic
    Join_Control
    Logical_Order_Exception
    Noncharacter_Code_Point
    Other_Alphabetic
    Other_Default_Ignorable_Code_Point
    Other_Grapheme_Extend
    Other_ID_Continue
    Other_ID_Start
    Other_Lowercase
    Other_Math
    Other_Uppercase
    Pattern_Syntax
    Pattern_White_Space
    Prepended_Concatenation_Mark
    Quotation_Mark
    Radical
    Regional_Indicator
    Sentence_Terminal
    Soft_Dotted
    Terminal_Punctuation
    Unified_Ideograph
    Variation_Selector
    White_Space

* Emoji
    Emoji
    Emoji_Component
    Emoji_Modifier
    Emoji_Modifier_Base
    Emoji_Presentation

* PropertyAliases
    AHex
    Bidi_C
    CI
    CWCF
    CWCM
    CWL
    CWT
    CWU
    DI
    Dep
    Dia
    EBase
    EComp
    EMod
    EPres
    Ext
    Gr_Base
    Gr_Ext
    Gr_Link
    Hex
    IDC
    IDS
    IDSB
    IDST
    Ideo
    Join_C
    LOE
    NChar
    OAlpha
    ODI
    OGr_Ext
    OIDC
    OIDS
    OLower
    OMath
    OUpper
    PCM
    Pat_Syn
    Pat_WS
    QMark
    RI
    SD
    STerm
    Term
    UIdeo
    VS
    WSpace
    XIDC
    XIDS

* PropertyValueAliases (General_Category)
    Other
    Control
    Format
    Unassigned
    Private_Use
    Surrogate
    Letter
    Cased_Letter
    Lowercase_Letter
    Modifier_Letter
    Other_Letter
    Titlecase_Letter
    Uppercase_Letter
    Mark
    Combining_Mark
    Spacing_Mark
    Enclosing_Mark
    Nonspacing_Mark
    Number
    Decimal_Number
    Letter_Number
    Other_Number
    Punctuation
    Connector_Punctuation
    Dash_Punctuation
    Close_Punctuation
    Final_Punctuation
    Initial_Punctuation
    Other_Punctuation
    Open_Punctuation
    Symbol
    Currency_Symbol
    Modifier_Symbol
    Math_Symbol
    Other_Symbol
    Separator
    Line_Separator
    Paragraph_Separator
    Space_Separator

* PropertyValueAliases (Script)
    Adlm
    Aghb
    Arab
    Armi
    Armn
    Avst
    Bali
    Bamu
    Bass
    Batk
    Beng
    Bhks
    Bopo
    Brah
    Brai
    Bugi
    Buhd
    Cakm
    Cans
    Cari
    Cher
    Chrs
    Copt
    Qaac
    Cpmn
    Cprt
    Cyrl
    Deva
    Diak
    Dogr
    Dsrt
    Dupl
    Egyp
    Elba
    Elym
    Ethi
    Geor
    Glag
    Gong
    Gonm
    Goth
    Gran
    Grek
    Gujr
    Guru
    Hang
    Hani
    Hano
    Hatr
    Hebr
    Hira
    Hluw
    Hmng
    Hmnp
    Hung
    Ital
    Java
    Kali
    Kana
    Khar
    Khmr
    Khoj
    Kits
    Knda
    Kthi
    Lana
    Laoo
    Latn
    Lepc
    Limb
    Lina
    Linb
    Lyci
    Lydi
    Mahj
    Maka
    Mand
    Mani
    Marc
    Medf
    Mend
    Merc
    Mero
    Mlym
    Mong
    Mroo
    Mtei
    Mult
    Mymr
    Nagm
    Nand
    Narb
    Nbat
    Nkoo
    Nshu
    Ogam
    Olck
    Orkh
    Orya
    Osge
    Osma
    Ougr
    Palm
    Pauc
    Perm
    Phag
    Phli
    Phlp
    Phnx
    Plrd
    Prti
    Rjng
    Rohg
    Runr
    Samr
    Sarb
    Saur
    Sgnw
    Shaw
    Shrd
    Sidd
    Sind
    Sinh
    Sogd
    Sogo
    Sora
    Soyo
    Sund
    Sylo
    Syrc
    Tagb
    Takr
    Tale
    Talu
    Taml
    Tang
    Tavt
    Telu
    Tfng
    Tglg
    Thaa
    Tibt
    Tirh
    Tnsa
    Ugar
    Vaii
    Vith
    Wara
    Wcho
    Xpeo
    Xsux
    Yezi
    Yiii
    Zanb
    Zinh
    Qaai
    Zyyy
    Zzzz

* DerivedAges
    Age=1.1
    Age=10.0
    Age=11.0
    Age=12.0
    Age=12.1
    Age=13.0
    Age=14.0
    Age=15.0
    Age=2.0
    Age=2.1
    Age=3.0
    Age=3.1
    Age=3.2
    Age=4.0
    Age=4.1
    Age=5.0
    Age=5.1
    Age=5.2
    Age=6.0
    Age=6.1
    Age=6.2
    Age=6.3
    Age=7.0
    Age=8.0
    Age=9.0

* Blocks
    In_Basic_Latin
    In_Latin_1_Supplement
    In_Latin_Extended_A
    In_Latin_Extended_B
    In_IPA_Extensions
    In_Spacing_Modifier_Letters
    In_Combining_Diacritical_Marks
    In_Greek_and_Coptic
    In_Cyrillic
    In_Cyrillic_Supplement
    In_Armenian
    In_Hebrew
    In_Arabic
    In_Syriac
    In_Arabic_Supplement
    In_Thaana
    In_NKo
    In_Samaritan
    In_Mandaic
    In_Syriac_Supplement
    In_Arabic_Extended_B
    In_Arabic_Extended_A
    In_Devanagari
    In_Bengali
    In_Gurmukhi
    In_Gujarati
    In_Oriya
    In_Tamil
    In_Telugu
    In_Kannada
    In_Malayalam
    In_Sinhala
    In_Thai
    In_Lao
    In_Tibetan
    In_Myanmar
    In_Georgian
    In_Hangul_Jamo
    In_Ethiopic
    In_Ethiopic_Supplement
    In_Cherokee
    In_Unified_Canadian_Aboriginal_Syllabics
    In_Ogham
    In_Runic
    In_Tagalog
    In_Hanunoo
    In_Buhid
    In_Tagbanwa
    In_Khmer
    In_Mongolian
    In_Unified_Canadian_Aboriginal_Syllabics_Extended
    In_Limbu
    In_Tai_Le
    In_New_Tai_Lue
    In_Khmer_Symbols
    In_Buginese
    In_Tai_Tham
    In_Combining_Diacritical_Marks_Extended
    In_Balinese
    In_Sundanese
    In_Batak
    In_Lepcha
    In_Ol_Chiki
    In_Cyrillic_Extended_C
    In_Georgian_Extended
    In_Sundanese_Supplement
    In_Vedic_Extensions
    In_Phonetic_Extensions
    In_Phonetic_Extensions_Supplement
    In_Combining_Diacritical_Marks_Supplement
    In_Latin_Extended_Additional
    In_Greek_Extended
    In_General_Punctuation
    In_Superscripts_and_Subscripts
    In_Currency_Symbols
    In_Combining_Diacritical_Marks_for_Symbols
    In_Letterlike_Symbols
    In_Number_Forms
    In_Arrows
    In_Mathematical_Operators
    In_Miscellaneous_Technical
    In_Control_Pictures
    In_Optical_Character_Recognition
    In_Enclosed_Alphanumerics
    In_Box_Drawing
    In_Block_Elements
    In_Geometric_Shapes
    In_Miscellaneous_Symbols
    In_Dingbats
    In_Miscellaneous_Mathematical_Symbols_A
    In_Supplemental_Arrows_A
    In_Braille_Patterns
    In_Supplemental_Arrows_B
    In_Miscellaneous_Mathematical_Symbols_B
    In_Supplemental_Mathematical_Operators
    In_Miscellaneous_Symbols_and_Arrows
    In_Glagolitic
    In_Latin_Extended_C
    In_Coptic
    In_Georgian_Supplement
    In_Tifinagh
    In_Ethiopic_Extended
    In_Cyrillic_Extended_A
    In_Supplemental_Punctuation
    In_CJK_Radicals_Supplement
    In_Kangxi_Radicals
    In_Ideographic_Description_Characters
    In_CJK_Symbols_and_Punctuation
    In_Hiragana
    In_Katakana
    In_Bopomofo
    In_Hangul_Compatibility_Jamo
    In_Kanbun
    In_Bopomofo_Extended
    In_CJK_Strokes
    In_Katakana_Phonetic_Extensions
    In_Enclosed_CJK_Letters_and_Months
    In_CJK_Compatibility
    In_CJK_Unified_Ideographs_Extension_A
    In_Yijing_Hexagram_Symbols
    In_CJK_Unified_Ideographs
    In_Yi_Syllables
    In_Yi_Radicals
    In_Lisu
    In_Vai
    In_Cyrillic_Extended_B
    In_Bamum
    In_Modifier_Tone_Letters
    In_Latin_Extended_D
    In_Syloti_Nagri
    In_Common_Indic_Number_Forms
    In_Phags_pa
    In_Saurashtra
    In_Devanagari_Extended
    In_Kayah_Li
    In_Rejang
    In_Hangul_Jamo_Extended_A
    In_Javanese
    In_Myanmar_Extended_B
    In_Cham
    In_Myanmar_Extended_A
    In_Tai_Viet
    In_Meetei_Mayek_Extensions
    In_Ethiopic_Extended_A
    In_Latin_Extended_E
    In_Cherokee_Supplement
    In_Meetei_Mayek
    In_Hangul_Syllables
    In_Hangul_Jamo_Extended_B
    In_High_Surrogates
    In_High_Private_Use_Surrogates
    In_Low_Surrogates
    In_Private_Use_Area
    In_CJK_Compatibility_Ideographs
    In_Alphabetic_Presentation_Forms
    In_Arabic_Presentation_Forms_A
    In_Variation_Selectors
    In_Vertical_Forms
    In_Combining_Half_Marks
    In_CJK_Compatibility_Forms
    In_Small_Form_Variants
    In_Arabic_Presentation_Forms_B
    In_Halfwidth_and_Fullwidth_Forms
    In_Specials
    In_Linear_B_Syllabary
    In_Linear_B_Ideograms
    In_Aegean_Numbers
    In_Ancient_Greek_Numbers
    In_Ancient_Symbols
    In_Phaistos_Disc
    In_Lycian
    In_Carian
    In_Coptic_Epact_Numbers
    In_Old_Italic
    In_Gothic
    In_Old_Permic
    In_Ugaritic
    In_Old_Persian
    In_Deseret
    In_Shavian
    In_Osmanya
    In_Osage
    In_Elbasan
    In_Caucasian_Albanian
    In_Vithkuqi
    In_Linear_A
    In_Latin_Extended_F
    In_Cypriot_Syllabary
    In_Imperial_Aramaic
    In_Palmyrene
    In_Nabataean
    In_Hatran
    In_Phoenician
    In_Lydian
    In_Meroitic_Hieroglyphs
    In_Meroitic_Cursive
    In_Kharoshthi
    In_Old_South_Arabian
    In_Old_North_Arabian
    In_Manichaean
    In_Avestan
    In_Inscriptional_Parthian
    In_Inscriptional_Pahlavi
    In_Psalter_Pahlavi
    In_Old_Turkic
    In_Old_Hungarian
    In_Hanifi_Rohingya
    In_Rumi_Numeral_Symbols
    In_Yezidi
    In_Arabic_Extended_C
    In_Old_Sogdian
    In_Sogdian
    In_Old_Uyghur
    In_Chorasmian
    In_Elymaic
    In_Brahmi
    In_Kaithi
    In_Sora_Sompeng
    In_Chakma
    In_Mahajani
    In_Sharada
    In_Sinhala_Archaic_Numbers
    In_Khojki
    In_Multani
    In_Khudawadi
    In_Grantha
    In_Newa
    In_Tirhuta
    In_Siddham
    In_Modi
    In_Mongolian_Supplement
    In_Takri
    In_Ahom
    In_Dogra
    In_Warang_Citi
    In_Dives_Akuru
    In_Nandinagari
    In_Zanabazar_Square
    In_Soyombo
    In_Unified_Canadian_Aboriginal_Syllabics_Extended_A
    In_Pau_Cin_Hau
    In_Devanagari_Extended_A
    In_Bhaiksuki
    In_Marchen
    In_Masaram_Gondi
    In_Gunjala_Gondi
    In_Makasar
    In_Kawi
    In_Lisu_Supplement
    In_Tamil_Supplement
    In_Cuneiform
    In_Cuneiform_Numbers_and_Punctuation
    In_Early_Dynastic_Cuneiform
    In_Cypro_Minoan
    In_Egyptian_Hieroglyphs
    In_Egyptian_Hieroglyph_Format_Controls
    In_Anatolian_Hieroglyphs
    In_Bamum_Supplement
    In_Mro
    In_Tangsa
    In_Bassa_Vah
    In_Pahawh_Hmong
    In_Medefaidrin
    In_Miao
    In_Ideographic_Symbols_and_Punctuation
    In_Tangut
    In_Tangut_Components
    In_Khitan_Small_Script
    In_Tangut_Supplement
    In_Kana_Extended_B
    In_Kana_Supplement
    In_Kana_Extended_A
    In_Small_Kana_Extension
    In_Nushu
    In_Duployan
    In_Shorthand_Format_Controls
    In_Znamenny_Musical_Notation
    In_Byzantine_Musical_Symbols
    In_Musical_Symbols
    In_Ancient_Greek_Musical_Notation
    In_Kaktovik_Numerals
    In_Mayan_Numerals
    In_Tai_Xuan_Jing_Symbols
    In_Counting_Rod_Numerals
    In_Mathematical_Alphanumeric_Symbols
    In_Sutton_SignWriting
    In_Latin_Extended_G
    In_Glagolitic_Supplement
    In_Cyrillic_Extended_D
    In_Nyiakeng_Puachue_Hmong
    In_Toto
    In_Wancho
    In_Nag_Mundari
    In_Ethiopic_Extended_B
    In_Mende_Kikakui
    In_Adlam
    In_Indic_Siyaq_Numbers
    In_Ottoman_Siyaq_Numbers
    In_Arabic_Mathematical_Alphabetic_Symbols
    In_Mahjong_Tiles
    In_Domino_Tiles
    In_Playing_Cards
    In_Enclosed_Alphanumeric_Supplement
    In_Enclosed_Ideographic_Supplement
    In_Miscellaneous_Symbols_and_Pictographs
    In_Emoticons
    In_Ornamental_Dingbats
    In_Transport_and_Map_Symbols
    In_Alchemical_Symbols
    In_Geometric_Shapes_Extended
    In_Supplemental_Arrows_C
    In_Supplemental_Symbols_and_Pictographs
    In_Chess_Symbols
    In_Symbols_and_Pictographs_Extended_A
    In_Symbols_for_Legacy_Computing
    In_CJK_Unified_Ideographs_Extension_B
    In_CJK_Unified_Ideographs_Extension_C
    In_CJK_Unified_Ideographs_Extension_D
    In_CJK_Unified_Ideographs_Extension_E
    In_CJK_Unified_Ideographs_Extension_F
    In_CJK_Compatibility_Ideographs_Supplement
    In_CJK_Unified_Ideographs_Extension_G
    In_CJK_Unified_Ideographs_Extension_H
    In_Tags
    In_Variation_Selectors_Supplement
    In_Supplementary_Private_Use_Area_A
    In_Supplementary_Private_Use_Area_B
    In_No_Block