1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
|
# Copyright (C) 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# ******************************************************************************
# *
# * Copyright (C) 1995-2014, International Business Machines
# * Corporation and others. All Rights Reserved.
# *
# ******************************************************************************
# If this converter alias table looks very confusing, a much easier to
# understand view can be found at this demo:
# https://icu4c-demos.unicode.org/icu-bin/convexp
# IMPORTANT NOTE
#
# This file is not read directly by ICU. If you change it, you need to
# run gencnval, and eventually run pkgdata to update the representation that
# ICU uses for aliases. The gencnval tool will normally compile this file into
# cnvalias.icu. The gencnval -v verbose option will help you when you edit
# this file.
# Please be friendly to the rest of us that edit this table by
# keeping this table free of tabs.
# This is an alias file used by the character set converter.
# A lot of converter information can be found in unicode/ucnv.h, but here
# is more information about this file.
#
# If you are adding a new converter to this list and want to include it in the
# icu data library, please be sure to add an entry to the appropriate ucm*.mk file
# (see ucmfiles.mk for more information).
#
# Here is the file format using BNF-like syntax:
#
# converterTable ::= tags { converterLine* }
# converterLine ::= converterName [ tags ] { taggedAlias* }'\n'
# taggedAlias ::= alias [ tags ]
# tags ::= '{' { tag+ } '}'
# tag ::= standard['*']
# converterName ::= [0-9a-zA-Z:_'-']+
# alias ::= converterName
#
# Except for the converter name, aliases are case insensitive.
# Names are separated by whitespace.
# Line continuation and comment syntax are similar to the GNU make syntax.
# Any lines beginning with whitespace (e.g. U+0020 SPACE or U+0009 HORIZONTAL
# TABULATION) are presumed to be a continuation of the previous line.
# The # symbol starts a comment and the comment continues till the end of
# the line.
#
# The converter
#
# All names can be tagged by including a space-separated list of tags in
# curly braces, as in ISO_8859-1:1987{IANA*} iso-8859-1 { MIME* } or
# some-charset{MIME* IANA*}. The order of tags does not matter, and
# whitespace is allowed between the tagged name and the tags list.
#
# The tags can be used to get standard names using ucnv_getStandardName().
#
# The complete list of recognized tags used in this file is defined in
# the affinity list near the beginning of the file.
#
# The * after the standard tag denotes that the previous alias is the
# preferred (default) charset name for that standard. There can only
# be one of these default charset names per converter.
# The world is getting more complicated...
# Supporting XML parsers, HTML, MIME, and similar applications
# that mark encodings with a charset name can be difficult.
# Many of these applications and operating systems will update
# their codepages over time.
# It means that a new codepage, one that differs from an
# old one by changing a code point, e.g., to the Euro sign,
# must not get an old alias, because it would mean that
# old files with this alias would be interpreted differently.
# If a codepage gets updated by assigning characters to previously
# unassigned code points, then a new name is not necessary.
# Also, some codepages map unassigned codepage byte values
# to the same numbers in Unicode for roundtripping. It may be
# industry practice to keep the encoding name in such a case, too
# (example: Windows codepages).
# The aliases listed in the list of character sets
# that is maintained by the IANA (http://www.iana.org/) must
# not be changed to mean encodings different from what this
# list shows. Currently, the IANA list is at
# http://www.iana.org/assignments/character-sets
# It should also be mentioned that the exact mapping table used for each
# IANA name usually isn't specified. This means that some other applications
# and operating systems are left to interpret the exact mappings for the
# underspecified aliases. For instance, Shift-JIS on a Solaris platform
# may be different from Shift-JIS on a Windows platform. This is why
# some of the aliases can be tagged to differentiate different mapping
# tables with the same alias. If an alias is given to more than one converter,
# it is considered to be an ambiguous alias, and the affinity list will
# choose the converter to use when a standard isn't specified with the alias.
# Name matching is case-insensitive. Also, dashes '-', underscores '_'
# and spaces ' ' are ignored in names (thus cs-iso_latin-1, csisolatin1
# and "cs iso latin 1" are the same).
# However, the names in the left column are directly file names
# or names of algorithmic converters, and their case must not
# be changed - or else code and/or file names must also be changed.
# For example, the converter ibm-921 is expected to be the file ibm-921.cnv.
# The immediately following list is the affinity list of supported standard tags.
# When multiple converters have the same alias under different standards,
# the standard nearest to the top of this list with that alias will
# be the first converter that will be opened. The ordering of the aliases
# after this affinity list does not affect the preferred alias, but it may
# affect the order of the returned list of aliases for a given converter.
#
# The general ordering is from specific and frequently used to more general
# or rarely used at the bottom.
{ UTR22 # Name format specified by https://www.unicode.org/reports/tr22/
# ICU # Can also use ICU_FEATURE
IBM # The IBM CCSID number is specified by ibm-*
WINDOWS # The Microsoft code page identifier number is specified by windows-*. The rest are recognized IE names.
JAVA # Source: Sun JDK. Alias name case is ignored, but dashes are not ignored.
# GLIBC
# AIX
# DB2
# SOLARIS
# APPLE
# HPUX
IANA # Source: http://www.iana.org/assignments/character-sets
MIME # Source: http://www.iana.org/assignments/character-sets
# MSIE # MSIE is Internet Explorer, which can be different from Windows (From the IMultiLanguage COM interface)
# ZOS_USS # z/OS (os/390) Unix System Services (USS), which has NL<->LF swapping. They have the same format as the IBM tag.
}
# Fully algorithmic converters
UTF-8 { IANA* MIME* JAVA* WINDOWS }
ibm-1208 { IBM* } # UTF-8 with IBM PUA
ibm-1209 { IBM } # UTF-8
ibm-5304 { IBM } # Unicode 2.0, UTF-8 with IBM PUA
ibm-5305 { IBM } # Unicode 2.0, UTF-8
ibm-13496 { IBM } # Unicode 3.0, UTF-8 with IBM PUA
ibm-13497 { IBM } # Unicode 3.0, UTF-8
ibm-17592 { IBM } # Unicode 4.0, UTF-8 with IBM PUA
ibm-17593 { IBM } # Unicode 4.0, UTF-8
windows-65001 { WINDOWS* }
cp1208
x-UTF_8J
unicode-1-1-utf-8
unicode-2-0-utf-8
# The ICU 2.2 UTF-16/32 converters detect and write a BOM.
UTF-16 { IANA* MIME* JAVA* } ISO-10646-UCS-2 { IANA }
ibm-1204 { IBM* } # UTF-16 with IBM PUA and BOM sensitive
ibm-1205 { IBM } # UTF-16 BOM sensitive
unicode
csUnicode
ucs-2
# The following Unicode CCSIDs (IBM) are not valid in ICU because they are
# considered pure DBCS (exactly 2 bytes) of Unicode,
# and they are a subset of Unicode. ICU does not support their encoding structures.
# 1400 1401 1402 1410 1414 1415 1446 1447 1448 1449 64770 64771 65520 5496 5497 5498 9592 13688
UTF-16BE { IANA* MIME* JAVA* } x-utf-16be { JAVA }
UnicodeBigUnmarked { JAVA } # java.io name
ibm-1200 { IBM* } # UTF-16 BE with IBM PUA
ibm-1201 { IBM } # UTF-16 BE
ibm-13488 { IBM } # Unicode 2.0, UTF-16 BE with IBM PUA
ibm-13489 { IBM } # Unicode 2.0, UTF-16 BE
ibm-17584 { IBM } # Unicode 3.0, UTF-16 BE with IBM PUA
ibm-17585 { IBM } # Unicode 3.0, UTF-16 BE
ibm-21680 { IBM } # Unicode 4.0, UTF-16 BE with IBM PUA
ibm-21681 { IBM } # Unicode 4.0, UTF-16 BE
ibm-25776 { IBM } # Unicode 4.1, UTF-16 BE with IBM PUA
ibm-25777 { IBM } # Unicode 4.1, UTF-16 BE
ibm-29872 { IBM } # Unicode 5.0, UTF-16 BE with IBM PUA
ibm-29873 { IBM } # Unicode 5.0, UTF-16 BE
ibm-61955 { IBM } # UTF-16BE with Gaidai University (Japan) PUA
ibm-61956 { IBM } # UTF-16BE with Microsoft HKSCS-Big 5 PUA
windows-1201 { WINDOWS* }
cp1200
cp1201
UTF16_BigEndian
# ibm-5297 { IBM } # Unicode 2.0, UTF-16 (BE) (reserved, never used)
# iso-10646-ucs-2 { JAVA } # This is ambiguous
# ibm-61952 is not a valid CCSID because it's Unicode 1.1
# ibm-61953 is not a valid CCSID because it's Unicode 1.0
UTF-16LE { IANA* MIME* JAVA* } x-utf-16le { JAVA }
UnicodeLittleUnmarked { JAVA } # java.io name
ibm-1202 { IBM* } # UTF-16 LE with IBM PUA
ibm-1203 { IBM } # UTF-16 LE
ibm-13490 { IBM } # Unicode 2.0, UTF-16 LE with IBM PUA
ibm-13491 { IBM } # Unicode 2.0, UTF-16 LE
ibm-17586 { IBM } # Unicode 3.0, UTF-16 LE with IBM PUA
ibm-17587 { IBM } # Unicode 3.0, UTF-16 LE
ibm-21682 { IBM } # Unicode 4.0, UTF-16 LE with IBM PUA
ibm-21683 { IBM } # Unicode 4.0, UTF-16 LE
ibm-25778 { IBM } # Unicode 4.1, UTF-16 LE with IBM PUA
ibm-25779 { IBM } # Unicode 4.1, UTF-16 LE
ibm-29874 { IBM } # Unicode 5.0, UTF-16 LE with IBM PUA
ibm-29875 { IBM } # Unicode 5.0, UTF-16 LE
UTF16_LittleEndian
windows-1200 { WINDOWS* }
UTF-32 { IANA* MIME* } ISO-10646-UCS-4 { IANA }
ibm-1236 { IBM* } # UTF-32 with IBM PUA and BOM sensitive
ibm-1237 { IBM } # UTF-32 BOM sensitive
csUCS4
ucs-4
UTF-32BE { IANA* } UTF32_BigEndian
ibm-1232 { IBM* } # UTF-32 BE with IBM PUA
ibm-1233 { IBM } # UTF-32 BE
ibm-9424 { IBM } # Unicode 4.1, UTF-32 BE with IBM PUA
UTF-32LE { IANA* } UTF32_LittleEndian
ibm-1234 { IBM* } # UTF-32 LE, with IBM PUA
ibm-1235 { IBM } # UTF-32 LE
# ICU-specific names for special uses
UTF16_PlatformEndian
UTF16_OppositeEndian
UTF32_PlatformEndian
UTF32_OppositeEndian
# Java-specific, non-Unicode-standard UTF-16 variants.
# These are in the Java "Basic Encoding Set (contained in lib/rt.jar)".
# See the "Supported Encodings" at
# http://java.sun.com/javase/6/docs/technotes/guides/intl/encoding.doc.html
# or a newer version of this document.
#
# Aliases marked with { JAVA* } are canonical names for java.io and java.lang APIs.
# Aliases marked with { JAVA } are canonical names for the java.nio API.
#
# "BOM" means the Unicode Byte Order Mark, which is the encoding-scheme-specific
# byte sequence for U+FEFF.
# "Reverse BOM" means the BOM for the sibling encoding scheme with the
# opposite endianness. (LE<->BE)
# "Sixteen-bit Unicode (or UCS) Transformation Format, big-endian byte order,
# with byte-order mark"
#
# From Unicode: Writes BOM.
# To Unicode: Detects and consumes BOM.
# If there is a "reverse BOM", Java throws
# MalformedInputException: Incorrect byte-order mark.
# In this case, ICU4C sets a U_ILLEGAL_ESCAPE_SEQUENCE UErrorCode value
# and a UCNV_ILLEGAL UConverterCallbackReason.
UTF-16BE,version=1 UnicodeBig { JAVA* }
# "Sixteen-bit Unicode (or UCS) Transformation Format, little-endian byte order,
# with byte-order mark"
#
# From Unicode: Writes BOM.
# To Unicode: Detects and consumes BOM.
# If there is a "reverse BOM", Java throws
# MalformedInputException: Incorrect byte-order mark.
# In this case, ICU4C sets a U_ILLEGAL_ESCAPE_SEQUENCE UErrorCode value
# and a UCNV_ILLEGAL UConverterCallbackReason.
UTF-16LE,version=1 UnicodeLittle { JAVA* } x-UTF-16LE-BOM { JAVA }
# This one is not mentioned on the "Supported Encodings" page
# but is available in Java.
# In Java, this is called "Unicode" but we cannot give it that alias
# because the standard UTF-16 converter already has a "unicode" alias.
#
# From Unicode: Writes BOM.
# To Unicode: Detects and consumes BOM.
# If there is no BOM, rather than defaulting to BE, Java throws
# MalformedInputException: Missing byte-order mark.
# In this case, ICU4C sets a U_ILLEGAL_ESCAPE_SEQUENCE UErrorCode value
# and a UCNV_ILLEGAL UConverterCallbackReason.
UTF-16,version=1
# This is the same as standard UTF-16 but always writes a big-endian byte stream,
# regardless of the platform endianness, as expected by the Java compatibility tests.
# See the java.nio.charset.Charset API documentation at
# http://java.sun.com/javase/6/docs/api/java/nio/charset/Charset.html
# or a newer version of this document.
#
# From Unicode: Write BE BOM and BE bytes
# To Unicode: Detects and consumes BOM. Defaults to BE.
UTF-16,version=2
# Note: ICU does not currently support Java-specific, non-Unicode-standard UTF-32 variants.
# Presumably, these behave analogously to the UTF-16 variants with similar names.
# UTF_32BE_BOM x-UTF-32BE-BOM
# UTF_32LE_BOM x-UTF-32LE-BOM
# End of Java-specific, non-Unicode-standard UTF variants.
# On UTF-7:
# RFC 2152 (http://www.imc.org/rfc2152) allows some US-ASCII characters
# to be encoded either directly or in base64. In particular, the characters
# in set O as defined in the RFC (!"#$%&*;<=>@[]^_`{|}) may be encoded directly
# but are not allowed in, e.g., email headers.
# By default, the ICU UTF-7 converter encodes set O directly.
# By choosing the option "version=1", set O will be escaped instead.
# For example:
# utf7Converter=ucnv_open("UTF-7,version=1");
#
# For details about email headers see RFC 2047.
UTF-7 { IANA* MIME* WINDOWS } windows-65000 { WINDOWS* }
unicode-1-1-utf-7
unicode-2-0-utf-7
# UTF-EBCDIC doesn't exist in ICU, but the aliases are here for reference.
#UTF-EBCDIC ibm-1210 { IBM* } ibm-1211 { IBM }
# IMAP-mailbox-name is an ICU-specific name for the encoding of IMAP mailbox names.
# It is a substantially modified UTF-7 encoding. See the specification in:
#
# RFC 2060: INTERNET MESSAGE ACCESS PROTOCOL - VERSION 4rev1
# (http://www.ietf.org/rfc/rfc2060.txt)
# Section 5.1.3. Mailbox International Naming Convention
IMAP-mailbox-name
SCSU { IANA* }
ibm-1212 { IBM } # SCSU with IBM PUA
ibm-1213 { IBM* } # SCSU
BOCU-1 { IANA* }
csBOCU-1 { IANA }
ibm-1214 { IBM } # BOCU-1 with IBM PUA
ibm-1215 { IBM* } # BOCU-1
# See https://www.unicode.org/reports/tr26 for this Compatibility Encoding Scheme for UTF-16
# The Unicode Consortium does not encourage the use of CESU-8
CESU-8 { IANA* } ibm-9400 { IBM* }
# Standard iso-8859-1, which does not have the Euro update.
# See iso-8859-15 (latin9) for the Euro update
ISO-8859-1 { MIME* IANA JAVA* }
ibm-819 { IBM* JAVA } # This is not truly ibm-819 because it's missing the fallbacks.
IBM819 { IANA }
cp819 { IANA JAVA }
latin1 { IANA JAVA }
8859_1 { JAVA }
csISOLatin1 { IANA JAVA }
iso-ir-100 { IANA JAVA }
ISO_8859-1:1987 { IANA* JAVA }
l1 { IANA JAVA }
819 { JAVA }
# windows-28591 { WINDOWS* } # This has odd behavior because it has the Euro update, which isn't correct.
# LATIN_1 # Old ICU name
# ANSI_X3.110-1983 # This is for a different IANA alias. This isn't iso-8859-1.
US-ASCII { MIME* IANA JAVA WINDOWS }
ASCII { JAVA* IANA WINDOWS }
ANSI_X3.4-1968 { IANA* WINDOWS }
ANSI_X3.4-1986 { IANA WINDOWS }
ISO_646.irv:1991 { IANA WINDOWS }
iso_646.irv:1983 { JAVA }
ISO646-US { JAVA IANA WINDOWS }
us { IANA }
csASCII { IANA WINDOWS }
iso-ir-6 { IANA }
cp367 { IANA WINDOWS }
ascii7 { JAVA }
646 { JAVA }
windows-20127 { WINDOWS* }
ibm-367 { IBM* } IBM367 { IANA WINDOWS } # This is not truly ibm-367 because it's missing the fallbacks.
# GB 18030 is partly algorithmic, using the MBCS converter
gb18030 { IANA* } ibm-1392 { IBM* } windows-54936 { WINDOWS* } GB18030 { MIME* }
# Table-based interchange codepages
# Central Europe
ibm-912_P100-1995 { UTR22* }
ibm-912 { IBM* JAVA }
ISO-8859-2 { MIME* IANA JAVA* WINDOWS }
ISO_8859-2:1987 { IANA* WINDOWS JAVA }
latin2 { IANA WINDOWS JAVA }
csISOLatin2 { IANA WINDOWS JAVA }
iso-ir-101 { IANA WINDOWS JAVA }
l2 { IANA WINDOWS JAVA }
8859_2 { JAVA }
cp912 { JAVA }
912 { JAVA }
windows-28592 { WINDOWS* }
# Maltese Esperanto
ibm-913_P100-2000 { UTR22* }
ibm-913 { IBM* JAVA }
ISO-8859-3 { MIME* IANA WINDOWS JAVA* }
ISO_8859-3:1988 { IANA* WINDOWS JAVA }
latin3 { IANA JAVA WINDOWS }
csISOLatin3 { IANA WINDOWS }
iso-ir-109 { IANA WINDOWS JAVA }
l3 { IANA WINDOWS JAVA }
8859_3 { JAVA }
cp913 { JAVA }
913 { JAVA }
windows-28593 { WINDOWS* }
# Baltic
ibm-914_P100-1995 { UTR22* }
ibm-914 { IBM* JAVA }
ISO-8859-4 { MIME* IANA WINDOWS JAVA* }
latin4 { IANA WINDOWS JAVA }
csISOLatin4 { IANA WINDOWS JAVA }
iso-ir-110 { IANA WINDOWS JAVA }
ISO_8859-4:1988 { IANA* WINDOWS JAVA }
l4 { IANA WINDOWS JAVA }
8859_4 { JAVA }
cp914 { JAVA }
914 { JAVA }
windows-28594 { WINDOWS* }
# Cyrillic
ibm-915_P100-1995 { UTR22* }
ibm-915 { IBM* JAVA }
ISO-8859-5 { MIME* IANA WINDOWS JAVA* }
cyrillic { IANA WINDOWS JAVA }
csISOLatinCyrillic { IANA WINDOWS JAVA }
iso-ir-144 { IANA WINDOWS JAVA }
ISO_8859-5:1988 { IANA* WINDOWS JAVA }
8859_5 { JAVA }
cp915 { JAVA }
915 { JAVA }
windows-28595 { WINDOWS* }
glibc-PT154-2.3.3 { UTR22* }
PTCP154 { IANA* }
csPTCP154
PT154
CP154
Cyrillic-Asian
# Arabic
# ISO_8859-6-E and ISO_8859-6-I are similar to this charset, but BiDi is done differently
# From a narrow mapping point of view, there is no difference.
# -E means explicit. -I means implicit.
# -E requires the client to handle the ISO 6429 bidirectional controls
ibm-1089_P100-1995 { UTR22* }
ibm-1089 { IBM* JAVA }
ISO-8859-6 { MIME* IANA WINDOWS JAVA* }
arabic { IANA WINDOWS JAVA }
csISOLatinArabic { IANA WINDOWS JAVA }
iso-ir-127 { IANA WINDOWS JAVA }
ISO_8859-6:1987 { IANA* WINDOWS JAVA }
ECMA-114 { IANA JAVA }
ASMO-708 { IANA JAVA }
8859_6 { JAVA }
cp1089 { JAVA }
1089 { JAVA }
windows-28596 { WINDOWS* }
ISO-8859-6-I { IANA MIME } # IANA considers this alias different and BiDi needs to be applied.
ISO-8859-6-E { IANA MIME } # IANA considers this alias different and BiDi needs to be applied.
x-ISO-8859-6S { JAVA }
# ISO Greek (with euro update). This is really ISO_8859-7:2003
ibm-9005_X110-2007 { UTR22* }
ibm-9005 { IBM* }
ISO-8859-7 { MIME* IANA JAVA* WINDOWS }
8859_7 { JAVA }
greek { IANA JAVA WINDOWS }
greek8 { IANA JAVA WINDOWS }
ELOT_928 { IANA JAVA WINDOWS }
ECMA-118 { IANA JAVA WINDOWS }
csISOLatinGreek { IANA JAVA WINDOWS }
iso-ir-126 { IANA JAVA WINDOWS }
ISO_8859-7:1987 { IANA* JAVA WINDOWS }
windows-28597 { WINDOWS* }
sun_eu_greek # For Solaris
# ISO Greek (w/o euro update)
# JDK 1.5 has these aliases.
ibm-813_P100-1995 { UTR22* }
ibm-813 { IBM* JAVA* }
cp813 { JAVA }
813 { JAVA }
# hebrew
# ISO_8859-8-E and ISO_8859-8-I are similar to this charset, but BiDi is done differently
# From a narrow mapping point of view, there is no difference.
# -E means explicit. -I means implicit.
# -E requires the client to handle the ISO 6429 bidirectional controls
# This matches the official mapping on unicode.org
ibm-5012_P100-1999 { UTR22* }
ibm-5012 { IBM* }
ISO-8859-8 { MIME* IANA WINDOWS JAVA* }
hebrew { IANA WINDOWS JAVA }
csISOLatinHebrew { IANA WINDOWS JAVA }
iso-ir-138 { IANA WINDOWS JAVA }
ISO_8859-8:1988 { IANA* WINDOWS JAVA }
ISO-8859-8-I { IANA MIME } # IANA and Windows consider this alias different and BiDi needs to be applied.
ISO-8859-8-E { IANA MIME } # IANA and Windows consider this alias different and BiDi needs to be applied.
8859_8 { JAVA }
windows-28598 { WINDOWS* } # Hebrew (ISO-Visual). A hybrid between ibm-5012 and ibm-916 with extra PUA mappings.
hebrew8 # Reflect HP-UX code page update
# Unfortunately, the Java aliases are split across ibm-916 and ibm-5012
# Also many platforms are a combination between ibm-916 and ibm-5012 behaviors
ibm-916_P100-1995 { UTR22* }
ibm-916 { IBM* JAVA* }
cp916 { JAVA }
916 { JAVA }
# Turkish
ibm-920_P100-1995 { UTR22* }
ibm-920 { IBM* JAVA }
ISO-8859-9 { MIME* IANA WINDOWS JAVA* }
latin5 { IANA WINDOWS JAVA }
csISOLatin5 { IANA JAVA }
iso-ir-148 { IANA WINDOWS JAVA }
ISO_8859-9:1989 { IANA* WINDOWS }
l5 { IANA WINDOWS JAVA }
8859_9 { JAVA }
cp920 { JAVA }
920 { JAVA }
windows-28599 { WINDOWS* }
ECMA-128 # IANA doesn't have this alias 6/24/2002
turkish8 # Reflect HP-UX codepage update 8/1/2008
turkish # Reflect HP-UX codepage update 8/1/2008
# Nordic languages
iso-8859_10-1998 { UTR22* } ISO-8859-10 { MIME* IANA* }
iso-ir-157 { IANA }
l6 { IANA }
ISO_8859-10:1992 { IANA }
csISOLatin6 { IANA }
latin6 { IANA }
# Thai
# Be warned. There are several iso-8859-11 codepage variants, and they are all incompatible.
# ISO-8859-11 is a superset of TIS-620. The difference is that ISO-8859-11 contains the C1 control codes.
iso-8859_11-2001 { UTR22* } ISO-8859-11
thai8 # HP-UX alias. HP-UX says TIS-620, but it's closer to ISO-8859-11.
x-iso-8859-11 { JAVA* }
# iso-8859-13, PC Baltic (w/o euro update)
ibm-921_P100-1995 { UTR22* }
ibm-921 { IBM* }
ISO-8859-13 { IANA* MIME* JAVA* }
8859_13 { JAVA }
windows-28603 { WINDOWS* }
cp921
921
x-IBM921 { JAVA }
# Celtic
iso-8859_14-1998 { UTR22* } ISO-8859-14 { IANA* }
iso-ir-199 { IANA }
ISO_8859-14:1998 { IANA }
latin8 { IANA }
iso-celtic { IANA }
l8 { IANA }
# Latin 9
ibm-923_P100-1998 { UTR22* }
ibm-923 { IBM* JAVA }
ISO-8859-15 { IANA* MIME* WINDOWS JAVA* }
Latin-9 { IANA WINDOWS }
l9 { WINDOWS }
8859_15 { JAVA }
latin0 { JAVA }
csisolatin0 { JAVA }
csisolatin9 { JAVA }
iso8859_15_fdis { JAVA }
cp923 { JAVA }
923 { JAVA }
windows-28605 { WINDOWS* }
# CJK encodings
ibm-942_P12A-1999 { UTR22* } # ibm-942_P120 is a rarely used alternate mapping (sjis78 is already old)
ibm-942 { IBM* }
ibm-932 { IBM }
cp932
shift_jis78
sjis78
ibm-942_VSUB_VPUA
ibm-932_VSUB_VPUA
x-IBM942 { JAVA* }
x-IBM942C { JAVA }
# Is this "JIS_C6226-1978"?
# ibm-943_P15A-2003 differs from windows-932-2000 only in a few roundtrip mappings:
# - the usual IBM PC control code rotation (1A-1C-7F)
# - the Windows table has roundtrips for bytes 80, A0, and FD-FF to U+0080 and PUA
ibm-943_P15A-2003 { UTR22* }
ibm-943 # Leave untagged because this isn't the default
Shift_JIS { IANA* MIME* WINDOWS JAVA }
MS_Kanji { IANA WINDOWS JAVA }
csShiftJIS { IANA WINDOWS JAVA }
windows-31j { IANA JAVA } # A further extension of Shift_JIS to include NEC special characters (Row 13)
csWindows31J { IANA WINDOWS JAVA } # A further extension of Shift_JIS to include NEC special characters (Row 13)
x-sjis { WINDOWS JAVA }
x-ms-cp932 { WINDOWS }
cp932 { WINDOWS }
windows-932 { WINDOWS* }
cp943c { JAVA* } # This is slightly different, but the backslash mapping is the same.
IBM-943C #{ AIX* } # Add this tag once AIX aliases become available
ms932
pck # Probably SOLARIS
sjis # This might be for ibm-1351
ibm-943_VSUB_VPUA
x-MS932_0213 { JAVA }
x-JISAutoDetect { JAVA }
# cp943 # This isn't Windows, and no one else uses it.
# IANA says that Windows-31J is an extension to csshiftjis ibm-932
ibm-943_P130-1999 { UTR22* }
ibm-943 { IBM* JAVA }
Shift_JIS # Leave untagged because this isn't the default
cp943 { JAVA* } # This is slightly different, but the backslash mapping is the same.
943 { JAVA }
ibm-943_VASCII_VSUB_VPUA
x-IBM943 { JAVA }
# japanese. Unicode name is \u30b7\u30d5\u30c8\u7b26\u53f7\u5316\u8868\u73fe
ibm-33722_P12A_P12A-2009_U2 { UTR22* }
ibm-33722 # Leave untagged because this isn't the default
ibm-5050 # Leave untagged because this isn't the default, and yes this alias is correct
ibm-33722_VPUA
IBM-eucJP
windows-51932-2006 { UTR22* }
windows-51932 { WINDOWS* }
CP51932 { IANA* }
csCP51932
ibm-33722_P120-1999 { UTR22* } # Japan EUC with \ <-> Yen mapping
ibm-33722 { IBM* JAVA }
ibm-5050 { IBM } # Yes this is correct
cp33722 { JAVA* }
33722 { JAVA }
ibm-33722_VASCII_VPUA
x-IBM33722 { JAVA }
x-IBM33722A { JAVA }
x-IBM33722C { JAVA }
# ibm-954 seems to be almost a superset of ibm-33722 and ibm-1350
# ibm-1350 seems to be almost a superset of ibm-33722
# ibm-954 contains more PUA characters than the others.
ibm-954_P101-2007 { UTR22* }
ibm-954 { IBM* }
x-IBM954 { JAVA* }
x-IBM954C { JAVA }
# eucJP # This is closest to Solaris EUC-JP.
euc-jp-2007 { UTR22* }
EUC-JP { MIME* IANA JAVA* WINDOWS* }
Extended_UNIX_Code_Packed_Format_for_Japanese { IANA* JAVA WINDOWS }
csEUCPkdFmtJapanese { IANA JAVA WINDOWS }
X-EUC-JP { MIME JAVA WINDOWS } # Japan EUC. x-euc-jp is a MIME name
eucjis {JAVA}
ujis # Linux sometimes uses this name. This is an unfortunate generic and rarely used name. Its use is discouraged.
aix-IBM_udcJP-4.3.6 { UTR22* }
x-IBM-udcJP { JAVA* }
java-euc_jp_linux-1.6_P { UTR22* }
euc-jp-linux
x-EUC_JP_LINUX { JAVA* }
java-sjis_0213-1.6_P { UTR22* }
x-SJIS_0213 { JAVA* }
# Here are various interpretations and extensions of Big5
ibm-1373_P100-2002 { UTR22* } # IBM's interpretation of Windows' Taiwan Big-5 without HKSCS extensions
ibm-1373 { IBM* }
windows-950 # Alternate mapping. Leave untagged. This is the IBM interpretation of a Windows codepage.
windows-950-2000 { UTR22* }
Big5 { IANA* MIME* JAVA* WINDOWS }
csBig5 { IANA WINDOWS }
windows-950 { WINDOWS* }
x-windows-950 { JAVA }
x-big5
ms950
ibm-950_P110-1999 { UTR22* } # Taiwan Big-5 (w/o euro update)
ibm-950 { IBM* JAVA }
cp950 { JAVA* }
950 { JAVA }
x-IBM950 { JAVA }
ibm-1375_P100-2008 { UTR22* } # Big5-HKSCS-2004 with Unicode 3.1 mappings. This uses supplementary characters.
ibm-1375 { IBM* }
Big5-HKSCS { IANA* JAVA* }
big5hk { JAVA }
HKSCS-BIG5 # From http://www.openi18n.org/localenameguide/
ibm-5471_P100-2006 { UTR22* } # Big5-HKSCS-2001 with Unicode 3.0 mappings. This uses many PUA characters.
ibm-5471 { IBM* }
Big5-HKSCS
MS950_HKSCS { JAVA* }
hkbig5 # from HP-UX 11i, which can't handle supplementary characters.
big5-hkscs:unicode3.0
x-MS950-HKSCS { JAVA }
# windows-950 # Windows-950 can be w/ or w/o HKSCS extensions. By default it's not.
# windows-950_hkscs
solaris-zh_TW_big5-2.7 { UTR22* }
Big5_Solaris { JAVA* }
x-Big5-Solaris { JAVA }
# GBK
ibm-1386_P100-2001 { UTR22* }
ibm-1386 { IBM* }
cp1386
windows-936 # Alternate mapping. Leave untagged. This is the IBM interpretation of a Windows codepage.
ibm-1386_VSUB_VPUA
windows-936-2000 { UTR22* }
GBK { IANA* WINDOWS JAVA* }
CP936 { IANA JAVA }
MS936 { IANA } # In JDK 1.5, this goes to x-mswin-936. This is an IANA name split.
windows-936 { IANA WINDOWS* JAVA }
# Java has two different tables for ibm-1383 and gb2312. We pick the closest set for tagging.
ibm-1383_P110-1999 { UTR22* } # China EUC.
ibm-1383 { IBM* JAVA }
GB2312 { IANA* MIME* }
csGB2312 { IANA }
cp1383 { JAVA* }
1383 { JAVA }
EUC-CN # According to other platforms, windows-20936 looks more like euc-cn. x-euc-cn is also a MIME name
ibm-eucCN
hp15CN # From HP-UX?
ibm-1383_VPUA
# gb # This is not an IANA name. gb in IANA means Great Britain.
ibm-5478_P100-1995 { UTR22* } ibm-5478 { IBM* } # This gb_2312_80 DBCS mapping is needed by iso-2022.
GB_2312-80 { IANA* } # Windows maps this alias incorrectly
chinese { IANA }
iso-ir-58 { IANA }
csISO58GB231280 { IANA }
gb2312-1980
GB2312.1980-0 # From X11R6
euc-tw-2014 { UTR22* } # Updated EUC-TW converter based on ibm-964
EUC-TW
ibm-964_P110-1999 { UTR22* } # Taiwan EUC. x-euc-tw is a MIME name
ibm-964 { IBM* JAVA }
ibm-eucTW
cns11643
cp964 { JAVA* }
964 { JAVA }
ibm-964_VPUA
x-IBM964 { JAVA }
# ISO-2022 needs one, and other people may need others.
ibm-949_P110-1999 { UTR22* }
ibm-949 { IBM* JAVA }
cp949 { JAVA* }
949 { JAVA }
ibm-949_VASCII_VSUB_VPUA
x-IBM949 { JAVA }
ibm-949_P11A-1999 { UTR22* }
ibm-949 # Leave untagged because this isn't the default
cp949c { JAVA* }
ibm-949_VSUB_VPUA
x-IBM949C { JAVA }
IBM-949C { JAVA }
# Korean EUC.
#
# <quote from="Jungshik Shin">
# EUC-KR = KS X 1003/ISO 646-KR or ISO 646-IRV/US-ASCII in GL and KS X 1001:1998 (formerly KS C 5601-1987) in GR.
#
# Although widely spread on MS Windows, using
# KS C 5601 or related names to denote EUC-KR or
# windows-949 is very much misleading. KS C 5601-1987
# is NOT suitable as a designation for MIME charset
# and MBCS. It's just the name of a 94 x 94 Korean
# coded character set standard which can be invoked
# on either GL (with MSB reset) or GR (with MSB set).
# Note that JOHAB (windows-1361) specified in
# KS X 1001:1998 annex 3 (KS C 5601-1992 annex 3)
# is a _separate_ MBCS with a _completely different_
# mapping.
# </quote>
#
# The following aliases try to mirror the poor state of alias recognition
# on these platforms.
#
# ibm-970 is almost a subset of ibm-1363.
# Java, Solaris and AIX use euc-kr to also mean ksc5601.
# Java has both ibm-970 and EUC-KR as separate converters.
ibm-970_P110_P110-2006_U2 { UTR22* }
ibm-970 { IBM* JAVA }
EUC-KR { IANA* MIME* WINDOWS JAVA }
KS_C_5601-1987 { JAVA }
windows-51949 { WINDOWS* }
csEUCKR { IANA WINDOWS } # x-euc-kr is also a MIME name
ibm-eucKR { JAVA }
KSC_5601 { JAVA } # Needed by iso-2022
5601 { JAVA }
cp970 { JAVA* }
970 { JAVA }
ibm-970_VPUA
x-IBM970 { JAVA }
# ibm-971 is almost the set of DBCS mappings of ibm-970
ibm-971_P100-1995 ibm-971 { IBM* } ibm-971_VPUA x-IBM971 { JAVA* }
# Java, Solaris and AIX use euc-kr to also mean ksc5601, and _sometimes_ for Windows too.
# ibm-1363 is almost a superset of ibm-970.
ibm-1363_P11B-1998 { UTR22* }
ibm-1363 # Leave untagged because this isn't the default
KS_C_5601-1987 { IANA* }
KS_C_5601-1989 { IANA }
KSC_5601 { IANA }
csKSC56011987 { IANA }
korean { IANA }
iso-ir-149 { IANA }
cp1363 { MIME* }
5601
ksc
windows-949 # Alternate mapping. Leave untagged. This is the IBM interpretation of a Windows codepage.
ibm-1363_VSUB_VPUA
x-IBM1363C { JAVA* }
# ks_x_1001:1992
# ksc5601-1992
ibm-1363_P110-1997 { UTR22* } # Korean KSC MBCS with \ <-> Won mapping
ibm-1363 { IBM* }
ibm-1363_VASCII_VSUB_VPUA
x-IBM1363 { JAVA* }
windows-949-2000 { UTR22* }
windows-949 { JAVA* WINDOWS* }
KS_C_5601-1987 { WINDOWS }
KS_C_5601-1989 { WINDOWS }
KSC_5601 { MIME* WINDOWS } # Needed by iso-2022
csKSC56011987 { WINDOWS }
korean { WINDOWS }
iso-ir-149 { WINDOWS }
ms949 { JAVA }
x-KSC5601 { JAVA }
windows-1361-2000 { UTR22* }
ksc5601_1992
ms1361
johab
x-Johab { JAVA* }
windows-874-2000 { UTR22* } # Thai (w/ euro update)
TIS-620 { WINDOWS }
windows-874 { JAVA* WINDOWS* }
MS874 { JAVA }
x-windows-874 { JAVA }
# iso-8859-11 { WINDOWS } # iso-8859-11 is similar to TIS-620. ibm-13162 is a closer match.
ibm-874_P100-1995 { UTR22* } # Thai PC (w/o euro update).
ibm-874 { IBM* JAVA }
ibm-9066 { IBM } # Yes ibm-874 == ibm-9066. ibm-1161 has the euro update.
cp874 { JAVA* }
TIS-620 { IANA* JAVA } # This is actually separate from ibm-874, which is similar to this table
tis620.2533 { JAVA } # This is actually separate from ibm-874, which is similar to this table
eucTH # eucTH is an unusual alias from Solaris. eucTH has fewer mappings than TIS620
x-IBM874 { JAVA }
ibm-1162_P100-1999 { UTR22* } # Thai (w/ euro update)
ibm-1162 { IBM* }
windows-864-2000 { UTR22* }
ibm-864s
cp864s
x-IBM864S { JAVA* }
# Platform codepages
# If Java supports the IBM prefix, it should support the ibm- prefix too.
ibm-437_P100-1995 { UTR22* } ibm-437 { IBM* } IBM437 { IANA* WINDOWS JAVA } cp437 { IANA WINDOWS JAVA* } 437 { IANA WINDOWS JAVA } csPC8CodePage437 { IANA JAVA } windows-437 { WINDOWS* } # PC US
ibm-720_P100-1997 { UTR22* } ibm-720 { IBM* } windows-720 { WINDOWS* } DOS-720 { WINDOWS } x-IBM720 { JAVA* } # PC Arabic
ibm-737_P100-1997 { UTR22* } ibm-737 { IBM* } IBM737 { WINDOWS JAVA } cp737 { JAVA* } windows-737 { WINDOWS* } 737 { JAVA } x-IBM737 { JAVA } # PC Greek
ibm-775_P100-1996 { UTR22* } ibm-775 { IBM* } IBM775 { IANA* WINDOWS JAVA } cp775 { IANA WINDOWS JAVA* } csPC775Baltic { IANA } windows-775 { WINDOWS* } 775 { JAVA } # PC Baltic
ibm-850_P100-1995 { UTR22* } ibm-850 { IBM* } IBM850 { IANA* MIME* WINDOWS JAVA } cp850 { IANA MIME WINDOWS JAVA* } 850 { IANA JAVA } csPC850Multilingual { IANA JAVA } windows-850 { WINDOWS* } # PC latin1
ibm-851_P100-1995 { UTR22* } ibm-851 { IBM* } IBM851 { IANA* } cp851 { IANA MIME* } 851 { IANA } csPC851 { IANA } # PC DOS Greek (w/o euro)
ibm-852_P100-1995 { UTR22* } ibm-852 { IBM* } IBM852 { IANA* WINDOWS JAVA } cp852 { IANA WINDOWS JAVA* } 852 { IANA WINDOWS JAVA } csPCp852 { IANA JAVA } windows-852 { WINDOWS* } # PC latin2 (w/o euro update)
ibm-855_P100-1995 { UTR22* } ibm-855 { IBM* } IBM855 { IANA* JAVA } cp855 { IANA JAVA* } 855 { IANA } csIBM855 { IANA } csPCp855 { JAVA } windows-855 { WINDOWS* } # PC cyrillic (w/o euro update)
ibm-856_P100-1995 { UTR22* } ibm-856 { IBM* } IBM856 { JAVA } cp856 { JAVA* } 856 { JAVA } x-IBM856 { JAVA } # PC Hebrew implicit order
ibm-857_P100-1995 { UTR22* } ibm-857 { IBM* } IBM857 { IANA* MIME* WINDOWS JAVA } cp857 { IANA MIME JAVA* } 857 { IANA JAVA } csIBM857 { IANA JAVA } windows-857 { WINDOWS* } # PC Latin 5 (w/o euro update)
ibm-858_P100-1997 { UTR22* } ibm-858 { IBM* } IBM00858 { IANA* MIME* JAVA } CCSID00858 { IANA JAVA } CP00858 { IANA JAVA } PC-Multilingual-850+euro { IANA } cp858 { MIME JAVA* } windows-858 { WINDOWS* } # PC latin1 with Euro
ibm-860_P100-1995 { UTR22* } ibm-860 { IBM* } IBM860 { IANA* MIME* JAVA } cp860 { IANA MIME JAVA* } 860 { IANA JAVA } csIBM860 { IANA JAVA } # PC Portugal
ibm-861_P100-1995 { UTR22* } ibm-861 { IBM* } IBM861 { IANA* MIME* WINDOWS JAVA } cp861 { IANA MIME JAVA* } 861 { IANA JAVA } cp-is { IANA JAVA } csIBM861 { IANA JAVA } windows-861 { WINDOWS* } # PC Iceland
ibm-862_P100-1995 { UTR22* } ibm-862 { IBM* } IBM862 { IANA* MIME* JAVA } cp862 { IANA MIME JAVA* } 862 { IANA JAVA } csPC862LatinHebrew { IANA JAVA } DOS-862 { WINDOWS } windows-862 { WINDOWS* } # PC Hebrew visual order (w/o euro update)
ibm-863_P100-1995 { UTR22* } ibm-863 { IBM* } IBM863 { IANA* MIME* JAVA } cp863 { IANA MIME JAVA* } 863 { IANA JAVA } csIBM863 { IANA JAVA } # PC Canadian French
ibm-864_X110-1999 { UTR22* } ibm-864 { IBM* } IBM864 { IANA* MIME* JAVA } cp864 { IANA MIME JAVA* } csIBM864 { IANA JAVA } # PC Arabic (w/o euro update)
ibm-865_P100-1995 { UTR22* } ibm-865 { IBM* } IBM865 { IANA* MIME* JAVA } cp865 { IANA MIME JAVA* } 865 { IANA JAVA } csIBM865 { IANA JAVA } # PC Nordic
ibm-866_P100-1995 { UTR22* } ibm-866 { IBM* } IBM866 { IANA* MIME* JAVA } cp866 { IANA MIME WINDOWS JAVA* } 866 { IANA JAVA } csIBM866 { IANA JAVA } windows-866 { WINDOWS* } # PC Russian (w/o euro update)
ibm-867_P100-1998 { UTR22* } ibm-867 { IBM* } x-IBM867 { JAVA* } # PC Hebrew (w/ euro update) Updated version of ibm-862
ibm-868_P100-1995 { UTR22* } ibm-868 { IBM* } IBM868 { IANA* MIME* JAVA } CP868 { IANA MIME JAVA* } 868 { JAVA } csIBM868 { IANA } cp-ar { IANA } # PC Urdu
ibm-869_P100-1995 { UTR22* } ibm-869 { IBM* } IBM869 { IANA* MIME* WINDOWS JAVA } cp869 { IANA MIME JAVA* } 869 { IANA JAVA } cp-gr { IANA JAVA } csIBM869 { IANA JAVA } windows-869 { WINDOWS* } # PC Greek (w/o euro update)
ibm-878_P100-1996 { UTR22* } ibm-878 { IBM* } KOI8-R { IANA* MIME* WINDOWS JAVA* } koi8 { WINDOWS JAVA } csKOI8R { IANA WINDOWS JAVA } windows-20866 { WINDOWS* } cp878 # Russian internet
ibm-901_P100-1999 { UTR22* } ibm-901 { IBM* } # PC Baltic (w/ euro update), update of ibm-921
ibm-902_P100-1999 { UTR22* } ibm-902 { IBM* } # PC Estonian (w/ euro update), update of ibm-922
ibm-922_P100-1999 { UTR22* } ibm-922 { IBM* } IBM922 { JAVA } cp922 { JAVA* } 922 { JAVA } x-IBM922 { JAVA } # PC Estonian (w/o euro update)
ibm-1168_P100-2002 { UTR22* } ibm-1168 { IBM* } KOI8-U { IANA* WINDOWS } windows-21866 { WINDOWS* } # Ukrainian KOI8. koi8-ru != KOI8-U and Microsoft is wrong for aliasing them as the same.
ibm-4909_P100-1999 { UTR22* } ibm-4909 { IBM* } # ISO Greek (w/ euro update), update of ibm-813
# The cp aliases in this section aren't really windows aliases, but they were used by ICU for Windows.
# cp is usually used to denote IBM in Java, and that is why we don't do that anymore.
# The windows-* aliases mean windows codepages.
ibm-5346_P100-1998 { UTR22* } ibm-5346 { IBM* } windows-1250 { IANA* JAVA* WINDOWS* } cp1250 { WINDOWS JAVA } # Windows Latin2 (w/ euro update)
ibm-5347_P100-1998 { UTR22* } ibm-5347 { IBM* } windows-1251 { IANA* JAVA* WINDOWS* } cp1251 { WINDOWS JAVA } ANSI1251 # Windows Cyrillic (w/ euro update). ANSI1251 is from Solaris
ibm-5348_P100-1997 { UTR22* } ibm-5348 { IBM* } windows-1252 { IANA* JAVA* WINDOWS* } cp1252 { JAVA } # Windows Latin1 (w/ euro update)
ibm-5349_P100-1998 { UTR22* } ibm-5349 { IBM* } windows-1253 { IANA* JAVA* WINDOWS* } cp1253 { JAVA } # Windows Greek (w/ euro update)
ibm-5350_P100-1998 { UTR22* } ibm-5350 { IBM* } windows-1254 { IANA* JAVA* WINDOWS* } cp1254 { JAVA } # Windows Turkish (w/ euro update)
ibm-9447_P100-2002 { UTR22* } ibm-9447 { IBM* } windows-1255 { IANA* JAVA* WINDOWS* } cp1255 { JAVA } # Windows Hebrew (w/ euro update)
ibm-9448_X100-2005 { UTR22* } ibm-9448 { IBM* } windows-1256 { IANA* JAVA* WINDOWS* } cp1256 { WINDOWS JAVA } x-windows-1256S { JAVA } # Windows Arabic (w/ euro update)
ibm-9449_P100-2002 { UTR22* } ibm-9449 { IBM* } windows-1257 { IANA* JAVA* WINDOWS* } cp1257 { JAVA } # Windows Baltic (w/ euro update)
ibm-5354_P100-1998 { UTR22* } ibm-5354 { IBM* } windows-1258 { IANA* JAVA* WINDOWS* } cp1258 { JAVA } # Windows Vietnamese (w/ euro update)
# These tables are out of date, and most don't have the Euro
# Leave the windows- variants untagged. They are alternate tables of the newer ones above.
ibm-1250_P100-1995 { UTR22* } ibm-1250 { IBM* } windows-1250 # Old Windows Latin2 (w/o euro update)
ibm-1251_P100-1995 { UTR22* } ibm-1251 { IBM* } windows-1251 # Old Windows Cyrillic (w/o euro update)
ibm-1252_P100-2000 { UTR22* } ibm-1252 { IBM* } windows-1252 # Old Windows Latin 1 without Euro
ibm-1253_P100-1995 { UTR22* } ibm-1253 { IBM* } windows-1253 # Old Windows Greek (w/o euro update)
ibm-1254_P100-1995 { UTR22* } ibm-1254 { IBM* } windows-1254 # Old Windows Turkish (w/o euro update)
ibm-1255_P100-1995 { UTR22* } ibm-1255 { IBM* } # Very old Windows Hebrew (w/o euro update)
ibm-5351_P100-1998 { UTR22* } ibm-5351 { IBM* } windows-1255 # Old Windows Hebrew (w/ euro update)
ibm-1256_P110-1997 { UTR22* } ibm-1256 { IBM* } # Old Windows Arabic (w/o euro update)
ibm-5352_P100-1998 { UTR22* } ibm-5352 { IBM* } windows-1256 # Somewhat old Windows Arabic (w/ euro update)
ibm-1257_P100-1995 { UTR22* } ibm-1257 { IBM* } # Old Windows Baltic (w/o euro update)
ibm-5353_P100-1998 { UTR22* } ibm-5353 { IBM* } windows-1257 # Somewhat old Windows Baltic (w/ euro update)
ibm-1258_P100-1997 { UTR22* } ibm-1258 { IBM* } windows-1258 # Old Windows Vietnamese (w/o euro update)
macos-0_2-10.2 { UTR22* } macintosh { IANA* MIME* WINDOWS } mac { IANA } csMacintosh { IANA } windows-10000 { WINDOWS* } macroman { JAVA } x-macroman { JAVA* } # Apple latin 1
macos-6_2-10.4 { UTR22* } x-mac-greek { MIME* WINDOWS } windows-10006 { WINDOWS* } macgr x-MacGreek { JAVA* } # Apple Greek
macos-7_3-10.2 { UTR22* } x-mac-cyrillic { MIME* WINDOWS } windows-10007 { WINDOWS* } mac-cyrillic maccy x-MacCyrillic { JAVA } x-MacUkraine { JAVA* } # Apple Cyrillic
macos-21-10.5 { UTR22* } x-mac-thai { MIME* } x-MacThai { JAVA* } MacThai { JAVA }
macos-29-10.2 { UTR22* } x-mac-centraleurroman { MIME* } windows-10029 { WINDOWS* } x-mac-ce { WINDOWS } macce maccentraleurope x-MacCentralEurope { JAVA* } # Apple Central Europe
macos-33-10.5 { UTR22* } x-mac-symbol { MIME* } x-MacSymbol { JAVA* } MacSymbol { JAVA }
macos-34-10.2 { UTR22* } x-mac-dingbat { MIME* } x-MacDingbat { JAVA* } MacDingbat { JAVA }
macos-35-10.2 { UTR22* } x-mac-turkish { MIME* WINDOWS } windows-10081 { WINDOWS* } mactr x-MacTurkish { JAVA* } # Apple Turkish
macos-36_2-10.2 { UTR22* } x-mac-croatian { MIME* } x-MacCroatian { JAVA* } MacCroatian { JAVA }
macos-37_5-10.2 { UTR22* } x-mac-iceland { MIME* } x-MacIceland { JAVA* } MacIceland { JAVA }
macos-38_2-10.2 { UTR22* } x-mac-romania { MIME* } x-MacRomania { JAVA* } MacRomania { JAVA }
macos-518-10.2 { UTR22* } x-mac-arabic { MIME* } x-MacArabic { JAVA* } MacArabic { JAVA }
macos-1285-10.2 { UTR22* } x-mac-hebrew { MIME* } x-MacHebrew { JAVA* } MacHebrew { JAVA }
ibm-1051_P100-1995 { UTR22* } ibm-1051 { IBM* } hp-roman8 { IANA* } roman8 { IANA } r8 { IANA } csHPRoman8 { IANA } x-roman8 { JAVA* } # HP Latin1
ibm-1276_P100-1995 { UTR22* } ibm-1276 { IBM* } Adobe-Standard-Encoding { IANA* } csAdobeStandardEncoding { IANA } # Different from ISO-Unicode-IBM-1276 (GCSGID: 1276)
ibm-1006_P100-1995 { UTR22* } ibm-1006 { IBM* } IBM1006 { JAVA } cp1006 { JAVA* } 1006 { JAVA } x-IBM1006 { JAVA } # Urdu
ibm-1098_P100-1995 { UTR22* } ibm-1098 { IBM* } IBM1098 { JAVA } cp1098 { JAVA* } 1098 { JAVA } x-IBM1098 { JAVA } # PC Farsi
ibm-1124_P100-1996 { UTR22* } ibm-1124 { IBM* JAVA } cp1124 { JAVA* } 1124 { JAVA } x-IBM1124 { JAVA } # ISO Cyrillic Ukraine
ibm-1125_P100-1997 { UTR22* } ibm-1125 { IBM* } cp1125 # Cyrillic Ukraine PC
ibm-1129_P100-1997 { UTR22* } ibm-1129 { IBM* } # ISO Vietnamese
ibm-1131_P100-1997 { UTR22* } ibm-1131 { IBM* } cp1131 # Cyrillic Belarus PC
ibm-1133_P100-1997 { UTR22* } ibm-1133 { IBM* } # ISO Lao
# GSM 03.38
gsm-03.38-2009 { UTR22* } GSM0338 # GSM0338 alias is from Perl
# Partially algorithmic converters
# [U_ENABLE_GENERIC_ISO_2022]
# The _generic_ ISO-2022 converter is disabled starting 2003-dec-03 (ICU 2.8).
# For details see the icu mailing list from 2003-dec-01 and the ucnv2022.c file.
# Language-specific variants of ISO-2022 continue to be available as listed below.
# ISO_2022 ISO-2022
ISO_2022,locale=ja,version=0 ISO-2022-JP { IANA* MIME* JAVA* } csISO2022JP { IANA JAVA } x-windows-iso2022jp { JAVA } x-windows-50220 { JAVA }
ISO_2022,locale=ja,version=1 ISO-2022-JP-1 { MIME* } JIS_Encoding { IANA* } csJISEncoding { IANA } ibm-5054 { IBM* } JIS x-windows-50221 { JAVA* }
ISO_2022,locale=ja,version=2 ISO-2022-JP-2 { IANA* MIME* } csISO2022JP2 { IANA }
ISO_2022,locale=ja,version=3 JIS7
ISO_2022,locale=ja,version=4 JIS8
ISO_2022,locale=ko,version=0 ISO-2022-KR { IANA* MIME* JAVA* } csISO2022KR { IANA JAVA } # This uses ibm-949
ISO_2022,locale=ko,version=1 ibm-25546 { IBM* }
ISO_2022,locale=zh,version=0 ISO-2022-CN { IANA* JAVA* } csISO2022CN { JAVA } x-ISO-2022-CN-GB { JAVA }
ISO_2022,locale=zh,version=1 ISO-2022-CN-EXT { IANA* }
ISO_2022,locale=zh,version=2 ISO-2022-CN-CNS x-ISO-2022-CN-CNS { JAVA* }
HZ HZ-GB-2312 { IANA* }
x11-compound-text COMPOUND_TEXT x-compound-text { JAVA* }
ISCII,version=0 x-ISCII91 { JAVA* } x-iscii-de { WINDOWS } windows-57002 { WINDOWS* } iscii-dev ibm-4902 { IBM* } # ibm-806 contains non-standard box drawing symbols.
ISCII,version=1 x-iscii-be { WINDOWS } windows-57003 { WINDOWS* } iscii-bng windows-57006 { WINDOWS } x-iscii-as { WINDOWS } # be is different from as on Windows.
ISCII,version=2 x-iscii-pa { WINDOWS } windows-57011 { WINDOWS* } iscii-gur
ISCII,version=3 x-iscii-gu { WINDOWS } windows-57010 { WINDOWS* } iscii-guj
ISCII,version=4 x-iscii-or { WINDOWS } windows-57007 { WINDOWS* } iscii-ori
ISCII,version=5 x-iscii-ta { WINDOWS } windows-57004 { WINDOWS* } iscii-tml
ISCII,version=6 x-iscii-te { WINDOWS } windows-57005 { WINDOWS* } iscii-tlg
ISCII,version=7 x-iscii-ka { WINDOWS } windows-57008 { WINDOWS* } iscii-knd
ISCII,version=8 x-iscii-ma { WINDOWS } windows-57009 { WINDOWS* } iscii-mlm
# Lotus specific
LMBCS-1 lmbcs ibm-65025 { IBM* }
# These Lotus specific converters still work, but they aren't advertised in this alias table.
# These are almost never used outside of Lotus software,
# and they take a lot of time when creating the available converter list.
# Also Lotus doesn't really use them anyway. It was a mistake to create these LMBCS variant converters in ICU.
#LMBCS-2
#LMBCS-3
#LMBCS-4
#LMBCS-5
#LMBCS-6
#LMBCS-8
#LMBCS-11
#LMBCS-16
#LMBCS-17
#LMBCS-18
#LMBCS-19
# EBCDIC codepages according to the CDRA
# without Euro
ibm-37_P100-1995 { UTR22* } # EBCDIC US
ibm-37 { IBM* }
IBM037 { IANA* JAVA }
ibm-037 # { JAVA }
ebcdic-cp-us { IANA JAVA }
ebcdic-cp-ca { IANA JAVA }
ebcdic-cp-wt { IANA JAVA }
ebcdic-cp-nl { IANA JAVA }
csIBM037 { IANA JAVA }
cp037 { JAVA* }
037 { JAVA }
cpibm37 { JAVA }
cp37
ibm-273_P100-1995 { UTR22* } ibm-273 { IBM* } IBM273 { IANA* JAVA } CP273 { IANA JAVA* } csIBM273 { IANA } ebcdic-de 273 { JAVA } # EBCDIC Germany, Austria
ibm-277_P100-1995 { UTR22* } ibm-277 { IBM* } IBM277 { IANA* JAVA } cp277 { JAVA* } EBCDIC-CP-DK { IANA } EBCDIC-CP-NO { IANA } csIBM277 { IANA } ebcdic-dk 277 { JAVA } # EBCDIC Denmark
ibm-278_P100-1995 { UTR22* } ibm-278 { IBM* } IBM278 { IANA* JAVA } cp278 { JAVA* } ebcdic-cp-fi { IANA } ebcdic-cp-se { IANA } csIBM278 { IANA } ebcdic-sv { JAVA } 278 { JAVA } # EBCDIC Sweden
ibm-280_P100-1995 { UTR22* } ibm-280 { IBM* } IBM280 { IANA* JAVA } CP280 { IANA JAVA* } ebcdic-cp-it { IANA } csIBM280 { IANA } 280 { JAVA } # EBCDIC Italy
ibm-284_P100-1995 { UTR22* } ibm-284 { IBM* } IBM284 { IANA* JAVA } CP284 { IANA JAVA* } ebcdic-cp-es { IANA } csIBM284 { IANA } cpibm284 { JAVA } 284 { JAVA } # EBCDIC Spain
ibm-285_P100-1995 { UTR22* } ibm-285 { IBM* } IBM285 { IANA* JAVA } CP285 { IANA JAVA* } ebcdic-cp-gb { IANA } csIBM285 { IANA } cpibm285 { JAVA } ebcdic-gb { JAVA } 285 { JAVA } # EBCDIC UK Ireland
ibm-290_P100-1995 { UTR22* } ibm-290 { IBM* } IBM290 { IANA* } cp290 { IANA } EBCDIC-JP-kana { IANA } csIBM290 { IANA } # host SBCS (Katakana)
ibm-297_P100-1995 { UTR22* } ibm-297 { IBM* } IBM297 { IANA* JAVA } cp297 { IANA JAVA* } ebcdic-cp-fr { IANA } csIBM297 { IANA } cpibm297 { JAVA } 297 { JAVA } # EBCDIC France
ibm-420_X120-1999 { UTR22* } ibm-420 { IBM* } IBM420 { IANA* JAVA } cp420 { IANA JAVA* } ebcdic-cp-ar1 { IANA } csIBM420 { IANA } 420 { JAVA } # EBCDIC Arabic (all presentation shapes)
ibm-424_P100-1995 { UTR22* } ibm-424 { IBM* } IBM424 { IANA* JAVA } cp424 { IANA JAVA* } ebcdic-cp-he { IANA } csIBM424 { IANA } 424 { JAVA } # EBCDIC Hebrew
ibm-500_P100-1995 { UTR22* } ibm-500 { IBM* } IBM500 { IANA* JAVA } CP500 { IANA JAVA* } ebcdic-cp-be { IANA } csIBM500 { IANA } ebcdic-cp-ch { IANA } 500 # EBCDIC International Latin1
ibm-803_P100-1999 { UTR22* } ibm-803 { IBM* } cp803 # Old EBCDIC Hebrew
ibm-838_P100-1995 { UTR22* } ibm-838 { IBM* } IBM838 { JAVA } IBM-Thai { IANA* JAVA } csIBMThai { IANA } cp838 { JAVA* } 838 { JAVA } ibm-9030 { IBM } # EBCDIC Thai. Yes ibm-9030 is an alias.
ibm-870_P100-1995 { UTR22* } ibm-870 { IBM* } IBM870 { IANA* JAVA } CP870 { IANA JAVA* } ebcdic-cp-roece { IANA } ebcdic-cp-yu { IANA } csIBM870 { IANA } # EBCDIC Latin 2
ibm-871_P100-1995 { UTR22* } ibm-871 { IBM* } IBM871 { IANA* JAVA } ebcdic-cp-is { IANA JAVA } csIBM871 { IANA JAVA } CP871 { IANA JAVA* } ebcdic-is { JAVA } 871 { JAVA } # EBCDIC Iceland
ibm-875_P100-1995 { UTR22* } ibm-875 { IBM* } IBM875 { JAVA } cp875 { JAVA* } 875 { JAVA } x-IBM875 { JAVA } # EBCDIC Greek
ibm-918_P100-1995 { UTR22* } ibm-918 { IBM* } IBM918 { IANA* JAVA } CP918 { IANA JAVA* } ebcdic-cp-ar2 { IANA } csIBM918 { IANA } # EBCDIC Urdu
ibm-930_P120-1999 { UTR22* } # EBCDIC_STATEFUL Katakana-Kanji Host Mixed.
ibm-930 { IBM* }
ibm-5026 { IBM } # Yes this is correct
IBM930 { JAVA }
cp930 { JAVA* }
930 { JAVA }
x-IBM930 { JAVA }
x-IBM930A { JAVA }
ibm-933_P110-1995 { UTR22* } ibm-933 { IBM* JAVA } cp933 { JAVA* } 933 { JAVA } x-IBM933 { JAVA } # Korea EBCDIC MIXED
ibm-935_P110-1999 { UTR22* } ibm-935 { IBM* JAVA } cp935 { JAVA* } 935 { JAVA } x-IBM935 { JAVA } # China EBCDIC MIXED. Need to use Unicode, ibm-1388 or gb18030 instead because it is required by the government of China.
ibm-937_P110-1999 { UTR22* } ibm-937 { IBM* JAVA } cp937 { JAVA* } 937 { JAVA } x-IBM937 { JAVA } # Taiwan EBCDIC MIXED
ibm-939_P120-1999 { UTR22* } # EBCDIC_STATEFUL Latin-Kanji Host Mixed.
ibm-939 { IBM* }
ibm-931 { IBM } # Yes this is correct
ibm-5035 { IBM } # Yes this is also correct
IBM939 { JAVA }
cp939 { JAVA* }
939 { JAVA }
x-IBM939 { JAVA }
x-IBM939A { JAVA }
ibm-1025_P100-1995 { UTR22* } ibm-1025 { IBM* JAVA } cp1025 { JAVA* } 1025 { JAVA } x-IBM1025 { JAVA } # EBCDIC Cyrillic
ibm-1026_P100-1995 { UTR22* } ibm-1026 { IBM* } IBM1026 { IANA* JAVA } CP1026 { IANA JAVA* } csIBM1026 { IANA } 1026 { JAVA } # EBCDIC Turkey
ibm-1047_P100-1995 { UTR22* } ibm-1047 { IBM* } IBM1047 { IANA* JAVA } cp1047 { JAVA* } 1047 { JAVA } # EBCDIC Open systems Latin1
ibm-1097_P100-1995 { UTR22* } ibm-1097 { IBM* JAVA } cp1097 { JAVA* } 1097 { JAVA } x-IBM1097 { JAVA } # EBCDIC Farsi
ibm-1112_P100-1995 { UTR22* } ibm-1112 { IBM* JAVA } cp1112 { JAVA* } 1112 { JAVA } x-IBM1112 { JAVA } # EBCDIC Baltic
ibm-1114_P100-2001 { UTR22* } ibm-1114 { IBM* } x-IBM1114 { JAVA* }
ibm-1115_P100-1995 { UTR22* } ibm-1115 { IBM* } x-IBM1115 { JAVA* }
ibm-1122_P100-1999 { UTR22* } ibm-1122 { IBM* JAVA } cp1122 { JAVA* } 1122 { JAVA } x-IBM1122 { JAVA } # EBCDIC Estonia
ibm-1123_P100-1995 { UTR22* } ibm-1123 { IBM* JAVA } cp1123 { JAVA* } 1123 { JAVA } x-IBM1123 { JAVA } # EBCDIC Cyrillic Ukraine
ibm-1130_P100-1997 { UTR22* } ibm-1130 { IBM* } # EBCDIC Vietnamese
ibm-1132_P100-1998 { UTR22* } ibm-1132 { IBM* } # EBCDIC Lao
ibm-1137_P100-1999 { UTR22* } ibm-1137 { IBM* } # Devanagari EBCDIC (based on Unicode character set)
ibm-4517_P100-2005 { UTR22* } ibm-4517 { IBM* } # EBCDIC Arabic. Update of ibm-421
# with Euro
ibm-1140_P100-1997 { UTR22* } ibm-1140 { IBM* } IBM01140 { IANA* JAVA } CCSID01140 { IANA JAVA } CP01140 { IANA JAVA } cp1140 { JAVA* } ebcdic-us-37+euro { IANA } # EBCDIC US
ibm-1141_P100-1997 { UTR22* } ibm-1141 { IBM* } IBM01141 { IANA* JAVA } CCSID01141 { IANA JAVA } CP01141 { IANA JAVA } cp1141 { JAVA* } ebcdic-de-273+euro { IANA } # EBCDIC Germany, Austria
ibm-1142_P100-1997 { UTR22* } ibm-1142 { IBM* } IBM01142 { IANA* JAVA } CCSID01142 { IANA JAVA } CP01142 { IANA JAVA } cp1142 { JAVA* } ebcdic-dk-277+euro { IANA } ebcdic-no-277+euro { IANA } # EBCDIC Denmark
ibm-1143_P100-1997 { UTR22* } ibm-1143 { IBM* } IBM01143 { IANA* JAVA } CCSID01143 { IANA JAVA } CP01143 { IANA JAVA } cp1143 { JAVA* } ebcdic-fi-278+euro { IANA } ebcdic-se-278+euro { IANA } # EBCDIC Sweden
ibm-1144_P100-1997 { UTR22* } ibm-1144 { IBM* } IBM01144 { IANA* JAVA } CCSID01144 { IANA JAVA } CP01144 { IANA JAVA } cp1144 { JAVA* } ebcdic-it-280+euro { IANA } # EBCDIC Italy
ibm-1145_P100-1997 { UTR22* } ibm-1145 { IBM* } IBM01145 { IANA* JAVA } CCSID01145 { IANA JAVA } CP01145 { IANA JAVA } cp1145 { JAVA* } ebcdic-es-284+euro { IANA } # EBCDIC Spain
ibm-1146_P100-1997 { UTR22* } ibm-1146 { IBM* } IBM01146 { IANA* JAVA } CCSID01146 { IANA JAVA } CP01146 { IANA JAVA } cp1146 { JAVA* } ebcdic-gb-285+euro { IANA } # EBCDIC UK Ireland
ibm-1147_P100-1997 { UTR22* } ibm-1147 { IBM* } IBM01147 { IANA* JAVA } CCSID01147 { IANA JAVA } CP01147 { IANA JAVA } cp1147 { JAVA* } ebcdic-fr-297+euro { IANA } # EBCDIC France
ibm-1148_P100-1997 { UTR22* } ibm-1148 { IBM* } IBM01148 { IANA* JAVA } CCSID01148 { IANA JAVA } CP01148 { IANA JAVA } cp1148 { JAVA* } ebcdic-international-500+euro { IANA } # EBCDIC International Latin1
ibm-1149_P100-1997 { UTR22* } ibm-1149 { IBM* } IBM01149 { IANA* JAVA } CCSID01149 { IANA JAVA } CP01149 { IANA JAVA } cp1149 { JAVA* } ebcdic-is-871+euro { IANA } # EBCDIC Iceland
ibm-1153_P100-1999 { UTR22* } ibm-1153 { IBM* } IBM1153 { JAVA } x-IBM1153 { JAVA* } # EBCDIC latin 2
ibm-1154_P100-1999 { UTR22* } ibm-1154 { IBM* } # EBCDIC Cyrillic Multilingual
ibm-1155_P100-1999 { UTR22* } ibm-1155 { IBM* } # EBCDIC Turkey
ibm-1156_P100-1999 { UTR22* } ibm-1156 { IBM* } # EBCDIC Baltic Multilingual
ibm-1157_P100-1999 { UTR22* } ibm-1157 { IBM* } # EBCDIC Estonia
ibm-1158_P100-1999 { UTR22* } ibm-1158 { IBM* } # EBCDIC Cyrillic Ukraine
ibm-1160_P100-1999 { UTR22* } ibm-1160 { IBM* } # EBCDIC Thailand
ibm-1164_P100-1999 { UTR22* } ibm-1164 { IBM* } # EBCDIC Viet Nam
ibm-1364_P110-2007 { UTR22* } ibm-1364 { IBM* } x-IBM1364 { JAVA* } # Korean Host Mixed
ibm-1370_P100-1999 { UTR22* } ibm-1370 { IBM* } x-IBM1370 { JAVA* }
ibm-1371_P100-1999 { UTR22* } ibm-1371 { IBM* } x-IBM1371 { JAVA* } # Taiwan EBCDIC MIXED (Euro update of ibm-937)
ibm-1388_P103-2001 { UTR22* } ibm-1388 { IBM* } ibm-9580 { IBM } x-IBM1388 { JAVA* } # S-Ch DBCS-Host Data GBK EBCDIC_STATEFUL. Yes ibm-9580 is an alias.
ibm-1390_P110-2003 { UTR22* } ibm-1390 { IBM* } x-IBM1390 { JAVA* } # Japan EBCDIC MIXED (JIS X 0213)
ibm-1399_P110-2003 { UTR22* } ibm-1399 { IBM* } x-IBM1399 { JAVA* } # Host MBCS (Latin-Kanji) (JIS X 0213)
ibm-5123_P100-1999 { UTR22* } ibm-5123 { IBM* } # Host Roman Jis. Euro update of ibm-1027. SBCS portion of ibm-1390.
ibm-8482_P100-1999 { UTR22* } ibm-8482 { IBM* } # host SBCS (Katakana). Euro update of ibm-290. SBCS portion of ibm-1399.
# Yes ibm-20780 is the same as ibm-16684
ibm-16684_P110-2003 { UTR22* } ibm-16684 { IBM* } ibm-20780 { IBM } # DBCS Jis + Roman Jis Host. This is the DBCS portion of ibm-1390 and ibm-1399 (JIS X 0213).
ibm-4899_P100-1998 { UTR22* } ibm-4899 { IBM* } # Old EBCDIC Hebrew. Update of ibm-803
ibm-4971_P100-1999 { UTR22* } ibm-4971 { IBM* } # EBCDIC Greek. Update of ibm-875 and superseded by ibm-9067
ibm-9067_X100-2005 { UTR22* } ibm-9067 { IBM* } # EBCDIC Greek. Update of ibm-875 and ibm-4971
ibm-12712_P100-1998 { UTR22* } ibm-12712 { IBM* } ebcdic-he # EBCDIC Hebrew (new sheqel, control characters update). Update of ibm-424
ibm-16804_X110-1999 { UTR22* } ibm-16804 { IBM* } ebcdic-ar # EBCDIC Arabic. Update of ibm-420
java-Cp1399A-1.6_P { UTR22* } x-IBM1399A { JAVA* }
java-Cp420s-1.6_P { UTR22* } x-IBM420S { JAVA* }
java-Cp1390A-1.6_P { UTR22* } x-IBM1390A { JAVA* }
# EBCDIC codepages for S/390, with LF and NL codes swapped
# Starting with ICU 2.4, the swapping is done by modifying the
# normal tables at runtime instead of at build time.
# Append UCNV_SWAP_LFNL_OPTION_STRING to the "ibm-CCSID" name to select this.
#
# Example: "ibm-1047,swaplfnl" or "ibm-1047" UCNV_SWAP_LFNL_OPTION_STRING
#
# This avoids the duplication of all EBCDIC SBCS and mixed-SBCS/DBCS
# mapping files.
# Some examples below for declaring old-style, obsolete aliases with the "-s390"
# suffix to map to the new-style, recommended names with the option added.
# These are listed here for backward compatibility.
# Do not use these; instead use the normal converter name with the option
# added as recommended above.
# Note: It is not possible to define an alias (non-initial name in a line here)
# that itself contains a converter option like this one for swapping LF<->NL.
# Such names would never be found because ucnv_open() will first parse and strip
# options before looking up a name in this table.
# ucnv_open() then parses the lookup result (the canonical name on the left
# in lines here) as well.
# This also means that it is not necessary to add anything to convrtrs.txt
# for converter names like "ibm-1026,swaplfnl" to work -
# they are already covered by the normal option parsing together with the
# regular, option-less alias elsewhere in this file.
ibm-37_P100-1995,swaplfnl ibm-37-s390 # ibm037-s390 also matches ibm-37-s390
ibm-924_P100-1998,swaplfnl ibm-924-s390 IBM924_LF { JAVA* }
ibm-1047_P100-1995,swaplfnl ibm-1047-s390 IBM1047_LF { JAVA* }
ibm-1140_P100-1997,swaplfnl ibm-1140-s390
ibm-1141_P100-1997,swaplfnl ibm-1141-s390 IBM1141_LF { JAVA* }
ibm-1142_P100-1997,swaplfnl ibm-1142-s390
ibm-1143_P100-1997,swaplfnl ibm-1143-s390
ibm-1144_P100-1997,swaplfnl ibm-1144-s390
ibm-1145_P100-1997,swaplfnl ibm-1145-s390
ibm-1146_P100-1997,swaplfnl ibm-1146-s390
ibm-1147_P100-1997,swaplfnl ibm-1147-s390
ibm-1148_P100-1997,swaplfnl ibm-1148-s390
ibm-1149_P100-1997,swaplfnl ibm-1149-s390
ibm-1153_P100-1999,swaplfnl ibm-1153-s390
ibm-12712_P100-1998,swaplfnl ibm-12712-s390
ibm-16804_X110-1999,swaplfnl ibm-16804-s390
# This is a special version of ibm-1140 that the XML4C (Xerces) parser team
# requested in 2000.
# It maps both EBCDIC LF and NL controls to Unicode LF U+000A.
ebcdic-xml-us
# These are not installed by default. They are rarely used.
# Many of them can be added through the online ICU Data Library Customization tool
ibm-1004_P100-1995 { UTR22* } ibm-1004 { IBM* }
ibm-1008_P100-1995 { UTR22* } ibm-1008 { IBM* } # cp1008, 8-bit Arabic (w/o euro update)
ibm-1009_P100-1995 { UTR22* } ibm-1009 { IBM* }
ibm-1010_P100-1995 { UTR22* } ibm-1010 { IBM* } NF_Z_62-010 { IANA* } iso-ir-69 { IANA } ISO646-FR { IANA } fr { IANA } csISO69French { IANA }
ibm-1011_P100-1995 { UTR22* } ibm-1011 { IBM* } DIN_66003 { IANA* } iso-ir-21 { IANA } de { IANA } ISO646-DE { IANA } csISO21German { IANA }
ibm-1012_P100-1995 { UTR22* } ibm-1012 { IBM* } IT { IANA* } iso-ir-15 { IANA } ISO646-IT { IANA } csISO15Italian { IANA }
ibm-1013_P100-1995 { UTR22* } ibm-1013 { IBM* } BS_4730 { IANA* } iso-ir-4 { IANA } ISO646-GB { IANA } gb { IANA } uk { IANA } csISO4UnitedKingdom { IANA }
ibm-1014_P100-1995 { UTR22* } ibm-1014 { IBM* } ES2 { IANA* } iso-ir-85 { IANA } ISO646-ES2 { IANA } csISO85Spanish2 { IANA }
ibm-1015_P100-1995 { UTR22* } ibm-1015 { IBM* } PT2 { IANA* } iso-ir-84 { IANA } ISO646-PT2 { IANA } csISO84Portuguese2 { IANA }
ibm-1016_P100-1995 { UTR22* } ibm-1016 { IBM* } NS_4551-1 { IANA* } iso-ir-60 { IANA } ISO646-NO { IANA } no { IANA } csISO60DanishNorwegian { IANA } csISO60Norwegian1 { IANA }
ibm-1017_P100-1995 { UTR22* } ibm-1017 { IBM* }
ibm-1018_P100-1995 { UTR22* } ibm-1018 { IBM* } SEN_850200_B { IANA* } iso-ir-10 { IANA } FI { IANA } ISO646-FI { IANA } ISO646-SE { IANA } se { IANA } csISO10Swedish { IANA }
ibm-1019_P100-1995 { UTR22* } ibm-1019 { IBM* }
ibm-1020_P100-2003 { UTR22* } ibm-1020 { IBM* } CSA_Z243.4-1985-1 { IANA* } iso-ir-121 { IANA } ISO646-CA { IANA } csa7-1 { IANA } ca { IANA } csISO121Canadian1 { IANA }
ibm-1021_P100-2003 { UTR22* } ibm-1021 { IBM* }
ibm-1023_P100-2003 { UTR22* } ibm-1023 { IBM* } ES { IANA* } iso-ir-17 { IANA } ISO646-ES { IANA } csISO17Spanish { IANA }
ibm-1027_P100-1995 { UTR22* } ibm-1027 { IBM* } x-IBM1027 { JAVA* }
ibm-1041_P100-1995 { UTR22* } ibm-1041 { IBM* } x-IBM1041 { JAVA* }
ibm-1043_P100-1995 { UTR22* } ibm-1043 { IBM* } x-IBM1043 { JAVA* }
ibm-1046_X110-1999 { UTR22* } ibm-1046 { IBM* } x-IBM1046 { JAVA* } x-IBM1046S { JAVA } # Arabic
ibm-1088_P100-1995 { UTR22* } ibm-1088 { IBM* } x-IBM1088 { JAVA* }
ibm-1100_P100-2003 { UTR22* } ibm-1100 { IBM* } DEC-MCS { IANA* } dec { IANA } csDECMCS { IANA }
ibm-1101_P100-2003 { UTR22* } ibm-1101 { IBM* }
ibm-1102_P100-2003 { UTR22* } ibm-1102 { IBM* }
ibm-1103_P100-2003 { UTR22* } ibm-1103 { IBM* }
ibm-1104_P100-2003 { UTR22* } ibm-1104 { IBM* } NF_Z_62-010_1973 iso-ir-25 { IANA* } ISO646-FR1 { IANA } csISO25French { IANA } # NF_Z_62-010_(1973) is the real IANA alias, but () aren't invariant characters.
ibm-1105_P100-2003 { UTR22* } ibm-1105 { IBM* }
ibm-1106_P100-2003 { UTR22* } ibm-1106 { IBM* }
ibm-1107_P100-2003 { UTR22* } ibm-1107 { IBM* } DS_2089 { IANA* } ISO646-DK { IANA } dk { IANA } csISO646Danish { IANA }
ibm-1127_P100-2004 { UTR22* } ibm-1127 { IBM* }
ibm-1161_P100-1999 { UTR22* } ibm-1161 { IBM* } # Thai (Euro update of ibm-874)
ibm-1163_P100-1999 { UTR22* } ibm-1163 { IBM* } # Vietnamese
ibm-1165_P101-2000 { UTR22* } ibm-1165 { IBM* } # Vietnamese (EBCDIC)
ibm-1166_P100-2002 { UTR22* } ibm-1166 { IBM* } # Cyrillic for Kazakhstan
ibm-1167_P100-2002 { UTR22* } ibm-1167 { IBM* } KOI8-RU x-KOI8_RU { JAVA* }
ibm-1174_X100-2007 { UTR22* } ibm-1174 { IBM* } KZ-1048 { IANA* } STRK1048-2002 { IANA } RK1048 { IANA } csKZ1048 { IANA }
ibm-1277_P100-1995 { UTR22* } ibm-1277 { IBM* } # Adobe (Postscript) Latin-1
ibm-13125_P100-1997 { UTR22* } ibm-13125 { IBM* } # S-Ch (DBCS subset of ibm-4933, ibm-1388)
ibm-13140_P101-2000 { UTR22* } ibm-13140 { IBM* }
ibm-13218_P100-1996 { UTR22* } ibm-13218 { IBM* } # Japanese (EBCDIC update of ibm-930)
ibm-1350_P110-1997 { UTR22* } ibm-1350 { IBM* } x-eucJP-Open { JAVA* } eucJP-Open { JAVA } # Japanese (EUC-JP variant)
ibm-1351_P110-1997 { UTR22* } ibm-1351 { IBM* } x-IBM1351 { JAVA* } # Japanese (DBCS subset of ibm-5039)
ibm-1362_P110-1999 { UTR22* } ibm-1362 { IBM* } x-IBM1362 { JAVA* } # Korean (DBCS subset of ibm-1363)
ibm-13676_P102-2001 { UTR22* } ibm-13676 { IBM* } # Simplified Chinese (EBCDIC)
ibm-1380_P100-1995 { UTR22* } ibm-1380 { IBM* } x-IBM1380 { JAVA* } # Simplified Chinese (DBCS subset of ibm-1381)
ibm-1381_P110-1999 { UTR22* } ibm-1381 { IBM* JAVA } cp1381 { JAVA* } 1381 { JAVA } x-IBM1381 { JAVA } # Simplified Chinese PC Data mixed (IBM GB)
ibm-1382_P100-1995 { UTR22* } ibm-1382 { IBM* } x-IBM1382 { JAVA* } # Simplified Chinese (DBCS subset of ibm-1383)
ibm-17221_P100-2001 { UTR22* } ibm-17221 { IBM* } # Simplified Chinese (EBCDIC)
ibm-17248_X110-1999 { UTR22* } ibm-17248 { IBM* } # PC Arabic (w/ euro update). Updated version of ibm-864
ibm-21344_P101-2000 { UTR22* } ibm-21344 { IBM* } # PC Arabic. Updated version of ibm-864
ibm-21427_P100-1999 { UTR22* } ibm-21427 { IBM* } # Traditional Chinese (DBCS subset of ibm-1370)
ibm-256_P100-1995 { UTR22* } ibm-256 { IBM* } # Latin 1 EBCDIC
ibm-259_P100-1995 { UTR22* } ibm-259 { IBM* } IBM-Symbols { IANA* } csIBMSymbols { IANA }
ibm-274_P100-2000 { UTR22* } ibm-274 { IBM* } IBM274 { IANA* } EBCDIC-BE { IANA } CP274 { IANA } csIBM274 { IANA }
ibm-275_P100-1995 { UTR22* } ibm-275 { IBM* } IBM275 { IANA* } EBCDIC-BR { IANA } cp275 { IANA } csIBM275 { IANA }
ibm-286_P100-2003 { UTR22* } ibm-286 { IBM* } EBCDIC-AT-DE-A { IANA* } csEBCDICATDEA { IANA }
ibm-293_P100-1995 { UTR22* } ibm-293 { IBM* } # APL EBCDIC (APL: A Programming Language)
ibm-300_P120-2006 { UTR22* } ibm-300 { IBM* } x-IBM300 { JAVA* } # Japanese (DBCS subset of ibm-930 and ibm-939)
ibm-301_P110-1997 { UTR22* } ibm-301 { IBM* } x-IBM301 { JAVA* } # Japanese (DBCS subset of ibm-943)
ibm-33058_P100-2000 { UTR22* } ibm-33058 { IBM* } # SBCS (Katakana)
ibm-425_P101-2000 { UTR22* } ibm-425 { IBM* } # Arabic (EBCDIC)
ibm-4930_P110-1999 { UTR22* } ibm-4930 { IBM* } # Korean (DBCS subset of ibm-1364)
ibm-4933_P100-2002 { UTR22* } ibm-4933 { IBM* } # Simplified Chinese (DBCS subset of ibm-1388)
ibm-4948_P100-1995 { UTR22* } ibm-4948 { IBM* }
ibm-4951_P100-1995 { UTR22* } ibm-4951 { IBM* }
ibm-4952_P100-1995 { UTR22* } ibm-4952 { IBM* }
ibm-4960_P100-1995 { UTR22* } ibm-4960 { IBM* }
ibm-5039_P11A-1998 { UTR22* } ibm-5039 { IBM* } # Japanese (HP Shift-JIS variant)
ibm-5048_P100-1995 { UTR22* } ibm-5048 { IBM* } # Japanese (DBCS subset of ibm-1350, JIS X 0208-1990)
ibm-5049_P100-1995 { UTR22* } ibm-5049 { IBM* } # Japanese (DBCS subset of ibm-1350, JIS X 0212)
ibm-5067_P100-1995 { UTR22* } ibm-5067 { IBM* } # Korean (DBCS subset of ibm-21450)
ibm-5104_X110-1999 { UTR22* } ibm-5104 { IBM* } # cp1008, 8-bit Arabic (w/ euro update)
ibm-5233_P100-2011 { UTR22* } ibm-5233 { IBM* } # Devanagari EBCDIC, including Indian Rupee
ibm-806_P100-1998 { UTR22* } ibm-806 { IBM* } # Hindi (ISCII variant)
ibm-808_P100-1999 { UTR22* } ibm-808 { IBM* } x-IBM808 { JAVA* } # Cyrillic
ibm-833_P100-1995 { UTR22* } ibm-833 { IBM* } x-IBM833 { JAVA* }
ibm-834_P100-1995 { UTR22* } ibm-834 { IBM* } x-IBM834 { JAVA* } # Korean (DBCS subset of ibm-933)
ibm-835_P100-1995 { UTR22* } ibm-835 { IBM* } x-IBM835 { JAVA* } # Traditional Chinese (DBCS subset of ibm-5033)
ibm-836_P100-1995 { UTR22* } ibm-836 { IBM* } x-IBM836 { JAVA* }
ibm-837_P100-2011 { UTR22* } ibm-837 { IBM* } x-IBM837 { JAVA* } # Simplified Chinese (DBCS subset of ibm-5031)
ibm-848_P100-1999 { UTR22* } ibm-848 { IBM* } # Cyrillic (euro update of ibm-1125)
ibm-849_P100-1999 { UTR22* } ibm-849 { IBM* } # Cyrillic Belarus (euro update of ibm-1131)
ibm-859_P100-1999 { UTR22* } ibm-859 { IBM* } x-IBM859 { JAVA* } # PC Latin 9 (w/ euro update)
ibm-8612_P100-1995 { UTR22* } ibm-8612 { IBM* } # Arabic (EBCDIC update of ibm-420)
ibm-872_P100-1999 { UTR22* } ibm-872 { IBM* } # Cyrillic (Euro update of ibm-855)
ibm-880_P100-1995 { UTR22* } ibm-880 { IBM* } IBM880 { IANA* } cp880 { IANA } EBCDIC-Cyrillic { IANA } csIBM880 { IANA } windows-20880 { WINDOWS* }
ibm-896_P100-1995 { UTR22* } ibm-896 { IBM* } # SBCS Katakana
ibm-897_P100-1995 { UTR22* } ibm-897 { IBM* } JIS_X0201 { IANA* } X0201 { IANA } csHalfWidthKatakana { IANA } x-IBM897 { JAVA* }
ibm-9027_P100-1999 { UTR22* } ibm-9027 { IBM* } # DBCS T-Ch Host. Euro update of ibm-835. DBCS portion of ibm-1371.
ibm-9048_P100-1998 { UTR22* } ibm-9048 { IBM* } # Hebrew (Euro and Sheqel update of ibm-856)
ibm-905_P100-1995 { UTR22* } ibm-905 { IBM* } IBM905 { IANA* } CP905 { IANA } ebcdic-cp-tr { IANA } csIBM905 { IANA } windows-20905 { WINDOWS* }
ibm-9056_P100-1995 { UTR22* } ibm-9056 { IBM* } # Arabic
ibm-9061_P100-1999 { UTR22* } ibm-9061 { IBM* } # Greek (w/ euro update)
ibm-9145_P110-1997 { UTR22* } ibm-9145 { IBM* } # Japanese (DBCS subset of ibm-5050)
ibm-9238_X110-1999 { UTR22* } ibm-9238 { IBM* } # cp1046, PC Arabic Extended (w/ euro update)
ibm-924_P100-1998 { UTR22* } ibm-924 { IBM* } IBM00924 { IANA* } CCSID00924 { IANA } CP00924 { IANA } ebcdic-Latin9--euro { IANA }
ibm-926_P100-2000 { UTR22* } ibm-926 { IBM* } # Korean (DBCS subset of ibm-944)
ibm-927_P100-1995 { UTR22* } ibm-927 { IBM* } x-IBM927 { JAVA* } # Traditional Chinese (DBCS subset of ibm-948)
ibm-928_P100-1995 { UTR22* } ibm-928 { IBM* } # Simplified Chinese (DBCS subset of ibm-936)
ibm-941_P13A-2001 { UTR22* } ibm-941 { IBM* } # DBCS portion of ibm-943
ibm-944_P100-1995 { UTR22* } ibm-944 { IBM* } # Korean
ibm-946_P100-1995 { UTR22* } ibm-946 { IBM* } # Simplified Chinese
ibm-947_P100-1995 { UTR22* } ibm-947 { IBM* } x-IBM947 { JAVA* } # Traditional Chinese (DBCS subset of ibm-950)
ibm-948_P110-1999 { UTR22* } ibm-948 { IBM* } x-IBM948 { JAVA* } # Traditional Chinese
ibm-951_P100-1995 { UTR22* } ibm-951 { IBM* } x-IBM951 { JAVA* } # Korean (DBCS subset of ibm-949)
ibm-952_P110-1997 { UTR22* } ibm-952 { IBM* } x-JIS0208 # Pure DBCS, Japanese EUC, G1 - JIS X 0208-1990
ibm-953_P100-2000 { UTR22* } ibm-953 { IBM* } JIS_X0212-1990 { IANA* } # Pure DBCS, Japanese EUC, G3 - JIS X 0212-1990
ibm-955_P110-1997 { UTR22* } ibm-955 { IBM* } # Pure DBCS, Japanese EUC, G0 - JIS X 0208-1978
ibm-9577_P100-2001 { UTR22* } ibm-9577 { IBM* } ibm-1385 { IBM } x-IBM1385 { JAVA* } # ibm-9577 and ibm-1385 are identical DBCS tables.
iso-8859_16-2001 { UTR22* } ISO-8859-16 { IANA* } iso-ir-226 { IANA } ISO_8859-16:2001 { IANA } latin10 { IANA } l10 { IANA }
# To be considered for listing at a later date for the data library customization tool
#ibm-1159_P100-1999 { UTR22* } ibm-1159 { IBM* } # SBCS T-Ch Host. Euro update of ibm-28709. This is used in combination with another CCSID mapping.
#ibm-960_P100-2000 { UTR22* } ibm-960 { IBM* } # Pure DBCS, CNS11643 plane 1
#ibm-963_P100-1995 { UTR22* } ibm-963 { IBM* } # Pure DBCS, CNS11643 plane 2. Traditional Chinese (DBCS subset of ibm-965)
|