diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 20:34:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 20:34:10 +0000 |
commit | e4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc (patch) | |
tree | 68cb5ef9081156392f1dd62a00c6ccc1451b93df /epan/charsets.h | |
parent | Initial commit. (diff) | |
download | wireshark-e4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc.tar.xz wireshark-e4ba6dbc3f1e76890b22773807ea37fe8fa2b1bc.zip |
Adding upstream version 4.2.2. upstream/4.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'epan/charsets.h')
-rw-r--r-- | epan/charsets.h | 240 |
1 files changed, 240 insertions, 0 deletions
diff --git a/epan/charsets.h b/epan/charsets.h new file mode 100644 index 00000000..630f1e67 --- /dev/null +++ b/epan/charsets.h @@ -0,0 +1,240 @@ +/** @file + * Routines for handling character sets + * + * Wireshark - Network traffic analyzer + * By Gerald Combs <gerald@wireshark.org> + * Copyright 1998 Gerald Combs + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ +#ifndef __CHARSETS_H__ +#define __CHARSETS_H__ + +#include "ws_symbol_export.h" + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * Translation tables that map the upper 128 code points in single-byte + * "extended ASCII" character encodings to Unicode code points in the + * Basic Multilingual Plane. + */ + +/* Table for windows-1250 */ +extern const gunichar2 charset_table_cp1250[0x80]; +/* Table for windows-1251 */ +extern const gunichar2 charset_table_cp1251[0x80]; +/* Table for windows-1252 */ +extern const gunichar2 charset_table_cp1252[0x80]; + +/* Tables for ISO-8859-X (parts 2-11 and 13-16) */ +extern const gunichar2 charset_table_iso_8859_2[0x80]; +extern const gunichar2 charset_table_iso_8859_3[0x80]; +extern const gunichar2 charset_table_iso_8859_4[0x80]; +extern const gunichar2 charset_table_iso_8859_5[0x80]; +extern const gunichar2 charset_table_iso_8859_6[0x80]; +extern const gunichar2 charset_table_iso_8859_7[0x80]; +extern const gunichar2 charset_table_iso_8859_8[0x80]; +extern const gunichar2 charset_table_iso_8859_9[0x80]; +extern const gunichar2 charset_table_iso_8859_10[0x80]; +extern const gunichar2 charset_table_iso_8859_11[0x80]; +extern const gunichar2 charset_table_iso_8859_13[0x80]; +extern const gunichar2 charset_table_iso_8859_14[0x80]; +extern const gunichar2 charset_table_iso_8859_15[0x80]; +extern const gunichar2 charset_table_iso_8859_16[0x80]; + +/* Table for the Mac Roman character set */ +extern const gunichar2 charset_table_mac_roman[0x80]; + +/* Tables for DOS code pages */ +extern const gunichar2 charset_table_cp437[0x80]; +extern const gunichar2 
charset_table_cp855[0x80]; +extern const gunichar2 charset_table_cp866[0x80]; + +/* + * Translation tables that map the lower 128 code points in single-byte + * ISO 646-based character encodings to Unicode code points in the + * Basic Multilingual Plane. + */ +extern const gunichar2 charset_table_iso_646_basic[0x80]; + +/* Tables for EBCDIC code pages (256 entries, one per octet value) */ +extern const gunichar2 charset_table_ebcdic[256]; +extern const gunichar2 charset_table_ebcdic_cp037[256]; +extern const gunichar2 charset_table_ebcdic_cp500[256]; + +/* + * Given a wmem scope, a pointer, and a length, treat the string of bytes + * referred to by the pointer and length as an ASCII string, with all bytes + * with the high-order bit set being invalid, and return a pointer to a + * UTF-8 string, allocated using the wmem scope. + * + * Octets with the highest bit set will be converted to the Unicode + * REPLACEMENT CHARACTER. + */ +WS_DLL_PUBLIC guint8 * +get_ascii_string(wmem_allocator_t *scope, const guint8 *ptr, gint length); + +/* + * Given a wmem scope, a pointer, and a length, treat the string of bytes + * referred to by the pointer and length as a UTF-8 string, and return a + * pointer to a UTF-8 string, allocated using the wmem scope, with all + * ill-formed sequences replaced with the Unicode REPLACEMENT CHARACTER + * according to the recommended "best practices" given in the Unicode + * Standard and specified by W3C/WHATWG. 
+ */ +WS_DLL_PUBLIC guint8 * +get_utf_8_string(wmem_allocator_t *scope, const guint8 *ptr, gint length); + +/* + * Given a wmem scope, a pointer, a length, and a 128-entry translation table, + * treat the string of bytes referred to by the pointer and length as a + * string encoded using one octet per character, with octets with the + * high-order bit clear being mapped by the translation table to 2-byte + * Unicode Basic Multilingual Plane characters (including REPLACEMENT + * CHARACTER) and octets with the high-order bit set being mapped to + * REPLACEMENT CHARACTER, and return a pointer to a UTF-8 string, + * allocated using the wmem scope. + */ +WS_DLL_PUBLIC guint8 * +get_iso_646_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const gunichar2 table[0x80]); + +/* + * Given a wmem scope, a pointer, and a length, treat the string of bytes + * referred to by the pointer and length as an ISO 8859/1 string, and + * return a pointer to a UTF-8 string, allocated using the wmem scope. + */ +WS_DLL_PUBLIC guint8 * +get_8859_1_string(wmem_allocator_t *scope, const guint8 *ptr, gint length); + +/* + * Given a wmem scope, a pointer, a length, and a translation table with + * 128 entries, treat the string of bytes referred to by the pointer and + * length as a string encoded using one octet per character, with octets + * with the high-order bit clear being ASCII and octets with the high-order + * bit set being mapped by the translation table to 2-byte Unicode Basic + * Multilingual Plane characters (including REPLACEMENT CHARACTER), and + * return a pointer to a UTF-8 string, allocated using the wmem scope. 
+ */ +WS_DLL_PUBLIC guint8 * +get_unichar2_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const gunichar2 table[0x80]); + +/* + * Given a wmem scope, a pointer, and a length, treat the string of bytes + * referred to by the pointer and length as a UCS-2 encoded string + * containing characters from the Basic Multilingual Plane (plane 0) of + * Unicode, and return a pointer to a UTF-8 string, allocated with the + * wmem scope. + * + * The encoding parameter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN, + * possibly ORed with ENC_BOM. + * + * The length is specified in bytes. + */ +WS_DLL_PUBLIC guint8 * +get_ucs_2_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, guint encoding); + +/* + * Given a wmem scope, a pointer, and a length, treat the string of bytes + * referred to by the pointer and length as a UTF-16 encoded string, and + * return a pointer to a UTF-8 string, allocated with the wmem scope. + * + * See RFC 2781 section 2.2. + * + * The encoding parameter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN, + * possibly ORed with ENC_BOM. + * + * The length is specified in bytes. + */ +WS_DLL_PUBLIC guint8 * +get_utf_16_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, guint encoding); + +/* + * Given a wmem scope, a pointer, and a length, treat the string of bytes + * referred to by the pointer and length as a UCS-4 encoded string, and + * return a pointer to a UTF-8 string, allocated with the wmem scope. + * + * The encoding parameter should be ENC_BIG_ENDIAN or ENC_LITTLE_ENDIAN, + * possibly ORed with ENC_BOM. + * + * The length is specified in bytes. 
+ */ +WS_DLL_PUBLIC guint8 * +get_ucs_4_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, guint encoding); +/* NOTE(review): presumably decodes no_of_chars packed 7-bit characters of the 3GPP TS 23.038 GSM alphabet, starting bit_offset bits into ptr, into a wmem-allocated UTF-8 string - confirm in charsets.c. */ +WS_DLL_PUBLIC guint8 * +get_ts_23_038_7bits_string_packed(wmem_allocator_t *scope, const guint8 *ptr, + const gint bit_offset, gint no_of_chars); +/* NOTE(review): presumably decodes length octets, each holding one 7-bit 3GPP TS 23.038 GSM alphabet character, into a wmem-allocated UTF-8 string - confirm in charsets.c. */ +WS_DLL_PUBLIC guint8 * +get_ts_23_038_7bits_string_unpacked(wmem_allocator_t *scope, const guint8 *ptr, + gint length); +/* NOTE(review): presumably decodes text coded per ETSI TS 102 221 Annex A into a wmem-allocated UTF-8 string - confirm in charsets.c. */ +WS_DLL_PUBLIC guint8 * +get_etsi_ts_102_221_annex_a_string(wmem_allocator_t *scope, const guint8 *ptr, + gint length); +/* NOTE(review): presumably unpacks no_of_chars 7-bit ASCII characters, starting bit_offset bits into ptr, into a wmem-allocated UTF-8 string - confirm in charsets.c. */ +WS_DLL_PUBLIC guint8 * +get_ascii_7bits_string(wmem_allocator_t *scope, const guint8 *ptr, + const gint bit_offset, gint no_of_chars); + +/* + * Given a wmem scope, a pointer, a length, and a translation table with + * 256 entries, treat the string of bytes referred to by the pointer and + * length as a string encoded using one octet per character, with octets + * being mapped by the translation table to 2-byte Unicode Basic Multilingual + * Plane characters (including REPLACEMENT CHARACTER), and return a + * pointer to a UTF-8 string, allocated using the wmem scope. + */ +WS_DLL_PUBLIC guint8 * +get_nonascii_unichar2_string(wmem_allocator_t *scope, const guint8 *ptr, gint length, const gunichar2 table[256]); + +/* + * Given a wmem scope, a pointer, and a length, treat the bytes referred to + * by the pointer and length as a GB18030 encoded string, and return a pointer + * to a UTF-8 string, allocated using the wmem scope, converted having + * substituted REPLACEMENT CHARACTER according to the Unicode Standard + * 5.22 U+FFFD Substitution in Conversion. + * ( https://www.unicode.org/versions/Unicode13.0.0/ch05.pdf ) + * + * As expected, this will also decode GBK and GB2312 strings. 
+ */ +WS_DLL_PUBLIC guint8 * +get_gb18030_string(wmem_allocator_t *scope, const guint8 *ptr, gint length); + +/* + * Given a wmem scope, a pointer, and a length, treat the bytes referred to + * by the pointer and length as an EUC-KR encoded string, and return a pointer + * to a UTF-8 string, allocated using the wmem scope, converted having + * substituted REPLACEMENT CHARACTER according to the Unicode Standard + * 5.22 U+FFFD Substitution in Conversion. + * ( https://www.unicode.org/versions/Unicode13.0.0/ch05.pdf ) + */ +WS_DLL_PUBLIC guint8 * +get_euc_kr_string(wmem_allocator_t *scope, const guint8 *ptr, gint length); +/* NOTE(review): presumably decodes an ITU-T T.61 (Teletex) string into a wmem-allocated UTF-8 string - confirm in charsets.c. */ +WS_DLL_PUBLIC guint8 * +get_t61_string(wmem_allocator_t *scope, const guint8 *ptr, gint length); +/* NOTE(review): presumably decodes the DECT standard 8-bit character set into a wmem-allocated UTF-8 string - confirm in charsets.c. */ +WS_DLL_PUBLIC guint8 * +get_dect_standard_8bits_string(wmem_allocator_t *scope, const guint8 *ptr, gint length); +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __CHARSETS_H__ */ + +/* + * Editor modelines - https://www.wireshark.org/tools/modelines.html + * + * Local variables: + * c-basic-offset: 4 + * tab-width: 8 + * indent-tabs-mode: nil + * End: + * + * vi: set shiftwidth=4 tabstop=8 expandtab: + * :indentSize=4:tabSize=8:noTabs=true: + */ |