diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:15:05 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:15:05 +0000 |
commit | 46651ce6fe013220ed397add242004d764fc0153 (patch) | |
tree | 6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/backend/utils/mb/conversion_procs | |
parent | Initial commit. (diff) | |
download | postgresql-14-upstream.tar.xz postgresql-14-upstream.zip |
Adding upstream version 14.5.upstream/14.5upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/utils/mb/conversion_procs')
53 files changed, 5923 insertions, 0 deletions
diff --git a/src/backend/utils/mb/conversion_procs/Makefile b/src/backend/utils/mb/conversion_procs/Makefile new file mode 100644 index 0000000..a2e935e --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/Makefile @@ -0,0 +1,25 @@ +#------------------------------------------------------------------------- +# +# Makefile for backend/utils/mb/conversion_procs +# +# Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group +# Portions Copyright (c) 1994, Regents of the University of California +# +# src/backend/utils/mb/conversion_procs/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/backend/utils/mb/conversion_procs +top_builddir = ../../../../.. +include $(top_builddir)/src/Makefile.global + +SUBDIRS = \ + cyrillic_and_mic euc_cn_and_mic euc_jp_and_sjis \ + euc_kr_and_mic euc_tw_and_big5 latin2_and_win1250 latin_and_mic \ + utf8_and_big5 utf8_and_cyrillic utf8_and_euc_cn \ + utf8_and_euc_jp utf8_and_euc_kr utf8_and_euc_tw utf8_and_gb18030 \ + utf8_and_gbk utf8_and_iso8859 utf8_and_iso8859_1 utf8_and_johab \ + utf8_and_sjis utf8_and_win utf8_and_uhc \ + utf8_and_euc2004 utf8_and_sjis2004 euc2004_sjis2004 + +$(recurse) diff --git a/src/backend/utils/mb/conversion_procs/README.euc_jp b/src/backend/utils/mb/conversion_procs/README.euc_jp new file mode 100644 index 0000000..6e59b7b --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/README.euc_jp @@ -0,0 +1,83 @@ +新しいエンコーディング変換関数の追加方法 + + 2006/04/15 Tatsuo Ishii + +はじめに + +PostgreSQLには,データベースとフロントエンドのエンコーディングが異なる +ときに,自動的にエンコーディングの変換を行う機能があります.このディレ +クトリには,そのときに使われる関数が登録されています.これらの関数はユー +ザ定義C関数として,initdbの中で登録されます.具体的には, +/usr/local/pgsql/share/conversion_create.sql の中で登録されます(このファ +イルはこのディレクトリでmakeしたときに自動生成されます). + +また,これらの関数はconvert()関数からも呼び出されることもあります. + +このREADMEでは,C関数を定義する方法と,それをMakefileなどに追加する方 +法を説明します. + +o C関数の呼び出し形式 + + エンコーディング変換関数の呼び出し形式は次のようになります. 
+ + conv_proc( + INTEGER, -- source encoding id + INTEGER, -- destination encoding id + CSTRING, -- source string (null terminated C string) + INTERNAL, -- destination string (null terminated C string) + INTEGER -- source string length + ) returns VOID; + + 唯一の出力引数は4番目のdestination stringです.ユーザ定義関数は必要 + なメモリをpallocし,そこに変換結果をNULLターミネートされたC文字列と + して出力しなければなりません.また,適切な大きさのメモリを確保するの + は,このC関数の責任です.というのは,一般に変換された文字列の長さは + ソース文字列の長さ(5番目の引数で指定されます.単位はNULLターミネート + を含まないバイト数です)とは一致しないからです. + + エンコーディングIDはinclude/mb/pg_wchar.hのtypedef enum pg_encで定義 + されています. + +o 関数の登録とコンパイル + + 作ったC関数はサブディレクトリを作り,その中に納めます.その中に + Makefileも必要になりますが,他のディレクトリにあるMakefileを参考にす + れば簡単に作成できるでしょう. + + 次にメインのMakefile(このファイルが置いてある同じディレクトリにあり + ます)に関数に関する記述を追加します. + + (1) DIRS=の後にサブディレクトリ名を追加します. + + (2) @set \ で始まる項目に記述を追加します.1関数につき1行の追加が必要 + です. + + コンバージョンの名前 + ソースエンコーディング名 + デスティネーションエンコーディング名 + 関数名 + オブジェクトファイル名 + + を1行の中にスペースで区切って追加します. + +o テスト + + 以上が終わったら,このファイルがあるディレクトリでmakeし,すべてがう + まくいくことを確認します.特に,create_conversion.sqlがちゃんとした + 内容になっているかどうか確認しましょう.良さそうだったら,テスト用に + 新しいデータベースを作り,そこでこのスクリプトを実行します. + + $ psql -e -f create_conversion.sql test + + これも正常だったら,最後にregression test suiteにテスト項目を追加し + てください.具体的には,src/test/regress/sql/conversion.sqlに追加し, + regression testを行います. + +o 注意事項 + + デフォルトのエンコーディング変換として使用できるためには,ソースエン + コーディングとデスティネーションエンコーディングの間で双方向の変換が + できることが必要です.すなわち,あるエンコーディングのペアに付き,2 + 個の関数の作成が必要です.これらの関数は別々のサブディレクトリに登録 + しても良いですが,通常は一つのソースファイル中に2個の関数を書くこと + が多いでしょう. 
diff --git a/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/Makefile b/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/Makefile new file mode 100644 index 0000000..e7cd8e8 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/Makefile @@ -0,0 +1,13 @@ +#------------------------------------------------------------------------- +# +# src/backend/utils/mb/conversion_procs/cyrillic_and_mic/Makefile +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/cyrillic_and_mic +top_builddir = ../../../../../.. +include $(top_builddir)/src/Makefile.global + +NAME = cyrillic_and_mic +PGFILEDESC = "cyrillic <-> mic text conversions" + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c b/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c new file mode 100644 index 0000000..368c2de --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c @@ -0,0 +1,624 @@ +/*------------------------------------------------------------------------- + * + * Cyrillic and MULE_INTERNAL + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/mb/conversion_procs/cyrillic_and_mic/cyrillic_and_mic.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(koi8r_to_mic); +PG_FUNCTION_INFO_V1(mic_to_koi8r); +PG_FUNCTION_INFO_V1(iso_to_mic); +PG_FUNCTION_INFO_V1(mic_to_iso); +PG_FUNCTION_INFO_V1(win1251_to_mic); +PG_FUNCTION_INFO_V1(mic_to_win1251); +PG_FUNCTION_INFO_V1(win866_to_mic); +PG_FUNCTION_INFO_V1(mic_to_win866); +PG_FUNCTION_INFO_V1(koi8r_to_win1251); +PG_FUNCTION_INFO_V1(win1251_to_koi8r); 
+PG_FUNCTION_INFO_V1(koi8r_to_win866); +PG_FUNCTION_INFO_V1(win866_to_koi8r); +PG_FUNCTION_INFO_V1(win866_to_win1251); +PG_FUNCTION_INFO_V1(win1251_to_win866); +PG_FUNCTION_INFO_V1(iso_to_koi8r); +PG_FUNCTION_INFO_V1(koi8r_to_iso); +PG_FUNCTION_INFO_V1(iso_to_win1251); +PG_FUNCTION_INFO_V1(win1251_to_iso); +PG_FUNCTION_INFO_V1(iso_to_win866); +PG_FUNCTION_INFO_V1(win866_to_iso); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. + * ---------- + */ + +/* + * Cyrillic support + * currently supported Cyrillic encodings: + * + * KOI8-R (this is also the charset for the mule internal code for Cyrillic) + * ISO-8859-5 + * Microsoft's CP1251 (windows-1251) + * Alternativny Variant (MS-DOS CP866) + */ + +/* ISO-8859-5 to KOI8-R */ +static const unsigned char iso2koi[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xB3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xE1, 0xE2, 0xF7, 0xE7, 0xE4, 0xE5, 0xF6, 0xFA, + 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, + 0xF2, 0xF3, 0xF4, 0xF5, 0xE6, 0xE8, 0xE3, 0xFE, + 0xFB, 0xFD, 0xFF, 0xF9, 0xF8, 0xFC, 0xE0, 0xF1, + 0xC1, 0xC2, 0xD7, 0xC7, 0xC4, 0xC5, 0xD6, 0xDA, + 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xD0, + 0xD2, 0xD3, 0xD4, 0xD5, 0xC6, 0xC8, 0xC3, 0xDE, + 0xDB, 0xDD, 0xDF, 0xD9, 0xD8, 0xDC, 0xC0, 0xD1, + 0x00, 0xA3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* KOI8-R to ISO-8859-5 */ +static const unsigned char 
koi2iso[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xF1, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xA1, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xEE, 0xD0, 0xD1, 0xE6, 0xD4, 0xD5, 0xE4, 0xD3, + 0xE5, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, + 0xDF, 0xEF, 0xE0, 0xE1, 0xE2, 0xE3, 0xD6, 0xD2, + 0xEC, 0xEB, 0xD7, 0xE8, 0xED, 0xE9, 0xE7, 0xEA, + 0xCE, 0xB0, 0xB1, 0xC6, 0xB4, 0xB5, 0xC4, 0xB3, + 0xC5, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, + 0xBF, 0xCF, 0xC0, 0xC1, 0xC2, 0xC3, 0xB6, 0xB2, + 0xCC, 0xCB, 0xB7, 0xC8, 0xCD, 0xC9, 0xC7, 0xCA +}; + +/* WIN1251 to KOI8-R */ +static const unsigned char win12512koi[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xBD, 0x00, 0x00, + 0xB3, 0x00, 0xB4, 0x00, 0x00, 0x00, 0x00, 0xB7, + 0x00, 0x00, 0xB6, 0xA6, 0xAD, 0x00, 0x00, 0x00, + 0xA3, 0x00, 0xA4, 0x00, 0x00, 0x00, 0x00, 0xA7, + 0xE1, 0xE2, 0xF7, 0xE7, 0xE4, 0xE5, 0xF6, 0xFA, + 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, + 0xF2, 0xF3, 0xF4, 0xF5, 0xE6, 0xE8, 0xE3, 0xFE, + 0xFB, 0xFD, 0xFF, 0xF9, 0xF8, 0xFC, 0xE0, 0xF1, + 0xC1, 0xC2, 0xD7, 0xC7, 0xC4, 0xC5, 0xD6, 0xDA, + 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xD0, + 0xD2, 0xD3, 0xD4, 0xD5, 0xC6, 0xC8, 0xC3, 0xDE, + 0xDB, 0xDD, 0xDF, 0xD9, 0xD8, 0xDC, 0xC0, 0xD1 +}; + +/* KOI8-R to WIN1251 */ +static const unsigned char koi2win1251[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xB8, 0xBA, 
0x00, 0xB3, 0xBF, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xB4, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xA8, 0xAA, 0x00, 0xB2, 0xAF, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xA5, 0x00, 0x00, + 0xFE, 0xE0, 0xE1, 0xF6, 0xE4, 0xE5, 0xF4, 0xE3, + 0xF5, 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, + 0xEF, 0xFF, 0xF0, 0xF1, 0xF2, 0xF3, 0xE6, 0xE2, + 0xFC, 0xFB, 0xE7, 0xF8, 0xFD, 0xF9, 0xF7, 0xFA, + 0xDE, 0xC0, 0xC1, 0xD6, 0xC4, 0xC5, 0xD4, 0xC3, + 0xD5, 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, + 0xCF, 0xDF, 0xD0, 0xD1, 0xD2, 0xD3, 0xC6, 0xC2, + 0xDC, 0xDB, 0xC7, 0xD8, 0xDD, 0xD9, 0xD7, 0xDA +}; + +/* WIN866 to KOI8-R */ +static const unsigned char win8662koi[] = { + 0xE1, 0xE2, 0xF7, 0xE7, 0xE4, 0xE5, 0xF6, 0xFA, + 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, 0xF0, + 0xF2, 0xF3, 0xF4, 0xF5, 0xE6, 0xE8, 0xE3, 0xFE, + 0xFB, 0xFD, 0xFF, 0xF9, 0xF8, 0xFC, 0xE0, 0xF1, + 0xC1, 0xC2, 0xD7, 0xC7, 0xC4, 0xC5, 0xD6, 0xDA, + 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, 0xD0, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xBD, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xD2, 0xD3, 0xD4, 0xD5, 0xC6, 0xC8, 0xC3, 0xDE, + 0xDB, 0xDD, 0xDF, 0xD9, 0xD8, 0xDC, 0xC0, 0xD1, + 0xB3, 0xA3, 0xB4, 0xA4, 0xB7, 0xA7, 0x00, 0x00, + 0xB6, 0xA6, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* KOI8-R to WIN866 */ +static const unsigned char koi2win866[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xF1, 0xF3, 0x00, 0xF9, 0xF5, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xAD, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xF0, 0xF2, 0x00, 0xF8, 0xF4, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xBD, 0x00, 0x00, + 0xEE, 0xA0, 0xA1, 0xE6, 0xA4, 0xA5, 0xE4, 0xA3, + 0xE5, 0xA8, 0xA9, 0xAA, 0xAB, 
0xAC, 0xAD, 0xAE, + 0xAF, 0xEF, 0xE0, 0xE1, 0xE2, 0xE3, 0xA6, 0xA2, + 0xEC, 0xEB, 0xA7, 0xE8, 0xED, 0xE9, 0xE7, 0xEA, + 0x9E, 0x80, 0x81, 0x96, 0x84, 0x85, 0x94, 0x83, + 0x95, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, + 0x8F, 0x9F, 0x90, 0x91, 0x92, 0x93, 0x86, 0x82, + 0x9C, 0x9B, 0x87, 0x98, 0x9D, 0x99, 0x97, 0x9A +}; + +/* WIN866 to WIN1251 */ +static const unsigned char win8662win1251[] = { + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xA5, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, + 0xA8, 0xB8, 0xAA, 0xBA, 0xAF, 0xBF, 0x00, 0x00, + 0xB2, 0xB3, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* WIN1251 to WIN866 */ +static const unsigned char win12512win866[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xBD, 0x00, 0x00, + 0xF0, 0x00, 0xF2, 0x00, 0x00, 0x00, 0x00, 0xF4, + 0x00, 0x00, 0xF8, 0xF9, 0xAD, 0x00, 0x00, 0x00, + 0xF1, 0x00, 0xF3, 0x00, 0x00, 0x00, 0x00, 0xF5, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, + 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, + 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, + 0xE0, 0xE1, 0xE2, 
0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF +}; + +/* ISO-8859-5 to WIN1251 */ +static const unsigned char iso2win1251[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xA8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF, + 0x00, 0xB8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* WIN1251 to ISO-8859-5 */ +static const unsigned char win12512iso[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xA1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xF1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, + 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF +}; + +/* ISO-8859-5 to WIN866 */ +static const unsigned char iso2win866[] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xF0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, + 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, + 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0x00, 0xF1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + +/* WIN866 to ISO-8859-5 */ +static const unsigned char win8662iso[] = { + 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, + 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xA1, 0xF1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +}; + + +Datum +koi8r_to_mic(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_MULE_INTERNAL); + + converted = latin2mic(src, dest, len, LC_KOI8_R, PG_KOI8R, noError); + + 
PG_RETURN_INT32(converted); +} + +Datum +mic_to_koi8r(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_KOI8R); + + converted = mic2latin(src, dest, len, LC_KOI8_R, PG_KOI8R, noError); + + PG_RETURN_INT32(converted); +} + +Datum +iso_to_mic(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_MULE_INTERNAL); + + converted = latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_ISO_8859_5, iso2koi, noError); + + PG_RETURN_INT32(converted); +} + +Datum +mic_to_iso(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_ISO_8859_5); + + converted = mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_ISO_8859_5, koi2iso, noError); + + PG_RETURN_INT32(converted); +} + +Datum +win1251_to_mic(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_MULE_INTERNAL); + + converted = latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_WIN1251, win12512koi, noError); + + PG_RETURN_INT32(converted); +} + +Datum +mic_to_win1251(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) 
PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN1251); + + converted = mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_WIN1251, koi2win1251, noError); + + PG_RETURN_INT32(converted); +} + +Datum +win866_to_mic(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_MULE_INTERNAL); + + converted = latin2mic_with_table(src, dest, len, LC_KOI8_R, PG_WIN866, win8662koi, noError); + + PG_RETURN_INT32(converted); +} + +Datum +mic_to_win866(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN866); + + converted = mic2latin_with_table(src, dest, len, LC_KOI8_R, PG_WIN866, koi2win866, noError); + + PG_RETURN_INT32(converted); +} + +Datum +koi8r_to_win1251(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_WIN1251); + + converted = local2local(src, dest, len, PG_KOI8R, PG_WIN1251, koi2win1251, noError); + + PG_RETURN_INT32(converted); +} + +Datum +win1251_to_koi8r(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_KOI8R); + + converted = 
local2local(src, dest, len, PG_WIN1251, PG_KOI8R, win12512koi, noError); + + PG_RETURN_INT32(converted); +} + +Datum +koi8r_to_win866(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_WIN866); + + converted = local2local(src, dest, len, PG_KOI8R, PG_WIN866, koi2win866, noError); + + PG_RETURN_INT32(converted); +} + +Datum +win866_to_koi8r(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_KOI8R); + + converted = local2local(src, dest, len, PG_WIN866, PG_KOI8R, win8662koi, noError); + + PG_RETURN_INT32(converted); +} + +Datum +win866_to_win1251(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_WIN1251); + + converted = local2local(src, dest, len, PG_WIN866, PG_WIN1251, win8662win1251, noError); + + PG_RETURN_INT32(converted); +} + +Datum +win1251_to_win866(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_WIN866); + + converted = local2local(src, dest, len, PG_WIN1251, PG_WIN866, win12512win866, noError); + + PG_RETURN_INT32(converted); +} + +Datum +iso_to_koi8r(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned 
char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_KOI8R); + + converted = local2local(src, dest, len, PG_ISO_8859_5, PG_KOI8R, iso2koi, noError); + + PG_RETURN_INT32(converted); +} + +Datum +koi8r_to_iso(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_ISO_8859_5); + + converted = local2local(src, dest, len, PG_KOI8R, PG_ISO_8859_5, koi2iso, noError); + + PG_RETURN_INT32(converted); +} + +Datum +iso_to_win1251(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_WIN1251); + + converted = local2local(src, dest, len, PG_ISO_8859_5, PG_WIN1251, iso2win1251, noError); + + PG_RETURN_INT32(converted); +} + +Datum +win1251_to_iso(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1251, PG_ISO_8859_5); + + converted = local2local(src, dest, len, PG_WIN1251, PG_ISO_8859_5, win12512iso, noError); + + PG_RETURN_INT32(converted); +} + +Datum +iso_to_win866(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_ISO_8859_5, PG_WIN866); + + converted = 
local2local(src, dest, len, PG_ISO_8859_5, PG_WIN866, iso2win866, noError); + + PG_RETURN_INT32(converted); +} + +Datum +win866_to_iso(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_WIN866, PG_ISO_8859_5); + + converted = local2local(src, dest, len, PG_WIN866, PG_ISO_8859_5, win8662iso, noError); + + PG_RETURN_INT32(converted); +} diff --git a/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/Makefile b/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/Makefile new file mode 100644 index 0000000..fe0221c --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/Makefile @@ -0,0 +1,13 @@ +#------------------------------------------------------------------------- +# +# src/backend/utils/mb/conversion_procs/euc2004_sjis2004/Makefile +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/euc2004_sjis2004 +top_builddir = ../../../../../.. 
+include $(top_builddir)/src/Makefile.global + +NAME = euc2004_sjis2004 +PGFILEDESC = "euc2004 <-> sjis2004 text conversions" + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c b/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c new file mode 100644 index 0000000..a3fd35b --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c @@ -0,0 +1,401 @@ +/*------------------------------------------------------------------------- + * + * EUC_JIS_2004, SHIFT_JIS_2004 + * + * Copyright (c) 2007-2021, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(euc_jis_2004_to_shift_jis_2004); +PG_FUNCTION_INFO_V1(shift_jis_2004_to_euc_jis_2004); + +static int euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len, bool noError); +static int shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len, bool noError); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
+ * ----------
+ */
+
+/*
+ * SQL-callable entry point: EUC_JIS_2004 -> SHIFT_JIS_2004.
+ *
+ * Arguments 2..5 are the source string, the destination buffer, the source
+ * length and the noError flag.  Returns the number of source bytes
+ * converted.
+ */
+Datum
+euc_jis_2004_to_shift_jis_2004(PG_FUNCTION_ARGS)
+{
+	unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
+	unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
+	int			len = PG_GETARG_INT32(4);
+	bool		noError = PG_GETARG_BOOL(5);
+	int			converted;
+
+	CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_SHIFT_JIS_2004);
+
+	converted = euc_jis_20042shift_jis_2004(src, dest, len, noError);
+
+	PG_RETURN_INT32(converted);
+}
+
+/*
+ * SQL-callable entry point: SHIFT_JIS_2004 -> EUC_JIS_2004.
+ *
+ * Same argument layout and return value as the function above.
+ */
+Datum
+shift_jis_2004_to_euc_jis_2004(PG_FUNCTION_ARGS)
+{
+	unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
+	unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
+	int			len = PG_GETARG_INT32(4);
+	bool		noError = PG_GETARG_BOOL(5);
+	int			converted;
+
+	CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_EUC_JIS_2004);
+
+	converted = shift_jis_20042euc_jis_2004(src, dest, len, noError);
+
+	PG_RETURN_INT32(converted);
+}
+
+/*
+ * SHIFT_JIS_2004 lead byte for row "ku" of JIS X 0213 plane 1,
+ * or -1 if ku is outside 1..94.
+ */
+static int
+euc2004_plane1_lead(int ku)
+{
+	if (ku >= 1 && ku <= 62)
+		return (ku + 0x101) >> 1;
+	if (ku >= 63 && ku <= 94)
+		return (ku + 0x181) >> 1;
+	return -1;
+}
+
+/*
+ * SHIFT_JIS_2004 lead byte for row "ku" of JIS X 0213 plane 2,
+ * or -1 if that row has no SHIFT_JIS_2004 representation.
+ */
+static int
+euc2004_plane2_lead(int ku)
+{
+	switch (ku)
+	{
+		case 1:
+		case 3:
+		case 4:
+		case 5:
+		case 8:
+		case 12:
+		case 13:
+		case 14:
+		case 15:
+			return ((ku + 0x1df) >> 1) - (ku >> 3) * 3;
+		default:
+			if (ku >= 78 && ku <= 94)
+				return (ku + 0x19b) >> 1;
+			return -1;
+	}
+}
+
+/*
+ * SHIFT_JIS_2004 trail byte for cell "ten" in row "ku" (the layout depends
+ * on whether ku is odd or even), or -1 if ten is outside 1..94.
+ */
+static int
+euc2004_trail(int ku, int ten)
+{
+	if (ten < 1 || ten > 94)
+		return -1;
+	if (ku % 2)
+		return (ten <= 63) ? ten + 0x3f : ten + 0x40;
+	return ten + 0x9e;
+}
+
+/*
+ * EUC_JIS_2004 -> SHIFT_JIS_2004
+ *
+ * Converts up to "len" bytes from "euc" into "p" and NUL-terminates the
+ * output.  Returns the number of source bytes consumed.  On invalid or
+ * unconvertible input the function reports an error, unless noError is
+ * true, in which case it stops in front of the offending character.
+ *
+ * Both output bytes of a double-byte character are computed and validated
+ * before anything is written.  Previously the noError "break" for an
+ * unmappable plane 2 row sat inside a switch statement, so it left only the
+ * switch: the loop carried on, emitted a stray trail byte and counted the
+ * bad character as converted.
+ */
+static int
+euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len, bool noError)
+{
+	const unsigned char *start = euc;
+	int			c1,
+				ku,
+				ten;
+	int			lead,
+				trail;
+	int			l;
+
+	while (len > 0)
+	{
+		c1 = *euc;
+		if (!IS_HIGHBIT_SET(c1))
+		{
+			/* ASCII */
+			if (c1 == 0)
+			{
+				if (noError)
+					break;
+				report_invalid_encoding(PG_EUC_JIS_2004,
+										(const char *) euc, len);
+			}
+			*p++ = c1;
+			euc++;
+			len--;
+			continue;
+		}
+
+		l = pg_encoding_verifymbchar(PG_EUC_JIS_2004, (const char *) euc, len);
+
+		if (l < 0)
+		{
+			if (noError)
+				break;
+			report_invalid_encoding(PG_EUC_JIS_2004,
+									(const char *) euc, len);
+		}
+
+		if (c1 == SS2 && l == 2)	/* JIS X 0201 kana? */
+		{
+			*p++ = euc[1];
+		}
+		else
+		{
+			if (c1 == SS3 && l == 3)	/* JIS X 0213 plane 2? */
+			{
+				ku = euc[1] - 0xa0;
+				ten = euc[2] - 0xa0;
+				lead = euc2004_plane2_lead(ku);
+			}
+			else if (l == 2)	/* JIS X 0213 plane 1? */
+			{
+				ku = c1 - 0xa0;
+				ten = euc[1] - 0xa0;
+				lead = euc2004_plane1_lead(ku);
+			}
+			else
+			{
+				/* unexpected lead byte / length combination */
+				ku = 0;
+				ten = 0;
+				lead = -1;
+			}
+
+			trail = (lead < 0) ? -1 : euc2004_trail(ku, ten);
+
+			/* this break is directly inside the while loop, so it stops it */
+			if (trail < 0)
+			{
+				if (noError)
+					break;
+				report_invalid_encoding(PG_EUC_JIS_2004,
+										(const char *) euc, len);
+			}
+
+			*p++ = lead;
+			*p++ = trail;
+		}
+
+		euc += l;
+		len -= l;
+	}
+	*p = '\0';
+
+	return euc - start;
+}
+
+/*
+ * Decode the second byte "b" of a SHIFT_JIS_2004 double-byte character.
+ *
+ * Returns the "ten" (cell) number, or -1 if b is not a valid second byte.
+ * The row parity implied by the byte is stored in *ku:
+ * *ku = 0: "ku" = even
+ * *ku = 1: "ku" = odd
+ */
+static int
+get_ten(int b, int *ku)
+{
+	int			ten;
+
+	if (b >= 0x40 && b <= 0x7e)
+	{
+		ten = b - 0x3f;
+		*ku = 1;
+	}
+	else if (b >= 0x80 && b <= 0x9e)
+	{
+		ten = b - 0x40;
+		*ku = 1;
+	}
+	else if (b >= 0x9f && b <= 0xfc)
+	{
+		ten = b - 0x9e;
+		*ku = 0;
+	}
+	else
+	{
+		ten = -1;				/* error */
+		*ku = 0;				/* keep compiler quiet */
+	}
+	return ten;
+}
+
+/*
+ * SHIFT_JIS_2004 ---> EUC_JIS_2004
+ *
+ * Converts up to "len" bytes from "sjis" into "p" and NUL-terminates the
+ * output.  Returns the number of source bytes consumed.  On invalid input
+ * the function reports an error, unless noError is true, in which case it
+ * stops in front of the offending character.  Every noError "break" below
+ * is reached before any byte of the current character has been written.
+ */
+
+static int
+shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len, bool noError)
+{
+	const unsigned char *start = sjis;
+	int			c1;
+	int			ku,
+				ten,
+				kubun;
+	int			plane;
+	int			l;
+
+	while (len > 0)
+	{
+		c1 = *sjis;
+
+		if (!IS_HIGHBIT_SET(c1))
+		{
+			/* ASCII */
+			if (c1 == 0)
+			{
+				if (noError)
+					break;
+				report_invalid_encoding(PG_SHIFT_JIS_2004,
+										(const char *) sjis, len);
+			}
+			*p++ = c1;
+			sjis++;
+			len--;
+			continue;
+		}
+
+		l = pg_encoding_verifymbchar(PG_SHIFT_JIS_2004, (const char *) sjis, len);
+
+		if (l < 0 || l > len)
+		{
+			if (noError)
+				break;
+			report_invalid_encoding(PG_SHIFT_JIS_2004,
+									(const char *) sjis, len);
+		}
+
+		if (c1 >= 0xa1 && c1 <= 0xdf && l == 1)
+		{
+			/* JIS X0201 (1 byte kana) */
+			*p++ = SS2;
+			*p++ = c1;
+		}
+		else if (l == 2)
+		{
+			int			c2 = sjis[1];
+
+			plane = 1;
+			ku = 1;
+			ten = 1;
+
+			/*
+			 * JIS X 0213
+			 */
+			if (c1 >= 0x81 && c1 <= 0x9f)	/* plane 1 1ku-62ku */
+			{
+				ku = (c1 << 1) - 0x100;
+				ten = get_ten(c2, &kubun);
+				if (ten < 0)
+				{
+					if (noError)
+						break;
+					report_invalid_encoding(PG_SHIFT_JIS_2004,
+											(const char *) sjis, len);
+				}
+				ku -= kubun;
+			}
+			else if (c1 >= 0xe0 && c1 <= 0xef)	/* plane 1 63ku-94ku */
+			{
+				ku = (c1 << 1) - 0x180;
+				ten = get_ten(c2, &kubun);
+				if (ten < 0)
+				{
+					if (noError)
+						break;
+					report_invalid_encoding(PG_SHIFT_JIS_2004,
+											(const char *) sjis, len);
+				}
+				ku -= kubun;
+			}
+			else if (c1 >= 0xf0 && c1 <= 0xf3)	/* plane 2
+												 * 1,3,4,5,8,12,13,14,15 ku */
+			{
+				plane = 2;
+				ten = get_ten(c2, &kubun);
+				if (ten < 0)
+				{
+					if (noError)
+						break;
+					report_invalid_encoding(PG_SHIFT_JIS_2004,
+											(const char *) sjis, len);
+				}
+				/* each lead byte covers one odd and one even row */
+				switch (c1)
+				{
+					case 0xf0:
+						ku = kubun == 0 ? 8 : 1;
+						break;
+					case 0xf1:
+						ku = kubun == 0 ? 4 : 3;
+						break;
+					case 0xf2:
+						ku = kubun == 0 ? 12 : 5;
+						break;
+					default:
+						ku = kubun == 0 ? 14 : 13;
+						break;
+				}
+			}
+			else if (c1 >= 0xf4 && c1 <= 0xfc)	/* plane 2 78-94ku */
+			{
+				plane = 2;
+				ten = get_ten(c2, &kubun);
+				if (ten < 0)
+				{
+					if (noError)
+						break;
+					report_invalid_encoding(PG_SHIFT_JIS_2004,
+											(const char *) sjis, len);
+				}
+				/* 0xf4 with an odd-row second byte is row 15, not 78+ */
+				if (c1 == 0xf4 && kubun == 1)
+					ku = 15;
+				else
+					ku = (c1 << 1) - 0x19a - kubun;
+			}
+			else
+			{
+				if (noError)
+					break;
+				report_invalid_encoding(PG_SHIFT_JIS_2004,
+										(const char *) sjis, len);
+			}
+
+			if (plane == 2)
+				*p++ = SS3;
+
+			*p++ = ku + 0xa0;
+			*p++ = ten + 0xa0;
+		}
+		sjis += l;
+		len -= l;
+	}
+	*p = '\0';
+
+	return sjis - start;
+}
diff --git a/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/Makefile b/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/Makefile
new file mode 100644
index 0000000..cb6a83f
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/Makefile
@@ -0,0 +1,13 @@
+#-------------------------------------------------------------------------
+#
+# src/backend/utils/mb/conversion_procs/euc_cn_and_mic/Makefile
+#
+#-------------------------------------------------------------------------
+subdir = src/backend/utils/mb/conversion_procs/euc_cn_and_mic
+top_builddir = ../../../../../..
+include $(top_builddir)/src/Makefile.global
+
+NAME = euc_cn_and_mic
+PGFILEDESC = "euc_cn <-> mic text conversions"
+
+include $(srcdir)/../proc.mk
diff --git a/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c b/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c
new file mode 100644
index 0000000..09b3c2e
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c
@@ -0,0 +1,166 @@
+/*-------------------------------------------------------------------------
+ *
+ *	  EUC_CN and MULE_INTERNAL
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/mb/conversion_procs/euc_cn_and_mic/euc_cn_and_mic.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "mb/pg_wchar.h"
+
+PG_MODULE_MAGIC;
+
+PG_FUNCTION_INFO_V1(euc_cn_to_mic);
+PG_FUNCTION_INFO_V1(mic_to_euc_cn);
+
+/* ----------
+ * conv_proc(
+ *		INTEGER,	-- source encoding id
+ *		INTEGER,	-- destination encoding id
+ *		CSTRING,	-- source string (null terminated C string)
+ *		CSTRING,	-- destination string (null terminated C string)
+ *		INTEGER,	-- source string length
+ *		BOOL		-- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
+ * ----------
+ */
+
+static int	euc_cn2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int	mic2euc_cn(const unsigned char *mic, unsigned char *p, int len, bool noError);
+
+/*
+ * SQL-callable wrapper: convert EUC_CN text to MULE_INTERNAL.
+ */
+Datum
+euc_cn_to_mic(PG_FUNCTION_ARGS)
+{
+	unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
+	unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
+	int			len = PG_GETARG_INT32(4);
+	bool		noError = PG_GETARG_BOOL(5);
+
+	CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_CN, PG_MULE_INTERNAL);
+
+	PG_RETURN_INT32(euc_cn2mic(src, dest, len, noError));
+}
+
+/*
+ * SQL-callable wrapper: convert MULE_INTERNAL text to EUC_CN.
+ */
+Datum
+mic_to_euc_cn(PG_FUNCTION_ARGS)
+{
+	unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
+	unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
+	int			len = PG_GETARG_INT32(4);
+	bool		noError = PG_GETARG_BOOL(5);
+
+	CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_CN);
+
+	PG_RETURN_INT32(mic2euc_cn(src, dest, len, noError));
+}
+
+/*
+ * EUC_CN ---> MIC
+ *
+ * Every two-byte EUC_CN character is emitted as LC_GB2312_80 followed by
+ * the same two bytes; ASCII is copied through.  The output is
+ * NUL-terminated and the number of source bytes consumed is returned.
+ * With noError set, conversion stops in front of the first bad byte
+ * instead of reporting an error.
+ */
+static int
+euc_cn2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
+{
+	const unsigned char *src = euc;
+	unsigned char *out = p;
+
+	while (len > 0)
+	{
+		int			lead = *src;
+
+		if (!IS_HIGHBIT_SET(lead))
+		{
+			/* should be ASCII; an embedded NUL is not allowed */
+			if (lead == 0)
+			{
+				if (noError)
+					break;
+				report_invalid_encoding(PG_EUC_CN, (const char *) src, len);
+			}
+			*out++ = lead;
+			src++;
+			len--;
+			continue;
+		}
+
+		/* two-byte character: the second byte must have its high bit set */
+		if (len < 2 || !IS_HIGHBIT_SET(src[1]))
+		{
+			if (noError)
+				break;
+			report_invalid_encoding(PG_EUC_CN, (const char *) src, len);
+		}
+		out[0] = LC_GB2312_80;
+		out[1] = lead;
+		out[2] = src[1];
+		out += 3;
+		src += 2;
+		len -= 2;
+	}
+	*out = '\0';
+
+	return src - euc;
+}
+
+/*
+ * MIC ---> EUC_CN
+ *
+ * Only the LC_GB2312_80 charset can be expressed in EUC_CN; its leading
+ * charset byte is dropped and the two following bytes are copied.  Any
+ * other high-bit lead byte is untranslatable.  The output is
+ * NUL-terminated and the number of source bytes consumed is returned.
+ * With noError set, conversion stops in front of the first bad character
+ * instead of reporting an error.
+ */
+static int
+mic2euc_cn(const unsigned char *mic, unsigned char *p, int len, bool noError)
+{
+	const unsigned char *src = mic;
+	unsigned char *out = p;
+
+	while (len > 0)
+	{
+		int			lead = *src;
+
+		if (!IS_HIGHBIT_SET(lead))
+		{
+			/* should be ASCII; an embedded NUL is not allowed */
+			if (lead == 0)
+			{
+				if (noError)
+					break;
+				report_invalid_encoding(PG_MULE_INTERNAL,
+										(const char *) src, len);
+			}
+			*out++ = lead;
+			src++;
+			len--;
+			continue;
+		}
+
+		if (lead != LC_GB2312_80)
+		{
+			if (noError)
+				break;
+			report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_CN,
+									   (const char *) src, len);
+		}
+		if (len < 3 || !IS_HIGHBIT_SET(src[1]) || !IS_HIGHBIT_SET(src[2]))
+		{
+			if (noError)
+				break;
+			report_invalid_encoding(PG_MULE_INTERNAL,
+									(const char *) src, len);
+		}
+		*out++ = src[1];
+		*out++ = src[2];
+		src += 3;
+		len -= 3;
+	}
+	*out = '\0';
+
+	return src - mic;
+}
diff --git a/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/Makefile b/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/Makefile
new file mode 100644
index 0000000..0b1de0e
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/Makefile
@@ -0,0 +1,13 @@
+#-------------------------------------------------------------------------
+#
+# src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/Makefile
+#
+#-------------------------------------------------------------------------
+subdir = src/backend/utils/mb/conversion_procs/euc_jp_and_sjis
+top_builddir = ../../../../../..
+include $(top_builddir)/src/Makefile.global
+
+NAME = euc_jp_and_sjis
+PGFILEDESC = "euc_jp <-> sjis text conversions"
+
+include $(srcdir)/../proc.mk
diff --git a/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c b/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c
new file mode 100644
index 0000000..2e68708
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c
@@ -0,0 +1,772 @@
+/*-------------------------------------------------------------------------
+ *
+ *	  EUC_JP, SJIS and MULE_INTERNAL
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+#include "fmgr.h"
+#include "mb/pg_wchar.h"
+
+/*
+ * Alternative (substitute) codes.
+ *
+ * PGSJISALTCODE is the two-byte SJIS code emitted when a JIS X0212
+ * character has no entry in the ibmkanji table (EUC/MIC -> SJIS).
+ * PGEUCALTCODE is the two-byte code emitted in the other direction for
+ * SJIS codes in the NEC/IBM extension ranges that have no mapping.
+ */
+#define PGSJISALTCODE 0x81ac
+#define PGEUCALTCODE 0xa2ae
+
+/*
+ * conversion table between SJIS UDC (IBM kanji) and EUC_JP
+ *
+ * Defines the ibmkanji[] array; its last entry has 0xffff in every field
+ * and is used as the stop marker by the lookup loops below.
+ */
+#include "sjis.map"
+
+PG_MODULE_MAGIC;
+
+PG_FUNCTION_INFO_V1(euc_jp_to_sjis);
+PG_FUNCTION_INFO_V1(sjis_to_euc_jp);
+PG_FUNCTION_INFO_V1(euc_jp_to_mic);
+PG_FUNCTION_INFO_V1(mic_to_euc_jp);
+PG_FUNCTION_INFO_V1(sjis_to_mic);
+PG_FUNCTION_INFO_V1(mic_to_sjis);
+
+/* ----------
+ * conv_proc(
+ *		INTEGER,	-- source encoding id
+ *		INTEGER,	-- destination encoding id
+ *		CSTRING,	-- source string (null terminated C string)
+ *		CSTRING,	-- destination string (null terminated C string)
+ *		INTEGER,	-- source string length
+ *		BOOL		-- if true, don't throw an error if conversion fails
+ * ) returns INTEGER;
+ *
+ * Returns the number of bytes successfully converted.
+ *
+ * Each of the six SQL-callable functions below follows the same pattern:
+ * fetch arguments 2..5, run CHECK_ENCODING_CONVERSION_ARGS for its
+ * encoding pair, call the matching static worker and return its result.
+ * ----------
+ */
+
+static int	sjis2mic(const unsigned char *sjis, unsigned char *p, int len, bool noError);
+static int	mic2sjis(const unsigned char *mic, unsigned char *p, int len, bool noError);
+static int	euc_jp2mic(const unsigned char *euc, unsigned char *p, int len, bool noError);
+static int	mic2euc_jp(const unsigned char *mic, unsigned char *p, int len, bool noError);
+static int	euc_jp2sjis(const unsigned char *mic, unsigned char *p, int len, bool noError);	/* first parameter is named "euc" in the definition */
+static int	sjis2euc_jp(const unsigned char *mic, unsigned char *p, int len, bool noError);	/* first parameter is named "sjis" in the definition */
+
+Datum
+euc_jp_to_sjis(PG_FUNCTION_ARGS)
+{
+	unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
+	unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
+	int			len = PG_GETARG_INT32(4);
+	bool		noError = PG_GETARG_BOOL(5);
+	int			converted;
+
+	CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_SJIS);
+
+	converted = euc_jp2sjis(src, dest, len, noError);
+
+	PG_RETURN_INT32(converted);
+}
+
+Datum
+sjis_to_euc_jp(PG_FUNCTION_ARGS)
+{
+	unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
+	unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
+	int			len = PG_GETARG_INT32(4);
+	bool		noError = PG_GETARG_BOOL(5);
+	int			converted;
+
+	CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_EUC_JP);
+
+	converted = sjis2euc_jp(src, dest, len, noError);
+
+	PG_RETURN_INT32(converted);
+}
+
+Datum
+euc_jp_to_mic(PG_FUNCTION_ARGS)
+{
+	unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
+	unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
+	int			len = PG_GETARG_INT32(4);
+	bool		noError = PG_GETARG_BOOL(5);
+	int			converted;
+
+	CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_MULE_INTERNAL);
+
+	converted = euc_jp2mic(src, dest, len, noError);
+
+	PG_RETURN_INT32(converted);
+}
+
+Datum
+mic_to_euc_jp(PG_FUNCTION_ARGS)
+{
+	unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
+	unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
+	int			len = PG_GETARG_INT32(4);
+	bool		noError = PG_GETARG_BOOL(5);
+	int			converted;
+
+	CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_JP);
+
+	converted = mic2euc_jp(src, dest, len, noError);
+
+	PG_RETURN_INT32(converted);
+}
+
+Datum
+sjis_to_mic(PG_FUNCTION_ARGS)
+{
+	unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
+	unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
+	int			len = PG_GETARG_INT32(4);
+	bool		noError = PG_GETARG_BOOL(5);
+	int			converted;
+
+	CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_MULE_INTERNAL);
+
+	converted = sjis2mic(src, dest, len, noError);
+
+	PG_RETURN_INT32(converted);
+}
+
+Datum
+mic_to_sjis(PG_FUNCTION_ARGS)
+{
+	unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
+	unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
+	int			len = PG_GETARG_INT32(4);
+	bool		noError = PG_GETARG_BOOL(5);
+	int			converted;
+
+	CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_SJIS);
+
+	converted = mic2sjis(src, dest, len, noError);
+
+	PG_RETURN_INT32(converted);
+}
+
+/*
+ * SJIS ---> MIC
+ *
+ * Converts "len" bytes at "sjis" into "p", NUL-terminates the output and
+ * returns the number of source bytes consumed.  Invalid input is reported
+ * through report_invalid_encoding(); if noError is true the loop stops in
+ * front of the bad character instead.
+ */
+static int
+sjis2mic(const unsigned char *sjis, unsigned char *p, int len, bool noError)
+{
+	const unsigned char *start = sjis;
+	int			c1,
+				c2,
+				i,
+				k,
+				k2;
+
+	while (len > 0)
+	{
+		c1 = *sjis;
+		if (c1 >= 0xa1 && c1 <= 0xdf)
+		{
+			/* JIS X0201 (1 byte kana) */
+			*p++ = LC_JISX0201K;
+			*p++ = c1;
+			sjis++;
+			len--;
+		}
+		else if (IS_HIGHBIT_SET(c1))
+		{
+			/*
+			 * JIS X0208, X0212, user defined extended characters
+			 */
+			if (len < 2 || !ISSJISHEAD(c1) || !ISSJISTAIL(sjis[1]))
+			{
+				if (noError)
+					break;
+				report_invalid_encoding(PG_SJIS, (const char *) sjis, len);
+			}
+			c2 = sjis[1];
+			k = (c1 << 8) + c2;
+			if (k >= 0xed40 && k < 0xf040)
+			{
+				/*
+				 * NEC selection IBM kanji: replace the code by the IBM
+				 * kanji code listed for it in ibmkanji[], if any.  The
+				 * scan always runs to the 0xffff stop entry.
+				 */
+				for (i = 0;; i++)
+				{
+					k2 = ibmkanji[i].nec;
+					if (k2 == 0xffff)
+						break;
+					if (k2 == k)
+					{
+						k = ibmkanji[i].sjis;
+						c1 = (k >> 8) & 0xff;
+						c2 = k & 0xff;
+					}
+				}
+			}
+
+			if (k < 0xeb3f)
+			{
+				/* JIS X0208 */
+				*p++ = LC_JISX0208;
+				*p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e);
+				*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
+			}
+			else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc))
+			{
+				/*
+				 * NEC selection IBM kanji and other codes for which no
+				 * mapping is defined: emit the alternative code
+				 */
+				*p++ = LC_JISX0208;
+				*p++ = PGEUCALTCODE >> 8;
+				*p++ = PGEUCALTCODE & 0xff;
+			}
+			else if (k >= 0xf040 && k < 0xf540)
+			{
+				/*
+				 * UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 -
+				 * 0x7e7e EUC 0xf5a1 - 0xfefe
+				 */
+				*p++ = LC_JISX0208;
+				c1 -= 0x6f;
+				*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
+				*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
+			}
+			else if (k >= 0xf540 && k < 0xfa40)
+			{
+				/*
+				 * UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 -
+				 * 0x7e7e EUC 0x8ff5a1 - 0x8ffefe
+				 */
+				*p++ = LC_JISX0212;
+				c1 -= 0x74;
+				*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
+				*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
+			}
+			else if (k >= 0xfa40)
+			{
+				/*
+				 * mapping IBM kanji to X0208 and X0212
+				 *
+				 * Table entries whose euc value is >= 0x8f0000 are written
+				 * as JIS X0212, all others as JIS X0208.  Nothing is
+				 * written if the code is not found in the table.
+				 */
+				for (i = 0;; i++)
+				{
+					k2 = ibmkanji[i].sjis;
+					if (k2 == 0xffff)
+						break;
+					if (k2 == k)
+					{
+						k = ibmkanji[i].euc;
+						if (k >= 0x8f0000)
+						{
+							*p++ = LC_JISX0212;
+							*p++ = 0x80 | ((k & 0xff00) >> 8);
+							*p++ = 0x80 | (k & 0xff);
+						}
+						else
+						{
+							*p++ = LC_JISX0208;
+							*p++ = 0x80 | (k >> 8);
+							*p++ = 0x80 | (k & 0xff);
+						}
+					}
+				}
+			}
+			sjis += 2;
+			len -= 2;
+		}
+		else
+		{						/* should be ASCII */
+			if (c1 == 0)
+			{
+				if (noError)
+					break;
+				report_invalid_encoding(PG_SJIS, (const char *) sjis, len);
+			}
+			*p++ = c1;
+			sjis++;
+			len--;
+		}
+	}
+	*p = '\0';
+
+	return sjis - start;
+}
+
+/*
+ * MIC ---> SJIS
+ *
+ * Converts "len" bytes at "mic" into "p", NUL-terminates the output and
+ * returns the number of source bytes consumed.  Only LC_JISX0201K,
+ * LC_JISX0208 and LC_JISX0212 characters (plus ASCII) are handled; any
+ * other charset is reported through report_untranslatable_char().  If
+ * noError is true the loop stops in front of the bad character instead of
+ * reporting.
+ */
+static int
+mic2sjis(const unsigned char *mic, unsigned char *p, int len, bool noError)
+{
+	const unsigned char *start = mic;
+	int			c1,
+				c2,
+				k,
+				l;
+
+	while (len > 0)
+	{
+		c1 = *mic;
+		if (!IS_HIGHBIT_SET(c1))
+		{
+			/* ASCII */
+			if (c1 == 0)
+			{
+				if (noError)
+					break;
+				report_invalid_encoding(PG_MULE_INTERNAL,
+										(const char *) mic, len);
+			}
+			*p++ = c1;
+			mic++;
+			len--;
+			continue;
+		}
+		l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
+		if (l < 0)
+		{
+			if (noError)
+				break;
+			report_invalid_encoding(PG_MULE_INTERNAL,
+									(const char *) mic, len);
+		}
+		if (c1 == LC_JISX0201K)
+			*p++ = mic[1];
+		else if (c1 == LC_JISX0208)
+		{
+			c1 = mic[1];
+			c2 = mic[2];
+			k = (c1 << 8) | (c2 & 0xff);
+			if (k >= 0xf5a1)
+			{
+				/* UDC1 */
+				c1 -= 0x54;
+				*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f;
+			}
+			else
+				*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1);
+			*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
+		}
+		else if (c1 == LC_JISX0212)
+		{
+			int			i,
+						k2;
+
+			c1 = mic[1];
+			c2 = mic[2];
+			k = c1 << 8 | c2;
+			if (k >= 0xf5a1)
+			{
+				/* UDC2 */
+				c1 -= 0x54;
+				*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74;
+				*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
+			}
+			else
+			{
+				/*
+				 * IBM kanji: look the low 16 bits of the EUC code up in
+				 * ibmkanji[]; emit PGSJISALTCODE if the stop entry is
+				 * reached without a match
+				 */
+				for (i = 0;; i++)
+				{
+					k2 = ibmkanji[i].euc & 0xffff;
+					if (k2 == 0xffff)
+					{
+						*p++ = PGSJISALTCODE >> 8;
+						*p++ = PGSJISALTCODE & 0xff;
+						break;
+					}
+					if (k2 == k)
+					{
+						k = ibmkanji[i].sjis;
+						*p++ = k >> 8;
+						*p++ = k & 0xff;
+						break;
+					}
+				}
+			}
+		}
+		else
+		{
+			if (noError)
+				break;
+			report_untranslatable_char(PG_MULE_INTERNAL, PG_SJIS,
+									   (const char *) mic, len);
+		}
+		mic += l;
+		len -= l;
+	}
+	*p = '\0';
+
+	return mic - start;
+}
+
+/*
+ * EUC_JP ---> MIC
+ *
+ * Converts "len" bytes at "euc" into "p", NUL-terminates the output and
+ * returns the number of source bytes consumed.  SS2 characters become
+ * LC_JISX0201K, SS3 characters LC_JISX0212 and all other multibyte
+ * characters LC_JISX0208.  If noError is true the loop stops in front of
+ * invalid input instead of reporting it.
+ */
+static int
+euc_jp2mic(const unsigned char *euc, unsigned char *p, int len, bool noError)
+{
+	const unsigned char *start = euc;
+	int			c1;
+	int			l;
+
+	while (len > 0)
+	{
+		c1 = *euc;
+		if (!IS_HIGHBIT_SET(c1))
+		{
+			/* ASCII */
+			if (c1 == 0)
+			{
+				if (noError)
+					break;
+				report_invalid_encoding(PG_EUC_JP,
+										(const char *) euc, len);
+			}
+			*p++ = c1;
+			euc++;
+			len--;
+			continue;
+		}
+		l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
+		if (l < 0)
+		{
+			if (noError)
+				break;
+			report_invalid_encoding(PG_EUC_JP,
+									(const char *) euc, len);
+		}
+		if (c1 == SS2)
+		{						/* 1 byte kana? */
+			*p++ = LC_JISX0201K;
+			*p++ = euc[1];
+		}
+		else if (c1 == SS3)
+		{						/* JIS X0212 kanji? */
+			*p++ = LC_JISX0212;
+			*p++ = euc[1];
+			*p++ = euc[2];
+		}
+		else
+		{						/* kanji? */
+			*p++ = LC_JISX0208;
+			*p++ = c1;
+			*p++ = euc[1];
+		}
+		euc += l;
+		len -= l;
+	}
+	*p = '\0';
+
+	return euc - start;
+}
+
+/*
+ * MIC ---> EUC_JP
+ *
+ * Converts "len" bytes at "mic" into "p", NUL-terminates the output and
+ * returns the number of source bytes consumed.  LC_JISX0201K becomes an
+ * SS2 sequence, LC_JISX0212 an SS3 sequence and LC_JISX0208 a plain
+ * two-byte character; any other charset is reported through
+ * report_untranslatable_char().  If noError is true the loop stops in
+ * front of the bad character instead of reporting.
+ */
+static int
+mic2euc_jp(const unsigned char *mic, unsigned char *p, int len, bool noError)
+{
+	const unsigned char *start = mic;
+	int			c1;
+	int			l;
+
+	while (len > 0)
+	{
+		c1 = *mic;
+		if (!IS_HIGHBIT_SET(c1))
+		{
+			/* ASCII */
+			if (c1 == 0)
+			{
+				if (noError)
+					break;
+				report_invalid_encoding(PG_MULE_INTERNAL,
+										(const char *) mic, len);
+			}
+			*p++ = c1;
+			mic++;
+			len--;
+			continue;
+		}
+		l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
+		if (l < 0)
+		{
+			if (noError)
+				break;
+			report_invalid_encoding(PG_MULE_INTERNAL,
+									(const char *) mic, len);
+		}
+		if (c1 == LC_JISX0201K)
+		{
+			*p++ = SS2;
+			*p++ = mic[1];
+		}
+		else if (c1 == LC_JISX0212)
+		{
+			*p++ = SS3;
+			*p++ = mic[1];
+			*p++ = mic[2];
+		}
+		else if (c1 == LC_JISX0208)
+		{
+			*p++ = mic[1];
+			*p++ = mic[2];
+		}
+		else
+		{
+			if (noError)
+				break;
+			report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_JP,
+									   (const char *) mic, len);
+		}
+		mic += l;
+		len -= l;
+	}
+	*p = '\0';
+
+	return mic - start;
+}
+
+/*
+ * EUC_JP -> SJIS
+ *
+ * Converts "len" bytes at "euc" into "p", NUL-terminates the output and
+ * returns the number of source bytes consumed.  The JIS X0208 and JIS
+ * X0212 arithmetic is the same as in mic2sjis().  If noError is true the
+ * loop stops in front of invalid input instead of reporting it.
+ */
+static int
+euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len, bool noError)
+{
+	const unsigned char *start = euc;
+	int			c1,
+				c2,
+				k;
+	int			l;
+
+	while (len > 0)
+	{
+		c1 = *euc;
+		if (!IS_HIGHBIT_SET(c1))
+		{
+			/* ASCII */
+			if (c1 == 0)
+			{
+				if (noError)
+					break;
+				report_invalid_encoding(PG_EUC_JP,
+										(const char *) euc, len);
+			}
+			*p++ = c1;
+			euc++;
+			len--;
+			continue;
+		}
+		l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
+		if (l < 0)
+		{
+			if (noError)
+				break;
+			report_invalid_encoding(PG_EUC_JP,
+									(const char *) euc, len);
+		}
+		if (c1 == SS2)
+		{
+			/* hankaku kana? */
+			*p++ = euc[1];
+		}
+		else if (c1 == SS3)
+		{
+			/* JIS X0212 kanji? */
+			c1 = euc[1];
+			c2 = euc[2];
+			k = c1 << 8 | c2;
+			if (k >= 0xf5a1)
+			{
+				/* UDC2 */
+				c1 -= 0x54;
+				*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74;
+				*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
+			}
+			else
+			{
+				int			i,
+							k2;
+
+				/*
+				 * IBM kanji: look the code up in ibmkanji[]; emit
+				 * PGSJISALTCODE if the stop entry is reached without a
+				 * match
+				 */
+				for (i = 0;; i++)
+				{
+					k2 = ibmkanji[i].euc & 0xffff;
+					if (k2 == 0xffff)
+					{
+						*p++ = PGSJISALTCODE >> 8;
+						*p++ = PGSJISALTCODE & 0xff;
+						break;
+					}
+					if (k2 == k)
+					{
+						k = ibmkanji[i].sjis;
+						*p++ = k >> 8;
+						*p++ = k & 0xff;
+						break;
+					}
+				}
+			}
+		}
+		else
+		{
+			/* JIS X0208 kanji? */
+			c2 = euc[1];
+			k = (c1 << 8) | (c2 & 0xff);
+			if (k >= 0xf5a1)
+			{
+				/* UDC1 */
+				c1 -= 0x54;
+				*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f;
+			}
+			else
+				*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1);
+			*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
+		}
+		euc += l;
+		len -= l;
+	}
+	*p = '\0';
+
+	return euc - start;
+}
+
+/*
+ * SJIS ---> EUC_JP
+ *
+ * Converts "len" bytes at "sjis" into "p", NUL-terminates the output and
+ * returns the number of source bytes consumed.  The range dispatch is the
+ * same as in sjis2mic(), but EUC_JP bytes are written (SS2 / SS3 prefixes
+ * instead of MIC charset bytes).  If noError is true the loop stops in
+ * front of invalid input instead of reporting it.
+ */
+static int
+sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len, bool noError)
+{
+	const unsigned char *start = sjis;
+	int			c1,
+				c2,
+				i,
+				k,
+				k2;
+	int			l;
+
+	while (len > 0)
+	{
+		c1 = *sjis;
+		if (!IS_HIGHBIT_SET(c1))
+		{
+			/* ASCII */
+			if (c1 == 0)
+			{
+				if (noError)
+					break;
+				report_invalid_encoding(PG_SJIS,
+										(const char *) sjis, len);
+			}
+			*p++ = c1;
+			sjis++;
+			len--;
+			continue;
+		}
+		l = pg_encoding_verifymbchar(PG_SJIS, (const char *) sjis, len);
+		if (l < 0)
+		{
+			if (noError)
+				break;
+			report_invalid_encoding(PG_SJIS,
+									(const char *) sjis, len);
+		}
+		if (c1 >= 0xa1 && c1 <= 0xdf)
+		{
+			/* JIS X0201 (1 byte kana) */
+			*p++ = SS2;
+			*p++ = c1;
+		}
+		else
+		{
+			/*
+			 * JIS X0208, X0212, user defined extended characters
+			 */
+			c2 = sjis[1];
+			k = (c1 << 8) + c2;
+			if (k >= 0xed40 && k < 0xf040)
+			{
+				/*
+				 * NEC selection IBM kanji: replace the code by the IBM
+				 * kanji code listed for it in ibmkanji[], if any
+				 */
+				for (i = 0;; i++)
+				{
+					k2 = ibmkanji[i].nec;
+					if (k2 == 0xffff)
+						break;
+					if (k2 == k)
+					{
+						k = ibmkanji[i].sjis;
+						c1 = (k >> 8) & 0xff;
+						c2 = k & 0xff;
+					}
+				}
+			}
+
+			if (k < 0xeb3f)
+			{
+				/* JIS X0208 */
+				*p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e);
+				*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
+			}
+			else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc))
+			{
+				/*
+				 * NEC selection IBM kanji and other codes for which no
+				 * mapping is defined: emit the alternative code
+				 */
+				*p++ = PGEUCALTCODE >> 8;
+				*p++ = PGEUCALTCODE & 0xff;
+			}
+			else if (k >= 0xf040 && k < 0xf540)
+			{
+				/*
+				 * UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 -
+				 * 0x7e7e EUC 0xf5a1 - 0xfefe
+				 */
+				c1 -= 0x6f;
+				*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
+				*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
+			}
+			else if (k >= 0xf540 && k < 0xfa40)
+			{
+				/*
+				 * UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 -
+				 * 0x7e7e EUC 0x8ff5a1 - 0x8ffefe
+				 */
+				*p++ = SS3;
+				c1 -= 0x74;
+				*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
+				*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
+			}
+			else if (k >= 0xfa40)
+			{
+				/*
+				 * mapping IBM kanji to X0208 and X0212
+				 *
+				 * Table entries whose euc value is >= 0x8f0000 are written
+				 * with an SS3 prefix (JIS X0212), all others as plain
+				 * two-byte JIS X0208.
+				 */
+				for (i = 0;; i++)
+				{
+					k2 = ibmkanji[i].sjis;
+					if (k2 == 0xffff)
+						break;
+					if (k2 == k)
+					{
+						k = ibmkanji[i].euc;
+						if (k >= 0x8f0000)
+						{
+							*p++ = SS3;
+							*p++ = 0x80 | ((k & 0xff00) >> 8);
+							*p++ = 0x80 | (k & 0xff);
+						}
+						else
+						{
+							*p++ = 0x80 | (k >> 8);
+							*p++ = 0x80 | (k & 0xff);
+						}
+					}
+				}
+			}
+		}
+		sjis += l;
+		len -= l;
+	}
+	*p = '\0';
+
+	return sjis - start;
+}
diff --git a/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/sjis.map b/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/sjis.map
new file mode 100644
index 0000000..1062f83
--- /dev/null
+++ b/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/sjis.map
@@ -0,0 +1,396 @@
+static const struct
+{
+	unsigned short int nec;		/* SJIS UDC (NEC selection IBM kanji) */
+	unsigned short int sjis;	/* SJIS UDC (IBM kanji) */
+	int			euc;			/* EUC_JP */
+} ibmkanji[] = {
+{ 0xEEEF , 0xfa40 , 0x8ff3f3 },
+{ 0xEEF0 , 0xfa41 , 0x8ff3f4 },
+{ 0xEEF1 ,
0xfa42 , 0x8ff3f5 }, +{ 0xEEF2 , 0xfa43 , 0x8ff3f6 }, +{ 0xEEF3 , 0xfa44 , 0x8ff3f7 }, +{ 0xEEF4 , 0xfa45 , 0x8ff3f8 }, +{ 0xEEF5 , 0xfa46 , 0x8ff3f9 }, +{ 0xEEF6 , 0xfa47 , 0x8ff3fa }, +{ 0xEEF7 , 0xfa48 , 0x8ff3fb }, +{ 0xEEF8 , 0xfa49 , 0x8ff3fc }, +{ 0x8754 , 0xfa4a , 0x8ff3fd }, +{ 0x8755 , 0xfa4b , 0x8ff3fe }, +{ 0x8756 , 0xfa4c , 0x8ff4a1 }, +{ 0x8757 , 0xfa4d , 0x8ff4a2 }, +{ 0x8758 , 0xfa4e , 0x8ff4a3 }, +{ 0x8759 , 0xfa4f , 0x8ff4a4 }, +{ 0x875A , 0xfa50 , 0x8ff4a5 }, +{ 0x875B , 0xfa51 , 0x8ff4a6 }, +{ 0x875C , 0xfa52 , 0x8ff4a7 }, +{ 0x875D , 0xfa53 , 0x8ff4a8 }, +{ 0xEEF9 , 0xfa54 , 0xa2cc }, +{ 0xEEFA , 0xfa55 , 0x8fa2c3 }, +{ 0xEEFB , 0xfa56 , 0x8ff4a9 }, +{ 0xEEFC , 0xfa57 , 0x8ff4aa }, +{ 0x878A , 0xfa58 , 0x8ff4ab }, +{ 0x8782 , 0xfa59 , 0x8ff4ac }, +{ 0x8784 , 0xfa5a , 0x8ff4ad }, +{ 0x879A , 0xfa5b , 0xa2e8 }, +{ 0xED40 , 0xfa5c , 0x8fd4e3 }, +{ 0xED41 , 0xfa5d , 0x8fdcdf }, +{ 0xED42 , 0xfa5e , 0x8fe4e9 }, +{ 0xED43 , 0xfa5f , 0x8fe3f8 }, +{ 0xED44 , 0xfa60 , 0x8fd9a1 }, +{ 0xED45 , 0xfa61 , 0x8fb1bb }, +{ 0xED46 , 0xfa62 , 0x8ff4ae }, +{ 0xED47 , 0xfa63 , 0x8fc2ad }, +{ 0xED48 , 0xfa64 , 0x8fc3fc }, +{ 0xED49 , 0xfa65 , 0x8fe4d0 }, +{ 0xED4A , 0xfa66 , 0x8fc2bf }, +{ 0xED4B , 0xfa67 , 0x8fbcf4 }, +{ 0xED4C , 0xfa68 , 0x8fb0a9 }, +{ 0xED4D , 0xfa69 , 0x8fb0c8 }, +{ 0xED4E , 0xfa6a , 0x8ff4af }, +{ 0xED4F , 0xfa6b , 0x8fb0d2 }, +{ 0xED50 , 0xfa6c , 0x8fb0d4 }, +{ 0xED51 , 0xfa6d , 0x8fb0e3 }, +{ 0xED52 , 0xfa6e , 0x8fb0ee }, +{ 0xED53 , 0xfa6f , 0x8fb1a7 }, +{ 0xED54 , 0xfa70 , 0x8fb1a3 }, +{ 0xED55 , 0xfa71 , 0x8fb1ac }, +{ 0xED56 , 0xfa72 , 0x8fb1a9 }, +{ 0xED57 , 0xfa73 , 0x8fb1be }, +{ 0xED58 , 0xfa74 , 0x8fb1df }, +{ 0xED59 , 0xfa75 , 0x8fb1d8 }, +{ 0xED5A , 0xfa76 , 0x8fb1c8 }, +{ 0xED5B , 0xfa77 , 0x8fb1d7 }, +{ 0xED5C , 0xfa78 , 0x8fb1e3 }, +{ 0xED5D , 0xfa79 , 0x8fb1f4 }, +{ 0xED5E , 0xfa7a , 0x8fb1e1 }, +{ 0xED5F , 0xfa7b , 0x8fb2a3 }, +{ 0xED60 , 0xfa7c , 0x8ff4b0 }, +{ 0xED61 , 0xfa7d , 0x8fb2bb }, +{ 0xED62 , 0xfa7e , 0x8fb2e6 }, +{ 
0xED63 , 0xfa80 , 0x8fb2ed }, +{ 0xED64 , 0xfa81 , 0x8fb2f5 }, +{ 0xED65 , 0xfa82 , 0x8fb2fc }, +{ 0xED66 , 0xfa83 , 0x8ff4b1 }, +{ 0xED67 , 0xfa84 , 0x8fb3b5 }, +{ 0xED68 , 0xfa85 , 0x8fb3d8 }, +{ 0xED69 , 0xfa86 , 0x8fb3db }, +{ 0xED6A , 0xfa87 , 0x8fb3e5 }, +{ 0xED6B , 0xfa88 , 0x8fb3ee }, +{ 0xED6C , 0xfa89 , 0x8fb3fb }, +{ 0xED6D , 0xfa8a , 0x8ff4b2 }, +{ 0xED6E , 0xfa8b , 0x8ff4b3 }, +{ 0xED6F , 0xfa8c , 0x8fb4c0 }, +{ 0xED70 , 0xfa8d , 0x8fb4c7 }, +{ 0xED71 , 0xfa8e , 0x8fb4d0 }, +{ 0xED72 , 0xfa8f , 0x8fb4de }, +{ 0xED73 , 0xfa90 , 0x8ff4b4 }, +{ 0xED74 , 0xfa91 , 0x8fb5aa }, +{ 0xED75 , 0xfa92 , 0x8ff4b5 }, +{ 0xED76 , 0xfa93 , 0x8fb5af }, +{ 0xED77 , 0xfa94 , 0x8fb5c4 }, +{ 0xED78 , 0xfa95 , 0x8fb5e8 }, +{ 0xED79 , 0xfa96 , 0x8ff4b6 }, +{ 0xED7A , 0xfa97 , 0x8fb7c2 }, +{ 0xED7B , 0xfa98 , 0x8fb7e4 }, +{ 0xED7C , 0xfa99 , 0x8fb7e8 }, +{ 0xED7D , 0xfa9a , 0x8fb7e7 }, +{ 0xED7E , 0xfa9b , 0x8ff4b7 }, +{ 0xED80 , 0xfa9c , 0x8ff4b8 }, +{ 0xED81 , 0xfa9d , 0x8ff4b9 }, +{ 0xED82 , 0xfa9e , 0x8fb8ce }, +{ 0xED83 , 0xfa9f , 0x8fb8e1 }, +{ 0xED84 , 0xfaa0 , 0x8fb8f5 }, +{ 0xED85 , 0xfaa1 , 0x8fb8f7 }, +{ 0xED86 , 0xfaa2 , 0x8fb8f8 }, +{ 0xED87 , 0xfaa3 , 0x8fb8fc }, +{ 0xED88 , 0xfaa4 , 0x8fb9af }, +{ 0xED89 , 0xfaa5 , 0x8fb9b7 }, +{ 0xED8A , 0xfaa6 , 0x8fbabe }, +{ 0xED8B , 0xfaa7 , 0x8fbadb }, +{ 0xED8C , 0xfaa8 , 0x8fcdaa }, +{ 0xED8D , 0xfaa9 , 0x8fbae1 }, +{ 0xED8E , 0xfaaa , 0x8ff4ba }, +{ 0xED8F , 0xfaab , 0x8fbaeb }, +{ 0xED90 , 0xfaac , 0x8fbbb3 }, +{ 0xED91 , 0xfaad , 0x8fbbb8 }, +{ 0xED92 , 0xfaae , 0x8ff4bb }, +{ 0xED93 , 0xfaaf , 0x8fbbca }, +{ 0xED94 , 0xfab0 , 0x8ff4bc }, +{ 0xED95 , 0xfab1 , 0x8ff4bd }, +{ 0xED96 , 0xfab2 , 0x8fbbd0 }, +{ 0xED97 , 0xfab3 , 0x8fbbde }, +{ 0xED98 , 0xfab4 , 0x8fbbf4 }, +{ 0xED99 , 0xfab5 , 0x8fbbf5 }, +{ 0xED9A , 0xfab6 , 0x8fbbf9 }, +{ 0xED9B , 0xfab7 , 0x8fbce4 }, +{ 0xED9C , 0xfab8 , 0x8fbced }, +{ 0xED9D , 0xfab9 , 0x8fbcfe }, +{ 0xED9E , 0xfaba , 0x8ff4be }, +{ 0xED9F , 0xfabb , 0x8fbdc2 }, +{ 0xEDA0 , 0xfabc , 
0x8fbde7 }, +{ 0xEDA1 , 0xfabd , 0x8ff4bf }, +{ 0xEDA2 , 0xfabe , 0x8fbdf0 }, +{ 0xEDA3 , 0xfabf , 0x8fbeb0 }, +{ 0xEDA4 , 0xfac0 , 0x8fbeac }, +{ 0xEDA5 , 0xfac1 , 0x8ff4c0 }, +{ 0xEDA6 , 0xfac2 , 0x8fbeb3 }, +{ 0xEDA7 , 0xfac3 , 0x8fbebd }, +{ 0xEDA8 , 0xfac4 , 0x8fbecd }, +{ 0xEDA9 , 0xfac5 , 0x8fbec9 }, +{ 0xEDAA , 0xfac6 , 0x8fbee4 }, +{ 0xEDAB , 0xfac7 , 0x8fbfa8 }, +{ 0xEDAC , 0xfac8 , 0x8fbfc9 }, +{ 0xEDAD , 0xfac9 , 0x8fc0c4 }, +{ 0xEDAE , 0xfaca , 0x8fc0e4 }, +{ 0xEDAF , 0xfacb , 0x8fc0f4 }, +{ 0xEDB0 , 0xfacc , 0x8fc1a6 }, +{ 0xEDB1 , 0xfacd , 0x8ff4c1 }, +{ 0xEDB2 , 0xface , 0x8fc1f5 }, +{ 0xEDB3 , 0xfacf , 0x8fc1fc }, +{ 0xEDB4 , 0xfad0 , 0x8ff4c2 }, +{ 0xEDB5 , 0xfad1 , 0x8fc1f8 }, +{ 0xEDB6 , 0xfad2 , 0x8fc2ab }, +{ 0xEDB7 , 0xfad3 , 0x8fc2a1 }, +{ 0xEDB8 , 0xfad4 , 0x8fc2a5 }, +{ 0xEDB9 , 0xfad5 , 0x8ff4c3 }, +{ 0xEDBA , 0xfad6 , 0x8fc2b8 }, +{ 0xEDBB , 0xfad7 , 0x8fc2ba }, +{ 0xEDBC , 0xfad8 , 0x8ff4c4 }, +{ 0xEDBD , 0xfad9 , 0x8fc2c4 }, +{ 0xEDBE , 0xfada , 0x8fc2d2 }, +{ 0xEDBF , 0xfadb , 0x8fc2d7 }, +{ 0xEDC0 , 0xfadc , 0x8fc2db }, +{ 0xEDC1 , 0xfadd , 0x8fc2de }, +{ 0xEDC2 , 0xfade , 0x8fc2ed }, +{ 0xEDC3 , 0xfadf , 0x8fc2f0 }, +{ 0xEDC4 , 0xfae0 , 0x8ff4c5 }, +{ 0xEDC5 , 0xfae1 , 0x8fc3a1 }, +{ 0xEDC6 , 0xfae2 , 0x8fc3b5 }, +{ 0xEDC7 , 0xfae3 , 0x8fc3c9 }, +{ 0xEDC8 , 0xfae4 , 0x8fc3b9 }, +{ 0xEDC9 , 0xfae5 , 0x8ff4c6 }, +{ 0xEDCA , 0xfae6 , 0x8fc3d8 }, +{ 0xEDCB , 0xfae7 , 0x8fc3fe }, +{ 0xEDCC , 0xfae8 , 0x8ff4c7 }, +{ 0xEDCD , 0xfae9 , 0x8fc4cc }, +{ 0xEDCE , 0xfaea , 0x8ff4c8 }, +{ 0xEDCF , 0xfaeb , 0x8fc4d9 }, +{ 0xEDD0 , 0xfaec , 0x8fc4ea }, +{ 0xEDD1 , 0xfaed , 0x8fc4fd }, +{ 0xEDD2 , 0xfaee , 0x8ff4c9 }, +{ 0xEDD3 , 0xfaef , 0x8fc5a7 }, +{ 0xEDD4 , 0xfaf0 , 0x8fc5b5 }, +{ 0xEDD5 , 0xfaf1 , 0x8fc5b6 }, +{ 0xEDD6 , 0xfaf2 , 0x8ff4ca }, +{ 0xEDD7 , 0xfaf3 , 0x8fc5d5 }, +{ 0xEDD8 , 0xfaf4 , 0x8fc6b8 }, +{ 0xEDD9 , 0xfaf5 , 0x8fc6d7 }, +{ 0xEDDA , 0xfaf6 , 0x8fc6e0 }, +{ 0xEDDB , 0xfaf7 , 0x8fc6ea }, +{ 0xEDDC , 0xfaf8 , 0x8fc6e3 }, +{ 
0xEDDD , 0xfaf9 , 0x8fc7a1 }, +{ 0xEDDE , 0xfafa , 0x8fc7ab }, +{ 0xEDDF , 0xfafb , 0x8fc7c7 }, +{ 0xEDE0 , 0xfafc , 0x8fc7c3 }, +{ 0xEDE1 , 0xfb40 , 0x8fc7cb }, +{ 0xEDE2 , 0xfb41 , 0x8fc7cf }, +{ 0xEDE3 , 0xfb42 , 0x8fc7d9 }, +{ 0xEDE4 , 0xfb43 , 0x8ff4cb }, +{ 0xEDE5 , 0xfb44 , 0x8ff4cc }, +{ 0xEDE6 , 0xfb45 , 0x8fc7e6 }, +{ 0xEDE7 , 0xfb46 , 0x8fc7ee }, +{ 0xEDE8 , 0xfb47 , 0x8fc7fc }, +{ 0xEDE9 , 0xfb48 , 0x8fc7eb }, +{ 0xEDEA , 0xfb49 , 0x8fc7f0 }, +{ 0xEDEB , 0xfb4a , 0x8fc8b1 }, +{ 0xEDEC , 0xfb4b , 0x8fc8e5 }, +{ 0xEDED , 0xfb4c , 0x8fc8f8 }, +{ 0xEDEE , 0xfb4d , 0x8fc9a6 }, +{ 0xEDEF , 0xfb4e , 0x8fc9ab }, +{ 0xEDF0 , 0xfb4f , 0x8fc9ad }, +{ 0xEDF1 , 0xfb50 , 0x8ff4cd }, +{ 0xEDF2 , 0xfb51 , 0x8fc9ca }, +{ 0xEDF3 , 0xfb52 , 0x8fc9d3 }, +{ 0xEDF4 , 0xfb53 , 0x8fc9e9 }, +{ 0xEDF5 , 0xfb54 , 0x8fc9e3 }, +{ 0xEDF6 , 0xfb55 , 0x8fc9fc }, +{ 0xEDF7 , 0xfb56 , 0x8fc9f4 }, +{ 0xEDF8 , 0xfb57 , 0x8fc9f5 }, +{ 0xEDF9 , 0xfb58 , 0x8ff4ce }, +{ 0xEDFA , 0xfb59 , 0x8fcab3 }, +{ 0xEDFB , 0xfb5a , 0x8fcabd }, +{ 0xEDFC , 0xfb5b , 0x8fcaef }, +{ 0xEE40 , 0xfb5c , 0x8fcaf1 }, +{ 0xEE41 , 0xfb5d , 0x8fcbae }, +{ 0xEE42 , 0xfb5e , 0x8ff4cf }, +{ 0xEE43 , 0xfb5f , 0x8fcbca }, +{ 0xEE44 , 0xfb60 , 0x8fcbe6 }, +{ 0xEE45 , 0xfb61 , 0x8fcbea }, +{ 0xEE46 , 0xfb62 , 0x8fcbf0 }, +{ 0xEE47 , 0xfb63 , 0x8fcbf4 }, +{ 0xEE48 , 0xfb64 , 0x8fcbee }, +{ 0xEE49 , 0xfb65 , 0x8fcca5 }, +{ 0xEE4A , 0xfb66 , 0x8fcbf9 }, +{ 0xEE4B , 0xfb67 , 0x8fccab }, +{ 0xEE4C , 0xfb68 , 0x8fccae }, +{ 0xEE4D , 0xfb69 , 0x8fccad }, +{ 0xEE4E , 0xfb6a , 0x8fccb2 }, +{ 0xEE4F , 0xfb6b , 0x8fccc2 }, +{ 0xEE50 , 0xfb6c , 0x8fccd0 }, +{ 0xEE51 , 0xfb6d , 0x8fccd9 }, +{ 0xEE52 , 0xfb6e , 0x8ff4d0 }, +{ 0xEE53 , 0xfb6f , 0x8fcdbb }, +{ 0xEE54 , 0xfb70 , 0x8ff4d1 }, +{ 0xEE55 , 0xfb71 , 0x8fcebb }, +{ 0xEE56 , 0xfb72 , 0x8ff4d2 }, +{ 0xEE57 , 0xfb73 , 0x8fceba }, +{ 0xEE58 , 0xfb74 , 0x8fcec3 }, +{ 0xEE59 , 0xfb75 , 0x8ff4d3 }, +{ 0xEE5A , 0xfb76 , 0x8fcef2 }, +{ 0xEE5B , 0xfb77 , 0x8fb3dd }, +{ 0xEE5C , 0xfb78 , 
0x8fcfd5 }, +{ 0xEE5D , 0xfb79 , 0x8fcfe2 }, +{ 0xEE5E , 0xfb7a , 0x8fcfe9 }, +{ 0xEE5F , 0xfb7b , 0x8fcfed }, +{ 0xEE60 , 0xfb7c , 0x8ff4d4 }, +{ 0xEE61 , 0xfb7d , 0x8ff4d5 }, +{ 0xEE62 , 0xfb7e , 0x8ff4d6 }, +{ 0xEE63 , 0xfb80 , 0x8ff4d7 }, +{ 0xEE64 , 0xfb81 , 0x8fd0e5 }, +{ 0xEE65 , 0xfb82 , 0x8ff4d8 }, +{ 0xEE66 , 0xfb83 , 0x8fd0e9 }, +{ 0xEE67 , 0xfb84 , 0x8fd1e8 }, +{ 0xEE68 , 0xfb85 , 0x8ff4d9 }, +{ 0xEE69 , 0xfb86 , 0x8ff4da }, +{ 0xEE6A , 0xfb87 , 0x8fd1ec }, +{ 0xEE6B , 0xfb88 , 0x8fd2bb }, +{ 0xEE6C , 0xfb89 , 0x8ff4db }, +{ 0xEE6D , 0xfb8a , 0x8fd3e1 }, +{ 0xEE6E , 0xfb8b , 0x8fd3e8 }, +{ 0xEE6F , 0xfb8c , 0x8fd4a7 }, +{ 0xEE70 , 0xfb8d , 0x8ff4dc }, +{ 0xEE71 , 0xfb8e , 0x8ff4dd }, +{ 0xEE72 , 0xfb8f , 0x8fd4d4 }, +{ 0xEE73 , 0xfb90 , 0x8fd4f2 }, +{ 0xEE74 , 0xfb91 , 0x8fd5ae }, +{ 0xEE75 , 0xfb92 , 0x8ff4de }, +{ 0xEE76 , 0xfb93 , 0x8fd7de }, +{ 0xEE77 , 0xfb94 , 0x8ff4df }, +{ 0xEE78 , 0xfb95 , 0x8fd8a2 }, +{ 0xEE79 , 0xfb96 , 0x8fd8b7 }, +{ 0xEE7A , 0xfb97 , 0x8fd8c1 }, +{ 0xEE7B , 0xfb98 , 0x8fd8d1 }, +{ 0xEE7C , 0xfb99 , 0x8fd8f4 }, +{ 0xEE7D , 0xfb9a , 0x8fd9c6 }, +{ 0xEE7E , 0xfb9b , 0x8fd9c8 }, +{ 0xEE80 , 0xfb9c , 0x8fd9d1 }, +{ 0xEE81 , 0xfb9d , 0x8ff4e0 }, +{ 0xEE82 , 0xfb9e , 0x8ff4e1 }, +{ 0xEE83 , 0xfb9f , 0x8ff4e2 }, +{ 0xEE84 , 0xfba0 , 0x8ff4e3 }, +{ 0xEE85 , 0xfba1 , 0x8ff4e4 }, +{ 0xEE86 , 0xfba2 , 0x8fdcd3 }, +{ 0xEE87 , 0xfba3 , 0x8fddc8 }, +{ 0xEE88 , 0xfba4 , 0x8fddd4 }, +{ 0xEE89 , 0xfba5 , 0x8fddea }, +{ 0xEE8A , 0xfba6 , 0x8fddfa }, +{ 0xEE8B , 0xfba7 , 0x8fdea4 }, +{ 0xEE8C , 0xfba8 , 0x8fdeb0 }, +{ 0xEE8D , 0xfba9 , 0x8ff4e5 }, +{ 0xEE8E , 0xfbaa , 0x8fdeb5 }, +{ 0xEE8F , 0xfbab , 0x8fdecb }, +{ 0xEE90 , 0xfbac , 0x8ff4e6 }, +{ 0xEE91 , 0xfbad , 0x8fdfb9 }, +{ 0xEE92 , 0xfbae , 0x8ff4e7 }, +{ 0xEE93 , 0xfbaf , 0x8fdfc3 }, +{ 0xEE94 , 0xfbb0 , 0x8ff4e8 }, +{ 0xEE95 , 0xfbb1 , 0x8ff4e9 }, +{ 0xEE96 , 0xfbb2 , 0x8fe0d9 }, +{ 0xEE97 , 0xfbb3 , 0x8ff4ea }, +{ 0xEE98 , 0xfbb4 , 0x8ff4eb }, +{ 0xEE99 , 0xfbb5 , 0x8fe1e2 }, +{ 
0xEE9A , 0xfbb6 , 0x8ff4ec }, +{ 0xEE9B , 0xfbb7 , 0x8ff4ed }, +{ 0xEE9C , 0xfbb8 , 0x8ff4ee }, +{ 0xEE9D , 0xfbb9 , 0x8fe2c7 }, +{ 0xEE9E , 0xfbba , 0x8fe3a8 }, +{ 0xEE9F , 0xfbbb , 0x8fe3a6 }, +{ 0xEEA0 , 0xfbbc , 0x8fe3a9 }, +{ 0xEEA1 , 0xfbbd , 0x8fe3af }, +{ 0xEEA2 , 0xfbbe , 0x8fe3b0 }, +{ 0xEEA3 , 0xfbbf , 0x8fe3aa }, +{ 0xEEA4 , 0xfbc0 , 0x8fe3ab }, +{ 0xEEA5 , 0xfbc1 , 0x8fe3bc }, +{ 0xEEA6 , 0xfbc2 , 0x8fe3c1 }, +{ 0xEEA7 , 0xfbc3 , 0x8fe3bf }, +{ 0xEEA8 , 0xfbc4 , 0x8fe3d5 }, +{ 0xEEA9 , 0xfbc5 , 0x8fe3d8 }, +{ 0xEEAA , 0xfbc6 , 0x8fe3d6 }, +{ 0xEEAB , 0xfbc7 , 0x8fe3df }, +{ 0xEEAC , 0xfbc8 , 0x8fe3e3 }, +{ 0xEEAD , 0xfbc9 , 0x8fe3e1 }, +{ 0xEEAE , 0xfbca , 0x8fe3d4 }, +{ 0xEEAF , 0xfbcb , 0x8fe3e9 }, +{ 0xEEB0 , 0xfbcc , 0x8fe4a6 }, +{ 0xEEB1 , 0xfbcd , 0x8fe3f1 }, +{ 0xEEB2 , 0xfbce , 0x8fe3f2 }, +{ 0xEEB3 , 0xfbcf , 0x8fe4cb }, +{ 0xEEB4 , 0xfbd0 , 0x8fe4c1 }, +{ 0xEEB5 , 0xfbd1 , 0x8fe4c3 }, +{ 0xEEB6 , 0xfbd2 , 0x8fe4be }, +{ 0xEEB7 , 0xfbd3 , 0x8ff4ef }, +{ 0xEEB8 , 0xfbd4 , 0x8fe4c0 }, +{ 0xEEB9 , 0xfbd5 , 0x8fe4c7 }, +{ 0xEEBA , 0xfbd6 , 0x8fe4bf }, +{ 0xEEBB , 0xfbd7 , 0x8fe4e0 }, +{ 0xEEBC , 0xfbd8 , 0x8fe4de }, +{ 0xEEBD , 0xfbd9 , 0x8fe4d1 }, +{ 0xEEBE , 0xfbda , 0x8ff4f0 }, +{ 0xEEBF , 0xfbdb , 0x8fe4dc }, +{ 0xEEC0 , 0xfbdc , 0x8fe4d2 }, +{ 0xEEC1 , 0xfbdd , 0x8fe4db }, +{ 0xEEC2 , 0xfbde , 0x8fe4d4 }, +{ 0xEEC3 , 0xfbdf , 0x8fe4fa }, +{ 0xEEC4 , 0xfbe0 , 0x8fe4ef }, +{ 0xEEC5 , 0xfbe1 , 0x8fe5b3 }, +{ 0xEEC6 , 0xfbe2 , 0x8fe5bf }, +{ 0xEEC7 , 0xfbe3 , 0x8fe5c9 }, +{ 0xEEC8 , 0xfbe4 , 0x8fe5d0 }, +{ 0xEEC9 , 0xfbe5 , 0x8fe5e2 }, +{ 0xEECA , 0xfbe6 , 0x8fe5ea }, +{ 0xEECB , 0xfbe7 , 0x8fe5eb }, +{ 0xEECC , 0xfbe8 , 0x8ff4f1 }, +{ 0xEECD , 0xfbe9 , 0x8ff4f2 }, +{ 0xEECE , 0xfbea , 0x8ff4f3 }, +{ 0xEECF , 0xfbeb , 0x8fe6e8 }, +{ 0xEED0 , 0xfbec , 0x8fe6ef }, +{ 0xEED1 , 0xfbed , 0x8fe7ac }, +{ 0xEED2 , 0xfbee , 0x8ff4f4 }, +{ 0xEED3 , 0xfbef , 0x8fe7ae }, +{ 0xEED4 , 0xfbf0 , 0x8ff4f5 }, +{ 0xEED5 , 0xfbf1 , 0x8fe7b1 }, +{ 0xEED6 , 0xfbf2 , 
0x8ff4f6 }, +{ 0xEED7 , 0xfbf3 , 0x8fe7b2 }, +{ 0xEED8 , 0xfbf4 , 0x8fe8b1 }, +{ 0xEED9 , 0xfbf5 , 0x8fe8b6 }, +{ 0xEEDA , 0xfbf6 , 0x8ff4f7 }, +{ 0xEEDB , 0xfbf7 , 0x8ff4f8 }, +{ 0xEEDC , 0xfbf8 , 0x8fe8dd }, +{ 0xEEDD , 0xfbf9 , 0x8ff4f9 }, +{ 0xEEDE , 0xfbfa , 0x8ff4fa }, +{ 0xEEDF , 0xfbfb , 0x8fe9d1 }, +{ 0xEEE0 , 0xfbfc , 0x8ff4fb }, +{ 0xEEE1 , 0xfc40 , 0x8fe9ed }, +{ 0xEEE2 , 0xfc41 , 0x8feacd }, +{ 0xEEE3 , 0xfc42 , 0x8ff4fc }, +{ 0xEEE4 , 0xfc43 , 0x8feadb }, +{ 0xEEE5 , 0xfc44 , 0x8feae6 }, +{ 0xEEE6 , 0xfc45 , 0x8feaea }, +{ 0xEEE7 , 0xfc46 , 0x8feba5 }, +{ 0xEEE8 , 0xfc47 , 0x8febfb }, +{ 0xEEE9 , 0xfc48 , 0x8febfa }, +{ 0xEEEA , 0xfc49 , 0x8ff4fd }, +{ 0xEEEB , 0xfc4a , 0x8fecd6 }, +{ 0xEEEC , 0xfc4b , 0x8ff4fe }, +{ 0xffff , 0xffff , 0xffff } /* Stop code */ +}; diff --git a/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/Makefile b/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/Makefile new file mode 100644 index 0000000..d43b082 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/Makefile @@ -0,0 +1,13 @@ +#------------------------------------------------------------------------- +# +# src/backend/utils/mb/conversion_procs/euc_kr_and_mic/Makefile +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/euc_kr_and_mic +top_builddir = ../../../../../.. 
+include $(top_builddir)/src/Makefile.global + +NAME = euc_kr_and_mic +PGFILEDESC = "euc_kr <-> mic text conversions" + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c b/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c new file mode 100644 index 0000000..3b85f0c --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c @@ -0,0 +1,174 @@ +/*------------------------------------------------------------------------- + * + * EUC_KR and MULE_INTERNAL + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(euc_kr_to_mic); +PG_FUNCTION_INFO_V1(mic_to_euc_kr); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
+ * ---------- + */ + +static int euc_kr2mic(const unsigned char *euc, unsigned char *p, int len, bool noError); +static int mic2euc_kr(const unsigned char *mic, unsigned char *p, int len, bool noError); + +Datum +euc_kr_to_mic(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_KR, PG_MULE_INTERNAL); + + converted = euc_kr2mic(src, dest, len, noError); + + PG_RETURN_INT32(converted); +} + +Datum +mic_to_euc_kr(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_KR); + + converted = mic2euc_kr(src, dest, len, noError); + + PG_RETURN_INT32(converted); +} + +/* + * EUC_KR ---> MIC + */ +static int +euc_kr2mic(const unsigned char *euc, unsigned char *p, int len, bool noError) +{ + const unsigned char *start = euc; + int c1; + int l; + + while (len > 0) + { + c1 = *euc; + if (IS_HIGHBIT_SET(c1)) + { + l = pg_encoding_verifymbchar(PG_EUC_KR, (const char *) euc, len); + if (l != 2) + { + if (noError) + break; + report_invalid_encoding(PG_EUC_KR, + (const char *) euc, len); + } + *p++ = LC_KS5601; + *p++ = c1; + *p++ = euc[1]; + euc += 2; + len -= 2; + } + else + { /* should be ASCII */ + if (c1 == 0) + { + if (noError) + break; + report_invalid_encoding(PG_EUC_KR, + (const char *) euc, len); + } + *p++ = c1; + euc++; + len--; + } + } + *p = '\0'; + + return euc - start; +} + +/* + * MIC ---> EUC_KR + */ +static int +mic2euc_kr(const unsigned char *mic, unsigned char *p, int len, bool noError) +{ + const unsigned char *start = mic; + int c1; + int l; + + while (len > 0) + { + c1 = *mic; + if (!IS_HIGHBIT_SET(c1)) + { 
+ /* ASCII */ + if (c1 == 0) + { + if (noError) + break; + report_invalid_encoding(PG_MULE_INTERNAL, + (const char *) mic, len); + } + *p++ = c1; + mic++; + len--; + continue; + } + l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len); + if (l < 0) + { + if (noError) + break; + report_invalid_encoding(PG_MULE_INTERNAL, + (const char *) mic, len); + } + if (c1 == LC_KS5601) + { + *p++ = mic[1]; + *p++ = mic[2]; + } + else + { + if (noError) + break; + report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_KR, + (const char *) mic, len); + } + mic += l; + len -= l; + } + *p = '\0'; + + return mic - start; +} diff --git a/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/Makefile b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/Makefile new file mode 100644 index 0000000..5ac2389 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/Makefile @@ -0,0 +1,16 @@ +#------------------------------------------------------------------------- +# +# src/backend/utils/mb/conversion_procs/euc_tw_and_big5/Makefile +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/euc_tw_and_big5 +top_builddir = ../../../../../.. +include $(top_builddir)/src/Makefile.global + +NAME = euc_tw_and_big5 +PGFILEDESC = "euc_tw <-> big5 text conversions" + +SRCS += big5.c +OBJS += big5.o + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/big5.c b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/big5.c new file mode 100644 index 0000000..68f76aa --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/big5.c @@ -0,0 +1,377 @@ +/* + * conversion between BIG5 and Mule Internal Code(CNS 11643-1992 + * plane 1 and plane 2). + * This program is partially copied from lv(Multilingual file viewer) + * and slightly modified. lv is written and copyrighted by NARITA Tomio + * (nrt@web.ad.jp).
+ * + * 1999/1/15 Tatsuo Ishii + * + * src/backend/utils/mb/conversion_procs/euc_tw_and_big5/big5.c + */ + +/* can be used in either frontend or backend */ +#include "postgres_fe.h" + +#include "mb/pg_wchar.h" + +typedef struct +{ + unsigned short code, + peer; +} codes_t; + +/* map Big5 Level 1 to CNS 11643-1992 Plane 1 */ +static const codes_t big5Level1ToCnsPlane1[25] = { /* range */ + {0xA140, 0x2121}, + {0xA1F6, 0x2258}, + {0xA1F7, 0x2257}, + {0xA1F8, 0x2259}, + {0xA2AF, 0x2421}, + {0xA3C0, 0x4221}, + {0xa3e1, 0x0000}, + {0xA440, 0x4421}, + {0xACFE, 0x5753}, + {0xacff, 0x0000}, + {0xAD40, 0x5323}, + {0xAFD0, 0x5754}, + {0xBBC8, 0x6B51}, + {0xBE52, 0x6B50}, + {0xBE53, 0x6F5C}, + {0xC1AB, 0x7536}, + {0xC2CB, 0x7535}, + {0xC2CC, 0x7737}, + {0xC361, 0x782E}, + {0xC3B9, 0x7865}, + {0xC3BA, 0x7864}, + {0xC3BB, 0x7866}, + {0xC456, 0x782D}, + {0xC457, 0x7962}, + {0xc67f, 0x0000} +}; + +/* map CNS 11643-1992 Plane 1 to Big5 Level 1 */ +static const codes_t cnsPlane1ToBig5Level1[26] = { /* range */ + {0x2121, 0xA140}, + {0x2257, 0xA1F7}, + {0x2258, 0xA1F6}, + {0x2259, 0xA1F8}, + {0x234f, 0x0000}, + {0x2421, 0xA2AF}, + {0x2571, 0x0000}, + {0x4221, 0xA3C0}, + {0x4242, 0x0000}, + {0x4421, 0xA440}, + {0x5323, 0xAD40}, + {0x5753, 0xACFE}, + {0x5754, 0xAFD0}, + {0x6B50, 0xBE52}, + {0x6B51, 0xBBC8}, + {0x6F5C, 0xBE53}, + {0x7535, 0xC2CB}, + {0x7536, 0xC1AB}, + {0x7737, 0xC2CC}, + {0x782D, 0xC456}, + {0x782E, 0xC361}, + {0x7864, 0xC3BA}, + {0x7865, 0xC3B9}, + {0x7866, 0xC3BB}, + {0x7962, 0xC457}, + {0x7d4c, 0x0000} +}; + +/* map Big5 Level 2 to CNS 11643-1992 Plane 2 */ +static const codes_t big5Level2ToCnsPlane2[48] = { /* range */ + {0xC940, 0x2121}, + {0xc94a, 0x0000}, + {0xC94B, 0x212B}, + {0xC96C, 0x214D}, + {0xC9BE, 0x214C}, + {0xC9BF, 0x217D}, + {0xC9ED, 0x224E}, + {0xCAF7, 0x224D}, + {0xCAF8, 0x2439}, + {0xD77A, 0x3F6A}, + {0xD77B, 0x387E}, + {0xDBA7, 0x3F6B}, + {0xDDFC, 0x4176}, + {0xDDFD, 0x4424}, + {0xE8A3, 0x554C}, + {0xE976, 0x5723}, + {0xEB5B, 0x5A29}, + {0xEBF1, 
0x554B}, + {0xEBF2, 0x5B3F}, + {0xECDE, 0x5722}, + {0xECDF, 0x5C6A}, + {0xEDAA, 0x5D75}, + {0xEEEB, 0x642F}, + {0xEEEC, 0x6039}, + {0xF056, 0x5D74}, + {0xF057, 0x6243}, + {0xF0CB, 0x5A28}, + {0xF0CC, 0x6337}, + {0xF163, 0x6430}, + {0xF16B, 0x6761}, + {0xF16C, 0x6438}, + {0xF268, 0x6934}, + {0xF269, 0x6573}, + {0xF2C3, 0x664E}, + {0xF375, 0x6762}, + {0xF466, 0x6935}, + {0xF4B5, 0x664D}, + {0xF4B6, 0x6962}, + {0xF4FD, 0x6A4C}, + {0xF663, 0x6A4B}, + {0xF664, 0x6C52}, + {0xF977, 0x7167}, + {0xF9C4, 0x7166}, + {0xF9C5, 0x7234}, + {0xF9C6, 0x7240}, + {0xF9C7, 0x7235}, + {0xF9D2, 0x7241}, + {0xf9d6, 0x0000} +}; + +/* map CNS 11643-1992 Plane 2 to Big5 Level 2 */ +static const codes_t cnsPlane2ToBig5Level2[49] = { /* range */ + {0x2121, 0xC940}, + {0x212B, 0xC94B}, + {0x214C, 0xC9BE}, + {0x214D, 0xC96C}, + {0x217D, 0xC9BF}, + {0x224D, 0xCAF7}, + {0x224E, 0xC9ED}, + {0x2439, 0xCAF8}, + {0x387E, 0xD77B}, + {0x3F6A, 0xD77A}, + {0x3F6B, 0xDBA7}, + {0x4424, 0x0000}, + {0x4176, 0xDDFC}, + {0x4177, 0x0000}, + {0x4424, 0xDDFD}, + {0x554B, 0xEBF1}, + {0x554C, 0xE8A3}, + {0x5722, 0xECDE}, + {0x5723, 0xE976}, + {0x5A28, 0xF0CB}, + {0x5A29, 0xEB5B}, + {0x5B3F, 0xEBF2}, + {0x5C6A, 0xECDF}, + {0x5D74, 0xF056}, + {0x5D75, 0xEDAA}, + {0x6039, 0xEEEC}, + {0x6243, 0xF057}, + {0x6337, 0xF0CC}, + {0x642F, 0xEEEB}, + {0x6430, 0xF163}, + {0x6438, 0xF16C}, + {0x6573, 0xF269}, + {0x664D, 0xF4B5}, + {0x664E, 0xF2C3}, + {0x6761, 0xF16B}, + {0x6762, 0xF375}, + {0x6934, 0xF268}, + {0x6935, 0xF466}, + {0x6962, 0xF4B6}, + {0x6A4B, 0xF663}, + {0x6A4C, 0xF4FD}, + {0x6C52, 0xF664}, + {0x7166, 0xF9C4}, + {0x7167, 0xF977}, + {0x7234, 0xF9C5}, + {0x7235, 0xF9C7}, + {0x7240, 0xF9C6}, + {0x7241, 0xF9D2}, + {0x7245, 0x0000} +}; + +/* Big Five Level 1 Correspondence to CNS 11643-1992 Plane 4 */ +static const unsigned short b1c4[][2] = { + {0xC879, 0x2123}, + {0xC87B, 0x2124}, + {0xC87D, 0x212A}, + {0xC8A2, 0x2152} +}; + +/* Big Five Level 2 Correspondence to CNS 11643-1992 Plane 3 */ +static const unsigned short 
b2c3[][2] = { + {0xF9D6, 0x4337}, + {0xF9D7, 0x4F50}, + {0xF9D8, 0x444E}, + {0xF9D9, 0x504A}, + {0xF9DA, 0x2C5D}, + {0xF9DB, 0x3D7E}, + {0xF9DC, 0x4B5C} +}; + +static unsigned short BinarySearchRange + (const codes_t *array, int high, unsigned short code) +{ + int low, + mid, + distance, + tmp; + + low = 0; + mid = high >> 1; + + for (; low <= high; mid = (low + high) >> 1) + { + if ((array[mid].code <= code) && (array[mid + 1].code > code)) + { + if (0 == array[mid].peer) + return 0; + if (code >= 0xa140U) + { + /* big5 to cns */ + tmp = ((code & 0xff00) - (array[mid].code & 0xff00)) >> 8; + high = code & 0x00ff; + low = array[mid].code & 0x00ff; + + /* + * NOTE: big5 high_byte: 0xa1-0xfe, low_byte: 0x40-0x7e, + * 0xa1-0xfe (radicals: 0x00-0x3e, 0x3f-0x9c) big5 radix is + * 0x9d. [region_low, region_high] We + * should remember big5 has two different regions (above). + * There is a bias for the distance between these regions. + * 0xa1 - 0x7e + bias = 1 (Distance between 0xa1 and 0x7e is + * 1.) bias = - 0x22. + */ + distance = tmp * 0x9d + high - low + + (high >= 0xa1 ? (low >= 0xa1 ? 0 : -0x22) + : (low >= 0xa1 ? +0x22 : 0)); + + /* + * NOTE: we have to convert the distance into a code point. + * The code point's low_byte is 0x21 plus mod_0x5e. In the + * first, we extract the mod_0x5e of the starting code point, + * subtracting 0x21, and add distance to it. Then we calculate + * again mod_0x5e of them, and restore the final codepoint, + * adding 0x21. + */ + tmp = (array[mid].peer & 0x00ff) + distance - 0x21; + tmp = (array[mid].peer & 0xff00) + ((tmp / 0x5e) << 8) + + 0x21 + tmp % 0x5e; + return tmp; + } + else + { + /* cns to big5 */ + tmp = ((code & 0xff00) - (array[mid].code & 0xff00)) >> 8; + + /* + * NOTE: ISO charsets ranges between 0x21-0xfe (94charset). + * Its radix is 0x5e. But there is no distance bias like big5. 
+ */ + distance = tmp * 0x5e + + ((int) (code & 0x00ff) - (int) (array[mid].code & 0x00ff)); + + /* + * NOTE: Similar to big5 to cns conversion, we extract + * mod_0x9d and restore mod_0x9d into a code point. + */ + low = array[mid].peer & 0x00ff; + tmp = low + distance - (low >= 0xa1 ? 0x62 : 0x40); + low = tmp % 0x9d; + tmp = (array[mid].peer & 0xff00) + ((tmp / 0x9d) << 8) + + (low > 0x3e ? 0x62 : 0x40) + low; + return tmp; + } + } + else if (array[mid].code > code) + high = mid - 1; + else + low = mid + 1; + } + + return 0; +} + + +unsigned short +BIG5toCNS(unsigned short big5, unsigned char *lc) +{ + unsigned short cns = 0; + int i; + + if (big5 < 0xc940U) + { + /* level 1 */ + + for (i = 0; i < sizeof(b1c4) / (sizeof(unsigned short) * 2); i++) + { + if (b1c4[i][0] == big5) + { + *lc = LC_CNS11643_4; + return (b1c4[i][1] | 0x8080U); + } + } + + if (0 < (cns = BinarySearchRange(big5Level1ToCnsPlane1, 23, big5))) + *lc = LC_CNS11643_1; + } + else if (big5 == 0xc94aU) + { + /* level 2 */ + *lc = LC_CNS11643_1; + cns = 0x4442; + } + else + { + /* level 2 */ + for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++) + { + if (b2c3[i][0] == big5) + { + *lc = LC_CNS11643_3; + return (b2c3[i][1] | 0x8080U); + } + } + + if (0 < (cns = BinarySearchRange(big5Level2ToCnsPlane2, 46, big5))) + *lc = LC_CNS11643_2; + } + + if (0 == cns) + { /* no mapping Big5 to CNS 11643-1992 */ + *lc = 0; + return (unsigned short) '?'; + } + + return cns | 0x8080; +} + +unsigned short +CNStoBIG5(unsigned short cns, unsigned char lc) +{ + int i; + unsigned int big5 = 0; + + cns &= 0x7f7f; + + switch (lc) + { + case LC_CNS11643_1: + big5 = BinarySearchRange(cnsPlane1ToBig5Level1, 24, cns); + break; + case LC_CNS11643_2: + big5 = BinarySearchRange(cnsPlane2ToBig5Level2, 47, cns); + break; + case LC_CNS11643_3: + for (i = 0; i < sizeof(b2c3) / (sizeof(unsigned short) * 2); i++) + { + if (b2c3[i][1] == cns) + return b2c3[i][0]; + } + break; + case LC_CNS11643_4: + for (i = 0; i < 
sizeof(b1c4) / (sizeof(unsigned short) * 2); i++) + { + if (b1c4[i][1] == cns) + return b1c4[i][0]; + } + default: + break; + } + return big5; +} diff --git a/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c new file mode 100644 index 0000000..e9f1d61 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c @@ -0,0 +1,582 @@ +/*------------------------------------------------------------------------- + * + * EUC_TW, BIG5 and MULE_INTERNAL + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" + +#define ENCODING_GROWTH_RATE 4 + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(euc_tw_to_big5); +PG_FUNCTION_INFO_V1(big5_to_euc_tw); +PG_FUNCTION_INFO_V1(euc_tw_to_mic); +PG_FUNCTION_INFO_V1(mic_to_euc_tw); +PG_FUNCTION_INFO_V1(big5_to_mic); +PG_FUNCTION_INFO_V1(mic_to_big5); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
+ * ---------- + */ + +static int euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError); +static int big52euc_tw(const unsigned char *euc, unsigned char *p, int len, bool noError); +static int big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError); +static int mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError); +static int euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError); +static int mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError); + +Datum +euc_tw_to_big5(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_BIG5); + + converted = euc_tw2big5(src, dest, len, noError); + + PG_RETURN_INT32(converted); +} + +Datum +big5_to_euc_tw(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_EUC_TW); + + converted = big52euc_tw(src, dest, len, noError); + + PG_RETURN_INT32(converted); +} + +Datum +euc_tw_to_mic(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_MULE_INTERNAL); + + converted = euc_tw2mic(src, dest, len, noError); + + PG_RETURN_INT32(converted); +} + +Datum +mic_to_euc_tw(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool 
noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_EUC_TW); + + converted = mic2euc_tw(src, dest, len, noError); + + PG_RETURN_INT32(converted); +} + +Datum +big5_to_mic(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_MULE_INTERNAL); + + converted = big52mic(src, dest, len, noError); + + PG_RETURN_INT32(converted); +} + +Datum +mic_to_big5(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_BIG5); + + converted = mic2big5(src, dest, len, noError); + + PG_RETURN_INT32(converted); +} + + +/* + * EUC_TW ---> Big5 + */ +static int +euc_tw2big5(const unsigned char *euc, unsigned char *p, int len, bool noError) +{ + const unsigned char *start = euc; + unsigned char c1; + unsigned short big5buf, + cnsBuf; + unsigned char lc; + int l; + + while (len > 0) + { + c1 = *euc; + if (IS_HIGHBIT_SET(c1)) + { + /* Verify and decode the next EUC_TW input character */ + l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len); + if (l < 0) + { + if (noError) + break; + report_invalid_encoding(PG_EUC_TW, + (const char *) euc, len); + } + if (c1 == SS2) + { + c1 = euc[1]; /* plane No. 
*/ + if (c1 == 0xa1) + lc = LC_CNS11643_1; + else if (c1 == 0xa2) + lc = LC_CNS11643_2; + else + lc = c1 - 0xa3 + LC_CNS11643_3; + cnsBuf = (euc[2] << 8) | euc[3]; + } + else + { /* CNS11643-1 */ + lc = LC_CNS11643_1; + cnsBuf = (c1 << 8) | euc[1]; + } + + /* Write it out in Big5 */ + big5buf = CNStoBIG5(cnsBuf, lc); + if (big5buf == 0) + { + if (noError) + break; + report_untranslatable_char(PG_EUC_TW, PG_BIG5, + (const char *) euc, len); + } + *p++ = (big5buf >> 8) & 0x00ff; + *p++ = big5buf & 0x00ff; + + euc += l; + len -= l; + } + else + { /* should be ASCII */ + if (c1 == 0) + { + if (noError) + break; + report_invalid_encoding(PG_EUC_TW, + (const char *) euc, len); + } + *p++ = c1; + euc++; + len--; + } + } + *p = '\0'; + + return euc - start; +} + +/* + * Big5 ---> EUC_TW + */ +static int +big52euc_tw(const unsigned char *big5, unsigned char *p, int len, bool noError) +{ + const unsigned char *start = big5; + unsigned short c1; + unsigned short big5buf, + cnsBuf; + unsigned char lc; + int l; + + while (len > 0) + { + /* Verify and decode the next Big5 input character */ + c1 = *big5; + if (IS_HIGHBIT_SET(c1)) + { + l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len); + if (l < 0) + { + if (noError) + break; + report_invalid_encoding(PG_BIG5, + (const char *) big5, len); + } + big5buf = (c1 << 8) | big5[1]; + cnsBuf = BIG5toCNS(big5buf, &lc); + + if (lc == LC_CNS11643_1) + { + *p++ = (cnsBuf >> 8) & 0x00ff; + *p++ = cnsBuf & 0x00ff; + } + else if (lc == LC_CNS11643_2) + { + *p++ = SS2; + *p++ = 0xa2; + *p++ = (cnsBuf >> 8) & 0x00ff; + *p++ = cnsBuf & 0x00ff; + } + else if (lc >= LC_CNS11643_3 && lc <= LC_CNS11643_7) + { + *p++ = SS2; + *p++ = lc - LC_CNS11643_3 + 0xa3; + *p++ = (cnsBuf >> 8) & 0x00ff; + *p++ = cnsBuf & 0x00ff; + } + else + { + if (noError) + break; + report_untranslatable_char(PG_BIG5, PG_EUC_TW, + (const char *) big5, len); + } + + big5 += l; + len -= l; + } + else + { + /* ASCII */ + if (c1 == 0) + { + if (noError) + break; + 
report_invalid_encoding(PG_BIG5, + (const char *) big5, len); + } + *p++ = c1; + big5++; + len--; + continue; + } + } + *p = '\0'; + + return big5 - start; +} + +/* + * EUC_TW ---> MIC + */ +static int +euc_tw2mic(const unsigned char *euc, unsigned char *p, int len, bool noError) +{ + const unsigned char *start = euc; + int c1; + int l; + + while (len > 0) + { + c1 = *euc; + if (IS_HIGHBIT_SET(c1)) + { + l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len); + if (l < 0) + { + if (noError) + break; + report_invalid_encoding(PG_EUC_TW, + (const char *) euc, len); + } + if (c1 == SS2) + { + c1 = euc[1]; /* plane No. */ + if (c1 == 0xa1) + *p++ = LC_CNS11643_1; + else if (c1 == 0xa2) + *p++ = LC_CNS11643_2; + else + { + /* other planes are MULE private charsets */ + *p++ = LCPRV2_B; + *p++ = c1 - 0xa3 + LC_CNS11643_3; + } + *p++ = euc[2]; + *p++ = euc[3]; + } + else + { /* CNS11643-1 */ + *p++ = LC_CNS11643_1; + *p++ = c1; + *p++ = euc[1]; + } + euc += l; + len -= l; + } + else + { /* should be ASCII */ + if (c1 == 0) + { + if (noError) + break; + report_invalid_encoding(PG_EUC_TW, + (const char *) euc, len); + } + *p++ = c1; + euc++; + len--; + } + } + *p = '\0'; + + return euc - start; +} + +/* + * MIC ---> EUC_TW + */ +static int +mic2euc_tw(const unsigned char *mic, unsigned char *p, int len, bool noError) +{ + const unsigned char *start = mic; + int c1; + int l; + + while (len > 0) + { + c1 = *mic; + if (!IS_HIGHBIT_SET(c1)) + { + /* ASCII */ + if (c1 == 0) + { + if (noError) + break; + report_invalid_encoding(PG_MULE_INTERNAL, + (const char *) mic, len); + } + *p++ = c1; + mic++; + len--; + continue; + } + l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len); + if (l < 0) + { + if (noError) + break; + report_invalid_encoding(PG_MULE_INTERNAL, + (const char *) mic, len); + } + if (c1 == LC_CNS11643_1) + { + *p++ = mic[1]; + *p++ = mic[2]; + } + else if (c1 == LC_CNS11643_2) + { + *p++ = SS2; + *p++ = 0xa2; + *p++ = mic[1]; + *p++ = 
mic[2]; + } + else if (c1 == LCPRV2_B && + mic[1] >= LC_CNS11643_3 && mic[1] <= LC_CNS11643_7) + { + *p++ = SS2; + *p++ = mic[1] - LC_CNS11643_3 + 0xa3; + *p++ = mic[2]; + *p++ = mic[3]; + } + else + { + if (noError) + break; + report_untranslatable_char(PG_MULE_INTERNAL, PG_EUC_TW, + (const char *) mic, len); + } + mic += l; + len -= l; + } + *p = '\0'; + + return mic - start; +} + +/* + * Big5 ---> MIC + */ +static int +big52mic(const unsigned char *big5, unsigned char *p, int len, bool noError) +{ + const unsigned char *start = big5; + unsigned short c1; + unsigned short big5buf, + cnsBuf; + unsigned char lc; + int l; + + while (len > 0) + { + c1 = *big5; + if (!IS_HIGHBIT_SET(c1)) + { + /* ASCII */ + if (c1 == 0) + { + if (noError) + break; + report_invalid_encoding(PG_BIG5, + (const char *) big5, len); + } + *p++ = c1; + big5++; + len--; + continue; + } + l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len); + if (l < 0) + { + if (noError) + break; + report_invalid_encoding(PG_BIG5, + (const char *) big5, len); + } + big5buf = (c1 << 8) | big5[1]; + cnsBuf = BIG5toCNS(big5buf, &lc); + if (lc != 0) + { + /* Planes 3 and 4 are MULE private charsets */ + if (lc == LC_CNS11643_3 || lc == LC_CNS11643_4) + *p++ = LCPRV2_B; + *p++ = lc; /* Plane No. 
*/ + *p++ = (cnsBuf >> 8) & 0x00ff; + *p++ = cnsBuf & 0x00ff; + } + else + { + if (noError) + break; + report_untranslatable_char(PG_BIG5, PG_MULE_INTERNAL, + (const char *) big5, len); + } + big5 += l; + len -= l; + } + *p = '\0'; + + return big5 - start; +} + +/* + * MIC ---> Big5 + */ +static int +mic2big5(const unsigned char *mic, unsigned char *p, int len, bool noError) +{ + const unsigned char *start = mic; + unsigned short c1; + unsigned short big5buf, + cnsBuf; + int l; + + while (len > 0) + { + c1 = *mic; + if (!IS_HIGHBIT_SET(c1)) + { + /* ASCII */ + if (c1 == 0) + { + if (noError) + break; + report_invalid_encoding(PG_MULE_INTERNAL, + (const char *) mic, len); + } + *p++ = c1; + mic++; + len--; + continue; + } + l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len); + if (l < 0) + { + if (noError) + break; + report_invalid_encoding(PG_MULE_INTERNAL, + (const char *) mic, len); + } + if (c1 == LC_CNS11643_1 || c1 == LC_CNS11643_2 || c1 == LCPRV2_B) + { + if (c1 == LCPRV2_B) + { + c1 = mic[1]; /* get plane no. 
*/ + cnsBuf = (mic[2] << 8) | mic[3]; + } + else + { + cnsBuf = (mic[1] << 8) | mic[2]; + } + big5buf = CNStoBIG5(cnsBuf, c1); + if (big5buf == 0) + { + if (noError) + break; + report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5, + (const char *) mic, len); + } + *p++ = (big5buf >> 8) & 0x00ff; + *p++ = big5buf & 0x00ff; + } + else + { + if (noError) + break; + report_untranslatable_char(PG_MULE_INTERNAL, PG_BIG5, + (const char *) mic, len); + } + mic += l; + len -= l; + } + *p = '\0'; + + return mic - start; +} diff --git a/src/backend/utils/mb/conversion_procs/latin2_and_win1250/Makefile b/src/backend/utils/mb/conversion_procs/latin2_and_win1250/Makefile new file mode 100644 index 0000000..5e48dae --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/latin2_and_win1250/Makefile @@ -0,0 +1,13 @@ +#------------------------------------------------------------------------- +# +# src/backend/utils/mb/conversion_procs/latin2_and_win1250/Makefile +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/latin2_and_win1250 +top_builddir = ../../../../../.. 
+include $(top_builddir)/src/Makefile.global + +NAME = latin2_and_win1250 +PGFILEDESC = "latin2 <-> win1250 text conversions" + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c b/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c new file mode 100644 index 0000000..8610fcb --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c @@ -0,0 +1,180 @@ +/*------------------------------------------------------------------------- + * + * LATIN2 and WIN1250 + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/mb/conversion_procs/latin2_and_win1250/latin2_and_win1250.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(latin2_to_mic); +PG_FUNCTION_INFO_V1(mic_to_latin2); +PG_FUNCTION_INFO_V1(win1250_to_mic); +PG_FUNCTION_INFO_V1(mic_to_win1250); +PG_FUNCTION_INFO_V1(latin2_to_win1250); +PG_FUNCTION_INFO_V1(win1250_to_latin2); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
+ * ---------- + */ + +/* WIN1250 to ISO-8859-2 */ +static const unsigned char win1250_2_iso88592[] = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0xA9, 0x8B, 0xA6, 0xAB, 0xAE, 0xAC, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0xB9, 0x9B, 0xB6, 0xBB, 0xBE, 0xBC, + 0xA0, 0xB7, 0xA2, 0xA3, 0xA4, 0xA1, 0x00, 0xA7, + 0xA8, 0x00, 0xAA, 0x00, 0x00, 0xAD, 0x00, 0xAF, + 0xB0, 0x00, 0xB2, 0xB3, 0xB4, 0x00, 0x00, 0x00, + 0xB8, 0xB1, 0xBA, 0x00, 0xA5, 0xBD, 0xB5, 0xBF, + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF +}; + +/* ISO-8859-2 to WIN1250 */ +static const unsigned char iso88592_2_win1250[] = { + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x00, 0x8B, 0x00, 0x00, 0x00, 0x00, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x00, 0x9B, 0x00, 0x00, 0x00, 0x00, + 0xA0, 0xA5, 0xA2, 0xA3, 0xA4, 0xBC, 0x8C, 0xA7, + 0xA8, 0x8A, 0xAA, 0x8D, 0x8F, 0xAD, 0x8E, 0xAF, + 0xB0, 0xB9, 0xB2, 0xB3, 0xB4, 0xBE, 0x9C, 0xA1, + 0xB8, 0x9A, 0xBA, 0x9D, 0x9F, 0xBD, 0x9E, 0xBF, + 0xC0, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, + 0xC8, 0xC9, 0xCA, 0xCB, 0xCC, 0xCD, 0xCE, 0xCF, + 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, + 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF, + 0xE0, 0xE1, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, + 0xE8, 0xE9, 0xEA, 0xEB, 0xEC, 0xED, 0xEE, 0xEF, + 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, + 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF +}; + + +Datum +latin2_to_mic(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); 
+ bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN2, PG_MULE_INTERNAL); + + converted = latin2mic(src, dest, len, LC_ISO8859_2, PG_LATIN2, noError); + + PG_RETURN_INT32(converted); +} + +Datum +mic_to_latin2(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN2); + + converted = mic2latin(src, dest, len, LC_ISO8859_2, PG_LATIN2, noError); + + PG_RETURN_INT32(converted); +} + +Datum +win1250_to_mic(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1250, PG_MULE_INTERNAL); + + converted = latin2mic_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250, + win1250_2_iso88592, noError); + + PG_RETURN_INT32(converted); +} + +Datum +mic_to_win1250(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_WIN1250); + + converted = mic2latin_with_table(src, dest, len, LC_ISO8859_2, PG_WIN1250, + iso88592_2_win1250, noError); + + PG_RETURN_INT32(converted); +} + +Datum +latin2_to_win1250(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN2, PG_WIN1250); + + converted = local2local(src, dest, len, PG_LATIN2, PG_WIN1250, + 
iso88592_2_win1250, noError); + + PG_RETURN_INT32(converted); +} + +Datum +win1250_to_latin2(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_WIN1250, PG_LATIN2); + + converted = local2local(src, dest, len, PG_WIN1250, PG_LATIN2, + win1250_2_iso88592, noError); + + PG_RETURN_INT32(converted); +} diff --git a/src/backend/utils/mb/conversion_procs/latin_and_mic/Makefile b/src/backend/utils/mb/conversion_procs/latin_and_mic/Makefile new file mode 100644 index 0000000..c404738 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/latin_and_mic/Makefile @@ -0,0 +1,13 @@ +#------------------------------------------------------------------------- +# +# src/backend/utils/mb/conversion_procs/latin_and_mic/Makefile +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/latin_and_mic +top_builddir = ../../../../../.. 
+include $(top_builddir)/src/Makefile.global + +NAME = latin_and_mic +PGFILEDESC = "latin <-> mic text conversions" + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c b/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c new file mode 100644 index 0000000..bff27d1 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c @@ -0,0 +1,136 @@ +/*------------------------------------------------------------------------- + * + * LATINn and MULE_INTERNAL + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/mb/conversion_procs/latin_and_mic/latin_and_mic.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(latin1_to_mic); +PG_FUNCTION_INFO_V1(mic_to_latin1); +PG_FUNCTION_INFO_V1(latin3_to_mic); +PG_FUNCTION_INFO_V1(mic_to_latin3); +PG_FUNCTION_INFO_V1(latin4_to_mic); +PG_FUNCTION_INFO_V1(mic_to_latin4); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
+ * ---------- + */ + + +Datum +latin1_to_mic(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_MULE_INTERNAL); + + converted = latin2mic(src, dest, len, LC_ISO8859_1, PG_LATIN1, noError); + + PG_RETURN_INT32(converted); +} + +Datum +mic_to_latin1(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN1); + + converted = mic2latin(src, dest, len, LC_ISO8859_1, PG_LATIN1, noError); + + PG_RETURN_INT32(converted); +} + +Datum +latin3_to_mic(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN3, PG_MULE_INTERNAL); + + converted = latin2mic(src, dest, len, LC_ISO8859_3, PG_LATIN3, noError); + + PG_RETURN_INT32(converted); +} + +Datum +mic_to_latin3(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN3); + + converted = mic2latin(src, dest, len, LC_ISO8859_3, PG_LATIN3, noError); + + PG_RETURN_INT32(converted); +} + +Datum +latin4_to_mic(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + 
int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN4, PG_MULE_INTERNAL); + + converted = latin2mic(src, dest, len, LC_ISO8859_4, PG_LATIN4, noError); + + PG_RETURN_INT32(converted); +} + +Datum +mic_to_latin4(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_MULE_INTERNAL, PG_LATIN4); + + converted = mic2latin(src, dest, len, LC_ISO8859_4, PG_LATIN4, noError); + + PG_RETURN_INT32(converted); +} diff --git a/src/backend/utils/mb/conversion_procs/proc.mk b/src/backend/utils/mb/conversion_procs/proc.mk new file mode 100644 index 0000000..e0a3b74 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/proc.mk @@ -0,0 +1,17 @@ +SRCS += $(NAME).c +OBJS += $(NAME).o $(WIN32RES) + +rpath = + +all: all-shared-lib + +include $(top_srcdir)/src/Makefile.shlib + +install: all installdirs install-lib + +installdirs: installdirs-lib + +uninstall: uninstall-lib + +clean distclean maintainer-clean: clean-lib + rm -f $(OBJS) diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_big5/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_big5/Makefile new file mode 100644 index 0000000..2c274f3 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_big5/Makefile @@ -0,0 +1,13 @@ +#------------------------------------------------------------------------- +# +# src/backend/utils/mb/conversion_procs/utf8_and_big5/Makefile +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/utf8_and_big5 +top_builddir = ../../../../../.. 
+include $(top_builddir)/src/Makefile.global + +NAME = utf8_and_big5 +PGFILEDESC = "utf8 <-> big5 text conversions" + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c b/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c new file mode 100644 index 0000000..3838b15 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c @@ -0,0 +1,78 @@ +/*------------------------------------------------------------------------- + * + * BIG5 <--> UTF8 + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/mb/conversion_procs/utf8_and_big5/utf8_and_big5.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/big5_to_utf8.map" +#include "../../Unicode/utf8_to_big5.map" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(big5_to_utf8); +PG_FUNCTION_INFO_V1(utf8_to_big5); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
+ * ---------- + */ +Datum +big5_to_utf8(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_BIG5, PG_UTF8); + + converted = LocalToUtf(src, len, dest, + &big5_to_unicode_tree, + NULL, 0, + NULL, + PG_BIG5, + noError); + + PG_RETURN_INT32(converted); +} + +Datum +utf8_to_big5(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_BIG5); + + converted = UtfToLocal(src, len, dest, + &big5_from_unicode_tree, + NULL, 0, + NULL, + PG_BIG5, + noError); + + PG_RETURN_INT32(converted); +} diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/Makefile new file mode 100644 index 0000000..725281e --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/Makefile @@ -0,0 +1,13 @@ +#------------------------------------------------------------------------- +# +# src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/Makefile +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/utf8_and_cyrillic +top_builddir = ../../../../../.. 
+include $(top_builddir)/src/Makefile.global + +NAME = utf8_and_cyrillic +PGFILEDESC = "utf8 <-> cyrillic text conversions" + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c b/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c new file mode 100644 index 0000000..75719fe --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c @@ -0,0 +1,126 @@ +/*------------------------------------------------------------------------- + * + * UTF8 and Cyrillic + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/utf8_to_koi8r.map" +#include "../../Unicode/koi8r_to_utf8.map" +#include "../../Unicode/utf8_to_koi8u.map" +#include "../../Unicode/koi8u_to_utf8.map" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(utf8_to_koi8r); +PG_FUNCTION_INFO_V1(koi8r_to_utf8); + +PG_FUNCTION_INFO_V1(utf8_to_koi8u); +PG_FUNCTION_INFO_V1(koi8u_to_utf8); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
+ * ---------- + */ + +Datum +utf8_to_koi8r(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8R); + + converted = UtfToLocal(src, len, dest, + &koi8r_from_unicode_tree, + NULL, 0, + NULL, + PG_KOI8R, + noError); + + PG_RETURN_INT32(converted); +} + +Datum +koi8r_to_utf8(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8R, PG_UTF8); + + converted = LocalToUtf(src, len, dest, + &koi8r_to_unicode_tree, + NULL, 0, + NULL, + PG_KOI8R, + noError); + + PG_RETURN_INT32(converted); +} + +Datum +utf8_to_koi8u(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8U); + + converted = UtfToLocal(src, len, dest, + &koi8u_from_unicode_tree, + NULL, 0, + NULL, + PG_KOI8U, + noError); + + PG_RETURN_INT32(converted); +} + +Datum +koi8u_to_utf8(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8U, PG_UTF8); + + converted = LocalToUtf(src, len, dest, + &koi8u_to_unicode_tree, + NULL, 0, + NULL, + PG_KOI8U, + noError); + + PG_RETURN_INT32(converted); +} diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/Makefile 
b/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/Makefile new file mode 100644 index 0000000..c6c537c --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/Makefile @@ -0,0 +1,13 @@ +#------------------------------------------------------------------------- +# +# src/backend/utils/mb/conversion_procs/utf8_and_euc2004/Makefile +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/utf8_and_euc2004 +top_builddir = ../../../../../.. +include $(top_builddir)/src/Makefile.global + +NAME = utf8_and_euc2004 +PGFILEDESC = "utf8 <-> euc2004 text conversions" + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c new file mode 100644 index 0000000..5391001 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c @@ -0,0 +1,78 @@ +/*------------------------------------------------------------------------- + * + * EUC_JIS_2004 <--> UTF8 + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/mb/conversion_procs/utf8_and_euc2004/utf8_and_euc2004.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/euc_jis_2004_to_utf8.map" +#include "../../Unicode/utf8_to_euc_jis_2004.map" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(euc_jis_2004_to_utf8); +PG_FUNCTION_INFO_V1(utf8_to_euc_jis_2004); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER, -- source string length + 
* BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. + * ---------- + */ +Datum +euc_jis_2004_to_utf8(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JIS_2004, PG_UTF8); + + converted = LocalToUtf(src, len, dest, + &euc_jis_2004_to_unicode_tree, + LUmapEUC_JIS_2004_combined, lengthof(LUmapEUC_JIS_2004_combined), + NULL, + PG_EUC_JIS_2004, + noError); + + PG_RETURN_INT32(converted); +} + +Datum +utf8_to_euc_jis_2004(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JIS_2004); + + converted = UtfToLocal(src, len, dest, + &euc_jis_2004_from_unicode_tree, + ULmapEUC_JIS_2004_combined, lengthof(ULmapEUC_JIS_2004_combined), + NULL, + PG_EUC_JIS_2004, + noError); + + PG_RETURN_INT32(converted); +} diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/Makefile new file mode 100644 index 0000000..9d0e157 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/Makefile @@ -0,0 +1,13 @@ +#------------------------------------------------------------------------- +# +# src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/Makefile +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/utf8_and_euc_cn +top_builddir = ../../../../../.. 
+include $(top_builddir)/src/Makefile.global + +NAME = utf8_and_euc_cn +PGFILEDESC = "utf8 <-> euc_cn text conversions" + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c new file mode 100644 index 0000000..c87d1bf --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c @@ -0,0 +1,78 @@ +/*------------------------------------------------------------------------- + * + * EUC_CN <--> UTF8 + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/mb/conversion_procs/utf8_and_euc_cn/utf8_and_euc_cn.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/euc_cn_to_utf8.map" +#include "../../Unicode/utf8_to_euc_cn.map" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(euc_cn_to_utf8); +PG_FUNCTION_INFO_V1(utf8_to_euc_cn); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
+ * ---------- + */ +Datum +euc_cn_to_utf8(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_CN, PG_UTF8); + + converted = LocalToUtf(src, len, dest, + &euc_cn_to_unicode_tree, + NULL, 0, + NULL, + PG_EUC_CN, + noError); + + PG_RETURN_INT32(converted); +} + +Datum +utf8_to_euc_cn(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_CN); + + converted = UtfToLocal(src, len, dest, + &euc_cn_from_unicode_tree, + NULL, 0, + NULL, + PG_EUC_CN, + noError); + + PG_RETURN_INT32(converted); +} diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/Makefile new file mode 100644 index 0000000..c5f5578 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/Makefile @@ -0,0 +1,13 @@ +#------------------------------------------------------------------------- +# +# src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/Makefile +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/utf8_and_euc_jp +top_builddir = ../../../../../.. 
+include $(top_builddir)/src/Makefile.global + +NAME = utf8_and_euc_jp +PGFILEDESC = "utf8 <-> euc_jp text conversions" + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c new file mode 100644 index 0000000..6a55134 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c @@ -0,0 +1,78 @@ +/*------------------------------------------------------------------------- + * + * EUC_JP <--> UTF8 + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/mb/conversion_procs/utf8_and_euc_jp/utf8_and_euc_jp.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/euc_jp_to_utf8.map" +#include "../../Unicode/utf8_to_euc_jp.map" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(euc_jp_to_utf8); +PG_FUNCTION_INFO_V1(utf8_to_euc_jp); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
+ * ---------- + */ +Datum +euc_jp_to_utf8(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_JP, PG_UTF8); + + converted = LocalToUtf(src, len, dest, + &euc_jp_to_unicode_tree, + NULL, 0, + NULL, + PG_EUC_JP, + noError); + + PG_RETURN_INT32(converted); +} + +Datum +utf8_to_euc_jp(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_JP); + + converted = UtfToLocal(src, len, dest, + &euc_jp_from_unicode_tree, + NULL, 0, + NULL, + PG_EUC_JP, + noError); + + PG_RETURN_INT32(converted); +} diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/Makefile new file mode 100644 index 0000000..a7eff9e --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/Makefile @@ -0,0 +1,13 @@ +#------------------------------------------------------------------------- +# +# src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/Makefile +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/utf8_and_euc_kr +top_builddir = ../../../../../.. 
+include $(top_builddir)/src/Makefile.global + +NAME = utf8_and_euc_kr +PGFILEDESC = "utf8 <-> euc_kr text conversions" + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c new file mode 100644 index 0000000..fe1924e --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c @@ -0,0 +1,78 @@ +/*------------------------------------------------------------------------- + * + * EUC_KR <--> UTF8 + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/mb/conversion_procs/utf8_and_euc_kr/utf8_and_euc_kr.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/euc_kr_to_utf8.map" +#include "../../Unicode/utf8_to_euc_kr.map" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(euc_kr_to_utf8); +PG_FUNCTION_INFO_V1(utf8_to_euc_kr); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
+ * ---------- + */ +Datum +euc_kr_to_utf8(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_KR, PG_UTF8); + + converted = LocalToUtf(src, len, dest, + &euc_kr_to_unicode_tree, + NULL, 0, + NULL, + PG_EUC_KR, + noError); + + PG_RETURN_INT32(converted); +} + +Datum +utf8_to_euc_kr(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_KR); + + converted = UtfToLocal(src, len, dest, + &euc_kr_from_unicode_tree, + NULL, 0, + NULL, + PG_EUC_KR, + noError); + + PG_RETURN_INT32(converted); +} diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/Makefile new file mode 100644 index 0000000..59f42dc --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/Makefile @@ -0,0 +1,13 @@ +#------------------------------------------------------------------------- +# +# src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/Makefile +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/utf8_and_euc_tw +top_builddir = ../../../../../.. 
+include $(top_builddir)/src/Makefile.global + +NAME = utf8_and_euc_tw +PGFILEDESC = "utf8 <-> euc_tw text conversions" + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c b/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c new file mode 100644 index 0000000..6821565 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c @@ -0,0 +1,78 @@ +/*------------------------------------------------------------------------- + * + * EUC_TW <--> UTF8 + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/mb/conversion_procs/utf8_and_euc_tw/utf8_and_euc_tw.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/euc_tw_to_utf8.map" +#include "../../Unicode/utf8_to_euc_tw.map" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(euc_tw_to_utf8); +PG_FUNCTION_INFO_V1(utf8_to_euc_tw); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
+ * ---------- + */ +Datum +euc_tw_to_utf8(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_EUC_TW, PG_UTF8); + + converted = LocalToUtf(src, len, dest, + &euc_tw_to_unicode_tree, + NULL, 0, + NULL, + PG_EUC_TW, + noError); + + PG_RETURN_INT32(converted); +} + +Datum +utf8_to_euc_tw(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_EUC_TW); + + converted = UtfToLocal(src, len, dest, + &euc_tw_from_unicode_tree, + NULL, 0, + NULL, + PG_EUC_TW, + noError); + + PG_RETURN_INT32(converted); +} diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/Makefile new file mode 100644 index 0000000..17bc18c --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/Makefile @@ -0,0 +1,13 @@ +#------------------------------------------------------------------------- +# +# src/backend/utils/mb/conversion_procs/utf8_and_gb18030/Makefile +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/utf8_and_gb18030 +top_builddir = ../../../../../.. 
+include $(top_builddir)/src/Makefile.global + +NAME = utf8_and_gb18030 +PGFILEDESC = "utf8 <-> gb18030 text conversions" + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c b/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c new file mode 100644 index 0000000..e1a59c3 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c @@ -0,0 +1,233 @@ +/*------------------------------------------------------------------------- + * + * GB18030 <--> UTF8 + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/mb/conversion_procs/utf8_and_gb18030/utf8_and_gb18030.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/gb18030_to_utf8.map" +#include "../../Unicode/utf8_to_gb18030.map" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(gb18030_to_utf8); +PG_FUNCTION_INFO_V1(utf8_to_gb18030); + +/* + * Convert 4-byte GB18030 characters to and from a linear code space + * + * The first and third bytes can range from 0x81 to 0xfe (126 values), + * while the second and fourth bytes can range from 0x30 to 0x39 (10 values). 
+ */ +static inline uint32 +gb_linear(uint32 gb) +{ + uint32 b0 = (gb & 0xff000000) >> 24; + uint32 b1 = (gb & 0x00ff0000) >> 16; + uint32 b2 = (gb & 0x0000ff00) >> 8; + uint32 b3 = (gb & 0x000000ff); + + return b0 * 12600 + b1 * 1260 + b2 * 10 + b3 - + (0x81 * 12600 + 0x30 * 1260 + 0x81 * 10 + 0x30); +} + +static inline uint32 +gb_unlinear(uint32 lin) +{ + uint32 r0 = 0x81 + lin / 12600; + uint32 r1 = 0x30 + (lin / 1260) % 10; + uint32 r2 = 0x81 + (lin / 10) % 126; + uint32 r3 = 0x30 + lin % 10; + + return (r0 << 24) | (r1 << 16) | (r2 << 8) | r3; +} + +/* + * Convert word-formatted UTF8 to and from Unicode code points + * + * Probably this should be somewhere else ... + */ +static inline uint32 +unicode_to_utf8word(uint32 c) +{ + uint32 word; + + if (c <= 0x7F) + { + word = c; + } + else if (c <= 0x7FF) + { + word = (0xC0 | ((c >> 6) & 0x1F)) << 8; + word |= 0x80 | (c & 0x3F); + } + else if (c <= 0xFFFF) + { + word = (0xE0 | ((c >> 12) & 0x0F)) << 16; + word |= (0x80 | ((c >> 6) & 0x3F)) << 8; + word |= 0x80 | (c & 0x3F); + } + else + { + word = (0xF0 | ((c >> 18) & 0x07)) << 24; + word |= (0x80 | ((c >> 12) & 0x3F)) << 16; + word |= (0x80 | ((c >> 6) & 0x3F)) << 8; + word |= 0x80 | (c & 0x3F); + } + + return word; +} + +static inline uint32 +utf8word_to_unicode(uint32 c) +{ + uint32 ucs; + + if (c <= 0x7F) + { + ucs = c; + } + else if (c <= 0xFFFF) + { + ucs = ((c >> 8) & 0x1F) << 6; + ucs |= c & 0x3F; + } + else if (c <= 0xFFFFFF) + { + ucs = ((c >> 16) & 0x0F) << 12; + ucs |= ((c >> 8) & 0x3F) << 6; + ucs |= c & 0x3F; + } + else + { + ucs = ((c >> 24) & 0x07) << 18; + ucs |= ((c >> 16) & 0x3F) << 12; + ucs |= ((c >> 8) & 0x3F) << 6; + ucs |= c & 0x3F; + } + + return ucs; +} + +/* + * Perform mapping of GB18030 ranges to UTF8 + * + * The ranges we need to convert are specified in gb-18030-2000.xml. + * All are ranges of 4-byte GB18030 codes. 
+ */ +static uint32 +conv_18030_to_utf8(uint32 code) +{ +#define conv18030(minunicode, mincode, maxcode) \ + if (code >= mincode && code <= maxcode) \ + return unicode_to_utf8word(gb_linear(code) - gb_linear(mincode) + minunicode) + + conv18030(0x0452, 0x8130D330, 0x8136A531); + conv18030(0x2643, 0x8137A839, 0x8138FD38); + conv18030(0x361B, 0x8230A633, 0x8230F237); + conv18030(0x3CE1, 0x8231D438, 0x8232AF32); + conv18030(0x4160, 0x8232C937, 0x8232F837); + conv18030(0x44D7, 0x8233A339, 0x8233C931); + conv18030(0x478E, 0x8233E838, 0x82349638); + conv18030(0x49B8, 0x8234A131, 0x8234E733); + conv18030(0x9FA6, 0x82358F33, 0x8336C738); + conv18030(0xE865, 0x8336D030, 0x84308534); + conv18030(0xFA2A, 0x84309C38, 0x84318537); + conv18030(0xFFE6, 0x8431A234, 0x8431A439); + conv18030(0x10000, 0x90308130, 0xE3329A35); + /* No mapping exists */ + return 0; +} + +/* + * Perform mapping of UTF8 ranges to GB18030 + */ +static uint32 +conv_utf8_to_18030(uint32 code) +{ + uint32 ucs = utf8word_to_unicode(code); + +#define convutf8(minunicode, maxunicode, mincode) \ + if (ucs >= minunicode && ucs <= maxunicode) \ + return gb_unlinear(ucs - minunicode + gb_linear(mincode)) + + convutf8(0x0452, 0x200F, 0x8130D330); + convutf8(0x2643, 0x2E80, 0x8137A839); + convutf8(0x361B, 0x3917, 0x8230A633); + convutf8(0x3CE1, 0x4055, 0x8231D438); + convutf8(0x4160, 0x4336, 0x8232C937); + convutf8(0x44D7, 0x464B, 0x8233A339); + convutf8(0x478E, 0x4946, 0x8233E838); + convutf8(0x49B8, 0x4C76, 0x8234A131); + convutf8(0x9FA6, 0xD7FF, 0x82358F33); + convutf8(0xE865, 0xF92B, 0x8336D030); + convutf8(0xFA2A, 0xFE2F, 0x84309C38); + convutf8(0xFFE6, 0xFFFF, 0x8431A234); + convutf8(0x10000, 0x10FFFF, 0x90308130); + /* No mapping exists */ + return 0; +} + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER, -- source string 
length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. + * ---------- + */ +Datum +gb18030_to_utf8(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_GB18030, PG_UTF8); + + converted = LocalToUtf(src, len, dest, + &gb18030_to_unicode_tree, + NULL, 0, + conv_18030_to_utf8, + PG_GB18030, + noError); + + PG_RETURN_INT32(converted); +} + +Datum +utf8_to_gb18030(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GB18030); + + converted = UtfToLocal(src, len, dest, + &gb18030_from_unicode_tree, + NULL, 0, + conv_utf8_to_18030, + PG_GB18030, + noError); + + PG_RETURN_INT32(converted); +} diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_gbk/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_gbk/Makefile new file mode 100644 index 0000000..eb20638 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_gbk/Makefile @@ -0,0 +1,13 @@ +#------------------------------------------------------------------------- +# +# src/backend/utils/mb/conversion_procs/utf8_and_gbk/Makefile +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/utf8_and_gbk +top_builddir = ../../../../../.. 
+include $(top_builddir)/src/Makefile.global + +NAME = utf8_and_gbk +PGFILEDESC = "utf8 <-> gbk text conversions" + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c b/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c new file mode 100644 index 0000000..881386d --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c @@ -0,0 +1,78 @@ +/*------------------------------------------------------------------------- + * + * GBK <--> UTF8 + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/mb/conversion_procs/utf8_and_gbk/utf8_and_gbk.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/gbk_to_utf8.map" +#include "../../Unicode/utf8_to_gbk.map" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(gbk_to_utf8); +PG_FUNCTION_INFO_V1(utf8_to_gbk); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
+ * ---------- + */ +Datum +gbk_to_utf8(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_GBK, PG_UTF8); + + converted = LocalToUtf(src, len, dest, + &gbk_to_unicode_tree, + NULL, 0, + NULL, + PG_GBK, + noError); + + PG_RETURN_INT32(converted); +} + +Datum +utf8_to_gbk(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_GBK); + + converted = UtfToLocal(src, len, dest, + &gbk_from_unicode_tree, + NULL, 0, + NULL, + PG_GBK, + noError); + + PG_RETURN_INT32(converted); +} diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/Makefile new file mode 100644 index 0000000..6fd0dd2 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/Makefile @@ -0,0 +1,13 @@ +#------------------------------------------------------------------------- +# +# src/backend/utils/mb/conversion_procs/utf8_and_iso8859/Makefile +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/utf8_and_iso8859 +top_builddir = ../../../../../.. 
+include $(top_builddir)/src/Makefile.global + +NAME = utf8_and_iso8859 +PGFILEDESC = "utf8 <-> iso8859 text conversions" + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c new file mode 100644 index 0000000..d93a521 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c @@ -0,0 +1,169 @@ +/*------------------------------------------------------------------------- + * + * ISO 8859 2-16 <--> UTF8 + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/mb/conversion_procs/utf8_and_iso8859/utf8_and_iso8859.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/iso8859_10_to_utf8.map" +#include "../../Unicode/iso8859_13_to_utf8.map" +#include "../../Unicode/iso8859_14_to_utf8.map" +#include "../../Unicode/iso8859_15_to_utf8.map" +#include "../../Unicode/iso8859_2_to_utf8.map" +#include "../../Unicode/iso8859_3_to_utf8.map" +#include "../../Unicode/iso8859_4_to_utf8.map" +#include "../../Unicode/iso8859_5_to_utf8.map" +#include "../../Unicode/iso8859_6_to_utf8.map" +#include "../../Unicode/iso8859_7_to_utf8.map" +#include "../../Unicode/iso8859_8_to_utf8.map" +#include "../../Unicode/iso8859_9_to_utf8.map" +#include "../../Unicode/utf8_to_iso8859_10.map" +#include "../../Unicode/utf8_to_iso8859_13.map" +#include "../../Unicode/utf8_to_iso8859_14.map" +#include "../../Unicode/utf8_to_iso8859_15.map" +#include "../../Unicode/utf8_to_iso8859_16.map" +#include "../../Unicode/utf8_to_iso8859_2.map" +#include "../../Unicode/utf8_to_iso8859_3.map" +#include "../../Unicode/utf8_to_iso8859_4.map" +#include 
"../../Unicode/utf8_to_iso8859_5.map" +#include "../../Unicode/utf8_to_iso8859_6.map" +#include "../../Unicode/utf8_to_iso8859_7.map" +#include "../../Unicode/utf8_to_iso8859_8.map" +#include "../../Unicode/utf8_to_iso8859_9.map" +#include "../../Unicode/iso8859_16_to_utf8.map" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(iso8859_to_utf8); +PG_FUNCTION_INFO_V1(utf8_to_iso8859); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. + * ---------- + */ + +typedef struct +{ + pg_enc encoding; + const pg_mb_radix_tree *map1; /* to UTF8 map name */ + const pg_mb_radix_tree *map2; /* from UTF8 map name */ +} pg_conv_map; + +static const pg_conv_map maps[] = { + {PG_LATIN2, &iso8859_2_to_unicode_tree, + &iso8859_2_from_unicode_tree}, /* ISO-8859-2 Latin 2 */ + {PG_LATIN3, &iso8859_3_to_unicode_tree, + &iso8859_3_from_unicode_tree}, /* ISO-8859-3 Latin 3 */ + {PG_LATIN4, &iso8859_4_to_unicode_tree, + &iso8859_4_from_unicode_tree}, /* ISO-8859-4 Latin 4 */ + {PG_LATIN5, &iso8859_9_to_unicode_tree, + &iso8859_9_from_unicode_tree}, /* ISO-8859-9 Latin 5 */ + {PG_LATIN6, &iso8859_10_to_unicode_tree, + &iso8859_10_from_unicode_tree}, /* ISO-8859-10 Latin 6 */ + {PG_LATIN7, &iso8859_13_to_unicode_tree, + &iso8859_13_from_unicode_tree}, /* ISO-8859-13 Latin 7 */ + {PG_LATIN8, &iso8859_14_to_unicode_tree, + &iso8859_14_from_unicode_tree}, /* ISO-8859-14 Latin 8 */ + {PG_LATIN9, &iso8859_15_to_unicode_tree, + &iso8859_15_from_unicode_tree}, /* ISO-8859-15 Latin 9 */ + {PG_LATIN10, &iso8859_16_to_unicode_tree, + &iso8859_16_from_unicode_tree}, /* ISO-8859-16 Latin 10 */ + {PG_ISO_8859_5, &iso8859_5_to_unicode_tree, + 
&iso8859_5_from_unicode_tree}, /* ISO-8859-5 */ + {PG_ISO_8859_6, &iso8859_6_to_unicode_tree, + &iso8859_6_from_unicode_tree}, /* ISO-8859-6 */ + {PG_ISO_8859_7, &iso8859_7_to_unicode_tree, + &iso8859_7_from_unicode_tree}, /* ISO-8859-7 */ + {PG_ISO_8859_8, &iso8859_8_to_unicode_tree, + &iso8859_8_from_unicode_tree}, /* ISO-8859-8 */ +}; + +Datum +iso8859_to_utf8(PG_FUNCTION_ARGS) +{ + int encoding = PG_GETARG_INT32(0); + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int i; + + CHECK_ENCODING_CONVERSION_ARGS(-1, PG_UTF8); + + for (i = 0; i < lengthof(maps); i++) + { + if (encoding == maps[i].encoding) + { + int converted; + + converted = LocalToUtf(src, len, dest, + maps[i].map1, + NULL, 0, + NULL, + encoding, + noError); + PG_RETURN_INT32(converted); + } + } + + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("unexpected encoding ID %d for ISO 8859 character sets", + encoding))); + + PG_RETURN_INT32(0); +} + +Datum +utf8_to_iso8859(PG_FUNCTION_ARGS) +{ + int encoding = PG_GETARG_INT32(1); + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int i; + + CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, -1); + + for (i = 0; i < lengthof(maps); i++) + { + if (encoding == maps[i].encoding) + { + int converted; + + converted = UtfToLocal(src, len, dest, + maps[i].map2, + NULL, 0, + NULL, + encoding, + noError); + PG_RETURN_INT32(converted); + } + } + + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("unexpected encoding ID %d for ISO 8859 character sets", + encoding))); + + PG_RETURN_INT32(0); +} diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/Makefile new file mode 100644 index 
0000000..0298284 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/Makefile @@ -0,0 +1,13 @@ +#------------------------------------------------------------------------- +# +# src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/Makefile +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1 +top_builddir = ../../../../../.. +include $(top_builddir)/src/Makefile.global + +NAME = utf8_and_iso8859_1 +PGFILEDESC = "utf8 <-> iso8859_1 text conversions" + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c new file mode 100644 index 0000000..d0dc4cc --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c @@ -0,0 +1,139 @@ +/*------------------------------------------------------------------------- + * + * ISO8859_1 <--> UTF8 + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/mb/conversion_procs/utf8_and_iso8859_1/utf8_and_iso8859_1.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(iso8859_1_to_utf8); +PG_FUNCTION_INFO_V1(utf8_to_iso8859_1); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
+ * ---------- + */ + +Datum +iso8859_1_to_utf8(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + unsigned char *start = src; + unsigned short c; + + CHECK_ENCODING_CONVERSION_ARGS(PG_LATIN1, PG_UTF8); + + while (len > 0) + { + c = *src; + if (c == 0) + { + if (noError) + break; + report_invalid_encoding(PG_LATIN1, (const char *) src, len); + } + if (!IS_HIGHBIT_SET(c)) + *dest++ = c; + else + { + *dest++ = (c >> 6) | 0xc0; + *dest++ = (c & 0x003f) | HIGHBIT; + } + src++; + len--; + } + *dest = '\0'; + + PG_RETURN_INT32(src - start); +} + +Datum +utf8_to_iso8859_1(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + unsigned char *start = src; + unsigned short c, + c1; + + CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_LATIN1); + + while (len > 0) + { + c = *src; + if (c == 0) + { + if (noError) + break; + report_invalid_encoding(PG_UTF8, (const char *) src, len); + } + /* fast path for ASCII-subset characters */ + if (!IS_HIGHBIT_SET(c)) + { + *dest++ = c; + src++; + len--; + } + else + { + int l = pg_utf_mblen(src); + + if (l > len || !pg_utf8_islegal(src, l)) + { + if (noError) + break; + report_invalid_encoding(PG_UTF8, (const char *) src, len); + } + if (l != 2) + { + if (noError) + break; + report_untranslatable_char(PG_UTF8, PG_LATIN1, + (const char *) src, len); + } + c1 = src[1] & 0x3f; + c = ((c & 0x1f) << 6) | c1; + if (c >= 0x80 && c <= 0xff) + { + *dest++ = (unsigned char) c; + src += 2; + len -= 2; + } + else + { + if (noError) + break; + report_untranslatable_char(PG_UTF8, PG_LATIN1, + (const char *) src, len); + } + } + } + *dest = '\0'; + + PG_RETURN_INT32(src - start); +} diff --git 
a/src/backend/utils/mb/conversion_procs/utf8_and_johab/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_johab/Makefile new file mode 100644 index 0000000..e48ef3e --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_johab/Makefile @@ -0,0 +1,13 @@ +#------------------------------------------------------------------------- +# +# src/backend/utils/mb/conversion_procs/utf8_and_johab/Makefile +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/utf8_and_johab +top_builddir = ../../../../../.. +include $(top_builddir)/src/Makefile.global + +NAME = utf8_and_johab +PGFILEDESC = "utf8 <-> johab text conversions" + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c b/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c new file mode 100644 index 0000000..317daa2 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c @@ -0,0 +1,78 @@ +/*------------------------------------------------------------------------- + * + * JOHAB <--> UTF8 + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/mb/conversion_procs/utf8_and_johab/utf8_and_johab.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/johab_to_utf8.map" +#include "../../Unicode/utf8_to_johab.map" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(johab_to_utf8); +PG_FUNCTION_INFO_V1(utf8_to_johab); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER, -- source string length 
+ * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. + * ---------- + */ +Datum +johab_to_utf8(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_JOHAB, PG_UTF8); + + converted = LocalToUtf(src, len, dest, + &johab_to_unicode_tree, + NULL, 0, + NULL, + PG_JOHAB, + noError); + + PG_RETURN_INT32(converted); +} + +Datum +utf8_to_johab(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_JOHAB); + + converted = UtfToLocal(src, len, dest, + &johab_from_unicode_tree, + NULL, 0, + NULL, + PG_JOHAB, + noError); + + PG_RETURN_INT32(converted); +} diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_sjis/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_sjis/Makefile new file mode 100644 index 0000000..448c5d4 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_sjis/Makefile @@ -0,0 +1,13 @@ +#------------------------------------------------------------------------- +# +# src/backend/utils/mb/conversion_procs/utf8_and_sjis/Makefile +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/utf8_and_sjis +top_builddir = ../../../../../.. 
+include $(top_builddir)/src/Makefile.global + +NAME = utf8_and_sjis +PGFILEDESC = "utf8 <-> sjis text conversions" + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c b/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c new file mode 100644 index 0000000..4c9348a --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c @@ -0,0 +1,78 @@ +/*------------------------------------------------------------------------- + * + * SJIS <--> UTF8 + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/mb/conversion_procs/utf8_and_sjis/utf8_and_sjis.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/sjis_to_utf8.map" +#include "../../Unicode/utf8_to_sjis.map" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(sjis_to_utf8); +PG_FUNCTION_INFO_V1(utf8_to_sjis); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
+ * ---------- + */ +Datum +sjis_to_utf8(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_SJIS, PG_UTF8); + + converted = LocalToUtf(src, len, dest, + &sjis_to_unicode_tree, + NULL, 0, + NULL, + PG_SJIS, + noError); + + PG_RETURN_INT32(converted); +} + +Datum +utf8_to_sjis(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_SJIS); + + converted = UtfToLocal(src, len, dest, + &sjis_from_unicode_tree, + NULL, 0, + NULL, + PG_SJIS, + noError); + + PG_RETURN_INT32(converted); +} diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/Makefile new file mode 100644 index 0000000..f7072a4 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/Makefile @@ -0,0 +1,13 @@ +#------------------------------------------------------------------------- +# +# src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/Makefile +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/utf8_and_sjis2004 +top_builddir = ../../../../../.. 
+include $(top_builddir)/src/Makefile.global + +NAME = utf8_and_sjis2004 +PGFILEDESC = "utf8 <-> sjis2004 text conversions" + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c b/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c new file mode 100644 index 0000000..1fffdc5 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c @@ -0,0 +1,78 @@ +/*------------------------------------------------------------------------- + * + * SHIFT_JIS_2004 <--> UTF8 + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/mb/conversion_procs/utf8_and_sjis2004/utf8_and_sjis2004.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/shift_jis_2004_to_utf8.map" +#include "../../Unicode/utf8_to_shift_jis_2004.map" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(shift_jis_2004_to_utf8); +PG_FUNCTION_INFO_V1(utf8_to_shift_jis_2004); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
+ * ---------- + */ +Datum +shift_jis_2004_to_utf8(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_SHIFT_JIS_2004, PG_UTF8); + + converted = LocalToUtf(src, len, dest, + &shift_jis_2004_to_unicode_tree, + LUmapSHIFT_JIS_2004_combined, lengthof(LUmapSHIFT_JIS_2004_combined), + NULL, + PG_SHIFT_JIS_2004, + noError); + + PG_RETURN_INT32(converted); +} + +Datum +utf8_to_shift_jis_2004(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_SHIFT_JIS_2004); + + converted = UtfToLocal(src, len, dest, + &shift_jis_2004_from_unicode_tree, + ULmapSHIFT_JIS_2004_combined, lengthof(ULmapSHIFT_JIS_2004_combined), + NULL, + PG_SHIFT_JIS_2004, + noError); + + PG_RETURN_INT32(converted); +} diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_uhc/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_uhc/Makefile new file mode 100644 index 0000000..cc6e0a9 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_uhc/Makefile @@ -0,0 +1,13 @@ +#------------------------------------------------------------------------- +# +# src/backend/utils/mb/conversion_procs/utf8_and_uhc/Makefile +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/utf8_and_uhc +top_builddir = ../../../../../.. 
+include $(top_builddir)/src/Makefile.global + +NAME = utf8_and_uhc +PGFILEDESC = "utf8 <-> uhc text conversions" + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c b/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c new file mode 100644 index 0000000..d9471da --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c @@ -0,0 +1,78 @@ +/*------------------------------------------------------------------------- + * + * UHC <--> UTF8 + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/mb/conversion_procs/utf8_and_uhc/utf8_and_uhc.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/uhc_to_utf8.map" +#include "../../Unicode/utf8_to_uhc.map" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(uhc_to_utf8); +PG_FUNCTION_INFO_V1(utf8_to_uhc); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. 
+ * ---------- + */ +Datum +uhc_to_utf8(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_UHC, PG_UTF8); + + converted = LocalToUtf(src, len, dest, + &uhc_to_unicode_tree, + NULL, 0, + NULL, + PG_UHC, + noError); + + PG_RETURN_INT32(converted); +} + +Datum +utf8_to_uhc(PG_FUNCTION_ARGS) +{ + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int converted; + + CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_UHC); + + converted = UtfToLocal(src, len, dest, + &uhc_from_unicode_tree, + NULL, 0, + NULL, + PG_UHC, + noError); + + PG_RETURN_INT32(converted); +} diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win/Makefile b/src/backend/utils/mb/conversion_procs/utf8_and_win/Makefile new file mode 100644 index 0000000..d8b18fb --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_win/Makefile @@ -0,0 +1,13 @@ +#------------------------------------------------------------------------- +# +# src/backend/utils/mb/conversion_procs/utf8_and_win/Makefile +# +#------------------------------------------------------------------------- +subdir = src/backend/utils/mb/conversion_procs/utf8_and_win +top_builddir = ../../../../../.. 
+include $(top_builddir)/src/Makefile.global + +NAME = utf8_and_win +PGFILEDESC = "utf8 <-> win text conversions" + +include $(srcdir)/../proc.mk diff --git a/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c b/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c new file mode 100644 index 0000000..110ba56 --- /dev/null +++ b/src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c @@ -0,0 +1,150 @@ +/*------------------------------------------------------------------------- + * + * WIN <--> UTF8 + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/utils/mb/conversion_procs/utf8_and_win/utf8_and_win.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" +#include "fmgr.h" +#include "mb/pg_wchar.h" +#include "../../Unicode/utf8_to_win1250.map" +#include "../../Unicode/utf8_to_win1251.map" +#include "../../Unicode/utf8_to_win1252.map" +#include "../../Unicode/utf8_to_win1253.map" +#include "../../Unicode/utf8_to_win1254.map" +#include "../../Unicode/utf8_to_win1255.map" +#include "../../Unicode/utf8_to_win1256.map" +#include "../../Unicode/utf8_to_win1257.map" +#include "../../Unicode/utf8_to_win1258.map" +#include "../../Unicode/utf8_to_win866.map" +#include "../../Unicode/utf8_to_win874.map" +#include "../../Unicode/win1250_to_utf8.map" +#include "../../Unicode/win1251_to_utf8.map" +#include "../../Unicode/win1252_to_utf8.map" +#include "../../Unicode/win1253_to_utf8.map" +#include "../../Unicode/win1254_to_utf8.map" +#include "../../Unicode/win1255_to_utf8.map" +#include "../../Unicode/win1256_to_utf8.map" +#include "../../Unicode/win1257_to_utf8.map" +#include "../../Unicode/win866_to_utf8.map" +#include "../../Unicode/win874_to_utf8.map" +#include "../../Unicode/win1258_to_utf8.map" + +PG_MODULE_MAGIC; + 
+PG_FUNCTION_INFO_V1(win_to_utf8); +PG_FUNCTION_INFO_V1(utf8_to_win); + +/* ---------- + * conv_proc( + * INTEGER, -- source encoding id + * INTEGER, -- destination encoding id + * CSTRING, -- source string (null terminated C string) + * CSTRING, -- destination string (null terminated C string) + * INTEGER, -- source string length + * BOOL -- if true, don't throw an error if conversion fails + * ) returns INTEGER; + * + * Returns the number of bytes successfully converted. + * ---------- + */ + +typedef struct +{ + pg_enc encoding; + const pg_mb_radix_tree *map1; /* to UTF8 map name */ + const pg_mb_radix_tree *map2; /* from UTF8 map name */ +} pg_conv_map; + +static const pg_conv_map maps[] = { + {PG_WIN866, &win866_to_unicode_tree, &win866_from_unicode_tree}, + {PG_WIN874, &win874_to_unicode_tree, &win874_from_unicode_tree}, + {PG_WIN1250, &win1250_to_unicode_tree, &win1250_from_unicode_tree}, + {PG_WIN1251, &win1251_to_unicode_tree, &win1251_from_unicode_tree}, + {PG_WIN1252, &win1252_to_unicode_tree, &win1252_from_unicode_tree}, + {PG_WIN1253, &win1253_to_unicode_tree, &win1253_from_unicode_tree}, + {PG_WIN1254, &win1254_to_unicode_tree, &win1254_from_unicode_tree}, + {PG_WIN1255, &win1255_to_unicode_tree, &win1255_from_unicode_tree}, + {PG_WIN1256, &win1256_to_unicode_tree, &win1256_from_unicode_tree}, + {PG_WIN1257, &win1257_to_unicode_tree, &win1257_from_unicode_tree}, + {PG_WIN1258, &win1258_to_unicode_tree, &win1258_from_unicode_tree}, +}; + +Datum +win_to_utf8(PG_FUNCTION_ARGS) +{ + int encoding = PG_GETARG_INT32(0); + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int i; + + CHECK_ENCODING_CONVERSION_ARGS(-1, PG_UTF8); + + for (i = 0; i < lengthof(maps); i++) + { + if (encoding == maps[i].encoding) + { + int converted; + + converted = LocalToUtf(src, len, dest, + maps[i].map1, + NULL, 0, + NULL, + encoding, + 
noError); + PG_RETURN_INT32(converted); + } + } + + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("unexpected encoding ID %d for WIN character sets", + encoding))); + + PG_RETURN_INT32(0); +} + +Datum +utf8_to_win(PG_FUNCTION_ARGS) +{ + int encoding = PG_GETARG_INT32(1); + unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2); + unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3); + int len = PG_GETARG_INT32(4); + bool noError = PG_GETARG_BOOL(5); + int i; + + CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, -1); + + for (i = 0; i < lengthof(maps); i++) + { + if (encoding == maps[i].encoding) + { + int converted; + + converted = UtfToLocal(src, len, dest, + maps[i].map2, + NULL, 0, + NULL, + encoding, + noError); + PG_RETURN_INT32(converted); + } + } + + ereport(ERROR, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("unexpected encoding ID %d for WIN character sets", + encoding))); + + PG_RETURN_INT32(0); +} |