summaryrefslogtreecommitdiffstats
path: root/sal/textenc/tcvtmb.cxx
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--sal/textenc/tcvtmb.cxx682
1 files changed, 682 insertions, 0 deletions
diff --git a/sal/textenc/tcvtmb.cxx b/sal/textenc/tcvtmb.cxx
new file mode 100644
index 000000000..89e89c56c
--- /dev/null
+++ b/sal/textenc/tcvtmb.cxx
@@ -0,0 +1,682 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#include <sal/config.h>
+
+#include <rtl/textcvt.h>
+
+#include "handleundefinedunicodetotextchar.hxx"
+#include "tenchelp.hxx"
+#include "unichars.hxx"
+
+/* DBCS to Unicode conversion routine use a lead table for the first byte, */
+/* where we determine the trail table or for single byte chars the unicode */
+/* value. We have for all lead byte a separate table, because we can */
+/* then share many tables for different charset encodings. */
+
+sal_Size ImplDBCSToUnicode( const void* pData, SAL_UNUSED_PARAMETER void*,
+ const char* pSrcBuf, sal_Size nSrcBytes,
+ sal_Unicode* pDestBuf, sal_Size nDestChars,
+ sal_uInt32 nFlags, sal_uInt32* pInfo,
+ sal_Size* pSrcCvtBytes )
+{
+ unsigned char cTrail;
+ sal_Unicode cConv;
+ const ImplDBCSToUniLeadTab* pLeadEntry;
+ const ImplDBCSConvertData* pConvertData = static_cast<const ImplDBCSConvertData*>(pData);
+ const ImplDBCSToUniLeadTab* pLeadTab = pConvertData->mpToUniLeadTab;
+ sal_Unicode* pEndDestBuf;
+ const char* pEndSrcBuf;
+ char const * startOfCurrentChar = pSrcBuf;
+
+ *pInfo = 0;
+ pEndDestBuf = pDestBuf+nDestChars;
+ pEndSrcBuf = pSrcBuf+nSrcBytes;
+ while ( pSrcBuf < pEndSrcBuf )
+ {
+ unsigned char cLead = static_cast<unsigned char>(*pSrcBuf);
+
+ /* get entry for the lead byte */
+ pLeadEntry = pLeadTab+cLead;
+
+ /* SingleByte char? */
+ if (pLeadEntry->mpToUniTrailTab == nullptr
+ || cLead < pConvertData->mnLeadStart
+ || cLead > pConvertData->mnLeadEnd)
+ {
+ cConv = pLeadEntry->mnUniChar;
+ if ( !cConv && (cLead != 0) )
+ {
+ *pInfo |= RTL_TEXTTOUNICODE_INFO_UNDEFINED;
+ if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR )
+ {
+ if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
+ ++pSrcBuf;
+ } else {
+ pSrcBuf = startOfCurrentChar;
+ }
+ *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
+ break;
+ }
+ if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE )
+ {
+ pSrcBuf++;
+ startOfCurrentChar = pSrcBuf;
+ continue;
+ }
+ cConv = ImplGetUndefinedUnicodeChar(cLead, nFlags);
+ }
+ }
+ else
+ {
+ /* Source buffer too small */
+ if ( pSrcBuf +1 == pEndSrcBuf )
+ {
+ if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0 )
+ {
+ *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL;
+ break;
+ }
+ cConv = 0;
+ }
+ else
+ {
+ pSrcBuf++;
+ cTrail = static_cast<unsigned char>(*pSrcBuf);
+ if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) )
+ cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart];
+ else
+ cConv = 0;
+
+ if ( !cConv )
+ {
+ /* EUDC Ranges */
+ sal_uInt16 i;
+ const ImplDBCSEUDCData* pEUDCTab = pConvertData->mpEUDCTab;
+ for ( i = 0; i < pConvertData->mnEUDCCount; i++ )
+ {
+ if ( (cLead >= pEUDCTab->mnLeadStart) &&
+ (cLead <= pEUDCTab->mnLeadEnd) )
+ {
+ if ( (cTrail >= pEUDCTab->mnTrail1Start) &&
+ (cTrail <= pEUDCTab->mnTrail1End) )
+ {
+ cConv = pEUDCTab->mnUniStart+
+ ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
+ (cTrail-pEUDCTab->mnTrail1Start);
+ break;
+ }
+ sal_uInt16 nTrailCount = pEUDCTab->mnTrail1End-pEUDCTab->mnTrail1Start+1;
+ if ( (pEUDCTab->mnTrailCount >= 2) &&
+ (cTrail >= pEUDCTab->mnTrail2Start) &&
+ (cTrail <= pEUDCTab->mnTrail2End) )
+ {
+ cConv = pEUDCTab->mnUniStart+
+ ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
+ nTrailCount+
+ (cTrail-pEUDCTab->mnTrail2Start);
+ break;
+ }
+ nTrailCount = pEUDCTab->mnTrail2End-pEUDCTab->mnTrail2Start+1;
+ if ( (pEUDCTab->mnTrailCount >= 3) &&
+ (cTrail >= pEUDCTab->mnTrail3Start) &&
+ (cTrail <= pEUDCTab->mnTrail3End) )
+ {
+ cConv = pEUDCTab->mnUniStart+
+ ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
+ nTrailCount+
+ (cTrail-pEUDCTab->mnTrail3Start);
+ break;
+ }
+ }
+
+ pEUDCTab++;
+ }
+
+ if ( !cConv )
+ {
+ /* We compare the full range of the trail we defined, */
+ /* which can often be greater than the limit. We do this */
+ /* so that extensions that don't consider encodings */
+ /* correctly treat double-byte characters as a single */
+ /* character as much as possible. */
+
+ if (cLead < pConvertData->mnLeadStart
+ || cLead > pConvertData->mnLeadEnd
+ || cTrail < pConvertData->mnTrailStart
+ || cTrail > pConvertData->mnTrailEnd)
+ {
+ *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
+ if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
+ {
+ if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
+ ++pSrcBuf;
+ } else {
+ pSrcBuf = startOfCurrentChar;
+ }
+ *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
+ break;
+ }
+ if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
+ {
+ pSrcBuf++;
+ startOfCurrentChar = pSrcBuf;
+ continue;
+ }
+ cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
+ }
+ }
+ }
+ }
+ if ( !cConv )
+ {
+ *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED;
+ if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR )
+ {
+ if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
+ ++pSrcBuf;
+ } else {
+ pSrcBuf = startOfCurrentChar;
+ }
+ *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
+ break;
+ }
+ if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE )
+ {
+ pSrcBuf++;
+ startOfCurrentChar = pSrcBuf;
+ continue;
+ }
+ cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
+ }
+ }
+
+ if ( pDestBuf == pEndDestBuf )
+ {
+ *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
+ break;
+ }
+
+ *pDestBuf = cConv;
+ pDestBuf++;
+ pSrcBuf++;
+ startOfCurrentChar = pSrcBuf;
+ }
+
+ *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
+ return (nDestChars - (pEndDestBuf-pDestBuf));
+}
+
+sal_Size ImplUnicodeToDBCS( const void* pData, SAL_UNUSED_PARAMETER void*,
+ const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
+ char* pDestBuf, sal_Size nDestBytes,
+ sal_uInt32 nFlags, sal_uInt32* pInfo,
+ sal_Size* pSrcCvtChars )
+{
+ sal_uInt16 cConv;
+ sal_Unicode c;
+ const ImplUniToDBCSHighTab* pHighEntry;
+ const ImplDBCSConvertData* pConvertData = static_cast<const ImplDBCSConvertData*>(pData);
+ const ImplUniToDBCSHighTab* pHighTab = pConvertData->mpToDBCSHighTab;
+ char* pEndDestBuf;
+ const sal_Unicode* pEndSrcBuf;
+
+ bool bCheckRange =
+ pConvertData->mnLeadStart != 0 || pConvertData->mnLeadEnd != 0xFF;
+ /* this statement has the effect that this extra check is only done for
+ EUC-KR, which uses the MS-949 tables, but does not support the full
+ range of MS-949 */
+
+ *pInfo = 0;
+ pEndDestBuf = pDestBuf+nDestBytes;
+ pEndSrcBuf = pSrcBuf+nSrcChars;
+ while ( pSrcBuf < pEndSrcBuf )
+ {
+ c = *pSrcBuf;
+ unsigned char nHighChar = static_cast<unsigned char>((c >> 8) & 0xFF);
+ unsigned char nLowChar = static_cast<unsigned char>(c & 0xFF);
+
+ /* get entry for the high byte */
+ pHighEntry = pHighTab+nHighChar;
+
+ /* is low byte in the table range */
+ if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
+ {
+ cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
+ if (bCheckRange && cConv > 0x7F
+ && ((cConv >> 8) < pConvertData->mnLeadStart
+ || (cConv >> 8) > pConvertData->mnLeadEnd
+ || (cConv & 0xFF) < pConvertData->mnTrailStart
+ || (cConv & 0xFF) > pConvertData->mnTrailEnd))
+ cConv = 0;
+ }
+ else
+ cConv = 0;
+
+ if (cConv == 0 && c != 0)
+ {
+ /* Map to EUDC ranges: */
+ ImplDBCSEUDCData const * pEUDCTab = pConvertData->mpEUDCTab;
+ sal_uInt32 i;
+ for (i = 0; i < pConvertData->mnEUDCCount; ++i)
+ {
+ if (c >= pEUDCTab->mnUniStart && c <= pEUDCTab->mnUniEnd)
+ {
+ sal_uInt32 nIndex = c - pEUDCTab->mnUniStart;
+ sal_uInt32 nLeadOff
+ = nIndex / pEUDCTab->mnTrailRangeCount;
+ sal_uInt32 nTrailOff
+ = nIndex % pEUDCTab->mnTrailRangeCount;
+ sal_uInt32 nSize;
+ cConv = static_cast<sal_uInt16>((pEUDCTab->mnLeadStart + nLeadOff) << 8);
+ nSize
+ = pEUDCTab->mnTrail1End - pEUDCTab->mnTrail1Start + 1;
+ if (nTrailOff < nSize)
+ {
+ cConv |= pEUDCTab->mnTrail1Start + nTrailOff;
+ break;
+ }
+ nTrailOff -= nSize;
+ nSize
+ = pEUDCTab->mnTrail2End - pEUDCTab->mnTrail2Start + 1;
+ if (nTrailOff < nSize)
+ {
+ cConv |= pEUDCTab->mnTrail2Start + nTrailOff;
+ break;
+ }
+ nTrailOff -= nSize;
+ cConv |= pEUDCTab->mnTrail3Start + nTrailOff;
+ break;
+ }
+ pEUDCTab++;
+ }
+
+ /* FIXME
+ * SB: Not sure why this is in here. Plus, it does not work as
+ * intended when (c & 0xFF) == 0, because the next !cConv check
+ * will then think c has not yet been converted...
+ */
+ if (c >= RTL_TEXTCVT_BYTE_PRIVATE_START
+ && c <= RTL_TEXTCVT_BYTE_PRIVATE_END)
+ {
+ if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 )
+ cConv = static_cast< char >(static_cast< unsigned char >(c & 0xFF));
+ }
+ }
+
+ if (cConv == 0 && c != 0)
+ {
+ if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE )
+ {
+ /* !!! */
+ }
+
+ if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR )
+ {
+ /* !!! */
+ }
+
+ /* Handle undefined and surrogates characters */
+ /* (all surrogates characters are undefined) */
+ if (sal::detail::textenc::handleUndefinedUnicodeToTextChar(
+ &pSrcBuf, pEndSrcBuf, &pDestBuf, pEndDestBuf, nFlags,
+ pInfo))
+ continue;
+ break;
+ }
+
+ /* SingleByte */
+ if ( !(cConv & 0xFF00) )
+ {
+ if ( pDestBuf == pEndDestBuf )
+ {
+ *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
+ break;
+ }
+
+ *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF));
+ pDestBuf++;
+ }
+ else
+ {
+ if ( pDestBuf+1 >= pEndDestBuf )
+ {
+ *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
+ break;
+ }
+
+ *pDestBuf = static_cast< char >(static_cast< unsigned char >((cConv >> 8) & 0xFF));
+ pDestBuf++;
+ *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF));
+ pDestBuf++;
+ }
+
+ pSrcBuf++;
+ }
+
+ *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf);
+ return (nDestBytes - (pEndDestBuf-pDestBuf));
+}
+
+#define JIS_EUC_LEAD_OFF 0x80
+#define JIS_EUC_TRAIL_OFF 0x80
+
+sal_Size ImplEUCJPToUnicode( const void* pData,
+ SAL_UNUSED_PARAMETER void*,
+ const char* pSrcBuf, sal_Size nSrcBytes,
+ sal_Unicode* pDestBuf, sal_Size nDestChars,
+ sal_uInt32 nFlags, sal_uInt32* pInfo,
+ sal_Size* pSrcCvtBytes )
+{
+ unsigned char cLead = '\0';
+ unsigned char cTrail = '\0';
+ sal_Unicode cConv;
+ const ImplDBCSToUniLeadTab* pLeadEntry;
+ const ImplDBCSToUniLeadTab* pLeadTab;
+ const ImplEUCJPConvertData* pConvertData = static_cast<const ImplEUCJPConvertData*>(pData);
+ sal_Unicode* pEndDestBuf;
+ const char* pEndSrcBuf;
+ char const * startOfCurrentChar = pSrcBuf;
+
+ *pInfo = 0;
+ pEndDestBuf = pDestBuf+nDestChars;
+ pEndSrcBuf = pSrcBuf+nSrcBytes;
+ while ( pSrcBuf < pEndSrcBuf )
+ {
+ unsigned char c = static_cast<unsigned char>(*pSrcBuf);
+
+ /* ASCII */
+ if ( c <= 0x7F )
+ cConv = c;
+ else
+ {
+ /* SS2 - Half-width katakana */
+ /* 8E + A1-DF */
+ if ( c == 0x8E )
+ {
+ /* Source buffer too small */
+ if ( pSrcBuf + 1 == pEndSrcBuf )
+ {
+ *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL;
+ break;
+ }
+
+ pSrcBuf++;
+ c = static_cast<unsigned char>(*pSrcBuf);
+ if ( (c >= 0xA1) && (c <= 0xDF) )
+ cConv = 0xFF61+(c-0xA1);
+ else
+ {
+ cConv = 0;
+ cLead = 0x8E;
+ cTrail = c;
+ }
+ }
+ else
+ {
+ /* SS3 - JIS 0212-1990 */
+ /* 8F + A1-FE + A1-FE */
+ if ( c == 0x8F )
+ {
+ /* Source buffer too small */
+ if (pEndSrcBuf - pSrcBuf < 3)
+ {
+ *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL;
+ break;
+ }
+
+ pSrcBuf++;
+ cLead = static_cast<unsigned char>(*pSrcBuf);
+ pSrcBuf++;
+ cTrail = static_cast<unsigned char>(*pSrcBuf);
+ pLeadTab = pConvertData->mpJIS0212ToUniLeadTab;
+ }
+ /* CodeSet 2 JIS 0208-1997 */
+ /* A1-FE + A1-FE */
+ else
+ {
+ /* Source buffer too small */
+ if ( pSrcBuf + 1 == pEndSrcBuf )
+ {
+ *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL;
+ break;
+ }
+
+ cLead = c;
+ pSrcBuf++;
+ cTrail = static_cast<unsigned char>(*pSrcBuf);
+ pLeadTab = pConvertData->mpJIS0208ToUniLeadTab;
+ }
+
+ /* Undefined Range */
+ if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) )
+ cConv = 0;
+ else
+ {
+ cLead -= JIS_EUC_LEAD_OFF;
+ cTrail -= JIS_EUC_TRAIL_OFF;
+ pLeadEntry = pLeadTab+cLead;
+ if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) )
+ cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart];
+ else
+ cConv = 0;
+ }
+ }
+
+ if ( !cConv )
+ {
+ /* We compare the full range of the trail we defined, */
+ /* which can often be greater than the limit. We do this */
+ /* so that extensions that don't consider encodings */
+ /* correctly treat double-byte characters as a single */
+ /* character as much as possible. */
+
+ if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) )
+ {
+ *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
+ if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
+ {
+ if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
+ ++pSrcBuf;
+ } else {
+ pSrcBuf = startOfCurrentChar;
+ }
+ *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
+ break;
+ }
+ if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
+ {
+ pSrcBuf++;
+ startOfCurrentChar = pSrcBuf;
+ continue;
+ }
+ cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
+ }
+ else
+ {
+ if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
+ ++pSrcBuf;
+ } else {
+ pSrcBuf = startOfCurrentChar;
+ }
+ *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED;
+ if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR )
+ {
+ *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
+ break;
+ }
+ if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE )
+ {
+ pSrcBuf++;
+ startOfCurrentChar = pSrcBuf;
+ continue;
+ }
+ cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
+ }
+ }
+ }
+
+ if ( pDestBuf == pEndDestBuf )
+ {
+ *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
+ break;
+ }
+
+ *pDestBuf = cConv;
+ pDestBuf++;
+ pSrcBuf++;
+ startOfCurrentChar = pSrcBuf;
+ }
+
+ *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
+ return (nDestChars - (pEndDestBuf-pDestBuf));
+}
+
+sal_Size ImplUnicodeToEUCJP( const void* pData,
+ SAL_UNUSED_PARAMETER void*,
+ const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
+ char* pDestBuf, sal_Size nDestBytes,
+ sal_uInt32 nFlags, sal_uInt32* pInfo,
+ sal_Size* pSrcCvtChars )
+{
+ sal_uInt32 cConv;
+ sal_Unicode c;
+ unsigned char nHighChar;
+ unsigned char nLowChar;
+ const ImplUniToDBCSHighTab* pHighEntry;
+ const ImplUniToDBCSHighTab* pHighTab;
+ const ImplEUCJPConvertData* pConvertData = static_cast<const ImplEUCJPConvertData*>(pData);
+ char* pEndDestBuf;
+ const sal_Unicode* pEndSrcBuf;
+
+ *pInfo = 0;
+ pEndDestBuf = pDestBuf+nDestBytes;
+ pEndSrcBuf = pSrcBuf+nSrcChars;
+ while ( pSrcBuf < pEndSrcBuf )
+ {
+ c = *pSrcBuf;
+
+ /* ASCII */
+ if ( c <= 0x7F )
+ cConv = c;
+ /* Half-width katakana */
+ else if ( (c >= 0xFF61) && (c <= 0xFF9F) )
+ cConv = 0x8E00+0xA1+(c-0xFF61);
+ else
+ {
+ nHighChar = static_cast<unsigned char>((c >> 8) & 0xFF);
+ nLowChar = static_cast<unsigned char>(c & 0xFF);
+
+ /* JIS 0208 */
+ pHighTab = pConvertData->mpUniToJIS0208HighTab;
+ pHighEntry = pHighTab+nHighChar;
+ if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
+ {
+ cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
+ if (cConv != 0)
+ cConv |= 0x8080;
+ }
+ else
+ cConv = 0;
+
+ /* JIS 0212 */
+ if ( !cConv )
+ {
+ pHighTab = pConvertData->mpUniToJIS0212HighTab;
+ pHighEntry = pHighTab+nHighChar;
+ if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
+ {
+ cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
+ if (cConv != 0)
+ cConv |= 0x8F8080;
+ }
+
+ if ( !cConv )
+ {
+ if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE )
+ {
+ /* !!! */
+ }
+
+ if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR )
+ {
+ /* !!! */
+ }
+
+ /* Handle undefined and surrogates characters */
+ /* (all surrogates characters are undefined) */
+ if (sal::detail::textenc::handleUndefinedUnicodeToTextChar(
+ &pSrcBuf, pEndSrcBuf, &pDestBuf, pEndDestBuf,
+ nFlags, pInfo))
+ continue;
+ break;
+ }
+ }
+ }
+
+ /* SingleByte */
+ if ( !(cConv & 0xFFFF00) )
+ {
+ if ( pDestBuf == pEndDestBuf )
+ {
+ *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
+ break;
+ }
+
+ *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF));
+ pDestBuf++;
+ }
+ /* DoubleByte */
+ else if ( !(cConv & 0xFF0000) )
+ {
+ if ( pDestBuf+1 >= pEndDestBuf )
+ {
+ *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
+ break;
+ }
+
+ *pDestBuf = static_cast< char >(static_cast< unsigned char >((cConv >> 8) & 0xFF));
+ pDestBuf++;
+ *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF));
+ pDestBuf++;
+ }
+ else
+ {
+ if ( pDestBuf+2 >= pEndDestBuf )
+ {
+ *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
+ break;
+ }
+
+ *pDestBuf = static_cast< char >(static_cast< unsigned char >((cConv >> 16) & 0xFF));
+ pDestBuf++;
+ *pDestBuf = static_cast< char >(static_cast< unsigned char >((cConv >> 8) & 0xFF));
+ pDestBuf++;
+ *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF));
+ pDestBuf++;
+ }
+
+ pSrcBuf++;
+ }
+
+ *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf);
+ return (nDestBytes - (pEndDestBuf-pDestBuf));
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */