diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 05:54:39 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 05:54:39 +0000 |
commit | 267c6f2ac71f92999e969232431ba04678e7437e (patch) | |
tree | 358c9467650e1d0a1d7227a21dac2e3d08b622b2 /sal/textenc/convertsinglebytetobmpunicode.cxx | |
parent | Initial commit. (diff) | |
download | libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.tar.xz libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.zip |
Adding upstream version 4:24.2.0.upstream/4%24.2.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sal/textenc/convertsinglebytetobmpunicode.cxx')
-rw-r--r-- | sal/textenc/convertsinglebytetobmpunicode.cxx | 209 |
1 files changed, 209 insertions, 0 deletions
diff --git a/sal/textenc/convertsinglebytetobmpunicode.cxx b/sal/textenc/convertsinglebytetobmpunicode.cxx new file mode 100644 index 0000000000..1fa9be11da --- /dev/null +++ b/sal/textenc/convertsinglebytetobmpunicode.cxx @@ -0,0 +1,209 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +#include <sal/config.h> + +#include <cassert> +#include <cstddef> + +#include <rtl/character.hxx> +#include <rtl/textcvt.h> +#include <sal/types.h> + +#include "context.hxx" +#include "converter.hxx" +#include "convertsinglebytetobmpunicode.hxx" + +sal_Size rtl_textenc_convertSingleByteToBmpUnicode( + void const * data, SAL_UNUSED_PARAMETER void *, char const * srcBuf, + sal_Size srcBytes, sal_Unicode * destBuf, sal_Size destChars, + sal_uInt32 flags, sal_uInt32 * info, sal_Size * srcCvtBytes) +{ + sal_Unicode const * map = static_cast< + rtl::textenc::BmpUnicodeToSingleByteConverterData const * >( + data)->byteToUnicode; + sal_uInt32 infoFlags = 0; + sal_Size converted = 0; + sal_Unicode * destBufPtr = destBuf; + sal_Unicode * destBufEnd = destBuf + destChars; + for (; converted < srcBytes; ++converted) { + char b = *srcBuf++; + sal_Unicode c = map[static_cast< sal_uInt8 >(b)]; + if (c == 0xFFFF) { + goto bad_input; + } + if (destBufEnd - destBufPtr < 1) { + goto no_output; + } + *destBufPtr++ = c; + continue; + bad_input: + switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion( + true/*undefined*/, false, b, flags, &destBufPtr, destBufEnd, + &infoFlags)) + { + case sal::detail::textenc::BAD_INPUT_STOP: + if ((flags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) { + ++converted; + } + break; + + case sal::detail::textenc::BAD_INPUT_CONTINUE: + continue; + + case sal::detail::textenc::BAD_INPUT_NO_OUTPUT: + goto no_output; + } + break; + no_output: + --srcBuf; + infoFlags |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL; + break; + } + if (info != nullptr) { + *info = infoFlags; + } + if (srcCvtBytes != nullptr) { + *srcCvtBytes = converted; + } + return destBufPtr - destBuf; +} + +sal_Size rtl_textenc_convertBmpUnicodeToSingleByte( + void const * data, void * context, + sal_Unicode const * srcBuf, sal_Size srcChars, char * destBuf, + sal_Size destBytes, sal_uInt32 flags, sal_uInt32 * info, + sal_Size * srcCvtChars) +{ + std::size_t entries = static_cast< + rtl::textenc::BmpUnicodeToSingleByteConverterData const * >( + data)->unicodeToByteEntries; + rtl::textenc::BmpUnicodeToSingleByteRange const * ranges = static_cast< + rtl::textenc::BmpUnicodeToSingleByteConverterData const * >( + data)->unicodeToByte; + sal_Unicode highSurrogate = 0; + sal_uInt32 infoFlags = 0; + sal_Size converted = 0; + char * destBufPtr = destBuf; + char * destBufEnd = destBuf + destBytes; + if (context != nullptr) { + highSurrogate = static_cast< ImplUnicodeToTextContext * >(context)-> + m_nHighSurrogate; + } + for (; converted < srcChars; ++converted) { + bool undefined = true; + sal_uInt32 c = *srcBuf++; + if (highSurrogate == 0) { + if (rtl::isHighSurrogate(c)) { + highSurrogate = static_cast< sal_Unicode >(c); + continue; + } + else if (rtl::isLowSurrogate(c)) + { + undefined = false; + goto bad_input; + } + } else if (rtl::isLowSurrogate(c)) { + c = rtl::combineSurrogates(highSurrogate, c); + } else { + undefined = false; + goto bad_input; + } + assert(rtl::isUnicodeScalarValue(c)); + // Linearly searching through the ranges if probably fastest, assuming + // that most converted characters belong to the ASCII subset: + for (std::size_t i = 0; i < entries; ++i) { + if (c < ranges[i].unicode) { + break; + } + if (c <= sal::static_int_cast< sal_uInt32 >( + ranges[i].unicode + ranges[i].range)) + { + if (destBufEnd - destBufPtr < 1) { + goto no_output; + } + *destBufPtr++ = static_cast< char >( + ranges[i].byte + (c - ranges[i].unicode)); + goto done; + } + } + goto bad_input; + done: + highSurrogate = 0; + continue; + bad_input: + switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion( + undefined, c, flags, &destBufPtr, destBufEnd, &infoFlags, nullptr, + 0, nullptr)) + { + case sal::detail::textenc::BAD_INPUT_STOP: + highSurrogate = 0; + break; + + case sal::detail::textenc::BAD_INPUT_CONTINUE: + highSurrogate = 0; + continue; + + case sal::detail::textenc::BAD_INPUT_NO_OUTPUT: + goto no_output; + } + break; + no_output: + --srcBuf; + infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + if (highSurrogate != 0 + && ((infoFlags + & (RTL_UNICODETOTEXT_INFO_ERROR + | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)) + == 0)) + { + if ((flags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) { + infoFlags |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL; + } else { + switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion( + false, 0, flags, &destBufPtr, destBufEnd, &infoFlags, nullptr, + 0, nullptr)) + { + case sal::detail::textenc::BAD_INPUT_STOP: + case sal::detail::textenc::BAD_INPUT_CONTINUE: + highSurrogate = 0; + break; + + case sal::detail::textenc::BAD_INPUT_NO_OUTPUT: + infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL; + break; + } + } + } + if (context != nullptr) { + static_cast< ImplUnicodeToTextContext * >(context)->m_nHighSurrogate + = highSurrogate; + } + if (info != nullptr) { + *info = infoFlags; + } + if (srcCvtChars != nullptr) { + *srcCvtChars = converted; + } + return destBufPtr - destBuf; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |