From ed5640d8b587fbcfed7dd7967f3de04b37a76f26 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sun, 7 Apr 2024 11:06:44 +0200
Subject: Adding upstream version 4:7.4.7.

Signed-off-by: Daniel Baumann
---
 sax/inc/xml2utf.hxx | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 129 insertions(+)
 create mode 100644 sax/inc/xml2utf.hxx

(limited to 'sax/inc')

diff --git a/sax/inc/xml2utf.hxx b/sax/inc/xml2utf.hxx
new file mode 100644
index 000000000..ead6ac114
--- /dev/null
+++ b/sax/inc/xml2utf.hxx
@@ -0,0 +1,129 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ *   Licensed to the Apache Software Foundation (ASF) under one or more
+ *   contributor license agreements. See the NOTICE file distributed
+ *   with this work for additional information regarding copyright
+ *   ownership. The ASF licenses this file to you under the Apache
+ *   License, Version 2.0 (the "License"); you may not use this file
+ *   except in compliance with the License. You may obtain a copy of
+ *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SAX_INC_XML2UTF_HXX
+#define INCLUDED_SAX_INC_XML2UTF_HXX
+
+// NOTE(review): the include targets and the template arguments written
+// without inner spaces (Sequence<...>, unique_ptr<...>) were missing from
+// this copy of the patch. They are reconstructed here from the names this
+// header uses and from the spaced forms that were still present -- confirm
+// against the upstream file.
+#include <sal/config.h>
+
+#include <memory>
+
+#include <rtl/string.hxx>
+#include <rtl/textcvt.h>
+
+#include <com/sun/star/io/XInputStream.hpp>
+
+namespace sax_expatwrap {
+
+/*----------------------------------------
+*
+* Text2UnicodeConverter
+*
+* Created from an encoding name; convert() takes a sequence of bytes and
+* returns a sequence of sal_Unicode.
+*
+**-----------------------------------------*/
+class Text2UnicodeConverter
+{
+
+public:
+    // @param sEncoding name of the source text encoding
+    Text2UnicodeConverter( const OString & sEncoding );
+    ~Text2UnicodeConverter();
+
+    // Converts the given bytes and returns the resulting sal_Unicode sequence.
+    css::uno::Sequence < sal_Unicode > convert( const css::uno::Sequence<sal_Int8> & );
+
+    // @return the current value of m_bCanContinue
+    bool canContinue() const { return m_bCanContinue; }
+
+private:
+    void init( rtl_TextEncoding encoding );
+
+    rtl_TextToUnicodeConverter   m_convText2Unicode;
+    rtl_TextToUnicodeContext     m_contextText2Unicode;
+    bool                         m_bCanContinue;
+    bool                         m_bInitialized;
+    // NOTE(review): presumably input bytes kept between convert() calls -- confirm in the .cxx
+    css::uno::Sequence<sal_Int8> m_seqSource;
+};
+
+/*----------------------------------------
+*
+* Unicode2TextConverter
+*
+* Created from an rtl_TextEncoding; convert() takes nLength sal_Unicode
+* characters and returns a sequence of bytes.
+*
+**-----------------------------------------*/
+class Unicode2TextConverter
+{
+public:
+    // @param encoding the target text encoding
+    Unicode2TextConverter( rtl_TextEncoding encoding );
+    ~Unicode2TextConverter();
+
+    // @param nLength number of sal_Unicode characters available at the given pointer
+    css::uno::Sequence<sal_Int8> convert( const sal_Unicode * , sal_Int32 nLength );
+
+private:
+    rtl_UnicodeToTextConverter      m_convUnicode2Text;
+    rtl_UnicodeToTextContext        m_contextUnicode2Text;
+    // NOTE(review): presumably characters kept between convert() calls -- confirm in the .cxx
+    css::uno::Sequence<sal_Unicode> m_seqSource;
+};
+
+
+/*----------------------------------------
+*
+* XMLFile2UTFConverter
+*
+* Holds an input stream and an encoding name and offers readAndConvert()
+* on top of them.
+*
+**-----------------------------------------*/
+class XMLFile2UTFConverter
+{
+public:
+    XMLFile2UTFConverter( ):
+        m_bStarted( false )
+        {}
+
+    // Stores the stream in m_in.
+    void setInputStream( css::uno::Reference< css::io::XInputStream > const &r ) { m_in = r; }
+    // Stores the encoding name in m_sEncoding.
+    void setEncoding( const OString &s ) { m_sEncoding = s; }
+
+
+    // @param nMaxToRead The number of chars that should be read. Note that this is not
+    //                   an exact number: fewer or more bytes than requested may be returned.
+    /// @throws css::io::IOException
+    /// @throws css::io::NotConnectedException
+    /// @throws css::io::BufferSizeExceededException
+    /// @throws css::uno::RuntimeException
+    sal_Int32 readAndConvert( css::uno::Sequence<sal_Int8> &seq , sal_Int32 nMaxToRead );
+
+private:
+
+    // Called only on the first Sequence of bytes. Tries to figure out file format and encoding information.
+    // @return TRUE, when encoding information could be retrieved
+    // @return FALSE, when no encoding information was found in file
+    bool scanForEncoding( css::uno::Sequence<sal_Int8> &seq );
+
+    // Called only on the first Sequence of bytes. Tries to figure out
+    // if enough data is available to scan encoding
+    // @return TRUE, when encoding is retrievable
+    // @return FALSE, when more data is needed
+    static bool isEncodingRecognizable( const css::uno::Sequence< sal_Int8 > & seq );
+
+    // When the encoding attribute is within the text (in the first line), it is removed.
+    static void removeEncoding( css::uno::Sequence<sal_Int8> &seq );
+
+    // Initializes decoding depending on the m_sEncoding setting
+    void initializeDecoding();
+private:
+    css::uno::Reference< css::io::XInputStream >  m_in;
+
+    bool m_bStarted;        // set to false by the constructor
+    OString m_sEncoding;    // value passed to setEncoding()
+
+    std::unique_ptr<Text2UnicodeConverter> m_pText2Unicode;
+    std::unique_ptr<Unicode2TextConverter> m_pUnicode2Text;
+};
+}
+
+#endif // INCLUDED_SAX_INC_XML2UTF_HXX
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
-- 
cgit v1.2.3