diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 05:54:39 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 05:54:39 +0000 |
commit | 267c6f2ac71f92999e969232431ba04678e7437e (patch) | |
tree | 358c9467650e1d0a1d7227a21dac2e3d08b622b2 /sax/inc | |
parent | Initial commit. (diff) | |
download | libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.tar.xz libreoffice-267c6f2ac71f92999e969232431ba04678e7437e.zip |
Adding upstream version 4:24.2.0. (ref: upstream/4%24.2.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sax/inc')
-rw-r--r-- | sax/inc/xml2utf.hxx | 129 |
1 files changed, 129 insertions, 0 deletions
diff --git a/sax/inc/xml2utf.hxx b/sax/inc/xml2utf.hxx
new file mode 100644
index 0000000000..ead6ac1143
--- /dev/null
+++ b/sax/inc/xml2utf.hxx
@@ -0,0 +1,129 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SAX_INC_XML2UTF_HXX
+#define INCLUDED_SAX_INC_XML2UTF_HXX
+
+#include <sal/config.h>
+
+#include <memory>
+
+#include <sal/types.h>
+#include <rtl/string.hxx>
+
+#include <com/sun/star/io/XInputStream.hpp>
+
+namespace sax_expatwrap {
+// Converts sal_Int8 byte sequences to sal_Unicode sequences; the source encoding is named by sEncoding.
+class Text2UnicodeConverter
+{
+
+public:
+    Text2UnicodeConverter( const OString & sEncoding );
+    ~Text2UnicodeConverter();
+
+    css::uno::Sequence < sal_Unicode > convert( const css::uno::Sequence<sal_Int8> & );
+    bool canContinue() const { return m_bCanContinue; }
+
+private:
+    void init( rtl_TextEncoding encoding );
+
+    rtl_TextToUnicodeConverter m_convText2Unicode;
+    rtl_TextToUnicodeContext m_contextText2Unicode;
+    bool m_bCanContinue;
+    bool m_bInitialized;
+    css::uno::Sequence<sal_Int8> m_seqSource;
+};
+
+/*----------------------------------------
+*
+* Unicode2TextConverter
+* Converts sal_Unicode buffers to sal_Int8 byte sequences in the given rtl_TextEncoding.
+**-----------------------------------------*/
+class Unicode2TextConverter
+{
+public:
+    Unicode2TextConverter( rtl_TextEncoding encoding );
+    ~Unicode2TextConverter();
+
+    css::uno::Sequence<sal_Int8> convert( const sal_Unicode * , sal_Int32 nLength );
+
+private:
+    rtl_UnicodeToTextConverter m_convUnicode2Text;
+    rtl_UnicodeToTextContext m_contextUnicode2Text;
+    css::uno::Sequence<sal_Unicode> m_seqSource;
+};
+
+
+/*----------------------------------------
+*
+* XMLFile2UTFConverter
+* Reads bytes from the XInputStream set via setInputStream() and converts them (see readAndConvert).
+**-----------------------------------------*/
+class XMLFile2UTFConverter
+{
+public:
+    XMLFile2UTFConverter( ):
+        m_bStarted( false )
+        {}
+
+    void setInputStream( css::uno::Reference< css::io::XInputStream > const &r ) { m_in = r; }
+    void setEncoding( const OString &s ) { m_sEncoding = s; }
+
+
+    // @param nMaxToRead The number of chars that should be read. Note that this is not an exact number:
+    //                   fewer or more bytes than requested may be returned.
+    /// @throws css::io::IOException
+    /// @throws css::io::NotConnectedException
+    /// @throws css::io::BufferSizeExceededException
+    /// @throws css::uno::RuntimeException
+    sal_Int32 readAndConvert( css::uno::Sequence<sal_Int8> &seq , sal_Int32 nMaxToRead );
+
+private:
+
+    // Called only on the first Sequence of bytes. Tries to figure out the file format and encoding information.
+    // @return true when encoding information could be retrieved
+    // @return false when no encoding information was found in the file
+    bool scanForEncoding( css::uno::Sequence<sal_Int8> &seq );
+
+    // Called only on the first Sequence of bytes. Tries to figure out
+    // whether enough data is available to scan for the encoding.
+    // @return true when the encoding is retrievable
+    // @return false when more data is needed
+    static bool isEncodingRecognizable( const css::uno::Sequence< sal_Int8 > & seq );
+
+    // Removes the encoding attribute when it is within the text (in the first line).
+    static void removeEncoding( css::uno::Sequence<sal_Int8> &seq );
+
+    // Initializes decoding depending on the m_sEncoding setting
+    void initializeDecoding();
+private:
+    css::uno::Reference< css::io::XInputStream > m_in;
+
+    bool m_bStarted;
+    OString m_sEncoding;
+
+    std::unique_ptr<Text2UnicodeConverter> m_pText2Unicode;
+    std::unique_ptr<Unicode2TextConverter> m_pUnicode2Text;
+};
+}
+
+#endif // INCLUDED_SAX_INC_XML2UTF_HXX
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */