summaryrefslogtreecommitdiffstats
path: root/sax/inc
diff options
context:
space:
mode:
Diffstat (limited to 'sax/inc')
-rw-r--r--sax/inc/xml2utf.hxx129
1 files changed, 129 insertions, 0 deletions
diff --git a/sax/inc/xml2utf.hxx b/sax/inc/xml2utf.hxx
new file mode 100644
index 0000000000..ead6ac1143
--- /dev/null
+++ b/sax/inc/xml2utf.hxx
@@ -0,0 +1,129 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+#ifndef INCLUDED_SAX_INC_XML2UTF_HXX
+#define INCLUDED_SAX_INC_XML2UTF_HXX
+
+#include <sal/config.h>
+
+#include <memory>
+
+#include <sal/types.h>
+#include <rtl/string.hxx>
+
+#include <com/sun/star/io/XInputStream.hpp>
+
+namespace sax_expatwrap {
+
+class Text2UnicodeConverter // takes sal_Int8 sequences and returns sal_Unicode sequences (see convert)
+{
+ // NOTE(review): a destructor is declared but no copy operations are; confirm instances are never copied.
+public:
+ Text2UnicodeConverter( const OString & sEncoding ); // NOTE(review): single-argument constructor, not marked explicit
+ ~Text2UnicodeConverter();
+ // Presumably converts the passed bytes using the encoding given at construction - confirm in the implementation.
+ css::uno::Sequence < sal_Unicode > convert( const css::uno::Sequence<sal_Int8> & );
+ bool canContinue() const { return m_bCanContinue; } // plain accessor for m_bCanContinue
+
+private:
+ void init( rtl_TextEncoding encoding ); // private setup helper; defined outside this header
+
+ rtl_TextToUnicodeConverter m_convText2Unicode; // rtl converter object
+ rtl_TextToUnicodeContext m_contextText2Unicode; // rtl conversion context that goes with the converter
+ bool m_bCanContinue; // value reported by canContinue()
+ bool m_bInitialized; // presumably records whether init() succeeded - TODO confirm
+ css::uno::Sequence<sal_Int8> m_seqSource; // presumably input bytes kept between convert() calls - TODO confirm
+};
+
+/*----------------------------------------
+*
+* Unicode2TextConverter
+* Takes sal_Unicode buffers and returns sal_Int8 sequences (see convert).
+**-----------------------------------------*/
+class Unicode2TextConverter
+{
+public:
+ Unicode2TextConverter( rtl_TextEncoding encoding ); // NOTE(review): single-argument constructor, not marked explicit
+ ~Unicode2TextConverter(); // NOTE(review): no copy operations are declared; confirm instances are never copied
+ // Takes a pointer to sal_Unicode data plus its length (nLength); presumably encodes it with the constructor's encoding.
+ css::uno::Sequence<sal_Int8> convert( const sal_Unicode * , sal_Int32 nLength );
+
+private:
+ rtl_UnicodeToTextConverter m_convUnicode2Text; // rtl converter object
+ rtl_UnicodeToTextContext m_contextUnicode2Text; // rtl conversion context that goes with the converter
+ css::uno::Sequence<sal_Unicode> m_seqSource; // presumably characters kept between convert() calls - TODO confirm
+};
+
+
+/*----------------------------------------
+*
+* XMLFile2UTFConverter
+* Holds an input stream, an encoding name and two lazily created converters.
+**-----------------------------------------*/
+class XMLFile2UTFConverter
+{
+public:
+ XMLFile2UTFConverter( ): // only m_bStarted is set explicitly; the other members are default-constructed
+ m_bStarted( false )
+ {}
+
+ void setInputStream( css::uno::Reference< css::io::XInputStream > const &r ) { m_in = r; } // stores the stream reference in m_in
+ void setEncoding( const OString &s ) { m_sEncoding = s; } // stores the encoding name in m_sEncoding
+
+ // NOTE(review): the meaning of the sal_Int32 return value is not documented in this header.
+ // @param nMaxToRead The amount of data that should be read. This is only a hint, not an exact
+ // number: fewer or more bytes than requested may be returned.
+ /// @throws css::io::IOException
+ /// @throws css::io::NotConnectedException
+ /// @throws css::io::BufferSizeExceededException
+ /// @throws css::uno::RuntimeException
+ sal_Int32 readAndConvert( css::uno::Sequence<sal_Int8> &seq , sal_Int32 nMaxToRead );
+
+private:
+
+ // Called only on the first sequence of bytes. Tries to figure out file format and encoding information.
+ // @return TRUE, when encoding information could be retrieved
+ // @return FALSE, when no encoding information was found in file
+ bool scanForEncoding( css::uno::Sequence<sal_Int8> &seq );
+
+ // Called only on the first sequence of bytes. Tries to figure out
+ // whether enough data is available to scan for the encoding.
+ // @return TRUE, when encoding is retrievable
+ // @return FALSE, when more data is needed
+ static bool isEncodingRecognizable( const css::uno::Sequence< sal_Int8 > & seq );
+
+ // When the encoding attribute is within the text (in the first line), it is removed from seq.
+ static void removeEncoding( css::uno::Sequence<sal_Int8> &seq );
+
+ // Initializes decoding depending on the m_sEncoding setting
+ void initializeDecoding();
+private: // data members
+ css::uno::Reference< css::io::XInputStream > m_in; // set by setInputStream()
+
+ bool m_bStarted; // false after construction; presumably set once the first bytes were processed - TODO confirm
+ OString m_sEncoding; // set by setEncoding()
+
+ std::unique_ptr<Text2UnicodeConverter> m_pText2Unicode; // empty after construction; created elsewhere (not in this header)
+ std::unique_ptr<Unicode2TextConverter> m_pUnicode2Text; // empty after construction; created elsewhere (not in this header)
+};
+}
+
+#endif // INCLUDED_SAX_INC_XML2UTF_HXX
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */